Sometimes it is necessary to read an HTML file, replace some sections of it, and output the result. A good way to do this is to use regular expressions.
For example:
Read the HTML file:
string strHTMLBefore = ReadHTML(@"c:\example.html");
Execute regEx operations:
string strHTMLAfter = ExecuteRegExOperations(strHTMLBefore);
Print string to a literal:
litHTMLAfterReplace.Text = strHTMLAfter;
C#:
/// <summary>
/// Reads an HTML file from the file system and returns its content.
/// </summary>
/// <param name="filePath">Path to the file on disk.</param>
/// <returns>
/// The complete file content, or <c>null</c> when the file does not exist
/// (in that case <c>lblMessage</c> is made visible and set to "file not found").
/// </returns>
private string ReadHTML(string filePath)
{
    // Guard clause: report the missing file on the page and signal it to the caller with null.
    if (!File.Exists(filePath))
    {
        lblMessage.Visible = true;
        lblMessage.Text = "file not found";
        return null;
    }

    // The using statement disposes the reader (and releases the file handle)
    // even when ReadToEnd throws, which a plain Close() call would not.
    using (StreamReader reader = new StreamReader(filePath, Encoding.Default))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Replaces every section enclosed by the
/// <c>&lt;!--###JS24Begin:Name_Title###--&gt;</c> and
/// <c>&lt;!--###JS24End:Name_Title###--&gt;</c> markers (markers included)
/// with the replacement text.
/// </summary>
/// <param name="strHTMLBefore">Input HTML; may be <c>null</c>.</param>
/// <returns>
/// The HTML with all marked sections replaced. The input is returned unchanged when
/// no complete Begin/End pair is found, and <c>null</c> is returned for <c>null</c> input.
/// </returns>
private string ExecuteRegExOperations(string strHTMLBefore)
{
    // Regex.Replace throws on null input; pass null through instead of crashing.
    if (strHTMLBefore == null)
    {
        return null;
    }

    // vacancyTitle:
    // A single lazy ".*?" with RegexOptions.Singleline matches any character, including
    // line breaks. The former "([\r,\n]|.)*?" had overlapping alternatives ('\r' and ','
    // matched both branches), which caused exponential backtracking - an apparent endless
    // loop at 100% CPU - whenever the End marker was missing.
    const string patternExample = "<!--###JS24Begin:Name_Title###-->.*?<!--###JS24End:Name_Title###-->";

    return Regex.Replace(strHTMLBefore, patternExample, "string to be inserted", RegexOptions.Singleline);
}
example.html:
<td width="40%" style="padding: 5px; border: 1px solid #666666">
<!--###JS24Begin:Name_Title###-->this text will be replaced by regex<!--###JS24End:Name_Title###-->
</td>
It works fine, but one malfunction must be described. If the regex does not find the "Begin" tag in the aspx code (for example, because of a typo), the program keeps running and the text is simply not replaced.
That may be OK. But if the regex does not find the "End" tag, the program ends up in what looks like an endless loop and your CPU usage will go up to 100% :-)
So far I haven't found a way to avoid this. Do you have an idea?