// C#: read the content of HTML files and write it to a text (Notepad) file
// Reads every .htm file in the folder typed into the text box, extracts the
// alarm cause from the <h4> heading, removes the <title> element and the first
// <ul><li>...</ul> list, strips the remaining HTML tags and appends the result
// to a text report stored in the same folder.
//
// NOTE(review): `textBox1` and `Chinastring` are declared outside this fragment;
// their exact spelling/casing could not be confirmed from here - verify against
// the form's designer file and the helper's definition.
// User-facing message text and the report file name are kept exactly as found.
try
{
    // Example folder: E:\chfumetarnet\bsc6810 alarm\
    string dirPath = this.textBox1.Text.Trim();

    if (dirPath == "")
    {
        MessageBox.Show(" Enter the HTML file path! ");
    }
    else
    {
        // Make sure the folder path ends with a separator so names can be appended.
        if (!dirPath.EndsWith("\\", StringComparison.Ordinal))
        {
            dirPath += "\\";
        }

        // The directory listing is taken before the report file is opened.
        FileInfo[] files = new DirectoryInfo(dirPath).GetFiles();
        string fileName = dirPath + " Warning experience database information .txt ";

        // Loop-invariant helpers are built once instead of once per file.
        Encoding encode = Encoding.GetEncoding("gb2312");
        RegexOptions tagOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;
        // Line breaks and tabs only; the former "[\n|\r|\t]" also removed '|' characters.
        string lineBreaksAndTabs = "[\n\r\t]";
        // Matches any opening or closing HTML tag.
        Regex tagRegex = new Regex(@"<[^>]+>|</[^>]+>");
        // Content from <ul><li> up to the first </ul>. Case is ignored because the
        // original casing of the tags in the pattern could not be recovered.
        Regex listRegex = new Regex("<ul><li>(?<Key>.*?)</ul>", RegexOptions.IgnoreCase);

        int totalFile = 0;

        // AppendText creates the file when it does not exist yet, so no
        // File.Exists branch is needed; `using` closes the writer even on failure.
        using (StreamWriter sw = File.AppendText(fileName))
        {
            foreach (FileInfo fileInfo in files)
            {
                // Only HTM files are processed.
                if (!string.Equals(fileInfo.Extension, ".htm", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                totalFile++;
                string strHtml = File.ReadAllText(fileInfo.FullName, encode);

                // Alarm cause: the text between <h4> and </h4>. Taking the capture
                // group avoids the Substring(4, Length - 9) call, which threw when
                // the page had no <h4> heading.
                Match headingMatch = Regex.Match(strHtml, "<h4>([^<]*)</h4>", tagOptions);
                string cause = headingMatch.Success
                    ? Regex.Replace(headingMatch.Groups[1].Value, lineBreaksAndTabs, " ").Trim()
                    : "";

                // Whole <title>...</title> element, flattened the same way the
                // document is flattened below so it can be removed by plain text replace.
                Match titleMatch = Regex.Match(strHtml, "<title>([^<]*)</title>", tagOptions);
                string titleName = Regex.Replace(titleMatch.Value, lineBreaksAndTabs, "").Trim();

                // Flatten the document, then drop the list and the title.
                strHtml = Regex.Replace(strHtml, lineBreaksAndTabs, "");

                Match listMatch = listRegex.Match(strHtml);
                if (listMatch.Success)
                {
                    strHtml = strHtml.Replace(listMatch.Value, "");
                }

                // string.Replace rejects an empty search string, so skip it when
                // the page has no <title>.
                if (titleName.Length > 0)
                {
                    strHtml = strHtml.Replace(titleName, "");
                }

                strHtml = tagRegex.Replace(strHtml, " ");   // filter out HTML tags
                strHtml = strHtml.Replace("&nbsp;", "");    // remove non-breaking-space entities

                // The last space-separated token of the heading is the alarm cause.
                string[] arr = cause.Split(' ');
                string zhCause = arr[arr.Length - 1];

                sw.WriteLine("The " + totalFile + " Files: " + fileInfo.Name);
                sw.WriteLine(" ----- Alarm reason ------: ");
                // Helper defined elsewhere in the class; presumably normalises the
                // cause text - confirm against its definition.
                zhCause = this.Chinastring(zhCause);
                sw.WriteLine(zhCause);
                sw.WriteLine(" ----- Processing experience ------: ");
                sw.WriteLine(strHtml);
                sw.WriteLine();
                // Flush per file so finished entries survive a later failure.
                sw.Flush();
            }
        }

        MessageBox.Show(" Operation successful! ", " Prompt ", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
}
catch (Exception ee)
{
    MessageBox.Show(" Operation failed: " + ee.Message);
}