C#: Read HTML file content and write it to a Notepad text file

Source: Internet
Author: User
C#: Read HTML file content and write it to a Notepad text file


 // Reads every .htm file in the folder entered in textBox1, extracts the alarm
 // cause (text inside the first <H4>) and the remaining page text (with the
 // <title>, the first <ul><li>...</ul> block and all HTML tags removed), and
 // appends the result to a text file in the same folder.
 //
 // NOTE(review): the published snippet lost its identifier casing; the names
 // "textBox1" and "ChinaString" below are reconstructed guesses and must be
 // checked against the real form class before this is compiled.
 try
 {
     int totalFile = 0;
     // string dirPath = @"E:\chfumetarnet\bsc6810 alarm\";
     string dirPath = this.textBox1.Text.Trim();
     if (dirPath.Length == 0)
     {
         MessageBox.Show("Enter the HTML file path!");
     }
     else
     {
         DirectoryInfo dirInfo = new DirectoryInfo(dirPath);
         FileInfo[] files = dirInfo.GetFiles();

         // Path.Combine inserts the directory separator only when it is missing,
         // so the manual trailing-backslash check is no longer needed.
         string fileName = Path.Combine(dirPath, "Warning experience database information.txt");

         // Loop-invariant objects are created once instead of once per file.
         Encoding encode = Encoding.GetEncoding("gb2312");
         Regex tagRegex = new Regex(@"<[^>]+>");                                   // any HTML tag, opening or closing
         Regex listRegex = new Regex("<ul><li>.*?</ul>", RegexOptions.IgnoreCase); // first <ul><li> ... </ul> block
         const string lineBreakPattern = @"[\n\r\t]";                              // line breaks and tab characters

         // File.AppendText creates the file when it does not exist yet, and the
         // using block guarantees the writer is closed even if a file fails.
         using (StreamWriter sw = File.AppendText(fileName))
         {
             foreach (FileInfo fileInfo in files)
             {
                 // Traverse all HTM files; the extension is compared case-insensitively.
                 if (!fileInfo.Extension.Equals(".htm", StringComparison.OrdinalIgnoreCase))
                 {
                     continue;
                 }

                 totalFile = totalFile + 1;

                 // The files are local, so read them directly rather than via WebRequest.
                 string html = File.ReadAllText(fileInfo.FullName, encode);

                 // Alarm cause: text inside the first <H4>. Using the capture group
                 // yields "" when there is no <H4>, instead of a Substring exception.
                 Match causeMatch = Regex.Match(html, "<H4>([^<]*)</H4>", RegexOptions.IgnoreCase);
                 string cause = Regex.Replace(causeMatch.Groups[1].Value, lineBreakPattern, " ").Trim();

                 // Whole <title>...</title> element, tags included, with line breaks removed.
                 Match titleMatch = Regex.Match(html, "<title>([^<]*)</title>", RegexOptions.IgnoreCase);
                 string titleName = Regex.Replace(titleMatch.Value, lineBreakPattern, "").Trim();

                 // Remove line breaks and tabs so the <ul> block can be matched on one line.
                 html = Regex.Replace(html, lineBreakPattern, "");

                 // Drop the complete source between <ul><li> and </ul>.
                 Match listMatch = listRegex.Match(html);
                 if (listMatch.Success)
                 {
                     html = html.Replace(listMatch.Value, "");
                 }

                 // Drop <title>...</title>; string.Replace throws on an empty search string.
                 if (titleName.Length > 0)
                 {
                     html = html.Replace(titleName, "");
                 }

                 html = tagRegex.Replace(html, " ");   // filter out HTML tags
                 html = html.Replace("&nbsp;", "");    // remove non-breaking-space entities

                 // The last space-separated token of the heading is taken as the alarm cause.
                 string[] causeParts = cause.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                 string zhCause = causeParts.Length > 0 ? causeParts[causeParts.Length - 1] : "";

                 sw.WriteLine("The " + totalFile + " Files: " + fileInfo.Name);
                 sw.WriteLine("----- Alarm reason ------:");
                 // sw.WriteLine(cause);
                 // NOTE(review): ChinaString is defined outside this snippet; its behaviour is not visible here.
                 zhCause = this.ChinaString(zhCause);
                 sw.WriteLine(zhCause);
                 sw.WriteLine("----- Processing experience ------:");
                 sw.WriteLine(html);
                 sw.WriteLine();
                 sw.Flush(); // persist each file's section before moving on to the next file
             }
         }

         MessageBox.Show("Operation successful!", "Prompt", MessageBoxButtons.OK, MessageBoxIcon.Information);
     }
 }
 catch (Exception ee)
 {
     // Any failure (missing folder, unreadable file, ...) is reported to the user here.
     MessageBox.Show("Operation failed: " + ee.Message);
 }

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.