Function:
Parse raw HTML source into an IHTMLDocument2 object, cast that IHTMLDocument2 to IHTMLDocument3, and walk its IHTMLDOMNode tree to display the HTML as a tree. This parsing does not execute any scripts and does not download any resources from the Internet; it is a plain text parse.
(The Parse(string) method is a lightweight parsing implementation. This code does not download any resources from the Internet and does not execute any script; it only parses.
Parsing is done through MSHTML. To use this code correctly, you need to add a reference to the MSHTML assembly (mshtml).)
To compile the following code correctly, you also need to turn on the compiler's unsafe option (allow unsafe code).
How: go to "Project" > "<ApplicationName> Properties", open "Configuration Properties" in the dialog box, select "Build", and change "Allow Unsafe Code Blocks" to "True".
[C#]
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Runtime.InteropServices;
using System.Windows.Forms;
using mshtml;
namespace WindowsApplication1
{
    /// <summary>
    /// Managed declaration of the COM IPersistStreamInit interface
    /// (IID 7FD52380-4E07-101B-AE2D-08002B2EC713). It is used here only to call
    /// <see cref="InitNew"/> on a freshly created MSHTML document before markup
    /// is parsed into it.
    /// </summary>
    /// <remarks>
    /// The member order is the COM vtable order and must not be changed.
    /// UCOMIStream is kept because the surrounding code targets the .NET 1.x API surface.
    /// </remarks>
    [ComVisible(true), ComImport(), Guid("7fd52380-4e07-101b-ae2d-08002b2ec713"), InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
    public interface IPersistStreamInit
    {
        void GetClassID([In, Out] ref Guid pClassID);

        [return: MarshalAs(UnmanagedType.I4)] [PreserveSig]
        int IsDirty();

        void Load([In, MarshalAs(UnmanagedType.Interface)] UCOMIStream pstm);

        void Save([In, MarshalAs(UnmanagedType.Interface)] UCOMIStream pstm,
            [In, MarshalAs(UnmanagedType.I4)] int fClearDirty);

        // The native signature takes a pointer to a 64-bit size; "out long" marshals
        // that correctly (LPArray marshaling on a scalar long is not valid).
        void GetSizeMax(out long pcbSize);

        void InitNew();
    }

    /// <summary>
    /// Sample form: a button that loads an HTML file, parses it with MSHTML
    /// (without running scripts or downloading anything) and shows the resulting
    /// DOM in a TreeView.
    /// </summary>
    public class Form1 : System.Windows.Forms.Form
    {
        // NOTE(review): the path literal was garbled in the published listing and has
        // been reconstructed as a best guess - point this at a real HTML file.
        private const string SampleHtmlPath = @"D:\netC#Program\HTML\163.htm";

        private System.Windows.Forms.Button button1;
        private System.Windows.Forms.TreeView treeView1;

        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.Container components = null;

        public Form1()
        {
            //
            // Required for Windows Form Designer support
            //
            InitializeComponent();
        }

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        /// <param name="disposing">True when called from Dispose(); false when called from the finalizer.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (components != null)
                {
                    components.Dispose();
                }
            }
            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code
        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.button1 = new System.Windows.Forms.Button();
            this.treeView1 = new System.Windows.Forms.TreeView();
            this.SuspendLayout();
            //
            // button1
            //
            this.button1.Location = new System.Drawing.Point(24, 16);
            this.button1.Name = "button1";
            this.button1.Size = new System.Drawing.Size(88, 24);
            this.button1.TabIndex = 0;
            this.button1.Text = "button1";
            this.button1.Click += new System.EventHandler(this.button1_Click);
            //
            // treeView1
            //
            this.treeView1.ImageIndex = -1;
            this.treeView1.Location = new System.Drawing.Point(280, 96);
            this.treeView1.Name = "treeView1";
            this.treeView1.SelectedImageIndex = -1;
            this.treeView1.Size = new System.Drawing.Size(288, 224);
            this.treeView1.TabIndex = 1;
            //
            // Form1
            //
            this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
            this.ClientSize = new System.Drawing.Size(664, 333);
            this.Controls.Add(this.treeView1);
            this.Controls.Add(this.button1);
            this.Name = "Form1";
            this.Text = "Form1";
            this.ResumeLayout(false);
        }
        #endregion

        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        [STAThread]
        static void Main()
        {
            Application.Run(new Form1());
        }

        /// <summary>
        /// Parses an HTML string into an MSHTML document via IMarkupServices.ParseString.
        /// </summary>
        /// <param name="s">HTML source text.</param>
        /// <returns>
        /// The parsed markup container viewed as IHTMLDocument2, or null when
        /// <paramref name="s"/> is null, when the document does not expose the required
        /// interfaces, or when no container is produced.
        /// </returns>
        unsafe IHTMLDocument2 Parse(string s)
        {
            if (s == null)
            {
                // StringToHGlobalUni(null) yields IntPtr.Zero, which must not be dereferenced.
                return null;
            }

            IHTMLDocument2 document = new HTMLDocumentClass();

            // The document has to be initialised before markup services are used on it.
            IPersistStreamInit persist = document as IPersistStreamInit;
            if (persist == null)
            {
                return null;
            }
            persist.InitNew();

            IMarkupServices markupServices = document as IMarkupServices;
            if (markupServices == null)
            {
                return null;
            }

            IMarkupContainer container = null;
            IMarkupPointer start, end;
            markupServices.CreateMarkupPointer(out start);
            markupServices.CreateMarkupPointer(out end);

            // ParseString wants a pointer to a null-terminated UTF-16 buffer.
            IntPtr source = Marshal.StringToHGlobalUni(s);
            try
            {
                markupServices.ParseString(ref *(ushort*)source.ToPointer(), 0, out container, start, end);
            }
            finally
            {
                // The buffer is HGlobal memory, not a COM object: free it with
                // FreeHGlobal (Marshal.Release is only for IUnknown pointers).
                Marshal.FreeHGlobal(source);
            }

            return container as IHTMLDocument2;
        }

        /// <summary>
        /// Loads the sample HTML file, parses it and rebuilds the tree view from its DOM.
        /// </summary>
        private void button1_Click(object sender, System.EventArgs e)
        {
            string fileName = SampleHtmlPath;
            if (!File.Exists(fileName))
            {
                Console.WriteLine("the file does not exist");
                return;
            }

            string html;
            // "using" guarantees the reader (and the underlying file) is closed even if reading throws.
            using (StreamReader reader = new StreamReader(File.OpenRead(fileName), System.Text.Encoding.Default))
            {
                html = reader.ReadToEnd();
            }

            IHTMLDocument2 doc2 = Parse(html);
            if (doc2 == null)
            {
                Console.WriteLine("the HTML could not be parsed");
                return;
            }
            Console.WriteLine(doc2.styleSheets.length);

            IHTMLDocument3 htmlDocument = (IHTMLDocument3)doc2;
            IHTMLDOMNode rootDomNode = htmlDocument.documentElement as IHTMLDOMNode;
            if (rootDomNode == null)
            {
                Console.WriteLine("the document has no root element");
                return;
            }

            treeView1.BeginUpdate();
            try
            {
                // Start from an empty tree so repeated clicks do not pile up duplicate roots.
                treeView1.Nodes.Clear();
                TreeNode root = treeView1.Nodes.Add("html");
                InsertDOMNodes(rootDomNode, root);
            }
            finally
            {
                treeView1.EndUpdate();
            }
        }

        /// <summary>
        /// Recursively mirrors the children of a DOM node under a tree node.
        /// Text nodes are shown by their trimmed text (blank ones are skipped);
        /// every other node is shown by its node name and then descended into.
        /// </summary>
        /// <param name="parentNode">DOM node whose children are to be added.</param>
        /// <param name="tree_node">Tree node that receives the children.</param>
        private void InsertDOMNodes(IHTMLDOMNode parentNode, TreeNode tree_node)
        {
            if (parentNode == null || tree_node == null || !parentNode.hasChildNodes())
            {
                return;
            }

            IHTMLDOMChildrenCollection children = (IHTMLDOMChildrenCollection)parentNode.childNodes;
            int length = children.length;
            for (int i = 0; i < length; i++)
            {
                IHTMLDOMNode childNode = (IHTMLDOMNode)children.item(i);
                string nodeName = childNode.nodeName;

                if (nodeName == "#text")
                {
                    object rawValue = childNode.nodeValue;
                    string text = (rawValue == null) ? "" : rawValue.ToString().Trim();
                    if (text.Length > 0)
                    {
                        tree_node.Nodes.Add(text);
                    }
                }
                else
                {
                    TreeNode childTreeNode = tree_node.Nodes.Add(nodeName);
                    InsertDOMNodes(childNode, childTreeNode);
                }
            }
        }
    }
}