Internet Explorer exposes several useful COM interfaces that we can use to extract all the links on a web page.
I. Basic Principles
First, call get_links on the IHTMLDocument2 interface to obtain an IHTMLElementCollection interface. Then loop over that collection and query each element for the IHTMLAnchorElement interface; its get_href method returns the URL we want. Repeating this for every element in the collection gives us all the links on the web page.
II. Specific Code
Code
{
    // Collects the hyperlinks of every open Internet Explorer window into
    // m_LinksList (column 0: href, column 1: host name) and logs each
    // non-empty host name.
    //
    // NOTE(review): this listing was recovered from a case-mangled copy, so the
    // spelling of names declared outside this snippet (m_LinksList, m_LinksNum,
    // m_spSHWinds, Log) is reconstructed -- confirm against the class
    // declaration. Log is assumed to accept an LPCTSTR.
    //
    // Fixes relative to the original listing:
    //  * elements that are not anchors (get_links also returns <area>
    //    elements) are skipped instead of dereferencing a stale/null pointer
    //    and copying an uninitialised BSTR;
    //  * the BSTRs returned by get_href/get_hostname are freed;
    //  * COM smart pointers are scoped per window/per element, so they are
    //    always empty when their address is taken and no stale document from
    //    a previous window is reused;
    //  * no fixed-size text buffer is used, so long URLs cannot overflow it;
    //  * rows are appended and addressed by the index InsertItem returns.

    // TODO: add your control notification handler code here

    // Release any browser pointers stored as item data before clearing the
    // list. Nothing in this snippet stores item data; presumably another part
    // of the class does.
    const int oldItemCount = m_LinksList.GetItemCount();
    for (int row = 0; row < oldItemCount; ++row) {
        IWebBrowser2* pBrowser =
            reinterpret_cast<IWebBrowser2*>(m_LinksList.GetItemData(row));
        if (pBrowser) {
            pBrowser->Release();
        }
    }
    m_LinksList.DeleteAllItems();
    m_LinksNum = 0;

    Log(_T("**************************************************************"));
    Log(_T("\r\n"));

    if (m_spSHWinds) {
        // NOTE(review): GetCount()/Item() look like #import wrapper methods,
        // which report failure by throwing _com_error -- confirm the caller
        // handles that.
        const long windowCount = m_spSHWinds->GetCount();
        for (long windowIdx = 0; windowIdx < windowCount; ++windowIdx) {
            _variant_t windowIndex = windowIdx;
            IDispatchPtr spDisp = m_spSHWinds->Item(windowIndex);
            SHDocVw::IWebBrowser2Ptr spBrowser(spDisp);  // smart pointer for one IE window
            if (!spBrowser) {
                continue;
            }

            // Fresh pointers per window: a failure here must not leave the
            // previous window's document behind.
            CComPtr<IDispatch> spDocDispatch;
            if (FAILED(spBrowser->get_Document(&spDocDispatch))) {
                continue;
            }
            CComQIPtr<IHTMLDocument2, &IID_IHTMLDocument2> pDoc2;
            pDoc2 = spDocDispatch;
            if (pDoc2 == NULL) {
                continue;  // the window does not host an HTML document
            }

            CComPtr<IHTMLElementCollection> pElementCol;
            if (FAILED(pDoc2->get_links(&pElementCol)) || pElementCol == NULL) {
                continue;
            }

            long linkCount = 0;
            if (FAILED(pElementCol->get_length(&linkCount)) || linkCount == 0) {
                continue;
            }

            m_LinksNum = m_LinksNum + linkCount;
            UpdateData(FALSE);

            for (long linkIdx = 0; linkIdx < linkCount; ++linkIdx) {
                _variant_t index = linkIdx;

                CComPtr<IDispatch> spItem;
                if (FAILED(pElementCol->item(index, index, &spItem)) || spItem == NULL) {
                    continue;
                }

                CComPtr<IHTMLAnchorElement> pAnchor;
                if (FAILED(spItem->QueryInterface(IID_IHTMLAnchorElement,
                                                  reinterpret_cast<void**>(&pAnchor))) ||
                    pAnchor == NULL) {
                    continue;  // not an <a> element
                }

                BSTR rawHref = NULL;
                if (FAILED(pAnchor->get_href(&rawHref))) {
                    continue;
                }
                // Attach (fCopy == false): the _bstr_t now owns and frees the BSTR.
                const _bstr_t href(rawHref, false);
                LPCTSTR hrefText = href;

                // Append the row and use the index actually assigned to it.
                const int row = m_LinksList.InsertItem(m_LinksList.GetItemCount(),
                                                       hrefText ? hrefText : _T(""));
                if (row < 0) {
                    continue;
                }
                m_LinksList.SetCheck(row, TRUE);

                BSTR rawHost = NULL;
                if (FAILED(pAnchor->get_hostname(&rawHost))) {
                    continue;
                }
                const _bstr_t host(rawHost, false);
                if (host.length() != 0) {
                    LPCTSTR hostText = host;
                    m_LinksList.SetItemText(row, 1, hostText);
                    Log(hostText);
                    Log(_T("\r\n"));
                }
            }
        }
    }
}
This program was compiled with VC7 on Windows XP. For details, see the source code!