Have you ever seen the "Download all links from Internet Express" feature of Internet Express? Here is one way to implement it:
Internet Explorer (IE) exposes several useful interfaces that we can use to extract all the links on a web page.
I. Basic Principles
First, call get_links on IHTMLDocument2 to obtain an IHTMLElementCollection interface, and then use the collection's item method to obtain an IHTMLAnchorElement for each entry. The get_href method of the IHTMLAnchorElement interface returns the link address we want; by calling it in a loop over the collection, we can get all the links on the web page.
II. Specific Code
{
TCHAR HostName [2 * MAX_PATH];
CComPtr <IDispatch> spDispatch;
CComQIPtr <IHTMLDocument2, & IID_IHTMLDocument2> pDoc2;
CComPtr <IHTMLElementCollection> pElementCol;
CComPtr <IHTMLAnchorElement> pLoct;
// TODO: Add your control notification handler code here
Int n = m_LinksList.GetItemCount (); // GetCount ();
For (int I = 0; I <n; I ++ ){
IWebBrowser2 * pBrowser = (IWebBrowser2 *) m_LinksList.GetItemData (I );
If (pBrowser ){
Pbroase-> Release ();
}
}
M_LinksList.DeleteAllItems ();
M_LinksNum = 0;
Log ("************************************* *************************");
Log ("");
If (m_spSHWinds ){
Int n = m_spSHWinds-> GetCount ();
For (int I = 0; I <n; I ++ ){
_ Variant_t v = (long) I;
IDispatchPtr spDisp = m_spSHWinds-> Item (v );
SHDocVw: IWebBrowser2Ptr spBrowser (spDisp); // generate a smart pointer for an IE window
If (spBrowser ){
If (SUCCEEDED (spBrowser-> get_Document (& spDispatch )))
PDoc2 = spDispatch;
If (pDoc2! = NULL)
{
If (SUCCEEDED (pDoc2-> get_links (& pElementCol )))
{
// AfxMessageBox ("IHTMLElementCollection ");
Long p = 0;
If (SUCCEEDED (pElementCol-> get_length (& p )))
If (p! = 0)
{
M_LinksNum = m_LinksNum + p;
UpdateData (FALSE );
For (long I = 0; I <= (p-1); I ++)
{
BSTR String;
_ Variant_t index = I;
If (SUCCEEDED (pElementCol-> item (index, index, & spDispatch )))
If (SUCCEEDED (spDispatch-> QueryInterface (IID_IHTMLAnchorElement, (void **) & pLoct )))
PLoct-> get_href (& String );
ZeroMemory (HostName, 2 * MAX_PATH );
Lstrcpy (HostName, _ bstr_t (String ));
M_LinksList.InsertItem (I, HostName );
M_LinksList.SetCheck (I, TRUE );
PLoct-> get_hostname (& String );
ZeroMemory (HostName, 2 * MAX_PATH );
Lstrcpy (HostName, _ bstr_t (Strin