The code is as follows:
<?php
// Route libxml parse errors into libxml's internal error buffer instead of
// emitting them as PHP warnings. The previous setting is kept in $oldSetting
// so that it can be restored later if needed.
$oldSetting = libxml_use_internal_errors(true);
// Discard any libxml errors that were already buffered.
libxml_clear_errors();
/**
 *
 * +------------------------------------
 * | PHP5 Framework - 2011
 * | Web Site: www.iblue.cc
 * | E-mail: mejinke@gmail.com
 * | Date: 2012-10-12
 * +------------------------------------
 *
 * @desc   HTML parser
 * @author jingke
 */
/**
 * Small XPath-based wrapper around an HTML document or one node inside it.
 *
 * An instance with an empty node path stands for the whole document; an
 * instance with a non-empty node path stands for the first node matching that
 * absolute XPath. Instances returned by find() share the DOMXPath of the
 * instance they were found from.
 */
class XF_HtmlDom
{
    /** @var DOMXPath|null XPath evaluator for the loaded document; null until one is supplied or loaded. */
    private $_xpath = null;

    /** @var string Absolute XPath of the wrapped node; '' means "no specific node" (document level). */
    private $_nodePath = '';

    /**
     * @param DOMXPath|null $xpath    XPath evaluator to query against.
     * @param string        $nodePath Absolute XPath of the node this instance wraps.
     */
    public function __construct($xpath = null, $nodePath = '')
    {
        $this->_xpath = $xpath;
        // Normalise null (e.g. from DOMNode::getNodePath()) to the empty string.
        $this->_nodePath = (string) $nodePath;
    }

    /**
     * Fetch an HTML document and make it the target of subsequent queries.
     *
     * Strings starting with http:// or https:// are downloaded with cURL;
     * anything else is read with file_get_contents().
     *
     * @param string $url Remote URL or local path of the HTML document.
     *
     * @return XF_HtmlDom This instance, to allow chaining.
     *
     * @throws RuntimeException When the content cannot be fetched or is empty.
     */
    public function loadHtml($url)
    {
        ini_set('user_agent', 'Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17-Nexus');

        if (preg_match('#^https?://#i', $url)) {
            $content = $this->_fetchRemote($url);
        } else {
            $content = file_get_contents($url);
        }

        // Both fetchers return false on failure, and DOMDocument cannot parse
        // an empty string, so fail here with a clear message.
        if ($content === false || trim($content) === '') {
            throw new RuntimeException("Unable to load HTML from '" . $url . "'");
        }

        // Real-world HTML is rarely well-formed: buffer libxml's complaints
        // for the duration of the parse, then restore the caller's setting.
        $previous = libxml_use_internal_errors(true);
        $html = new DOMDocument();
        $html->loadHTML($content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $this->_xpath = new DOMXPath($html);
        // A node path that referred to a previous document is meaningless now.
        $this->_nodePath = '';

        return $this;
    }

    /**
     * Find nodes below this one with an XPath expression.
     *
     * At document level the expression is prefixed with '//' (match anywhere);
     * on a node it is prefixed with that node's path plus '/'.
     *
     * @param string   $query XPath expression relative to this node.
     * @param int|null $index Zero-based position of the single match to return;
     *                        null (or any non-numeric value) returns all matches.
     *
     * @return XF_HtmlDom[]|XF_HtmlDom List of matches when no index is given,
     *         otherwise the match at $index. If there is no such match (or no
     *         document is loaded) an empty XF_HtmlDom is returned, whose
     *         text()/getAttribute() yield false.
     */
    public function find($query, $index = null)
    {
        $wantList = !is_numeric($index);

        $nodes = false;
        if ($this->_xpath !== null) {
            // Build the expression locally: the stored node path must stay
            // intact so that repeated calls on this object keep working.
            $base = ($this->_nodePath === '') ? '//' : $this->_nodePath . '/';
            $nodes = $this->_xpath->query($base . $query);
        }

        // No document loaded, or the expression was rejected by the evaluator.
        if ($nodes === false) {
            return $wantList ? array() : new self();
        }

        if ($wantList) {
            $found = array();
            foreach ($nodes as $node) {
                $found[] = new self($this->_xpath, $node->getNodePath());
            }
            return $found;
        }

        $node = $nodes->item((int) $index);
        if ($node === null) {
            return new self();
        }

        return new self($this->_xpath, $node->getNodePath());
    }

    /**
     * Get the text content of the wrapped node.
     *
     * @return string|false The node's text content, or false when this
     *                      instance does not resolve to a node.
     */
    public function text()
    {
        $node = $this->_firstNode();

        return ($node === null) ? false : $node->textContent;
    }

    /**
     * Get the value of an attribute of the wrapped node.
     *
     * @param string $name Attribute name.
     *
     * @return string|false The attribute value, or false when this instance
     *                      does not resolve to an element node.
     */
    public function getAttribute($name)
    {
        $node = $this->_firstNode();
        // Only element nodes provide getAttribute().
        if (!($node instanceof DOMElement)) {
            return false;
        }

        return $node->getAttribute($name);
    }

    /**
     * Property-style access: "innertext" maps to text(), any other name is
     * looked up as an attribute via getAttribute().
     *
     * @param string $name Property name being read.
     *
     * @return string|false
     */
    public function __get($name)
    {
        if ($name === 'innertext') {
            return $this->text();
        }

        return $this->getAttribute($name);
    }

    /**
     * Download a remote document with cURL.
     *
     * @param string $url URL to download.
     *
     * @return string|false Response body, or false when the transfer failed.
     */
    private function _fetchRemote($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_REFERER, $url);
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:6.0) Gecko/20100101 Firefox/123456');
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        $content = curl_exec($ch);
        curl_close($ch);

        return $content;
    }

    /**
     * Resolve the stored node path to the first matching DOM node.
     *
     * @return DOMNode|null The node, or null when there is no document, no
     *                      node path, or no match.
     */
    private function _firstNode()
    {
        if ($this->_nodePath === '' || $this->_xpath === null) {
            return null;
        }

        $nodes = $this->_xpath->query($this->_nodePath);
        if ($nodes === false) {
            return null;
        }

        return $nodes->item(0);
    }
}
// Usage example: load a remote page and print the text of the first <a>
// inside the <td id="baidu"> cell.
$xp = new XF_HtmlDom();
$xp->loadHtml('http://www.aizhan.com/siteall/www.opendir.cn/');
// Index 0 asks find() for a single node rather than a list; the magic
// "innertext" property reads that node's text content.
$rows = $xp->find("td[@id='baidu']/a", 0)->innertext;
print_r($rows);