Fetching Tencent QQ microblog content through the microblog API: a PHP example

Source: Internet
Author: User
Tags cdata comments curl lowercase prepare prev strlen xpath

Just look at the code.

The code is as follows:



<?php


// Node types, stored in simple_html_dom_node::$nodetype.
define('HDOM_TYPE_ELEMENT', 1);
define('HDOM_TYPE_COMMENT', 2);
define('HDOM_TYPE_TEXT',    3);
define('HDOM_TYPE_ENDTAG',  4);
define('HDOM_TYPE_ROOT',    5);
define('HDOM_TYPE_UNKNOWN', 6);

// Quoting style recorded for each attribute value.
define('HDOM_QUOTE_DOUBLE', 0);
define('HDOM_QUOTE_SINGLE', 1);
define('HDOM_QUOTE_NO',     3);

// Keys of the per-node bookkeeping array simple_html_dom_node::$_.
define('HDOM_INFO_BEGIN',    0);
define('HDOM_INFO_END',      1);
define('HDOM_INFO_QUOTE',    2);
define('HDOM_INFO_SPACE',    3);
define('HDOM_INFO_TEXT',     4);
define('HDOM_INFO_INNER',    5);
define('HDOM_INFO_OUTER',    6);
define('HDOM_INFO_ENDSPACE', 7);

// helper functions
// -----------------------------------------------------------------------------
// Get an HTML DOM from a file or URL.
// All arguments are forwarded unchanged to file_get_contents(); the fetched
// text is parsed with tag/attribute names lower-cased.
// Returns the populated simple_html_dom instance.
function file_get_html() {
    $dom = new simple_html_dom;
    $args = func_get_args();
    $dom->load(call_user_func_array('file_get_contents', $args), true);
    return $dom;
}

// Get an HTML DOM from a string.
// $str       HTML text to parse.
// $lowercase passed through to simple_html_dom::load() (default true).
// Returns the populated simple_html_dom instance.
function str_get_html($str, $lowercase=true) {
    $dom = new simple_html_dom;
    $dom->load($str, $lowercase);
    return $dom;
}

// Dump an HTML DOM tree to standard output, one node per line.
// $node      object exposing ->tag, ->attr (array) and ->nodes (array of nodes).
// $show_attr when true, print each attribute as [name]=>"value" after the tag.
// $deep      current depth; each level is indented by four spaces.
function dump_html_tree($node, $show_attr=true, $deep=0) {
    $lead = str_repeat('    ', $deep);
    echo $lead.$node->tag;
    if ($show_attr && count($node->attr)>0) {
        echo '(';
        foreach ($node->attr as $k=>$v)
            // read through the property so the node's magic getter is used
            echo "[$k]=>\"".$node->$k.'", ';
        echo ')';
    }
    echo "\n";

    foreach ($node->nodes as $c)
        dump_html_tree($c, $show_attr, $deep+1);
}

// Get a DOM from a file or URL (deprecated; same behavior as file_get_html()).
// All arguments are forwarded unchanged to file_get_contents().
function file_get_dom() {
    $dom = new simple_html_dom;
    $args = func_get_args();
    $dom->load(call_user_func_array('file_get_contents', $args), true);
    return $dom;
}

// Get a DOM from a string (deprecated; same behavior as str_get_html()).
function str_get_dom($str, $lowercase=true) {
    $dom = new simple_html_dom;
    $dom->load($str, $lowercase);
    return $dom;
}

// simple html dom node
// -----------------------------------------------------------------------------
class simple_html_dom_node {
    public $nodetype = HDOM_TYPE_TEXT; // one of the HDOM_TYPE_* constants
    public $tag = 'text';
    public $attr = array();            // attribute name => value (true for valueless attributes)
    public $children = array();        // nodes linked with the parser's $is_child flag set
    public $nodes = array();           // every node linked under this one
    public $parent = null;
    public $_ = array();               // bookkeeping, keyed by the HDOM_INFO_* constants
    private $dom = null;               // owning simple_html_dom

// Bind the node to its owning document and append it to the document's
// flat node list.
function __construct($dom) {
    $this->dom = $dom;
    $dom->nodes[] = $this;
}

// Drop all references held by the node when it is destroyed.
function __destruct() {
    $this->clear();
}

// String conversion yields the node's outer text.
function __toString() {
    return $this->outertext();
}

// Clean up memory due to the PHP5 circular-reference memory leak:
// null out every link to the document and to related nodes.
function clear() {
    $this->dom = null;
    $this->nodes = null;
    $this->parent = null;
    $this->children = null;
}

// Dump this node's tree via dump_html_tree().
// $show_attr when true, attributes are printed next to each tag.
function dump($show_attr=true) {
    dump_html_tree($this, $show_attr);
}

// Returns the parent of this node (null when none is set).
function parent() {
    return $this->parent;
}

// Returns the children of this node.
// $idx -1 (default) returns the whole children array; otherwise the child at
//      that index, or null when no such child exists.
function children($idx=-1) {
    if ($idx===-1) return $this->children;
    if (isset($this->children[$idx])) return $this->children[$idx];
    return null;
}

// Returns the first child of this node, or null when it has no children.
function first_child() {
    if (count($this->children)>0) return $this->children[0];
    return null;
}

   //Returns the last child of node
    function Last_child () {
  & nbsp;     if ($count =count ($this->children)) >0) return $this->children[$count-1];
        return null;
   }

   //Returns the next sibling of node   
    function next_sibling () {
        if ($this->parent===null) return null;
        $idx = 0;
        $count = count ($this->parent->children);
        while ($idx < $count && $this!== $this->parent-> children[$idx])
            + + $idx;
        if (+ + $idx >= $count) return null;
        return $this->parent->children[$idx];
   }

   //Returns the previous sibling of node
    function prev_sibling () {
 & nbsp;      if ($this->parent===null) return null;
        $idx = 0;
        $count = count ($this->parent->children);
        while ($idx < $count && $this!== $this->parent-> children[$idx])
            + + $idx;
        if (--$idx <0) return null;
        return $this->parent->children[$idx];
   }

// Get the node's inner HTML.
// An explicitly assigned inner text wins; text-like nodes return their stored
// text with noise placeholders restored; otherwise the outer text of every
// linked node is concatenated.
function innertext() {
    if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER];
    if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

    $ret = '';
    foreach ($this->nodes as $n)
        $ret .= $n->outertext();
    return $ret;
}

// Get the node's outer text (markup including its own tag).
function outertext() {
    if ($this->tag==='root') return $this->innertext();

    // trigger callback
    if ($this->dom->callback!==null)
        call_user_func_array($this->dom->callback, array($this));

    if (isset($this->_[HDOM_INFO_OUTER])) return $this->_[HDOM_INFO_OUTER];
    if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

    // render begin tag
    $ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();

    // render inner text
    if (isset($this->_[HDOM_INFO_INNER]))
        $ret .= $this->_[HDOM_INFO_INNER];
    else {
        foreach ($this->nodes as $n)
            $ret .= $n->outertext();
    }

    // render end tag (an end marker of 0 means no closing tag is emitted)
    if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END]!=0)
        $ret .= '</'.$this->tag.'>';
    return $ret;
}

   //Get DOM node ' s plain text
    function text () {
    & nbsp;   if (isset ($this->_[hdom_info_inner])) return $this->_[hdom_info_inner];
        switch ($this->nodetype) {
             Case Hdom_type_text:return $this->dom->restore_noise ($this->_[hdom_ Info_text]);
            case Hdom_type_comment:return ';
            case Hdom_type_unknown:return ';
       }
        if (strcasecmp ($this->tag, ' script ') ===0) return ';
        if (strcasecmp ($this->tag, ' style ') ===0) return ';

        $ret = ';
        foreach ($this->nodes as $n)
             $ret. = $n->text ();
        return $ret;
   }

    function xmltext () {
        $ret = $this-> InnerText ();
        $ret = Str_ireplace (' <![ Cdata[', ', $ret);
        $ret = str_replace (']]> ', ', ', $ret);
        return $ret;
   }

// Build the node's opening-tag text, re-using the whitespace and quoting
// recorded for each attribute.
function makeup() {
    // text, comment, unknown
    if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);

    $ret = '<'.$this->tag;
    $i = -1; // position of the attribute in the SPACE/QUOTE bookkeeping arrays

    foreach ($this->attr as $key=>$val) {
        ++$i;

        // skip removed attribute
        if ($val===null || $val===false)
            continue;

        $ret .= $this->_[HDOM_INFO_SPACE][$i][0];
        // no value attr: nowrap, checked selected...
        if ($val===true)
            $ret .= $key;
        else {
            switch ($this->_[HDOM_INFO_QUOTE][$i]) {
                case HDOM_QUOTE_DOUBLE: $quote = '"'; break;
                case HDOM_QUOTE_SINGLE: $quote = '\''; break;
                default: $quote = '';
            }
            $ret .= $key.$this->_[HDOM_INFO_SPACE][$i][1].'='.$this->_[HDOM_INFO_SPACE][$i][2].$quote.$val.$quote;
        }
    }
    $ret = $this->dom->restore_noise($ret);
    return $ret.$this->_[HDOM_INFO_ENDSPACE].'>';
}

// Find elements by CSS selector.
// $selector selector string; comma-separated groups are searched independently
//           and their results merged.
// $idx      null returns the array of matches; an integer returns that match
//           (negative counts from the end), or null when it does not exist.
function find($selector, $idx=null) {
    $selectors = $this->parse_selector($selector);
    if (($count=count($selectors))===0) return array();
    $found_keys = array();

    // find each selector
    for ($c=0; $c<$count; ++$c) {
        // depth of THIS selector group (the garbled original always read group 0,
        // which over- or under-runs groups of a different length)
        if (($levle=count($selectors[$c]))===0) return array();
        if (!isset($this->_[HDOM_INFO_BEGIN])) return array();

        $head = array($this->_[HDOM_INFO_BEGIN]=>1);

        // handle descendant selectors, no recursive!
        for ($l=0; $l<$levle; ++$l) {
            $ret = array();
            foreach ($head as $k=>$v) {
                // key -1 is the root node, which is addressed through dom->root
                $n = ($k===-1) ? $this->dom->root : $this->dom->nodes[$k];
                $n->seek($selectors[$c][$l], $ret);
            }
            $head = $ret;
        }

        foreach ($head as $k=>$v) {
            if (!isset($found_keys[$k]))
                $found_keys[$k] = 1;
        }
    }

    // sort keys
    ksort($found_keys);

    $found = array();
    foreach ($found_keys as $k=>$v)
        $found[] = $this->dom->nodes[$k];

    // return nth-element or array
    if (is_null($idx)) return $found;
    else if ($idx<0) $idx = count($found) + $idx;
    return (isset($found[$idx])) ? $found[$idx] : null;
}

// Seek nodes below this one that satisfy a single parsed selector.
// $selector array($tag, $key, $val, $exp, $no_key) as built by parse_selector().
// $ret      by-reference map; matching nodes are recorded as
//           $ret[<index in dom->nodes>] = 1.
protected function seek($selector, &$ret) {
    list($tag, $key, $val, $exp, $no_key) = $selector;

    // xpath index: numeric key selects the n-th matching direct child
    if ($tag && $key && is_numeric($key)) {
        $count = 0;
        foreach ($this->children as $c) {
            if ($tag==='*' || $tag===$c->tag) {
                if (++$count==$key) {
                    $ret[$c->_[HDOM_INFO_BEGIN]] = 1;
                    return;
                }
            }
        }
        return;
    }

    // work out the last node index to scan; when this node has no end marker,
    // borrow one from the nearest ancestor that has it
    $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
    if ($end==0) {
        $parent = $this->parent;
        while (!isset($parent->_[HDOM_INFO_END]) && $parent!==null) {
            $end -= 1;
            $parent = $parent->parent;
        }
        $end += $parent->_[HDOM_INFO_END];
    }

    for ($i=$this->_[HDOM_INFO_BEGIN]+1; $i<$end; ++$i) {
        $node = $this->dom->nodes[$i];
        $pass = true;

        // bare '*' matches direct children only
        if ($tag==='*' && !$key) {
            if (in_array($node, $this->children, true))
                $ret[$i] = 1;
            continue;
        }

        // compare tag
        if ($tag && $tag!=$node->tag && $tag!=='*') {$pass=false;}
        // compare key
        if ($pass && $key) {
            if ($no_key) {
                if (isset($node->attr[$key])) $pass=false;
            }
            else if (!isset($node->attr[$key])) $pass=false;
        }
        // compare value
        if ($pass && $key && $val && $val!=='*') {
            $check = $this->match($exp, $val, $node->attr[$key]);
            // handle multiple class
            if (!$check && strcasecmp($key, 'class')===0) {
                foreach (explode(' ', $node->attr[$key]) as $k) {
                    $check = $this->match($exp, $val, $k);
                    if ($check) break;
                }
            }
            if (!$check) $pass = false;
        }
        if ($pass) $ret[$i] = 1;
        unset($node);
    }
}

// Test an attribute value against a selector pattern.
// $exp     comparison operator: '=', '!=', '^=' (prefix), '$=' (suffix) or
//          '*=' (regex; a pattern starting with '/' is used verbatim,
//          otherwise it is wrapped as a case-insensitive regex).
// $pattern value from the selector.
// $value   attribute value from the node.
// Returns a truthy value on match; false for an unrecognised operator.
protected function match($exp, $pattern, $value) {
    switch ($exp) {
        case '=':
            return ($value===$pattern);
        case '!=':
            return ($value!==$pattern);
        case '^=':
            return preg_match("/^".preg_quote($pattern, '/')."/", $value);
        case '$=':
            return preg_match("/".preg_quote($pattern, '/')."$/", $value);
        case '*=':
            if ($pattern[0]=='/')
                return preg_match($pattern, $value);
            return preg_match("/".$pattern."/i", $value);
    }
    return false;
}

    protected function Parse_selector ($selector _string) {
        //pattern of CSS selectors, modified from MooTools
        $pattern = "/([\w-:\*]*) (?: \ # ([\w-]+) |\. ([\w-]+)]? (?:\ [@? (!? [\w-]+) (?:( [!*^$]?=)[\"']? (.*?) [\"']?)? \])? ([\/,]+)/is ";
        preg_match_all ($pattern, Trim ($selector _string). ', $matches, Preg_set_order);
        $selectors = Array ();
        $result = Array ();
       //print_r ($matches);

        foreach ($matches as $m) {
             $m [0] = Trim ($m [0]);
            if ($m [0]===] | | $m [0]===] | | | | $m [0]=== '//] ) continue;
           //For Borwser grnreated XPath
             if ($m [1]=== ' tbody ') continue;

            list ($tag, $key, $val, $exp, $no _key) = Array ($ M[1], NULL, NULL, ' = ', false);
            if (!empty ($m [2])) {$key = ' id '; $val = $m [2];}
            if (!empty ($m [3])) {$key = ' class '; $val = $m [3 ];}
            if (!empty ($m [4])) {$key = $m [4];}
            if (!empty ($m [5])) {$exp = $m [5];}
            if (!empty ($m [6])) {$val = $m [6];}

           //Convert to lowercase
             if ($this->dom->lowercase) {$tag =strtolower ($tag); Key=strtolower ($key);
           //elements that does not have the specified Attribute
            if (isset ($key [0]) && $key [0]=== '! '] {$key =substr ($key, 1); $no _key=true}

            $result [] = Array ($tag, $key, $val, $exp, $no _ Key);
            if (Trim ($m [7]) = = ', ') {
                 $selectors [] = $result;
                $result = Array ();
           }
       }
        if (count ($result) >0)
             $selectors [] = $result;
        return $selectors;
   }

// Magic getter: attributes first, then the virtual text properties.
// For any other name, returns whether the name exists as an attribute key.
function __get($name) {
    if (isset($this->attr[$name])) return $this->attr[$name];
    switch ($name) {
        case 'outertext': return $this->outertext();
        case 'innertext': return $this->innertext();
        case 'plaintext': return $this->text();
        case 'xmltext': return $this->xmltext();
        default: return array_key_exists($name, $this->attr);
    }
}

// Magic setter: 'outertext' / 'innertext' override the rendered text; any
// other name sets an attribute.
function __set($name, $value) {
    switch ($name) {
        case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
        case 'innertext':
            if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value;
            return $this->_[HDOM_INFO_INNER] = $value;
    }
    // new attribute: record default spacing (one leading space) and double
    // quotes so makeup() can render it
    if (!isset($this->attr[$name])) {
        $this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
        $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
    }
    $this->attr[$name] = $value;
}

// Magic isset: the virtual text properties always exist; otherwise report
// whether the attribute key is present.
function __isset($name) {
    switch ($name) {
        case 'outertext': return true;
        case 'innertext': return true;
        case 'plaintext': return true;
    }
    // no value attr: nowrap, checked selected...
    return (array_key_exists($name, $this->attr)) ? true : isset($this->attr[$name]);
}

// Magic unset: remove the attribute if it is set.
function __unset($name) {
    if (isset($this->attr[$name]))
        unset($this->attr[$name]);
}

// camel naming conventions: thin aliases over the snake_case / magic API
function getAllAttributes() {return $this->attr;}
function getAttribute($name) {return $this->__get($name);}
function setAttribute($name, $value) {$this->__set($name, $value);}
function hasAttribute($name) {return $this->__isset($name);}
// removal is done by setting the value to null, which makeup() skips
function removeAttribute($name) {$this->__set($name, null);}
function getElementById($id) {return $this->find("#$id", 0);}
function getElementsById($id, $idx=null) {return $this->find("#$id", $idx);}
function getElementByTagName($name) {return $this->find($name, 0);}
function getElementsByTagName($name, $idx=null) {return $this->find($name, $idx);}
function parentNode() {return $this->parent();}
function childNodes($idx=-1) {return $this->children($idx);}
function firstChild() {return $this->first_child();}
function lastChild() {return $this->last_child();}
function nextSibling() {return $this->next_sibling();}
function previousSibling() {return $this->prev_sibling();}
}

// simple html dom parser
// -----------------------------------------------------------------------------
class simple_html_dom {
    public $root = null;
    public $nodes = array();      // flat list of every node, in creation order
    public $callback = null;      // invoked by simple_html_dom_node::outertext()
    public $lowercase = false;
    protected $pos;               // current byte offset into $doc
    protected $doc;               // text being parsed
    protected $char;              // character at $pos, or null past the end
    protected $size;              // strlen($doc)
    protected $cursor;
    protected $parent;            // node that newly parsed nodes are linked under
    protected $noise = array();   // placeholder key => original text
    protected $token_blank = " \t\r\n";
    protected $token_equal = ' =/>';
    protected $token_slash = " />\r\n\t";
    protected $token_attr = ' >';
    // use isset instead of in_array, performance boost about 30%...
    protected $self_closing_tags = array('img'=>1, 'br'=>1, 'input'=>1, 'meta'=>1, 'link'=>1, 'hr'=>1, 'base'=>1, 'embed'=>1, 'spacer'=>1);
    protected $block_tags = array('root'=>1, 'body'=>1, 'form'=>1, 'div'=>1, 'span'=>1, 'table'=>1);
    // keys are compared against lower-cased tag names, so they must be lower case
    protected $optional_closing_tags = array(
        'tr'=>array('tr'=>1, 'td'=>1, 'th'=>1),
        'th'=>array('th'=>1),
        'td'=>array('td'=>1),
        'li'=>array('li'=>1),
        'dt'=>array('dt'=>1, 'dd'=>1),
        'dd'=>array('dd'=>1, 'dt'=>1),
        'dl'=>array('dd'=>1, 'dt'=>1),
        'p'=>array('p'=>1),
        'nobr'=>array('nobr'=>1),
    );

// Optionally load content on construction.
// $str when truthy: an "http://" URL or an existing file path is loaded via
//      load_file(); anything else is parsed as HTML text.
function __construct($str=null) {
    if ($str) {
        if (preg_match("/^http:\/\//i", $str) || is_file($str))
            $this->load_file($str);
        else
            $this->load($str);
    }
}
// Release node references when the document is destroyed.
function __destruct() {
    $this->clear();
}

// Load HTML from a string and parse it into the node tree.
// $str       HTML text.
// $lowercase stored in $this->lowercase; controls lower-casing of tag and
//            attribute names during parsing.
function load($str, $lowercase=true) {
    // prepare
    $this->prepare($str, $lowercase);
    // Each remove_noise() call swaps matching text for a placeholder so the
    // tag parser never sees it; restore_noise() puts it back on output.
    // strip out comments
    $this->remove_noise("'<!--(.*?)-->'is");
    // strip out cdata
    $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
    // strip out <style> tags
    $this->remove_noise("'<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>'is");
    $this->remove_noise("'<\s*style\s*>(.*?)<\s*/\s*style\s*>'is");
    // strip out <script> tags
    $this->remove_noise("'<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>'is");
    $this->remove_noise("'<\s*script\s*>(.*?)<\s*/\s*script\s*>'is");
    // strip out preformatted tags
    $this->remove_noise("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is");
    // strip out server side scripts
    $this->remove_noise("'(<\?)(.*?)(\?>)'s", true);
    // strip smarty scripts
    $this->remove_noise("'(\{\w)(.*?)(\})'s", true);

    // parsing
    while ($this->parse());
    // end
    $this->root->_[HDOM_INFO_END] = $this->cursor;
}

// Load HTML from a file or URL; all arguments are forwarded unchanged to
// file_get_contents() and the result is parsed with lower-casing enabled.
function load_file() {
    $args = func_get_args();
    $this->load(call_user_func_array('file_get_contents', $args), true);
}

// Set the callback that outertext() invokes with each node being rendered.
function set_callback($function_name) {
    $this->callback = $function_name;
}

// Remove the rendering callback.
function remove_callback() {
    $this->callback = null;
}

// Save the DOM as a string.
// $filepath when not '', the rendered text is also written to this path.
// Returns the rendered text.
function save($filepath='') {
    $ret = $this->root->innertext();
    if ($filepath!=='') file_put_contents($filepath, $ret);
    return $ret;
}

   //Find DOM node by CSS selector
    function Find ($selector, $idx =null) {
&nb sp;       return $this->root->find ($selector, $IDX);
   }

// Clean up memory due to the PHP5 circular-reference memory leak: clear every
// node, then drop the parser's own references and buffers.
function clear() {
    foreach ($this->nodes as $n) {$n->clear(); $n = null;}
    if (isset($this->parent)) {$this->parent->clear(); unset($this->parent);}
    if (isset($this->root)) {$this->root->clear(); unset($this->root);}
    unset($this->doc);
    unset($this->noise);
}

// Dump the whole tree, starting at the root node.
function dump($show_attr=true) {
    $this->root->dump($show_attr);
}

// Prepare HTML data and reset all parser state.
protected function prepare($str, $lowercase=true) {
    $this->clear();
    $this->doc = $str;
    $this->pos = 0;
    $this->cursor = 1;
    $this->noise = array();
    $this->nodes = array();
    $this->lowercase = $lowercase;
    $this->root = new simple_html_dom_node($this);
    $this->root->tag = 'root';
    $this->root->_[HDOM_INFO_BEGIN] = -1;
    $this->root->nodetype = HDOM_TYPE_ROOT;
    $this->parent = $this->root;
    // set the length of content
    $this->size = strlen($str);
    // reset the current character too, so an empty document does not keep a
    // stale character from a previous load
    $this->char = ($this->size>0) ? $this->doc[0] : null;
}

// Parse the next piece of HTML content.
// Text up to the next '<' becomes a text node; when there is none, the tag
// at the current position is read instead.
// Returns false once read_tag() reports the end of input, true otherwise.
protected function parse() {
    if (($s = $this->copy_until_char('<'))==='')
        return $this->read_tag();

    // text
    $node = new simple_html_dom_node($this);
    ++$this->cursor;
    $node->_[HDOM_INFO_TEXT] = $s;
    $this->link_nodes($node, false);
    return true;
}

   //Read tag info
    protected function Read_tag () {
         if ($this->char!== ' < ') {
             $this->root->_[hdom_info_end] = $this->cursor;
            return false;
       }
        $begin _tag_pos = $this->pos;
        $this->char = (+ + $this->pos< $this->size)? $this->doc [$this->pos]: null; Next

End tag
if ($this->char=== '/') {
$this->char = (+ + $this->pos< $this->size)? $this->doc[$this->pos]: null; Next
$this->skip ($this->token_blank_t);
$tag = $this->copy_until_char (' > ');

Skip attributes in end tag
if ($pos = Strpos ($tag, '))!==false)
$tag = substr ($tag, 0, $pos);

$parent _lower = strtolower ($this->parent->tag);
$tag _lower = Strtolower ($tag);

if ($parent _lower!== $tag _lower) {
if (Isset ($this->optional_closing_tags[$parent _lower]) && isset ($this->block_tags[$tag _lower])) {
$this->parent->_[hdom_info_end] = 0;
$org _parent = $this->parent;

while (($this->parent->parent) && strtolower ($this->parent->tag)!== $tag _lower)
$this->parent = $this->parent->parent;

if (Strtolower ($this-&gt;parent-&gt;tag)!== $tag _lower) {


$this-&gt;parent = $org _parent; Restore Origonal Parent


if ($this-&gt;parent-&gt;parent) $this-&gt;parent = $this-&gt;parent-&gt;parent;


$this-&gt;parent-&gt;_[hdom_info_end] = $this-&gt;cursor;


return $this-&gt;as_text_node ($tag);


}


}


else if (($this-&gt;parent-&gt;parent) &amp;&amp; isset ($this-&gt;block_tags[$tag _lower)) {


$this-&gt;parent-&gt;_[hdom_info_end] = 0;


$org _parent = $this-&gt;parent;

while (($this->parent->parent) && strtolower ($this->parent->tag)!== $tag _lower)
$this->parent = $this->parent->parent;

if (Strtolower ($this-&gt;parent-&gt;tag)!== $tag _lower) {


$this-&gt;parent = $org _parent; Restore Origonal Parent


$this-&gt;parent-&gt;_[hdom_info_end] = $this-&gt;cursor;


return $this-&gt;as_text_node ($tag);


}


}


else if (($this-&gt;parent-&gt;parent) &amp;&amp; strtolower ($this-&gt;parent-&gt;parent-&gt;tag) = = = $tag _lower) {


$this-&gt;parent-&gt;_[hdom_info_end] = 0;


$this-&gt;parent = $this-&gt;parent-&gt;parent;


}


Else


return $this-&gt;as_text_node ($tag);


}

$this->parent->_[hdom_info_end] = $this->cursor;
if ($this->parent->parent) $this->parent = $this->parent->parent;

$this->char = (+ + $this->pos< $this->size)? $this->doc[$this->pos]: null; Next
return true;
}

$node = new Simple_html_dom_node ($this);
$node->_[hdom_info_begin] = $this->cursor;
+ + $this->cursor;
$tag = $this->copy_until ($this->token_slash);

DOCTYPE, CDATA & Comments ...
if (isset ($tag [0]) && $tag [0]=== '! ') {
$node->_[hdom_info_text] = ' < '. $tag. $this->copy_until_char (' > ');

            if (Isset ($tag [2]) && $tag [1]=== '-' && $tag [2]=== '-') {
                 $node->nodetype = hdom_type_comment;
                $node->tag = ' Comment ';
           } else {
                 $node->nodetype = Hdom_type_unknown;
                $node->tag = ' Unknown ';
           }

            if ($this->char=== ' > ') $node->_[ Hdom_info_text].= ' > ';
            $this->link_nodes ($node, true);
            $this->char = (+ + $this->pos<$ this->size)? $this->doc[$this->pos]: null; Next
            return true;
       }

       //Text
        if ($pos = Strpos ($tag, ' < ')!==false) {
            $tag = ' < ' . substr ($tag, 0,-1);
            $node->_[hdom_info_text] = $tag;
            $this->link_nodes ($node, false);
            $this->char = $this->doc[--$this- >pos]; Prev
            return true;
       }

        if (!preg_match ("/^[\w-:]+$/", $tag)) {
             $node->_[hdom_info_text] = ' < '. $tag. $this->copy_until (' <> ');
            if ($this->char=== ' < ') {
                 $this->link_nodes ($node , false);
                return true;
           }

if ($this->char=== ' > ') $node->_[hdom_info_text].= ' > ';
$this->link_nodes ($node, false);
$this->char = (+ + $this->pos< $this->size)? $this->doc[$this->pos]: null; Next
return true;
}

Begin Tag
$node->nodetype = hdom_type_element;
$tag _lower = Strtolower ($tag);
$node->tag = ($this->lowercase)? $tag _lower: $tag;

       //Handle optional closing tags
         if (isset ($this->optional_closing_tags[$tag _lower]) {
             while (isset ($this->optional_closing_tags[$tag _lower][strtolower ($this-> Parent->tag)]) {
                 $this->parent->_[hdom_info_end] = 0;
                $this->parent = $this->parent->parent;
           }
            $node->parent = $this->parent;
       }

        $guard = 0;//Prevent Infinity Loop
     & nbsp;  $space = Array ($this->copy_skip ($this->token_blank), ",");

       //attributes
        do {
            if ($this->char!==null && $space [0]= = = "") break;
            $name = $this->copy_until ($this-> Token_equal);
            if ($guard = = $this->pos) {
                 $this->char = (+ + $this- >pos< $this->size)? $this->doc[$this->pos]: null; Next
                continue;
           }
             $guard = $this->pos;

Handle Endless ' &lt; '


if ($this-&gt;pos&gt;= $this-&gt;size-1 &amp;&amp; $this-&gt;char!== ' &gt; ') {


$node-&gt;nodetype = Hdom_type_text;


$node-&gt;_[hdom_info_end] = 0;


$node-&gt;_[hdom_info_text] = ' &lt; '. $tag. $space [0]. $name;


$node-&gt;tag = ' text ';


$this-&gt;link_nodes ($node, false);


return true;


}

Handle mismatch ' &lt; '


if ($this-&gt;doc[$this-&gt;pos-1]== ' &lt; ') {


$node-&gt;nodetype = Hdom_type_text;


$node-&gt;tag = ' text ';


$node-&gt;attr = Array ();


$node-&gt;_[hdom_info_end] = 0;


$node-&gt;_[hdom_info_text] = substr ($this-&gt;doc, $begin _tag_pos, $this-&gt;pos-$begin _tag_pos-1);


$this-&gt;pos-= 2;


$this-&gt;char = (+ + $this-&gt;pos&lt; $this-&gt;size)? $this-&gt;doc[$this-&gt;pos]: null; Next


$this-&gt;link_nodes ($node, false);


return true;


}

if ($name!== '/' &amp;&amp; $name!== ') {


$space [1] = $this-&gt;copy_skip ($this-&gt;token_blank);


$name = $this-&gt;restore_noise ($name);


if ($this-&gt;lowercase) $name = Strtolower ($name);


if ($this-&gt;char=== ' = ') {


$this-&gt;char = (+ + $this-&gt;pos&lt; $this-&gt;size)? $this-&gt;doc[$this-&gt;pos]: null; Next


$this-&gt;parse_attr ($node, $name, $space);


}


else {


No value attr:nowrap, checked selected ...


$node-&gt;_[hdom_info_quote][] = hdom_quote_no;


$node-&gt;attr[$name] = true;


if ($this-&gt;char!= ' &gt; ') $this-&gt;char = $this-&gt;doc[--$this-&gt;pos]; Prev


}


$node-&gt;_[hdom_info_space][] = $space;


$space = Array ($this-&gt;copy_skip ($this-&gt;token_blank), ', ' ";


}


Else


Break


while ($this-&gt;char!== ' &gt; ' &amp;&amp; $this-&gt;char!== '/');

$this->link_nodes ($node, true);
$node->_[hdom_info_endspace] = $space [0];

Check self closing


if ($this-&gt;copy_until_char_escape (' &gt; ') = = = '/') {


$node-&gt;_[hdom_info_endspace]. = '/';


$node-&gt;_[hdom_info_end] = 0;


}


else {


Reset Parent


if (!isset ($this-&gt;self_closing_tags[strtolower ($node-&gt;tag))) $this-&gt;parent = $node;


}


$this-&gt;char = (+ + $this-&gt;pos&lt; $this-&gt;size)? $this-&gt;doc[$this-&gt;pos]: null; Next


return true;


}

// Parse one attribute value (the parser is positioned just after '=').
// $node  node receiving the attribute and its quote-style record.
// $name  attribute name.
// $space by-reference spacing triple; slot 2 receives the whitespace found
//        between '=' and the value.
protected function parse_attr($node, $name, &$space) {
    $space[2] = $this->copy_skip($this->token_blank);
    switch ($this->char) {
        case '"':
            $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
            $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
            $node->attr[$name] = $this->restore_noise($this->copy_until_char_escape('"'));
            $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
            break;
        case '\'':
            $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_SINGLE;
            $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
            $node->attr[$name] = $this->restore_noise($this->copy_until_char_escape('\''));
            $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
            break;
        default:
            // unquoted value: runs until a space or '>'
            $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
            $node->attr[$name] = $this->restore_noise($this->copy_until($this->token_attr));
    }
}

   //Link node ' s parent
    protected function link_nodes (& $node, $is _child) {
        $node->parent = $this->parent;
        $this->parent->nodes[] = $node;
        if ($is _child)
             $this->parent->children[] = $node;
   }

   //As a text node
    protected function As_text_node ($tag) {
  & nbsp;     $node = new Simple_html_dom_node ($this);
        + + $this->cursor;
        $node->_[hdom_info_text] = ' </'. $tag. ' > ';
        $this->link_nodes ($node, false);
        $this->char = (+ + $this->pos< $this->size)? $this->doc [$this->pos]: null; Next
        return true;
   }

// Advance past any run of characters contained in $chars.
protected function skip($chars) {
    $this->pos += strspn($this->doc, $chars, $this->pos);
    $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
}

// Advance past any run of characters contained in $chars and return the
// skipped text ('' when nothing was skipped).
protected function copy_skip($chars) {
    $pos = $this->pos;
    $len = strspn($this->doc, $chars, $pos);
    $this->pos += $len;
    $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
    if ($len===0) return '';
    return substr($this->doc, $pos, $len);
}

// Advance until a character contained in $chars (or the end of the document)
// and return the text passed over.
protected function copy_until($chars) {
    $pos = $this->pos;
    $len = strcspn($this->doc, $chars, $pos);
    $this->pos += $len;
    $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
    return substr($this->doc, $pos, $len);
}

// Advance to the next occurrence of $char and return the text before it.
// When $char does not occur again, the rest of the document is consumed and
// returned. Returns '' at end of input or when already positioned on $char.
protected function copy_until_char($char) {
    if ($this->char===null) return '';

    if (($pos = strpos($this->doc, $char, $this->pos))===false) {
        $ret = substr($this->doc, $this->pos, $this->size-$this->pos);
        $this->char = null;
        $this->pos = $this->size;
        return $ret;
    }

    if ($pos===$this->pos) return '';
    $pos_old = $this->pos;
    $this->char = $this->doc[$pos];
    $this->pos = $pos;
    return substr($this->doc, $pos_old, $pos-$pos_old);
}

// Like copy_until_char(), but an occurrence of $char that is directly
// preceded by a backslash is treated as escaped and skipped.
protected function copy_until_char_escape($char) {
    if ($this->char===null) return '';

    $start = $this->pos;
    while (1) {
        if (($pos = strpos($this->doc, $char, $start))===false) {
            $ret = substr($this->doc, $this->pos, $this->size-$this->pos);
            $this->char = null;
            $this->pos = $this->size;
            return $ret;
        }

        if ($pos===$this->pos) return '';

        // escaped occurrence: keep searching after it
        if ($this->doc[$pos-1]==='\\') {
            $start = $pos+1;
            continue;
        }

        $pos_old = $this->pos;
        $this->char = $this->doc[$pos];
        $this->pos = $pos;
        return substr($this->doc, $pos_old, $pos-$pos_old);
    }
}

// Remove noise from the HTML content: every match of $pattern is replaced in
// $this->doc by a "___noise___NNN" placeholder and the original text is kept
// in $this->noise under that key.
// $pattern    PCRE pattern; capture group 1 is the part replaced by default.
// $remove_tag when true, the whole match (group 0) is replaced instead.
protected function remove_noise($pattern, $remove_tag=false) {
    $count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE);

    // replace from the last match backwards so earlier offsets stay valid
    for ($i=$count-1; $i>-1; --$i) {
        // NOTE(review): keys are count+100 printed 3 wide and restore_noise()
        // reads exactly three digits, so this scheme assumes fewer than 900
        // noise entries per document.
        $key = '___noise___'.sprintf('% 3d', count($this->noise)+100);
        $idx = ($remove_tag) ? 0 : 1;
        $this->noise[$key] = $matches[$i][$idx][0];
        $this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
    }

    // reset the length of content
    $this->size = strlen($this->doc);
    if ($this->size>0) $this->char = $this->doc[0];
}

// Restore noise to HTML content: replace each "___noise___NNN" placeholder in
// $text with the original text stored in $this->noise.
// A marker with no stored entry (or one cut short at the end of $text) is left
// in place and skipped; the original looped forever on such input.
function restore_noise($text) {
    $offset = 0;
    while (($pos = strpos($text, '___noise___', $offset))!==false) {
        $key = substr($text, $pos, 14); // 11-char marker + 3-digit id
        if (strlen($key)===14 && isset($this->noise[$key])) {
            $text = substr($text, 0, $pos).$this->noise[$key].substr($text, $pos+14);
            // rescan from the same spot: restored text may itself hold markers
            $offset = $pos;
        }
        else
            $offset = $pos+11;
    }
    return $text;
}

// String conversion yields the rendered document.
function __toString() {
    return $this->root->innertext();
}

    function __get ($name) {
        switch ($name) {
            case ' outertext ': Return $this->root-> InnerText ();
            case ' innertext ': Return $this->root- >innertext ();
            case ' plaintext ': return $this->root- >text ();
       }
   }

// camel naming conventions: thin aliases over the snake_case API
function childNodes($idx=-1) {return $this->root->childNodes($idx);}
function firstChild() {return $this->root->first_child();}
function lastChild() {return $this->root->last_child();}
function getElementById($id) {return $this->find("#$id", 0);}
function getElementsById($id, $idx=null) {return $this->find("#$id", $idx);}
function getElementByTagName($name) {return $this->find($name, 0);}
// NOTE(review): the default of -1 makes find() return only the LAST match,
// unlike the node-level alias which defaults to null (all matches). Kept as-is
// for backward compatibility; confirm whether null was intended.
function getElementsByTagName($name, $idx=-1) {return $this->find($name, $idx);}
function loadFile() {$args = func_get_args(); $this->load(call_user_func_array('file_get_contents', $args), true);}
}
?>

tqq.php

<?php

/*******************************************************************************
Version: 1.11 ($Rev: 175 $)
Website: http://www.115.co
Author: S.C Chen <admin@185.cm>
Acknowledge: Jose Solorzano (http://www.115.co/)
Contributions by: QQ Exchange Group: 89097023
    Yousuke Kumakura (Attribute filters)
    Vadim Voituk (Negative indexes supports of "find" method)
    Antcs (Constructor with automatically load contents either text or file/url)
Licensed under The MIT License
Redistributions of files must retain the above copyright notice.
*******************************************************************************/

// Fetch $url with cURL and return the response body (false when the transfer
// fails). Cookies are read from and written back to the file $cookieJar, so
// consecutive calls share one session. On the very first call the jar is still
// empty, which is the same as sending no cookies at all.
function tqq_fetch($url, $cookieJar) {
    $curl = curl_init($url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_COOKIEFILE, $cookieJar);
    curl_setopt($curl, CURLOPT_COOKIEJAR, $cookieJar);
    $body = curl_exec($curl);
    curl_close($curl);
    return $body;
}

// Cache time, units: seconds
$t = 360;

// Refresh index.html when it is missing or older than the cache time.
if (!is_file('index.html') || (time() - filemtime('index.html')) > $t) {

    // Micro-blog account
    // NOTE(review): credentials are hard-coded and sent over plain HTTP.
    $qq = 'Kuaisubeian';

    // Password after Tencent's md5_3() encryption
    // NOTE(review): letter case of this hash may matter to the server - confirm.
    $pwd = '624d3274815f2237817a7c62f42dd26a';

    $verifyURL = 'http://ptlogin2.qq.com/check?uin=@' . $qq . '&appid=46000101';
    $loginURL = 'http://ptlogin2.qq.com/login?';

    // Get the verification code and the first cookie
    $cookie_jar = tempnam('.', 'Cookie');
    $verifyCode = tqq_fetch($verifyURL, $cookie_jar);

    // The code is taken as 4 bytes starting at offset 18 of the response.
    // NOTE(review): presumably the reply looks like ptui_checkVC('0','!ABC'); - confirm.
    $verifyCode = strtoupper(substr($verifyCode, 18, 4));

    echo 'Verification code: ' . $verifyCode;
    echo '<hr/>';
    echo 'Cookies: ' . $cookie_jar;
    echo '<hr/>';

    // Send a login request and get a second cookie
    $loginURL .= 'u=@' . $qq
        . '&p=' . md5($pwd . $verifyCode)
        . '&verifycode=' . $verifyCode
        . '&aid=46000101&u1=http%3a%2f%2ft.qq.com&h=1&from_ui=1&fp=loginerroralert';

    echo 'Login address: ' . $loginURL;
    echo '<hr/>';

    $loginResult = tqq_fetch($loginURL, $cookie_jar);

    echo 'Login verification Result: ' . $loginResult;
    echo '<hr/>';

    // http://www.knowsky.com

    // Third request: visit the home page to collect its cookies (body is not used)
    tqq_fetch('http://t.qq.com', $cookie_jar);

    // Fourth request: the account's own page, whose HTML becomes the cache
    $loginResult = tqq_fetch('http://t.qq.com/' . $qq . '/mine', $cookie_jar);

    unlink($cookie_jar);

    // Keep the previous cache when the download failed instead of replacing
    // it with an empty file.
    if ($loginResult !== false) {
        file_put_contents('index.html', $loginResult);
    }
}


// cnz.php provides file_get_html() and the simple_html_dom classes.
include('cnz.php');

$html = file_get_html('index.html');

$talkList = $html->find('#talkList');

if (empty($talkList)) {
    // The cached page has no #talkList element; there is nothing to render.
    exit('No #talkList element found in index.html');
}

$lastTalk = $talkList[0];

// Second child of the first list entry; all three lookups below start here.
$msgBox = $lastTalk->children(0)->children(1);

// NOTE(review): class selectors are case-sensitive; the casing used here
// mirrors the variable names - confirm against the real page markup.
$userName = $msgBox->find('.userName');
$msgCnt = $msgBox->find('.msgCnt');
$pubInfo = $msgBox->find('.pubInfo');

$userName = $userName[0]->plaintext;

$result = '';

// More than two are broadcast
// NOTE(review): the threshold below is 10 although the comment above speaks
// of two - confirm which value is intended.
if (count($msgCnt) < 10) {
    $pi = $pubInfo[0]->find('.left');
    $result = $userName . $msgCnt[0]->plaintext
        . '<p style="text-align:center;"><span style="display:none"></span></p>'
        . '<title>qqweiboqq|www.beiantuan.com</title>'
        . '<meta http-equiv="Content-type" content="text/html; charset=utf-8"/>'
        . '<span style="font-style:italic; color:rgb(238, 29, 36);">'
        . $pi[0]->children(0)->plaintext . ' ' . $pi[0]->children(1)->plaintext
        . '</span>';
} else {
    $pi = $pubInfo[1]->find('.left');
    $result = $userName . $msgCnt[0]->plaintext
        . '[' . $msgCnt[1]->plaintext . ']'
        . '<span style="font-style:italic; color:rgb(149, 158, 135);">'
        . $pi[0]->plaintext
        . '</span>';
}

echo $result;

?>

Contact Us

The content source of this page is from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of the page confusing, please write us an email, and we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.