Here is a small side dish: this is my first time sharing code. It is a PHP scraping ("collection") class that I wrote some time ago and have been using ever since. I find it simple yet powerful: with just a little knowledge of CSS selectors you can scrape any page, HTTPS pages are supported as well, and it is more than enough for simple scraping jobs.
-
/**
 * Universal list scraping class
 * Version: V1.3
 * Author: JAE
 * Blog: http://blog.jaekj.com
 */
// NOTE(review): the include path and its letter case were reconstructed from a
// garbled listing; confirm it against where phpQuery is actually installed.
require_once './phpQuery/phpQuery/phpQuery.php';
/**
 * Downloads one page with cURL and extracts fields from it using phpQuery
 * selectors. Extracted rows accumulate in the public $jsonArr property.
 *
 * Depends on the phpQuery library (phpQuery::newDocumentHTML() and pq()),
 * which must be loaded before this class is used.
 */
class QueryList
{
    /** Address of the page that was downloaded. */
    private $pageURL;

    /** Selector rules: array("name" => array("selector", "type"), ...). */
    private $regArr = array();

    /** Extracted rows, indexed 0..n-1; each row maps rule name => value. */
    public $jsonArr = array();

    /** Optional block selector; when set, rules are applied inside each block. */
    private $regRange;

    /** Raw response body of the downloaded page ('' when the download failed). */
    private $html;

    /**
     * Downloads $pageURL and, when rules are supplied, runs the extraction.
     *
     * @param string $pageURL  Page address (http or https).
     * @param array  $regArr   Selector rules in the form
     *                         array("name" => array("selector", "type"), ...),
     *                         where "type" is "text", "html", or an attribute
     *                         name such as "href".
     * @param string $regRange Block selector: picks the repeating chunks first,
     *                         then each rule is evaluated inside every chunk.
     */
    public function __construct($pageURL, $regArr = array(), $regRange = '')
    {
        $this->pageURL = $pageURL;

        // cURL is used so that https:// pages can be fetched as well.
        // SECURITY NOTE: certificate and host verification are switched off,
        // exactly as in the original listing. That makes HTTPS fetches
        // vulnerable to man-in-the-middle tampering; re-enable verification
        // if the scraped data is trusted downstream.
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->pageURL);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        $body = curl_exec($ch);
        curl_close($ch);

        // curl_exec() returns false on failure; normalise to an empty document
        // so later parsing receives a string.
        $this->html = ($body === false) ? '' : $body;

        if (!empty($regArr)) {
            $this->regArr = $regArr;
            $this->regRange = $regRange;
            $this->getList();
        }
    }

    /**
     * Discards previous results and runs a new set of rules against the page
     * that was already downloaded (no second HTTP request is made).
     *
     * @param array  $regArr   Selector rules, same format as the constructor.
     * @param string $regRange Optional block selector.
     */
    public function setQuery($regArr, $regRange = '')
    {
        $this->jsonArr = array();
        $this->regArr = $regArr;
        $this->regRange = $regRange;
        $this->getList();
    }

    /**
     * Applies the current rules to the downloaded HTML and fills $jsonArr.
     *
     * With a block selector, one row is produced per matched block. Without
     * one, every rule is matched against the whole document and its n-th match
     * is stored in row n.
     */
    private function getList()
    {
        $hobj = phpQuery::newDocumentHTML($this->html);

        if (!empty($this->regRange)) {
            $robj = pq($hobj)->find($this->regRange);
            $i = 0;
            foreach ($robj as $item) {
                // foreach replaces the each()/reset() pair, which no longer
                // exists in PHP 8.
                foreach ($this->regArr as $key => $reg_value) {
                    $iobj = pq($item)->find($reg_value[0]);
                    $this->jsonArr[$i][$key] = $this->readNode($iobj, $reg_value[1]);
                }
                $i++;
            }
        } else {
            foreach ($this->regArr as $key => $reg_value) {
                $lobj = pq($hobj)->find($reg_value[0]);
                $i = 0;
                foreach ($lobj as $item) {
                    $this->jsonArr[$i++][$key] = $this->readNode($item, $reg_value[1]);
                }
            }
        }
    }

    /**
     * Reads one value from a matched node according to the rule type.
     *
     * @param mixed  $node Matched element or phpQuery selection.
     * @param string $type "text", "html", or an attribute name.
     * @return mixed Trimmed text/HTML, or the raw attribute value.
     */
    private function readNode($node, $type)
    {
        switch ($type) {
            case 'text':
                return trim(pq($node)->text());
            case 'html':
                return trim(pq($node)->html());
            default:
                // Any other type is treated as an attribute name.
                return pq($node)->attr($type);
        }
    }

    /**
     * @return string The extracted rows encoded as JSON.
     */
    public function getJson()
    {
        return json_encode($this->jsonArr);
    }
}
Copy Code
// NOTE(review): the include path, selectors and array keys below were
// reconstructed from a garbled listing (stray spaces and capital letters were
// removed). CSS class names are case-sensitive, so confirm each selector
// against the live page markup.
require 'query/QueryList.class.php';

// Scrape the OSChina code-share list: title, link and author.
$url = "http://www.oschina.net/code/list";
$reg = array(
    "title"  => array(".code_title a:eq(0)", "text"),
    "url"    => array(".code_title a:eq(0)", "href"),
    "author" => array("img", "title"),
);
$rang = ".code_list li";
$hj = new QueryList($url, $reg, $rang);
$arr = $hj->jsonArr;
print_r($arr);

// To also grab the TOP40 active-contributor avatars on the right-hand side of
// the same page, and get the result as JSON, reuse the object like this:
$reg = array("portrait" => array(".hot_top img", "src"));
$hj->setQuery($reg);
$json = $hj->getJson();
echo $json . " ";

// Scrape an OSChina content page.
$url = "http://www.oschina.net/code/snippet_186288_23816";
$reg = array(
    "title" => array(".QTitle h1", "text"),
    "con"   => array(".Content", "html"),
);
$hj = new QueryList($url, $reg);
$arr = $hj->jsonArr;
print_r($arr);

// After this many examples: isn't it convenient for scraping?
Copy Code
-
/**
 * Self-written Baidu and Google search API
 * Version: V2.0
 * Author: JAE
 * Blog: http://blog.jaekj.com
 */
// NOTE(review): file name and letter case reconstructed from a garbled
// listing; confirm it matches the file that defines QueryList.
require_once 'QueryList_class.php';
/**
 * Runs a keyword search on Baidu or Google by scraping the result page with
 * QueryList, and exposes the results through the public $jsonArr property.
 *
 * NOTE(review): the selectors were reconstructed from a garbled listing and
 * target the result-page markup of the time the article was written; verify
 * them against the current markup before relying on them.
 */
class Searcher
{
    /** Normalised engine name: 'baidu' or 'google'. */
    private $searcher;

    /** Search keyword. */
    private $key;

    /** Number of results per page. */
    private $num;

    /** Zero-based page index (the constructor receives a one-based page). */
    private $page;

    /** Selector rules for one result entry. */
    private $regArr;

    /** Block selector matching one result entry. */
    private $regRange;

    /** Selector rule for the "total results" text. */
    private $regZnum;

    /** Final result: paging metadata plus the scraped rows under 'data'. */
    public $jsonArr;

    /**
     * @param string $searcher Search engine, 'baidu' or 'google' (letter case
     *                         and surrounding whitespace are ignored).
     * @param string $key      Search keyword.
     * @param int    $num      Number of results per page.
     * @param int    $page     One-based page number.
     * @throws InvalidArgumentException When the engine is not supported.
     */
    public function __construct($searcher, $key, $num, $page)
    {
        $searcher = strtolower(trim($searcher));

        if ($searcher == 'baidu') {
            $this->regArr = array(
                "title" => array("h3.t a,#ting_singlesong_box a", "text"),
                "tCon"  => array("div.c-abstract,font:slice(0,2),div#weibo,table tr:eq(0),div.c-abstract-size p:eq(0),div.vd_sitcom_new_tinfo", "text"),
                "url"   => array("h3.t a,#ting_singlesong_box a", "href"),
            );
            $this->regRange = 'table.result,table.result-op';
            $this->regZnum = array("zNum" => array("span.nums", "text"));
        } else if ($searcher == 'google') {
            $this->regArr = array(
                "title" => array("h3.r a", "text"),
                "tCon"  => array("span.st", "text"),
                "url"   => array("h3.r a", "href"),
            );
            $this->regRange = 'li.g';
            $this->regZnum = array("zNum" => array("div#resultStats", "text"));
        } else {
            // Previously an unknown engine fell through with no URL and no
            // rules, producing notices and an empty result; fail loudly instead.
            throw new InvalidArgumentException("Unsupported search engine: " . $searcher);
        }

        $this->searcher = $searcher;
        $this->key = $key;
        $this->num = $num;
        $this->page = $page - 1;
        $this->getList();
    }

    /**
     * Builds the engine URL, scrapes the result rows, resolves each row's real
     * target URL, then reads the total result count and assembles $jsonArr.
     */
    private function getList()
    {
        $s = urlencode($this->key);
        $num = $this->num;
        $start = $this->num * $this->page;

        if ($this->searcher == 'baidu') {
            $url = "http://www.baidu.com/s?pn=" . $start . "&rn=" . $num . "&wd=" . $s;
            $reg_znum = '/[\d,]+/';
        } else {
            $url = "https://www.google.com.hk/search?filter=0&lr=&newwindow=1&safe=images&hl=en&as_qdr=all&num=" . $num . "&start=" . $start . "&q=" . $s;
            $reg_znum = '/([\d,]+) result(s)?/';
        }

        $searcherObj = new QueryList($url, $this->regArr, $this->regRange);

        // Replace each engine redirect/tracking link with the real target.
        foreach ($searcherObj->jsonArr as $i => $row) {
            if (empty($row['url'])) {
                continue;
            }
            if ($this->searcher == 'baidu') {
                $searcherObj->jsonArr[$i]['url'] = $this->getBaiduRealUrl($row['url']);
            } else {
                $searcherObj->jsonArr[$i]['url'] = $this->getGoogleRealUrl($row['url']);
            }
        }
        $this->jsonArr = $searcherObj->jsonArr;

        // Read the total number of results from the same downloaded page.
        $searcherObj->setQuery($this->regZnum);
        $zNum = isset($searcherObj->jsonArr[0]['zNum']) ? $searcherObj->jsonArr[0]['zNum'] : '';
        $zNum = preg_match($reg_znum, $zNum, $arr) ? $arr[0] : 0;
        // Strip thousands separators; the cast drops any trailing " results".
        $zNum = (int) str_replace(',', '', $zNum);

        // Total number of pages; guard against a zero page size.
        $zPage = ($this->num > 0) ? ceil($zNum / $this->num) : 0;

        $this->jsonArr = array(
            'num'   => $this->num,
            'page'  => ((int) $this->page + 1),
            'zNum'  => $zNum,
            'zPage' => $zPage,
            "s"     => "$this->key",
            'other' => array(
                'author' => 'JAE',
                'QQ'     => '734708094',
                'blog'   => 'http://blog.jaekj.com',
            ),
            'data'  => $this->jsonArr,
        );
    }

    /**
     * @return string The search result encoded as JSON.
     */
    public function getJson()
    {
        return json_encode($this->jsonArr);
    }

    /**
     * Resolves a Baidu jump link to the address it redirects to.
     *
     * @param string $url Link scraped from the Baidu result page.
     * @return string The redirect target, or $url unchanged when the headers
     *                cannot be fetched or no 301/302 redirect is reported.
     */
    private function getBaiduRealUrl($url)
    {
        $header = get_headers($url, 1);

        // get_headers() returns false on failure.
        if ($header === false || !isset($header[0])) {
            return $url;
        }

        $isRedirect = strpos($header[0], '301') !== false || strpos($header[0], '302') !== false;
        if (!$isRedirect || !isset($header['Location'])) {
            return $url;
        }

        if (is_array($header['Location'])) {
            // Several redirects were followed: keep the last location.
            return $header['Location'][count($header['Location']) - 1];
        }

        return $header['Location'];
    }

    /**
     * Extracts the real target from a Google result link, which carries it
     * URL-encoded in the "q" query parameter.
     *
     * @param string $url Link scraped from the Google result page.
     * @return string Decoded target, or $url unchanged when no "q=...&" part
     *                is present.
     */
    private function getGoogleRealUrl($url)
    {
        // Ungreedy (U) so the match stops at the first "&" after "q=".
        $reg_url = '/q=(.+)&/U';

        return preg_match($reg_url, $url, $arr) ? urldecode($arr[1]) : $url;
    }
}
$hj = new Searcher('google', 'oschina', 20, 2);
print_r($hj->jsonArr);

// Live demo address:
// http://blog.jaekj.com//jae/demo/searcher/Searcher_class.php?searcher=baidu&s=jaekj&num=20&page=1
Copy Code |