Writing your own universal web-scraping (collection) class in PHP, based on phpQuery

Source: Internet
Author: User
Consider this a small side dish: it is my first time sharing code. This is a PHP scraping class I wrote some time ago and have been using ever since. I find it simple yet powerful: with only a little knowledge of CSS selectors you can scrape any page, HTTPS pages included, and it is good enough for simple scraping jobs.
  /**
   * Universal list scraping class.
   * Version: V1.3
   * Author: JAE
   * Blog: http://blog.jaekj.com
   *
   * Downloads a page with cURL and extracts fields from it using phpQuery
   * CSS selectors.
   */
  require_once './phpQuery/phpQuery/phpQuery.php';

  class QueryList
  {
      /** URL of the page that was fetched. */
      private $pageURL;

      /** Extraction rules: array("name" => array("selector", "type"), ...). */
      private $regArr = array();

      /** Extracted rows; each row maps a rule name to the extracted value. */
      public $jsonArr = array();

      /** Optional block selector; empty means "match against the whole document". */
      private $regRange;

      /** Raw response body returned by cURL. */
      private $html;

      /**
       * Fetches the page and, when rules are given, runs the extraction immediately.
       *
       * Selector array format: array("name" => array("selector", "type"), ...).
       * "type" is "text", "html", or any other string, which is treated as the
       * name of an attribute to read (e.g. "href", "src").
       * Block selector: a rule that first selects several chunks of the page;
       * every rule in the selector array is then evaluated inside each chunk.
       *
       * @param string $pageURL  Address of the page to scrape.
       * @param array  $regArr   Selector array (see format above).
       * @param string $regRange Block selector; leave empty to match page-wide.
       *
       * @throws RuntimeException When the page cannot be downloaded.
       */
      public function __construct($pageURL, $regArr = array(), $regRange = '')
      {
          $this->pageURL = $pageURL;

          $ch = curl_init();
          curl_setopt($ch, CURLOPT_URL, $this->pageURL);
          // Certificate checks are switched off so that https:// pages can be
          // fetched without a CA bundle.
          // NOTE(review): this accepts any certificate; do not reuse this for
          // anything where the authenticity of the response matters.
          curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
          curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
          curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
          $body = curl_exec($ch);
          $error = curl_error($ch);
          curl_close($ch);

          // curl_exec() returns false on failure; surface it instead of
          // handing a boolean to the HTML parser.
          if ($body === false) {
              throw new RuntimeException("Failed to fetch {$pageURL}: {$error}");
          }
          $this->html = $body;

          if (!empty($regArr)) {
              $this->regArr = $regArr;
              $this->regRange = $regRange;
              $this->getList();
          }
      }

      /**
       * Runs a new set of rules against the page that was already downloaded.
       *
       * Previous results in $jsonArr are discarded first.
       *
       * @param array  $regArr   Selector array, same format as the constructor.
       * @param string $regRange Block selector; leave empty to match page-wide.
       */
      public function setQuery($regArr, $regRange = '')
      {
          $this->jsonArr = array();
          $this->regArr = $regArr;
          $this->regRange = $regRange;
          $this->getList();
      }

      /**
       * Applies the current rules to the downloaded HTML and fills $jsonArr.
       */
      private function getList()
      {
          $doc = phpQuery::newDocumentHTML($this->html);

          if (!empty($this->regRange)) {
              // Block mode: one result row per matched block, with every rule
              // evaluated inside that block.
              $row = 0;
              foreach (pq($doc)->find($this->regRange) as $block) {
                  foreach ($this->regArr as $key => $rule) {
                      $match = pq($block)->find($rule[0]);
                      $this->jsonArr[$row][$key] = $this->extractValue($match, $rule[1]);
                  }
                  $row++;
              }

              return;
          }

          // Page-wide mode: each rule is matched against the whole document and
          // its n-th match is stored in row n, so the row counter restarts for
          // every rule.
          foreach ($this->regArr as $key => $rule) {
              $row = 0;
              foreach (pq($doc)->find($rule[0]) as $node) {
                  $this->jsonArr[$row++][$key] = $this->extractValue($node, $rule[1]);
              }
          }
      }

      /**
       * Reads one value from a matched node according to the rule type.
       *
       * @param mixed  $node Matched node or phpQuery selection.
       * @param string $type "text", "html", or an attribute name.
       *
       * @return mixed Trimmed text/HTML, or the result of attr() for attributes.
       */
      private function extractValue($node, $type)
      {
          switch ($type) {
              case 'text':
                  return trim(pq($node)->text());
              case 'html':
                  return trim(pq($node)->html());
              default:
                  return pq($node)->attr($type);
          }
      }

      /**
       * Returns the extracted rows encoded as JSON.
       *
       * @return string|false Output of json_encode() for $jsonArr.
       */
      public function getJSON()
      {
          return json_encode($this->jsonArr);
      }
  }
Copy Code
  require 'query/QueryList.class.php';

  // NOTE(review): the selectors and array keys below were reconstructed from a
  // garbled listing; confirm their exact casing against the live page markup.

  // Scrape the OSChina code-share list: title, link and author.
  $url = "http://www.oschina.net/code/list";
  $reg = array(
      "title"  => array(".code_title a:eq(0)", "text"),
      "url"    => array(".code_title a:eq(0)", "href"),
      "author" => array("img", "title"),
  );
  $rang = ".code_list li";
  $hj = new QueryList($url, $reg, $rang);
  $arr = $hj->jsonArr;
  print_r($arr);

  // To also grab the avatars of the TOP40 active contributors shown on the
  // right-hand side of the same page, and get them as JSON, reuse the object:
  $reg = array("portrait" => array(".hot_top img", "src"));
  $hj->setQuery($reg);
  $json = $hj->getJSON();
  echo $json . " ";

  // Scrape an OSChina content (detail) page.
  $url = "http://www.oschina.net/code/snippet_186288_23816";
  $reg = array(
      "title" => array(".QTitle h1", "text"),
      "con"   => array(".Content", "html"),
  );
  $hj = new QueryList($url, $reg);
  $arr = $hj->jsonArr;
  print_r($arr);

  // As these examples show, the class makes scraping very convenient.
Copy Code
  /**
   * Home-made Baidu and Google search API.
   * Version: V2.0
   * Author: JAE
   * Blog: http://blog.jaekj.com
   *
   * Scrapes a search-result page through QueryList and exposes the results,
   * together with paging information, as an array or as JSON.
   */
  require_once 'QueryList_class.php';

  class Searcher
  {
      /** Normalised engine name: "baidu" or "google". */
      private $searcher;

      /** Search keyword. */
      private $key;

      /** Number of results requested per page. */
      private $num;

      /** Zero-based page index. */
      private $page;

      /** QueryList rules used to extract each search result. */
      private $regArr;

      /** Block selector that delimits one search result. */
      private $regRange;

      /** QueryList rule used to extract the "total results" text. */
      private $regZnum;

      /** Result array built by getList(); see getList() for its keys. */
      public $jsonArr;

      /**
       * Runs the search immediately and stores the outcome in $jsonArr.
       *
       * @param string $searcher Search engine: "baidu" or "google" (case-insensitive).
       * @param string $key      Search keyword.
       * @param int    $num      Number of results per page.
       * @param int    $page     One-based page number.
       *
       * @throws InvalidArgumentException When the search engine is not supported.
       */
      public function __construct($searcher, $key, $num, $page)
      {
          $searcher = strtolower(trim((string) $searcher));

          if ($searcher === 'baidu') {
              $this->regArr = array(
                  "title" => array("h3.t a,#ting_singlesong_box a", "text"),
                  "tCon"  => array(
                      "div.c-abstract,font:slice(0,2),div#weibo,table tr:eq(0),"
                      . "div.c-abstract-size p:eq(0),div.vd_sitcom_new_tinfo",
                      "text"
                  ),
                  "url"   => array("h3.t a,#ting_singlesong_box a", "href"),
              );
              $this->regRange = 'table.result,table.result-op';
              $this->regZnum = array("zNum" => array("span.nums", "text"));
          } elseif ($searcher === 'google') {
              $this->regArr = array(
                  "title" => array("h3.r a", "text"),
                  "tCon"  => array("span.st", "text"),
                  "url"   => array("h3.r a", "href"),
              );
              $this->regRange = 'li.g';
              $this->regZnum = array("zNum" => array("div#resultStats", "text"));
          } else {
              // Without this guard no search URL would ever be built and the
              // request below would run against an undefined variable.
              throw new InvalidArgumentException("Unsupported search engine: {$searcher}");
          }

          $this->searcher = $searcher;
          $this->key = $key;
          $this->num = (int) $num;
          // Pages are one-based for callers and zero-based internally.
          $this->page = max(1, (int) $page) - 1;
          $this->getList();
      }

      /**
       * Fetches one result page and builds $jsonArr.
       *
       * Resulting keys: num, page (one-based), zNum (total results reported by
       * the engine), zPage (total pages), s (keyword), other (author info) and
       * data (list of rows with title, tCon and url).
       */
      private function getList()
      {
          $s = urlencode($this->key);
          $num = $this->num;
          $start = $this->num * $this->page;
          $isBaidu = $this->searcher === 'baidu';

          if ($isBaidu) {
              $url = "http://www.baidu.com/s?pn=$start&rn=$num&wd=$s";
              $totalPattern = '/[\d,]+/';
          } else {
              $url = "https://www.google.com.hk/search?filter=0&lr=&newwindow=1&safe=images"
                  . "&hl=en&as_qdr=all&num=$num&start=$start&q=$s";
              $totalPattern = '/([\d,]+) result(s)?/';
          }

          $searcherObj = new QueryList($url, $this->regArr, $this->regRange);

          // Both engines wrap result links in a redirect; replace each with the
          // real target address.
          foreach ($searcherObj->jsonArr as $i => $row) {
              if (empty($row['url'])) {
                  continue;
              }
              $searcherObj->jsonArr[$i]['url'] = $isBaidu
                  ? $this->getBaiduRealUrl($row['url'])
                  : $this->getGoogleRealUrl($row['url']);
          }
          $rows = $searcherObj->jsonArr;

          // Total number of results, parsed from the engine's summary text.
          $searcherObj->setQuery($this->regZnum);
          $totalText = isset($searcherObj->jsonArr[0]['zNum'])
              ? (string) $searcherObj->jsonArr[0]['zNum']
              : '';
          $zNum = 0;
          if (preg_match($totalPattern, $totalText, $m)) {
              // Use the captured number when the pattern has a group, otherwise
              // the whole match; thousands separators are dropped either way.
              $digits = isset($m[1]) ? $m[1] : $m[0];
              $zNum = (int) str_replace(',', '', $digits);
          }

          // Total number of pages; guarded so a page size of 0 cannot divide by zero.
          $zPage = $this->num > 0 ? (int) ceil($zNum / $this->num) : 0;

          $this->jsonArr = array(
              'num'   => $this->num,
              'page'  => $this->page + 1,
              'zNum'  => $zNum,
              'zPage' => $zPage,
              's'     => (string) $this->key,
              'other' => array(
                  'author' => 'JAE',
                  'QQ'     => '734708094',
                  'blog'   => 'http://blog.jaekj.com',
              ),
              'data'  => $rows,
          );
      }

      /**
       * Returns the search result encoded as JSON.
       *
       * @return string|false Output of json_encode() for $jsonArr.
       */
      public function getJSON()
      {
          return json_encode($this->jsonArr);
      }

      /**
       * Resolves a Baidu redirect link to the address it points to.
       *
       * @param string $url Link taken from a Baidu result.
       *
       * @return string The first Location header of a 301/302 response, or
       *                $url unchanged when no redirect could be determined.
       */
      private function getBaiduRealUrl($url)
      {
          $headers = get_headers($url, 1);
          if ($headers === false || !isset($headers[0])) {
              return $url;
          }

          $status = $headers[0];
          if (strpos($status, '301') === false && strpos($status, '302') === false) {
              return $url;
          }

          // Header names arrive in whatever case the server sent them.
          $headers = array_change_key_case($headers, CASE_LOWER);
          if (!isset($headers['location'])) {
              return $url;
          }

          // Several redirects yield an array of Location values; take the
          // first hop. (An alternative that takes the last element appears in
          // the listing right before this return; it is read here as a
          // disabled line whose comment marker was lost.)
          $location = $headers['location'];

          return is_array($location) ? $location[0] : $location;
      }

      /**
       * Extracts the target address from a Google redirect link.
       *
       * @param string $url Link taken from a Google result.
       *
       * @return string URL-decoded value of the "q" parameter, or $url
       *                unchanged when the link has no "q=...&" part.
       */
      private function getGoogleRealUrl($url)
      {
          // Lazy match: stop at the first "&" after "q=".
          return preg_match('/q=(.+?)&/', $url, $m) ? urldecode($m[1]) : $url;
      }
  }
  // Example: second page of 20 Google results for "oschina".
  $hj = new Searcher('google', 'oschina', 20, 2);
  print_r($hj->jsonArr);

  // Live demo address:
  // http://blog.jaekj.com//jae/demo/searcher/Searcher_class.php?searcher=baidu&s=jaekj&num=20&page=1
Copy Code
  • Contact Us

    The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

    If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

    A Free Trial That Lets You Build Big!

    Start building with 50+ products and up to 12 months usage for Elastic Compute Service

    • Sales Support

      1 on 1 presale consultation

    • After-Sales Support

      24/7 Technical Support 6 Free Tickets per Quarter Faster Response

    • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.