Crawling a single page such as http://jobs.hubu.edu.cn/Detail.aspx?ArticleChannelId=81&ArticleId=5722 with curl works, but crawling a series of pages of the same type produces an error. The URLs are stored in the array
$linkList, for example http://jobs.hubu.edu.cn/Detail.aspx?ArticleChannelId=81&ArticleId=5722, http://jobs.hubu.edu.cn/Detail.aspx?ArticleChannelId=81&ArticleId=5325, and so on.
function Getjobshubunotice () {$curl = Curl_init (' http://jobs.hubu.edu.cn/List.aspx? Articlechannelid=81 '); curl_setopt ($curl, Curlopt_returntransfer, 1); $result = curl_exec ($curl); Content processing $result = strip_tags ($result, '); $result = Stristr ($result, ' nbsp Current position: '); $result = Str_replace (' nbsp Current position: ', ' ', $result); $result = Stristr ($result, ' current 1/2 page ', true); $result = Stristr ($result, ' notice of announcement '); $result = Str_replace (' Notice of announcement ', ' ', $result); Preg_match_all ('/(? <=href=\ '). *? =\ ")/', $result, $arrayTemp); $linkList = $arrayTemp [0]; Preg_match_all ('/(? <=title=\ '). *? =\ ")/', $result, $arrayTemp); $titleList = $arrayTemp [0]; Preg_match_all ('/(<=\[) \d*\-\d* (? =\])/', $result, $arrayTemp); $dateList _temp = $arrayTemp [0]; $dateList = Array (); $linkList = Str_replace (' detail.aspx ', ' http://jobs.hubu.edu.cn/Detail.aspx ', $linkList); foreach ($dateList _temp as $key = + $value) {$dateList [$key] = date (' Y '). ' -'.$Value } $JobsHubu = Array (); Get the title, link, time $JobsHubu [0] = $dateList of each notice on the page respectively; $JobsHubu [1] = $titleList; $JobsHubu [2] = $linkList; return $JobsHubu;} function Makepage ($link) {...//omit part of code else if (starts ($link, ' Jobs.hubu ')) {echo "into Makep Age function "; echo "Process Web page". $link. '
'; $curl = Curl_init (); curl_setopt ($curl, Curlopt_url, $link); curl_setopt ($curl, Curlopt_returntransfer, 1); $result = curl_exec ($curl); echo $result; echo "Result End"; $result = Strip_tags ($result); $result = Stristr ($result, ' > announcement '); $result = Str_replace (' > Announcement ', ' ', $result); $result = Stristr ($result, ' $ (document). Ready ', true); $result = Trim ($result); $result = Str_replace ("\ r \ n", '
', $result); $result = Preg_replace ('/(\
) {1,}/', '
', $result); echo $result; Echo '
'; echo "Exit Makepage function"; return $result; }}
First, the Getjobshubunotice() function is used to get the link, title, and date of each notice; then the Makepage() function is used to fetch the content behind each link.
The following is the output from printing the link inside Makepage(); the same link opens in a browser without any problem.
Reply content:
Crawling a single page such as http://jobs.hubu.edu.cn/Detail.aspx?ArticleChannelId=81&ArticleId=5722 with curl works, but crawling a series of pages of the same type produces an error. The URLs are stored in the array
$linkList, for example http://jobs.hubu.edu.cn/Detail.aspx?ArticleChannelId=81&ArticleId=5722, http://jobs.hubu.edu.cn/Detail.aspx?ArticleChannelId=81&ArticleId=5325, and so on.
function Getjobshubunotice () {$curl = Curl_init (' http://jobs.hubu.edu.cn/List.aspx? Articlechannelid=81 '); curl_setopt ($curl, Curlopt_returntransfer, 1); $result = curl_exec ($curl); Content processing $result = strip_tags ($result, '); $result = Stristr ($result, ' nbsp Current position: '); $result = Str_replace (' nbsp Current position: ', ' ', $result); $result = Stristr ($result, ' current 1/2 page ', true); $result = Stristr ($result, ' notice of announcement '); $result = Str_replace (' Notice of announcement ', ' ', $result); Preg_match_all ('/(? <=href=\ '). *? =\ ")/', $result, $arrayTemp); $linkList = $arrayTemp [0]; Preg_match_all ('/(? <=title=\ '). *? =\ ")/', $result, $arrayTemp); $titleList = $arrayTemp [0]; Preg_match_all ('/(<=\[) \d*\-\d* (? =\])/', $result, $arrayTemp); $dateList _temp = $arrayTemp [0]; $dateList = Array (); $linkList = Str_replace (' detail.aspx ', ' http://jobs.hubu.edu.cn/Detail.aspx ', $linkList); foreach ($dateList _temp as $key = + $value) {$dateList [$key] = date (' Y '). ' -'.$Value } $JobsHubu = Array (); Get the title, link, time $JobsHubu [0] = $dateList of each notice on the page respectively; $JobsHubu [1] = $titleList; $JobsHubu [2] = $linkList; return $JobsHubu;} function Makepage ($link) {...//omit part of code else if (starts ($link, ' Jobs.hubu ')) {echo "into Makep Age function "; echo "Process Web page". $link. '
'; $curl = Curl_init (); curl_setopt ($curl, Curlopt_url, $link); curl_setopt ($curl, Curlopt_returntransfer, 1); $result = curl_exec ($curl); echo $result; echo "Result End"; $result = Strip_tags ($result); $result = Stristr ($result, ' > announcement '); $result = Str_replace (' > Announcement ', ' ', $result); $result = Stristr ($result, ' $ (document). Ready ', true); $result = Trim ($result); $result = Str_replace ("\ r \ n", '
', $result); $result = Preg_replace ('/(\
) {1,}/', '
', $result); echo $result; Echo '
'; echo "Exit Makepage function"; return $result; }}
First, the Getjobshubunotice() function is used to get the link, title, and date of each notice; then the Makepage() function is used to fetch the content behind each link.
The following is the output from printing the link inside Makepage(); the same link opens in a browser without any problem.
Where is your code?
Your PHP code is not wrong; my initial suspicion is that the URL you requested is incorrect. See:
The "error" that your code prints is actually the content of the page you fetched.
Update: I think I know why your request goes wrong.
The URL you extract from the web page is: Detail.aspx?ArticleChannelId=81&amp;ArticleId=2777
Here `&amp;` is the HTML entity for the character `&`.
When you output the URL (for example, by echoing it into a browser), the entity is rendered as a plain `&`, so the link looks correct.
When you make the real request, however, the raw string is used, and it still contains `&amp;` instead of `&`.
You just need to decode the entity (for example with htmlspecialchars_decode), or simply replace `&amp;` in the URL with `&`,
and then request it.
Update again:
ch = curl_init (); curl_setopt ($this->ch, Curlopt_useragent, ' mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; trident/4.0; Qqdownload 685; SLCC2;. NET CLR 2.0.50727;. NET CLR 3.5.30729;. NET CLR 3.0.30729;. net4.0c;. net4.0e)//ua curl_setopt ($this->ch, Curlopt_timeout, 40); curl_setopt ($this->ch, curlopt_followlocation, TRUE); curl_setopt ($this->ch, Curlopt_autoreferer, true); curl_setopt ($this->ch, Curlopt_returntransfer, TRUE); curl_setopt ($this->ch, curlopt_encoding, ' UTF-8 '); curl_setopt ($this->ch, Curlopt_cookiejar, $cookie _jar); curl_setopt ($this->ch, Curlopt_cookiefile, $cookie _jar); } function __destruct () {curl_close ($this->ch); Final public Function setreferer ($ref = ') {if ($ref! = ') {curl_setopt ($this->ch, Curlopt_refere R, $ref); }} Final Public function Get ($url, $header =false, $nobody =false) {curl_setopt ($this->ch, CurloPt_post, false); curl_setopt ($this->ch, Curlopt_url, $url); curl_setopt ($this->ch, Curlopt_header, $header); curl_setopt ($this->ch, Curlopt_nobody, $nobody); Return curl_exec ($this->ch); Final public Function Post ($url, $data =array (), $header =false, $nobody =false) {curl_setopt ($this->ch, Curlo Pt_url, $url); curl_setopt ($this->ch, Curlopt_header, $header); curl_setopt ($this->ch, Curlopt_nobody, $nobody); curl_setopt ($this->ch, Curlopt_post, true); curl_setopt ($this->ch, Curlopt_postfields, Http_build_query ($data)); Return curl_exec ($this->ch); }}const root_url = ' http://jobs.hubu.edu.cn/'; $home = ' http://jobs.hubu.edu.cn/List.aspx? Articlechannelid=81 '; $http = new HttpClient (' cookie.txt ');//Get list page $html = $http->get ($home);//use regular matches to all articles in the current page preg _match_all ('/. 
+?<\/a>\s+\[(\d+\-\d+) \]/', $html, $links); Array_shift ($links);//delete the first $size = count ($links [0]); for ($i =0; $i < $size; $i + +) {//has a match to the result $title = $links [0][$i]; $url = Htmlspecialchars_decode ($links [1][$i]);//Restore the HTML entity in the URL is the original character $date = date (' Y '). '-' . $links [2][$i]; echo $date, "\ T", $title, "\ T", $url, "\ n"; Makepage ($url);} function Makepage ($url) {global $http;//Use the HttpClient instance in the global variable $html = $http->get (root_url. $url);//stitching the full URL $html is the content of the page}
Since I had some spare time, I rewrote the code for you. The code above is my own version, based on your earlier code (fill in the body of makePage
with your own processing code). The result of running it:
Makepage ($link)
Does $link have a value, and is it of the right type?