Capturing a page's Ajax (asynchronously loaded) content is not much different from capturing ordinary page content, because Ajax is just an asynchronous HTTP request. As the following example shows, you only need a tool such as Firebug to find the request's backend service URL and the parameters it passes, and then request that URL with the same parameters.
Use Firebug's Net (network) panel to find the Ajax request.
If you simply fetch the page itself, the data is not in the returned content; what you get instead is a bunch of JS code.
Code
// Fetch Ajax-loaded content in two steps:
//   1. GET the landing page so the server issues its cookies, and save them
//      to a cookie-jar file.
//   2. POST to the DWR backend URL (found with the browser's network panel),
//      sending the saved cookies and the same parameters the page's script sends.

// Temporary file used as the cookie jar. PHP variable names are
// case-sensitive, so this exact spelling must be used everywhere below.
$cookie_file = tempnam('./temp', 'cooker');

// --- Step 1: load the landing page and store its cookies ------------------
$ch = curl_init();
$url1 = "http://www.cdut.edu.cn/default.html";
curl_setopt($ch, CURLOPT_URL, $url1);
// NOTE(review): the option value was garbled in the source text;
// CURL_HTTP_VERSION_1_1 is assumed - confirm.
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($ch, CURLOPT_HEADER, 0);
// NOTE(review): the option name was garbled in the source text;
// CURLOPT_RETURNTRANSFER is assumed because the result of curl_exec()
// is assigned to $content - confirm.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
// Accept and decode gzip-compressed responses.
curl_setopt($ch, CURLOPT_ENCODING, 'gzip');
// File in which cookies are saved when the handle is closed.
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
$content = curl_exec($ch);
curl_close($ch);

// --- Step 2: call the Ajax backend with the stored cookies ----------------
$ch3 = curl_init();
$url3 = "http://www.cdut.edu.cn/xww/dwr/call/plaincall/portalAjax.getNewsXml.dwr";

// Request body copied from the browser's network panel, one parameter per
// line for readability; the pieces are joined with '&' and sent unmodified.
$curlPost = implode('&', array(
    'callCount=1',
    // NOTE(review): the source text read "custom 20118.html" here, which
    // looks garbled; the value below is inferred from c0-param1 - confirm
    // against the real request.
    'page=/xww/type/1000020118.html',
    // NOTE(review): "Role" is the placeholder found in the source text for
    // both session IDs; replace with the values captured from the real request.
    'httpSessionId=Role',
    'scriptSessionId=Role',
    'c0-scriptName=portalAjax',
    'c0-methodName=getNewsXml',
    'c0-id=0',
    'c0-param0=string:10000201',
    'c0-param1=string:1000020118',
    'c0-param2=string:news_',
    'c0-param3=number:5969',
    'c0-param4=number:1',
    'c0-param5=null:null',
    'c0-param6=null:null',
    'batchId=0',
));

curl_setopt($ch3, CURLOPT_URL, $url3);
curl_setopt($ch3, CURLOPT_POST, 1);
curl_setopt($ch3, CURLOPT_POSTFIELDS, $curlPost);
// File from which the cookies saved in step 1 are read and sent.
curl_setopt($ch3, CURLOPT_COOKIEFILE, $cookie_file);
// CURLOPT_RETURNTRANSFER is not set on this handle (as in the original), so
// curl_exec() prints the response directly and $content1 holds only the
// success flag.
$content1 = curl_exec($ch3);
curl_close($ch3);