AutoIt (Au3) script for crawling the Qiushibaike ("Embarrassing Things Encyclopedia") website
URL: 'http://www.qiushibaike.com/8hr/page/' & $pagenum & '?s=4512150'
#include <IE.au3>
#include <File.au3>
#include <String.au3>
#include <Array.au3>
#include <Debug.au3>
#include <Date.au3>

; Collects stories from qiushibaike.com.
;
; For every page index in [$startindex, $endindex] the script downloads the
; listing page to a temporary file, extracts the text found between
; <span> ... </span> pairs with a regular expression, and appends each
; non-empty match, numbered, to a text file named "Qiushibaike_<MON><MDAY>.txt"
; in the current working directory. A message box reports the total when done.

Local $strUrl1 = "http://www.qiushibaike.com/8hr/page/2?s=4512150"

; Output file name: Qiushibaike_<month><day>.txt
Local $filename1 = "Qiushibaike"
$filename1 = $filename1 & '_' & @MON
$filename1 = $filename1 & @MDAY
$filename1 = $filename1 & '.txt'

; Temporary location for each downloaded page.
Local $filesave = @TempDir & "\qb.html"

Local $pageindex
Local $startindex = 2
Local $endindex = 10
Local $sHTML
Local $storycount = 0

; Per-page working variables, declared once instead of inside the loop.
Local $hDownload, $nBytes, $bDownloaded, $aArray, $item, $strnum, $storycontent

; Create (or truncate) the output file, then open it for appending.
_FileCreate($filename1)
Local $file = FileOpen($filename1, 1)
If $file = -1 Then
    MsgBox(0, "Error", "Unable to open file.")
    Exit
EndIf

For $pageindex = $startindex To $endindex Step 1
    $strUrl1 = Makeupurl($pageindex)

    ; Background download (options = 1: force reload, background = 1),
    ; polled until InetGetInfo reports completion.
    $hDownload = InetGet($strUrl1, $filesave, 1, 1)
    Do
        ; NOTE(review): the original delay was lost in the source; 250 ms is an assumed value.
        Sleep(250)
    Until InetGetInfo($hDownload, 2)
    $nBytes = InetGetInfo($hDownload, 0)
    ; Index 3 is True only when the download finished successfully.
    $bDownloaded = InetGetInfo($hDownload, 3)
    InetClose($hDownload)
    ConsoleWrite($pageindex & '/' & $endindex & "---down bytes =" & $nBytes & @LF)

    ; Skip pages that failed to download instead of parsing stale or missing data.
    If Not $bDownloaded Then
        ConsoleWrite($pageindex & '/' & $endindex & "---download failed, page skipped" & @LF)
        FileDelete($filesave)
        ContinueLoop
    EndIf

    ; Read the whole page, then remove the temporary file.
    $sHTML = FileRead($filesave)
    FileDelete($filesave)

    ; Flag 3 returns an array of all global matches; on no match the result
    ; is not an array and UBound() yields 0.
    $aArray = StringRegExp($sHTML, '(?<=<span>)\n+[^/]+\n+(?=</span>)', 3)
    ConsoleWrite("array size =" & UBound($aArray) & @CRLF)
    If Not IsArray($aArray) Then ContinueLoop

    For $i = 0 To (UBound($aArray) - 1) Step 1
        $item = $aArray[$i]
        If StringLen($item) > 0 Then
            ; Write the running story number on its own line.
            $strnum = $storycount + 1
            $strnum = $strnum & "." & @CRLF
            FileWrite($file, $strnum)

            ; Drop the line feeds surrounding the story text, then terminate the line.
            $storycontent = StringReplace($item, @LF, "")
            $storycontent = $storycontent & @CRLF
            FileWrite($file, $storycontent)

            $storycount = $storycount + 1
        EndIf
    Next
Next

FileClose($file)
MsgBox(0, "QSBK", "Complete , story count =" & $storycount & ', story=' & $filename1)
Exit

; Builds the listing-page URL for the given page number.
;   $pagenum - page index to request
;   Returns  - the URL string for that page
Func Makeupurl($pagenum)
    Local $strUrl = 'http://www.qiushibaike.com/8hr/page/' & $pagenum & '?s=4512150'
    Return $strUrl
EndFunc
AutoIt (Au3) script for crawling the Qiushibaike ("Embarrassing Things Encyclopedia") website