AutoIt (Au3) crawler for the Budejie website
Website address: http://www.budejie.com/text/
The regular expression is somewhat clumsy; please bear with it.
Code:
#include <IE.au3>
#include <File.au3>
#include <String.au3>
#include <Array.au3>
#include <Debug.au3>
#include <Date.au3>

; Collects the text stories listed on www.budejie.com.
;
; For every listing page from $startindex to $endindex the script downloads
; the page to a temporary file, extracts the story texts with a regular
; expression, and appends them, numbered, to "Budejie_<MMDD>.txt" (a relative
; path, so the file is created in the current working directory).

Local $strUrl1 = "http://www.budejie.com/text/"
Local $filename1 = "Budejie" & '_' & @MON & @MDAY & '.txt'
Local $filesave = @TempDir & "\budejie.html"
Local $startindex = 1
Local $endindex = 5
Local $storycount = 0
Local $sHTML, $nBytes, $iDownloadError, $aArray, $storycontent

; Mode 2 = $FO_OVERWRITE: create the file, or erase the previous contents of a
; file written earlier on the same day.
Local $file = FileOpen($filename1, 2)
If $file = -1 Then
	MsgBox(0, "Error", "Unable to open file.")
	Exit 1
EndIf

For $pageindex = $startindex To $endindex
	$strUrl1 = Makeupurl($pageindex)

	$sHTML = _Budejie_DownloadPage($strUrl1, $filesave)
	; Capture both macros immediately: the next function call resets them.
	$iDownloadError = @error
	$nBytes = @extended
	ConsoleWrite($pageindex & ' / ' & $endindex & "---down bytes =" & $nBytes & @LF)
	If $iDownloadError Then
		; A failed page must not abort the whole run; move on to the next one.
		ConsoleWrite("Download failed for " & $strUrl1 & @CRLF)
		ContinueLoop
	EndIf

	$aArray = _Budejie_ExtractStories($sHTML)
	ConsoleWrite("Aarray size =" & UBound($aArray) & @CRLF)
	If UBound($aArray) <= 0 Then ContinueLoop

	For $i = 0 To UBound($aArray) - 1
		; Flag 3 = $STR_STRIPLEADING + $STR_STRIPTRAILING: drop the layout
		; whitespace surrounding the story text inside the anchor.
		$storycontent = StringStripWS($aArray[$i], 3)
		If StringLen($storycontent) > 0 Then
			$storycount += 1
			FileWrite($file, $storycount & "." & @CRLF)
			FileWrite($file, $storycontent & @CRLF)
		EndIf
	Next
Next

FileClose($file)
MsgBox(0, "Budejie", "Complete , story count =" & $storycount & ', story=' & $filename1)
Exit

; Builds the URL of one listing page.
;   $pagenum - 1-based page number.
; Returns the base listing URL for page 1, and the base URL followed by the
; page number for every other page.
Func Makeupurl($pagenum)
	Local $strUrl = 'http://www.budejie.com/text/'
	If $pagenum <> 1 Then $strUrl &= $pagenum
	Return $strUrl
EndFunc   ;==>Makeupurl

; Downloads $sUrl through the temporary file $sTempFile and returns its text.
;   Success: returns the page content; @extended holds the bytes downloaded.
;   Failure: returns "" and sets @error (1 = download failed, 2 = temporary
;            file could not be read); @extended still holds the byte count.
; The temporary file is deleted in every case.
Func _Budejie_DownloadPage($sUrl, $sTempFile)
	; Options 1 = force a reload from the remote site; background 1 = return
	; immediately with a download handle.
	Local $hDownload = InetGet($sUrl, $sTempFile, 1, 1)
	Do
		Sleep(250)
	Until InetGetInfo($hDownload, 2) ; index 2: download complete

	Local $nRead = InetGetInfo($hDownload, 0) ; index 0: bytes read so far
	Local $bSuccess = InetGetInfo($hDownload, 3) ; index 3: download succeeded
	InetClose($hDownload)

	If Not $bSuccess Then
		FileDelete($sTempFile)
		Return SetError(1, $nRead, "")
	EndIf

	Local $sContent = FileRead($sTempFile)
	Local $iReadError = @error
	FileDelete($sTempFile)
	If $iReadError Then Return SetError(2, $nRead, "")

	Return SetExtended($nRead, $sContent)
EndFunc   ;==>_Budejie_DownloadPage

; Extracts the story texts from one listing page.
;   $sHTML - HTML source of the page.
; Returns an array with one element per story (the text inside the
; <a href="/detail-N.html"> anchor of each "j-r-list-c-desc" block), or an
; empty string when the page contains no match.
Func _Budejie_ExtractStories($sHTML)
	; \s* tolerates any spacing or line breaks between the tags and around
	; "=", (?i) ignores letter case, and the capture group returns the story
	; text directly so no second pass is needed to strip the tag prefix.
	Local $sPattern = '(?i)<div\s+class\s*=\s*"j-r-list-c-desc"\s*>\s*' & _
			'<a\s+href\s*=\s*"/detail-\d+\.html"\s*>([^<]+)</a>'
	; Flag 3 = $STR_REGEXPARRAYGLOBALMATCH: array of all captured groups.
	Local $aStories = StringRegExp($sHTML, $sPattern, 3)
	If @error Then Return ""
	Return $aStories
EndFunc   ;==>_Budejie_ExtractStories
AutoIt (Au3) crawler for the Budejie website