This example uses curl and regular expressions to scrape the non-VIP chapters of novels from a Chinese online-fiction site (the code below targets www.motie.com); enter a novel's ID to download it. Dependency: curl. The example makes simple use of curl, regular expressions, AJAX and related techniques, so it is suitable for beginners. When testing locally, make sure the machine has a network connection and that the curl extension is enabled in PHP.
<?php
// The session stores, per book ID, the index of the next chapter to download
// (see SpiderTools::createBookById), so an interrupted run can resume.
// Guarded because the request handler that includes this file also starts the session.
if (session_id() === '') {
    session_start();
}

// Disabled in the original; presumably an auto-refresh so the browser re-requests the page.
# header("Refresh: 30; http://www.test.com:8080");

/**
 * Wraps the scraping logic in a class so that articles can be captured automatically.
 *
 * All pages are fetched from BASE_URL:
 *   - BASE_URL . <bookId>                 book page
 *   - BASE_URL . <bookId>_<chapterId>     chapter page
 *   - BASE_URL . <bookId> . "/chapter"    chapter list
 *
 * NOTE(review): the regular expressions in the published listing lost their opening
 * HTML tags when the page was extracted. The patterns below are reconstructed from the
 * parts that survived (closing tags, attribute text, URL scheme) and must be verified
 * against the live page markup before use.
 */
class SpiderTools
{
    /** Base URL shared by the book, chapter and chapter-list pages. */
    const BASE_URL = 'http://www.motie.com/book/';

    /**
     * Fetches a page and extracts its title.
     *
     * @param string|int $aid A numeric book ID, or a chapter ID of the form "bookId_chapterId".
     * @return string The captured title, or '' when the pattern does not match.
     * @throws RuntimeException When the HTTP transfer fails.
     */
    public function getBookNameById($aid)
    {
        if (is_numeric($aid)) {
            // Book page: the title is the link text inside the <h1> heading.
            // NOTE(review): the opening <h1>/<a> tags are inferred from the surviving
            // "</a></h1>" tail of the original pattern.
            $pattern = '/<h1[^>]*>\s*<a[^>]*>\s*(.*?)\s*<\/a>\s*<\/h1>/i';
        } else {
            // Chapter page: use the document <title>. The sample in the original comment
            // (machine-translated) reads
            // "The Family Survival path of the zombie outbreak _ Chapter 1 The zombie outbreak ... _ Iron grinding".
            $pattern = '/<title>\s*(.*?)\s*<\/title>/i';
        }

        $html = self::fetch(self::BASE_URL . $aid);

        $match = array();
        if (!preg_match($pattern, $html, $match)) {
            return '';
        }

        return $match[1];
    }

    /**
     * Fetches a chapter page and extracts the chapter text.
     *
     * @param string $aid Chapter ID of the form "bookId_chapterId".
     * @return string The trimmed chapter text, or '' when the content block is not found.
     * @throws RuntimeException When the HTTP transfer fails.
     */
    public function getBookContextById($aid)
    {
        $html = self::fetch(self::BASE_URL . $aid);

        // The original matched from the "page-content" wrapper through the
        // <pre ... id="html_content_<digits>"> element, then dropped a fixed 180-byte
        // prefix and cut the rest at a delimiter that was lost in extraction.
        // Capturing the <pre> body directly avoids both the magic offset and the lost delimiter.
        // NOTE(review): confirm whether trailing markup inside the <pre> must also be cut off.
        $pattern = '/<pre[^>]*\bid="html_content_\d*"[^>]*>([\s\S]*?)<\/pre>/ui';

        $match = array();
        if (!preg_match($pattern, $html, $match)) {
            return '';
        }

        return trim($match[1]);
    }

    /**
     * Static entry point that writes a novel to disk; can be called directly.
     *
     * - Numeric $id (a book): looks up every chapter ID and calls itself once per chapter,
     *   recording progress in $_SESSION["<id>_fid"] so that a later call resumes after
     *   the last chapter written.
     * - Non-numeric $id ("bookId_chapterId"): downloads that chapter and writes it to
     *   "./<book name>/<first word of the page title>.txt".
     *
     * Directory and file names are converted from UTF-8 to GBK before touching the
     * file system, as in the original, so the saved names are not garbled.
     *
     * @param string|int $id Book ID or chapter ID.
     * @return void
     * @throws RuntimeException When a download fails, no title can be found, or the
     *                          directory/file cannot be written.
     */
    public static function createBookById($id)
    {
        if (is_numeric($id)) {
            $ids = self::getBookIdsById($id);
            $total = count($ids);

            // Resume position: initialise on first use instead of reading an undefined key.
            $key = $id . "_fid";
            if (!isset($_SESSION[$key])) {
                $_SESSION[$key] = 0;
            }

            // Strictly "<": the original "<=" read one element past the end of $ids.
            for ($i = (int) $_SESSION[$key]; $i < $total; $i++) {
                self::createBookById($id . "_" . $ids[$i]);
                // Advance only after the chapter was written, so a failure retries it.
                $_SESSION[$key] = $i + 1;
            }

            return;
        }

        echo "<br/> init begin start write! ";

        $st = new self();
        $cons = trim($st->getBookContextById($id));
        $title = $st->getBookNameById($id);
        if ($title === '') {
            throw new RuntimeException('No title found for chapter ' . $id);
        }

        // The published listing shows explode("", ...), which is an invalid empty delimiter.
        // A single space fits the sample title, whose first word is "<book name>_<chapter label>".
        // NOTE(review): confirm the delimiter against real page titles.
        $t = explode(" ", $title);
        $dir = explode("_", $t[0]);

        $wzdir = self::safeName($dir[0]);               // book name, used as the directory name
        $wzchapter = isset($dir[1]) ? $dir[1] : '';     // chapter label, only echoed below

        // Keep the UTF-8 $wzdir for building the file path; encoding it twice would corrupt it.
        $wzdir2 = self::toGbk($wzdir);
        if (!file_exists($wzdir2) && !mkdir($wzdir2)) {
            throw new RuntimeException('Cannot create directory for chapter ' . $id);
        }

        $wztitle = self::toGbk("./" . $wzdir . "/" . self::safeName($t[0]) . ".txt");
        if (file_put_contents($wztitle, $cons) === false) {
            throw new RuntimeException('Cannot write file for chapter ' . $id);
        }

        echo "$wzdir " . $wzchapter . "Write successful";
    }

    /**
     * Obtains all chapter IDs of a novel from its chapter-list page.
     *
     * @param string|int $aid Book ID.
     * @return array List of chapter-ID strings in page order; empty when nothing matches.
     * @throws RuntimeException When the HTTP transfer fails.
     */
    public static function getBookIdsById($aid)
    {
        $html = self::fetch(self::BASE_URL . $aid . "/chapter");

        // The surviving part of the original pattern anchors on
        // <li class="" createdate="YYYY-MM-DD HH:MM:SS">; the link that carried the
        // chapter ID was stripped, leaving no capture group although $arr[1] is returned.
        // The group is rebuilt from the chapter URL scheme ".../book/<bookId>_<chapterId>".
        // The original /U flag is dropped: it inverts "*?" to greedy, the opposite of the
        // minimal match the author's comment asks for.
        // NOTE(review): verify against the live chapter-list markup.
        $pattern = '/<li class="" createdate="\d{4}-\d{2}-\d{2}\s*\d{2}:\d{2}:\d{2}">'
            . '[\s\S]*?<a[^>]*\/book\/\d+_(\d+)[^>]*>/';

        $arr = array();
        if (!preg_match_all($pattern, $html, $arr, PREG_PATTERN_ORDER)) {
            return array();
        }

        return $arr[1];
    }

    /**
     * Downloads a URL with curl and returns the response body.
     *
     * @param string $url
     * @return string
     * @throws RuntimeException When curl reports a transfer error.
     */
    private static function fetch($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);      // return the body instead of printing it
        curl_setopt($ch, CURLOPT_HEADER, 0);              // do not include response headers
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT_MS, 0);   // kept from the original

        // Execute exactly once; the original ran curl_exec() a second time just to test for failure.
        $output = curl_exec($ch);
        $error = ($output === false) ? curl_error($ch) : null;
        curl_close($ch);

        if ($error !== null) {
            throw new RuntimeException('curl error: ' . $error);
        }

        return $output;
    }

    /**
     * Makes a scraped string safe to use as a single path component.
     *
     * The name comes from a remote page, so path separators and other characters that
     * are special in file names are replaced. Only ASCII bytes are touched, which is
     * safe on UTF-8 input.
     *
     * @param string $name
     * @return string
     */
    private static function safeName($name)
    {
        $clean = str_replace(array('/', '\\', ':', '*', '?', '"', '<', '>', '|'), '_', $name);
        if ($clean === '' || $clean === '.' || $clean === '..') {
            return '_';
        }

        return $clean;
    }

    /**
     * Converts a UTF-8 string to GBK for use as a file-system name.
     *
     * @param string $text
     * @return string
     * @throws RuntimeException When the conversion fails.
     */
    private static function toGbk($text)
    {
        $converted = iconv("UTF-8", "GBK", $text);
        if ($converted === false) {
            throw new RuntimeException('Cannot convert name to GBK');
        }

        return $converted;
    }
}
?>

<?php
// Request handler: downloads the novel whose numeric ID arrives in the "bid" request parameter.
session_start();
require_once("SpiderTools.class.php");

if (isset($_REQUEST["bid"]) && $_REQUEST["bid"]) {
    $bid = trim($_REQUEST["bid"]);
    if (is_numeric($bid)) {
        try {
            SpiderTools::createBookById($bid);
        } catch (RuntimeException $e) {
            // Report download/write failures to the page instead of aborting with a fatal error.
            echo "<br/>" . htmlspecialchars($e->getMessage()) . "<br/>";
        }
    } else {
        echo "<br/> enter the correct article ID <br/>";
    }
}
?>

<html>
<head><meta charset="UTF-8"/></head>
<title>download novels</title>
- Enter the ID of the novel you want to read in order to download it.
-
|