Sharing code for scraping novels

Source: Internet
Author: User
Sharing a piece of novel-scraping (collection) code. Recently, ET used the "Locomotive" collection tool to scrape novels, but its rules often failed to work. I ran into a case where the novel site xiaoshuo520 uses an iframe, so I wrote a scraper myself. At first, I thought two regular expressions (one for the chapter list, one for the content) would solve the problem, but writing it turned out to be more complicated. I revised it through several versions; the biggest trouble was how to encapsulate the scraping code.
Recently, ET used the "Locomotive" collection tool to scrape novels, but its rules often failed to work. I ran into a case where the novel site xiaoshuo520 uses an iframe, so I wrote a scraper myself. At first, I thought two regular expressions (one for the chapter list, one for the content) would solve the problem, but writing it turned out to be more complicated.
After several revisions, the biggest headache was encapsulating the code so that the cost of adapting it to different sites stays low. Here I use a small "decision maker" pattern, and the encapsulated functions are shared below. The code also includes a mechanism for skipping chapters that have already been collected, so that the next run does not have to start over. [After all, it is very depressing when scraping a novel site with several thousand books is interrupted once and cannot be resumed.]

PHP code
  Class grep extends Controller {var $ tableName = 'grep'; var $ pagesize = 31; var $ order_string = "grep_order desc, grep_id desc"; var $ filter_field = "grep_title "; var $ check_repeat_field = "grep_title"; var $ buttons = array (); var $ description = "[crawling novels]"; function index () {// get the story list $ story_model = "story_model"; $ this-> load-> model ($ story_model); $ where = array ("story_id <445 "); $ rows_story = $ this-> $ story_model-> get ($ where); foreach ($ rows_story as $ key => $ val_story): if ($ key <237) continue; $ url =" http://www.xiaoxiaoshuo.net/ ". $ Val_story-> storycate_vtitle. "/". $ val_story-> story_vtitle; $ src_content = file_get_contents ($ url); $ src_content = iconv ("GBK", "UTF-8 // IGNORE", $ src_content ); $ src_content = str_replace ("/style = \" border-width: 0px \ s * 1px \ s * 1px \ s * 0px; border-color: # C8D8B8; border-style: solid; padding: 3px; float: left; width: 313px; \ "/I", "", $ src_content ); $ src_content = str_replace ("style = \" BORDER-RIGHT: # c8d8b8 1px solid; PADDING-RIGHT: 3px; BORDER-TOP: # c8d8b8 0px solid; PADDING-LEFT: 3px; FLOAT: left; PADDING-BOTTOM: 3px; BORDER-LEFT: # c8d8b8 0px solid; WIDTH: 313px; PADDING-TOP: 3px; BORDER-BOTTOM: # c8d8b8 1px solid \ "", "", $ src_content); $ src_content = preg_replace ("/title = \" [^ \ "] * \"/iU ", "", $ src_content); $ src_content = preg_replace ("/title = \" [^ \ "] * \"/iU "," ", $ src_content ); $ src_content = preg_replace ("/
  
   
] *>/IU "," ", $ src_content); $ src_content = preg_replace ("/<\/LI [^>] *>/iU ","", $ src_content); $ src_content = preg_replace ("// iU", $ src_content, $ arr_dstorycate); $ dstorycate_arr = $ arr_dstorycate [1]; foreach ($ dstorycate_arr as $ key_dstorycate => $ val_dstory_cate) {preg_match_all ("/([^ <] *) <\/font>/I", $ scheme, $ dcate_title ); $ datacate ["dstorycate_pid"] = $ val_story-> story_id; $ datacate ["dstorycat E_title "] = $ dcate_title [1] [0]; // Obtain the category object. mark the previous category as downloaded $ dtitle = $ datacate [" dstorycate_title "]. $ obj_storycate = $ this-> check_dcate ($ dtitle, $ val_story); // pr ($ obj_storycate); if ($ obj_storycate-> dstorycate_ishot = 1) {$ this-> log ("$ val_story-> story_title-$ dtitle, skip"); continue;} preg_match_all ("/] *> (?! <\/A>) ([\ d \ D] *) <\/a>/iU ", $ val_dstory_cate, $ dinfo_list); $ list_story_url = $ dinfo_list [1]; $ list_story_title = $ dinfo_list [2]; $ story_url =" http://www.xiaoxiaoshuo.net/ ". $ Val_story-> storycate_vtitle. "/". $ val_story-> story_vtitle; $ this-> add_storyxxs_info ($ obj_storycate, $ list_story_url, $ list_story_title, $ story_url);} endforeach;} function status () {$ SQL = "select count (dstory_id) as all_story from dstory;"; $ query = $ this-> db-> query ($ SQL ); $ cont_all = $ query-> row (); echo $ cont_all-> all_story; $ SQL = "select count (dstory_id) as story1 from dstory where dstory_status = 1 "; $ query = $ this-> db-> query ($ SQL); $ cont_all = $ query-> row (); echo "--". $ cont_all-> story1; $ SQL = "select max (dstorycate_id) as max_id, max (dstorycate_pid) as max_pid from dstorycate "; $ query = $ this-> db-> query ($ SQL); $ cont_all = $ query-> row (); echo "--". $ cont_all-> max_id. "--". 
$ cont_all-> max_pid;}/*** download the section after 445 **/function index445 () {$ story_model = "story_model "; $ this-> load-> model ($ story_model); $ where = array ("story_id> 445 "); $ rows_story = $ this-> $ story_model-> get ($ where); foreach ($ rows_story as $ key => $ val_story ): // get the story_content $ story_url_arr = explode ("/", $ val_story-> story_url); $ story_url = $ story_url_arr [1]. "/". $ story_url_arr [2]. "/". $ story_url_arr [3]. "/". $ story_url_arr [4]; $ dest_url =" http://www.xiaoshuo520.com/ ". $ Story_url; $ src_content = CS_file_get_contents ($ dest_url); $ src_content = iconv ("GBK", "UTF-8 // IGNORE", $ src_content ); // group data by category. preg_match_all ("/([\ d \ D] *) $ val_dstory_cate) {preg_match_all ("/([\ d \ D] *) <\/div>/I ", $ val_dstory_cate, $ dcate_title); $ datacate ["dstorycate_pid"] = $ val_story-> story_id; $ datacate ["dstorycate_title"] = $ dcate_title [1] [0] // Obtain the category object. mark the previous category as downloaded $ dtitle = $ datacate ["dstory Cate_title "]; $ response = $ this-> check_dcate ($ dtitle, $ val_story); // pr ($ obj_storycate); if ($ obj_storycate-> dstorycate_ishot = 1) {$ this-> log ("$ val_story-> story_title-$ dtitle, skip"); continue;} preg_match_all ("/] *> (?! <\/A>) ([\ d \ D] *) <\/a>/iU ", $ val_dstory_cate, $ dinfo_list); $ list_story_url = $ dinfo_list [1]; $ list_story_title = $ dinfo_list [2]; $ this-> add_story520_info ($ obj_storycate, $ list_story_url, $ list_story_title, $ story_url);} endforeach ;} /*** obtain the title object based on the chapter standard and novel object **/function check_dcate ($ title, $ obj_story) {$ dstorycate_model = "dstorycate_model "; $ this-> load-> model ($ dstorycate_model); $ where = array ("dstorycate _ Pid = $ obj_story-> story_id "," dstorycate_title = '$ title' "); $ rows = $ this-> $ dstorycate_model-> get ($ where); if (! 
$ Rows) {$ datacate ["dstorycate_pid"] = $ obj_story-> story_id; $ datacate ["dstorycate_title"] = $ title; $ this-> $ dstorycate_model-> insert ($ datacate); $ obj_cate_id = $ this-> db-> insert_id (); $ where = array ("dstorycate_id = $ obj_cate_id ", "dstorycate_title = '$ title'"); $ rows = $ this-> $ dstorycate_model-> get ($ where); $ this-> log ("this book has no category, the novel $ obj_story-> story_title-$ title ");} else {$ this-> log (" already exists Related Novel category $ obj_story-> story_title-$ title, skip ");} $ obj_cate = $ rows [0]; $ SQL = "update dstorycate set dstorycate_published = 1 where dstorycate_pid = $ obj_story-> story_id & dstorycate_id <$ obj_cate-> dstorycate_id "; $ this-> db-> query ($ SQL); return $ obj_cate;}/***** add novel to */function add_story520_info ($ cate_obj, $ list_story_url, $ list_story_title, $ url) {$ dstory_model = "dstory_model"; $ this-> load-> mod El ($ dstory_model); $ min_key = intval ($ cate_obj-> dstorycate_pvcount); if (! $ Min_key) $ min_key = 0; foreach ($ list_story_url as $ key => $ val): if ($ key <$ min_key) {continue ;} $ this-> check_dstory ($ cate_obj ," http://www.xiaoshuo520.com/ ". $ Url. "/". $ val, $ list_story_title [$ key], "grep_520_info"); endforeach;} function add_storyxxs_info ($ cate_obj, $ list_story_url, $ list_story_title, $ url) {$ dstory_model = "dstory_model"; $ this-> load-> model ($ dstory_model); $ min_key = intval ($ cate_obj-> dstorycate_pvcount); if (! $ Min_key) $ min_key = 0; foreach ($ list_story_url as $ key => $ val): if ($ key <$ min_key) {$ this-> log ("$ cate_obj-> dstorycate_id No. $ cate_obj-> dstorycate_title ". $ list_story_title [$ key]. "Chapter $ key <$ min_key"); continue;} $ this-> check_dstory ($ cate_obj, $ url. "/". $ val, $ list_story_title [$ key], "grep_xxs_info"); endforeach ;}
  

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.