This JavaScript crawler source code scrapes product information from the Showjoy cosmetics website (showjoy.com).
Paste the code into the Shenjianshou cloud crawler platform (http://www.shenjianshou.cn/) to run it directly;
you do not need to set up a build or runtime environment. To crawl other sites, modify the source code accordingly.
For the detailed steps to run the code, click here.
For a detailed explanation of the code, click here.
For more source code downloads, click here.
/*
 * Crawler script for product pages on showjoy.com, written for the
 * Shenjianshou cloud crawler platform (http://www.shenjianshou.cn/).
 *
 * `SourceType`, `SelectorType` and `Crawler` are not defined in this script;
 * they are expected to be globals supplied by the platform runtime.
 * `var` and ES5 syntax are kept on purpose: the platform's JavaScript engine
 * version is unknown from here.
 *
 * NOTE(review): this script was reconstructed from a copy whose identifiers
 * and string literals had been altered (capitalisation changes, inserted
 * spaces, translated text). Option names follow the platform's camelCase
 * convention; literals flagged below should be confirmed against the live
 * site before relying on them.
 */

// Text the list page is expected to contain when a search returns nothing.
// NOTE(review): restored to the Chinese wording ("no matching products");
// the garbled copy contained an English translation that a Chinese page
// could never contain. Confirm against the live page.
var NO_RESULTS_MARKER = "无匹配商品";

// Captures the last number wrapped in parentheses, e.g. "(123)".
// Accepts both ASCII and full-width (U+FF08 / U+FF09) parentheses because the
// original delimiters could not be recovered with certainty.
var COUNT_IN_PARENS = /.*[(\uFF08]\s*(\d+)\s*[)\uFF09]/;

var configs = {
  // Hosts the crawler is allowed to visit.
  domains: ["www.showjoy.com", "list.showjoy.com", "item.showjoy.com"],
  // Entry point: search results for one category (URL-encoded query).
  scanUrls: [
    "http://list.showjoy.com/search/?q=cateIds%3A1,cateName%3A%E9%9D%A2%E8%86%9C"
  ],
  // Product detail pages: the pages fields are extracted from.
  contentUrlRegexes: ["http://item\\.showjoy\\.com/sku/\\d+\\.html"],
  // Search result list pages, with an optional "&page=N" suffix.
  helperUrlRegexes: [
    "http://list\\.showjoy\\.com/search/\\?q=cateIds%3A1,cateName%3A%E9%9D%A2%E8%86%9C(\\&page=\\d+)?"
  ],
  fields: [
    {
      // First extracted item: product title.
      name: "title",
      selector: "//h3[contains(@class,'choose-hd')]",
      required: true
    },
    {
      // Second extracted item: comment count (reduced to digits in afterExtractField).
      name: "comment",
      selector: "//div[contains(@class,'dtabs-hd')]/ul/li[2]",
      required: false
    },
    {
      // Third extracted item: sales count (reduced to digits in afterExtractField).
      name: "sales",
      selector: "//div[contains(@class,'dtabs-hd')]/ul/li[3]",
      required: false
    },
    {
      // SKU id, substituted into the price request below via {skuid}.
      // NOTE(review): the element id's letter case is a reconstruction — confirm.
      name: "skuid",
      selector: "//input[@id='J_UItemId']/@value"
    },
    {
      // Price is read from a separate JSON endpoint rather than the page HTML.
      // NOTE(review): the path/query letter case is a reconstruction — confirm.
      name: "price",
      sourceType: SourceType.AttachedUrl,
      attachedUrl: "http://item.showjoy.com/product/getPrice?skuId={skuid}",
      selectorType: SelectorType.JsonPath,
      selector: "$.data.price"
    }
  ]
};

/**
 * Callback for list (helper) pages: queues the next result page unless the
 * current page reports that there are no results.
 *
 * @param {string} url - URL of the list page being processed.
 * @param {string} content - Body of that page.
 * @param {{addUrl: function(string): void}} site - Object used to queue URLs.
 * @returns {boolean} Always true, as in the original script; the meaning of
 *   the return value is defined by the platform.
 */
configs.onProcessHelperUrl = function (url, content, site) {
  // Compare against -1 explicitly: negating indexOf() is true only when the
  // marker sits at index 0, which is not "marker absent".
  var hasResults =
    typeof content === "string" && content.indexOf(NO_RESULTS_MARKER) === -1;
  if (hasResults) {
    // Only advance when the URL really carries a page number. Parsing a URL
    // without "&page=" would yield NaN and re-queue the very same URL.
    var pageMatch = /&page=(\d+)/.exec(url);
    if (pageMatch) {
      var nextPage = parseInt(pageMatch[1], 10) + 1;
      site.addUrl(url.replace("&page=" + pageMatch[1], "&page=" + nextPage));
    }
  }
  return true;
};

/**
 * Callback run on each extracted field value: reduces the "comment" and
 * "sales" fields to the number found inside parentheses.
 *
 * @param {string} fieldName - Name of the field that was just extracted.
 * @param {*} data - Extracted value; may be empty for non-required fields.
 * @returns {*} The captured digits as a string, or `data` unchanged when the
 *   field is not a count field or contains no parenthesised number.
 */
configs.afterExtractField = function (fieldName, data) {
  if (fieldName === "comment" || fieldName === "sales") {
    // Both fields are optional, so tolerate missing or non-matching text
    // instead of indexing into a null match result.
    var found = typeof data === "string" ? data.match(COUNT_IN_PARENS) : null;
    if (found) {
      return found[1];
    }
  }
  return data;
};

var crawler = new Crawler(configs);
crawler.start();
Showjoy cosmetics website crawler source code