Crawling proxy IPs with Node.js
Main file: index.js
/**
 * index.js - crawl the free proxy list on kuaidaili.com and store every
 * "host:port" pair it finds in a local MongoDB database.
 *
 * Support: node.js v7.9.0
 */
const cheerio = require('cheerio');
const fetch = require('node-fetch');
const Promise = require('bluebird');
const mongoose = require('mongoose');

Promise.promisifyAll(mongoose);
// Make Mongoose hand back Bluebird promises from save()/find().
mongoose.Promise = Promise;

const Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost:27017/ipproxypool');

// One document per "host:port" string; `unique` makes MongoDB reject repeats.
const ipPoolSchema = new Schema({ ip: { type: String, unique: true } });
const IpProxy = mongoose.model('ip', ipPoolSchema);

// MongoDB error code raised when a unique index is violated.
const DUPLICATE_KEY_CODE = 11000;

/**
 * Store one proxy address. An address that is already in the collection is
 * not an error for a crawler, so the duplicate-key rejection is swallowed;
 * every other failure is re-thrown.
 *
 * @param {string} ip - address in "host:port" form
 * @returns {Promise} resolves with the saved document, or null for a duplicate
 */
function saveProxy(ip) {
  return new IpProxy({ ip: ip }).save().catch((err) => {
    if (err && err.code === DUPLICATE_KEY_CODE) {
      return null;
    }
    throw err;
  });
}

/**
 * Download one listing page, read the first two cells (address, port) of
 * every row in `#list table tbody` and save each pair.
 *
 * @param {string} url - listing page to fetch
 * @returns {Promise} settles once every row of the page has been saved;
 *   rejects on a network error, a non-2xx response or a database error
 */
function fetchUrl(url) {
  return fetch(url, { method: 'get', headers: {} })
    .then((res) => {
      if (!res.ok) {
        throw new Error(`request for ${url} failed with status ${res.status}`);
      }
      return res.text();
    })
    .then((body) => {
      const $ = cheerio.load(body);
      const pending = [];
      $('#list table tbody')
        .find('tr')
        .each((index, row) => {
          const cells = $(row).find('td');
          const ipaddress = cells.eq(0).text().trim();
          const port = cells.eq(1).text().trim();
          // Rows without both cells carry no proxy; skip them.
          if (!ipaddress || !port) {
            return;
          }
          const ip = `${ipaddress}:${port}`;
          console.log(`ip:${ip}`);
          pending.push(saveProxy(ip));
        });
      return Promise.all(pending);
    });
}

/**
 * Resolve with 'ok' after `time` milliseconds.
 *
 * @param {number} time - delay in milliseconds
 * @returns {Promise<string>}
 */
const sleep = function (time) {
  return new Promise((resolve) => {
    setTimeout(() => resolve('ok'), time);
  });
};

// Number of listing pages to crawl (pages are numbered from 1).
const pageNumber = 10;

/**
 * Crawl pages 1..pageNumber one after another, pausing 1.5 s between
 * requests. A failing page is logged and skipped so the remaining pages are
 * still crawled.
 */
const start = async function () {
  for (let j = 1; j <= pageNumber; j++) {
    console.log(`current is ${j} times wait:`);
    try {
      await fetchUrl(`http://www.kuaidaili.com/free/inha/${j}/`);
    } catch (err) {
      console.error(`page ${j} failed: ${err.message}`);
    }
    await sleep(1500);
  }
};

// Close the connection when the crawl is over so the process can exit.
start()
  .catch((err) => {
    console.error(err);
  })
  .then(() => mongoose.disconnect());
Package dependencies: package.json
{
  "name": "demo-4-ipproxypool",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "False-l",
  "license": "",
  "devDependencies": {
    "babel-preset-es2015": "^6.24.1",
    "babel-preset-react": "^6.24.1",
    "babel-preset-stage-3": "^6.24.1"
  },
  "dependencies": {
    "babel-core": "^6.24.1",
    "bluebird": "^3.5.0",
    "cheerio": "^0.22.0",
    "koa": "^2.2.0",
    "koa-router": "^7.1.1",
    "mongoose": "^4.9.6",
    "node-fetch": "^1.6.3"
  }
}
A MongoDB database must be installed locally to store the crawled IPs. Validation of the crawled IPs is not implemented yet; I wrote this mostly out of curiosity.
The code above crawls the IPs listed on a proxy site and stores them in the MongoDB database.
The following is a simple server, based on Koa 2, that exposes the stored IPs through an API endpoint.
Server file: server.js
/**
 * server.js - minimal Koa 2 server that answers every request with the list
 * of proxy addresses stored in the local `ipproxypool` MongoDB database.
 */
const Promise = require('bluebird');
const mongoose = require('mongoose');
const Koa = require('koa');
const app = new Koa();
// Router instance is created but no routes are registered on it yet.
const router = require('koa-router')();

Promise.promisifyAll(mongoose);
// Make Mongoose hand back Bluebird promises from find().
mongoose.Promise = Promise;

const Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost:27017/ipproxypool');

// Same shape the crawler writes: one "host:port" string per document.
const ipPoolSchema = new Schema({ ip: { type: String, unique: true } });
const IpProxy = mongoose.model('ip', ipPoolSchema);

/**
 * Respond with a JSON array of every stored "host:port" string.
 * The query is awaited directly (no callback), so a database failure rejects
 * here and is turned into an error response by Koa's default error handling.
 */
app.use(async (ctx, next) => {
  await next();
  const docs = await IpProxy.find({}).exec();
  const addresses = docs.map((doc) => doc.ip);
  ctx.response.type = 'text/json';
  ctx.response.body = addresses;
});

app.listen(3000);
console.log('server listen:3000');
As for why the code mixes plain promises and async/await: I am not yet very familiar with the asynchronous syntax, so I wrote it in whatever way I could get to work.
How to use:
Install the dependencies listed in package.json, then run the two scripts:
npm install       // install the dependencies
node index.js     // crawl the proxy IPs
node server.js    // run the simple IP API
Node.js: crawling proxy IPs