Crawling Proxy IPs with Node.js

Source: Internet
Author: User
Tags node server

A Node.js implementation of a proxy IP crawler

Main file: index.js

/** Support: node.js v7.9.0 */
const cheerio = require('cheerio');
const fetch = require('node-fetch');
// NOTE: this intentionally shadows the built-in Promise with Bluebird for the rest of the file.
const Promise = require('bluebird');
const mongoose = require('mongoose');

Promise.promisifyAll(mongoose);
const Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost:27017/ipproxypool');

// One document per "address:port" string; `unique` is declared so repeated addresses are not stored twice.
const ipPoolSchema = new Schema({ ip: { type: String, unique: true } });
const IpProxy = mongoose.model('IP', ipPoolSchema);

/**
 * Downloads one listing page, extracts every "ip:port" pair from the rows of
 * `#list table tbody`, and saves each pair as a document.
 *
 * Failures (network, parsing, or a rejected save) are logged and never
 * propagate, so a single bad page or duplicate entry does not stop the crawl.
 *
 * @param {string} url - Address of the listing page to crawl.
 * @returns {Promise} Settles once every save attempted for this page has finished.
 */
function fetchUrl(url) {
  return fetch(url, { method: 'get', headers: {} })
    .then((res) => res.text())
    .then((body) => {
      const $ = cheerio.load(body);
      const pendingSaves = [];

      $('#list table tbody')
        .find('tr')
        .each((index, row) => {
          const cells = $(row).find('td');
          // First column holds the address, second column the port.
          const ipAddress = cells.eq(0).text().trim();
          const port = cells.eq(1).text().trim();
          if (ipAddress === '' || port === '') {
            return; // skip rows that do not carry a complete address
          }

          console.log(`ip:${ipAddress}:${port}`);
          const ip = `${ipAddress}:${port}`;
          const entry = new IpProxy({ ip });

          // Wrap in Bluebird so `.catch` is available whatever promise type save() returns.
          // A rejected save (presumably a duplicate hitting the unique index) is logged, not fatal.
          pendingSaves.push(
            Promise.resolve(entry.save()).catch((err) => {
              console.error(`could not save ${ip}: ${err.message}`);
            })
          );
        });

      return Promise.all(pendingSaves);
    })
    .catch((err) => {
      console.error(`could not crawl ${url}: ${err.message}`);
    });
}

/**
 * Resolves with 'ok' after the given delay.
 *
 * @param {number} time - Delay in milliseconds.
 * @returns {Promise<string>}
 */
const sleep = function (time) {
  return new Promise(function (resolve) {
    setTimeout(function () {
      resolve('ok');
    }, time);
  });
};

// Exclusive upper bound: the loop below visits pages 1 .. pageNumber - 1.
const pageNumber = 10;

/**
 * Crawls the listing pages one after another, pausing 1.5 s between pages.
 *
 * @returns {Promise<void>}
 */
const start = async function () {
  for (let j = 1; j < pageNumber; j++) {
    console.log(`current is ${j} times wait:`);
    await fetchUrl(`http://www.kuaidaili.com/free/inha/${j}/`);
    await sleep(1500);
  }
};

start().catch((err) => {
  console.error(`crawl aborted: ${err.message}`);
});

Package dependencies: package.json

{
  "name": "demo-4-ipproxypool",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "False-l",
  "license": "",
  "devDependencies": {
    "babel-preset-es2015": "^6.24.1",
    "babel-preset-react": "^6.24.1",
    "babel-preset-stage-3": "^6.24.1"
  },
  "dependencies": {
    "babel-core": "^6.24.1",
    "bluebird": "^3.5.0",
    "cheerio": "^0.22.0",
    "koa": "^2.2.0",
    "koa-router": "^7.1.1",
    "mongoose": "^4.9.6",
    "node-fetch": "^1.6.3"
  }
}

MongoDB must be installed locally to store the crawled IPs. Validation of the crawled IPs is not yet implemented. I wrote this mostly out of curiosity.

The code above crawls the IPs listed on a proxy site and stores them in the MongoDB database.

The following is a simple server implementation that exposes a Koa 2-based API endpoint.

Server

// NOTE: this intentionally shadows the built-in Promise with Bluebird for the rest of the file.
const Promise = require('bluebird');
const mongoose = require('mongoose');
const Koa = require('koa');
const app = new Koa();
// Created as in the original listing; no routes are registered on it yet.
const router = require('koa-router')();

Promise.promisifyAll(mongoose);
const Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost:27017/ipproxypool');

// Same schema and model name as the crawler, so both scripts address the same model.
const ipPoolSchema = new Schema({ ip: { type: String, unique: true } });
const IpProxy = mongoose.model('IP', ipPoolSchema);

/**
 * Catch-all middleware: after any downstream middleware has run, responds
 * with an array of every stored "ip:port" string.
 */
app.use(async (ctx, next) => {
  await next();

  // Run the query exactly once through its promise. The original passed a
  // callback AND awaited the query, and the callback only filled an unused array.
  const documents = await IpProxy.find({}).exec();
  const addresses = documents.map((doc) => doc.ip);

  ctx.response.type = 'text/json';
  ctx.response.body = addresses;
});

app.listen(3000);
console.log('Server listen:3000');

As for why the code mixes promises and async/await: I am not yet very familiar with the asynchronous syntax, so I simply wrote it whichever way worked.

How to use:

Based on package.json:

npm install // install the dependencies

node index.js // crawl the proxy IPs

node server.js // run the simple IP API server

Crawling Proxy IPs with Node.js

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.