node.js 抓取代理ip

來源:互聯網
上載者:User

標籤:i++   error   babel   tag   node   log   color   sleep   資料   

node.js實現抓取代理ip 

主要檔案:index.js

/**
 * Proxy-IP scraper (index.js). Supports: node.js v7.9.0
 *
 * Downloads the free proxy listing pages of kuaidaili.com, reads the first two
 * cells (address, port) of every table row and stores each "address:port"
 * string in the local MongoDB database `ipproxypool`. The scraped proxies are
 * not validated.
 */
const cheerio = require('cheerio');
const fetch = require('node-fetch');
const Promise = require('bluebird');
let mongoose = require('mongoose');

Promise.promisifyAll(mongoose);
// Let mongoose hand out bluebird promises from save()/exec() so they can be
// chained with .catch() below.
mongoose.Promise = Promise;

let Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost:27017/ipproxypool');

// One document per proxy; the unique index rejects an address that is already stored.
let IPpool = new Schema({
    ip: { type: String, unique: true }
});
let Ipproxy = mongoose.model('IP', IPpool);

// MongoDB error code reported when a unique index is violated.
const DUPLICATE_KEY_CODE = 11000;

/**
 * Stores one "address:port" string.
 * A duplicate is expected (the same proxy shows up on several runs) and is
 * ignored; any other failure is logged. The returned promise never rejects.
 *
 * @param {string} ip - proxy in "address:port" form
 * @returns {Promise} settles once the save attempt has finished
 */
function saveProxy(ip) {
    return new Ipproxy({ ip: ip }).save().catch(err => {
        if (err && err.code === DUPLICATE_KEY_CODE) {
            return;
        }
        console.error(`save ${ip} failed: ${err && err.message}`);
    });
}

/**
 * Fetches one listing page and stores every proxy found in it.
 * Failures (network error, non-2xx status, parse error) are logged instead of
 * thrown, so one bad page does not abort the whole run.
 *
 * @param {string} url - address of the listing page
 * @returns {Promise} resolves when all rows of the page have been handled
 */
function fetchUrl(url) {
    return fetch(url, {
        method: 'get',
        headers: {}
    })
        .then(res => {
            if (!res.ok) {
                throw new Error(`unexpected HTTP status ${res.status}`);
            }
            return res.text();
        })
        .then(body => {
            let $ = cheerio.load(body);
            let saves = [];
            $('#list table tbody').find('tr').each((index, row) => {
                let cells = $(row).find('td');
                let ipaddress = cells.eq(0).text().trim();
                let port = cells.eq(1).text().trim();
                // Skip rows that carry no address or port.
                if (!ipaddress || !port) {
                    return;
                }
                console.log(`IP:${ipaddress}:${port}`);
                saves.push(saveProxy(`${ipaddress}:${port}`));
            });
            return Promise.all(saves);
        })
        .catch(err => {
            console.error(`fetch ${url} failed: ${err.message}`);
        });
}

/**
 * Resolves with 'ok' after the given delay.
 *
 * @param {number} time - delay in milliseconds
 * @returns {Promise<string>}
 */
const sleep = function (time) {
    return new Promise(function (resolve) {
        setTimeout(function () {
            resolve('ok');
        }, time);
    });
};

// Number of listing pages to scrape.
const pageNumber = 10;

/**
 * Scrapes pages 1..pageNumber, starting at most one request every 1.5 s, then
 * closes the database connection so the process can exit.
 */
const start = async function () {
    try {
        // Inclusive bound: the original `j < pageNumber` stopped one page short.
        for (let j = 1; j <= pageNumber; j++) {
            console.log(`當前是第${j}次等待..`);
            // Wait for both the page and the pause: keeps the request pace and
            // guarantees all saves are done before the connection is closed.
            await Promise.all([
                fetchUrl(`http://www.kuaidaili.com/free/inha/${j}/`),
                sleep(1500)
            ]);
        }
    } finally {
        await mongoose.disconnect();
    }
};

start().catch(err => {
    console.error(err);
    process.exitCode = 1;
});

包支援 : package.json

{  "name": "demo-4-ipproxypool",  "version": "1.0.0",  "description": "",  "main": "index.js",  "scripts": {    "test": "echo \"Error: no test specified\" && exit 1"  },  "author": "false-l",  "license": "",  "devDependencies": {    "babel-preset-es2015": "^6.24.1",    "babel-preset-react": "^6.24.1",    "babel-preset-stage-3": "^6.24.1"  },  "dependencies": {    "babel-core": "^6.24.1",    "bluebird": "^3.5.0",    "cheerio": "^0.22.0",    "koa": "^2.2.0",    "koa-router": "^7.1.1",    "mongoose": "^4.9.6",    "node-fetch": "^1.6.3"  }}

本地需要安裝mongodb資料庫,用於儲存抓取到的ip,目前還未實現ip驗證。寫這個主要是出於好奇。

上面的代碼就可以實現抓取ip代理網站的ip並存到mongodb資料庫中。

下面再放出一個基於koa2的api介面的簡易伺服器實現

server

/**
 * Minimal koa2 API server (server.js).
 *
 * Answers every request with a JSON array of the "ip" strings stored in the
 * `ipproxypool` MongoDB database and listens on port 3000.
 */
const Promise = require('bluebird');
let mongoose = require('mongoose');
const koa = require('koa');
const app = new koa();
// NOTE: the router is created but no route is registered on it in this file.
var router = require('koa-router')();

Promise.promisifyAll(mongoose);
// Let mongoose hand out bluebird promises from exec().
mongoose.Promise = Promise;

let Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost:27017/ipproxypool');

// Same schema as the scraper: one document per proxy, unique by "ip".
let IPpool = new Schema({
    ip: { type: String, unique: true }
});
let Ipproxy = mongoose.model('IP', IPpool);

app.use(async (ctx, next) => {
    await next();
    // Only await the query. The original also passed a callback, which made
    // mongoose run the query twice and threw inside the callback on a failed
    // query (it called ips.forEach without checking err).
    // A rejected query propagates to koa's default error handling.
    const data = await Ipproxy.find({}).exec();
    const map = data.map(ip => ip.ip);
    // NOTE(review): 'text/json' is kept for existing clients; the registered
    // media type for JSON is 'application/json' — confirm before changing.
    ctx.response.type = 'text/json';
    ctx.response.body = map;
});

app.listen(3000);
console.log('server listen:3000');

至於為什麼既有promise又有async,是因為對非同步文法還不是很熟,怎麼會就怎麼寫了。

使用方式:

 根據package.json

npm install   // 安裝支援

node index.js  //擷取代理 ip 

node server.js  //運行簡易ip介面

 

node.js 抓取代理ip

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.