Required: MySQLdb
Here is the data table structure:
/*
Navicat MySQL Data Transfer

Source Server         : 127.0.0.1
Source Server Version : 50509
Source Host           : 127.0.0.1:3306
Source Database       : wooyun

Target Server Type    : MYSQL
Target Server Version : 50509
File Encoding         : 65001

Date: 2015-09-24 17:38:14
*/

SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
-- Table structure for wooyun_vul
-- One row per vendor: name, website URL and total vulnerability count.
-- ----------------------------
DROP TABLE IF EXISTS `wooyun_vul`;
CREATE TABLE `wooyun_vul` (
  `id` int(8) NOT NULL AUTO_INCREMENT,
  `corpsname` varchar(255) DEFAULT NULL,
  `corpsurl` varchar(255) DEFAULT NULL,
  `vulcount` int(255) DEFAULT NULL,
  PRIMARY KEY (`id`)
  -- utf8 instead of latin1: the crawler talks to the server in utf8 and the
  -- vendor names are mostly Chinese, which latin1 columns cannot store.
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Python script:
# coding=utf-8
"""Crawl wooyun.org for every listed vendor and store the results in MySQL.

For each vendor the script records its name, its website URL and the total
number of reported vulnerabilities in the ``wooyun_vul`` table.

The third-party ``MySQLdb`` module is needed for the database step only; it
is imported lazily so the fetching/parsing helpers work without it.
"""
import re

try:  # Python 2 module layout
    from urllib import quote
    from urllib2 import Request, urlopen
except ImportError:  # Python 3 module layout
    from urllib.parse import quote
    from urllib.request import Request, urlopen

# Base of the paginated vendor listing; the page number is appended.
url = "http://wooyun.org/corps/page/"
# Base of a single vendor's page; the URL-quoted vendor name is appended.
CORP_PAGE_BASE = "http://www.wooyun.org/corps/"
# The listing pages 1..LAST_PAGE are crawled.
LAST_PAGE = 36
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"
)

# Compiled once at import time instead of on every request.
CORP_NAME_RE = re.compile(
    r'<td width="370"><a href="\/corps\/(.*?)">.*?<\/a><\/td>'
)
CORP_URL_RE = re.compile(r'<a rel="nofollow" href="(.*?)" target=')
VUL_COUNT_RE = re.compile(r'<p class="page">.*?(\d+).*')

# Placeholders are filled in by the driver, so quotes or SQL fragments in
# scraped text cannot break (or inject into) the statement.
INSERT_SQL = (
    "INSERT INTO wooyun_vul (corpsname, corpsurl, vulcount) "
    "VALUES (%s, %s, %s)"
)


def fetch_page(page_url):
    """Download *page_url* with a browser User-Agent and return its text."""
    request = Request(page_url)
    request.add_header("User-Agent", USER_AGENT)
    response = urlopen(request)
    try:
        # Decode once at the I/O boundary; undecodable bytes are replaced so
        # a single bad byte does not abort the whole crawl.
        return response.read().decode("utf-8", "replace")
    finally:
        response.close()


def parse_corps(content):
    """Return ``(names, urls)`` extracted from one vendor-listing page."""
    return CORP_NAME_RE.findall(content), CORP_URL_RE.findall(content)


def parse_vul_counts(content):
    """Return every number found at the start of a ``<p class="page">`` line."""
    return VUL_COUNT_RE.findall(content)


def first_vul_count(counts):
    """Return the first entry of *counts* as an int, or None when empty."""
    return int(counts[0]) if counts else None


def pair_corps(names, urls):
    """Pair the names and URLs found on ONE listing page.

    Pairing page by page keeps a mismatch on one page from shifting the
    name/URL association of every following page.
    """
    if len(names) != len(urls):
        print(
            "warning: %d names but %d urls on one page; extra entries dropped"
            % (len(names), len(urls))
        )
    return list(zip(names, urls))


def corp_page_url(name):
    """Return the URL of the vendor page for the (text) vendor *name*."""
    # Quote the UTF-8 bytes: accepted by both the Python 2 and 3 quote().
    return CORP_PAGE_BASE + quote(name.encode("utf-8"))


def Getwooyuncorps(URL):
    """Fetch listing page *URL*; return ``(vendor names, vendor site URLs)``."""
    return parse_corps(fetch_page(URL))


def getcorpscount(URL):
    """Fetch vendor page *URL*; return the list of count strings found."""
    return parse_vul_counts(fetch_page(URL))


def collect_rows(first_page=1, last_page=LAST_PAGE):
    """Crawl the listing and vendor pages.

    Returns a list of ``(name, site_url, vulcount)`` tuples, one per vendor
    for which a vulnerability count could be read.
    """
    vendors = []
    name_total = 0
    url_total = 0
    for page in range(first_page, last_page + 1):
        corps, corpsurl = Getwooyuncorps(url + str(page))
        name_total += len(corps)
        url_total += len(corpsurl)
        vendors.extend(pair_corps(corps, corpsurl))
    print("%d %d" % (name_total, url_total))

    rows = []
    for name, site in vendors:
        # Exactly one count per vendor: a vendor page yielding zero or
        # several matches must not shift the rows that follow it.
        vulcount = first_vul_count(getcorpscount(corp_page_url(name)))
        if vulcount is None:
            print("warning: no vulnerability count for %r; skipped" % (name,))
            continue
        rows.append((name, site, vulcount))
    return rows


def save_rows(rows):
    """Insert *rows* into ``wooyun_vul`` and commit once at the end."""
    import MySQLdb  # third-party driver, only needed for this step

    # charset="utf8" replaces the former explicit "set names 'utf8'" and
    # lets the driver encode text parameters itself.
    conn = MySQLdb.connect("localhost", "root", "", "wooyun", charset="utf8")
    try:
        cur = conn.cursor()
        cur.executemany(INSERT_SQL, rows)
        conn.commit()
    finally:
        conn.close()


def main():
    """Crawl everything, store it, and report success."""
    save_rows(collect_rows())
    print("success")


if __name__ == "__main__":
    main()
This Python script crawls WooYun for every vendor's name, URL, and total number of vulnerabilities, and stores them in the database.