烏雲漏洞爬蟲的資料庫版本(mysql)

來源:互聯網
上載者:User

標籤:style   blog   http   color   os   資料   for   ar   

特別鳴謝阮思綺同學!雖然感覺這個冷冷的部落格也沒人看23333

# Wooyun vulnerability crawler, MySQL-backed version.
#
# Walks the paginated search results reachable from ``starturl``, downloads
# every bug page that is linked there, extracts four text fields (type,
# summary, detail, repair advice) with regular expressions and stores each
# page as one row of ``mytable``.
#
# NOTE(review): the Chinese literals inside the regular expressions below are
# reproduced exactly as found. The listing this script was recovered from
# appears to have been converted to Traditional Chinese, so they may need to
# be changed back to whatever the target site really serves -- confirm
# against live markup before relying on them.

import itertools
import os
import re
import sys
import urllib.request

import mysql.connector

# Connection settings for the local MySQL server.
user = 'root'
pwd = ''
host = '127.0.0.1'
db = 'test'
data_file = 'wooyun.dat'

create_table_sql = (
    "CREATE TABLE IF NOT EXISTS mytable ("
    "id int(10) AUTO_INCREMENT PRIMARY KEY, "
    "type varchar(300) , "
    "info varchar(1000) , "
    "detail varchar(5000) , "
    "repair varchar(1000) )"
    "CHARACTER SET utf8"
)
insert_sql = "INSERT INTO mytable (type, info, detail, repair) VALUES ( %s, %s, %s, %s)"
select_sql = "SELECT id, type, info, detail, repair FROM mytable"

cnx = mysql.connector.connect(user=user, password=pwd, host=host, database=db)
cursor = cnx.cursor()


def create_table_sql_api(a):
    """Execute the CREATE TABLE statement *a*.

    Prints the MySQL error and exits with status 1 if the statement fails.
    """
    try:
        cursor.execute(a)
    except mysql.connector.Error as err:
        print("create table 'mytable' failed.")
        print("Error: {}".format(err.msg))
        sys.exit(1)


def insert_sql_api(a, b):
    """Execute the parameterized INSERT statement *a* with the values *b*.

    Prints the MySQL error and exits with status 1 if the statement fails.
    """
    try:
        cursor.execute(a, b)
    except mysql.connector.Error as err:
        print("insert table 'mytable' failed.")
        print("Error: {}".format(err.msg))
        sys.exit(1)


def select_sql_api(a):
    """Execute the SELECT statement *a* and print every returned row.

    Each row is expected to unpack into (id, type, info, detail, repair).
    Prints the MySQL error and exits with status 1 if the query fails.
    """
    try:
        cursor.execute(a)
        # Local names avoid shadowing the builtins ``id`` and ``type``.
        for (row_id, bug_type, info, detail, repair) in cursor:
            # One placeholder per column: previously "detail" was printed
            # under the "repair" label and the repair column was dropped.
            print("ID:{}  type:{}  info:{}  detail:{}  repair:{}".format(
                row_id, bug_type, info, detail, repair))
    except mysql.connector.Error as err:
        print("query table 'mytable' failed.")
        print("Error: {}".format(err.msg))
        sys.exit(1)


def get_html_response(url):
    """Download *url* and return the response body decoded as UTF-8."""
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')


def geturl(starturl):
    """Return every ``/bugs/wooyun-...`` path found in the page at *starturl*."""
    page = get_html_response(starturl)
    return re.findall(r'/bugs/wooyun-\w*-\w*\b', page)


def get_nextpage(starturl):
    """Return the pagination links found in the page at *starturl*."""
    page = get_html_response(starturl)
    # ``\w+`` rather than ``\w`` so that page numbers of 10 and above are not
    # truncated to their first digit.
    return re.findall(r'searchbug.php\?q=6YeR6J6N&pNO=\w+', page)


def _apply_subs(text, rules):
    """Apply the (pattern, replacement) pairs in *rules* to *text*, in order."""
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return text


# Ordered clean-up rules for each extracted field. Order matters: later rules
# run on the output of earlier ones.
_TYPE_RULES = (
    (r':\s', ':'),
    (r'\t', ''),
    (r'</h3>', ''),
)
_INFO_RULES = (
    (r':\s', ':'),
    (r'<h3>', ''),
    (r'</h3>', ''),
    (r'<a\shref=".*.">', ''),
    (r'</a>', ''),
    (r'<imgheight=".*./>', ''),
)
_DETAIL_RULES = (
    (r':\s', ':'),
    (r'<p\sclass="detail">', ''),
    (r'</p>', ''),
    (r'"\starget="_blank"><img\ssrc="/upload/.*.width="600"/></a>', ','),
    (r'<a href="', ' http://www.wooyun.org'),
    (r'對本漏洞資訊進行評價,.*.備學習價值', ''),
)
_REPAIR_RULES = (
    (r'</br>', ','),
    (r'</p>', ','),
    (r'</h3>', ','),
    (r'<p\sclass="detail">', ''),
    # NOTE(review): as written this replaces ':' with ':' (a no-op);
    # presumably it once normalized a different colon character -- confirm.
    (r':', ':'),
)

starturl = "http://www.wooyun.org/searchbug.php?q=6YeR6J6N"

try:
    create_table_sql_api(create_table_sql)

    # Collect the bug-page paths linked from every result page. Pagination
    # links that appear more than once are fetched only once.
    result = []
    for i in dict.fromkeys(get_nextpage(starturl)):
        result += geturl('http://wooyun.org/' + re.sub('金融', '6YeR6J6N', i))

    # The set drops bug paths that were listed more than once.
    for i in set(result):
        # Download the bug page.
        k = get_html_response(
            'http://wooyun.org/' + re.sub('金融', '%E9%87%91%E8%9E%8D', i))

        type_wooyun = re.findall(r'漏洞類型:.*.</h3>', k)
        # All text wrapped in <h3> tags of the form "<h3>label:...</h3>".
        info = re.findall(r'<h3>\w*:.*.</h3>', k)
        detail = re.findall(r'<p class="detail">.*.</p>', k)
        repair = re.findall(r'修複方案:</h3>\s*<p class="detail">.*.\s*</p>', k)

        # Vulnerability type, kept for later classification in the database.
        type_wooyun_str = "".join(
            _apply_subs(j, _TYPE_RULES) for j in type_wooyun)
        # Summary: all whitespace is removed after the tags are stripped.
        info_str = "".join(
            "".join(_apply_subs(j, _INFO_RULES).split()) for j in info)
        # Detail text.
        detail_str = "".join(
            _apply_subs(j, _DETAIL_RULES) for j in detail)
        # Repair advice: all whitespace is removed after the tags are stripped.
        repair_str = "".join(
            "".join(_apply_subs(j, _REPAIR_RULES).split()) for j in repair)

        insert_sql_api(
            insert_sql, (type_wooyun_str, info_str, detail_str, repair_str))
        # NOTE(review): this prints the whole table after every insert, as
        # the original did; move it after the loop if one final dump is
        # enough.
        select_sql_api(select_sql)
        cnx.commit()
finally:
    # Release the database resources on every exit path, including the
    # sys.exit() calls in the helpers above and any download error.
    cursor.close()
    cnx.close()

 

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.