Special thanks to Ransi students! Although it feels like this cold blog is no one to look at 23333
ImportMysql.connectorImportsys, OSImporturllib.requestImportReImportItertoolsuser='Root'pwd="'Host='127.0.0.1'DB='Test'data_file='Wooyun.dat'Create_table_sql="CREATE TABLE IF not EXISTS mytable (id int (TEN) auto_increment PRIMARY KEY, type varchar (+), info varchar (), DE Tail varchar (repair varchar) CHARACTER SET UTF8"Insert_sql="INSERT into MyTable (type, info, detail, repair) VALUES (%s,%s,%s,%s)"Select_sql="SELECT ID, type, info, detail, repair from MyTable"CNX= Mysql.connector.connect (User=user, Password=pwd, Host=host, database=db) Cursor=cnx.cursor ()defCreate_table_sql_api (a):Try: Cursor.execute (a)exceptMysql.connector.Error as err:Print("CREATE table ' MyTable ' failed.") Print("Error: {}". Format (err.msg)) Sys.exit ()defInsert_sql_api (A, b):Try: Cursor.execute (A, b)exceptMysql.connector.Error as err:Print("Insert Table ' mytable ' failed.") Print("Error: {}". Format (err.msg)) Sys.exit ()defSelect_sql_api (a):Try: Cursor.execute (a) for(ID, type, info, detail, repair)inchcursor:Print("id:{} type:{} info:{} repair:{}". Format (ID, type, info, detail, repair))exceptMysql.connector.Error as err:Print("query table ' mytable ' failed.") Print("Error: {}". Format (err.msg)) Sys.exit ()defget_html_response (URL): Html_response= Urllib.request.urlopen (URL). read (). Decode ('Utf-8') returnHtml_responsedefGeturl (StartURL): a=get_html_response (starturl) Childurl= (Re.findall (r'/bugs/wooyun-\w*-\w*\b', a)) returnChildurldefget_nextpage (StartURL): D=get_html_response (starturl) nextpage= (Re.findall (r'searchbug.php\?q=6yer6j6n&pno=\w', D)) returnNextpagestarturl="Http://www.wooyun.org/searchbug.php?q=6YeR6J6N"result=[]final=[]type_wooyun_n=[]info_n=[]detail_n=[]repair_n=[]#Output=open ("D:\\wooyun.csv", "w+")Create_table_sql_api (create_table_sql) forIinchget_nextpage (starturl): Result+=geturl ('http://wooyun.org/'+re.sub ('Financial','6yer6j6n', i)) #Scan the URL addresses of the various vulnerabilities into resultResult=set (Result)#Remove duplicate addresses from result forIinchresult:k=get_html_response ('http://wooyun.org/'+re.sub ('Financial','%e9%87%91%e8%9e%8d', i))#download page to KType_wooyun=re.findall (R'Vulnerability Type:.*.', k) Info=re.findall (R''K#blank characters with/s, find all the text that applies to Detail=re.findall (R'<p class= "Detail" >.*.</p>', k) Repair=re.findall (R'FIX: ', K) forJinchType_wooyun:#vulnerability type, prepare for database classification afterJ=re.sub (R': \s',':', J) J=re.sub (R'\ t',"', J) J=re.sub (R'',"', j) Type_wooyun_n+=J forJinchInfo#Processing SummaryJ=re.sub (R': \s',':', J) J=re.sub (R'',"', J) J=re.sub (R'',"', J) J=re.sub (R'<a\shref= ". *." >',"', J) J=re.sub (R'</a>',"', J) J=re.sub (R'',"', J) J=j.split () info_n+=J forJinchDetail#Processing DetailsJ=re.sub (R': \s',':', J) J=re.sub (R'<p\sclass= "Detail" >',"', J) J=re.sub (R'</p>',"', J) J=re.sub (R'"\starget=" _blank "></a> "',',', J) J=re.sub (R'<a href= "','http://www.wooyun.org', J) J=re.sub (R'The vulnerability information is evaluated,. *. Prepare learning Value',"', j) Detail_n+=J forJinchRepair#Handling Reply MethodsJ=re.sub (R'</br>',',', J) J=re.sub (R'</p>',',', J) J=re.sub (R'',',', J) J=re.sub (R'<p\sclass= "Detail" >',"', J) J=re.sub (R':',':', J) J=j.split () repair_n+=J Type_wooyun_str="". Join (Itertools.chain (*type_wooyun_n)) Info_str="". Join (Itertools.chain (*info_n)) Detail_str="". Join (Itertools.chain (*Detail_n)) Repair_str="". Join (Itertools.chain (*repair_n)) Final.append (TYPE_WOOYUN_STR) final.append (INFO_STR) final.append (DETAIL_STR) final.append (REPAIR_STR) Insert_ Sql_api (Insert_sql,tuple (Final)) Select_sql_api (Select_sql)#output.writelines (Final) #output.writelines (' \ n ')final.clear () repair_n.clear () info_n.clear () type_wooyun_n.clear () detail_n.clear () Cnx.commit () Cursor.close () Cnx.close ( )#output.close ()