Crawl the enterprise directory on hangzhou.11467.com and store each company's record in a MySQL database.
# -*- coding: utf-8 -*-
"""Crawl the company directory on hangzhou.11467.com and store each
company record (name, type, addr, leader, date) in a local MySQL
database.

Reconstructed from a whitespace-mangled paste. Fixes applied:
  * parameterized INSERT instead of %-formatted SQL (scraped strings may
    contain quotes; the original was injection-prone and crash-prone)
  * the scraped field list is padded to exactly 5 values, so short rows
    (the original only handled lengths 3 and 4) can no longer raise
    IndexError at ss[3]/ss[4]
  * db.commit() added — MySQLdb disables autocommit by default, so the
    original silently discarded every insert on close
  * db.close() moved into a finally block so the connection is released
    even when a request or parse fails
  * explicit "html.parser" passed to BeautifulSoup
"""
import requests
import MySQLdb
from bs4 import BeautifulSoup

headers = {
    "User-Agent": ("Mozilla/5.0 (Windows NT 6.1; WOW64) "
                   "AppleWebKit/537.36 (KHTML, like Gecko) "
                   "Chrome/54.0.2840.87 Safari/537.36"),
}
# Session cookies captured from a browser visit; the site appears to need
# them to serve listing pages — NOTE(review): likely expired, confirm.
cookies = {
    "ASPSESSIONIDQCSRSBRS": "FBEPJPLCHEEMEHNLHFKCBCGB",
    "Hm_lvt_819e30d55b0d1cf6f2c4563aa3c36208": "1483118719",
    "Hm_lpvt_819e30d55b0d1cf6f2c4563aa3c36208": "1483120442",
}

db = MySQLdb.connect("localhost", "root", "liao1234", "liao", charset="utf8")
cursor = db.cursor()

# One-time schema setup; kept commented out, as in the original.
# cursor.execute("""CREATE TABLE company (
#     name   CHAR(100) NOT NULL,
#     type   CHAR(100),
#     addr   CHAR(100),
#     leader CHAR(100),
#     date   CHAR(100)
# )""")

try:
    # Fetch the index page and walk every district/category link in the
    # sidebar box.
    r = requests.get("http://hangzhou.11467.com/",
                     headers=headers, cookies=cookies)
    soup = BeautifulSoup(r.text, "html.parser")
    for tag in soup.find("div", class_="box sidesubcat t0").find_all("a"):
        print(tag.attrs["href"])
        # Listing pages are paginated as "...pn<i>".
        base_url = "http://hangzhou.11467.com" + tag.attrs["href"] + "pn"
        for page in range(1, 10):
            r1 = requests.get(base_url + str(page),
                              headers=headers, cookies=cookies)
            soup1 = BeautifulSoup(r1.text, "html.parser")
            for item in soup1.find("ul", id="slist").find_all("li"):
                # <a> tags carry name/type/addr, <dd> tags leader/date —
                # TODO confirm this field order against a live page.
                fields = [a.string for a in item.find_all("a")]
                fields += [dd.string for dd in item.find_all("dd")]
                if not fields:
                    continue  # separator <li> with no data
                # Pad missing trailing columns so the row always has
                # exactly 5 values; extras beyond 5 are ignored, matching
                # the original's ss[0]..ss[4] slice.
                fields = (fields + ["none"] * 5)[:5]
                print(len(fields))
                # Parameterized query: never interpolate scraped text
                # into the SQL string.
                cursor.execute(
                    "INSERT INTO company (name, type, addr, leader, date) "
                    "VALUES (%s, %s, %s, %s, %s)",
                    fields,
                )
                print(fields[0])
    db.commit()
finally:
    db.close()