1. Grab the Dean's Office interface and save it in a TXT document.
import requests

# 1. Fetch the Dean's Office homepage and save the HTML to a local text file.
file_path = r"E:\dean_office.txt"
try:
    # A browser-like User-Agent avoids trivial bot blocking.
    headers = {'user-agent': 'Mozilla/5.0'}
    r = requests.get("http://jwch.sdut.edu.cn/", headers=headers)
    r.raise_for_status()
    # Use the encoding guessed from the body, not the (often wrong) header.
    r.encoding = r.apparent_encoding
    with open(file_path, 'w', encoding=r.encoding) as file_obj:
        file_obj.write(r.text)
except Exception:
    print("Crawl failed")
2. Search Baidu for a keyword.
import requests

# 2. Query Baidu search with a keyword via its ?wd= interface:
#    http://www.baidu.com/s?wd=<keyword>
try:
    params = {'wd': 'Python'}
    r = requests.get("http://www.baidu.com/s", params=params)
    # Show the final URL requests built from the params dict.
    print(r.request.url)
    r.raise_for_status()
    print(len(r.text))
except Exception:
    print("Crawl failed")
3. Grab pictures.
import os

import requests

# 3. Download an image and save it under a local pics directory,
#    keeping the original filename taken from the URL.
url = ("http://img1001.pocoimg.cn/image/poco/works/36/2018/0307/21/"
       "15204284272111499_46378737_h1920.jpg")
root = 'e://pics//'
image_path = root + url.split('/')[-1]
try:
    if not os.path.exists(root):
        os.mkdir(root)
    # Only fetch when the file is not already cached locally
    # (the original fetched the image twice unconditionally).
    if not os.path.exists(image_path):
        r = requests.get(url)
        r.raise_for_status()
        with open(image_path, 'wb') as file_obj:
            file_obj.write(r.content)
        print('picture retention succeeded')
except Exception:
    print("Crawl failed")
4. Query the ip138 IP-lookup service.
import requests

# 4. Query the ip138 service for information about an IP address.
url = "http://m.ip138.com/ip.asp?ip="  # ip138 query interface
ip = '202.204.80.112'
try:
    r = requests.get(url + ip)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    # The useful result sits near the end of the page; print the tail only.
    print(r.text[-500:])
    print("Crawl succeeded.")
except Exception:
    print("Crawl failed.")
5. Crawl Chinese University Rankings
import bs4
import requests
from bs4 import BeautifulSoup

# 5. Crawl the Chinese university ranking page and print the top entries.
kv = {"user-agent": "Mozilla/5.0"}


def gethtmltext(url):
    """Fetch *url* and return its decoded text; '' on any failure."""
    try:
        # The original left the timeout value blank; 30s is a sane default.
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except Exception:
        print("gethtmltext fail")
        return ""


def fillunivlist(ulist, html):
    """Append [rank, school, total score] triples parsed from *html* to ulist.

    Assumes the ranking table rows live under a single <tbody> — TODO confirm
    against the live page.
    """
    soup = BeautifulSoup(html, "html.parser")
    for tr in soup.find('tbody').children:
        # .children yields NavigableStrings too; keep only real <tr> tags.
        if isinstance(tr, bs4.element.Tag):
            tds = tr('td')
            ulist.append([tds[0].string, tds[1].string, tds[3].string])


def printunivlist(ulist, num):
    """Pretty-print the first *num* universities.

    chr(12288) is the full-width CJK space, used as the fill character so
    Chinese school names align in fixed-width columns.
    """
    prmod = "{0:^10}\t{1:{3}^10}\t{2:{3}^10}"
    print(prmod.format("Rank", "school", "Total Score", chr(12288)))
    for i in range(num):
        print(prmod.format(ulist[i][0], ulist[i][1], ulist[i][2], chr(12288)))


def main():
    """Fetch the 2016 ranking page, parse it, and print the top 20 rows."""
    uinfo = []
    url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2016.html"
    html = gethtmltext(url)
    fillunivlist(uinfo, html)
    printunivlist(uinfo, 20)


main()
6. Taobao Grab Merchandise information
import re
import requests

# 6. Crawl Taobao search results and list goods sorted by price.


def gethtml(url):
    """Fetch *url* and return its decoded text; '' on any failure."""
    try:
        kv = {"user-agent": "Mozilla/5.0"}
        r = requests.get(url, timeout=30, headers=kv)
        r.encoding = r.apparent_encoding
        r.raise_for_status()
        return r.text
    except Exception:
        print("gethtml failed.")
        return ""


def parserhtml(html, good_list):
    """Append [name, price] pairs mined from the embedded JSON in *html*."""
    regexn = re.compile(r'"raw_title":".*?"')
    regexp = re.compile(r'"view_price":"[\d.]*"')
    names = regexn.findall(html)
    prices = regexp.findall(html)
    for i in range(len(names)):
        # SECURITY: eval() on scraped page content is dangerous — a crafted
        # page could execute code. Prefer json.loads or str.strip('"').
        name = eval(names[i].split(':')[1])
        price = eval(prices[i].split(':')[1])
        good_list.append([name, price])


def display(good_list):
    """Print the collected goods as a numbered, CJK-space-aligned table."""
    print_mode = "{0:{3}<4}\t{1:{3}<16}\t{2:{3}<8}"
    cnt = 1
    for item in good_list:
        print(print_mode.format(cnt, item[1], item[0], chr(12288)))
        cnt += 1


def main():
    """Prompt for a product name and page depth, crawl, sort by price, print."""
    name = input("Enter cargo name:")
    raw_url = "https://s.taobao.com/search?q=" + name
    num = int(input("Input query Depth:"))
    good_list = []
    print_mode = "{0:{3}<4}\t{1:{3}<16}\t{2:{3}<8}"
    print(print_mode.format("ordinal", "price", "commodity name", chr(12288)))
    for i in range(num):
        try:
            # Taobao pages through results with &s=<offset>, 44 items/page —
            # TODO confirm page size against the live site.
            html = gethtml(raw_url + '&s=' + str(44 * i))
            parserhtml(html, good_list)
        except Exception:
            continue
    good_list.sort(key=lambda a: float(a[1]))
    display(good_list)


main()
7. Crawl Stock Information
import re
import requests
from bs4 import BeautifulSoup

# 7. Crawl the Eastmoney stock list, then fetch per-stock detail pages
#    from Baidu Gupiao and collect their key/value info.
urllist = "http://quote.eastmoney.com/stocklist.html"
urlbaidu = "https://gupiao.baidu.com/stock/"


def gethtml(url):
    """Fetch *url* and return its decoded text; '' on any failure."""
    kv = {"user-agent": "Mozilla/5.0"}
    try:
        r = requests.get(url, headers=kv)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except Exception:
        return ''


def getstocklist():
    """Return stock ids (sh/sz + 6 digits) scraped from the listing page."""
    html = gethtml(urllist)
    soup = BeautifulSoup(html, "html.parser")
    # Narrow to the quote body before collecting anchors — assumes the
    # page keeps the 'qox'/'quotebody' div classes; verify against the site.
    tmp = soup.find('div', attrs={'class': 'qox'})
    tmp = tmp.find('div', attrs={'class': 'quotebody'})
    taga = tmp.find_all('a')
    regex = re.compile(r'[s][hz]\d{6}')
    stockid = []
    for a in taga:
        try:
            href = a.attrs['href']
            stockid.append(regex.findall(href)[0])
        except Exception:
            # Anchors without an href or without a stock id are skipped.
            continue
    return stockid


def getinfodict():
    """Fetch every stock's detail page and return a list of info dicts."""
    stockid = getstocklist()
    stocklist = []
    for sid in stockid:  # renamed from 'id' to avoid shadowing the builtin
        try:
            infodict = {}
            url = urlbaidu + sid + '.html'
            html = gethtml(url)
            if html == '':
                continue
            soup = BeautifulSoup(html, 'html.parser')
            tables = soup.find('div', attrs={'class': 'stock-bets'})
            name = tables.find(attrs={'class': 'bets-name'}).text.split()[0]
            infodict.update({"stock name": name})
            print("Stock Name: %s %s" % (infodict["stock name"], sid))
            div = tables.find('div', attrs={'class': 'bets-content'})
            dts = div.find_all('dt')
            dds = div.find_all('dd')
            # <dt>/<dd> pairs carry field-name / field-value respectively.
            for i in range(len(dts)):
                print(dts[i].string + ': ' + dds[i].string)
                infodict[dts[i].string] = dds[i].string
            stocklist.append(infodict)
        except Exception:
            # Best-effort: skip any stock whose page layout doesn't match.
            continue
    return stocklist


getinfodict()