First use of the requests library: crawler code

Source: Internet
Author: User
1. Grab the Dean's Office page and save it in a TXT document.

import requests

file_path = r"E:\Dean's Office.txt"
try:
    kv = {'user-agent': 'Mozilla/5.0'}
    r = requests.get("http://jwch.sdut.edu.cn/", headers=kv)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    # open with an explicit encoding so the Chinese page text saves cleanly
    with open(file_path, 'w', encoding=r.encoding) as file_obj:
        file_obj.write(r.text)
except:
    print("Crawl failed")

2. Baidu search by keyword.

import requests

try:
    kv = {'wd': 'Python'}
    # Baidu's keyword interface: http://www.baidu.com/s?wd=keyword
    r = requests.get("http://www.baidu.com/s", params=kv)
    print(r.request.url)
    r.raise_for_status()
    print(len(r.text))
except:
    print("Crawl failed")

3. Grab a picture.

import requests
import os

url = "http://img1001.pocoimg.cn/image/poco/works/36/2018/0307/21/15204284272111499_46378737_h1920.jpg"
root = 'e://pics//'
image_path = root + url.split('/')[-1]
try:
    if not os.path.exists(root):
        os.mkdir(root)
    if not os.path.exists(image_path):
        r = requests.get(url)
        r.raise_for_status()
        with open(image_path, 'wb') as file_obj:
            file_obj.write(r.content)   # r.content is the raw binary body of the response
            print('Picture saved successfully')
except:
    print("Crawl failed")

4. ip138 crawl.

import requests

url = "http://m.ip138.com/ip.asp?ip="   # ip138 query interface
ip = '202.204.80.112'
try:
    r = requests.get(url + ip)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    print(r.text[-500:])
    print("Crawl succeeded.")
except:
    print("Crawl failed.")

5. Crawl the Chinese university rankings.

import bs4
import requests
from bs4 import BeautifulSoup

kv = {"user-agent": "Mozilla/5.0"}

def getHTMLText(url):
    try:
        # the timeout value was lost in extraction; 30 seconds is an assumption
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except:
        print("getHTMLText fail")
        return ""

def fillUnivList(ulist, html):
    soup = BeautifulSoup(html, "html.parser")
    for tr in soup.find('tbody').children:
        if isinstance(tr, bs4.element.Tag):
            tds = tr('td')
            ulist.append([tds[0].string, tds[1].string, tds[3].string])

def printUnivList(ulist, num):
    # chr(12288) is the full-width space, so Chinese text lines up in columns
    prmod = "{0:^10}\t{1:{3}^10}\t{2:{3}^10}"
    print(prmod.format("Rank", "School", "Total score", chr(12288)))
    for i in range(num):
        print(prmod.format(ulist[i][0], ulist[i][1], ulist[i][2], chr(12288)))

def main():
    uinfo = []
    url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2016.html"
    html = getHTMLText(url)
    fillUnivList(uinfo, html)
    printUnivList(uinfo, 20)   # number of rows to print was lost in extraction; 20 is an assumption

main()
6. Grab merchandise information from Taobao.

import re
import requests

def getHtml(url):
    try:
        kv = {"user-agent": "Mozilla/5.0"}
        # the timeout value was lost in extraction; 30 seconds is an assumption
        r = requests.get(url, timeout=30, headers=kv)
        r.encoding = r.apparent_encoding
        r.raise_for_status()
        return r.text
    except:
        print("getHtml failed.")
        return ""

def parserHtml(html, good_list):
    # the search page embeds each item as JSON; pull out the "raw_title" and "view_price" fields
    regexName = re.compile(r'"raw_title":".*?"')
    regexPrice = re.compile(r'"view_price":"[\d.]*"')
    names = regexName.findall(html)
    prices = regexPrice.findall(html)
    for i in range(len(names)):
        name = eval(names[i].split(':')[1])     # eval strips the surrounding quotes
        price = eval(prices[i].split(':')[1])
        good_list.append([name, price])

def display(good_list):
    print_mode = "{0:{3}<4}\t{1:{3}<16}\t{2:{3}<8}"
    cnt = 1
    for i in range(len(good_list)):
        print(print_mode.format(cnt, good_list[i][1], good_list[i][0], chr(12288)))
        cnt += 1

def main():
    name = input("Enter goods name: ")
    raw_url = "https://s.taobao.com/search?q=" + name
    num = int(input("Enter query depth: "))
    good_list = []
    print_mode = "{0:{3}<4}\t{1:{3}<16}\t{2:{3}<8}"
    print(print_mode.format("No.", "Price", "Goods name", chr(12288)))
    for i in range(num):
        try:
            html = getHtml(raw_url + '&s=' + str(num * i))
            parserHtml(html, good_list)
        except:
            continue
    good_list.sort(key=lambda a: float(a[1]))
    display(good_list)

main()

7. Crawl stock information.

import re
import requests
from bs4 import BeautifulSoup

urlList = "http://quote.eastmoney.com/stocklist.html"
urlBaidu = "https://gupiao.baidu.com/stock/"

def getHtml(url):
    kv = {"user-agent": "Mozilla/5.0"}
    try:
        r = requests.get(url, headers=kv)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except:
        return ""

def getStockList():
    # collect stock codes such as sh600000 / sz000001 from the East Money listing page
    html = getHtml(urlList)
    soup = BeautifulSoup(html, "html.parser")
    tmp = soup.find('div', attrs={'class': 'qox'})
    tmp = tmp.find('div', attrs={'class': 'quotebody'})
    tagA = tmp.find_all('a')
    regex = re.compile(r'[s][hz]\d{6}')
    stockId = []
    for a in tagA:
        try:
            href = a.attrs['href']
            sid = regex.findall(href)[0]
            stockId.append(sid)
        except:
            continue
    return stockId

def getInfoDict():
    stockId = getStockList()
    stockList = []
    for sid in stockId:
        try:
            infoDict = {}
            url = urlBaidu + sid + '.html'
            html = getHtml(url)
            if html == '':
                continue
            soup = BeautifulSoup(html, 'html.parser')
            tables = soup.find('div', attrs={'class': 'stock-bets'})
            name = tables.find(attrs={'class': 'bets-name'}).text.split()[0]
            infoDict.update({"Stock name": name})
            print("Stock name: %s %s" % (infoDict["Stock name"], sid))
            div = tables.find('div', attrs={'class': 'bets-content'})
            dts = div.find_all('dt')
            dds = div.find_all('dd')
            for i in range(len(dts)):
                print(dts[i].string + ': ' + dds[i].string)
                infoDict[dts[i].string] = dds[i].string
            stockList.append(infoDict)
        except:
            continue
    return stockList

getInfoDict()

