I had just learned Python's compound data types when, by chance, I came across a public lesson video and also read what earlier authors had written. Taking the essence and discarding the dross, I wrote a small crawler:
import urllib
from urllib import request
from json import loads
import re
import os
import pymssql
# SQL Server database helper
class DB:
    def __init__(self, host, user, password, database, charset):
        self.conn = pymssql.connect(host=host, user=user, password=password,
                                    database=database, charset=charset)
        self.cur = self.conn.cursor()

    # Execute a statement and commit
    def operatedb(self, sql):
        if not self.cur:
            print('Connection failed')
        else:
            self.cur.execute(sql)
            self.conn.commit()

    # Close cursor and connection
    def close(self):
        self.cur.close()
        self.conn.close()
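# A quick usage sketch of the DB helper above (hypothetical credentials and a
# made-up row, just to show the call order):
#     db = DB('localhost', 'sa', 'password', 'Models', 'UTF8')
#     db.operatedb("INSERT INTO user_basic VALUES ('1', 'a', 'b', 'c', 'd')")
#     db.close()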
# Initiate a request and return the decoded response body
def getrequestbody(url):
    return urllib.request.urlopen(url).read().decode('gbk')

# Remove the substring son from the string body
def setstrlegal(body, son):
    start_index = body.find(son)
    end_index = body.find(son) + len(son)
    return body[:start_index] + body[end_index:]
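# For example, setstrlegal('height&nbsp;178', '&nbsp;') returns 'height178':
# only the first occurrence of the substring is removed (the values here are
# just an illustration, not output from the site).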
# Get all the model information on a page, packaged in a dictionary
def getshowinfolist(page):
    # URL of the page request
    url = 'https://mm.taobao.com/tstar/search/tstar_model.do?_input_charset=utf-8&currentPage={}'.format(page)
    body = getrequestbody(url)
    dicts = loads(body)
    print(dicts['data']['searchDOList'])
    return dicts
# Get the information card of a single model
def getInfo(userId):
    # The URL of the model card for each model
    url = 'https://mm.taobao.com/self/info/model_info_show.htm?user_id={}'.format(userId)
    body = getrequestbody(url).strip()
    reg = r'<span>(.*?)</span>'
    list_re = re.findall(reg, body)
    list_re[1] = setstrlegal(list_re[1], ' ').strip()
    # Handle missing or extra fields
    if len(list_re) - 1 >= 8:
        list_re.pop()
    if len(list_re) - 1 < 7:
        list_re.append('')
    if list_re[4] == '':
        list_re[4] = 'unknown'
    if list_re[6] == '':
        list_re[6] = 'unknown'
    if list_re[7] == '':
        list_re[7] = 'unknown'
    list_re[5] = list_re[5].split(' ')
    reg1 = r'<p>(.*?)</p>'
    list_re1 = re.findall(reg1, body)
    list_re1[0] = list_re1[0][:list_re1[0].find('CM')]
    list_re1[1] = list_re1[1][:list_re1[1].find('KG')]
    try:
        list_re1[4] = int(list_re1[4][:list_re1[4].find('Code')])
    except ValueError:
        list_re1[4] = 0
    return list_re + list_re1
# All the albums of a model
def getalbumlist(userId):
    # URL requested when clicking the album tab; the response lists all albums
    url = 'https://mm.taobao.com/self/album/open_album_list.htm?_charset=utf-8&user_id%20={}'.format(userId)
    # Regular expression that picks out the link of each album
    body = getrequestbody(url)
    reg = r'<a href="(.*?)" target="_blank">'
    return re.findall(reg, body)
def getphotolist(userId, albumId):
    # URL for the photos of one album (the response is JSONP)
    # url = 'https:%s' % url
    # print(url)
    url = 'https://mm.taobao.com/album/json/get_album_photo_list.htm?user_id={}&album_id={}&top_pic_id=0' \
          '&cover=%2f%2fimg.alicdn.com%2fimgextra%2fi1%2f176817195%2ftb1jfcmkfxxxxblxfxxxxxxxxxx_!!0-tstar.jpg' \
          '&page=1&_ksts=1516189198030_166&callback=jsonp167'.format(userId, albumId)
    # Get the content of this album
    body = getrequestbody(url)  # JSONP data
    data = getData(body, 'jsonp167(', ')')
    dicts = loads(data)
    return dicts['picList']
# Take the substring between strat_son and end_son
def getData(body, strat_son, end_son):
    # find() returns the position of strat_son in body, or -1 if it is not there
    temp_index = body.find(strat_son)
    start_index = temp_index + len(strat_son)
    end_index = body.find(end_son, start_index)
    return body[start_index:end_index]
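# For example, getData('jsonp167({"picList": []})', 'jsonp167(', ')') returns
# '{"picList": []}', i.e. it strips the JSONP wrapper so the payload can be
# passed to json.loads (illustrative input, not real API output).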
def savephoto(photoUrl, path, picId):
    file_path = 'f:/mm/' + path
    if not os.path.exists(file_path):
        os.makedirs(file_path)
    auto_down(photoUrl, file_path + '/' + picId + '.jpg')

# Recursive call to retry when a file download is incomplete
def auto_down(url, file_path):
    try:
        urllib.request.urlretrieve(url, file_path)
    except urllib.request.ContentTooShortError:
        print('Network conditions are not good. Reloading.')
        auto_down(url, file_path)
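# The recursion above has no upper bound, so a persistently bad connection could
# recurse very deeply. An optional variant (just a sketch, not used below)
# retries a fixed number of times instead:
def auto_down_limited(url, file_path, retries=3):
    for _ in range(retries):
        try:
            urllib.request.urlretrieve(url, file_path)
            return True
        except urllib.request.ContentTooShortError:
            print('Network conditions are not good. Retrying.')
    return False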
# Implementation
page = 1
while True:
    showinfo_dicts = getshowinfolist(page)
    if showinfo_dicts['status'] == 1:
        for key in showinfo_dicts['data']['searchDOList']:
            userId = key['userId']
            showinfo_list = getInfo(userId)
            print(showinfo_list)
            print('Saving information for {}'.format(key['realName']))
            db = DB('localhost', 'sa', '123456c', 'Models', 'UTF8')
            try:
                sql = '''BEGIN TRAN insert_data
                INSERT INTO user_basic VALUES ('{0}', '{1}', '{2}', '{3}', '{4}');
                INSERT INTO user_extend VALUES ('{5}', '{6}', '{7}', '{8}', '{9}', '{10}');
                INSERT INTO user_standard VALUES ('{11}', {12}, {13}, '{14}', '{15}', {16});
                COMMIT TRAN insert_data'''.format(userId, showinfo_list[0], showinfo_list[2], showinfo_list[1], showinfo_list[4],
                                                  userId, showinfo_list[3], showinfo_list[5][0], showinfo_list[5][1], showinfo_list[6],
                                                  showinfo_list[7],
                                                  userId, float(showinfo_list[8]), float(showinfo_list[9]), showinfo_list[10],
                                                  showinfo_list[11], int(showinfo_list[12]))
            except ValueError:
                continue
            try:
                db.operatedb(sql)
            except pymssql.OperationalError:
                continue
            print('Save information complete')
            # Get the URL of each album, wrapped in a list
            urllist = getalbumlist(userId)
            print(urllist)
            # De-duplicate the album URLs: each link appears several times in the
            # page, so take every 7th entry (a set-based alternative is sketched
            # after this script)
            for album_url in urllist[::7]:
                albumId = getData(album_url, 'album_id=', '&')
                photolist = getphotolist(userId, albumId)  # photos of this album from the JSONP data
                print(photolist)
                for photo_url in photolist:
                    photoUrl = ''
                    if photo_url['picUrl'].find('.jpg_290x10000') != -1:
                        photoUrl = 'https:' + photo_url['picUrl'].strip('.jpg_290x10000') + '.jpg'  # delete the thumbnail suffix
                    else:
                        photoUrl = 'https:' + photo_url['picUrl']
                    path = photo_url['userId'] + '/' + photo_url['albumId']
                    savephoto(photoUrl, path, photo_url['picId'])
    else:
        print('There is no more data on this page, time to stop!')
        break
    page += 1
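The urllist[::7] stride in the loop above assumes every album link is repeated exactly seven times in the returned HTML. If that layout ever changes, an order-preserving de-duplication like the following sketch (not wired into the code above) is more robust:

def dedupe_keep_order(urls):
    seen = set()
    unique = []
    for u in urls:
        if u not in seen:
            seen.add(u)
            unique.append(u)
    return unique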
Only the first 16 photos of each album can be crawled this way.
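The album photo list is only ever requested with page=1, which appears to be why just the first 16 photos come back. If the endpoint pages the way its page parameter suggests (an assumption I have not verified), a loop over that parameter, reusing the helpers defined above and a trimmed-down URL, could collect the rest:

def getallphotos(userId, albumId, max_pages=10):
    # Assumption: incrementing page keeps returning pictures until picList is empty,
    # and the extra cover/_ksts parameters are optional.
    photos = []
    for page_no in range(1, max_pages + 1):
        url = ('https://mm.taobao.com/album/json/get_album_photo_list.htm'
               '?user_id={}&album_id={}&page={}&callback=jsonp167').format(userId, albumId, page_no)
        body = getrequestbody(url)
        piclist = loads(getData(body, 'jsonp167(', ')')).get('picList', [])
        if not piclist:
            break
        photos.extend(piclist)
    return photos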
With the data now sitting in the database, the following code does some simple processing on it:
import pymssql
from pychartdir import *
def avgmap(data):
    # The labels for the bar chart
    labels = ["avg_height", "avg_weight", "avg_cup", "avg_shoesize"]
    # The colors for the bars
    colors = [0xcc0000, 0x66aaee, 0xeebb22, 0xcccccc, 0xcc88ff]
    # Create an XYChart object of size 640 x 380 pixels
    c = XYChart(640, 380)
    # Use the white on black palette, which means the default text and line colors are white
    c.setColors(whiteOnBlackPalette)
    # Use a vertical gradient color from blue (0000cc) to deep blue (000044) as background. Use rounded
    # corners of 20 pixels radius for the top-left and bottom-right corners.
    c.setBackground(c.linearGradientColor(0, 0, 0, c.getHeight(), 0x0000cc, 0x000044))
    c.setRoundedFrame(0xffffff, 20, 0, 20, 0)
    # Add a title using 18pt Times New Roman Bold Italic font. Add 6 pixels top and bottom margins to
    # the title.
    title = c.addTitle("Model average map", "timesbi.ttf", 18)
    title.setMargin2(0, 0, 6, 6)
    # Add a separator line in white color just under the title
    c.addLine(20, title.getHeight(), c.getWidth() - 21, title.getHeight(), 0xffffff)
    # Tentatively set the plot area at (70, 60) and 480 x 240 pixels in size. Use a transparent border
    # and white grid lines
    c.setPlotArea(70, 60, 480, 240, -1, -1, Transparent, 0xffffff)
    # Swap the axes so that the bars are drawn horizontally
    c.swapXY()
    # Add a multi-color bar chart layer using the supplied data. Use bar gradient lighting with the
    # light intensity from 0.75 to 2.0
    c.addBarLayer3(data, colors).setBorderColor(Transparent, barLighting(0.75, 2.0))
    # Set the labels on the x axis.
    c.xAxis().setLabels(labels)
    # Show the same scale on the left and right y-axes
    c.syncYAxis()
    # Set the bottom y-axis title using 10pt Arial Bold font
    # c.yAxis().setTitle("USD (millions)", "arialbd.ttf", 10)
    # Set the y-axes to transparent
    c.yAxis().setColors(Transparent)
    c.yAxis2().setColors(Transparent)
    # Disable ticks on the x-axis by setting the tick color to transparent
    c.xAxis().setTickColor(Transparent)
    # Set the label styles of all axes to 8pt Arial Bold font
    c.xAxis().setLabelStyle("arialbd.ttf", 8)
    c.yAxis().setLabelStyle("arialbd.ttf", 8)
    c.yAxis2().setLabelStyle("arialbd.ttf", 8)
    # Adjust the plot area size, such that the bounding box (inclusive of axes) is 15 pixels from the
    # left edge, 15 pixels below the title, 15 pixels from the right edge, and 15 pixels from the
    # bottom edge.
    c.packPlotArea(15, title.getHeight() + 15, c.getWidth() - 16, c.getHeight() - 16)
    # Output the chart
    c.makeChart("Avg.png")
conn = pymssql.connect(host='localhost', user='sa', password='123456c', database='Models', charset='UTF8')
cur = conn.cursor()
cur.execute("""SELECT AVG(height), AVG(weight), AVG(CAST(LEFT(cup, 2) AS int)), AVG(shoesize)
               FROM user_standard
               WHERE cup NOT IN ('0A', 'B', 'M', '3X', 'Please select', 'Do not pick', 'C', '**', 'qwe');""")
row = cur.fetchone()
data_avg = []
while row:
    data_avg = list(row)
    print(data_avg)
    row = cur.fetchone()
avgmap(data_avg)
# Close
cur.close()
conn.close()
Result diagram: