Python3 Climb the Amoy Girl

Source: Internet
Author: User
Tags italic font

Having just learned Python's collection data types, I happened upon a public lesson video and also read an earlier write-up on the topic. Borrowing the best ideas from both and discarding the rest, I wrote the following crawler:

import os
import re
import urllib
import urllib.error
import urllib.request
from json import loads
from urllib import request

# Third-party: Microsoft SQL Server driver
import pymssql
# SQL Server database
class DB:

def __init__ (self,host,user,password,database,charset):
self.conn = Pymssql.connect (Host=host, User=user, Password=password, Database=database, Charset=charset)
self.cur = self.conn.cursor ()

# Operation
def operatedb (self,sql):
if not self.cur:
print (' Connection failed ')
Else:
self.cur.execute (SQL)
Self.conn.commit ()

# Close
def close (self):
self.cur.close ()
self.conn.close ()
# Initiating a request
def getrequestbody (URL):
return Urllib.request.urlopen (URL). read (). Decode (' GBK ')
# Remove the characters not in the string
def setstrlegal (Body,son):
Start_index = Body.find (son)
End_index = Body.find (son) + len (son)
return Body[:start_index] + Body[end_index:]
# get all the model information on page, packaged in a dictionary
def getshowinfolist (page):
# URL of page request
url = ' https://mm.taobao.com/tstar/search/tstar_model.do?_input_charset=utf-8&currentpage={} '. Format (page)
BODY = getrequestbody (URL)
Dicts = loads (body)
Print (dicts[' data ' [' searchdolist '])
Return dicts
# Get information about a model
def getInfo (userId):
# The URL of the model card for each model
url = ' https://mm.taobao.com/self/info/model_info_show.htm?user_id={} '. Format (userId)
BODY = getrequestbody (URL). Strip ()
reg = R ' <span> (. *?) </span> '
list_re = Re.findall (Reg, body)
list_re[1] = Setstrlegal (list_re[1], ' &nbsp; '). Strip ()
# handling Error Messages
If Len (list_re)-1 >= 8:
List_re.pop ()
If Len (list_re)-1 < 7:
list_re.append (")
if list_re[4] = = ":
List_re[4] = List_re[4].replace ("', ' unknown '). Strip ()
if list_re[6] = = ":
List_re[6] = List_re[6].replace ("', ' unknown '). Strip ()
if list_re[7] = = ":
List_re[7] = List_re[7].replace ("', ' unknown '). Strip ()
list_re[5] = list_re[5].split (' &nbsp;&nbsp;&nbsp;&nbsp; ')

REG1 = R ' <p> (. *?) </p> '
list_re1 = Re.findall (REG1, body)
list_re1[0] = list_re1[0][:list_re1[0].find (' CM ')]
list_re1[1] = list_re1[1][:list_re1[1].find (' KG ')]
Try:
list_re1[4] = Int (list_re1[4][:list_re1[4].find (' Code ')])
except ValueError:
List_re1[4] = 0
return list_re + list_re1
# All the albums of a man
def getalbumlist (userId):
# request URL when clicking on album, Request return all albums
url = ' https://mm.taobao.com/self/album/open_album_list.htm?_charset=utf-8&user_id%20={} '. Format (userId)
# Get positive side expressions for each album
BODY = getrequestbody (URL)
Reg = R ' <a href= "(. *?)" target= "_blank" > "
Return Re.findall (Reg,body)

def getphotolist (userid,albumid): #url
# Access albums per photo
# url = ' https:%s '%url
# Print (URL)
url = ' Https://mm.taobao.com/album/json/get_album_photo_list.htm?user_id={}&album_id={}&top_pic_id=0 ' \
' &cover=%2f%2fimg.alicdn.com%2fimgextra%2fi1%2f176817195%2ftb1jfcmkfxxxxblxfxxxxxxxxxx_!! 0-tstar.jpg ' \
' &page=1&_ksts=1516189198030_166&callback=jsonp167 '. Format (userid,albumid)
# Get the content of each album, specific to the individual
BODY = getrequestbody (URL) # JSONP data
data = GetData (Body, ' jsonp167 (', ') ')
dicts = loads (data)
return dicts[' piclist ']
# Take a string between Strat_son to End_son
def getData (Body,strat_son,end_son):
# Strat_son is not inside the body, return to its position, otherwise-1
Temp_index = Body.find (Strat_son)
Start_index = Temp_index + len (strat_son)
End_index = Body.find (End_son, Start_index)
return Body[start_index:end_index]

def savephoto (photourl,path,picid):
file_path = ' f:/mm/' + path
if not os.path.exists (file_path):
os.makedirs (File_path)
Auto_down (Photourl, File_path + '/' + picid + '. jpg ')

# Recursive call to prevent errors due to incomplete file download
def auto_down (url,file_path):
Try:
Urllib.request.urlretrieve (Url,file_path)
except Urllib.request.ContentTooShortError:
print (' Network conditions is not good. Reloading. ')
Auto_down (Url,file_path)
# Implementation
page = 1
While True:
showinfo_dicts = getshowinfolist (page)
if showinfo_dicts[' status '] = = 1:
For key in showinfo_dicts[' data ' [' Searchdolist ']:# geturllist ():
userid = key[' userid ']
showinfo_list = GetInfo (userId)
print (showinfo_list)
print (' Saving information for {} '. Format (key[' realname ') )
db = db (' localhost ', ' sa ', ' 123456c ', ' Models ', ' UTF8 ')
Try:
sql = "' BEGIN Tran Insert_data
INSERT into user_basic values (' {0} ', ' {1} ', ' {2} ', ' {3} ', ' {4} ');
INSERT into user_extend values (' {5} ', ' {6} ', ' {7} ', ' {8} ', ' {9} ', ' {ten} ');
INSERT into User_standard values (' {one} ', {12},{13}, ' {+} ', ' {+} ', {+});
commit Tran Insert_data ". Format (userid,showinfo_list[0],showinfo_list[2],showinfo_list[1],showinfo_l IST[4],
Userid,showinfo_list[3],showinfo_list[5][0],showinfo_list[5][1],sh OWINFO_LIST[6],
Showinfo_list[7],
userid,float (showinfo_list[8]), float (showinfo_list[9]), Showinfo_li ST[10],
Showinfo_list[11],int (showinfo_list[12]))
except ValueError:
Continue
Try:
db.operatedb (SQL)
except Pymssql. Operationalerror:
Continue
Print (' Save Information Complete ')
# get each album URL, sealed in list
urllist = getalbumlist (userId)
print (urllist)
# Go to duplicate value, the URL of the album
For Album_url in Urllist[::7]:
albumID = GetData (Album_url, ' album_id= ', ' & ')
photolist = getphotolist (UserId, albumID) # Gets the JSONP data for the encapsulated photo
print (photolist)
For Photo_url in photolist:
photourl = ' '
if photo_url[' Picurl '].find ('. jpg_290x10000 ')! =-1:
photourl = ' https: ' + photo_url[' picurl '].strip ('. jpg_290x10000 ') + '. jpg ' # delete specified characters
Else:
photourl = ' https: ' + photo_url[' picurl ']
path = photo_url[' userId '] + '/' + photo_url[' albumid ']
Savephoto (Photourl, Path, photo_url[' Picid ')
Else:
print (' The page already has no data on it, it's time to change! ')
Break
page + = 1

Note: only the first 16 photos of each album can be crawled this way.
Once the scraped data has been inserted into the database, the following code does some simple processing on the resulting data:
Import pymssql
From Pychartdir Import *
def avgmap (data):
# The labels for the bar chart
labels = ["Avg_height", "Avg_weight", "Avg_cup", "avg_shoesize"]

# The colors for the bars
colors = [0xcc0000, 0x66aaee, 0xeebb22, 0XCCCCCC, 0XCC88FF]

# Create A Piechart object of size x 380 pixels.
C = XYChart (380)

# Use the white in black palette, which means the default text and line colors is white
c.setcolors (Whiteonblackpalette)

# Use a vertical gradient color from blue (0000cc) to Deep Blue (000044) as background. Use rounded
# corners of pixels radius for the top-left and Bottom-right corners.
C.setbackground (c.lineargradientcolor (0, 0, 0, c.getheight (), 0x0000cc, 0x000044))
c.setroundedframe (0xffffff, 0, 0)

# Add a title using 18pt times New Roman Bold Italic font. ADD 6 pixels top and bottom margins to
# the title.
title = C.addtitle ("Model average map", "Timesbi.ttf",)
title.setmargin2 (0, 0, 6, 6)

# ADD A separator line in white color just under the title
C.addline (Title.getheight (), C.getwidth ()-Title.getheight (), 0XFFFFFF)

# tentatively set the PlotArea at (+) and of 480 x pixels in size. Use transparent Border
# and white grid lines
C.setplotarea (480, 1, 1, Transparent, 0XFFFFFF)

# Swap The axis So, the bars is drawn horizontally
C.swapxy ()

# Add A multi-color bar chart layer using the supplied data. Use bar gradient lighting with the
# Light intensity from 0.75 to 2.0
c.addbarlayer3 (data, colors). setBorderColor (Transparent, barlighting (0.75, 2.0))

# Set The labels on the x axis.
C.xaxis (). Setlabels (labels)

# Show The same scale in the left and right y-axes
C.syncyaxis ()

# Set The bottom y-axis title using 10pt Arial Bold font
# C.yaxis (). Settitle ("USD (millions)", "Arialbd.ttf", ten)

# Set y-axes to Transparent
C.yaxis (). Setcolors (Transparent)
C.yaxis2 (). Setcolors (Transparent)

# Disable ticks on the x-axis by setting the tick color to transparent
C.xaxis (). Settickcolor (Transparent)

# Set The label styles of all axes to 8pt Arial Bold font
C.xaxis (). Setlabelstyle ("Arialbd.ttf", 8)
C.yaxis (). Setlabelstyle ("Arialbd.ttf", 8)
C.yaxis2 (). Setlabelstyle ("Arialbd.ttf", 8)

# Adjust The plot area size, such this bounding box (inclusive of axes) is a pixels from the
# left edge, pixels below the title, "Pixels from the right edge, and" Pixels from the bottom
# Edge.
C.packplotarea (Title.getheight () +, C.getwidth ()-C.getheight ()

# Output the chart
C.makechart ("Avg.png")
 conn = pymssql.connect (host= ' localhost ', user= ' sa ', password= ' 123456c ', database = ' Models ', charset= "UTF8")  
cur = Conn.cursor ()

cur.execute (" "" Select AVG (height), avg (weight), avg (CAST (cup,2) as int), AVG (shoesize) from user _standard
where Cup not in (' 0A ', ' B ', ' M ', ' 3X ', ' Please select ', ' Do not pick ', ' C ', ' * * ', ' qwe '); "" ")
row = Cur.fetchone ()
data_ AVG = []
while row:
Data_avg = List (row)
print (DATA_AVG)
row = Cur.fetchone ()
avgmap (data_avg)
# Close
Cur.close ()
Conn.close ()

Result diagram:



Python3 Climb the Amoy Girl

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.