import urllib.request
import re
import os

url = "http://www.budejie.com/"  # address to crawl

def get_page(url):
    page = urllib.request.urlopen(url).read()  # fetch the raw HTML at that address
    # page = page.decode('GBK')  # transcode with GBK instead, if needed
    page = page.decode('utf8')
    return page
    # print(page)
def get_content(html):
    zz = r'<div class="j-r-list-c">.+?</div>.+?</div>'
    rge = re.findall(zz, html, re.S)
    # reg = re.compile(r'<div class="j-r-list-c">.+?</div>.+?</div>')
    # return re.findall(reg, html)
    return "".join(rge)
# print(get_page(url))
# print(get_content(get_page(url)))
zz = r'data-original="(.+?)" title=".+?" alt="(.+?)" />'
html = re.findall(zz, get_content(get_page(url)))
print(html)
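Because the pattern has two capture groups, re.findall returns a list of (image_url, title) tuples, which is what the download loop below unpacks. A quick illustration on a hypothetical tag (the URL and alt text here are made up):

import re

sample = '<img data-original="http://example.com/a.gif" title="x" alt="funny cat" />'
zz = r'data-original="(.+?)" title=".+?" alt="(.+?)" />'
print(re.findall(zz, sample))  # [('http://example.com/a.gif', 'funny cat')]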
def mkdir(path):
    folder = os.path.exists(path)
    if not folder:  # create the folder only if it does not already exist
        os.makedirs(path)  # makedirs creates every missing directory along the path
        print("Create new folder")
        print("Created successfully")
    else:
        print("This folder already exists")
img_path = 'd:/photo/'
mkdir(img_path)
Img_path = "d:/photo/"
mkdir (Img_path)
for line in html:
    # to decide whether it really is a GIF picture:
    # if str(line[0]).endswith(".gif"):
    p1 = line[0]  # image URL
    p2 = line[1]  # title / alt text, used as the file name
    print(p2 + " " + p1)
    url = p1
    # download the GIF image into the d:/photo/ folder
    web = urllib.request.urlopen(url)
    data = web.read()
    f = open(img_path + p2 + ".gif", "wb")
    f.write(data)
    f.close()
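In practice, some extracted links can fail to open and a title can contain characters Windows does not allow in file names. A slightly more defensive sketch of the same download step (the name sanitising and the error handling are my additions, not part of the original script):

import re
import urllib.request

def save_gif(img_url, title, folder="d:/photo/"):
    safe_name = re.sub(r'[\\/:*?"<>|]', "_", title)  # replace characters Windows forbids in file names
    try:
        data = urllib.request.urlopen(img_url).read()
    except Exception as e:  # one bad link should not abort the whole run
        print("skipped", img_url, e)
        return
    with open(folder + safe_name + ".gif", "wb") as f:
        f.write(data)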
My first Python crawler script.