1. Define a func(url, folder_path) that fetches the content at the URL, saves it to a file in the folder_path directory, and generates the file name randomly.
2. Define a func(folder_path) that merges all the files under that directory into a single file, all.txt.
3. Define a func (URL) and analyze how many links are in the URL content
4. Define a func(url) that extracts the parameters after the `?` in the URL and returns them as a dict.
assert func('http://url/api?param=2&param2=4') == {'param': '2', 'param2': '4'}
5. Define a Func (folder) to delete all files under the folder
import os
import random
import urllib
def save_url_content(url, folder_path=None):
    """Download ``url`` and save the response body to a randomly named file.

    Args:
        url: Address to fetch; must start with ``http://`` or ``https://``.
        folder_path: Existing directory in which the file is created.

    Returns:
        The path of the written file on success. If an argument is rejected,
        a unicode message naming the problem is returned instead and nothing
        is downloaded.
    """
    try:  # Python 3
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urllib import urlopen

    # Accept either scheme; the previous check combined two prefixes with
    # ``and`` and therefore rejected every URL.
    if not url.startswith(('http://', 'https://')):
        return u'URL address does not meet specifications'
    # The default of None would make os.path.isdir() raise a TypeError.
    if folder_path is None or not os.path.isdir(folder_path):
        return u'folder_path non-folder'

    response = urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()

    rand_filename = 'test_%s' % random.randint(1, 1000)
    file_path = os.path.join(folder_path, rand_filename)
    # Binary mode: the body is written exactly as received.
    with open(file_path, 'wb') as out:
        out.write(content)
    return file_path
def get_url_count(url):
    """Count the ``<a href=`` occurrences in the page served at ``url``.

    Args:
        url: Address to fetch; must start with ``http://`` or ``https://``.

    Returns:
        The number of non-overlapping ``<a href=`` substrings in the response
        body, or a unicode error message when the URL has neither prefix.
    """
    try:  # Python 3
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urllib import urlopen

    if not url.startswith(('http://', 'https://')):
        return u'URL address does not meet specifications'

    response = urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()
    # Same result as len(content.split(marker)) - 1; a bytes marker matches
    # the raw body on both Python 2 and Python 3.
    return content.count(b'<a href=')
Import OS
# Solved recursively.
def merge(folder_path, merge_path='/tmp/merge_test'):
    """Append the contents of every file under ``folder_path`` to one file.

    Sub-directories are descended into recursively. Entries are visited in
    sorted name order so the merged output is deterministic.

    Args:
        folder_path: Directory whose files are merged.
        merge_path: File that receives the data. It is opened in append mode,
            so repeated calls keep adding to it.
    """
    target = os.path.abspath(merge_path)
    for name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, name)
        if os.path.isdir(file_path):
            merge(file_path, merge_path)
        elif os.path.abspath(file_path) != target:
            # Skipping the output file keeps it from being appended to itself
            # when it lives inside the folder being merged.
            with open(file_path, 'rb') as src:
                content = src.read()
            with open(merge_path, 'ab') as dst:
                dst.write(content)
# Demo run: merge every file found under /tmp/5.
merge('/tmp/5')
Import Urlparse
def qs(url):
    """Return the query-string parameters of ``url`` as a dict.

    Args:
        url: The URL whose part after ``?`` is parsed.

    Returns:
        A dict mapping each parameter name to its first value. A URL without
        a query string yields an empty dict.
    """
    try:  # Python 3
        from urllib.parse import parse_qs, urlparse
    except ImportError:  # Python 2
        from urlparse import parse_qs, urlparse

    query = urlparse(url).query
    # parse_qs maps every name to a list of values; keep only the first one.
    return {key: values[0] for key, values in parse_qs(query).items()}
# Example calls: no query string, several parameters, and a numeric name.
print(qs('http://126.com'))
print(qs('http://api/api?f=5&g=6&y=5'))
print(qs('http://api/api?11=53'))
# Solved recursively.
def delete(folder_path):
    """Remove every file under ``folder_path``, descending into sub-directories.

    Directories themselves are left in place; only the files and symbolic
    links inside them are removed.

    Args:
        folder_path: Directory whose files are deleted.
    """
    for name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, name)
        # os.path.isdir() follows symlinks; unlink a linked directory instead
        # of descending into it, so nothing outside folder_path is deleted.
        if os.path.isdir(file_path) and not os.path.islink(file_path):
            delete(file_path)
        else:
            os.remove(file_path)
# Demo run: removes every file under /tmp/5 (the directories are kept).
delete('/tmp/5')
The second part of Python class learning