The code is as follows:
# The code is as follows:
import datetime
import sys
import time
import urllib
def getdate():
    """Return today's local date as a ``YYYY-MM-DD`` string."""
    # date.isoformat() yields the same text as the date half of
    # str(datetime.datetime.now()), without splitting on whitespace.
    return datetime.date.today().isoformat()
# Url = "http://www.kingnic.com/list/2009-06-16.txt"
def geturl(datestr=None):
    """Build the URL of the kingnic.com domain list for a given day.

    Args:
        datestr: Date as ``YYYY-MM-DD``. When omitted or empty, today's
            date from ``getdate()`` is used.

    Returns:
        A URL such as ``http://www.kingnic.com/list/2009-06-16.txt``, or
        ``None`` (after printing an error) when no date is available.
    """
    # The trailing slash matters: without it the date is glued directly
    # onto "list" and the resulting URL does not match the site layout.
    baseurl = "http://www.kingnic.com/list/"
    thisdate = datestr or getdate()
    if not thisdate:
        print("error date!")
        return None
    return baseurl + thisdate + ".txt"
def getsource(url):
    """Download *url* and return its body as text.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The response body as a ``str``. On Python 3 the raw bytes are
        decoded as UTF-8 (undecodable bytes are replaced) so the result
        can be written by a text-mode file.
    """
    # Imported locally so the function works on both Python 3 and Python 2
    # regardless of which urllib flavour the module imported.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    response = urlopen(url)
    try:
        data = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()
    if not isinstance(data, str):
        data = data.decode("utf-8", "replace")
    return data
def save(source, filename="domains.txt"):
    """Write *source* to *filename*, replacing any existing content.

    Args:
        source: Text to write.
        filename: Destination path; defaults to ``domains.txt``.

    Returns:
        ``True`` once the data has been written.
    """
    # The context manager closes the file even when write() raises.
    with open(filename, "w") as fp:
        fp.write(source)
    return True
def loadlist(filename="domains.txt"):
    """Read *filename* and return its lines.

    Args:
        filename: Path of the domain list; defaults to ``domains.txt``.

    Returns:
        A list with one entry per line; line endings are preserved so the
        entries can be joined back together unchanged.
    """
    # Open the requested file rather than a hard-coded "domains.txt", so
    # the filename argument is actually honoured.
    with open(filename, "r") as fp:
        return fp.readlines()
def getprefix(domain):
    """Return the part of *domain* before its first dot.

    Surrounding whitespace (such as the newline left by ``readlines()``)
    is stripped first, so a line without any dot yields the bare name.
    """
    return domain.strip().split(".")[0]
def getpostfix(domain):
    """Return the label that follows the first dot of *domain*.

    Surrounding whitespace (such as the newline left by ``readlines()``)
    is stripped first. An empty string is returned when *domain* contains
    no dot, instead of raising ``IndexError``.
    """
    labels = domain.strip().split(".")
    return labels[1] if len(labels) > 1 else ""
def hasmidline(domain):
    """Return ``True`` when *domain* contains a hyphen (``-``)."""
    return "-" in domain
def Parser(domains, min_len=0, max_len=4):
    """Filter a list of domain lines by prefix length and hyphen usage.

    A domain is kept when the part before its first dot is between
    *min_len* and *max_len* characters long (inclusive) and contains no
    hyphen. A short summary of the filtering is printed.

    Args:
        domains: Domain lines, e.g. as returned by ``loadlist()``; line
            endings may be present and are preserved in the result.
        min_len: Smallest accepted prefix length (default 0).
        max_len: Largest accepted prefix length (default 4).

    Returns:
        The list of lines from *domains* that passed both filters, in
        their original order and with their original line endings.
    """
    result = []
    len_num = 0        # rejected because of prefix length
    mid_line_num = 0   # rejected because the prefix contains '-'
    for domain in domains:
        # Blank lines carry no domain; ignore them instead of letting an
        # empty prefix slip through the length filter.
        if not domain.strip():
            continue
        # strip() guards against a trailing newline on lines without a dot.
        prefix = getprefix(domain).strip()
        if not min_len <= len(prefix) <= max_len:
            len_num += 1
            continue
        if hasmidline(prefix):
            mid_line_num += 1
            continue
        result.append(domain)
    print("log:\n")
    print("ALL:\t%s" % len(domains))
    # Bounds are reported low-to-high so the message reads as an interval.
    print("Len not in [%s, %s]\t: %s" % (min_len, max_len, len_num))
    print("contain '-':\t%s" % mid_line_num)
    print("remain:\t%s" % len(result))
    return result
if __name__ == "__main__":
    # Download today's list, store it, then filter it into result.txt.
    url = geturl()
    if url is None:
        # geturl() has already reported the problem; nothing to download.
        raise SystemExit(1)
    source = getsource(url)
    save(source)
    domains = loadlist()
    result = Parser(domains)
    # Entries keep their line endings, so a plain join rebuilds the file.
    save("".join(result), "result.txt")
    print("\n\nfinished !!")
Output files:
domains.txt: the domain names released by kingnic.com on that day;
result.txt: the domain names that meet the filtering conditions.
Log output:
ALL: 55500
Len not in [0, 4]: 55019
contain '-': 32
remain: 449
finished !!
The filtering considers the suffix, length, and hyphen (-). The current filter conditions are fairly minimal; if you need additional conditions, you can add them later.