The code is as follows:
import datetime
import sys
import time
import urllib
def getDate():
    """Return today's local date as a ``YYYY-MM-DD`` string."""
    # date.isoformat() yields the same text as the date part of
    # str(datetime.datetime.now()), without string slicing.
    return datetime.date.today().isoformat()
# Example URL: "http://www.kingnic.com/list/2009-06-16.txt"
def getUrl(datestr=None):
    """Build the URL of a daily domain list.

    Args:
        datestr: Date in ``YYYY-MM-DD`` form. When omitted or empty, the
            value returned by ``getDate()`` is used.

    Returns:
        The list URL as a string, or ``None`` (after printing an error)
        when no date could be determined.
    """
    baseurl = "http://www.kingnic.com/list/"
    thisdate = datestr or getDate()
    if not thisdate:
        print("Error date!")
        return None
    return baseurl + thisdate + ".txt"
def getsource(url):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The body as ``str``. A bytes body is decoded as UTF-8 with
        undecodable bytes replaced, so it can be written to a text-mode file.
    """
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # closing() releases the connection even if read() raises.
    with closing(urlopen(url)) as response:
        data = response.read()
    # Python 3 returns bytes here; Python 2 already returns str.
    if not isinstance(data, str):
        data = data.decode("utf-8", "replace")
    return data
def save(source, filename="Domains.txt"):
    """Write *source* to *filename*, replacing any existing content.

    Args:
        source: Text to write.
        filename: Path of the file to create or overwrite.

    Returns:
        ``True`` once the text has been written.
    """
    # The with-block closes the file even when write() raises.
    with open(filename, "w") as fp:
        fp.write(source)
    return True
def loadlist(filename="Domains.txt"):
    """Read *filename* and return its lines, line endings included.

    Args:
        filename: Path of the text file to read. Earlier versions ignored
            this argument and always opened the default file.

    Returns:
        A list with one string per line of the file.
    """
    with open(filename, "r") as fp:
        return fp.readlines()
def getprefix(domain):
    """Return the part of *domain* before its first dot.

    A value that contains no dot is returned unchanged.
    """
    return domain.split(".", 1)[0]
def getpostfix(domain):
    """Return the second dot-separated label of *domain*.

    For ``"example.com"`` this is ``"com"``. An empty string is returned
    when *domain* contains no dot, rather than raising ``IndexError``.
    """
    labels = domain.split(".")
    return labels[1] if len(labels) > 1 else ""
def hasmidline(domain):
    """Return ``True`` when *domain* contains a hyphen, else ``False``."""
    return "-" in domain
def parser(domains, min_len=0, max_len=4):
    """Filter domain lines by prefix length and absence of hyphens.

    A line is kept when the text before its first dot is between *min_len*
    and *max_len* characters long (inclusive) and contains no ``-``.
    A summary of the filtering is printed to standard output.

    Args:
        domains: Sequence of domain strings; entries are returned unmodified,
            so any trailing newline is preserved.
        min_len: Smallest accepted prefix length.
        max_len: Largest accepted prefix length.

    Returns:
        The list of entries that passed both filters, in input order.
    """
    result = []
    len_num = 0
    mid_line_num = 0
    for domain in domains:
        if not domain.strip():
            # Blank lines carry no domain; skip them without counting.
            continue
        prefix = getprefix(domain)
        if not min_len <= len(prefix) <= max_len:
            len_num += 1
            continue
        if hasmidline(prefix):
            mid_line_num += 1
            continue
        result.append(domain)

    # Single-argument print() calls behave the same on Python 2 and 3.
    print("log:\n")
    print("All:\t %s" % len(domains))
    # Bounds are printed as max,min to keep the established log format.
    print("Len not in [%s,%s]\t:%s" % (max_len, min_len, len_num))
    print("Contain '-':\t %s" % mid_line_num)
    print("Remain:\t %s" % len(result))
    return result
if __name__ == "__main__":
    # Fetch today's list, keep a local copy, then filter the saved copy.
    url = getUrl()
    if url is None:
        # getUrl() has already printed the reason; nothing to download.
        raise SystemExit(1)
    source = getsource(url)
    save(source)
    domains = loadlist()
    result = parser(domains)
    # Lines read back from the file keep their line endings, so they are
    # joined without a separator.
    save("".join(result), "Result.txt")
    print("\n\n\nfinished!!")
Output files:
Domains.txt: the domain names that kingnic.com released on that day;
Result.txt: the domain names that satisfy the filter conditions.
Log output:
all:55500
Len not in [4,0]: 55019
Contain '-': 32
remain:449
finished!!
The filtering covers suffix, length, and the absence of "-". Only a few filter conditions are implemented so far; others can be added later if necessary.