This example uses Python to implement a simple domain-name analysis. The source code is as follows:
The code is as follows:
import datetime
import sys
import time
import urllib
def getDate():
    """Return today's local date as a ``YYYY-MM-DD`` string."""
    # date.isoformat() yields the same text the original obtained by taking
    # the first whitespace-separated field of str(datetime.now()).
    return datetime.date.today().isoformat()
# Url = "http://www.kingnic.com/list/2009-06-16.txt"
def getUrl(dateStr=None):
    """Build the URL of the daily domain list.

    Args:
        dateStr: Date in ``YYYY-MM-DD`` form. When omitted or empty, the
            value returned by ``getDate()`` is used.

    Returns:
        The list URL, e.g. ``http://www.kingnic.com/list/2009-06-16.txt``,
        or ``None`` when no date could be determined.
    """
    # The trailing slash matters: without it the date is glued directly
    # onto "list" and the resulting URL is wrong.
    baseUrl = "http://www.kingnic.com/list/"
    thisDate = dateStr or getDate()
    if not thisDate:
        print("Error Date!")
        return None
    return baseUrl + thisDate + ".txt"
def getSource(url):
    """Download ``url`` and return its body as text.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The response body as a ``str``.
    """
    # Imported locally so the function works on both Python 3
    # (urllib.request) and Python 2 (urllib).
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    response = urlopen(url)
    try:
        data = response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()
    if not isinstance(data, str):
        # Python 3 returns bytes; callers write the result to a text-mode
        # file. NOTE(review): assumes the list is UTF-8/ASCII text - confirm.
        data = data.decode("utf-8")
    return data
def save(source, filename="domains.txt"):
    """Write ``source`` to ``filename``, replacing any existing content.

    Args:
        source: Text to write.
        filename: Destination path; defaults to ``domains.txt``.

    Returns:
        ``True`` once the data has been written.
    """
    # The context manager closes the file even if write() raises.
    with open(filename, "w") as fp:
        fp.write(source)
    return True
def loadList(fileName="domains.txt"):
    """Read ``fileName`` and return its lines.

    Args:
        fileName: Path of the file to read; defaults to ``domains.txt``.

    Returns:
        A list with one string per line, line endings preserved.
    """
    # Honour the fileName argument; previously "domains.txt" was always
    # opened regardless of what the caller passed.
    with open(fileName, "r") as fp:
        return fp.readlines()
def getPrefix(domain):
    """Return the part of ``domain`` before the first dot.

    If ``domain`` contains no dot, the whole string is returned.
    """
    return domain.split('.')[0]
def getPostfix(domain):
    """Return the second dot-separated label of ``domain``.

    For ``"abc.com"`` this is ``"com"``. An empty string is returned when
    ``domain`` contains no dot, instead of raising ``IndexError``.
    """
    labels = domain.split('.')
    if len(labels) < 2:
        return ""
    return labels[1]
def hasMidLine(domain):
    """Return ``True`` if ``domain`` contains a hyphen (``-``)."""
    return '-' in domain
def parser(domains, min_len=0, max_len=4):
    """Filter ``domains`` by prefix length and hyphen use, printing a summary.

    A domain is kept when the part before its first dot has a length in
    ``[min_len, max_len]`` and contains no hyphen.

    Args:
        domains: Sequence of domain strings (e.g. lines read from a file).
        min_len: Smallest accepted prefix length (inclusive).
        max_len: Largest accepted prefix length (inclusive).

    Returns:
        List of the entries that passed both filters, unchanged and in
        their original order.
    """
    result = []
    len_num = 0        # entries rejected because of prefix length
    mid_line_num = 0   # entries rejected because the prefix has a hyphen
    for domain in domains:
        # Strip only for inspection so a trailing newline on a dotless line
        # does not inflate the length; the stored entry stays untouched so
        # callers can still join the result back into a file.
        prefix = getPrefix(domain.strip())
        if not min_len <= len(prefix) <= max_len:
            len_num += 1
            continue
        if hasMidLine(prefix):
            mid_line_num += 1
            continue
        result.append(domain)

    # Single-argument print() calls behave the same on Python 2 and 3.
    print("log:\n")
    print("all:\t %s" % len(domains))
    # Bounds are reported in (min, max) order to match the "[a, b]" label.
    print("len not in [%s, %s]\t: %s" % (min_len, max_len, len_num))
    print("contain '-':\t %s" % mid_line_num)
    print("remain:\t %s" % len(result))
    return result
if __name__ == "__main__":
    # Download today's list, keep a local copy, then filter it and store
    # the surviving entries in result.txt.
    url = getUrl()
    source = getSource(url)
    save(source)
    domains = loadList()
    result = parser(domains)
    # Entries still carry their line endings, so a plain join rebuilds a
    # one-domain-per-line file.
    save("".join(result), "result.txt")
    print("\n\nfinished !!")
Output files:
domains.txt: the domain names released by kingnic.com on that day;
result.txt: the domain names that meet the filtering conditions.
Log output:
The code is as follows:
all: 55500
len not in [4, 0]: 55019
contain '-': 32
remain: 449
finished !!
The script filters domain names by suffix, length, and hyphen (-). The filtering conditions are fairly minimal; if you need further filtering on suffix, length, or hyphens, you can add it later.