#!/usr/bin/env python
import math
from operator import itemgetter
def freq(word, document):
    """Return how many times *word* occurs in *document*.

    The document is split on runs of whitespace and only whole-token
    matches are counted.

    Args:
        word: Token to look for.
        document: Text whose tokens are separated by whitespace.

    Returns:
        The number of tokens in *document* that are equal to *word*.
    """
    # The earlier version opened a hard-coded file here and threw the handle
    # away; that leaked a descriptor per call and raised when the file was
    # missing, without contributing anything to the result.
    return document.split().count(word)
def wordcount(document):
    """Return the number of whitespace-separated tokens in *document*.

    Args:
        document: Text whose tokens are separated by whitespace.

    Returns:
        Token count; 0 for an empty or all-whitespace string.
    """
    # No file access is needed: the count depends only on the text passed in.
    return len(document.split())
def numdocscontaining(word, documentlist):
    """Return how many documents in *documentlist* contain *word*.

    Args:
        word: Token to look for.
        documentlist: Iterable of document texts (whitespace-separated tokens).

    Returns:
        The number of documents with at least one token equal to *word*.
    """
    # A membership test on the token list is equivalent to "frequency > 0"
    # and stops at the first hit instead of counting every occurrence.
    return sum(1 for document in documentlist if word in document.split())
def TF(word, document):
    """Return the term frequency of *word* in *document*.

    Term frequency is the number of tokens equal to *word* divided by the
    total number of tokens in the document.

    Args:
        word: Token to look for.
        document: Text whose tokens are separated by whitespace.

    Returns:
        A float in [0.0, 1.0]; 0.0 when the document has no tokens.
    """
    # Split once and reuse the token list for both the count and the length.
    tokens = document.split()
    if not tokens:
        # An empty document would otherwise divide by zero.
        return 0.0
    # float() keeps this a true division on Python 2 as well.
    return tokens.count(word) / float(len(tokens))
def IDF(word, documentlist):
    """Return the inverse document frequency of *word* over *documentlist*.

    Computed as ``log(N / df)``, where ``N`` is the number of documents and
    ``df`` is the number of documents containing *word* as a token.

    Args:
        word: Token to look for.
        documentlist: Sequence of document texts (whitespace-separated tokens).

    Returns:
        The natural-log IDF as a float. Returns 0.0 when no document contains
        *word* (including an empty *documentlist*), so that such a word gets
        a TF-IDF of zero instead of raising ZeroDivisionError.
    """
    containing = sum(1 for document in documentlist if word in document.split())
    if containing == 0:
        return 0.0
    # float() forces true division; integer division on Python 2 would
    # truncate the ratio before the logarithm is taken.
    return math.log(float(len(documentlist)) / containing)
def TFIDF(word, document, documentlist):
    """Return the TF-IDF weight of *word* for *document* within *documentlist*.

    Args:
        word: Token to score.
        document: Text of the document being scored.
        documentlist: Sequence of all document texts in the corpus.

    Returns:
        ``TF(word, document) * IDF(word, documentlist)`` as a float.
    """
    # The call must use the same spelling as the definition (TF); the earlier
    # mixed-case spelling raised NameError.
    return TF(word, document) * IDF(word, documentlist)
def main():
    """Print the TF-IDF score of every distinct word in the first document.

    Each input file is read and its *contents* become one document. Output is
    one ``score <= word`` line per distinct word, highest score first.
    """
    paths = [r'd:\syntax.txt', r'd:\separator.txt']

    # The scoring functions work on document text. Appending the path strings
    # themselves (as before) scores the file names, not the file contents, so
    # no real keywords can come out.
    documentlist = []
    for path in paths:
        # NOTE(review): assumes the files are already segmented into
        # whitespace-separated words and readable in the platform default
        # encoding -- confirm, and pass an explicit encoding if not.
        with open(path) as handle:
            documentlist.append(handle.read())

    documentnumber = 0
    document = documentlist[documentnumber]

    words = {}
    for word in document.split():
        # Score each distinct word once; repeats would give the same value.
        if word not in words:
            words[word] = TFIDF(word, document, documentlist)

    for word, score in sorted(words.items(), key=itemgetter(1), reverse=True):
        print("%f <= %s" % (score, word))


if __name__ == '__main__':
    main()
# Question: why does this script fail to extract keywords? Please help.