# Word frequency in a text
# Tested with Python24    vegaseat    25aug2005
#
# Splits a short text into words, strips trailing punctuation, lower-cases
# the words, counts how often each one occurs and prints the counts in
# alphabetical order.  Every print call passes exactly one parenthesised
# argument so the statements parse the same way under Python 2 and Python 3.

# Punctuation marks removed from the end of each word.
_PUNCTUATION = ",.!?;"

# Chinese wisdom...
# The text is reproduced as found, stray spaces included; the word clean-up
# below copes with the detached final full stop.
str1 = """ Man who run in front of car, get tired. Man who run behind car, get exhausted ."""

print("Original string :")
print(str1)
print("")

# Create a list of words separated at whitespaces
wordList1 = str1.split(None)

# Strip any punctuation marks and build the modified word list
wordList2 = []
for word1 in wordList1:
    # rstrip() with a set of characters removes every trailing mark, so
    # mixed endings such as "?!" are handled as well as a single mark.
    word2 = word1.rstrip(_PUNCTUATION).lower()
    # A token made only of punctuation collapses to "", which is not a
    # word and must not be counted.
    if word2:
        wordList2.append(word2)

print("Word list created from modified string :")
print(wordList2)
print("")

# Create a word frequency dictionary: word -> number of occurrences
freqD2 = {}
for word2 in wordList2:
    freqD2[word2] = freqD2.get(word2, 0) + 1

# Create a sorted list of keys; all words are lower case already.
# sorted() returns a list whether keys() yields a list or a view.
keyList = sorted(freqD2)

print("Frequency of each word in the word list (sorted ):")
for key2 in keyList:
    # Left-justify the word in a 10 character column, then the count.
    print("%-10s %d" % (key2, freqD2[key2]))