#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# author: Rex
# blog: http://iregex.org
# filename: counter.py
# created: Mon Sep 20 21:00:52 2010
# desc: count word frequencies in the files given on the command line.
"""Count how often each word occurs in a set of UTF-8 text files.

A "word" is either a run of ASCII letters, digits, underscores and hyphens,
or one single character in the range U+0800..U+FFFF (the characters that
take three bytes in UTF-8, e.g. CJK ideographs).  The result is printed to
standard output as ``word count`` lines, most frequent first.

Usage: counter.py FILE [FILE ...]
"""

import re
import sys
from operator import itemgetter

# Compiled once at import time.  re.ASCII keeps \w limited to [A-Za-z0-9_],
# so every non-ASCII character is handled by the second alternative only.
# Characters that are 2 or 4 bytes long in UTF-8 are deliberately not
# matched: the historical byte-oriented pattern ([\x80-\xff]{3}) could only
# group them into meaningless three-byte chunks.
WORD_RE = re.compile(
    r"""
    (?: [\w-]+             # ASCII word, hyphens allowed
      | [\u0800-\uffff]    # one three-byte UTF-8 character
    )
    """,
    re.ASCII | re.VERBOSE,
)


def ReadFile(f):
    """Return the whole content of the file at path *f* as text.

    The file is decoded as UTF-8.  Undecodable bytes are replaced with
    U+FFFD instead of raising, so one bad file does not abort the run; note
    that U+FFFD itself lies in the matched range and is counted as a word.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(f, "r", encoding="utf-8", errors="replace") as handle:
        return handle.read()


def divide(c, regex):
    """Split text *c* into words.

    Args:
        c: the text to scan.
        regex: a compiled pattern; each non-overlapping match is one word.

    Returns:
        The list of matches, in the order they occur in *c*.
    """
    return regex.findall(c)


def update_dict(di, li):
    """Add the occurrences of every item of *li* to the counts in *di*.

    *di* is updated in place and also returned, so the call can be chained
    or its result re-assigned.

    Args:
        di: mapping of item -> count accumulated so far.
        li: iterable of hashable items to count.

    Returns:
        The same mapping object that was passed in as *di*.
    """
    for item in li:
        di[item] = di.get(item, 0) + 1
    return di


def main():
    """Count the words of every file named in ``sys.argv[1:]`` and print them."""
    # Receive the file names from the shell.
    files = sys.argv[1:]

    # Collect the words of all files into one frequency table.
    counts = {}
    for name in files:
        update_dict(counts, divide(ReadFile(name), WORD_RE))

    # Sort by count, highest first.  The sort is stable, so words with the
    # same count stay in order of first appearance.
    ranked = sorted(counts.items(), key=itemgetter(1), reverse=True)

    # Output to standard output, one "word count" pair per line.
    for word, count in ranked:
        print(word, count)


if __name__ == "__main__":
    main()