During bug-quality analysis, the word-segmentation output needs to be sorted and de-duplicated, which naturally suggests the shell tools `sort` and `uniq`.
Calling these shell commands from Python is very convenient.
# -*- coding: utf-8 -*-
import os
import sys

try:
    from shlex import quote  # Python 3.3+
except ImportError:  # Python 2 fallback
    from pipes import quote
def cws_statistics(flag, afile, bfile):
    """Count duplicate lines of a word list with a sort/uniq/awk shell pipeline.

    Each output line has the form "<count> <word>", ordered by descending
    count. The result is written to ``bfile`` through shell redirection.

    Args:
        flag: "0" sorts with ``sort -f`` (case-folding) and keeps every word;
            "1" uses a plain ``sort`` and additionally drops entries whose
            word has length 1 (as measured by awk's ``length``).
        afile: Path of the input word list, one word per line.
        bfile: Path of the result file; it is overwritten.

    Returns:
        None. Progress and error messages are printed to stdout.
    """
    if not os.path.isfile(afile):
        print("wordlist doesn't exist!")
        return

    # Quote both paths so names containing spaces or shell metacharacters
    # cannot break (or inject into) the command line.
    src = quote(afile)
    dst = quote(bfile)

    # Count duplicates, then order by the count numerically, largest first.
    count_desc = "uniq -c | sort -k 1 -n -r"
    # uniq -c left-pads the count with spaces; strip that padding.
    trim = ("awk 'function trim(str){sub(/^[ ]*/,\"\",str); return str} "
            "{print trim($0)}'")

    if flag == "0":
        command = "sort -f %s | %s | %s > %s" % (src, count_desc, trim, dst)
    elif flag == "1":
        # Keep only entries whose second field (the word) is not length 1.
        drop_single = "awk -F' ' '{if(length($2)!=1) print $0}'"
        command = "sort %s | %s | %s | %s > %s" % (
            src, count_desc, trim, drop_single, dst)
    else:
        # Previously an unknown flag ran nothing yet still reported success.
        print("unknown flag %r, expected \"0\" or \"1\"" % (flag,))
        return

    status = os.system(command)
    if status != 0:
        print("cws_statistics failed with status %s" % status)
        return

    print("cws_statistics is OK! %s => %s" % (afile, bfile))
if __name__ == "__main__":
    # Script entry point: expects exactly three arguments after the script
    # name -- the mode flag, the input word list and the result file.
    if len(sys.argv) == 4:
        cws_statistics(sys.argv[1], sys.argv[2], sys.argv[3])
    else:
        print("Usage: python cws_statistics.py [flag] [wordlist] [resultfile]")