"""Merge the text files under ./corpus and render a word cloud from them.

Running the script performs two steps:

1. ``join_txt`` concatenates every file in ``./corpus`` into
   ``all_result.txt``.
2. ``make_pic`` segments that text with jieba, removes the stop words listed
   in ``stopword.txt``, counts the remaining words and writes a word cloud
   shaped and coloured by ``mangguo.png`` to ``ciyun.png``.
"""
import codecs  # open() variant that takes an explicit file encoding
import os
from time import sleep

import jieba  # word segmentation
import matplotlib.pyplot as plt
import numpy
import pandas
from wordcloud import WordCloud, ImageColorGenerator

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread was removed in SciPy 1.2; _read_image() then falls
    # back to matplotlib.
    imread = None


def _read_image(path):
    """Return the image at *path* as an array of unsigned bytes."""
    if imread is not None:
        return imread(path)
    image = plt.imread(path)
    if image.dtype.kind == "f":
        # matplotlib returns PNG data as floats in [0, 1]; rescale to 0-255
        # so that pure white is 255.
        image = (image * 255).round().astype(numpy.uint8)
    return image


def join_txt():
    """Concatenate every file in ``./corpus`` into ``all_result.txt``.

    The corpus files are read as UTF-8 and the merged file is written as
    UTF-8 in the current working directory, replacing any previous copy.
    Entries of ``./corpus`` that are not regular files are skipped.
    """
    corpus_dir = os.path.join(os.getcwd(), "corpus")
    with open("all_result.txt", "w", encoding="utf-8") as merged:
        for filename in sorted(os.listdir(corpus_dir)):
            filepath = os.path.join(corpus_dir, filename)
            if not os.path.isfile(filepath):
                continue
            with open(filepath, encoding="utf-8") as source:
                merged.writelines(source)
            # Keep the last line of one file from running into the first
            # line of the next.
            merged.write("\n")


def make_pic():
    """Build a word cloud from ``all_result.txt`` and save it as ``ciyun.png``.

    Reads ``stopword.txt`` (one stop word per line, UTF-8), ``mangguo.png``
    (mask and colour source) and the font ``msyh.ttf`` from the current
    working directory.
    """
    with codecs.open("all_result.txt", "r", encoding="utf-8") as merged:
        content = merged.read()

    # Segment the text, keeping only tokens longer than one character.
    segment = [
        seg for seg in jieba.cut(content)
        if len(seg) > 1 and seg != "\r\n"
    ]

    # Remove stop words (text denoising).
    words_df = pandas.DataFrame({"segment": segment})
    stopwords = pandas.read_csv(
        "stopword.txt",
        index_col=False,
        quoting=3,  # csv.QUOTE_NONE: treat quote characters as plain text
        sep="\t",
        names=["stopword"],
        encoding="utf8",
    )
    words_df = words_df[~words_df.segment.isin(stopwords.stopword)]

    # Word-frequency table, most frequent first.  groupby().size() replaces
    # agg({"Count": numpy.size}), whose dict-renaming form pandas 1.0 removed.
    words_stat = (
        words_df.groupby("segment")
        .size()
        .reset_index(name="Count")
        .sort_values(by="Count", ascending=False)
    )

    # Use the background image as the mask of the word cloud.
    bimg = _read_image("mangguo.png")
    wordcloud = WordCloud(
        background_color="white", mask=bimg, font_path="msyh.ttf"
    )
    # fit_words() needs a dict mapping word -> frequency.
    wordcloud = wordcloud.fit_words(
        dict(words_stat.head(990000).itertuples(index=False))
    )

    # Take the colour of each word from the background image.
    bimg_colors = ImageColorGenerator(bimg)
    plt.axis("off")
    plt.imshow(wordcloud.recolor(color_func=bimg_colors))
    # plt.show()
    wordcloud.to_file("ciyun.png")


if __name__ == "__main__":
    join_txt()
    # Pause kept from the original script; the merged file is already closed
    # at this point.
    sleep(2)
    print("TXT file integration complete! ----")
    make_pic()
    print("Word cloud image generation completed -----ciyun.png")
On Windows, the wordcloud package has to be installed manually; you can go to https://www.lfd.uci.edu/~gohlke/pythonlibs/
and download the corresponding .whl file.
Note:
wordcloud = wordcloud.fit_words(dict(words_stat.head(990000).itertuples(index=False)))
The argument passed to fit_words here must be a dict.
Python text processing: word segmentation and word clouds