Reading xlsx files and generating word cloud output

Source: Internet
Author: User

Reading xlsx files and generating word cloud output

# coding=utf-8

import codecs
import csv
import logging
import re
import sys

import jieba
import jieba.posseg
import numpy as np
import xlrd
from wordcloud import WordCloud

# Python 2 only: make UTF-8 the default codec for implicit str/unicode
# conversions. ``reload`` is not a builtin on Python 3, so the hack is
# skipped there.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

def read_xlsx(filename):
    """Load every cell of the worksheet named 'sheet1' from a workbook.

    Whitespace runs are removed from text cells, float cells are truncated
    to ``int``, and every other value is converted with ``str``.

    Args:
        filename: Path of the workbook, passed to ``xlrd.open_workbook``.

    Returns:
        A list with one entry per sheet row; each entry is the list of that
        row's converted cell values.
    """
    whitespace = re.compile(r'\s+')
    workbook = xlrd.open_workbook(filename)
    booksheet = workbook.sheet_by_name('sheet1')
    p = []
    for row in range(booksheet.nrows):
        row_data = []
        for col in range(booksheet.ncols):
            val = booksheet.cell(row, col).value
            try:
                val = whitespace.sub('', val)
            except TypeError:
                # Non-text cell (e.g. a number): there is nothing to strip.
                pass
            if isinstance(val, float):
                val = int(val)
            else:
                val = str(val)
            row_data.append(val)
        p.append(row_data)
    print('the size of p is ' + str(len(p)))
    return p


def seperate(p):
    """Count how often each noun occurs across all cells of ``p``.

    Each text cell is tokenized with ``jieba.posseg.cut``; a token is
    counted when its part-of-speech flag starts with ``'n'``.

    Args:
        p: Iterable of rows, each an iterable of cell values (the shape
            returned by ``read_xlsx``).

    Returns:
        dict mapping each counted word to its number of occurrences.
    """
    result = {}
    for count, row in enumerate(p, 1):
        if count % 100 == 0:
            print('have seperate: # ' + str(count) + ' # word')
        for cell in row:
            if isinstance(cell, (int, float)):
                # read_xlsx stores numeric cells as int; they hold no words,
                # so skip them instead of handing them to the tokenizer.
                continue
            try:
                for pair in jieba.posseg.cut(cell):
                    if pair.flag.startswith('n'):
                        result[pair.word] = result.get(pair.word, 0) + 1
            except Exception as e:
                # Best effort: one bad cell must not abort the whole run.
                print('%s: %s' % (type(e).__name__, e))
    return result

def is_chinese(s):
    """Return True when ``s`` sorts between U+4E00 and U+9FA6 inclusive.

    This is a plain string comparison, so for a multi-character ``s`` the
    outcome is decided lexicographically (in practice by its leading
    character), not by checking every character.

    Args:
        s: The text to test.

    Returns:
        bool: whether ``s`` falls inside the range.
    """
    return u"\u4e00" <= s <= u"\u9fa6"

def cutdict(p, top):
    """Keep only the most frequent multi-character Chinese words.

    Candidates are the entries of ``p`` whose key is longer than one
    character and passes ``is_chinese``. Of those, only entries whose
    frequency is at or above the ``top``-th percentile of the candidate
    frequencies are returned.

    Args:
        p: dict mapping word -> frequency.
        top: Percentile passed to ``np.percentile`` (e.g. 90).

    Returns:
        dict of the surviving word -> frequency pairs; empty when no key
        qualifies as a candidate.
    """
    biggerone = {k: v for k, v in p.items()
                 if len(k) > 1 and is_chinese(k)}
    if not biggerone:
        # Avoid asking np.percentile for a percentile of an empty sample.
        return {}
    top_v = np.percentile(list(biggerone.values()), top)
    return {k: v for k, v in biggerone.items() if v >= top_v}

def initfile(filename):
    """Create a UTF-8 CSV file (with BOM) and write its header row.

    Args:
        filename: Path of the CSV file to create or overwrite.

    Returns:
        tuple ``(csvfile, writer)``: the open file object and the
        ``csv.writer`` bound to it. The caller is responsible for closing
        ``csvfile``.
    """
    if sys.version_info[0] >= 3:
        # Python 3's csv module needs a text-mode file opened with
        # newline=''; the 'utf-8-sig' codec emits the BOM itself.
        csvfile = open(filename, 'w', encoding='utf-8-sig', newline='')
    else:
        # Python 2's csv module needs a binary file; write the BOM by hand.
        csvfile = open(filename, 'wb')
        csvfile.write(codecs.BOM_UTF8)
    try:
        writer = csv.writer(csvfile)
        writer.writerow(['word', 'word frequency'])
    except Exception:
        # Do not leak the handle when the header cannot be written.
        csvfile.close()
        raise
    return csvfile, writer

def ci2file(csvfile, writer, result):
    """Write the multi-character words of ``result`` as CSV rows and close.

    Args:
        csvfile: The open file object behind ``writer``; it is flushed and
            closed before returning, even when a write fails.
        writer: ``csv.writer`` (or compatible) used to emit the rows.
        result: dict mapping word -> frequency. Entries whose key has a
            length of 1 or less are skipped.
    """
    try:
        for k, v in result.items():
            if len(k) > 1:
                writer.writerow([k, v])
        csvfile.flush()
    finally:
        csvfile.close()

# Without this the root logger stays at its default WARNING level and the
# logging.info() progress messages in this script are silently dropped.
logging.basicConfig(level=logging.INFO)

# Read the spreadsheet, count the nouns, and dump the full table to CSV.
p = read_xlsx('user_tweets_2.xlsx')
logging.info('done read tweets')
result = seperate(p)
csvfile, writer = initfile('user_tweets_2.csv')
ci2file(csvfile, writer, result)

# Keep only words at or above the 90th percentile of frequency.
result = cutdict(result, 90)
logging.info('done Cut result')
print('the size of final result is ' + str(len(result)))
# Generate a word cloud image. The text-based generate() call below is
# disabled; word frequencies are used instead.
# wordcloud = WordCloud().generate(text)

import random


def grey_color_func(word, font_size, position, orientation,
                    random_state=None, **kwargs):
    """Return a random grey tone as an HSL color string.

    The lightness is drawn uniformly from 60..100 (inclusive); hue and
    saturation are fixed at 0, giving a grey/black palette.

    Args:
        word, font_size, position, orientation: Accepted for the color
            callback signature; not used.
        random_state: Optional object with a ``randint(a, b)`` method (for
            example a ``random.Random``). When given, it is used as the
            source of randomness so results are reproducible; otherwise
            the global ``random`` module is used.
        **kwargs: Ignored.

    Returns:
        str of the form ``"hsl(0, 0%, <lightness>%)"``.
    """
    rng = random if random_state is None else random_state
    return "hsl(0, 0%%, %d%%)" % rng.randint(60, 100)

from PIL import Image
import matplotlib.pyplot as plt

# Load the mask image as an array for WordCloud's ``mask`` argument.
mask = np.array(Image.open('timg2.png'))
logging.info('done Read image')

# NOTE(review): the font file name looks truncated in the text this script
# was recovered from -- confirm the real path of a CJK-capable font.
wordcloud = WordCloud(max_words=1000, mask=mask,
                      margin=10, font_path='/Library/Fonts/文.ttf')

# Build the cloud from the word -> frequency mapping computed above.
wordcloud.fit_words(result)

# Snapshot the default coloring first -- presumably recolor() changes the
# cloud in place, so this must happen before it (verify against wordcloud).
default_colors = wordcloud.to_array()
plt.title("Custom colors")
plt.imshow(wordcloud.recolor(color_func=grey_color_func, random_state=3))
wordcloud.to_file("a_new_hope.png")
plt.axis("off")
plt.figure()
plt.title("Default colors")
plt.imshow(default_colors)
plt.axis("off")
plt.show()
# Display the generated image:
# The matplotlib way:

# plt.imshow(wordcloud)
# plt.axis("off")
# plt.show()

# from operator import itemgetter
# item1 = itemgetter(1)
# frequencies = sorted(result.items(), key=item1, reverse=True)

print('done')

Contact Us

The content source of this page is from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.