Python data analysis: counting which single red ball and blue ball combination appears most often in the two-color ball lottery
This article describes how to use Python data analysis to count how often each combination of a single red ball and the blue ball occurs in two-color ball lottery draws. It is shared here for your reference. The details are as follows:
Count the occurrences of each combination of a single red ball and the blue ball, then display the 19 most frequent combinations.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Rank (red ball, blue ball) pairs from two-color ball draws by frequency.
#
# Reads the draw history from ``newdata.txt``, pairs every red ball of a draw
# with that draw's blue ball, writes the pairs to ``hldata.csv``, counts how
# often each pair occurs, and plots the 19 most frequent pairs as a bar chart.
import operator
from collections import Counter

import numpy as np
import pandas as pd

DRAWS_FILE = 'newdata.txt'
PAIRS_FILE = 'hldata.csv'

# NOTE(review): most column slices were lost in the published listing. The
# layout used here (column 0 = draw date, columns 1-6 = red balls, column 7 =
# blue ball) is inferred from the surviving ":7:4" slice on the third red ball
# and the "df.loc[:, 7]" blue-ball comment -- confirm against the data file.
RED_COLUMNS = range(1, 7)
BLUE_COLUMN = 7

# Number of most frequent pairs that are printed and plotted.
TOP_N = 19


def stack_red_blue_pairs(draws):
    """Pair every red-ball column with the blue-ball column.

    Args:
        draws: DataFrame with integer column labels, one row per draw.

    Returns:
        A 2-D array with one ``[red, blue]`` row per (draw, red column).
        All rows for the first red column come first, then the second, etc.
    """
    blue = draws.loc[:, BLUE_COLUMN].values
    blocks = [
        np.column_stack((draws.loc[:, red].values, blue))
        for red in RED_COLUMNS
    ]
    return np.concatenate(blocks, axis=0)


def rank_lines(lines):
    """Count identical lines and return them most frequent first.

    Args:
        lines: iterable of strings (e.g. an open text file). Surrounding
            whitespace is stripped and blank lines are ignored.

    Returns:
        A list of ``(line, count)`` tuples sorted by descending count.
    """
    stripped = (line.strip() for line in lines)
    counts = Counter(key for key in stripped if key)
    return sorted(counts.items(), key=operator.itemgetter(1), reverse=True)


def plot_top_pairs(labels, counts):
    """Draw a bar chart with one bar per pair label.

    Args:
        labels: pair labels such as ``'1,12'``, used as x tick labels.
        counts: occurrence count for each label, in the same order.
    """
    # Imported here so the data helpers above stay usable on machines
    # without a plotting backend.
    import matplotlib.pyplot as plt

    # Bars are placed at 1..n; the pair strings are attached as tick labels.
    positions = list(range(1, len(labels) + 1))

    plt.figure(figsize=(12, 6), dpi=80)
    plt.bar(positions, counts, alpha=.5, color='r', width=0.8)
    plt.title('the one red and one blue ball number')
    plt.xlabel('one red and one blue number')
    plt.ylabel('Times')
    plt.xticks(positions, labels, rotation=10, size=10, ha='left')
    plt.show()


def analyse_pairs():
    """Run the whole analysis: read draws, rank pairs, print and plot."""
    draws = pd.read_csv(DRAWS_FILE, header=None)
    pairs = stack_red_blue_pairs(draws)

    # Persist the stacked pairs, then rank the file's lines ("red,blue").
    pd.DataFrame(pairs).to_csv(PAIRS_FILE, index=False, header=False)
    with open(PAIRS_FILE) as handle:
        ranked = rank_lines(handle)

    top = ranked[:TOP_N]
    labels = [label for label, _ in top]
    counts = [count for _, count in top]
    print(labels)
    print(counts)

    plot_top_pairs(labels, counts)


if __name__ == '__main__':
    analyse_pairs()
The result is as follows:
As the chart shows, the combination of red ball 1 and blue ball 12 appears most frequently, followed by red ball 26 with blue ball 9.
Reference example (a stacked bar chart drawn with matplotlib):
# Stacked bar chart: male and female user counts per application category.
import numpy as np

CATEGORY_TOTALS = np.array([13325, 9403, 9227, 8651])
MALE_RATIOS = np.array([0.75, 0.76, 0.72, 0.75])
# NOTE(review): 'pa ' looks truncated in the published listing -- confirm the
# intended label text.
CATEGORY_LABELS = ['chat', 'pa ', 'Group buying \n coupon ', 'Online video']
BAR_WIDTH = 0.5


def split_by_gender(totals, male_ratio):
    """Split per-category totals into male and female parts.

    Args:
        totals: total user count per category.
        male_ratio: fraction of male users per category, same length.

    Returns:
        A ``(men, women)`` tuple of float arrays, where ``men`` is
        ``totals * male_ratio`` and ``women`` is ``totals * (1 - male_ratio)``.
    """
    totals = np.asarray(totals, dtype=float)
    male_ratio = np.asarray(male_ratio, dtype=float)
    return totals * male_ratio, totals * (1 - male_ratio)


def plot_user_gender_split():
    """Draw the stacked male/female bar chart for all categories."""
    # Imported here so split_by_gender() stays usable on machines without a
    # plotting backend.
    import matplotlib.pyplot as plt

    plt.rc('font', family='simhei', size=13)

    men, women = split_by_gender(CATEGORY_TOTALS, MALE_RATIOS)
    idx = np.arange(len(CATEGORY_LABELS))

    # Bars are edge-aligned explicitly so that the ticks placed at
    # idx + BAR_WIDTH / 2 sit under the middle of each bar.
    plt.bar(idx, men, BAR_WIDTH, align='edge', color='red', label='male')
    plt.bar(idx, women, BAR_WIDTH, bottom=men, align='edge',
            color='yellow', label='female user')
    plt.xlabel('application categories')
    plt.ylabel('male distribution ')
    plt.xticks(idx + BAR_WIDTH / 2, CATEGORY_LABELS, rotation=40)
    plt.legend()
    # NOTE(review): the listing ends at legend(); a plt.show() or
    # plt.savefig() call is needed to see the figure in a plain script --
    # confirm whether one was cut off.


if __name__ == '__main__':
    plot_user_gender_split()