Big Data statistics. 1. Project requirements: compute the statistical (frequency) distribution of one parameter across a massive data set.
2. Implementation process
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Count how often each value of one parameter occurs in a large text file.

The script works in three steps:

1. ``preprocess`` extracts the value of interest from every input line with a
   regular expression and writes the values to ``LaterText.txt``.
2. ``sort`` reads those values back, orders them and counts the occurrences
   of each distinct value.
3. ``Finaltext`` writes the ``value:count`` entries to ``Result.txt``.
"""
import re
from collections import Counter


def preprocess(fileName, pattern):
    r"""Extract the region of interest from every line of a data file.

    Each line of ``fileName`` is searched with ``pattern``; for every line
    that matches, the first match is written on its own line to
    ``LaterText.txt`` in the current working directory.

    :param fileName: path of the raw input file (read as UTF-8).
    :param pattern: regular expression, e.g. ``.*(-\d{2}),``. It should
        contain at most one capturing group: with several groups
        ``findall`` yields tuples, which cannot be written as text.
    :return: the path of the extracted data set, ``'LaterText.txt'``.
    """
    # Compile once instead of re-resolving the pattern for every line.
    matcher = re.compile(pattern)
    with open(fileName, 'r', encoding='utf-8') as f, \
            open('LaterText.txt', 'w', encoding='utf-8') as f2:
        for line in f:
            result = matcher.findall(line)
            if result:
                f2.write(result[0] + '\n')
    return 'LaterText.txt'


def sort(fileName):
    """Count the occurrences of each value in the extracted data set.

    The first whitespace-separated token of every non-blank line is taken
    as the value. Values are compared for exact equality (not substring
    containment), so ``5`` and ``15`` are counted separately.

    :param fileName: path of the extracted data set (read as UTF-8).
    :return: list of ``'value:count'`` strings, ordered by value using
        string comparison.
    """
    values = []
    with open(fileName, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if fields:  # a blank line carries no value; skip it
                values.append(fields[0])
    counts = Counter(values)
    # Sorting the (value, count) pairs orders the output by value because
    # every value appears exactly once as a key.
    return ['{}:{}'.format(value, count)
            for value, count in sorted(counts.items())]


def Finaltext(list1):
    """Write the statistics to ``Result.txt``, one entry per line.

    :param list1: iterable of ``'value:count'`` strings.
    :return: ``True`` once the file has been written.
    """
    with open('Result.txt', 'w', encoding='utf-8') as f2:
        f2.writelines(item + '\n' for item in list1)
    return True


def main():
    """Prompt for the input file and pattern, then run the three steps."""
    # Relative path of the input file, e.g. traintext.csv
    inputfile = input('Enter a file path: ')
    # Regular expression selecting the value, e.g. .*(-\d{2}),
    pattern = input('Enter a RE expression: ')
    # latertext receives the path of the preprocessed file.
    latertext = preprocess(inputfile, pattern)
    # Order the extracted values and count how often each one occurs.
    list1 = sort(latertext)
    # Store the statistics in Result.txt.
    if Finaltext(list1):
        print('statistics complete, result reference Result.txt')


if __name__ == '__main__':
    main()
3. Demo
-47:1 -48:2 -49:7 -50:7 -51:23 -52:22 -53:33 -54:58 -55:157 -56:81 -57:200 -58:149 -59:214 -60:269 -61:603 -62:256 -63:636 -64:427 -65:525 -66:585 -67:1233 -68:483 -69:1127 -70:654 -71:676 -72:735 -73:1133 -74:432 -75:766 -76:418 -77:411 -78:395 -79:519 -80:184 -81:321 -82:137 -83:146 -84:138 -85:128 -86:110 -87:96 -88:36 -89:38 -90:20 -91:7 -92:11 -93:1
1. Python small project: Big Data statistics