# -*- coding: UTF-8 -*-
"""Small collection of descriptive-statistics helpers.

Provides mean, median, mode, population/sample variance, standard
deviation, range, covariance, standard score and Pearson correlation for
plain Python sequences, plus expectation and variance of a discrete random
variable given as parallel value/probability sequences.

NOTE: this module defines a public function called ``range`` (the
statistical range), which shadows the builtin inside this module.  Code in
this file therefore iterates with ``zip`` / comprehensions and must never
rely on the builtin ``range``.
"""
import itertools
import math


def mean(t):
    """Return the arithmetic mean of the numbers in sequence *t* as a float.

    Raises ZeroDivisionError if *t* is empty.
    """
    # Convert before dividing so integer input is not truncated on Python 2.
    return float(sum(t)) / len(t)


def E(x, p):
    """Return the expectation of a discrete random variable.

    *x* holds the values and *p* the matching probabilities; the result is
    the sum of the products ``x[i] * p[i]``.
    """
    return sum(xi * pi for xi, pi in zip(x, p))


def median(t):
    """Return the median of *t*, or None if *t* is empty.

    For an odd number of items the middle element of the sorted data is
    returned unchanged; for an even number the mean of the two middle
    elements is returned as a float.
    """
    if not t:
        return None
    arr = sorted(t)
    n = len(arr)
    mid = n // 2
    if n % 2:
        return arr[mid]
    return mean(arr[mid - 1:mid + 1])


def mode(t):
    """Return the list of most frequent values in *t*.

    Returns None if *t* is empty or if every value occurs exactly once
    (no mode).  Ties are all returned, in ascending value order.
    """
    if not t:
        return None
    freq = _getfreq(t)
    top_count = freq[0][0]
    if top_count == 1:
        return None
    return [value for count, value in freq if count == top_count]


def _getfreq(t):
    """Return ``(count, value)`` pairs for *t*, most frequent first.

    Values with the same count keep ascending value order because the sort
    is stable and the groups are produced from sorted data.
    """
    alist = [(len(list(g)), k) for k, g in itertools.groupby(sorted(t))]
    alist.sort(key=lambda pair: pair[0], reverse=True)
    return alist


def VAR(t, mu=None):
    """Return the population variance of *t*.

    *mu* is the mean of *t*; it is computed when omitted.  The variance is
    the mean of the squared deviations from *mu*.
    Raises ZeroDivisionError if *t* is empty.
    """
    if mu is None:
        mu = mean(t)
    dev2 = [(x - mu) ** 2 for x in t]
    return mean(dev2)


def d(x, p):
    """Return the variance of a discrete random variable.

    *x* holds the values and *p* the matching probabilities.  By definition
    the variance is ``E((X - E(X)) ** 2)``; this implementation uses the
    equivalent simplified formula ``E(X ** 2) - E(X) ** 2``.
    """
    e = E(x, p)
    e_sq = E([xi ** 2 for xi in x], p)
    return e_sq - e ** 2


def svar(t):
    """Return the sample variance of *t* (divisor ``n - 1``).

    Returns None when *t* has fewer than two items, because the sample
    variance is undefined there.
    """
    if len(t) < 2:
        return None
    mu = mean(t)
    return sum((x - mu) ** 2 for x in t) / (len(t) - 1)


def meanvar(t):
    """Return the tuple ``(mean, population variance)`` of *t*."""
    mu = mean(t)
    return mu, VAR(t, mu)


def stdvar(t, mu=None):
    """Return the population standard deviation of *t*.

    *mu* is the mean of *t*; it is computed when omitted.
    """
    return math.sqrt(VAR(t, mu))


def range(t):
    """Return the statistical range ``max(t) - min(t)``, or None if empty.

    The name intentionally matches the original public API even though it
    shadows the builtin ``range`` within this module.
    """
    if not t:
        return None
    return max(t) - min(t)


def Cov(x, y):
    """Return the population covariance of *x* and *y*.

    The covariance is the mean of the products of the paired deviations
    ``(x[i] - mean(x)) * (y[i] - mean(y))``: it is positive when the two
    variables tend to deviate from their means in the same direction.

    Its unit is the product of the units of *x* and *y* (e.g. cm * kg),
    which is hard to interpret; see ``pearson_correlation`` for the
    unit-free equivalent based on standard scores.
    """
    mu_x = mean(x)
    mu_y = mean(y)
    total = 0.0
    for xi, yi in zip(x, y):
        total += (xi - mu_x) * (yi - mu_y)
    return total / len(x)


def standardsore(x, mu, sigma):
    """Return the standard score ``(x - mu) / sigma`` of a single value.

    ``x - mu`` is the deviation from the mean; dividing by the standard
    deviation normalises it so the transformed variable is unit-free with
    mean 0 and variance 1.
    Raises ZeroDivisionError if *sigma* is zero.
    """
    # float() keeps integer arguments from truncating on Python 2.
    return (x - mu) / float(sigma)


def pearson_correlation(x, y):
    """Return the Pearson correlation coefficient of *x* and *y*.

    Each value is converted to its standard score and the coefficient is
    the mean of the products of the paired scores.  This equals
    ``Cov(x, y) / (stdvar(x) * stdvar(y))``.  The result is unit-free:
    1 means perfectly positively correlated, -1 perfectly negatively
    correlated.

    Raises ZeroDivisionError if either input has zero standard deviation.
    """
    mu_x = mean(x)
    mu_y = mean(y)
    sigma_x = stdvar(x, mu_x)
    sigma_y = stdvar(y, mu_y)
    products = [
        standardsore(xi, mu_x, sigma_x) * standardsore(yi, mu_y, sigma_y)
        for xi, yi in zip(x, y)
    ]
    return mean(products)


if __name__ == '__main__':
    astr = '93 62 51 93 75 82 93 62 65 51 '
    alist = [int(e) for e in astr.split()]
    # A single pre-formatted argument prints identically on Python 2 and 3
    # and mirrors the output of the Python 2 statement ``print a, b``.
    print('%s %s' % ('mean: ', mean(alist)))
    print('%s %s' % ('median:', median(alist)))
    print('%s %s' % ('mode: ', mode(alist)))
    print('%s %s' % (' range :', range(alist)))
    print('%s %s' % ('Population variance: ', VAR(alist)))
    print('%s %s' % ('sample variance', svar(alist)))
    print('%s %s' % ('standard deviation: ', stdvar(alist)))
    print('verify covariance Cov (x, x) = VAR (x)')
    print(Cov(alist, alist))
    print(VAR(alist))
    print('the covariance is equal to the variance, it indicates that the function written in the formula is correct')