Common methods and functions of Python's pandas, NumPy, and matplotlib

Source: Internet
Author: User
Tags: index, sort, save file

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas import Series, DataFrame

--------------- NumPy -----------------------
arr = np.array([...], dtype=np.float64)          # build an array
np.zeros((3, 6))  np.empty((2, 3, 2))  np.arange(...)   # zero-filled, uninitialized, and range arrays
arr.dtype  arr.ndim  arr.shape                   # element type, number of dimensions, shape
arr.astype(np.int32)                             # cast; also np.float64, np.string_, np.unicode_
arr * arr   arr - arr   1 / arr                  # element-wise arithmetic
arr = np.arange(32).reshape((8, 4))              # reshape a 1-D range into an 8x4 array
arr[1:3, :]                                      # plain slicing
arr[[...]]                                       # fancy indexing
arr.T  arr.transpose((...))  arr.swapaxes(...)   # transpose
arr.dot(...)                                     # matrix (inner) product
np.sqrt(arr)  np.exp(arr)                        # element-wise functions
np.random.randn(8)                               # standard-normal random values
np.maximum(x, y)                                 # element-wise maximum
np.where(cond, xarr, yarr)                       # take xarr where cond is True, otherwise yarr
arr.mean()  arr.mean(axis=1)                     # arithmetic mean (overall / per row)
arr.sum()  arr.std()  arr.var()                  # sum, standard deviation, variance
arr.min()  arr.max()                             # minimum, maximum
arr.argmin()  arr.argmax()                       # index of the minimum / maximum
arr.cumsum()  arr.cumprod()                      # cumulative sum / cumulative product of all elements
arr.all()  arr.any()                             # are all elements True / is any element True
arr.sort()  arr.sort(1)                          # sort; sort along axis 1
np.unique(arr)                                   # unique values
np.in1d(arr1, arr2)                              # is each value of arr1 contained in arr2
np.load()  np.loadtxt()  np.save()  np.savez()   # read and save files
np.concatenate([arr, arr], axis=1)               # join two arrays along axis 1

--------------- pandas -----------------------
ser = Series()   ser = Series([...], index=[...])   # one-dimensional array; a dict converts directly to a Series
ser.values  ser.index                            # the values and the index of the Series
ser.reindex([...], fill_value=0)                 # redefine the index
ser.isnull()  pd.isnull(ser)  pd.notnull(ser)    # detect missing data
ser.name =  ser.index.name =                     # name of the Series itself / name of its index
ser.drop('x')                                    # drop the value at index 'x'
ser + ser                                        # arithmetic
ser.sort_index()  ser.order()                    # sort by index / by value (order() is sort_values() in newer pandas)
df = DataFrame(data, columns=[...])              # tabular structure with both a row index and a column index
df.ix['x']                                       # row with index 'x' (use .loc/.iloc in newer pandas); for a Series use ser['x'] directly
del df['...']                                    # delete a column with del
df.T                                             # transpose
df.index.name  df.columns.name  df.values        # index name, column-index name, values
df.drop([...])                                   # drop rows
df + df   df1.add(df2, fill_value=0)             # arithmetic
df - ser                                         # arithmetic between a DataFrame and a Series
f = lambda x: x.max() - x.min();  df.apply(f)    # apply a function column-wise
df.sort_index(axis=1, ascending=False)           # sort by index labels (here along axis 1), descending
df.sort_index(by=['a', 'b'])                     # sort by columns 'a' and 'b' (sort_values() in newer pandas)
ser.rank()  df.rank(axis=1)                      # ranking; assigns a rank value
df.sum()  df.sum(axis=1)                         # sum per column / per row
df.mean(axis=1, skipna=False)                    # mean of each row, without skipping NA values
df.idxmax()                                      # index of the maximum value
df.cumsum()                                      # cumulative sum
df.describe()  ser.describe()                    # count, mean, std, min, max and similar summaries
ser.unique()                                     # unique values
ser.value_counts()  df.value_counts()            # a Series indexed by the unique values, holding their frequencies
ser.isin(['x', 'y'])                             # is each value of ser equal to 'x' or 'y'; returns booleans
ser.dropna()  ser.isnull()  ser.notnull()  ser.fillna(0)   # handle missing data; same for a DataFrame
df.unstack()                                     # pivot a row-index level into the columns
df.unstack().stack()                             # and back again
df.swaplevel('key1', 'key2')                     # takes two level numbers or names and swaps them
df.sortlevel(1)                                  # sort by level 1; row and column indexes may have two levels
df.set_index(['c', 'd'], drop=False)             # make columns 'c' and 'd' the row index; drop=False keeps them as columns
pd.read_csv  pd.read_table  pd.read_fwf          # comma-delimited, tab ('\t')-delimited, and fixed-width (no delimiter) files
pd.read_csv('...', nrows=5)                      # read only the first 5 rows of a file
pd.read_csv('...', chunksize=1000)               # read in chunks, so a large file does not fill memory
pd.load()                                        # pandas' own load method, for binary (pickled) files; removed in newer pandas
pd.ExcelFile('....xls').parse('Sheet1')          # read sheet 'Sheet1' of an Excel file
df.to_csv('...csv', sep='|', index=False, header=False)   # write to CSV with '|' as delimiter (comma by default), omitting row and column labels
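
The short sketch below ties a few of the NumPy and pandas calls listed above together. It is only an illustration: the array shape, the index labels, and the column names are made up, and it uses the pd.Series / pd.DataFrame spelling so it runs unchanged against current pandas.

import numpy as np
import pandas as pd

# NumPy: build, slice, and summarize an array
arr = np.arange(32, dtype=np.float64).reshape((8, 4))
print(arr[1:3, :])           # plain slicing
print(arr.mean(axis=1))      # mean of each row
print(arr.cumsum())          # cumulative sum of all elements

# pandas: Series and DataFrame basics
ser = pd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
print(ser.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0))   # redefine the index

df = pd.DataFrame({'one': [1.0, 2.0, np.nan], 'two': [4.0, 5.0, 6.0]},
                  index=['x', 'y', 'z'])
print(df.describe())                                 # count, mean, std, min, max, ...
print(df.apply(lambda col: col.max() - col.min()))   # range of each column
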
pd.merge(df1, df2, on='key', suffixes=('_left', '_right'))   # database-style inner join on the shared 'key' column; suffixes mark overlapping column names as ..._left / ..._right
pd.merge(df1, df2, left_on='lkey', right_on='rkey')          # inner join when the key columns have different names in the two tables
pd.merge(df1, df2, how='outer')                              # outer join; how='left' and how='inner' select the other join types; several keys can also be passed
df1.join(df2, on='key', how='outer')                         # another way to merge
pd.concat([ser1, ser2, ser3], axis=1)                        # concatenate three Series along axis 1 (as columns)
ser1.combine_first(ser2)  df1.combine_first(df2)             # patch object 1 with values from object 2, aligning on the index
df.stack()  df.unstack()                                     # rotate columns into rows / rows into columns
df.pivot()                                                   # reshape using column values as the new index and columns
df.duplicated()  df.drop_duplicates()                        # flag duplicate rows / drop them
df['...'].map(lambda x: abs(x))                              # map a function over one column of df
ser.replace(-999, np.nan)                                    # replace every -999 with NaN
df.rename(index={}, columns={}, inplace=True)                # rename index and column labels; inplace=True modifies the data set in place
pd.cut(ser, bins)                                            # assign ser's values to the given bins; the result has labels and levels attributes
df[(np.abs(df) > 3).any(1)]                                  # rows containing at least one value with absolute value above 3
np.random.permutation  df.take                               # used for random reordering
pd.get_dummies(df['key'], prefix='key')                      # dummy/indicator columns, each prefixed with 'key'
df[...].str.contains()  df[...].str.findall(pattern, flags=re.IGNORECASE)  df[...].str.match(pattern, flags=...)  df[...].str.get()   # vectorized string functions

---- Plotting ----
ser.plot()  df.plot()                     # pandas plotting; parameters include label, ax, style, alpha, kind, logy, use_index, rot, xticks, xlim, grid, etc. (see page 257)
kind='kde'                                # density plot
kind='bar'  kind='barh'                   # vertical / horizontal bar chart; stacked=True gives a stacked plot
ser.hist(bins=50)                         # histogram
plt.scatter(x, y)                         # scatter plot of x against y
pd.scatter_matrix(df, diagonal='kde', color='k', alpha=0.3)   # scatter-plot matrix of every pair of columns in df

---- Aggregation and grouping ----
groupby() groups along axis=0 by default, but it can also group along axis=1; the groups can be iterated over with a for loop.
df.groupby(df['key1'])                                  # group df by key1
df['key2'].groupby(df['key1'])                          # group the key2 column by key1
df['key3'].groupby([df['key1'], df['key2']])            # group the key3 column first by key1, then by key2
df['key2'].groupby(df['key1']).size()                   # size() returns a Series of group sizes
df.groupby(df['key1'])['data1']                         # equivalent to df['data1'].groupby(df['key1'])
df.groupby(df['key1'])[['data1']]                       # equivalent to df[['data1']].groupby(df['key1'])
df.groupby(mapping, axis=1)  ser.groupby(mapping)       # group via a mapping dict
df.groupby(len)                                         # group via a function, e.g. len
df.groupby(level='...', axis=1)                         # group by an index level
df.groupby([...], as_index=False)                       # do not use the group keys as index; return the data unindexed
df.groupby(...).agg(['mean', 'std'])                    # use agg to apply several aggregation functions at once
df.groupby(...).transform(np.mean)                      # transform() applies the function to each group and broadcasts the result
df.groupby(...).apply(...)                              # apply() splits the object into pieces, calls the passed function on each piece, then glues the pieces back together

---- Pivot tables and cross-tabulation ----
df.pivot_table(['...', '...'], rows=[...], cols=..., margins=True)   # margins=True adds an 'All' row/column (rows/cols are index/columns in newer pandas)
pd.crosstab(df.col1, df.col2, margins=True)                          # margins works as above
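
A minimal, self-contained example of the grouping, merging, and pivoting calls above. The frame, the key1/key2/data1 column names, and the lookup table are invented for illustration, and pivot_table is written with the newer index=/columns= keywords rather than rows=/cols=.

import numpy as np
import pandas as pd

# Toy data; the column names are made up for this sketch.
df = pd.DataFrame({'key1': ['a', 'a', 'b', 'b', 'a'],
                   'key2': ['one', 'two', 'one', 'two', 'one'],
                   'data1': np.arange(5.0),
                   'data2': np.arange(5.0) * 2})

# Group data1 by key1 and apply several aggregations at once.
print(df.groupby('key1')['data1'].agg(['mean', 'std']))

# Merge with a second, smaller table on the shared key column.
lookup = pd.DataFrame({'key1': ['a', 'b'], 'label': ['alpha', 'beta']})
print(pd.merge(df, lookup, on='key1', how='left'))

# Pivot table with an 'All' margin row and column.
print(df.pivot_table('data1', index='key1', columns='key2',
                     aggfunc='mean', margins=True))
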
--------------- matplotlib ---------------
fig = plt.figure()                        # the base figure object
ax = fig.add_subplot(2, 2, 1)             # a 2x2 grid of subplots, with subplot 1 currently selected
fig, axes = plt.subplots(nrows, ncols, sharex=..., sharey=...)   # create a figure; specify rows, columns, and shared x/y-axis ticks
plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)   # adjust the spacing between subplots; wspace/hspace control the width/height padding as a percentage
ax.plot(x, y, linestyle='--', color='g')  # plot y against x with the given line style and color
ax.set_xticks([...])  ax.set_xticklabels([...])   # set the x-axis ticks and tick labels
ax.set_xlabel('...')                      # set the x-axis label
ax.set_title('...')                       # set the chart title
ax.legend(loc='best')                     # add a legend; loc='best' chooses a suitable position
ax.text(x, y, 'hello', family='monospace', fontsize=10)   # place the annotation 'hello' at (x, y) with font size 10
ax.add_patch(...)                         # add a patch (shape) to the plot
plt.savefig('...png', dpi=400, bbox_inches='tight')   # save the figure; dpi is the resolution, bbox_inches='tight' trims surrounding whitespace

------------------------------------------
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
# Basemap can be used to draw maps

----------------- Time series --------------------------
pd.to_datetime(datestrs)                  # parse string dates into datetime format
pd.date_range('1/1/2000', periods=1000)   # generate a time-series index
ts.resample('D', how='mean')              # resample to a fixed daily frequency and compute the mean; how='ohlc' gives the four stock indicators
                                          # downsampling aggregates: a short frequency (day) becomes a longer one (month) and the values are combined
                                          # upsampling interpolates: a long frequency becomes a shorter one and new values are produced in between
ts.shift(2, freq='D')  ts.shift(-2, freq='D')   # shift back / forward by 2 days
now + Day()   now + MonthEnd()            # date offsets
import pytz;  pytz.timezone('US/Eastern') # time-zone handling; requires the pytz package
pd.Period('...', freq='A-DEC')            # a Period represents a time span, i.e. a period
pd.PeriodIndex                            # an index of periods
ts.to_period('M')                         # convert timestamps to periods
pd.rolling_mean(...)  pd.rolling_std(...) # moving-window functions: mean, standard deviation (ser.rolling(...).mean() in newer pandas)
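
A small end-to-end sketch of the time-series and matplotlib calls above, using synthetic random data. The file name and frequencies are arbitrary, and it uses the method-style ts.resample('M').mean() that replaced resample('M', how='mean') in newer pandas (the 'M' alias may be spelled 'ME' in the most recent releases).

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# A synthetic daily time series; the values are random and purely illustrative.
rng = pd.date_range('1/1/2000', periods=1000, freq='D')
ts = pd.Series(np.random.randn(len(rng)).cumsum(), index=rng)

# Downsample to monthly means, then shift the monthly series by two periods.
monthly = ts.resample('M').mean()
shifted = monthly.shift(2)

fig, axes = plt.subplots(2, 1, sharex=True)            # 2x1 grid with a shared x axis
axes[0].plot(ts.index, ts.values, color='g')
axes[0].set_title('daily random walk')
axes[1].plot(monthly.index, monthly.values, linestyle='--', label='monthly mean')
axes[1].plot(shifted.index, shifted.values, linestyle=':', label='shifted by 2 months')
axes[1].set_xlabel('date')
axes[1].legend(loc='best')
plt.subplots_adjust(hspace=0.4)                        # a little extra vertical spacing
fig.savefig('random_walk.png', dpi=200, bbox_inches='tight')
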

  
