標籤:資料 4.0 from axis 建立 max type 字典 class
# Walk-through of basic pandas Series / DataFrame operations: construction,
# column selection and ordering, reindexing, boolean filtering, deleting
# columns/rows and transposing.  Each step prints its result; the expected
# output is shown in the comment that follows the print call.
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# --- Series -----------------------------------------------------------------
s = Series([1, 2, 3], index=['a', 'b', 'c'])
print(s)
# a    1
# b    2
# c    3
# dtype: int64

# NumPy functions can be applied directly to a Series.
print(np.max(s))

# Both the Series itself and its index can carry a name.
s.name = 'rank'
s.index.name = 'name'
print(s)

# --- Building DataFrames ------------------------------------------------------
sdata1 = {'name': ['a', 'b', 'c'], 'rank': [1, 2, 3], 'score': [98, 89, 54]}
print(sdata1)  # a plain dict

df1 = DataFrame(sdata1)
print(df1)
#   name  rank  score
# 0    a     1     98
# 1    b     2     89
# 2    c     3     54

# Columns are matched by name, so passing `columns` only changes their order.
df2 = DataFrame(sdata1, columns=['score', 'name', 'rank'])
print(df2)
#    score name  rank
# 0     98    a     1
# 1     89    b     2
# 2     54    c     3

# 'class' is not a key of the dict, so that column is filled with NaN.
df3 = DataFrame(
    sdata1,
    columns=['score', 'name', 'rank', 'class'],
    index=['1', '2', '3'],
)
print(df3)
#    score name  rank class
# 1     98    a     1   NaN
# 2     89    b     2   NaN
# 3     54    c     3   NaN

# --- Reindexing and selection -------------------------------------------------
# Reindexing with a label that does not exist ('4') adds an all-NaN row; the
# integer columns become float because NaN cannot be stored in an int column.
df4 = df3.reindex(['1', '2', '3', '4'])
print(df4)
#    score name  rank class
# 1   98.0    a   1.0   NaN
# 2   89.0    b   2.0   NaN
# 3   54.0    c   3.0   NaN
# 4    NaN  NaN   NaN   NaN

print(df4['score'])  # select one column
# `.ix` was removed in pandas 1.0; `.loc` is the label-based row selector.
print(df4.loc['1'])

# Boolean mask: keep the rows of df2 whose score is greater than 60.
print(df2[df2['score'] > 60])
#    score name  rank
# 0     98    a     1
# 1     89    b     2

# --- Deleting a column --------------------------------------------------------
del df3['class']  # remove the 'class' column in place
print(df3)

# Rebuild the same frame from scratch and drop 'class' again before the
# remaining reindex / drop examples.
sdata1 = {'name': ['a', 'b', 'c'], 'rank': [1, 2, 3], 'score': [98, 89, 54]}
print(sdata1)
df3 = DataFrame(
    sdata1,
    columns=['score', 'name', 'rank', 'class'],
    index=['1', '2', '3'],
)
del df3['class']
print(df3)

# --- Reindex with fill options ------------------------------------------------
print(df3.reindex(['1', '2', '3', '4']))

# Fill the newly introduced row with 0 instead of NaN.
print(df3.reindex(['1', '2', '3', '4'], fill_value=0))
#    score name  rank
# 1     98    a     1
# 2     89    b     2
# 3     54    c     3
# 4      0    0     0

print(df3.reindex(['0', '1', '2', '3']))
#    score name  rank
# 0    NaN  NaN   NaN
# 1   98.0    a   1.0
# 2   89.0    b   2.0
# 3   54.0    c   3.0

# Backward fill: the new label '0' takes its values from the next existing
# label ('1').
print(df3.reindex(['0', '1', '2', '3'], method='bfill'))
#    score name  rank
# 0     98    a     1
# 1     98    a     1
# 2     89    b     2
# 3     54    c     3

# --- Dropping rows / columns and transposing ----------------------------------
print(df3.drop('1'))  # drop the row labelled '1' (the first row)
# `axis` picks the dimension to drop from: 0 is rows, 1 is columns.
print(df3.drop('score', axis=1))
print(df3.T)  # transpose
Python 清洗資料