import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
# Build the example frame: two string key columns and two columns of
# random normal data (the values differ on every run).
df = DataFrame({
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': np.random.randn(5),
    'data2': np.random.randn(5),
})

# Group the data1 column by the values of key1, then take per-group means.
grouped = df['data1'].groupby(df['key1'])
grouped.mean()

# Grouping by two key Series gives a result indexed by (key1, key2).
means = df['data1'].groupby([df['key1'], df['key2']]).mean()
means
Out[10]:
key1  key2
a     one    -0.230076
      two     1.248653
b     one    -0.196613
      two     0.689761
Name: data1, dtype: float64
# Pivot the innermost index level (key2) into columns.
means.unstack(level=-1)
Out[12]:
key2       one       two
key1
a    -0.230076  1.248653
b    -0.196613  0.689761
# Pivot the outermost index level (key1) into columns.
means.unstack(level=0)
Out[13]:
key1         a         b
key2
one  -0.230076 -0.196613
two   1.248653  0.689761
# Group keys do not have to be columns of the frame: these standalone arrays
# supply one state and one year per row of df (five values each).
states = np.array(['Ohio', 'California', 'California', 'Ohio', 'Ohio'])
years = np.array([2005, 2005, 2006, 2005, 2006])
# Display the frame again for reference.
df
Out[16]:
      data1     data2 key1 key2
0 -0.169761 -0.297803    a  one
1  1.248653  0.116745    a  two
2 -0.196613 -0.224198    b  one
3  0.689761  1.436277    b  two
4 -0.290392  1.292000    a  one
# Group data1 by the external state/year arrays and average each group.
df['data1'].groupby([states, years]).mean()
Out[17]:
California  2005    1.248653
            2006   -0.196613
Ohio        2005    0.260000
            2006   -0.290392
Name: data1, dtype: float64
# Only the numeric columns are aggregated. numeric_only=True states that
# explicitly; without it pandas >= 2.0 raises on the string column key2
# instead of silently dropping it.
df.groupby(df['key1']).mean(numeric_only=True)
Out[19]:
         data1     data2
key1
a     0.262833  0.370314
b     0.246574  0.606039
# Group by two columns referenced by name; both remaining columns are numeric.
df.groupby(['key1', 'key2']).mean()
Out[20]:
              data1     data2
key1 key2
a    one  -0.230076  0.497098
     two   1.248653  0.116745
b    one  -0.196613 -0.224198
     two   0.689761  1.436277
# Iterating a GroupBy yields (group name, sub-frame) pairs. A scalar key is
# used so each name is a plain value such as 'a'; grouping by the length-1
# list ['key1'] yields 1-tuples such as ('a',) in pandas >= 2.0.
for name, group in df.groupby('key1'):
    print(name)
    print(group)
a
      data1     data2 key1 key2
0 -0.169761 -0.297803    a  one
1  1.248653  0.116745    a  two
4 -0.290392  1.292000    a  one
b
      data1     data2 key1 key2
2 -0.196613 -0.224198    b  one
3  0.689761  1.436277    b  two
# With several keys, each group name is a tuple of key values, unpacked here.
for (k1, k2), group in df.groupby(['key1', 'key2']):
    print(k1, k2)
    print(group)
a one
      data1     data2 key1 key2
0 -0.169761 -0.297803    a  one
4 -0.290392  1.292000    a  one
a two
      data1     data2 key1 key2
1  1.248653  0.116745    a  two
b one
      data1     data2 key1 key2
2 -0.196613 -0.224198    b  one
b two
      data1     data2 key1 key2
3  0.689761  1.436277    b  two
# Inspect the dtype of each column.
df.dtypes
Out[29]:
data1    float64
data2    float64
key1      object
key2      object
dtype: object
# Group the columns (axis=1) by their dtype, then collect the pieces into a
# dict mapping each dtype to the sub-frame of columns that share it.
# NOTE: the axis=1 argument of groupby is deprecated since pandas 2.1; on
# newer versions select the columns per dtype directly instead.
grouped = df.groupby(df.dtypes, axis=1)
dict(list(grouped))
Out[31]:
{dtype('float64'):       data1     data2
 0 -0.169761 -0.297803
 1  1.248653  0.116745
 2 -0.196613 -0.224198
 3  0.689761  1.436277
 4 -0.290392  1.292000, dtype('O'):   key1 key2
 0    a  one
 1    a  two
 2    b  one
 3    b  two
 4    a  one}
# Select only data2 before aggregating; the list selector [['data2']] keeps
# the result a DataFrame.
df.groupby(['key1', 'key2'])[['data2']].mean()
Out[15]:
              data2
key1 key2
a    one   0.942926
     two  -0.671692
b    one   0.366026
     two  -0.794155
# .ix was removed in pandas 1.0. .loc slices by label, so 2:3 covers rows 2
# and 3 inclusive; both data columns in those rows are set to NaN.
df.loc[2:3, ['data1', 'data2']] = np.nan
df
Out[22]:
      data1     data2 key1 key2
0 -1.090066  0.132120    a  one
1 -0.683913 -0.671692    a  two
2       NaN       NaN    b  one
3       NaN       NaN    b  two
4 -0.003452  1.753732    a  one
Python for Data Analysis: basic GroupBy operations