# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
# Demonstration of pandas groupby on a small DataFrame.
# All output uses the single-argument call form print(x), which prints the
# same text on Python 2 and is valid syntax on Python 3.

# data1/data2 are deliberately stored as strings; they are converted with
# astype(float) before any numeric aggregation below.
df = pd.DataFrame({
    'key1': list('aabba'),
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': ['1', '3', '5', '7', '9'],
    'data2': ['2', '4', '6', '8', '10'],
})
print(df)

# Group by the values of key1 and count the rows in each group.
grouped = df.groupby(['key1']).size()
print(grouped)
print('++++++++++++++')

# Convert data1 to float first, then group by key1 and take the mean.
grouped1 = df['data1'].astype(float).groupby(df['key1']).mean()
print(grouped1)
print(type(grouped1))  # Series
print('++++++++++++++++++')

# Append a new column to the DataFrame.
df['add'] = ['AA', 'BB', 'CC', 'DD', 'EE']
print(df)

# Group by two columns and count the rows in each (key1, key2) group.
# Note: when groupby is called on the DataFrame itself, the keys can be
# given as column names, e.g. ['key1'].
grouped2 = df.groupby(['key1', 'key2']).size()
print(grouped2)
print(type(grouped2))
print('++++++++++++++++++')

# Group data1 (as float) by key1 and add, then take the mean of each group.
# Every value in 'add' is distinct, so each group holds exactly one row.
# NOTE(review): grouping by 'key2' instead of 'add' may have been intended
# here -- confirm.
# Note: when groupby is called on a single column such as df['data1'], the
# keys must be passed as Series, e.g. df['key1'], not as column names.
grouped3 = df['data1'].astype(float).groupby([df['key1'], df['add']]).mean()
print(grouped3)
print(type(grouped3))  # Series