# pandas的groupby问题

1：`frozenset()` 是 Python 的内置函数，返回一个冻结（不可变）的集合；冻结后不能再添加或删除任何元素，因此它是可哈希的，可以作为字典的键或其他集合的元素；

2：groupby函数的使用：

import pandas as pd

import numpy as np

# Sample frame used by every example below. It was never defined in these
# notes, so the script failed with NameError on the first print. The values are
# hard-coded to match the table printed underneath, which keeps the examples
# reproducible (the original data was presumably random -- TODO confirm).
df = pd.DataFrame({
    'data1': [-0.074640, 1.419406, -0.029690, -2.540797, -0.069440],
    'data2': [0.351577, 1.999888, 0.839932, -0.753175, 0.076043],
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
})
print(df)

# data1 data2 key1 key2

# 0 -0.074640 0.351577 a one

# 1 1.419406 1.999888 a two

# 2 -0.029690 0.839932 b one

# 3 -2.540797 -0.753175 b two

# 4 -0.069440 0.076043 a one

# Count the rows in each key1 group, looking only at the key2 column.
key2_rows_per_key1 = df.groupby('key1')['key2'].size()
print('first:\n', key2_rows_per_key1)

# first:

# key1

# a 3

# b 2

# Name: key2, dtype: int64

# Group on both keys at once; size() gives the row count per (key1, key2) pair.
rows_per_key_pair = df.groupby(['key1', 'key2']).size()
print('second:\n', rows_per_key_pair)

# second:

# key1 key2

# a one 2

# two 1

# b one 1

# two 1

# dtype: int64

# Map each key1 value to an immutable set of the key2 values seen in that group.
# Iterating the grouped column yields (group key, Series) pairs.
favorable_reviews_by_users = {
    group_key: frozenset(key2_series.values)
    for group_key, key2_series in df.groupby('key1')['key2']
}
print('third:\n', favorable_reviews_by_users)

# third:

# {'a': frozenset({'two', 'one'}), 'b': frozenset({'two', 'one'})}

# Per-group mean of the numeric columns (data1, data2). numeric_only=True skips
# the string column key2; without it pandas 2.x tries to average the strings
# and raises TypeError instead of silently dropping the column.
print('fourth:\n', df.groupby('key1').mean(numeric_only=True))

# fourth:

# data1 data2

# key1

# a 0.371597 0.884543

# b 0.318975 -0.412998

# Iterating a grouped column yields one (group key, Series) pair per group.
# The three prints form the loop body and must be indented; the flattened
# original was an IndentationError.
for k, v in df.groupby('key1')['key2']:
    print(k)
    print('haha')
    print(v)

# a

# haha

# 0 one

# 1 two

# 4 one

# Name: key2, dtype: object

# b

# haha

# 2 one

# 3 two

# Name: key2, dtype: object

