
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Build a Series of six random floats; with no explicit index pandas assigns
# the default integer labels 0..5.
s1 = Series(np.random.rand(6))
print(s1)
# Sample output (values come from np.random.rand, so they differ per run):
# 0    0.710042
# 1    0.901424
# 2    0.050802
# 3    0.870486
# 4    0.919496
# 5    0.483373
# dtype: float64

# Create a multi-level index (comparable to a list of Series: the data is
# split into two sub-Series labelled 1 and 2)
# The two parallel lists passed as `index` supply the outer and inner level
# labels for each of the six values.
s1 = Series(
    np.random.rand(6),
    index=[[1, 1, 1, 2, 2, 2], ['a', 'b', 'c', 'a', 'b', 'c']],
)
print(s1)
# Sample output (random values; the outputs below come from the same run):
# 1  a    0.005413
#    b    0.668101
#    c    0.540828
# 2  a    0.922140
#    b    0.046360
#    c    0.207378
# dtype: float64

# Selecting an outer-level key yields the values indexed by the inner level.
print(s1[1])
# a    0.005413
# b    0.668101
# c    0.540828
# dtype: float64

# One outer-level group of a multi-level index is itself a Series
print(type(s1[1]))
# <class 'pandas.core.series.Series'>

# Fetch a single value
print(s1[1]['a'])
# 0.005413335166173483

# Values can also be sliced out (the same inner key under every outer key)
print(s1[:, 'a'])
# 1    0.005413
# 2    0.922140
# dtype: float64

# A slice taken across a multi-level index is a Series as well
print(type(s1[:, 'a']))
# <class 'pandas.core.series.Series'>

# Convert to a DataFrame: the inner level becomes the columns
df1 = s1.unstack()
print(df1)
#           a         b         c
# 1  0.005413  0.668101  0.540828
# 2  0.922140  0.046360  0.207378
# Build a DataFrame out of two Series (each Series becomes one row)
df2 = DataFrame([s1[1], s1[1]])
print(df2)
# Sample output:
#           a         b         c
# 0  0.005413  0.668101  0.540828
# 1  0.005413  0.668101  0.540828

# Convert back to a Series (but the level order is wrong: the column labels
# end up as the outer level)
s2 = df1.unstack()
print(s2)
# a  1    0.005413
#    2    0.922140
# b  1    0.668101
#    2    0.046360
# c  1    0.540828
#    2    0.207378
# dtype: float64

# Convert back (fix the wrong level order by transposing with .T first)
s2 = df1.T.unstack()
print(s2)
# 1  a    0.005413
#    b    0.668101
#    c    0.540828
# 2  a    0.922140
#    b    0.046360
#    c    0.207378
# dtype: float64

# A plain 4x4 DataFrame holding 0..15, with default integer labels on both axes.
df2 = DataFrame(np.arange(16).reshape(4, 4))
print(df2)
#     0   1   2   3
# 0   0   1   2   3
# 1   4   5   6   7
# 2   8   9  10  11
# 3  12  13  14  15
# Create hierarchical levels on both axes through `index` and `columns`
df2 = DataFrame(
    np.arange(16).reshape(4, 4),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['BJ', 'BJ', 'SH', 'SH'], [4, 6, 4, 6]],
)
print(df2)
#      BJ      SH
#       4   6   4   6
# a 1   0   1   2   3
#   2   4   5   6   7
# b 1   8   9  10  11
#   2  12  13  14  15

# Selecting with [] looks the key up in the column index by default
print(df2['BJ'])
#       4   6
# a 1   0   1
#   2   4   5
# b 1   8   9
#   2  12  13

# Selecting an outer column key still returns a DataFrame ...
print(type(df2['BJ']))
# <class 'pandas.core.frame.DataFrame'>

# ... and selecting an inner column key from that result gives a Series.
print(df2['BJ'][4])
# a  1     0
#    2     4
# b  1     8
#    2    12
# Name: 4, dtype: int64