import pandas as pd
import numpy as np
# Build a Series from a plain list; no index is given, so pandas supplies one.
obj = pd.Series(data=[4, 7, -8, 3])
obj
0 4
1 7
2 -8
3 3
dtype: int64
obj.values
array([ 4, 7, -8, 3])
obj.index
RangeIndex(start=0, stop=4, step=1)
# Build a Series whose values carry explicit string labels.
obj1 = pd.Series(
    data=[7, 2, -6, 9],
    index=['a', 'd', 'c', 'b'],
)
obj1
a 7
d 2
c -6
b 9
dtype: int64
obj1.index
Index(['a', 'd', 'c', 'b'], dtype='object')
obj1['d'] # 通过自建的索引获取数据
2
# Fetch by integer position.  Use .iloc explicitly: obj1[1] on a string-labelled
# Series relies on the deprecated positional fallback of Series.__getitem__.
obj1.iloc[1]
2
obj1[obj1 > 0]
a 7
d 2
b 9
dtype: int64
np.exp(obj1)
a 1096.633158
d 7.389056
c 0.002479
b 8103.083928
dtype: float64
# Create a Series from a dict: the dict keys become the index labels.
data = {
    'xiaoming': 2000,
    'xiaohong': 1000,
    'zhangsan': 1500,
}
obj2 = pd.Series(data=data)
obj2
xiaoming 2000
xiaohong 1000
zhangsan 1500
dtype: int64
# lisi这个键不存在,默认为NaN,表示缺失值
dataIndex = ['xiaoming', 'xiaohong', 'zhangsan', 'lisi']
obj3 = pd.Series(data, index=dataIndex)
obj3
xiaoming 2000.0
xiaohong 1000.0
zhangsan 1500.0
lisi NaN
dtype: float64
pd.isnull(obj3)
xiaoming False
xiaohong False
zhangsan False
lisi True
dtype: bool
obj3.isnull()
xiaoming False
xiaohong False
zhangsan False
lisi True
dtype: bool
obj2 + obj3
lisi NaN
xiaohong 2000.0
xiaoming 4000.0
zhangsan 3000.0
dtype: float64
obj3.name = 'person'
obj3.index.name = 'personName'
obj3
personName
xiaoming 2000.0
xiaohong 1000.0
zhangsan 1500.0
lisi NaN
Name: person, dtype: float64
# 通过赋值就地修改索引
obj
0 4
1 7
2 -8
3 3
dtype: int64
obj.index = ['xiaoming', 'xiaohong', 'zhangsan', 'lisi'] # 直接修改
obj
xiaoming 4
xiaohong 7
zhangsan -8
lisi 3
dtype: int64
# Create a DataFrame from a dict of equal-length lists (one key per column).
data = {
    'city': ['深圳', '上海', '长沙', '广州', '北京'],
    'year': [2004, 2007, 2009, 2005, 2002],
    'pop': [4.4, 2.8, 3.9, 4.2, 3.6],
}
df = pd.DataFrame(data=data)
df
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
city | year | pop | |
---|---|---|---|
0 | 深圳 | 2004 | 4.4 |
1 | 上海 | 2007 | 2.8 |
2 | 长沙 | 2009 | 3.9 |
3 | 广州 | 2005 | 4.2 |
4 | 北京 | 2002 | 3.6 |
df.head(3)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
city | year | pop | |
---|---|---|---|
0 | 深圳 | 2004 | 4.4 |
1 | 上海 | 2007 | 2.8 |
2 | 长沙 | 2009 | 3.9 |
# 改变列属性的顺序
pd.DataFrame(data, columns=['year', 'city', 'pop'])
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
year | city | pop | |
---|---|---|---|
0 | 2004 | 深圳 | 4.4 |
1 | 2007 | 上海 | 2.8 |
2 | 2009 | 长沙 | 3.9 |
3 | 2005 | 广州 | 4.2 |
4 | 2002 | 北京 | 3.6 |
df1 = pd.DataFrame(data, columns=['year', 'city', 'pop', 'debt'],
index=['one', 'two', 'three','four', 'five'])
df1 # 传入的列debt不在原数据中,引起缺失值
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
year | city | pop | debt | |
---|---|---|---|---|
one | 2004 | 深圳 | 4.4 | NaN |
two | 2007 | 上海 | 2.8 | NaN |
three | 2009 | 长沙 | 3.9 | NaN |
four | 2005 | 广州 | 4.2 | NaN |
five | 2002 | 北京 | 3.6 | NaN |
df1.columns
Index(['year', 'city', 'pop', 'debt'], dtype='object')
df1['city'] # 这种方式适合任何列名的访问
one 深圳
two 上海
three 长沙
four 广州
five 北京
Name: city, dtype: object
# Attribute-style column access; only usable when a column with that name exists.
# NOTE(review): presumably the name must also be a valid Python identifier that
# does not clash with a DataFrame attribute (e.g. the 'pop' column here) — confirm.
df1.city
one 深圳
two 上海
three 长沙
four 广州
five 北京
Name: city, dtype: object
# 访问一行数据
df1.loc['three'] # 通过创建的索引
year 2009
city 长沙
pop 3.9
debt NaN
Name: three, dtype: object
df1.iloc[2] # 通过数字索引
year 2009
city 长沙
pop 3.9
debt NaN
Name: three, dtype: object
# 列赋值
df1['debt'] = 2
df1
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
year | city | pop | debt | |
---|---|---|---|---|
one | 2004 | 深圳 | 4.4 | 2 |
two | 2007 | 上海 | 2.8 | 2 |
three | 2009 | 长沙 | 3.9 | 2 |
four | 2005 | 广州 | 4.2 | 2 |
five | 2002 | 北京 | 3.6 | 2 |
df1['debt'] = np.arange(5.0)
df1
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
year | city | pop | debt | |
---|---|---|---|---|
one | 2004 | 深圳 | 4.4 | 0.0 |
two | 2007 | 上海 | 2.8 | 1.0 |
three | 2009 | 长沙 | 3.9 | 2.0 |
four | 2005 | 广州 | 4.2 | 3.0 |
five | 2002 | 北京 | 3.6 | 4.0 |
# Assign a Series to a column: values are matched to the DataFrame's index by
# label, and rows without a matching label are filled with NaN.
val = pd.Series(
    data=[-1.2, -1.7, 2],
    index=['three', 'one', 'five'],
)
df1['debt'] = val
df1
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
year | city | pop | debt | |
---|---|---|---|---|
one | 2004 | 深圳 | 4.4 | -1.7 |
two | 2007 | 上海 | 2.8 | NaN |
three | 2009 | 长沙 | 3.9 | -1.2 |
four | 2005 | 广州 | 4.2 | NaN |
five | 2002 | 北京 | 3.6 | 2.0 |
# 创建一个 bool 列
df1['south'] = (df1.city == '深圳')
df1
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
year | city | pop | debt | south | |
---|---|---|---|---|---|
one | 2004 | 深圳 | 4.4 | -1.7 | True |
two | 2007 | 上海 | 2.8 | NaN | False |
three | 2009 | 长沙 | 3.9 | -1.2 | False |
four | 2005 | 广州 | 4.2 | NaN | False |
five | 2002 | 北京 | 3.6 | 2.0 | False |
del df1['south'] # 删除某列
df1
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
year | city | pop | debt | |
---|---|---|---|---|
one | 2004 | 深圳 | 4.4 | -1.7 |
two | 2007 | 上海 | 2.8 | NaN |
three | 2009 | 长沙 | 3.9 | -1.2 |
four | 2005 | 广州 | 4.2 | NaN |
five | 2002 | 北京 | 3.6 | 2.0 |
# Nested dict: the outer keys become the columns, the inner keys the row index.
pop = {
    'Nevada': {2001: 2.2, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
df2 = pd.DataFrame(data=pop)
df2
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
Nevada | Ohio | |
---|---|---|
2000 | NaN | 1.5 |
2001 | 2.2 | 1.7 |
2002 | 2.9 | 3.6 |
df2.T
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
2000 | 2001 | 2002 | |
---|---|---|---|
Nevada | NaN | 2.2 | 2.9 |
Ohio | 1.5 | 1.7 | 3.6 |
# 改变行索引
pd.DataFrame(pop, index=[2001, 2002, 2003])
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
Nevada | Ohio | |
---|---|---|
2001 | 2.2 | 1.7 |
2002 | 2.9 | 3.6 |
2003 | NaN | NaN |
# Name the row index and the column index.
df2.index.name = 'year'
df2.columns.name = 'state'
df2
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
state | Nevada | Ohio |
---|---|---|
year | ||
2000 | NaN | 1.5 |
2001 | 2.2 | 1.7 |
2002 | 2.9 | 3.6 |
df2.values
array([[nan, 1.5],
[2.2, 1.7],
[2.9, 3.6]])
# pandas Index objects hold the axis labels and other metadata.
# (This heading had been fused onto the code line below, breaking the statement.)
obj = pd.Series(range(3), index=['a', 'b', 'c'])
index = obj.index
index
Index(['a', 'b', 'c'], dtype='object')
obj
a 0
b 1
c 2
dtype: int64
index[1:]
Index(['b', 'c'], dtype='object')
labels = pd.Index(np.arange(3))
labels
Int64Index([0, 1, 2], dtype='int64')
obj1 = pd.Series([1.2, 1.8, 4] ,index=labels)
obj1
0 1.2
1 1.8
2 4.0
dtype: float64
obj = pd.Series([4.5, 7.2, -5.3, 3.6],
index=['a', 'b', 'c', 'd'])
obj
a 4.5
b 7.2
c -5.3
d 3.6
dtype: float64
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e']) # 重新索引reindex
obj2
a 4.5
b 7.2
c -5.3
d 3.6
e NaN
dtype: float64
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0,2,4])
obj3
0 blue
2 purple
4 yellow
dtype: object
obj3.reindex(range(6), method='ffill')
0 blue
1 blue
2 purple
3 purple
4 yellow
5 yellow
dtype: object
df = pd.DataFrame(np.arange(9).reshape(3,3), # 数据value值
index=['a', 'c', 'd'], # 行索引
columns=['Ohio','Texas','California'] # 列索引
)
df
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
Ohio | Texas | California | |
---|---|---|---|
a | 0 | 1 | 2 |
c | 3 | 4 | 5 |
d | 6 | 7 | 8 |
df.reindex(['a', 'b', 'c', 'd'])
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
Ohio | Texas | California | |
---|---|---|---|
a | 0.0 | 1.0 | 2.0 |
b | NaN | NaN | NaN |
c | 3.0 | 4.0 | 5.0 |
d | 6.0 | 7.0 | 8.0 |
states = ['Texas', 'Utah', 'California'] # 改变一个列属性
df.reindex(columns=states)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
Texas | Utah | California | |
---|---|---|---|
a | 1 | NaN | 2 |
c | 4 | NaN | 5 |
d | 7 | NaN | 8 |
# Reindex rows and columns in one call.  Passing a list with absent labels
# ('b', 'Utah') to .loc raises KeyError in pandas >= 1.0; reindex() is the
# supported way to introduce missing labels, which it fills with NaN.
df.reindex(index=['a', 'b', 'c', 'd'], columns=states)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
Texas | Utah | California | |
---|---|---|---|
a | 1.0 | NaN | 2.0 |
b | NaN | NaN | NaN |
c | 4.0 | NaN | 5.0 |
d | 7.0 | NaN | 8.0 |
# Sample Series used to demonstrate drop().
obj = pd.Series(
    data=np.arange(5, dtype=float),
    index=list("abcde"),
)
obj
a 0.0
b 1.0
c 2.0
d 3.0
e 4.0
dtype: float64
new_obj = obj.drop('c')
new_obj
a 0.0
b 1.0
d 3.0
e 4.0
dtype: float64
obj # 原来的数据是不变的
a 0.0
b 1.0
c 2.0
d 3.0
e 4.0
dtype: float64
obj.drop(['d', 'c']) # 丢弃多个值用列表的形式
a 0.0
b 1.0
e 4.0
dtype: float64
# Sample 4x4 DataFrame used to show drop() removing labels along either axis.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'Utah', 'NY'],
)
data
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
one | two | three | four | |
---|---|---|---|---|
Ohio | 0 | 1 | 2 | 3 |
Colorado | 4 | 5 | 6 | 7 |
Utah | 8 | 9 | 10 | 11 |
NY | 12 | 13 | 14 | 15 |
data.drop(['Colorado', 'Ohio']) # 删除行记录
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
one | two | three | four | |
---|---|---|---|---|
Utah | 8 | 9 | 10 | 11 |
NY | 12 | 13 | 14 | 15 |
data.drop('two', axis=1) # 指定列 axis=1
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
one | three | four | |
---|---|---|---|
Ohio | 0 | 2 | 3 |
Colorado | 4 | 6 | 7 |
Utah | 8 | 10 | 11 |
NY | 12 | 14 | 15 |
data.drop('three', axis='columns') # 通过columns
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
one | two | four | |
---|---|---|---|
Ohio | 0 | 1 | 3 |
Colorado | 4 | 5 | 7 |
Utah | 8 | 9 | 11 |
NY | 12 | 13 | 15 |
data # 原来的数据不会变
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
one | two | three | four | |
---|---|---|---|---|
Ohio | 0 | 1 | 2 | 3 |
Colorado | 4 | 5 | 6 | 7 |
Utah | 8 | 9 | 10 | 11 |
NY | 12 | 13 | 14 | 15 |
# Modify the object in place: this call removes the row labelled 'NY' from data.
# NOTE(review): the original comment said in-place drop can only delete rows;
# drop() also accepts axis (see the axis=1 / axis='columns' calls earlier in this
# file), so that restriction looks wrong — confirm.
data.drop('NY', inplace=True) # inplace=True changes the original data instead of returning a copy
data
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
one | two | three | four | |
---|---|---|---|---|
Ohio | 0 | 1 | 2 | 3 |
Colorado | 4 | 5 | 6 | 7 |
Utah | 8 | 9 | 10 | 11 |
obj = pd.Series(np.arange(4.0),
index=['a','b','c','d'])
obj
a 0.0
b 1.0
c 2.0
d 3.0
dtype: float64
obj['c']
2.0
# Positional access goes through .iloc: obj[2] on a string-labelled Series
# relies on the deprecated positional fallback of Series.__getitem__.
obj.iloc[2]
2.0
obj[1:3]
b 1.0
c 2.0
dtype: float64
obj[['a','d','c']]
a 0.0
d 3.0
c 2.0
dtype: float64
obj['b':'c'] # 标签切片的末端是包含的
b 1.0
c 2.0
dtype: float64
# 利用切片进行赋值
obj['b':'c'] = 6
obj
a 0.0
b 6.0
c 6.0
d 3.0
dtype: float64
data = pd.DataFrame(np.arange(16).reshape((4,4)),
index=['Ohio', 'Colorado','Utah','NY'],
columns=['one','two', 'three','four'])
data[['three', 'two']]
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
three | two | |
---|---|---|
Ohio | 2 | 1 |
Colorado | 6 | 5 |
Utah | 10 | 9 |
NY | 14 | 13 |
data[data['three'] > 5] # 布尔值选取
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
one | two | three | four | |
---|---|---|---|---|
Colorado | 4 | 5 | 6 | 7 |
Utah | 8 | 9 | 10 | 11 |
NY | 12 | 13 | 14 | 15 |
data[data < 5] = 0 # 小于5的数赋值为0
data
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
one | two | three | four | |
---|---|---|---|---|
Ohio | 0 | 0 | 0 | 0 |
Colorado | 0 | 5 | 6 | 7 |
Utah | 8 | 9 | 10 | 11 |
NY | 12 | 13 | 14 | 15 |
- loc 通过轴标签(行、列的标签)进行索引
- iloc 通过整数位置进行索引
data.loc['Utah', ['two' ,'three']]
two 9
three 10
Name: Utah, dtype: int64
data.iloc[[1,2],[3,0,1]]
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
four | one | two | |
---|---|---|---|
Colorado | 7 | 0 | 5 |
Utah | 11 | 8 | 9 |
data.loc[:'Utah', 'two']
Ohio 0
Colorado 5
Utah 9
Name: two, dtype: int64
data.iloc[:, :3][data.three > 5]
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
one | two | three | |
---|---|---|---|
Colorado | 0 | 5 | 6 |
Utah | 8 | 9 | 10 |
NY | 12 | 13 | 14 |
data.iloc[:, :3]
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
one | two | three | |
---|---|---|---|
Ohio | 0 | 0 | 0 |
Colorado | 0 | 5 | 6 |
Utah | 8 | 9 | 10 |
NY | 12 | 13 | 14 |
# Two Series whose indexes only partly overlap, used for the alignment example.
s1 = pd.Series(data=[7.3, -2.6, 3.4, 1.4], index=['a', 'c', 'd', 'e'])
s2 = pd.Series(data=[4.3, -9.6, 1.2, 2.9, 3.1], index=['a', 'c', 'e', 'f', 'g'])
s1
a 7.3
c -2.6
d 3.4
e 1.4
dtype: float64
s2
a 4.3
c -9.6
e 1.2
f 2.9
g 3.1
dtype: float64
s1 + s2 # 自动对齐功能:在不重叠的索引处引入 NaN 值
a 11.6
c -12.2
d NaN
e 2.6
f NaN
g NaN
dtype: float64
df1 = pd.DataFrame({'A': [1,2]})
df2 = pd.DataFrame({'B': [3,4]})
df1
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
A | |
---|---|
0 | 1 |
1 | 2 |
df2
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
B | |
---|---|
0 | 3 |
1 | 4 |
df1 - df2
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
A | B | |
---|---|---|
0 | NaN | NaN |
1 | NaN | NaN |
df1 = pd.DataFrame(np.arange(12.).reshape((3,4)),
columns=list('abcd'))
df1
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
a | b | c | d | |
---|---|---|---|---|
0 | 0.0 | 1.0 | 2.0 | 3.0 |
1 | 4.0 | 5.0 | 6.0 | 7.0 |
2 | 8.0 | 9.0 | 10.0 | 11.0 |
df2 = pd.DataFrame(np.arange(20.).reshape((4,5)),
columns=list('abcde'))
df2
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
a | b | c | d | e | |
---|---|---|---|---|---|
0 | 0.0 | 1.0 | 2.0 | 3.0 | 4.0 |
1 | 5.0 | 6.0 | 7.0 | 8.0 | 9.0 |
2 | 10.0 | 11.0 | 12.0 | 13.0 | 14.0 |
3 | 15.0 | 16.0 | 17.0 | 18.0 | 19.0 |
df1 + df2 # 没有重叠的位置产生 NaN
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
a | b | c | d | e | |
---|---|---|---|---|---|
0 | 0.0 | 2.0 | 4.0 | 6.0 | NaN |
1 | 9.0 | 11.0 | 13.0 | 15.0 | NaN |
2 | 18.0 | 20.0 | 22.0 | 24.0 | NaN |
3 | NaN | NaN | NaN | NaN | NaN |
df1.add(df2, fill_value=0) # 没有重叠的部分使用其中某个的值
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
a | b | c | d | e | |
---|---|---|---|---|---|
0 | 0.0 | 2.0 | 4.0 | 6.0 | 4.0 |
1 | 9.0 | 11.0 | 13.0 | 15.0 | 9.0 |
2 | 18.0 | 20.0 | 22.0 | 24.0 | 14.0 |
3 | 15.0 | 16.0 | 17.0 | 18.0 | 19.0 |
# Each arithmetic method has a reversed-operand counterpart whose name starts
# with "r"; the operator form below gives the same result as the df1.rdiv(1)
# call shown later in this file.
1 / df1
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
a | b | c | d | |
---|---|---|---|---|
0 | inf | 1.000000 | 0.500000 | 0.333333 |
1 | 0.250 | 0.200000 | 0.166667 | 0.142857 |
2 | 0.125 | 0.111111 | 0.100000 | 0.090909 |
df1.rdiv(1)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
a | b | c | d | |
---|---|---|---|---|
0 | inf | 1.000000 | 0.500000 | 0.333333 |
1 | 0.250 | 0.200000 | 0.166667 | 0.142857 |
2 | 0.125 | 0.111111 | 0.100000 | 0.090909 |
DataFrame 和 Series 之间的运算是通过广播机制来实现的(与下面 NumPy 数组的广播方式类似)
# 3x4 float array used to illustrate NumPy broadcasting.
arr = np.arange(12, dtype=float).reshape(3, 4)
arr
array([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
arr[0]
array([0., 1., 2., 3.])
arr - arr[0] # 每行都去减掉arr[0]
array([[0., 0., 0., 0.],
[4., 4., 4., 4.],
[8., 8., 8., 8.]])
# 4x3 float DataFrame used for the DataFrame/Series arithmetic examples.
df = pd.DataFrame(
    data=np.arange(12, dtype=float).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=list('bde'),
)
df
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
b | d | e | |
---|---|---|---|
Utah | 0.0 | 1.0 | 2.0 |
Ohio | 3.0 | 4.0 | 5.0 |
Texas | 6.0 | 7.0 | 8.0 |
Oregon | 9.0 | 10.0 | 11.0 |
series = df.iloc[0]
series
b 0.0
d 1.0
e 2.0
Name: Utah, dtype: float64
df - series # 将series的索引匹配到 DF 的每列
.dataframe tbody tr th:only-of-type { vertical-align: middle; } <pre><code>.dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </code></pre>
b | d | e | |
---|---|---|---|
Utah | 0.0 | 0.0 | 0.0 |
Ohio | 3.0 | 3.0 | 3.0 |
Texas | 6.0 | 6.0 | 6.0 |
Oregon | 9.0 | 9.0 | 9.0 |
# Take column 'd' of df.  The name `frame` is never defined in this file, so
# the earlier spelling raised NameError and left series1 unbound.
series1 = df['d']
series1
# Subtract series1 from every column, matching on the row index.
df.sub(series1, axis='index')