!pip install --upgrade pandas
!pip install --upgrade seaborn
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
# Report the installed version of each library imported above.
for library in (np, pd, sb):
    print(library.__version__)

# Remote alternative to the local file (presumably the same dataset -- confirm before switching):
# train_data = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-29/nyc_squirrels.csv")
data_path = "work/data.csv"
train_data = pd.read_csv(data_path)

# Confirm what was loaded: the object's type, then its first four rows.
print(type(train_data))
print(train_data.head(4))
ss
1.19.5
1.3.5
0.12.2
<class 'pandas.core.frame.DataFrame'>
long lat unique_squirrel_id hectare shift date \
0 -73.956134 40.794082 37F-PM-1014-03 37F PM 10142018
1 -73.957044 40.794851 37E-PM-1006-03 37E PM 10062018
2 -73.976831 40.766718 2E-AM-1010-03 02E AM 10102018
3 -73.975725 40.769703 5D-PM-1018-05 05D PM 10182018
# Build a 3x6 grid holding the integers 0..17 and display it.
n_rows, n_cols = 3, 6
a = np.arange(n_rows * n_cols).reshape(n_rows, n_cols)
print(a)

# Wrap an identical grid in a DataFrame that carries explicit labels.
row_labels = ["a", "b", "c"]  # row index
col_labels = ["A", "B", "C", "D", "E", "F"]  # column index
data_frame = pd.DataFrame(
    np.arange(n_rows * n_cols).reshape(n_rows, n_cols),
    index=row_labels,
    columns=col_labels,
)
print(data_frame)

# Show the row labels, the column labels and the per-column dtypes in turn.
for attribute in (data_frame.index, data_frame.columns, data_frame.dtypes):
    print(attribute)
[[ 0 1 2 3 4 5]
[ 6 7 8 9 10 11]
[12 13 14 15 16 17]]
A B C D E F
a 0 1 2 3 4 5
b 6 7 8 9 10 11
c 12 13 14 15 16 17
Index(['a', 'b', 'c'], dtype='object')
Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')
A int64
B int64
C int64
D int64
E int64
F int64
dtype: object
A B C D E F
a 0 1 2 3 4 5
b 6 7 8 9 10 11
c 12 13 14 15 16 17
A B C D E F
a 0 1 2 3 4 5
b 6 7 8 9 10 11
c 12 13 14 15 16 17
# Filtering DataFrame columns by their dtype.
df = pd.DataFrame(np.ones((3, 3)), columns=list("abc"))
print(df)
print(df.dtypes)

# select_dtypes keeps (include=) or drops (exclude=) columns according to dtype.
only_ints = df.select_dtypes(include=["int64"])  # integer columns only
print(only_ints)
without_floats = df.select_dtypes(exclude=["float64"])  # float columns removed
print(without_floats)
without_ints = df.select_dtypes(exclude=["int64"])
print("\nexclude int:\n", without_ints)
a b c
0 1.0 1.0 1.0
1 1.0 1.0 1.0
2 1.0 1.0 1.0
a float64
b float64
c float64
dtype: object
Empty DataFrame
Columns: []
Index: [0, 1, 2]
Empty DataFrame
Columns: []
Index: [0, 1, 2]
exclude int:
a b c
0 1.0 1.0 1.0
1 1.0 1.0 1.0
2 1.0 1.0 1.0
# Selecting particular rows (samples) and columns (features) by label.
df = pd.DataFrame(np.ones((3, 3)), columns=list("abc"))
print(df, "\n----------------------------")

# Rows labelled 0 and 2, every column.
rows_0_and_2 = df.loc[[0, 2], :]
print(rows_0_and_2, "\n-----------------------")

# The single value stored at row 0, column "b".
cell_0_b = df.loc[0, "b"]
print(cell_0_b, "\n----------------------")

# Every row, restricted to columns "b" and "c".
cols_b_and_c = df.loc[:, ["b", "c"]]
print(cols_b_and_c)
a b c
0 1.0 1.0 1.0
1 1.0 1.0 1.0
2 1.0 1.0 1.0
----------------------------
a b c
0 1.0 1.0 1.0
2 1.0 1.0 1.0
-----------------------
1.0
----------------------
b c
0 1.0 1.0
1 1.0 1.0
2 1.0 1.0
# Truncation: keep only the labels lying between `before` and `after`.
# `df` here is the frame built earlier in the file.
first_rows = df.truncate(before=0, after=1)  # axis defaults to the rows ("index")
print(first_rows)
b_to_c = df.truncate(before="b", after="c", axis="columns")
print(b_to_c)

# Dropping a feature: pop removes column "b" from df itself and hands it back.
removed_b = df.pop("b")
print(df)
a b c
0 1.0 1.0 1.0
1 1.0 1.0 1.0
b c
0 1.0 1.0
1 1.0 1.0
2 1.0 1.0
a c
0 1.0 1.0
1 1.0 1.0
2 1.0 1.0
# Concatenating DataFrames: axis 0 stacks along rows, axis 1 along columns.
# When the frames' dimensions differ, the missing entries are filled with NaN.
pair = [df, df]
print(pd.concat(pair, axis=0))  # along the row dimension
print(pd.concat(pair, axis=1))  # along the column dimension
a c
0 1.0 1.0
1 1.0 1.0
2 1.0 1.0
0 1.0 1.0
1 1.0 1.0
2 1.0 1.0
a c a c
0 1.0 1.0 1.0 1.0
1 1.0 1.0 1.0 1.0
2 1.0 1.0 1.0 1.0
# Element-wise arithmetic between DataFrames.
df = pd.DataFrame(np.ones((3, 3)), columns=list("abc"))
print(df, "\n----------------------------")

# Method / operator pairs:
#   add +, sub -, mul *, div /, floordiv //, mod %, pow **
# Only the first four are exercised here, each applied to df against itself.
for operation in (df.add, df.sub, df.mul, df.div):
    print(operation(df))
a b c
0 1.0 1.0 1.0
1 1.0 1.0 1.0
2 1.0 1.0 1.0
----------------------------
a b c
0 2.0 2.0 2.0
1 2.0 2.0 2.0
2 2.0 2.0 2.0
a b c
0 0.0 0.0 0.0
1 0.0 0.0 0.0
2 0.0 0.0 0.0
a b c
0 1.0 1.0 1.0
1 1.0 1.0 1.0
2 1.0 1.0 1.0
a b c
0 1.0 1.0 1.0
1 1.0 1.0 1.0
2 1.0 1.0 1.0
# Histogram: count how many of the values fall inside each range (bin).
x = [0, 2, 9, 2, 5, 6, 2]
# plt.hist returns the per-bin counts, the bin edges and the drawn bars.
counts, bin_edges, bars = plt.hist(x)
plt.show()
# Draw a histogram of one dataset column with hist().
column_values = train_data["hectare_squirrel_number"]
plt.hist(column_values)
plt.show()
# Styled histogram of the "hectare_squirrel_number" column on figure 0.
plt.figure(0)
plt.title("hectare_squirrel_number")
plt.xlabel("squirrel")
plt.ylabel("hectare")

column_values = train_data["hectare_squirrel_number"]
ax = column_values.hist(bins=30, grid=True, color="green")

# ax.patches lists the shapes (one per bar) that the plot produced.
print(ax.patches)

# Recolour every bar whose x position is at or beyond 10.
bars_from_10 = [bar for bar in ax.patches if bar.get_x() >= 10]
for bar in bars_from_10:
    bar.set_color("blue")
plt.show()
<Axes.ArtistList of 30 patches>
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。