上周我们学习了可视化工具Matplotlib包的基础画图,今天我们来进一步优化。
1.Pandas Plot
Pandas整合了Matplotlib的画图功能,可以直接在Series、DataFrame上用plot()方法进行绘图,具体请参考:
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.plot.html
导入包:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
举个栗子:
# Load the raw births table; the code below reads the columns
# 'year', 'month', 'day' and 'births'.
births = pd.read_csv('births.csv')

# Sigma-clip outliers using a robust spread estimate: for Gaussian data
# 0.74 * IQR approximates the standard deviation, and the median stands in
# for the mean.
quartiles = np.percentile(births['births'], [25, 50, 75])
mu, sig = quartiles[1], 0.74 * (quartiles[2] - quartiles[0])

# Keep only rows within 5 sigma of the median. The original line was cut off
# after "(births"; the symmetric upper bound is restored here.
# .copy() makes the filtered frame independent so the column assignment below
# does not trigger SettingWithCopyWarning.
births = births.query(
    '(births > @mu - 5 * @sig) & (births < @mu + 5 * @sig)'
).copy()

# 'day' must be an integer so year/month/day can be packed into YYYYMMDD.
births['day'] = births['day'].astype(int)

# Build a DatetimeIndex from the packed YYYYMMDD integer.
births.index = pd.to_datetime(
    10000 * births.year + 100 * births.month + births.day,
    format='%Y%m%d',
)

# Average number of births for each (month, day) pair across all years.
births_by_date = births.pivot_table(
    'births',
    [births.index.month, births.index.day],
)

# Map every (month, day) onto a dummy year so the result plots as a time
# series; 2012 is a leap year, so February 29 is a valid date.
# pd.Timestamp replaces pd.datetime, which was deprecated in pandas 1.0 and
# later removed.
births_by_date.index = [
    pd.Timestamp(year=2012, month=int(month), day=int(day))
    for (month, day) in births_by_date.index
]

fig, ax = plt.subplots(figsize=(12, 4))
births_by_date.plot(ax=ax);
## http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.plot.html
# Two stacked pie charts: total births per year (top) and per month (bottom).
fig, ax = plt.subplots(2, 1, figsize=(8, 10))

# groupby(...).sum() yields a Series indexed by the group key; a Series pie
# chart takes its wedge labels from that index, so the x=/y= keywords the
# original passed (including x='year' for the month chart) had no meaning
# and are dropped.
births.groupby('year')['births'].sum().plot(kind='pie', ax=ax[0])
births.groupby('month')['births'].sum().plot(kind='pie', ax=ax[1])

# Draw each plotted column of the frame in its own subplot.
births.plot(subplots=True)
2.Seaborn
# seaborn is used as `sns` below but was not imported in the import list at
# the top of this tutorial.
import seaborn as sns

# NOTE(review): `data` is not defined anywhere in the tutorial text. A sample
# frame with two correlated columns 'x' and 'y' is generated here so the
# examples run; replace it with the intended dataset if there is one.
data = pd.DataFrame(
    np.random.multivariate_normal([0, 0], [[5, 2], [2, 2]], size=2000),
    columns=['x', 'y'],
)

# Density curves, one per column.
# `fill=True` is the current spelling of the deprecated `shade=True`.
for col in 'xy':
    sns.kdeplot(data[col], fill=True)

# Histogram and density curve drawn together. `distplot` is deprecated;
# `histplot(..., kde=True, stat='density')` is its documented replacement.
sns.histplot(data['x'], kde=True, stat='density')
sns.histplot(data['y'], kde=True, stat='density');

# Pairwise relationships in the iris dataset; `hue` selects the column used
# to colour and label the points. `height` replaces the deprecated `size`.
iris = sns.load_dataset("iris")
sns.pairplot(iris, hue='species', height=2.5);

# Tips dataset: histogram of the tip percentage, faceted by sex and time.
tips = sns.load_dataset('tips')
tips['tip_pct'] = 100 * tips['tip'] / tips['total_bill']
grid = sns.FacetGrid(tips, row="sex", col="time", margin_titles=True)
grid.map(plt.hist, "tip_pct", bins=np.linspace(0, 40, 15));

# Joint hexbin plot of total_bill vs. tip with marginal histograms.
# x and y are passed by keyword: seaborn >= 0.12 rejects them positionally.
# (Older seaborn releases also annotated the correlation coefficient in the
# corner of this plot; current releases do not.)
with sns.axes_style('white'):
    sns.jointplot(x="total_bill", y="tip", data=tips, kind='hex')
DIG