# Python实践：seaborn的散点图矩阵（Pairs Plots）可视化数据

Seaborn的散点图矩阵（Pairs Plots）

# Seaborn visualization library
import seaborn as sns

# Create the default pairplot.
# NOTE(review): `df` is assumed to be a DataFrame loaded earlier; the
# loading step is not shown in this article.
sns.pairplot(df)

# Take the base-10 log of population and gdp_per_capita
df['log_pop'] = np.log10(df['pop'])
df['log_gdp_per_cap'] = np.log10(df['gdp_per_cap'])

# Drop the non-transformed columns
df = df.drop(columns=['pop', 'gdp_per_cap'])

# Pair plot of the same frame, grouped by the 'continent' column via `hue`
sns.pairplot(data=df, hue='continent')

# Create a pair plot colored by continent with a density plot on the
# diagonal, and format the scatter plots.
# `height` is used instead of `size`: seaborn renamed the argument in 0.9.
sns.pairplot(
    df,
    hue='continent',
    diag_kind='kde',
    plot_kws={'alpha': 0.6, 's': 80, 'edgecolor': 'k'},
    height=4,
)

# Plot colored by continent for years 2000-2007
# (`height` is used instead of `size`: seaborn renamed the argument in 0.9.)
sns.pairplot(
    df[df['year'] >= 2000],
    vars=['life_exp', 'log_pop', 'log_gdp_per_cap'],
    hue='continent',
    diag_kind='kde',
    plot_kws={'alpha': 0.6, 's': 80, 'edgecolor': 'k'},
    height=4,
)

# Title. This `size` is the matplotlib text size of the title and is
# unrelated to the seaborn figure-size argument above.
plt.suptitle('Pair Plot of Socioeconomic Data for 2000-2007', size=28);

### 使用PairGrid进行自定义

与sns.pairplot函数相反，sns.PairGrid是一个类，这意味着它不会自动为我们填充网格中的图。相反，我们先创建一个类实例，然后将特定的绘图函数映射到网格的不同部分。要用我们的数据创建一个PairGrid实例，我们使用下面的代码，它同时也限定了要显示的变量：

# Create an instance of the PairGrid class, using only the rows where
# year == 2007 and only the three listed variables.
# NOTE(review): the original snippet read from `df_log`, which is not defined
# anywhere in this article; `df` is used here to match the later PairGrid
# example and the log columns added to `df` above. Confirm against the
# original notebook.
# `height` is used instead of `size`: seaborn renamed the argument in 0.9.
grid = sns.PairGrid(
    data=df[df['year'] == 2007],
    vars=['life_exp', 'log_pop', 'log_gdp_per_cap'],
    height=4,
)

# Map a scatter plot to the upper triangle
grid = grid.map_upper(plt.scatter, color='darkred')

# Map a histogram to the diagonal
grid = grid.map_diag(plt.hist, bins=10, color='darkred', edgecolor='k')

# Map a density plot to the lower triangle
grid = grid.map_lower(sns.kdeplot, cmap='Reds')

def corr(x, y, **kwargs):
    """Write the correlation coefficient of ``x`` and ``y`` on the current axes.

    The coefficient is taken from ``np.corrcoef``, rounded to two decimals,
    and drawn as a ``rho = <value>`` label near the top-left of the axes
    returned by ``plt.gca()``.

    Args:
        x: First array of values.
        y: Second array of values.
        **kwargs: Accepted and ignored. Presumably present so the function
            can be used as a PairGrid mapping callback, which may pass extra
            keyword arguments -- confirm against the seaborn docs.
    """
    # Off-diagonal entry of the 2x2 correlation matrix
    coef = np.corrcoef(x, y)[0, 1]

    # Make the label
    label = r'$\rho$ = ' + str(round(coef, 2))

    # Add the label to the plot; coordinates are in the axes' own transform,
    # so (0.2, 0.95) is relative to the axes rather than the data.
    ax = plt.gca()
    ax.annotate(label, xy=(0.2, 0.95), size=20, xycoords=ax.transAxes)


# Create a pair grid instance
# (`height` is used instead of `size`: seaborn renamed the argument in 0.9.)
grid = sns.PairGrid(
    data=df[df['year'] == 2007],
    vars=['life_exp', 'log_pop', 'log_gdp_per_cap'],
    height=4,
)

# Map the plots to the locations
grid = grid.map_upper(plt.scatter, color='darkred')
grid = grid.map_upper(corr)
grid = grid.map_lower(sns.kdeplot, cmap='Reds')
grid = grid.map_diag(plt.hist, bins=10, edgecolor='k', color='darkred');

87 篇文章9 人订阅

0 条评论