既往内容基于Python绘制了纵向、横向和比例条形图(柱状图):https://mp.weixin.qq.com/s/n_JCFlg0_FaoQJEmrrWu9A。
本次基于Python绘制一下火山图,R语言版本的差异分析及火山图绘制也在既往的推文中出现过:https://mp.weixin.qq.com/s/L98KtJH5-l4mF0n9dOIsxQ
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import scanpy as sc
import numpy as np
import anndata as ad
import pooch
import os
# 确定位置
os.getcwd()
# 读取数据
adata = sc.read_h5ad('./scRNA_V5.h5ad')
adata.obs
adata.obs["celltype"].unique()
Tgroup = ["sample1", "sample3", "sample5"]
# np.where有点像ifelse
adata.obs['type'] = np.where(adata.obs['orig.ident'].isin(Tgroup), 'Tumor', 'Normal')
选择EGFR、CDK4和CDK6基因进行探查
mycolors = {
'B-cells':"#326795",
"CD8+ T-cells":"#aec0db",
"CD4+ T-cells":"#da7a31",
"Adipocytes":"#e9b581",
"Neutrophils":"#318736",
"Fibroblasts":"#94ce8c",
"Monocytes": "#b63b3a",
"Endothelial cells": "#f4c542",
"NK cells": "#e71d36"
}
width_cm = 60
height_cm = 10
plt.figure(figsize=(width_cm / 2.54, height_cm / 2.54))
genes = ["EGFR", "CDK4", "CDK6"]
types = adata.obs["type"].unique()
# 每个 type 生成一个小提琴图
for t in types:
# 筛选子集数据
sub_adata = adata[adata.obs["type"] == t]
# 创建图形
width_cm = 20
height_cm = 8
plt.figure(figsize=(width_cm / 2.54, height_cm / 2.54))
sc.pl.violin(
sub_adata,
genes,
groupby="celltype",
palette=mycolors, # 这里传入颜色映射字典
stripplot=False,
inner="box",
show=False,
)
fig = plt.gcf()
for ax in fig.axes:
ax.tick_params(axis='x', labelsize=12, rotation=45)
ax.tick_params(axis='y', labelsize=12)
ax.xaxis.label.set_size(14)
ax.yaxis.label.set_size(14)
ax.title.set_fontsize(16)
plt.suptitle(f"Gene Expression in Type: {t}", fontsize=16, y=1.02, ha='center')
plt.tight_layout()
plt.savefig(f"violin_{t}.pdf", dpi=300, bbox_inches='tight')
plt.show()
# 对type做一下差异分析
sc.tl.rank_genes_groups(adata,
groupby="type",
method="wilcoxon",
pts=True)
import adjustText
# 提取type细胞的差异分析结果并构建DataFrame
df = pd.DataFrame({
'gene': adata.uns['rank_genes_groups']['names']['Tumor'],
'log2FC': adata.uns['rank_genes_groups']['logfoldchanges']['Tumor'], # log2 fold change
'pval': adata.uns['rank_genes_groups']['pvals']['Tumor'], # p值
'pvals_adj': adata.uns['rank_genes_groups']['pvals_adj']['Tumor'] # 校正后的p值
})
# 数据整理:计算-log10(pval)
df['-log10(pval)'] = -np.log10(df['pval'])
# 提取表达百分比数据(pct)
pts = pd.DataFrame({
'pct.1': adata.uns['rank_genes_groups']['pts']['Tumor'], # CD4 T中的表达百分比
'pct.2': adata.uns['rank_genes_groups']['pts']['Normal'] # CD8 T中的表达百分比
}).reset_index().rename(columns={'index': 'gene'}) # 重置索引并将索引列命名为'gene'
# 合并差异分析结果和表达百分比数据
df = df.merge(pts, on='gene')
# 在绘图代码前添加全局设置(影响所有文本元素)
plt.rcParams.update({
'font.size': 12, # 全局基础字体大小
'axes.titlesize': 16, # 标题字体大小
'axes.labelsize': 14, # 坐标轴标签大小
'xtick.labelsize': 12, # X轴刻度大小
'ytick.labelsize': 12, # Y轴刻度大小
'legend.fontsize': 12, # 图例字体大小
'legend.title_fontsize': 14 # 图例标题大小
})
# 火山图绘制
from adjustText import adjust_text # 用于标签防重叠
from matplotlib.patches import Patch
pvals = 0.05
log_FC = 1
dif = 0.2
# 假设 df 是之前得到的差异分析结果DataFrame
# 添加差异百分比列(模拟R代码中的Difference)
df['Difference'] = df['pct.1'] - df['pct.2']
# 定义基因变化方向(模拟R代码中的change列)
df['change'] = np.where(
(df['pvals_adj'] < pvals) & (df['log2FC'] > log_FC), 'up',
np.where(
(df['pvals_adj'] < pvals) & (df['log2FC'] < -log_FC), 'down',
'ns'
)
)
# 统计上下调基因数量(用于图例)
up_count = sum(df['change'] == 'up')
down_count = sum(df['change'] == 'down') # 注意:原R代码中ns包含非显著基因
# 设置绘图风格
sns.set_style("white")
plt.figure(figsize=(9, 9))
# 绘制散点图(核心火山图)
scatter = sns.scatterplot(
data=df,
x="Difference",
y="log2FC",
hue="change",
palette={
'down': '#0d1b46', # 深蓝色
'ns': 'grey',
'up': 'tomato' # 红色
},
s=15, # 点大小
alpha=0.7,
edgecolor=None
)
# 添加标签
texts = []
for _, row in df.iterrows():
if (row['pvals_adj'] <= pvals) and (
(row['log2FC'] >= log_FC and row['Difference'] >= dif) or
(row['log2FC'] <= -log_FC and row['Difference'] <= -dif)
):
texts.append(plt.text(
row['Difference'],
row['log2FC'],
row['gene'],
fontsize=10,
color='black'
))
# 自动调整标签位置(防重叠)
adjust_text(
texts,
arrowprops=dict(arrowstyle='-', color='black', lw=1.2, alpha=0.8),
expand_points=(1.4, 1.4), # 加大扩展距离,减少重叠
force_text=0.8,
force_points=0.5,
lim=100 # 最大调整迭代次数
)
# 添加参考线
plt.axvline(0, linestyle='--', color='black', alpha=0.7, linewidth=1.5, zorder=1)
plt.axhline(0, linestyle='--', color='black', alpha=0.7, linewidth=1.5, zorder=1)
# 设置图例(包含计数)
custom_legend = [
Patch(facecolor='tomato', label=f'up ({up_count})'),
Patch(facecolor='grey', label='ns'),
Patch(facecolor='#0d1b46', label=f'down ({down_count})')
]
plt.legend(
handles=custom_legend,
title='Change',
loc='center left',
bbox_to_anchor=(1, 0.5),
frameon=True
)
# 坐标轴标签
plt.xlabel('Percentage difference (pct.1 - pct.2)')
plt.ylabel('Log2 Fold Change')
# 增加title
plt.title('Tumor vs. Normal volcano', fontsize=18, weight='bold')
# 保存图像
plt.savefig('volcanoplot1.pdf', bbox_inches='tight', dpi=300)
plt.show()
注:若对内容有疑惑或者有发现明确错误的朋友,请联系后台(欢迎交流)。更多相关内容可关注公众号:生信方舟 。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。