复现文章信息:
文章题目:Single-cell analysis reveals prognostic fibroblast subpopulations linked to molecular and immunological subtypes of lung cancer 期刊:Nature Communications 日期:2023年1月31日 DOI: 10.1038/s41467-023-35832-6
轨迹推断识别与肺泡或外膜成纤维细胞向肌成纤维细胞转分化相关的共有基因模块
代码如下:
library(Seurat)
library(ggplot2)
library(WGCNA)
library(tidyverse)
library(ggpubr)
library(ggsci)
library(destiny)
library(metap)
library(pheatmap)
library(xlsx)
library(ggplot2)
library(ggfittext)
data_directory <- "H:\\文献复现\\6"
source(paste0(data_directory, "0_NewFunctions.R"))
load(paste0(data_directory, "Trajectory Analysis results.Rdata"))
load(paste0(data_directory, "IntegratedFibs_Zenodo.Rdata"))
load(paste0(data_directory, "MxIHC_Zenodo.Rdata"))
REACTOME<-read.xlsx("41467_2023_35832_MOESM16_ESM.xlsx",
as.data.frame=T,
header=T)
#在文章中的Supplementary Data 13可以下载
Fibs.integrated <- AddMetaData(Fibs.integrated, DPT_output)
A.M_pT_res_stats2$Gene <- rownames(A.M_pT_res_stats2)
T.M_pT_res_stats2$Gene <- rownames(T.M_pT_res_stats2)
探究从肺泡和外膜成纤维细胞亚群(在对照组织中富集)到肌成纤维细胞(在肿瘤组织中富集)的分化过程,对scRNA-seq数据集进行轨迹推断。结果表明外膜和肺泡成纤维细胞可以作为独立的祖细胞,肌成纤维细胞可以从这些祖细胞转分化
pA <- ggplot(DM_dim_data, aes(x = DC1, y = DC2, colour = Fibs_subpopulation)) +
theme_pubr(base_size = 10) +
theme(axis.text = element_blank(), legend.title = element_blank(), legend.position = "right") +
geom_point(size = 0.1) +
scale_color_manual(values = Fibs_col.palette) +
guides(colour = guide_legend(override.aes = list(size = 2)))
#突出显示分配给每个扩散拟时序(DPT)分支的细胞
pB <- ggplot(DM_dim_data, aes(x = DC1, y = DC2, colour = factor(dpt_branch))) +
theme_pubr(base_size = 10) +
theme(axis.text = element_blank(), legend.position = "right") +
geom_point(size = 0.1) +
scale_color_brewer(name = "DPT branch") +
guides(colour = guide_legend(override.aes = list(size = 2), title.position = "top", title.hjust = 0.5))
#对细胞进行拟时序排序,代表它们向肌成纤维细胞表型的相对进展
pC <- ggplot(DM_dim_data, aes(x = DC1, y = DC2, colour = All_pT)) +
theme_pubr(base_size = 10) +
theme(axis.text = element_blank(), legend.key.width = unit(5, "pt"),
legend.position = "right", legend.key.height = unit(10, "pt")) +
geom_point(size = 0.1) +
scale_color_viridis_c(name = "Pseudotime") +
guides(colour = guide_colourbar(title.position = "top", title.hjust = 0.5))
Fig_4ABC <- ggarrange(pA,NULL,pB,NULL,pC, ncol = 5, widths = c(1,0.1,1,0.1), align = "h",
labels = c("a", "", "b", "", "c"))
Fig_4ABC
names(A.M_pT_res)
datExpr_Full <- list()
for(i in names(A.M_pT_res)){
datExpr_Full[[i]]<- t(A.M_pT_res[[i]]$pT_fit)
}
sig_genes <- as.character(na.omit(rownames(A.M_pT_res_stats2)[A.M_pT_res_stats2$meta_p_adj.BH < 1e-10]))
length(sig_genes)
datExpr_CTR.TUMOUR <- list()
for(i in names(datExpr_Full)[c(1:4,7,8)]){
datExpr_CTR.TUMOUR[[i]] <- datExpr_Full[[i]][, sig_genes]
}
datExpr_CTR.TUMOUR_median <-
Reduce(pmedian, datExpr_CTR.TUMOUR)
fit <- hclust(as.dist(1-cor(datExpr_CTR.TUMOUR_median)), method="ward.D2")
plot(fit)
groups <- cutree(fit, k=4)
pheatmap(t(datExpr_CTR.TUMOUR_median[, order(groups)]), scale = "row",
cluster_cols = F,
cluster_rows = F,
clustering_distance_rows = "correlation",
clustering_method = "ward.D2",
show_rownames = F,
annotation_row = data.frame(
row.names = sig_genes,
cluster = as.factor(groups)
))
rowMatch <- match(rownames(A.M_pT_res_stats2) ,sig_genes)
A.M_pT_res_stats2$Cluster <- as.factor(groups)[rowMatch]
A.M_pT_res_stats2$Module <- factor(
A.M_pT_res_stats2$Cluster,
levels = c(1:4),
labels = c("Progenitor","Differentiation","Transient","Proto.Differentiation")
)
levels(A.M_pT_res_stats2$Module)
A.M_pT_res_stats2$Module <- factor(A.M_pT_res_stats2$Module, levels = c("Progenitor", "Transient", "Proto.Differentiation", "Differentiation"))
gene_order <- factor(
groups,
levels = c(1:4),
labels = c("Progenitor","Differentiation","Transient","Proto.Differentiation")
)
gene_order <- factor(gene_order, levels = c("Progenitor", "Transient", "Proto.Differentiation", "Differentiation"))
Mod_cols <- RColorBrewer::brewer.pal(5, "Set1")
names(Mod_cols) <- levels(A.M_pT_res_stats2$Module)
热图显示DPT中肺泡成纤维细胞进展为肌成纤维细胞时差异表达的基因。用层次聚类法将这些基因分组到DPT表达谱定义的模块中
AM.PT_TUMOUR_Heatmap <-
pheatmap(t(datExpr_CTR.TUMOUR_median)[order(gene_order), ], scale = "row",
cluster_cols = F, cluster_rows = F,
breaks = seq(-3,3,length=101),
show_rownames = F, show_colnames = F,
legend = F,
annotation_legend = F,
fontsize = 7,
annotation_names_row = F,
annotation_names_col = F,
gaps_row = c(
sum(as.numeric(gene_order) == 1, na.rm = T),
sum(as.numeric(gene_order) %in% c(1:2), na.rm = T),
sum(as.numeric(gene_order) %in% c(1:3), na.rm = T),
sum(as.numeric(gene_order) %in% c(1:4), na.rm = T)
),
annotation_row = data.frame(
row.names = A.M_pT_res_stats2$Gene[A.M_pT_res_stats2$Gene %in% sig_genes],
Module = A.M_pT_res_stats2$Module[A.M_pT_res_stats2$Gene %in% sig_genes]
),
annotation_col = data.frame(
row.names = rownames(datExpr_CTR.TUMOUR_median),
Pseudotime = as.numeric(rownames(datExpr_CTR.TUMOUR_median))
),
annotation_colors = list(
Pseudotime = viridis::viridis(3),
Module = Mod_cols
)
)
AM.PT_TUMOUR_Heatmap
DC_plotting.df <- data.frame(
DM_dim_data,
Fibs.integrated@meta.data
)
names(DC_plotting.df)
AM_pT_plot <-
DC_plotting.df %>%
ggplot(aes(x = DC1, y = DC2, colour = A.M_pT)) +
geom_point(size = 0.1) +
theme_void(base_size = 10) +
theme(legend.position = c(0.1,1), legend.justification = c("left", "top"),
legend.direction = "horizontal",
legend.key.width = unit(8, "pt"), legend.key.height = unit(2, "pt")) +
guides(colour = guide_colourbar(title.position="top", title.hjust = 0.5)) +
scale_color_viridis_c(name = "DPT", na.value = "grey80")
AM_pT_plot
Fig_4D <- ggarrange(
ggarrange(NULL, AM_pT_plot, ncol = 2),
AM.PT_TUMOUR_Heatmap$gtable,
nrow = 2, heights = c(2.5,6.5)
)
Fig_4D
names(T.M_pT_res)
datExpr_Full <- list()
for(i in names(T.M_pT_res)){
datExpr_Full[[i]]<- t(T.M_pT_res[[i]]$pT_fit)
}
sig_genes <- as.character(na.omit(rownames(T.M_pT_res_stats2)[T.M_pT_res_stats2$meta_p_adj.BH < 1e-10]))
length(sig_genes)
datExpr_CTR.TUMOUR <- list()
for(i in names(datExpr_Full)[c(1:4,7,8)]){
datExpr_CTR.TUMOUR[[i]] <- datExpr_Full[[i]][, sig_genes]
}
datExpr_CTR.TUMOUR_median <-
Reduce(pmedian, datExpr_CTR.TUMOUR)
fit <- hclust(as.dist(1-cor(datExpr_CTR.TUMOUR_median)), method="ward.D2")
plot(fit) # display dendogram
groups <- cutree(fit, k=4)
pheatmap(t(datExpr_CTR.TUMOUR_median[, order(groups)]), scale = "row",
cluster_cols = F,
cluster_rows = F,
clustering_distance_rows = "correlation",
clustering_method = "ward.D2",
#cutree_rows = 4,
show_rownames = F,
annotation_row = data.frame(
row.names = sig_genes,
cluster = as.factor(groups)
))
rowMatch <- match(rownames(T.M_pT_res_stats2) ,sig_genes)
T.M_pT_res_stats2$Cluster <- as.factor(groups)[rowMatch]
T.M_pT_res_stats2$Module <- factor(
T.M_pT_res_stats2$Cluster,
levels = c(1:4),
labels = c("Proto.Differentiation","Progenitor","Transient","Differentiation")
)
levels(T.M_pT_res_stats2$Module)
T.M_pT_res_stats2$Module <- factor(T.M_pT_res_stats2$Module, levels = c("Progenitor", "Transient", "Proto.Differentiation", "Differentiation"))
gene_order <- factor(
groups, levels = c(1:4),
labels = c("Proto.Differentiation","Progenitor","Transient","Differentiation")
)
gene_order <- factor(gene_order, levels = c("Progenitor", "Transient", "Proto.Differentiation", "Differentiation"))
Mod_cols <- RColorBrewer::brewer.pal(5, "Set1")
names(Mod_cols) <- levels(T.M_pT_res_stats2$Module)
Heatmap_df <- t(datExpr_CTR.TUMOUR_median)[order(gene_order), ]
Heatmap_df <- Heatmap_df[!is.na(gene_order[order(gene_order)]), ]
dim(Heatmap_df)
热图显示DPT中外膜成纤维细胞进展为肌成纤维细胞时差异表达的基因。用层次聚类法将这些基因分组到DPT表达谱定义的模块中
TM.PT_TUMOUR_Heatmap <-
pheatmap(Heatmap_df, scale = "row",
cluster_cols = F, cluster_rows = F,
breaks = seq(-3,3,length=101),
show_rownames = F, show_colnames = F,
legend = F,
annotation_legend = F,
fontsize = 7,
annotation_names_row = F,
annotation_names_col = F,
gaps_row = c(
sum(as.numeric(gene_order) == 1, na.rm = T),
sum(as.numeric(gene_order) %in% c(1:2), na.rm = T),
sum(as.numeric(gene_order) %in% c(1:3), na.rm = T),
sum(as.numeric(gene_order) %in% c(1:4), na.rm = T)
),
annotation_row = data.frame(
row.names = T.M_pT_res_stats2$Gene[T.M_pT_res_stats2$Gene %in% sig_genes],
Module = T.M_pT_res_stats2$Module[T.M_pT_res_stats2$Gene %in% sig_genes]
),
annotation_col = data.frame(
row.names = rownames(datExpr_CTR.TUMOUR_median),
Pseudotime = as.numeric(rownames(datExpr_CTR.TUMOUR_median))
),
annotation_colors = list(
Pseudotime = viridis::viridis(3),
Module = Mod_cols
)
)
TM_pT_plot <-
DC_plotting.df %>%
ggplot(aes(x = DC1, y = DC2, colour = T.M_pT)) +
geom_point(size = 0.1) +
theme_void(base_size = 10) +
theme(legend.position = c(0.1,1), legend.justification = c("left", "top"),
legend.direction = "horizontal",
legend.key.width = unit(8, "pt"), legend.key.height = unit(2, "pt")) +
guides(colour = guide_colourbar(title.position="top", title.hjust = 0.5)) +
scale_color_viridis_c(name = "DPT", na.value = "grey80")
TM_pT_plot
Fig_4E <- ggarrange(
ggarrange(NULL, TM_pT_plot, ncol = 2),
TM.PT_TUMOUR_Heatmap$gtable,
nrow = 2, heights = c(2.5,6.5)
)
Fig_4E
这鉴定了两条轨迹上的四个模块,代表转分化的不同阶段:祖细胞、早期激活、原分化和分化
损失曲线图显示了由对照和肿瘤样本或仅对照组织样本组成的数据集中每个共有DPT模块的表达谱。共有模块由在肺泡成纤维细胞至肌成纤维细胞和外膜成纤维细胞至肌成纤维细胞轨迹中分配到相同簇的基因组成,列出了每个模块的10个代表性基因
Mod.Overlap_df <- cbind(A.M_pT_res_stats2[,c("Gene", "Module", "meta_p_adj.BH")],
T.M_pT_res_stats2[,c("Gene", "Module", "meta_p_adj.BH")])
names(Mod.Overlap_df) <- c("GeneA", "Alv2M_Module", "Alv2M_adjP", "GeneB", "Adv2M_Module", "Adv2M_adjP")
Core.Mods <- Mod.Overlap_df[Mod.Overlap_df$Alv2M_Module == Mod.Overlap_df$Adv2M_Module, ]
Module.Genes_list <- list()
for(i in levels(Core.Mods$Alv2M_Module)) {
Module.Genes_list[[i]] <- as.character(na.omit(Core.Mods$GeneA[Core.Mods$Alv2M_Module == i]))
}
names(Fibs.integrated@meta.data)
DefaultAssay(Fibs.integrated) <- "RNA"
Fibs.integrated <-
AddModuleScore(Fibs.integrated,
features = Module.Genes_list)
names(Fibs.integrated@meta.data)
names(Fibs.integrated@meta.data)[grep("^Cluster", names(Fibs.integrated@meta.data))] <-
paste0("Core_", levels(T.M_pT_res_stats2$Module))
pT_df <- data.frame(
Fibs.integrated@meta.data[, c("Sample.type", "All_pT", "dpt_branch", "Fibs_MajorClusters", "Dataset", "Sample.Subtype",
paste("Core", levels(T.M_pT_res_stats2$Module), sep = "_"))]
)
variables = paste("Core", levels(T.M_pT_res_stats2$Module), sep = "_")
pT_df_long <- reshape2::melt(pT_df[, c("All_pT", "Sample.type", "Dataset",variables)],
id.vars = c("All_pT","Sample.type", "Dataset"), na.rm = T)
levels(pT_df_long$variable)
pT_df_long$Dataset_ordered <-
factor(pT_df_long$Dataset, levels = levels(pT_df_long$Dataset) )
pT_df_long$Dataset_type <-
factor(pT_df_long$Dataset_ordered, levels = levels(pT_df_long$Dataset_ordered),
labels = c("CTR & Tumour", "CTR Only", "CTR Only", rep("CTR & Tumour", 6)))
names(pT_df_long)
levels(pT_df_long$variable)
Mod_cols2 <- Mod_cols
names(Mod_cols2) <- levels(pT_df_long$variable)
pT_df_long$Mod.label <- factor(pT_df_long$variable,
labels = c("Progenitor", "Early-activation",
"Proto-differentiation", "Differentiation"))
Fig_4F <-
pT_df_long[!is.na(pT_df_long$variable), ] %>%
ggplot(aes(x = All_pT, y = value, colour = variable)) +
facet_grid(Mod.label~Dataset_type, scales = "free_y") +
geom_smooth(method = "loess") +
guides(colour = guide_legend(override.aes = list(size = 2))) +
theme_bw(base_size = 15) +
theme(legend.position = "none", panel.grid = element_blank(),
strip.text = element_text(size = 10)) +
xlab("Pseudotime") + ylab("Module Score") +
scale_color_manual(values = Mod_cols2)
Fig_4F
箱形图显示按组织类型和拟时序五分位数分组的细胞中共有DPT模块的表达
Fibs.integrated$All_pT_5cat <- factor(Hmisc::cut2(Fibs.integrated$All_pT, g = 5),
labels = paste0("pT q", 1:5))
#Early−activation模块
test <- aggregate(x =Fibs.integrated@meta.data,
by =Core_Transient ~ All_pT_5cat + SampleID + Sample.type,
FUN = median)
Transient.boxplot <-
test %>%
drop_na(All_pT_5cat) %>%
ggplot(aes(x = Sample.type, y = Core_Transient, fill = Sample.type)) +
theme_pubr(base_size = 15) +
theme(axis.text.x = element_blank()) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5, size = 0.1) +
facet_grid(~All_pT_5cat) +
ylab("Early-activation Module\n(Normalised exprs)") +
stat_compare_means(comparisons = list(c("Control", "Tumour")),
size = 2.5,
bracket.size = 0.1, tip.length = 0.01,
label.y = max(test$Core_Transient)+0.1,
#vjust = 0.5,
hide.ns = T)+
expand_limits(y = 2)
#Proto−Differentation模块
test <- aggregate(x =Fibs.integrated@meta.data,
by =Core_Proto.Differentiation ~ All_pT_5cat + SampleID + Sample.type,
FUN = median)
PD.boxplot <-
test %>%
drop_na(All_pT_5cat) %>%
ggplot(aes(x = Sample.type, y = Core_Proto.Differentiation, fill = Sample.type)) +
theme_pubr(base_size = 15) +
theme(axis.text.x = element_blank()) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5, size = 0.1) +
facet_grid(~All_pT_5cat) +
ylab("Proto-Differentation Module\n(Normalised exprs)") +
stat_compare_means(comparisons = list(c("Control", "Tumour")),
size = 2.5,
bracket.size = 0.1, tip.length = 0.01,
label.y = max(test$Core_Proto.Differentiation)+0.1,
#vjust = 0.5,
hide.ns = T)+
expand_limits(y = 2)
#Differentation模块
test <- aggregate(x =Fibs.integrated@meta.data,
by =Core_Differentiation ~ All_pT_5cat + SampleID + Sample.type,
FUN = median)
Differentiation.boxplot <-
test %>%
drop_na(All_pT_5cat) %>%
ggplot(aes(x = Sample.type, y = Core_Differentiation, fill = Sample.type)) +
theme_pubr(base_size = 15) +
theme(axis.text.x = element_blank()) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5, size = 0.1) +
facet_grid(~All_pT_5cat) +
ylab("Differentation Module\n(Normalised exprs)") +
stat_compare_means(comparisons = list(c("Control", "Tumour")),
size = 2.5,
bracket.size = 0.1, tip.length = 0.01,
label.y = max(test$Core_Differentiation)+0.1,
hide.ns = T)+
expand_limits(y = 1.5)
Fig_4G <-
ggarrange(
Transient.boxplot,
PD.boxplot,
Differentiation.boxplot,
nrow = 3, common.legend = T, legend = "right")
Fig_4G
条形图显示每个共识DPT模块的REACTOME途径富集结果
REACTOME<-REACTOME[c(5,1,18,49,59,58),]
REACTOME$"Pathway genes in Consensus Module (%)"<-(REACTOME$Coverage.pct)*100
REACTOME<-REACTOME[rev(rownames(REACTOME)),]
col<-rep(c("#984EA3","#4DAF4A","#377EB8"),each = 2)
names(col)<-REACTOME$Name
ggplot(REACTOME, aes(x=factor(Name,levels=REACTOME$Name), y=`Pathway genes in Consensus Module (%)`,
fill = Name,label=paste0(REACTOME$Name," (adj.P=", signif(REACTOME$q.value.FDR.B.H, 3),") ")))+
geom_bar(stat = "identity", position = "dodge",color = "black")+
labs(x="REACTOME pathway", y="Pathway genes in Consensus Module (%)", fill="Module") +
theme_bw() +
theme(axis.text.y = element_blank())+
coord_flip()+
scale_fill_manual(values = col)+theme(legend.position = "none")+
geom_bar_text(position="stack",
place="left",
color="white",
min.size = 10,
reflow = TRUE)+
scale_y_continuous(expand = c(0,0),
limits = c(0,55))+
scale_x_discrete(expand=c(0,0))
线图显示了通过mxIHC的每个成纤维细胞亚群的配对肿瘤和相邻正常组织之间的平均HSPA1A表达水平
Fibs_HSPA1A.agg <- aggregate(
MxIHC_Fibroblasts$HSPA1A,
by = list(Sample = MxIHC_Fibroblasts$Slide.name,
Subpop = MxIHC_Fibroblasts$Cell.type,
TvN = MxIHC_Fibroblasts$Sample.Region.TvN,
Subtype = MxIHC_Fibroblasts$Subtype,
Inflam.Fibro = MxIHC_Fibroblasts$Control...Fibrosis.Inflammation,
exclusion = MxIHC_Fibroblasts$Include.Normal.95pct),
median)
Fig_4I <-
Fibs_HSPA1A.agg %>%
drop_na(TvN) %>%
filter(!Sample == "Drop-Seq Cohort 1") %>%
ggplot(aes(x = TvN, y = x)) +
theme_pubr(base_size = 15) +
theme(legend.position = "none", axis.title.x = element_blank()) +
facet_grid(~Subpop) +
geom_point(size = 0.1, aes(colour = Subpop)) +
geom_line(aes(group = Sample, colour = Subpop)) +
scale_colour_manual(values = Fibs_col.palette) +
rotate_x_text(angle = 45) +
ylab("HSPA1A\n(MxIHC Median per cell exprs)") +
stat_compare_means(comparisons = list(c("Control", "Tumour")), paired = T,
size = 4,
method = "wilcox", label.y = 0.18, tip.length = 0.01)+
expand_limits(y = 0.2)
Fig_4I
这些结果表明了两种成纤维细胞亚群(外膜和肺泡)都可以作为肌成纤维细胞的组织驻留祖细胞。这些数据还表明,无论祖细胞亚群如何,转分化过程都重要:其涉及炎症基因上调的短暂阶段,独立于肿瘤的相互作用关系;随后是涉及热休克反应信号的原始分化,通过与肿瘤的相互作用而增加;最终导致完全分化的肌成纤维细胞表型,其ECM组织和胶原形成能力增加,这在肿瘤相关刺激下显著增强。