复现文章信息:
文章题目:Single-cell analysis reveals prognostic fibroblast subpopulations linked to molecular and immunological subtypes of lung cancer 期刊:Nature Communications 日期:2023年1月31日 DOI: 10.1038/s41467-023-35832-6
多重IHC(mxIHC)和 CIBERSORT显示成纤维细胞亚群占据空间离散的小生境和不同的NSCLC组织亚型富集
在非小细胞肺癌的对照肺组织中,上皮和肺泡成纤维细胞亚群富集,并被肌成纤维细胞所取代。此外,NSCLC亚型之间的成纤维细胞亚群丰度也不同,LUAD肿瘤在三个亚群之间表现出更大的异质性,而LUSC肿瘤始终具有高水平的肌成纤维细胞。
library(tidyverse)
library(ggsci)
library(ggplot2)
library(ggpubr)
library(Hmisc)
library(Seurat)
data_directory <- "H:\\文献复现\\6\\"
source(paste0(data_directory, "0_NewFunctions.R"))
load(paste0(data_directory, "MxIHC_Zenodo.Rdata"))
load(paste0(data_directory, "IntegratedFibs_Zenodo.Rdata"))
load(paste0(data_directory, "CIBERSORT_Zenodo.Rdata"))
load(paste0(data_directory, "BulkData_Zenodo.Rdata"))
散点图显示了推定亚群标志物的差异表达统计量,突出显示了适用于mxIHC分析的标志物
分别选择CD34、AOC3和POSTN或ACTA2 (α-SMA)作为内皮细胞、肺泡细胞和肌成纤维细胞的最佳IHC标记
fibs_names <- c("Adventitial", "Alveolar", "Myo")
names(All.Markers)
All.Markers$cluster <- factor(
All.Markers$cluster,
levels = levels(All.Markers$cluster)[3:1]
)
All.Markers$label <- All.Markers$gene
All.Markers$label[All.Markers$p_val_adj.Sample > 0.0001 | All.Markers$log2FC.Sample < 0.75 |
!All.Markers$HPA_reliability == "Enhanced" |
!All.Markers$Detected.Intracellular == T |
is.na(All.Markers$HPA_reliability) |
is.na(All.Markers$Detected.Intracellular)] <- NA
All.Markers$label2 <- All.Markers$gene
All.Markers$label2[!All.Markers$gene %in% c("AOC3", "CD34", "POSTN", "ACTA2")] <- NA
All.Markers$label[All.Markers$label %in% c("AOC3", "CD34", "POSTN", "ACTA2")] <- NA
Fig_3A <-
All.Markers[All.Markers$p_val_adj.Sample < 0.01, ] %>%
drop_na(p_val_adj.Sample) %>%
ggplot(aes(x = log2FC.Sample, y = -log10(p_val_adj.Sample) ,
size = pct.1,
color = HPA_reliability == "Enhanced" & Detected.Intracellular == T &
!is.na(HPA_reliability) & !is.na(Detected.Intracellular))) +
theme_pubr(base_size = 7) +
geom_point() +
scale_size(range = c(0.1,2), name = "Detection rate") +
scale_color_manual(name = "IHC QC passed", values = c("grey70", "red")) +
facet_wrap(~cluster, ncol = 3) +
ggrepel::geom_text_repel(aes(label = label), colour = "black", min.segment.length = 0, size = 2, nudge_x = 0.2, fontface = "italic") +
ggrepel::geom_label_repel(aes(label = label2), colour = "black", label.padding = 0.15,
min.segment.length = 0, size = 3, nudge_x = 0.2, fontface = "italic") +
theme(legend.position = "right") +
ylab("Sample level -log10(adj.P)") +
xlab("Sample level Average log2(FC)")
Fig_3A
显示不同成纤维细胞群空间分布的点模式图
#LUAD样本
Slide = "Lung Adeno 1"
sample_data <- MxIHC_Fibroblasts %>% filter(Sample == Slide)
XY_cols <- names(sample_data)[1:2]
convex.hull.idx <- chull(as.matrix(sample_data[sample_data$Sample.Region.TvN == "Tumour", XY_cols]))
convex.hull.pts <- rownames(sample_data[sample_data$Sample.Region.TvN == "Tumour",])[convex.hull.idx]
LUAD_eg <-
sample_data %>%
slice_sample(prop = 0.25) %>%
ggplot(aes(x = X.Center..Pxl., y = Y.Center..Pxl., colour = Cell.type)) +
geom_point(size = 0.1) +
theme_void(base_size = 7) +
guides(colour = guide_legend(override.aes = list(size = 2))) +
theme(legend.position = "none") +
scale_color_manual(values = Fibs_col.palette) +
geom_polygon(data = MxIHC_Fibroblasts[convex.hull.pts, XY_cols],
colour = "black", alpha = 0, linetype = "dotted", size = 2)
LUAD_eg
#LUSC样本
Slide = "Drop-Seq Cohort 10"
sample_data <- MxIHC_Fibroblasts %>% filter(Sample == Slide)
XY_cols <- names(sample_data)[1:2]
convex.hull.idx <- chull(as.matrix(sample_data[sample_data$Sample.Region.TvN == "Tumour", XY_cols]))
convex.hull.pts <- rownames(sample_data[sample_data$Sample.Region.TvN == "Tumour",])[convex.hull.idx]
LUSC_eg <-
sample_data %>%
slice_sample(prop = 0.25) %>%
ggplot(aes(x = X.Center..Pxl., y = Y.Center..Pxl., colour = Cell.type)) +
geom_point(size = 0.1) +
theme_void(base_size = 7) +
guides(colour = guide_legend(override.aes = list(size = 2))) +
theme(legend.position = "none") +
scale_color_manual(values = Fibs_col.palette) +
geom_polygon(data = MxIHC_Fibroblasts[convex.hull.pts, XY_cols],
colour = "black", alpha = 0, linetype = "dotted", size = 2)
LUSC_eg
layer_scales(LUSC_eg)$x$range$range
xmax.diff <- max(layer_scales(LUSC_eg)$x$range$range) - max(layer_scales(LUAD_eg)$x$range$range)
LUAD_xMax <- max(layer_scales(LUAD_eg)$x$range$range)
ymax.diff <- max(layer_scales(LUSC_eg)$y$range$range) - max(layer_scales(LUAD_eg)$y$range$range)
LUAD_yMax <- max(layer_scales(LUAD_eg)$y$range$range)
Fig_3C <-
ggarrange(LUAD_eg + xlim(c(-0.5*xmax.diff, LUAD_xMax + 0.5*xmax.diff)) +
ylim(c(-0.5*ymax.diff, LUAD_yMax + 0.5*ymax.diff)),
LUSC_eg , ncol = 1, nrow = 2)
Fig_3C
箱形图显示了不同NSCLC亚型中各成纤维细胞亚群的相对丰度
Fibs.integrated$SampleID2 <- paste(Fibs.integrated$Dataset, Fibs.integrated$SampleID, sep = "_")
Sample_MetaData$SampleID2 <- paste(Sample_MetaData$Dataset, Sample_MetaData$SampleID, sep = "_")
Sample_MetaData$Sample.type[Sample_MetaData$Sample.type == "Tumor"] <- "Tumour"
Sample_MetaData$Sample.Subtype2 <- droplevels(Sample_MetaData$Sample.Subtype, exclude = c("Carcinoid" , "Pleiomorphic.carcinoma", "LCLC") )
dt <- as.table(as.matrix(table(Fibs.integrated$SampleID2,
Fibs.integrated$Fibs_MajorClusters)))
Sample.pct_long <- as.data.frame(dt/rowSums(dt)*100)
Sample.pct <- reshape2::dcast(as.data.frame(dt), formula = Var1 ~ Var2)
rownames(Sample.pct) <- Sample.pct$Var1
Sample.pct <- Sample.pct[,2:4]/rowSums(Sample.pct[,2:4])*100
scRNAseq_MetaData2 <- merge(Sample_MetaData, Sample.pct_long,
by.x = "SampleID2", by.y = "Var1")
names(scRNAseq_MetaData2)[23] <- "Residual"
names(scRNAseq_MetaData2)[22] <- "Fibs_SubPop"
scRNAseq_MetaData2$Fibs_SubPop <- factor(
as.character(scRNAseq_MetaData2$Fibs_SubPop),
levels = c("Adventitial", "Alveolar", "Myo")
)
my.comparisons <- list(c("Control", "LUAD"), c("Control", "LUSC"), c("LUAD", "LUSC"))
Fig_3E <-
scRNAseq_MetaData2 %>%
drop_na(Sample.Subtype2) %>%
ggplot(aes(x = Sample.Subtype2, y = Residual)) +
theme_pubr(base_size = 15) +
theme(legend.position = "none",
axis.title.x = element_blank()) +
facet_grid(~Fibs_SubPop) +
geom_boxplot(outlier.shape = NA, aes(fill = Fibs_SubPop)) +
scale_fill_manual(values = Fibs_col.palette) +
geom_jitter(alpha = 0.5, width = 0.15, size = 0.9) +
rotate_x_text(angle = 45) +
ylab("% of Fibroblasts per sample\n(scRNA-seq)") +
scale_y_continuous(breaks = c(0,25,50, 75, 100),
limits = c(0,135)) +
stat_compare_means(comparisons = my.comparisons,
size = 4,
method = "wilcox.test", tip.length = 0.01)
Fig_3E
检测对照、LUAD和LUSC组织样本中每个亚群的相对丰度。在scRNA-seq数据集中,与LUAD和LUSC相比,对照组织样本中的外膜成纤维细胞显著更丰富。对照组织中的肺泡成纤维细胞同样最丰富,但在一些LUAD样本中也检测到高水平,而在LUSC中很少出现。相反,与对照组织相比,LUAD和LUSC中的肌成纤维细胞丰度增加,但与LUAD相比,LUSC中的肌成纤维细胞丰度显著更高
通过mxIHC测量,分析病理学注释的LUAD或LUSC肿瘤区域和组织块内的非肿瘤区域作为对照
MxIHC_Fibroblasts$TvN.Grouping.ID <- paste(MxIHC_Fibroblasts$Slide.name, MxIHC_Fibroblasts$Sample.Region.TvN, sep = "_")
dt <- as.table(as.matrix(table(MxIHC_Fibroblasts$TvN.Grouping.ID,
MxIHC_Fibroblasts$Cell.type)))
Sample.pct_long <- as.data.frame(dt/rowSums(dt)*100)
Sample.pct_long$Slide.name <- do.call(rbind, strsplit(as.character(Sample.pct_long$Var1), "_", fixed = T))[,1]
Sample.pct_long$TvN <- do.call(rbind, strsplit(as.character(Sample.pct_long$Var1), "_", fixed = T))[,2]
TvN.Data_long <- merge(MxIHC_Sample.metaData, Sample.pct_long,
by = "Slide.name")
names(TvN.Data_long)[names(TvN.Data_long) == "Freq"] <- "Region.pct"
names(TvN.Data_long)[names(TvN.Data_long) == "Var2"] <- "Fibs_SubPop"
TvN.Data_long$SubtypeTvN <- TvN.Data_long$Subtype
TvN.Data_long$SubtypeTvN[TvN.Data_long$TvN == "Control"] <- "Control"
TvN.Data_long <- TvN.Data_long[!TvN.Data_long$TvN == "NA", ]
names(TvN.Data_long)
Fig_3F <-
TvN.Data_long[] %>%
ggplot(aes(x = SubtypeTvN, y = Region.pct)) +
theme_pubr(base_size = 15) +
theme(legend.position = "none",
axis.title.x = element_blank()) +
facet_grid(~Fibs_SubPop) +
geom_boxplot(outlier.shape = NA, aes(fill = Fibs_SubPop)) +
scale_fill_manual(values = Fibs_col.palette) +
geom_jitter(alpha = 0.5, width = 0.15, size = 0.9) +
rotate_x_text(angle = 45) +
ylab("% of Fibroblasts per region\n(MxIHC)") +
scale_y_continuous(breaks = c(0,25,50, 75, 100),
limits = c(0,135)) +
stat_compare_means(comparisons = my.comparisons, size = 4,
method = "wilcox.test", tip.length = 0.01)
Fig_3F
使用CIBERSORTx分析TCGA的RNA-seq数据集
filteredTraits_long <- reshape2::melt(filteredTraits, id.vars = c("Sample.Subtype", "Grade", "LUAD.MolSubtype_recalculated", "TP53.Status"),
measure.vars = c("Adventitial_Fibs.pct", "Alveolar_Fibs.pct", "Myo_Fibs.pct"))
filteredTraits_long$Fibs_SubPop <- factor(
filteredTraits_long$variable,
levels = levels(filteredTraits_long$variable),
labels = c("Adventitial", "Alveolar", "Myo")
)
Fig_3G <-
filteredTraits_long %>%
drop_na(Sample.Subtype) %>%
ggplot(aes(x = Sample.Subtype, y = value)) +
theme_pubr(base_size = 15) +
theme(legend.position = "none", axis.title.x = element_blank()) +
facet_grid(~Fibs_SubPop) +
geom_boxplot(outlier.shape = NA, aes(fill = Fibs_SubPop)) +
scale_fill_manual(values = Fibs_col.palette) +
geom_jitter(alpha = 0.5, width = 0.15, size = 0.9) +
rotate_x_text(angle = 45) +
ylab("% of Fibroblasts per sample\n(CIBERSORTx)") +
scale_y_continuous(breaks = c(0,25,50, 75, 100),
limits = c(0,135)) +
stat_compare_means(comparisons = my.comparisons, size = 4,
label = "p.signif",
method = "wilcox.test", tip.length = 0.01)
Fig_3G
证实了成纤维细胞亚群和组织类型之间的相关性