复现文章信息:
文章题目:Single-cell analysis reveals prognostic fibroblast subpopulations linked to molecular and immunological subtypes of lung cancer 期刊:Nature Communications 日期:2023年1月31日 DOI: 10.1038/s41467-023-35832-6
通过整合7个scRNA-seq数据集研究NSCLC中的成纤维细胞异质性
非小细胞肺癌中存在的成纤维细胞与非癌性肺组织中确定的三个主要亚群一致,并且可能对ECM维持/重构进行差异调节。这些数据还表明,与NSCLC肿瘤的相互作用导致这些亚群中基因表达的显著变化,除了亚群特异性表型变化外,还持续涉及间质胶原的上调。此外,与对照肺成纤维细胞相比,非小细胞肺癌的myoCAF基因特征增加,而对照肺组织的iCAF基因特征增加。
library(Seurat)
library(WGCNA)
library(tidyverse)
library(ggpubr)
library(ggsci)
library(msigdbr)
library(GSVA)
library(limma)
library(readxl)
library(org.Hs.eg.db)
library(hpar)
data_directory <- "H:\\文献复现\\6\\"
source(paste0(data_directory, "0_NewFunctions.R"))
load(paste0(data_directory, "IntegratedFibs_Zenodo.Rdata"))
load(paste0(data_directory, "HCLA_annotation.Rdata"))
Fibroblast_GeneSets_v2_2021_0623 <- read_excel(paste0(data_directory, "Fibroblast GeneSets v2 2021_0623.xlsx"))
#https://static-content.springer.com/esm/art%3A10.1038%2Fs41467-023-35832-6/MediaObjects/41467_2023_35832_MOESM21_ESM.xlsx
Figure2i <- read_excel(paste0(data_directory, "Figure 2i.xlsx"))
Figure2j <- read_excel(paste0(data_directory, "Figure 2j.xlsx"))
作者使用来自NSCLC和对照组织样本的另外6个scRNA-seq数据集重复了成纤维细胞分选(不包括壁细胞)的过程。生成了一个包含9673个成纤维细胞的数据集(5183个细胞来自39个对照组样本,3440个来自46个LUAD样本,654个来自16个LUSC样本)。通过无监督聚类识别的三个主要亚群,并且鉴定的三个簇与先前在对照肺组织中描述的成纤维细胞亚群一致(外膜、肺泡和肌成纤维细胞)。
DefaultAssay(Fibs.integrated) <- "integrated"
Fibs.integrated <- ScaleData(Fibs.integrated)
Fibs.integrated <- RunPCA(Fibs.integrated)
ElbowPlot(Fibs.integrated, ndims = 50)
PCs2use <- c(1:30)
Fibs.integrated <- RunUMAP(Fibs.integrated, dims = PCs2use)
Fibs.integrated <- FindNeighbors(Fibs.integrated, dims = PCs2use, verbose = FALSE)
Fibs.integrated <- AddMetaData(Fibs.integrated, as.data.frame(Fibs.integrated@reductions$umap@cell.embeddings))
Fibs.integrated <- FindClusters(Fibs.integrated, resolution = 0.1, verbose = FALSE)
Fibs.integrated$Fibs_MajorClusters <- factor(Fibs.integrated$seurat_clusters, levels = c(2,1,0), labels = c("Adventitial", "Alveolar", "Myo"))
在7套数据中作者为了整合和纠正数据集之间的批效应,使用典型相关分析并且使用共享最近邻模块化优化算法执行无监督聚类。为了确定最具生物学信息量的聚类解决方案,改变分辨率参数,迭代增加识别的聚类数量,并检查每个聚类识别的标记基因数量(样本水平平均log foldchange>1,adj.P<0.01,至少有50%的样本表示)。这表明,三个主要的集群被一致地确定,而更高分辨率的聚类导致鉴定集群没有或很少的标记基因符合这些标准
Fig_2B <-
DimPlot(Fibs.integrated, group.by = "Fibs_MajorClusters", pt.size = 1.2) &
theme_pubr(base_size = 15) &
scale_colour_manual(values = Fibs_col.palette) &
theme(legend.position = c(0,0),
legend.justification = c(0,0),
legend.background = element_blank(),
plot.title = element_blank(),
legend.key.size = unit(7,"pt")
)
Fig_2B
首先计算每个亚群(上皮细胞、肺泡细胞和肌成纤维细胞)的样品水平基因表达谱(单个细胞的平均值),然后进行差异基因表达分析
Fibs.integrated <- SetIdent(Fibs.integrated, value = "Fibs_MajorClusters")
All.Markers <- FindAllMarkers(Fibs.integrated, test.use = "bimod", assay = "RNA", only.pos = T)
All.Markers$ENSEMBL <- mapIds(org.Hs.eg.db, keys = All.Markers$gene, keytype = "SYMBOL", column="ENSEMBL")
test <- hpar::getHpa(All.Markers$ENSEMBL)
table(test$Reliability)
test <- test[!duplicated(test$Gene.name), ]
rownames(test) <- test$Gene.name
All.Markers$HPA_reliability <- test[All.Markers$gene, "Reliability"]
test <- hpar::getHpa(All.Markers$ENSEMBL, hpadata = "hpaSubcellularLoc")
test <- test[!duplicated(test$Gene.name), ]
rownames(test) <- test$Gene.name
test$membrane <- 1:nrow(test) %in% grep("membrane", test$GO.id, fixed = T)
test$cytosol <- 1:nrow(test) %in% grep("Cytosol", test$GO.id, fixed = T)
test$filaments <- 1:nrow(test) %in% grep("filaments", test$GO.id, fixed = T)
test$nuclear <- 1:nrow(test) %in% grep("Nucle", test$GO.id, fixed = T)
All.Markers$subcell.loc <- test[All.Markers$gene,
c("membrane","cytosol","filaments","nuclear")]
All.Markers$pct.diff <- All.Markers$pct.1 - All.Markers$pct.2
All.Markers$Detected.Intracellular <- rowSums(All.Markers$subcell.loc) > 0
#样本层面成纤维细胞标记物分析
Fibs.integrated <- SetIdent(Fibs.integrated, value = "Fibs_MajorClusters")
Fibs.integrated_samples <- AverageExpression(Fibs.integrated, assays = "RNA", return.seurat = T,
group.by = c("Fibs_MajorClusters", "SampleID2"),
slot = "data")
Fibs.integrated_samples <- NormalizeData(Fibs.integrated_samples)
dim(Fibs.integrated_samples)
#从样本名称中提取元数据
Fibs.integrated_samples@meta.data <-
Fibs.integrated_samples@meta.data %>%
mutate(Fibs_MajorClusters = str_split_fixed(colnames(Fibs.integrated_samples), "_", 3)[,1],
Dataset = str_split_fixed(colnames(Fibs.integrated_samples), "_", 3)[,2],
SampleID = str_split_fixed(colnames(Fibs.integrated_samples), "_", 3)[,3])
#整理样本和样本差异分析结果
Fibs.Sample_MetaData <- Fibs.integrated_samples@meta.data[,"SampleID", drop = F]
matchSamples <- match(Fibs.Sample_MetaData$SampleID, Sample_MetaData$SampleID)
table(is.na(matchSamples))
Fibs.Sample_MetaData <- cbind(Fibs.Sample_MetaData, Sample_MetaData[matchSamples, ])
keepCols_metaData <- c("SampleID", "nCount_RNA", "nFeature_RNA",
"Fibs_MajorClusters", "Dataset")
Fibs.integrated_samples@meta.data <- Fibs.integrated_samples@meta.data[, keepCols_metaData]
Fibs.integrated_samples <- AddMetaData(Fibs.integrated_samples, Fibs.Sample_MetaData)
Fibs.integrated_samples$Sample.type[Fibs.integrated_samples$Sample.type == "Tumor"] <- "Tumour"
Fibs.integrated_samples$pN <- droplevels(Fibs.integrated_samples$pN, exclude = c(NA, "NA"))
levels(Fibs.integrated_samples$pN)
Fibs.integrated_samples <- SetIdent(Fibs.integrated_samples, value = "Fibs_MajorClusters")
Sample.level_Markers <- FindAllMarkers(Fibs.integrated_samples, test.use = "wilcox", assay = "RNA",
only.pos = F, return.thresh = 1, min.pct = 0.25, logfc.threshold = 0)
Marker.match <- match(paste(All.Markers$cluster, All.Markers$gene),
paste(Sample.level_Markers$cluster, Sample.level_Markers$gene))
All.Markers$p_val_adj.Sample <- Sample.level_Markers$p_val_adj[Marker.match]
All.Markers$log2FC.Sample <- Sample.level_Markers$avg_log2FC[Marker.match]
显示每个亚群10个最显著标志物的样品水平表达(单细胞平均值)的热图
All.Markers$cluster <- factor(as.character(All.Markers$cluster), levels = c("Adventitial", "Alveolar", "Myo"))
All.Markers %>%
group_by(cluster) %>%
filter(avg_log2FC > 0) %>%
top_n(w = -p_val_adj.Sample, n = 10) -> top10_markers
DefaultAssay(Fibs.integrated_samples) <- "RNA"
Fibs.integrated_samples <- ScaleData(Fibs.integrated_samples, features = top10_markers$gene, assay = "RNA")
levels(Fibs.integrated_samples$Fibs_MajorClusters)
Fibs.integrated_samples$Fibs_MajorClusters <- factor(Fibs.integrated_samples$Fibs_MajorClusters,
levels = c("Adventitial", "Alveolar", "Myo")
)
Fig_2C <-
DoHeatmap(Fibs.integrated_samples, features = rev(top10_markers$gene),
assay = "RNA",
angle = 45,
group.by = c("Fibs_MajorClusters") ,
group.colors = Fibs_col.palette,
label = T, disp.max = 2) &
theme(axis.text.y = element_text(face = "italic", size = 10),legend.position = "none")
Fig_2C
使用REACTOME通路进行GSVA,以及使用先前描述的成纤维细胞亚群的基因标记进行GSVA
h_gene_sets = msigdbr(species = "human", category = "C2", subcategory = "CP:REACTOME")
Reactome_pathwaysH = split(x = h_gene_sets$human_gene_symbol, f = h_gene_sets$gs_name)
REACTOME_gsva_res <- gsva(as.matrix(Fibs.integrated_samples@assays$RNA@data), gset.idx.list = Reactome_pathwaysH, method="gsva",
min.sz=10, max.sz=500)
#通过经验Bayes进行差异分析
f <- factor(Fibs.integrated_samples$Fibs_MajorClusters, levels = c("Myo","Alveolar", "Adventitial"))
design <- model.matrix(~0+f)
colnames(design) <- c("Myo", "Alveolar", "Adventitial")
REACTOME_gsva.fit <- lmFit(REACTOME_gsva_res, design)
REACTOME_gsva.fit <- eBayes(REACTOME_gsva.fit)
contrast.matrix <- makeContrasts(Myo.v.ALL = Myo-(Alveolar+Adventitial)/2,
Alv.v.ALL = Alveolar-(Myo+Adventitial)/2,
Adv.v.ALL = Adventitial-(Myo+Alveolar)/2,
Myo.v.Alv = Myo - Alveolar,
Myo.v.Adv = Myo-Adventitial,
Alv.v.Adv = Alveolar-Adventitial,
levels=design)
REACTOME_gsva.fit2 <- contrasts.fit(REACTOME_gsva.fit, contrast.matrix)
REACTOME_gsva.fit2 <- eBayes(REACTOME_gsva.fit2)
res2 <- decideTests(REACTOME_gsva.fit2, p.value=0.01, method = "global")
summary(res2)
REACTOME_tt.res <- list(
Myo = topTable(REACTOME_gsva.fit2, coef = "Myo.v.ALL", n=1270),
Alveolar = topTable(REACTOME_gsva.fit2, coef = "Alv.v.ALL", n=1270),
Adventitial = topTable(REACTOME_gsva.fit2, coef = "Adv.v.ALL", n=1270)
)
REACTOME_tt.res_df <- do.call(rbind, REACTOME_tt.res)
REACTOME_tt.res_df$Cluster <- do.call(rbind, strsplit(rownames(REACTOME_tt.res_df), ".", 2))[,1]
REACTOME_tt.res_df$Pathway <- do.call(rbind, strsplit(rownames(REACTOME_tt.res_df), ".", 2))[,2]
REACTOME_tt.res_df$Pathway.label <- gsub("REACTOME_", "", REACTOME_tt.res_df$Pathway)
REACTOME_tt.res_df$Pathway.label <- gsub("_", " ", REACTOME_tt.res_df$Pathway.label)
REACTOME_tt.res_subset <- REACTOME_tt.res_df[REACTOME_tt.res_df$logFC > 0, ] %>%
group_by(Cluster) %>%
top_n(wt = -adj.P.Val, n = 5)
REACTOME_tt.res_subset <- REACTOME_tt.res_subset[-10, ]
REACTOME_tt.res_subset <- rbind(REACTOME_tt.res_subset, REACTOME_tt.res_df["Alveolar.REACTOME_TRP_CHANNELS", ])
条形图显示每个亚群中最显著上调的REACTOME通路的log2倍数变化
Fig_2D <-
REACTOME_tt.res_subset %>%
ggplot(aes(x = reorder(Pathway.label, logFC), y = logFC, label = paste0("adj.P=", signif(adj.P.Val, 3)),
fill = Cluster)) +
theme_pubr(base_size = 15)+
theme(axis.title.y = element_blank(),legend.title = element_blank(),
axis.text.y = element_text(size = 7),
legend.position = "top", strip.text = element_blank(), legend.key.size = unit(10, "pt"))+
geom_bar(stat = "identity", alpha = 0.75)+
facet_grid(Cluster~., scales = "free", space = "free") +
geom_text(hjust = 1.1, colour = "white", size = 3) +
scale_fill_manual(values = Fibs_col.palette) +
coord_flip()
Fig_2D
检测每个成纤维细胞亚群是否上调了与特定基质体类别(基底膜、间质胶原、ECM糖蛋白和蛋白聚糖),结果表明肌成纤维细胞上调了每个基质体类别中的多个基因,包括大多数上调的间质胶原
CGP_gene_sets = msigdbr(species = "human", category = "C2")
CGP_pathwaysH = split(x = CGP_gene_sets$human_gene_symbol, f = CGP_gene_sets$gs_name)
All.Markers$NABA_Matrisome <- All.Markers$gene %in% CGP_pathwaysH$NABA_MATRISOME
All.Markers$NABA_BASEMENT_MEMBRANES <- All.Markers$gene %in% CGP_pathwaysH$NABA_BASEMENT_MEMBRANES
All.Markers$NABA_COLLAGENS <- All.Markers$gene %in% CGP_pathwaysH$NABA_COLLAGENS
All.Markers$NABA_ECM_GLYCOPROTEINS <- All.Markers$gene %in% CGP_pathwaysH$NABA_ECM_GLYCOPROTEINS
All.Markers$NABA_PROTEOGLYCANS <- All.Markers$gene %in% CGP_pathwaysH$NABA_PROTEOGLYCANS
names(All.Markers)
All.Markers$type <- factor(
apply(All.Markers[,c("NABA_BASEMENT_MEMBRANES","NABA_COLLAGENS","NABA_ECM_GLYCOPROTEINS","NABA_PROTEOGLYCANS")],1, which.max),
levels = 1:5,
labels = c("Basement.Membranes", "Interstitial.Collagens",
"ECM.Glycoproteins", "Proteoglycans", "Other")
)
All.Markers$type[All.Markers$type == "Basement.Membranes" &
All.Markers$NABA_BASEMENT_MEMBRANES == F] <- "Other"
条形图显示每个亚群差异表达的不同基质体组分的比例
Fig_2E <-
All.Markers[!All.Markers$type == "Other" &
All.Markers$p_val_adj.Sample < 0.01, ] %>%
ggplot(aes(x = type, fill = cluster)) +
theme_pubr(base_size = 15) +
theme(legend.position = "top", legend.key.size = unit(20,"pt"), legend.title = element_blank()) +
geom_bar(position = "fill") +
scale_fill_manual(values = Fibs_col.palette) +
rotate_x_text(angle = 45) +
ylab("Proportion of DEGs") +
xlab("Matrisome Category")
Fig_2E
#计算每个基质体类别的模块评分,并比较了成纤维细胞亚群之间的总体表达)。
Fibs.integrated_samples <- AddModuleScore(
Fibs.integrated_samples,
features = list(
CGP_pathwaysH$NABA_BASEMENT_MEMBRANES,
CGP_pathwaysH$NABA_COLLAGENS,
CGP_pathwaysH$NABA_ECM_GLYCOPROTEINS,
CGP_pathwaysH$NABA_PROTEOGLYCANS,
CGP_pathwaysH$NABA_COLLAGENS[!CGP_pathwaysH$NABA_COLLAGENS %in% CGP_pathwaysH$NABA_BASEMENT_MEMBRANES]
),
assay = "RNA"
)
names(Fibs.integrated_samples@meta.data)[grep("^Cluster", names(Fibs.integrated_samples@meta.data))] <-
c("Basement.Membranes", "Collagens", "ECM.Glycoproteins", "Proteoglycans", "Interstitial.Collagens")
comparisons <- list(c("Myo", "Adventitial"), c("Myo", "Alveolar"), c("Alveolar", "Adventitial"))
#箱形图显示编码每个亚群基底膜组分的基因的样品水平表达
pA <-
Fibs.integrated_samples@meta.data %>%
ggplot(aes(x = Fibs_MajorClusters, y = Basement.Membranes,
fill = Fibs_MajorClusters)) +
theme_pubr(base_size = 15) +
theme(legend.position = "right", axis.title.x = element_blank()) +
geom_boxplot(outlier.shape = NA, alpha = 0.75) +
geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
stat_compare_means(comparisons = list(c("Myo", "Adventitial"), c("Alveolar", "Adventitial")),
size = 5,
hide.ns = T,label.y = c(0.65,0.8)) +
scale_fill_manual(values = Fibs_col.palette) +
rotate_x_text(angle = 45) +
ylab("Basement Membranes") +
guides(fill = "none") + expand_limits(y = 0.9)
#箱形图显示了每个亚群间质胶原编码基因的样品水平表达
pB <-
Fibs.integrated_samples@meta.data %>%
ggplot(aes(x = Fibs_MajorClusters, y = Interstitial.Collagens,
fill = Fibs_MajorClusters)) +
theme_pubr(base_size = 15) +
theme(legend.position = "right", axis.title.x = element_blank()) +
geom_boxplot(outlier.shape = NA, alpha = 0.75) +
geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
stat_compare_means(comparisons = list(c("Myo", "Adventitial"), c("Myo", "Alveolar")),
size = 5,
hide.ns = T, label.y = c(0.9,1.1)) +
scale_fill_manual(values = Fibs_col.palette) +
ylab("Interstitial Collagens") +
rotate_x_text(angle = 45) +
guides(fill = "none") + expand_limits(y = 1.25)
Fig_2FG <- ggarrange(pA,pB, ncol = 2, labels = c("f", "g"))
Fig_2FG
#这表明与外膜和肺泡成纤维细胞相比,肌成纤维细胞显著上调了间质胶原;而与外膜成纤维细胞相比,肺泡和肌成纤维细胞的基底膜基因表达均显著增加)。
箱形图显示了按组织类型划分的每个亚群间质胶原编码基因的样本水平表达
comparisons <- list(c("Control", "Tumour"))
Fig_2H <-
Fibs.integrated_samples@meta.data %>%
drop_na(Sample.type) %>%
ggplot(aes(x = Sample.type, y = Interstitial.Collagens)) +
theme_pubr(base_size = 15) +
facet_wrap(~Fibs_MajorClusters) +
geom_boxplot(aes(fill = Fibs_MajorClusters), outlier.shape = NA, alpha = 0.75) +
geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
stat_compare_means(comparisons = comparisons,
size = 5,
label.y = 0.9) +
scale_fill_manual(values = Fibs_col.palette) +
rotate_x_text(angle = 45) +
ylab("Interstitial Collagens") +
guides(fill = "none") +
theme(axis.title.x = element_blank(),
legend.position = "none") + expand_limits(y = 1.1)
Fig_2H
在三个亚群的肿瘤样品中间质胶原表达显著增加
箱形图显示按组织类型划分的每个亚群编码myoCAF标志物的基因的样本水平表达
name<-colnames(Figure2i)
name[3]<-"myoCAF"
colnames(Figure2i)<-name
Figure2i<-cbind(Fibs.integrated_samples@meta.data,Figure2i[,c(1,3)])
comparisons <- list(c("Control", "Tumour"))
Fig_2I <-
Figure2i %>%
drop_na(Sample.type) %>%
ggplot(aes(x = Sample.type, y = myoCAF signature (relative expression))) +
theme_pubr(base_size = 5) +
facet_wrap(~Fibs_MajorClusters) +
geom_boxplot(aes(fill = Fibs_MajorClusters), outlier.shape = NA, alpha = 0.75) +
geom_jitter(width = 0.2, alpha = 0.5, size = 1) +
stat_compare_means(comparisons = comparisons,
size = 2.5,
label.y = 0.9) +
scale_fill_manual(values = Fibs_col.palette) +
rotate_x_text(angle = 45) +
ylab("myoCAF Signature") +
guides(fill = "none") +
theme(axis.title.x = element_blank(),
legend.position = "none") + expand_limits(y = 1.1)
Fig_2I
myoCAF基因标签在肿瘤样品中显著上调
箱形图显示了按组织类型划分的各亚群中编码iCAF标志物的基因的样本水平表达
name<-colnames(Figure2j)
name[3]<-"iCAF"
colnames(Figure2j)<-name
Figure2j<-cbind(Fibs.integrated_samples@meta.data,Figure2j[,c(1,3)])
comparisons <- list(c("Control", "Tumour"))
Fig_2J <-
Figure2j %>%
drop_na(Sample.type) %>%
ggplot(aes(x = Sample.type, y = iCAF)) +
theme_pubr(base_size = 15) +
facet_wrap(~Fibs_MajorClusters) +
geom_boxplot(aes(fill = Fibs_MajorClusters), outlier.shape = NA, alpha = 0.75) +
geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
stat_compare_means(comparisons = comparisons,
size = 5,
label.y = 0.9) +
scale_fill_manual(values = Fibs_col.palette) +
rotate_x_text(angle = 45) +
ylab("Interstitial Collagens") +
guides(fill = "none") +
theme(axis.title.x = element_blank(),
legend.position = "none") + expand_limits(y = 1.1)
Fig_2J
与对照组织相比,从肿瘤样品分离的成纤维细胞中iCAF基因标记显著下调