前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >学徒作业|GSE217845-胰腺癌中的巨噬细胞细分亚群

学徒作业|GSE217845-胰腺癌中的巨噬细胞细分亚群

作者头像
生信技能树jimmy
发布2024-04-15 12:40:08
1540
发布2024-04-15 12:40:08
举报
文章被收录于专栏:单细胞天地单细胞天地

这篇推文是数据处理过程中的合理的质量控制是很有必要的中的学徒作业,文章为:《IL-1β+ macrophages fuel pathogenic inflammation in pancreatic cancer》。数据集为GSE217845。

学徒作业的要求是:从上面的数据集GSE217845里面的10个胰腺癌的10x技术单细胞转录组数据的第一层次降维聚类分群里面提前髓系免疫细胞后,继续细分降维聚类拿到里面的巨噬细胞,然后继续细分巨噬细胞看看能否复现文章里面的:

复现文章的图1a

第一层次降维聚类分群

在做学徒作业的时候,sce.all.filt变量与上述推文中自行探索最佳质量控制流程部分是一样的。

第一层次降维聚类分群的代码如下,strict version部分沿用了推文中的严格过滤参数:

代码语言:javascript
复制
rm(list=ls())
options(stringsAsFactors = F) 
source('scRNA_scripts/lib.R')
getwd()

###### step1: 导入数据 ######   
# 付费环节 800 元人民币
# 参考:https://mp.weixin.qq.com/s/tw7lygmGDAbpzMTx57VvFw

dir='GSE217845_RAW/'
samples=list.dirs( dir )
samples
samples=samples[grep("Tumor",samples)]
sceList = lapply(samples,function(pro){ 
  # pro=samples[1] 
  print(pro)  
  tmp = Read10X(file.path(pro )) 
  if(length(tmp)==2){
    ct = tmp[[1]] 
  }else{ct = tmp}
  print(dim(ct))
  sce =CreateSeuratObject(counts =  ct ,
                          project =  pro  ,
                          min.cells = 5,
                          min.features = 300 )
  return(sce)
}) 
do.call(rbind,lapply(sceList, dim))

sce.all=merge(x=sceList[[1]],
              y=sceList[ -1 ],
              add.cell.ids = samples  ) 
names(sce.all@assays$RNA@layers)
sce.all[["RNA"]]$counts 
# Alternate accessor function with the same result
LayerData(sce.all, assay = "RNA", layer = "counts")
sce.all <- JoinLayers(sce.all)
dim(sce.all[["RNA"]]$counts )
 
as.data.frame(sce.all@assays$RNA$counts[1:10, 1:2])
head(sce.all@meta.data, 10)
table(sce.all$orig.ident) 

sp='human'

#### loose version:
{
  # 如果为了控制代码复杂度和行数 
  # 可以省略了质量控制环节
  ###### step2: QC质控 ######
  dir.create("./1-QC.loose")
  setwd("./1-QC.loose")
  # 如果过滤的太狠,就需要去修改这个过滤代码
  source('../scRNA_scripts/qc.R')
  sce.all.filt = basic_qc(sce.all)
  print(dim(sce.all))
  print(dim(sce.all.filt))
  setwd('../')
  getwd()
  
  ###### step3: harmony整合多个单细胞样品 ######
  
  if(T){
    dir.create("2-harmony.loose")
    getwd()
    setwd("2-harmony.loose")
    source('../scRNA_scripts/harmony.R')
    # 默认 ScaleData 没有添加"nCount_RNA", "nFeature_RNA"
    # 默认的
    sce.all.int = run_harmony(sce.all.filt)
    setwd('../')
    
  }
  
  ###### step4:  看标记基因库 ######
  # 原则上分辨率是需要自己肉眼判断,取决于个人经验
  # 为了省力,我们直接看  0.1和0.8即可
  
  table(Idents(sce.all.int))
  table(sce.all.int$seurat_clusters)
  table(sce.all.int$RNA_snn_res.0.1) 
  table(sce.all.int$RNA_snn_res.0.8) 
  
  getwd()
  dir.create('check-by-0.1.loose')
  setwd('check-by-0.1.loose')
  sel.clust = "RNA_snn_res.0.1"
  sce.all.int <- SetIdent(sce.all.int, value = sel.clust)
  table(sce.all.int@active.ident) 
  
  source('../scRNA_scripts/check-all-markers.R')
  setwd('../') 
  getwd()
}

#### strict version:
{
  sce.all.filt=subset(sce.all.filt,subset = percent_mito < 40 & 
                        nCount_RNA <10000 & 
                        nFeature_RNA <7500&
                        nFeature_RNA>500)
  sce.all.filt 

  ###### step3: harmony整合多个单细胞样品 ######
  
  if(T){
    dir.create("2-harmony")
    getwd()
    setwd("2-harmony")
    source('../scRNA_scripts/harmony.R')
    # 默认 ScaleData 没有添加"nCount_RNA", "nFeature_RNA"
    # 默认的
    sce.all.int = run_harmony(sce.all.filt)
    setwd('../')
    
  }

}

###### step4:  看标记基因库 ######
# 原则上分辨率是需要自己肉眼判断,取决于个人经验
# 为了省力,我们直接看  0.1和0.8即可

table(Idents(sce.all.int))
table(sce.all.int$seurat_clusters)
table(sce.all.int$RNA_snn_res.0.1) 
table(sce.all.int$RNA_snn_res.0.8) 

getwd()
dir.create('check-by-0.1')
setwd('check-by-0.1')
sel.clust = "RNA_snn_res.0.1"
sce.all.int <- SetIdent(sce.all.int, value = sel.clust)
table(sce.all.int@active.ident) 

source('../scRNA_scripts/check-all-markers.R')
setwd('../') 
getwd()

提取髓系细胞亚群重新进行标准化以及降维聚类分群

在0.1的分辨率下,也提取了髓系免疫细胞(cluster 2):

代码语言:javascript
复制
sce_myeloid <- subset(sce.all.int, subset = (RNA_snn_res.0.1 %in% c(2)))

sce_myeloid对象再次进行标准化,样本整合以及细分亚群:

代码语言:javascript
复制
sce_myeloid <- CreateSeuratObject(counts = sce_myeloid@assays$RNA$counts,
                              meta.data = sce_myeloid@meta.data[,1:6])
  sce_myeloid <- NormalizeData(sce_myeloid,normalization.method = "LogNormalize",
                         scale.factor = 1e4)
  GetAssay(sce_myeloid,assay = "RNA")
  sce_myeloid <- FindVariableFeatures(sce_myeloid,
                                selection.method = "vst",nfeatures = 2000)
  sce_myeloid <- ScaleData(sce_myeloid)
  sce_myeloid <- RunPCA(object = sce_myeloid,pc.genes = VariableFeatures(sce_myeloid))
  
  {
    sce_myeloid <- RunHarmony(sce_myeloid, "orig.ident")
    names(sce_myeloid@reductions)
    sce_myeloid <- RunUMAP(sce_myeloid,  dims = 1:15, 
                         reduction = "harmony")
  }
  
  sce_myeloid
  sce_myeloid <- FindNeighbors(sce_myeloid,dims = 1:15,reduction = "harmony")
  for (res in c(0.01, 0.05, 0.1, 0.2 ,0.3, 0.5, 0.8, 1)) {
    sce_myeloid = FindClusters(sce_myeloid,
                         resolution = res, algorithm = 1)  
  }

复现文章中的命名

接下来想按照文章中的标记基因尝试进行命名:

在0.2的分辨率下有7个cluster,但对应文章中的标记基因不明显:

于是提高分辨率去看,在0.8的分辨率下有17个cluster,下图中红色的标记文字为我认为标记基因对应的cluster编号。

代码语言:javascript
复制
celltype=data.frame(ClusterID=0:17 ,
                      celltype= 0:17 ) 
  #定义细胞亚群 
  # myeloids_paper_list 和上面一样
  myeloids_paper_list =list(
    MKI67=c("MKI67","TOP2A","PCLAF","UBE2C","TK1"),
    HSP=c("HSPA6","SERPINH1","BAG3","HSPB1","HSPD1"),
    SPP1=c("SPP1","MARCO","FBP1","APOC1","LIPA"),
    IL1B=c("IL1B","IL1A","NLRP3","PTGS2","CCL3"),
    FOLR2=c("FOLR2","LYVE1","SELENOP","SLC40A1","MRC1"),
    MT=c("MT1H","MT1G","MT1X","MT1E","MT2A")
  )
  celltype[celltype$ClusterID %in% c(13),2]='MKI67+'   
  celltype[celltype$ClusterID %in% c(4),2]='HSP+'  
  celltype[celltype$ClusterID %in% c(2),2]='SPP1+' 
  celltype[celltype$ClusterID %in% c(0,6,11),2]='IL1B+' 
  celltype[celltype$ClusterID %in% c(3),2]='FOLR2+' 
  celltype[celltype$ClusterID %in% c(7,15,16,17),2]='MT+'
  
  table(celltype$celltype)
  # 1     10     12     14      5      8      9 FOLR2+   HSP+  IL1B+ MKI67+    MT+  SPP1+ 
  # 1      1      1      1      1      1      1      1      1      3      1      4      1 

发现1,5,8,9,10,12,14还没有命名。检查了每个cluster的前10个差异基因。有以下发现:

  • cluster 14 有S100A8,S100A12基因高表达,是中性粒细胞或者单核细胞。(S100A8、S100A9 和 S100A12 主要由中性粒细胞和单核细胞表达:https://zhuanlan.zhihu.com/p/440315960)
  • cluster 9 还不能确定,和IL1B+的表达模式有些接近,但是多了几个基因的表达。
  • cluster 5 应该是cDC2细胞,高表达 CD1C, FCER1A, CLEC10A基因。
  • cluster 12 有CD3E基因高表达,所以是T细胞。
  • cluster 10 无法确定。
  • cluster 8 无法确定,和 cluster 9 有点像。

去除非巨噬细胞的细胞类型:5是cDC2细胞,12是T细胞。将其他文章中没有出现的巨噬细胞类型命名为other,得到最终的命名结果。

最终命名的代码如下:

代码语言:javascript
复制
# 去除非巨噬细胞的细胞类型:5是cDC2细胞,12是T细胞
  sce_myeloid = sce_myeloid[, !(Idents(sce_myeloid) %in% c(5,12))]
  
  celltype[celltype$ClusterID %in% c( 1,10,12,14,5,8,9 ),2]='other'
  
  sce_myeloid@meta.data$celltype = "NA"
  
  for(i in 1:nrow(celltype)){
    sce_myeloid@meta.data[which(sce_myeloid@meta.data$RNA_snn_res.0.8 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
  Idents(sce_myeloid)=sce_myeloid$celltype
}

if("celltype" %in% colnames(sce_myeloid@meta.data ) ){
  
  sel.clust = "celltype"
  sce_myeloid <- SetIdent(sce_myeloid, value = sel.clust)
  table(sce_myeloid@active.ident) 
  
  dir.create('myeloid-check-by-celltype')
  setwd('myeloid-check-by-celltype')
  source('../scRNA_scripts/check-paper-myeloid-markers.R')
  setwd('../') 
  getwd()
  phe=sce_myeloid@meta.data
  save(phe,file = 'myeloid_phe.Rdata')
  pdf('myeloid-celltype-vs-orig.ident.pdf',width = 10)
  gplots::balloonplot(table(sce_myeloid$celltype,sce_myeloid$orig.ident))
  dev.off()
  
  th=theme(axis.text.x=element_text(angle=45,hjust = 1))
  
  DotPlot(sce_myeloid, features = myeloids_paper_list,
                        assay='RNA' ,group.by = 'celltype' )  + coord_flip()+th
  
  ggsave('myeloid-markers-dotplot.pdf',width = 12,height = 6)
  DimPlot(sce_myeloid, reduction = "umap", group.by = "celltype",label = T,label.box = T)
  ggsave('myeloid-markers_umap-celltype.pdf',width = 8,height = 8)
}

画图代码分享

附上前面标记基因(文章中的)和差异基因的dotplot图的代码。

标记基因(文章中)dotplot图的代码

代码语言:javascript
复制
pro = 'qc-'
myeloids_paper_list =list(
    MKI67=c("MKI67","TOP2A","PCLAF","UBE2C","TK1"),
    HSP=c("HSPA6","SERPINH1","BAG3","HSPB1","HSPD1"),
    SPP1=c("SPP1","MARCO","FBP1","APOC1","LIPA"),
    IL1B=c("IL1B","IL1A","NLRP3","PTGS2","CCL3"),
    FOLR2=c("FOLR2","LYVE1","SELENOP","SLC40A1","MRC1"),
    MT=c("MT1H","MT1G","MT1X","MT1E","MT2A")
  )
  
  markers_list<- c('myeloids_paper_list')
  
  lapply(markers_list, function(x){
    # x=markers_list[1]
    genes_to_check = lapply(get(x), str_to_upper)
    dup=names(table(unlist(genes_to_check)))[table(unlist(genes_to_check))>1]
    genes_to_check = lapply(genes_to_check, function(x) x[!x %in% dup])
    
    DotPlot(sce_myeloid , features = genes_to_check )  + 
      # coord_flip() + 
      theme(axis.text.x=element_text(angle=45,hjust = 1))
    
    w=length( unique(unlist(genes_to_check)) )/5+6;w
    ggsave(paste0('check_for_',x,'.pdf'),width  = w,height=6)
  })

差异基因的dotplot图的代码

代码语言:javascript
复制
## top10 genes
  marker_cosg <- cosg(
    sce_myeloid,
    groups='all',
    assay='RNA',
    slot='data',
    mu=1,
    n_genes_user=100)
  
  library(dplyr)  
  top_10 <- unique(as.character(apply(marker_cosg$names,2,head,10)))
  # width <-0.006*dim(sce_myeloid)[2];width
  # height <- 0.25*length(top_10)+4.5;height
  
  width <- 15+0.5*length(unique(Idents(sce_myeloid)));width
  height <- 8+0.1*length(top_10);height
  
  sce.Scale <- ScaleData(sce_myeloid ,features =  top_10  )  
  
  DoHeatmap(  sce.Scale , top_10 , 
              size=3)
  
  ggsave(filename=paste0(pro,'DoHeatmap_check_top10_markers_by_clusters.pdf') ,
         # limitsize = FALSE,
         units = "cm",width=width,height=height)
  width <- 8+0.6*length(unique(Idents(sce_myeloid)));width
  height <- 8+0.2*length(top_10);height
  DotPlot(sce_myeloid, features = top_10 ,
          assay='RNA'  )  + coord_flip() +FontSize(y.text = 4) + theme(axis.text.x=element_text(angle=45,hjust = 1))
  ggsave(paste0(pro,'DotPlot_check_top10_markers_by_clusters.pdf'),
         units = "cm",width=width,height=height)

小结

  • 根据分析目的,合理调整过滤参数
  • 该文献对细胞亚群的命名规则为:提取出只在某些 cluster 中特异性、高表达的基因(假设为基因A),将其定义为 A+ 细胞。巨噬细胞的这种规则的亚群命名还是挺常见的,比如另一篇文献《Pan-Cancer Single-Cell and Spatial-Resolved Profiling Reveals the Immunosuppressive Role of APOE Macrophages in Immune Checkpoint Inhibitor Therapy》。其他单细胞亚群的命名规则详见生信技能树最近的推文:单细胞亚群的生物学命名的4个规则
  • 复现文献时,由于参数不同,结果会有一定的出入,但是在这里原则是一样的,即根据差异基因对cluster进行命名:

文章图1a的说明

本文参与 腾讯云自媒体分享计划,分享自微信公众号。
原始发表:2024-04-12,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 单细胞天地 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 第一层次降维聚类分群
  • 提取髓系细胞亚群重新进行标准化以及降维聚类分群
  • 复现文章中的命名
  • 画图代码分享
    • 标记基因(文章中)dotplot图的代码
      • 差异基因的dotplot图的代码
      • 小结
      领券
      问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档