书接上回,已经做好数据质控、过滤、去批次、降维聚类分群后,接下来就是进行细胞注释方面的工作
# 原则上分辨率是需要自己肉眼判断,取决于个人经验
# 为了省力,我们直接看 0.1和0.8即可
table(Idents(sce.all.int))
0 1 2 3 4 5
5241 4859 1365 1261 1062 138
table(sce.all.int$seurat_clusters)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
2460 2200 1998 1878 1061 936 690 635 598 511 428 241 139 92 59
table(sce.all.int$RNA_snn_res.0.1)
0 1 2 3 4 5
5241 4859 1365 1261 1062 138
table(sce.all.int$RNA_snn_res.0.8)
getwd()
#选择分辨率为0.1的分群进行标记
dir.create('check-by-0.1')
setwd('check-by-0.1')
sel.clust = "RNA_snn_res.0.1"
sce.all.int <- SetIdent(sce.all.int, value = sel.clust)
table(sce.all.int@active.ident)
source('../scRNA_scripts/check-all-markers.R')
setwd('../')
getwd()
#选择分辨率为0.8的分群进行标记
dir.create('check-by-0.8')
setwd('check-by-0.8')
sel.clust = "RNA_snn_res.0.8"
sce.all.int <- SetIdent(sce.all.int, value = sel.clust)
table(sce.all.int@active.ident)
source('../scRNA_scripts/check-all-markers.R')
setwd('../')
getwd()
last_markers_to_check
# 代码需要保证统一
# 只能是 sce.all
gastric_cancer_markers = c('PTPRC',
'MUC2' , 'ITLN1',
'FABP1' , 'APOA1',
'CEACAM5' , 'CEACAM6',
'EPCAM', 'KRT18', 'MUC1',
'MUC6' , 'TFF2',
'PGA4' , 'PGA3',
'MUC5AC' , 'TFF1','CHGA' , 'CHGB')
Myo=c("Krt17", "Krt14", "Krt5", "Acta2", "Myl9", "Mylk", "Myh11")
Lum=c("Krt19", "Krt18", "Krt8")
Hs=c("Prlr", "Cited1", "Pgr", "Prom1", "Esr1")
AV=c("Mfge8", "Trf", "Csn3", "Wfdc18", "Elf5", "Ltf")
Lp=c("Kit", "Aldh1a3", "Cd14")
Fib=c("Col1a1", "Col1a2", "Col3a1", "Fn1")
GSE150580_breast_cancer_markers_list =list(
Myo=Myo,
Lum=Lum,
Hs=Hs,
AV=AV,
Lp=Lp,
Fib=Fib
)
# macrophages (Adgre1, Cd14, and Fcgr3),
# cDCs (Xcr1, Flt3, and Ccr7),
# pDCs (Siglech, Clec10a, and Clec12a),
# monocytes (Ly6c2 and Spn),
# neutrophils (Csf3r, S100a8, and Cxcl3),
macrophages=c('Adgre1', 'Cd14', 'Fcgr3')
cDCs=c('Xcr1', 'Flt3', 'Ccr7')
pDCs=c('Siglech', 'Clec10a', 'Clec12a')
monocytes=c('Ly6c2' , 'Spn')
neutrophils=c('Csf3r', 'S100a8', 'Cxcl3')
SCP1661_meyloids_markers_list =list(
macrophages=macrophages,
cDCs=cDCs,
pDCs=pDCs,
monocytes=monocytes,
neutrophils=neutrophils
)
lung_epi_markers = c('TPPP3',"SPRR3","GDPD3","SPRR1A","SPRR2A","RARRES2","TMPRSS11E",
"ASCL3","CFTR","FOXI2","1SG20","FOXI1",
"SAA4","SAA2","EFHC1","CCDC153","CCDC113","SAA1","CDC20B","FOXJ1",
"MYCL","FOXN4","CCNO",
"PIGR","BP1","MUC5A","VMO1","SCGB3A1","CYP2A13","CYP2B6","SCGB1A1",
"BCAM","KRT15","KRT5","TP63")
myeloids_markers_list1 =list(
CM=c("TTN","MYH7","MYH6","TNNT2") ,
EC=c("VWF", "IFI27", "PECAM1","MGP"),
FB=c("DCN", "C7" ,"LUM","FBLN1","COL1A2"),
MP=c("CD163", "CCL4", "CXCL8","PTPRC"),
SMC=c("ACTA2", "CALD1", "MYH11"),
Tc=c("CD3D","CD3E"),
DC1 = c( 'Clec9a', 'Xcr1', 'Wdfy4'),
DC2 = c('Itgax', 'Sirpa', 'Cd209a'),
mregDCs= c('Ccr7', 'Cd80', 'Cd200', 'Cd247') ,
hypoxia=c('Hif1a', 'Slc2a1', 'Vegfa', 'Hmox1',
'Bnip3', 'Nos2', 'Mmp2', 'Sod3',
'Cited2', 'Ldha'),
peric=c("ABCC9","PDGFRB","RGS5")
)
myeloids_markers_list2 = list(pDC = c("CLEC4C","IRF7","TCF4","GZMB"),
cDC1 = c("XCR1","CLNK","CLEC9A"),
cDC2 = c("FCER1A","HLA-DPB1","HLA-DQB1","CD1E","CD1C","CLEC10A","HLA-DQA2"),
DC3 = c("CCL19","LAMP3","IDO1","IDO2","LAD1","FSCN1","CCR7","LY75","CCL22","CD40","BIRC3","NFKB2"),
Macrophages = c("APOC1","HLA-DRB5","C1QA","C1QB"),
RTMs = c("THBS1"),#Resident tissue macrophages
Lam = c("APOE"),#Lipid associated macrophages
Monocytes = c("LYZ","HLA-DRB1","TIMP1","S100A11","CXCL8","IL1B","PTGS2","S100A9","S100A8","MMP19"),
Mono_C = c('CD14'),#Mono_CD14
Mono_F = c('FCGR3A'),#Mono_FCGR3A
Mast = c('TPSAB1' , 'TPSB2'))
Tcells_markers = c('PTPRC', 'CD3D', 'CD3E', 'CD4','CD8A',
'CCR7', 'SELL' , 'TCF7','CXCR6' , 'ITGA1',
'FOXP3', 'IL2RA', 'CTLA4','GZMB', 'GZMK','CCL5',
'IFNG', 'CCL4', 'CCL3' ,
'PRF1' , 'NKG7')
###CD4T
CD4_markers_list =list(
Tc=c("CD3D","CD3E"),
CD4=c("CD4" ),
Treg=c("TNFRSF4","BATF","TNFRSF18","FOXP3","IL2RA","IKZF2"),
naive=c("CCR7","SELL","CD5"),
Tfh=c("CXCR5","BCL6","ICA1","TOX","TOX2","IL6ST"),#滤泡辅助性T细胞
ILC=c("TNFRSF25","KRT81","LST1","AREG","LTB","CD69")
)
###CD8T
CD8_markers_list1 =list(
CD8=c("CD8A","CD8B"),
TN_TCM=c("CCR7","SELL","TCF7","LEF1"),
TEM=c("GZMK" ),
TEFF=c("TBX21","FCGR3A","FGFBP2"),
TRM=c("XCL1","XCL2","ITGAE","CD69"),
IEL_T = c("TMIGD2"),
yT1c=c("GNLY","PTGDS","GZMB","TRDC"),
yT2c=c("TMN1","HMGB2","TYMS"),
MAIT_T = c("SLC4A10")
)
CD8_markers_list2 =list(
CD8T=c("CD8A","CD8B"),
MAIT=c("ZBTB16","NCR3","RORA"),
ExhaustedCD8T=c("LAG3","TIGIT","PDCD1","HAVCR2","CTLA4"),
EffMemoryCD8=c("EOMES","ITM2C"),
Resting_NK=c("XCL1","XCL2","KLRC1"),
Cytotoxic_NK=c("CX3CR1","FGFBP2","FCGR3A","KLRD1"),
Pre_exhausted=c("IFNG","PRF1","GNLY","GZMA","NKG7","GZMK")
)
cd4_and_cd8T_markers_list =list(
naive=c("CCR7","SELL","TCF7","IL7R","CD27","CD28","LEF1","S1PR1"),
CD8Trm=c("XCL1","XCL2","MYADM"),
NKTc=c("GNLY","GZMA"),
Tfh=c("CXCR5","BCL6","ICA1","TOX","TOX2","IL6ST"),
th17=c("IL17A","KLRB1","CCL20","ANKRD28","IL23R","RORC","FURIN","CCR6","CAPG","IL22"),
CD8Tem=c("CXCR4","GZMH","CD44","GZMK"),
Treg=c("FOXP3","IL2RA","TNFRSF18","IKZF2"),
naive=c("CCR7","SELL","TCF7","IL7R","CD27","CD28"),
CD8Trm=c("XCL1","XCL2","MYADM"),
MAIT=c("KLRB1","ZBTB16","NCR3","RORA"),
yT1c=c("GNLY","PTGDS","GZMB","TRDC"),
yT2c=c("TMN1","HMGB2","TYMS"),
yt=c("TRGV9","TRDV2")
)
# CD20 (MS4A1)表达于除plasma B 之外的所有B,很关键的区分naive 和plasma的marker
# SDC1 = CD138 plasma B (接受抗原,可表达抗体)
Bcels_markers_list = list(
All = c('MS4A1','SDC1','CD27','CD38','CD19', 'CD79A'),
GC_B = c('IL4R','TCL1A','LRMP','SUGCT'),
IGA_plasm_B= c ( 'IGHA1'),
IGG_plasm_B= c ( 'IGHG1')
)
Hepatic_stellate_markers_list =list(
qHSC=c("Lrat","Ecm1","Angptl6","Vipr1" ),
S1=c("Ccl2" ,"Cxcl10" ,"Cxcl1" ,"Ccl7" ),
S2=c("Acta2" ,"Tpm1" ,"Vim" ,"Tagln","Tnc","Tpm2"),
S3=c("Col1a1","Col1a2","Col3a1" ,"Lox","Lum" )
)
# arteries (HEY1, IGFBP3), capillaries (CD36, CA4), veins (ACKR1) and
# lymphatic ECs (LECs; CCL21, PROX1).
stromal_markers = c('TEK',"PTPRC","EPCAM","PDPN",
"PECAM1",'PDGFRB',"PLVAP",'PROX1','ACKR1','CA4','HEY1',
'CSPG4','GJB2', 'RGS5','ITGA7',
'ACTA2','RBP1','CD36',
'ADGRE5','COL11A1','FGF7', 'MME')
last_markers = c('PTPRC', 'CD3D', 'CD3E', 'CD4','CD8A',
'CD19', 'CD79A', 'MS4A1' ,
'IGHG1', 'MZB1', 'SDC1',
'CD68', 'CD163', 'CD14',
'TPSAB1' , 'TPSB2', # mast cells,
'RCVRN','FPR1' , 'ITGAM' ,
'C1QA', 'C1QB', # mac
'S100A9', 'S100A8', 'MMP19',# monocyte
'FCGR3A',
'LAMP3', 'IDO1','IDO2',## DC3
'CD1E','CD1C', # DC2
'KLRB1','NCR1', # NK
'FGF7','MME', 'ACTA2', ## human fibo
'GJB2', 'RGS5',
'DCN', 'LUM', 'GSN' , ## mouse PDAC fibo
'MKI67' , 'TOP2A',
'PECAM1', 'VWF', ## endo
"PLVAP",'PROX1','ACKR1','CA4','HEY1',
'EPCAM' , 'KRT19','KRT7', # epi
'FYXD2', 'TM4SF4', 'ANXA4',# cholangiocytes
'APOC3', 'FABP1', 'APOA1', # hepatocytes
'Serpina1c',
'PROM1', 'ALDH1A1' )
gastric_cancer_markers
lung_epi_markers
Tcells_markers
stromal_markers
last_markers
GSE150580_breast_cancer_markers_list
SCP1661_meyloids_markers_list
myeloids_markers_list1
myeloids_markers_list2
CD4_markers_list
CD8_markers_list1
CD8_markers_list2
cd4_and_cd8T_markers_list
Bcels_markers_list
Hepatic_stellate_markers_list
markers = c('gastric_cancer_markers','lung_epi_markers',
'Tcells_markers',
'stromal_markers',
'last_markers' )
markers_list <- c(
'GSE150580_breast_cancer_markers_list' ,
'SCP1661_meyloids_markers_list' ,
'myeloids_markers_list1' ,
'myeloids_markers_list2' ,
'CD4_markers_list' ,
'CD8_markers_list1' ,
'CD8_markers_list2' ,
'cd4_and_cd8T_markers_list' ,
'Bcels_markers_list' ,
'Hepatic_stellate_markers_list'
)
p_umap=DimPlot(sce.all.int, reduction = "umap",raster = F,
label = T,repel = T)
p_umap
if(sp=='human'){
lapply(markers, function(x){
#x=markers[1]
genes_to_check=str_to_upper(get(x))
DotPlot(sce.all.int , features = genes_to_check ) +
coord_flip() +
theme(axis.text.x=element_text(angle=45,hjust = 1))
h=length( genes_to_check )/6+3;h
ggsave(paste('check_for_',x,'.pdf'),height = h)
})
lapply(markers_list, function(x){
# x=markers_list[1]
genes_to_check = lapply(get(x), str_to_upper)
dup=names(table(unlist(genes_to_check)))[table(unlist(genes_to_check))>1]
genes_to_check = lapply(genes_to_check, function(x) x[!x %in% dup])
DotPlot(sce.all.int , features = genes_to_check ) +
# coord_flip() +
theme(axis.text.x=element_text(angle=45,hjust = 1))
w=length( unique(unlist(genes_to_check)) )/5+6;w
ggsave(paste('check_for_',x,'.pdf'),width = w)
})
last_markers_to_check <<- str_to_upper(last_markers )
}else if(sp=='mouse'){
lapply(markers, function(x){
#x=markers[1]
genes_to_check=str_to_title(get(x))
DotPlot(sce.all.int , features = genes_to_check ) +
coord_flip() +
theme(axis.text.x=element_text(angle=45,hjust = 1))
h=length( genes_to_check )/6+3;h
ggsave(paste('check_for_',x,'.pdf'),height = h)
})
lapply(markers_list, function(x){
# x=markers_list[1]
genes_to_check = lapply(get(x), str_to_title)
dup=names(table(unlist(genes_to_check)))[table(unlist(genes_to_check))>1]
genes_to_check = lapply(genes_to_check, function(x) x[!x %in% dup])
DotPlot(sce.all.int , features = genes_to_check ) +
# coord_flip() +
theme(axis.text.x=element_text(angle=45,hjust = 1))
w=length( unique(unlist(genes_to_check)) )/5+6;w
ggsave(paste('check_for_',x,'.pdf'),width = w)
})
last_markers_to_check <<- str_to_title(last_markers )
}else {
print('we only accept human or mouse')
}
p_all_markers = DotPlot(sce.all.int , features = last_markers_to_check ) +
coord_flip() +
theme(axis.text.x=element_text(angle=45,hjust = 1))
p_all_markers+p_umap
h=length( last_markers_to_check )/6+3;h
w=length( unique( Idents(sce.all.int)) )/5+10;w
ggsave(paste('last_markers_and_umap.pdf'),width = w,height = h)
pro = 'qc-'
if("percent_mito" %in% colnames(sce.all.int@meta.data ) ){
#可视化细胞的上述比例情况
feats <- c("nFeature_RNA", "nCount_RNA", "percent_mito", "percent_ribo", "percent_hb")
feats <- c("nFeature_RNA", "nCount_RNA")
p1=VlnPlot(sce.all.int , features = feats, pt.size = 0, ncol = 2) +
NoLegend()
w=length(unique(sce.all.int$orig.ident))/3+5;w
ggsave(filename=paste0(pro,"Vlnplot1.pdf"),plot=p1,width = w,height = 5)
feats <- c("percent_mito", "percent_ribo", "percent_hb")
p2=VlnPlot(sce.all.int, features = feats, pt.size = 0, ncol = 3, same.y.lims=T) +
scale_y_continuous(breaks=seq(0, 100, 5)) +
NoLegend()
w=length(unique(sce.all.int$orig.ident))/2+5;w
ggsave(filename=paste0(pro,"Vlnplot2.pdf"),plot=p2,width = w,height = 5)
}
p3=FeatureScatter(sce.all.int , "nCount_RNA", "nFeature_RNA",
pt.size = 0.5)
ggsave(filename=paste0(pro,"Scatterplot.pdf"),plot=p3)
if(T){
# remotes::install_github('genecell/COSGR')
# genexcell <- Seurat::GetAssayData(object = object[[assay]],slot = slot)
marker_cosg <- cosg(
sce.all.int,
groups='all',
assay='RNA',
slot='data',
mu=1,
n_genes_user=100)
save(marker_cosg,file = paste0(pro,'_marker_cosg.Rdata'))
head(marker_cosg)
## Top10 genes
library(dplyr)
top_10 <- unique(as.character(apply(marker_cosg$names,2,head,10)))
# width <-0.006*dim(sce.all.int)[2];width
# height <- 0.25*length(top_10)+4.5;height
width <- 15+0.5*length(unique(Idents(sce.all.int)));width
height <- 8+0.1*length(top_10);height
sce.Scale <- ScaleData(sce.all.int ,features = top_10 )
DoHeatmap( sce.Scale , top_10 ,
size=3)
ggsave(filename=paste0(pro,'DoHeatmap_check_top10_markers_by_clusters.pdf') ,
# limitsize = FALSE,
units = "cm",width=width,height=height)
width <- 8+0.6*length(unique(Idents(sce.all.int)));width
height <- 8+0.2*length(top_10);height
DotPlot(sce.all.int, features = top_10 ,
assay='RNA' ) + coord_flip() +FontSize(y.text = 4)
ggsave(paste0(pro,'DotPlot_check_top10_markers_by_clusters.pdf'),
units = "cm",width=width,height=height)
## Top3 genes
top_3 <- unique(as.character(apply(marker_cosg$names,2,head,3)))
width <- 15+0.2*length(unique(Idents(sce.all.int)));width
height <- 8+0.1*length(top_3);height
sce.Scale <- ScaleData(sce.all.int ,features = top_3 )
DoHeatmap( sce.Scale , top_3 ,
size=3)
ggsave(filename=paste0(pro,'DoHeatmap_check_top3_markers_by_clusters.pdf') ,
units = "cm",width=width,height=height)
width <- 8+0.2*length(unique(Idents(sce.all.int)));width
height <- 8+0.1*length(top_3);height
DotPlot(sce.all.int, features = top_3 ,
assay='RNA' ) + coord_flip()
ggsave(paste0(pro,'DotPlot_check_top3_markers_by_clusters.pdf'),width=width,height=height)
}
这些基因标记列表可以在各种生物医学研究中帮助识别和分析不同的细胞类型及其在疾病中的作用
p_umap=DimPlot(sce.all.int, reduction = "umap",raster = F,
label = T,repel = T)
reduction = "umap": 指定使用UMAP进行降维。
raster = F: 禁用栅格化,使用矢量图形。
label = T: 在图中标记每个细胞群的标签。
repel = T: 避免标签重叠。
if(sp=='human'){
lapply(markers, function(x){
genes_to_check=str_to_upper(get(x))
DotPlot(sce.all.int , features = genes_to_check ) +
coord_flip() +
theme(axis.text.x=element_text(angle=45,hjust = 1))
h=length( genes_to_check )/6+3;h
ggsave(paste('check_for_',x,'.pdf'),height = h)
})
....
lapply
函数对 markers
列表中的每个元素执行指定操作。 将当前标记列表 x
中的所有基因名称转换为大写,以便与人类基因名称匹配。
分别对markers、markers_list列表中的基因进行可视化的操作。
last_markers_to_check <<- str_to_upper(last_markers )
将变量 last_markers 中的所有基因名称转换为大写,并将结果存储在一个全局变量 last_markers_to_check中。
注:
<<- 是 R 语言中的一种赋值操作符,用于在函数内部将值赋给全局变量或父级作用域中的变量。与常规的 <-
不同,<<- 可以改变当前函数环境之外的变量。
else if(sp=='mouse'){
lapply(markers, function(x){
genes_to_check=str_to_title(get(x)
...
}
lapply(markers_list, function(x){
genes_to_check = lapply(get(x), str_to_title)
...
}
last_markers_to_check <<- str_to_title(last_markers )
与human不同,小鼠的基因名需要首字母大写
基因名称转换为首字母大写(str_to_title),因为小鼠基因的标准命名方式是首字母大写。
marker_cosg <- cosg(
sce.all.int,
groups='all',
assay='RNA',
slot='data',
mu=1,
n_genes_user=100)
save(marker_cosg,file = paste0(pro,'_marker_cosg.Rdata'))
head(marker_cosg)
sce.all.int: 输入的单细胞对象(SingleCellExperiment 或 Seurat 对象),包含了所有细胞的数据。
groups = all: 指定要对所有群体进行分析,即识别所有群体的marker基因。
assay = 'RNA': 指定使用RNA测序数据进行分析。
slot = 'data': 指定从数据的哪个部分提取表达值,这里选择 data插槽,通常包含标准化后的表达数据。
mu = 1: 参数 mu可能控制分析过程中的一个参数,具体含义取决于 cosg
函数的实现。通常可能与某种筛选标准或过滤条件有关。
n_genes_user = 100: 指定要识别的每个群体的marker基因数量,这里设置为100个。
## Top10 genes
library(dplyr)
top_10 <- unique(as.character(apply(marker_cosg$names,2,head,10)))
width <- 15+0.5*length(unique(Idents(sce.all.int)));width
height <- 8+0.1*length(top_10);height
sce.Scale <- ScaleData(sce.all.int ,features = top_10 )
DoHeatmap( sce.Scale , top_10 ,
size=3)
ggsave(filename=paste0(pro,'DoHeatmap_check_top10_markers_by_clusters.pdf') ,
units = "cm",width=width,height=height)
width <- 8+0.6*length(unique(Idents(sce.all.int)));width
height <- 8+0.2*length(top_10);height
DotPlot(sce.all.int, features = top_10 ,
assay='RNA' ) + coord_flip() +FontSize(y.text = 4)
ggsave(paste0(pro,'DotPlot_check_top10_markers_by_clusters.pdf'),
units = "cm",width=width,height=height)
## Top3 genes
...
使用 DoHeatmap
函数绘制热图,显示 top_10
中基因在不同细胞群体中的表达情况。
使用 DotPlot
函数绘制点图,显示 top_10
基因在不同细胞群体中的表达情况。
以下代码均是分辨率为0.1的运行结果,其他分辨率类似
dir.create('check-by-0.1')
setwd('check-by-0.1')
sel.clust = "RNA_snn_res.0.1"
sce.all.int <- SetIdent(sce.all.int, value = sel.clust)
table(sce.all.int@active.ident)
source('../scRNA_scripts/check-all-markers.R')
即这一步会生成21张图
前8张为各个基因list中,在各个细胞分群中的表达情况:
如
checkfor Tcells_markers .pdf
checkfor myeloids_markers_list2 .pdf
后面的last_markers_and_umap.pdf同时可视化了umap降维图,和last_markers基因列表(包含多数细胞类型的特征基因)中在各个分群中的表达情况。
last_markers_and_umap.pdf
后面的四张图是可视化了当前分辨率中各个分群中的top10基因的热图和点图。
qc-DoHeatmap_check_top10_markers_by_clusters.pdf
qc-DotPlot_check_top10_markers_by_clusters.pdf
最后3张是可视化了当前分群中的nFeature、nCount、线粒体核糖体血红细胞基因的分布、以及相关性图。
qc-Vlnplot1.pdf
qc-Vlnplot2.pdf
qc-Scatterplot.pdf
一般来说,为了节省工作量,我们选择0.1的分辨率进行命名
因为命名这个步骤是纯人工 操作
除非0.1确实分群太粗狂了,我们就选择0.8
根据上述图片手动给细胞分群命名
这里因为用的是测试数据集,其自带细胞注释。因此用气泡图比较了本身和手动注释的区别。
source('scRNA_scripts/lib.R')
sce.all.int = readRDS('2-harmony/sce.all_int.rds')
sp='human'
colnames(sce.all.int@meta.data)
table(sce.all.int$RNA_snn_res.0.1)
# pbmc_small <- BuildClusterTree(object = sce.all.int)
# plot(Tool(object = pbmc_small, slot = 'BuildClusterTree'))
# plot(pbmc_small@tools$BuildClusterTree)
# 如果是手动给各个单细胞亚群命名
if(F){
sce.all.int
celltype=data.frame(ClusterID=0:5 ,
celltype= 0:5)
#定义细胞亚群
celltype[celltype$ClusterID %in% c( 1 ,4 ),2]='mono'
celltype[celltype$ClusterID %in% c( 0,3 ),2]='Tcells'
celltype[celltype$ClusterID %in% c( 2 ),2]='Bcells'
celltype[celltype$ClusterID %in% c( 5 ),2]='pDC'
head(celltype)
celltype
table(celltype$celltype)
sce.all.int@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
sce.all.int@meta.data[which(sce.all.int@meta.data$RNA_snn_res.0.1 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
Idents(sce.all.int)=sce.all.int$celltype
}
table(sce.all.int$seurat_annotations)
#比较和数据集自带的细胞分群的差别
gplots::balloonplot(
table(sce.all.int$RNA_snn_res.0.8,
sce.all.int$seurat_annotations)
)
gplots::balloonplot(
table(sce.all.int$celltype,
sce.all.int$seurat_annotations)
)
注:
sce.all.int = readRDS('2-harmony/sce.all_int.rds')
celltype=data.frame(ClusterID=0:5 ,
celltype= 0:5)
celltype[celltype$ClusterID %in% c( 1 ,4 ),2]='mono'
celltype[celltype$ClusterID %in% c( 0,3 ),2]='Tcells'
celltype[celltype$ClusterID %in% c( 2 ),2]='Bcells'
celltype[celltype$ClusterID %in% c( 5 ),2]='pDC'
这个步骤创建了一个名为 celltype 的数据框,包含两列:
这里的0~5需要根据当前分辨率的细胞分群修改对应的数
#初始化celltype列
sce.all.int@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
sce.all.int@meta.data[which(sce.all.int@meta.data$RNA_snn_res.0.1 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
Idents(sce.all.int)=sce.all.int$celltype
which()函数返回满足条件的索引位置,即返回 TRUE 的位置。这个表达式会找到 sce.all.int对象中 RNA_snn_res.0.1 列中等于当前 ClusterID 的所有行索引。
sce.all.int@meta.datawhich(...),'celltype'
对选择出的那些行中的 celltype列进行操作。
sce.all.int@meta.datawhich(...),'celltype' <- celltype$celltypei
将特定聚类 ID 对应的细胞的 celltype 列更新为相应的细胞类型。
这里手动注释还是比较粗糙的,为了方便显示,还是使用数据集本身自带的细胞注释进行后续的分析
#DimPlot(sce.all.int, reduction = "umap",raster = F,group.by = 'RNA_snn_res.0.1',
# label = T,repel = T) +
# DimPlot(sce.all.int, reduction = "umap",raster = F,group.by = 'seurat_annotations',
# label = T,repel = T)
# 如果前面成功的给各个细胞亚群命名了
# 就可以运行下面的代码
sce.all.int$celltype = as.character(sce.all.int$seurat_annotations)
if("celltype" %in% colnames(sce.all.int@meta.data ) ){
sel.clust = "celltype"
sce.all.int <- SetIdent(sce.all.int, value = sel.clust)
table(sce.all.int@active.ident)
dir.create('check-by-celltype')
setwd('check-by-celltype')
source('../scRNA_scripts/check-all-markers.R')
setwd('../')
getwd()
phe=sce.all.int@meta.data
save(phe,file = 'phe.Rdata')
pdf('celltype-vs-orig.ident.pdf',width = 10)
gplots::balloonplot(table(sce.all.int$orig.ident,
sce.all.int$celltype))
dev.off()
}
同样再跑下check-all-markers.R这个脚本,会出21张图。如
qc-Vlnplot1.pdf
qc-DoHeatmap_check_top3_markers_by_clusters.pdf
qc-DotPlot_check_top3_markers_by_clusters.pdf
last_markers_and_umap.pdf
最后
pdf('celltype-vs-orig.ident.pdf',width = 10)
gplots::balloonplot(table(sce.all.int$orig.ident,
sce.all.int$celltype))
展示了每个细胞群中在多分组中的分布情况
celltype-vs-orig.ident.pdf
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。