单细胞 一文打通 拟时序分析monocle2
上个月发了"一文全打通"系列之后,有人问还有没有后续。肯定有呀!关于monocle2,最难的一步似乎是软件的正确安装:在某些版本中,部分函数总是报错,需要找到能够正常运行的版本。
这次排版不太好,还请见谅。
# Session setup: library search path used on the author's server, required
# packages, and the working directory that all relative output paths use.
.libPaths(c(
  "/home/data/refdir/Rlib/",
  "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2",
  "/usr/local/lib/R/library"
))
library(Seurat)
library(dplyr)
library(tibble)
library(ggplot2)
getwd()
# Create the output directory together with any missing parent directories,
# and stay silent when it already exists so the script can be re-run.
dir.create(
  "/home/data/t040413/sp_/monocle",
  recursive = TRUE,
  showWarnings = FALSE
)
setwd("/home/data/t040413/sp_/monocle")
加载数据
# Load the saved object(s); the code below expects this to define
# `subset_data`.
# NOTE(review): load() reads files written by save(); the ".rds" extension
# suggests saveRDS() -- confirm how this file was produced.
load("./neutrophil(rTEM+not_rTEM).rds")
head(subset_data@meta.data)
# Copy the `groups` metadata column into `celltype` and plot coloured by it.
subset_data$celltype <- subset_data$groups
DimPlot(subset_data, group.by = "celltype", label = TRUE)
########################################################
# Record the active identities as the `cell.type` metadata column.
subset_data$cell.type <- Idents(subset_data)
制作矩阵
# Build the monocle CellDataSet from the Seurat object: expression matrix,
# per-cell metadata (phenoData) and per-gene annotation (featureData).

# Record the active identities as the `cell.type` metadata column.
subset_data$cell.type <- Idents(subset_data)
# Idents(subset_data) <- subset_data$Idents.subset_data.
### Decide whether to use the RNA or the SCT assay; RNA is used here.
DefaultAssay(subset_data)
DefaultAssay(subset_data) <- "RNA"
# Sanity checks: duplicated gene names, duplicated cell names, cluster sizes.
table(duplicated(rownames(subset_data)))
table(duplicated(colnames(subset_data)))
table(Idents(subset_data))
DefaultAssay(subset_data)

# Append the identities to the metadata by joining on the cell names.
new.metadata <- merge(
  subset_data@meta.data,
  data.frame(Idents(subset_data)),
  by = "row.names",
  sort = FALSE
)
head(new.metadata)
# merge(by = "row.names") puts the row names into the first column:
# restore them as row names, then drop that column.
rownames(new.metadata) <- new.metadata[, 1]
head(subset_data@meta.data)
new.metadata <- new.metadata[, -1]
# merge() leaves rows in an unspecified order when sort = FALSE, so put them
# back in the original cell order and fail loudly if any cell is missing,
# instead of only printing the comparison.
new.metadata <- new.metadata[rownames(subset_data@meta.data), , drop = FALSE]
stopifnot(
  identical(rownames(new.metadata), rownames(subset_data@meta.data))
)
subset_data@meta.data <- new.metadata
table(subset_data$cell.type, Idents(subset_data))
head(subset_data)

# Convert the raw counts straight to a sparse matrix. Going through
# as.matrix() first would allocate a dense genes x cells matrix for no gain;
# as() accepts both dense and already-sparse input.
expression_matrix <- as(subset_data@assays$RNA@counts, "sparseMatrix")
head(expression_matrix)
# Cells must be in the same order in the matrix and in the metadata.
stopifnot(identical(colnames(expression_matrix), rownames(new.metadata)))

# NOTE(review): the AnnotatedDataFrame class must already be available here
# (it is normally provided by Biobase) -- confirm it is loaded at this point.
cell_metadata <- new("AnnotatedDataFrame", data = subset_data@meta.data)
head(subset_data@meta.data)
head(cell_metadata)
gene_annotation <- new(
  "AnnotatedDataFrame",
  data = data.frame(
    gene_short_name = row.names(subset_data),
    row.names = row.names(subset_data)
  )
)

# Load the local monocle source tree, then assemble the CellDataSet.
devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")
monocle_cds <- monocle::newCellDataSet(
  expression_matrix,
  phenoData = cell_metadata,
  featureData = gene_annotation,
  lowerDetectionLimit = 0.5,
  expressionFamily = negbinomial.size()
)
归一化 差异分析
## Normalisation ######
cds <- estimateSizeFactors(monocle_cds)
# The author's run reported "Removing 110 outliers" at this step.
cds <- estimateDispersions(cds)

# Author's note on this library call: "parallel computing".
# NOTE(review): no `cores` argument is passed to the test below.
library("BiocGenerics")
devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")
# `cell.type` in the formula is the metadata column carried over from
# subset_data.
diff_test_res <- differentialGeneTest(
  cds,
  fullModelFormulaStr = "~ cell.type"
)

### Infer the pseudo-trajectory ###########################################
# Step 1: choose the ordering genes (q-value below 0.01) and register them
# with setOrderingFilter().
ordering_genes <- row.names(diff_test_res)[which(diff_test_res$qval < 0.01)]
length(ordering_genes) # 6354 in the author's run
cds <- setOrderingFilter(cds, ordering_genes)

# Step 2: dimension reduction with DDRTree via reduceDimension().
cds <- reduceDimension(cds, method = "DDRTree", max_components = 2)
# package.version(pkg = "monocle")

# Step 3: order the cells along the trajectory with orderCells().
# getwd()
# source("./order_cells.R")
# unloadNamespace('monocle')
# devtools::load_all("../monocle_2.26.0 (1).tar/monocle_2.26.0 (1)/monocle/")
devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")
cds <- orderCells(cds)
出图
# Trajectory plots before a root state is chosen, plus State summaries.
# Every plot is wrapped in print(): a ggplot object is only drawn when it is
# printed, and auto-printing does not happen when this script is run through
# source(), which would leave the PDF files empty.

pdf("1.pseudutime.cell.type.pre.order.pdf")
print(plot_cell_trajectory(cds, color_by = "cell.type"))
dev.off()

pdf("1.pseudutime.stim.pre.order.pdf")
print(plot_cell_trajectory(cds, color_by = "stim"))
dev.off()

pdf("1.pseudutime.State.pre.order.pdf")
print(plot_cell_trajectory(cds, color_by = "State"))
dev.off()

###### Faceted ("split") versions ########
pdf("2.split.pseudutime.Seurat.cell.type.pdf")
print(plot_cell_trajectory(cds, color_by = "cell.type") + facet_wrap(~cell.type))
dev.off()

pdf("2.split.pseudutime.stim.pdf")
print(plot_cell_trajectory(cds, color_by = "stim") + facet_wrap(~stim))
dev.off()

pdf("4.split.pseudutime.Seurat.State.pdf")
print(plot_cell_trajectory(cds, color_by = "cell.type") + facet_wrap(~State))
dev.off()

pdf("3.split.pseudutime.Seurat.cell.type_State.pdf")
print(plot_cell_trajectory(cds, color_by = "State") + facet_wrap(~cell.type))
dev.off()

# Cross-tabulate State against cell type and against stim, and export both.
# The tables are converted to data frames (one row per State) and written
# with openxlsx's documented argument names `colNames` / `rowNames`; the
# lower-case `colnames` / `rownames` used before are not among them.
state_by_cell_type <- table(pData(cds)$State, pData(cds)$cell.type)
state_by_cell_type
openxlsx::write.xlsx(
  as.data.frame.matrix(state_by_cell_type),
  "State_cellType_summary.xlsx",
  colNames = TRUE,
  rowNames = TRUE
)

state_by_stim <- table(pData(cds)$State, pData(cds)$stim)
state_by_stim
openxlsx::write.xlsx(
  as.data.frame.matrix(state_by_stim),
  "State_Stim_summary.xlsx",
  colNames = TRUE,
  rowNames = TRUE
)
getwd()
设置谁是root
## Choose the root state and re-order the cells from it.
# NOTE(review): the original header said state 2 is the root, but the call
# below passes root_state = 1 -- confirm which State should be the root
# (see the State / cell.type tables and plots produced earlier).
DimPlot(subset_data, label = TRUE)
table(Idents(subset_data))
DefaultAssay(subset_data)
# NOTE(review): the assay is switched to SCT and immediately back to RNA;
# the net effect is that RNA stays the default assay.
DefaultAssay(subset_data) <- "SCT"
DefaultAssay(subset_data) <- "RNA"
DimPlot(subset_data, label = TRUE)
# Close the device opened by the plots above, if any. An unconditional
# dev.off() raises an error when no graphics device is open.
if (dev.cur() > 1) {
  dev.off()
}
table(subset_data$cell.type)
getwd()

# Set the root. The result must be assigned back to `cds`: it was previously
# stored in `ds`, so the Pseudotime plot and the saved object below still
# used the un-rooted ordering.
cds <- orderCells(cds, root_state = 1)
getwd() # author's run: "/home/data/t040413/ipf/fibro_myofibro_recluster/+meso_monocle"

pdf("4.pseudutime.Pseudotime.pdf")
p <- plot_cell_trajectory(cds, color_by = "Pseudotime")
print(p)
dev.off()

# NOTE(review): save() writes a file that must be read back with load(),
# despite the ".rds" extension.
save(cds, file = "./cds_fibroblast_using_RNA_slot.rds")
最后产生这些图
其中一张