前段时间做了一下SCENIC单细胞转录因子分析,在重新配置SCENIC的运行环境时,发现这个包的函数和数据库数据有了很大的冲突,导致流程根本无法运行,以下说明一下如何解决这个问题。
cisTarget的数据库文件下载在https://resources.aertslab.org/cistarget/databases/,根据需求下载,human需要下载的两个文件是:
# hg19-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings.feather
hg19-500bp-upstream-7species.mc9nr.feather
# hg19-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather
hg19-tss-centered-10kb-7species.mc9nr.feather
在运行SCENIC分析时,主要报错点有两个,分别叙述如下:
分析代码如下:
scenicOptions <- initializeScenic(org="hgnc", dbDir="SCENIC_libs", dbIndexCol = "motifs", nCores=10)
实际执行时,这句代码会报错。使用rstudio的代码调试工具,可以发现initializeScenic报错的第一现场是在其调用的getDbAnnotations函数的最后一句,下面附上getDbAnnotations的源码:
function (scenicOptions)
{
dbAnnotFiles <- scenicOptions@settings$db_annotFiles
if (!is.null(dbAnnotFiles)) {
motifAnnotations <- NULL
for (annotPath in dbAnnotFiles) {
motifAnnot <- data.table::fread(annotPath)
motifAnnot$annotationSource <- factor(motifAnnot$annotationSource)
colnames(motifAnnot)[1] <- "motif"
levels(motifAnnot$annotationSource) <- c(levels(motifAnnot$annotationSource),
c("directAnnotation", "inferredBy_Orthology",
"inferredBy_MotifSimilarity", "inferredBy_MotifSimilarity_n_Orthology"))
motifAnnotations <- rbind(motifAnnotations, motifAnnot)
}
}
else {
if (is.na(getDatasetInfo(scenicOptions, "org")))
stop("Please provide an organism (scenicOptions@inputDatasetInfo$org).")
org <- getDatasetInfo(scenicOptions, "org")
if (is.na(org))
stop("Please provide an organism (scenicOptions@inputDatasetInfo$org).")
if (!org %in% c("hgnc", "mgi", "dmel"))
stop("Organism not recognized (scenicOptions@inputDatasetInfo$org).")
if (org == "hgnc")
motifAnnotName <- "motifAnnotations_hgnc" # <-------
if (org == "mgi")
motifAnnotName <- "motifAnnotations_mgi"
if (org == "dmel")
motifAnnotName <- "motifAnnotations_dmel"
if (!is.null(scenicOptions@settings$db_mcVersion)) {
if (scenicOptions@settings$db_mcVersion == "v8")
motifAnnotName <- paste0(motifAnnotName, "_v8")
}
library(RcisTarget)
data(list = motifAnnotName, package = "RcisTarget", verbose = FALSE)
motifAnnotations <- eval(as.name(motifAnnotName))
}
return(motifAnnotations)
}
出错的原因是实际执行时变量motifAnnotName会被赋值为motifAnnotations_hgnc,然而RcisTarget包中附录的motifAnnotations_hgnc.Rdata里面的变量名是motifAnnotations,并不是motifAnnotations_hgnc,所以下面这句话自然无法执行成功:
# data(list = motifAnnotName, package = "RcisTarget", verbose = FALSE)
# 上面的data用于将RcisTarget包的data文件夹中的所有Rdata数据载入R
motifAnnotations <- eval(as.name(motifAnnotName))
解决方案:将motifAnnotations改名为motifAnnotations_hgnc,然后导出为Rdata对象,重新置于RcisTarget R包的data文件夹下即可。
data(package = "RcisTarget", list = "motifAnnotations_hgnc")
motifAnnotations_hgnc <- motifAnnotations
save(motifAnnotations_hgnc, file = "motifAnnotations_hgnc.RData")
RcisTarget_pkg_dir <- system.file("data", package = "RcisTarget")
list.files(RcisTarget_pkg_dir)
# windows下将mv改为move
system(str_glue('mv {RcisTarget_pkg_dir}/motifAnnotations_hgnc.RData {RcisTarget_pkg_dir}/motifAnnotations_hgnc2.RData'))
system(str_glue('mv motifAnnotations_hgnc.RData {RcisTarget_pkg_dir}'))
由于下载的数据库feature文件中只有motif列名,没有feature列名,而checkAnnots函数必须要求feature列存在。
可以发现class_ScenicOptions.R有两处checkAnnots出现,一处是函数定义,一处是函数调用(这个调用正是位于initializeScenic函数体内),runSCENIC_2_createRegulons.R中有一个处函数调用。
# 在linux下通过如下方式查看checkAnnots函数的所在位置
# 下载SCENIC R包文件,解压后的R子文件夹即是R代码所在
grep checkAnnots *R
# class_ScenicOptions.R: featuresWithAnnot <- checkAnnots(object, motifAnnot)
# class_ScenicOptions.R:checkAnnots <- function(object, motifAnnot)
# runSCENIC_2_createRegulons.R: featuresWithAnnot <- checkAnnots(scenicOptions, motifAnnot)
解决方案很简单,既然checkAnnots强制要求存在feature列,那么就添加一个参数用于自定义这个feature。
解决方案:
# #### 1 ####
# class_ScenicOptions.R的initializeScenic函数:
# 将此句代码featuresWithAnnot <- checkAnnots(object, motifAnnot)
# 改为如下:
featuresWithAnnot <- checkAnnots(object, motifAnnot, rnktype = dbIndexCol)
# #### 2 ####
# class_ScenicOptions.R的checkAnnots函数
# 改为如下版本:
# 1. 添加rnktype参数
# 2. 注释函数体中的赋值语句rnktype = "features"
checkAnnots <- function(object, motifAnnot, rnktype = "features")
{
allFeaturesInAnnot <- unlist(motifAnnot[,1]) # motif or track
featuresWithAnnot <- lapply(getDatabases(object), function(dbFile)
{
# rnktype = "features" #TODO: add as option for custom dbs
nRnks <- getRanking(RcisTarget::importRankings(dbFile, columns = rnktype))
nRnks <- dplyr::pull(nRnks, rnktype)
length(intersect(allFeaturesInAnnot,nRnks))/length(unique(c(allFeaturesInAnnot,nRnks)))
})
return(featuresWithAnnot)
}
# #### 3 ####
# 同时修改runSCENIC_2_createRegulons.R文件的如下位置:
# 1. 注释rnktype <- "features"
# 2. 改为参数传递 rnktype <- dbIndexCol
# 3. checkAnnots函数调用那里,添加rnktype = dbIndexCol参数
genesInDb <- unique(unlist(lapply(getDatabases(scenicOptions), function(dbFilePath) {
rf <- arrow::ReadableFile$create(dbFilePath)
fr <- arrow::FeatherReader$create(rf)
genesInDb <- names(fr)
# rnktype <- "features" #TODO: add as option for custom dbs
rnktype <- dbIndexCol
genesInDb <- genesInDb[genesInDb != rnktype]
})))
## Check if annotation and rankings (potentially) match:
featuresWithAnnot <- checkAnnots(scenicOptions, motifAnnot, rnktype = dbIndexCol)
然后,将改完代码的SECENIC包重新进行本地安装即可。
如何本地安装R包,可以参见以前的推文:
Rtips:如何安装旧版本的R包
https://mp.weixin.qq.com/s/3eK3XB6QZreALopLgx6VsQ
library(Seurat)
library(tidyverse)
# 载入单细胞数据
load("sc.RData")
# expr
exprMat <- as.matrix(GetAssayData(Fibroblast, assay = "RNA", slot = "counts"))
cellInfo <- Fibroblast$RNA_snn_res.0.25
library(SCENIC)
# dbIndexCol: motifs tracks features
scenicOptions <- initializeScenic(org="hgnc", dbDir="SCENIC_libs", dbIndexCol = "motifs", nCores=10)
saveRDS(scenicOptions, file="int/scenicOptions.Rds")
# Co-expression network
genesKept <- geneFiltering(exprMat, scenicOptions)
exprMat_filtered <- exprMat[genesKept, ]
runCorrelation(exprMat_filtered, scenicOptions) # ?10min
exprMat_filtered_log <- log2(exprMat_filtered+1)
Sys.time()
runGenie3(exprMat_filtered_log, scenicOptions)
Sys.time()
scenicOptions <- initializeScenic(org="hgnc", dbDir="SCENIC_libs", dbIndexCol = "motifs", nCores=10)
scenicOptions@inputDatasetInfo$cellInfo <- "int/cellInfo.Rds"
### Build and score the GRN
exprMat_log <- log2(exprMat+1)
saveRDS(cellInfo, file="int/cellInfo.Rds")
# scenicOptions@settings$dbs <- scenicOptions@settings$dbs["10kb"] # Toy run settings
scenicOptions <- runSCENIC_1_coexNetwork2modules(scenicOptions)
scenicOptions <- runSCENIC_2_createRegulons(scenicOptions, dbIndexCol = "motifs") # 20min
scenicOptions <- runSCENIC_3_scoreCells(scenicOptions, exprMat_log)
scenicOptions <- runSCENIC_4_aucell_binarize(scenicOptions)