单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析17

小胡子刺猬的生信学习123

发布于 2022-09-22 21:13:15

3160

发布于 2022-09-22 21:13:15

文章被收录于专栏：文献分享及代码学习

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析1：https://cloud.tencent.com/developer/article/2055573

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析2：https://cloud.tencent.com/developer/article/2072069

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析3：https://cloud.tencent.com/developer/article/2078159

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析4：https://cloud.tencent.com/developer/article/2078348

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析5：https://cloud.tencent.com/developer/article/2084580

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析6：https://cloud.tencent.com/developer/article/2085385

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析7：https://cloud.tencent.com/developer/article/2085705

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析8：https://cloud.tencent.com/developer/article/2086805

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析9：https://cloud.tencent.com/developer/article/2087563

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析10：https://cloud.tencent.com/developer/article/2090290

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析11：https://cloud.tencent.com/developer/article/2093123

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析12：https://cloud.tencent.com/developer/article/2093208

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析13：https://cloud.tencent.com/developer/article/2093567

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析14: https://cloud.tencent.com/developer/article/2094034

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析15：https://cloud.tencent.com/developer/article/2102671

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析16:https://cloud.tencent.com/developer/article/2103030

这部分是作者为了寻找转录因子footpring进行的分析，主要是对相关的方法进行了改进

Part I: set intersection of distal regulatory elements with differentially accessible peaks

library(ArchR)
library(ChIPpeakAnno)
library(stats)
ArchR::addArchRThreads(threads = 32)
source("./Archr_Peak_Null_Permute.R")
source("./Archr_Peak_RawPval.R")
source("./getMatrixFromProject.R")
h5disableFileLocking()
SAMPLE.ID <- "All"
key.word.1 <- "pithelia"
key.word.2 <- "-Ciliated"
proj <- readRDS("./final_archr_proj_archrGS.rds")

p2g.df.obs <- readRDS("./All_P2G_Observed.rds")
#Subset to postive correlation P2Gs
p2g.df.sub <- dplyr::filter(p2g.df.obs,RawPVal <=1e-12 & Correlation >= 0.45& peakType == "Distal")


# Marker Peaks for malignant clusters with 100% patient specificity 
markersPeaks <- getMarkerFeatures(
  ArchRProj = proj, 
  useMatrix = "PeakMatrix", 
  groupBy = "predictedGroup_ArchR",
  bias = c("TSSEnrichment", "log10(nFrags)"),
  testMethod = "wilcoxon"
)

markerList <- getMarkers(markersPeaks, cutOff = "FDR <= 0.01 & Log2FC >= 1.25")
saveRDS(markerList,"markerspeaks.rds")


print(names(markerList))

idx.1 <- grep(key.word.1,names(markerList))
idx.2 <- grep(key.word.2,names(markerList))
idx.3 <- grep("16-Fibroblast",names(markerList))
idx.4 <- grep("0-Fibroblast",names(markerList))
idx.5 <- grep("27-Fibroblast",names(markerList))

idx <- c(idx.1,idx.2,idx.3,idx.4,idx.5)

markerList.sub <- markerList[idx]

print(names(markerList.sub))

# Only tumor cell clusters that are 100% patient specific 

df <- as.data.frame(proj@cellColData) %>% dplyr::group_by(predictedGroup_ArchR) %>% 
  dplyr::count(Sample) 

idents <- table(df$predictedGroup_ArchR)
idents.sub <- idents[idents ==1 & names(idents) %in% names(markerList.sub)]

markerList.sub <- markerList.sub[names(idents.sub)] 
print(names(markerList.sub))
# Intersect marker peaks for each cluster with P2Gs and write to bed file
for ( i in names(markerList.sub)){
  data <- unlist(markerList.sub[i])
  markers <- paste0(data$seqnames,":",data$start,"-",data$end)
  markers.int <- intersect(p2g.df.sub$peakName,markers)
  markers.int <- stringr::str_replace(string=markers.int,pattern = ":",replacement = "-")
  bed <- data.frame(do.call(rbind, strsplit(markers.int, "-", fixed=TRUE)))
  write.table(bed,paste0("Marker_Enhancers_ArchR_",i,".bed"),row.names = F,col.names = F,quote = F,sep = "\t")
}

# Concatenate marker peaks from each cluster and remove redundant peaks
data <- unlist(markerList.sub)
markers <- paste0(data$seqnames,":",data$start,"-",data$end)
markers.int <- intersect(p2g.df.sub$peakName,markers)
markers.int <- unique(markers.int)
markers.int <- stringr::str_replace(string=markers.int,pattern = ":",replacement = "-")
bed <- data.frame(do.call(rbind, strsplit(markers.int, "-", fixed=TRUE)))
write.table(bed,"Marker_Enhancers_ArchR-nodups.bed",row.names = F,col.names = F,quote = F,sep = "\t")

writeLines(capture.output(sessionInfo()), "sessionInfo_Intersect_DiffPeaks_P2Gs.txt")

Part II: Generating the input matrices for the TFSEE computation

#!/bin/bash
#SBATCH --job-name Reformat 
#SBATCH --cpus-per-task 1
#SBATCH -c 1
#SBATCH --mem 2g
#SBATCH --partition allnodes

# Remove whitespace from file names 

for file in *; do mv "$file" `echo $file | tr ' ' '_'` ; done

library(ArchR)
source("./Archr_Peak_Null_Permute.R")
source("./Archr_Peak_RawPval.R")
source("./getMatrixFromProject.R")
library(stringr)
library(utils)
library(dplyr)
ArchR::addArchRThreads(threads = 32)
h5disableFileLocking()
# ONlY USE 100% patient-specfic 
labels <- list.files(pattern = "Marker_Enhancers_ArchR")
labels <- str_remove(labels,"Marker_Enhancers_ArchR_")
labels <- str_remove(labels,".bed")
labels <- labels[-12]# Remove extra
print(labels)

enhancers <- read.delim("Marker_Enhancers_ArchR-nodups.bed",header = F)
enhancers <- paste0(enhancers$V1,":",enhancers$V2,"-",enhancers$V3)

# Pseudobulk ATAC enhancer matrix
proj.archr <- readRDS("./final_archr_proj_archrGS-P2Gs.rds")
peak.mat <- getMatrixFromProject.mod(proj.archr,useMatrix = "PeakMatrix")
cell.names <- colnames(assay(peak.mat))
peak.names <- paste0(seqnames(rowRanges(peak.mat)),":", start(ranges(rowRanges(peak.mat))),"-", end(ranges(rowRanges(peak.mat))))


peak.mat <- assay(peak.mat)

dim(peak.mat)
length(cell.names)
length(peak.names)

colnames(peak.mat) <- cell.names
rownames(peak.mat) <- peak.names
head(peak.mat[,1:4])


peaks.pseudobulk <- data.frame(rownames(peak.mat))
# Run twice to hit both whitespaces 
proj.archr$predictedGroup_ArchR <- str_replace(proj.archr$predictedGroup_ArchR," ","_")
proj.archr$predictedGroup_ArchR <- str_replace(proj.archr$predictedGroup_ArchR," ","_")
for (i in labels){
  cells <- rownames(dplyr::filter(as.data.frame(proj.archr@cellColData),predictedGroup_ArchR == i))
  
  peak.mat.sub <- peak.mat[,colnames(peak.mat) %in% cells]
  
  peaks.bulk <- Matrix::rowSums(peak.mat.sub)
  
  peaks.pseudobulk$i <- peaks.bulk
  colnames(peaks.pseudobulk)[dim(peaks.pseudobulk)[2]] <- i
  
  print("Iteration complete")
}

rownames(peaks.pseudobulk) <- peaks.pseudobulk[,1]
peaks.pseudobulk <- peaks.pseudobulk[,-1]

print(labels)
print(colnames(peaks.pseudobulk))
dim(peaks.pseudobulk)
head(peaks.pseudobulk[,1:6])



peaks.pseudobulk <- peaks.pseudobulk[rownames(peaks.pseudobulk) %in% unique(enhancers),]
dim(peaks.pseudobulk)
# Remove enhancer peaks that have ANY zero counts in ANY sample (no replicates)
peaks.pseudobulk[peaks.pseudobulk == 0] <- NA
peaks.pseudobulk <- peaks.pseudobulk[complete.cases(peaks.pseudobulk),]
dim(peaks.pseudobulk)
head(peaks.pseudobulk[,1:6])


# Subset original Marker enhancer lists to new updated nonzero enhancers
for ( i in 1:length(labels)){
  file <- paste0("Marker_Enhancers_ArchR_",labels[i],".bed")
  
  bed <- read.delim(file,header = F)
  rownames(bed) <- paste0(bed$V1,":",bed$V2,"-",bed$V3)
  
  bed.new <- bed[rownames(bed) %in% rownames(peaks.pseudobulk),]
  
  write.table(bed.new,paste0("Marker_Enhancers_ArchR_",labels[i],"-updated.bed"),row.names = F,col.names = F,quote = F,sep = "\t")
}



writeLines(capture.output(sessionInfo()), "sessionInfo_TFSEE_Step1_NonZero_Enhancers.txt")

本文系外文翻译，前往查看

如有侵权，请联系 cloudcommunity@tencent.com 删除。

https