视频地址:http://mpvideo.qpic.cn/0bc3tiabgaaaneakrfjtfvrvbgwdconaaeya.f10002.mp4?
参考:
代码:
rm(list = ls())
# setwd("H:/MedBioInfoCloud/analysis/TCGA/new/conventionalAnalysis")
options(stringsAsFactors = F)
library(TCGAbiolinks)
library(WGCNA)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(corrplot)
library(pheatmap)
FilePath <- dir("H:/MedBioInfoCloud/analysis/TCGA/new/processedTCGAdata/TCGA-STAR_Exp/",
"STARdata.Rdata$",full.names = T)
gene <- "EGFR"
type <- "protein_coding"
opt <- "output/003-CorrelationsBetween_mRNAs/01-screening/"
ifelse(dir.exists(opt),"The folder already exists.",dir.create(opt,recursive = T))
record_files <- dir(opt,".csv$",full.names = T)
ifelse(length(record_files)>0,file.remove(record_files),
"There is no record file that can be removed")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/filterGeneTypeExpr.R")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/del_dup_sample.R")
###TCGA数据库中33中癌症类型
project <- getGDCprojects()$project_id
project <- project[grep("TCGA-",project)]
# proj = "TCGA-LUAD"
cut_coef <- 0.3
cut_p <- 0.01
allcancercor <- data.frame()
for(proj in project){
message("===============================")
message(proj)
load(FilePath[grep(proj,FilePath)])#STARdata
tpm <- STARdata[["tpm"]]
anoinfo <- tpm[,1:3]
tpm <- filterGeneTypeExpr(expr = tpm,
fil_col = "gene_type",
filter = FALSE)
##过滤不表达的基因
tpm <- tpm[apply(tpm,1,var) !=0,]
if((gene %in% rownames(tpm)) & length(gene) == 1){
##正常组织样本ID
SamN <- TCGAquery_SampleTypes(barcode = colnames(tpm),
typesample = c("NT","NB","NBC","NEBV","NBM"))
##肿瘤组织样本ID
SamT <- setdiff(colnames(tpm),SamN)
###去除重复样本
tur_exp <- del_dup_sample(tpm[,SamT],col_rename = T)
###long2转换
tur_exp <- log2(tur_exp + 1)
gs2 <- intersect(anoinfo$gene_name[anoinfo$gene_type == type],rownames(tur_exp))
gs2 <- setdiff(gs2,gene)
g1exp <- data.frame(t(tur_exp[gene,]))
g2exp <- data.frame(t(tur_exp[gs2,]))
####皮尔森相关性
pearson_coef <- cor(g2exp,g1exp,method = "pearson" )
colnames(pearson_coef) <- "pearson"
pearson_p <- corPvalueStudent(pearson_coef,nrow(g1exp))
colnames(pearson_p) <- "pearson_p.value"
pear_dat <- cbind(pearson_coef,pearson_p)
####spearman相关性
spearman_coef <- cor(g2exp,g1exp,method = "spearman" )
colnames(spearman_coef) <- "spearman"
spearman_p <- corPvalueStudent(spearman_coef,nrow(g1exp))
colnames(spearman_p) <- "spearman_p.value"
spear_dat <- cbind(spearman_coef,spearman_p)
cor_data <- cbind(pear_dat,spear_dat) %>% as.data.frame()
head(cor_data)
screening_cor <- cor_data[((abs(cor_data[,1]) > cut_coef & (cor_data[,2] < cut_p)) & (abs(cor_data[,3]) > cut_coef & cor_data[,4] < cut_p)),]
screening_cor$cancer <- proj
screening_cor <- screening_cor %>% mutate(cor_gene = rownames(screening_cor),.before = 1)
rownames(screening_cor) <- c(1:nrow(screening_cor))
write.csv(screening_cor,file = paste0(opt,gene,"-",proj,"-correlation.csv"))
allcancercor <- rbind(allcancercor,screening_cor)
}
}
###统计相关性在所有癌症中的情况
library(tidyr)
stat <- table(allcancercor$cor_gene) %>% as.data.frame()
stat <- arrange(stat,desc(Freq))
top <- stat$Var1[1:10]
index <- lapply(top, function(x)grep(paste0("^",x,"$"),allcancercor$cor_gene)) %>% unlist()
pdata <- allcancercor[index,]
unique(pdata$cor_gene)
pea <- pdata[,c(1,2,6)]
head(pea)
pea <- spread(pea,cancer,pearson)
rownames(pea) <- pea$cor_gene
pea <- pea[,-1]
spe_p <- pdata[,c(1,4,6)]
head(spe_p)
spe_p <- spread(spe_p,cancer,spearman)
rownames(spe_p) <- spe_p$cor_gene
spe_p <- spe_p[,-1]
col2 <- colorRampPalette(c("#3300CC","#3399FF","white","#FF3333","#CC0000"),alpha = TRUE)
pheatmap_dat <- list(pearson= pea,spearman = spe_p)
d <- pheatmap_dat[[1]]
# 相关性热图
pdf(paste0(opt,gene,"-top30-in-All-cancer.pdf"),width = 7,height = 6)
for(m in names(pheatmap_dat)){
d <- pheatmap_dat[[m]]
pheatmap(d,
#annotation_col =annotation_col,
# annotation_row = annotation_row,
color = col2(50),
cluster_cols =T,
fontsize=6,
cluster_rows = T,
fontsize_row=6,
cellwidth =10,
cellheight =10,
fontsize_number = 16,
#scale="row",
show_colnames=T,
fontsize_col=8,
main = paste0(m," correlation")) %>% print()
}
dev.off()
本文分享自 MedBioInfoCloud 微信公众号,前往查看
如有侵权,请联系 cloudcommunity@tencent.com 删除。
本文参与 腾讯云自媒体同步曝光计划 ,欢迎热爱写作的你一起参与!