视频地址:http://mpvideo.qpic.cn/0bc3ueaacaaagqalujrtqfrvbiodagqqaaia.f10002.mp4?
代码:
library(TCGAbiolinks)
library(reshape2)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(RColorBrewer)
FilePath <- dir("H:/MedBioInfoCloud/analysis/TCGA/new/processedTCGAdata/TCGA-miRNA_Isoform_Exp",
"Isoform_Expression_data.Rdata$",
full.names = T)
mirs <- c("hsa-miR-5590-3p","hsa-miR-142-5p","hsa-let-7f-5p")
opt <- "output/002-miR_ExpInNorAndTur/"
record_files <- dir(opt,".txt$",full.names = T)
ifelse(length(record_files)>0,file.remove(record_files),
"There is no record file that can be removed")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/filterGeneTypeExpr.R")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/del_dup_sample.R")
###TCGA数据库中33中癌症类型
project <- getGDCprojects()$project_id
project <- project[grep("TCGA-",project)]
# proj = "TCGA-BLCA"
norn <- 10 #正常样本数最小数量
geneexpdata <- data.frame()
for(proj in project){
message(proj)
load(FilePath[grep(proj,FilePath)])#
rpm <- miRNA_data[["RPM"]]
conmirs <- intersect(rownames(rpm),mirs)
if(length(conmirs)> 0){
##正常组织样本
SamN <- TCGAquery_SampleTypes(barcode = colnames(rpm),
typesample = c("NT","NB","NBC","NEBV","NBM"))
if(length(SamN)>= norn){
##肿瘤组织样本
SamT <- setdiff(colnames(rpm),SamN)
# dim(rpm[,SamT])
#过滤重复样本
nor_exp = del_dup_sample(rpm[,sort(SamN)],col_rename = T)
tur_exp = del_dup_sample(rpm[,sort(SamT)],col_rename = T)
###============非配对样本==
##构建数据框
nor_gsExp <- log2(data.frame(nor_exp[conmirs,],check.names = T) + 1) %>% t() %>%
as.data.frame() %>% mutate(Sample = rep(paste0("Normal(n=",ncol(nor_exp),")"),ncol(nor_exp)),
.before = 1)%>% melt(id.vars = "Sample")
tur_gsExp <- log2(data.frame(tur_exp[conmirs,],check.names = T) + 1) %>% t() %>%
as.data.frame() %>% mutate(Sample = rep(paste0("Tumor(n=",ncol(tur_exp),")"),ncol(tur_exp)),
.before = 1)%>% melt(id.vars = "Sample")
data <- rbind(nor_gsExp,tur_gsExp)
head(data)
colnames(data) <- c("Sample","miR","exp")
compaired <- list(unique(data$Sample))
data <- mutate(data,cancer = proj,.before = 1)
geneexpdata <- rbind(geneexpdata,data)
##======绘图===
##输出路径
# opt <- "output/001-RNASeq_ExpInNorAndTur/"
fp_boxplot <- paste0(opt,proj,"/boxplot")
ifelse(dir.exists(fp_boxplot),"The folder already exists.",dir.create(fp_boxplot,recursive = T))
###==============多个基因在同一个图中
if(length(conmirs)> 1){
p <- ggplot(data, aes(x= miR, y = exp,color = Sample)) +
#geom_jitter(size = 0.5,aes(x=variable, y=value,color = Group))+
geom_boxplot(aes(color = Sample),alpha =1,
lwd = 0.1, outlier.size = 1,
outlier.colour = "white")+ #color = c("red", "blue"),
# geom_violin(aes(fill = Sample),trim = FALSE)+
# geom_boxplot(width = 0.1,fill = "white")+
theme_bw()+
labs(y= expression(log[2](RPM + 1)))+
scale_color_manual(values = c(brewer.pal(3,"Dark2")))+ #c("#3300CC","#CC0000")
stat_compare_means(label = "p.signif") +
#labs(title = 'ImmuneCellAI') +
theme(legend.position = "top",
axis.text.x = element_text(angle = 45,face = "bold",colour = "#1A1A1A",hjust = 1,vjust = 1),
axis.text.y = element_text(face = "bold",colour = "#1A1A1A"),
axis.title = element_text(size = 12,face = "bold", colour = "#1A1A1A"),
legend.title = element_blank(),
axis.title.x = element_blank(),
legend.text = element_text(size = 12, face = "bold",colour = "#1A1A1A")
)
ggsave(filename = paste0(fp_boxplot,"/all_miR.pdf"),
plot = p,height=5,width= ifelse(length(conmirs)< 5,5,length(conmirs)*0.9))
}
# g <- conmirs[1]
####================单一绘制==============
f2 <- lapply(conmirs, function(g){
ldat <- data[data$miR == g,]
p = ggplot(ldat,aes(Sample,exp,fill= Sample))+
geom_boxplot(aes(fill = Sample),notch = FALSE,
position = position_dodge(width =0.01,preserve = "single"),
outlier.alpha =1,width=0.4) +
scale_fill_manual(values = c(brewer.pal(5,"Set1")))+
geom_signif(comparisons = compaired,
step_increase = 0.1,
map_signif_level = T,
margin_top = 0.2,
test = "wilcox.test")+
labs(y= paste0("The expression of ",g,"\nlog2(RPM + 1)"),title= proj)+
theme_classic()+
theme(panel.background=element_rect(fill="white",colour="black",size=0.25),
plot.title = element_text(hjust = 0.5),
axis.line=element_line(colour="black",size=0.25),
axis.title=element_text(size=10,face="plain",color="black"),
axis.text.x = element_text(angle = 45,face = "plain",colour = "black",hjust=1,vjust=1),
axis.title.x=element_text(size=12,face="plain",color="black"),
axis.title.y=element_text(size=12,face="plain",color="black"),
axis.text = element_text(size=12,color="black"),
legend.position="none"
)
#p
ggsave(filename = paste0(fp_boxplot,"/",g,"-Unpaired .pdf"),
plot = p,width = 2.5,height = 5)
})
###===============
fp_violin <- paste0(opt,proj,"/violin")
ifelse(dir.exists(fp_violin),"The folder already exists.",dir.create(fp_violin,recursive = T))
lapply(conmirs, function(g){
ldat <- data[data$miR == g,]
p = ggplot(ldat, aes(Sample,exp,fill= Sample))+
geom_violin(aes(fill = Sample),trim = FALSE)+
geom_signif(comparisons = compaired,
step_increase = 0.1,
map_signif_level = T,
margin_top=0.2,
tip_length =0.02,
test = "wilcox.test")+
geom_boxplot(width = 0.1,fill = "white")+
scale_fill_manual(values=c(brewer.pal(3,"Dark2")))+
theme_classic()+
labs(y= paste0("The expression of ",g,"\nlog2(RPM + 1)"),title= proj)+
theme(panel.background=element_rect(fill="white",colour="black",size=0.25),
plot.title = element_text(hjust = 0.5),
axis.line=element_line(colour="black",size=0.25),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45,face = "plain",colour = "black",hjust=1,vjust=1),
axis.text = element_text(size=12,face="plain",color="black"),
legend.position="none"
)
p
ggsave(filename = paste0(fp_violin,"/",g,"-Unpaired .pdf"),plot = p,width = 3,height = 5)
})
###===========配对样本=========
id <- intersect(colnames(nor_exp),colnames(tur_exp))
if(length(id) > 3){
paired_nor_gsExp <- nor_exp[conmirs,id]
paired_tur_gsExp <- tur_exp[conmirs,id]
paired_nor_gsExp <- log2(data.frame(paired_nor_gsExp,check.names = T) + 1) %>% t() %>%
as.data.frame() %>% mutate(Sample = rep("Normal",ncol(paired_nor_gsExp)),
id = id,
.before = 1)%>% melt(id.vars = c("Sample","id"))
paired_tur_gsExp <- log2(data.frame(paired_tur_gsExp,check.names = T) + 1) %>% t() %>%
as.data.frame() %>% mutate(Sample = rep("Tumor",ncol(paired_tur_gsExp)),
id = id,
.before = 1)%>% melt(id.vars =c("Sample","id"))
paired_data <- rbind(paired_nor_gsExp, paired_tur_gsExp)
head(paired_data)
colnames(paired_data) <- c("Sample","id","miR","exp")
paired_compaired <- list(unique(paired_data$Sample))
###===============
lapply(conmirs, function(g){
ldat <- paired_data[paired_data$miR == g,]
#为了防止配对样本信息错乱,先构造一个配对样本的数据集
dat1_paried <- reshape2::dcast(ldat[c('Sample', 'id', 'exp')], Sample~id)
rownames(dat1_paried) <- dat1_paried$Sample
dat1_paried <- t(dat1_paried[,-1] )%>%as.data.frame()
head(dat1_paried)
#配对 t 检验,双侧检验
p.value <- t.test(dat1_paried$Normal, dat1_paried$Tumor, paired = TRUE, alternative = 'two.sided', conf.level = 0.95)
pv <- p.value[["p.value"]]
stasig <- ifelse(pv >= 0.001,paste0('p value = ',round(pv,3)),"p value < 0.01")
p1 <- ggplot(ldat, aes(x = Sample, y = exp,colour = Sample)) +
# geom_boxplot(aes(fill = Sample), show.legend = FALSE, width = 0.6) + #绘制箱线图展示肿瘤组织和正常组织的两组基因表达整体分布
geom_point(size = 3) + #绘制散点表示单个样本的基因表达信息
geom_line(aes(group = id), color = 'black', lwd = 0.05) + #绘制样本连线,通过 aes(group) 参数指定配对样本信息
scale_fill_manual(values = c('#FE7280', '#AC88FF')) + #以下是颜色分配、主题更改等,不再多说
theme_classic()+
labs(y= paste0("The expression of ",g,"\nlog2(TPM + 1)"),title= proj)+
theme(panel.background=element_rect(fill="white",colour="black",size=0.25),
plot.title = element_text(hjust = 0.5),
axis.line=element_line(colour="black",size=0.25),
axis.title.x = element_blank(),
axis.text.x = element_text(face = "plain",colour = "black"),
axis.text = element_text(size=12,face="plain",color="black"),
legend.position="none"
)+ annotate('text',
x= ifelse(min(dat1_paried$Normal) > min(dat1_paried$Tumor),1,2),
y= min(ldat$exp + 0.5),
label = stasig ,size = 4)
ggsave(filename = paste0(fp_boxplot,"/",g,"-paired .pdf"),plot = p1,width = 3,height = 3)
})
}
##记录分析的样本信息,只记录一次
opinfo <- paste0(proj,":\n\t","Normal(n)=",length(SamN),";Tumor(n)=",length(SamT))
output <- file(paste0(opt,"annotation.txt"), open="a+b")
writeLines(opinfo, con=output)
close(output)
}
else if(length(SamN) == 0){
output <- file(paste0(opt," zeroNormalSamples_CancerType.txt"), open="a+b")
writeLines(proj, con=output)
close(output)
}else {
opinfo <- paste0(proj,":\n\t","Normal(n)=",length(SamN))
output <- file(paste0(opt,"lessNormalSamples_CancerType.txt"), open="a+b")
writeLines(opinfo, con=output)
close(output)
}
}
}
###============单基因泛癌=============
###===============
fp_pan <- paste0(opt,"/pan-cancer")
ifelse(dir.exists(fp_pan),"The folder already exists.",dir.create(fp_pan,recursive = T))
lapply(unique(geneexpdata$miR), function(g){
# g = unique(geneexpdata$miR)[1]
pan_exp <- geneexpdata[geneexpdata$miR == g,]
head(pan_exp)
pan_exp$Sample[grep("Normal",pan_exp$Sample)] <- "Normal"
pan_exp$Sample[grep("Tumor",pan_exp$Sample)] <- "Tumor"
# c(brewer.pal(3,"Dark2"))
p <- ggplot(pan_exp, aes(x=cancer, y = exp,color = Sample)) +
#geom_jitter(size = 0.5,aes(x=variable, y=value,color = Group))+
geom_boxplot(aes(color = Sample),alpha =1,
lwd = 0.1, outlier.size = 1,
outlier.colour = "white")+ #color = c("red", "blue"),
# geom_violin(aes(fill = Sample),trim = FALSE)+
# geom_boxplot(width = 0.1,fill = "white")+
theme_bw()+
labs(y= paste0("The expression of ",g,"\nlog2(RPM + 1)"))+
scale_color_manual(values = c(brewer.pal(3,"Dark2")))+ #c("#3300CC","#CC0000")
stat_compare_means(label = "p.signif") +
#labs(title = 'ImmuneCellAI') +
theme(legend.position = "top",
axis.text.x = element_text(angle = 45,face = "bold",colour = "#1A1A1A",hjust=1,vjust=1),
axis.text.y = element_text(face = "bold",colour = "#1A1A1A"),
axis.title = element_text(size = 12,face = "bold", colour = "#1A1A1A"),
legend.title = element_blank(),
axis.title.x = element_blank(),
legend.text = element_text(size = 12, face = "bold",colour = "#1A1A1A")
)
ggsave(filename = paste0(fp_pan,"/",g,"_in_pan-cancer.pdf"),
plot = p,height=5,width=9)
})
本文分享自 MedBioInfoCloud 微信公众号,前往查看
如有侵权,请联系 cloudcommunity@tencent.com 删除。
本文参与 腾讯云自媒体同步曝光计划 ,欢迎热爱写作的你一起参与!