前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >TCGA分析-数据整理2

TCGA分析-数据整理2

原创
作者头像
素素
发布2023-10-31 21:56:21
2810
发布2023-10-31 21:56:21
举报

title: "三大R包差异分析"

output: html_document

editor_options:

chunk_output_type: console


1.三大R包差异分析

代码语言:text
复制
rm(list = ls())
load("GSE106899.Rdata")
table(group)
#> group
#> control   tumor 
#>      22      44
#deseq2----
library(DESeq2)
colData <- data.frame(row.names =colnames(exp6), 
                      condition=group)
if(!file.exists(paste0(proj,"_dd.Rdata"))){
  dds <- DESeqDataSetFromMatrix(
  countData = exp6,
  colData = colData,
  design = ~ condition)
  dds <- DESeq(dds)
  save(dds,file = paste0(proj,"_dd.Rdata"))
}
load(file = paste0(proj,"_dd.Rdata"))
class(dds)
#> [1] "DESeqDataSet"
#> attr(,"package")
#> [1] "DESeq2"
res <- results(dds, contrast = c("condition",rev(levels(group))))
#constrast
c("condition",rev(levels(group)))
#> [1] "condition" "tumor"     "control"
class(res)
#> [1] "DESeqResults"
#> attr(,"package")
#> [1] "DESeq2"
DEG1 <- as.data.frame(res)
library(dplyr)
DEG1 <- arrange(DEG1,pvalue)
DEG1 = na.omit(DEG1)
head(DEG1)
#>          baseMean log2FoldChange     lfcSE     stat       pvalue
#> Pfn2     57.60350       2.684809 0.4787181 5.608330 2.042883e-08
#> Slc2a1   49.24526       2.384014 0.4511164 5.284698 1.259126e-07
#> Sfrp2    27.72403       2.623599 0.5023184 5.222981 1.760653e-07
#> Marcksl1 10.95315       2.349047 0.4500747 5.219238 1.796607e-07
#> Ddit4    12.19504       1.611006 0.3203432 5.029000 4.930432e-07
#> Bcl11a   71.45106       2.362877 0.4746990 4.977632 6.436673e-07
#>                  padj
#> Pfn2     9.979483e-05
#> Slc2a1   2.194106e-04
#> Sfrp2    2.194106e-04
#> Marcksl1 2.194106e-04
#> Ddit4    4.817032e-04
#> Bcl11a   5.240524e-04

#添加change列标记基因上调下调
logFC_t = 0.585
pvalue_t = 0.05

k1 = (DEG1$pvalue < pvalue_t)&(DEG1$log2FoldChange < -logFC_t);table(k1)
#> k1
#> FALSE  TRUE 
#>  4759   126
k2 = (DEG1$pvalue < pvalue_t)&(DEG1$log2FoldChange > logFC_t);table(k2)
#> k2
#> FALSE  TRUE 
#>  4256   629
DEG1$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG1$change)
#> 
#> DOWN  NOT   UP 
#>  126 4130  629
head(DEG1)
#>          baseMean log2FoldChange     lfcSE     stat       pvalue
#> Pfn2     57.60350       2.684809 0.4787181 5.608330 2.042883e-08
#> Slc2a1   49.24526       2.384014 0.4511164 5.284698 1.259126e-07
#> Sfrp2    27.72403       2.623599 0.5023184 5.222981 1.760653e-07
#> Marcksl1 10.95315       2.349047 0.4500747 5.219238 1.796607e-07
#> Ddit4    12.19504       1.611006 0.3203432 5.029000 4.930432e-07
#> Bcl11a   71.45106       2.362877 0.4746990 4.977632 6.436673e-07
#>                  padj change
#> Pfn2     9.979483e-05     UP
#> Slc2a1   2.194106e-04     UP
#> Sfrp2    2.194106e-04     UP
#> Marcksl1 2.194106e-04     UP
#> Ddit4    4.817032e-04     UP
#> Bcl11a   5.240524e-04     UP

#edgeR----
library(edgeR)

dge <- DGEList(counts=exp6,group=group)
dge$samples$lib.size <- colSums(dge$counts)
dge <- calcNormFactors(dge) 

design <- model.matrix(~group)

dge <- estimateGLMCommonDisp(dge, design)
dge <- estimateGLMTrendedDisp(dge, design)
dge <- estimateGLMTagwiseDisp(dge, design)

fit <- glmFit(dge, design)
fit <- glmLRT(fit) 

DEG2=topTags(fit, n=Inf)
class(DEG2)
#> [1] "TopTags"
#> attr(,"package")
#> [1] "edgeR"
DEG2=as.data.frame(DEG2)
head(DEG2)
#>            logFC    logCPM       LR       PValue          FDR
#> Errfi1 -1.978325 0.7286484 27.51085 1.562158e-07 0.0008635609
#> Tiparp -1.866452 0.2415403 24.49273 7.459089e-07 0.0015882179
#> Ddit4   1.520452 1.3282727 24.17949 8.776197e-07 0.0015882179
#> Bcl11a  2.363640 3.7313587 23.11293 1.527598e-06 0.0015882179
#> Sfrp2   2.595633 2.4405642 22.21239 2.440938e-06 0.0015882179
#> Mcm2    4.686831 1.2813389 22.13340 2.543453e-06 0.0015882179

k1 = (DEG2$PValue < pvalue_t)&(DEG2$logFC < -logFC_t)
k2 = (DEG2$PValue < pvalue_t)&(DEG2$logFC > logFC_t)
DEG2$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))

head(DEG2)
#>            logFC    logCPM       LR       PValue          FDR
#> Errfi1 -1.978325 0.7286484 27.51085 1.562158e-07 0.0008635609
#> Tiparp -1.866452 0.2415403 24.49273 7.459089e-07 0.0015882179
#> Ddit4   1.520452 1.3282727 24.17949 8.776197e-07 0.0015882179
#> Bcl11a  2.363640 3.7313587 23.11293 1.527598e-06 0.0015882179
#> Sfrp2   2.595633 2.4405642 22.21239 2.440938e-06 0.0015882179
#> Mcm2    4.686831 1.2813389 22.13340 2.543453e-06 0.0015882179
#>        change
#> Errfi1   DOWN
#> Tiparp   DOWN
#> Ddit4      UP
#> Bcl11a     UP
#> Sfrp2      UP
#> Mcm2       UP
table(DEG2$change)
#> 
#> DOWN  NOT   UP 
#>  217 4735  576
#limma----
library(limma)
dge <- edgeR::DGEList(counts=exp6)
dge <- edgeR::calcNormFactors(dge)
design <- model.matrix(~group)
v <- voom(dge,design, normalize="quantile")

fit <- lmFit(v, design)
fit= eBayes(fit)

DEG3 = topTable(fit, coef=2, n=Inf)
DEG3 = na.omit(DEG3)

k1 = (DEG3$P.Value < pvalue_t)&(DEG3$logFC < -logFC_t)
k2 = (DEG3$P.Value < pvalue_t)&(DEG3$logFC > logFC_t)
DEG3$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG3$change)
#> 
#> DOWN  NOT   UP 
#>  217 5019  292
head(DEG3)
#>               logFC    AveExpr         t      P.Value  adj.P.Val
#> Ddit4      1.486366  0.7589110  5.020394 2.727289e-06 0.01507645
#> Six1       1.934545  1.0338629  4.768701 7.426821e-06 0.01759464
#> Col1a2     1.865652  2.0734033  4.704515 9.548468e-06 0.01759464
#> Klf6      -1.236666  4.9984036 -4.621729 1.316885e-05 0.01795344
#> Trp53inp2 -1.298168 -1.1309887 -4.567344 1.623864e-05 0.01795344
#> Trib2      1.265511 -0.2644945  4.351003 3.687606e-05 0.03397515
#>                  B change
#> Ddit4     4.321273     UP
#> Six1      3.450022     UP
#> Col1a2    3.208490     UP
#> Klf6      2.972140   DOWN
#> Trp53inp2 2.789282   DOWN
#> Trib2     2.067142     UP


tj = data.frame(deseq2 = as.integer(table(DEG1$change)),
           edgeR = as.integer(table(DEG2$change)),
           limma_voom = as.integer(table(DEG3$change)),
           row.names = c("down","not","up")
          );tj
#>      deseq2 edgeR limma_voom
#> down    126   217        217
#> not    4130  4735       5019
#> up      629   576        292
save(DEG1,DEG2,DEG3,group,tj,file = paste0(proj,"_DEG.Rdata"))

2.可视化

代码语言:text
复制
library(ggplot2)
library(tinyarray)
exp6[1:4,1:4]
#>       SVA492 SVA493 SVA494 SVA495
#> Itm2a      0      3      0      0
#> Dhx9       0     43      0      7
#> Ssu72      7     49      6      1
#> Mks1       0      3      0      0
# cpm 去除文库大小的影响
dat = log2(cpm(exp6)+1)
pca.plot = draw_pca(dat,group);pca.plot
代码语言:text
复制
save(pca.plot,file = paste0(proj,"_pcaplot.Rdata"))

cg1 = rownames(DEG1)[DEG1$change !="NOT"]
cg2 = rownames(DEG2)[DEG2$change !="NOT"]
cg3 = rownames(DEG3)[DEG3$change !="NOT"]

h1 = draw_heatmap(dat[cg1,],group)
h2 = draw_heatmap(dat[cg2,],group)
h3 = draw_heatmap(dat[cg3,],group)

v1 = draw_volcano(DEG1,pkg = 1,logFC_cutoff = logFC_t)
v2 = draw_volcano(DEG2,pkg = 2,logFC_cutoff = logFC_t)
v3 = draw_volcano(DEG3,pkg = 3,logFC_cutoff = logFC_t)

library(patchwork)
(h1 + h2 + h3) / (v1 + v2 + v3) +plot_layout(guides = 'collect') &theme(legend.position = "none")
代码语言:text
复制
ggsave(paste0(proj,"_heat_vo.png"),width = 15,height = 10)

3.三大R包差异基因对比

代码语言:text
复制
UP=function(df){
  rownames(df)[df$change=="UP"]
}
DOWN=function(df){
  rownames(df)[df$change=="DOWN"]
}
up = intersect(intersect(UP(DEG1),UP(DEG2)),UP(DEG3))
down = intersect(intersect(DOWN(DEG1),DOWN(DEG2)),DOWN(DEG3))
dat = log2(cpm(exp6)+1)
dat=as.data.frame(dat)
hp = draw_heatmap(dat[c(up,down),],group)

#上调、下调基因分别画维恩图
up_genes = list(Deseq2 = UP(DEG1),
          edgeR = UP(DEG2),
          limma = UP(DEG3))

down_genes = list(Deseq2 = DOWN(DEG1),
          edgeR = DOWN(DEG2),
          limma = DOWN(DEG3))

up.plot <- draw_venn(up_genes,"UPgene")
down.plot <- draw_venn(down_genes,"DOWNgene")

#维恩图拼图,终于搞定

library(patchwork)
#up.plot + down.plot
# 拼图
pca.plot + hp+up.plot +down.plot+ plot_layout(guides = "collect")
ggsave(paste0(proj,"_heat_ve_pca.png"),width = 15,height = 10)

分组聚类的热图

代码语言:text
复制
library(ComplexHeatmap)
library(circlize)
col_fun = colorRamp2(c(-2, 0, 2), c("#2fa1dd", "white", "#f87669"))
top_annotation = HeatmapAnnotation(
  cluster = anno_block(gp = gpar(fill = c("#f87669","#2fa1dd")),
                       labels = levels(group),
                       labels_gp = gpar(col = "white", fontsize = 12)))

m = Heatmap(t(scale(t(exp6[c(up,down),]))),name = " ",
            col = col_fun,
        top_annotation = top_annotation,
        column_split = group,
        show_heatmap_legend = T,
        border = F,
        show_column_names = F,
        show_row_names = F,
        use_raster = F,
        cluster_column_slices = F,
        column_title = NULL)
m

引自生信技能树

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 1.三大R包差异分析
  • 2.可视化
  • 3.三大R包差异基因对比
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档