if(!require("BiocManager")) install.packages("BiocManager",update = F,ask = F)
cran_packages <- c('tidyr',
'tibble',
'dplyr',
'stringr',
'ggplot2',
'ggpubr',
'factoextra',
'FactoMineR',
'devtools',
'cowplot',
'patchwork',
'basetheme',
'paletteer',
'AnnoProbe',
'ggthemes',
'VennDiagram',
'tinyarray')
Biocductor_packages <- c('GEOquery',
'hgu133plus2.db',
'ggnewscale',
"limma",
"impute",
"GSEABase",
"GSVA",
"clusterProfiler",
"org.Hs.eg.db",
"preprocessCore",
"enrichplot")
for (pkg in cran_packages){
if (! require(pkg,character.only=T,quietly = T) ) {
install.packages(pkg,ask = F,update = F)
require(pkg,character.only=T)
}
}
for (pkg in Biocductor_packages){
if (! require(pkg,character.only=T,quietly = T) ) {
BiocManager::install(pkg,ask = F,update = F)
require(pkg,character.only=T)
}
}
#打破下载时间的限制,改前60秒,改后10w秒
options(timeout = 100000)
options(scipen = 20)#不要以科学计数法表示
eSet = getGEO("GSE7305", destdir = '.', getGPL = F)
## Found 1 file(s)
## GSE7305_series_matrix.txt.gz
## Using locally cached version: ./GSE7305_series_matrix.txt.gz
#研究一下这个eSet
class(eSet)
## [1] "list"
length(eSet)
## [1] 1
eSet = eSet[[1]]
class(eSet)
## [1] "ExpressionSet"
## attr(,"package")
## [1] "Biobase"
exp <- exprs(eSet)
dim(exp)
## [1] 54675 20
range(exp)#看数据范围决定是否需要log,是否有负值,异常值
## [1] 5.020951 22011.934000
#发现取值范围在5-22011之间,需要log
exp = log2(exp+1)
boxplot(exp,las = 2) #看是否有异常样本
#发现没有异常值,每个样本的median都差不多,很开心,进行下一步
pd <- pData(eSet)
#由于数据本身的原因,表达矩阵的列名可能和临床信息的行名不是一一对应的,为了之后的操作方便,我们在这里把它们的顺序调整到完全一致
p = identical(rownames(pd),colnames(exp));p
## [1] TRUE
if(!p) {
s = intersect(rownames(pd),colnames(exp))
exp = exp[,s]
pd = pd[s,]
}
gpl_number <- eSet@annotation;gpl_number
## [1] "GPL570"
#捷径
find_anno(gpl_number) #打出找注释的代码
## `library(hgu133plus2.db);ids <- toTable(hgu133plus2SYMBOL)` and `ids <- AnnoProbe::idmap('GPL570')` are both avaliable
ids <- AnnoProbe::idmap('GPL570')
## file downloaded in C:/Users/shang/Desktop/生信技能树/Rnote
#如果捷径找不到探针注释,参考老师上课代码02
k = str_detect(pd$title,"Normal");table(k)
## k
## FALSE TRUE
## 10 10
Group = ifelse(k,"Normal","Disease")
dat=as.data.frame(t(exp))
dat.pca <- PCA(dat, graph = FALSE)
fviz_pca_ind(dat.pca,
geom.ind = "point", # show points only (nbut not "text")
col.ind = Group, # color by groups
palette = c("#00AFBB", "#E7B800"),
addEllipses = TRUE, # Concentration ellipses
legend.title = "Groups"
)
#哇,太棒了,数据组内重复好,组间差别大!
library(pheatmap)
cg = names(tail(sort(apply(exp,1,sd)),1000))
n = exp[cg,]
annotation_col = data.frame(group=Group)
rownames(annotation_col) = colnames(n)
pheatmap(n,
show_colnames =F,
show_rownames = F,
annotation_col=annotation_col,
scale = "row",
breaks = seq(-3,3,length.out = 100)
)
#哇,太棒啦,处理组和对照组表达量对比明显!
design = model.matrix(~Group)
fit = lmFit(exp,design)
fit = eBayes(fit)
deg = topTable(fit,coef = 2,number = Inf)
#加probe_id列
deg = mutate(deg,probe_id = rownames(deg))
#加上探针注释
ids = distinct(ids,symbol,.keep_all = T)
deg = inner_join(deg,ids,by="probe_id")
nrow(deg)
## [1] 20188
#加change列,标记上下调基因
logFC_t = 1
p_t = 0.05
k1 = (deg$P.Value < p_t)&(deg$logFC < -logFC_t)
k2 = (deg$P.Value < p_t)&(deg$logFC > logFC_t)
deg = mutate(deg,change = ifelse(k1,"down",ifelse(k2,"up","stable")))
table(deg$change)
##
## down stable up
## 610 19009 569
p <- ggplot(data = deg,
aes(x = logFC,
y = -log10(P.Value))) +
geom_point(alpha=0.4, size=3.5,
aes(color=change)) +
scale_color_manual(values=c("blue", "grey","red"))+
geom_vline(xintercept=c(-logFC_t,logFC_t),lty=4,col="black",linewidth=0.8) +
geom_hline(yintercept = -log10(p_t),lty=4,col="black",linewidth=0.8) +
theme_bw()
p
exp = exp[deg$probe_id,]
rownames(exp) = deg$symbol
cg = deg$symbol[deg$change !="stable"]
n = exp[cg,]
library(pheatmap)
annotation_col = data.frame(group = Group)
rownames(annotation_col) = colnames(n)
pheatmap(n,show_colnames =F,
show_rownames = F,
scale = "row",
#cluster_cols = F,
annotation_col=annotation_col,
breaks = seq(-3,3,length.out = 100)
)
#加ENTREZID列
s2e = bitr(deg$symbol,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)#人类
## 'select()' returned 1:many mapping between keys and columns
nrow(deg)
## [1] 20188
deg = inner_join(deg,s2e,by=c("symbol"="SYMBOL"))
nrow(deg)
## [1] 19207
#差异基因数据
gene_up = deg$ENTREZID[deg$change == 'up']
gene_down = deg$ENTREZID[deg$change == 'down']
gene_diff = c(gene_up,gene_down)
ego <- enrichGO(gene = gene_diff,
OrgDb= org.Hs.eg.db,
ont = "ALL",
readable = TRUE)
ego_BP <- enrichGO(gene = gene_diff,
OrgDb= org.Hs.eg.db,
ont = "BP",
readable = TRUE)
class(ego)
## [1] "enrichResult"
## attr(,"package")
## [1] "DOSE"
#条带图
barplot(ego)
barplot(ego, split = "ONTOLOGY") +
facet_grid(ONTOLOGY ~ ., space = "free_y",scales = "free_y")
#气泡图
dotplot(ego)
dotplot(ego, split = "ONTOLOGY") +
facet_grid(ONTOLOGY ~ ., space = "free_y",scales = "free_y")
#展示top通路的共同基因,要放大看。
gl = deg$logFC
names(gl) = deg$ENTREZID
head(gl)
## 730 1475 375295 5740 2627 5270
## -6.270309 -3.943359 -2.318498 -4.905540 -4.878195 -4.106051
cnetplot(ego,
layout = "star",
color.params = list(foldChange = gl),
showCategory = 3)
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
goplot(ego_BP)
kk.up <- enrichKEGG(gene = gene_up,
organism = 'hsa')
kk.down <- enrichKEGG(gene = gene_down,
organism = 'hsa')
kk.diff <- enrichKEGG(gene = gene_diff,
organism = 'hsa')
#看看富集到了吗? KEGG有可能出现富集不到的情况
table(kk.diff@result$p.adjust<0.05)
##
## FALSE TRUE
## 302 11
table(kk.up@result$p.adjust<0.05)
##
## FALSE TRUE
## 257 6
table(kk.down@result$p.adjust<0.05)
##
## FALSE TRUE
## 254 16
#条带图
barplot(kk.diff)
#气泡图
dotplot(kk.diff)
#展示top通路的共同基因,要放大看。
gl = deg$logFC
names(gl) = deg$ENTREZID
head(gl)
## 730 1475 375295 5740 2627 5270
## -6.270309 -3.943359 -2.318498 -4.905540 -4.878195 -4.106051
kk.diff <- setReadable(kk.diff,
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID")
cnetplot(kk.diff,
#layout = "star",
color.params = list(foldChange = gl),
showCategory = 3)
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
#双向图
source("kegg_plot_function.R")
g_kegg <- kegg_plot(kk.up,kk.down,p.adjust = T)
g_kegg
g_kegg +scale_y_continuous(labels = c(15,10,5,0,5,10))
以上信息均来自生信技能树
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。