导语
GUIDE ╲
基于模型拟合的常见绘图注释有模型方程、显着性检验和各种拟合优度指标。哪些注释最有用取决于是将 x 和 y 都映射到连续变量,还是将 y 映射到连续变量,以及将 x 映射到因子。在某些情况下,可能需要添加方差分析表或汇总表作为绘图注释。
背景介绍
在ggplots中支持基于计算和模型拟合的注释可以作为新的统计信息来实现,这些统计信息对绘图数据进行计算,并将结果传递给现有几何图形。然而这种方法相当繁琐且容易出错,因此小编给大家介绍一个可以为各种模型拟合函数绘制预测值、残差、偏差和权重的R包ggpmisc,可以轻松地实现与拟合模型相关的注释和绘图!
R包安装
BiocManager::install("ggpmisc")
library(ggpmisc)
library(tibble)
library(dplyr)
library(quantreg)
##加载一些需要的包
eval_nlme <- requireNamespace("nlme", quietly = TRUE)
if (eval_nlme) library(nlme)
eval_broom <- requireNamespace("broom", quietly = TRUE)
if (eval_broom) library(broom)
eval_broom_mixed <- requireNamespace("broom.mixed", quietly = TRUE)
if (eval_broom_mixed) library(broom.mixed)
应用实例
01
函数介绍
stat_correlation() 计算 Pearson、Kendall 或 Spearman 相关系数之一,并测试它们是否不为零。首先生成一组测试数据。
set.seed(4321)
# generate artificial data
x <- 1:100
y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
my.data <- data.frame(x,
y,
group = c("A", "B"),
y2 = y * c(0.5,2),
block = c("a", "a", "b", "b"),
wt = sqrt(x))
默认计算person相关系数
ggplot(my.data, aes(x, y)) +
geom_point() +
stat_correlation()
#可以设置method改变相关性计算方法
ggplot(my.data, aes(x, y, color = group)) +
geom_point() +
stat_correlation(method = "spearman")
可以按颜色分组
ggplot(my.data, aes(x, y, color = group)) +
geom_point() +
stat_correlation()
stat_correlation()生成多个标签,可以在对aes()的调用中自由地将它们组合起来,以自定义注释。
ggplot(my.data, aes(x, y, color = group)) +
geom_point() +
stat_correlation(mapping = aes(label = paste(after_stat(cor.label),
after_stat(t.value.label),
after_stat(p.value.label),
after_stat(n.label),
sep = '*"; "*')))
支持分面
ggplot(my.data, aes(x, y)) +
geom_point() +
stat_correlation() +
facet_wrap(~group)
可以为图形添加方程和曲线,首先生成一组测试数据。
set.seed(4321)
# generate artificial data
x <- 1:100
y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
my.data <- data.frame(x,
y,
group = c("A", "B"),
y2 = y * c(0.5,2),
block = c("a", "a", "b", "b"),
wt = sqrt(x))
第一个示例使用默认值。
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
stat_poly_line(formula = formula) +
stat_poly_eq(formula = formula)
将方程也作为一个字符串返回,该字符串需要解析为一个表达式以供显示。
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
stat_poly_line(formula = formula) +
stat_poly_eq(aes(label = after_stat(eq.label)), formula = formula)
分组进行颜色设置
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2, colour = group)) +
geom_point() +
stat_poly_line(formula = formula) +
stat_poly_eq(aes(label = after_stat(eq.label)), formula = formula, vstep = 0.06)
描述x,y之间关系的斜率
ggplot(my.data, aes(x, y)) +
geom_point() +
stat_ma_line() +
stat_ma_eq(aes(label = after_stat(eq.label)))
ggplot(my.data, aes(x, y)) +
geom_point() +
stat_ma_line(color = "blue") +
stat_ma_eq(aes(label = after_stat(eq.label)), color = "blue") +
stat_ma_line(color = "red", orientation = "y") +
stat_ma_eq(aes(label = after_stat(eq.label)), color = "red", orientation = "y",
label.y = 0.9)
添加置信区间
ggplot(my.data, aes(x, y)) +
geom_point() +
stat_quant_band(formula = y ~ poly(x, 2))
ggplot(my.data, aes(x, y)) +
geom_point() +
stat_quant_line(formula = y ~ poly(x, 2), quantiles = 0.5)
stat_fit_deviations
实现残差统计
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_smooth(method = "lm", formula = formula) +
stat_fit_deviations(formula = formula, colour = "red") +
geom_point()
添加权重
my.data.outlier <- my.data
my.data.outlier[6, "y"] <- my.data.outlier[6, "y"] * 5
ggplot(my.data.outlier, aes(x, y)) +
stat_smooth(method = MASS::rlm, formula = formula) +
stat_fit_deviations(formula = formula, method = "rlm",
mapping = aes(colour = after_stat(weights)),
show.legend = TRUE) +
scale_color_gradient(low = "red", high = "blue", limits = c(0, 1)) +
geom_point()
formula <- y ~ x + I(x^2) + I(x^3)
ggplot(my.data, aes(x, y, colour = group)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_fit_glance(method = "lm",
method.args = list(formula = formula),
label.x = "right",
label.y = "bottom",
aes(label = paste("italic(P)*\"-value = \"*",
signif(after_stat(p.value), digits = 4),
sep = "")),
parse = TRUE)
stat_fit_tb
可以为任何拟合模型添加汇总表或方差分析表
formula <- y ~ x + I(x^2) + I(x^3)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_fit_tb(method = "lm",
method.args = list(formula = formula),
tb.vars = c(Parameter = "term",
Estimate = "estimate",
"s.e." = "std.error",
"italic(t)" = "statistic",
"italic(P)" = "p.value"),
label.y = "top", label.x = "left",
parse = TRUE)
02
应用实例
火山图实例
volcano_example.df %>%
mutate(., outcome.fct = outcome2factor(outcome)) %>%
ggplot(., aes(logFC, PValue, colour = outcome.fct)) +
geom_point() +
scale_x_logFC(name = "Transcript abundance%unit") +
scale_y_Pvalue() +
scale_colour_outcome() +
stat_quadrant_counts(data = . %>% filter(outcome != 0))
象限图实例
quadrant_example.df %>%
mutate(.,
outcome.x.fct = outcome2factor(outcome.x),
outcome.y.fct = outcome2factor(outcome.y),
outcome.xy.fct = xy_outcomes2factor(outcome.x, outcome.y)) %>%
filter(outcome.xy.fct != "none") %>%
ggplot(., aes(logFC.x, logFC.y, colour = outcome.x.fct, fill = outcome.y.fct)) +
geom_quadrant_lines(linetype = "dotted") +
stat_quadrant_counts(size = 3, colour = "white") +
geom_point(shape = "circle filled") +
scale_x_logFC(name = "Transcript abundance for x%unit") +
scale_y_logFC(name = "Transcript abundance for y%unit") +
scale_colour_outcome() +
scale_fill_outcome() +
theme_dark()
小编总结
作为ggplot2的扩展包,ggpmisc可以方便的给我们的图片添加公式、残差等等多种注释,ggpmisc包也在不断更新中,我们也期待以后会有更强大的功能!