前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >R语言实现高级的韦恩图可视化

R语言实现高级的韦恩图可视化

作者头像
一粒沙
发布2021-10-11 17:09:12
1.2K0
发布2021-10-11 17:09:12
举报
文章被收录于专栏:R语言交流中心R语言交流中心

韦恩图大家应该都不陌生,主要用来展示不同事物或者组之间的数学或逻辑关系,主要用于集合的运算结果展示。今天给大家介绍一个在R语言中更加高级的展示形式,实现此功能的R包是UpSetR。首先看下包的安装:

代码语言:javascript
复制
install.packages("UpSetR")

接下来我们直接通过实例来看下如何进行数据的可视化:

代码语言:javascript
复制
##载入包
library(UpSetR)
library(ggplot2)
library(grid)
library(plyr)
代码语言:javascript
复制
##构建数据

# example oflist input (list of named vectors)
listInput <-list(one = c(1, 2, 3, 5, 7, 8, 11, 12, 13), two = c(1, 2, 4, 5, 10), three =c(1, 5, 6, 7, 8, 9, 10, 12, 13))
 
# example ofexpression input
expressionInput<- c(one = 2, two = 1, three = 2, `one&two` = 1, `one&three` = 4,`two&three` = 1, `one&two&three` = 2)
代码语言:javascript
复制
##可视化结果
upset(fromList(listInput),order.by = "freq")
代码语言:javascript
复制
upset(fromExpression(expressionInput),order.by = "freq")
代码语言:javascript
复制
##载入数据并绘图
movies <-read.csv(system.file("extdata", "movies.csv", package ="UpSetR"), header = T, sep = ";")
 
## nsets(频数最多的前六个变量),text.scale =c(intersection size title, intersection size ticklabels, set size title, set size tick labels, set names, numbers above bars)
upset(movies,nsets = 6, number.angles = 30, point.size = 3.5, line.size = 2, mainbar.y.label = "Genre Intersections", sets.x.label = "Movies Per Genre", text.scale =c(1.3, 1.3, 1, 1, 2, 0.75))
代码语言:javascript
复制
##自定义交集的组
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "freq")
代码语言:javascript
复制
##基于相交的等级进行排序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "degree")
代码语言:javascript
复制
##基于等级和频率共同排序,通过先后来确定排序顺序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = c("degree", "freq"))
代码语言:javascript
复制
##保留各组频数,不排序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "freq", keep.order = TRUE)
代码语言:javascript
复制
##对交集结果进行分组,nintersects交叉点的数目阈值,cutoff交叉结果阈值。
upset(movies,nintersects = 70, group.by = "sets", cutoff = 7)
代码语言:javascript
复制
##空的交叉点展示
upset(movies,empty.intersections = "on", order.by = "freq")
代码语言:javascript
复制
##利用不同的颜色显示重要的信息
upset(movies,queries = list(list(query = intersects, params = list("Drama", "Comedy","Action"), color = "orange", active = T), list(query =intersects, params = list("Drama"), color = "red", active =F), list(query = intersects, params = list("Action","Drama"), active = T)))
代码语言:javascript
复制
##通过设置阈值进行标记
upset(movies,queries = list(list(query = elements, params = list("AvgRating", 3.5,4.1), color = "blue", active = T), list(query = elements, params =list("ReleaseDate", 1980, 1990, 2000), color = "red",active = F)))
代码语言:javascript
复制
##通过expression进行筛选
upset(movies,queries = list(list(query = intersects, params = list("Action", "Drama"),active = T), list(query = elements, params = list("ReleaseDate", 1980,1990, 2000), color = "red", active = F)), expression ="AvgRating > 3 & Watches > 100")
代码语言:javascript
复制
##自定义的query结构
Myfunc <-function(row, release, rating) {data <- (row["ReleaseDate"] %in%release) & (row["AvgRating"] > rating)}#row数据源,release,rating指的是parms中的第一,二个参数
upset(movies,queries = list(list(query = Myfunc, params = list(c(1970, 1980, 1990, 1999,2000), 2.5), color = "blue", active = T)))
代码语言:javascript
复制
##增加query的标签legend
upset(movies,query.legend = "top", queries = list(list(query = intersects,
    params = list("Drama","Comedy", "Action"), color = "orange", active =T,
    query.name = "Funny action"),list(query = intersects, params = list("Drama"),
    color = "red", active = F),list(query = intersects, params = list("Action",
"Drama"), active = T, query.name = "Emotionalaction")))
代码语言:javascript
复制
##综合前面的方式的完整例子
upset(movies, query.legend = "bottom", queries =list(list(query = Myfunc, params = list(c(1970,
    1980, 1990, 1999, 2000),2.5), color = "orange", active = T), list(query = intersects,
    params = list("Action","Drama"), active = F), list(query = elements, params =list("ReleaseDate",
    1980, 1990, 2000), color ="red", active = F, query.name = "Decades")),
    expression ="AvgRating > 3 & Watches > 100")
代码语言:javascript
复制
##通过柱状图增加变量的其它数据信息其中type=bar plot("hist") or heat map ("heat"/“bool”)
sets <- names(movies[3:19])
avgRottenTomatoesScore <- round(runif(17, min = 0, max = 90))
metadata <- as.data.frame(cbind(sets, avgRottenTomatoesScore))
names(metadata) <- c("sets","avgRottenTomatoesScore")
metadata$avgRottenTomatoesScore <-as.numeric(as.character(metadata$avgRottenTomatoesScore))
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
    column ="avgRottenTomatoesScore", assign = 20))))
代码语言:javascript
复制
##增加热图信息
Cities <- sample(c("Boston", "NYC","LA"), 17, replace = T)
metadata <- cbind(metadata, Cities)
metadata$Cities <- as.character(metadata$Cities)
metadata[which(metadata$sets %in% c("Drama","Comedy", "Action", "Thriller",
    "Romance")), ]
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "heat",
    column ="Cities", assign = 10, colors = c(Boston = "green", NYC ="navy",
        LA ="purple")))))
代码语言:javascript
复制
##增加文字信息
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "text",
    column ="Cities", assign = 10, colors = c(Boston = "green", NYC ="navy",
        LA ="purple")))))
代码语言:javascript
复制
##直接设置连线区域背景
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
    column ="avgRottenTomatoesScore", assign = 20), list(type ="matrix_rows",
    column ="Cities", colors = c(Boston = "green", NYC ="navy", LA = "purple"),
    alpha = 0.5))))
代码语言:javascript
复制
##一次添加多种信息
accepted <- round(runif(17, min = 0, max = 1))
metadata <- cbind(metadata, accepted)
metadata[which(metadata$sets %in% c("Drama","Comedy", "Action", "Thriller", "Romance")),]
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist", column = "avgRottenTomatoesScore",assign = 20), list(type = "bool", column = "accepted", assign= 5, colors = c("#FF3333", "#006400")), list(type ="text", column = "Cities", assign = 5, colors = c(Boston ="green", NYC = "navy", LA = "purple")))))
代码语言:javascript
复制
##混合图的绘制,通过attribute.plots添加
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
    column ="avgRottenTomatoesScore", assign = 20), list(type = "bool",column = "accepted",
    assign = 5, colors =c("#FF3333", "#006400")), list(type = "text",column = "Cities",
    assign = 5, colors =c(Boston = "green", NYC = "navy", LA ="purple")),
    list(type ="matrix_rows", column = "Cities", colors = c(Boston ="green",
        NYC ="navy", LA = "purple"), alpha = 0.5))), queries =list(list(query = intersects,
    params =list("Drama"), color = "red", active = F), list(query =intersects,
    params =list("Action", "Drama"), active = T), list(query =intersects,
    params =list("Drama", "Comedy", "Action"), color ="orange", active = T)),
    attribute.plots =list(gridrows = 45, plots = list(list(plot = scatter_plot,
        x ="ReleaseDate", y = "AvgRating", queries = T), list(plot =scatter_plot,
        x ="AvgRating", y = "Watches", queries = F)), ncols = 2),query.legend = "bottom")
代码语言:javascript
复制
##自定义绘图函数的混合绘图

myplot <- function(mydata, x, y) {
    plot <- (ggplot(data =mydata, aes_string(x = x, y = y, colour = "color")) +
        geom_point() + scale_color_identity()+ theme(plot.margin = unit(c(0,
        0, 0, 0),"cm")))
}
 
another.plot <- function(data, x, y) {
    data$decades <-round_any(as.integer(unlist(data[y])), 10, ceiling)
    data <-data[which(data$decades >= 1970), ]
    myplot <- (ggplot(data,aes_string(x = x)) + geom_density(aes(fill = factor(decades)),
        alpha = 0.4) +theme(plot.margin = unit(c(0, 0, 0, 0), "cm"), legend.key.size =unit(0.4,
        "cm")))
}
upset(movies, main.bar.color = "black", queries =list(list(query = intersects,
    params =list("Drama"), color = "red", active = F), list(query =intersects,
    params =list("Action", "Drama"), active = T), list(query =intersects,
    params =list("Drama", "Comedy", "Action"), color ="orange", active = T)),
    attribute.plots =list(gridrows = 45, plots = list(list(plot = myplot, x ="ReleaseDate",
        y ="AvgRating", queries = T), list(plot = another.plot, x ="AvgRating",
        y ="ReleaseDate", queries = F)), ncols = 2))
代码语言:javascript
复制
##增加箱线图
upset(movies, boxplot.summary = c("AvgRating","ReleaseDate"))

欢迎大家互相学习交流!

本文参与 腾讯云自媒体分享计划,分享自微信公众号。
原始发表:2021-09-30,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 R语言交流中心 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档