TVP

R的数据可视化-各种图表展现

1.散点图

#添加标签

#添加回归拟合线

#渐变色

#处理大面积重叠情况（调低透明度、热图展示）

1.1气泡图

#实际是三维散点图中的第三个变量颜色的设置变成了大小的设置

2.折线图

#不同的节点和线段会有不同的展示效果

#从上两幅图可以看出密集程度。

2.1 时序图

3.面积图

4.条形图

#添加标签

5.火柴图

6.直方图

#密度分布图

7.箱线图+小提琴图

8.二位密度图

9.玫瑰图

#附code：

install.packages("ggplot2")

install.packages("gcookbook")

install.packages("hexbin")

library(ggplot2)

library(gcookbook)

library(plyr)

library(hexbin)

####读取数据

#手动为列名赋值

names(data)=c("a","b","c")

#转换为因子

data\$sex=factor(data\$sex)

str(data)

install.packages("xlsx")

library(xlsx)

install.packages("gdata")

library(gdata)

install.packages("foreign")

library(foreign)

####绘制散点图

qplot(mtcars\$wt,mtcars\$mpg)

qplot(wt,mpg,data=mtcars)

ggplot(mtcars,aes(x=wt,y=mpg))+geom_point()

heightweight[,c("ageYear","heightIn")]

ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point()

ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point(shape=21)#空心圈代替实心圈

ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point(size=3)

#使用点型和颜色属性，基于某变量对数据进行分组

heightweight[,c("sex","ageYear","heightIn")]

ggplot(heightweight,aes(x=ageYear,y=heightIn,colour=sex))+geom_point()

ggplot(heightweight,aes(x=ageYear,y=heightIn,shape=sex))+geom_point()

ggplot(heightweight,aes(x=ageYear,y=heightIn,colour=sex,shape=sex))+geom_point()

ggplot(heightweight,aes(x=ageYear,y=heightIn,colour=sex,shape=sex))+geom_point()+

scale_shape_manual(values = c(3,2))+

scale_color_brewer(palette = "Set1")

#生成一个数据副本

hw=heightweight

##四分类：将数据按照是否大于100磅分为两组

hw\$weightGroup=cut(hw\$weightLb,breaks=c(-Inf,100,Inf),labels = c("100"))

#使用具有颜色和填充色的点型及对应空值和填充色的颜色

ggplot(hw,aes(x=ageYear,y=heightIn,shape=sex,fill=weightGroup))+geom_point()+

scale_shape_manual(values = c(21,24))+

scale_fill_manual(values=c(NA,"black"),guide=guide_legend(override.aes=list(shape=21)))

#将第三个连续型变量映射给颜色

ggplot(heightweight,aes(x=ageYear,y=heightIn,colour=weightLb))+geom_point()

ggplot(heightweight,aes(x=ageYear,y=heightIn,fill=weightLb))+geom_point(shape=21,size=2.5)+

ggplot(heightweight,aes(x=ageYear,y=heightIn,size=weightLb,colour=sex))+geom_point(size=1.5)+

scale_size_area()+#使数据点面积正比于变量值

scale_color_brewer(palette="Set1")

#将第三个连续型变量映射给大小

ggplot(heightweight,aes(x=ageYear,y=heightIn,size=weightLb))+geom_point()

ggplot(heightweight,aes(x=ageYear,y=heightIn,fill=weightLb))+geom_point(shape=21,size=2.5)+

##处理图形重叠

sp=ggplot(diamonds,aes(x=carat,y=price))

sp+geom_point()

#增加透明度

sp+geom_point(alpha=.1)

sp+geom_point(alpha=.01)

#stat_bin2d()对数据进行分箱

sp+stat_bin2d()

sp+stat_bin2d(bins=50)+

#stat_binhex()对数据进行分箱

library(hexbin)

#position_jitter()给数据点增加随机扰动

sp1=ggplot(ChickWeight,aes(x=Time,y=weight))

sp1+geom_point()#变量x为离散型的数据集

sp1+geom_point(position="jitter")#添加随机扰动

sp1+geom_point(position=position_jitter(width=.5,height=0))#只在水平方向添加随机扰动

sp1+geom_boxplot(aes(group=Time))#当数据集对应于一个离散型数据轴和一个连续型数据轴时，箱线图可能是一种比较好的表达方式。

##添加回归拟合线

#基本绘图对象

sp=ggplot(heightweight,aes(x=ageYear,y=heightIn))

sp+geom_point()+stat_smooth(method = lm)

#99%置信域

sp+geom_point()+stat_smooth(method=lm,level=0.99)

#没有置信域

sp+geom_point()+stat_smooth(method = lm,se=FALSE)

#调整颜色

sp+geom_point(colour="grey60")+stat_smooth(method = lm,se=FALSE,colour="black")

#loess曲线:默认(局部加权多项式拟合线)

sp+geom_point(colour="grey60")+stat_smooth()

sp+geom_point(colour="grey60")+stat_smooth(method = loess)

#逻辑回归拟合线

library(MASS)

b=biopsy

b\$classn[b\$class=="benign"]=0

b\$classn[b\$class=="malignant"]=1

b

ggplot(b,aes(x=V1,y=classn))+

geom_point(position = position_jitter(width=0.3,height=0.06),alpha=0.4,shape=21,size=1.5)+

stat_smooth(method = glm,family=binomial)

##根据已有模型向散点图添加拟合线

model=lm(heightIn~ageYear+I(ageYear^2),heightweight)

model

#创建一个包含ageYear的列，并对其进行插值

xmin=min(heightweight\$ageYear)

xmax=max(heightweight\$ageYear)

predicted=data.frame(ageYear=seq(xmin,xmax,length.out = 100))

#计算变量heightIn的预测值

predicted\$heightIn=predict(model,predicted)

predicted

#绘制图表

sp=ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point(colour="grey40")

sp+geom_line(data=predicted,size=1)

############################################## P80-P86

#建立模型:predictvals的函数还未写

modlinear=lm(heightIn~ageYear,heightweight)

modloess=loess(heightIn~ageYear,heightweight)

lm_predicted=predictvals(modlinear,"ageYear","heightIn")

loess_predicted=predictvals(modloess,"ageYear","heightIn")

sp+geom_line(data=lm_predicted,colour="red",size=.8)+

geom_line(data=loess_predicted,colour="blue",size=.8)

#############################################################

##向散点图添加边际地毯

ggplot(faithful,aes(x=eruptions,y=waiting))+geom_point()+geom_rug()

ggplot(faithful,aes(x=eruptions,y=waiting))+geom_point()+geom_rug(position="jitter",size=.2)

##向散点图添加标签annotate()\geom_text()

subset(countries,Year==2009 & healthexp>2000)

sp=ggplot(subset(countries,Year==2009 & healthexp>2000),

aes(x=healthexp,y=infmortality))+

geom_point()

annotate("text",x=7400,y=6.8,label="USA")

sp+geom_text(aes(label=Name),size=4)

sp+geom_text(aes(label=Name),size=4,vjust=0)#vjust=0标签文本的基线会与数据点对齐，vjust=1标签文本的顶部会与数据点对齐

sp+geom_text(aes(y=infmortality+1.7,label=Name),size=4,vjust=0)

sp+geom_text(aes(label=Name),size=4,hjust=0)#hjust=0左对齐，hjust=1右对齐

sp+geom_text(aes(x=healthexp+400,label=Name),size=4,hjust=0)

cdat=subset(countries,Year== 2009 & healthexp >2000)

cdat\$Name1=cdat\$Name

idx

cdat\$Name1[!idx]=NA

cdat

ggplot(cdat,aes(x=healthexp,y=infmortality))+geom_point()+

geom_text(aes(x=healthexp+100,label=Name1),size=4,hjust=0)+

xlim(2000,10000)

####绘制气泡图

cdat=subset(countries,Year==2009 & healthexp >5000)

p=ggplot(cdat,aes(x=healthexp,y=infmortality,size=GDP))+

geom_point(shape=21,colour="black",fill="cornsilk")

p

p+scale_size_area(max_size =15)

#当x轴,y轴皆为分类变量时，气泡图可以用来表示网格点上的变量值

#对男性组和女性组求和

hec=HairEyeColor[,,"Male"]+HairEyeColor[,,"Female"]

#转化为长格式

library(reshape2)

hec=melt(hec,value.name = "count")

ggplot(hec,aes(x=Eye,y=Hair))+

geom_point(aes(size=count),shape=21,colour="black",fill="cornsilk")+

scale_size_area(max_size = 20,guide=FALSE)+

geom_text(aes(y=as.numeric(Hair)-sqrt(count)/22,label=count),vjust=1,colour="grey60",size=4)

##绘制散点图矩阵

library(gcookbook)

c2009=subset(countries,Year==2009,

select=c(Name,GDP,laborrate,healthexp,infmortality))

c2009

pairs(c2009[,2:5])

####折线图

qplot(pressure\$temperature,pressure\$pressure,geom="line")

qplot(pressure\$temperature,pressure\$pressure,geom=c("line","point"))

qplot(temperature,pressure,data=pressure,geom=c("line","point"))

ggplot(pressure,aes(x=temperature,y=pressure))+geom_line()+geom_point()

BOD1=BOD#将数据复制一份

BOD1\$Time=factor(BOD1\$Time)

ggplot(BOD1,aes(x=Time,y=demand,group=1))+geom_line()

#改变y轴长度

ggplot(BOD,aes(x=Time,y=demand))+geom_line()+ylim(0,max(BOD\$demand))

ggplot(BOD,aes(x=Time,y=demand))+geom_line()+expand_limits(y=0)

#像折线图添加数据标签

ggplot(BOD,aes(x=Time,y=demand))+geom_line()+geom_point()

ggplot(worldpop,aes(x=Year,y=Population))+geom_line()+geom_point()

ggplot(worldpop,aes(x=Year,y=Population))+geom_line()+geom_point()+scale_y_log10()#y轴取对数

##绘制多重折线图

#对ToothGrowth数据集进行汇总

tg=ddply(ToothGrowth,c("supp","dose"),summarise,length=mean(len))

#将supp映射颜色colour

ggplot(tg,aes(x=dose,y=length,colour=supp))+geom_line()

#将supp映射给线型linetype

ggplot(tg,aes(x=dose,y=length,linetype=supp))+geom_line()

#颜色+线型

ggplot(tg,aes(x=dose,y=length,linetype=supp,colour=supp))+geom_line()

#dose变离散型后作图

ggplot(tg,aes(x=factor(dose),y=length,colour=supp,group=supp))+geom_line()

ggplot(tg,aes(x=dose,y=length))+geom_line()#不可缺少group语句

##添加数据点

ggplot(tg,aes(x=dose,y=length,shape=supp))+geom_line()+geom_point(size=4)

ggplot(tg,aes(x=dose,y=length,fill=supp))+geom_line()+geom_point(size=4,shape=21)

#避免重叠，左移或右移0.2

ggplot(tg,aes(x=dose,y=length,shape=supp))+geom_line(position = position_dodge(0.2))+

geom_point(position=position_dodge(0.2),size=4)

##修改线条样式:线型、宽度、颜色

ggplot(BOD,aes(x=Time,y=demand))+

geom_line(linetype="dashed",size=1,colour="blue")

tg=ddply(ToothGrowth,c("supp","dose"),summarise,length=mean(len))#类似数据透视

ggplot(tg,aes(x=dose,y=length,colour=supp))+geom_line()+

scale_color_brewer(palette = "Set1")

##修改数据标记样式

ggplot(BOD,aes(x=Time,y=demand))+geom_line()+

geom_point(size=4,shape=22,colour="darkred",fill="pink")

ggplot(BOD,aes(x=Time,y=demand))+geom_line()+

geom_point(size=4,shape=21,colour="darkred",fill="white")

tg=ddply(ToothGrowth,c("supp","dose"),summarise,length=mean(len))#类似数据透视

ggplot(tg,aes(x=dose,y=length,fill=supp))+geom_line(position = position_dodge(0.2))+

geom_point(shape=21,size=3,position=position_dodge(0.2))+

scale_fill_manual(values = c('black','white'))

####绘制面积图

#将sunspot.year数据集转化为数据框

sunspotyear=data.frame(Year=as.numeric(time(sunspot.year)),

Sunspots=as.numeric(sunspot.year))

ggplot(sunspotyear,aes(x=Year,y=Sunspots))+geom_area()

ggplot(sunspotyear,aes(x=Year,y=Sunspots))+geom_area(colour="black",fill="blue",alpha=0.2)#alpha设置透明度

ggplot(sunspotyear,aes(x=Year,y=Sunspots))+geom_area(fill="blue",alpha=0.2)+geom_line()

##绘制堆积面积图

ggplot(uspopage,aes(x=Year,y=Thousands,fill=AgeGroup))+geom_area()

ggplot(uspopage,aes(x=Year,y=Thousands,fill=AgeGroup))+

geom_area(colour="black",size=0.2,alpha=0.4)+

scale_fill_brewer(palette = "Blues",breaks=rev(levels(uspopage\$AgeGroup)))

ggplot(uspopage,aes(x=Year,y=Thousands,fill=AgeGroup,order=desc(AgeGroup)))+

geom_area(colour=NA,alpha=0.4)+

scale_fill_brewer(palette = "Blues")+

geom_line(position = "stack",size=0.2)

#绘制百分比堆积面积图

uspopage_prop=ddply(uspopage,"Year",transform,Percent=Thousands/sum(Thousands)*100)

ggplot(uspopage_prop,aes(x=Year,y=Percent,fill=AgeGroup))+

geom_area(colour="black",size=0.2,alpha=0.4)+

scale_fill_brewer(palette="Blues",breaks=rev(levels(uspopage\$AgeGroup)))

#####添加置信域

#抓取climate一个子集

clim=subset(climate,Source=="Berkeley",select = c("Year","Anomaly10y","Unc10y"))

clim

#将置信域绘制为阴影

ggplot(clim,aes(x=Year,y=Anomaly10y))+

geom_ribbon(aes(ymin=Anomaly10y-Unc10y,ymax=Anomaly10y+Unc10y),alpha=0.2)+#alpha=0.2将阴影部分的透明度设定为80%

geom_line()

#用虚线表示置信域的上下边界

ggplot(clim,aes(x=Year,y=Anomaly10y))+

geom_line(aes(y=Anomaly10y-Unc10y),colour="grey50",linetype="dotted")+

geom_line(aes(y=Anomaly10y+Unc10y),colour="grey50",linetype="dotted")+

geom_line()

####条形图

qplot(BOD\$Time,BOD\$demand,geom="bar",stat="identity")

'''Error: stat_count() must not be used with a y aesthetic.

`stat` is deprecated '''

#将x转化为因子变量，可视为离散值

qplot(factor(BOD\$Time),BOD\$demand,geom="bar",stat="identity")

ggplot(BOD,aes(x=Time,y=demand))+geom_bar(stat="identity")

ggplot(BOD,aes(x=factor(Time),y=demand))+geom_bar(stat="identity")

#添加颜色fill=和边框线colour=

ggplot(pg_mean,aes(x=group,y=weight))+geom_bar(stat="identity",fill="lightblue",colour="black")

##簇状条形图

cabbage_exp

ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+geom_bar(position="dodge",stat="identity")

ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+

geom_bar(position="dodge",stat="identity",colour="black")+

scale_fill_brewer(palette="Pastell")#pastell是调色盘

ce=cabbage_exp[1:5,1:3]#复制删除了最后一行的数据集

ce

ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+

geom_bar(position="dodge",stat="identity",colour="black")+

scale_fill_brewer(palette="Pastell")

#频数条形图

qplot(mtcars\$cyl)

qplot(factor(mtcars\$cyl))

qplot(factor(cyl),data=mtcars)

ggplot(diamonds,aes(x=cut))+geom_bar()

#条形图着色

upc=subset(uspopchange,rank(Change)>40)

upc

ggplot(upc,aes(x=Abb,y=Change,fill=Region))+geom_bar(stat="identity")

#颜色的重新设定

ggplot(upc,aes(x=reorder(Abb,Change),y=Change,fill=Region))+

geom_bar(stat="identity")+scale_fill_manual(values=c("#669933","#FFCC66"))+xlab("State")

#对正负条形图进行调色

csub=subset(climate,Source=="Berkeley"&Year>=1900)#通过筛选条件取子集

csub\$pos=csub\$Anomaly10y>=0

csub

ggplot(csub,aes(x=Year,y=Anomaly10y,fill=pos))+

geom_bar(stat="identity",position = "identity")

ggplot(csub,aes(x=Year,y=Anomaly10y,fill=pos))+

geom_bar(stat="identity",position = "identity",colour="black",size=0.25)+

scale_fill_manual(values=c("#CCEEFF","#FFDDDD"),guide=FALSE)

#调整条形宽度和条形间距

ggplot(pg_mean,aes(x=group,y=weight))+geom_bar(stat="identity")

ggplot(pg_mean,aes(x=group,y=weight))+geom_bar(stat="identity",width=0.5)#窄

ggplot(pg_mean,aes(x=group,y=weight))+geom_bar(stat="identity",width=1)#宽

#更窄的簇状条形图

ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+

geom_bar(stat="identity",width=0.5,position="dodge")

ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+

geom_bar(stat="identity",width=0.5,position=position_dodge(0.7))#调组距

#堆积条形图

ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+geom_bar(stat="identity")

ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+geom_bar(stat="identity")+

guides(fill=guide_legend(reverse=TRUE))#图例颜色顺序改变

install.packages("plyr")#为了用desc

library(plyr)

ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar,order=desc(Cultivar)))+geom_bar(stat="identity")

ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+geom_bar(stat="identity",colour="blue")+

guides(fill=guide_legend(reverse=TRUE))+

scale_fill_brewer(palette = "Pastell")

#绘制百分比堆积条形图

#通过plyr中的ddply()\transform()函数将每组条形对应的数据标准化为100%

#以Date为切割变量（）对每组数据进行transform()

ce=ddply(cabbage_exp,"Date",transform,percent_weight=Weight/sum(Weight)*100)#新增一个percent_weight变量

ggplot(ce,aes(x=Date,y=percent_weight,fill=Cultivar))+geom_bar(stat="identity")

ggplot(ce,aes(x=Date,y=percent_weight,fill=Cultivar))+geom_bar(stat="identity",colour="blue")+

guides(fill=guide_legend(reverse=TRUE))+

scale_fill_brewer(palette = "Pastell")

##添加数据标签

#在条形图顶端下方

ggplot(cabbage_exp,aes(x=interaction(Date,Cultivar),y=Weight))+

geom_bar(stat = "identity")+

geom_text(aes(label=Weight),vjust=1.5,colour="white")

#在条形图顶端上方

ggplot(cabbage_exp,aes(x=interaction(Date,Cultivar),y=Weight))+

geom_bar(stat = "identity")+

geom_text(aes(label=Weight),vjust=-0.2,colour="brown")

#将y轴上限变大

ggplot(cabbage_exp,aes(x=interaction(Date,Cultivar),y=Weight))+

geom_bar(stat = "identity")+

geom_text(aes(label=Weight),vjust=-0.2)+

ylim(0,max(cabbage_exp\$Weight)*1.25)

#设定标签的y轴位置使其略高于条形图顶端-y轴范围会自动调整

ggplot(cabbage_exp,aes(x=interaction(Date,Cultivar),y=Weight))+

geom_bar(stat = "identity")+

geom_text(aes(y=Weight+0.1,label=Weight),vjust=-0.2)#标签的位置上调0.1

#簇状条形图的标签

ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+

geom_bar(stat = "identity",position = "dodge")+

geom_text(aes(label=Weight),vjust=1.5,colour="white",position = position_dodge(.9),size=5)#标签的位置上调0.1

#堆积条形图的数据标签

#根据日期和性别进行排序

ce=arrange(cabbage_exp,Date,desc(Cultivar))

#计算累积和

ce=ddply(ce,"Date",transform,label_y=cumsum(Weight))#label_y是位置信息

ce

ggplot(ce,aes(x=Date,y=Weight,fill=Cultivar))+

geom_bar(stat = "identity")+

geom_text(aes(y=label_y,label=Weight),vjust=1.2,colour="white")

ce=arrange(cabbage_exp,Date,desc(Cultivar))

#计算y轴的位置，将数据标签置于条形中部

ce=ddply(ce,"Date",transform,label_y=cumsum(Weight)-0.5*Weight)

ggplot(ce,aes(x=Date,y=Weight,fill=Cultivar))+

geom_bar(stat = "identity")+

geom_text(aes(y=label_y,label=Weight),colour="white")

####绘制Cleveland点图（有时用来代替点图）

tophit=tophitters2001[1:25,]#取前25个数据

ggplot(tophit,aes(x=avg,y=name))+geom_point()

tophit[,c("name","lg","avg")]#取其中3列

ggplot(tophit,aes(x=avg,y=reorder(name,avg)))+

geom_point(size=3)+

theme_bw()+#删除背景，用虚线代替

#将点图的x轴和y轴互换

ggplot(tophit,aes(x=reorder(name,avg),y=avg))+

geom_point(size=3)+

theme_bw()+

theme(axis.text.x = element_text(angle=60,hjust=1),#横坐标轴的文字方向

#提取出name,依次根据lg,avg进行排序

nameorder=tophit\$name[order(tophit\$lg,tophit\$avg)]

#将name转化为因子， 因子水平与nameorder保持一致

tophit\$name=factor(tophit\$name,levels=nameorder)

#借助geom_segment()用“以数据点为端点的线段”代替网格线

ggplot(tophit,aes(x=avg,y=name))+

geom_segment(aes(yend=name),xend=0,colour="grey50")+

geom_point(size=3,aes(colour=lg))+

theme_bw()+

legend.position = c(1,0.55),#将图例放置在绘图区中

legend.justification = c(1,0.5))

#以队为分组的火柴图

ggplot(tophit,aes(x=avg,y=name))+

geom_segment(aes(yend=name),xend=0,colour="grey50")+

geom_point(size=3,aes(colour=lg))+

scale_color_brewer(palette="Set1",limits=c("NL","AL"),guide=FALSE)+

theme_bw()+

facet_grid(lg~.,scales="free_y",space="free_y")#没有生效

####直方图

qplot(mtcars\$mpg)

qplot(mpg,data=mtcars,binwidth=4)

ggplot(mtcars,aes(x=mpg))+geom_histogram(binwidth=6)#binwidth为组距

##简单直方图

ggplot(faithful,aes(x=waiting))+geom_histogram()#从坐标就是横坐标的计数

#将变量值保存为一个基本向量

w=faithful\$waiting

ggplot(NULL,aes(x=w))+geom_histogram()

#设定组距为5

ggplot(faithful,aes(x=waiting))+geom_histogram(binwidth=5,fill="white",colour="black")

#将x的取值切分成15组

binsize=diff(range(faithful\$waiting))/15

ggplot(faithful,aes(x=waiting))+geom_histogram(binwidth=binsize,fill="white",colour="black")

ggplot(faithful,aes(x=waiting))+geom_bar()

ggplot(faithful,aes(x=waiting))+geom_histogram(binwidth=binsize,fill="white",colour="black",boundary=25)

##基于分组数据绘制分组直方图

library(MASS)

#使用smoke作为分面变量

ggplot(birthwt,aes(x=bwt))+geom_histogram(fill="white",colour="black")+facet_grid(smoke~.)

#修改分面标签

birthwt1=birthwt

#将smoke转化为因子

birthwt1\$smoke=factor(birthwt1\$smoke)

levels(birthwt1\$smoke)

birthwt1\$smoke=revalue(birthwt1\$smoke,c("0"="no smoke","1"="smoke"))

birthwt1\$smoke=revalue(birthwt1\$smoke,c("no smoke"="不抽烟","smoke"="抽烟"))

ggplot(birthwt1,aes(x=bwt))+geom_histogram(fill="white",colour="black")+facet_grid(smoke~.)

#使用race作为分面变量

ggplot(birthwt,aes(x=bwt))+geom_histogram(fill="white",colour="black")+facet_grid(race~.)

ggplot(birthwt,aes(x=bwt))+geom_histogram(fill="white",colour="black")+facet_grid(race~.,scales="free")

#用颜色来区别分组

#将smoke转化为因子

birthwt1\$smoke=factor(birthwt1\$smoke)

#将smoke映射给fill,取消条形堆叠，并使图形半透明

ggplot(birthwt1,aes(x=bwt,fill=smoke))+geom_histogram(position="identity",alpha=0.4)

###绘制密度曲线

ggplot(faithful,aes(x=waiting))+geom_density()

ggplot(faithful,aes(waiting))+geom_line(stat="density")+expand_limits(y=0)#扩大y轴范围以包含0点

#将变量值保存在一个简单向量里

w=faithful\$waiting

ggplot(NULL,aes(x=w))+geom_density()

ggplot(faithful,aes(waiting))+geom_density(fill="blue",colour=NA,alpha=.2)+

geom_line(stat="density")+xlim(35,105)

ggplot(faithful,aes(waiting,y=..density..))+geom_histogram(fill="cornsilk",colour="grey60",size=.2)+

geom_density()+xlim(35,105)

##基于分组数据绘制分组密度曲线

library(MASS)

birthwt1=birthwt

#将smoke转化为因子

birthwt1\$smoke=factor(birthwt1\$smoke)

#将smoke映射给colour

ggplot(birthwt1,aes(x=bwt,colour=smoke))+geom_density()

#将smoke映射给fill

ggplot(birthwt1,aes(x=bwt,fill=smoke))+geom_density(alpha=.3)

#研究下bwt和smoke之间的关系

ggplot(birthwt1,aes(x=bwt))+geom_density()+facet_grid(smoke~.)

ggplot(birthwt1,aes(x=bwt,y=..density..))+geom_histogram(binwidth=200,fill="cornsilk",colour="grey60",size=.2)+

geom_density()+facet_grid(smoke~.)

###绘制频数多边形

ggplot(faithful,aes(x=waiting))+geom_freqpoly()

ggplot(faithful,aes(x=waiting))+geom_freqpoly(binwidth=4)

#将组数设定为15

binsize=diff(range(faithful\$waiting))/15

ggplot(faithful,aes(waiting))+geom_freqpoly(binwidth=binsize)

####箱线图

qplot(ToothGrowth\$supp,ToothGrowth\$len,geom="boxplot")

ggplot(ToothGrowth,aes(x=supp,y=len))+geom_boxplot()

qplot(interaction(ToothGrowth\$supp,ToothGrowth\$dose),ToothGrowth\$len,geom="boxplot")

ggplot(birthwt,aes(x=factor(race),y=bwt))+geom_boxplot()

ggplot(birthwt,aes(x=factor(race),y=bwt))+geom_boxplot(width=.2)

ggplot(birthwt,aes(x=factor(race),y=bwt))+geom_boxplot(outlier.size=3,outlier.shape=21)

#添加槽口

ggplot(birthwt,aes(x=factor(race),y=bwt))+geom_boxplot(notch=TRUE)

#添加均值

library(MASS)

ggplot(birthwt,aes(x=factor(race),y=bwt))+geom_boxplot()+

stat_summary(fun.y="mean",geom="point",shape=23,size=3,fill="white")

###绘制小提琴图

ggplot(heightweight,aes(x=sex,y=heightIn))+geom_violin()

ggplot(heightweight,aes(x=sex,y=heightIn))+geom_violin()+

geom_boxplot(width=.1,fill="black",outlier.colour = NA)+

stat_summary(fun.y=median,geom="point",fill="white",shape=21,size=2.5)

ggplot(heightweight,aes(x=sex,y=heightIn))+geom_violin(trim=FALSE)#保留小提琴的尾部

ggplot(heightweight,aes(x=sex,y=heightIn))+geom_violin(scale="count")#校准小提琴图面积，使其与频数保持一致

####绘制winkinson点图

countries2009=subset(countries,Year==2009&healthexp>2000)

p=ggplot(countries2009,aes(x=infmortality))

p+geom_dotplot()

p+geom_dotplot(binwidth=.25)+geom_rug()+

scale_y_continuous(breaks=NULL)+#移除刻度线

theme(axis.title.y = element_blank())#移除y坐标轴标签

p+geom_dotplot(method="histodot",binwidth=.25)+geom_rug()+

scale_y_continuous(breaks=NULL)+#移除刻度线

theme(axis.title.y = element_blank())#移除y坐标轴标签

p+geom_dotplot(binwidth=.25,stackdir = "center")+geom_rug()+

scale_y_continuous(breaks=NULL)+#移除刻度线

theme(axis.title.y = element_blank())#移除y坐标轴标签

p+geom_dotplot(binwidth=.25,stackdir = "centerwhole")+geom_rug()+

scale_y_continuous(breaks=NULL)+#移除刻度线

theme(axis.title.y = element_blank())#移除y坐标轴标签

ggplot(heightweight,aes(x=sex,y=heightIn))+

geom_dotplot(binaxis = "y",binwidth=.5,stackdir = "center")

####绘制二维数据密度图

p

p + geom_point() + stat_density2d()

p + stat_density2d(aes(colour=..level..))

#将密度估计映射给填充色

p+stat_density2d(aes(fill=..density..),geom="raster",contour = FALSE)

#带数据点，并将密度估计映射给alpha的瓦片图

p+geom_point()+stat_density2d(aes(alpha=..density..),geom="tile",contour = FALSE)

p+stat_density2d(aes(fill=..density..),geom="raster",contour = FALSE,h=c(.5,5))

####注解

#添加文本注解

p

p + annotate("text", x=3, y=48, label="Group 1") +

annotate("text", x=4.5, y=66, label="Group 2")

p + annotate("text", x=3, y=48, label="Group 1", family="serif",

fontface="italic", colour="darkred", size=3) +

annotate("text", x=4.5, y=66, label="Group 2", family="serif",

fontface="italic", colour="darkred", size=3)

p + annotate("text", x=3, y=48, label="Group 1", alpha=.1) + # Normal

geom_text(x=4.5, y=66, label="Group 2", alpha=.1)

#添加边缘线注释

p + annotate("text", x=-Inf, y=Inf, label="Upper left", hjust=-.2, vjust=2) +

annotate("text", x=mean(range(faithful\$eruptions)), y=-Inf, vjust=-0.4,

label="Bottom middle")

#添加公式注解

p

p + annotate("text", x=2, y=0.3, parse=TRUE,

label="frac(1, sqrt(2 * pi)) * e ^ {-x^2 / 2}")

p + annotate("text", x=0, y=0.05, parse=TRUE, size=4,

label="'Function: ' * y==frac(1, sqrt(2*pi)) * e^{-x^2/2}")

#添加直线

p

#添加横线和竖线

p + geom_hline(yintercept=60) + geom_vline(xintercept=14)

pg

pg + geom_vline(xintercept = 2)

pg + geom_vline(xintercept = which(levels(PlantGrowth\$group)=="ctrl"))

# 添加斜线

p + geom_abline(intercept=37.4, slope=1.75)

#添加平均线

library(plyr) # For the ddply() function

hw_means

hw_means

p + geom_hline(aes(yintercept=heightIn, colour=sex), data=hw_means,

linetype="dashed", size=1)

#添加线段和箭头

p

geom_line()

p + annotate("segment", x=1950, xend=1980, y=-.25, yend=-.25)#segment添加线段

library(grid)

p + annotate("segment", x=1850, xend=1820, y=-.8, yend=-.95, colour="blue",

size=2, arrow=arrow()) +

annotate("segment", x=1950, xend=1980, y=-.25, yend=-.25,

arrow=arrow(ends="both", angle=90, length=unit(.2,"cm")))

#添加矩阵阴影

p

p + annotate("rect", xmin=1950, xmax=1980, ymin=-1, ymax=1, alpha=.1,

fill="blue")

#高亮某一元素

pg

pg\$hl

pg\$hl[pg\$group=="trt2"]

ggplot(pg, aes(x=group, y=weight, fill=hl)) + geom_boxplot() +

scale_fill_manual(values=c("grey85", "#FFDDCC"), guide=FALSE)

ggplot(PlantGrowth, aes(x=group, y=weight, fill=group)) + geom_boxplot() +

scale_fill_manual(values=c("grey85", "grey85", "#FFDDCC"), guide=FALSE)

#添加误差线

library(gcookbook) # For the data set

# Take a subset of the cabbage_exp data for this example

ce

# 为条形图添加误差线

ggplot(ce, aes(x=Date, y=Weight)) +

geom_bar(fill="white", colour="black") +

geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se), width=.2)#报错

# 为折线图添加误差线

ggplot(ce, aes(x=Date, y=Weight)) +

geom_line(aes(group=1)) +

geom_point(size=4) +

geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se), width=.2)

# 反例：未指定并列宽度

ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +

geom_bar(position="dodge") +

geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se),

position="dodge", width=.2)#报错

# 正例：设定并列宽度与条形的相同

ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +

geom_bar(position="dodge") +

geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se),

position=position_dodge(0.9), width=.2)#报错

pd

ggplot(cabbage_exp, aes(x=Date, y=Weight, colour=Cultivar, group=Cultivar)) +

geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se),

width=.2, size=0.25, colour="black", position=pd) +

geom_line(position=pd) +

geom_point(position=pd, size=2.5)

##像独立分面添加注释

# The base plot

p

# A data frame with labels for each facet

f_labels

p + geom_text(x=6, y=40, aes(label=label), data=f_labels)

# If you use annotate(), the label will appear in all facets

p + annotate("text", x=6, y=42, label="label text")

lm_labels

mod

formula

round(coef(mod)[1], 2), round(coef(mod)[2], 2))

r

r2

data.frame(formula=formula, r2=r2, stringsAsFactors=FALSE)

}

library(plyr) # For the ddply() function

labels

labels

p + geom_smooth(method=lm, se=FALSE) +

geom_text(x=3, y=40, aes(label=formula), data=labels, parse=TRUE, hjust=0) +

geom_text(x=3, y=35, aes(label=r2), data=labels, parse=TRUE, hjust=0)

Figure

##坐标轴

ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot()

ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() + coord_flip()#使横纵坐标互换

ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() + coord_flip() +

scale_x_discrete(limits=rev(levels(PlantGrowth\$group)))#反转了x轴元素顺序

#设置连续型坐标轴值域

p=ggplot(PlantGrowth,aes(x=group,y=weight))+geom_boxplot()

p

p+ylim(0,max(PlantGrowth\$weight))

ylim(0, 10)

scale_y_continuous(limits=c(0, 10))

p + ylim(0, 10) + scale_y_continuous(breaks=NULL)

p + scale_y_continuous(breaks=NULL) + ylim(0, 10)

p + scale_y_continuous(limits=c(0, 10), breaks=NULL)

p + scale_y_continuous(limits = c(5, 6.5)) # Same as using ylim()

p + coord_cartesian(ylim = c(5, 6.5))

p + expand_limits(y=0)

# y轴坐标翻转

ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() + scale_y_reverse()

ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() + ylim(6.5, 3.5)

ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() +

scale_y_reverse(limits=c(8, 0))

#修改类别型x坐标轴上项目的顺序

p

p + scale_x_discrete(limits=c("trt1","ctrl","trt2"))

p + scale_x_discrete(limits=c("ctrl","trt1"))

p + scale_x_discrete(limits=rev(levels(PlantGrowth\$group)))

#设置x轴，y轴的缩放比例

sp

sp + coord_fixed()

sp + coord_fixed() +

sp + coord_fixed(ratio=1/2) +

##设置刻度线的位置

ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot()

ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() +

scale_y_continuous(breaks=c(4, 4.25, 4.5, 5, 6, 8))

#为离散坐标轴同时设定breaks和limits

ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() +

scale_x_discrete(limits=c("trt2", "ctrl"), breaks="ctrl")

#移除刻度线和标签

p

p + theme(axis.text.y = element_blank())

p + theme(axis.ticks = element_blank(), axis.text.y = element_blank())

p + scale_y_continuous(breaks=NULL)

#修改刻度标签的文本

hwp

geom_point()

hwp

hwp + scale_y_continuous(breaks=c(50, 56, 60, 66, 72),

labels=c("Tiny", "Really\nshort", "Short",

"Medium", "Tallish"))#\n用来换行

footinch_formatter

foot

inch

return(paste(foot, "'", inch, "\"", sep=""))

}

footinch_formatter(56:64)

timeHMS_formatter

h

m

s

lab

lab

lab

return(lab)

}

timeHMS_formatter(c(.33, 50, 51.25, 59.32, 60, 60.1, 130.23))

#修改刻度标签的外观

bp

scale_x_discrete(breaks=c("ctrl", "trt1", "trt2"),

labels=c("Control", "Treatment 1", "Treatment 2"))

bp

bp + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=.5))

bp + theme(axis.text.x = element_text(angle=30, hjust=1, vjust=1))#调节文字角度

bp + theme(axis.text.x = element_text(family="Times", face="italic",

colour="darkred", size=rel(0.9)))#调节大小、样式、字体

#修改坐标轴标签的文本

library(gcookbook) # For the data set

hwp

geom_point()

hwp

# 设置坐标轴标签

hwp + xlab("Age in years") + ylab("Height in inches")

hwp + labs(x = "Age in years", y = "Height in inches")

hwp + scale_x_continuous(name="Age in years")

hwp + scale_x_continuous(name="Age\n(years)")

#移除坐标轴标签

p

p + theme(axis.title.x=element_blank())

#修改坐标轴标签的外观

library(gcookbook) # For the data set

hwp

hwp + theme(axis.title.x=element_text(face="italic", colour="darkred", size=14))

hwp + ylab("Height\n(inches)") +

theme(axis.title.y=element_text(angle=90, face="italic", colour="darkred",

size=14))

#沿坐标轴显示直线

library(gcookbook) # For the data set

p

p + theme(axis.line = element_line(colour="black"))

# 对于较粗的直线，只有一半重叠

p + theme_bw() +

theme(panel.border = element_blank(),

axis.line = element_line(colour="black", size=4))

# 完全重叠

p + theme_bw() +

theme(panel.border = element_blank(),

axis.line = element_line(colour="black", size=4, lineend="square"))

#使用对数坐标轴

library(MASS) # For the data set

p

geom_text(size=3)

p

# 使用对数x,y标度

p + scale_x_log10() + scale_y_log10()

p + scale_x_log10(breaks=10^(-1:5)) + scale_y_log10(breaks=10^(0:3))

library(scales)

p + scale_x_log10(breaks=10^(-1:5),

labels=trans_format("log10", math_format(10^.x))) +

scale_y_log10(breaks=10^(0:3),

labels=trans_format("log10", math_format(10^.x)))

ggplot(Animals, aes(x=log10(body), y=log10(brain), label=rownames(Animals))) +

geom_text(size=3)

library(scales)

# Use natural log on x, and log2 on y

p + scale_x_continuous(trans = log_trans(),

breaks = trans_breaks("log", function(x) exp(x)),

labels = trans_format("log", math_format(e^.x))) +

scale_y_continuous(trans = log2_trans(),

breaks = trans_breaks("log2", function(x) 2^x),

labels = trans_format("log2", math_format(2^.x)))

library(gcookbook) # For the data set

scale_y_log10(breaks=c(2,10,50,250))

#为对数坐标轴添加刻度

library(MASS) # For the data set

library(scales) # For the trans and format functions

ggplot(Animals, aes(x=body, y=brain, label=rownames(Animals))) +

geom_text(size=3) +

annotation_logticks() +

scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),

labels = trans_format("log10", math_format(10^.x))) +

scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),

labels = trans_format("log10", math_format(10^.x)))

ggplot(Animals, aes(x=body, y=brain, label=rownames(Animals))) +

geom_text(size=3) +

annotation_logticks() +

scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),

labels = trans_format("log10", math_format(10^.x)),

minor_breaks = log10(5) + -2:5) +

scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),

labels = trans_format("log10", math_format(10^.x)),

minor_breaks = log10(5) + -1:3) +

coord_fixed() +

theme_bw()

####绘制环状图形

ggplot(wind, aes(x=DirCat, fill=SpeedCat)) +

geom_histogram(binwidth=15, boundary=-7.5) +

coord_polar() +

scale_x_continuous(limits=c(0,360))

ggplot(wind, aes(x=DirCat, fill=SpeedCat)) +

geom_histogram(binwidth=15, boundary=-7.5, colour="black", size=.25) +

guides(fill=guide_legend(reverse=TRUE)) +

coord_polar() +

scale_x_continuous(limits=c(0,360), breaks=seq(0, 360, by=45),

minor_breaks=seq(0, 360, by=15)) +

scale_fill_brewer()

# 将mdeaths的时间序列数据放入一个数据框

md

month = as.numeric(cycle(mdeaths)))

library(plyr) # For the ddply() function

md

md

p

scale_x_continuous(breaks=1:12)

p + coord_polar()

p + coord_polar() + ylim(0, max(md\$deaths))

p + coord_polar() + ylim(0, max(md\$deaths)) + xlim(0, 12)

# Connect the lines by adding a value for 0 that is the same as 12

mdx

mdx\$month

mdnew

# Make the same plot as before, but with the new data, by using %+%

p %+% mdnew + coord_polar() + ylim(0, max(md\$deaths))

#在坐标轴上使用日期

str(economics)

ggplot(economics, aes(x=date, y=psavert)) + geom_line()

# Take a subset of economics

econ = as.Date("1992-05-01") &

date

# Base plot - without specifying breaks

p

p

# Specify breaks as a Date vector

datebreaks

# Use breaks, and rotate text labels

p + scale_x_date(breaks=datebreaks) +

theme(axis.text.x = element_text(angle=30, hjust=1))

library(scales)

p + scale_x_date(breaks=datebreaks, labels=date_format("%Y %b")) +

theme(axis.text.x = element_text(angle=30, hjust=1))

# Mac and Linux

Sys.setlocale("LC_TIME", "it_IT.UTF-8")

# Windows

Sys.setlocale("LC_TIME", "italian")

####函数图像

myfun=function(xvar){

1/(1+exp(-xvar+10))

}

qplot(c(0,20),fun=myfun,stat="function",geom="line")#报错

ggplot(data.frame(x=c(0,20)),aes=(x=x))+stat_function(fun=myfun,geom="line")

####玫瑰图

####玫瑰图

install.packages("rvest")

install.packages("xml2")

install.packages("dplyr")

install.packages("showtext")

install.packages("Cairo")

library(sysfonts)

library(showtextdb)

library(rvest)

library(xml2)

library(dplyr)

library(ggplot2)

library(grid)

library(showtext)

library(Cairo)

names(mydata)=c("city","rate")

#计算标签的旋转角度：

mydata\$angle=c(rev(9*(1:10-1)+4.5),-(9*(1:10-1)+4.5))

mydata\$angle[16:20]

mydata2=arrange(mydata,city,desc(rate))

#图形可视化过程：

ggplot(mydata2,aes(x=city,y=rate))+geom_bar(stat="identity")+

#geom_hline(yintercept =c(25,50,100,200,500),linetype=2,size=.25)+

#geom_text(aes(x=id,y=value+12,label=label_ff,angle=angle),family="myfont",size=3.5,lineheight=1)+#坐标轴放大一倍占位：

#geom_text(aes(x=id,y=value+12,label=label_ff,angle=angle),size=3.5,lineheight=1)+

#scale_x_continuous(limits=c(0,40),expand=c(0,0))+#Y延伸到负值突出圆心的空白

#scale_y_continuous(limits=c(0,600))+

#scale_fill_manual(values=c("#00643E","#207A57","#3D8C6D","#59A284","#76B69B","#95CBB3"),guide=FALSE)+

coord_polar(start=0)

theme_void()

#图形输出：

setwd("E:/数据可视化/R/R语言学习笔记/数据可视化/ggplot2/优秀R语言案例")

CairoPNG(file="polar_rose.png",width=2400,height=1800)

showtext.begin()

grid.newpage()

pushViewport(viewport(layout=grid.layout(6,8)))

vplayout

print(p,vp=vplayout(1:6,1:8))

showtext.end()

dev.off()

• 发表于:
• 原文链接https://kuaibao.qq.com/s/20180723G0OUV100?refer=cp_1026
• 腾讯「腾讯云开发者社区」是腾讯内容开放平台帐号（企鹅号）传播渠道之一，根据《腾讯内容开放平台服务协议》转载发布内容。
• 如有侵权，请联系 cloudcommunity@tencent.com 删除。

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29

2024-05-29