前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >R_basic 02

R_basic 02

原创
作者头像
用户10407321
发布2023-03-23 12:24:20
2800
发布2023-03-23 12:24:20
举报
文章被收录于专栏:R语言基础R语言基础

Part5文件读取

代码语言:txt
复制
 >write.csv(test,file="example.csv")               > read.csv()通常读取CSV格式     
 > write.table(test,file="example.txt")    > read.table()通常读取txt格式

 > save(test,file="example.Rdata")         > load("example.Rdata")

> a <- data.table::fread("soft.txt",data.table = F) ##data.frame



> load("C:/Users/win10/Desktop/R_01/gands.Rdata")
> load("../R_01/gands.Rdata")   #代表上一级目录


很丝滑
> sdp <- rio::import("TCGA-CHOL.GDC_phenotype.tsv.gz")
> rio::export(sdp,"TCGA-CHOL.GDC_phenotype.tsv.gz")

怎么改?

Part6ggplot绘图

代码语言:txt
复制
> ggplot(data = iris)+
+   geom_point(mapping = aes(x = Sepal.Length,
+                            y = Petal.Length,
+                            color = Species))
> ggplot(data = iris)+
+   geom_point(mapping = aes(x = Sepal.Length,
+                            y = Petal.Length),
+              color = "blue")

自行选定颜色?

代码语言:txt
复制
 ggplot(data = iris)+
+   geom_point(mapping = aes(x = Sepal.Length,
+                            y = Petal.Length,
+                            color = Species))+
+   scale_color_manual(values = c("blue","grey","red"))

必须上面有color,才有下面的自行设定颜色
分面?
+facet_wrap(~y) 按变量y分面(每个取值一个子图)
+facet_grid(x~y) 按x分行、按y分列进行分面
> sample(letters[1:5],6,replace = T)
[1] "a" "a" "c" "a" "e" "d"
代码语言:txt
复制
> ggplot(data = diamonds) + 
+   geom_bar(mapping = aes(x = cut))
代码语言:txt
复制
> ggplot(data = diamonds) + 
+   geom_bar(mapping = aes(x = cut, y = ..prop.., group = 1))
我就是要统计它的占比:..prop..是统计变换计算出来的占比,两边加点是为了和diamonds里本来就有的列区分开来;group = 1表示把全部数据当作一组来计算占比(ggplot2 3.3.0起推荐写作after_stat(prop))
代码语言:txt
复制
> ggplot(data = iris,mapping = aes(x = Species, 
+                                  y = Sepal.Width,
+                                  fill = Species)) + 
+   geom_boxplot()+
+   geom_jitter()   # 与geom_point()相比,geom_jitter()会把点抖开
dae120d84d6be168d4d55f81e1941dc)
代码语言:txt
复制
> ggplot(data = diamonds) + 
+   geom_bar(mapping = aes(x = cut,fill=clarity),
+            position = "fill")

翻转横纵坐标

coord_flip()

代码语言:txt
复制
看显著性(stat_compare_means()来自ggpubr包,需要先library(ggpubr)):
> my_comparisons <- list( c("setosa", "versicolor"), 
+                         c("setosa", "virginica"), 
+                         c("versicolor", "virginica") )
> 
> ggplot(data = iris,mapping = aes(x = Species, 
+                                  y = Sepal.Length,
+                                  fill = Species)) + 
+   geom_boxplot()+stat_compare_means(comparisons = my_comparisons)+ # Add pairwise comparisons p-value
+   stat_compare_means(label.y=6)

![请在此添加图片描述](https://ask8088-private-1251520898.cos.ap-guangzhou.myqcloud.com/developer-images/article/10407321/4e1cds8l4j.png?q-sign-algorithm=sha1&q-ak=AKID2uZ1FGBdx1pNgjE3KK4YliPpzyjLZvug&q-sign-time=1679544849;1679552049&q-key-time=1679544849;1679552049&q-header-list=&q-url-param-list=&q-signature=e3cbea40d37fde2710115dcfc2c4de1da6717624)

Part7数据处理

代码语言:text
复制
## 1.检测字符串长度
> x <- "The birch canoe slid on the smooth planks."
> str_length(x)
[1] 42
> length(x)
[1] 1

> y <- c("jimmy 150","nicker 140","tony 152")
 y : a character vector   由3个character组成
 string  :a character vector

## 2.字符串拆分
> str_split(x," ")
[[1]]
[1] "The"     "birch"   "canoe"   "slid"    "on"      "the"     "smooth" 
[8] "planks."
> y = c("jimmy 150","nicker 140","tony 152")
> str_split(y," ")
[[1]]
[1] "jimmy" "150"  
[[2]]
[1] "nicker" "140"   
[[3]]
[1] "tony" "152" 
> str_split(y," ",simplify = T)
     [,1]     [,2] 
[1,] "jimmy"  "150"
[2,] "nicker" "140"
[3,] "tony"   "152"

## 3.按位置提取字符串
> str_sub(x,5,9)
[1] "birch"
所有的空格、引号、逗号都是算数的
From <http://127.0.0.1:23967/> 

## 4.字符检测
> str_detect(x2,"h")
[1] TRUE
> str_starts(x2,"T")
[1] FALSE
> str_ends(x2,"e")
[1] FALSE

## 5.字符串替换
> str_replace_all(x2,"o","A")
全部换

##  6.字符删除
> str_remove_all(x," ")
[1] "Thebirchcanoeslidonthesmoothplanks."
代码语言:txt
复制
## 王炸
> samples = c("tumor1","tumor2","tumor3","normal1","normal2","normal3")
> k1 = str_detect(samples,"tumor");k1
[1]  TRUE  TRUE  TRUE FALSE FALSE FALSE
> ifelse(k1,"tumor","normal")
[1] "tumor"  "tumor"  "tumor"  "normal" "normal" "normal"

> str_remove_all(samples,"[0-9]")
[1] "tumor"  "tumor"  "tumor"  "normal" "normal" "normal"

## For 循环

例题1:
> par(mfrow=c(2,2))
> for (i in 1:4) {
+   plot(iris[,i],col=iris[,5])
+ }

例题2:
> x <- c(1,5,7,3)
> a <- list()
> for (i in 1:length(x)) {
+   a[[i]] <- rnorm(x[[i]])
+ }
> View(a)
为什么不用x[i]?用两个[]给向量取子集是哈德雷大神推荐的写法
代码语言:txt
复制
> set.seed(10086)
> exp = matrix(rnorm(18),ncol = 6)
> exp = round(exp,2)
> rownames(exp) = paste0("gene",1:3)
> colnames(exp) = paste0("test",1:6)
> exp[,1:3] = exp[,1:3]+1
> exp
      test1 test2 test3 test4 test5 test6
gene1  1.55  1.49  1.80 -0.37 -1.82 -1.62
gene2 -1.74  0.37  2.08  2.11 -0.22  1.42
gene3  1.57  1.25  1.32  2.49  0.58 -0.81
> 
> library(tidyr)
> library(tibble)
> library(dplyr)
> dat = t(exp) %>% 
+   as.data.frame() %>% 
+   rownames_to_column() %>% 
+   mutate(group = rep(c("control","treat"),each = 3))

![请在此添加图片描述](https://ask8088-private-1251520898.cos.ap-guangzhou.myqcloud.com/developer-images/article/10407321/nsr7nolzqp.png?q-sign-algorithm=sha1&q-ak=AKID2uZ1FGBdx1pNgjE3KK4YliPpzyjLZvug&q-sign-time=1679545032;1679552232&q-key-time=1679545032;1679552232&q-header-list=&q-url-param-list=&q-signature=f89c89c37f61575c1bca4a604fdd72b7363b41a3)
代码语言:txt
复制
> pdat = dat%>% 
+   pivot_longer(cols = starts_with("gene"),
+                names_to = "gene",
+                values_to = "count")
> 
> library(ggplot2)
> p = ggplot(pdat,aes(gene,count))+
+   geom_boxplot(aes(fill = group))+
+   theme_bw()
> p
> p + facet_wrap(~gene,scales = "free")

1.data.frame

代码语言:txt
复制
apply(test,2,mean)
 test是数据框  1为行,2为列   mean为函数
挑选一个表达矩阵中方差最大的1000个基因的名字
> names(sort(apply(test,1,var),decreasing = T)[1:1000])

2.list

代码语言:txt
复制
看起来很麻烦
> lapply(test,mean)
$x
[1] 34.5
$y
[1] 33.5
$z
[1] 28.5

> lapply(test,fivenum)
$x
[1] 33.0 33.5 34.5 35.5 36.0
$y
[1] 32.0 32.5 33.5 34.5 35.0
$z
[1] 27.0 27.5 28.5 29.5 30.0

> sapply(test,mean)
   x    y    z 
34.5 33.5 28.5

> sapply(test,fivenum)
        x    y    z
[1,] 33.0 32.0 27.0
[2,] 33.5 32.5 27.5
[3,] 34.5 33.5 28.5
[4,] 35.5 34.5 29.5
[5,] 36.0 35.0 30.0

df <- chickwts
> table(df$feed)
casein horsebean   linseed  meatmeal   soybean sunflower 
       12        10        12        11        14        12 
> group_mean <- aggregate(df$weight,list(df$feed),mean)
> group_mean
    Group.1        x
1    casein 323.5833
2 horsebean 160.2000
3   linseed 218.7500
4  meatmeal 276.9091
5   soybean 246.4286
6 sunflower 328.9167


> table(a>60,useNA="always")
FALSE  TRUE  <NA> 
   18    37     3 

tips

代码语言:txt
复制
> #存在即跳过
> f <- "aaa.Rdata"   # 不要用F作变量名,F是FALSE的缩写
> if(!file.exists(f)){
+   a=1  # 假如是限速步骤
+   save(a,file = f)
+ }
> load(f)

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • Part5文件读取
  • Part6ggplot绘图
  • Part7数据处理
    • 1.data.frame
      • 2.list
      • tips
      领券
      问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档