R_basic 02

原创

用户10407321

发布于 2023-03-23 12:24:20

2800

发布于 2023-03-23 12:24:20

Part5文件读取

 >write.csv(test,file="example.csv")               > read.csv()通常读取CSV格式     
 > write.table(test,file="example.txt")    > read.table()通常读取txt格式

 > save(test,file="example.Rdata")         > load("example.Rdata")

> a <- data.table::fread("soft.txt",data.table = F) ##data.frame



> load("C:/Users/win10/Desktop/R_01/gands.Rdata")
> load("../R_01/gands.Rdata")   #代表上一级目录


很丝滑
> sdp <- rio::import("TCGA-CHOL.GDC_phenotype.tsv.gz")
> rio::export(sdp,"TCGA-CHOL.GDC_phenotype.tsv.gz")

怎么改？

Part6ggplot绘图

> ggplot(data = iris)+
+   geom_point(mapping = aes(x = Sepal.Length,
+                            y = Petal.Length,
+                            color = Species))
> ggplot(data = iris)+
+   geom_point(mapping = aes(x = Sepal.Length,
+                            y = Petal.Length,
+                            ),color = "blue")

自行选定颜色？

 ggplot(data = iris)+
+   geom_point(mapping = aes(x = Sepal.Length,
+                            y = Petal.Length,
+                            color = Species))+
+   scale_color_manual(values = c("blue","grey","red"))

必须先在上面的aes()里把color映射到某个变量（如Species），下面的scale_color_manual()才能自行设定颜色
分面？
+facet_wrap(~y) 按变量y的取值分面（每个取值一个子图）
+facet_grid(x~y) 按两个变量分面：x的取值分行，y的取值分列
> sample(letters[1:5],6,replace = T)
[1] "a" "a" "c" "a" "e" "d"

> ggplot(data = diamonds) + 
+   geom_bar(mapping = aes(x = cut))

> ggplot(data = diamonds) + 
+   geom_bar(mapping = aes(x = cut, y = ..prop.., group = 1))
这里要统计的是占比：..prop..两边加点，表示它是统计变换计算出来的占比，用来和diamonds里真实存在的列区分开（新版ggplot2推荐写作after_stat(prop)）；group = 1表示把所有数据当作一组来计算占比

> ggplot(data = iris,mapping = aes(x = Species, 
+                                  y = Sepal.Width,
+                                  fill = Species)) + 
+   geom_boxplot()+
+   geom_jitter()   # 与geom_point()相比，geom_jitter()会把点抖开，避免重叠

> ggplot(data = diamonds) + 
+   geom_bar(mapping = aes(x = cut,fill=clarity),
+            position = "fill")

翻转横纵坐标

coord_flip()

看显著性：
> my_comparisons <- list( c("setosa", "versicolor"), 
+                         c("setosa", "virginica"), 
+                         c("versicolor", "virginica") )
> 
> ggplot(data = iris,mapping = aes(x = Species, 
+                                  y = Sepal.Length,
+                                  fill = Species)) + 
+   geom_boxplot()+stat_compare_means(comparisons = my_comparisons)+ # Add pairwise comparisons p-value
+   stat_compare_means(label.y=6)

![请在此添加图片描述](https://ask8088-private-1251520898.cos.ap-guangzhou.myqcloud.com/developer-images/article/10407321/4e1cds8l4j.png?q-sign-algorithm=sha1&q-ak=AKID2uZ1FGBdx1pNgjE3KK4YliPpzyjLZvug&q-sign-time=1679544849;1679552049&q-key-time=1679544849;1679552049&q-header-list=&q-url-param-list=&q-signature=e3cbea40d37fde2710115dcfc2c4de1da6717624)

Part7数据处理

## 1.检测字符串长度
> x <- "The birch canoe slid on the smooth planks."
> str_length(x)
[1] 42
> length(x)
[1] 1

> y <- c("jimmy 150","nicker 140","tony 152")
 y : a character vector   由3个字符串（character）组成
 string  ：a character vector

## 2.字符串拆分
> str_split(x," ")
[[1]]
[1] "The"     "birch"   "canoe"   "slid"    "on"      "the"     "smooth" 
[8] "planks."
> y = c("jimmy 150","nicker 140","tony 152")
> str_split(y," ")
[[1]]
[1] "jimmy" "150"  
[[2]]
[1] "nicker" "140"   
[[3]]
[1] "tony" "152" 
> str_split(y," ",simplify = T)
     [,1]     [,2] 
[1,] "jimmy"  "150"
[2,] "nicker" "140"
[3,] "tony"   "152"

## 3.按位置提取字符串
> str_sub(x,5,9)
[1] "birch"
字符串内部的空格、引号、逗号等每个字符都算一个位置
From <http://127.0.0.1:23967/> 

## 4.字符检测
> str_detect(x2,"h")
[1] TRUE
> str_starts(x2,"T")
[1] FALSE
> str_ends(x2,"e")
[1] FALSE

## 5.字符串替换
> str_replace_all(x2,"o","A")
全部换

##  6.字符删除
> str_remove_all(x," ")
[1] "Thebirchcanoeslidonthesmoothplanks."

## 王炸
> samples = c("tumor1","tumor2","tumor3","normal1","normal2","normal3")
> k1 = str_detect(samples,"tumor");k1
[1]  TRUE  TRUE  TRUE FALSE FALSE FALSE
> ifelse(k1,"tumor","normal")
[1] "tumor"  "tumor"  "tumor"  "normal" "normal" "normal"

> str_remove_all(samples,"[0-9]")
[1] "tumor"  "tumor"  "tumor"  "normal" "normal" "normal"

## For 循环

例题1：
> par(mfrow=c(2,2))
> for (i in 1:4) {
+   plot(iris[,i],col=iris[,5])
+ }

例题2：
> x <- c(1,5,7,3)
> a <- list()
> for (i in 1:length(x)) {
+   a[[i]] <- rnorm(x[[i]])
+ }
> View(a)
为什么不用x[i]？取单个元素时用双中括号[[ ]]给向量取子集，是哈德雷（Hadley）大神推荐的写法

> set.seed(10086)
> exp = matrix(rnorm(18),ncol = 6)
> exp = round(exp,2)
> rownames(exp) = paste0("gene",1:3)
> colnames(exp) = paste0("test",1:6)
> exp[,1:3] = exp[,1:3]+1
> exp
      test1 test2 test3 test4 test5 test6
gene1  1.55  1.49  1.80 -0.37 -1.82 -1.62
gene2 -1.74  0.37  2.08  2.11 -0.22  1.42
gene3  1.57  1.25  1.32  2.49  0.58 -0.81
> 
> library(tidyr)
> library(tibble)
> library(dplyr)
> dat = t(exp) %>% 
+   as.data.frame() %>% 
+   rownames_to_column() %>% 
+   mutate(group = rep(c("control","treat"),each = 3))

![请在此添加图片描述](https://ask8088-private-1251520898.cos.ap-guangzhou.myqcloud.com/developer-images/article/10407321/nsr7nolzqp.png?q-sign-algorithm=sha1&q-ak=AKID2uZ1FGBdx1pNgjE3KK4YliPpzyjLZvug&q-sign-time=1679545032;1679552232&q-key-time=1679545032;1679552232&q-header-list=&q-url-param-list=&q-signature=f89c89c37f61575c1bca4a604fdd72b7363b41a3)

> pdat = dat%>% 
+   pivot_longer(cols = starts_with("gene"),
+                names_to = "gene",
+                values_to = "count")
> 
> library(ggplot2)
> p = ggplot(pdat,aes(gene,count))+
+   geom_boxplot(aes(fill = group))+
+   theme_bw()
> p
> p + facet_wrap(~gene,scales = "free")

1.data.frame

apply(test,2,mean)
 test是数据框  1为行，2为列   mean为函数
挑选一个表达矩阵中方差最大的1000个基因的名字
> names(sort(apply(test,1,var),decreasing = T)[1:1000])

2.list

看起来很麻烦
> lapply(test,mean)
$x
[1] 34.5
$y
[1] 33.5
$z
[1] 28.5

> lapply(test,fivenum)
$x
[1] 33.0 33.5 34.5 35.5 36.0
$y
[1] 32.0 32.5 33.5 34.5 35.0
$z
[1] 27.0 27.5 28.5 29.5 30.0

> sapply(test,mean)
   x    y    z 
34.5 33.5 28.5

> sapply(test,fivenum)
        x    y    z
[1,] 33.0 32.0 27.0
[2,] 33.5 32.5 27.5
[3,] 34.5 33.5 28.5
[4,] 35.5 34.5 29.5
[5,] 36.0 35.0 30.0

df <- chickwts
> table(df$feed)
casein horsebean   linseed  meatmeal   soybean sunflower 
       12        10        12        11        14        12 
> group_mean <- aggregate(df$weight,list(df$feed),mean)
> group_mean
    Group.1        x
1    casein 323.5833
2 horsebean 160.2000
3   linseed 218.7500
4  meatmeal 276.9091
5   soybean 246.4286
6 sunflower 328.9167


> table(a>60,useNA="always")
FALSE  TRUE  <NA> 
   18    37     3

tips

> #存在即跳过
> f <- "aaa.Rdata"   # 变量名不要用F：F是FALSE的简写，赋值后会被覆盖
> if(!file.exists(f)){   # 用变量f，不要加引号；加了引号就成了名为"F"的文件
+   a=1  # 假如是限速步骤
+   save(a,file = f)
+ }
> load(f)

原创声明：本文系作者授权腾讯云开发者社区发表，未经许可，不得转载。

如有侵权，请联系 cloudcommunity@tencent.com 删除。

数据处理

data

frame

list

原创声明：本文系作者授权腾讯云开发者社区发表，未经许可，不得转载。

如有侵权，请联系 cloudcommunity@tencent.com 删除。

登录后参与评论

0 条评论

热度

R_basic 02

R_basic 02

Part5文件读取

Part6ggplot绘图

Part7数据处理

1.data.frame

2.list

tips

社区

活动

资源

关于

腾讯云开发者

热门产品

热门推荐

更多推荐