判断数据类型的函数:
> class()#将要判断的内容写在括号里
> class("a")
[1] "character"
> class(TRUE)
[1] "logical"
> class(3)
[1] "numeric"
常见报错:引号、拼写错误、大小写
> class(a)
Error: object 'a' not found
> calss("a")
Error in calss("a") : could not find function "calss"
> class(true)
Error: object 'true' not found
> class(3) #括号误用了中文全角符号,应改为英文半角括号
Error: unexpected input in "class("
Tips---上下键:在控制台中按上/下方向键可以调出之前输入过的命令,便于修改后重新运行
逻辑型数据:
比较运算的结果是逻辑值:
>,<,<=,>=,==,!=
> 3==5
[1] FALSE
> 3<=5
[1] TRUE
> 3!=2 #3不等于2
[1] TRUE
逻辑运算 多个逻辑条件的连接:
与:&
或:|
非:!
> 3<5&4>5
[1] FALSE
> 3<5|4>5
[1] TRUE
> !(4>5)
[1] TRUE
is族函数,判断,返回值为TRUE或FALSE
> is.numeric() #是否为数值型数据
> is.logical() #是否为逻辑型数据
> is.character() #是否为字符型数据
as族函数转换数据类型:
as.numeric() #将其他数据类型转换为数值型
as.logical() #将其他数据类型转换为逻辑型
as.character() #将其他数据类型转换为字符型
能转换的才会被转换,不能转换的会返回NA并给出警告:
> as.numeric("jimmy")
[1] NA
Warning message:
NAs introduced by coercion
多个数据如何组织?
数据框不是文件,只是R语言内部的数据
数据框的每一列只能存在一种数据类型,单独拿出来的一列是向量,视为一个整体,可以有重复值
Tips---脚本打开是乱码的解决方案:在RStudio中选择 File → Reopen with Encoding,选择UTF-8重新打开
(1)用 c() 结合到一起
c(2,5,6,2,9)
[1] 2 5 6 2 9
c("a","f","md","b")
[1] "a" "f" "md" "b"
#(2)连续的数字用冒号“:”
1:5
[1] 1 2 3 4 5
#(3)有重复的用rep(),有规律的序列用seq(),随机数用rnorm()
rep("x",times=3)
[1] "x" "x" "x"
seq(from=3,to=21,by=3)
[1] 3 6 9 12 15 18 21
rnorm(n=3)
[1] 0.08516491 0.84425482 -0.32409770 #测试某个函数如何使用时生成数据使用
#(4)通过组合,产生更为复杂的向量。
paste0(rep("x",times=3),1:3)
[1] "x1" "x2" "x3"
> x <- c(1,3,5,1) #规范的赋值符号 Alt+减号
> x #查看已赋值变量
[1] 1 3 5 1
> x = c(1,3,5,1) #随意的赋值写法
> x
[1] 1 3 5 1
#赋值+输出一起实现
> (x <- c(1,3,5,1))
[1] 1 3 5 1
> x <- c(1,3,5,1);x
[1] 1 3 5 1
赋值名称:不要使用函数名称定义变量,不用空格斜杠引号等特殊字符
2. 简单数学计算
> x+1
[1] 2 4 6 2
> log(x)
[1] 0.000000 1.098612 1.609438 0.000000
> sqrt(x)
[1] 1.000000 1.732051 2.236068 1.000000
3. 根据某条件进行判断,生成逻辑值向量
> x>3
[1] FALSE FALSE TRUE FALSE
> x==3
[1] FALSE TRUE FALSE FALSE
>
4. 初级统计
max(x) #最大值
min(x) #最小值
mean(x) #均值
median(x) #中位数
var(x) #方差
sd(x) #标准差
sum(x) #总和
> length(x) #长度 一个向量里有多少元素
[1] 4
> unique(x) #去重复 重复:从左往右第一次出现的元素不是重复,第二次或多次出现则为重复
[1] 1 3 5
> duplicated(x) #判断对应元素是否重复
[1] FALSE FALSE FALSE TRUE
> !duplicated(x) #不重复的为TRUE,重复的为FALSE
[1] TRUE TRUE TRUE FALSE
> table(x) #重复值统计
x
1 3 5
2 1 1
> sort(x)
[1] 1 1 3 5
> sort(x,decreasing = F)
[1] 1 1 3 5
> sort(x,decreasing = T)
[1] 5 3 1 1
> seq(from=2,to=15,by=2)
[1] 2 4 6 8 10 12 14
> paste0(rep("student",times=7),seq(from=2,to=15,by=2))
[1] "student2" "student4" "student6"
[4] "student8" "student10" "student12"
[7] "student14"
> c(3,"int") #数值型转换为字符型
[1] "3" "int"
> c(2,TRUE) #逻辑型会转换为数值型,TRUE---1,FALSE---0
[1] 2 1
> c(2,5,FALSE)
[1] 2 5 0
> c("a",TRUE) #逻辑型转换为字符型
[1] "a" "TRUE"
练习题2改良版:
a=seq(from=2,to=15,by=2)
> paste0(rep("student",times=length(a)),a)
[1] "student2" "student4" "student6"
[4] "student8" "student10" "student12"
[7] "student14"
R语言中函数思想:能用函数代替就不要手动去数,除非这个代码只用一次
> x=c(1,3,5,1)
> y=c(3,2,5,6)
##1.比较运算,生成等长的逻辑向量
> x==y
[1] FALSE FALSE TRUE FALSE
> y == x
[1] FALSE FALSE TRUE FALSE
##2.数学计算
> x + y #等位运算
[1] 4 5 10 7
##3.连接
> paste(x,y,sep=",") #一对一连接
[1] "1,3" "3,2" "5,5" "1,6"
> x=c("a","a","a")
> y=c("b","b","b")
###如何将两个向量连接在一起?
> paste(x+y)
Error in x + y : non-numeric argument to binary operator
> paste0(x,y)
[1] "ab" "ab" "ab"
> c(x,y)
[1] "a" "a" "a" "b" "b" "b"
paste与paste0的区别与联系(以下示例中 x=c(1,3,5,1),y=c(3,2,5,6))
##Usage:
> ###paste(...,sep=" ",collapse=NULL,recycle0=FALSE)
> ###paste0(...,collapse=NULL,recycle0=FALSE)
> paste(x,y) #两个连接元素之间可以有分隔符
[1] "1 3" "3 2" "5 5" "1 6"
> paste0(x,y) #无缝连接
[1] "13" "32" "55" "16"
> paste(x,y,sep = "")
[1] "13" "32" "55" "16"
> paste(x,y,sep = ",")
[1] "1,3" "3,2" "5,5" "1,6"
当两个向量长度不一致时
> x = c(1,3,5,6,2)
> y = c(3,2,5)
> x == y
[1] FALSE FALSE TRUE FALSE TRUE
Warning message: ##不是报错!!!
In x == y : longer object length is not a multiple of shorter object length
利用循环补齐简化代码:弃长补短
> paste0(rep("x",times=3),1:3) ##or
> paste0(rep("x",3),1:3)
[1] "x1" "x2" "x3"
> paste0("x",1:3)
[1] "x1" "x2" "x3"
> paste0("student",seq(2,15,2))
[1] "student2" "student4" "student6"
[4] "student8" "student10" "student12"
[7] "student14"
x = c(1,3,5,6,2)
y = c(3,2,5)
> intersect(x,y)
[1] 3 5 2
> union(x,y) ##取并集并去重复
[1] 1 3 5 6 2
> setdiff(x,y) ##返回在向量x里存在但在y里不存在的元素
[1] 1 6
> setdiff(y,x) ##返回在向量y里存在但在x里不存在的元素
numeric(0) ##空集
> x %in% y #x的每个元素在y中存在吗
[1] FALSE TRUE TRUE FALSE TRUE
> y %in% x #y的每个元素在x中存在吗
[1] TRUE TRUE TRUE
[ ]:将TRUE对应的值挑选出来,FALSE丢弃
> x=1:15
> x
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
[15] 15
> x==10
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[8] FALSE FALSE TRUE FALSE FALSE FALSE FALSE
[15] FALSE
> x[x==10] ##逻辑值放在中括号里面
[1] 10
> x>10
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[8] FALSE FALSE FALSE TRUE TRUE TRUE TRUE
[15] TRUE
> x[x>10]
[1] 11 12 13 14 15
> x[x %in% c(9,13)] ## %in%:x的每个元素在y中存在吗
[1] 9 13
###FALSE取子集
> x=rep(c("a","b","c"),each=3);x
[1] "a" "a" "a" "b" "b" "b" "c" "c" "c"
> duplicated(x)
[1] FALSE TRUE TRUE FALSE TRUE TRUE FALSE
[8] TRUE TRUE
> x[duplicated(x)]
[1] "a" "a" "b" "b" "c" "c"
> x[!duplicated(x)]
[1] "a" "b" "c"
###x[位置](以下示例中 x=1:15)
> x[4]
[1] 4
> x[2:4]
[1] 2 3 4
> x[1,5]
Error in x[1, 5] : incorrect number of dimensions
> x[c(1,5)] ##取两个不相连位置的元素需要写c()
[1] 1 5
> x[-4]
[1] 1 2 3 5 6 7 8 9 10 11 12 13 14 15
> x[-(2:4)]
[1] 1 5 6 7 8 9 10 11 12 13 14 15
###从向量x中筛选出属于向量y中的值
> x=9:12
> y=8:10
> x[x%in%y]
[1] 9 10
R语言中的修改都要赋值,没有赋值就没有发生过
###改一个元素
> x=9:12
> x[4] <- 40
> x
[1] 9 10 11 40
###改多个元素
> x[c(1,5)] <- c(80,20)
> x
[1] 80 10 11 40 20
> load("gands.Rdata")
> g
[1] "WNT9B" "CRAMP1L"
[3] "MYL12B" "PRSS8"
[5] "GFM2" "CRAMP1L"
[7] "TUBA4A" "SLCO1C1"
[9] "NYNRIN" "COMMD1"
[11] "COMMD1" "CCT4"
[13] "AC017081.1" "RAB7A"
[15] "CCT4" "ZDHHC16"
[17] "CASKIN2" "MYL12B"
[19] "GGT7" "SNRPE"
[21] "RGPD3" "ZNF586"
[23] "COMMD1" "GGT7"
[25] "URB1" "RAB7A"
[27] "MPP2" "AFG3L2"
[29] "URB1" "AC104581.1"
[31] "IL19" "MPP2"
[33] "SYT6" "ATP2A2"
[35] "IL19" "SNRPE"
[37] "ARHGAP1" "PRSS8"
[39] "PNMT" "ZNF461"
[41] "OR2D3" "CECR5"
[43] "SPDL1" "CLEC17A"
[45] "ZNF461" "ATG10"
[47] "ATG10" "ATG10"
[49] "ZDHHC16" "SLC25A25"
[51] "TCP10" "KRTAP4-3"
[53] "SLC30A9" "SLCO1C1"
[55] "UBAC1" "GGT7"
[57] "CASKIN2" "GSTP1"
[59] "PRY" "UBAC1"
[61] "MPP2" "NYNRIN"
[63] "INTS12" "MYL12B"
[65] "MPP2" "KCND1"
[67] "RGPD3" "RGPD3"
[69] "SLC30A9" "C10orf128"
[71] "HBD" "SLC30A9"
[73] "MYL12B" "GGT7"
[75] "HEPH" "TUBA4A"
[77] "RP5-1021I20.4" "KLHDC8A"
[79] "HBD" "HBP1"
[81] "CCT4" "MARC2"
[83] "ZNF586" "LCP1"
[85] "CECR5" "OR2D3"
[87] "CRAMP1L" "LIPE"
[89] "INTS12" "LIPE"
[91] "NETO2" "CANX"
[93] "SPDL1" "ATP6V1B2"
[95] "SLCO1C1" "MARC2"
[97] "GGT7" "LCP1"
[99] "CECR5" "HOOK2"
> length(g)
[1] 100
> g[seq(2,100,2)] ##取下标为偶数的元素
[1] "CRAMP1L" "PRSS8" "CRAMP1L"
[4] "SLCO1C1" "COMMD1" "CCT4"
[7] "RAB7A" "ZDHHC16" "MYL12B"
[10] "SNRPE" "ZNF586" "GGT7"
[13] "RAB7A" "AFG3L2" "AC104581.1"
[16] "MPP2" "ATP2A2" "SNRPE"
[19] "PRSS8" "ZNF461" "CECR5"
[22] "CLEC17A" "ATG10" "ATG10"
[25] "SLC25A25" "KRTAP4-3" "SLCO1C1"
[28] "GGT7" "GSTP1" "UBAC1"
[31] "NYNRIN" "MYL12B" "KCND1"
[34] "RGPD3" "C10orf128" "SLC30A9"
[37] "GGT7" "TUBA4A" "KLHDC8A"
[40] "HBP1" "MARC2" "LCP1"
[43] "OR2D3" "LIPE" "LIPE"
[46] "CANX" "ATP6V1B2" "MARC2"
[49] "LCP1" "HOOK2"
> s
[1] "LIPE" "SLC30A9"
[3] "SLC30A9" "GSTP1"
[5] "LIPE" "INTS12"
[7] "OR2D3" "GFM2"
[9] "SLCO1C1" "NYNRIN"
[11] "CECR5" "RAB7A"
[13] "FO538757.2" "COMMD1"
[15] "C10orf128" "HEPH"
[17] "HBD" "AFG3L2"
[19] "ZNF586" "SPDL1"
[21] "KLHDC8A" "AC017081.1"
[23] "SLC30A9" "CECR5"
[25] "GGT7" "COMMD1"
[27] "CASKIN2" "MPP2"
[29] "KLHDC8A" "PLEKHN1"
[31] "SLC30A9" "ATG10"
[33] "SNRPE" "FO538757.2"
[35] "GSTP1" "ARHGAP1"
[37] "AC104581.1" "KLHL17"
[39] "CLEC17A" "PERM1"
[41] "RAB7A" "GGT7"
[43] "SLC25A25" "URB1"
[45] "OR4F16" "ZNF461"
[47] "RGPD3" "CECR5"
[49] "RP5-1021I20.4" "OR4F5"
> g%in%s
[1] FALSE FALSE FALSE FALSE TRUE FALSE FALSE
[8] TRUE TRUE TRUE TRUE FALSE TRUE TRUE
[15] FALSE FALSE TRUE FALSE TRUE TRUE TRUE
[22] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[29] TRUE TRUE FALSE TRUE FALSE FALSE FALSE
[36] TRUE TRUE FALSE FALSE TRUE TRUE TRUE
[43] TRUE TRUE TRUE TRUE TRUE TRUE FALSE
[50] TRUE FALSE FALSE TRUE TRUE FALSE TRUE
[57] TRUE TRUE FALSE FALSE TRUE TRUE TRUE
[64] FALSE TRUE FALSE TRUE TRUE TRUE TRUE
[71] TRUE TRUE FALSE TRUE TRUE FALSE TRUE
[78] TRUE TRUE FALSE FALSE FALSE TRUE FALSE
[85] TRUE TRUE FALSE TRUE TRUE TRUE FALSE
[92] FALSE TRUE FALSE TRUE FALSE TRUE FALSE
[99] TRUE FALSE
> g[g%in%s]
[1] "GFM2" "SLCO1C1"
[3] "NYNRIN" "COMMD1"
[5] "COMMD1" "AC017081.1"
[7] "RAB7A" "CASKIN2"
[9] "GGT7" "SNRPE"
[11] "RGPD3" "ZNF586"
[13] "COMMD1" "GGT7"
[15] "URB1" "RAB7A"
[17] "MPP2" "AFG3L2"
[19] "URB1" "AC104581.1"
[21] "MPP2" "SNRPE"
[23] "ARHGAP1" "ZNF461"
[25] "OR2D3" "CECR5"
[27] "SPDL1" "CLEC17A"
[29] "ZNF461" "ATG10"
[31] "ATG10" "ATG10"
[33] "SLC25A25" "SLC30A9"
[35] "SLCO1C1" "GGT7"
[37] "CASKIN2" "GSTP1"
[39] "MPP2" "NYNRIN"
[41] "INTS12" "MPP2"
[43] "RGPD3" "RGPD3"
[45] "SLC30A9" "C10orf128"
[47] "HBD" "SLC30A9"
[49] "GGT7" "HEPH"
[51] "RP5-1021I20.4" "KLHDC8A"
[53] "HBD" "ZNF586"
[55] "CECR5" "OR2D3"
[57] "LIPE" "INTS12"
[59] "LIPE" "SPDL1"
[61] "SLCO1C1" "GGT7"
[63] "CECR5"
> length(g[g%in%s])
[1] 63
> x=rnorm(10,0,18)
> x
[1] 5.2828494 3.0746745 0.8475577
[4] -11.3365565 26.5630396 22.5384872
[7] 22.8419036 6.9907060 -23.1082285
[10] -3.8680376
> x[x<(-2)]
[1] -11.336557 -23.108229 -3.868038
> k1 = rnorm(12);k1
[1] 0.8622467 0.4796227 1.5102533 0.6260323
[5] 2.7191662 0.4572955 -1.2713989 0.3595864
[9] -0.1002634 -1.5966247 2.2007094 -2.5580408
> plot(k1)
> k2 = rep(c("a","b","c","d"),each = 3);k2
[1] "a" "a" "a" "b" "b" "b" "c" "c" "c" "d" "d"
[12] "d"
> boxplot(k1~k2) ##箱线图
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。