read.table(file, header = FALSE, sep = "", quote = "\"'",
dec = ".", numerals = c("allow.loss", "warn.loss", "no.loss"),
row.names, col.names, as.is = !stringsAsFactors,
na.strings = "NA", colClasses = NA, nrows = -1,
skip = 0, check.names = TRUE, fill = !blank.lines.skip,
strip.white = FALSE, blank.lines.skip = TRUE,
comment.char = "#",
allowEscapes = FALSE, flush = FALSE,
stringsAsFactors = default.stringsAsFactors(),
fileEncoding = "", encoding = "unknown", text, skipNul = FALSE)
setwd("E:/Lang/R/finally_book/test3/")
rm(list = ls())
## 基本参数
dataset1 <- read.table("./women1.txt", header = T, sep = "\t")
head(dataset1)
## name height weight tmp
## 1 stu1 58 115 1.1
## 2 stu2 59 117 1.2
## 3 stu3 60 120 1.3
## 4 stu4 61 123 1.4
## 5 stu5 62 126 1.5
## 6 stu6 63 129 1.6
dataset1$name
## [1] stu1 stu2 stu3 stu4 stu5 stu6 stu7 stu8 stu9 stu10 stu11
## [12] stu12 stu13 stu14 stu15
## 15 Levels: stu1 stu10 stu11 stu12 stu13 stu14 stu15 stu2 stu3 ... stu9
class(dataset1$name)
## [1] "factor"
is.factor(dataset1$name)
## [1] TRUE
dataset1 <- read.table("./women1.txt", header = T, sep = "\t", as.is = T)
head(dataset1)
## name height weight tmp
## 1 stu1 58 115 1.1
## 2 stu2 59 117 1.2
## 3 stu3 60 120 1.3
## 4 stu4 61 123 1.4
## 5 stu5 62 126 1.5
## 6 stu6 63 129 1.6
dataset1$name
## [1] "stu1" "stu2" "stu3" "stu4" "stu5" "stu6" "stu7" "stu8"
## [9] "stu9" "stu10" "stu11" "stu12" "stu13" "stu14" "stu15"
class(dataset1$name)
## [1] "character"
is.factor(dataset1$name)
## [1] FALSE
## skip = 0 跳过文件的前n行(skip = n)
dataset2 <- read.table("./women1.txt", header = T, sep = "\t", skip = 3)
head(dataset2)
## stu3 X60 X120 X1.3
## 1 stu4 61 123 1.4
## 2 stu5 62 126 1.5
## 3 stu6 63 129 1.6
## 4 stu7 64 132 1.7
## 5 stu8 65 135 1.8
## 6 stu9 66 139 1.9
dataset2 <- read.table("./women1.txt", header = F, sep = "\t", skip = 3)
head(dataset2)
## V1 V2 V3 V4
## 1 stu3 60 120 1.3
## 2 stu4 61 123 1.4
## 3 stu5 62 126 1.5
## 4 stu6 63 129 1.6
## 5 stu7 64 132 1.7
## 6 stu8 65 135 1.8
## nrows = -1 最大读入行数,“-1”表示都读入
dataset3 <- read.table("./women1.txt", header = T, sep = "\t", nrows = 3)
head(dataset3)
## name height weight tmp
## 1 stu1 58 115 1.1
## 2 stu2 59 117 1.2
## 3 stu3 60 120 1.3
dataset3 <- read.table("./women1.txt", header = F, sep = "\t", nrows = 3)
head(dataset3)
## V1 V2 V3 V4
## 1 name height weight tmp
## 2 stu1 58 115 1.1
## 3 stu2 59 117 1.2
## 指定行名
dataset4 <- read.table("./women1.txt", header = T, sep = "\t", row.names = 1) # **表中第一行一列元素被跳过**
head(dataset4)
## height weight tmp
## stu1 58 115 1.1
## stu2 59 117 1.2
## stu3 60 120 1.3
## stu4 61 123 1.4
## stu5 62 126 1.5
## stu6 63 129 1.6
row.names(dataset4)
## [1] "stu1" "stu2" "stu3" "stu4" "stu5" "stu6" "stu7" "stu8"
## [9] "stu9" "stu10" "stu11" "stu12" "stu13" "stu14" "stu15"
## dec = "." 指定小数点所用的字符;na.strings = "NA" 指定哪些字符串表示缺失值;comment.char 只能设定为单个字符
data1 <- read.table("./women2.txt", header = T, dec = "*", na.strings = c("", "NA", "NO"), comment.char = "\\")
head(data1)
## name height weight tmp
## 1 /stu1/ 58 115 1.1
## 2 /stu2/ 59 117 1.2
## 3 /stu3/ 60 NA 1.3
## 4 /stu4/ 61 123 1.4
## 5 /stu5/ 62 NA 1.5
## 6 /stu6/ NA NA 1.6
sapply(data1[1:6,], is.na)
## name height weight tmp
## [1,] FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE TRUE FALSE
## [4,] FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE TRUE FALSE
## [6,] FALSE TRUE TRUE FALSE
sapply(data1, class)
## name height weight tmp
## "factor" "integer" "integer" "numeric"
# quote的设定
data1 <- read.table("./women2.txt", header = T, dec = "*", na.strings = c("", "NA", "NO"), comment.char = "\\", quote = "/", as.is = F)
head(data1)
## name height weight tmp
## 1 stu1 58 115 1.1
## 2 stu2 59 117 1.2
## 3 stu3 60 "" 1.3
## 4 stu4 61 123 1.4
## 5 stu5 62 <NA> 1.5
## 6 stu6 NA <NA> 1.6
sapply(data1, class)
## name height weight tmp
## "factor" "integer" "factor" "numeric"
test1 <- c(1:5, "6,7", "8,9,10")
tf <- tempfile() # 生成一个临时文件
tf
## [1] "C:\\Users\\Administrator\\AppData\\Local\\Temp\\RtmpeGFHVW\\file1aa8786c53fe"
writeLines(test1, tf) # write
read.csv(tf)
## X1
## 1 2
## 2 3
## 3 4
## 4 5
## 5 6
## 6 7
## 7 8
## 8 9
## 9 10
read.csv(tf, fill = T)
## X1
## 1 2
## 2 3
## 3 4
## 4 5
## 5 6
## 6 7
## 7 8
## 8 9
## 9 10
t( count.fields(tf, sep = ",") )
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1 1 1 1 1 2 3
ncol <- max(count.fields(tf, sep = ","))
ncol
## [1] 3
seq_len(ncol)
## [1] 1 2 3
paste("V", seq_len(ncol))
## [1] "V 1" "V 2" "V 3"
paste0("V", seq_len(ncol))
## [1] "V1" "V2" "V3"
read.csv(tf, fill = TRUE, header = FALSE,
col.names = paste0("V", seq_len(ncol)))
## V1 V2 V3
## 1 1 NA NA
## 2 2 NA NA
## 3 3 NA NA
## 4 4 NA NA
## 5 5 NA NA
## 6 6 7 NA
## 7 8 9 10
unlink(tf)
## "Inline" data set, using text=
## Notice that leading and trailing empty lines are auto-trimmed
read.table(header = TRUE, text = "
a b
1 2
3 4
")
## a b
## 1 1 2
## 2 3 4
read.table(header = TRUE, text = "
ab
1 2
3 4
")
## ab
## 1 2
## 3 4
x <- data.frame(a = I("a \" quote"), b = pi)
x
## a b
## 1 a " quote 3.141593
write.table(x, file = "foo.csv", sep = ",", col.names = NA,
qmethod = "double")
tmp <- read.table("foo.csv", header = TRUE, sep = ",", row.names = 1);tmp
## a b
## 1 a " quote 3.141593
row.names(tmp)
## [1] "1"
write.csv(x, file = "foo.csv")
Error in file(file, ifelse(append, "a", "w")) :
cannot open the connection
In addition: Warning message:
In file(file, ifelse(append, "a", "w")) :
cannot open file 'foo.csv': Permission denied
注意:产生这个错误信息的原因是文件 foo.csv 正被外部程序打开(占用),因此无法写入。
write.csv(x, file = "foo.csv", row.names = FALSE)
read.csv("foo.csv")
## a b
## 1 a " quote 3.141593
write.csv(x, file = "foo.csv", row.names = TRUE)
read.csv("foo.csv")
## X a b
## 1 1 a " quote 3.141593
## To write a file in MacRoman for simple use in Mac Excel 2004/8
write.csv(x, file = "foo.csv", fileEncoding = "macroman")
## or for Windows Excel 2007/10
write.csv(x, file = "foo.csv", fileEncoding = "UTF-16LE")
读入固定分隔长度的数据:
read.fwf(file, widths, header = FALSE, sep = "\t",
skip = 0, row.names, col.names, n = -1,
buffersize = 2000, fileEncoding = "", ...)
fwf.txt
ABC123%$12
TEX124@#12
y o14 @@#
demo_3 <- read.fwf("./fwf.txt", widths = c(3,3), header = F, col.names = c("name", "score"))
demo_3
## name score
## 1 ABC 123
## 2 TEX 124
## 3 y o 14
# Demonstrates how read.fwf() interprets different forms of the `widths` argument.
ff <- tempfile()
cat(file = ff, "123456", "987654", sep = "\n") # file content: 123456\n987654
read.fwf(ff, widths = c(1,2,3))
## V1 V2 V3
## 1 1 23 456
## 2 9 87 654
read.fwf(ff, widths = c(1,0,3)) # a width of 0 reads no characters, so that column is NA
## V1 V2 V3
## 1 1 NA 234
## 2 9 NA 876
read.fwf(ff, widths = c(1,-1,3)) # a negative width skips that many characters (no column is created)
## V1 V2
## 1 1 345
## 2 9 765
read.fwf(ff, widths = c(1,-2,3))
## V1 V2
## 1 1 456
## 2 9 654
unlink(ff)
# Recreate the file with a first line that is shorter than the requested widths.
cat(file = ff, "123", "987654", sep = "\n")
read.fwf(ff, widths = c(1,0, 2,3)) # fields that lie past the end of a short line become NA
## V1 V2 V3 V4
## 1 1 NA 23 NA
## 2 9 NA 87 654
unlink(ff)
cat(file = ff, "123456", "987654", sep = "\n")
# A list of width vectors describes a multi-line record: the first vector is
# applied to line 1 and the second to line 2, so both lines end up in one row.
tmp <- read.fwf(ff, widths = list(c(1,0, 2,3), c(2,2,2)))
tmp
## V1 V2 V3 V4 V5 V6 V7
## 1 1 NA 23 456 98 76 54
class(tmp)
## [1] "data.frame"
dim(tmp)
## [1] 1 7
unlink(ff)
readline(prompt = "")
1,用于程序的交互,根据输入的条件来判断后续执行的方向;
2,通过键盘读入一行数据;
# Ask the user a yes/no question on the console and print a reply.
# Only the exact answer "Y" selects the encouragement message; any other
# input (including an empty line) prints "Good!".
Demo_2 <- function() {
  answer <- readline(prompt = "DO you think R is hard to learn,Please give your choice:Y or N ")
  reply <- if (answer == "Y") "Come on; Spent more time.\n" else "Good!"
  cat(reply)
}
Demo_2()
## DO you think R is hard to learn,Please give your choice:Y or N
## Good!
readLines(con = stdin(), n = -1L, ok = TRUE, warn = TRUE,
encoding = "unknown", skipNul = FALSE)
1,控制读入的数据行数,非批处理,有点类似数据库中的游标(cursor)操作,可对文件中的数据逐行操作。
2,例如逐行读入数据,判断该行是否含有需要的数据,有则再对其进行处理;提示:该函数配合R中的正则表达式相关函数,对于处理不规则的数据很强大。
readLines("./women1.txt", n = 1)
## [1] "name\theight\tweight\ttmp"
readLines("./women1.txt", n = 1)
## [1] "name\theight\tweight\ttmp"
# Walk through women1.txt one line at a time over an explicitly opened connection.
con <- file("./women1.txt", open = "r")
# count.fields() reads the whole file, leaving the read position at the end,
# which is why the readLines() call right after it returns nothing.
nfields <- count.fields(con, sep = "\t")
readLines(con, n = 1)
## character(0)
# Rewind to the start of the file (alternative: con <- file("./women1.txt","r")).
seek(con, where = 0, rw = "r")
## [1] 283
i <- 1
# Assign inside the condition with `<-`; stop once readLines() returns no line.
while (length(myline <- readLines(con, n = 1)) > 0) {
  cat(i, "->", myline, "\n", sep = "")
  i <- i + 1
}
## 1->name height weight tmp
## 2->stu1 58 115 1.1
## 3->stu2 59 117 1.2
## 4->stu3 60 120 1.3
## 5->stu4 61 123 1.4
## 6->stu5 62 126 1.5
## 7->stu6 63 129 1.6
## 8->stu7 64 132 1.7
## 9->stu8 65 135 1.8
## 10->stu9 66 139 1.9
## 11->stu10 67 142 2
## 12->stu11 68 146 2.1
## 13->stu12 69 150 2.2
## 14->stu13 70 154 2.3
## 15->stu14 71 159 2.4
## 16->stu15 72 164 2.5
close(con)
perl中的写法会出错
while(myline = readLines(con, n = 1)){
myline
} # Error: unexpected '=' in "while(myline ="
cat("TITLE extra line", "2 3 5 7", "", "11 13 17", file = "ex.data",
sep = "\n")
readLines("ex.data", n = -1)
## [1] "TITLE extra line" "2 3 5 7" ""
## [4] "11 13 17"
unlink("ex.data") # delete
## difference in blocking
cat("123\nabc", file = "test1") # the final line has no trailing newline
readLines("test1") # returns both lines, with a warning about the incomplete final line
## Warning in readLines("test1"): incomplete final line found on 'test1'
## [1] "123" "abc"
con <- file("test1", "r", blocking = FALSE) # non-blocking: an unterminated final line is not returned yet
readLines(con) # only the complete line "123"; the unterminated "abc" is held back
## [1] "123"
cat(" def\n", file = "test1", append = TRUE)
readLines(con) # the held-back "abc" and the appended " def" now form one complete line
## [1] "abc def"
close(con)
1.1.5函数scan()
该函数从键盘或文件中读取数据,并存入向量或列表中。
scan(file, what)
scan(file = "", what = double(), nmax = -1, n = -1, sep = "",
quote = if(identical(sep, "\n")) "" else "'\"", dec = ".",
skip = 0, nlines = 0, na.strings = "NA",
flush = FALSE, fill = FALSE, strip.white = FALSE,
quiet = FALSE, blank.lines.skip = TRUE, multi.line = TRUE,
comment.char = "", allowEscapes = FALSE,
fileEncoding = "", encoding = "unknown", text, skipNul = FALSE)
如果 scan() 读入的数据中既有字符又有数字,用 what = "" 来进行声明,读入的数字会被隐式地全部转换成字符;
扫描注意:
# 从键盘中输入数字(单独回车换行结束输入,与Perl语言不一样)
d<-scan("") # error scan(" ")
d
## numeric(0)
> d<-scan("") # error scan(" ")
1: r
1: r
Error in scan("") : scan() expected 'a real', got 'r'
# 从键盘中输入字符
d<-scan("",what="")
d
## character(0)
fwf.txt
ABC123%$12
TEX124@#12
y o14 @@#
# 从外部读入
d <- scan("./fwf.txt")
Error in scan("./fwf.txt") : scan() expected 'a real', got 'ABC123%$12'
# fwf2.txt
# 1 2 3
# 1 2 3
d <- scan("./fwf2.txt")
d
## [1] 1 2 3 1 2 3
length(d)
## [1] 6
cat("TITLE extra line", "2 3 5 7", "11 13 17", file = "ex.data", sep = "\n")
# ex.data
# TITLE extra line
# 2 3 5 7
# 11 13 17
scan("ex.data", skip = 1, quiet = F) # 等价于scan("ex.data", skip = 1)
## [1] 2 3 5 7 11 13 17
scan("ex.data", skip = 1, quiet = T)
## [1] 2 3 5 7 11 13 17
scan("ex.data", skip = 1, nlines = 1) # only 1 line after the skipped one
## [1] 2 3 5 7
scan("ex.data", what = list("","","")) # flush is F -> read "7"
## Warning in scan("ex.data", what = list("", "", "")): number of items read
## is not a multiple of the number of columns
## [[1]]
## [1] "TITLE" "2" "7" "17"
##
## [[2]]
## [1] "extra" "3" "11" ""
##
## [[3]]
## [1] "line" "5" "13" ""
scan("ex.data", what = list("","",""), flush = TRUE)
## [[1]]
## [1] "TITLE" "2" "11"
##
## [[2]]
## [1] "extra" "3" "13"
##
## [[3]]
## [1] "line" "5" "17"
unlink("ex.data")
## "inline" usage
scan(text = "1 2 3")
## [1] 1 2 3
R本身提供超过50个数据集,同时在功能包(包括标准功能包)中附带更多的数据集。与S-Plus不同,这些数据集必须通过 data() 函数显式加载。
# List the data sets shipped in a package. The tutorial originally used the
# 'nls' package, which no longer exists as a separate package in current R
# (the call fails with "there is no package called 'nls'"); Puromycin now
# lives in the standard 'datasets' package.
data(package = "datasets")
# Load the Puromycin data set from 'datasets' into the workspace.
data(Puromycin, package = "datasets")
在处理数据框或矩阵时,edit() 函数提供一个独立的工作表式编辑环境。
xold <- NULL
xnew <- edit(xold) #对数据集xold进行编辑。并在完成时将改动后的对象赋值给xnew(只能输入一列)
xnew <- edit(data.frame()) #可以通过工作表界面录入新数据。
xnew
## var1 var2
## 1 1 A
## 2 2 B
## 3 3 C
## 4 NA D
fix(xnew) # 打开数据框界面,修改已有的对象
cat("file A\n", file = "A") # create file A containing the line 'file A'; ending it with '\n' is good practice
cat("file B\n", file="B") # create file B
file.append("A", "B") # append the content of B to the end of A; no blank line is inserted between them
## [1] TRUE
file.create("A") # create file A; note that this overwrites (empties) the existing file
## [1] TRUE
file.append("A", rep("B", 10)) # append the content of B to A ten times in a row
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
read.table("./A")
## V1 V2
## 1 file B
## 2 file B
## 3 file B
## 4 file B
## 5 file B
## 6 file B
## 7 file B
## 8 file B
## 9 file B
## 10 file B
file.show("A") # display the content of file A (per the original note, in a new window)
file.copy("A", "C") # copy file A to a new file C in the same directory
## [1] TRUE
dir.create("tmp") # create a directory named tmp
file.copy(c("A", "B"), "tmp") # copy the files A and B into the tmp directory
## [1] TRUE TRUE
list.files("tmp") # list the file names inside tmp
## [1] "A" "B"
unlink("tmp", recursive = F) # without recursive = TRUE the directory is not removed (see list.dirs() below)
list.dirs() # tmp is still listed: the command above did not delete the directory
## [1] "." "./tmp"
unlink("tmp", recursive = TRUE) # delete tmp together with any files inside it
list.dirs() # the command above removed the directory and its files
## [1] "."
file.remove("A", "B", "C") # remove the three files
## [1] TRUE TRUE TRUE
zz <- file("ex.data", "w") # open an output file connection. And the file will be create if not exist
cat("TITLE extra line", "2 3 5 7", "", "11 13 17", file = zz, sep = "\n")
cat("One more line\n", file = zz)
close(zz)
readLines("ex.data")
## [1] "TITLE extra line" "2 3 5 7" ""
## [4] "11 13 17" "One more line"
unlink("ex.data")
zz <- gzfile("ex.gz", "w") # compressed file
cat("TITLE extra line", "2 3 5 7", "", "11 13 17", file = zz, sep = "\n")
close(zz)
readLines(zz <- gzfile("ex.gz"))
## [1] "TITLE extra line" "2 3 5 7" ""
## [4] "11 13 17"
close(zz)
unlink("ex.gz")
# Write a small text file through a bzip2-compressing connection, read it
# back, then clean up.
zz <- bzfile("ex.bz2", "w") # bzip2-compressed output connection
cat("TITLE extra line", "2 3 5 7", "", "11 13 17", file = zz, sep = "\n")
close(zz)
# Create a new connection on the archive for reading; readLines() returns the
# original text lines.
readLines(zz <- bzfile("ex.bz2"))
## [1] "TITLE extra line" "2 3 5 7" ""
## [4] "11 13 17"
# Destroy the read connection too (as the gzfile example does); otherwise the
# connection object created above is never released.
close(zz)
unlink("ex.bz2")
Tfile <- file("test1", "w+")
c(isOpen(Tfile, "r"), isOpen(Tfile, "w"))
## [1] TRUE TRUE
cat("abc\ndef\n", file=Tfile)
readLines(Tfile)
## [1] "abc" "def"
seek(Tfile, 0, rw="r") # reset to beginning
## [1] 10
readLines(Tfile)
## [1] "abc" "def"
cat("ghi\n", file=Tfile)
readLines(Tfile)
## [1] "ghi"
close(Tfile)
unlink("test1")
## We can do the same thing with an anonymous file.(匿名文件操作,不需通过unlink()删除文件)
Tfile <- file()
cat("abc\ndef\n", file=Tfile)
readLines(Tfile)
## [1] "abc" "def"
close(Tfile)
Tfile <- tempfile()
Tfile
## [1] "C:\\Users\\Administrator\\AppData\\Local\\Temp\\RtmpeGFHVW\\file1aa821bc38ed"
unlink(Tfile)
这段代码运行时死机
if(capabilities("fifo")) {
zz <- fifo("foo-fifo", "w+")
writeLines("abc", zz)
print(readLines(zz))
close(zz)
unlink("foo-fifo")
}
con1 <- socketConnection(port = 6011, server=TRUE)
repeat{
message <- readline("huang:")
writeLines(message, con1)
if(message == "bye"){break}
}
close(con1)
# R process 2
# Client side of the chat demo: connect to the server started by the other R
# process on port 6011 and print every line it sends until "bye" arrives.
con2 <- socketConnection(Sys.info()["nodename"], port = 6011)
# as non-blocking, may need to loop for input
repeat {
  message <- readLines(con2)
  if (length(message) == 0) {
    # nothing available yet: wait a moment and poll again
    Sys.sleep(1)
    next
  }
  # One poll may return several lines at once, so print each line on its own
  # row instead of gluing them together.
  cat(paste0("message from huang:", message, "\n"), sep = "")
  # `if` needs a single TRUE/FALSE; `message == "bye"` would have length > 1
  # whenever more than one line was received in a single poll.
  if ("bye" %in% message) {
    break
  }
}
close(con2)
2.3 excel文件的读取
library(RODBC)
excel_file <- odbcConnectExcel("./tmp.xls")
sheet_data <- sqlFetch ( excel_file ,"Sheet1");sheet_data
## F1 F2 F3
## 1 1 2 3
## 2 1 2 3
## 3 1 2 3
## 4 1 2 3
## 5 1 2 3
## 6 1 2 3
## 7 1 2 3
## 8 1 2 3
class(sheet_data)
## [1] "data.frame"
close ( excel_file )
方式1:使用read.fwf函数:该方法较慢(相对于read.table),但是可以处理复杂的数据
方式2:使用read.table:速度比方式1快,但是对读入的原始数据格式有一定的要求
出现错误:
【载入需要的程辑包:RODBC
Failed with error: ‘程辑包‘RODBC’是在R版本3.0.0之前建的:你得重新安装
或者
Error: package ‘RODBC’ was built before R 3.0.0: please re-install it】
因为已安装的RODBC包版本与当前R版本不匹配,需要重新安装与该R版本相对应的RODBC包后才可以使用。对于R 3.1.0版本来说,用RODBC_1.3-10.zip就可以。
https://rstudio-pubs-static.s3.amazonaws.com/188561_f365a21d9ac041f99dc92c6d9e20cfeb.html