我想一次又一次地重复给定的序列,在R中填充大约5000行。
时间数据集:
8.00.00 AM
9.00.00 AM
10.00.00 AM
11.00.00 AM
12.00.00 PM
1.00.00 PM
2.00.00 PM
3.00.00 PM
4.00.00 PM
5.00.00 PM
6.00.00 PM
7.00.00 PM
8.00.00 PM
9.00.00 PM
发布于 2019-07-27 19:59:44
您看到空白可能有几个原因。我将重点讨论两种可能性:NA(缺失值)和字面意义上的空字符串("")。
# Example input: the 14 hourly labels followed by the two kinds of "blank"
# entries discussed in the text -- a missing value (NA) and an empty string.
srcvec <- c(
  "8.00.00 AM", "9.00.00 AM", "10.00.00 AM", "11.00.00 AM", "12.00.00 PM",
  "1.00.00 PM", "2.00.00 PM", "3.00.00 PM", "4.00.00 PM", "5.00.00 PM",
  "6.00.00 PM", "7.00.00 PM", "8.00.00 PM", "9.00.00 PM", NA, ""
)
# Recycle the vector until 30 elements have been produced. The argument name
# `length.out` is spelled out in full rather than relying on partial argument
# matching (`len`).
rep(srcvec, length.out = 30)
# [1] "8.00.00 AM" "9.00.00 AM" "10.00.00 AM" "11.00.00 AM" "12.00.00 PM" "1.00.00 PM"
# [7] "2.00.00 PM" "3.00.00 PM" "4.00.00 PM" "5.00.00 PM" "6.00.00 PM" "7.00.00 PM"
# [13] "8.00.00 PM" "9.00.00 PM" NA "" "8.00.00 AM" "9.00.00 AM"
# [19] "10.00.00 AM" "11.00.00 AM" "12.00.00 PM" "1.00.00 PM" "2.00.00 PM" "3.00.00 PM"
# [25] "4.00.00 PM" "5.00.00 PM" "6.00.00 PM" "7.00.00 PM" "8.00.00 PM" "9.00.00 PM"
要删除NA,我们可以简单地使用na.omit:
# Drop the NA entry with na.omit() before recycling to 30 elements; the empty
# string "" is not NA and therefore still appears in the result.
# `length.out` is written in full instead of the partially matched `len`.
rep(na.omit(srcvec), length.out = 30)
# [1] "8.00.00 AM" "9.00.00 AM" "10.00.00 AM" "11.00.00 AM" "12.00.00 PM" "1.00.00 PM"
# [7] "2.00.00 PM" "3.00.00 PM" "4.00.00 PM" "5.00.00 PM" "6.00.00 PM" "7.00.00 PM"
# [13] "8.00.00 PM" "9.00.00 PM" "" "8.00.00 AM" "9.00.00 AM" "10.00.00 AM"
# [19] "11.00.00 AM" "12.00.00 PM" "1.00.00 PM" "2.00.00 PM" "3.00.00 PM" "4.00.00 PM"
# [25] "5.00.00 PM" "6.00.00 PM" "7.00.00 PM" "8.00.00 PM" "9.00.00 PM" ""
为了删除空字符串,我们可以用nzchar进行筛选,当字符串包含一个或多个字符时,它返回TRUE:
# Remove NA first, then keep only the strings for which nzchar() is TRUE
# (i.e. drop ""), and recycle what is left to 30 elements.
# `length.out` is written in full instead of the partially matched `len`.
rep(Filter(nzchar, na.omit(srcvec)), length.out = 30)
# [1] "8.00.00 AM" "9.00.00 AM" "10.00.00 AM" "11.00.00 AM" "12.00.00 PM" "1.00.00 PM"
# [7] "2.00.00 PM" "3.00.00 PM" "4.00.00 PM" "5.00.00 PM" "6.00.00 PM" "7.00.00 PM"
# [13] "8.00.00 PM" "9.00.00 PM" "8.00.00 AM" "9.00.00 AM" "10.00.00 AM" "11.00.00 AM"
# [19] "12.00.00 PM" "1.00.00 PM" "2.00.00 PM" "3.00.00 PM" "4.00.00 PM" "5.00.00 PM"
# [25] "6.00.00 PM" "7.00.00 PM" "8.00.00 PM" "9.00.00 PM" "8.00.00 AM" "9.00.00 AM"
如果您的数据中有非空的空白字符串(例如,只含空格),则可以使用以下内容:
# Append a whitespace-only entry so the example also covers strings that are
# blank without being empty.
srcvec <- c(srcvec, " ")
# Keep an element only if it is not NA and still has at least one character
# after every whitespace character has been removed. Filter() evaluates the
# predicate on one element at a time, so the scalar, short-circuiting `&&` is
# used; `length.out` is written in full instead of the partially matched `len`.
rep(
  Filter(function(a) !is.na(a) && nzchar(gsub("\\s", "", a)), srcvec),
  length.out = 30
)
# [1] "8.00.00 AM" "9.00.00 AM" "10.00.00 AM" "11.00.00 AM" "12.00.00 PM" "1.00.00 PM"
# [7] "2.00.00 PM" "3.00.00 PM" "4.00.00 PM" "5.00.00 PM" "6.00.00 PM" "7.00.00 PM"
# [13] "8.00.00 PM" "9.00.00 PM" "8.00.00 AM" "9.00.00 AM" "10.00.00 AM" "11.00.00 AM"
# [19] "12.00.00 PM" "1.00.00 PM" "2.00.00 PM" "3.00.00 PM" "4.00.00 PM" "5.00.00 PM"
# [25] "6.00.00 PM" "7.00.00 PM" "8.00.00 PM" "9.00.00 PM" "8.00.00 AM" "9.00.00 AM"
发布于 2019-07-27 09:09:12
# Hour labels in display order: "8.00.00" ... "12.00.00", "1.00.00" ... "9.00.00".
Time <- paste0(c(8:12, 1:9), ".00.00")
# Attach the meridiem: the first 4 labels (8-11) are AM, the remaining 10 are PM.
PM_AM <- paste(Time, rep(c("AM", "PM"), c(4, 10)))
# Total number of rows to fill.
n_total <- 5000
# Whole repetitions of the sequence that fit, and the rows left over.
n_rep <- n_total %/% length(PM_AM)
n_remain <- n_total %% length(PM_AM)
# seq_len() yields an empty index when nothing is left over, whereas
# 1:n_remain would become c(1, 0) and append one unwanted element.
x <- c(rep(PM_AM, n_rep), PM_AM[seq_len(n_remain)])
发布于 2019-07-27 11:38:27
您可以使用github yikeshu0611中的onetree包来处理问题。
# Install the onetree package from GitHub and attach it.
devtools::install_github("yikeshu0611/onetree")
library(onetree)
# Build `df` from the inline text below with read_Text().
# NOTE(review): read_Text() is presumably provided by onetree and presumably
# treats the first line ("time PAM") as column names -- confirm against the
# package documentation.
df <- read_Text("
time PAM
8.00.00 AM
9.00.00 AM
10.00.00 AM
11.00.00 AM
12.00.00 PM
1.00.00 PM
2.00.00 PM
3.00.00 PM
4.00.00 PM
5.00.00 PM
6.00.00 PM
7.00.00 PM
8.00.00 PM
9.00.00 PM
")
# Print the number of rows that were read.
nrow(df)
358 ≈ 5000/14
df是您的原始数据。然后我们将重复次数358添加到df中。
df$n=358
最后,我们使用flat_strech按n列对df中的数据进行扩展。
# Expand `df` with flat_strech(), passing the name of the repeat-count column
# ("n") through the `strech` argument, and store the result in `df2`.
# NOTE(review): flat_strech() is presumably provided by onetree; `strech` is
# the argument name as spelled in the call and is kept unchanged.
df2 <- flat_strech(data = df, strech = "n")
# Print the number of rows after expansion.
nrow(df2)
5012
https://stackoverflow.com/questions/57230593
复制相似问题