我有一个包含以下变量的数据帧:
date=c("30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018")
hour=c(1,1,1,1,2,2,2,2)
location=c(North,South,East,West,North,South,East,West)
North=c(10,30,40,50,0,40,31,11)
South=c(20,10,20,0,0,0,5,0)
East=c(0,10,10,5,0,0,5,0)
West=c(5,30,40,50,0,40,31,11)
df <- data.frame(date, hour, location, North, South, East, West)
location
表示观察到的位置。列North
、South
、East
和West
显示了这些地区的平均降雨量。首先,我需要创建一个列Actual
来描述location
中的降雨。例如,对于第1行,Actual
的值将为10,因为它位于北方。接下来,我需要创建另外两个列High1
和High2
。前者代表该小时内其余3个地区的最高降雨量,而后者代表其余3个地区的次高降雨量。例如,对于行1,High1
和High2
的值分别为20和5,因为在该小时内,南部和西部具有最高和第二高的记录值。
是否有推荐的命令可以用于此设置?谢谢。
发布于 2020-03-29 01:22:54
试试这个(我相信有很多更简单的解决方案):
library(tidyverse)
df %>%
left_join(
df %>%
mutate(n = row_number()) %>%
pivot_longer(North:West, names_to = "loc", values_to = "rainfall") %>%
group_by(n) %>%
mutate(actual = max(ifelse(location==loc,rainfall,0))) %>%
filter(location != loc) %>%
arrange(-rainfall) %>%
mutate(high = paste0("High", row_number())) %>%
filter(row_number() < 3) %>%
select(-loc, -n) %>%
pivot_wider(names_from = high, values_from = rainfall) %>%
ungroup() %>%
arrange(n) %>% select(-n) )
# date hour location North South East West actual High1 High2
# 1 30/03/2018 1 North 10 20 0 5 10 20 5
# 2 30/03/2018 1 South 30 10 10 30 10 30 30
# 3 30/03/2018 1 East 40 20 10 40 10 40 40
# 4 30/03/2018 1 West 50 0 5 50 50 50 5
# 5 30/03/2018 2 North 0 0 0 0 0 0 0
# 6 30/03/2018 2 South 40 0 0 40 0 40 40
# 7 30/03/2018 2 East 31 5 5 31 5 31 31
# 8 30/03/2018 2 West 11 0 0 11 11 11 0
Data
date <- c("30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018")
hour <- c(1,1,1,1,2,2,2,2)
location <- c("North","South","East","West","North","South","East","West")
North <- c(10,30,40,50,0,40,31,11)
South <- c(20,10,20,0,0,0,5,0)
East <- c(0,10,10,5,0,0,5,0)
West <- c(5,30,40,50,0,40,31,11)
df <- data.frame(date, hour, location, North, South, East, West)
发布于 2020-03-29 10:17:38
您可以使用以下代码,该代码使用Rfast包中的nth
函数来获取其余地区的第二高降雨量。
library(Rfast)
NSEW <- grep("North|South|East|West", names(df))
i.col <- sapply(df$location, function(x) grep(x, names(df)))
df$Actual <- as.numeric(df[cbind(1:nrow(df), i.col)])
df[cbind(1:nrow(df), i.col)] <- 0
df$High1 <- apply(df[,NSEW], 1, max) # *
df$High2 <- apply(df[,NSEW], 1, Rfast::nth, k=2, descending=TRUE) # *
df[cbind(1:nrow(df), i.col)] <- df$Actual
df
date hour location North South East West Actual High1 High2
1 30/03/2018 1 North 10 20 0 5 10 20 5
2 30/03/2018 1 South 30 10 10 30 10 30 30
3 30/03/2018 1 East 40 20 10 40 10 40 40
4 30/03/2018 1 West 50 0 5 50 50 50 5
5 30/03/2018 2 North 0 0 0 0 0 0 0
6 30/03/2018 2 South 40 0 0 40 0 40 40
7 30/03/2018 2 East 31 5 5 31 5 31 31
8 30/03/2018 2 West 11 0 0 11 11 11 0
*也可以使用rownth
,它返回每行的第n个最小值:
df$High1 <- Rfast::rownth(as.matrix(df[,NSEW]), elems=rep(1, nrow(df)), descending=TRUE)
df$High2 <- Rfast::rownth(as.matrix(df[,NSEW]), elems=rep(2, nrow(df)), descending=TRUE)
发布于 2020-03-29 01:40:45
如果你不想使用tidyverse,这里有一个替代方案:
date <- c("30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018","30/03/2018")
hour <- c(1,1,1,1,2,2,2,2)
loc <- c("North","South","East","West","North","South","East","West")
North <- c(10,30,40,50,0,40,31,11)
South <- c(20,10,20,0,0,0,5,0)
East <- c(0,10,10,5,0,0,5,0)
West <- c(5,30,40,50,0,40,31,11)
location <- list(North, South, East, West, North, South, East, West)
actual <- numeric(length(location))
High1 <- numeric(length(location))
High2 <- numeric(length(location))
for (i in 1:length(location)){
actual[i] <- (location[[i]][i])
location[[i]][i] <- 0
High1[i] <- max(sapply(location , `[` , i))
}
short_location <- location[c(1:4)]
for (i in 1:length(location)){
n <- length(short_location)
High2[i] <- sort(sapply(short_location , `[` , i), partial=n-1)[n-1]
}
df <- data.frame(date, hour, loc, North, South, East, West, actual, High1, High2)
https://stackoverflow.com/questions/60901725
复制相似问题