假设这是我的数据集
library(gtools)
library(dplyr)
df <- data.frame(grp=c(0.5,0.6,1,2,2,2,4.5,10,22,"kids","Parents","Teachers"),
f1= c(1,0,3,2,4,0,3,0,1,6,8,4),
f2= c(1,0,3,1,4,0,1,0,1,5,8,4),
f3= c(1,0,3,2,4,6,1,2,1,6,8,4))
df
grp f1 f2 f3
1 0.5 1 1 1
2 0.6 0 0 0
3 1 3 3 3
4 2 2 1 2
5 2 4 4 4
6 2 0 0 6
7 4.5 3 1 1
8 10 0 0 2
9 22 1 1 1
10 kids 6 5 6
11 Parents 8 8 8
12 Teachers 4 4 4
这是我想要的输出
df_final
grp f1 f2 f3
1 <=1 4 4 4
2 2-9 9 6 13
3 10-19 0 0 2
4 >20 1 1 1
5 kids 6 5 6
6 Parents 8 8 8
7 Teachers 4 4 4
这就是我所做的+评论我的问题:
############ how NOT to splot set into two subsets of data
df_1 <- df %>%
filter(grepl('kids|Parents|Teachers', grp))
df_1
grp f1 f2 f3
1 kids 6 5 6
2 Parents 8 8 8
3 Teachers 4 4 4
df_2 <- df %>%
filter(!grepl('kids|Parents|Teachers', grp)) %>%
mutate(across(.cols = grp, .fns = as.numeric)) %>%
mutate(grp= cut(grp, breaks=c(-999,2,10,21,999) , labels=c("<=1", "2-9","10-19",">20"), right=F))
df_2
grp f1 f2 f3
1 <=1 1 1 1
2 <=1 0 0 0
3 <=1 3 3 3
4 2-9 2 1 2
5 2-9 4 4 4
6 2-9 0 0 6
7 2-9 3 1 1
8 10-19 0 0 2
9 >20 1 1 1
### how to pipe both aggregate and mixedorder/sort instead of separate lined of codes
df_2 <- aggregate(.~grp, data = df_2, FUN=sum)
df2[mixedorder(df2$grp, decreasing = T),]
df_2
grp f1 f2 f3
1 <=1 4 4 4
2 2-9 9 6 13
3 10-19 0 0 2
4 >20 1 1 1
### how to make sure 10-19 does not come before 2-9 in case of actual dataset
grp a b d
1 <=1 53 48 53
2 10-15 65 63 65
3 2-9 30 40 30
df_final <- rbind(df_2, df_1)
df_final
grp f1 f2 f3
1 <=1 4 4 4
2 2-9 9 6 13
3 10-19 0 0 2
4 >20 1 1 1
5 kids 6 5 6
6 Parents 8 8 8
7 Teachers 4 4 4
在dplyr中,是否有任何简单的方法可以通过管道命令从原始df到df_final?
如何不将集划分为两个数据子集?
如何输送聚合和混合顺序/排序而不是单独排列的代码?
在实际数据集中的情况下,如何确保10-19不会出现在2-9之前?
发布于 2022-05-02 15:43:17
这里有一个选项--创建第二列('grp2'),其中只包含numeric
元素上的cut
值,然后用原始列创建coalesce
,同时附加levels
,然后用across
执行group_by
summarise
。这样,我们就不必使用mixedsort
了,因为cut
已经对分组进行了排序
library(dplyr)
library(stringr)
df %>%
mutate(grp2 = case_when(str_detect(grp, '^[0-9.]+$')
~ cut(as.numeric(grp), breaks=c(-999,2,10,21,999) ,
labels=c("<=1", "2-9","10-19",">20"), right=FALSE))) %>%
mutate(grp =factor(coalesce(grp2, grp),
levels = c(levels(grp2), unique(grp[is.na(grp2)]))), .keep = "unused") %>%
group_by(grp) %>%
summarise(across(everything(), sum, na.rm = TRUE), .groups = 'drop')
-output
# A tibble: 7 × 4
grp f1 f2 f3
<fct> <dbl> <dbl> <dbl>
1 <=1 4 4 4
2 2-9 9 6 13
3 10-19 0 0 2
4 >20 1 1 1
5 kids 6 5 6
6 Parents 8 8 8
7 Teachers 4 4 4
https://stackoverflow.com/questions/72088950
复制相似问题