我有如下所示的数据:
# Example survey data: the same ten respondents (ID) answered in two survey
# years (Y1 and Y2); eth, nzdep and sex are the answers being compared.
df <- data.frame(
  ID = rep(
    c("ABC123", "BCD234", "CDE345", "DEF456", "EFG567",
      "FGH678", "GHI891", "HIJ910", "IJK101", "JKL011"),
    times = 2
  ),
  eth = c(
    1, 2, 2, 3, 1, 1, 4, 4, 3, 3, # Y1
    1, 4, 1, 3, 1, 3, 4, 4, 3, 2  # Y2
  ),
  nzdep = c(
    4, 3, 3, 2, 4, 4, 1, 1, 2, 2, # Y1
    4, 3, 3, 4, 4, 2, 1, 1, 2, 3  # Y2
  ),
  sex = c(
    "M", "M", "F", "F", "M", "M", "F", "F", "M", "M", # Y1
    "F", "M", "M", "M", "M", "F", "F", "M", "F", "M"  # Y2
  ),
  Year = rep(c("Y1", "Y2"), each = 10)
)
这是同一批人在不同年份的调查数据。ID 是每个人的唯一标识,Year 表示调查是在哪一年完成的。我想知道的是:同一个 ID 在两年中对同一个问题的回答是否相同。
我尝试过类似这样的代码:
# Asker's attempt, kept exactly as posted; it does not produce the wanted flags.
# Inside a call, `nzdep = nzdep` is a named argument, not an equality test, and
# ifelse() has no parameter called nzdep / sex / eth.
# NOTE(review): even with `==`, each column would be compared with itself, which
# cannot detect a change between the two years of the same ID.
dems <- df %>%
group_by(ID) %>%
mutate(dep_dif = ifelse(nzdep = nzdep, 1, 0),
sex_dif = ifelse(sex = sex, 1, 0),
eth_dif = ifelse(eth = eth, 1, 0))
这段代码行不通,但我的思路大致就是这样。
我想要的输出是:
# Desired result: the original columns plus one 1/0 flag per question, where
# 1 means the respondent gave the same answer in Y1 and Y2. Each flag is
# repeated on both rows (years) of the same ID.
dems <- data.frame(
  ID = rep(
    c("ABC123", "BCD234", "CDE345", "DEF456", "EFG567",
      "FGH678", "GHI891", "HIJ910", "IJK101", "JKL011"),
    times = 2
  ),
  eth = c(
    1, 2, 2, 3, 1, 1, 4, 4, 3, 3, # Y1
    1, 4, 1, 3, 1, 3, 4, 4, 3, 2  # Y2
  ),
  nzdep = c(
    4, 3, 3, 2, 4, 4, 1, 1, 2, 2, # Y1
    4, 3, 3, 4, 4, 2, 1, 1, 2, 3  # Y2
  ),
  sex = c(
    "M", "M", "F", "F", "M", "M", "F", "F", "M", "M", # Y1
    "F", "M", "M", "M", "M", "F", "F", "M", "F", "M"  # Y2
  ),
  Year = rep(c("Y1", "Y2"), each = 10),
  eth_dif = rep(c(1, 0, 0, 1, 1, 0, 1, 1, 1, 0), times = 2),
  dep_dif = rep(c(1, 1, 1, 0, 1, 0, 1, 1, 1, 0), times = 2),
  sex_dif = rep(c(0, 1, 0, 0, 1, 0, 1, 0, 0, 1), times = 2)
)
有人知道怎么做吗?
谢谢
发布于 2018-08-03 08:31:33
看起来您需要判断每个 ID 组内唯一值(unique)的个数是否等于 1:
library(dplyr)

# For every ID, flag each question with 1 when all of that ID's answers are the
# same (exactly one distinct value in the group) and 0 otherwise.
# Every new column is named explicitly so the result has dep_dif, sex_dif and
# eth_dif; the console continuation prompts ("+") from the original paste are
# removed so the snippet can be run as-is.
# mutate() is namespace-qualified as in the original answer.
df %>%
  group_by(ID) %>%
  dplyr::mutate(
    dep_dif = ifelse(length(unique(nzdep)) == 1, 1, 0),
    sex_dif = ifelse(length(unique(sex)) == 1, 1, 0),
    eth_dif = ifelse(length(unique(eth)) == 1, 1, 0)
  )
# A tibble: 20 x 8
# Groups: ID [10]
#    ID     eth nzdep sex Year dep_dif sex_dif eth_dif
#    <fctr> <dbl> <dbl> <fctr> <fctr> <dbl> <dbl> <dbl>
#  1 ABC123 1 4 M Y1 1 0 1
#  2 BCD234 2 3 M Y1 1 1 0
#  3 CDE345 2 3 F Y1 1 0 0
#  4 DEF456 3 2 F Y1 0 0 1
#  5 EFG567 1 4 M Y1 1 1 1
#  6 FGH678 1 4 M Y1 0 0 0
#  7 GHI891 4 1 F Y1 1 1 1
#  8 HIJ910 4 1 F Y1 1 0 1
#  9 IJK101 3 2 M Y1 1 0 1
# 10 JKL011 3 2 M Y1 0 1 0
# 11 ABC123 1 4 F Y2 1 0 1
# 12 BCD234 4 3 M Y2 1 1 0
# 13 CDE345 1 3 M Y2 1 0 0
# 14 DEF456 3 4 M Y2 0 0 1
# 15 EFG567 1 4 M Y2 1 1 1
# 16 FGH678 3 2 F Y2 0 0 0
# 17 GHI891 4 1 F Y2 1 1 1
# 18 HIJ910 4 1 M Y2 1 0 1
# 19 IJK101 3 2 F Y2 1 0 1
# 20 JKL011 2 3 M Y2 0 1 0
发布于 2018-08-03 12:12:49
我们可以用 mutate_at 做到这一点:
library(dplyr)

# For every ID, compare the Y1 answer with the Y2 answer of each question.
# The comparison yields one 0/1 value per group, which mutate() recycles onto
# both rows of that ID.
# across() (dplyr >= 1.0.0) replaces the superseded mutate_at() and the
# deprecated funs(); `.names` keeps the eth_dif / nzdep_dif / sex_dif names.
# Columns are picked by name so the selection does not depend on column order.
df %>%
  group_by(ID) %>%
  mutate(
    across(
      c(eth, nzdep, sex),
      function(x) as.integer(x[Year == "Y1"] == x[Year == "Y2"]),
      .names = "{.col}_dif"
    )
  )
# A tibble: 20 x 8
# Groups: ID [10]
# ID eth nzdep sex Year eth_dif nzdep_dif sex_dif
# <fct> <dbl> <dbl> <fct> <fct> <int> <int> <int>
# 1 ABC123 1 4 M Y1 1 1 0
# 2 BCD234 2 3 M Y1 0 1 1
# 3 CDE345 2 3 F Y1 0 1 0
# 4 DEF456 3 2 F Y1 1 0 0
# 5 EFG567 1 4 M Y1 1 1 1
# 6 FGH678 1 4 M Y1 0 0 0
# 7 GHI891 4 1 F Y1 1 1 1
# 8 HIJ910 4 1 F Y1 1 1 0
# 9 IJK101 3 2 M Y1 1 1 0
#10 JKL011 3 2 M Y1 0 0 1
#11 ABC123 1 4 F Y2 1 1 0
#12 BCD234 4 3 M Y2 0 1 1
#13 CDE345 1 3 M Y2 0 1 0
#14 DEF456 3 4 M Y2 1 0 0
#15 EFG567 1 4 M Y2 1 1 1
#16 FGH678 3 2 F Y2 0 0 0
#17 GHI891 4 1 F Y2 1 1 1
#18 HIJ910 4 1 M Y2 1 1 0
#19 IJK101 3 2 F Y2 1 1 0
#20 JKL011 2 3 M Y2 0 0 1
如果 'ID' 已经排好序(两个年份中的顺序一致),则 base R 的做法是:
# Base R alternative: add <question>_dif columns (1 = same answer in Y1 and Y2,
# 0 = different) for the question columns in positions 2:4.
# Every row is matched to the Y1 and the Y2 record of its own ID, so the
# comparison has one row per row of `df`. This avoids relying on the IDs being
# in the same order in both years, and on a half-height result being recycled
# when it is assigned to several columns at once.
# local() keeps the temporaries out of the calling environment.
df[paste0(names(df)[2:4], "_dif")] <- local({
  answers <- names(df)[2:4]
  first <- df[df$Year == "Y1", ]
  second <- df[df$Year == "Y2", ]
  # Unary plus turns the logical comparison matrix into 0/1 integers.
  +(first[match(df$ID, first$ID), answers] ==
      second[match(df$ID, second$ID), answers])
})
https://stackoverflow.com/questions/51663800
复制相似问题