正如标题所述,我想把自由之家索引从excel转换成整洁的R格式。FHI可以在https://freedomhouse.org/reports/publication-archives下下载,然后在1973-2021年国家和地区的评级和地位下下载,看起来如下:excel中的FHI。

我已经用下面的代码完成了它,但是我认为我的解决方案不是很优雅,更像是分解和处理。因此,我正在寻找另一个解决方案,充其量在潮间带内。提前谢谢。
#load packages
library(tidyverse)
#load data
library(readxl)
fh <- read_excel("Data/Country_and_Territory_Ratings_and_Statuses_FIW1973-2021.xlsx",
sheet = "Country Ratings, Statuses ",
col_names = FALSE, na = "-")
# remove survey edition and years
fh_raw <- fh %>%
filter(...1 != c("Survey Edition",
"Year(s) Under Review"))
# save country names
cty <- unlist(fh_raw[1]) %>%
unname()
fh_raw <- fh_raw %>%
select(!...1)
# variable
pr <- seq(to = length(fh_raw), by = 3)
cl <- seq(from = 2, to = length(fh_raw), by = 3)
status <- seq(from = 3, to = length(fh_raw), by = 3)
# select variables and transform into long-format
fh_pr <- fh_raw[pr] %>%
pivot_longer(cols = 1:length(pr))
fh_pr <- unlist(fh_pr[2]) %>%
unname() %>% as.numeric()
fh_cl <- fh_raw[cl] %>%
pivot_longer(cols = 1:length(cl))
fh_cl <- unlist(fh_cl[2]) %>%
unname() %>% as.numeric()
fh_status <- fh_raw[status] %>%
pivot_longer(cols = 1:length(status))
fh_status <- unlist(fh_status[2]) %>%
unname()
cty <- rep(cty, each = length(cl))
year = 1972:2020
year <- rep(year[year != 1981], times = 205) # 1981 is skipped
#create FH data frame
fh_long <- tibble(country = cty,
year = year,
pr = fh_pr,
cl = fh_cl,
status = fh_status)发布于 2021-05-25 11:47:35
进入tidyxl和unpivotr的神奇世界;-)
library(tidyverse)
library(tidyxl)
library(unpivotr)
file.to.read <- "./Country_and_Territory_Ratings_and_Statuses_FIW1973-2021.xlsx"
sheet.to.read <- "Country Ratings, Statuses "
#read sheet's contents (take a look at it to see what you actrually just read in)
cells <- tidyxl::xlsx_cells( file.to.read, sheet = sheet.to.read)
ans <- cells %>%
# Drop empty cells
dplyr::filter(!is_blank) %>%
# Setup headers from top and left side
unpivotr::behead("up", "Survey_Edition") %>%
unpivotr::behead("up", "year") %>%
unpivotr::behead("up", "item") %>%
unpivotr::behead("left", "country") %>%
# There are unwanted training spaces in the item-clum, remove them
dplyr::mutate(item = trimws(item)) %>%
# Get value from numeric and character-column
dplyr::mutate(value = ifelse(item == "Status", character, numeric)) %>%
# Drop unneeded data
dplyr::select(country, year, item, value) %>%
# Fill down missing year info
tidyr::fill(year, .direction = "down") %>%
# Cast to wide format
tidyr::pivot_wider(names_from = item, values_from = value)输出
# country year PR CL Status
# <chr> <chr> <chr> <chr> <chr>
# 1 Afghanistan 1972 4 5 PF
# 2 Afghanistan 1973 7 6 NF
# 3 Afghanistan 1974 7 6 NF
# 4 Afghanistan 1975 7 6 NF
# 5 Afghanistan 1976 7 6 NF
# 6 Afghanistan 1977 6 6 NF
# 7 Afghanistan 1978 7 7 NF
# 8 Afghanistan 1979 7 7 NF
# 9 Afghanistan 1980 7 7 NF
#10 Afghanistan Jan.1981-Aug. 1982 7 7 NF
#11 Afghanistan Aug.1982-Nov.1983 7 7 NF
#12 Afghanistan Nov.1983-Nov.1984 7 7 NF
#13 Afghanistan Nov.1984-Nov.1985 7 7 NF
#14 Afghanistan Nov.1985-Nov.1986 7 7 NF
#15 Afghanistan Nov.1986-Nov.1987 7 7 NF
#16 Afghanistan Nov.1987-Nov.1988 6 6 NF
#17 Afghanistan Nov.1988-Dec.1989 7 7 NF
#18 Afghanistan 1990 7 7 NF
#19 Afghanistan 1991 7 7 NF
#20 Afghanistan 1992 6 6 NF
# ...https://stackoverflow.com/questions/67683442
复制相似问题