我有以下数据:
# Load data.table with library(): it stops with an error when the package is
# missing, whereas require() would only return FALSE and let the script fail
# later at the first data.table call.
library(data.table)

# Example zones: population, plus the minimum and maximum number of hats each
# zone is able to receive.
dt1 <- data.table(
  ZONE = c(
    "A34", "G345", "H62", "D563", "T63", "P983",
    "S24", "J54", "W953", "L97", "V56", "R99"
  ),
  POPULATION = c(40, 110, 80, 70, 90, 90, 130, 140, 80, 30, 80, 50),
  MIN = c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1),
  MAX = c(10, 9, 2, 11, 12, 8, 5, 3, 2, 0, 8, 8)
)
我想把 50 件物品(比如说帽子)按人口加权分配给这些地区。然而,其中一些地区至少需要一顶帽子,而另一些地区只能接收很少的帽子,甚至一顶也不能接收。
是否有一种方法可以根据人口分配这 50 顶帽子(即尽可能精确地按比例分配),同时满足每个地区的最小值和最大值限制,并在某个地区不能接收或不能再接收帽子时,把多出的帽子重新分配给其他地区?例如,如果某个地区按精确比例应分得 20 顶帽子,但最多只能接收 10 顶,那么剩下的 10 顶应按人口加权分配给其他地区。
发布于 2020-05-09 13:49:14
我对此不太确定。这听起来像是优化或线性规划任务。
以下是函数:
#' Allocate N hats across zones in proportion to population, within bounds
#'
#' Works in three stages: (1) give every zone its rounded proportional share,
#' clamped to `[MIN, MAX]`; (2) spread whatever is still missing (or take back
#' whatever is surplus) proportionally among the zones that still have room,
#' never moving a zone outside its bounds; (3) settle the remaining rounding
#' difference one hat at a time, in row order, until the total equals `N`.
#'
#' @param dt A data.table (or plain data.frame) with numeric columns
#'   `POPULATION`, `MIN` and `MAX`. `MIN` and `MAX` are expected to be whole
#'   numbers with `MIN <= MAX` in every row.
#' @param N Total number of hats to hand out (expected to be a whole number).
#' @return `dt` with a numeric `HATS` column. A data.table is updated by
#'   reference, so the caller's table gains the column as well; a data.frame
#'   is returned as a modified copy.
allocate <- function(dt, N) {
  lo <- dt[["MIN"]]
  hi <- dt[["MAX"]]
  pop <- dt[["POPULATION"]]

  if (N > sum(hi)) {
    stop("Too many hats to go around")
  }
  if (N < sum(lo)) {
    stop("Not enough hats to go around")
  }

  # Rounded, population-weighted share of `amount` for the rows flagged in
  # `rows`; every other row gets 0. Returns all zeros when the flagged rows
  # have no population, which avoids a 0/0 = NaN share.
  share <- function(amount, rows) {
    out <- numeric(length(pop))
    weight <- sum(pop[rows])
    if (weight > 0) {
      out[rows] <- round(amount * pop[rows] / weight)
    }
    out
  }

  # Stage 1: proportional share among zones that may receive hats at all,
  # clamped to each zone's bounds.
  hats <- pmax(lo, pmin(hi, share(N, hi > 0)))

  # Stage 2: distribute the shortfall (or surplus) proportionally among the
  # zones that still have room. The adjustment is capped by the remaining
  # room so that no zone leaves [MIN, MAX].
  gap <- N - sum(hats)
  if (gap > 0) {
    room <- pmax(hi - hats, 0)
    hats <- hats + pmin(room, share(gap, room > 0))
  } else if (gap < 0) {
    slack <- pmax(hats - lo, 0)
    hats <- hats - pmin(slack, share(-gap, slack > 0))
  }

  # Stage 3: rounding can leave the total slightly off in either direction.
  # Move one hat at a time on the first eligible zones (row order) and repeat
  # until the total matches. Because sum(MIN) <= N <= sum(MAX), an eligible
  # zone always exists while the gap is non-zero, so the loop terminates.
  gap <- N - sum(hats)
  while (abs(gap) >= 1) {
    open <- if (gap > 0) which(hats < hi) else which(hats > lo)
    pick <- open[seq_len(min(length(open), floor(abs(gap))))]
    hats[pick] <- hats[pick] + sign(gap)
    gap <- N - sum(hats)
  }

  if (inherits(dt, "data.table")) {
    # Add the column by reference, as the := based version did.
    data.table::set(dt, j = "HATS", value = hats)
  } else {
    dt[["HATS"]] <- hats
  }
  dt
}
用 50 顶帽子进行测试:
# Hand out 50 hats across the zones in dt1, then show the resulting table.
dt2 <- allocate(dt = dt1, N = 50)
dt2
ZONE POPULATION MIN MAX HATS
1: A34 40 1 10 2
2: G345 110 0 9 8
3: H62 80 0 2 2
4: D563 70 1 11 5
5: T63 90 0 12 7
6: P983 90 1 8 7
7: S24 130 0 5 5
8: J54 140 1 3 3
9: W953 80 1 2 2
10: L97 30 0 0 0
11: V56 80 1 8 5
12: R99 50 1 8 4
分配了50顶帽子。
它可能既不优雅,在数学上也不够严谨,但这是我的一次尝试,仅供参考。希望它能有一些用处。
发布于 2020-05-10 10:11:11
可以将其表述为一个整数规划问题:目标函数是最小化实际分配量与目标分配量之差的平方和,同时必须满足每个地区的最小和最大分配量约束,并且分配总量等于帽子总数:
# Target allocation per zone: the zone's share of the total population times
# TOTAL. TOTAL must already exist when this line runs (the data section of
# this answer sets it). The target ignores MIN/MAX and is usually fractional.
dt1[, TARGET := POPULATION / sum(POPULATION) * TOTAL]
# system.time() only measures how long loading CVXR plus building and solving
# the problem takes; the objects created inside remain available afterwards.
system.time({
library(CVXR)
# One integer decision variable per zone (row of dt1).
x <- Variable(nrow(dt1), integer=TRUE)
mini <- dt1$MIN
maxi <- dt1$MAX
target <- dt1$TARGET
# Objective: minimise the sum of squared differences between the allocation
# and the proportional target.
obj <- Minimize(sum_squares(x - target))
# Constraints: each zone stays within [MIN, MAX] and the allocations add up
# to TOTAL.
constr <- list(mini <= x, x <= maxi, sum(x) == TOTAL)
prob <- Problem(obj, constr)
# NOTE(review): the solver status is not inspected here; presumably worth
# checking before trusting the values -- confirm against the CVXR docs.
result <- solve(prob)
})
# Timing output recorded with the original post:
# user system elapsed
# 1.60 0.17 1.76
# Store the solution as an integer column; round() comes first, presumably to
# guard against values that are only approximately whole numbers.
dt1[, ALLOCATION := as.integer(round(result$getValue(x)))]
输出:
ZONE POPULATION MIN MAX TARGET ALLOCATION
1: A34 40 1 10 2.020202 4
2: G345 110 0 9 5.555556 7
3: H62 80 0 2 4.040404 2
4: D563 70 1 11 3.535354 5
5: T63 90 0 12 4.545455 6
6: P983 90 1 8 4.545455 6
7: S24 130 0 5 6.565657 5
8: J54 140 1 3 7.070707 3
9: W953 80 1 2 4.040404 2
10: L97 30 0 0 1.515152 0
11: V56 80 1 8 4.040404 6
12: R99 50 1 8 2.525253 4
数据:
library(data.table)
# Example zones: population, plus the minimum and maximum number of hats each
# zone is able to receive.
dt1 <- data.table(
  ZONE = c(
    "A34", "G345", "H62", "D563", "T63", "P983",
    "S24", "J54", "W953", "L97", "V56", "R99"
  ),
  POPULATION = c(40, 110, 80, 70, 90, 90, 130, 140, 80, 30, 80, 50),
  MIN = c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1),
  MAX = c(10, 9, 2, 11, 12, 8, 5, 3, 2, 0, 8, 8)
)

# Total number of hats to distribute.
TOTAL <- 50L
https://stackoverflow.com/questions/61695954
复制相似问题