ROC的计算与绘制

#' Compute ROC curve points for each group of a data frame
#'
#' Every non-missing value of `predict_var` within a group is used as a
#' threshold; an observation is predicted positive when its value is greater
#' than or equal to that threshold.
#'
#' @param .data A data frame.
#' @param predict_var Unquoted name of the numeric predictor column. Rows where
#'   it is `NA` are dropped.
#' @param target Unquoted name of the outcome column.
#' @param group_var Unquoted name of the grouping column; rows where it is `NA`
#'   are ignored.
#' @param positive Value of `target` that marks a positive case.
#' @return A data frame with columns `tp`, `fp`, `tn`, `fn`, `tpr`, `fpr` and
#'   `Group`. Each group contributes one leading row with `tpr = 0`, `fpr = 0`
#'   (counts `NA`) followed by one row per threshold in decreasing order.
#'   `tpr` / `fpr` are `NaN` for a group without positives / negatives.
#'   Returns `NULL` when no row has a usable predictor and group.
calcROC <- function(.data, predict_var, target, group_var, positive = "success") {
  # Fail loudly if a dependency is missing; require() would only return FALSE.
  for (pkg in c("dplyr", "rlang")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required by calcROC().", call. = FALSE)
    }
  }

  predict_var <- rlang::enquo(predict_var)
  target <- rlang::enquo(target)
  group_var <- rlang::enquo(group_var)

  scored <- dplyr::filter(.data, !is.na(!!predict_var))
  # `positive` is injected by value so that a column of the same name in
  # `.data` cannot shadow the argument.
  scored <- dplyr::mutate(
    scored,
    isPositive = ifelse((!!target) == !!positive, 1, 0)
  )

  scores <- dplyr::pull(scored, !!predict_var)
  is_pos <- dplyr::pull(scored, isPositive)
  group_values <- dplyr::pull(scored, !!group_var)

  # Same ordering as names(table(.)), but unused factor levels are dropped so
  # that an empty group can never be processed.
  group_levels <- levels(droplevels(as.factor(group_values)))

  roc_one_group <- function(grp) {
    idx <- which(group_values == grp)
    idx <- idx[order(-scores[idx])]
    s <- scores[idx]

    # Rows with a missing target count as neither positive nor negative,
    # but their predictor value still serves as a threshold.
    pos <- !is.na(is_pos[idx]) & is_pos[idx] == 1
    neg <- !is.na(is_pos[idx]) & is_pos[idx] == 0

    # With scores sorted in decreasing order, the observations satisfying
    # `score >= th` are exactly those up to the last row tied with `th`.
    last_tied <- length(s) + 1L - match(s, rev(s))
    tp <- as.numeric(cumsum(pos)[last_tied])
    fp <- as.numeric(cumsum(neg)[last_tied])
    fn <- sum(pos) - tp
    tn <- sum(neg) - fp

    curve <- data.frame(
      tp = tp, fp = fp, tn = tn, fn = fn,
      tpr = tp / (tp + fn),
      fpr = fp / (fp + tn)
    )
    # Prepend an artificial (0, 0) point so the curve starts at the origin.
    origin <- data.frame(
      tp = NA_real_, fp = NA_real_, tn = NA_real_, fn = NA_real_,
      tpr = 0, fpr = 0
    )
    curve <- rbind(origin, curve)
    curve$Group <- grp
    curve
  }

  do.call(rbind, lapply(group_levels, roc_one_group))
}

> args(calcROC)
function (.data, predict_var, target, group_var, positive = "success")

calcROC(dat, mut, isBenefit, Gender)
tp fp tn fn       tpr fpr  Group
1                NA NA NA NA 0.0000000 0.0 Female
TMB_NonsynSNP1    1  0 10  7 0.1250000 0.0 Female
TMB_NonsynSNP2    2  0 10  6 0.2500000 0.0 Female
TMB_NonsynSNP3    3  0 10  5 0.3750000 0.0 Female
TMB_NonsynSNP4    4  0 10  4 0.5000000 0.0 Female
TMB_NonsynSNP5    4  1  9  4 0.5000000 0.1 Female
TMB_NonsynSNP6    5  1  9  3 0.6250000 0.1 Female
TMB_NonsynSNP7    6  1  9  2 0.7500000 0.1 Female
TMB_NonsynSNP8    6  2  8  2 0.7500000 0.2 Female
TMB_NonsynSNP9    7  2  8  1 0.8750000 0.2 Female
TMB_NonsynSNP10   7  3  7  1 0.8750000 0.3 Female
TMB_NonsynSNP11   8  3  7  0 1.0000000 0.3 Female
TMB_NonsynSNP12   8  4  6  0 1.0000000 0.4 Female
TMB_NonsynSNP13   8  5  5  0 1.0000000 0.5 Female
TMB_NonsynSNP14   8  6  4  0 1.0000000 0.6 Female
TMB_NonsynSNP15   8  7  3  0 1.0000000 0.7 Female
TMB_NonsynSNP16   8  8  2  0 1.0000000 0.8 Female
TMB_NonsynSNP17   8  9  1  0 1.0000000 0.9 Female
TMB_NonsynSNP18   8 10  0  0 1.0000000 1.0 Female
11               NA NA NA NA 0.0000000 0.0   Male
TMB_NonsynSNP19   0  1  9  6 0.0000000 0.1   Male
TMB_NonsynSNP21   1  1  9  5 0.1666667 0.1   Male
TMB_NonsynSNP31   1  2  8  5 0.1666667 0.2   Male
TMB_NonsynSNP41   2  2  8  4 0.3333333 0.2   Male
TMB_NonsynSNP51   2  3  7  4 0.3333333 0.3   Male
TMB_NonsynSNP61   3  3  7  3 0.5000000 0.3   Male
TMB_NonsynSNP71   4  3  7  2 0.6666667 0.3   Male
TMB_NonsynSNP81   4  4  6  2 0.6666667 0.4   Male
TMB_NonsynSNP91   5  4  6  1 0.8333333 0.4   Male
TMB_NonsynSNP101  5  5  5  1 0.8333333 0.5   Male
TMB_NonsynSNP111  5  6  4  1 0.8333333 0.6   Male
TMB_NonsynSNP121  5  7  3  1 0.8333333 0.7   Male
TMB_NonsynSNP131  5  8  2  1 0.8333333 0.8   Male
TMB_NonsynSNP141  5  9  1  1 0.8333333 0.9   Male
TMB_NonsynSNP151  6  9  1  0 1.0000000 0.9   Male
TMB_NonsynSNP161  6 10  0  0 1.0000000 1.0   Male

# Draw one ROC curve per group from the calcROC() output: false positive rate
# on the x axis, true positive rate on the y axis, with line type and point
# shape both mapped to the Group column.
ggpubr::ggline(
  data = calcROC(dat, mut, isBenefit, Gender),
  x = "fpr",
  y = "tpr",
  linetype = "Group",
  shape = "Group"
)

0 条评论

相关文章

51960

80310

如何使用sklearn加载和下载机器学习数据集

sklearn 中提供了很多常用（或高级）的模型和算法，但是真正决定一个模型效果的最后还是取决于训练（喂养）模型时所用的数据。sklearn 中的 sklear...

78550

上个月发布了四篇文章，主要讲了深度学习中的“hello world”----mnist图像识别，以及卷积神经网络的原理详解，包括基本原理、自己手写CNN和p...

51150

238100

26450

1.9K40

10820

35160

21020