我正在使用mlr3中的benchmark()函数来比较几种ML算法。其中之一是带有超参数调优的XGB。因此,我有一个外部重采样来评估总体性能(保持样本)和内部重采样用于超参数调整(5倍交叉验证)。除了对所有ML算法的准确性进行估计外,我还希望看到调优的XGB特性的重要性。为此,我必须访问调优模型(在基准测试对象中)。我不知道怎么做。benchmark()返回的对象是一个深嵌套的列表,我不理解它的结构。
这个 Stack Overflow 上的答案没有帮助我,因为它使用的是不同的设置(管道中的学习器,而不是基准测试对象)。
这个关于github的答复没有帮助我,因为它展示了如何一次提取关于基准测试的所有信息,而不是如何提取基准测试中的一个学习者的一个(调优)模型。
下面是我用来执行嵌套重采样的代码。在基准测试之后,我想按照这里描述的方法估计特征的重要性,这需要访问调优后的XGB模型。
library(mlr3verse)

### Parameters ----
## Tuning
n_folds <- 5
grid_search_resolution <- 2
measure <- msr("classif.acc")
task <- tsk("iris")

# Verbose mlr3 progress messages
# https://stackoverflow.com/a/69336802/7219311
options("mlr3.debug" = TRUE)

### Set up hyperparameter tuning ----
# The AutoTuner performs the inner resampling (5-fold CV) on the
# training portion of every outer split.
inner_resampling <- rsmp("cv", folds = n_folds)
# Grid search terminates on its own once the grid is exhausted.
terminator <- trm("none")

## XGBoost baseline: no hyperparameter tuning
xgb_no_tuning <- lrn("classif.xgboost", eval_metric = "mlogloss")
set_threads(xgb_no_tuning, n = 6)

## XGBoost with AutoTuner
xgb_learner_tuning <- lrn("classif.xgboost", eval_metric = "mlogloss")
xgb_search_space <- ps(
  nrounds = p_int(lower = 100, upper = 500),
  max_depth = p_int(lower = 3, upper = 10),
  colsample_bytree = p_dbl(lower = 0.6, upper = 1)
)
xgb_tuner <- tnr("grid_search", resolution = grid_search_resolution)
# implicit parallelisation of the inner resampling
set_threads(xgb_learner_tuning, n = 6)
xgb_tuned <- AutoTuner$new(
  xgb_learner_tuning, inner_resampling, measure,
  terminator, xgb_tuner, xgb_search_space,
  store_tuning_instance = TRUE
)

## Outer resampling: hold-out ----
outer_resampling <- rsmp("holdout")
outer_resampling$instantiate(task)

bm_design <- benchmark_grid(
  tasks = task,
  learners = list(
    lrn("classif.featureless"),
    xgb_no_tuning,
    xgb_tuned
  ),
  resamplings = outer_resampling
)

begin_time <- Sys.time()
# store_models = TRUE is required to access the fitted (tuned) models later
bmr <- benchmark(bm_design, store_models = TRUE)
duration <- Sys.time() - begin_time
print(duration)

## Results of benchmarking ----
benchmark_results <- bmr$aggregate(measure)
print(benchmark_results)

## Overview: all stored models
mlr3misc::map(as.data.table(bmr)$learner, "model")

## Detailed results ----
# Specification of learners
print(bmr$learners$learner)
解决方案
基于 be-marc 的评论
library(mlr3verse)
library(mlr3tuning)
library(mlr3misc)

### Parameters ----
## Tuning
n_folds <- 5
grid_search_resolution <- 2
measure <- msr("classif.acc")
task <- tsk("iris")

# Verbose mlr3 progress messages
# https://stackoverflow.com/a/69336802/7219311
options("mlr3.debug" = TRUE)

### Set up hyperparameter tuning ----
# The AutoTuner performs the inner resampling (5-fold CV) on the
# training portion of every outer split.
inner_resampling <- rsmp("cv", folds = n_folds)
# Grid search terminates on its own once the grid is exhausted.
terminator <- trm("none")

## XGBoost baseline: no hyperparameter tuning
xgb_no_tuning <- lrn("classif.xgboost", eval_metric = "mlogloss")
set_threads(xgb_no_tuning, n = 6)

## XGBoost with AutoTuner
xgb_learner_tuning <- lrn("classif.xgboost", eval_metric = "mlogloss")
xgb_search_space <- ps(
  nrounds = p_int(lower = 100, upper = 500),
  max_depth = p_int(lower = 3, upper = 10),
  colsample_bytree = p_dbl(lower = 0.6, upper = 1)
)
xgb_tuner <- tnr("grid_search", resolution = grid_search_resolution)
# implicit parallelisation of the inner resampling
set_threads(xgb_learner_tuning, n = 6)
xgb_tuned <- AutoTuner$new(
  xgb_learner_tuning, inner_resampling, measure,
  terminator, xgb_tuner, xgb_search_space,
  store_tuning_instance = TRUE
)

## Outer resampling: hold-out ----
outer_resampling <- rsmp("holdout")
outer_resampling$instantiate(task)

bm_design <- benchmark_grid(
  tasks = task,
  learners = list(
    lrn("classif.featureless"),
    xgb_no_tuning,
    xgb_tuned
  ),
  resamplings = outer_resampling
)

begin_time <- Sys.time()
# store_models = TRUE is required to access the fitted (tuned) models later
bmr <- benchmark(bm_design, store_models = TRUE)
duration <- Sys.time() - begin_time
print(duration)

## Results of benchmarking ----
benchmark_results <- bmr$aggregate(measure)
print(benchmark_results)

## Overview: all stored models
mlr3misc::map(as.data.table(bmr)$learner, "model")

## Detailed results ----
# Specification of learners
print(bmr$learners$learner)

## Feature importance ----
# Extract the learners fitted on the outer-resampling training sets
# https://stackoverflow.com/a/69828801
data <- as.data.table(bmr)
outer_learners <- map(data$learner, "learner")
# Third row of the benchmark design is the tuned XGBoost learner
xgb_tuned_model <- outer_learners[[3]]
print(xgb_tuned_model)
# Print feature importance
# (presumably "gain" - the mlr3 documentation is not explicit)
print(xgb_tuned_model$importance())
发布于 2021-11-03 16:54:55
library(mlr3tuning)
library(mlr3learners)
library(mlr3misc)

# Tune nrounds via random search inside an AutoTuner (nested resampling:
# 3-fold inner CV for tuning, 5-fold outer CV for performance estimation).
learner <- lrn(
  "classif.xgboost",
  nrounds = to_tune(100, 500),
  eval_metric = "logloss"
)
at <- AutoTuner$new(
  learner = learner,
  resampling = rsmp("cv", folds = 3),
  measure = msr("classif.ce"),
  terminator = trm("evals", n_evals = 5),
  tuner = tnr("random_search"),
  store_models = TRUE
)

design <- benchmark_grid(
  task = tsk("pima"),
  learner = at,
  resampling = rsmp("cv", folds = 5)
)
# store_models = TRUE keeps the fitted learners in the benchmark result
bmr <- benchmark(design, store_models = TRUE)

# Extract the learners fitted in the outer loop
data <- as.data.table(bmr)
outer_learners <- map(data$learner, "learner")

# Extract the learners fitted in the inner (tuning) loop
archives <- extract_inner_tuning_archives(bmr)
inner_learners <- map(archives$resample_result, "learners")
# https://stackoverflow.com/questions/69827716
复制相似问题