我正在尝试测试一个过程,在这个过程中,我通过R(使用H2O包)在H2O中构建了一个模型,下载MOJO,创建一个函数来调用'h2o.mojo_predict_df',然后使用plumber创建一个restful API。根据我对plumber实现的理解,我需要将获取预测的方法包装在一个函数中。
为了测试,我使用的是iris数据集。
`sessionInfo()` 的结果是:
R version 3.4.3 (2017-11-30)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 16299)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] purrr_0.2.5 h2o_3.20.0.2
loaded via a namespace (and not attached):
[1] Rcpp_0.12.18 crayon_1.3.4 dplyr_0.7.6 assertthat_0.2.0 bitops_1.0-6 R6_2.2.2 jsonlite_1.5 magrittr_1.5 pillar_1.3.0
[10] rlang_0.2.1 bindrcpp_0.2.2 tools_3.4.3 glue_1.3.0 RCurl_1.95-4.11 compiler_3.4.3 pkgconfig_2.0.1 tidyselect_0.2.4 bindr_0.1.1
[19] tibble_1.4.2
我用来训练/保存/下载模型的代码如下:
# Train an H2O AutoML classifier on the iris data, download the leading model
# as a MOJO (plus h2o-genmodel.jar), and score the hold-out rows offline with
# h2o.mojo_predict_df().
library(RODBC)
library(caret)
library(h2o)

# On every host except WINX-08947, set JAVA_HOME explicitly.
if (Sys.info()["nodename"] != "WINX-08947") {
  Sys.setenv(JAVA_HOME = "C:\\Program Files\\Java\\jre1.8.0_161")
}

fulldata <- iris
summary(fulldata)

# 75% of the rows go to training; the remaining rows form the test set.
fulldata_trainindex <- createDataPartition(
  fulldata$Species,
  p = 0.75,
  list = FALSE
)
train <- fulldata[fulldata_trainindex, ]
test <- fulldata[-fulldata_trainindex, ]

## Using H2O
# Stop a cluster left over from an earlier session so h2o.init() starts clean.
# h2o.shutdown() raises an error when there is no active connection, which
# would abort a source()d run in a fresh R session; report that case and move
# on instead. The pause is only needed when a cluster was actually stopped.
cluster_stopped <- tryCatch(
  {
    h2o.shutdown(prompt = FALSE)
    TRUE
  },
  error = function(e) {
    message("Skipping H2O shutdown: ", conditionMessage(e))
    FALSE
  }
)
if (cluster_stopped) {
  Sys.sleep(10)
}

localH2Oconn <- h2o.init(
  ip = "localhost",
  port = 54321,
  startH2O = TRUE,
  nthreads = -1,
  max_mem_size = "4g"
)

# X and target exist only to supply the predictor / response column names.
X <- train[, c(1:4)]
target <- as.data.frame(train$Species)
colnames(target) <- "Species"

train_h2o <- as.h2o(train, destination_frame = "train_h2o")
test_h2o <- as.h2o(test, destination_frame = "test_h2o")

##### AutoML
aml <- h2o.automl(
  x = colnames(X),
  y = colnames(target),
  training_frame = train_h2o,
  validation_frame = test_h2o,
  leaderboard_frame = test_h2o,
  nfolds = 5,
  max_runtime_secs = 600,
  project_name = "AML_MODEL"
)
aml_best <- aml@leader
aml_best_MODEL_ID <- aml_best@model_id

# The leader's id (and therefore the zip name) changes on every AutoML run, so
# score with the file that was just written rather than a hard-coded name.
# h2o.download_mojo() returns the name of the MOJO zip; basename() keeps this
# working whether a bare file name or a full path comes back.
mojo_dir <- getwd()
mojo_file <- h2o.download_mojo(
  aml_best,
  path = mojo_dir,
  get_genmodel_jar = TRUE
)
preds <- h2o.mojo_predict_df(
  test,
  file.path(mojo_dir, basename(mojo_file)),
  # get_genmodel_jar = TRUE saves the jar next to the MOJO under its default
  # name.
  genmodel_jar_path = file.path(mojo_dir, "h2o-genmodel.jar"),
  java_options = "-Xmx256m -XX:ReservedCodeCacheSize=256m",
  verbose = TRUE
)
test$PREDICTION<-preds$predict
以所需格式返回预测结果的测试用例代码如下:
# Score one hand-built observation with the downloaded MOJO and keep the
# predicted class as a character string.
library(h2o)

# A single row carrying the four iris measurements used as predictors.
newdata <- data.frame(
  Sepal.Length = 5,
  Sepal.Width = 3,
  Petal.Length = 2,
  Petal.Width = 1
)

mojo_res <- h2o.mojo_predict_df(
  newdata,
  "C:/Users/jeng209/Documents/GBM_grid_0_AutoML_20180731_202910_model_3.zip",
  genmodel_jar_path = "C:/Users/jeng209/Documents/h2o-genmodel.jar",
  java_options = "-Xmx256m -XX:ReservedCodeCacheSize=256m",
  verbose = FALSE
)

# Only the predicted label is needed, as plain text.
mojo_pred <- as.character(mojo_res$predict)
mojo_pred
输出如下:
> newdata <- data.frame(
+ Sepal.Length=5
+ , Sepal.Width=3
+ , Petal.Length=2
+ , Petal.Width=1
+ )
> mojo_res <- h2o.mojo_predict_df(newdata, "C:/Users/jeng209/Documents/GBM_grid_0_AutoML_20180731_202910_model_3.zip", genmodel_jar_path = "C:/Users/jeng209/Documents/h2o-genmodel.jar", java_options = "-Xmx256m -XX:ReservedCodeCacheSize=256m" , verbose = FALSE)
[1] "+ CMD: java -Xmx256m -XX:ReservedCodeCacheSize=256m -cp C:/Users/jeng209/Documents/h2o-genmodel.jar hex.genmodel.tools.PredictCsv --mojo C:\\Users\\jeng209\\Documents\\GBM_grid_0_AutoML_20180731_202910_model_3.zip --input C:\\Users\\jeng209\\AppData\\Local\\Temp\\RtmpSeY5TX/input.csv --output C:\\Users\\jeng209\\AppData\\Local\\Temp\\RtmpSeY5TX/prediction.csv --decimal"
[1] 0
[1] "data.frame"
Warning message:
In dir.create(tmp_dir) :
'C:\Users\jeng209\AppData\Local\Temp\RtmpSeY5TX' already exists
> mojo_pred <- as.character(mojo_res$predict)
> mojo_pred
[1] "versicolor"
请注意创建 `mojo_res` 对象之后出现的额外输出。把这段代码放进函数后,结果如下:
> iris_pred <- function(sl, sw, pl, pw){
+ newdata <- data.frame(
+ Sepal.Length=sl
+ , Sepal.Width=sw
+ , Petal.Length=pl
+ , Petal.Width=pw
+ )
+ mojo_res <- invisible(h2o.mojo_predict_df(newdata, "C:/Users/jeng209/Documents/GBM_grid_0_AutoML_20180731_202910_model_3.zip", genmodel_jar_path = "C:/Users/jeng209/Documents/h2o-genmodel.jar", java_options = "-Xmx256m -XX:ReservedCodeCacheSize=256m" , verbose = FALSE))
+ mojo_pred <- as.character(mojo_res$predict)
+ return(mojo_pred)
+ }
> iris_pred(5,3,2,1)
[1] "+ CMD: java -Xmx256m -XX:ReservedCodeCacheSize=256m -cp C:/Users/jeng209/Documents/h2o-genmodel.jar hex.genmodel.tools.PredictCsv --mojo C:\\Users\\jeng209\\Documents\\GBM_grid_0_AutoML_20180731_202910_model_3.zip --input C:\\Users\\jeng209\\AppData\\Local\\Temp\\RtmpSeY5TX/input.csv --output C:\\Users\\jeng209\\AppData\\Local\\Temp\\RtmpSeY5TX/prediction.csv --decimal"
[1] 0
[1] "data.frame"
[1] "versicolor"
Warning message:
In dir.create(tmp_dir) :
'C:\Users\jeng209\AppData\Local\Temp\RtmpSeY5TX' already exists
我还没有找到一种方法,让函数调用只返回 versicolor。到目前为止,我已经尝试了 sink 和 invisible,但它们会导致预测结果无法被取回。
是否有已知的方法可以解决这个问题,即只从 h2o.mojo_predict_df 获取结果数据框,而不产生并保存所有这些额外输出?
发布于 2018-08-14 00:39:23
已经有一个 JIRA 工单(ticket)用于移除这些给你带来麻烦的输出,你可以通过原回答中的链接关注它的进展。
作为一种变通办法,您可以把 capture.output 嵌套在 invisible 中使用,即 invisible(capture.output(...)),然后对捕获到的文本做一些字符串处理。
https://stackoverflow.com/questions/51715175
复制相似问题