Adjusted predictions with tidymodels

Does anyone know how to use predictions() from the marginaleffects package with tidymodels? In this toy example, I want to get the predicted values of the variable state while holding all other variables at their base levels or mean values.
library(liver)
library(tidymodels)
library(marginaleffects)

df_churn <- data.frame(churn)

# Create data split object
churn_split <- initial_split(df_churn, prop = 0.75,
                             strata = churn)

# Create the training data
churn_train <- churn_split %>%
  training()

# Create the test data
churn_test <- churn_split %>%
  testing()

lr_mod <-
  logistic_reg(penalty = tune(), mixture = 1) %>% # penalty = lambda. mixture = alpha
  set_engine("glmnet") %>%
  set_mode("classification")

# pre-process recipe
churn_recipe <- recipe(churn ~ .,
                       data = churn_train) %>%
  step_corr(all_numeric(), threshold = 0.9) %>%
  step_normalize(all_numeric()) %>%
  step_dummy(all_nominal(), -all_outcomes())

# model + recipe = workflow
churn_wkfl <- workflow() %>%
  add_model(lr_mod) %>%
  add_recipe(churn_recipe)

# cv
set.seed(1)
churn_folds <- vfold_cv(churn_train,
                        v = 10,
                        strata = churn)

# grid
set.seed(1)
glmnet_tuning <- churn_wkfl %>%
  tune_grid(resamples = churn_folds,
            grid = 25, # let the model find the best hyperparameters
            metrics = metric_set(roc_auc))

# select the best model
best_glmnet_model <- glmnet_tuning %>%
  select_best(metric = 'roc_auc')

# finalize the workflow and try to get adjusted predictions
# This does not work
final_churn_wkfl <- churn_wkfl %>%
  finalize_workflow(best_glmnet_model) %>%
  fit(churn_train) %>%
  tidy() %>%
  predictions(variables = c("state"))
1 Answer
Unfortunately, glmnet is not one of the supported models for marginaleffects. You can switch this to one of the supported models (like regular glm()) and this will work using extract_fit_engine().

Created on 2022-03-25 by the reprex package (v2.0.1)

Notice that I did not use variables = c("state") and substituted one of the continuous, numeric predictors.
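A minimal sketch of that approach, assuming the setup from the question (libraries, churn_train, churn_recipe) is still in scope: swap the tunable glmnet spec for a plain glm(), refit the workflow, and pass the engine-level fit to predictions(). The predictor name "intl.calls" is a placeholder for one of the continuous, numeric predictors mentioned in the answer, not something taken from it.

# Plain glm() spec, so the underlying model object is supported by marginaleffects
lr_mod_glm <- logistic_reg() %>%
  set_engine("glm") %>%
  set_mode("classification")

churn_wkfl_glm <- workflow() %>%
  add_model(lr_mod_glm) %>%
  add_recipe(churn_recipe)   # same recipe as in the question

glm_fit <- churn_wkfl_glm %>%
  fit(churn_train) %>%
  extract_fit_engine()       # the bare glm object inside the fitted workflow

# After step_dummy(), `state` no longer exists as a single column in the
# engine-level data, so use a continuous predictor instead; "intl.calls" is a
# placeholder. If predictions() cannot recover the training data from the
# extracted fit, pass it explicitly via newdata.
predictions(glm_fit, variables = "intl.calls")

The key piece is extract_fit_engine(), which returns the fitted engine object (here a glm) rather than the workflow or parsnip wrapper, and that bare glm is what marginaleffects knows how to work with.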