# Random Forest ----
# (Libraries — tidymodels, janitor, here — are assumed to be attached
# earlier in the project; this chunk only loads the prepared data.)

# Load the prepared analysis objects ----
analysis_train <- readRDS(here("models", "data", "analysis_train.rds"))
analysis_data <- readRDS(here("models", "data", "analysis_data.rds"))
analysis_folds <- readRDS(here("models", "data", "analysis_folds.rds"))

# Split the data ----
# Stratify on the outcome so the rare "carried a weapon" class (~4%)
# appears in the same proportion in both partitions.
set.seed(2023)
Weapon_split <- initial_split(analysis_data,
                              strata = WeaponCarryingSchool)
weapon_train <- training(Weapon_split)
weapon_test <- testing(Weapon_split)

Weapon_split
#> <Training/Testing/Total>
#> <14696/4899/19595>
# Check our work: the outcome distribution should match across partitions ----
weapon_train |>
  tabyl(WeaponCarryingSchool) |>
  adorn_pct_formatting(0) |>
  adorn_totals()
#>  WeaponCarryingSchool     n percent
#>                     0 14042     96%
#>                     1   654      4%
#>                 Total 14696       -

weapon_test |>
  tabyl(WeaponCarryingSchool) |>
  adorn_pct_formatting(0) |>
  adorn_totals()
#>  WeaponCarryingSchool    n percent
#>                     0 4705     96%
#>                     1  194      4%
#>                 Total 4899       -
# 5-fold cross-validation on the training set, again stratified on the
# outcome so every fold sees the rare positive class.
set.seed(2023)
cv_weapon <- rsample::vfold_cv(weapon_train,
                               v = 5,
                               strata = WeaponCarryingSchool)

cv_weapon
#> #  5-fold cross-validation using stratification
#> # A tibble: 5 × 2
#>   splits               id
#>   <list>               <chr>
#> 1 <split [11756/2940]> Fold1
#> 2 <split [11757/2939]> Fold2
#> 3 <split [11757/2939]> Fold3
#> 4 <split [11757/2939]> Fold4
#> 5 <split [11757/2939]> Fold5
# Preprocessing recipe: impute missing values (mode for nominal, mean for
# numeric), then dummy-code the nominal predictors ----
weapon_recipe <-
  recipe(formula = WeaponCarryingSchool ~ ., data = weapon_train) |>
  step_impute_mode(all_nominal_predictors()) |>
  step_impute_mean(all_numeric_predictors()) |>
  step_dummy(all_nominal_predictors())

# Random-forest specification: tune mtry and min_n over the CV folds ----
weapon_spec <-
  rand_forest(
    # the number of predictors to sample at each split
    mtry = tune(),
    # the number of observations needed to keep splitting nodes
    min_n = tune(),
    trees = 100
  ) |>
  set_mode("classification") |>
  set_engine("ranger",
             # This is essential for vip()
             importance = "permutation")

weapon_spec
#> Random Forest Model Specification (classification)
#>
#> Main Arguments:
#>   mtry = tune()
#>   trees = 100
#>   min_n = tune()
#>
#> Engine-Specific Arguments:
#>   importance = permutation
#>
#> Computational engine: ranger
# Bundle the recipe and model into a single tunable workflow ----
weapon_workflow <-
  workflow() |>
  add_recipe(weapon_recipe) |>
  add_model(weapon_spec)

weapon_workflow
#> ══ Workflow ════════════════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: rand_forest()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 3 Recipe Steps
#> • step_impute_mode()
#> • step_impute_mean()
#> • step_dummy()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#> Random Forest Model Specification (classification)
#>
#> Main Arguments:
#>   mtry = tune()
#>   trees = 100
#>   min_n = tune()
#>
#> Engine-Specific Arguments:
#>   importance = permutation
#>
#> Computational engine: ranger
# Tune mtry and min_n across the CV folds in parallel ----
# NOTE(review): the warning captured below shows that tune >= 1.2.1
# soft-deprecated foreach backends; consider migrating to
# future::plan(multisession) instead of doParallel::registerDoParallel().
doParallel::registerDoParallel()
set.seed(46257)
weapon_tune <-
  tune_grid(
    weapon_workflow,
    resamples = cv_weapon,
    # grid = 11 says to choose 11 parameter sets automatically
    grid = 11)
#> i Creating pre-processing data to finalize unknown parameter: mtry
#> Warning: ! tune detected a parallel backend registered with foreach but no
#> backend registered with future.
#> ℹ Support for parallel processing with foreach was soft-deprecated in tune
#>   1.2.1.
#> ℹ See ?parallelism (`?tune::parallelism()`) to learn more.
doParallel::stopImplicitCluster()
# Inspect the tuning results and pick the best parameters by ROC AUC ----
weapon_tune
#> # Tuning results
#> # 5-fold cross-validation using stratification
#> # A tibble: 5 × 4
#>   splits               id    .metrics          .notes
#>   <list>               <chr> <list>            <list>
#> 1 <split [11756/2940]> Fold1 <tibble [33 × 6]> <tibble [0 × 3]>
#> 2 <split [11757/2939]> Fold2 <tibble [33 × 6]> <tibble [0 × 3]>
#> 3 <split [11757/2939]> Fold3 <tibble [33 × 6]> <tibble [0 × 3]>
#> 4 <split [11757/2939]> Fold4 <tibble [33 × 6]> <tibble [0 × 3]>
#> 5 <split [11757/2939]> Fold5 <tibble [33 × 6]> <tibble [0 × 3]>

collect_metrics(weapon_tune)
#> # A tibble: 33 × 8
#>     mtry min_n .metric     .estimator   mean     n std_err .config
#>    <int> <int> <chr>       <chr>       <dbl> <int>   <dbl> <chr>
#>  1     1    17 accuracy    binary     0.955      5 0.00160 Preprocessor1_Model01
#>  2     1    17 brier_class binary     0.0417     5 0.00137 Preprocessor1_Model01
#>  3     1    17 roc_auc     binary     0.673      5 0.00677 Preprocessor1_Model01
#>  4     1    32 accuracy    binary     0.955      5 0.00160 Preprocessor1_Model02
#>  5     1    32 brier_class binary     0.0417     5 0.00137 Preprocessor1_Model02
#>  6     1    32 roc_auc     binary     0.673      5 0.00931 Preprocessor1_Model02
#>  7     2     5 accuracy    binary     0.955      5 0.00160 Preprocessor1_Model03
#>  8     2     5 brier_class binary     0.0417     5 0.00128 Preprocessor1_Model03
#>  9     2     5 roc_auc     binary     0.672      5 0.00663 Preprocessor1_Model03
#> 10     3    21 accuracy    binary     0.955      5 0.00160 Preprocessor1_Model04
#> # ℹ 23 more rows

autoplot(weapon_tune)

# roc_auc is the right selection metric here: with a 96/4 class split,
# accuracy is ~0.955 for every candidate and cannot discriminate.
best_weapon <- select_best(weapon_tune, metric = "roc_auc")

best_weapon
#> # A tibble: 1 × 3
#>    mtry min_n .config
#>   <int> <int> <chr>
#> 1     1    32 Preprocessor1_Model02
# Plug the winning mtry/min_n values back into the workflow ----
weapon_final_wf <- finalize_workflow(weapon_workflow, best_weapon)

weapon_final_wf
#> ══ Workflow ════════════════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: rand_forest()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 3 Recipe Steps
#> • step_impute_mode()
#> • step_impute_mean()
#> • step_dummy()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#> Random Forest Model Specification (classification)
#>
#> Main Arguments:
#>   mtry = 1
#>   trees = 100
#>   min_n = 32
#>
#> Engine-Specific Arguments:
#>   importance = permutation
#>
#> Computational engine: ranger
# Fit the finalized workflow on the full training set ----
weapon_fit <- fit(weapon_final_wf, weapon_train)

weapon_fit
#> ══ Workflow [trained] ══════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: rand_forest()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 3 Recipe Steps
#> • step_impute_mode()
#> • step_impute_mean()
#> • step_dummy()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#> Ranger result
#>
#> Call:
#>  ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~1L, x),
#>    num.trees = ~100, min.node.size = min_rows(~32L, x),
#>    importance = ~"permutation", num.threads = 1, verbose = FALSE,
#>    seed = sample.int(10^5, 1), probability = TRUE)
#>
#> Type:                             Probability estimation
#> Number of trees:                  100
#> Sample size:                      14696
#> Number of independent variables:  10
#> Mtry:                             1
#> Target node size:                 32
#> Variable importance mode:         permutation
#> Splitrule:                        gini
#> OOB prediction error (Brier s.):  0.04166282
# Attach class and probability predictions to the training data ----
# NOTE(review): these are resubstitution predictions on weapon_train, so the
# metrics below are optimistic; evaluate on weapon_test for an honest estimate.
weapon_pred <-
  augment(weapon_fit, weapon_train) |>
  select(WeaponCarryingSchool, .pred_class, .pred_1, .pred_0)

weapon_pred
#> # A tibble: 14,696 × 4
#>    WeaponCarryingSchool .pred_class .pred_1 .pred_0
#>    <fct>                <fct>         <dbl>   <dbl>
#>  1 0                    0            0.0489   0.951
#>  2 0                    0            0.0430   0.957
#>  3 0                    0            0.0351   0.965
#>  4 0                    0            0.0412   0.959
#>  5 0                    0            0.0327   0.967
#>  6 0                    0            0.0880   0.912
#>  7 0                    0            0.0489   0.951
#>  8 0                    0            0.0324   0.968
#>  9 0                    0            0.0488   0.951
#> 10 0                    0            0.0324   0.968
#> # ℹ 14,686 more rows
# ROC curve and AUC ----
# event_level = "second" because the positive class "1" (carried a weapon)
# is the second factor level.
weapon_roc_plot <-
  weapon_pred |>
  roc_curve(truth = WeaponCarryingSchool,
            .pred_1,
            event_level = "second") |>
  autoplot()
weapon_roc_plot

saveRDS(weapon_roc_plot, here("models", "roc_graphs", "random_forest.rds"))

weapon_pred |>
  roc_auc(truth = WeaponCarryingSchool,
          .pred_1,
          event_level = "second")
#> # A tibble: 1 × 3
#>   .metric .estimator .estimate
#>   <chr>   <chr>          <dbl>
#> 1 roc_auc binary         0.683