
Check and complete argument preference_order
Source: R/validate_arg_preference_order.R
validate_arg_preference_order.Rd
Internal function to validate the argument preference_order in cor_select(), vif_select(), collinear_select(), and collinear(). Predictors not in preference_order are ranked from lower to higher sum of Pearson correlations with all other predictors.
Usage
validate_arg_preference_order(
df = NULL,
response = NULL,
predictors = NULL,
preference_order = NULL,
quiet = FALSE,
function_name = NULL,
...
)
Arguments
- df
(required; dataframe, tibble, or sf) A dataframe with responses (optional) and predictors. Must have at least 10 rows for pairwise correlation analysis, and
10 * (length(predictors) - 1) for VIF. Default: NULL.
- response
(optional; character string) Name of a numeric response variable in
df. Default: NULL.
- predictors
(optional; character vector or NULL) Names of the predictors in
df. If NULL, all columns except responses and constant/near-zero-variance columns are used. Default: NULL.
- preference_order
(optional; character vector, dataframe from
preference_order, or NULL) Prioritizes predictors to preserve.
- quiet
(optional; logical) If FALSE, messages are printed. Default: FALSE.
- function_name
(optional; character string) Name of the function performing the argument check. Default: NULL.
- ...
(optional) Internal args (e.g.
function_name for validate_arg_function_name, a precomputed correlation matrix m, or cross-validation args for preference_order).
Examples
data(
vi_smol,
vi_predictors_numeric
)
#input arguments must be validated first
df <- validate_arg_df(
df = vi_smol,
response = "vi_numeric",
predictors = vi_predictors_numeric,
quiet = TRUE
)
response <- validate_arg_responses(
df = df,
responses = "vi_numeric"
)
predictors <- validate_arg_predictors(
df = df,
response = response,
predictors = vi_predictors_numeric[1:10]
)
#no preference order
#no response
#ranks predictors from lower to higher multicollinearity
y <- validate_arg_preference_order(
df = df,
predictors = predictors,
preference_order = NULL
)
#>
#> collinear::validate_arg_preference_order()
#> └── collinear::preference_order(): ranking 10 'predictors' from lower to higher multicollinearity.
y
#> response predictor f metric score rank
#> 1 none topo_elevation stats::cor() 1 - R-squared 0.8184 1
#> 2 none topo_diversity stats::cor() 1 - R-squared 0.8183 2
#> 3 none topo_slope stats::cor() 1 - R-squared 0.8001 3
#> 4 none soil_temperature_min stats::cor() 1 - R-squared 0.7434 4
#> 5 none swi_range stats::cor() 1 - R-squared 0.7387 5
#> 6 none soil_temperature_mean stats::cor() 1 - R-squared 0.6534 6
#> 7 none swi_mean stats::cor() 1 - R-squared 0.6272 7
#> 8 none swi_max stats::cor() 1 - R-squared 0.6259 8
#> 9 none swi_min stats::cor() 1 - R-squared 0.6170 9
#> 10 none soil_temperature_max stats::cor() 1 - R-squared 0.6021 10
attributes(y)$validated
#> [1] TRUE
#validate character vector
y <- validate_arg_preference_order(
df = df,
predictors = predictors,
preference_order = c(
"swi_max",
"swi_min",
"swi_deviance" #does not exist
)
)
#>
#> collinear::validate_arg_preference_order()
#> └── collinear::preference_order(): ranking 8 'predictors' from lower to higher multicollinearity.
y
#> response predictor f metric score rank
#> 1 none swi_max <NA> user_preference 1.0000 1
#> 2 none swi_min <NA> user_preference 0.0000 2
#> 3 none topo_diversity stats::cor() 1 - R-squared 0.8049 3
#> 4 none swi_range stats::cor() 1 - R-squared 0.7981 4
#> 5 none topo_elevation stats::cor() 1 - R-squared 0.7859 5
#> 6 none topo_slope stats::cor() 1 - R-squared 0.7855 6
#> 7 none soil_temperature_min stats::cor() 1 - R-squared 0.7452 7
#> 8 none swi_mean stats::cor() 1 - R-squared 0.7229 8
#> 9 none soil_temperature_mean stats::cor() 1 - R-squared 0.7057 9
#> 10 none soil_temperature_max stats::cor() 1 - R-squared 0.6854 10
attributes(y)$validated
#> [1] TRUE
#validate output of preference order
x <- preference_order(
df = df,
responses = response,
predictors = predictors
)
#>
#> collinear::preference_order()
#> └── collinear::f_auto(): selected function 'f_numeric_glm()' to compute preference order.
x
#> response predictor f metric score rank
#> 1 vi_numeric swi_mean f_numeric_glm R-squared 0.7182 1
#> 2 vi_numeric swi_max f_numeric_glm R-squared 0.5549 2
#> 3 vi_numeric soil_temperature_max f_numeric_glm R-squared 0.5462 3
#> 4 vi_numeric swi_range f_numeric_glm R-squared 0.4225 4
#> 5 vi_numeric swi_min f_numeric_glm R-squared 0.2130 5
#> 6 vi_numeric soil_temperature_min f_numeric_glm R-squared 0.0972 6
#> 7 vi_numeric topo_diversity f_numeric_glm R-squared 0.0663 7
#> 8 vi_numeric topo_elevation f_numeric_glm R-squared 0.0351 8
#> 9 vi_numeric topo_slope f_numeric_glm R-squared 0.0176 9
#> 10 vi_numeric soil_temperature_mean f_numeric_glm R-squared 0.0151 10
y <- validate_arg_preference_order(
df = df,
response = response,
predictors = predictors,
preference_order = x
)
y
#> response predictor f metric score rank
#> 1 vi_numeric swi_mean f_numeric_glm R-squared 0.7182 1
#> 2 vi_numeric swi_max f_numeric_glm R-squared 0.5549 2
#> 3 vi_numeric soil_temperature_max f_numeric_glm R-squared 0.5462 3
#> 4 vi_numeric swi_range f_numeric_glm R-squared 0.4225 4
#> 5 vi_numeric swi_min f_numeric_glm R-squared 0.2130 5
#> 6 vi_numeric soil_temperature_min f_numeric_glm R-squared 0.0972 6
#> 7 vi_numeric topo_diversity f_numeric_glm R-squared 0.0663 7
#> 8 vi_numeric topo_elevation f_numeric_glm R-squared 0.0351 8
#> 9 vi_numeric topo_slope f_numeric_glm R-squared 0.0176 9
#> 10 vi_numeric soil_temperature_mean f_numeric_glm R-squared 0.0151 10
attributes(y)$validated
#> [1] TRUE