Generate Model Formulas
Usage
model_formula(
df = NULL,
response = NULL,
predictors = NULL,
term_f = NULL,
term_args = NULL,
random_effects = NULL,
quiet = FALSE
)
Arguments
- df
(optional; data frame, tibble, or sf). A data frame with responses and predictors. Required if
predictors = NULL
. Default: NULL.
- response
(optional; character string or vector) Name/s of response variable/s in
df
. Used in target encoding when it names a numeric variable and there are categorical predictors, and to compute preference order. Default: NULL.
- predictors
(optional, character vector, output of
collinear()
): predictors to include in the formula. Required if df = NULL
. Default: NULL.
- term_f
(optional; string). Name of function to apply to each term in the formula, such as "s" for
mgcv::s()
or any other smoothing function, or "poly" for stats::poly()
. Default: NULL.
- term_args
(optional; string). Arguments of the function applied to each term. For example, for "poly" it can be "degree = 2, raw = TRUE". Default: NULL
- random_effects
(optional, string or character vector). Names of variables to be used as random effects. Each element is added to the final formula as
+(1 | random_effect_name)
. Default: NULL.
- quiet
(optional; logical) If FALSE, messages generated during the execution of the function are printed to the console. Default: FALSE.
Value
A list if predictors
is a list or the length of response
is greater than one, and a character vector otherwise.
See also
Other modelling_tools:
case_weights()
,
performance_score_auc()
,
performance_score_r2()
,
performance_score_v()
Examples
#using df, response, and predictors
#----------------------------------
df <- vi[1:1000, ]
#additive formulas
formulas_additive <- model_formula(
df = df,
response = c(
"vi_numeric",
"vi_categorical"
),
predictors = vi_predictors_numeric[1:10]
)
formulas_additive
#> $vi_numeric
#> vi_numeric ~ topo_slope + topo_diversity + topo_elevation + swi_mean +
#> swi_max + swi_min + swi_range + soil_temperature_mean + soil_temperature_max +
#> soil_temperature_min
#> <environment: 0x55b8a9801ee0>
#>
#> $vi_categorical
#> vi_categorical ~ topo_slope + topo_diversity + topo_elevation +
#> swi_mean + swi_max + swi_min + swi_range + soil_temperature_mean +
#> soil_temperature_max + soil_temperature_min
#> <environment: 0x55b8a9801ee0>
#>
#using a formula in a model
#m <- stats::lm(
# formula = formulas_additive[[1]],
# data = df
# )
# using output of collinear()
#----------------------------------
selection <- collinear(
df = df,
response = c(
"vi_numeric",
"vi_binomial"
),
predictors = vi_predictors_numeric[1:10],
quiet = TRUE
)
#polynomial formulas
formulas_poly <- model_formula(
predictors = selection,
term_f = "poly",
term_args = "degree = 3, raw = TRUE"
)
formulas_poly
#> $vi_numeric
#> vi_numeric ~ poly(swi_mean, degree = 3, raw = TRUE) + poly(soil_temperature_max,
#> degree = 3, raw = TRUE) + poly(swi_range, degree = 3, raw = TRUE) +
#> poly(topo_diversity, degree = 3, raw = TRUE) + poly(soil_temperature_min,
#> degree = 3, raw = TRUE) + poly(topo_slope, degree = 3, raw = TRUE) +
#> poly(topo_elevation, degree = 3, raw = TRUE)
#> <environment: 0x55b8a1b2b4d8>
#>
#> $vi_binomial
#> vi_binomial ~ poly(swi_mean, degree = 3, raw = TRUE) + poly(soil_temperature_max,
#> degree = 3, raw = TRUE) + poly(swi_range, degree = 3, raw = TRUE) +
#> poly(soil_temperature_mean, degree = 3, raw = TRUE) + poly(topo_elevation,
#> degree = 3, raw = TRUE) + poly(topo_diversity, degree = 3,
#> raw = TRUE) + poly(topo_slope, degree = 3, raw = TRUE)
#> <environment: 0x55b8a1b2b4d8>
#>
#gam formulas
formulas_gam <- model_formula(
predictors = selection,
term_f = "s"
)
formulas_gam
#> $vi_numeric
#> vi_numeric ~ s(swi_mean) + s(soil_temperature_max) + s(swi_range) +
#> s(topo_diversity) + s(soil_temperature_min) + s(topo_slope) +
#> s(topo_elevation)
#> <environment: 0x55b8a1969c40>
#>
#> $vi_binomial
#> vi_binomial ~ s(swi_mean) + s(soil_temperature_max) + s(swi_range) +
#> s(soil_temperature_mean) + s(topo_elevation) + s(topo_diversity) +
#> s(topo_slope)
#> <environment: 0x55b8a1969c40>
#>
#adding a random effect
formulas_random_effect <- model_formula(
predictors = selection,
random_effects = "country_name"
)
formulas_random_effect
#> $vi_numeric
#> vi_numeric ~ swi_mean + soil_temperature_max + swi_range + topo_diversity +
#> soil_temperature_min + topo_slope + topo_elevation + (1 |
#> country_name)
#> <environment: 0x55b8a158f630>
#>
#> $vi_binomial
#> vi_binomial ~ swi_mean + soil_temperature_max + swi_range + soil_temperature_mean +
#> topo_elevation + topo_diversity + topo_slope + (1 | country_name)
#> <environment: 0x55b8a158f630>
#>