Skip to contents

Generate Model Formulas

Usage

model_formula(
  df = NULL,
  response = NULL,
  predictors = NULL,
  term_f = NULL,
  term_args = NULL,
  random_effects = NULL,
  quiet = FALSE
)

Arguments

df

(optional; data frame, tibble, or sf). A data frame with responses and predictors. Required if predictors = NULL. Default: NULL.

response

(optional; character string or vector) Name/s of response variable/s in df. Used in target encoding when it names a numeric variable and there are categorical predictors, and to compute preference order. Default: NULL.

predictors

(optional; character vector or output of collinear()) Predictors to include in the formula. Required if df = NULL. Default: NULL.

term_f

(optional; string) Name of the function to apply to each term in the formula, such as "s" for mgcv::s() (or any other smoothing function) or "poly" for stats::poly(). Default: NULL.

term_args

(optional; string). Arguments of the function applied to each term. For example, for "poly" it can be "degree = 2, raw = TRUE". Default: NULL

random_effects

(optional, string or character vector). Names of variables to be used as random effects. Each element is added to the final formula as +(1 | random_effect_name). Default: NULL

quiet

(optional; logical) If FALSE, messages generated during the execution of the function are printed to the console. Default: FALSE.

Value

A list if predictors is a list or if response has more than one element; a character vector otherwise.

See also

Examples

#using df, response, and predictors
#----------------------------------
df <- vi[1:1000, ]

#additive formulas
formulas_additive <- model_formula(
  df = df,
  response = c(
    "vi_numeric",
    "vi_categorical"
    ),
  predictors = vi_predictors_numeric[1:10]
)

formulas_additive
#> $vi_numeric
#> vi_numeric ~ topo_slope + topo_diversity + topo_elevation + swi_mean + 
#>     swi_max + swi_min + swi_range + soil_temperature_mean + soil_temperature_max + 
#>     soil_temperature_min
#> <environment: 0x560ca731ffc8>
#> 
#> $vi_categorical
#> vi_categorical ~ topo_slope + topo_diversity + topo_elevation + 
#>     swi_mean + swi_max + swi_min + swi_range + soil_temperature_mean + 
#>     soil_temperature_max + soil_temperature_min
#> <environment: 0x560ca731ffc8>
#> 

#using a formula in a model
#m <- stats::lm(
#  formula = formulas_additive[[1]],
#  data = df
#  )

# using output of collinear()
#----------------------------------
selection <- collinear(
  df = df,
  response = c(
    "vi_numeric",
    "vi_binomial"
  ),
  predictors = vi_predictors_numeric[1:10],
  quiet = TRUE
)

#polynomial formulas
formulas_poly <- model_formula(
  predictors = selection,
  term_f = "poly",
  term_args = "degree = 3, raw = TRUE"
)

formulas_poly
#> $vi_numeric
#> vi_numeric ~ poly(swi_mean, degree = 3, raw = TRUE) + poly(soil_temperature_max, 
#>     degree = 3, raw = TRUE) + poly(swi_range, degree = 3, raw = TRUE) + 
#>     poly(topo_diversity, degree = 3, raw = TRUE) + poly(soil_temperature_min, 
#>     degree = 3, raw = TRUE) + poly(topo_slope, degree = 3, raw = TRUE) + 
#>     poly(topo_elevation, degree = 3, raw = TRUE)
#> <environment: 0x560c9f389600>
#> 
#> $vi_binomial
#> vi_binomial ~ poly(swi_mean, degree = 3, raw = TRUE) + poly(soil_temperature_max, 
#>     degree = 3, raw = TRUE) + poly(swi_range, degree = 3, raw = TRUE) + 
#>     poly(soil_temperature_mean, degree = 3, raw = TRUE) + poly(topo_elevation, 
#>     degree = 3, raw = TRUE) + poly(topo_diversity, degree = 3, 
#>     raw = TRUE) + poly(topo_slope, degree = 3, raw = TRUE)
#> <environment: 0x560c9f389600>
#> 

#gam formulas
formulas_gam <- model_formula(
  predictors = selection,
  term_f = "s"
)

formulas_gam
#> $vi_numeric
#> vi_numeric ~ s(swi_mean) + s(soil_temperature_max) + s(swi_range) + 
#>     s(topo_diversity) + s(soil_temperature_min) + s(topo_slope) + 
#>     s(topo_elevation)
#> <environment: 0x560c9f25fc38>
#> 
#> $vi_binomial
#> vi_binomial ~ s(swi_mean) + s(soil_temperature_max) + s(swi_range) + 
#>     s(soil_temperature_mean) + s(topo_elevation) + s(topo_diversity) + 
#>     s(topo_slope)
#> <environment: 0x560c9f25fc38>
#> 

#adding a random effect
formulas_random_effect <- model_formula(
  predictors = selection,
  random_effects = "country_name"
)

formulas_random_effect
#> $vi_numeric
#> vi_numeric ~ swi_mean + soil_temperature_max + swi_range + topo_diversity + 
#>     soil_temperature_min + topo_slope + topo_elevation + (1 | 
#>     country_name)
#> <environment: 0x560c9f06ede8>
#> 
#> $vi_binomial
#> vi_binomial ~ swi_mean + soil_temperature_max + swi_range + soil_temperature_mean + 
#>     topo_elevation + topo_diversity + topo_slope + (1 | country_name)
#> <environment: 0x560c9f06ede8>
#>