
Encode categories as response means
Source:R/target_encoding_loo.R, R/target_encoding_mean.R, R/target_encoding_rank.R
target_encoding_methods.Rd
Encode categories as response means
Usage
target_encoding_loo(
df = NULL,
response = NULL,
predictor = NULL,
encoded_name = NULL,
smoothing = NULL,
...
)
target_encoding_mean(
df = NULL,
response = NULL,
predictor = NULL,
encoded_name = NULL,
smoothing = 0,
...
)
target_encoding_rank(
df = NULL,
response = NULL,
predictor = NULL,
encoded_name = NULL,
smoothing = NULL,
...
)
Arguments
- df
(required; dataframe, tibble, or sf) A dataframe with responses (optional) and predictors. Must have at least 10 rows for pairwise correlation analysis, and
10 * (length(predictors) - 1) for VIF. Default: NULL.
- response
(optional; character string) Name of a numeric response variable in
df. Default: NULL.
- predictor
(required; string) Name of the categorical predictor to encode. Default: NULL
- encoded_name
(optional, string) Name of the encoded predictor. Default: NULL
- smoothing
(optional; integer) Groups smaller than this number have their means pulled towards the mean of the response across all cases. Ignored by
target_encoding_rank() and target_encoding_loo(). Default: 0
- ...
(optional) Internal args (e.g.
function_name for validate_arg_function_name, a precomputed correlation matrix m, or cross-validation args for preference_order).
See also
Other target_encoding:
target_encoding_lab()
Other target_encoding:
target_encoding_lab()
Examples
# loading example data
data(vi_smol)
#mean encoding
#-------------
df <- target_encoding_mean(
df = vi_smol,
response = "vi_numeric",
predictor = "soil_type", #categorical
encoded_name = "soil_type_encoded"
)
if(interactive()){
plot(
x = df$soil_type_encoded,
y = df$vi_numeric,
xlab = "encoded variable",
ylab = "response"
)
}
#rank encoding
#----------
df <- target_encoding_rank(
df = vi_smol,
response = "vi_numeric",
predictor = "soil_type",
encoded_name = "soil_type_encoded"
)
if(interactive()){
plot(
x = df$soil_type_encoded,
y = df$vi_numeric,
xlab = "encoded variable",
ylab = "response"
)
}
#leave-one-out
#-------------
df <- target_encoding_loo(
df = vi_smol,
response = "vi_numeric",
predictor = "soil_type",
encoded_name = "soil_type_encoded"
)
if(interactive()){
plot(
x = df$soil_type_encoded,
y = df$vi_numeric,
xlab = "encoded variable",
ylab = "response"
)
}