Target Encoding Methods
Usage
target_encoding_mean(
df = NULL,
response = NULL,
predictor = NULL,
encoded_name = NULL,
smoothing = 0
)
target_encoding_rank(
df = NULL,
response = NULL,
predictor = NULL,
encoded_name = NULL,
smoothing = 0
)
target_encoding_loo(
df = NULL,
response = NULL,
predictor = NULL,
encoded_name = NULL,
smoothing = 0
)
Arguments
- df
(required; data frame, tibble, or sf) A data frame with responses and predictors. Default: NULL.
- response
(optional, character string) Name of a numeric response variable in df. Default: NULL.
- predictor
(required; string) Name of the categorical predictor to encode. Default: NULL
- encoded_name
(required, string) Name of the encoded predictor. Default: NULL
- smoothing
(optional; integer) Groups smaller than this number have their means pulled towards the mean of the response across all cases. Ignored by target_encoding_rank() and target_encoding_loo(). Default: 0
See also
Other target_encoding:
target_encoding_lab()
Examples
# Example script: encode the categorical predictor "soil_type" against the
# numeric response "vi_numeric" with each of the three methods, then plot
# the response against the encoded values.
# load the example dataset `vi` (presumably shipped with the package; TODO confirm)
data(vi)
# subset to the first 1000 rows to limit example run time
vi <- vi[1:1000, ]
# mean encoding
# -------------
# smoothing is not passed, so it stays at its default (0)
df <- target_encoding_mean(
df = vi,
response = "vi_numeric",
predictor = "soil_type",
encoded_name = "soil_type_encoded"
)
# the encoded predictor is read from the column named by `encoded_name`
plot(
x = df$soil_type_encoded,
y = df$vi_numeric,
xlab = "encoded variable",
ylab = "response"
)
# group rank
# ----------
# smoothing is documented as ignored by target_encoding_rank(), so it is not set
df <- target_encoding_rank(
df = vi,
response = "vi_numeric",
predictor = "soil_type",
encoded_name = "soil_type_encoded"
)
# same plot as above, now with the rank-encoded predictor on the x axis
plot(
x = df$soil_type_encoded,
y = df$vi_numeric,
xlab = "encoded variable",
ylab = "response"
)
# leave-one-out
# -------------
# smoothing is documented as ignored by target_encoding_loo(), so it is not set
df <- target_encoding_loo(
df = vi,
response = "vi_numeric",
predictor = "soil_type",
encoded_name = "soil_type_encoded"
)
# same plot as above, now with the leave-one-out encoded predictor on the x axis
plot(
x = df$soil_type_encoded,
y = df$vi_numeric,
xlab = "encoded variable",
ylab = "response"
)