Skip to contents

Target Encoding Methods

Usage

target_encoding_mean(
  df = NULL,
  response = NULL,
  predictor = NULL,
  encoded_name = NULL,
  smoothing = 0
)

target_encoding_rank(
  df = NULL,
  response = NULL,
  predictor = NULL,
  encoded_name = NULL,
  smoothing = 0
)

target_encoding_loo(
  df = NULL,
  response = NULL,
  predictor = NULL,
  encoded_name = NULL,
  smoothing = 0
)

Arguments

df

(required; data frame, tibble, or sf) A data frame with responses and predictors. Default: NULL.

response

(required; character string) Name of the numeric response variable in df from which the encoding is computed. Default: NULL.

predictor

(required; character string) Name of the categorical predictor in df to encode. Default: NULL.

encoded_name

(required; character string) Name of the column in the output data frame that stores the encoded predictor. Default: NULL.

smoothing

(optional; integer) Groups with fewer cases than this number have their means pulled towards the mean of the response across all cases. Only used by target_encoding_mean(); ignored by target_encoding_rank() and target_encoding_loo(). Default: 0.

Value

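A data frame: the input df with an additional column, named after encoded_name, containing the encoded predictor.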

See also

Other target_encoding: target_encoding_lab()

Examples


data(vi)

#subset to limit example run time
vi <- vi[1:1000, ]

#mean encoding
#-------------

#default settings (smoothing = 0)
df <- target_encoding_mean(
  df = vi,
  response = "vi_numeric",
  predictor = "soil_type",
  encoded_name = "soil_type_encoded"
)

plot(
  x = df$soil_type_encoded,
  y = df$vi_numeric,
  xlab = "encoded variable",
  ylab = "response"
)


#group rank
#----------

df <- target_encoding_rank(
  df = vi,
  response = "vi_numeric",
  predictor = "soil_type",
  encoded_name = "soil_type_encoded"
)

plot(
  x = df$soil_type_encoded,
  y = df$vi_numeric,
  xlab = "encoded variable",
  ylab = "response"
)



#leave-one-out
#-------------

#smoothing is ignored by this method
df <- target_encoding_loo(
  df = vi,
  response = "vi_numeric",
  predictor = "soil_type",
  encoded_name = "soil_type_encoded"
)

plot(
  x = df$soil_type_encoded,
  y = df$vi_numeric,
  xlab = "encoded variable",
  ylab = "response"
)