Transform a data.frame using the split points returned by the discretize function.

discretize_transform(disc, data, dropColumns = NA)

extract_discretize_transformer(disc)

Arguments

disc

the result of the discretize function, or a transformer object obtained from extract_discretize_transformer.

data

a data.frame to transform using cutpoints from disc.

dropColumns

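determines whether columns are dropped from the transformed data (presumably the columns that have no cutpoints in disc; the original description is truncated, so confirm against the implementation). Defaults to NA.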

Value

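discretize_transform returns a new data.frame whose columns are discretized using the cutpoints stored in disc. extract_discretize_transformer returns an object that can be passed as disc to discretize_transform.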

Examples


set.seed(123)
idx <- sort(sample.int(150, 100))
iris1 <- iris[idx, ]
iris2 <- iris[-idx, ]
disc <- discretize(Species ~ ., iris)
head(discretize_transform(disc, iris2))
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1   (-Inf,5.55] (3.35, Inf]  (-Inf,2.45]  (-Inf,0.8]  setosa
#> 2   (-Inf,5.55] (2.95,3.35]  (-Inf,2.45]  (-Inf,0.8]  setosa
#> 3   (-Inf,5.55] (2.95,3.35]  (-Inf,2.45]  (-Inf,0.8]  setosa
#> 5   (-Inf,5.55] (3.35, Inf]  (-Inf,2.45]  (-Inf,0.8]  setosa
#> 11  (-Inf,5.55] (3.35, Inf]  (-Inf,2.45]  (-Inf,0.8]  setosa
#> 15  (5.55,6.15] (3.35, Inf]  (-Inf,2.45]  (-Inf,0.8]  setosa

# Chain discretization:
ir1 <- discretize(Species ~ Sepal.Length, iris1)
ir2 <- discretize(Species ~ Sepal.Width, ir1, control = equalsizeControl(3))
ir3 <- discretize(Species ~ Petal.Length, ir2, control = equalsizeControl(5))

## note that Petal.Width is untouched:
head(discretize_transform(ir3, iris2))
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1   (5.05,5.55]  (3.2, Inf]   (-Inf,1.5]         0.2  setosa
#> 2   (-Inf,5.05]   (2.9,3.2]   (-Inf,1.5]         0.2  setosa
#> 3   (-Inf,5.05]   (2.9,3.2]   (-Inf,1.5]         0.2  setosa
#> 5   (-Inf,5.05]  (3.2, Inf]   (-Inf,1.5]         0.2  setosa
#> 11  (5.05,5.55]  (3.2, Inf]   (-Inf,1.5]         0.2  setosa
#> 15  (5.55, Inf]  (3.2, Inf]   (-Inf,1.5]         0.2  setosa

## extract_discretize_transformer
discObj <- extract_discretize_transformer(ir3)
head(discretize_transform(discObj, iris2))
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1   (5.05,5.55]  (3.2, Inf]   (-Inf,1.5]         0.2  setosa
#> 2   (-Inf,5.05]   (2.9,3.2]   (-Inf,1.5]         0.2  setosa
#> 3   (-Inf,5.05]   (2.9,3.2]   (-Inf,1.5]         0.2  setosa
#> 5   (-Inf,5.05]  (3.2, Inf]   (-Inf,1.5]         0.2  setosa
#> 11  (5.05,5.55]  (3.2, Inf]   (-Inf,1.5]         0.2  setosa
#> 15  (5.55, Inf]  (3.2, Inf]   (-Inf,1.5]         0.2  setosa