R/discretize_transform.R
discretize_transform.Rd
Transform a data.frame using the split points returned by the discretize function.
discretize_transform(disc, data, dropColumns = NA)
extract_discretize_transformer(disc)
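disc: a result of the discretize function.
data: a data.frame to transform using the cutpoints from disc.
dropColumns: determines which columns are dropped from the result (the original description is truncated here). Defaults to NA; with that default, the chained example below keeps the non-discretized Petal.Width column.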
A new data.frame whose columns are discretized using the cutpoints from the result of the discretize function.
set.seed(123)
idx <- sort(sample.int(150, 100))
iris1 <- iris[idx, ]
iris2 <- iris[-idx, ]
disc <- discretize(Species ~ ., iris)
head(discretize_transform(disc, iris2))
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1 (-Inf,5.55] (3.35, Inf] (-Inf,2.45] (-Inf,0.8] setosa
#> 2 (-Inf,5.55] (2.95,3.35] (-Inf,2.45] (-Inf,0.8] setosa
#> 3 (-Inf,5.55] (2.95,3.35] (-Inf,2.45] (-Inf,0.8] setosa
#> 5 (-Inf,5.55] (3.35, Inf] (-Inf,2.45] (-Inf,0.8] setosa
#> 11 (-Inf,5.55] (3.35, Inf] (-Inf,2.45] (-Inf,0.8] setosa
#> 15 (5.55,6.15] (3.35, Inf] (-Inf,2.45] (-Inf,0.8] setosa
# Chain discretization:
ir1 <- discretize(Species ~ Sepal.Length, iris1)
ir2 <- discretize(Species ~ Sepal.Width, ir1, control = equalsizeControl(3))
ir3 <- discretize(Species ~ Petal.Length, ir2, control = equalsizeControl(5))
## note that Petal.Width is untouched:
head(discretize_transform(ir3, iris2))
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1 (5.05,5.55] (3.2, Inf] (-Inf,1.5] 0.2 setosa
#> 2 (-Inf,5.05] (2.9,3.2] (-Inf,1.5] 0.2 setosa
#> 3 (-Inf,5.05] (2.9,3.2] (-Inf,1.5] 0.2 setosa
#> 5 (-Inf,5.05] (3.2, Inf] (-Inf,1.5] 0.2 setosa
#> 11 (5.05,5.55] (3.2, Inf] (-Inf,1.5] 0.2 setosa
#> 15 (5.55, Inf] (3.2, Inf] (-Inf,1.5] 0.2 setosa
## extract_discretize_transformer: the extracted object yields the same transformation as ir3:
discObj <- extract_discretize_transformer(ir3)
head(discretize_transform(discObj, iris2))
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1 (5.05,5.55] (3.2, Inf] (-Inf,1.5] 0.2 setosa
#> 2 (-Inf,5.05] (2.9,3.2] (-Inf,1.5] 0.2 setosa
#> 3 (-Inf,5.05] (2.9,3.2] (-Inf,1.5] 0.2 setosa
#> 5 (-Inf,5.05] (3.2, Inf] (-Inf,1.5] 0.2 setosa
#> 11 (5.05,5.55] (3.2, Inf] (-Inf,1.5] 0.2 setosa
#> 15 (5.55, Inf] (3.2, Inf] (-Inf,1.5] 0.2 setosa