| Title: | Encoders for Categorical Variables |
|---|---|
| Description: | Contains some commonly used categorical variable encoders, such as 'LabelEncoder' and 'OneHotEncoder'. Inspired by the encoders implemented in Python 'sklearn.preprocessing' package (see <http://scikit-learn.org/stable/modules/preprocessing.html>). |
| Authors: | nl zhang |
| Maintainer: | nl zhang <[email protected]> |
| License: | GPL-2 \| GPL-3 |
| Version: | 0.1.1 |
| Built: | 2026-05-31 09:53:41 UTC |
| Source: | https://github.com/cran/CatEncoders |
inverse.transform transforms an integer vector back to the original vector
```r
inverse.transform(enc, z)

## S4 method for signature 'LabelEncoder,numeric'
inverse.transform(enc, z)
```
enc |
A fitted LabelEncoder |
z |
A vector of integers |
A vector of characters, factors or numerics.
```r
# character vector y
y <- c('a','d','e',NA)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,c('d','d',NA,'f'))
print(z)
inverse.transform(lenc,z)

# factor vector y
y <- factor(c('a','d','e',NA),exclude=NULL)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,factor(c('a','d',NA,'f')))
inverse.transform(lenc,z)

# numeric vector y
set.seed(123)
y <- c(1:10,NA)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
newy <- sample(c(1:10,NA),5)
print(newy)
z <-transform(lenc,newy)
inverse.transform(lenc, z)
```
An S4 class to represent a LabelEncoder.
`type`: A character to denote the input type, either character, factor or numeric
`mapping`: A data.frame to store the mapping table
An S4 class to represent a LabelEncoder with character input.
`classes`: A character vector to store the unique values of classes
An S4 class to represent a LabelEncoder with factor input.
`classes`: A factor vector to store the unique values of classes
LabelEncoder.fit fits a LabelEncoder object
```r
LabelEncoder.fit(y)
```
y |
A vector of characters, factors, or numerics, which can include NA as well |
Returns an object of S4 class LabelEncoder.
```r
# factor y
y <- factor(c('a','d','e',NA),exclude=NULL)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,factor(c('d','d',NA,'f')))
print(z)

# character y
y <- c('a','d','e',NA)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,c('d','d',NA,'f'))
print(z)

# numeric y
set.seed(123)
y <- sample(c(1:10,NA),5)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <-transform(lenc,sample(c(1:10,NA),5))
print(z)
```
An S4 class to represent a LabelEncoder with numeric input.
`classes`: A numeric vector to store the unique values of classes
An S4 class to represent a OneHotEncoder
`n_columns`: An integer value to store the number of columns of input data
`n_values`: A numeric vector to store the number of unique values in each column of input data
`column_encoders`: A list that stores the LabelEncoder for each column of input data
OneHotEncoder.fit fits a OneHotEncoder object
```r
OneHotEncoder.fit(X)
```
X |
A matrix or data.frame, which can include NA |
Returns an object of S4 class OneHotEncoder
```r
# matrix input
X1 <- matrix(c(0, 1, 0, 1, 0, 1, 2, 0, 3, 0, 1, 2),c(4,3),byrow=FALSE)
oenc <- OneHotEncoder.fit(X1)
z <- transform(oenc,X1,sparse=TRUE) # return a sparse matrix
print(z)

# data.frame
X2 <- cbind(data.frame(X1),X4=c('a','b','d',NA),X5=factor(c(1,2,3,1)))
oenc <- OneHotEncoder.fit(X2)
z <- transform(oenc,X2,sparse=FALSE) # return a dense matrix
print(z)
```
transform transforms a new data set using the fitted encoder
```r
transform(enc, ...)

## S4 method for signature 'LabelEncoder.Numeric'
transform(enc, y)

## S4 method for signature 'LabelEncoder.Character'
transform(enc, y)

## S4 method for signature 'LabelEncoder.Factor'
transform(enc, y)

## S4 method for signature 'OneHotEncoder'
transform(enc, X, sparse = TRUE, new.feature.error = TRUE)
```
enc |
A fitted encoder, i.e., LabelEncoder or OneHotEncoder |
... |
Additional argument list |
y |
A vector of character, factor or numeric values |
X |
A data.frame or matrix |
sparse |
If TRUE then return a sparse matrix, default = TRUE |
new.feature.error |
If TRUE then throw an error for new feature values; otherwise the new feature values are ignored, default = TRUE |
If enc is a OneHotEncoder, the returned value is a sparse or dense matrix. If enc is a LabelEncoder, the returned value is a vector.
```r
# matrix X
X1 <- matrix(c(0, 1, 0, 1, 0, 1, 2, 0, 3, 0, 1, 2),c(4,3),byrow=FALSE)
oenc <- OneHotEncoder.fit(X1)
z <- transform(oenc,X1,sparse=TRUE) # return a sparse matrix
print(z)

# data.frame X
X2 <- cbind(data.frame(X1),X4=c('a','b','d',NA),X5=factor(c(1,2,3,1)))
oenc <- OneHotEncoder.fit(X2)
z <- transform(oenc,X2,sparse=FALSE) # return a dense matrix
print(z)

# factor vector y
y <- factor(c('a','d','e',NA),exclude=NULL)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,factor(c('d','d',NA,'f')))
print(z)

# character vector y
y <- c('a','d','e',NA)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,c('d','d',NA,'f'))
print(z)

# numeric vector y
set.seed(123)
y <- sample(c(1:10,NA),5)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <-transform(lenc,sample(c(1:10,NA),5))
print(z)
```