Title: | Encoders for Categorical Variables |
---|---|
Description: | Contains some commonly used categorical variable encoders, such as 'LabelEncoder' and 'OneHotEncoder'. Inspired by the encoders implemented in Python 'sklearn.preprocessing' package (see <http://scikit-learn.org/stable/modules/preprocessing.html>). |
Authors: | nl zhang |
Maintainer: | nl zhang <[email protected]> |
License: | GPL-2 \| GPL-3 |
Version: | 0.1.1 |
Built: | 2024-11-09 04:06:51 UTC |
Source: | https://github.com/cran/CatEncoders |
inverse.transform transforms an integer vector back to the original vector
```r
inverse.transform(enc, z)

## S4 method for signature 'LabelEncoder,numeric'
inverse.transform(enc, z)
```
enc |
A fitted LabelEncoder |
z |
A vector of integers |
A vector of characters, factors or numerics.
```r
# character vector y
y <- c('a','d','e',NA)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,c('d','d',NA,'f'))
print(z)
inverse.transform(lenc,z)

# factor vector y
y <- factor(c('a','d','e',NA),exclude=NULL)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,factor(c('a','d',NA,'f')))
inverse.transform(lenc,z)

# numeric vector y
set.seed(123)
y <- c(1:10,NA)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
newy <- sample(c(1:10,NA),5)
print(newy)
z <-transform(lenc,newy)
inverse.transform(lenc, z)
```
An S4 class to represent a LabelEncoder.
type
A character to denote the input type, either character, factor or numeric
mapping
A data.frame to store the mapping table
An S4 class to represent a LabelEncoder with character input.
classes
A character vector to store the unique values of classes
An S4 class to represent a LabelEncoder with factor input.
classes
A factor vector to store the unique values of classes
LabelEncoder.fit fits a LabelEncoder object
LabelEncoder.fit(y)
LabelEncoder.fit(y)
y |
A vector of characters, factors, or numerics, which can include NA as well |
Returns an object of S4 class LabelEncoder.
```r
# factor y
y <- factor(c('a','d','e',NA),exclude=NULL)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,factor(c('d','d',NA,'f')))
print(z)

# character y
y <- c('a','d','e',NA)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,c('d','d',NA,'f'))
print(z)

# numeric y
set.seed(123)
y <- sample(c(1:10,NA),5)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <-transform(lenc,sample(c(1:10,NA),5))
print(z)
```
An S4 class to represent a LabelEncoder with numeric input.
classes
A numeric vector to store the unique values of classes
An S4 class to represent a OneHotEncoder
n_columns
An integer value to store the number of columns of input data
n_values
A numeric vector to store the number of unique values in each column of input data
column_encoders
A list that stores the LabelEncoder for each column of input data
OneHotEncoder.fit fits a OneHotEncoder object
OneHotEncoder.fit(X)
OneHotEncoder.fit(X)
X |
A matrix or data.frame, which can include NA |
Returns an object of S4 class OneHotEncoder
```r
# matrix input
X1 <- matrix(c(0, 1, 0, 1, 0, 1, 2, 0, 3, 0, 1, 2),c(4,3),byrow=FALSE)
oenc <- OneHotEncoder.fit(X1)
z <- transform(oenc,X1,sparse=TRUE) # return a sparse matrix
print(z)

# data.frame
X2 <- cbind(data.frame(X1),X4=c('a','b','d',NA),X5=factor(c(1,2,3,1)))
oenc <- OneHotEncoder.fit(X2)
z <- transform(oenc,X2,sparse=FALSE) # return a dense matrix
print(z)
```
transform transforms a new data set using the fitted encoder
```r
transform(enc, ...)

## S4 method for signature 'LabelEncoder.Numeric'
transform(enc, y)

## S4 method for signature 'LabelEncoder.Character'
transform(enc, y)

## S4 method for signature 'LabelEncoder.Factor'
transform(enc, y)

## S4 method for signature 'OneHotEncoder'
transform(enc, X, sparse = TRUE, new.feature.error = TRUE)
```
enc |
A fitted encoder, i.e., LabelEncoder or OneHotEncoder |
... |
Additional argument list |
y |
A vector of character, factor or numeric values |
X |
A data.frame or matrix |
sparse |
If TRUE then return a sparse matrix, default = TRUE |
new.feature.error |
If TRUE then throw an error for new feature values; otherwise the new feature values are ignored, default = TRUE |
If enc is a OneHotEncoder, the returned value is a sparse or dense matrix. If enc is a LabelEncoder, the returned value is a vector.
```r
# matrix X
X1 <- matrix(c(0, 1, 0, 1, 0, 1, 2, 0, 3, 0, 1, 2),c(4,3),byrow=FALSE)
oenc <- OneHotEncoder.fit(X1)
z <- transform(oenc,X1,sparse=TRUE) # return a sparse matrix
print(z)

# data.frame X
X2 <- cbind(data.frame(X1),X4=c('a','b','d',NA),X5=factor(c(1,2,3,1)))
oenc <- OneHotEncoder.fit(X2)
z <- transform(oenc,X2,sparse=FALSE) # return a dense matrix
print(z)

# factor vector y
y <- factor(c('a','d','e',NA),exclude=NULL)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,factor(c('d','d',NA,'f')))
print(z)

# character vector y
y <- c('a','d','e',NA)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <- transform(lenc,c('d','d',NA,'f'))
print(z)

# numeric vector y
set.seed(123)
y <- sample(c(1:10,NA),5)
lenc <- LabelEncoder.fit(y)
# new values are transformed to NA
z <-transform(lenc,sample(c(1:10,NA),5))
print(z)
```