In this lab, we cover some R packages for training neural networks. We use the same datasets as in the previous lab, the Boston housing data and the credit scoring data, and additionally the MNIST handwritten digit data.
# load Boston data
library(MASS)
library(tidyverse)
data(Boston)
maxs <- apply(Boston, 2, max)
mins <- apply(Boston, 2, min)
Boston.scaled <- as.data.frame(scale(Boston, center = mins, scale = maxs - mins))
index <- sample(nrow(Boston),nrow(Boston)*0.70)
boston.train <- Boston.scaled[index,]
boston.test <- Boston.scaled[-index,]
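Side note: sample() produces a different split on every run, so the numbers below will vary slightly from run to run. To make the split reproducible, one can fix the random seed first (the seed value here is arbitrary); the same applies to the credit card and MNIST splits below.
set.seed(2023) # any fixed value works; run before each call to sample()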
# load credit card data
credit.data <- read.csv("https://www.dropbox.com/s/tnoo06n8m842uit/credit_card_default.csv?dl=1")
credit.data<- rename(credit.data, default=default.payment.next.month)
# convert categorical data to factor
credit.data$SEX<- as.factor(credit.data$SEX)
credit.data$EDUCATION<- as.factor(credit.data$EDUCATION)
credit.data$MARRIAGE<- as.factor(credit.data$MARRIAGE)
# random splitting
index <- sample(nrow(credit.data),nrow(credit.data)*0.80)
credit.train = credit.data[index,]
credit.test = credit.data[-index,]
# load MNIST data
digit<- data.matrix(read_csv("https://www.dropbox.com/s/ulujvi2a4ykfzju/train.csv?dl=1"))
# random splitting
index<- sample(1:nrow(digit), 0.6*nrow(digit))
train<- digit[index,]
test<- digit[-index,]
# separate labels from pixels, then rescale pixel values to [0,1]
train.x <- train[,-1] #remove 'label' column
test.x<- test[,-1]
train.y <- train[,1] #label column
test.y<- test[,1]
train.x <- train.x/255
test.x <- test.x/255
neuralnet
The R package neuralnet can fit simple (shallow, with few hidden layers) artificial neural network models. One nice feature of this package is that the user can visualize the fitted network as well as its weights.
library(neuralnet)
We use our familiar Boston housing data for demonstration. Note that it is recommended to standardize the data, including the response variable, to [0, 1] (min-max scaling: (x - min)/(max - min)); we already did this when loading the data.
# fit neural network with one hidden layer
boston.ann1<- neuralnet(medv~., data = boston.train, hidden = 5, linear.output = TRUE)
#Plot the neural network
plot(boston.ann1)
Prediction on the testing sample.
boston.pred1<- compute(boston.ann1, boston.test)
boston.pred1<- boston.pred1$net.result
## scale back (medv is column 14 of the Boston data, hence maxs[14] and mins[14])
boston.pred1 <- boston.pred1*(maxs[14] - mins[14])+mins[14]
y.test <- boston.test$medv*(maxs[14] - mins[14])+mins[14]
mean((y.test-boston.pred1)^2)
## [1] 21.19456
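Side note: in newer versions of neuralnet (1.44 and later), compute() is deprecated in favor of a predict() method. A minimal equivalent sketch (boston.pred1.alt is a hypothetical name):
# same predictions as compute(...)$net.result, still on the scaled response
boston.pred1.alt<- predict(boston.ann1, newdata = boston.test)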
Let’s add another hidden layer.
# fit neural network with two hidden layers
boston.ann2<- neuralnet(medv~., data = boston.train, hidden = c(5,3), linear.output = TRUE)
#Plot the neural network
plot(boston.ann2)
Prediction on the testing sample.
boston.pred2<- compute(boston.ann2, boston.test)
boston.pred2<- boston.pred2$net.result
## scale back
boston.pred2 <- boston.pred2*(maxs[14] - mins[14])+mins[14]
mean((y.test-boston.pred2)^2)
## [1] 19.0763
How does the network compare with a plain linear model? Fit a linear regression on the same scaled training data.
boston.reg<- lm(medv~., data = boston.train)
boston.reg.pred<- predict(boston.reg, newdata = boston.test)
## scale back
boston.reg.pred <- boston.reg.pred*(maxs[14] - mins[14])+mins[14]
mean((y.test-boston.reg.pred)^2)
## [1] 20.20717
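The architectures above (hidden = 5 and hidden = c(5,3)) were chosen ad hoc. Below is a minimal sketch for comparing a few candidates on the test set, reusing the objects defined above; it relies on a single split (cross-validation would be more reliable), and neuralnet may occasionally fail to converge within its default stepmax.
for (h in list(3, 5, c(5, 3))) {
  fit <- neuralnet(medv~., data = boston.train, hidden = h, linear.output = TRUE)
  pred <- compute(fit, boston.test)$net.result*(maxs[14] - mins[14])+mins[14] # scale back
  cat("hidden =", paste(h, collapse = "-"), "test MSE =", mean((y.test - pred)^2), "\n")
}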
Next, we fit a neural network to the credit card data for classification. First, standardize the data and create dummy variables for the categorical predictors.
y.ind <- which(names(credit.train)=="default")
credit.train.X<- as.data.frame(scale(model.matrix(~., data = credit.train[,-y.ind])[,-1]))
credit.train.Y<- credit.train[,y.ind]
credit.train1<- data.frame(default= credit.train.Y, credit.train.X)
credit.test1<- as.data.frame(scale(model.matrix(~., data = credit.test[,-y.ind])[,-1]))
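Caution: the code above scales the test design matrix with its own column means and standard deviations. A common alternative is to reuse the training statistics; a sketch (credit.test1.alt is a hypothetical name, not used below):
train.mm<- model.matrix(~., data = credit.train[,-y.ind])[,-1]
test.mm<- model.matrix(~., data = credit.test[,-y.ind])[,-1]
# scale the test set with the TRAINING means and standard deviations
credit.test1.alt<- as.data.frame(scale(test.mm, center = colMeans(train.mm),
                                       scale = apply(train.mm, 2, sd)))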
# fit neural networks. CAUTION: this is slow
credit.ann<- neuralnet(default~., data = credit.train1, hidden = 3, linear.output = FALSE)
plot(credit.ann)
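By default, neuralnet minimizes the sum of squared errors (err.fct = "sse"). For a 0/1 response, the cross-entropy loss is often preferred; a sketch (equally slow to train; we continue with the default fit credit.ann below):
credit.ann.ce<- neuralnet(default~., data = credit.train1, hidden = 3,
                          err.fct = "ce", linear.output = FALSE)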
credit.pred1<- compute(credit.ann, credit.test1)
head(cbind(credit.test$default, credit.pred1$net.result), 10)
## [,1] [,2]
## 4 0 0.16659055
## 8 0 0.25877303
## 27 1 0.37737295
## 28 0 0.16991665
## 30 0 0.21747912
## 34 0 0.08735394
## 36 0 0.13229628
## 40 0 0.13736966
## 41 0 0.36633107
## 48 1 0.08735416
To evaluate the out-of-sample classification performance, draw the ROC curve and compute the AUC with the ROCR package.
library(ROCR)
pred <- prediction(credit.pred1$net.result, credit.test$default)
perf <- performance(pred, "tpr", "fpr")
plot(perf, colorize=TRUE)
#Get the AUC
unlist(slot(performance(pred, "auc"), "y.values"))
## [1] 0.7578199
# classify as default (1) if the predicted probability exceeds the training default rate
credit.pred01.ann<- (credit.pred1$net.result>mean(credit.train$default))*1
#Confusion matrix
table(credit.test$default, credit.pred01.ann, dnn=c("Truth","Predicted"))
## Predicted
## Truth 0 1
## 0 3375 1306
## 1 423 896
# misclassification rate (MR), false positive rate (FPR), false negative rate (FNR)
c(mean(credit.test$default != credit.pred01.ann),
sum(credit.test$default ==0 & credit.pred01.ann==1)/sum(credit.test$default ==0),
sum(credit.test$default ==1 & credit.pred01.ann==0)/sum(credit.test$default ==1)
)
## [1] 0.2881667 0.2790002 0.3206975
For comparison, fit a logistic regression model on the same data and evaluate it the same way.
#Fit logistic regression model
credit.glm<- glm(default~., data = credit.train, family=binomial)
#Get prediction
credit.pred.glm<- predict(credit.glm, credit.test, type="response")
# obtain ROC curve
pred <- prediction(credit.pred.glm, credit.test$default)
perf <- performance(pred, "tpr", "fpr")
plot(perf, colorize=TRUE)
#Get the AUC
unlist(slot(performance(pred, "auc"), "y.values"))
## [1] 0.7367672
credit.pred01.glm<- (credit.pred.glm>mean(credit.train$default))*1
#Confusion matrix
table(credit.test$default, credit.pred01.glm, dnn=c("Truth","Predicted"))
## Predicted
## Truth 0 1
## 0 3215 1466
## 1 446 873
# MR, FPR, FNR
c(mean(credit.test$default != credit.pred01.glm),
sum(credit.test$default ==0 & credit.pred01.glm==1)/sum(credit.test$default ==0),
sum(credit.test$default ==1 & credit.pred01.glm==0)/sum(credit.test$default ==1)
)
## [1] 0.3186667 0.3131809 0.3381350
MXNet
This part of the notes is based on a Kaggle kernel. For Windows users, please run the code below to install the mxnet package. According to the official site, MXNet is not available for R versions above 3.5, so this part of the code will not work if your R version is newer than 3.5.
cran <- getOption("repos")
cran["dmlc"] <- "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/CRAN/"
options(repos = cran)
install.packages("mxnet")
library(mxnet)
plotTrain <- function(images){
op <- par(no.readonly=TRUE)
x <- ceiling(sqrt(length(images)))
par(mfrow=c(x, x), mar=c(.1, .1, .1, .1))
for (i in images){ #reverse and transpose each matrix to rotate images
m <- matrix(train[i,-1], nrow=28, byrow=TRUE)
m <- apply(m, 2, rev)
image(t(m), col=gray((0:255)/255), axes=FALSE)
text(0.05, 0.2, col="white", cex=1.2, train[i, 1])
}
par(op) #reset the original graphics parameters
}
plotTrain(1:36)
We need to specify the number of neurons and the activation function for each layer. We use ReLU (rectified linear unit) activations in the first two hidden layers, and softmax on the output layer because we have multiple (ten) classes.
For more information, I recommend checking out the MXNet tutorial page.
m1.data <- mx.symbol.Variable("data") # Notice how each layer is passed to the next
m1.fc1 <- mx.symbol.FullyConnected(m1.data, name="fc1", num_hidden=128)
m1.act1 <- mx.symbol.Activation(m1.fc1, name="activation1", act_type="relu")
m1.fc2 <- mx.symbol.FullyConnected(m1.act1, name="fc2", num_hidden=64)
m1.act2 <- mx.symbol.Activation(m1.fc2, name="activation2", act_type="relu")
m1.fc3 <- mx.symbol.FullyConnected(m1.act2, name="fc3", num_hidden=10)
m1.softmax <- mx.symbol.SoftmaxOutput(m1.fc3, name="softMax")
MXNet can make use of the Graphviz package to create an interactive plot of the network structure. This network is nothing fancy, but such plots can be quite useful when your network is complex.
graph.viz(m1.softmax)
By default, the model is trained on the CPU, but it is really fast. With a GPU it can be 50+ times faster; however, you have to install the GPU version of the package yourself.
devices <- mx.cpu() # decide which device to use (mx.gpu() with the GPU build)
#log <- mx.metric.logger$new() # optional: keep track of the results each iteration
tick <- proc.time() #mark the start time
mx.set.seed(0)
mxnet1 <- mx.model.FeedForward.create(m1.softmax, #the network configuration made above
X = t(train.x), #the predictors (need transpose)
y = train.y, #the labels
ctx = devices,
num.round = 20, # number of training iterations
array.batch.size = 100,
array.layout="colmajor",
learning.rate = 0.01,
momentum = 0.95,
eval.metric = mx.metric.accuracy,
initializer = mx.init.uniform(0.07),
epoch.end.callback = mx.callback.log.train.metric(100)
)
print(paste("Training took:", round((proc.time() - tick)[3],2),"seconds"))
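Side note: the commented-out logger above can record the training metric at each checkpoint so it can be plotted after training. A sketch (pass the logger into the callback when calling mx.model.FeedForward.create):
log <- mx.metric.logger$new()
# in mx.model.FeedForward.create, set:
#   epoch.end.callback = mx.callback.log.train.metric(100, log)
# after training, the recorded training accuracies are in log$train
plot(log$train, type = "l", xlab = "checkpoint", ylab = "training accuracy")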
preds <- predict(mxnet1, test.x)
dim(preds) # should be 10 rows (one probability per class) by one column per test image
pred.label <- max.col(t(preds)) - 1 # most probable class; subtract 1 because labels run 0-9
table(pred.label)
mean(pred.label==test.y) # overall test accuracy
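Beyond overall accuracy, a confusion matrix (plain base R) shows which digits are mistaken for which:
table(truth = test.y, predicted = pred.label)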