In this lab, we cover some R packages for training neural networks. We use the Boston Housing data and the Credit Scoring data from the previous lab, as well as the MNIST handwritten digit data.

# load Boston data
library(MASS)
library(tidyverse)
data(Boston)
maxs <- apply(Boston, 2, max)  # column-wise maxima
mins <- apply(Boston, 2, min)  # column-wise minima
# min-max scale every variable (including the response medv) to [0,1]
Boston.scaled <- as.data.frame(scale(Boston, center = mins, scale = maxs - mins))
# 70/30 random train/test split
index <- sample(nrow(Boston), nrow(Boston)*0.70)
boston.train <- Boston.scaled[index,]
boston.test <- Boston.scaled[-index,]

# load credit card data
credit.data <- read.csv("https://www.dropbox.com/s/tnoo06n8m842uit/credit_card_default.csv?dl=1")
credit.data<- rename(credit.data, default=default.payment.next.month)
# convert categorical data to factor
credit.data$SEX<- as.factor(credit.data$SEX)
credit.data$EDUCATION<- as.factor(credit.data$EDUCATION)
credit.data$MARRIAGE<- as.factor(credit.data$MARRIAGE)
# random splitting
index <- sample(nrow(credit.data),nrow(credit.data)*0.80)
credit.train = credit.data[index,]
credit.test = credit.data[-index,]

# load MNIST data
digit<- data.matrix(read_csv("https://www.dropbox.com/s/ulujvi2a4ykfzju/train.csv?dl=1"))
# random splitting
index<- sample(1:nrow(digit), 0.6*nrow(digit))
train<- digit[index,]
test<- digit[-index,]
# standardize X
train.x <- train[,-1] #remove 'label' column
test.x<- test[,-1]
train.y <- train[,1] #label column
test.y<- test[,1]
train.x <- train.x/255
test.x <- test.x/255

Neural network with package neuralnet

The R package neuralnet can fit simple artificial neural network models (those with only a few hidden layers). One nice feature of this package is that the user can visualize the fitted network together with its estimated weights.

library(neuralnet)

ANN for regression.

We use our familiar Boston Housing data for demonstration. Note that it is recommended to scale all variables, including the response, to [0, 1], for example via min-max scaling (x - min)/(max - min); we already did this when loading the data.

# fit neural network with one hidden layer
boston.ann1<- neuralnet(medv~., data = boston.train, hidden = 5, linear.output = TRUE)
#Plot the neural network
plot(boston.ann1)

Prediction on testing sample.

boston.pred1<- compute(boston.ann1, boston.test)
boston.pred1<- boston.pred1$net.result
## scale back to the original units (medv is the 14th column of Boston)
boston.pred1 <- boston.pred1*(maxs[14] - mins[14])+mins[14]
y.test <- boston.test$medv*(maxs[14] - mins[14])+mins[14]
mean((y.test-boston.pred1)^2)
## [1] 21.19456

Let’s add another hidden layer.

# fit neural network with two hidden layers (5 and 3 neurons)
boston.ann2<- neuralnet(medv~., data = boston.train, hidden = c(5,3), linear.output = TRUE)
#Plot the neural network
plot(boston.ann2)

Prediction on testing sample.

boston.pred2<- compute(boston.ann2, boston.test)
boston.pred2<- boston.pred2$net.result
## scale back
boston.pred2 <- boston.pred2*(maxs[14] - mins[14])+mins[14]
mean((y.test-boston.pred2)^2)
## [1] 19.0763

Comparing with linear regression.

boston.reg<- lm(medv~., data = boston.train)
boston.reg.pred<- predict(boston.reg, newdata = boston.test)
## scale back
boston.reg.pred <- boston.reg.pred*(maxs[14] - mins[14])+mins[14]
mean((y.test-boston.reg.pred)^2)
## [1] 20.20717

How many neurons are appropriate for a one-hidden-layer neural network?

Exercise: implement CV to select the number of neurons.
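
A minimal sketch of one possible approach is given below, assuming we tune on the scaled training set with 5-fold cross-validation. The candidate grid, the number of folds, and names such as neuron.grid and cv.mse are purely illustrative, not part of the lab.

# 5-fold CV over a grid of hidden-layer sizes (illustrative sketch)
set.seed(2023)
n.fold <- 5
fold.id <- sample(rep(1:n.fold, length.out = nrow(boston.train)))
neuron.grid <- c(2, 4, 6, 8, 10)
cv.mse <- sapply(neuron.grid, function(h){
  fold.mse <- sapply(1:n.fold, function(k){
    fit <- neuralnet(medv~., data = boston.train[fold.id != k, ],
                     hidden = h, linear.output = TRUE)
    pred <- compute(fit, boston.train[fold.id == k, ])$net.result
    mean((boston.train$medv[fold.id == k] - pred)^2)
  })
  mean(fold.mse)
})
cbind(neuron.grid, cv.mse)
neuron.grid[which.min(cv.mse)]  # candidate with the smallest CV error (on the scaled scale)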



ANN for Classification

First, create dummy variables for the categorical predictors (via model.matrix) and standardize the resulting design matrix.

y.ind <- which(names(credit.train)=="default")  # column index of the response
# dummy-code the categorical predictors and standardize the design matrix
credit.train.X<- as.data.frame(scale(model.matrix(~., data = credit.train[,-y.ind])[,-1]))
credit.train.Y<- credit.train[,y.ind]
credit.train1<- data.frame(default= credit.train.Y, credit.train.X)
# build the test design matrix in the same way
credit.test1<- as.data.frame(scale(model.matrix(~., data = credit.test[,-y.ind])[,-1]))
# fit the neural network. CAUTION: this is slow
credit.ann<- neuralnet(default~., data = credit.train1, hidden = 3, linear.output = FALSE)
plot(credit.ann)

Prediction on testing sample.

credit.pred1<- compute(credit.ann, credit.test1)
head(cbind(credit.test$default, credit.pred1$net.result), 10)
##    [,1]       [,2]
## 4     0 0.16659055
## 8     0 0.25877303
## 27    1 0.37737295
## 28    0 0.16991665
## 30    0 0.21747912
## 34    0 0.08735394
## 36    0 0.13229628
## 40    0 0.13736966
## 41    0 0.36633107
## 48    1 0.08735416

ROC curve and AUC

library(ROCR)
pred <- prediction(credit.pred1$net.result, credit.test$default)
perf <- performance(pred, "tpr", "fpr")
plot(perf, colorize=TRUE)

#Get the AUC
unlist(slot(performance(pred, "auc"), "y.values"))
## [1] 0.7578199

Confusion matrix

# use the training-sample default rate as the probability cutoff
credit.pred01.ann<- (credit.pred1$net.result>mean(credit.train$default))*1
#Confusion matrix
table(credit.test$default, credit.pred01.ann, dnn=c("Truth","Predicted"))
##      Predicted
## Truth    0    1
##     0 3375 1306
##     1  423  896
# misclassification rate (MR), false positive rate (FPR), false negative rate (FNR)
c(mean(credit.test$default != credit.pred01.ann),
sum(credit.test$default ==0 & credit.pred01.ann==1)/sum(credit.test$default ==0),
sum(credit.test$default ==1 & credit.pred01.ann==0)/sum(credit.test$default ==1)
)
## [1] 0.2881667 0.2790002 0.3206975

Comparing with the logistic regression model

#Fit logistic regression model
credit.glm<- glm(default~., data = credit.train, family=binomial)
#Get prediction
credit.pred.glm<- predict(credit.glm, credit.test, type="response")
# obtain ROC curve
pred <- prediction(credit.pred.glm, credit.test$default)
perf <- performance(pred, "tpr", "fpr")
plot(perf, colorize=TRUE)

#Get the AUC
unlist(slot(performance(pred, "auc"), "y.values"))
## [1] 0.7367672

Confusion matrix

credit.pred01.glm<- (credit.pred.glm>mean(credit.train$default))*1
#Confusion matrix
table(credit.test$default, credit.pred01.glm, dnn=c("Truth","Predicted"))
##      Predicted
## Truth    0    1
##     0 3215 1466
##     1  446  873
# MR, FPR, FNR
c(mean(credit.test$default != credit.pred01.glm),
sum(credit.test$default ==0 & credit.pred01.glm==1)/sum(credit.test$default ==0),
sum(credit.test$default ==1 & credit.pred01.glm==0)/sum(credit.test$default ==1)
)
## [1] 0.3186667 0.3131809 0.3381350



Deep learning with MXNet

This part of the notes is based on a Kaggle kernel.

For Windows users, run the code below to install the mxnet package. According to the official site, MXNet is not available for R versions above 3.5, so this part of the code will not work if your R version is newer than 3.5.

cran <- getOption("repos")
cran["dmlc"] <- "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/CRAN/"
options(repos = cran)
install.packages("mxnet")
library(mxnet)

Visualize the data

plotTrain <- function(images){
  op <- par(no.readonly=TRUE)
  x <- ceiling(sqrt(length(images)))
  par(mfrow=c(x, x), mar=c(.1, .1, .1, .1))
  
  for (i in images){ #reverse and transpose each matrix to rotate images
    m <- matrix(train[i,-1], nrow=28, byrow=TRUE)
    m <- apply(m, 2, rev)
    image(t(m), col=gray((0:255)/255), axes=FALSE)
    text(0.05, 0.2, col="white", cex=1.2, train[i, 1])
  }
  par(op) #reset the original graphics parameters
}

plotTrain(1:36)

Building A Simple Neural Network

We need to specify the number of neurons and the activation function for each layer. We use the ReLU (rectified linear unit) activation in the first two hidden layers, and softmax on the output layer because we have multiple classes.

For more information, I recommend checking out the MXNet tutorial page.

m1.data <- mx.symbol.Variable("data") # Notice how each layer is passed to the next 

m1.fc1 <- mx.symbol.FullyConnected(m1.data, name="fc1", num_hidden=128)
m1.act1 <- mx.symbol.Activation(m1.fc1, name="activation1", act_type="relu")

m1.fc2 <- mx.symbol.FullyConnected(m1.act1, name="fc2", num_hidden=64)
m1.act2 <- mx.symbol.Activation(m1.fc2, name="activation2", act_type="relu")

m1.fc3 <- mx.symbol.FullyConnected(m1.act2, name="fc3", num_hidden=10)
m1.softmax <- mx.symbol.SoftmaxOutput(m1.fc3, name="softMax")

MXNet can use the Graphviz package to create an interactive plot of the network structure. This one is nothing fancy, but such plots can be quite useful when your network is complex.

graph.viz(m1.softmax)

Train the Neural Network

By default, the model is trained on the CPU, which is already quite fast for this network. Training on a GPU can be 50+ times faster, but you have to install the GPU version of the package yourself.
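
If you do have the GPU build installed, the only change needed in the training code below is the device; mx.gpu() is mxnet's GPU device constructor, and the device id 0 here is just an example.

devices <- mx.gpu(0)  # use the first GPU instead of mx.cpu()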

devices <- mx.cpu()  # decide which device to use
#log <- mx.metric.logger$new() # to keep track of the results at each iteration
tick <- proc.time() #mark the start time
mx.set.seed(0)

mxnet1 <- mx.model.FeedForward.create(m1.softmax,  #the network configuration made above
                                     X = t(train.x), #the predictors (need transpose)
                                     y = train.y, #the labels
                                     ctx = devices,
                                     num.round = 20, # number of training iterations
                                     array.batch.size = 100,
                                     array.layout="colmajor",
                                     learning.rate = 0.01,
                                     momentum = 0.95,
                                     eval.metric = mx.metric.accuracy,
                                     initializer = mx.init.uniform(0.07),
                                     epoch.end.callback = mx.callback.log.train.metric(100)
)


print(paste("Training took:", round((proc.time() - tick)[3],2),"seconds"))

Prediction

preds <- predict(mxnet1, test.x)  # class probabilities, one column per test image
dim(preds)  # 10 classes (digits 0-9) by number of test images
pred.label <- max.col(t(preds)) - 1  # predicted digit = class with the highest probability
table(pred.label)  # distribution of predicted labels
mean(pred.label==test.y)  # test-set accuracy
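
To see which digits get confused with one another, a simple cross-tabulation of the true labels against the predictions can be added (an optional check, not part of the original kernel):

table(truth = test.y, predicted = pred.label)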
