Skip to content

Commit

Permalink
Create dhfr-parallel-speed-up.R
Browse files Browse the repository at this point in the history
  • Loading branch information
dataprofessor authored Jan 6, 2020
1 parent 550bff5 commit 156ff25
Showing 1 changed file with 92 additions and 0 deletions.
92 changes: 92 additions & 0 deletions dhfr/dhfr-parallel-speed-up.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
####################################
# Data Professor #
# http://youtube.com/dataprofessor #
# http://github.com/dataprofessor #
####################################

# Importing libraries
library(datasets) # Contains several data sets
library(caret) # Package for machine learning algorithms / CARET stands for Classification And REgression Training

# Load the dhfr data set (ships with the caret package)
data(dhfr)

# Count missing values across the whole data set (0 means complete data)
missing_count <- sum(is.na(dhfr))
print(missing_count)

# Fix the random seed so the split (and downstream models) are reproducible
set.seed(100)

# Stratified random split on the class label Y: 80% train / 20% test
TrainingIndex <- createDataPartition(dhfr$Y, p = 0.8, list = FALSE)
TrainingSet <- dhfr[TrainingIndex, ]    # 80% — used to fit models
TestingSet <- dhfr[-TrainingIndex, ]    # 20% — held out for evaluation



###############################
# SVM model (polynomial kernel)

# Baseline: train sequentially, with no parallel backend registered,
# and time the run so the parallel methods below can be compared to it.
start.time <- proc.time()
Model <- train(
  Y ~ .,
  data = TrainingSet,    # fit on the training set
  method = "svmPoly"     # SVM with a polynomial kernel
)
stop.time <- proc.time()
run.time <- stop.time - start.time
print(run.time)

# Method 1 - Use doMC
# NOTE(review): doMC appears to use a fork-based backend, which is
# typically Unix/macOS only — confirm before running on Windows.

library(doMC)

# Register 2 parallel workers for caret's internal foreach loops
registerDoMC(2)

# Re-train the same model and time it for comparison with the baseline
start.time <- proc.time()
Model <- train(
  Y ~ .,
  data = TrainingSet,    # fit on the training set
  method = "svmPoly"     # SVM with a polynomial kernel
)
stop.time <- proc.time()
run.time <- stop.time - start.time
print(run.time)


# Method 2 - Use doParallel (socket cluster; portable across platforms)
# https://topepo.github.io/caret/parallel-processing.html

library(doParallel)

# Size the cluster to the machine instead of hard-coding 5 workers,
# which can oversubscribe hosts with fewer than 5 cores. Leave one
# core free for the master process / OS.
n_workers <- max(1, parallel::detectCores() - 1)
cl <- makePSOCKcluster(n_workers)
registerDoParallel(cl)

## All subsequent models are then run in parallel across the cluster
start.time <- proc.time()
Model <- train(
  Y ~ .,
  data = TrainingSet,    # fit on the training set
  method = "svmPoly"     # SVM with a polynomial kernel
)
stop.time <- proc.time()
run.time <- stop.time - start.time
print(run.time)

## When done: shut the workers down and restore the sequential backend
## so later train() calls do not try to use a dead cluster.
stopCluster(cl)
registerDoSEQ()




##########################
# Apply model for prediction

# In-sample predictions (training set) — measures fit, not generalization
Model.training <- predict(Model, TrainingSet)

# Out-of-sample predictions: TestingSet was created by the split above
# but was never used in the original script, so only optimistic
# in-sample performance was reported. Evaluate the hold-out set too.
Model.testing <- predict(Model, TestingSet)

# Model performance (confusion matrix and statistics)
Model.training.confusion <- confusionMatrix(Model.training, TrainingSet$Y)
Model.testing.confusion <- confusionMatrix(Model.testing, TestingSet$Y)

print(Model.training.confusion)   # training-set performance
print(Model.testing.confusion)    # hold-out (test-set) performance

# Feature importance from the fitted model
Importance <- varImp(Model)
plot(Importance, top = 25)        # top 25 most important features
plot(Importance, col = "red")     # all features, drawn in red

0 comments on commit 156ff25

Please sign in to comment.