Skip to content

Commit

Permalink
Create dhfr-parallel-speed-up.R
Browse files Browse the repository at this point in the history
  • Loading branch information
dataprofessor authored Jan 6, 2020
1 parent 550bff5 commit 156ff25
Showing 1 changed file with 92 additions and 0 deletions.
92 changes: 92 additions & 0 deletions dhfr/dhfr-parallel-speed-up.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
####################################
# Data Professor #
# http://youtube.com/dataprofessor #
# http://github.com/dataprofessor #
####################################

# Importing libraries
library(datasets) # Contains several data sets
library(caret) # Package for machine learning algorithms / CARET stands for Classification And REgression Training

# Load the dhfr data set (ships with the caret package)
data(dhfr)

# Count missing values across the whole data set (0 means complete data)
missing_count <- sum(is.na(dhfr))
print(missing_count)

# Fix the random seed so the split (and downstream models) are reproducible
set.seed(100)

# Stratified random split on the class label Y: 80% train / 20% test
TrainingIndex <- createDataPartition(dhfr$Y, p = 0.8, list = FALSE)
TrainingSet <- dhfr[TrainingIndex, ]    # 80% — used to fit models
TestingSet <- dhfr[-TrainingIndex, ]    # 20% — held out for evaluation



###############################
# SVM model (polynomial kernel)

# Baseline: train sequentially, with no parallel backend registered,
# and time the run so the parallel methods below can be compared to it.
start.time <- proc.time()
Model <- train(
  Y ~ .,
  data = TrainingSet,    # fit on the training set
  method = "svmPoly"     # SVM with a polynomial kernel
)
stop.time <- proc.time()
run.time <- stop.time - start.time
print(run.time)

# Method 1 - Use doMC
# NOTE(review): doMC appears to use a fork-based backend, which is
# typically Unix/macOS only — confirm before running on Windows.

library(doMC)

# Register 2 parallel workers for caret's internal foreach loops
registerDoMC(2)

# Re-train the same model and time it for comparison with the baseline
start.time <- proc.time()
Model <- train(
  Y ~ .,
  data = TrainingSet,    # fit on the training set
  method = "svmPoly"     # SVM with a polynomial kernel
)
stop.time <- proc.time()
run.time <- stop.time - start.time
print(run.time)


# Method 2 - Use doParallel (socket cluster; portable across platforms)
# https://topepo.github.io/caret/parallel-processing.html

library(doParallel)

# Size the cluster to the machine instead of hard-coding 5 workers,
# which can oversubscribe hosts with fewer than 5 cores. Leave one
# core free for the master process / OS.
n_workers <- max(1, parallel::detectCores() - 1)
cl <- makePSOCKcluster(n_workers)
registerDoParallel(cl)

## All subsequent models are then run in parallel across the cluster
start.time <- proc.time()
Model <- train(
  Y ~ .,
  data = TrainingSet,    # fit on the training set
  method = "svmPoly"     # SVM with a polynomial kernel
)
stop.time <- proc.time()
run.time <- stop.time - start.time
print(run.time)

## When done: shut the workers down and restore the sequential backend
## so later train() calls do not try to use a dead cluster.
stopCluster(cl)
registerDoSEQ()




##########################
# Apply model for prediction

# In-sample predictions (training set) — measures fit, not generalization
Model.training <- predict(Model, TrainingSet)

# Out-of-sample predictions: TestingSet was created by the split above
# but was never used in the original script, so only optimistic
# in-sample performance was reported. Evaluate the hold-out set too.
Model.testing <- predict(Model, TestingSet)

# Model performance (confusion matrix and statistics)
Model.training.confusion <- confusionMatrix(Model.training, TrainingSet$Y)
Model.testing.confusion <- confusionMatrix(Model.testing, TestingSet$Y)

print(Model.training.confusion)   # training-set performance
print(Model.testing.confusion)    # hold-out (test-set) performance

# Feature importance from the fitted model
Importance <- varImp(Model)
plot(Importance, top = 25)        # top 25 most important features
plot(Importance, col = "red")     # all features, drawn in red

0 comments on commit 156ff25

Please sign in to comment.