forked from dataprofessor/code
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
550bff5
commit 156ff25
Showing 1 changed file with 92 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
####################################
# Data Professor                   #
# http://youtube.com/dataprofessor #
# http://github.com/dataprofessor  #
####################################

# Importing libraries
library(datasets)  # Contains several data sets, including dhfr
library(caret)     # Machine learning: Classification And REgression Training

# Importing the dhfr data set (ships with the caret package)
data(dhfr)

# Check to see if there are missing data (0 means the data set is complete)
sum(is.na(dhfr))

# Set the random seed so the data split (and model training) is reproducible
set.seed(100)

# Perform a stratified random split of the data set:
# 80% training / 20% testing, stratified on the class column Y
TrainingIndex <- createDataPartition(dhfr$Y, p = 0.8, list = FALSE)
TrainingSet <- dhfr[TrainingIndex, ]   # Training set
TestingSet <- dhfr[-TrainingIndex, ]   # Test set
###############################
# SVM model (polynomial kernel)
###############################

# Baseline: train sequentially (no parallel backend registered) and time it,
# so the parallel runs below have a reference timing to compare against.
start.time <- proc.time()
Model <- train(Y ~ .,
               data = TrainingSet,  # Build model using training set
               method = "svmPoly")  # Learning algorithm: polynomial-kernel SVM
stop.time <- proc.time()
run.time <- stop.time - start.time  # Elapsed user/system/wall time
print(run.time)
||
# Method 1 - Use doMC | ||
|
||
|
||
library(doMC) | ||
registerDoMC(2) | ||
start.time <- proc.time() | ||
Model <- train(Y ~ ., | ||
data = TrainingSet, # Build model using training set | ||
method = "svmPoly" # Learning algorithm | ||
) | ||
stop.time <- proc.time() | ||
run.time <- stop.time - start.time | ||
print(run.time) | ||
|
||
|
||
# Method 2 - Use doParallel (works on all platforms, including Windows)
# https://topepo.github.io/caret/parallel-processing.html
library(doParallel)

# Create a socket-based cluster of 5 worker processes and register it
# as the parallel backend for foreach (which caret uses internally)
cl <- makePSOCKcluster(5)
registerDoParallel(cl)

## All subsequent models are then run in parallel
start.time <- proc.time()
Model <- train(Y ~ .,
               data = TrainingSet,  # Build model using training set
               method = "svmPoly")  # Learning algorithm
stop.time <- proc.time()
run.time <- stop.time - start.time  # Compare against the sequential timing
print(run.time)

## When you are done: release the worker processes
stopCluster(cl)
##########################
# Apply model for prediction
# NOTE(review): only the training set is evaluated here; TestingSet from the
# split above is never used — consider also predicting on it to estimate
# out-of-sample performance.
Model.training <- predict(Model, TrainingSet)  # Predictions on training set

# Model performance (displays confusion matrix and statistics)
Model.training.confusion <- confusionMatrix(Model.training, TrainingSet$Y)
print(Model.training.confusion)

# Feature importance from the trained caret model
Importance <- varImp(Model)
plot(Importance, top = 25)     # Top 25 most important features
plot(Importance, col = "red")  # All features, drawn in red