# Find the cutoff probability that maximizes TPR subject to an FPR cap.
# @perf is an S4 object obtained from ROCR's performance(pred, 'tpr', 'fpr');
#   slots: alpha.values (cutoffs), x.values (fpr), y.values (tpr),
#   each a one-element list.
# @threshold is the maximum acceptable false positive rate.
# Returns 1.0 (i.e. classify everything as negative) when no cutoff
# satisfies the FPR constraint.
# In the ShinyApp, users can adjust the threshold by themselves and
# obtain different confusion matrix accordingly. Here, we always set
# threshold = 0.5 just for illustration.
get_cutoff_point <- function(perf, threshold) {
  cutoffs <- data.frame(
    cut = perf@alpha.values[[1]],
    fpr = perf@x.values[[1]],
    tpr = perf@y.values[[1]]
  )
  # Keep only cutoffs meeting the FPR constraint. Standard indexing is used
  # instead of subset(), whose non-standard evaluation is fragile inside
  # functions (e.g. when a variable named `fpr` exists in the caller).
  cutoffs <- cutoffs[cutoffs$fpr <= threshold, , drop = FALSE]
  if (nrow(cutoffs) == 0) {
    return(1.0)
  }
  # Among admissible cutoffs pick the highest TPR; break TPR ties by the
  # lowest FPR so we never pay extra false positives for the same recall.
  # (-fpr sorted decreasing == fpr sorted increasing.)
  cutoffs <- cutoffs[order(cutoffs$tpr, -cutoffs$fpr, decreasing = TRUE), ]
  cutoffs$cut[1]
}
# Draw a confusion matrix plot with AUC and accuracy panels underneath.
# @cm is a confusion matrix obtained from caret's @confusionMatrix function;
#   only the $table (2x2 counts) and $overall[1] (accuracy) components are used.
# @auc is the auc value obtained from ROCR's @performance function.
# @color is the kind of color you want for true positive and true negative areas.
# In this function, we also add in accuracy information which calculates the
# overall performance of model.
# Called for its plotting side effect; returns NULL invisibly.
draw_confusion_matrix <- function(cm, auc, color) {
  # Save and restore the caller's graphics settings: layout() and par()
  # below would otherwise leak into every plot drawn after this function.
  old_par <- par(no.readonly = TRUE)
  on.exit(par(old_par), add = TRUE)
  layout(matrix(c(1, 1, 2)))
  par(mar = c(0, 0.1, 1, 0.1))
  plot(c(125, 345), c(300, 450), type = "n", xlab = "", ylab = "", xaxt = 'n', yaxt = 'n')
  # create the matrix: columns are actual classes, rows are predictions
  rect(150, 430, 240, 370, col = color)
  text(195, 435, '0', cex = 1.2)
  rect(250, 430, 340, 370, col = 'white')
  text(295, 435, '1', cex = 1.2)
  text(125, 370, 'Predicted', cex = 1.3, srt = 90, font = 2)
  text(245, 450, 'Actual', cex = 1.3, font = 2)
  rect(150, 305, 240, 365, col = 'white')
  rect(250, 305, 340, 365, col = color)
  text(140, 400, '0', cex = 1.2, srt = 90)
  text(140, 335, '1', cex = 1.2, srt = 90)
  # add in the cm results; as.numeric() flattens the 2x2 table column-major:
  # res[1]=pred0/act0, res[2]=pred1/act0, res[3]=pred0/act1, res[4]=pred1/act1
  res <- as.numeric(cm$table)
  text(195, 400, res[1], cex = 1.6, font = 2, col = 'white')
  text(195, 335, res[2], cex = 1.6, font = 2, col = 'black')
  text(295, 400, res[3], cex = 1.6, font = 2, col = 'black')
  text(295, 335, res[4], cex = 1.6, font = 2, col = 'white')
  # add in the specifics (second region of the layout)
  plot(c(0, 100), c(0, 50), type = "n", xlab = "", ylab = "", main = "", xaxt = 'n', yaxt = 'n')
  # add in the accuracy information
  text(25, 30, "AUC", cex = 1.8, font = 2)
  text(25, 20, round(as.numeric(auc), 3), cex = 1.8)
  text(75, 30, names(cm$overall[1]), cex = 1.8, font = 2)
  text(75, 20, round(as.numeric(cm$overall[1]), 3), cex = 1.8)
  invisible(NULL)
}
# draw XGBoosting confusion matrix
auc_xgb <- performance(ROCRpred_xgb, measure = "auc") #obtain auc from @performance
perf_xgb <- performance(ROCRpred_xgb, 'tpr','fpr') #obtain tpr and fpr from @performance
# Use the same 0.5 FPR threshold as the SVM and logistic models: the file
# header states threshold = 0.5 is used throughout for illustration, and the
# previous value of 1 disabled the FPR constraint entirely.
cut <- get_cutoff_point(perf_xgb, 0.5) #obtain the cutoff probability
pred_values_xgb <- ifelse(pred.xgb > cut,1,0) #classify using cutoff probability
# confusionMatrix() requires factors with identical levels, so coerce both
# the 0/1 predictions and the reference labels at the call site.
cm_xgb <- confusionMatrix(data = factor(pred_values_xgb, levels = levels(factor(test_Y))),
                          reference = factor(test_Y)) #obtain confusion matrix
# auc slot y.values is a one-element list; pass the scalar AUC value
draw_confusion_matrix(cm_xgb, auc_xgb@y.values[[1]], "#3DB7E4") #Draw confusion matrix plot
# draw SVM confusion matrix
auc_svm <- performance(ROCRpred_svm, measure = "auc")
perf_svm <- performance(ROCRpred_svm, 'tpr','fpr')
cut <- get_cutoff_point(perf_svm, 0.5)
# Column 2 of the probability matrix is assumed to be the positive class —
# TODO confirm against how svm_model.prob is built upstream.
pred_values_svm <- ifelse(svm_model.prob[,2] > cut,1,0)
# confusionMatrix() requires factors with identical levels, so coerce both
# the 0/1 predictions and the reference labels at the call site.
cm_svm <- confusionMatrix(data = factor(pred_values_svm, levels = levels(factor(test_Y))),
                          reference = factor(test_Y))
# auc slot y.values is a one-element list; pass the scalar AUC value
draw_confusion_matrix(cm_svm, auc_svm@y.values[[1]], "#FF8849")
# draw Logistic regression confusion matrix
auc_lr <- performance(ROCRpred_lr, measure = "auc")
perf_lr <- performance(ROCRpred_lr, 'tpr','fpr')
cut <- get_cutoff_point(perf_lr, 0.5)
pred_values_lr <- ifelse(LR_model.predict > cut,1,0)
# confusionMatrix() requires factors with identical levels, so coerce both
# the 0/1 predictions and the reference labels at the call site.
cm_lr <- confusionMatrix(data = factor(pred_values_lr, levels = levels(factor(test_Y))),
                         reference = factor(test_Y))
# auc slot y.values is a one-element list; pass the scalar AUC value
draw_confusion_matrix(cm_lr, auc_lr@y.values[[1]], "#69BE28")