#==================================================#
# model the "correct" column
#
# Fits two PLS classifiers for the `correct` outcome (a larger "new"
# predictor set and a smaller "old" one), compares them on
# cross-validation resamples, and then on a held-out test split
# (confusion matrices and overlaid ROC curves).

library(here)
library(tidyverse)
library(caret)
library(inspectdf)
library(pls)
# devtools::install_github("sachsmc/plotROC")
library(plotROC)

# NOTE(review): `seqs` is not defined in this script; presumably it is the
# object restored by this load() call -- confirm against the data file.
load(here("notebooks/data/nback_seqs.Rd"))
set.seed(42)

# Keep only rows where both the outcome and `rt` are present, and turn the
# outcome into a two-level factor. caret needs syntactically valid level
# names when classProbs = TRUE.
# NOTE(review): the labels are assigned in the sorted order of the original
# values, so this assumes the "incorrect" value sorts first -- confirm.
seqs.imputed <- seqs %>%
  filter(!is.na(correct), !is.na(rt)) %>%
  mutate(correct = factor(correct, labels = c("INCORRECT", "CORRECT")))

inspect_cat(seqs.imputed)
inspect_num(seqs.imputed)

# Dummy-coded version of the data; not referenced again in this section.
seqs.dummy <- predict(dummyVars(~., data = seqs.imputed), seqs.imputed)

# 70/30 split, stratified on the outcome by createDataPartition().
train_indexes <- createDataPartition(
  seqs.imputed$correct,
  times = 1,
  p = 0.7,
  list = FALSE
)
train_data <- seqs.imputed[train_indexes, ]
test_data <- seqs.imputed[-train_indexes, ]

# Shared resampling setup: 5-fold repeated CV (no `repeats` given, so caret's
# default applies), class probabilities kept so twoClassSummary can compute
# ROC/Sens/Spec, and down-sampling applied within resampling.
control <- trainControl(
  method = "repeatedcv",
  number = 5,
  classProbs = TRUE,
  verboseIter = TRUE,
  summaryFunction = twoClassSummary,
  savePredictions = TRUE,
  sampling = "down"
)

# twoClassSummary does not report Accuracy (train()'s default metric), so the
# selection metric is named explicitly instead of relying on caret's
# warning-and-fallback behaviour.
pls.new_model <- train(
  correct ~ t + l + s + v + n + tl + ll + sl + ul + vl,
  data = train_data,
  method = "pls",
  metric = "ROC",
  preProcess = c("center", "scale"),
  trControl = control
)

pls.old_model <- train(
  correct ~ t + n + v,
  data = train_data,
  method = "pls",
  metric = "ROC",
  preProcess = c("center", "scale"),
  trControl = control
)

pls.new_model
pls.old_model

varImp(pls.new_model)
varImp(pls.old_model)

# Resampling distributions of the performance metrics for each model.
trellis.par.set(caretTheme())
densityplot(pls.new_model, pch = "|")
densityplot(pls.old_model, pch = "|")

# Resample-based comparison of the two models.
resamps <- resamples(list(new = pls.new_model, old = pls.old_model))
summary(resamps)
dotplot(resamps)
diffValues <- diff(resamps)
bwplot(diffValues, layout = c(1, 3))

# Held-out predictions: hard class labels and class probabilities.
pls.new_predicted <- predict(pls.new_model, test_data, type = "raw")
pls.new_predicted_prob <- predict(pls.new_model, test_data, type = "prob")

pls.old_predicted <- predict(pls.old_model, test_data, type = "raw")
pls.old_predicted_prob <- predict(pls.old_model, test_data, type = "prob")

confusionMatrix(pls.new_predicted, test_data$correct)
confusionMatrix(pls.old_predicted, test_data$correct)

# Attached here rather than at the top so the search-path order (and thus
# which package's functions win on name clashes) stays as it was.
library(pROC)

par(pty = "s") # square plotting region for the ROC plot

# `levels` and `direction` are fixed so both curves use the same
# control/case comparison: INCORRECT is the control level, and a higher
# predicted probability of CORRECT counts as evidence for the case level.
# Leaving `direction` on automatic detection lets pROC pick it per curve.
roc(
  test_data$correct,
  pls.new_predicted_prob$CORRECT,
  levels = c("INCORRECT", "CORRECT"),
  direction = "<",
  plot = TRUE,
  legacy.axes = TRUE,
  lwd = 4,
  col = "black",
  print.auc.y = 45,
  percent = TRUE,
  print.auc = TRUE
)

plot.roc(
  test_data$correct,
  pls.old_predicted_prob$CORRECT,
  levels = c("INCORRECT", "CORRECT"),
  direction = "<",
  legacy.axes = TRUE,
  lwd = 4,
  col = "darkgray",
  print.auc = TRUE,
  percent = TRUE,
  print.auc.y = 40,
  lty = 3,
  add = TRUE
)

legend(
  100, 100,
  legend = c("New Model", "Old Model"),
  col = c("black", "darkgray"),
  lty = c(1, 3),
  lwd = 3,
  cex = 0.8
)

# ggplot(pls.old_model, aes(d = pred$obs, m = pred$CORRECT)) +
#   geom_roc()