#==================================================#
# model the "correct" column
library(here)
library(tidyverse)
library(caret)
library(inspectdf)
library(pls)
#devtools::install_github("sachsmc/plotROC")
library(plotROC)
# ---- Data preparation ----
# Load the saved workspace file; the code below expects it to provide `seqs`.
load(here("notebooks/data/nback_seqs.Rd"))

# Seed the RNG once so the train/test partition below is reproducible.
set.seed(42)

# Keep only rows that have both an outcome (`correct`) and a response time
# (`rt`), then recode the outcome as a two-level factor. Despite the object's
# name, nothing is imputed here: incomplete rows are dropped.
# NOTE(review): `labels` are matched to the sorted unique values of `correct`,
# so this presumably assumes a logical / 0-1 column where the smaller value
# means "incorrect" -- confirm against the data.
seqs.imputed <- seqs %>%
  filter(!is.na(correct), !is.na(rt)) %>%
  mutate(correct = factor(correct, labels = c("INCORRECT", "CORRECT")))

# Quick overview of the categorical and numeric columns.
inspect_cat(seqs.imputed)
inspect_num(seqs.imputed)

# Dummy-coded version of the cleaned data.
# NOTE(review): not referenced again in the visible part of this script.
seqs.dummy <- predict(dummyVars(~ ., data = seqs.imputed), seqs.imputed)

# Split on the outcome: 70% of the rows go to training, the rest to testing.
# `list = FALSE` returns the row indices as a matrix, usable for subsetting.
train_indexes <- createDataPartition(
  seqs.imputed$correct,
  times = 1,
  p = 0.7,
  list = FALSE
)
train_data <- seqs.imputed[train_indexes, ]
test_data <- seqs.imputed[-train_indexes, ]
# Resampling configuration shared by both PLS models:
# 5-fold cross-validation repeated twice, with down-sampling applied during
# resampling, class probabilities enabled, hold-out predictions saved, and the
# "oneSE" rule used to pick the final tuning value.
control <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 2,
  # Fix the fold membership up front. Without `index`, each train() call draws
  # its own folds from the advancing RNG, so the two models would be scored on
  # different resamples even though resamples() later compares them
  # fold-by-fold. `k` and `times` must stay in sync with `number` and
  # `repeats` above.
  index = createMultiFolds(train_data$correct, k = 5, times = 2),
  classProbs = TRUE,
  verboseIter = TRUE,
  savePredictions = TRUE,
  sampling = "down",
  selectionFunction = "oneSE"
)
# ---- "New" model ----
# PLS classifier for `correct` using every column of train_data as a predictor
# except a, al, dp, cr and rt. Predictors are stripped of zero-variance
# columns, then centered and scaled; up to 20 tuning values are evaluated and
# judged by accuracy under the shared resampling scheme in `control`.
pls.new_model <- train(
  correct ~ . - a - al - dp - cr - rt,
  data = train_data,
  method = "pls",
  metric = "Accuracy",
  tuneLength = 20,
  preProcess = c("zv", "center", "scale"),
  trControl = control
)

# Resampled accuracy across the tuning grid.
plot(pls.new_model)

# Predictor importance. Title wording matches the common-model plot
# ("Variable Importance ...").
plot(
  varImp(pls.new_model),
  main = "Variable Importance for Correctness (New Model)"
)
# ---- "Common" model ----
# Same PLS setup as the new model, but with a smaller predictor set: on top of
# a, al, dp, cr and rt, the columns tl, ul, sl, s, ll, vl and l are excluded.
pls.common_model <- train(
  form = correct ~ . - a - al - dp - cr - rt - tl - ul - sl - s - ll - vl - l,
  data = train_data,
  method = "pls",
  preProcess = c("zv", "center", "scale"),
  tuneLength = 20,
  metric = "Accuracy",
  trControl = control
)

# Resampled accuracy across the tuning grid.
plot(pls.common_model)

# Predictor importance for the reduced predictor set.
plot(
  varImp(pls.common_model),
  main = "Variable Importance for Correctness (Common Model)"
)
# Optional resampling-density plots (disabled):
# trellis.par.set(caretTheme())
# densityplot(pls.new_model, pch = "|")
# densityplot(pls.common_model, pch = "|")

# ---- Compare the two models on their resampling results ----
# Collect the per-resample metrics of both fits into one object.
pls.models <- resamples(
  list(
    new = pls.new_model,
    common = pls.common_model
  )
)

# DEBUG summary(pls.models)
# DEBUG dotplot(pls.models)
# DEBUG diffValues <- diff(pls.models)

# Box-and-whisker plot of resampled accuracy, one box per model.
bwplot(
  pls.models,
  main = "Correctness Model Performance",
  metric = "Accuracy",
  layout = c(1, 1)
)
# ---- Held-out evaluation ----
# For each model, predict the test set twice: hard class labels ("raw") and
# per-class probabilities ("prob"). The probability frames feed the ROC step.
pls.new_predicted <- predict(
  pls.new_model,
  newdata = test_data,
  type = "raw"
)
pls.new_predicted_prob <- predict(
  pls.new_model,
  newdata = test_data,
  type = "prob"
)
pls.common_predicted <- predict(
  pls.common_model,
  newdata = test_data,
  type = "raw"
)
pls.common_predicted_prob <- predict(
  pls.common_model,
  newdata = test_data,
  type = "prob"
)

# Confusion matrices of predicted vs. observed labels on the test set.
# NOTE(review): no `positive` class is given, so caret falls back to the first
# factor level; the ROC analysis uses the CORRECT probability instead --
# confirm which class is meant to be "positive".
confusionMatrix(data = pls.new_predicted, reference = test_data$correct)
confusionMatrix(data = pls.common_predicted, reference = test_data$correct)
# ---- ROC curves on the test set ----
library(pROC)

# Square plotting region so both ROC axes have the same length.
# NOTE: this changes the graphics state for the rest of the session.
par(pty = "s")

# Common model: starts the plot (dotted, dark grey). Axes and AUC are shown in
# percent, and a confidence interval is requested.
roc(
  test_data$correct,
  pls.common_predicted_prob$CORRECT,
  plot = TRUE,
  legacy.axes = TRUE,
  lwd = 2,
  col = "darkgrey",
  lty = 3,
  print.auc = TRUE,
  print.auc.y = 45,
  print.auc.x = 55,
  percent = TRUE,
  ci = TRUE,
  boot.n = 100
)

# Disabled experiment: ROC curves for repeated 90% subsamples of the test set.
# Kept for reference; the commented-out plot.roc() call was never completed.
# roc_test_indices <- createDataPartition(test_data$correct,
#                                         times = 10,
#                                         p = 0.9,
#                                         list = F)
# for (i in 1:ncol(roc_test_indices)) {
#   test_sample_correct <- test_data[roc_test_indices[,i],]$correct
#   predprob_sample_correct <- pls.new_predicted_prob[roc_test_indices[,i],]$CORRECT
#   plot.roc(test_sample_correct,
#            predprob_sample_correct,
#            ...)
# }

# New model: drawn onto the existing plot (`add = TRUE`) as a solid black
# line, with its AUC label placed just below the common model's.
roc(
  test_data$correct,
  pls.new_predicted_prob$CORRECT,
  legacy.axes = TRUE,
  plot = TRUE,
  lwd = 2,
  col = "black",
  print.auc = TRUE,
  percent = TRUE,
  print.auc.y = 40,
  print.auc.x = 55,
  lty = 1,
  add = TRUE,
  of = "se",
  boot.n = 100,
  ci = TRUE
)

# Legend key must mirror how each curve was drawn above:
# new model = black / solid (lty 1), common model = dark grey / dotted (lty 3).
legend(
  100, 100,
  legend = c("New Model", "Common Model"),
  col = c("black", "darkgrey"),
  lty = c(1, 3),
  lwd = 2,
  cex = 0.9
)
# requires plotROC package
#DEBUG ggplot(pls.common_model, aes(d = pred$obs, m = pred$CORRECT)) +
#DEBUG geom_roc()