# notebooks/cnn2019-correct.R
#==================================================#
# model the "correct" column

library(here)
library(tidyverse)
library(caret)
library(inspectdf)
library(caTools) # provides colAUC(), used for the ROC/AUC evaluation


# Restore the saved objects; the pipeline below presumably gets `seqs` from
# this file, since it is not defined anywhere else in the visible script.
load(here("data/nback_seqs.Rd"))

# Seed the RNG so the random steps further down are repeatable.
set.seed(42)

# Drop every row where either the outcome or the response time is missing,
# then turn the outcome into a two-level factor with readable labels.
seqs.imputed <- seqs %>%
  filter(!(is.na(correct) | is.na(rt))) %>%
  mutate(
    correct = factor(correct, labels = c("INCORRECT", "CORRECT"))
  )

# Quick per-column summaries of the categorical and numeric columns.
inspect_cat(seqs.imputed)
inspect_num(seqs.imputed)

# Dummy-encoded version of the whole data set (not referenced again in the
# visible part of this script).
seqs.dummy <- predict(
  dummyVars(~ ., data = seqs.imputed),
  newdata = seqs.imputed
)


# Draw a single 70/30 partition of the rows, sampled on the outcome column.
# `list = FALSE` makes createDataPartition() return a one-column matrix of
# row numbers instead of a list.
train_indexes <- createDataPartition(
  seqs.imputed$correct,
  times = 1,
  p = 0.7,
  list = FALSE
)

# Flatten the index matrix to a plain vector before subsetting rows. This is
# identical for a base data.frame and avoids matrix-subscript problems if
# `seqs.imputed` happens to be a tibble.
train_data <- seqs.imputed[as.vector(train_indexes), ]
test_data <- seqs.imputed[-as.vector(train_indexes), ]

# Resampling setup shared by every model below: 5-fold cross-validation scored
# with twoClassSummary (ROC, Sens, Spec), which needs class probabilities.
#
# The folds are generated once here and passed through `index` so that all
# models are resampled on exactly the same splits; the later resamples()/diff()
# comparison is only meaningful when the resamples are matched across models.
control <- trainControl(
  method = "cv",
  number = 5,
  index = createFolds(train_data$correct, k = 5, returnTrain = TRUE),
  classProbs = TRUE,
  verboseIter = TRUE,
  summaryFunction = twoClassSummary
)

# PLS classifier on the full predictor set, with predictors centered and
# scaled before fitting.
# `metric = "ROC"` is stated explicitly: twoClassSummary does not report
# "Accuracy" (train's default metric), so leaving it unset makes caret warn
# and fall back to ROC anyway.
pls.model1 <- train(
  correct ~ t + l + s + v + n + tl + ll + sl + ul + vl,
  data = train_data,
  method = "pls",
  metric = "ROC",
  preProcess = c("center", "scale"),
  trControl = control
)

# Reduced PLS classifier using only t, n and v, with predictors centered and
# scaled before fitting.
# `metric = "ROC"` is stated explicitly: twoClassSummary does not report
# "Accuracy" (train's default metric), so leaving it unset makes caret warn
# and fall back to ROC anyway.
pls.model2 <- train(
  correct ~ t + n + v,
  data = train_data,
  method = "pls",
  metric = "ROC",
  preProcess = c("center", "scale"),
  trControl = control
)

# Print the fitted train objects (tuning/resampling summary) and the variable
# importance of each model. These rely on top-level auto-printing.
pls.model1
pls.model2
varImp(pls.model1)
varImp(pls.model2)

# Apply caret's lattice theme, then plot the density of each model's
# resampled performance values (pch = "|" is the plotting symbol passed on
# to the lattice plot).
trellis.par.set(caretTheme())
densityplot(pls.model1, pch = "|")
densityplot(pls.model2, pch = "|")

# Collect the resampling results of both models and compare them: summary
# table, dot plot of the ROC metric, then the between-model differences.
# NOTE(review): the differences are only paired if both models were resampled
# on the same folds -- verify how the folds are generated.
resamps <- resamples(list(model1 = pls.model1, model2 = pls.model2))
summary(resamps)
dotplot(resamps, metric = "ROC")
difValues <- diff(resamps)
# 1 column x 3 rows of panels -- presumably one per metric (ROC, Sens, Spec).
bwplot(difValues, layout=c(1,3))


# Predicted class labels from both models, first for the training rows and
# then for the held-out rows ("raw" is the default prediction type of
# predict() on a train object, so it is not spelled out).
pls.train_predicted1 <- predict(pls.model1, newdata = train_data)
pls.train_predicted2 <- predict(pls.model2, newdata = train_data)
pls.predicted1 <- predict(pls.model1, newdata = test_data)
pls.predicted2 <- predict(pls.model2, newdata = test_data)

# FIXME (marker left by the original author; the intended fix is not stated)
# Confusion matrices: in-sample first, then out-of-sample.
confusionMatrix(data = pls.train_predicted1, reference = train_data$correct)
confusionMatrix(data = pls.train_predicted2, reference = train_data$correct)
confusionMatrix(data = pls.predicted1, reference = test_data$correct)
confusionMatrix(data = pls.predicted2, reference = test_data$correct)

# Area under the ROC curve on the held-out data, with the ROC curve plotted.
# colAUC() needs numeric scores, so it is given the predicted probability of
# the "CORRECT" class rather than the predicted class labels (a factor, which
# colAUC() rejects as non-numeric).
pls.prob1 <- predict(pls.model1, newdata = test_data, type = "prob")
pls.prob2 <- predict(pls.model2, newdata = test_data, type = "prob")

# Keep the score as a one-column data frame so the result is labelled CORRECT.
colAUC(pls.prob1["CORRECT"], test_data$correct, plotROC = TRUE)
colAUC(pls.prob2["CORRECT"], test_data$correct, plotROC = TRUE)