library(tidyverse)
library(caret)
library(here)
library(inspectdf)
library(glmnet)
library(ROSE)
library(pROC)
library(ppls)
# Load data and choose the model formula ----

# Remove any `seqs` left over from an earlier session so the object used below
# is the one restored by load(). The guard avoids the "object 'seqs' not found"
# warning that a bare rm() raises when the script runs in a fresh session.
if (exists("seqs", inherits = FALSE)) {
  rm(seqs)
}
# The .Rd file is expected to restore an object named `seqs`.
load(here("notebooks/data/nback_seqs.Rd"))

# Candidate formulas tried earlier (kept for reference; they were overwritten
# by the final assignment below and never used):
# f <- correct ~ n + t + stimulus_type
# f <- correct ~ n + tl + vl + sl + s + stimulus_type
# predictors selected with stepAIC
f <- correct ~ n + tl + t + l + stimulus_type

# Drop incomplete rows. Besides the originally listed columns, every variable
# named in the formula is included: model.matrix() silently omits rows with NA
# in a model variable, which would leave the design matrix with fewer rows
# than the outcome vector taken from the same data frame.
seqs <- seqs %>%
  drop_na(rt, correct, tl, sl, all_of(all.vars(f)))
# Train/test split ----

# Name of the outcome column, read from the left-hand side of the formula.
outcome.name <- toString(f[[2]])

# 80/20 split on the outcome column; the seed makes the partition reproducible.
set.seed(654321)
train.indices <- createDataPartition(
  seqs[[outcome.name]],
  p = 0.8,
  list = FALSE
)

# Training rows. The original script stored this subset only as
# `seqs.train.balanced` while the lines below read `seqs.train`, which was
# never defined (the ROSE step that would have created it is commented out).
seqs.train <- seqs[train.indices, ]
# Old name kept as an alias in case code outside this section refers to it.
seqs.train.balanced <- seqs.train
# Optional ROSE resampling of the training set (disabled):
# seqs.train <- ROSE(f, data = seqs.train.balanced, N = 100)$data

# Design matrix without the intercept column, plus the outcome vector.
seqs.train.x <- model.matrix(f, seqs.train)[, -1]
seqs.train.y <- seqs.train[[outcome.name]]

# Held-out rows, prepared the same way as the training data.
seqs.test <- seqs[-train.indices, ]
seqs.test.x <- model.matrix(f, seqs.test)[, -1]
seqs.test.observed_y <- seqs.test[[outcome.name]]
# Model fitting: elastic net tuned by cross-validation ----

# Seed set before resampling so the CV folds and down-sampling are repeatable.
set.seed(10)

# 10-fold cross-validation scored with twoClassSummary (needs class
# probabilities, hence classProbs = TRUE); sampling = "down" asks caret to
# down-sample during resampling.
ctrl <- trainControl(
  method = "cv",
  number = 10,
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  sampling = "down"
)

# 10 x 10 tuning grid over the glmnet mixing parameter (alpha) and the
# penalty strength (lambda).
grid <- expand.grid(
  alpha = seq(0, 1, length = 10),
  lambda = seq(0.0001, 0.1, length = 10)
)

# Fit glmnet over the grid, selecting the combination with the best
# cross-validated ROC. Predictors are filtered for near-zero variance, then
# centred and scaled.
model <- train(
  x = seqs.train.x,
  y = seqs.train.y,
  method = "glmnet",
  preProcess = c("nzv", "center", "scale"), # or c("knnImpute"),
  tuneGrid = grid,
  metric = "ROC",
  trControl = ctrl
)
# Evaluation on the held-out test set ----

# Predicted class probabilities for the test rows (one column per class).
seqs.test.y_prob <- predict(model, seqs.test.x, type = "prob")

# Resampling summary, model-free variable importance and tuning profile.
model
plot(varImp(model, useModel = FALSE))
plot(model)

# Best cross-validated ROC value across the tuning grid.
max(model$results$ROC)

# Test-set ROC curve with the AUC printed on the plot and a bootstrap
# confidence interval (2000 replicates). With percent = TRUE the axes run
# 0-100, so the print.auc.x / print.auc.y positions are in percent units.
# NOTE(review): assumes the outcome has a class level named "YES" - confirm
# against the levels of `correct`.
roc(
  seqs.test.observed_y,
  seqs.test.y_prob$YES,
  legacy.axes = TRUE,
  plot = TRUE,
  lwd = 2,
  col = "black",
  print.auc = TRUE,
  percent = TRUE,
  print.auc.y = 40,
  print.auc.x = 55,
  lty = 1,
  of = "se",
  boot.n = 2000,
  ci = TRUE
)
# PPLS
#penalized.pls.cv(seqs.train.y, seqs.train.x, kernel = T, scale=T)