# Fits a caret `glmnet` model that predicts `correct` from sequence-level
# predictors, tunes alpha/lambda with 10-fold cross-validation (ROC metric,
# down-sampling), and evaluates the fit on a held-out 20% test split.

library(tidyverse)
library(caret)
library(here)
library(inspectdf)
library(glmnet)
library(ROSE)
library(pROC)
library(ppls)

# Data ----

# Drop any stale copy so the `seqs` used below is the one restored by load().
# Guarded because rm() warns when the object does not exist (fresh session).
if (exists("seqs")) {
  rm(seqs)
}
load(here("notebooks/data/nback_seqs.Rd"))

# Model formula ----

# Alternative specifications that were tried; only the last assignment to `f`
# was ever in effect, so the earlier ones are kept here for reference only.
# f <- correct ~ n + t + stimulus_type
# f <- correct ~ n + tl + vl + sl + s + stimulus_type
# predictors selected with stepAIC
f <- correct ~ n + tl + t + l + stimulus_type

# Name of the outcome column (left-hand side of the formula).
outcome <- as.character(f[[2]])

# Remove incomplete rows. Besides the originally filtered columns, every
# variable in the formula is included: model.matrix() silently drops rows with
# NA predictors, which would leave the design matrix shorter than the outcome
# vector.
seqs <- seqs %>%
  drop_na(rt, correct, tl, sl, all_of(all.vars(f)))

# Train / test split ----

set.seed(654321)
train.indices <- createDataPartition(seqs[[outcome]], p = 0.8, list = FALSE)

seqs.train.balanced <- seqs[train.indices, ]
# The ROSE resampling step is disabled, so the partition is used as-is.
# Without this assignment `seqs.train` would be undefined below.
seqs.train <- seqs.train.balanced
#seqs.train <- ROSE(f, data = seqs.train.balanced, N=100)$data

# Design matrices without the intercept column, plus outcome vectors.
seqs.train.x <- model.matrix(f, seqs.train)[, -1]
seqs.train.y <- seqs.train[[outcome]]

seqs.test <- seqs[-train.indices, ]
seqs.test.x <- model.matrix(f, seqs.test)[, -1]
seqs.test.observed_y <- seqs.test[[outcome]]

# Model tuning ----

set.seed(10)

ctrl <- trainControl(
  method = "cv",
  number = 10,
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  sampling = "down"
)

grid <- expand.grid(
  alpha = seq(0, 1, length = 10),
  lambda = seq(0.0001, 0.1, length = 10)
)

model <- train(
  seqs.train.x,
  seqs.train.y,
  method = "glmnet",
  preProcess = c("nzv", "center", "scale"), # or c("knnImpute"),
  tuneGrid = grid,
  metric = "ROC",
  trControl = ctrl
)

# Evaluation ----

seqs.test.y_prob <- predict(model, seqs.test.x, type = "prob")

model

plot(varImp(model, useModel = FALSE))
plot(model)
max(model$results$ROC)

# NOTE(review): assumes the outcome factor has a level named "YES" -- confirm
# against the levels of `correct` in the loaded data.
roc(
  seqs.test.observed_y,
  seqs.test.y_prob$YES,
  legacy.axes = TRUE,
  plot = TRUE,
  lwd = 2,
  col = "black",
  print.auc = TRUE,
  percent = TRUE,
  print.auc.y = 40,
  print.auc.x = 55,
  lty = 1,
  of = "se",
  boot.n = 2000,
  ci = TRUE
)

# PPLS
#penalized.pls.cv(seqs.train.y, seqs.train.x, kernel = T, scale=T)