# Fit and evaluate a classifier that predicts whether an n-back response was
# correct (`correct`) from `stimulus`, `stimulus_type` and `n`.
#
# INPUTS : seqs (loaded from notebooks/data/nback_seqs.Rd)
# OUTPUTS: model (caret `train` object), seqs.test.y (test-set predictions),
#          plus a printed tuning plot and confusion matrix.

library(tidyverse)
library(caret)
library(here)
library(inspectdf)
library(glmnet)
library(ROSE)

# Drop any stale copy of `seqs` so the object used below is the one that
# load() restores. The guard avoids the "object not found" warning that a bare
# rm(seqs) raises in a fresh session.
if (exists("seqs", inherits = FALSE)) {
  rm(seqs)
}
load(here("notebooks/data/nback_seqs.Rd"))

set.seed(42)

# 1. Prepare the data ----

# Keep only trials with a recorded response and reaction time, and turn the
# outcome and the categorical predictors into factors.
seqs <- seqs %>%
  filter(!is.na(correct), !is.na(rt)) %>%
  mutate(
    correct = factor(as.numeric(correct), labels = c("NO", "YES")),
    stimulus = factor(stimulus),
    stimulus_type = factor(stimulus_type)
  )

# Explicit print() so the output also appears when the script is source()d.
print(table(seqs$stimulus))

# 2. Train/test split ----

# One formula shared by the train and test design matrices so that both are
# always built from the same predictors.
predictor.formula <- correct ~ stimulus + stimulus_type + n

train.indices <- createDataPartition(seqs$correct, p = 0.8, list = FALSE)

# NOTE: despite its name, `seqs.train.balanced` holds the raw training split;
# the ROSE-resampled data used for fitting is `seqs.train`.
seqs.train.balanced <- seqs[train.indices, ]
seqs.train <- ROSE(correct ~ ., data = seqs.train.balanced)$data

# `[, -1]` drops the intercept column produced by model.matrix().
seqs.train.x <- model.matrix(predictor.formula, seqs.train)[, -1]
seqs.train.y <- seqs.train$correct

seqs.test <- seqs[-train.indices, ]
seqs.test.x <- model.matrix(predictor.formula, seqs.test)[, -1]
seqs.test.observed_y <- seqs.test$correct

# 3. Model ----

# model <- cv.glmnet(seqs.train.x,
#                    seqs.train.y,
#                    alpha = 1,
#                    nfolds = 5,
#                    family = "binomial",
#                    type.measure = "auc")
#
# model$lambda.min

# 5-fold cross-validation; class probabilities are required by
# twoClassSummary to compute the ROC metric.
ctrl <- trainControl(
  method = "cv",
  number = 5,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

# glmnet tune (alternative, disabled). It used to be assigned and then
# immediately overwritten by the pls grid below, and it carried an `ncomp`
# column that is not a glmnet tuning parameter.
# tune <- expand.grid(alpha = 0:1, lambda = seq(0, 0.01, length.out = 100))

# pls tune
tune <- expand.grid(ncomp = 1:10)

# NOTE(review): `family = "binomial"` looks like a leftover from the glmnet
# variant; it is forwarded through `...` to the pls fit -- confirm it is
# ignored there before removing it.
model <- train(
  seqs.train.x,
  seqs.train.y,
  method = "pls",
  family = "binomial",
  metric = "ROC",
  preProcess = c("center", "scale"),
  tuneGrid = tune,
  trControl = ctrl
)

print(model$bestTune)

# plot() on a train object returns a plot object that is only drawn when
# printed, so print it explicitly for non-interactive runs.
print(plot(model))

# 4. Evaluate on the held-out split ----

seqs.test.y <- predict(model, seqs.test.x)

print(confusionMatrix(seqs.test.y, seqs.test.observed_y))

# Retained experiments (disabled) ----

# RT
# data.frame(
#   RMSE = RMSE(y.test, seqs.test$correct),
#   Rsquare = R2(y.test, seqs.test$correct)
# )

# dmy <- dummyVars(~.-stimulus-stimulus_type,seqs,fullRank = T)
# dmy.rt <- dummyVars(~correct+stimulus_type,seqs)