# Partial least squares (PLS) regression of `rt` on n-back sequence
# predictors. The model is tuned with 10-fold cross-validation through caret
# and then scored (RMSE, R squared) on a held-out 20% test split.

library(tidyverse)
library(caret)
library(here)
library(inspectdf)
library(glmnet)
library(ROSE)
library(pROC)

# Data ----

# Remove any `seqs` already in the workspace before loading. The guard avoids
# the warning that a bare rm(seqs) raises when the object does not exist.
if (exists("seqs", inherits = FALSE)) {
  rm(seqs)
}
load(here("notebooks/data/nback_seqs.Rd"))

seqs <- seqs %>%
  drop_na(rt, correct, tl, sl)

# Model specification ----

# Earlier specification, superseded by the formula below:
# f <- rt ~ n + t + v
f <- rt ~ n + tl + v + s + l

# Name of the response column, taken from the left-hand side of the formula.
outcome <- all.vars(f)[[1]]

# Train / test split ----

set.seed(654321)
train.indices <- createDataPartition(seqs[[outcome]], p = .8, list = FALSE)

seqs.train.balanced <- seqs[train.indices, ]
# ROSE resampling is disabled, so the training set is the plain 80% split.
# Previously `seqs.train` was only assigned by the commented-out line below,
# which left it undefined for the model.matrix() call that follows.
# seqs.train <- ROSE(correct ~ ., data = seqs.train.balanced, N = 100)$data
seqs.train <- seqs.train.balanced

# Build one model frame per split and take predictors and response from it.
# model.frame() drops rows with NA in any formula variable, so x and y always
# have the same rows even for predictors not covered by drop_na() above.
seqs.train.frame <- model.frame(f, seqs.train)
seqs.train.x <- model.matrix(f, seqs.train.frame)[, -1, drop = FALSE]
seqs.train.y <- unname(model.response(seqs.train.frame))

seqs.test <- seqs[-train.indices, ]
seqs.test.frame <- model.frame(f, seqs.test)
seqs.test.x <- model.matrix(f, seqs.test.frame)[, -1, drop = FALSE]
seqs.test.observed_y <- unname(model.response(seqs.test.frame))

# Model fitting ----

ctrl <- trainControl(
  method = "cv",
  number = 10
)

# Not used while `tuneGrid` is commented out in the train() call below.
grid <- expand.grid(
  ncomp = 1:3
)

model <- train(
  seqs.train.x, seqs.train.y,
  method = "pls",
  preProcess = c("nzv", "center", "scale"), # or c("knnImpute"),
  # tuneGrid = grid,
  tuneLength = 10,
  metric = "RMSE",
  trControl = ctrl
)

seqs.test.predicted_y <- predict(model, seqs.test.x)

# Diagnostics ----

model
plot(varImp(model, useModel = FALSE, scale = FALSE))
plot(model)

# NOTE(review): this is the largest (worst) cross-validated RMSE over the
# tuning grid; confirm whether min() was intended.
max(model$results$RMSE)

# RT
data.frame(
  RMSE = RMSE(seqs.test.predicted_y, seqs.test.observed_y),
  Rsquare = R2(seqs.test.predicted_y, seqs.test.observed_y)
)