# Newer
# Older
# notebooks / ccn2019 / ccn2019-feature-selection.R
library(tidyverse)
library(caret)
library(here)
library(inspectdf)
library(glmnet)
library(ROSE)

# Load the n-back sequence data ----
# Remove any `seqs` left over from an earlier run so a stale copy cannot be
# picked up if the file does not provide one. The guard avoids the
# "object 'seqs' not found" warning that a bare rm() raises in a fresh session.
if (exists("seqs", inherits = FALSE)) {
  rm(seqs)
}
# presumably this file defines `seqs` -- verify against the data file
load(here("notebooks/data/nback_seqs.Rd"))
# Keep only rows where rt, correct, tl and sl are all present.
seqs <- seqs %>% drop_na(rt, correct, tl, sl)

# Model specification ----
# Response on the left-hand side, candidate predictors on the right.
f <- correct ~ n + t + s + v + l + vl + sl + tl + ul + ll +
  stimulus_type + participant
# Alternative response (rt), currently disabled:
#f <- rt ~ n + t + s + v + l + vl + sl + tl + ul + ll + stimulus_type

# Name of the response column, read once from the formula's left-hand side so
# everything below follows whichever formula is active.
response.name <- toString(f[[2]])

# Fixed seed so the partition below is reproducible.
set.seed(654321)

# Train / test split ----
# 80% of the rows go to the training set; list = FALSE returns the row indices
# as a matrix rather than a list.
train.indices <- createDataPartition(seqs[[response.name]], p = 0.8, list = FALSE)

seqs.train.imbalanced <- seqs[train.indices, ]
seqs.train <- seqs.train.imbalanced

# Disabled class-balancing step. NOTE(review): it refers to
# `seqs.train.balanced`, which is not defined anywhere in this script.
#if (toString(f[[2]]) == "correct")
#  seqs.train <- ROSE(f, data = seqs.train.balanced)$data

# Design matrix for the training rows; [, -1] drops the first column
# (the intercept produced by model.matrix for this formula).
seqs.train.x <- model.matrix(f, seqs.train)[, -1]
seqs.train.y <- seqs.train[[response.name]]

# model.matrix() drops rows with missing values under the default na.action,
# and only rt, correct, tl and sl were NA-filtered when the data was loaded.
# Fail early with a clear message if predictors and response got out of step.
if (nrow(seqs.train.x) != length(seqs.train.y)) {
  stop("Training design matrix has ", nrow(seqs.train.x),
       " rows but the response has ", length(seqs.train.y),
       " values; check the predictors for missing values.",
       call. = FALSE)
}

# Disabled up-sampling experiments:
#upSamples <- upSample(seqs.train.x, seqs.train[["stimulus_type"]])

# upSamples <- upSample(seqs.train.x, seqs.train[,"stimulus_type"])
# seqs.train.x <- upSamples %>% dplyr::select(-correct)

# Held-out rows: everything not selected for training.
seqs.test <- seqs[-train.indices, ]
seqs.test.x <- model.matrix(f, seqs.test)[, -1]
seqs.test.observed_y <- seqs.test[[response.name]]

# Same consistency check for the test set; only a warning here because the
# test matrices are not used further in this part of the script.
if (nrow(seqs.test.x) != length(seqs.test.observed_y)) {
  warning("Test design matrix has ", nrow(seqs.test.x),
          " rows but the observed response has ",
          length(seqs.test.observed_y),
          " values; check the predictors for missing values.",
          call. = FALSE)
}


# Resampling setup for train() ----
# 3-fold cross-validation with per-iteration progress output. Only the
# disabled train() calls below use it, so it gets its own name instead of
# sharing `ctrl` with the RFE control object defined further down.
train.ctrl <- trainControl(method = "cv",
                           number = 3,
                           verboseIter = TRUE)

# Disabled model fits; re-enable one of these to create `model`.
#model <- train(seqs.train.x, seqs.train.y, method = "glmStepAIC", trControl = train.ctrl)
#model <- train(seqs.train.x, seqs.train.y, method = "ORFpls", trControl = train.ctrl)

# Recursive feature elimination ----
# Uses caret's rfFuncs helper set with 3-fold cross-validation.
ctrl <- rfeControl(functions = rfFuncs,
                   method = "cv",
                   number = 3,
                   verbose = TRUE)

# NOTE(review): whether this runs as classification or regression presumably
# depends on the type of seqs.train.y -- confirm `correct` has the intended
# type (e.g. a factor for classification).
rmProfile <- rfe(seqs.train.x, seqs.train.y,
                 rfeControl = ctrl)

# `model` is only created by the disabled train() calls above, so summarising
# it here would stop the script with "object 'model' not found".
#summary(model)

# Explicit print so the result is also shown when the script is source()d.
print(rmProfile)