# notebooks/dummy-vars-playground.R
library(tidyverse)
library(caret)
library(here)
library(inspectdf)
library(glmnet)
library(ROSE)

# Drop any `seqs` left over from a previous interactive run so that the copy
# loaded below is the one actually used. Guarded with exists() because a bare
# rm() on a missing object raises an "object not found" warning in a fresh
# session (and an error under options(warn = 2)).
if (exists("seqs", inherits = FALSE)) {
  rm(seqs)
}
# The rest of the script expects this file to define `seqs`.
load(here("notebooks/data/nback_seqs.Rd"))

# Fixed seed so the random steps that follow are reproducible.
set.seed(42)

# 1. clean trials and encode categorical columns
# INPUTS : seqs
# OUTPUTS: seqs (rows with missing `correct`/`rt` removed;
#          `correct`, `stimulus` and `stimulus_type` converted to factors)

seqs <- seqs %>%
  # keep only trials that have both an accuracy value and a response time
  filter(!is.na(correct), !is.na(rt)) %>%
  mutate(
    # two-level outcome labelled NO / YES
    correct = factor(as.numeric(correct), labels = c("NO", "YES")),
    stimulus = factor(stimulus),
    stimulus_type = factor(stimulus_type)
  )

# frequency of each stimulus after filtering
table(seqs$stimulus)

# 2. train/test split, class balancing and design matrices
# INPUTS : seqs
# OUTPUTS: seqs.train.x, seqs.train.y, seqs.test.x, seqs.test.observed_y

# 80/20 partition on the outcome.
train.indices <- createDataPartition(seqs$correct, p = 0.8, list = FALSE)

# NOTE: despite its name, `seqs.train.balanced` holds the training rows
# *before* balancing; `seqs.train` is the ROSE output that is actually used
# for fitting. Only training rows go through ROSE; the test rows do not.
seqs.train.balanced <- seqs[train.indices, ]
seqs.train <- ROSE(correct ~ ., data = seqs.train.balanced)$data

seqs.test <- seqs[-train.indices, ]

# A single formula object for both splits, so the train and test design
# matrices cannot drift apart when predictors are edited.
design.formula <- correct ~ stimulus + stimulus_type + n

# [, -1] removes the intercept column; drop = FALSE keeps the result a matrix
# even if only one predictor column remains.
seqs.train.x <- model.matrix(design.formula, seqs.train)[, -1, drop = FALSE]
seqs.train.y <- seqs.train$correct

seqs.test.x <- model.matrix(design.formula, seqs.test)[, -1, drop = FALSE]
seqs.test.observed_y <- seqs.test$correct

# The model is fitted on seqs.train.x and later applied to seqs.test.x, so
# both matrices must expose the same columns in the same order. Fail loudly
# here rather than producing a misleading evaluation.
stopifnot(identical(colnames(seqs.train.x), colnames(seqs.test.x)))
  
# model <- cv.glmnet(seqs.train.x,
#                    seqs.train.y,
#                    alpha = 1,
#                    nfolds = 5,
#                    family = "binomial",
#                    type.measure = "auc")
# 
# model$lambda.min

# 5-fold cross-validation summarised with twoClassSummary; class
# probabilities are switched on because the "ROC" metric used below needs them.
ctrl <- trainControl(
  method = "cv",
  number = 5,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

# glmnet tune (reference only -- applies to method = "glmnet"; `ncomp` is a
# PLS parameter and does not belong in this grid)
# tune <- expand.grid(alpha = 0:1, lambda = seq(0, 0.01, length.out = 100))

# pls tune: candidate numbers of components
tune <- expand.grid(ncomp = 1:10)

# `family = "binomial"` was carried over from the glmnet variant; PLS has no
# such parameter, so it is not passed here.
model <- train(
  seqs.train.x,
  seqs.train.y,
  method = "pls",
  metric = "ROC",
  preProcess = c("center", "scale"),
  tuneGrid = tune,
  trControl = ctrl
)

# selected number of components and the resampling profile across the grid
model$bestTune
plot(model)

# Predictions of the fitted model for the held-out rows.
seqs.test.y <- predict(model, seqs.test.x)

# Cross-tabulate predictions against the observed labels.
confusionMatrix(data = seqs.test.y, reference = seqs.test.observed_y)

# RT
# data.frame(
#   RMSE = RMSE(y.test, seqs.test$correct),
#   Rsquare = R2(y.test, seqs.test$correct)
# )


#dmy <- dummyVars(~.-stimulus-stimulus_type,seqs,fullRank = T)
#dmy.rt <- dummyVars(~correct+stimulus_type,seqs)