Newer
Older
notebooks / ccn2019 / dummy-vars-playground.R
library(tidyverse)
library(caret)
library(here)
library(inspectdf)
library(glmnet)
library(ROSE)

rm(seqs)
load(here("notebooks/data/nback_seqs.Rd"))

# seqs %>%
#   ggplot(aes(x=v,y=a,col=correct)) +
#   geom_jitter() +
#   geom_point(alpha=0.1) +
#   geom_smooth()

f <- correct ~ n + t + v + s + l + vl + sl + tl + ul + ll + stimulus
f <- correct ~ n + t + v + stimulus

set.seed(654321)

# 1. dummy vars
# INPUTS : seqs
# OUTPUTS: seqs.dmy

seqs <- seqs %>% 
  drop_na(rt, correct, tl,sl)


train.indices <- createDataPartition(seqs[[toString(f[[2]])]], p = .8, list =FALSE)

seqs.train.balanced <- seqs[train.indices,]
seqs.train <- seqs.train.balanced
# seqs.train <- ROSE(f, data = seqs.train.balanced)$data

seqs.train.x <- model.matrix(f, seqs.train)[,-1]
seqs.train.y <- seqs.train[[toString(f[[2]])]]

seqs.test  <- seqs[-train.indices,]
seqs.test.x <-  model.matrix(f, seqs.test)[,-1]
seqs.test.observed_y <- seqs.test[[toString(f[[2]])]]
  

# ROC for each var
filterVarImp(as.data.frame(seqs.train.x), seqs.train.y)

# model <- cv.glmnet(seqs.train.x,
#                    seqs.train.y,
#                    alpha = 1,
#                    nfolds = 5,
#                    family = "binomial",
#                    type.measure = "auc")
# 
# model$lambda.min

ctrl <- trainControl(method="cv",
                     number=1, 
                     classProbs=T,
                     verbose = T,
#                     sampling = "up",
                     savePredictions = T,
                     summaryFunction=twoClassSummary)

# glmnet tune
tune <- expand.grid(alpha = 0:1, lambda = seq(0, 0.01, length = 100))

max_components <- n_distinct(attr(terms(f),"term.labels"))
# pls tune
tune <- expand.grid(ncomp=1:max_components)

model <- train(seqs.train.x,
               seqs.train.y, 
               method = "glmnet",
               #family = "binomial",
               #metric = "ROC",
               preProc = c("nzv","center", "scale"),
               #verboseIter = TRUE,
               tuneLength = 2,
               #tuneGrid = tune,
               trControl = ctrl)

model$bestTune
plot(model)

seqs.test.y <- model %>% predict(seqs.test.x)
seqs.test.y_prob <- model %>% predict(seqs.test.x, type="prob")

confusionMatrix(seqs.test.y, seqs.test.observed_y)

plot(varImp(model, scale = F, useModel = F))

library(pROC)


roc(seqs.test.observed_y,
    seqs.test.y_prob$YES,
    legacy.axes=T,
    plot = T,
    lwd=2,
    col="black",
    print.auc=T,
    percent = T,
    print.auc.y = 40,
    print.auc.x = 55,
    lty = 1,
    of = "auc",
    boot.n = 100,
    ci = T)