Patch for dummy-vars-playground.R (R script using caret).

Summary of the change, as shown by the hunk below:
  * loads three additional packages: inspectdf, glmnet and ROSE;
  * removes the dummyVars() encoding of `seqs`, the inspect_num() /
    inspect_na() calls and the findCorrelation() column-removal step;
  * drops rows where `correct` or `rt` is NA and converts `correct`
    (labels "NO"/"YES"), `stimulus` and `stimulus_type` to factors;
  * splits the rows with createDataPartition(p = .8) on `correct`, passes
    the training rows through ROSE(), and builds model matrices from
    stimulus + stimulus_type + n for both the training and held-out rows;
  * fits a caret train() model with method = "pls", 5-fold CV, metric
    "ROC" and ncomp = 1..10, then prints bestTune, a plot and a
    confusionMatrix() on the held-out rows.

Formatting note: this copy of the patch had been collapsed onto two
physical lines. Line breaks and indentation are restored here. Blank
context lines were lost in the collapsed copy; they are placed between
the change groups so that the hunk matches its declared line counts
(24 old / 83 new), and their exact positions should be confirmed against
the real file.

NOTE(review): points to confirm in the added code (not changed here, so
the patch content stays as recorded):
  * `seqs.train.balanced` receives the raw partition rows while
    `seqs.train` receives the ROSE() output; the names read as swapped.
  * the "glmnet tune" grid is overwritten by the "pls tune" grid on the
    next statement, so only the ncomp grid reaches train().
  * `family = "binomial"` is passed together with method = "pls";
    confirm that argument is meaningful for that method.
  * ROSE(correct ~ ., ...) uses every column of `seqs`, whereas the model
    matrices use only stimulus, stimulus_type and n; confirm intended.
  * `classProbs=T` uses `T` rather than `TRUE`.

diff --git a/dummy-vars-playground.R b/dummy-vars-playground.R
index 3db3bc1..d956522 100644
--- a/dummy-vars-playground.R
+++ b/dummy-vars-playground.R
@@ -1,24 +1,83 @@
 library(tidyverse)
 library(caret)
 library(here)
-
+library(inspectdf)
+library(glmnet)
+library(ROSE)
 rm(seqs)
 load(here("notebooks/data/nback_seqs.Rd"))
 set.seed(42)
 
 
-seqs.dummy <- data.frame(predict(dummyVars(~.,seqs,fullRank = T),seqs)) %>%
-  filter(!is.na(correctTRUE), !is.na(rt))
+# 1. dummy vars
+# INPUTS : seqs
+# OUTPUTS: seqs.dmy
 
 
-inspect_num(seqs.dummy)
-inspect_na(seqs.dummy,show_plot = T)
+seqs <- seqs %>%
+  filter(!is.na(correct) & !is.na(rt)) %>%
+  mutate(correct = factor(as.numeric(correct), labels=c("NO","YES"))) %>%
+  mutate(stimulus = factor(stimulus)) %>%
+  mutate(stimulus_type = factor(stimulus_type))
 
 
-#inspect_cor(seqs.dummy,show_plot = T)
-cor_matrix <- cor(seqs.dummy)
-cor_high <- findCorrelation(cor_matrix, 0.5)
-high_cor_remove <- row.names(cor_matrix)[cor_high]
-#FIXME remove by column name
-seqs.uncorr <- seqs.dummy %>% select(-high_cor_remove)
-```
\ No newline at end of file
+table(seqs$stimulus)
+
+train.indices <- createDataPartition(seqs$correct, p = .8, list =FALSE)
+
+
+seqs.train.balanced <- seqs[train.indices,]
+seqs.train <- ROSE(correct ~ ., data = seqs.train.balanced)$data
+
+seqs.train.x <- model.matrix(correct ~ stimulus + stimulus_type + n, seqs.train)[,-1]
+seqs.train.y <- seqs.train$correct
+
+seqs.test <- seqs[-train.indices,]
+seqs.test.x <- model.matrix(correct ~ stimulus + stimulus_type + n, seqs.test)[,-1]
+seqs.test.observed_y <- seqs.test$correct
+
+# model <- cv.glmnet(seqs.train.x,
+#                    seqs.train.y,
+#                    alpha = 1,
+#                    nfolds = 5,
+#                    family = "binomial",
+#                    type.measure = "auc")
+#
+# model$lambda.min
+
+ctrl <- trainControl(method="cv",
+                     number=5,
+                     classProbs=T,
+                     summaryFunction=twoClassSummary)
+
+# glmnet tune
+tune <- expand.grid(alpha = 0:1, lambda = seq(0, 0.01, length = 100),ncomp=1:10)
+
+# pls tune
+tune <- expand.grid(ncomp=1:10)
+
+model <- train(seqs.train.x,
+               seqs.train.y,
+               method = "pls",
+               family = "binomial",
+               metric = "ROC",
+               preProc = c("center", "scale"),
+               tuneGrid = tune,
+               trControl = ctrl)
+
+model$bestTune
+plot(model)
+
+seqs.test.y <- model %>% predict(seqs.test.x)
+
+confusionMatrix(seqs.test.y, seqs.test.observed_y)
+
+# RT
+# data.frame(
+#   RMSE = RMSE(y.test, seqs.test$correct),
+#   Rsquare = R2(y.test, seqs.test$correct)
+# )
+
+
+#dmy <- dummyVars(~.-stimulus-stimulus_type,seqs,fullRank = T)
+#dmy.rt <- dummyVars(~correct+stimulus_type,seqs)