diff --git a/ccn2019-featsel.R b/ccn2019-featsel.R new file mode 100644 index 0000000..d59cd52 --- /dev/null +++ b/ccn2019-featsel.R @@ -0,0 +1,39 @@ +library(tidyverse) +library(caret) +library(here) +library(inspectdf) +library(glmnet) +library(ROSE) + +rm(seqs) +load(here("notebooks/data/nback_seqs.Rd")) +seqs <- seqs %>% drop_na(rt, correct, tl,sl) + +f <- correct ~ n + t + s + v + l + vl + sl + tl + ul + ll + +set.seed(654321) + +train.indices <- createDataPartition(seqs[[toString(f[[2]])]], p = .8, list =FALSE) + +seqs.train.balanced <- seqs[train.indices,] +seqs.train <- ROSE(f, data = seqs.train.balanced)$data + +seqs.train.x <- model.matrix(f, seqs.train)[,-1] +seqs.train.y <- seqs.train[[toString(f[[2]])]] + +seqs.test <- seqs[-train.indices,] +seqs.test.x <- model.matrix(f, seqs.test)[,-1] +seqs.test.observed_y <- seqs.test[[toString(f[[2]])]] + + +set.seed(10) + +ctrl <- rfeControl(functions = lrFuncs, + method = "cv", + number = 5, + verbose = T) + +rmProfile <- rfe(seqs.train.x, seqs.train.y, + rfeControl = ctrl) + +rmProfile diff --git a/ccn2019-featsel.R b/ccn2019-featsel.R new file mode 100644 index 0000000..d59cd52 --- /dev/null +++ b/ccn2019-featsel.R @@ -0,0 +1,39 @@ +library(tidyverse) +library(caret) +library(here) +library(inspectdf) +library(glmnet) +library(ROSE) + +rm(seqs) +load(here("notebooks/data/nback_seqs.Rd")) +seqs <- seqs %>% drop_na(rt, correct, tl,sl) + +f <- correct ~ n + t + s + v + l + vl + sl + tl + ul + ll + +set.seed(654321) + +train.indices <- createDataPartition(seqs[[toString(f[[2]])]], p = .8, list =FALSE) + +seqs.train.balanced <- seqs[train.indices,] +seqs.train <- ROSE(f, data = seqs.train.balanced)$data + +seqs.train.x <- model.matrix(f, seqs.train)[,-1] +seqs.train.y <- seqs.train[[toString(f[[2]])]] + +seqs.test <- seqs[-train.indices,] +seqs.test.x <- model.matrix(f, seqs.test)[,-1] +seqs.test.observed_y <- seqs.test[[toString(f[[2]])]] + + +set.seed(10) + +ctrl <- rfeControl(functions = lrFuncs, + method = "cv", + number = 5, + verbose = T) + +rmProfile <- rfe(seqs.train.x, seqs.train.y, + rfeControl = ctrl) + +rmProfile diff --git a/ccn2019-svm.R b/ccn2019-svm.R new file mode 100644 index 0000000..a1590a5 --- /dev/null +++ b/ccn2019-svm.R @@ -0,0 +1,79 @@ +library(tidyverse) +library(caret) +library(here) +library(inspectdf) +library(glmnet) +library(ROSE) + +rm(seqs) +load(here("notebooks/data/nback_seqs.Rd")) + +f <- correct ~ n + stimulus_type + stimulus + t + s + v + l + vl + sl + tl + ul + ll + +set.seed(654321) + +seqs <- seqs %>%drop_na(rt, correct, tl,sl) + +train.indices <- createDataPartition(seqs[[toString(f[[2]])]], p = .8, list =FALSE) + +seqs.train.balanced <- seqs[train.indices,] +seqs.train <- seqs.train.balanced + +seqs.train.x <- model.matrix(f, seqs.train)[,-1] +seqs.train.y <- seqs.train[[toString(f[[2]])]] + +seqs.test <- seqs[-train.indices,] +seqs.test.x <- model.matrix(f, seqs.test)[,-1] +seqs.test.observed_y <- seqs.test[[toString(f[[2]])]] + +ctrl <- trainControl(method="cv", + number=10, +# repeats = 1, #repeatedcv + sampling = "up", + savePredictions = T, + verbose = T) + +tune <- expand.grid(C = seq(0,5,by=0.25)) + +model <- train(seqs.train.x, + seqs.train.y, + method = "svmLinear", + preProc = c("center", "scale"), + tuneLength = 10, + tuneGrid = tune, + trControl = ctrl) + +model$bestTune +plot(model) + +seqs.test.y <- model %>% predict(seqs.test.x) +seqs.test.y_prob <- model %>% predict(seqs.test.x, type="prob") + +confusionMatrix(seqs.test.y, seqs.test.observed_y) + +plot(varImp(model, scale = F, useModel = F)) + +library(pROC) + +roc(seqs.test.observed_y, + seqs.test.y_prob$YES, + legacy.axes=T, + plot = T, + lwd=2, + col="black", + print.auc=T, + percent = T, + print.auc.y = 40, + print.auc.x = 55, + lty = 1, + of = "se", + boot.n = 200, + ci = T) + + +# RT +# data.frame( +# RMSE = RMSE(y.test, seqs.test$correct), +# Rsquare = R2(y.test, seqs.test$correct) +# ) + diff --git a/ccn2019-featsel.R b/ccn2019-featsel.R new file mode 100644 index 0000000..d59cd52 --- /dev/null +++ b/ccn2019-featsel.R @@ -0,0 +1,39 @@ +library(tidyverse) +library(caret) +library(here) +library(inspectdf) +library(glmnet) +library(ROSE) + +rm(seqs) +load(here("notebooks/data/nback_seqs.Rd")) +seqs <- seqs %>% drop_na(rt, correct, tl,sl) + +f <- correct ~ n + t + s + v + l + vl + sl + tl + ul + ll + +set.seed(654321) + +train.indices <- createDataPartition(seqs[[toString(f[[2]])]], p = .8, list =FALSE) + +seqs.train.balanced <- seqs[train.indices,] +seqs.train <- ROSE(f, data = seqs.train.balanced)$data + +seqs.train.x <- model.matrix(f, seqs.train)[,-1] +seqs.train.y <- seqs.train[[toString(f[[2]])]] + +seqs.test <- seqs[-train.indices,] +seqs.test.x <- model.matrix(f, seqs.test)[,-1] +seqs.test.observed_y <- seqs.test[[toString(f[[2]])]] + + +set.seed(10) + +ctrl <- rfeControl(functions = lrFuncs, + method = "cv", + number = 5, + verbose = T) + +rmProfile <- rfe(seqs.train.x, seqs.train.y, + rfeControl = ctrl) + +rmProfile diff --git a/ccn2019-svm.R b/ccn2019-svm.R new file mode 100644 index 0000000..a1590a5 --- /dev/null +++ b/ccn2019-svm.R @@ -0,0 +1,79 @@ +library(tidyverse) +library(caret) +library(here) +library(inspectdf) +library(glmnet) +library(ROSE) + +rm(seqs) +load(here("notebooks/data/nback_seqs.Rd")) + +f <- correct ~ n + stimulus_type + stimulus + t + s + v + l + vl + sl + tl + ul + ll + +set.seed(654321) + +seqs <- seqs %>%drop_na(rt, correct, tl,sl) + +train.indices <- createDataPartition(seqs[[toString(f[[2]])]], p = .8, list =FALSE) + +seqs.train.balanced <- seqs[train.indices,] +seqs.train <- seqs.train.balanced + +seqs.train.x <- model.matrix(f, seqs.train)[,-1] +seqs.train.y <- seqs.train[[toString(f[[2]])]] + +seqs.test <- seqs[-train.indices,] +seqs.test.x <- model.matrix(f, seqs.test)[,-1] +seqs.test.observed_y <- seqs.test[[toString(f[[2]])]] + +ctrl <- trainControl(method="cv", + number=10, +# repeats = 1, #repeatedcv + sampling = "up", + savePredictions = T, + verbose = T) + +tune <- expand.grid(C = seq(0,5,by=0.25)) + +model <- train(seqs.train.x, + seqs.train.y, + method = "svmLinear", + preProc = c("center", "scale"), + tuneLength = 10, + tuneGrid = tune, + trControl = ctrl) + +model$bestTune +plot(model) + +seqs.test.y <- model %>% predict(seqs.test.x) +seqs.test.y_prob <- model %>% predict(seqs.test.x, type="prob") + +confusionMatrix(seqs.test.y, seqs.test.observed_y) + +plot(varImp(model, scale = F, useModel = F)) + +library(pROC) + +roc(seqs.test.observed_y, + seqs.test.y_prob$YES, + legacy.axes=T, + plot = T, + lwd=2, + col="black", + print.auc=T, + percent = T, + print.auc.y = 40, + print.auc.x = 55, + lty = 1, + of = "se", + boot.n = 200, + ci = T) + + +# RT +# data.frame( +# RMSE = RMSE(y.test, seqs.test$correct), +# Rsquare = R2(y.test, seqs.test$correct) +# ) + diff --git a/dummy-vars-playground.R b/dummy-vars-playground.R index 1097da7..d79b629 100644 --- a/dummy-vars-playground.R +++ b/dummy-vars-playground.R @@ -8,7 +8,13 @@ rm(seqs) load(here("notebooks/data/nback_seqs.Rd")) -f <- as.formula("correct ~ n + vl + sl") +# seqs %>% +# ggplot(aes(x=v,y=a,col=correct)) + +# geom_jitter() + +# geom_point(alpha=0.1) + +# geom_smooth() + +f <- correct ~ n + stimulus + stimulus_type + t + s + v + l + vl + sl + tl + ul + ll set.seed(654321) @@ -33,6 +39,10 @@ seqs.test.x <- model.matrix(f, seqs.test)[,-1] seqs.test.observed_y <- seqs.test[[toString(f[[2]])]] + +# ROC for each var +filterVarImp(as.data.frame(seqs.train.x), seqs.train.y) + # model <- cv.glmnet(seqs.train.x, # seqs.train.y, # alpha = 1, @@ -62,7 +72,9 @@ method = "pls", family = "binomial", metric = "ROC", - preProc = c("zv","center", "scale"), + preProc = c("center", "scale"), + verboseIter = TRUE, + tuneLength = 10, tuneGrid = tune, trControl = ctrl)