diff --git a/ccn2019.rev3.Rmd b/ccn2019.rev3.Rmd index d286a1c..9f38d9b 100644 --- a/ccn2019.rev3.Rmd +++ b/ccn2019.rev3.Rmd @@ -1,11 +1,11 @@ ```{r setup, message=FALSE, include=FALSE, paged.print=FALSE} +#! =============================================== +#! load required packages + library(ggplot2) library(tidyverse) library(stringi) library(pls) -#library(plsRglm) -#library(plsdof) -library(pls) library(caret) library(here) library(tsibble) @@ -18,9 +18,13 @@ ```{r preprocessing} +#! =============================================== +#! load data set and set running window size load(here('notebooks/data/CL2015.RData')) window_size <- 8 +#! =============================================== +#! A function to mark lures in a sequence with_lures <- function(stimulus, stimulus_type, n) { sapply(1:length(stimulus), function(i) { lures <- c(as.character(stimulus[i-n-1]), as.character(stimulus[i-n+1])) @@ -31,6 +35,9 @@ }) } +#! =============================================== +#! Preprocess data set to add t,tl,l,ll,u,ul,s,sl,a,al +#! a and al are respectively accuracy and recent accuracy seqs <- NB %>% group_by(participant, block, condition) %>% mutate(n = ifelse(condition=='2-back',2,3)) %>% @@ -55,25 +62,30 @@ ungroup() %>% select(-participant,-block,-condition) +#! =============================================== +#! visualize correlations inspect_cor(seqs %>% unnest(local_stats), show_plot = T) -#inspect_cor(NB,show_plot = T) ``` ```{r models} +#! =============================================== +#! prepare data for modeling (remove na, etc) +#! it also restructures "correct" column to avoid caret errors. C stands for "CORRECT", and I is "INCORRECT" data <- seqs %>% unnest(local_stats) %>% - # restructure correct column to avoid caret errors - # C stands for "CORRECT", and I is "INCORRECT" mutate(correct=factor(as.numeric(correct),labels=c("C","I"))) %>% filter(!is.na(correct), !is.na(rt)) +#! =============================================== +#! 
Prepare train and test partitions shuff <- sample(nrow(data)) split <- nrow(data) * 0.8 - train_data <- data[1:split,] test_data <- data[(split+1):nrow(data),] +#! =============================================== +#! training parameters for the PLS models plsTrControl <- trainControl( method = "cv", number = 5 @@ -93,7 +105,7 @@ plot(model_pls_accuracy) # PLS variable importance -varImp(model_pls_accuracy) +plot(varImp(model_pls_accuracy), main="Accuracy - Variable Importance") #==================================================# @@ -113,13 +125,12 @@ plot(model_pls_rt) # PLS variable importance -varImp(model_pls_rt) - +plot(varImp(model_pls_rt), main="RT - Variable Importance") predicted_rt_data <- predict(model_pls_rt, test_data) +#FIXME confusionMatrix(predicted_rt_data,test_data$rt) - colAUC(predicted_rt_data,test_data$rt, plotROC=T) #==================================================# @@ -140,11 +151,13 @@ ) model_glm_correct +varImp(model_glm_correct) + predicted_correct_data <- predict(model_glm_correct, test_data, type="prob") -confusionMatrix(test_data$correct, predicted_correct_data) - +#FIXME +confusionMatrix(predicted_correct_data, test_data$correct) colAUC(predicted_correct_data, test_data$correct, plotROC=T) #==================================================# @@ -161,7 +174,7 @@ predicted_old_correct_data <- predict(model_glm_correct_old, test_data, type="prob") +#FIXME confusionMatrix(test_data$correct, predicted_old_correct_data) - colAUC(predicted_old_correct_data,test_data$correct, plotROC=T) ```