```{r setup, message=FALSE, include=FALSE, paged.print=FALSE} library(ggplot2) library(tidyverse) library(stringi) library(pls) #library(plsRglm) #library(plsdof) library(pls) library(caret) library(here) library(tsibble) library(broom) library(rsample) library(inspectdf) ``` ```{r preprocessing} load(here('notebooks/data/CL2015.RData')) window_size <- 8 with_lures <- function(stimulus, stimulus_type, n) { sapply(1:length(stimulus), function(i) { lures <- c(as.character(stimulus[i-n-1]), as.character(stimulus[i-n+1])) are_valid_trials <- i>n && all(!is.na(c(lures,stimulus[i]))) ifelse(are_valid_trials && stimulus[i] %in% lures, "lure", as.character(stimulus_type[i])) }) } invs <- function(s) { print(length(s)!=8) 1 } seqs <- NB %>% group_by(participant, block, condition) %>% mutate(n = ifelse(condition=='2-back',2,3)) %>% mutate(stimulus_type = with_lures(stimulus, stimulus_type, n)) %>% mutate(tl = slide2_dbl(stimulus_type, rt, ~length(which(.x=='target'))/length(which(!is.na(.y))), .partial=T,.size=window_size), ll = slide2_dbl(stimulus_type, rt, ~length(which(.x=='lure'))/length(which(!is.na(.y))), .partial=T, .size=window_size), sl = slide_dbl(stimulus_type, ~sum(sort(table(.x), decreasing = T)[1:2]) - 1, .partial=T, .size=window_size), sl = ifelse(is.na(sl), 0, sl), tl = ifelse(is.na(tl), NA, tl), ll = ifelse(is.na(ll), NA, ll), ul = slide_dbl(stimulus, ~max(table(.))-1, .partial=T, .size=window_size), vl = slide_dbl(stimulus, ~length(unique(.)), .partial=T, .size=window_size), al = slide2_dbl(correct, rt, ~length(which(.x))/length(which(!is.na(.y))), .partial=T, .size=window_size)) %>% nest(.key='local_stats') %>% #mutate(stimuli = map(local_stats, ~paste0(.x$stimulus,collapse = ''))) %>% mutate(a = map_dbl(local_stats, ~length(which(.x$correct)))) %>% mutate(t = map_dbl(local_stats, ~length(which(.x$stimulus_type=='target')))) %>% mutate(l = map_dbl(local_stats, ~length(which(.x$stimulus_type=='lure')))) %>% mutate(s = map_dbl(local_stats, ~sum(sort(table(.x$stimulus), decreasing = T)[1:2]) - 1)) %>% mutate(v = map_dbl(local_stats, ~length(unique(.x$stimulus)))) %>% mutate(local_stats = map(local_stats, ~.x %>% select(-trial,-stimulus,-stimulus_type,-choice))) %>% ungroup() %>% select(-participant,-block,-condition) inspect_cor(seqs %>% unnest(local_stats), show_plot = T) #inspect_cor(NB,show_plot = T) ``` ```{r} data <- seqs %>% unnest(local_stats) %>% mutate(correct=factor(as.numeric(correct),labels=c("C","I"))) %>% filter(!is.na(correct), !is.na(rt)) shuff <- sample(nrow(data)) split <- nrow(data) * 0.8 train <- data[1:split,] test <- data[(split+1):nrow(data),] model <- train( correct ~ ., data = train, method = "glm", family = "binomial", trControl = trainControl( method = "cv", number = 5, classProbs = T, summaryFunction = twoClassSummary ) ) model p <- predict(model, test, type="prob") confusionMatrix(ds$correct, prd) library(caTools) colAUC(p,test$correct, plotROC=T) ```