---
title: "Statistical Properties of the N-Back Sequences"
output:
  html_notebook: default
  pdf_document: default
editor_options:
  chunk_output_type: console
---

# Problems

Statistical properties of n-back sequences bias behavior. Under a given sequence structure, these biases allow multiple cognitive strategies, producing heterogeneous behavior in a "gold standard" cognitive task.

# Gaps

- It is unclear how to parameterize interesting variations for sequence generation.
- How do we model these multiple strategies? This first requires identifying which sequence variations matter.
- Local vs. global properties: which matters most?
  - Local: lumpiness and short sequence patterns could be exploited by "reactive"/automatic strategies.
  - Global: with no lures and a large vocabulary, a repeated pattern implies a target.

## Formulating N-Back Sequence Generation as a CSP Instance

A weighted constraint satisfaction problem is a tuple $P=\langle V,D,C,W\rangle$ of variables, domains, constraints, and constraint weights:

$$V=\{x_N, x_T, x_{T,local}, x_L, x_{L,local}, x_V, x_U, x_S, x_{S,local}, x_G\}$$

$$D=\{\}$$

Constraints and weights, where $N$ is the n-back level, $T$ the target ratio, $L$ the lure ratio, $|V|$ the vocabulary size, $w$ the local window size, and $trials$ the sequence length:

$$
\begin{aligned}
x_n &= N, & W_n &= 1 - |10 \times \mathrm{dnorm}(x_n - N, sd=4)| \\
x_t &= T \times trials, & W_t &= 1 - |10 \times \mathrm{dnorm}(T \times trials - x_t, sd=4)| \\
x_{tl} &= \frac{T \times w}{trials}, & W_{tl} &= 1 - \left|10 \times \mathrm{dnorm}\left(x_{tl} - \frac{T}{trials} \times w, sd=4\right)\right| \\
x_{l} &= L \times trials \\
x_{ll} &= L \times w \\
x_{v} &= |V| \\
x_{ul} &= w \\
x_{s} &= \frac{trials}{|V|} \\
x_{sl} &= \max\left(1, \frac{w}{|V|}\right) \\
x_{g} &= \frac{trials}{w} \\
x_{vl} &= \min(|V|, w)
\end{aligned}
$$
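To make the soft-constraint weights concrete, the base-R sketch below scores a candidate sequence against the global target-count constraint $W_t$. This is a minimal illustration, not the original generator: the helper names (`constraint_weight`, `score_targets`) and the example sequence are assumptions introduced here.

```{r constraint-weight-sketch}
# Illustrative sketch of the dnorm-based soft weights defined above.
# Note the weight is near 0 when the observed value matches its desired
# value and approaches 1 as it deviates.
constraint_weight <- function(observed, desired, sd = 4) {
  1 - abs(10 * dnorm(observed - desired, sd = sd))
}

# W_t: weight for the global target-count constraint of a sequence
# with desired target ratio `target_ratio`
score_targets <- function(stimulus_type, target_ratio,
                          trials = length(stimulus_type)) {
  x_t <- sum(stimulus_type == "target")
  constraint_weight(x_t, target_ratio * trials)
}

# Hypothetical 8-trial sequence with a desired 25% target ratio
types <- c("non-target", "non-target", "target", "non-target",
           "non-target", "target", "non-target", "non-target")
score_targets(types, target_ratio = 0.25)
```

A full generator would evaluate every constraint above for each candidate sequence and search for candidates that optimize the combined weights.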
```{r setup, message=FALSE, include=FALSE, paged.print=FALSE}
library(ggplot2)
library(tidyverse)
library(stringi)
library(pls)
#library(plsRglm)
#library(plsdof)
library(caret)
library(here)
library(tsibble)
library(broom)
library(rsample)
```

```{r preprocessing}
load(here('notebooks/data/CL2015.RData'))

window_size <- 8

# Relabel a trial as a "lure" when its stimulus matches the item at
# lag n-1 or n+1; otherwise keep the original stimulus_type.
with_lures <- function(stimulus, stimulus_type, n) {
  sapply(seq_along(stimulus), function(i) {
    lures <- c(as.character(stimulus[i-n-1]), as.character(stimulus[i-n+1]))
    is_valid_trial <- i > n && all(!is.na(c(lures, stimulus[i])))
    ifelse(is_valid_trial && stimulus[i] %in% lures,
           "lure",
           as.character(stimulus_type[i]))
  })
}

# Local (sliding-window) and global sequence statistics per block
seqs <- NB %>%
  group_by(participant, block, condition) %>%
  mutate(n = ifelse(condition == '2-back', 2, 3)) %>%
  mutate(stimulus_type = with_lures(stimulus, stimulus_type, n)) %>%
  mutate(tl = slide_dbl(stimulus_type, ~length(which(. == 'target')), .partial = T, .size = window_size),
         ll = slide_dbl(stimulus_type, ~length(which(. == 'lure')), .partial = T, .size = window_size),
         sl = slide_dbl(stimulus, ~(sum(sort(table(.), decreasing = T)[1:2]) - 1), .partial = T, .size = window_size),
         sl = ifelse(is.na(sl), 0, sl),
         ul = slide_dbl(stimulus, ~(max(table(.)) - 1), .partial = T, .size = window_size),
         vl = slide_dbl(stimulus, ~(length(unique(.))), .partial = T, .size = window_size),
         al = slide_dbl(correct, ~(length(which(.))), .partial = T, .size = window_size)) %>%
  nest(.key = 'local_stats') %>%
  #mutate(stimuli = map(local_stats, ~paste0(.x$stimulus, collapse = ''))) %>%
  mutate(a = map(local_stats, ~length(which(.x$correct)))) %>%
  mutate(t = map(local_stats, ~length(which(.x$stimulus_type == 'target')))) %>%
  mutate(l = map(local_stats, ~length(which(.x$stimulus_type == 'lure')))) %>%
  mutate(s = map(local_stats, ~sum(sort(table(.x$stimulus), decreasing = T)[1:2]) - 1)) %>%
  mutate(v = map(local_stats, ~length(unique(.x$stimulus)))) %>%
  mutate(local_stats = map(local_stats, ~.x %>% select(-trial, -stimulus, -stimulus_type, -choice))) %>%
  ungroup() %>%
  select(-participant, -block, -condition)

inspectdf::inspect_cor(seqs)
#inspectdf::inspect_cor(NB, show_plot = T)
```

```{r model1}
# Baseline RT model: target count, n-back level, and accuracy
model1 <- NB2 %>%
  select(-participant, -stimulus) %>%
  glm(rt ~ t + n + a, data = ., family = "gaussian")

aug1 <- augment(model1)
aug1 %>%
  ggplot(aes(a, rt)) +
  geom_point() +
  geom_smooth(aes(y = .fitted), color = 'red')
```

```{r model2}
# Extended RT model adding local (sliding-window) statistics
model2 <- NB2 %>%
  select(-participant, -stimulus) %>%
  glm(rt ~ t + n + a + al + s + sl + ll + l, data = ., family = "gaussian")

aug2 <- augment(model2)
aug2 %>%
  ggplot(aes(jitter(al), rt)) +
  geom_point(alpha = 0.2, shape = 18) +
  xlab("accuracy") +
  geom_smooth(aes(y = .fitted), color = 'blue') +
  geom_smooth(aes(y = aug1$.fitted), color = 'red')  # overlay model1 fit
```

```{r models}
# 75/25 train/test split, then 5-fold cross-validation on the training set
nb_split <- initial_split(NB2, prop = 0.75)
training_data <- training(nb_split)
testing_data <- testing(nb_split)

cv_split <- vfold_cv(training_data, v = 5)
cv_data <- cv_split %>%
  mutate(
    train = map(splits, ~training(.x)),
    validate = map(splits, ~testing(.x))
  )

# Linear model of accuracy on all sequence statistics
cv_models_lm_a <- cv_data %>%
  mutate(model = map(train, ~lm(formula = a ~ ., data = .x)),
         tidied = map(model, tidy),
         glanced = map(model, glance),
         augmented = map(model, augment))

# Gaussian GLM variant of the same model
cv_models_glm_a <- cv_data %>%
  mutate(model = map(train, ~glm(formula = a ~ ., data = .x, family = "gaussian")),
         tidied = map(model, tidy),
         glanced = map(model, glance),
         augmented = map(model, augment))

# PLS regression: choose the number of components by cross-validated
# adjusted RMSEP, then refit with that many components
cv_models_pls_a <- cv_data %>%
  mutate(model = map(train, ~plsr(a ~ ., data = .x, validation = "CV")),
         best_dims = map_dbl(model, ~which.min(RMSEP(.x)$val["adjCV", , ]) - 1)) %>%
  mutate(model = map2(train, best_dims, ~plsr(a ~ ., data = .x, ncomp = .y)))

head(cv_models_pls_a)

# Per-participant stimulus frequencies
NBx <- NB %>%
  group_by(participant, stimulus) %>%
  summarise(freq = n()) %>%
  ungroup()

ggplot(NBx, aes(stimulus, freq, group = participant)) +
  geom_point(alpha = 0.3)

#mutate(model.pls = map(variables, ~plsr(rt ~ ., data = .x, validation = "CV")))
#mutate(model.pca = map(design_matrix, ~prcomp(~rt, .x, center = T, scale. = T, na.action = na.exclude))) %>%
#mutate(pc1 = pca$x[,'PC1'], pc2 = pca$x[,'PC2'])

# Alternative: caret-based repeated-CV PLS (kept for reference)
#any(is.na(NB2))
#NB2 <- na.omit(NB2)
# set.seed(100)
# trainingfold <- createMultiFolds(NB2$correct, k = 5, times = 10)
#
# # PLS
# mod1 <- train(correct ~ ., data = NB2[, c("correct", "x_sl", "x_ul", "x_t", "x_l")],
#               method = "pls",
#               metric = "Accuracy",
#               tuneLength = 20,
#               trControl = trainControl("repeatedcv", index = trainingfold, selectionFunction = "oneSE"),
#               preProc = c("zv", "center", "scale"))
#
# # Check CV
# plot(mod1)

#plsResult <- plsR(rt ~ ., data = NB2[, c("rt", "x_sl", "x_ul", "x_t", "x_l")], 3)
#plsResult <- plsR(correct ~ ., data = NB2[, c("correct", "x_sl", "x_ul", "x_t", "x_l")], 3)
```
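A natural next step, not run here, is to score each cross-validated model on its held-out fold. Below is a minimal sketch, assuming the `cv_models_lm_a` tibble built above and using base-R RMSE so no additional packages are needed.

```{r cv-holdout-sketch}
# Sketch only: held-out RMSE for the cross-validated linear models of `a`.
cv_eval <- cv_models_lm_a %>%
  mutate(
    # predictions on each model's held-out fold
    pred = map2(model, validate, ~predict(.x, newdata = .y)),
    # root-mean-squared error per fold
    rmse = map2_dbl(validate, pred,
                    ~sqrt(mean((.x$a - .y)^2, na.rm = TRUE)))
  )

cv_eval %>% summarise(mean_rmse = mean(rmse), sd_rmse = sd(rmse))
```

The same pattern applies to `cv_models_glm_a` and `cv_models_pls_a`, with the caveat that `predict()` on a `plsr` fit returns an array over components, so the chosen `ncomp` slice must be selected before computing the error.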