--- title: "Statistical Properties of the N-Back Sequences" output: pdf_document: default html_notebook: default --- # Problems Statistical properties of n-back sequences bias behaviors. These bias, under specified structure, allows multiple cognitive strategies, producing heterogeneous behavior in a "gold standard" cognitive task. # Gaps - Unclear how to parameterize interesting variations for sequence generation - How do we model these multiple strategies (which requires identifying which sequence variations matter) - local vs. global properties, which one matters the most? - Local: lumpiness, short sequence patterns -> could be exploited by “reactive”/automaticity - Global: No lures, large vocabulary -> pattern repeats implies a target ```{r libraries, message=FALSE, include=FALSE, paged.print=FALSE} library(ggplot2) library(tidyverse) library(stringi) library(plsRglm) library(plsdof) library(caret) ``` ```{r params} load('./data/CL2015.RData') window_size <- 8 ``` $P=\{V,D,C,W\}$ $V=\{x_N,x_{T},x_{T,local},x_L,x_{L,local},x_V,x_U,x_S,x_{S,local},x_G\}$ $D=\{\}$ ```{r history} with_lures <- function(stimulus, stimulus_type, n) { sapply(1:length(stimulus), function(i) { lures <- c(as.character(stimulus[i-n-1]), as.character(stimulus[i-n+1])) are_valid_trials <- i>n && all(!is.na(c(lures,stimulus[i]))) ifelse(are_valid_trials && stimulus[i] %in% lures, "lure", as.character(stimulus_type[i])) }) } with_history <- function(stimuli, length=16, fixed=F) { seq <- paste(stimuli, collapse = '') sapply(1:length(stimuli), function(i) { stri_reverse(str_sub(seq, max(1,i-length+1), i)) }) #ifelse(fixed, h[str_length(h)==size], h) } # $x_{s,local}$ with_skewness <- function(history) { sapply(history, function(h) { freqs <- table(unlist(str_split(h,""))) sum(sort(freqs, decreasing = T)[1:2]) - 1 }) } # $x_{u,local}$ with_lumpiness <- function(history) { sapply(history, function(h) { freqs <- table(unlist(str_split(h,""))) max(freqs) - 1 }) } with_targets_ratio <- function(stimulus_type, length=16) { sapply(1:length(stimulus_type), function(i) { trials <- stimulus_type[max(1,i-length):i] length(trials[trials=="target"]) / length(trials) }) } with_lures_ratio <- function(stimulus_type, length=16) { sapply(1:length(stimulus_type), function(i) { trials <- stimulus_type[max(1,i-length):i] length(trials[trials=="lure"]) / length(trials) }) } NB2 <- NB %>% filter(participant=="P13") %>% group_by(participant, condition, block) %>% mutate(n = ifelse(condition=='2-back',2,3)) %>% mutate(stimulus_type = with_lures(stimulus, stimulus_type, n)) %>% mutate(history = with_history(stimulus, window_size)) %>% mutate(x_sl = with_skewness(history)) %>% mutate(x_ul = with_lumpiness(history)) %>% mutate(x_t = with_targets_ratio(stimulus_type, window_size)) %>% mutate(x_l = with_lures_ratio(stimulus_type, window_size)) %>% ungroup() pca <- prcomp(~x_sl+x_ul+x_t+x_l, NB2, center = TRUE,scale. = TRUE, na.action=na.exclude) NB2 <- NB2 %>% mutate(pc1=pca$x[,'PC1'], pc2=pca$x[,'PC2']) # caret library(caret) # Compile cross-validation settings any(is.na(NB2)) NB2 <- na.omit(NB2) # set.seed(100) # trainingfold <- createMultiFolds(NB2@correct, k = 5, times = 10) # # # PLS # mod1 <- train(correct ~ ., data = NB2[,c("correct","x_sl","x_ul","x_t","x_l")], # method = "pls", # metric = "Accuracy", # tuneLength = 20, # trControl = trainControl("repeatedcv", index = trainingfold, selectionFunction = "oneSE"), # preProc = c("zv","center","scale")) # # # Check CV # plot(mod1) plsResult <- plsR(correct ~ ., data=NB2[,c("correct","x_sl","x_ul","x_t","x_l")],3) plsResult ```