--- title: "Evaluating N-Back Sequences" --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = FALSE) library(tidyverse) library(ggplot2) library(stringi) library(GA) library(dbscan) ``` # Introduction ## Problem - local statistical properties of the n-back affect how we respond. - local vs. global properties, what matters the most? ### - an agent to replicate behavioral data sets - an online service to generate and evaluate n-back sequences ## Method - create a history window for each trial (a.k.a, contiguous subsequences) - calculate local $T$, $L$, $S$, $U$, $RT_{mean}$, $Accuracy_{mean}$ for each subsequence - Model RT/Acc (response vars) with local properties (exp. vars) - Cluster responses (or exp. vars?) - Investigate if extracted clusters are statistically different ### Explanatory Variables - $T$ number of targets - $L$ number of lures - $S$ Skewness score - $U$ Uniformity (!repetition) ### Response Variables - $RT_{mean}$ - $Accuracy_{mean}$ ## Constraints - fixed number of targets - fixed number of lures (a.k.a, foils) - uniform distribution of choices - controlled local lumpiness ## Modeling - Create two models for local and global features as explanatory vars - Continue with modeling RT and Accuracy based upon local and global feats and compare them. Which model provides a better description of the recoreded RT and Accuracy vars? (model comparasion, model selection, etc) The N-Back data set from @cardoso-leite2015 contains all required parameters for this study including RT, accuracy, and stimuli. ```{r, echo=F} load('./data/CL2015.RData') ``` ```{r, include=F} trials <- c('a','b','c','d','c','d','b','a','a','d','b','a','c','c','a','c') min_len <-4 max_len <-4 contig_seqs = list() for (st in 1:length(trials)) { min_fin_index <- st + min_len - 1 max_fin_index <- min(st + max_len -1, length(trials)) for (fin in min_fin_index:max_fin_index) { seq <- list(trials[st:fin]) contig_seqs <- c(contig_seqs, seq) } } ``` Each constraint is a cost function to minimize for each sequence of stimuli. ```{r, eval=F} # Codes for fitness and loss functions history <- contig_seqs targets <- 4 lures <- 2 targets_fitness <- function(x) 1.0 - 10*dnorm(x,mean=0,sd=4) lures_fitness <- function(x) 1.0 - 10*dnorm(x,mean=0,sd=4) # calc skewness skewness_fitness <- function(x, choices) { uniform_ratio <- length(x) / length(choices) deviation_from_uniform <- setNames(vector('numeric', length(choices)), choices) for (c in choices) { deviation_from_uniform[c] = abs(length(x[x==c]) - uniform_ratio) } #TODO convert to gaussian loss max(deviation_from_uniform) } ralph2014_skewed <- function(x, choices) { #trials = len(seq) #freqs = [float(seq.count(c)) for c in choices] #ralph_skewed = sum(heapq.nlargest(int(len(choices) / 2), freqs)) > (trials * 2 / 3) #return ralph_skewed F } merged_fitness <- function(x) targets_fitness(x) + lures_fitness(x) + skewness_fitness(x) GA <- ga(type = "real-valued", fitness = fitness, lower = -10, upper = 10) plot(GA) targets_sample <- data.frame(x=-targets:targets) targets_sample %>% ggplot(aes(x,y=targets_fitness(x))) + geom_line() ``` ```{r} # Codes to calculate local statistical properties with_lures <- function(condition, stim, stim_type, history = NA) { # extend to 2-back/3-back sapply( 1:length(stim), function(i) { switch(as.character(condition[i]), "2-back" = { ifelse( stim[i]==stri_sub(history[i],-2,-2) || stim[i]==stri_sub(history[i],-4,-4), 'lure', as.character(stim_type[i]) )}, "3-back" = { ifelse( stim[i]==stri_sub(history[i],-3,-3) || stim[i]==stri_sub(history[i],-5,-5), 'lure', as.character(stim_type[i]) )} ) }) } with_targets_ratio <- function(stimulus_type, history) { sapply(1:length(history), function(i) { trials <- stimulus_type[(i-stri_length(history[i])):i] trials <- unlist(trials, use.names=FALSE) length(trials[trials=="target"]) }) } with_lures_ratio <- function(stimulus_type, history) { sapply(1:length(history), function(i) { trials <- stimulus_type[(i-str_length(history[i])):i] trials <- unlist(trials, use.names=FALSE) length(trials[trials=="lure"]) }) } with_lumpiness_score <- function(stimulus, history) { sapply(1:length(history), function(i) { trials <- stimulus[(i-str_length(history[i])):i] trials <- unlist(trials, use.names=FALSE) max(table(trials)) - 1 }) } with_skewness_score <- function(stimulus, history) { sapply(1:length(history), function(i) { trials <- stimulus[(i-str_length(history[i])):i] trials <- unlist(trials, use.names=FALSE) sum(sort(table(trials), decreasing = T)[1:2]) - 1 }) } with_history <- function(stims, size=16) { res <- c('') for (i in 2:length(stims)) { res[i] <- stri_sub(paste(res[i-1], stims[i], sep=''),from=-size,length=size) } #res <- ifelse(stri_length(res)==size, res, NA) res } normalize_scores <- function(targets_ratio, lures_ratio, skewness, lumpiness) { #TODO sapply(1:length(targets_ratio), function(i) 0) } NB_modified <- NB %>% group_by(participant, condition, block) %>% mutate(history = with_history(stimulus)) %>% #mutate(stimulus_type = map_chr(.x=stimulus, stim_type=stimulus_type, history=history,.f=with_lures)) mutate(stimulus_type_2 = with_lures(condition, stimulus, stimulus_type, history)) %>% mutate(t = with_targets_ratio(stimulus_type_2, history)) %>% mutate(l = with_lures_ratio(stimulus_type_2, history)) %>% mutate(s = with_skewness_score(stimulus, history)) %>% mutate(u = with_lumpiness_score(stimulus, history)) %>% filter(stri_length(history)==16) %>% #normalize_scores(targets_ratio, lures_ratio, skewness, lumpiness) %>% ungroup() NB_modified %>% mutate(correct = ifelse(stimulus_type=='burn-in',NA,correct)) ## group-level averaged NB, a single row represent an observation for a single subject ## in a single condition NB_avg <- NB_modified %>% group_by(participant, condition) %>% mutate(correct = ifelse(stimulus_type=='burn-in',NA,correct)) %>% summarise(targets=sum(t), lures=sum(l), skewness=sum(s), lumpiness=sum(u), rts = mean(rt, na.rm=T), accuracy=sum(correct,na.rm=T)/90) %>% ungroup() # print # NB_modified %>% # filter(participant=='P1') %>% # View() # pca <- prcomp(NB_modified[,c('t','l','s','u')], center = TRUE,scale. = TRUE) NB_modified <- NB_modified %>% mutate(pc1=pca$x[,'PC1'], pc2=pca$x[,'PC2']) fit <- lm(correct ~ t * s, NB_modified) ``` ```{r} # DBSCAN Clustering (RT+ACCURACY against skewness) NB_avg %>% mutate(cluster = dbscan(cbind(accuracy,rts), eps = 0.5, minPts = 3)$cluster) %>% ggplot(aes(skewness, accuracy, color=factor(cluster))) + ggtitle("targets (window = 16 trials)", "NOTE: each point is a single participant") + geom_point(alpha=0.3) + #geom_smooth(method='lm', se = F) + facet_wrap(~condition) ``` ```{r} ## single-subject figures NB_modified %>% ggplot(aes(t,s,color=correct)) + geom_jitter() + geom_point() + stat_summary(fun.y="mean") NB_modified %>% filter(!is.na(correct)) %>% ggplot(aes(jitter(s),jitter(t),color=correct)) + geom_jitter() + geom_point(alpha=0.1) NB_modified %>% filter(!is.na(correct)) %>% ggplot(aes(jitter(s),jitter(u),color=correct)) + geom_jitter() + geom_point() + facet_wrap(~condition) # rt/accuracy and lures NB_modified %>% filter(!is.na(correct)) %>% ggplot(aes(jitter(l),rt,color=correct,alpha=0.01)) + geom_jitter() + geom_point(shape=16) + geom_smooth(method="lm",se = F) + facet_wrap(~condition, scales="free") NB_modified %>% filter(!is.na(correct)) %>% ggplot(aes(pc1,pc2,color=correct)) + geom_point() + geom_smooth(method="lm",se = F) + facet_wrap(~condition, scales="free") NB_modified %>% filter(!is.na(correct)) %>% ggplot(aes(pc1,rt,color=correct)) + geom_point() + geom_smooth(method="lm",se = F) + facet_wrap(~condition, scales="free") NB_modified %>% filter(!is.na(correct)) %>% ggplot(aes(pc1,correct,color=correct)) + geom_point() + geom_smooth(method="lm",se = F) + facet_wrap(~condition, scales="free") ``` ## TODO - data %>% mutate(constraint1=fitness1(history), constrain2=fitness2(history), - constraint3=fitness(history)) - kmeans(NB) - ggplot(kmeans_clusters$accuracy) - ggplot(kmeans_clusters$rt) -