diff --git a/ccn2019.Rmd b/ccn2019.Rmd
index d01e020..9074227 100644
--- a/ccn2019.Rmd
+++ b/ccn2019.Rmd
@@ -9,34 +9,20 @@
library(stringi)
library(GA)
library(dbscan)
+library(inspectdf)
+
+load('./data/CL2015.RData')
```

-# Introduction
-
-## Problem
-  - local statistical properties of the n-back affect how we respond.
-  - local vs. global properties, what matters the most?
-
-###
-  - an agent to replicate behavioral data sets
-  - an online service to generate and evaluate n-back sequences
-
-## Method
-  - create a history window for each trial (a.k.a, contiguous subsequences)
-  - calculate local $T$, $L$, $S$, $U$, $RT_{mean}$, $Accuracy_{mean}$ for each subsequence
-  - Model RT/Acc (response vars) with local properties (exp. vars)
-  - Cluster responses (or exp. vars?)
-  - Investigate if extracted clusters are statistically different
-
-### Explanatory Variables
+### Variables
- $T$ number of targets
- $L$ number of lures
- $S$ Skewness score
-- $U$ Uniformity (!repetition)
+- $U$ uniformity (i.e., non-repetition)
-
-### Response Variables
- $RT_{mean}$
- $Accuracy_{mean}$
+- $dprime$ (sensitivity)
+- $criterion$ (response bias)

## Constraints
@@ -45,52 +31,13 @@
- uniform distribution of choices
- controlled local lumpiness

-## Modeling
-  - Create two models for local and global features as explanatory vars
-  - Continue with modeling RT and Accuracy based upon local and global feats and compare them. Which model provides a better description of the recoreded RT and Accuracy vars? (model comparasion, model selection, etc)
-
-The N-Back data set from @cardoso-leite2015 contains all required parameters for this study including RT, accuracy, and stimuli.
-
-```{r, echo=F}
-load('./data/CL2015.RData')
-```
-
-```{r, include=F}
-trials <- c('a','b','c','d','c','d','b','a','a','d','b','a','c','c','a','c')
-min_len <-4
-max_len <-4
-
-contig_seqs = list()
-
-for (st in 1:length(trials)) {
-  min_fin_index <- st + min_len - 1
-  max_fin_index <- min(st + max_len - 1, length(trials))
-
-  for (fin in min_fin_index:max_fin_index) {
-    seq <- list(trials[st:fin])
-    contig_seqs <- c(contig_seqs, seq)
-  }
-}
-```
-
-Each constraint is a cost function to minimize for each sequence of stimuli.
+Each constraint is an inverted-bell (upside-down Gaussian) cost function to be minimized for each sequence of stimuli.

```{r, eval=F}
-
-# Codes for fitness and loss functions
-history <- contig_seqs
-targets <- 4
-lures <- 2
-targets_fitness <- function(x) 1.0 - 10*dnorm(x,mean=0,sd=4)
-lures_fitness <- function(x) 1.0 - 10*dnorm(x,mean=0,sd=4)
-
-# calc skewness
-skewness_fitness <- function(x, choices) {
+targets_cost <- function(x) 1.0 - 10*dnorm(x, mean=0, sd=4)
+lures_cost <- function(x) 1.0 - 10*dnorm(x, mean=0, sd=4)
+skewness_cost <- function(x, choices) {
  uniform_ratio <- length(x) / length(choices)
  deviation_from_uniform <- setNames(vector('numeric', length(choices)), choices)
  for (c in choices) {
@@ -100,33 +47,24 @@
  max(deviation_from_uniform)
}

-ralph2014_skewed <- function(x, choices) {
+# R port of the commented-out Python sketch (the original ralph2014_skewed):
+# a sequence counts as skewed when the most frequent half of the choices
+# covers more than two thirds of the trials
+lumpiness_cost <- function(x, choices) {
  #trials = len(seq)
  #freqs = [float(seq.count(c)) for c in choices]
  #ralph_skewed = sum(heapq.nlargest(int(len(choices) / 2), freqs)) > (trials * 2 / 3)
  #return ralph_skewed
-  F
+  freqs <- sapply(choices, function(c) sum(x == c))
+  sum(sort(freqs, decreasing = TRUE)[1:(length(choices) %/% 2)]) > (length(x) * 2 / 3)
}

-merged_fitness <- function(x) targets_fitness(x) + lures_fitness(x) + skewness_fitness(x)
-GA <- ga(type = "real-valued", fitness = fitness, lower = -10, upper = 10)
-plot(GA)
-
-targets_sample <- data.frame(x=-targets:targets)
-targets_sample %>%
-  ggplot(aes(x,y=targets_fitness(x))) +
-  geom_line()
+#merged_cost <- function(x) targets_cost(x) + lures_cost(x) + skewness_cost(x)
+#GA <- ga(type = "real-valued", fitness = merged_cost, lower = -10, upper = 10)
+#plot(GA)
```
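
For intuition about the shape of these costs, a minimal sketch (assuming only the `targets_cost` definition above) that restores the preview plot dropped from the earlier revision:

```{r, eval=F}
# the cost is lowest (~0) when the deviation from the desired target count
# is zero, and approaches 1 as the deviation grows: an inverted bell
data.frame(x = seq(-10, 10, by = 0.1)) %>%
  ggplot(aes(x, y = targets_cost(x))) +
  geom_line() +
  labs(x = "deviation from desired count", y = "cost")
```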

```{r}
-# Codes to calculate local statistical properties
with_lures <- function(condition, stim, stim_type, history = NA) {
-
-  # extend to 2-back/3-back
-  sapply(
-    1:length(stim),
+  sapply(1:length(stim),
    function(i) {
      switch(as.character(condition[i]),
        "2-back" = {
@@ -170,6 +108,10 @@
  })
}

+with_lag <- function(stimulus, history) {
+  # lag since the last occurrence of the stimulus in its history (a sketch;
+  # assumes `history` holds the preceding trials as a string)
+  sapply(1:length(history), function(i) {
+    pos <- stri_locate_all_fixed(history[i], stimulus[i])[[1]][, 'start']
+    if (all(is.na(pos))) NA else stri_length(history[i]) - max(pos) + 1
+  })
+}
+
with_skewness_score <- function(stimulus, history) {
  sapply(1:length(history), function(i) {
    trials <- stimulus[(i-str_length(history[i])):i]
@@ -192,28 +134,39 @@
  sapply(1:length(targets_ratio), function(i) 0)
}

+window_size <- 8
+
NB_modified <- NB %>%
  group_by(participant, condition, block) %>%
-  mutate(history = with_history(stimulus)) %>%
+  mutate(history = with_history(stimulus, size=window_size)) %>%
  #mutate(stimulus_type = map_chr(.x=stimulus, stim_type=stimulus_type, history=history,.f=with_lures)) %>%
  mutate(stimulus_type_2 = with_lures(condition, stimulus, stimulus_type, history)) %>%
-  mutate(t = with_targets_ratio(stimulus_type_2, history)) %>%
-  mutate(l = with_lures_ratio(stimulus_type_2, history)) %>%
-  mutate(s = with_skewness_score(stimulus, history)) %>%
-  mutate(u = with_lumpiness_score(stimulus, history)) %>%
-  filter(stri_length(history)==16) %>%
-  #normalize_scores(targets_ratio, lures_ratio, skewness, lumpiness) %>%
+  mutate(targets = with_targets_ratio(stimulus_type_2, history)) %>%
+  mutate(lures = with_lures_ratio(stimulus_type_2, history)) %>%
+  mutate(skewness = with_skewness_score(stimulus, history)) %>%
+  mutate(lumpiness = with_lumpiness_score(stimulus, history)) %>%
+  filter(stri_length(history)==window_size) %>%
+  mutate(correct = ifelse(stimulus_type=='burn-in', NA, correct)) %>%
+  #normalize_scores(targets_ratio, lures_ratio, skewness, lumpiness) %>%
  ungroup()

-NB_modified %>%
-  mutate(correct = ifelse(stimulus_type=='burn-in',NA,correct))
-
-## group-level averaged NB, a single row represent an observation for a single subject
-## in a single condition
+pca <- prcomp(NB_modified[,c('targets','lures','skewness','lumpiness')], center = TRUE, scale. = TRUE)
+NB_modified <- NB_modified %>% mutate(pc1=pca$x[,'PC1'], pc2=pca$x[,'PC2'])
+
+## participant-level averaged NB; a single row represents an observation for a
+## single participant in a single condition
NB_avg <- NB_modified %>%
  group_by(participant, condition) %>%
  mutate(correct = ifelse(stimulus_type=='burn-in',NA,correct)) %>%
-  summarise(targets=sum(t), lures=sum(l), skewness=sum(s), lumpiness=sum(u), rts = mean(rt, na.rm=T), accuracy=sum(correct,na.rm=T)/90) %>%
+  summarise(
+    targets=sum(targets),
+    lures=sum(lures),
+    skewness=sum(skewness),
+    lumpiness=sum(lumpiness),
+    rt = mean(rt, na.rm=T),
+    correct=sum(correct, na.rm=T)/90) %>%
  ungroup()

# print
@@ -222,11 +175,8 @@
# View()
#

-pca <- prcomp(NB_modified[,c('t','l','s','u')], center = TRUE,scale. = TRUE)
-NB_modified <- NB_modified %>% mutate(pc1=pca$x[,'PC1'], pc2=pca$x[,'PC2'])
-
-fit <- lm(correct ~ t * s, NB_modified)
+# dprime is not yet computed at the trial level, so the model uses the four
+# local properties (the renamed t/s/u/l columns)
+fit <- lm(correct ~ targets * skewness * lumpiness * lures, NB_modified)
```
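
$dprime$ and $criterion$ appear in the variable list above but are never derived. A minimal signal-detection sketch, assuming `stimulus_type_2` marks target trials as `'target'`, `correct` is coded 0/1, and any incorrect response on a non-target trial counts as a false alarm:

```{r, eval=F}
NB_sdt <- NB_modified %>%
  filter(!is.na(correct)) %>%
  group_by(participant, condition) %>%
  summarise(
    # hit rate on target trials, false-alarm rate on non-target trials;
    # the +0.5/+1 log-linear correction avoids infinite z-scores
    hit_rate = (sum(correct[stimulus_type_2 == 'target']) + 0.5) /
               (sum(stimulus_type_2 == 'target') + 1),
    fa_rate  = (sum(1 - correct[stimulus_type_2 != 'target']) + 0.5) /
               (sum(stimulus_type_2 != 'target') + 1),
    dprime    = qnorm(hit_rate) - qnorm(fa_rate),
    criterion = -(qnorm(hit_rate) + qnorm(fa_rate)) / 2
  ) %>%
  ungroup()
```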

@@ -234,10 +184,12 @@
```{r}
# DBSCAN Clustering (RT+ACCURACY against skewness)
+NB_avg <- NB_avg %>%
+  mutate(cluster = dbscan(cbind(correct,rt), eps = 0.3, minPts = 3)$cluster)
+
NB_avg %>%
-  mutate(cluster = dbscan(cbind(accuracy,rts), eps = 0.5, minPts = 3)$cluster) %>%
-  ggplot(aes(skewness, accuracy, color=factor(cluster))) +
-  ggtitle("targets (window = 16 trials)", "NOTE: each point is a single participant") +
+  ggplot(aes(skewness, correct, color=factor(cluster))) +
+  ggtitle("clusters (window = 8 trials)", "NOTE: each point is a single participant") +
  geom_point(alpha=0.3) +
  #geom_smooth(method='lm', se = F) +
  facet_wrap(~condition)

@@ -252,6 +204,19 @@
  geom_jitter() +
  geom_point() +
  stat_summary(fun.y="mean")

+NB_modified %>%
+  inspect_cor(show_plot = T)
+
+NB_avg %>%
+  inspect_cor(show_plot = T)
+
+NB_modified %>%
+  ggplot(aes(rt, correct, color=lumpiness)) +
+  geom_jitter() +
+  geom_point() +
+  stat_summary(fun.y="mean")
+
NB_modified %>%
  filter(!is.na(correct)) %>%
-  ggplot(aes(jitter(s),jitter(t),color=correct)) +
+  ggplot(aes(jitter(skewness), jitter(targets), color=correct)) +

@@ -285,7 +250,7 @@
NB_modified %>%
  filter(!is.na(correct)) %>%
  ggplot(aes(pc1,rt,color=correct)) +
-  geom_point() +
+  geom_point(alpha=0.3) +
  geom_smooth(method="lm",se = F) +
  facet_wrap(~condition, scales="free")

@@ -307,3 +272,10 @@
-  ggplot(kmeans_clusters$rt)
-
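
The `eps` above was tuned by hand (0.5 in the old revision, 0.3 here). A standard sanity check is the k-nearest-neighbor distance plot that ships with the dbscan package (a sketch, with k set to `minPts`):

```{r, eval=F}
# points are sorted by the distance to their 3rd nearest neighbor;
# a knee in this curve suggests a reasonable eps for dbscan
kNNdistplot(cbind(NB_avg$correct, NB_avg$rt), k = 3)
abline(h = 0.3, lty = 2)  # the eps currently used above
```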
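
An earlier revision asked which feature set better describes the recorded responses. One way to revisit that with the objects computed above (a sketch comparing the raw local properties against their first two principal components, lower AIC preferred):

```{r, eval=F}
# both models predict trial-level accuracy; AIC trades fit against complexity
fit_features <- lm(correct ~ targets * skewness * lumpiness * lures, NB_modified)
fit_pcs      <- lm(correct ~ pc1 * pc2, NB_modified)
AIC(fit_features, fit_pcs)
```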

diff --git a/ccn2019.rev2.Rmd b/ccn2019.rev2.Rmd
new file mode 100644
index 0000000..16bd274
--- /dev/null
+++ b/ccn2019.rev2.Rmd
@@ -0,0 +1,28 @@
+---
+title: "Statistical Properties of the N-Back Sequences"
+output: html_notebook
+---
+
+# Problems
+
+Statistical properties of n-back sequences bias behavior. Under a specified structure, these biases allow multiple cognitive strategies, producing heterogeneous behavior in a "gold standard" cognitive task.
+
+# Gaps
+
+- Unclear how to parameterize interesting variations for sequence generation
+- How do we model these multiple strategies? (This requires identifying which sequence variations matter.)
+  - local vs. global properties: which one matters the most?
+  - Local: lumpiness, short sequence patterns -> could be exploited by “reactive”/automatic strategies
+  - Global: no lures, large vocabulary -> a repeated pattern implies a target
+
+```{r libraries, message=FALSE, include=FALSE, paged.print=FALSE}
+library(ggplot2)
+library(tidyverse)
+```
+
+```{r datasets}
+load('./data/CL2015.RData')
+```
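
To sanity-check the load, a quick structural look at the data (a sketch assuming `CL2015.RData` provides the same trial-level `NB` data frame used in ccn2019.Rmd):

```{r, eval=F}
# expected to include participant, condition, block, stimulus,
# stimulus_type, rt, and correct (the columns used in ccn2019.Rmd)
glimpse(NB)
NB %>% count(condition)
```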