---
title: "Evaluating N-Back Sequences"
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE)
library(tidyverse)
library(ggplot2)
library(stringi)
library(GA)
library(dbscan)
```
# Introduction
## Problem
- local statistical properties of the n-back affect how we respond.
- local vs. global properties, what matters the most?
###
- an agent to replicate behavioral data sets
- an online service to generate and evaluate n-back sequences
## Method
- create a history window for each trial (a.k.a, contiguous subsequences)
- calculate local $T$, $L$, $S$, $U$, $RT_{mean}$, $Accuracy_{mean}$ for each subsequence
- Model RT/Acc (response vars) with local properties (exp. vars)
- Cluster responses (or exp. vars?)
- Investigate if extracted clusters are statistically different
### Explanatory Variables
- $T$ number of targets
- $L$ number of lures
- $S$ Skewness score
- $U$ Uniformity (!repetition)
### Response Variables
- $RT_{mean}$
- $Accuracy_{mean}$
## Constraints
- fixed number of targets
- fixed number of lures (a.k.a, foils)
- uniform distribution of choices
- controlled local lumpiness
## Modeling
- Create two models for local and global features as explanatory vars
- Continue with modeling RT and Accuracy based upon local and global feats and compare them. Which model provides a better description of the recoreded RT and Accuracy vars? (model comparasion, model selection, etc)
The N-Back data set from @cardoso-leite2015 contains all required parameters for this study including RT, accuracy, and stimuli.
```{r, echo=F}
load('./data/CL2015.RData')
```
```{r, include=F}
trials <- c('a','b','c','d','c','d','b','a','a','d','b','a','c','c','a','c')
min_len <-4
max_len <-4
contig_seqs = list()
for (st in 1:length(trials)) {
min_fin_index <- st + min_len - 1
max_fin_index <- min(st + max_len -1, length(trials))
for (fin in min_fin_index:max_fin_index) {
seq <- list(trials[st:fin])
contig_seqs <- c(contig_seqs, seq)
}
}
```
Each constraint is a cost function to minimize for each sequence of stimuli.
```{r, eval=F}
# Codes for fitness and loss functions
history <- contig_seqs
targets <- 4
lures <- 2
targets_fitness <- function(x) 1.0 - 10*dnorm(x,mean=0,sd=4)
lures_fitness <- function(x) 1.0 - 10*dnorm(x,mean=0,sd=4)
# calc skewness
skewness_fitness <- function(x, choices) {
uniform_ratio <- length(x) / length(choices)
deviation_from_uniform <- setNames(vector('numeric', length(choices)), choices)
for (c in choices) {
deviation_from_uniform[c] = abs(length(x[x==c]) - uniform_ratio)
}
#TODO convert to gaussian loss
max(deviation_from_uniform)
}
ralph2014_skewed <- function(x, choices) {
#trials = len(seq)
#freqs = [float(seq.count(c)) for c in choices]
#ralph_skewed = sum(heapq.nlargest(int(len(choices) / 2), freqs)) > (trials * 2 / 3)
#return ralph_skewed
F
}
merged_fitness <- function(x) targets_fitness(x) + lures_fitness(x) + skewness_fitness(x)
GA <- ga(type = "real-valued", fitness = fitness, lower = -10, upper = 10)
plot(GA)
targets_sample <- data.frame(x=-targets:targets)
targets_sample %>%
ggplot(aes(x,y=targets_fitness(x))) +
geom_line()
```
```{r}
# Codes to calculate local statistical properties
with_lures <- function(condition, stim, stim_type, history = NA) {
# extend to 2-back/3-back
sapply(
1:length(stim),
function(i) {
switch(as.character(condition[i]),
"2-back" = {
ifelse(
stim[i]==stri_sub(history[i],-2,-2) || stim[i]==stri_sub(history[i],-4,-4),
'lure',
as.character(stim_type[i])
)},
"3-back" = {
ifelse(
stim[i]==stri_sub(history[i],-3,-3) || stim[i]==stri_sub(history[i],-5,-5),
'lure',
as.character(stim_type[i])
)}
)
})
}
with_targets_ratio <- function(stimulus_type, history) {
sapply(1:length(history), function(i) {
trials <- stimulus_type[(i-stri_length(history[i])):i]
trials <- unlist(trials, use.names=FALSE)
length(trials[trials=="target"])
})
}
with_lures_ratio <- function(stimulus_type, history) {
sapply(1:length(history), function(i) {
trials <- stimulus_type[(i-str_length(history[i])):i]
trials <- unlist(trials, use.names=FALSE)
length(trials[trials=="lure"])
})
}
with_lumpiness_score <- function(stimulus, history) {
sapply(1:length(history), function(i) {
trials <- stimulus[(i-str_length(history[i])):i]
trials <- unlist(trials, use.names=FALSE)
max(table(trials)) - 1
})
}
with_skewness_score <- function(stimulus, history) {
sapply(1:length(history), function(i) {
trials <- stimulus[(i-str_length(history[i])):i]
trials <- unlist(trials, use.names=FALSE)
sum(sort(table(trials), decreasing = T)[1:2]) - 1
})
}
with_history <- function(stims, size=16) {
res <- c('')
for (i in 2:length(stims)) {
res[i] <- stri_sub(paste(res[i-1], stims[i], sep=''),from=-size,length=size)
}
#res <- ifelse(stri_length(res)==size, res, NA)
res
}
normalize_scores <- function(targets_ratio, lures_ratio, skewness, lumpiness) {
#TODO
sapply(1:length(targets_ratio), function(i) 0)
}
NB_modified <- NB %>%
group_by(participant, condition, block) %>%
mutate(history = with_history(stimulus)) %>%
#mutate(stimulus_type = map_chr(.x=stimulus, stim_type=stimulus_type, history=history,.f=with_lures))
mutate(stimulus_type_2 = with_lures(condition, stimulus, stimulus_type, history)) %>%
mutate(t = with_targets_ratio(stimulus_type_2, history)) %>%
mutate(l = with_lures_ratio(stimulus_type_2, history)) %>%
mutate(s = with_skewness_score(stimulus, history)) %>%
mutate(u = with_lumpiness_score(stimulus, history)) %>%
filter(stri_length(history)==16) %>%
#normalize_scores(targets_ratio, lures_ratio, skewness, lumpiness) %>%
ungroup()
NB_modified %>%
mutate(correct = ifelse(stimulus_type=='burn-in',NA,correct))
## group-level averaged NB, a single row represent an observation for a single subject
## in a single condition
NB_avg <- NB_modified %>%
group_by(participant, condition) %>%
mutate(correct = ifelse(stimulus_type=='burn-in',NA,correct)) %>%
summarise(targets=sum(t), lures=sum(l), skewness=sum(s), lumpiness=sum(u), rts = mean(rt, na.rm=T), accuracy=sum(correct,na.rm=T)/90) %>%
ungroup()
# print
# NB_modified %>%
# filter(participant=='P1') %>%
# View()
#
pca <- prcomp(NB_modified[,c('t','l','s','u')], center = TRUE,scale. = TRUE)
NB_modified <- NB_modified %>% mutate(pc1=pca$x[,'PC1'], pc2=pca$x[,'PC2'])
fit <- lm(correct ~ t * s, NB_modified)
```
```{r}
# DBSCAN Clustering (RT+ACCURACY against skewness)
NB_avg %>%
mutate(cluster = dbscan(cbind(accuracy,rts), eps = 0.5, minPts = 3)$cluster) %>%
ggplot(aes(skewness, accuracy, color=factor(cluster))) +
ggtitle("targets (window = 16 trials)", "NOTE: each point is a single participant") +
geom_point(alpha=0.3) +
#geom_smooth(method='lm', se = F) +
facet_wrap(~condition)
```
```{r}
## single-subject figures
NB_modified %>%
ggplot(aes(t,s,color=correct)) +
geom_jitter() +
geom_point() +
stat_summary(fun.y="mean")
NB_modified %>%
filter(!is.na(correct)) %>%
ggplot(aes(jitter(s),jitter(t),color=correct)) +
geom_jitter() +
geom_point(alpha=0.1)
NB_modified %>%
filter(!is.na(correct)) %>%
ggplot(aes(jitter(s),jitter(u),color=correct)) +
geom_jitter() +
geom_point() +
facet_wrap(~condition)
# rt/accuracy and lures
NB_modified %>%
filter(!is.na(correct)) %>%
ggplot(aes(jitter(l),rt,color=correct,alpha=0.01)) +
geom_jitter() +
geom_point(shape=16) +
geom_smooth(method="lm",se = F) +
facet_wrap(~condition, scales="free")
NB_modified %>%
filter(!is.na(correct)) %>%
ggplot(aes(pc1,pc2,color=correct)) +
geom_point() +
geom_smooth(method="lm",se = F) +
facet_wrap(~condition, scales="free")
NB_modified %>%
filter(!is.na(correct)) %>%
ggplot(aes(pc1,rt,color=correct)) +
geom_point() +
geom_smooth(method="lm",se = F) +
facet_wrap(~condition, scales="free")
NB_modified %>%
filter(!is.na(correct)) %>%
ggplot(aes(pc1,correct,color=correct)) +
geom_point() +
geom_smooth(method="lm",se = F) +
facet_wrap(~condition, scales="free")
```
## TODO
- data %>% mutate(constraint1=fitness1(history), constrain2=fitness2(history),
- constraint3=fitness(history))
- kmeans(NB)
- ggplot(kmeans_clusters$accuracy)
- ggplot(kmeans_clusters$rt)
-