Newer
Older
notebooks / ccn2019.Rmd
---
title: "Evaluating N-Back Sequences"
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE)
library(tidyverse)
library(ggplot2)
library(stringi)
library(GA)
library(dbscan)
library(inspectdf)

load('./data/CL2015.RData')
```

### Variables
- $T$ number of targets
- $L$ number of lures
- $S$ Skewness score
- $U$ Uniformity (!repetition)
- $RT_{mean}$
- $Accuracy_{mean}$
- $dprime$
- $criterion$

## Constraints

- fixed number of targets
- fixed number of lures (a.k.a, foils)
- uniform distribution of choices
- controlled local lumpiness


Each constraint is an up side down quadratic function to be minimized.

```{r, eval=F}
targets_cost <- function(x) 1.0 - 10*dnorm(x,mean=0,sd=4)
lures_cost <- function(x) 1.0 - 10*dnorm(x,mean=0,sd=4)
skewness_cost <- function(x, choices) {
  uniform_ratio <- length(x) / length(choices)
  deviation_from_uniform <- setNames(vector('numeric', length(choices)), choices)
  for (c in choices) {
    deviation_from_uniform[c] = abs(length(x[x==c]) - uniform_ratio)
  }
  #TODO convert to gaussian loss
  max(deviation_from_uniform)
}

lumpiness_cost <- function(x, choices) {
  #trials = len(seq)
  #freqs = [float(seq.count(c)) for c in choices]
  #ralph_skewed = sum(heapq.nlargest(int(len(choices) / 2), freqs)) > (trials * 2 / 3)
  #return ralph_skewed
  NA
}

#merged_cost <- function(x) targets_fitness(x) + lures_fitness(x) + skewness_fitness(x)
#GA <- ga(type = "real-valued", fitness = merged_cost, lower = -10, upper = 10)
#plot(GA)
```


```{r}

with_lures <- function(condition, stim, stim_type, history = NA) {
  sapply(1:length(stim), 
    function(i) {
      switch(as.character(condition[i]),
             "2-back" = {
               ifelse(
                 stim[i]==stri_sub(history[i],-2,-2) ||  stim[i]==stri_sub(history[i],-4,-4), 
                 'lure', 
                 as.character(stim_type[i])
               )},
             "3-back" = {
               ifelse(
                 stim[i]==stri_sub(history[i],-3,-3) ||  stim[i]==stri_sub(history[i],-5,-5), 
                 'lure', 
                 as.character(stim_type[i])
               )}
      )

    })
}

with_targets_ratio <- function(stimulus_type, history) {
  sapply(1:length(history), function(i) {
    trials <- stimulus_type[(i-stri_length(history[i])):i]
    trials <- unlist(trials, use.names=FALSE)
    length(trials[trials=="target"])
  })
}

with_lures_ratio <- function(stimulus_type, history) {
  sapply(1:length(history), function(i) {
    trials <- stimulus_type[(i-str_length(history[i])):i]
    trials <- unlist(trials, use.names=FALSE)
    length(trials[trials=="lure"])
  })
}

with_lumpiness_score <- function(stimulus, history) {
  sapply(1:length(history), function(i) {
    trials <- stimulus[(i-str_length(history[i])):i]
    trials <- unlist(trials, use.names=FALSE)
    max(table(trials)) - 1
  })
}

with_lag <- function(stimulus, history) {
  # find last occurance the of stimulus
}

with_skewness_score <- function(stimulus, history) {
  sapply(1:length(history), function(i) {
    trials <- stimulus[(i-str_length(history[i])):i]
    trials <- unlist(trials, use.names=FALSE)
    sum(sort(table(trials), decreasing = T)[1:2]) - 1
  })
}

with_history <- function(stims, size=16) {
  res <- c('')
  for (i in 2:length(stims)) {
    res[i] <- stri_sub(paste(res[i-1], stims[i], sep=''),from=-size,length=size)
  }
  #res <- ifelse(stri_length(res)==size, res, NA)
  res
}

normalize_scores <- function(targets_ratio, lures_ratio, skewness, lumpiness) {
  #TODO
  sapply(1:length(targets_ratio), function(i) 0)
}

window_size <- 8

NB_modified <- NB %>%
  group_by(participant, condition, block) %>%
  mutate(history = with_history(stimulus, size=window_size)) %>%
  #mutate(stimulus_type = map_chr(.x=stimulus, stim_type=stimulus_type, history=history,.f=with_lures))
  mutate(stimulus_type_2 = with_lures(condition, stimulus, stimulus_type, history)) %>%
  mutate(targets = with_targets_ratio(stimulus_type_2, history)) %>%
  mutate(lures = with_lures_ratio(stimulus_type_2, history)) %>%
  mutate(skewness = with_skewness_score(stimulus, history)) %>%
  mutate(lumpiness = with_lumpiness_score(stimulus, history)) %>%
  filter(stri_length(history)==window_size) %>%
  mutate(correct = ifelse(stimulus_type=='burn-in',NA,correct)) %>%
    #normalize_scores(targets_ratio, lures_ratio, skewness, lumpiness) %>%
  ungroup()

pca <- prcomp(NB_modified[,c('targets','lures','skewness','lumpiness')], center = TRUE,scale. = TRUE)

NB_modified <- NB_modified %>% mutate(pc1=pca$x[,'PC1'], pc2=pca$x[,'PC2'])


## participant-level averaged NB, a single row represent an observation for a single subject
## in a single condition
NB_avg <- NB_modified %>%
  group_by(participant, condition) %>%
  mutate(correct = ifelse(stimulus_type=='burn-in',NA,correct)) %>%
  summarise(
    targets=sum(targets), 
    lures=sum(lures), 
    skewness=sum(skewness), 
    lumpiness=sum(lumpiness), 
    rt = mean(rt, na.rm=T), 
    correct=sum(correct,na.rm=T)/90) %>%
  ungroup()

# print
# NB_modified %>%
#   filter(participant=='P1') %>%
#   View()
#   
  

fit <- lm(correct ~ t * s * u * l * d, NB_modified)

```


```{r}

# DBSCAN Clustering (RT+ACCURACY against skewness)
NB_avg <- NB_avg %>%
  mutate(cluster = dbscan(cbind(correct,rt), eps = 0.3, minPts = 3)$cluster)

NB_avg %>%
  ggplot(aes(skewness, correct, color=factor(cluster))) +
  ggtitle(" clusters (window = 16 trials)", "NOTE: each point is a single participant") +
  geom_point(alpha=0.3) +
  #geom_smooth(method='lm', se = F) +
  facet_wrap(~condition)

```


```{r}
## single-subject figures
NB_modified %>%
  ggplot(aes(t,s,color=correct)) +
  geom_jitter() +
  geom_point() +
  stat_summary(fun.y="mean")

NB_modified %>%
  inspect_cor(show_plot = T)

NB_avg %>%
  inspect_cor(show_plot = T)

NB_modified %>%
  ggplot(aes(rt,correct,color=u)) +
  geom_jitter() +
  geom_point() +
  stat_summary(fun.y="mean")

NB_modified %>%
  filter(!is.na(correct)) %>%
  ggplot(aes(jitter(s),jitter(t),color=correct)) +
    geom_jitter() +
    geom_point(alpha=0.1)

NB_modified %>%
  filter(!is.na(correct)) %>%
  ggplot(aes(jitter(s),jitter(u),color=correct)) +
    geom_jitter() +
    geom_point() +
    facet_wrap(~condition)

# rt/accuracy and lures
NB_modified %>%
  filter(!is.na(correct)) %>%
  ggplot(aes(jitter(l),rt,color=correct,alpha=0.01)) +
    geom_jitter() +
    geom_point(shape=16) +
    geom_smooth(method="lm",se = F) + 
    facet_wrap(~condition, scales="free")


NB_modified %>%
  filter(!is.na(correct)) %>%
  ggplot(aes(pc1,pc2,color=correct)) +
    geom_point() +
    geom_smooth(method="lm",se = F) + 
    facet_wrap(~condition, scales="free")

NB_modified %>%
  filter(!is.na(correct)) %>%
  ggplot(aes(pc1,rt,color=correct)) +
    geom_point(alpha=0.3) +
    geom_smooth(method="lm",se = F) + 
    facet_wrap(~condition, scales="free")

NB_modified %>%
  filter(!is.na(correct)) %>%
  ggplot(aes(pc1,correct,color=correct)) +
    geom_point() +
    geom_smooth(method="lm",se = F) + 
    facet_wrap(~condition, scales="free")


```

## TODO
  - data %>% mutate(constraint1=fitness1(history), constrain2=fitness2(history),
  - constraint3=fitness(history))
  - kmeans(NB)
  - ggplot(kmeans_clusters$accuracy) 
  - ggplot(kmeans_clusters$rt)
  - 




```{python}
a=2
p
```