---
title: "Unbiased N-Back"
output:
  ioslides_presentation: default
  slidy_presentation: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE)
library(ggplot2)
library(tidyverse)
library(GA)
library(stringi)

load('./data/CL2015.RData')
```

## Problem
 - Local statistical properties of the n-back sequence affect how we respond.
 - Local vs. global properties: which matters most?

### Goals
 - an agent that replicates the behavioral data sets
 - an online service to generate and evaluate n-back sequences

## Method
 - create a history window for each trial (a.k.a. contiguous subsequences)
 - calculate local $T$, $L$, $S$, $U$, $RT_{mean}$, $Accuracy_{mean}$ for each subsequence
 - model RT/Accuracy (response variables) using the local properties (explanatory variables)
 - cluster the responses (or the explanatory variables?)
 - investigate whether the extracted clusters are statistically different

### Explanatory Variables
- $T$: number of targets
- $L$: number of lures
- $S$: skewness score
- $U$: uniformity (non-repetition)

### Response Variables
- $RT_{mean}$
- $Accuracy_{mean}$

## Constraints

- fixed number of targets
- fixed number of lures (a.k.a. foils)
- uniform distribution of choices
- controlled local lumpiness
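
These constraints can be checked directly on a candidate sequence. The helper below is a minimal sketch; `check_constraints`, its arguments, and the `"target"`/`"lure"` labels are illustrative placeholders rather than part of the pipeline.

```{r, eval=FALSE}
# Sketch: screen a candidate sequence against the constraints above.
# `seq` is a character vector of stimuli; `stim_types` labels each trial
# as "target", "lure", or "non-target". All names here are illustrative.
check_constraints <- function(seq, stim_types, choices,
                              n_targets = 4, n_lures = 2, max_lumpiness = 2) {
  counts <- table(factor(seq, levels = choices))  # per-choice frequencies
  runs   <- rle(seq)$lengths                      # lengths of back-to-back repeats
  c(
    targets_ok   = sum(stim_types == "target") == n_targets,
    lures_ok     = sum(stim_types == "lure") == n_lures,
    uniform_ok   = (max(counts) - min(counts)) <= 1,  # choices roughly uniform
    lumpiness_ok = max(runs) <= max_lumpiness         # no long runs of one stimulus
  )
}
```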

## Modeling
 - create two models, using local and global features as explanatory variables
 - model RT and Accuracy from the local and global features and compare the two models: which one provides a better description of the recorded RT and Accuracy? (model comparison, model selection; see the sketch below)
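
One way to run that comparison is to fit both models and contrast information criteria. The sketch below assumes the `NB_modified` data frame built later in this notebook, with its local scores `t`, `l`, `s`, `u`; the response column `rt` and the global features `targets_global` and `lures_global` are hypothetical names.

```{r, eval=FALSE}
# Sketch of the planned model comparison. Column names other than t, l, s, u
# are hypothetical placeholders for the actual response/global-feature columns.
local_model  <- lm(rt ~ t + l + s + u, data = NB_modified)
global_model <- lm(rt ~ targets_global + lures_global, data = NB_modified)

AIC(local_model, global_model)  # lower is better (fit vs. complexity trade-off)
BIC(local_model, global_model)
```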


```{r}
# Enumerate contiguous subsequences (history windows) of a toy trial sequence
trials <- c('a','b','c','d','c','d','b','a','a','d','b','a','c','c','a','c')
min_len <- 4
max_len <- 4

contig_seqs <- list()

for (st in seq_along(trials)) {
  min_fin_index <- st + min_len - 1
  max_fin_index <- min(st + max_len - 1, length(trials))

  # skip starting positions that cannot hold a full window of min_len trials
  if (min_fin_index > length(trials)) next

  for (fin in min_fin_index:max_fin_index) {
    contig_seqs <- c(contig_seqs, list(trials[st:fin]))
  }
}

```
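
Given the enumerated windows, per-subsequence properties can be tabulated directly. The sketch below only computes simple counts and a run-based lumpiness indicator; target and lure counts need the trial labels, which this toy example does not carry.

```{r}
# Sketch: simple per-window summaries for the enumerated subsequences
local_props <- map_dfr(contig_seqs, function(win) {
  tibble(
    window    = paste(win, collapse = ""),
    length    = length(win),
    n_unique  = n_distinct(win),            # crude uniformity indicator
    lumpiness = max(rle(win)$lengths) - 1   # 0 when nothing repeats back-to-back
  )
})

head(local_props)
```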


Each constraint is expressed as a cost function to be minimized over candidate sequences of stimuli.

```{r, eval=FALSE}
# Fitness / loss functions for the sequence constraints
history <- contig_seqs
targets <- 4
lures <- 2
choices <- c('a', 'b', 'c', 'd')

# cost is lowest when the deviation x from the desired count is zero
targets_fitness <- function(x) 1.0 - 10 * dnorm(x, mean = 0, sd = 4)
lures_fitness   <- function(x) 1.0 - 10 * dnorm(x, mean = 0, sd = 4)

# skewness cost: largest deviation of any choice's frequency from uniform
skewness_fitness <- function(x, choices) {
  uniform_ratio <- length(x) / length(choices)
  deviation_from_uniform <- setNames(vector('numeric', length(choices)), choices)
  for (c in choices) {
    deviation_from_uniform[c] <- abs(length(x[x == c]) - uniform_ratio)
  }
  # TODO convert to Gaussian loss
  max(deviation_from_uniform)
}

# TRUE when the most frequent half of the choices covers more than 2/3 of the
# trials (port of the original Python/heapq snippet)
ralph2014_skewed <- function(x, choices) {
  freqs <- sapply(choices, function(c) sum(x == c))
  top_half <- sort(freqs, decreasing = TRUE)[1:(length(choices) %/% 2)]
  sum(top_half) > (length(x) * 2 / 3)
}

merged_fitness <- function(x, choices)
  targets_fitness(x) + lures_fitness(x) + skewness_fitness(x, choices)

# GA::ga maximizes its fitness function, so negate the combined cost
GA <- ga(type = "real-valued",
         fitness = function(x) -merged_fitness(x, choices),
         lower = -10, upper = 10)
plot(GA)

# visualize the target-count cost around the desired number of targets
targets_sample <- data.frame(x = -targets:targets)
targets_sample %>%
  ggplot(aes(x, y = targets_fitness(x))) +
  geom_line()
```
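
Note that `GA::ga` maximizes its fitness function, which is why the combined cost is negated above; the real-valued chromosome is only a stand-in here, and searching over actual discrete n-back sequences would call for a permutation or binary encoding instead.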


```{r}

# Codes to calculate local statistical properties
# Relabel a trial as a 'lure' when its stimulus matches the n-1 or n+1 position
# of its history (with the current stimulus as the last character of history,
# those are characters -2/-4 for 2-back and -3/-5 for 3-back).
with_lures <- function(condition, stim, stim_type, history = NA) {
  sapply(
    seq_along(stim),
    function(i) {
      switch(condition[i],
             "2-back" = {
               ifelse(
                 stim[i]==stri_sub(history[i],-2,-2) || stim[i]==stri_sub(history[i],-4,-4),
                 'lure',
                 as.character(stim_type[i])
               )},
             "3-back" = {
               ifelse(
                 stim[i]==stri_sub(history[i],-3,-3) || stim[i]==stri_sub(history[i],-5,-5),
                 'lure',
                 as.character(stim_type[i])
               )},
             # other conditions: keep the original label
             as.character(stim_type[i])
      )
    })
}

# Number of targets within each trial's history window
with_targets_ratio <- function(stimulus_type, history) {
  sapply(1:length(history), function(i) {
    trials <- stimulus_type[(i - str_length(history[i]) + 1):i]
    trials <- unlist(trials, use.names = FALSE)
    length(trials[trials == "target"])
  })
}

# Number of lures within each trial's history window
with_lures_ratio <- function(stimulus_type, history) {
  sapply(1:length(history), function(i) {
    trials <- stimulus_type[(i - str_length(history[i]) + 1):i]
    trials <- unlist(trials, use.names = FALSE)
    length(trials[trials == "lure"])
  })
}

# Lumpiness: count of the most frequent stimulus in the window, minus one
with_lumpiness_score <- function(stimulus, history) {
  sapply(1:length(history), function(i) {
    trials <- stimulus[(i - str_length(history[i]) + 1):i]
    trials <- unlist(trials, use.names = FALSE)
    max(table(trials)) - 1
  })
}

# Skewness score (TODO: placeholder, returns 0 for every trial)
with_skewness_score <- function(stimulus, history) {
  sapply(1:length(history), function(i) 0)
}

# Rolling history of the last `max` stimuli (including the current one),
# stored as a string per trial
with_history <- function(stims, max = 8) {
  res <- as.character(stims[1])
  for (i in 2:length(stims)) {
    res[i] <- stri_sub(paste(res[i-1], stims[i], sep=''), from=-max, length=max)
  }
  res
}

normalize_scores <- function(targets_ratio, lures_ratio, skewness, lumpiness) {
  #TODO
  sapply(1:length(targets_ratio), function(i) 0)
}

NB_modified <- NB %>%
  group_by(participant, condition, block) %>%
  mutate(history = with_history(stimulus)) %>%
  #mutate(stimulus_type = map_chr(.x=stimulus, stim_type=stimulus_type, history=history,.f=with_lures))
  mutate(stimulus_type_2 = with_lures(condition, stimulus, stimulus_type, history)) %>%
  mutate(t = with_targets_ratio(stimulus_type_2, history)) %>%
  mutate(l = with_lures_ratio(stimulus_type_2, history)) %>%
  mutate(s = with_skewness_score(stimulus, history)) %>%
  mutate(u = with_lumpiness_score(stimulus, history)) %>%
  #normalize_scores(targets_ratio, lures_ratio, skewness, lumpiness) %>%
  ungroup()

# print
# NB_modified %>%
#   filter(participant=='P1') %>%
#   View()
#   
  
```

## TODO
  - data %>% mutate(constraint1 = fitness1(history), constraint2 = fitness2(history), constraint3 = fitness3(history))
  - kmeans(NB) (see the sketch below)
  - ggplot(kmeans_clusters$accuracy)
  - ggplot(kmeans_clusters$rt)
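
A minimal sketch of the clustering step, assuming `NB_modified` carries the local scores `t`, `l`, `s`, `u` along with hypothetical `rt` and `accuracy` columns:

```{r, eval=FALSE}
# Sketch: cluster trials by their local properties, then inspect how RT and
# accuracy distribute across the clusters. rt/accuracy column names are assumed.
features <- NB_modified %>%
  select(t, l, s, u) %>%
  scale()                       # z-score so no single feature dominates

set.seed(1)
km <- kmeans(features, centers = 3)

NB_clustered <- NB_modified %>%
  mutate(cluster = factor(km$cluster))

NB_clustered %>% ggplot(aes(cluster, rt)) + geom_boxplot()
NB_clustered %>% ggplot(aes(cluster, accuracy)) + geom_boxplot()
```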