notebooks/ccn2019.rev2.Rmd at 759b8f34606783d5d8750f9a0720a412a9de249b

Fork: 0
morteza / notebooks
Find file
Newer
Older
notebooks / ccn2019.rev2.Rmd
Morteza Ansarinia on 23 May 2019 6 KB caret, AUC, etc for CCN2019
Raw Blame History
---
title: "Statistical Properties of the N-Back Sequences"
output:
  html_notebook: default
  pdf_document: default
editor_options: 
  chunk_output_type: console
---

# Problems

Statistical properties of n-back sequences bias behaviors. These bias, under specified structure, allows multiple cognitive strategies, producing heterogeneous behavior in a "gold standard" cognitive task.

# Gaps

- Unclear how to parameterize interesting variations for sequence generation
- How do we model these multiple strategies (which requires identifying which sequence variations matter) 
    - local vs. global properties, which one matters the most?
    - Local:  lumpiness, short sequence patterns -> could be exploited by “reactive”/automaticity 
    - Global:  No lures, large vocabulary -> pattern repeats implies a target


## Formulating Generating the N-Back Sequences as a CSP instance

$P=\langle V,D,C,W\rangle$

$V=\{x_N,x_{T},x_{T,local},x_L,x_{L,local},x_V,x_U,x_S,x_{S,local},x_G\}$

$D=\{\}$


Constraints:

$$
\\

x_n = N, W_n = 1 - |10 \times dnorm(x_n-N,sd=4)|

\\\\

x_t = T \times trials, W_t = 1 - |10\times dnorm(T\times trials-x_t,sd=4)|

\\\\

x_{tl} = {T \times w \over trials}, W_{tl} = 1 - |10\times dnorm(x_{tl} - {T \over trials} \times w,sd=4)|

\\\\

x_{l} = L \times trials
\\\\

x_{ll} = L \times w
\\\\

x_{v} = |V|
\\

x_{ul} = w
\\\\

x_{s} = {trials \over |V|}
\\\\

x_{sl} = max(1, {w \over |V|})
\\\\

x_{g} = {trials \over w}

\\\\

x_{vl} = min(|V|, w)
$$

```{r setup, message=FALSE, include=FALSE, paged.print=FALSE}
library(ggplot2)
library(tidyverse)
library(stringi)
library(pls)
#library(plsRglm)
#library(plsdof)
library(pls)
library(caret)
library(here)
library(tsibble)
library(broom)
library(rsample)

```

```{r preprocessing}

load(here('notebooks/data/CL2015.RData'))
window_size <- 8

with_lures <- function(stimulus, stimulus_type, n) {
  sapply(1:length(stimulus), function(i) {
    lures <- c(as.character(stimulus[i-n-1]), as.character(stimulus[i-n+1]))
    are_valid_trials <- i>n && all(!is.na(c(lures,stimulus[i])))
    ifelse(are_valid_trials && stimulus[i] %in% lures,
           "lure", 
           as.character(stimulus_type[i]))
  })
}

seqs <- NB %>%
  group_by(participant, block, condition) %>%
  mutate(n = ifelse(condition=='2-back',2,3)) %>%
  mutate(stimulus_type = with_lures(stimulus, stimulus_type, n)) %>%
  mutate(tl = slide_dbl(stimulus_type, ~length(which(.=='target')), .partial=T,.size=window_size),
         ll = slide_dbl(stimulus_type, ~length(which(.=='lure')), .partial=T, .size=window_size),
         sl = slide_dbl(stimulus, ~(sum(sort(table(.), decreasing = T)[1:2]) - 1), .partial=T, .size=window_size),
         sl = ifelse(is.na(sl), 0, sl),
         ul = slide_dbl(stimulus, ~(max(table(.))-1), .partial=T, .size=window_size),
         vl = slide_dbl(stimulus, ~(length(unique(.))), .partial=T, .size=window_size),
         al = slide_dbl(correct, ~(length(which(.))), .partial=T, .size=window_size)) %>%
  nest(.key='local_stats') %>%
  #mutate(stimuli = map(local_stats, ~paste0(.x$stimulus,collapse = ''))) %>%
  mutate(a  = map(local_stats, ~length(which(.x$correct)))) %>%
  mutate(t  = map(local_stats, ~length(which(.x$stimulus_type=='target')))) %>%
  mutate(l  = map(local_stats, ~length(which(.x$stimulus_type=='lure')))) %>%
  mutate(s  = map(local_stats, ~sum(sort(table(.x$stimulus), decreasing = T)[1:2]) - 1)) %>%
  mutate(v  = map(local_stats, ~length(unique(.x$stimulus)))) %>%
  mutate(local_stats = map(local_stats, ~.x %>% select(-trial,-stimulus,-stimulus_type,-choice))) %>%
  ungroup() %>%
  select(-participant,-block,-condition)

View()
inspectdf::inspect_cor(seqs)
#inspect_cor(NB,show_plot = T)
```

```{r}
model1 <- NB2 %>%
  select(-participant, -stimulus) %>%
  glm(rt~t+n+a,data=., family = "gaussian")
aug1 <- augment(model1)
aug1 %>%
  ggplot(aes(a,rt)) +
    geom_point() +
    geom_smooth(aes(y=.fitted, color='red'))
```

```{r}
model2 <- NB2 %>%
  select(-participant, -stimulus) %>%
  glm(rt~t+n+a+al+s+sl+ll+l,data=., family = "gaussian")
aug2 <- augment(model2)
aug2 %>%
  ggplot(aes(jitter(al),rt)) +
    geom_point(alpha=0.2,shape=18) +
    xlab("accuracy") + 
    geom_smooth(aes(y=.fitted), color='blue') +
    geom_smooth(aes(y=aug1$.fitted), color='red')
  
```

```{r models}

nb_split <- initial_split(NB2, prop = 0.75)
training_data <- training(nb_split)
testing_data <- testing(nb_split)
cv_split <- vfold_cv(training_data, v = 5)
cv_data <- cv_split %>% 
  mutate(
    train = map(splits, ~training(.x)), 
    validate = map(splits, ~testing(.x))
  )

cv_models_lm_a <- cv_data %>% 
  mutate(model = map(train, ~lm(formula = a~., data = .x)),
         tidied = map(model, tidy),
         glanced = map(model, glance),
         augment = map(model, augment))

cv_models_glm_a <- cv_data %>% 
  mutate(model = map(train, ~lm(formula = a~., data = .x)),
         tidied = map(model, tidy),
         glanced = map(model, glance),
         augment = map(model, augment))

cv_models_pls_a <- cv_data %>%
  mutate(model = map(train, ~plsr(a~., data = .x, validation = "CV")),
         best_dims = map_dbl(model, ~which.min(RMSEP(.x)$val[estimate="adjCV",,]) - 1)) %>%
  mutate(model = map(train, ~plsr(a ~ ., data = .x, ncomp = best_dims))
  )

head(cv_models_pls_a)


cv_models_pls_a1 <- cv_data[3][[1]]


NBx <- NB %>%
  group_by(participant) %>%
  summarise(freq = as.data.frame(table(stimulus)))

ggplot(NBx$freq, aes(, group=participant)) +
  geom_point(se = F)


#%>%
#  mutate(model.pls = map(variables, ~plsr(rt ~ ., data = .x, validation = "CV")))
  #mutate(model.pca = map(design_matrix, ~prcomp(~rt,.x, center=T,scale.=T, na.action=na.exclude))) %>%
  #mutate(pc1=pca$x[,'PC1'], pc2=pca$x[,'PC2'])

# Compile cross-validation settings

#any(is.na(NB2))
#NB2 <- na.omit(NB2)

# set.seed(100)
# trainingfold <- createMultiFolds(NB2@correct, k = 5, times = 10)
# 
# # PLS
# mod1 <- train(correct ~ ., data = NB2[,c("correct","x_sl","x_ul","x_t","x_l")],
#  method = "pls",
#  metric = "Accuracy",
#  tuneLength = 20,
#  trControl = trainControl("repeatedcv", index = trainingfold, selectionFunction = "oneSE"),
#  preProc = c("zv","center","scale"))
#  
# # Check CV
# plot(mod1)


#plsResult <- plsR(rt ~ ., data=NB2[,c("rt","x_sl","x_ul","x_t","x_l")],3)
#plsResult <- plsR(correct ~ ., data=NB2[,c("correct","x_sl","x_ul","x_t","x_l")],3)

```