
$P=\langle V,D,C,W \rangle$

```{r setup, message=FALSE, include=FALSE, paged.print=FALSE}
#! ===============================================
#! load required packages

library(ggplot2)
library(tidyverse)
library(stringi)
library(pls)
library(caret)
library(here)
library(tsibble)
library(broom)
library(rsample)
library(inspectdf)
library(caTools)
library(pROC)

#! ===============================================
#! load the data set and set the sliding-window size
load(here('notebooks/data/CL2015.RData'))
window_size <- 8

```


```{r preprocessing}

#! ===============================================
#! Mark lures in a sequence: a lure is a stimulus that matches the item
#! presented n-1 or n+1 trials back (the positions flanking the n-back target)
with_lures <- function(stimulus, stimulus_type, n) {
  sapply(seq_along(stimulus), function(i) {
    # candidate lure items: the stimuli n+1 and n-1 trials back
    lures <- c(as.character(stimulus[i-n-1]), as.character(stimulus[i-n+1]))
    # only trials beyond the first n, with no missing values, can be relabelled
    are_valid_trials <- i > n && all(!is.na(c(lures, stimulus[i])))
    ifelse(are_valid_trials && stimulus[i] %in% lures,
           "lure",
           as.character(stimulus_type[i]))
  })
}
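
#! Illustrative check (toy input, not from CL2015): with n = 2, the third item
#! of c("A","B","B","C") matches the stimulus presented one trial back (n-1),
#! so it should be relabelled as a lure:
#DEBUG with_lures(c("A","B","B","C"), rep("non-target", 4), 2)
#DEBUG #> "non-target" "non-target" "lure" "non-target"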

#! ===============================================
#! Preprocess the data set to add block-level features (t, l, s, v, a, dp, cr)
#! and sliding-window features (tl, ll, sl, ul, vl, al);
#! a and al are overall and recent (windowed) accuracy, respectively
seqs <- NB %>%
  group_by(participant, block, condition) %>%
  mutate(n = ifelse(condition=='2-back',2,3)) %>%
  mutate(stimulus_type = with_lures(stimulus, stimulus_type, n)) %>%
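  # sliding-window features over the last window_size trials:
  #   tl: proportion of targets, ll: proportion of lures (both relative to
  #       trials with a non-missing rt in the window),
  #   sl: count of the two most frequent stimulus_type values minus 1,
  #   ul: repetitions of the most frequent stimulus (max count minus 1),
  #   vl: number of unique stimuli, al: recent accuracy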
  mutate(tl = slide2_dbl(stimulus_type, rt, ~length(which(.x=='target'))/length(which(!is.na(.y))), .partial=T,.size=window_size),
         ll = slide2_dbl(stimulus_type, rt, ~length(which(.x=='lure'))/length(which(!is.na(.y))), .partial=T, .size=window_size),
         sl = slide_dbl(stimulus_type, ~sum(sort(table(.x), decreasing = T)[1:2]) - 1, .partial=T, .size=window_size),
         ul = slide_dbl(stimulus, ~max(table(.))-1, .partial=T, .size=window_size),
         vl = slide_dbl(stimulus, ~length(unique(.)), .partial=T, .size=window_size),
         al = slide2_dbl(correct, rt, ~length(which(.x))/length(which(!is.na(.y))), .partial=T, .size=window_size),
         sl = ifelse(is.na(sl), 0, sl),
         tl = ifelse(is.na(tl), NA, tl),
         ll = ifelse(is.na(ll), NA, ll),
         al = ifelse(is.na(al), NA, al)
) %>%
  nest(.key='local_stats') %>%
  #mutate(stimuli = map(local_stats, ~paste0(.x$stimulus,collapse = ''))) %>%
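  # block-level features computed over each nested block:
  #   a: number of correct trials, t: number of targets, l: number of lures,
  #   s: count of the two most frequent stimuli minus 1, v: number of unique stimuli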
  mutate(a  = map_dbl(local_stats, ~length(which(.x$correct)))) %>%
  mutate(t  = map_dbl(local_stats, ~length(which(.x$stimulus_type=='target')))) %>%
  mutate(l  = map_dbl(local_stats, ~length(which(.x$stimulus_type=='lure')))) %>%
  mutate(s  = map_dbl(local_stats, ~sum(sort(table(.x$stimulus), decreasing = T)[1:2]) - 1)) %>%
  mutate(v  = map_dbl(local_stats, ~length(unique(.x$stimulus)))) %>%
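  # dp and cr are count-based proxies for d-prime and response criterion:
  #   dp = hits - false alarms,  cr = -(hits + false alarms)/2  (no z-transform)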
  mutate(dp = map_dbl(local_stats,
                      ~length(which(.x$stimulus_type=="target" & .x$correct)) -
                        length(which(.x$stimulus_type!="target" & !.x$correct)))) %>%
  mutate(cr = map_dbl(local_stats,
                      ~-(length(which(.x$stimulus_type=="target" & .x$correct==T)) +
                        length(which(.x$stimulus_type!="target" & .x$correct==F)))/2)) %>%
  mutate(local_stats = map(local_stats, ~.x %>% select(-trial,-choice))) %>%
  #mutate(local_stats = map(local_stats, ~.x %>% select(-trial,-stimulus,-choice))) %>%
  ungroup() %>%
  select(-participant,-block,-condition) %>%
  unnest(local_stats)

#! ===============================================
#! visualize correlations (debug) and save the preprocessed sequences
#DEBUG inspect_cor(seqs, show_plot = T)
save(seqs,file=here("notebooks/data/nback_seqs.Rd"))
```
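
A quick, optional sanity check of the engineered features before modelling; this sketch only assumes the column names produced by the preprocessing chunk above and is not evaluated when knitting (`eval=FALSE`).

```{r inspect_features, eval=FALSE}
#! ===============================================
#! summarise block-level and sliding-window features
seqs %>%
  select(t, l, s, v, a, dp, cr, tl, ll, sl, ul, vl, al) %>%
  summary()
```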

```{r remove_highly_correlated_predictors}
# WIP: an extra step for non-PLS methods to drop highly correlated predictors
# (correlations computed on numeric columns only, using pairwise-complete observations)
cor_matrix <- cor(select_if(seqs, is.numeric), use = "pairwise.complete.obs")
cor_high <- findCorrelation(cor_matrix, cutoff = 0.8)
high_cor_remove <- row.names(cor_matrix)[cor_high]
seqs.uncorr <- seqs %>% select(-one_of(high_cor_remove))
```
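
As an optional follow-up check (a sketch reusing `seqs.uncorr` from the chunk above), the maximum absolute pairwise correlation among the remaining numeric predictors should now fall below the 0.8 cutoff.

```{r verify_uncorrelated, eval=FALSE}
#! ===============================================
#! confirm no remaining pair of numeric predictors exceeds the 0.8 cutoff
remaining_cor <- cor(select_if(seqs.uncorr, is.numeric), use = "pairwise.complete.obs")
max(abs(remaining_cor[upper.tri(remaining_cor)]))
```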