diff --git a/ccn2019.rev2.Rmd b/ccn2019.rev2.Rmd index 47e0435..7582591 100644 --- a/ccn2019.rev2.Rmd +++ b/ccn2019.rev2.Rmd @@ -73,21 +73,18 @@ library(ggplot2) library(tidyverse) library(stringi) -library(plsRglm) -library(plsdof) +library(pls) +#library(plsRglm) +#library(plsdof) +library(pls) library(caret) library(here) +library(tsibble) ``` -```{r params} +```{r preprocessing} load(here('data/CL2015.RData')) - window_size <- 8 -``` - - - -```{r history} with_lures <- function(stimulus, stimulus_type, n) { sapply(1:length(stimulus), function(i) { @@ -99,68 +96,29 @@ }) } -with_history <- function(stimuli, length=16, fixed=F) { - seq <- paste(stimuli, collapse = '') - - sapply(1:length(stimuli), function(i) { - stri_reverse(str_sub(seq, max(1,i-length+1), i)) - }) - #ifelse(fixed, h[str_length(h)==size], h) -} - -# $x_{s,local}$ -with_skewness <- function(history) { - sapply(history, function(h) { - freqs <- table(unlist(str_split(h,""))) - sum(sort(freqs, decreasing = T)[1:2]) - 1 - }) -} - -# $x_{u,local}$ -with_lumpiness <- function(history) { - sapply(history, function(h) { - freqs <- table(unlist(str_split(h,""))) - max(freqs) - 1 - }) -} - - -with_targets_ratio <- function(stimulus_type, length=16) { - sapply(1:length(stimulus_type), function(i) { - trials <- stimulus_type[max(1,i-length):i] - length(trials[trials=="target"]) / length(trials) - }) -} - -with_lures_ratio <- function(stimulus_type, length=16) { - sapply(1:length(stimulus_type), function(i) { - trials <- stimulus_type[max(1,i-length):i] - length(trials[trials=="lure"]) / length(trials) - }) -} - -#TODO change to list column workflow with broom for model fitting and evaluating the fits -# duh! we are using list nested insided a tibble, so put all new columns in a new list column -# instead of adding a new column for each. NB2 <- NB %>% group_by(participant, condition, block) %>% mutate(n = ifelse(condition=='2-back',2,3)) %>% mutate(stimulus_type = with_lures(stimulus, stimulus_type, n)) %>% - mutate(history = with_history(stimulus, window_size)) %>% - group_nest(.key='design_matrix') + mutate(tl = slide_dbl(stimulus_type, ~length(which(.=='target')), .partial=T,.size=window_size)) %>% + mutate(ll = slide_dbl(stimulus_type, ~length(which(.=='lure')), .partial=T, .size=window_size)) %>% + mutate(sl = slide_dbl(stimulus, ~sum(sort(table(.), decreasing = T)[1:2]) - 1, .partial=T, .size=window_size)) %>% + mutate(ul = slide_dbl(stimulus, ~max(table(.))-1, .partial=T, .size=window_size)) %>% + mutate(vl = slide_dbl(stimulus, ~length(unique(.)), .partial=T, .size=window_size)) %>% + mutate(t = length(which(stimulus_type=='target'))) %>% + mutate(l = length(which(stimulus_type=='lure'))) %>% + mutate(s = sum(sort(table(stimulus), decreasing = T)[1:2]) - 1) %>% + mutate(v = length(unique(stimulus))) %>% + # replace NAs in sl column + mutate(sl = ifelse(is.na(sl), 0, sl)) %>% + nest(.key='design_matrix') -#FIXME update the following functions to accept and also generate tidy list-columns -#FIXME use map() and map_*() from purrr +# Models NB2 <- NB2 %>% - mutate(x_sl = map(design_matrix, ~with_skewness(.x$history))) %>% - mutate(x_ul = map(design_matrix, ~with_lumpiness(.x$history))) %>% - mutate(x_t = map(design_matrix, ~with_targets_ratio(.x$stimulus_type, window_size))) %>% - mutate(x_l = map(design_matrix, ~with_lures_ratio(.x$stimulus_type, window_size))) - #%>% ungroup() %>% unnest() - -#FIXME perform modelings within tidy list-columns and put result in "model.pca" column -pca <- prcomp(~x_sl+x_ul+x_t+x_l, NB2, center = TRUE,scale. = TRUE, na.action=na.exclude) -NB2 <- NB2 %>% mutate(pc1=pca$x[,'PC1'], pc2=pca$x[,'PC2']) + mutate(model.lm = map(design_matrix, ~lm(rt~.,.x))) %>% + mutate(model.pls = map(design_matrix, ~plsr(rt ~ ., data = .x, validation = "CV"))) + #mutate(model.pca = map(design_matrix, ~prcomp(~rt,.x, center=T,scale.=T, na.action=na.exclude))) %>% + #mutate(pc1=pca$x[,'PC1'], pc2=pca$x[,'PC2']) # caret library(caret)