notebooks/ccn2019.rev3.Rmd at 37ae4c8727f4e551c273795efb8ceded0154d4c8

Fork: 0
morteza / notebooks
Find file
Newer
Older
notebooks / ccn2019.rev3.Rmd
Morteza Ansarinia on 23 May 2019 4 KB plot PLS model output
Raw Blame History
```{r setup, message=FALSE, include=FALSE, paged.print=FALSE}
library(ggplot2)
library(tidyverse)
library(stringi)
library(pls)
#library(plsRglm)
#library(plsdof)
library(pls)
library(caret)
library(here)
library(tsibble)
library(broom)
library(rsample)
library(inspectdf)
```


```{r preprocessing}

load(here('notebooks/data/CL2015.RData'))
window_size <- 8

with_lures <- function(stimulus, stimulus_type, n) {
  sapply(1:length(stimulus), function(i) {
    lures <- c(as.character(stimulus[i-n-1]), as.character(stimulus[i-n+1]))
    are_valid_trials <- i>n && all(!is.na(c(lures,stimulus[i])))
    ifelse(are_valid_trials && stimulus[i] %in% lures,
           "lure", 
           as.character(stimulus_type[i]))
  })
}


invs <- function(s) {
  print(length(s)!=8)
  1
}

seqs <- NB %>%
  group_by(participant, block, condition) %>%
  mutate(n = ifelse(condition=='2-back',2,3)) %>%
  mutate(stimulus_type = with_lures(stimulus, stimulus_type, n)) %>%
  mutate(tl = slide2_dbl(stimulus_type, rt, ~length(which(.x=='target'))/length(which(!is.na(.y))), .partial=T,.size=window_size),
         ll = slide2_dbl(stimulus_type, rt, ~length(which(.x=='lure'))/length(which(!is.na(.y))), .partial=T, .size=window_size),
         sl = slide_dbl(stimulus_type, ~sum(sort(table(.x), decreasing = T)[1:2]) - 1, .partial=T, .size=window_size),
         sl = ifelse(is.na(sl), 0, sl),
         tl = ifelse(is.na(tl), NA, tl),
         ll = ifelse(is.na(ll), NA, ll),
         ul = slide_dbl(stimulus, ~max(table(.))-1, .partial=T, .size=window_size),
         vl = slide_dbl(stimulus, ~length(unique(.)), .partial=T, .size=window_size),
         al = slide2_dbl(correct, rt, ~length(which(.x))/length(which(!is.na(.y))), .partial=T, .size=window_size)) %>%
  nest(.key='local_stats') %>%
  #mutate(stimuli = map(local_stats, ~paste0(.x$stimulus,collapse = ''))) %>%
  mutate(a  = map_dbl(local_stats, ~length(which(.x$correct)))) %>%
  mutate(t  = map_dbl(local_stats, ~length(which(.x$stimulus_type=='target')))) %>%
  mutate(l  = map_dbl(local_stats, ~length(which(.x$stimulus_type=='lure')))) %>%
  mutate(s  = map_dbl(local_stats, ~sum(sort(table(.x$stimulus), decreasing = T)[1:2]) - 1)) %>%
  mutate(v  = map_dbl(local_stats, ~length(unique(.x$stimulus)))) %>%
  mutate(local_stats = map(local_stats, ~.x %>% select(-trial,-stimulus,-stimulus_type,-choice))) %>%
  ungroup() %>%
  select(-participant,-block,-condition)

inspect_cor(seqs %>% unnest(local_stats), show_plot = T)
#inspect_cor(NB,show_plot = T)
```

```{r models}
data <- seqs %>% 
  unnest(local_stats) %>%
  # restructure correct column to avoid caret errors
  # C stands for "CORRECT", and I is "INCORRECT"
  mutate(correct=factor(as.numeric(correct),labels=c("C","I"))) %>%
  filter(!is.na(correct), !is.na(rt))

shuff <- sample(nrow(data))
split <- nrow(data) * 0.8


train_data <- data[1:split,]
test_data <- data[(split+1):nrow(data),]

plsTrControl <- trainControl(
    method = "cv",
    number = 5
  )

# Train PLS model (accuracy+)
model_pls_rt <- train(
  a ~ .-rt-al-correct,
  data = train_data,
  method = "pls",
  tuneLength = 20,
  trControl = plsTrControl,
  preProc = c("zv","center","scale"))
 
# Check CV profile
plot(model_pls_rt)



# training control params for "correct" column
trControl <- trainControl(
    method = "cv",
    number = 5,
    classProbs = T,
    summaryFunction = twoClassSummary
  )
  
model_glm_correct <- train(
  correct ~ .-rt-a-al,
  data = train_data,
  method = "glm",
  family = "binomial",
  trControl = trControl
)

model_glm_correct

predicted_correct_data <- predict(model_glm_correct, test_data, type="prob")

confusionMatrix(test_data$correct, predicted_correct_data)

library(caTools)
colAUC(predicted_correct_data, test_data$correct, plotROC=T)


## OLD MODEL (only global features)
model_glm_correct_old <- train(
  correct ~ n+t+v,
  data = train_data,
  method = "glm",
  family = "binomial",
  trControl = trControl
)

model_glm_correct_old

predicted_old_correct_data <- predict(model_glm_correct_old, test_data, type="prob")

confusionMatrix(test_data$correct, predicted_old_correct_data)

library(caTools)
colAUC(predicted_old_correct_data,test_data$correct, plotROC=T)
```