---
title: "PLS Training"
output: html_notebook
---
PLS:
```{r}
#detach("package:MASS","plsdof") # to avoid conflict with dplyr::select
library(tidyverse)
library(pls)
## 1. load sample data
#data <- read.csv("http://wiki.q-researchsoftware.com/images/d/db/Stacked_colas.csv")
# Drop any NB left over from a previous run so the copy used below is the one
# read from disk. Guarded because rm() warns when the object does not exist
# (e.g. on a fresh session).
if (exists("NB", inherits = FALSE)) {
  rm(NB)
}
load("./data/CL2015.RData")
# load() restores objects under the names they were saved with; the rest of
# this chunk relies on the file providing an object called NB.
if (!exists("NB")) {
  stop("./data/CL2015.RData did not provide an object named 'NB'",
       call. = FALSE)
}
data <- NB
# Print the structure (column names and types) for a quick visual check.
str(data)
## 2. clean data: encode the n-back level as a number and drop the columns
##    that should not enter the model
data <- data %>%
  # n is 2 for rows whose condition is "2-back" and 3 for every other row.
  mutate(n = ifelse(condition == "2-back", 2, 3)) %>%
  # Namespace-qualified so the call still resolves to dplyr's select() when
  # another attached package (e.g. MASS) also exports a function named select.
  dplyr::select(
    -condition,
    -stimulus,
    -block,
    -trial
  )
# Disabled renaming step, kept for reference. NOTE(review): it refers to
# `block`, which is dropped above, so it cannot be re-enabled as written.
# %>%
# rename(
# ev.participant=participant,
# ev.n=n,
# ev.block=block,
# ev.stimulus_type=stimulus_type,
# rv.choice=choice,
# rv.rt=rt,
# rv.correct=correct
# )
## 3. use cross-validation to find the optimal number of components
pls.model <- plsr(rt ~ ., data = data, validation = "CV")
## 3.1. find the model with the lowest CV error
cv <- RMSEP(pls.model)
# cv$val is indexed as [estimate, response, model]. With a single response,
# this slice is a vector of bias-adjusted CV errors, one entry per model.
adj_cv <- cv$val["adjCV", , ]
# The first entry is the intercept-only model (0 components), hence the "- 1"
# to turn a position into a component count. Clamp to at least one component:
# if the intercept-only model has the lowest error the count would be 0, which
# is not a valid `ncomp` for the refit below.
best_dims <- max(1L, which.min(adj_cv) - 1L)
## 4. rebuild the model with the selected number of components
pls.model <- plsr(rt ~ ., data = data, ncomp = best_dims)
## 5. Sort, and visualize top coefficients
coefs <- coef(pls.model)
# coefs is a 3-d array; [, 1, 1] takes the coefficient vector for the first
# response and the first (only) component setting returned by coef().
sorted_coefs <- sort(coefs[, 1, 1], decreasing = TRUE)
# head() keeps the four largest coefficients and, unlike [1:4], does not pad
# the result with NA when the model has fewer than four predictors.
barplot(head(sorted_coefs, 4))
```