---
title: "PLS Training"
output: html_notebook
---

PLS:

```{r}
# Partial least squares (PLS) regression walkthrough:
#   1. download the stacked cola survey data,
#   2. drop the identifier columns,
#   3. pick the number of components by cross-validation,
#   4. refit with that number of components,
#   5. plot the largest coefficients.

# If MASS is attached (e.g. pulled in by plsdof) its select() masks
# dplyr::select(). The original workaround was:
#   detach("package:MASS", "plsdof")
# Calling dplyr::select() explicitly below makes that unnecessary.
library(tidyverse)
library(pls)

## 1. load sample data
drinks <- read.csv(
  "http://wiki.q-researchsoftware.com/images/d/db/Stacked_colas.csv"
)
# str(drinks)

## 2. clean data (remove brand and URLID)
drinks <- drinks %>%
  dplyr::select(-URLID, -brand)

## 3. use cross validation to find the optimal number of dimensions
# The CV segments are drawn at random, so fix the seed to make the chosen
# number of components reproducible between runs.
set.seed(1)
pls.model <- plsr(pref ~ ., data = drinks, validation = "CV")

## 3.1. find the model with lowest cv error
cv <- RMSEP(pls.model)
# `[` on an array matches indices by position only (argument names are
# ignored), so select the "adjCV" row of the estimate dimension directly.
adj_cv <- cv$val["adjCV", , ]
# The first entry is the intercept-only model, hence the `- 1L` to convert a
# position into a component count. plsr() needs at least one component, so
# fall back to 1 if the intercept-only model happens to have the lowest error.
best_dims <- max(1L, unname(which.min(adj_cv)) - 1L)

## 4. rebuild the model
pls.model <- plsr(pref ~ ., data = drinks, ncomp = best_dims)

## 5. sort, and visualize top coefficients
# coef() returns a predictors x responses x components array; take the single
# response / single component slice as a named vector.
coefs <- coef(pls.model)
# tail() keeps the last six values of the ascending sort, i.e. the largest
# coefficients.
barplot(tail(sort(coefs[, 1, 1])))
```