---
title: "PLS Training"
output: html_notebook
editor_options:
  chunk_output_type: console
---

PLS:

```{r setup}
# If MASS (or a package that attaches it) is loaded, its `select()` masks
# dplyr's; the chunks below therefore call `dplyr::select()` explicitly.
library(tidyverse)
library(pls)

#' Choose the number of PLS components from cross-validated errors.
#'
#' Works for single- and multi-response fits: the bias-adjusted CV error is
#' squared and averaged over all responses, giving one score per candidate
#' model.
#'
#' @param cv The result of `RMSEP()` on a model fitted with
#'   `validation = "CV"`.
#' @return A single integer >= 1: the component count with the lowest score.
best_ncomp <- function(cv) {
  # `val` is indexed estimate x response x model; keep all three dimensions so
  # one- and many-response fits go through the same code path.
  adj_cv <- cv$val["adjCV", , , drop = FALSE]
  mse_by_model <- apply(adj_cv^2, 3, mean)

  # The first model is intercept-only, hence the offset of one.
  best <- which.min(mse_by_model) - 1L
  if (length(best) == 0L) {
    stop("No finite cross-validation errors found.", call. = FALSE)
  }
  if (best < 1L) {
    # `plsr()` cannot be refitted with zero components, so fall back to the
    # best model that has at least one.
    message(
      "Intercept-only model has the lowest CV error; ",
      "using the best model with at least one component."
    )
    best <- which.min(mse_by_model[-1L])
  }
  unname(best)
}
```

```{r}
## 1. load sample data
#data <- read.csv("http://wiki.q-researchsoftware.com/images/d/db/Stacked_colas.csv")

# Drop any stale copy of NB so the object used below comes from the file.
if (exists("NB", inherits = FALSE)) {
  rm(NB)
}
load("./data/CL2015.RData")
data <- NB
str(data)

## 2. clean data (recode condition as numeric n; drop condition, stimulus,
##    block and trial)
data <- data %>%
  mutate(n = ifelse(condition == "2-back", 2, 3)) %>%
  dplyr::select(-condition, -stimulus, -block, -trial) # %>%
  # rename(
  #   ev.participant=participant,
  #   ev.n=n,
  #   ev.block=block,
  #   ev.stimulus_type=stimulus_type,
  #   rv.choice=choice,
  #   rv.rt=rt,
  #   rv.correct=correct
  # )

## 3. use cross validation to find the optimal number of dimensions
pls.model <- plsr(rt ~ ., data = data, validation = "CV")

## 3.1. find the model with lowest cv error
best_dims <- best_ncomp(RMSEP(pls.model))

## 4. rebuild the model
pls.model <- plsr(rt ~ ., data = data, ncomp = best_dims)

## 5. Sort, and visualize top coefficients
coefs <- coef(pls.model)

# `head()` rather than `[1:4]` so fewer than four predictors do not yield NAs.
barplot(head(sort(coefs[, 1, 1], decreasing = TRUE), 4))
```

```{r simulate}
# Fixed seed so the simulated data and the CV result are reproducible.
set.seed(1)
X <- matrix(rnorm(1100), 100, 11)
Y <- matrix(rnorm(400), 100, 4)

pls.model <- plsr(Y ~ X, validation = "CV")

cv <- RMSEP(pls.model)
# Y has four columns, so the CV errors form a response x model matrix;
# `best_ncomp()` aggregates over responses before picking the minimum.
best_dims <- best_ncomp(cv)

pls.model <- plsr(Y ~ X, ncomp = best_dims)
# Coefficients for the first response only.
coefs <- sort(coef(pls.model)[, 1, 1], decreasing = TRUE)

barplot(coefs)
```

```{r cca-simulate}
# Fixed seed so the simulated data are reproducible.
set.seed(1)
X <- matrix(rnorm(1100), 100, 11)
Y <- matrix(rnorm(400), 100, 4)

M <- cor(cbind(X, Y))
# corrplot is not attached anywhere in this notebook, so call it by namespace.
corrplot::corrplot(
  M,
  method = "ellipse",
  order = "hclust",
  addrect = 2,
  addCoef.col = "black"
)
cc <- cancor(X, Y)

#NB: cc <- cancor(cbind(rt,correct, accuracy) ~ xt + xl + xtl, data = data)
```

```
# Scratch code, intentionally not an executable chunk.
# NOTE(review): `cv.modpls` is only defined by the commented-out call below,
# so `summary(cv.modpls)` cannot run as written.
# NOTE(review): confirm that modele = "pls-glm-logistic" is appropriate for
# the response `Cornell$Y` before enabling this code.
library(plsRglm)

data(Cornell)
df <- Cornell
x <- subset(df, select = -c(Y))
y <- df$Y

## K is the number of folds in CV, and nt is the maximum number of components,
#cv.modpls<-cv.plsRglm(dataY=y,dataX=x ,nt=10,modele="pls-glm-logistic",K=8)

modpls <- plsRglm(
  dataY = y,
  dataX = x,
  nt = 10,
  modele = "pls-glm-logistic",
  sparse = TRUE,
  sparseStop = TRUE
)
res.cv.modpls <- cvtable(summary(cv.modpls))

res6 <- plsR(Y ~ ., data = Cornell, nt = 6, typeVC = "missing", pvals.expli = TRUE)
```