diff --git a/ccn2019-accuracy.R b/ccn2019-accuracy.R
new file mode 100644
index 0000000..0a2d999
--- /dev/null
+++ b/ccn2019-accuracy.R
@@ -0,0 +1,101 @@
+#==================================================#
+# model the accuracy columns (a for global, and al for local accuracy)
+
+library(here)
+library(tidyverse)
+library(caret)
+library(inspectdf)
+
+
+load(here("data/nback_seqs.Rd"))
+
+set.seed(42)
+
+seqs.imputed <- seqs %>%
+  filter(!is.na(correct), !is.na(rt)) %>%
+  select(-correct)
+
+inspect_num(seqs.imputed)
+
+seqs.dummy <- predict(dummyVars(~., data = seqs.imputed), seqs.imputed)
+
+
+# stratified 70/30 train/test split on the outcome
+train_indexes <- createDataPartition(seqs.imputed$a,
+                                     times = 1,
+                                     p = 0.7,
+                                     list = FALSE)
+
+train_data <- seqs.imputed[train_indexes,]
+test_data  <- seqs.imputed[-train_indexes,]
+
+control <- trainControl(
+  method = "cv",
+  number = 5,
+  verboseIter = TRUE
+)
+
+pls.new_model <- train(
+  a ~ t + l + s + v + n + tl + ll + sl + ul + vl,
+  data = train_data,
+  method = "pls",
+  preProcess = c("center", "scale"),
+  trControl = control
+)
+
+pls.old_model <- train(
+  a ~ t + n + v,
+  data = train_data,
+  method = "pls",
+  preProcess = c("center", "scale"),
+  trControl = control
+)
+
+
+pls.old_model
+pls.new_model
+varImp(pls.old_model)
+varImp(pls.new_model)
+
+trellis.par.set(caretTheme())
+densityplot(pls.new_model, pch = "|")
+densityplot(pls.old_model, pch = "|")
+
+resamps <- resamples(list(old = pls.old_model, new = pls.new_model))
+summary(resamps)
+dotplot(resamps, metric = "Rsquared")
+difValues <- diff(resamps)
+bwplot(difValues, layout = c(1, 3))
+
+
+pls.new_train_predicted <- predict(pls.new_model, train_data, type = "raw")
+pls.old_train_predicted <- predict(pls.old_model, train_data, type = "raw")
+pls.new_predicted <- predict(pls.new_model, test_data, type = "raw")
+pls.old_predicted <- predict(pls.old_model, test_data, type = "raw")
+
+
+summary(pls.new_model)
+
+
+# SSE, R-squared, and RMSE on the held-out test set
+
+SST <- sum((test_data$a - mean(train_data$a))^2) # total sum of squares; baseline is the training mean
+
+SSE.new <- sum((test_data$a - pls.new_predicted)^2) # sum of squared errors
+R_square.new <- 1 - SSE.new / SST
+RMSE.new <- sqrt(SSE.new / length(pls.new_predicted))
+
+SSE.old <- sum((test_data$a - pls.old_predicted)^2)
+R_square.old <- 1 - SSE.old / SST
+RMSE.old <- sqrt(SSE.old / length(pls.old_predicted))
+
+
+as.data.frame(cbind(predicted = pls.old_predicted, observed = test_data$a)) %>%
+  ggplot(aes(predicted, observed)) +
+  coord_cartesian(xlim = c(20, 30), ylim = c(20, 30)) +
+  geom_point(alpha = 0.1, shape = 16) +
+  geom_smooth(method = lm, se = FALSE) +
+  ggtitle("Accuracy: Predicted vs Actual") +
+  xlab("Predicted") +
+  ylab("Observed")
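
The SSE/RMSE block at the end of each regression script repeats the same arithmetic for every model. A minimal sketch of a shared helper, assuming the objects defined in ccn2019-accuracy.R above (holdout_metrics is a hypothetical name, not part of the repository or of caret):

holdout_metrics <- function(predicted, observed, train_mean) {
  sse <- sum((observed - predicted)^2)   # sum of squared errors
  sst <- sum((observed - train_mean)^2)  # total sum of squares against the training-mean baseline
  c(R_square = 1 - sse / sst,
    RMSE     = sqrt(sse / length(predicted)))
}

holdout_metrics(pls.new_predicted, test_data$a, mean(train_data$a))
holdout_metrics(pls.old_predicted, test_data$a, mean(train_data$a))

caret::postResample(pls.new_predicted, test_data$a) reports RMSE as well, but its Rsquared is the squared correlation between predicted and observed, not 1 - SSE/SST, so the two R-squared values will generally differ.
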
diff --git a/ccn2019-correct.R b/ccn2019-correct.R
new file mode 100644
index 0000000..14c4175
--- /dev/null
+++ b/ccn2019-correct.R
@@ -0,0 +1,114 @@
+#==================================================#
+# model the "correct" column
+
+library(here)
+library(tidyverse)
+library(caret)
+library(inspectdf)
+library(pls)
+
+#devtools::install_github("sachsmc/plotROC")
+library(plotROC)
+
+
+load(here("notebooks/data/nback_seqs.Rd"))
+
+set.seed(42)
+
+seqs.imputed <- seqs %>%
+  filter(!is.na(correct), !is.na(rt)) %>%
+  mutate(correct = factor(correct, labels = c("INCORRECT", "CORRECT")))
+
+inspect_cat(seqs.imputed)
+inspect_num(seqs.imputed)
+
+seqs.dummy <- predict(dummyVars(~., data = seqs.imputed), seqs.imputed)
+
+
+train_indexes <- createDataPartition(seqs.imputed$correct,
+                                     times = 1,
+                                     p = 0.7,
+                                     list = FALSE)
+
+train_data <- seqs.imputed[train_indexes,]
+test_data  <- seqs.imputed[-train_indexes,]
+
+# down-sampling inside resampling corrects for the class imbalance in "correct"
+control <- trainControl(
+  method = "repeatedcv",
+  number = 5,
+  classProbs = TRUE,
+  verboseIter = TRUE,
+  summaryFunction = twoClassSummary,
+  savePredictions = TRUE,
+  sampling = "down"
+)
+
+pls.new_model <- train(
+  correct ~ t + l + s + v + n + tl + ll + sl + ul + vl,
+  data = train_data,
+  method = "pls",
+  preProcess = c("center", "scale"),
+  trControl = control
+)
+
+pls.old_model <- train(
+  correct ~ t + n + v,
+  data = train_data,
+  method = "pls",
+  preProcess = c("center", "scale"),
+  trControl = control
+)
+
+pls.new_model
+pls.old_model
+varImp(pls.new_model)
+varImp(pls.old_model)
+
+trellis.par.set(caretTheme())
+densityplot(pls.new_model, pch = "|")
+densityplot(pls.old_model, pch = "|")
+
+resamps <- resamples(list(new = pls.new_model, old = pls.old_model))
+summary(resamps)
+dotplot(resamps)
+diffValues <- diff(resamps)
+bwplot(diffValues, layout = c(1, 3))
+
+
+pls.new_predicted <- predict(pls.new_model, test_data, type = "raw")
+pls.new_predicted_prob <- predict(pls.new_model, test_data, type = "prob")
+pls.old_predicted <- predict(pls.old_model, test_data, type = "raw")
+pls.old_predicted_prob <- predict(pls.old_model, test_data, type = "prob")
+
+confusionMatrix(pls.new_predicted, test_data$correct)
+confusionMatrix(pls.old_predicted, test_data$correct)
+
+library(pROC)
+par(pty = "s")
+
+roc(test_data$correct,
+    pls.new_predicted_prob$CORRECT,
+    plot = TRUE,
+    legacy.axes = TRUE,
+    lwd = 4,
+    col = "black",
+    print.auc.y = 45,
+    percent = TRUE,
+    print.auc = TRUE)
+
+plot.roc(test_data$correct,
+         pls.old_predicted_prob$CORRECT,
+         legacy.axes = TRUE,
+         lwd = 4,
+         col = "darkgray",
+         print.auc = TRUE,
+         percent = TRUE,
+         print.auc.y = 40,
+         lty = 3,
+         add = TRUE)
+
+legend(100, 100, legend = c("New Model", "Old Model"),
+       col = c("black", "darkgray"), lty = c(1, 3), lwd = 3, cex = 0.8)
+
+# ggplot(pls.old_model, aes(d = pred$obs, m = pred$CORRECT)) +
+#   geom_roc()
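
The AUCs above are only printed onto the plot. A sketch of extracting them numerically and testing the difference between the two correlated curves with pROC, assuming the objects defined in ccn2019-correct.R (roc.new and roc.old are illustrative names; roc(), auc(), and roc.test() are real pROC functions):

roc.new <- roc(test_data$correct, pls.new_predicted_prob$CORRECT, percent = TRUE)
roc.old <- roc(test_data$correct, pls.old_predicted_prob$CORRECT, percent = TRUE)

auc(roc.new)  # area under the new model's ROC curve
auc(roc.old)  # area under the old model's ROC curve

# DeLong's test for two correlated ROC curves (same test set)
roc.test(roc.new, roc.old)
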
diff --git a/ccn2019-criterion.R b/ccn2019-criterion.R
new file mode 100644
index 0000000..3cdf184
--- /dev/null
+++ b/ccn2019-criterion.R
@@ -0,0 +1,120 @@
+#==================================================#
+# model the "cr" column (signal detection criterion)
+
+library(here)
+library(tidyverse)
+library(caret)
+library(inspectdf)
+library(skimr)
+library(ROSE)
+
+load(here("notebooks/data/nback_seqs.Rd"))
+
+set.seed(42)
+
+seqs.imputed <- seqs %>%
+  filter(!is.na(correct), !is.na(rt)) %>%
+  mutate(correct = factor(correct, labels = c("INCORRECT", "CORRECT")))
+
+#DEBUG inspect_num(seqs.imputed)
+
+#seqs.dummy <- predict(dummyVars(~.,data=seqs.imputed),seqs.imputed)
+
+
+#DEBUG train_indexes <- createResample(seqs.imputed$cr,list=F)[,1]
+
+train_indexes <- createDataPartition(seqs.imputed$correct,
+                                     times = 1,
+                                     p = 0.7,
+                                     list = FALSE)
+train_data <- seqs.imputed[train_indexes,]
+test_data  <- seqs.imputed[-train_indexes,]
+
+# ROSE generates a synthetically balanced copy of the training data
+train_data.imbalanced <- ROSE(correct ~ .,
+                              data = train_data,
+                              seed = 1)$data
+
+# Visualize the split
+train_data.imbalanced$grp <- "train"
+test_data$grp <- "test"
+
+rbind(train_data.imbalanced, test_data) %>%
+  ggplot(aes(x = correct, fill = grp)) +
+  geom_bar(position = "dodge") +
+  labs(title = "Class balance: ROSE-balanced train vs. test")
+
+rbind(train_data.imbalanced, test_data) %>%
+  ggplot(aes(x = cr, fill = grp)) +
+  geom_density(alpha = 0.5)
+
+control <- trainControl(
+  method = "cv",
+  number = 5,
+  verboseIter = TRUE
+)
+
+pls.new_model <- train(
+  cr ~ t + l + s + n + tl + ll + sl + ul + vl,
+  data = train_data,
+  method = "pls",
+  preProcess = c("center", "scale"),
+  trControl = control
+)
+
+pls.old_model <- train(
+  cr ~ t + n:v + n,
+  data = train_data,
+  method = "pls",
+  preProcess = c("center", "scale"),
+  trControl = control
+)
+
+
+summary(pls.old_model)
+summary(pls.new_model)
+plot(varImp(pls.old_model))
+plot(varImp(pls.new_model), main = "Criterion - Variable Importance")
+
+trellis.par.set(caretTheme())
+densityplot(pls.new_model, pch = "|")
+densityplot(pls.old_model, pch = "|")
+
+resamps <- resamples(list(old = pls.old_model, new = pls.new_model))
+summary(resamps)
+dotplot(resamps, metric = "Rsquared")
+difValues <- diff(resamps)
+bwplot(difValues, layout = c(1, 3))
+
+
+pls.new_train_predicted <- predict(pls.new_model, train_data, type = "raw")
+pls.old_train_predicted <- predict(pls.old_model, train_data, type = "raw")
+pls.new_predicted <- predict(pls.new_model, test_data, type = "raw")
+pls.old_predicted <- predict(pls.old_model, test_data, type = "raw")
+
+
+summary(pls.new_model)
+summary(pls.old_model)
+
+# SSE, R-squared, and RMSE on the held-out test set
+
+SST <- sum((test_data$cr - mean(train_data$cr))^2) # total sum of squares; baseline is the training mean
+
+SSE.new <- sum((test_data$cr - pls.new_predicted)^2) # sum of squared errors
+R_square.new <- 1 - SSE.new / SST
+RMSE.new <- sqrt(SSE.new / length(pls.new_predicted))
+
+SSE.old <- sum((test_data$cr - pls.old_predicted)^2)
+R_square.old <- 1 - SSE.old / SST
+RMSE.old <- sqrt(SSE.old / length(pls.old_predicted))
+
+
+as.data.frame(cbind(predicted = pls.old_predicted, observed = test_data$cr)) %>%
+  ggplot(aes(predicted, observed)) +
+  #coord_cartesian(xlim = c(-5, -2.8), ylim = c(-7, 0)) +
+  geom_point(alpha = 0.1, shape = 16) +
+  geom_smooth(method = lm, se = FALSE) +
+  ggtitle("Criterion: Predicted vs Actual") +
+  xlab("Predicted") +
+  ylab("Observed")
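
Note that in ccn2019-criterion.R the ROSE-balanced copy (misleadingly named train_data.imbalanced) feeds only the two plots; pls.new_model and pls.old_model are fit on the original train_data. A quick base-R check of what ROSE actually did to the class balance, using the objects above:

table(train_data$correct)             # original training classes (imbalanced)
table(train_data.imbalanced$correct)  # after ROSE: an approximately balanced synthetic sample
prop.table(table(train_data.imbalanced$correct))
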
diff --git a/cnn2019-accuracy.R b/cnn2019-accuracy.R
deleted file mode 100644
index 0a2d999..0000000
--- a/cnn2019-accuracy.R
+++ /dev/null
@@ -1,101 +0,0 @@
-#==================================================#
-# model the "accuract" column (a for global, and al for local accuracy)
-
-library(here)
-library(tidyverse)
-library(caret)
-library(inspectdf)
-
-
-load(here("data/nback_seqs.Rd"))
-
-set.seed(42)
-
-seqs.imputed <- seqs %>%
-  filter(!is.na(correct), !is.na(rt)) %>%
-  select(-correct)
-
-inspect_num(seqs.imputed)
-
-seqs.dummy <- predict(dummyVars(~.,data=seqs.imputed),seqs.imputed)
-
-
-train_indexes <- createDataPartition(seqs.imputed$a,
-                                     times = 1,
-                                     p = 0.7,
-                                     list = F)
-
-train_data <- seqs.imputed[train_indexes,]
-test_data <- seqs.imputed[-train_indexes,]
-
-control <- trainControl(
-  method = "cv",
-  number = 5,
-  verboseIter = T
-)
-
-pls.new_model <- train(
-  a ~ t + l + s + v + n + tl + ll + sl + ul + vl,
-  data = train_data,
-  method = "pls",
-  preProcess = c("center","scale"),
-  trControl = control
-)
-
-pls.old_model <- train(
-  a ~ t + n + v,
-  data = train_data,
-  method = "pls",
-  preProcess = c("center","scale"),
-  trControl = control
-)
-
-
-pls.old_model
-pls.new_model
-varImp(pls.old_model)
-varImp(pls.new_model)
-
-trellis.par.set(caretTheme())
-densityplot(pls.new_model, pch = "|")
-densityplot(pls.old_model, pch = "|")
-
-resamps <- resamples(list(old = pls.old_model, new = pls.new_model))
-summary(resamps)
-dotplot(resamps, metric = "Rsquared")
-difValues <- diff(resamps)
-bwplot(difValues, layout=c(1,3))
-
-
-pls.new_train_predicted <- predict(pls.new_model, train_data, type="raw")
-pls.old_train_predicted <- predict(pls.old_model, train_data, type="raw")
-pls.new_predicted <- predict(pls.new_model, test_data, type="raw")
-pls.old_predicted <- predict(pls.old_model, test_data, type="raw")
-
-
-summary(pls.new_model)
-
-
-# SSE and RMSE
-
-SSE <- sum((test_data$a - pls.new_predicted)^2) # sum of squared errors
-SST <- sum((test_data$a - mean(train_data$a))^2) # total sum of squares, remember to use training data here
-R_square <- 1 - SSE/SST
-SSE <- sum((test_data$a - pls.new_predicted)^2)
-RMSE <- sqrt(SSE/length(pls.new_predicted))
-
-
-SSE <- sum((test_data$a - pls.old_predicted)^2)
-R_square <- 1 - SSE/SST
-SSE <- sum((test_data$a - pls.old_predicted)^2)
-RMSE <- sqrt(SSE/length(pls.old_predicted))
-
-
-as.data.frame(cbind(predicted = pls.old_predicted, observed = test_data$a)) %>%
-  ggplot(aes(predicted, observed)) +
-  coord_cartesian(xlim = c(20, 30), ylim = c(20, 30)) +
-  geom_point(alpha = 0.1,shape=16) +
-  geom_smooth(method=lm,se=F) +
-  ggtitle("Accuracy: Predicted vs Actual") +
-  xlab("Predecited") +
-  ylab("Observed")
diff --git a/cnn2019-correct.R b/cnn2019-correct.R
deleted file mode 100644
index b888bc1..0000000
--- a/cnn2019-correct.R
+++ /dev/null
@@ -1,85 +0,0 @@
-#==================================================#
-# model the "correct" column
-
-library(here)
-library(tidyverse)
-library(caret)
-library(inspectdf)
-
-
-load(here("data/nback_seqs.Rd"))
-
-set.seed(42)
-
-seqs.imputed <- seqs %>%
-  filter(!is.na(correct), !is.na(rt)) %>%
-  mutate(correct=factor(correct,labels=c("INCORRECT","CORRECT")))
-
-inspect_cat(seqs.imputed)
-inspect_num(seqs.imputed)
-
-seqs.dummy <- predict(dummyVars(~.,data=seqs.imputed),seqs.imputed)
-
-
-train_indexes <- createDataPartition(seqs.imputed$correct,
-                                     times = 1,
-                                     p = 0.7,
-                                     list = F)
-
-train_data <- seqs.imputed[train_indexes,]
-test_data <- seqs.imputed[-train_indexes,]
-
-control <- trainControl(
-  method = "cv",
-  number = 5,
-  classProbs = T,
-  verboseIter = T,
-  summaryFunction = twoClassSummary
-)
-
-pls.model1 <- train(
-  correct ~ t + l + s + v + n + tl + ll + sl + ul + vl,
-  data = train_data,
-  method = "pls",
-  preProcess = c("center","scale"),
-  trControl = control
-)
-
-pls.model2 <- train(
-  correct ~ t + n + v,
-  data = train_data,
-  method = "pls",
-  preProcess = c("center","scale"),
-  trControl = control
-)
-
-pls.model1
-pls.model2
-varImp(pls.model1)
-varImp(pls.model2)
-
-trellis.par.set(caretTheme())
-densityplot(pls.model1, pch = "|")
-densityplot(pls.model2, pch = "|")
-
-resamps <- resamples(list(model1 = pls.model1, model2 = pls.model2))
-summary(resamps)
-dotplot(resamps, metric = "ROC")
-difValues <- diff(resamps)
-bwplot(difValues, layout=c(1,3))
-
-
-pls.train_predicted1 <- predict(pls.model1, train_data, type="raw")
-pls.train_predicted2 <- predict(pls.model2, train_data, type="raw")
-pls.predicted1 <- predict(pls.model1, test_data, type="raw")
-pls.predicted2 <- predict(pls.model2, test_data, type="raw")
-
-#FIXME
-confusionMatrix(pls.train_predicted1, train_data$correct)
-confusionMatrix(pls.train_predicted2, train_data$correct)
-confusionMatrix(pls.predicted1, test_data$correct)
-confusionMatrix(pls.predicted2, test_data$correct)
-
-colAUC(pls.predicted1, test_data$correct, plotROC=T)
-colAUC(pls.predicted2, test_data$correct, plotROC=T)
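
The deleted cnn2019-correct.R estimated AUC from the hard class predictions (pls.predicted1, pls.predicted2), which collapses the ROC curve to the single operating point of the default cutoff; the replacement script switches to class probabilities with pROC. For reference, the probability-based form of the old caTools call would look roughly like this (a sketch reusing the deleted script's names; colAUC is caTools' real function):

library(caTools)

# type = "prob" returns one probability column per class;
# ranking by P(CORRECT) keeps the information a hard cutoff throws away
pls.predicted1_prob <- predict(pls.model1, test_data, type = "prob")
colAUC(pls.predicted1_prob$CORRECT, test_data$correct, plotROC = TRUE)
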