diff --git a/ccn2019-correct.R b/ccn2019-correct.R index 14c4175..a07e4e5 100644 --- a/ccn2019-correct.R +++ b/ccn2019-correct.R @@ -36,52 +36,60 @@ control <- trainControl( method = "repeatedcv", number = 5, + repeats = 2, classProbs = T, verboseIter = T, - summaryFunction = twoClassSummary, savePredictions = T, - sampling = "down" + sampling = "down", + selectionFunction = "oneSE" ) pls.new_model <- train( - correct ~ t + l + s + v + n + tl + ll + sl + ul + vl, + correct ~ .-a-al-dp-cr-rt, data = train_data, method = "pls", - preProcess = c("center","scale"), + metric = "Accuracy", + tuneLength = 20, + preProcess = c("zv","center","scale"), trControl = control ) -pls.old_model <- train( - correct ~ t + n + v, +plot(pls.new_model) +plot(varImp(pls.new_model), main="Variables Importance for Correctness (New Model)") + +pls.common_model <- train( + correct ~ .-a-al-dp-cr-rt-tl-ul-sl-s-ll-vl-l, data = train_data, method = "pls", - preProcess = c("center","scale"), + metric = "Accuracy", + tuneLength = 20, + preProcess = c("zv","center","scale"), trControl = control ) -pls.new_model -pls.old_model -varImp(pls.new_model) -varImp(pls.old_model) +plot(pls.common_model) +plot(varImp(pls.common_model), main="Variable Importance for Correctness (Common Model)") -trellis.par.set(caretTheme()) -densityplot(pls.new_model, pch = "|") -densityplot(pls.old_model, pch = "|") -resamps <- resamples(list(new = pls.new_model, old = pls.old_model)) -summary(resamps) -dotplot(resamps) -diffValues <- diff(resamps) -bwplot(diffValues, layout=c(1,3)) +#trellis.par.set(caretTheme()) +#densityplot(pls.new_model, pch = "|") +#densityplot(pls.common_model, pch = "|") + +# Compile models and compare performance +pls.models <- resamples(list(new = pls.new_model, common = pls.common_model)) +#DEBUG summary(pls.models) +#DEBUG dotplot(pls.models) +#DEBUG diffValues <- diff(resamps) +bwplot(pls.models, metric = "Accuracy", layout=c(1,1), main="Correctness Model Performance") pls.new_predicted <- predict(pls.new_model, test_data, type="raw") pls.new_predicted_prob <- predict(pls.new_model, test_data, type="prob") -pls.old_predicted <- predict(pls.old_model, test_data, type="raw") -pls.old_predicted_prob <- predict(pls.old_model, test_data, type="prob") +pls.common_predicted <- predict(pls.common_model, test_data, type="raw") +pls.common_predicted_prob <- predict(pls.common_model, test_data, type="prob") confusionMatrix(pls.new_predicted, test_data$correct) -confusionMatrix(pls.old_predicted, test_data$correct) +confusionMatrix(pls.common_predicted, test_data$correct) library(pROC) par(pty="s") @@ -97,7 +105,7 @@ print.auc=T) plot.roc(test_data$correct, - pls.old_predicted_prob$CORRECT, + pls.common_predicted_prob$CORRECT, legacy.axes=T, lwd=4, col="darkgray", @@ -107,8 +115,9 @@ lty = 3, add=T) -legend(100,100, legend=c("New Model", "Old Model"), +legend(100,100, legend=c("New Model", "Common Model"), col=c("black", "darkgray"), lty=c(1,3),lwd=3, cex=0.8) -# ggplot(pls.old_model, aes(d = pred$obs, m = pred$CORRECT)) + -# geom_roc() +# requires plotROC package +#DEBUG ggplot(pls.common_model, aes(d = pred$obs, m = pred$CORRECT)) + +#DEBUG geom_roc() diff --git a/ccn2019-correct.R b/ccn2019-correct.R index 14c4175..a07e4e5 100644 --- a/ccn2019-correct.R +++ b/ccn2019-correct.R @@ -36,52 +36,60 @@ control <- trainControl( method = "repeatedcv", number = 5, + repeats = 2, classProbs = T, verboseIter = T, - summaryFunction = twoClassSummary, savePredictions = T, - sampling = "down" + sampling = "down", + selectionFunction = "oneSE" ) pls.new_model <- train( - correct ~ t + l + s + v + n + tl + ll + sl + ul + vl, + correct ~ .-a-al-dp-cr-rt, data = train_data, method = "pls", - preProcess = c("center","scale"), + metric = "Accuracy", + tuneLength = 20, + preProcess = c("zv","center","scale"), trControl = control ) -pls.old_model <- train( - correct ~ t + n + v, +plot(pls.new_model) +plot(varImp(pls.new_model), main="Variables Importance for Correctness (New Model)") + +pls.common_model <- train( + correct ~ .-a-al-dp-cr-rt-tl-ul-sl-s-ll-vl-l, data = train_data, method = "pls", - preProcess = c("center","scale"), + metric = "Accuracy", + tuneLength = 20, + preProcess = c("zv","center","scale"), trControl = control ) -pls.new_model -pls.old_model -varImp(pls.new_model) -varImp(pls.old_model) +plot(pls.common_model) +plot(varImp(pls.common_model), main="Variable Importance for Correctness (Common Model)") -trellis.par.set(caretTheme()) -densityplot(pls.new_model, pch = "|") -densityplot(pls.old_model, pch = "|") -resamps <- resamples(list(new = pls.new_model, old = pls.old_model)) -summary(resamps) -dotplot(resamps) -diffValues <- diff(resamps) -bwplot(diffValues, layout=c(1,3)) +#trellis.par.set(caretTheme()) +#densityplot(pls.new_model, pch = "|") +#densityplot(pls.common_model, pch = "|") + +# Compile models and compare performance +pls.models <- resamples(list(new = pls.new_model, common = pls.common_model)) +#DEBUG summary(pls.models) +#DEBUG dotplot(pls.models) +#DEBUG diffValues <- diff(resamps) +bwplot(pls.models, metric = "Accuracy", layout=c(1,1), main="Correctness Model Performance") pls.new_predicted <- predict(pls.new_model, test_data, type="raw") pls.new_predicted_prob <- predict(pls.new_model, test_data, type="prob") -pls.old_predicted <- predict(pls.old_model, test_data, type="raw") -pls.old_predicted_prob <- predict(pls.old_model, test_data, type="prob") +pls.common_predicted <- predict(pls.common_model, test_data, type="raw") +pls.common_predicted_prob <- predict(pls.common_model, test_data, type="prob") confusionMatrix(pls.new_predicted, test_data$correct) -confusionMatrix(pls.old_predicted, test_data$correct) +confusionMatrix(pls.common_predicted, test_data$correct) library(pROC) par(pty="s") @@ -97,7 +105,7 @@ print.auc=T) plot.roc(test_data$correct, - pls.old_predicted_prob$CORRECT, + pls.common_predicted_prob$CORRECT, legacy.axes=T, lwd=4, col="darkgray", @@ -107,8 +115,9 @@ lty = 3, add=T) -legend(100,100, legend=c("New Model", "Old Model"), +legend(100,100, legend=c("New Model", "Common Model"), col=c("black", "darkgray"), lty=c(1,3),lwd=3, cex=0.8) -# ggplot(pls.old_model, aes(d = pred$obs, m = pred$CORRECT)) + -# geom_roc() +# requires plotROC package +#DEBUG ggplot(pls.common_model, aes(d = pred$obs, m = pred$CORRECT)) + +#DEBUG geom_roc() diff --git a/ccn2019-criterion.R b/ccn2019-criterion.R index 3cdf184..66c66a0 100644 --- a/ccn2019-criterion.R +++ b/ccn2019-criterion.R @@ -57,19 +57,18 @@ cr ~ t+l+s+n+tl+ll+sl+ul+vl, data = train_data, method = "pls", - preProcess = c("center","scale"), + preProcess = c("zv","center","scale"), trControl = control ) pls.old_model <- train( - cr ~ t + n:v + n, + dp ~ n*t, data = train_data, method = "pls", preProcess = c("center","scale"), trControl = control ) - summary(pls.old_model) summary(pls.new_model) plot(varImp(pls.old_model))