diff --git a/ccn2019/ccn2019.rev3.Rmd b/ccn2019/ccn2019.rev3.Rmd index 61a178e..d8c501e 100644 --- a/ccn2019/ccn2019.rev3.Rmd +++ b/ccn2019/ccn2019.rev3.Rmd @@ -163,7 +163,9 @@ ```{r rfe_feature_selection} #f <- rt_cat ~ n + t + s + v + l + vl + sl + tl + ul + ll -f <- rt_cat ~ n + t + s + v + l + vl + sl + tl + ul + ll +#f <- correct ~ n + t + s + v + l + vl + sl + tl + ul + ll + rt_cat +f <- rt ~ n + t + s + v + l + vl + sl + tl + ul + ll# + rt_cat +#f <- a ~ n + t + v + l + vl + sl + tl + ul + ll# + rt_cat splt <- split_data(f, seqs) @@ -176,7 +178,6 @@ message("Optimal Variables: ") rfeProfile$optVariables -message("Model-Free Variable Importance") # ROC for each categories filterVarImp(as.data.frame(splt$train.x), splt$train.y) @@ -191,13 +192,25 @@ In addition to the model comparasion plotd, he final output of this section is a single ROC plot that compares new and old models. The same chunk can be used for other categorical models, including RT_CAT and penalized models. +Note: uncomment the desired `old.f` and `new.f` formulas to run the respective analysis. + ```{r comapre_correct_models} -old.f <- rt_cat ~ n + t + v -new.f <- rt_cat ~ n + t + tl + l + sl + vl + sl + tl + ul + ll +#1. atypical local features lead to longer response time. +#old.f <- rt_cat ~ n + t + v + rt_cat +#new.f <- rt_cat ~ n + t + tl + l + sl + vl + sl + tl + ul + ll + rt_cat -old.f <- correct ~ n + t + v + rt_cat -new.f <- correct ~ n + t + tl + l + sl + vl + sl + tl + ul + ll + rt_cat +old.f <- correct ~ n + t +new.f <- correct ~ n + t + rt_cat + +#old.f <- correct ~ n + t + tl + l + sl + vl + sl + tl + ul + ll +#new.f <- correct ~ n + t + tl + l + sl + vl + sl + tl + ul + ll + rt_cat + +#2. local features provide a better prediction for the correctness of choice +#old.f <- correct ~ n + t + v +#new.f <- correct ~ n + t + tl + l + sl + vl + sl + tl + ul + ll + +#3. 
Longer ctrl <- trainControl(method="cv", number=5, @@ -207,7 +220,8 @@ savePredictions = T, summaryFunction=twoClassSummary) -train_model <- function(f, splt, ctrl) { +train_model <- function(f, data, ctrl) { + splt <- split_data(f, data, .balance = T) model <- train(splt$train.x, splt$train.y, method = "pls", @@ -225,11 +239,13 @@ ) } -splt <- split_data(f, seqs) +old.model <- seqs %>% + #filter(rt_cat == "high") %>% + train_model(old.f, ., ctrl) - -old.model <- train_model(old.f, splt, ctrl) -new.model <- train_model(new.f, splt, ctrl) +new.model <- seqs %>% + #filter(rt_cat == "high") %>% + train_model(new.f, ., ctrl) bwplot(resamples(list(old=old.model$model, new=new.model$model))) densityplot(resamples(list(old=old.model$model, new=new.model$model))) @@ -240,9 +256,8 @@ confusionMatrix(new.model$test.y, new.model$test.observed_y) plot(varImp(new.model$model, scale = F, useModel = F)) - roc(old.model$test.observed_y, - old.model$test.y_prob$mid, + old.model$test.y_prob$YES, legacy.axes=T, plot = T, lwd=2, @@ -257,7 +272,7 @@ ci = T) roc(new.model$test.observed_y, - new.model$test.y_prob$mid, + new.model$test.y_prob$YES, legacy.axes=T, add = T, plot = T, @@ -275,7 +290,7 @@ ``` -## Compare RT models (high vs average RT) +## RT Analysis ```{r comapre_rt_models} f <- rt_cat ~ n + t + v