diff --git a/ccn2019.rev3.Rmd b/ccn2019.rev3.Rmd
index 06a6d04..223277f 100644
--- a/ccn2019.rev3.Rmd
+++ b/ccn2019.rev3.Rmd
@@ -64,9 +64,11 @@
 #inspect_cor(NB,show_plot = T)
 ```
 
-```{r}
+```{r models}
 data <- seqs %>%
   unnest(local_stats) %>%
+  # recode `correct` as a two-level factor labelled "C"/"I" to avoid caret errors
+  # C = "CORRECT", I = "INCORRECT"; NOTE(review): labels follow sorted values (lowest -> "C"), confirm coding
   mutate(correct=factor(as.numeric(correct),labels=c("C","I"))) %>%
   filter(!is.na(correct), !is.na(rt))
 
@@ -74,12 +76,15 @@
 
 split <- nrow(data) * 0.8
 
-train <- data[1:split,]
-test <- data[(split+1):nrow(data),]
+# floor the split point: a fractional index would drop the last row from test_data
+n_train <- floor(split)
+train_data <- data[seq_len(n_train),]
+test_data <- data[(n_train + 1):nrow(data),]
 
-model <- train(
-  correct ~ .,
-  data = train,
+# NEW MODEL: every column except rt, a and al is used as a predictor
+new_model <- train(
+  correct ~ .-rt-a-al,
+  data = train_data,
   method = "glm",
   family = "binomial",
   trControl = trainControl(
@@ -90,13 +95,38 @@
   )
 )
 
-model
+new_model
 
-p <- predict(model, test, type="prob")
+# class probabilities feed the ROC/AUC; hard class labels feed the confusion matrix
+predicted_new_data <- predict(new_model, test_data, type="prob")
+predicted_new_class <- predict(new_model, test_data)
 
-confusionMatrix(ds$correct, prd)
+confusionMatrix(predicted_new_class, test_data$correct)
 
 library(caTools)
-colAUC(p,test$correct, plotROC=T)
-```
+colAUC(predicted_new_data, test_data$correct, plotROC=TRUE)
+
+## OLD MODEL (only global features)
+old_model <- train(
+  correct ~ n+t+v,
+  data = train_data,
+  method = "glm",
+  family = "binomial",
+  trControl = trainControl(
+    method = "cv",
+    number = 5,
+    classProbs = TRUE,
+    summaryFunction = twoClassSummary
+  )
+)
+
+old_model
+
+predicted_old_data <- predict(old_model, test_data, type="prob")
+predicted_old_class <- predict(old_model, test_data)
+
+confusionMatrix(predicted_old_class, test_data$correct)
+
+colAUC(predicted_old_data, test_data$correct, plotROC=TRUE)
+```
 