diff --git a/ccn2019.rev3.Rmd b/ccn2019.rev3.Rmd
index 468f63b..c0f15cd 100644
--- a/ccn2019.rev3.Rmd
+++ b/ccn2019.rev3.Rmd
@@ -1,3 +1,6 @@
+
+$P=\langle V,D,C,W \rangle$
+
 ```{r setup, message=FALSE, include=FALSE, paged.print=FALSE}
 #! ===============================================
 #! load required packages
@@ -94,7 +97,7 @@
 #! prepare data for modeling (remove na, etc)
 seqs <- seqs %>%
   filter(!is.na(correct), !is.na(rt)) %>%
-  mutate(correct=factor(as.numeric(correct),labels=c("I","C")))
+  mutate(correct=factor(as.numeric(correct),labels=c("INCORRECT","CORRECT")))
   #mutate(correct=as.numeric(correct)) #FIXME
 
 remove outcomes before dummy out the data and imputing missing values
diff --git a/cnn2019-accuracy.R b/cnn2019-accuracy.R
new file mode 100644
index 0000000..42109e3
--- /dev/null
+++ b/cnn2019-accuracy.R
@@ -0,0 +1,93 @@
+#==================================================#
+# model the "accuracy" column (a for global, and al for local accuracy)
+
+library(here)
+library(tidyverse)
+library(caret)
+library(inspectdf)
+
+load(here("data/nback_seqs.Rd"))
+
+set.seed(42)
+
+# drop rows with missing outcomes; `correct` is not a predictor here
+seqs.imputed <- seqs %>%
+  filter(!is.na(correct), !is.na(rt)) %>%
+  select(-correct)
+
+inspect_num(seqs.imputed)
+
+# one-hot encode factors (note: seqs.dummy is not used further below)
+seqs.dummy <- predict(dummyVars(~., data = seqs.imputed), seqs.imputed)
+
+# 70/30 train/test split, stratified on the outcome `a`
+train_indexes <- createDataPartition(seqs.imputed$a,
+                                     times = 1,
+                                     p = 0.7,
+                                     list = FALSE)
+
+train_data <- seqs.imputed[train_indexes,]
+test_data  <- seqs.imputed[-train_indexes,]
+
+control <- trainControl(
+  method = "cv",
+  number = 5,
+  verboseIter = TRUE
+)
+
+pls.new_model <- train(
+  a ~ t + l + s + v + n + tl + ll + sl + ul + vl,
+  data = train_data,
+  method = "pls",
+  preProcess = c("center","scale"),
+  trControl = control
+)
+
+pls.old_model <- train(
+  a ~ t + n + v,
+  data = train_data,
+  method = "pls",
+  preProcess = c("center","scale"),
+  trControl = control
+)
+
+pls.old_model
+pls.new_model
+varImp(pls.old_model)
+varImp(pls.new_model)
+
+trellis.par.set(caretTheme())
+densityplot(pls.new_model, pch = "|")
+densityplot(pls.old_model, pch = "|")
+
+# compare the two models on the cross-validation resamples
+resamps <- resamples(list(old = pls.old_model, new = pls.new_model))
+summary(resamps)
+dotplot(resamps, metric = "Rsquared")
+difValues <- diff(resamps)
+bwplot(difValues, layout = c(1,3))
+
+pls.new_train_predicted <- predict(pls.new_model, train_data, type="raw")
+pls.old_train_predicted <- predict(pls.old_model, train_data, type="raw")
+pls.new_predicted <- predict(pls.new_model, test_data, type="raw")
+pls.old_predicted <- predict(pls.old_model, test_data, type="raw")
+
+summary(pls.new_model)
+
+# SSE, R^2, and RMSE on the held-out test set
+SSE <- sum((test_data$a - pls.new_predicted)^2)  # sum of squared errors
+SST <- sum((test_data$a - mean(train_data$a))^2) # total sum of squares; note the training-set mean
+R_square.new <- 1 - SSE/SST
+RMSE.new <- sqrt(SSE/length(pls.new_predicted))
+
+SSE <- sum((test_data$a - pls.old_predicted)^2)
+R_square.old <- 1 - SSE/SST
+RMSE.old <- sqrt(SSE/length(pls.old_predicted))
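The manual SSE/SST arithmetic at the end of cnn2019-accuracy.R can be cross-checked with caret's own helper. A minimal sketch, assuming the `pls.new_predicted`, `pls.old_predicted`, and `test_data` objects from the script above; note that `postResample` reports R² as the squared correlation between predictions and observations, so it can differ slightly from the training-mean-based `R_square` computed by hand:

```r
library(caret)

# RMSE, Rsquared, and MAE on the held-out test set, one call per model
postResample(pred = pls.new_predicted, obs = test_data$a)
postResample(pred = pls.old_predicted, obs = test_data$a)
```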
diff --git a/cnn2019-correct.R b/cnn2019-correct.R
index a4e0b4c..b888bc1 100644
--- a/cnn2019-correct.R
+++ b/cnn2019-correct.R
@@ -41,6 +41,7 @@
   correct ~ t + l + s + v + n + tl + ll + sl + ul + vl,
   data = train_data,
   method = "pls",
+  preProcess = c("center","scale"),
   trControl = control
 )
 
@@ -48,6 +49,7 @@
   correct ~ t + n + v,
   data = train_data,
   method = "pls",
+  preProcess = c("center","scale"),
   trControl = control
 )
 
@@ -67,7 +69,7 @@
 bwplot(difValues, layout=c(1,3))
 
 
-pls.train_predicted1 <- predict(pls.model1, train_data, type="prob")
+pls.train_predicted1 <- predict(pls.model1, train_data, type="raw")
 pls.train_predicted2 <- predict(pls.model2, train_data, type="raw")
 pls.predicted1 <- predict(pls.model1, test_data, type="raw")
 pls.predicted2 <- predict(pls.model2, test_data, type="raw")
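On the `type="prob"` to `type="raw"` fix: with `correct` recoded as a two-level factor (INCORRECT/CORRECT) in ccn2019.rev3.Rmd, `type="raw"` returns predicted class labels while `type="prob"` returns per-class probabilities, so the corrected call now matches the other three predictions. A minimal sketch of how each prediction type is consumed downstream, assuming `pls.model1` and `test_data` exist as in cnn2019-correct.R and treating "CORRECT" as the positive class (an assumption, not stated in the diff):

```r
library(caret)

# "raw" predictions are class labels and feed straight into a confusion matrix
pls.predicted1 <- predict(pls.model1, test_data, type = "raw")
confusionMatrix(data = pls.predicted1,
                reference = test_data$correct,
                positive = "CORRECT")  # assumed positive class

# "prob" predictions are per-class probabilities, the right input for ROC/AUC
pls.prob1 <- predict(pls.model1, test_data, type = "prob")
head(pls.prob1)  # one column per factor level: INCORRECT, CORRECT
```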