diff --git a/ccn2019.Rmd b/ccn2019.Rmd
index d01e020..9074227 100644
--- a/ccn2019.Rmd
+++ b/ccn2019.Rmd
@@ -9,34 +9,20 @@
library(stringi)
library(GA)
library(dbscan)
+library(inspectdf)
+
+load('./data/CL2015.RData')
```

-# Introduction
-
-## Problem
-  - local statistical properties of the n-back affect how we respond.
-  - local vs. global properties, what matters the most?
-
-###
-  - an agent to replicate behavioral data sets
-  - an online service to generate and evaluate n-back sequences
-
-## Method
-  - create a history window for each trial (a.k.a, contiguous subsequences)
-  - calculate local $T$, $L$, $S$, $U$, $RT_{mean}$, $Accuracy_{mean}$ for each subsequence
-  - Model RT/Acc (response vars) with local properties (exp. vars)
-  - Cluster responses (or exp. vars?)
-  - Investigate if extracted clusters are statistically different
-
-### Explanatory Variables
+### Variables
- $T$ number of targets
- $L$ number of lures
- $S$ Skewness score
-- $U$ Uniformity (!repetition)
+- $U$ uniformity (i.e., non-repetition)
-
-### Response Variables
- $RT_{mean}$
- $Accuracy_{mean}$
+- $dprime$ (sensitivity)
+- $criterion$ (response bias)

## Constraints
@@ -45,52 +31,13 @@
- uniform distribution of choices
- controlled local lumpiness

-## Modeling
-  - Create two models for local and global features as explanatory vars
-  - Continue with modeling RT and Accuracy based upon local and global feats and compare them. Which model provides a better description of the recoreded RT and Accuracy vars? (model comparasion, model selection, etc)
-
-The N-Back data set from @cardoso-leite2015 contains all required parameters for this study including RT, accuracy, and stimuli.
-
-```{r, echo=F}
-load('./data/CL2015.RData')
-```
-
-```{r, include=F}
-trials <- c('a','b','c','d','c','d','b','a','a','d','b','a','c','c','a','c')
-min_len <-4
-max_len <-4
-
-contig_seqs = list()
-
-for (st in 1:length(trials)) {
-  min_fin_index <- st + min_len - 1
-  max_fin_index <- min(st + max_len - 1, length(trials))
-
-  for (fin in min_fin_index:max_fin_index) {
-    seq <- list(trials[st:fin])
-    contig_seqs <- c(contig_seqs, seq)
-  }
-}
-```
-
-Each constraint is a cost function to minimize for each sequence of stimuli.
+Each constraint is an inverted-bell (upside-down Gaussian) cost function to be minimized for each sequence of stimuli.

```{r, eval=F}
-
-# Codes for fitness and loss functions
-history <- contig_seqs
-targets <- 4
-lures <- 2
-targets_fitness <- function(x) 1.0 - 10*dnorm(x,mean=0,sd=4)
-lures_fitness <- function(x) 1.0 - 10*dnorm(x,mean=0,sd=4)
-
-# calc skewness
-skewness_fitness <- function(x, choices) {
+targets_cost <- function(x) 1.0 - 10*dnorm(x, mean=0, sd=4)
+lures_cost <- function(x) 1.0 - 10*dnorm(x, mean=0, sd=4)
+skewness_cost <- function(x, choices) {
  uniform_ratio <- length(x) / length(choices)
  deviation_from_uniform <- setNames(vector('numeric', length(choices)), choices)
  for (c in choices) {
@@ -100,33 +47,24 @@
  max(deviation_from_uniform)
}

-ralph2014_skewed <- function(x, choices) {
+# R port of the commented-out Python sketch (the original ralph2014_skewed):
+# a sequence counts as skewed when the most frequent half of the choices
+# covers more than two thirds of the trials
+lumpiness_cost <- function(x, choices) {
  #trials = len(seq)
  #freqs = [float(seq.count(c)) for c in choices]
  #ralph_skewed = sum(heapq.nlargest(int(len(choices) / 2), freqs)) > (trials * 2 / 3)
  #return ralph_skewed
-  F
+  freqs <- sapply(choices, function(c) sum(x == c))
+  sum(sort(freqs, decreasing = TRUE)[1:(length(choices) %/% 2)]) > (length(x) * 2 / 3)
}

-merged_fitness <- function(x) targets_fitness(x) + lures_fitness(x) + skewness_fitness(x)
-GA <- ga(type = "real-valued", fitness = fitness, lower = -10, upper = 10)
-plot(GA)
-
-targets_sample <- data.frame(x=-targets:targets)
-targets_sample %>%
-  ggplot(aes(x,y=targets_fitness(x))) +
-  geom_line()
+#merged_cost <- function(x) targets_cost(x) + lures_cost(x) + skewness_cost(x)
+#GA <- ga(type = "real-valued", fitness = merged_cost, lower = -10, upper = 10)
+#plot(GA)
```
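
For intuition about the shape of these costs, a minimal sketch (assuming only the `targets_cost` definition above) that restores the preview plot dropped from the earlier revision:

```{r, eval=F}
# the cost is lowest (~0) when the deviation from the desired target count
# is zero, and approaches 1 as the deviation grows: an inverted bell
data.frame(x = seq(-10, 10, by = 0.1)) %>%
  ggplot(aes(x, y = targets_cost(x))) +
  geom_line() +
  labs(x = "deviation from desired count", y = "cost")
```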

```{r}
-# Codes to calculate local statistical properties
with_lures <- function(condition, stim, stim_type, history = NA) {
-
-  # extend to 2-back/3-back
-  sapply(
-    1:length(stim),
+  sapply(1:length(stim),
    function(i) {
      switch(as.character(condition[i]),
        "2-back" = {
@@ -170,6 +108,10 @@
  })
}

+with_lag <- function(stimulus, history) {
+  # lag since the last occurrence of the stimulus in its history (a sketch;
+  # assumes `history` holds the preceding trials as a string)
+  sapply(1:length(history), function(i) {
+    pos <- stri_locate_all_fixed(history[i], stimulus[i])[[1]][, 'start']
+    if (all(is.na(pos))) NA else stri_length(history[i]) - max(pos) + 1
+  })
+}
+
with_skewness_score <- function(stimulus, history) {
  sapply(1:length(history), function(i) {
    trials <- stimulus[(i-str_length(history[i])):i]
@@ -192,28 +134,39 @@
  sapply(1:length(targets_ratio), function(i) 0)
}

+window_size <- 8
+
NB_modified <- NB %>%
  group_by(participant, condition, block) %>%
-  mutate(history = with_history(stimulus)) %>%
+  mutate(history = with_history(stimulus, size=window_size)) %>%
  #mutate(stimulus_type = map_chr(.x=stimulus, stim_type=stimulus_type, history=history,.f=with_lures)) %>%
  mutate(stimulus_type_2 = with_lures(condition, stimulus, stimulus_type, history)) %>%
-  mutate(t = with_targets_ratio(stimulus_type_2, history)) %>%
-  mutate(l = with_lures_ratio(stimulus_type_2, history)) %>%
-  mutate(s = with_skewness_score(stimulus, history)) %>%
-  mutate(u = with_lumpiness_score(stimulus, history)) %>%
-  filter(stri_length(history)==16) %>%
-  #normalize_scores(targets_ratio, lures_ratio, skewness, lumpiness) %>%
+  mutate(targets = with_targets_ratio(stimulus_type_2, history)) %>%
+  mutate(lures = with_lures_ratio(stimulus_type_2, history)) %>%
+  mutate(skewness = with_skewness_score(stimulus, history)) %>%
+  mutate(lumpiness = with_lumpiness_score(stimulus, history)) %>%
+  filter(stri_length(history)==window_size) %>%
+  mutate(correct = ifelse(stimulus_type=='burn-in', NA, correct)) %>%
+  #normalize_scores(targets_ratio, lures_ratio, skewness, lumpiness) %>%
  ungroup()

-NB_modified %>%
-  mutate(correct = ifelse(stimulus_type=='burn-in',NA,correct))
-
-## group-level averaged NB, a single row represent an observation for a single subject
-## in a single condition
+pca <- prcomp(NB_modified[,c('targets','lures','skewness','lumpiness')], center = TRUE, scale. = TRUE)
+NB_modified <- NB_modified %>% mutate(pc1=pca$x[,'PC1'], pc2=pca$x[,'PC2'])
+
+## participant-level averaged NB; a single row represents an observation for a
+## single participant in a single condition
NB_avg <- NB_modified %>%
  group_by(participant, condition) %>%
  mutate(correct = ifelse(stimulus_type=='burn-in',NA,correct)) %>%
-  summarise(targets=sum(t), lures=sum(l), skewness=sum(s), lumpiness=sum(u), rts = mean(rt, na.rm=T), accuracy=sum(correct,na.rm=T)/90) %>%
+  summarise(
+    targets=sum(targets),
+    lures=sum(lures),
+    skewness=sum(skewness),
+    lumpiness=sum(lumpiness),
+    rt = mean(rt, na.rm=T),
+    correct=sum(correct, na.rm=T)/90) %>%
  ungroup()

# print
@@ -222,11 +175,8 @@
# View()
#

-pca <- prcomp(NB_modified[,c('t','l','s','u')], center = TRUE,scale. = TRUE)
-NB_modified <- NB_modified %>% mutate(pc1=pca$x[,'PC1'], pc2=pca$x[,'PC2'])
-
-fit <- lm(correct ~ t * s, NB_modified)
+# dprime is not yet computed at the trial level, so the model uses the four
+# local properties (the renamed t/s/u/l columns)
+fit <- lm(correct ~ targets * skewness * lumpiness * lures, NB_modified)
```
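
$dprime$ and $criterion$ appear in the variable list above but are never derived. A minimal signal-detection sketch, assuming `stimulus_type_2` marks target trials as `'target'`, `correct` is coded 0/1, and any incorrect response on a non-target trial counts as a false alarm:

```{r, eval=F}
NB_sdt <- NB_modified %>%
  filter(!is.na(correct)) %>%
  group_by(participant, condition) %>%
  summarise(
    # hit rate on target trials, false-alarm rate on non-target trials;
    # the +0.5/+1 log-linear correction avoids infinite z-scores
    hit_rate = (sum(correct[stimulus_type_2 == 'target']) + 0.5) /
               (sum(stimulus_type_2 == 'target') + 1),
    fa_rate  = (sum(1 - correct[stimulus_type_2 != 'target']) + 0.5) /
               (sum(stimulus_type_2 != 'target') + 1),
    dprime    = qnorm(hit_rate) - qnorm(fa_rate),
    criterion = -(qnorm(hit_rate) + qnorm(fa_rate)) / 2
  ) %>%
  ungroup()
```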

@@ -234,10 +184,12 @@
```{r}
# DBSCAN Clustering (RT+ACCURACY against skewness)
+NB_avg <- NB_avg %>%
+  mutate(cluster = dbscan(cbind(correct,rt), eps = 0.3, minPts = 3)$cluster)
+
NB_avg %>%
-  mutate(cluster = dbscan(cbind(accuracy,rts), eps = 0.5, minPts = 3)$cluster) %>%
-  ggplot(aes(skewness, accuracy, color=factor(cluster))) +
-  ggtitle("targets (window = 16 trials)", "NOTE: each point is a single participant") +
+  ggplot(aes(skewness, correct, color=factor(cluster))) +
+  ggtitle("clusters (window = 8 trials)", "NOTE: each point is a single participant") +
  geom_point(alpha=0.3) +
  #geom_smooth(method='lm', se = F) +
  facet_wrap(~condition)

@@ -252,6 +204,19 @@
  geom_jitter() +
  geom_point() +
  stat_summary(fun.y="mean")

+NB_modified %>%
+  inspect_cor(show_plot = T)
+
+NB_avg %>%
+  inspect_cor(show_plot = T)
+
+NB_modified %>%
+  ggplot(aes(rt, correct, color=lumpiness)) +
+  geom_jitter() +
+  geom_point() +
+  stat_summary(fun.y="mean")
+
NB_modified %>%
  filter(!is.na(correct)) %>%
-  ggplot(aes(jitter(s),jitter(t),color=correct)) +
+  ggplot(aes(jitter(skewness), jitter(targets), color=correct)) +

@@ -285,7 +250,7 @@
NB_modified %>%
  filter(!is.na(correct)) %>%
  ggplot(aes(pc1,rt,color=correct)) +
-  geom_point() +
+  geom_point(alpha=0.3) +
  geom_smooth(method="lm",se = F) +
  facet_wrap(~condition, scales="free")

@@ -307,3 +272,10 @@
-  ggplot(kmeans_clusters$rt)
-
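
The `eps` above was tuned by hand (0.5 in the old revision, 0.3 here). A standard sanity check is the k-nearest-neighbor distance plot that ships with the dbscan package (a sketch, with k set to `minPts`):

```{r, eval=F}
# points are sorted by the distance to their 3rd nearest neighbor;
# a knee in this curve suggests a reasonable eps for dbscan
kNNdistplot(cbind(NB_avg$correct, NB_avg$rt), k = 3)
abline(h = 0.3, lty = 2)  # the eps currently used above
```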
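
An earlier revision asked which feature set better describes the recorded responses. One way to revisit that with the objects computed above (a sketch comparing the raw local properties against their first two principal components, lower AIC preferred):

```{r, eval=F}
# both models predict trial-level accuracy; AIC trades fit against complexity
fit_features <- lm(correct ~ targets * skewness * lumpiness * lures, NB_modified)
fit_pcs      <- lm(correct ~ pc1 * pc2, NB_modified)
AIC(fit_features, fit_pcs)
```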

diff --git a/ccn2019.rev2.Rmd b/ccn2019.rev2.Rmd
new file mode 100644
index 0000000..16bd274
--- /dev/null
+++ b/ccn2019.rev2.Rmd
@@ -0,0 +1,28 @@
+---
+title: "Statistical Properties of the N-Back Sequences"
+output: html_notebook
+---
+
+# Problems
+
+Statistical properties of n-back sequences bias behavior. Under a specified structure, these biases allow multiple cognitive strategies, producing heterogeneous behavior in a "gold standard" cognitive task.
+
+# Gaps
+
+- Unclear how to parameterize interesting variations for sequence generation
+- How do we model these multiple strategies? (This requires identifying which sequence variations matter.)
+  - local vs. global properties: which one matters the most?
+  - Local: lumpiness, short sequence patterns -> could be exploited by “reactive”/automatic strategies
+  - Global: no lures, large vocabulary -> a repeated pattern implies a target
+
+```{r libraries, message=FALSE, include=FALSE, paged.print=FALSE}
+library(ggplot2)
+library(tidyverse)
+```
+
+```{r datasets}
+load('./data/CL2015.RData')
+```
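
To sanity-check the load, a quick structural look at the data (a sketch assuming `CL2015.RData` provides the same trial-level `NB` data frame used in ccn2019.Rmd):

```{r, eval=F}
# expected to include participant, condition, block, stimulus,
# stimulus_type, rt, and correct (the columns used in ccn2019.Rmd)
glimpse(NB)
NB %>% count(condition)
```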