diff --git a/efo/topic_modeling/gensim_playground.py b/efo/topic_modeling/gensim_playground.py
index 8d386f0..b109670 100644
--- a/efo/topic_modeling/gensim_playground.py
+++ b/efo/topic_modeling/gensim_playground.py
@@ -5,7 +5,8 @@
 from pprint import pprint
 from gensim import models
 from gensim import corpora
-from gensim.parsing.preprocessing import preprocess_documents, preprocess_string
+from gensim.parsing.preprocessing import preprocess_documents
+from gensim.models import Phrases
 
 
 # some functions like LsiModel.print_topics() outputs to logging streams
@@ -16,23 +17,29 @@
 
 # concepts: Document, Corpus, Vector, Model
 
+# TODO analysis: limit the corpus to the task corpus
 TEXTS = [
   'this is a document about measuring executive functions using cognitive tests.',
   'we measured executive functions skills using stop-signal task.',
-  'digit span can be used to measure intelligence and executive functions.'
+  'digit span can be used to measure intelligence and executive functions.',
+  'digit span also comes with different flavours that each taps executive functions differently.',
+  'executive functions are not always what we know by digit span, it can  be measured differently.',
+  'main executive function skills include reglulation and goal-driven behaviors.',
+  'digit span is not the only task that matters in executive functions literature'
 ]
 
-
 # Steps to be taken:
 # - load corpus
 # - preprocessing (tokenize, lower case documents, remove infrequent words, remove stop words, ...)
 
-
+# TODO analysis 2: have the task names be treated as phrases
 CORPUS = preprocess_documents(TEXTS)
 # pprint(CORPUS)
 
 # create dictionary
 dictionary = corpora.Dictionary(CORPUS)
+# bigram = Phrases(CORPUS, min_count=3, threshold=1)
+# list(bigram[CORPUS])
 # pprint(dictionary.token2id)
 
 # bag-of-words for a new doc
@@ -83,4 +90,9 @@
     print(doc, as_text)
 
 
+# TODO evaluate the number of topics per task
+# TODO topic similarities across tasks (1 task to many topics and then compare topic vectors)
+
+
+
 # TODO: apply RP, LDA, and HDP and compare results