diff --git a/.gitignore b/.gitignore index 24181f6..35d62ca 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ ._* tmp/ /.ipynb_checkpoints/ -*.pyc \ No newline at end of file +*.pyc +data/pubmed/*.xml diff --git a/data/cognitive_atlas/concepts.pkl b/data/cognitive_atlas/concepts.pkl new file mode 100644 index 0000000..4a8ccda --- /dev/null +++ b/data/cognitive_atlas/concepts.pkl Binary files differ diff --git a/data/cognitive_atlas/tasks.pkl b/data/cognitive_atlas/tasks.pkl new file mode 100644 index 0000000..09092bc --- /dev/null +++ b/data/cognitive_atlas/tasks.pkl Binary files differ diff --git a/py/20191225_cognitiveatlas.py b/py/20191225_cognitiveatlas.py new file mode 100644 index 0000000..3ebc575 --- /dev/null +++ b/py/20191225_cognitiveatlas.py @@ -0,0 
#%% [markdown]

# The following code retrieves all task names and cognitive concepts from
# Cognitive Atlas and stores them as pickle files, then downloads the matching
# PubMed abstracts for each task/concept.
# Use `pip install cognitiveatlas` to install the required packages.

#%% get list of all tasks

from cognitiveatlas.api import search, get_task, get_concept
import pandas as pd
from datetime import date

import os

# SECURITY NOTE(review): a real API key should not be committed to source
# control — prefer exporting NCBI_API_KEY in the shell. setdefault keeps an
# externally provided key instead of unconditionally clobbering it.
os.environ.setdefault('NCBI_API_KEY', '751ff4edfab973bd0bc913ee84a0062bf009')

tasks = get_task().pandas
concepts = get_concept().pandas

tasks.to_pickle('data/cognitive_atlas/tasks.pkl')
concepts.to_pickle('data/cognitive_atlas/concepts.pkl')

print(len(tasks.index))
print(len(concepts.index))

#%%
import requests
import xmltodict

base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'


def search_and_store(term, output_file, db='pubmed', api_key=None):
    """Search PubMed for `term` and store all matching abstracts in a file.

    Uses the NCBI E-utilities history server: one esearch call records the
    result set server-side (usehistory=y), then a single efetch call
    downloads every abstract as XML via the returned WebEnv/QueryKey pair.

    Parameters
    ----------
    term : str
        Query term, matched against title/abstract ([TIAB]).
    output_file : str
        Path of the XML file to write.
    db : str
        Entrez database name (default 'pubmed').
    api_key : str or None
        NCBI API key. When None, falls back to the NCBI_API_KEY environment
        variable — looked up lazily at call time (not at import time) so the
        variable may be set after this module is imported.

    Raises
    ------
    requests.HTTPError
        If either E-utilities request returns an HTTP error status.
    """
    if api_key is None:
        api_key = os.environ['NCBI_API_KEY']

    # --- SEARCH (result set is kept on the NCBI history server)
    search_query = f'({term}[TIAB])'
    url = f'{base}esearch.fcgi'
    params = {
        'term': search_query.replace(' ', '+'),
        'usehistory': 'y',
        'db': db,
        'retmax': 0,           # only the history keys are needed, not the ids
        'reldate': 10 * 365,   # restrict to roughly the last 10 years
        'api_key': api_key,
    }

    response = requests.get(url, params=params)
    response.raise_for_status()  # fail loudly instead of parsing an error page
    search_response = xmltodict.parse(response.text)

    _num_of_results = search_response['eSearchResult']['Count']

    print(f"Successfully searched and stored results for '{term}' on history server.\n"
          f"Now retrieving {_num_of_results} abstracts...")

    # --- FETCH ABSTRACTS (POST so long WebEnv/query_key values fit safely)
    url = f'{base}efetch.fcgi'
    params = {
        'db': db,
        'api_key': api_key,
        'WebEnv': search_response['eSearchResult']['WebEnv'],
        'query_key': search_response['eSearchResult']['QueryKey'],
        'rettype': 'abstract',
        'retmode': 'xml',
    }

    response = requests.post(url, data=params)
    response.raise_for_status()

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(response.text)

    print(f'Successfully stored results to {output_file}')


def fetch_pubs(cogat_obj):
    """Fetch PubMed abstracts for one Cognitive Atlas task/concept row.

    `cogat_obj` is a pandas row with at least 'name' and 'id' fields; the
    result is written to data/pubmed/<id>.xml.
    """
    search_and_store(cogat_obj['name'], f"data/pubmed/{cogat_obj['id']}.xml")


_ = tasks.apply(fetch_pubs, axis=1)
_ = concepts.apply(fetch_pubs, axis=1)

# %% Remove empty resultsets
import glob
import os


def has_result(xml_file):
    """Return True if `xml_file` contains a PubMed article set.

    efetch writes a valid-but-empty XML document when a term matched nothing;
    those documents lack the top-level 'PubmedArticleSet' element.
    """
    with open(xml_file, encoding='utf-8') as f:
        content = xmltodict.parse(f.read())
    print(xml_file, 'PubmedArticleSet' in content)
    return 'PubmedArticleSet' in content


for file_path in glob.glob('data/pubmed/*.xml'):
    if not has_result(file_path):
        print(f'Found an empty resultset, removing {file_path}...')
        os.remove(file_path)

#%% list of tasks and concepts without any result from pubmed and filter them out

#%% count articles per task and concept

#%% for each task, count related concepts

#%% for each concept, count related tasks

#%% count total articles

#%% count unrelated articles for each task and concept

#%% frequency tables for all task-concept pairs

#%% measure task-task similarity

#%% measure concept-concept similarity