diff --git a/py/20190116_tidy_efo.py b/py/20190116_tidy_efo.py deleted file mode 100644 index 14b2db5..0000000 --- a/py/20190116_tidy_efo.py +++ /dev/null @@ -1,62 +0,0 @@ -#%% [markdown] -# EFO kickoff project collects several data from PubMed in a wide format. Codes provided below converts the wide format into a tidy format with the following expected structure in each row: -# ,,,,, - -# the context column is either EF or notEF, which shows that the context in which the query was performed was either executive functions or anything expect executive function. Please refer to my daily log of 20200116 or efo/kickoff codebook for more details on columns. - -# The following code expects a csv file (preprocessed csv file), and generated a new csv with _tidy suffix instead of _preproc in the same directory. -#%% - -import pandas as pd -import seaborn as sns - -import matplotlib.pyplot as plt - -sns.set(color_codes=True) - -def tidy_efo_preproc_csv(csv, output_csv): - df = pd.read_csv(csv) - - tidy_df = pd.DataFrame({ - "context": 'notEF', - "task": df['task'], - "construct": df['concept'], - "hits": df['task_concept_hits'] - df['task_concept_ef_hits'], - "task_hits": df['task_hits'] - df['task_ef_hits'], - "construct_hits": df['concept_hits'] - df['concept_ef_hits'] - }) - - ef_df = pd.DataFrame({ - "context": 'EF', - "task": df['task'], - "construct": df['concept'], - "hits": df['task_concept_ef_hits'], - "task_hits": df['task_ef_hits'], - "construct_hits": df['concept_ef_hits'] - }) - - tidy_df = tidy_df.append(ef_df, ignore_index=True) - - return tidy_df - -# params -csv_path = "/Users/morteza/workspace/efo_kickoff/datasets/efo_pubmed_hits.20200114_preproc.csv" -output_csv_path = csv_path.replace('_preproc.csv', '_tidy.csv') - -# make things tidy! 
-df = tidy_efo_preproc_csv(csv_path, output_csv_path) - - -# exploratory diagrams -wm_df = df[(df.context == 'EF') & (df.construct == 'Working Memory') & (df.task_hits<1000)] - -sns.kdeplot(wm_df.task_hits) - -#sns.pairplot(ef_df) -#sns.distplot(ef_df.task_hits, rug=True) - -#f, ax = plt.subplots(figsize=(6, 6)) -#sns.jointplot(ef_df.task_hits,ef_df.construct_hits, kind='kde', ax=ax) -#sns.rugplot(ef_df.task_hits, color="g", ax=ax) -#sns.rugplot(ef_df.construct_hits, color="b", vertical=True, ax=ax) -#sns.jointplot(ef_df.task_hits,ef_df.construct_hits) diff --git a/py/20190116_tidy_efo.py b/py/20190116_tidy_efo.py deleted file mode 100644 index 14b2db5..0000000 --- a/py/20190116_tidy_efo.py +++ /dev/null @@ -1,62 +0,0 @@ -#%% [markdown] -# EFO kickoff project collects several data from PubMed in a wide format. Codes provided below converts the wide format into a tidy format with the following expected structure in each row: -# ,,,,, - -# the context column is either EF or notEF, which shows that the context in which the query was performed was either executive functions or anything expect executive function. Please refer to my daily log of 20200116 or efo/kickoff codebook for more details on columns. - -# The following code expects a csv file (preprocessed csv file), and generated a new csv with _tidy suffix instead of _preproc in the same directory. 
-#%% - -import pandas as pd -import seaborn as sns - -import matplotlib.pyplot as plt - -sns.set(color_codes=True) - -def tidy_efo_preproc_csv(csv, output_csv): - df = pd.read_csv(csv) - - tidy_df = pd.DataFrame({ - "context": 'notEF', - "task": df['task'], - "construct": df['concept'], - "hits": df['task_concept_hits'] - df['task_concept_ef_hits'], - "task_hits": df['task_hits'] - df['task_ef_hits'], - "construct_hits": df['concept_hits'] - df['concept_ef_hits'] - }) - - ef_df = pd.DataFrame({ - "context": 'EF', - "task": df['task'], - "construct": df['concept'], - "hits": df['task_concept_ef_hits'], - "task_hits": df['task_ef_hits'], - "construct_hits": df['concept_ef_hits'] - }) - - tidy_df = tidy_df.append(ef_df, ignore_index=True) - - return tidy_df - -# params -csv_path = "/Users/morteza/workspace/efo_kickoff/datasets/efo_pubmed_hits.20200114_preproc.csv" -output_csv_path = csv_path.replace('_preproc.csv', '_tidy.csv') - -# make things tidy! -df = tidy_efo_preproc_csv(csv_path, output_csv_path) - - -# exploratory diagrams -wm_df = df[(df.context == 'EF') & (df.construct == 'Working Memory') & (df.task_hits<1000)] - -sns.kdeplot(wm_df.task_hits) - -#sns.pairplot(ef_df) -#sns.distplot(ef_df.task_hits, rug=True) - -#f, ax = plt.subplots(figsize=(6, 6)) -#sns.jointplot(ef_df.task_hits,ef_df.construct_hits, kind='kde', ax=ax) -#sns.rugplot(ef_df.task_hits, color="g", ax=ax) -#sns.rugplot(ef_df.construct_hits, color="b", vertical=True, ax=ax) -#sns.jointplot(ef_df.task_hits,ef_df.construct_hits) diff --git a/py/20200116_efo_analysis.py b/py/20200116_efo_analysis.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/py/20200116_efo_analysis.py diff --git a/py/20190116_tidy_efo.py b/py/20190116_tidy_efo.py deleted file mode 100644 index 14b2db5..0000000 --- a/py/20190116_tidy_efo.py +++ /dev/null @@ -1,62 +0,0 @@ -#%% [markdown] -# EFO kickoff project collects several data from PubMed in a wide format. 
Codes provided below converts the wide format into a tidy format with the following expected structure in each row: -# ,,,,, - -# the context column is either EF or notEF, which shows that the context in which the query was performed was either executive functions or anything expect executive function. Please refer to my daily log of 20200116 or efo/kickoff codebook for more details on columns. - -# The following code expects a csv file (preprocessed csv file), and generated a new csv with _tidy suffix instead of _preproc in the same directory. -#%% - -import pandas as pd -import seaborn as sns - -import matplotlib.pyplot as plt - -sns.set(color_codes=True) - -def tidy_efo_preproc_csv(csv, output_csv): - df = pd.read_csv(csv) - - tidy_df = pd.DataFrame({ - "context": 'notEF', - "task": df['task'], - "construct": df['concept'], - "hits": df['task_concept_hits'] - df['task_concept_ef_hits'], - "task_hits": df['task_hits'] - df['task_ef_hits'], - "construct_hits": df['concept_hits'] - df['concept_ef_hits'] - }) - - ef_df = pd.DataFrame({ - "context": 'EF', - "task": df['task'], - "construct": df['concept'], - "hits": df['task_concept_ef_hits'], - "task_hits": df['task_ef_hits'], - "construct_hits": df['concept_ef_hits'] - }) - - tidy_df = tidy_df.append(ef_df, ignore_index=True) - - return tidy_df - -# params -csv_path = "/Users/morteza/workspace/efo_kickoff/datasets/efo_pubmed_hits.20200114_preproc.csv" -output_csv_path = csv_path.replace('_preproc.csv', '_tidy.csv') - -# make things tidy! 
-df = tidy_efo_preproc_csv(csv_path, output_csv_path) - - -# exploratory diagrams -wm_df = df[(df.context == 'EF') & (df.construct == 'Working Memory') & (df.task_hits<1000)] - -sns.kdeplot(wm_df.task_hits) - -#sns.pairplot(ef_df) -#sns.distplot(ef_df.task_hits, rug=True) - -#f, ax = plt.subplots(figsize=(6, 6)) -#sns.jointplot(ef_df.task_hits,ef_df.construct_hits, kind='kde', ax=ax) -#sns.rugplot(ef_df.task_hits, color="g", ax=ax) -#sns.rugplot(ef_df.construct_hits, color="b", vertical=True, ax=ax) -#sns.jointplot(ef_df.task_hits,ef_df.construct_hits) diff --git a/py/20200116_efo_analysis.py b/py/20200116_efo_analysis.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/py/20200116_efo_analysis.py diff --git a/py/20200116_tidy_efo.py b/py/20200116_tidy_efo.py new file mode 100644 index 0000000..14b2db5 --- /dev/null +++ b/py/20200116_tidy_efo.py @@ -0,0 +1,62 @@ +#%% [markdown] +# The EFO kickoff project collects several kinds of data from PubMed in a wide format. The code below converts the wide format into a tidy format with the following expected structure in each row: +# context,task,construct,hits,task_hits,construct_hits + +# The context column is either EF or notEF, which shows whether the query was performed in the context of executive functions or of anything except executive functions. Please refer to my daily log of 20200116 or the efo/kickoff codebook for more details on the columns. + +# The following code expects a preprocessed csv file and generates a new csv with the _tidy suffix instead of _preproc in the same directory.
+#%%
+
+import pandas as pd
+import seaborn as sns
+
+import matplotlib.pyplot as plt
+
+sns.set(color_codes=True)
+
+
+def tidy_efo_preproc_csv(csv, output_csv):
+    """Reshape the wide EFO PubMed-hits table into a tidy one and save it.
+
+    Every input row yields two output rows, one per query context:
+    ``EF`` carries the ``*_ef_hits`` counts as they are, and ``notEF``
+    carries the overall counts minus the EF counts.
+
+    Args:
+        csv: path of the preprocessed (wide) CSV. It must provide the
+            columns ``task``, ``concept``, ``task_hits``, ``task_ef_hits``,
+            ``concept_hits``, ``concept_ef_hits``, ``task_concept_hits``
+            and ``task_concept_ef_hits``.
+        output_csv: path the tidy table is written to (without the index
+            column). A falsy value (``None`` or ``""``) skips the write.
+
+    Returns:
+        A DataFrame with the columns ``context``, ``task``, ``construct``,
+        ``hits``, ``task_hits`` and ``construct_hits``: all ``notEF`` rows
+        first, then all ``EF`` rows, renumbered from 0.
+    """
+    df = pd.read_csv(csv)
+
+    # Hits outside the EF context = overall hits minus EF-context hits.
+    tidy_df = pd.DataFrame({
+        "context": 'notEF',
+        "task": df['task'],
+        "construct": df['concept'],
+        "hits": df['task_concept_hits'] - df['task_concept_ef_hits'],
+        "task_hits": df['task_hits'] - df['task_ef_hits'],
+        "construct_hits": df['concept_hits'] - df['concept_ef_hits'],
+    })
+
+    ef_df = pd.DataFrame({
+        "context": 'EF',
+        "task": df['task'],
+        "construct": df['concept'],
+        "hits": df['task_concept_ef_hits'],
+        "task_hits": df['task_ef_hits'],
+        "construct_hits": df['concept_ef_hits'],
+    })
+
+    # DataFrame.append was removed in pandas 2.0; pd.concat with
+    # ignore_index=True produces the same stacked, renumbered frame.
+    tidy_df = pd.concat([tidy_df, ef_df], ignore_index=True)
+
+    # output_csv used to be accepted but ignored, so the tidy file was never
+    # actually produced.
+    if output_csv:
+        tidy_df.to_csv(output_csv, index=False)
+
+    return tidy_df
+
+
+# params
+csv_path = "/Users/morteza/workspace/efo_kickoff/datasets/efo_pubmed_hits.20200114_preproc.csv"
+output_csv_path = csv_path.replace('_preproc.csv', '_tidy.csv')
+
+# make things tidy!
+df = tidy_efo_preproc_csv(csv_path, output_csv_path)
+
+
+# exploratory diagrams
+# Density of per-task hit counts for the 'Working Memory' construct in the EF
+# context, restricted to tasks with fewer than 1000 hits.
+wm_df = df[(df.context == 'EF') & (df.construct == 'Working Memory') & (df.task_hits<1000)]
+
+sns.kdeplot(wm_df.task_hits)
+
+# NOTE: the snippets below reference ef_df, which only exists inside
+# tidy_efo_preproc_csv; at this level they need df[df.context == 'EF'] instead.
+#sns.pairplot(ef_df)
+#sns.distplot(ef_df.task_hits, rug=True)
+
+#f, ax = plt.subplots(figsize=(6, 6))
+#sns.jointplot(ef_df.task_hits,ef_df.construct_hits, kind='kde', ax=ax)
+#sns.rugplot(ef_df.task_hits, color="g", ax=ax)
+#sns.rugplot(ef_df.construct_hits, color="b", vertical=True, ax=ax)
+#sns.jointplot(ef_df.task_hits,ef_df.construct_hits)