diff --git a/py/20190116_tidy_efo.py b/py/20190116_tidy_efo.py index 5ebbfd3..14b2db5 100644 --- a/py/20190116_tidy_efo.py +++ b/py/20190116_tidy_efo.py @@ -8,6 +8,11 @@ #%% import pandas as pd +import seaborn as sns + +import matplotlib.pyplot as plt + +sns.set(color_codes=True) def tidy_efo_preproc_csv(csv, output_csv): df = pd.read_csv(csv) @@ -32,13 +37,26 @@ tidy_df = tidy_df.append(ef_df, ignore_index=True) - tidy_df.to_csv(output_csv) - print(f"Tidy dataset written successfully to {output_csv}") - + return tidy_df # params csv_path = "/Users/morteza/workspace/efo_kickoff/datasets/efo_pubmed_hits.20200114_preproc.csv" output_csv_path = csv_path.replace('_preproc.csv', '_tidy.csv') # make things tidy! -tidy_efo_preproc_csv(csv_path, output_csv_path) \ No newline at end of file +df = tidy_efo_preproc_csv(csv_path, output_csv_path) + + +# exploratory diagrams +wm_df = df[(df.context == 'EF') & (df.construct == 'Working Memory') & (df.task_hits<1000)] + +sns.kdeplot(wm_df.task_hits) + +#sns.pairplot(ef_df) +#sns.distplot(ef_df.task_hits, rug=True) + +#f, ax = plt.subplots(figsize=(6, 6)) +#sns.jointplot(ef_df.task_hits,ef_df.construct_hits, kind='kde', ax=ax) +#sns.rugplot(ef_df.task_hits, color="g", ax=ax) +#sns.rugplot(ef_df.construct_hits, color="b", vertical=True, ax=ax) +#sns.jointplot(ef_df.task_hits,ef_df.construct_hits)