Reshape the preprocessed EFO hit counts into a long table with one row per
(context, task, construct), where context is 'EF' or 'notEF', then run the
tidy step when the script executes.

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy (4-space indent assumed); verify the context
lines against the target file before applying.

diff --git a/py/20190116_tidy_efo.py b/py/20190116_tidy_efo.py
index 7a0c441..5ebbfd3 100644
--- a/py/20190116_tidy_efo.py
+++ b/py/20190116_tidy_efo.py
@@ -12,9 +12,36 @@ def tidy_efo_preproc_csv(csv, output_csv):
 
     df = pd.read_csv(csv)
 
-    df.to_csv(output_csv)
+    # 'notEF' rows: each total-hits column minus its *_ef_hits counterpart.
+    tidy_df = pd.DataFrame({
+        "context": 'notEF',
+        "task": df['task'],
+        "construct": df['concept'],
+        "hits": df['task_concept_hits'] - df['task_concept_ef_hits'],
+        "task_hits": df['task_hits'] - df['task_ef_hits'],
+        "construct_hits": df['concept_hits'] - df['concept_ef_hits']
+    })
+
+    # 'EF' rows: the *_ef_hits columns are copied as-is.
+    ef_df = pd.DataFrame({
+        "context": 'EF',
+        "task": df['task'],
+        "construct": df['concept'],
+        "hits": df['task_concept_ef_hits'],
+        "task_hits": df['task_ef_hits'],
+        "construct_hits": df['concept_ef_hits']
+    })
+
+    # Stack both contexts; pd.concat replaces DataFrame.append (removed in pandas 2.0).
+    tidy_df = pd.concat([tidy_df, ef_df], ignore_index=True)
+
+    tidy_df.to_csv(output_csv)
+    print(f"Tidy dataset written successfully to {output_csv}")
 
 
 # params
 csv_path = "/Users/morteza/workspace/efo_kickoff/datasets/efo_pubmed_hits.20200114_preproc.csv"
-output_path = csv_path.replace('_preproc.csv', '_tidy.csv')
+output_csv_path = csv_path.replace('_preproc.csv', '_tidy.csv')
+
+# make things tidy!
+tidy_efo_preproc_csv(csv_path, output_csv_path)
\ No newline at end of file