import requests
import xmltodict

# Root URL of the NCBI E-utilities endpoints used below (esearch / efetch).
base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
# Date stamp (YYYYMMDD) used as a prefix for the output file names.
ts = date.today().strftime('%Y%m%d')

terms = ['Digit Span', 'Working Memory']
db = 'pubmed' # or 'pmc'


def search_and_store(term, output_file, db='pubmed', api_key=None, timeout=60):
    """Search NCBI for a term and store the fetched abstracts in a file.

    The search runs through ``esearch`` with ``usehistory=y`` so that the
    result set is kept on the NCBI history server; the records are then
    downloaded in one ``efetch`` call that refers to that stored result set
    and written to ``output_file`` as XML.

    Args:
        term: Search phrase. It is wrapped as ``(<term>[TIAB])`` before
            being sent to ``esearch``.
        output_file: Path of the file to write the ``efetch`` response to.
            Missing parent directories are created.
        db: Entrez database name passed to both requests.
        api_key: NCBI API key. When ``None`` the ``NCBI_API_KEY`` environment
            variable is used; if that is not set either, the requests are
            sent without a key.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None.

    Raises:
        requests.HTTPError: If either request returns an error status.
        requests.Timeout: If either request exceeds ``timeout``.
        KeyError: If the ``esearch`` response lacks the expected
            ``eSearchResult`` fields (``Count``, ``WebEnv``, ``QueryKey``).
    """
    # Resolve the key at call time: reading os.environ in the signature would
    # raise at import time whenever the variable is missing, even for callers
    # that pass their own key.
    if api_key is None:
        api_key = os.environ.get('NCBI_API_KEY')
    auth = {'api_key': api_key} if api_key else {}

    # --- SEARCH TERM
    search_params = {
        # Spaces are left as-is: requests URL-encodes query parameters
        # itself, so a hand-inserted '+' would be sent as a literal '%2B'.
        'term': f'({term}[TIAB])',
        'usehistory': 'y',
        'db': db,
        # Only the count and the history handle are needed from the search.
        'retmax': 0,
        'reldate': 10 * 365,
        **auth,
    }
    response = requests.get(
        f'{base}esearch.fcgi', params=search_params, timeout=timeout
    )
    response.raise_for_status()
    search_result = xmltodict.parse(response.text)['eSearchResult']

    num_of_results = search_result['Count']
    print(f'Succesfully searched and stored results on history server.\nNow retriving {num_of_results} abstracts...')

    # --- FETCH ABSTRACTS
    # NOTE(review): a single efetch call may be capped by the server for very
    # large result sets -- confirm whether retstart/retmax paging is needed.
    fetch_params = {
        'db': db,
        **auth,
        'WebEnv': search_result['WebEnv'],
        'query_key': search_result['QueryKey'],
        'rettype': 'abstract',
        'retmode': 'xml',
    }
    # The parameters are sent as the form-encoded POST body.
    response = requests.post(
        f'{base}efetch.fcgi', data=fetch_params, timeout=timeout
    )
    response.raise_for_status()

    # Create the target directory so a fresh checkout does not fail on open().
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(response.text)

    print(f'Succesfully stored results to {output_file}')


for term in terms:
    print(f'searching NCBI for: {term}...')
    search_and_store(term, db=db, output_file=f'data/{db}/{ts}_{db}_{term}.xml')

#%% [markdown]