diff --git a/py/20191223_pubmed.py b/py/20191223_pubmed.py
new file mode 100644
index 0000000..5a353e5
--- /dev/null
+++ b/py/20191223_pubmed.py
@@ -0,0 +1,107 @@
+#%% temp (REMOVE this: it only sets the NCBI_API_KEY env var for local testing)
+import os
+os.environ['NCBI_API_KEY'] = ''
+
+#%% direct eutils xml requests with history support
+# Note: pip install xmltodict
+import os
+from datetime import date
+import requests
+import xmltodict
+
+ts = date.today().strftime('%Y%m%d')
+
+api_key = os.environ['NCBI_API_KEY']
+base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
+db = 'pubmed'  # or 'pmc'
+term = 'N-Back'
+
+# --- SEARCH TERM
+search_query = f'({term}[TIAB])'
+
+url = f'{base}esearch.fcgi'
+params = {
+    'term': search_query,  # requests URL-encodes parameters itself
+    'usehistory': 'y',
+    'db': db,
+    'retmax': 0,  # no ids needed here; results are kept on the history server
+    'reldate': 10 * 365,  # restrict to roughly the last 10 years
+    'api_key': api_key
+}
+
+response = requests.get(url, params=params)
+search_response = xmltodict.parse(response.text)
+
+num_of_results = search_response['eSearchResult']['Count']
+
+print(f'Successfully searched and stored results on the history server.\nNow retrieving {num_of_results} abstracts...')
+
+# --- FETCH ABSTRACTS
+url = f'{base}efetch.fcgi'
+params = {
+    'db': db,
+    'api_key': api_key,
+    'WebEnv': search_response['eSearchResult']['WebEnv'],
+    'query_key': search_response['eSearchResult']['QueryKey'],
+    'rettype': 'abstract',
+    'retmode': 'xml'
+}
+
+response = requests.post(url, data=params)
+
+with open(f'{ts}_{db}_{term}.txt', 'w') as f:
+    f.write(response.text)
+
+print('Successfully stored results!')
+
+#%% [markdown]
+# ## DEPRECATED: POST ids to history server
+# The following code posts a list of ids to the history server and retrieves
+# the parameters needed to fetch the articles' abstracts. Although it helps
+# avoid NCBI query rate limits, the same functionality can be achieved with
+# esearch/usehistory + efetch.
+
+#%% POST
+url = f'{base}epost.fcgi'
+
+params = {
+    'db': db,
+    'id': ','.join(map(str, [11237011, 12466850])),
+    'api_key': api_key
+}
+
+response = requests.post(url, data=params)
+
+history_params = xmltodict.parse(response.text)
+
+#%% [markdown]
+# ## DEPRECATED: metapub
+# The following snippet shows how to use the metapub package to retrieve a
+# list of records for a given search query. It is limited by the number of
+# records esearch will return and by the request rate (10 queries/s with an
+# API key). It requires the `metapub` package, installed with
+# `pip install metapub`.
+#
+# Note: metapub requires an env variable named `NCBI_API_KEY`.
+
+#%% metapub
+import os
+from metapub import PubMedFetcher
+
+terms = ['N-Back', 'Working Memory']
+
+fetcher = PubMedFetcher()
+
+for term in terms:
+    abstracts = []
+
+    ids = fetcher.pmids_for_query(query=f'({term}[TIAB])', retmax=1000000, since='2010', pmc_only=True)
+    print(f'fetching articles for {term}')
+
+    for index, pmid in enumerate(ids[:10]):  # limited to the first 10 for testing
+        print(f'{index} of {len(ids)}...')
+        article = fetcher.article_by_pmid(pmid)
+        if article.abstract is not None:
+            abstracts.append(article.pmid + '\n' + article.title + '\n' + article.abstract)
+
+    os.makedirs(f'data/{db}', exist_ok=True)
+    with open(f'data/{db}/{term}.txt', 'w') as f:
+        f.write('\n\n'.join(abstracts))
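+
+#%% [markdown]
+# ## Parsing the fetched XML (sketch)
+# A minimal sketch, not part of the original pipeline: it re-reads the file
+# written by the efetch cell above (reusing `ts`, `db`, and `term`) and
+# extracts (pmid, title, abstract) triples with xmltodict. The
+# PubmedArticleSet/PubmedArticle paths match the usual efetch output, but the
+# handling of AbstractText, which xmltodict may parse as a string, a dict, or
+# a list of labelled sections, is an assumption and may need adjusting.
+
+#%% parse abstracts (sketch)
+import xmltodict
+
+def _abstract_text(abstract):
+    """Flatten an xmltodict Abstract node into a single string, or None."""
+    if abstract is None:
+        return None
+    sections = abstract.get('AbstractText')
+    if not isinstance(sections, list):
+        sections = [sections]
+    parts = []
+    for s in sections:
+        if isinstance(s, dict):  # structured abstract: {'@Label': ..., '#text': ...}
+            parts.append(s.get('#text', ''))
+        elif s:
+            parts.append(s)
+    return '\n'.join(parts)
+
+with open(f'{ts}_{db}_{term}.txt') as f:
+    fetched = xmltodict.parse(f.read())
+
+articles = fetched['PubmedArticleSet']['PubmedArticle']
+if not isinstance(articles, list):  # a single result is parsed as a dict
+    articles = [articles]
+
+records = []
+for a in articles:
+    citation = a['MedlineCitation']
+    pmid = citation['PMID']['#text']
+    title = citation['Article']['ArticleTitle']
+    abstract = _abstract_text(citation['Article'].get('Abstract'))
+    if abstract:
+        records.append((pmid, title, abstract))
+
+print(f'parsed {len(records)} abstracts out of {len(articles)} articles')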