#%% temp (REMOVE this; it only sets an env var. That's it!)
import os

os.environ['NCBI_API_KEY'] = '<PUT_YOUR_NCBI_API_KEY_HERE>'

#%% direct eutils xml requests with history support
# Note: pip install xmltodict
import os
from datetime import date

import requests
import xmltodict

base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
ts = date.today().strftime('%Y%m%d')
terms = ['Digit Span', 'Working Memory']
db = 'pubmed'  # or 'pmc'


def search_and_store(term, output_file, db='pubmed', api_key=None):
    """Search a database for a term and store all matching abstracts in a file."""
    api_key = api_key or os.environ['NCBI_API_KEY']

    # --- SEARCH and keep the result set on the NCBI history server
    search_query = f'({term}[TIAB])'
    url = f'{base}esearch.fcgi'
    params = {
        'term': search_query,  # requests takes care of URL encoding
        'usehistory': 'y',
        'db': db,
        'retmax': 0,          # no ids needed; the history server keeps the result set
        'reldate': 10 * 365,  # only articles from the last ~10 years
        'api_key': api_key
    }
    response = requests.get(url, params=params)
    search_response = xmltodict.parse(response.text)
    # DEBUG: print(search_response)

    _num_of_results = search_response['eSearchResult']['Count']
    print(f'Successfully searched and stored results on the history server.\n'
          f'Now retrieving {_num_of_results} abstracts...')

    # --- FETCH ABSTRACTS referenced by the stored WebEnv/query_key
    url = f'{base}efetch.fcgi'
    params = {
        'db': db,
        'api_key': api_key,
        'WebEnv': search_response['eSearchResult']['WebEnv'],
        'query_key': search_response['eSearchResult']['QueryKey'],
        'rettype': 'abstract',
        'retmode': 'xml'
    }
    response = requests.post(url, data=params)

    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(response.text)
    print(f'Successfully stored results to {output_file}')


for term in terms:
    print(f'searching NCBI for: {term}...')
    search_and_store(term, db=db, output_file=f'data/{db}/{ts}_{db}_{term}.xml')

#%% [markdown]
# ## DEPRECATED: POST ids to history server
# The following code posts a list of ids to the history server and retrieves the
# parameters needed to fetch the abstracts of those articles. Although it helps to
# avoid hitting the NCBI query limits, the same functionality can be achieved with
# esearch/usehistory + efetch (as above).

#%% POST
url = f'{base}epost.fcgi'
params = {
    'db': db,
    'id': ','.join(map(str, [11237011, 12466850])),
    'api_key': os.environ['NCBI_API_KEY']
}
response = requests.post(url, data=params)
history_params = xmltodict.parse(response.text)

#%% [markdown]
# ## DEPRECATED: metapub
# The following snippet shows how to use the metapub package to retrieve a list of
# records for a given search query. It is limited by the number of returned records
# and by the request rate (10 queries/s with an API key). It also requires the
# `metapub` package, installed with `pip install metapub`.
# Note: metapub expects an env variable named `NCBI_API_KEY`.

#%% metapub
import os
from metapub import PubMedFetcher

terms = ['N-Back', 'Working Memory']
fetcher = PubMedFetcher()

for term in terms:
    abstracts = []
    ids = fetcher.pmids_for_query(query=f'({term}[TIAB])',
                                  retmax=1000000,
                                  since='2010',
                                  pmc_only=True)
    print(f'fetching articles for {term}')
    # NOTE: only the first 10 ids are fetched here to keep the demo quick
    for index, pmid in enumerate(ids[:10]):
        print(f'{index + 1} of {min(len(ids), 10)}...')
        article = fetcher.article_by_pmid(pmid)
        if article.abstract is not None:
            abstracts.append(article.pmid + '\n' + article.title + '\n' + article.abstract)
    with open(f'data/{db}/{term}.txt', 'w', encoding='utf-8') as f:
        f.write('\n\n'.join(abstracts))
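#%% [markdown]
# ## Reading the stored XML back (sketch)
# A minimal sketch of how the files written by `search_and_store` could be parsed
# back into (pmid, title, abstract) records with `xmltodict`. The `_text` and
# `load_abstracts` helpers are hypothetical, not part of the pipeline above, and the
# code assumes the standard PubMed efetch layout (`PubmedArticleSet/PubmedArticle/...`);
# structured abstracts and records without an abstract are handled defensively, but
# other edge cases may remain.

#%% parse stored abstracts (sketch)
import xmltodict


def _text(node):
    """Flatten an xmltodict node (str, dict, or list) into plain text."""
    if node is None:
        return ''
    if isinstance(node, str):
        return node
    if isinstance(node, dict):
        return _text(node.get('#text'))
    if isinstance(node, list):
        return ' '.join(_text(item) for item in node)
    return str(node)


def load_abstracts(xml_file):
    """Yield (pmid, title, abstract) tuples from a stored efetch XML file."""
    with open(xml_file, encoding='utf-8') as f:
        doc = xmltodict.parse(f.read())
    articles = doc['PubmedArticleSet'].get('PubmedArticle', [])
    if isinstance(articles, dict):  # single-record files are not wrapped in a list
        articles = [articles]
    for article in articles:
        citation = article['MedlineCitation']
        pmid = _text(citation['PMID'])
        title = _text(citation['Article'].get('ArticleTitle'))
        abstract = _text(citation['Article'].get('Abstract', {}).get('AbstractText'))
        yield pmid, title, abstract

# Example usage (hypothetical file name, following the naming scheme used above):
# for pmid, title, abstract in load_abstracts(f'data/pubmed/{ts}_pubmed_Digit Span.xml'):
#     print(pmid, title)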