#%%
# pip install rdflib
from rdflib import OWL, Graph
from rdflib.util import guess_format
from owlready2 import *
from rdflib import URIRef
from rdflib import Graph
from rdflib.namespace import RDFS
# Location of the executive-functions ontology (EFO) OWL file, given as a file:// URL.
owl_path = "file:///Users/morteza/workspace/ontologies/efo.owl"

# Obtain the ontology handle for that URL first, then load its content in place.
efo = get_ontology(owl_path)
efo.load()
# extract class names of the tasks and concepts
#tasks = [t.name for t in efo.search(subclass_of = efo.Task)]
#concepts = [c.name for c in efo.search(subclass_of = efo.ExecutiveFunction)]
# The following code instead queries the RDFS labels defined for tasks and concepts.
# To query all descendants, use "rdfs:subClassOf*" instead of "rdfs:subClassOf".
def query_labels(graph, parent_class):
    """Return the rdfs:label values of the direct subclasses of a class.

    Runs a SPARQL SELECT on ``graph`` for every ``?task`` that is an
    ``rdfs:subClassOf`` the given class (resolved in the executive-functions
    ontology namespace) and collects the ``rdfs:label`` bound in each row.

    Args:
        graph: Object exposing ``query(sparql_string)`` that yields result
            rows whose first column has a ``toPython()`` method.
        parent_class: Local name of the parent class, with or without a
            leading ``:`` (``"Task"`` and ``":Task"`` are equivalent).

    Returns:
        A list with one converted value per result row, in the order the
        rows are produced.
    """
    # Accept both "Task" and ":Task"; the query itself re-adds the ":" prefix.
    class_name = parent_class[1:] if parent_class.startswith(":") else parent_class

    # "rdfs:" is declared explicitly so the query does not depend on whatever
    # namespace bindings the graph happens to carry.
    # Use "rdfs:subClassOf*" in the pattern below to match all descendants.
    query = f"""
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix : <http://www.semanticweb.org/morteza/ontologies/2019/11/executive-functions-ontology#>
    SELECT ?label
    WHERE {{
      ?task rdfs:subClassOf :{class_name};
            rdfs:label ?label
    }}
    """

    # Each row has a single column (?label); convert it to a plain Python value.
    return [row[0].toPython() for row in graph.query(query)]
# Prepare an RDFLib graph view of the default world for SPARQL queries.
graph = default_world.as_rdflib_graph()

# Fetch the labels for both parent classes, tasks first and then concepts.
tasks, concepts = (
    query_labels(graph, root_class)
    for root_class in ("Task", "ExecutiveFunction")
)

print(f"Tasks: {len(tasks)}, Concepts: {len(concepts)}")
#%%
# goal: create rows with the following data: <task>,<concept>,<hits>,<task_total>
from itertools import product

from metapub import PubMedFetcher

fetcher = PubMedFetcher()

# Run one PubMed search per (task, concept) pair. product() walks the pairs in
# task-major order without first materializing the full list of pairs.
for task, concept in product(tasks, concepts):
    # Both labels are tagged with PubMed's [TIAB] field tag and AND-ed together.
    query = f"({task}[TIAB]) AND ({concept}[TIAB])"
    # NOTE(review): presumably limits hits to PMC articles from 2010 onward and
    # lifts the default result cap via retmax — confirm against metapub.
    pmids = fetcher.pmids_for_query(query=query, retmax=1000000, since='2010', pmc_only=True)
    print(f"{query}: {len(pmids)}")