"""Data structures for representing OBO Graphs.

.. seealso::

    - the defining repository https://github.com/geneontology/obographs
    - the JSON schema
      https://github.com/geneontology/obographs/blob/master/schema/obographs-schema.json
"""
from __future__ import annotations
import json
import logging
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING, Literal, TypeAlias, overload
import curies
from curies.vocabulary import SynonymScopeOIO
from pydantic import BaseModel, Field
from pystow.utils import safe_open
if TYPE_CHECKING:
from .standardized import StandardizedGraph, StandardizedGraphDocument
# Public API of this module: the names bound by ``from <module> import *``.
# NOTE(review): ``Meta`` is exported here and referenced by most models below,
# but its class definition is not visible in this section of the file --
# confirm it is defined in this module.
__all__ = [
"Definition",
"DomainRangeAxiom",
"Edge",
"EquivalentNodeSet",
"ExistentialRestrictionExpression",
"Graph",
"GraphDocument",
"LogicalDefinition",
"Meta",
"Node",
"NodeType",
"Property",
"PropertyChainAxiom",
"PropertyType",
"Synonym",
"Xref",
"read",
]
logger = logging.getLogger(__name__)
OBO_URI_PREFIX = "http://purl.obolibrary.org/obo/"
OBO_URI_PREFIX_LEN = len(OBO_URI_PREFIX)
NodeType: TypeAlias = Literal["CLASS", "PROPERTY", "INDIVIDUAL"]
#: When node type is ``PROPERTY``, this is extra information
PropertyType: TypeAlias = Literal["ANNOTATION", "OBJECT", "DATA"]
TimeoutHint = int | float | None
[docs]
class Property(BaseModel):
    """Represent a property inside a metadata element."""

    # NOTE: pydantic copies the class docstring and the ``description`` below
    # into the generated JSON schema, so both are kept verbatim.

    # Required; there is no default.
    pred: str
    # Optional in practice even though it should not be (see the description).
    val: str | None = Field(
        default=None,
        description=(
            "Stores the value of the property. This can be a string representing a "
            "literal or IRI. This isn't supposed to be nullable, but it happens a lot - might be a "
            "bug in OWLAPI or ROBOT"
        ),
    )
    # Both default to ``None`` when absent from the input.
    xrefs: list[str] | None = Field(default=None)
    meta: Meta | None = Field(default=None)
[docs]
class Definition(BaseModel):
    """Represents a definition for a node."""

    # Both fields are optional and default to ``None``.
    val: str | None = None
    xrefs: list[str] | None = None  # Just a list of CURIEs/IRIs
[docs]
class Xref(BaseModel):
    """Represents a cross-reference."""

    # Required; the ellipsis default marks the field as mandatory.
    val: str = Field(default=...)
[docs]
class Synonym(BaseModel):
    """Represents a synonym inside an object meta."""

    val: str | None = None
    # Falls back to the exact-synonym scope when no predicate is given.
    pred: SynonymScopeOIO = "hasExactSynonym"
    synonymType: str | None = Field(default=None, examples=["OMO:0003000"])  # noqa:N815
    # A fresh empty list is created per instance when no xrefs are given.
    xrefs: list[str] = Field(
        description="A list of CURIEs/IRIs for provenance for the synonym",
        default_factory=list,
    )
    meta: Meta | None = Field(default=None)
[docs]
class Edge(BaseModel):
    """Represents an edge in an OBO Graph."""

    # ``sub``, ``pred`` and ``obj`` are all required (ellipsis default).
    sub: str = Field(default=..., examples=["http://purl.obolibrary.org/obo/CHEBI_99998"])
    pred: str = Field(default=..., examples=["is_a"])
    obj: str = Field(default=..., examples=["http://purl.obolibrary.org/obo/CHEBI_24995"])
    # Optional metadata attached to the edge.
    meta: Meta | None = Field(default=None)
[docs]
class Node(BaseModel):
    """Represents a node in an OBO Graph."""

    # Only ``id`` is required; every other field defaults to ``None``.
    id: str = Field(default=..., description="The IRI for the node")
    lbl: str | None = Field(default=None, description="The name of the node")
    meta: Meta | None = Field(default=None)
    type: NodeType | None = Field(default=None, description="Type of node")
    propertyType: PropertyType | None = Field(  # noqa:N815
        default=None,
        description="Type of property, if the node type is a property",
    )
[docs]
class DomainRangeAxiom(BaseModel):
    """Represents a domain/range axiom."""

    # Required; there is no default.
    predicateId: str = Field(default=...)  # noqa:N815
    # The remaining fields are optional and default to ``None``.
    domainClassIds: list[str] | None = Field(default=None)  # noqa:N815
    rangeClassIds: list[str] | None = Field(default=None)  # noqa:N815
    allValuesFromEdges: list[Edge] | None = Field(default=None)  # noqa:N815
    meta: Meta | None = Field(default=None)
[docs]
class PropertyChainAxiom(BaseModel):
    """Represents a property chain axiom."""

    # Both identifiers fields are required (ellipsis default).
    predicateId: str = Field(default=...)  # noqa:N815
    chainPredicateIds: list[str] = Field(default=...)  # noqa:N815
    # Optional metadata attached to the axiom.
    meta: Meta | None = Field(default=None)
[docs]
class ExistentialRestrictionExpression(BaseModel):
    """Represents an existential restriction."""

    # Both fields are required (ellipsis default).
    propertyId: str = Field(default=...)  # noqa:N815
    fillerId: str = Field(default=...)  # noqa:N815
[docs]
class LogicalDefinition(BaseModel):
    """Represents a logical definition chain axiom."""

    # Required; there is no default.
    definedClassId: str = Field(default=...)  # noqa:N815
    # The remaining fields are optional and default to ``None``.
    genusIds: list[str] | None = Field(default=None)  # noqa:N815
    restrictions: list[ExistentialRestrictionExpression] | None = Field(default=None)
    meta: Meta | None = Field(default=None)
[docs]
class EquivalentNodeSet(BaseModel):
    """Represents a set of equivalent nodes."""

    # Both identifier fields are required (ellipsis default).
    representativeNodeId: str = Field(default=...)  # noqa:N815
    nodeIds: list[str] = Field(default=...)  # noqa:N815
    # Optional metadata attached to the set.
    meta: Meta | None = Field(default=None)
[docs]
class Graph(BaseModel):
    """A graph corresponds to an ontology."""

    id: str | None = Field(default=None)
    meta: Meta | None = Field(default=None)
    # Every collection below defaults to a fresh empty list per instance.
    nodes: list[Node] = Field(default_factory=list)
    edges: list[Edge] = Field(default_factory=list)
    equivalentNodesSets: list[EquivalentNodeSet] = Field(default_factory=list)  # noqa:N815
    logicalDefinitionAxioms: list[LogicalDefinition] = Field(default_factory=list)  # noqa:N815
    domainRangeAxioms: list[DomainRangeAxiom] = Field(default_factory=list)  # noqa:N815
    propertyChainAxioms: list[PropertyChainAxiom] = Field(default_factory=list)  # noqa:N815

    def _get_property(self, predicate: str) -> str | None:
        # Return the first non-empty value among the graph-level basic
        # property values whose predicate matches, or None if there is none.
        if self.meta is None:
            return None
        matching_values = (
            prop.val
            for prop in self.meta.basicPropertyValues or []
            if prop.pred == predicate and prop.val
        )
        return next(matching_values, None)

    @property
    def name(self) -> str | None:
        """Get the graph's title (``dcterms:title``), if annotated."""
        title_predicate = "http://purl.org/dc/terms/title"
        return self._get_property(title_predicate)

    @property
    def version(self) -> str | None:
        """Get the graph's version (``owl:versionInfo``), if annotated."""
        version_predicate = "http://www.w3.org/2002/07/owl#versionInfo"
        return self._get_property(version_predicate)

    def standardize(
        self, converter: curies.Converter, *, strict: bool = False
    ) -> StandardizedGraph:
        """Convert this raw graph into its standardized counterpart.

        :param converter: The converter handed to
            ``StandardizedGraph.from_obograph_raw``
        :param strict: Forwarded unchanged to
            ``StandardizedGraph.from_obograph_raw``
        :returns: The standardized graph
        """
        # Imported locally; at module level the name is only imported under
        # ``TYPE_CHECKING``.
        from .standardized import StandardizedGraph

        standardized = StandardizedGraph.from_obograph_raw(self, converter, strict=strict)
        return standardized
[docs]
class GraphDocument(BaseModel):
    """Represents a list of OBO graphs."""

    # Required; there is no default.
    graphs: list[Graph] = Field(default=...)
    # Optional document-level metadata.
    meta: Meta | None = Field(default=None)

    def standardize(self, converter: curies.Converter) -> StandardizedGraphDocument:
        """Convert this raw graph document into its standardized counterpart.

        :param converter: The converter handed to
            ``StandardizedGraphDocument.from_obograph_raw``
        :returns: The standardized graph document
        """
        # Imported locally; at module level the name is only imported under
        # ``TYPE_CHECKING``.
        from .standardized import StandardizedGraphDocument

        standardized = StandardizedGraphDocument.from_obograph_raw(self, converter)
        return standardized
def get_id_to_node(graph: Graph) -> dict[str, Node]:
    """Index the nodes of a graph by their identifiers.

    :param graph: The graph whose ``nodes`` are indexed; a missing/empty node
        list yields an empty dictionary
    :returns: A dictionary from node ID to node. If several nodes share an ID,
        the one appearing last in ``graph.nodes`` is kept.
    """
    id_to_node: dict[str, Node] = {}
    for node in graph.nodes or []:
        id_to_node[node.id] = node
    return id_to_node
def get_id_to_edges(graph: Graph) -> dict[str, list[tuple[str, str]]]:
    """Get a dictionary from subject node ID to its outgoing edges.

    :param graph: The graph whose ``edges`` are grouped; a missing/empty edge
        list yields an empty dictionary
    :returns: A dictionary whose keys are the subjects (``edge.sub``) of the
        graph's edges and whose values are the distinct
        ``(predicate, object)`` pairs for that subject. Duplicate pairs are
        collapsed, and both the keys and the pairs are in first-seen order, so
        the result is the same from run to run.
    """
    # A dict is used as an insertion-ordered set: a plain ``set`` would make
    # the order of each list depend on per-process string hash randomisation.
    pairs_by_subject: defaultdict[str, dict[tuple[str, str], None]] = defaultdict(dict)
    for edge in graph.edges or []:
        pairs_by_subject[edge.sub][edge.pred, edge.obj] = None
    return {subject: list(pairs) for subject, pairs in pairs_by_subject.items()}
# docstr-coverage:excused `overload`
@overload
def read(
source: str | Path,
*,
timeout: TimeoutHint = ...,
squeeze: Literal[False] = ...,
encoding: str = ...,
) -> GraphDocument: ...
# docstr-coverage:excused `overload`
@overload
def read(
source: str | Path,
*,
timeout: TimeoutHint = ...,
squeeze: Literal[True] = ...,
encoding: str = ...,
) -> Graph: ...
[docs]
def read(
source: str | Path,
*,
timeout: TimeoutHint = None,
squeeze: bool = True,
encoding: str | None = None,
newline: str | None = None,
) -> Graph | GraphDocument:
"""Read an OBO Graph document.
:param source: A file path or URL to an OBO Graph JSON
:param timeout: The timeout for getting a URL
:param squeeze: By default, will unpack the first graph from a graph document that
only has a single graph and return a :class:`Graph` object. If `true` and
multiple graphs are received, will raise an error. Set this to `false` to return
a GraphDocument containing all graphs.
:param encoding: The ``encoding`` when reading a local filepath, passed to
:func:`pystow.utils.safe_open`
:param newline: The ``newline`` when reading a local filepath, passed to
:func:`pystow.utils.safe_open`
:returns: A graph or graph document
:raises ValueError: If squeeze is set to true and multiple graphs are received
"""
if isinstance(source, str) and (source.startswith("https://") or source.startswith("http://")):
import requests
if source.endswith(".gz"):
raise NotImplementedError
else:
res = requests.get(source, timeout=timeout)
res_json = res.json()
graph_document = GraphDocument.model_validate(res_json)
elif isinstance(source, str | Path):
with safe_open(source, encoding=encoding, newline=newline) as file:
graph_document = GraphDocument.model_validate(json.load(file))
else:
raise TypeError(f"Unhandled source: {source}")
if not squeeze:
return graph_document
elif len(graph_document.graphs) != 1:
raise ValueError(
f"graph document has {len(graph_document.graphs)} graphs, "
f"so can not squeeze. set squeeze=False"
)
else:
return graph_document.graphs[0]