Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion packages/linkml/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ dependencies = [ # Specifier syntax: https://peps.python.org/pep-0631/
"openpyxl",
"parse",
"prefixcommons >= 0.1.7",
"prefixmaps >= 0.2.2",
# TODO(prefixmaps-0.2.8): Replace git pin with "prefixmaps >= 0.2.8" once released,
# then remove [tool.hatch.metadata] allow-direct-references and regenerate uv.lock.
# Tracked in: https://github.com/linkml/prefixmaps/issues/82
"prefixmaps @ git+https://github.com/linkml/prefixmaps@75435150a1b31760b9780af2b64a265943a9b263",
"pydantic >= 2.0.0, < 3.0.0",
"pyjsg >= 0.12.3",
"pyshex >= 0.9.0",
Expand Down Expand Up @@ -202,6 +205,10 @@ vcs = "git"
style = "pep440"
fallback-version = "0.0.0"

[tool.hatch.metadata]
# TODO(prefixmaps-0.2.8): Remove this section once the git pin is replaced with >= 0.2.8
allow-direct-references = true

[tool.hatch.version]
source = "uv-dynamic-versioning"

Expand Down
138 changes: 135 additions & 3 deletions packages/linkml/src/linkml/generators/jsonldcontextgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from linkml._version import __version__
from linkml.utils.deprecation import deprecated_fields
from linkml.utils.generator import Generator, shared_arguments
from linkml.utils.generator import Generator, shared_arguments, well_known_prefix_map
from linkml_runtime.linkml_model.meta import ClassDefinition, EnumDefinition, SlotDefinition
from linkml_runtime.linkml_model.types import SHEX
from linkml_runtime.utils.formatutils import camelcase, underscore
Expand Down Expand Up @@ -90,6 +90,9 @@ class ContextGenerator(Generator):
frame_root: str | None = None

def __post_init__(self) -> None:
# Must be set before super().__post_init__() because the parent triggers
# the visitor pattern (visit_schema), which accesses _prefix_remap.
self._prefix_remap: dict[str, str] = {}
super().__post_init__()
if self.namespaces is None:
raise TypeError("Schema text must be supplied to context generator. Preparsed schema will not work")
Expand Down Expand Up @@ -127,22 +130,92 @@ def _collect_external_elements(sv: SchemaView) -> tuple[set[str], set[str]]:
external_slots.update(schema_def.slots.keys())
return external_classes, external_slots

def add_prefix(self, ncname: str) -> None:
    """Register ``ncname`` with the parent generator, honouring prefix renames.

    If ``ncname`` has an entry in ``self._prefix_remap`` the mapped name is
    registered instead; otherwise ``ncname`` is passed through unchanged.

    :param ncname: prefix name to register
    """
    # Fall back to the name itself when no rename has been recorded for it.
    effective_name = self._prefix_remap.get(ncname, ncname)
    super().add_prefix(effective_name)

def visit_schema(self, base: str | Namespace | None = None, output: str | None = None, **_):
# Add any explicitly declared prefixes
# Add any explicitly declared prefixes.
# Direct .add() is safe here: the normalisation block below explicitly
# rewrites emit_prefixes entries for any renamed prefixes (Cases 1-3).
for prefix in self.schema.prefixes.values():
self.emit_prefixes.add(prefix.prefix_prefix)

# Add any prefixes explicitly declared
for pfx in self.schema.emit_prefixes:
self.add_prefix(pfx)

# Normalise well-known prefix names when --normalize-prefixes is set.
# If the schema declares a non-standard alias for a namespace that has
# a well-known standard name (e.g. ``sdo`` for
# ``https://schema.org/``), replace the alias with the standard name
# so that generated JSON-LD contexts use the conventional prefix.
#
# Three cases are handled:
# 1. Standard prefix is not yet bound → just rebind from old to new.
# 2. Standard prefix is bound to a *different* URI:
# a. User-declared (in schema.prefixes) → collision, skip with warning.
# b. Runtime default (e.g. linkml-runtime's ``schema: http://…``)
# → remove stale binding, then rebind.
# 3. Standard prefix is already bound to the *same* URI (duplicate)
# → just drop the non-standard alias.
#
# A remap dict is stored for ``_build_element_id`` because
# ``prefix_suffix()`` splits CURIEs on ``:`` without looking up the
# namespace dict.
self._prefix_remap.clear()
if self.normalize_prefixes:
wk = well_known_prefix_map()
for old_pfx in list(self.namespaces):
url = str(self.namespaces[old_pfx])
std_pfx = wk.get(url)
if not std_pfx or std_pfx == old_pfx:
continue
if std_pfx in self.namespaces:
if str(self.namespaces[std_pfx]) != url:
# Case 2: std_pfx is bound to a different URI.
# If the user explicitly declared std_pfx in the schema,
# it is intentional — skip to avoid data loss.
if std_pfx in self.schema.prefixes:
self.logger.warning(
"Prefix collision: cannot rename '%s' to '%s' because '%s' is "
"already declared for <%s>; skipping normalisation for <%s>",
old_pfx,
std_pfx,
std_pfx,
str(self.namespaces[std_pfx]),
url,
)
continue
# Not user-declared (e.g. linkml-runtime default) — safe to remove
self.emit_prefixes.discard(std_pfx)
del self.namespaces[std_pfx]
else:
# Case 3: standard prefix already bound to same URI
# — just drop the non-standard alias
del self.namespaces[old_pfx]
if old_pfx in self.emit_prefixes:
self.emit_prefixes.discard(old_pfx)
self.emit_prefixes.add(std_pfx)
self._prefix_remap[old_pfx] = std_pfx
continue
# Case 1 (or Case 2 after stale removal): bind standard name
self.namespaces[std_pfx] = self.namespaces[old_pfx]
del self.namespaces[old_pfx]
if old_pfx in self.emit_prefixes:
self.emit_prefixes.discard(old_pfx)
self.emit_prefixes.add(std_pfx)
self._prefix_remap[old_pfx] = std_pfx

# Add the default prefix
if self.schema.default_prefix:
dflt = self.namespaces.prefix_for(self.schema.default_prefix)
if dflt:
self.default_ns = dflt
if self.default_ns:
default_uri = self.namespaces[self.default_ns]
# Direct .add() is safe: default_ns is already resolved from
# the (possibly normalised) namespace bindings above.
self.emit_prefixes.add(self.default_ns)
else:
default_uri = self.schema.default_prefix
Expand Down Expand Up @@ -236,7 +309,61 @@ def end_schema(
with open(frame_path, "w", encoding="UTF-8") as f:
json.dump(frame, f, indent=2, ensure_ascii=False)

return str(as_json(context)) + "\n"
if self.deterministic:
return self._deterministic_context_json(json.loads(str(as_json(context))), indent=3)
return str(as_json(context))

@staticmethod
def _deterministic_context_json(data: dict, indent: int = 3) -> str:
    """Render a JSON-LD context document with a stable, conventional key order.

    Resulting layout:

    * top level: ``comments`` (when present), then ``@context`` (when
      present), then every other key in alphabetical order;
    * inside ``@context``: keys starting with ``@`` come first, then the
      remaining keys whose value is a string (prefix declarations), then
      all other entries (term definitions). Each group is alphabetical.

    Term definitions are additionally round-tripped through
    :func:`deterministic_json` so that their nested content is normalised
    as well.

    :param data: parsed JSON-LD context document
    :param indent: indentation width handed to ``json.dumps``
    :return: the serialized JSON text; no trailing newline is appended
    """
    from linkml.utils.generator import deterministic_json

    result = {}

    # Metadata block leads the document when it exists.
    if "comments" in data:
        result["comments"] = data["comments"]

    if "@context" in data:
        body = data["@context"]

        # Group 1: "@"-prefixed directives such as @vocab / @base.
        directive_keys = sorted(filter(lambda key: key.startswith("@"), body))
        ordered_body = {key: body[key] for key in directive_keys}

        # Group 2: prefix declarations, i.e. plain string values.
        prefix_keys = sorted(key for key in body if not key.startswith("@") and isinstance(body[key], str))
        ordered_body.update((key, body[key]) for key in prefix_keys)

        # Group 3: term definitions (everything else); nested content is
        # normalised by a round trip through deterministic_json.
        raw_terms = {key: value for key, value in body.items() if not (key.startswith("@") or isinstance(value, str))}
        normalised_terms = json.loads(deterministic_json(raw_terms))
        ordered_body.update((key, normalised_terms[key]) for key in sorted(normalised_terms))

        result["@context"] = ordered_body

    # Whatever is left at the top level follows in alphabetical order.
    result.update((key, data[key]) for key in sorted(data) if key not in result)

    return json.dumps(result, indent=indent, ensure_ascii=False)

def visit_class(self, cls: ClassDefinition) -> bool:
if self.exclude_imports and cls.name not in self._local_classes:
Expand Down Expand Up @@ -486,6 +613,11 @@ def _build_element_id(self, definition: Any, uri: str) -> None:
@return: None
"""
uri_prefix, uri_suffix = self.namespaces.prefix_suffix(uri)
# Apply well-known prefix normalisation (e.g. sdo → schema).
# prefix_suffix() splits CURIEs on ':' without checking the
# namespace dict, so it may return a stale alias.
if uri_prefix and uri_prefix in self._prefix_remap:
uri_prefix = self._prefix_remap[uri_prefix]
is_default_namespace = uri_prefix == self.context_body["@vocab"] or uri_prefix == self.namespaces.prefix_for(
self.context_body["@vocab"]
)
Expand Down
7 changes: 7 additions & 0 deletions packages/linkml/src/linkml/generators/jsonldgen.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Generate JSONld from a LinkML schema."""

import json
import os
from collections.abc import Sequence
from copy import deepcopy
Expand Down Expand Up @@ -179,6 +180,8 @@ def end_schema(self, context: str | Sequence[str] | None = None, context_kwargs:
# TODO: The _visit function above alters the schema in situ
# force some context_kwargs
context_kwargs["metadata"] = False
# Forward prefix normalisation into the inline @context.
context_kwargs.setdefault("normalize_prefixes", self.normalize_prefixes)
add_prefixes = ContextGenerator(self.original_schema, **context_kwargs).serialize()
add_prefixes_json = loads(add_prefixes)
metamodel_ctx = self.metamodel_context or METAMODEL_CONTEXT_URI
Expand All @@ -203,6 +206,10 @@ def end_schema(self, context: str | Sequence[str] | None = None, context_kwargs:
self.schema["@context"].append({"@base": base_prefix})
# json_obj["@id"] = self.schema.id
out = str(as_json(self.schema, indent=" ")) + "\n"
if self.deterministic:
from linkml.utils.generator import deterministic_json

out = deterministic_json(json.loads(out), indent=2) + "\n"
self.schema = self.original_schema
return out

Expand Down
Loading
Loading