From c92d88ca8a22332b8ce335e2a3a13d75f2884208 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 11:28:41 +0200 Subject: [PATCH 01/89] feat(utils): :sparkles: add offline mode and cache management to HttpRequester Extend HttpRequester with `offline` and `no_cache` options, plus fetch_fresh/has_cached/clear_cache/cache_info/reset helpers. Add an OfflineCacheMissError and an _OfflineFallbackSession that yields a 504 when requests_cache is unavailable, mirroring the only-if-cached behavior. Emit standardized `CachedHttpRequester:` log lines describing each request's cache outcome. --- rocrate_validator/utils/http.py | 276 ++++++++++++++++++++++++++++++-- 1 file changed, 261 insertions(+), 15 deletions(-) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 197588dc4..3f0f0b343 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -19,7 +19,7 @@ import random import string import threading -from typing import Optional +from typing import Any, Optional import requests @@ -30,9 +30,55 @@ logger = logging.getLogger(__name__) +# HTTP status code used to signal a cache miss in offline mode. +# 504 is what requests_cache uses when only_if_cached is set and +# no cached response is available. +OFFLINE_CACHE_MISS_STATUS = 504 + + +def _log_cache_outcome(method: str, url: str, response, *, offline: bool, forced_refresh: bool = False) -> None: + """ + Emit a standardized ``CachedHttpRequester: ...`` message describing whether ``url`` was + served from the HTTP cache or fetched from the remote server. 
+ """ + from_cache = getattr(response, "from_cache", None) + status = getattr(response, "status_code", None) + + if offline and status == OFFLINE_CACHE_MISS_STATUS: + outcome = "not available in HTTP cache (offline cache miss)" + elif from_cache is True: + outcome = "served from HTTP cache" + elif forced_refresh: + outcome = "fetched from remote (cache refresh)" + elif from_cache is False: + outcome = "fetched from remote" + else: + # No from_cache attribute: plain requests.Session or offline fallback stub. + outcome = "fetched from remote (no cache backend)" + + # Emitted at WARNING for now, pending a downgrade to DEBUG once the feature stabilizes. + logger.warning("CachedHttpRequester: %s %s %s", method, url, outcome) + + +class OfflineCacheMissError(RuntimeError): + """Raised when an HTTP resource is not available in the cache while offline.""" + + def __init__(self, url: str): + super().__init__( + f"Resource '{url}' is not available in the HTTP cache and " + f"the validator is running in offline mode. Run online once, or use " + f"`rocrate-validator cache warm` to pre-populate the cache." + ) + self.url = url + + class HttpRequester: """ - A singleton class to handle HTTP requests + A singleton class to handle HTTP requests. + + The session is backed by ``requests_cache`` when available. The requester + supports an offline mode in which only cached responses are served + (cache misses yield a 504 response instead of hitting the network). 
""" _instance = None _lock = threading.Lock() @@ -50,7 +96,9 @@ def __new__(cls, *args, **kwargs) -> HttpRequester: def __init__(self, cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - cache_path: Optional[str] = None): + cache_path: Optional[str] = None, + offline: bool = False, + no_cache: bool = False): logger.debug(f"Initializing instance of {self.__class__.__name__} {self}") # check if the instance is already initialized if not hasattr(self, "_initialized"): @@ -66,6 +114,8 @@ def __init__(self, except ValueError: raise TypeError("cache_max_age must be an integer") self.cache_path_prefix = cache_path + self.offline = bool(offline) + self.no_cache = bool(no_cache) # flag to indicate if the cache is permanent or temporary self.permanent_cache = cache_path is not None # initialize the session @@ -83,7 +133,7 @@ def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = # check if requests_cache is installed # and set up the cached session try: - if cache_max_age >= 0: + if not self.no_cache: from requests_cache import CachedSession # If cache_path is not provided, use the default path prefix @@ -96,15 +146,25 @@ def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = else: logger.debug(f"Using provided cache path: {cache_path}") self.permanent_cache = True + # Negative cache_max_age means "never expire" (as documented in the CLI); + # offline mode also forces never-expire so stale entries remain usable. 
+ expire_after = -1 if (self.offline or cache_max_age < 0) else cache_max_age # Initialize the session with a cache self.session = CachedSession( # Cache name with random suffix - cache_name=cache_path, - expire_after=cache_max_age, # Cache expiration time in seconds + cache_name=str(cache_path), + expire_after=expire_after, # Cache expiration time in seconds backend='sqlite', # Use SQLite backend allowable_methods=('GET',), # Cache GET allowable_codes=(200, 302, 404) # Cache responses with these status codes ) + # Apply offline policy: only return cached responses. + if self.offline: + try: + self.session.settings.only_if_cached = True + except AttributeError: + # Older requests_cache versions expose the flag on the session directly. + setattr(self.session, "only_if_cached", True) except ImportError: logger.warning("requests_cache is not installed. Using requests instead.") except Exception as e: @@ -114,8 +174,15 @@ def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = # use requests instead of requests_cache # and create a new session if not self.session: - logger.debug("Cache disabled: using requests instead of requests_cache") - self.session = requests.Session() + if self.offline: + logger.warning( + "Offline mode requested but requests_cache is not available: " + "HTTP requests will be blocked." + ) + self.session = _OfflineFallbackSession() + else: + logger.debug("Cache disabled: using requests instead of requests_cache") + self.session = requests.Session() def __del__(self): """ @@ -127,7 +194,7 @@ def __del__(self): def cleanup(self): """ - Destructor to clean up the cache file used by CachedSession. + Remove the SQLite cache file when the cache is marked as temporary. """ logger.debug(f"Deleting instance of {self.__class__.__name__}") if self.session and hasattr(self.session, 'cache') and self.session.cache: @@ -142,23 +209,202 @@ def cleanup(self): def __getattr__(self, name): """ - Delegate HTTP methods to the session object. 
+ Delegate HTTP methods to the session object, wrapping the call with + cache-outcome logging. :param name: The name of the method to call. - :return: The method from the session object. + :return: A callable that proxies to the session method. """ if name.upper() in {"GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH"}: - return getattr(self.session, name.lower()) + method = name.lower() + session_method = getattr(self.session, method) + + def _wrapped(url, *args, **kwargs): + response = session_method(url, *args, **kwargs) + _log_cache_outcome(method.upper(), url, response, offline=self.offline) + return response + + return _wrapped raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") + def fetch_fresh(self, url: str, **kwargs) -> requests.Response: + """ + Fetch ``url`` bypassing the HTTP cache and store the fresh response. + + Used for resources that must always reflect the current remote state + while still being available offline afterwards (e.g., a remote RO-Crate + whose cached copy must be refreshed on every online run). + + In offline mode, the cache is consulted as usual (no network traffic). + + :param url: The URL to fetch. + :return: The HTTP response. + """ + if self.offline: + response = self.session.get(url, **kwargs) + else: + # ``force_refresh=True`` tells requests_cache to bypass the cache + # entirely and overwrite the stored entry with the new response. + # Older requests_cache versions only understand ``refresh=True`` + # (revalidation), and plain ``requests.Session`` accepts neither. 
+ response = None + for flag in ("force_refresh", "refresh"): + try: + response = self.session.get(url, **{flag: True}, **kwargs) + break + except TypeError: + continue + if response is None: + response = self.session.get(url, **kwargs) + _log_cache_outcome("GET", url, response, offline=self.offline, forced_refresh=not self.offline) + return response + + def has_cached(self, url: str) -> bool: + """ + Check whether ``url`` is already present in the HTTP cache. + + Returns ``False`` when the underlying session does not implement a cache. + """ + cache = getattr(self.session, "cache", None) + if cache is None: + return False + contains = getattr(cache, "contains", None) + try: + if contains is not None: + return bool(contains(url=url)) + # Fallback for older requests_cache versions. + return bool(cache.has_url(url)) + except Exception as e: + logger.debug("Cache lookup failed for %s: %s", url, e) + return False + + def clear_cache(self) -> None: + """ + Remove every entry from the HTTP cache. + """ + cache = getattr(self.session, "cache", None) + if cache is None: + logger.debug("No cache backend to clear") + return + try: + cache.clear() + logger.info("HTTP cache cleared") + except Exception as e: + logger.error("Failed to clear HTTP cache: %s", e) + raise + + def cache_info(self) -> dict[str, Any]: + """ + Return metadata about the current HTTP cache backend. 
+ """ + info: dict[str, Any] = { + "backend": None, + "path": None, + "permanent": getattr(self, "permanent_cache", False), + "offline": getattr(self, "offline", False), + "entries": 0, + "size_bytes": 0, + } + cache = getattr(self.session, "cache", None) + if cache is None: + return info + info["backend"] = cache.__class__.__name__ + cache_name = getattr(cache, "cache_name", None) or getattr(cache, "db_path", None) + if cache_name: + info["path"] = f"{cache_name}.sqlite" if not str(cache_name).endswith(".sqlite") else str(cache_name) + try: + info["entries"] = len(cache.responses) + except Exception: + try: + info["entries"] = sum(1 for _ in cache.urls()) + except Exception as e: + logger.debug("Unable to count cache entries: %s", e) + if info["path"] and os.path.exists(info["path"]): + try: + info["size_bytes"] = os.path.getsize(info["path"]) + except OSError: + pass + return info + @classmethod def initialize_cache(cls, cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - cache_path: Optional[str] = None) -> HttpRequester: + cache_path: Optional[str] = None, + offline: bool = False, + no_cache: bool = False) -> HttpRequester: """ Initialize the HttpRequester singleton with cache settings. - :param max_age: The maximum age of the cache in seconds. + :param cache_max_age: Maximum age of cached responses in seconds. + Negative values mean "never expire". :param cache_path: The path to the cache directory. + :param offline: When ``True``, only cached responses are served. + :param no_cache: When ``True``, disable the HTTP cache entirely and + use a plain ``requests.Session``. Incompatible with ``offline``. + """ + return cls(cache_max_age=cache_max_age, cache_path=cache_path, + offline=offline, no_cache=no_cache) + + @classmethod + def reset(cls) -> None: + """ + Drop the singleton instance. Primarily intended for tests and the + ``cache`` CLI subcommand that reconfigures the cache on the fly. 
""" - return cls(cache_max_age=cache_max_age, cache_path=cache_path) + with cls._lock: + instance = cls._instance + if instance is not None: + try: + session = getattr(instance, "session", None) + if session is not None and hasattr(session, "close"): + session.close() + except Exception as e: + logger.debug("Error closing previous session: %s", e) + if getattr(instance, "permanent_cache", True) is False: + try: + instance.cleanup() + except Exception as e: + logger.debug("Error cleaning up previous cache: %s", e) + cls._instance = None + + +class _OfflineFallbackSession: + """ + Minimal session used when offline mode is requested but no HTTP cache + backend is available. Every request yields a 504 response to signal a + cache miss, mirroring the behavior of ``requests_cache`` in offline mode. + """ + + cache = None + + def _offline_response(self, url: str) -> requests.Response: + response = requests.Response() + response.status_code = OFFLINE_CACHE_MISS_STATUS + response.reason = "Offline: no HTTP cache backend available" + response.url = url + # response._content = b"" + return response + + def get(self, url, **_kwargs): + return self._offline_response(url) + + def head(self, url, **_kwargs): + return self._offline_response(url) + + def post(self, url, **_kwargs): + return self._offline_response(url) + + def put(self, url, **_kwargs): + return self._offline_response(url) + + def delete(self, url, **_kwargs): + return self._offline_response(url) + + def options(self, url, **_kwargs): + return self._offline_response(url) + + def patch(self, url, **_kwargs): + return self._offline_response(url) + + def close(self): + pass From 7c9e82524f35da8825683c5969d51db77b3ed959 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 11:31:13 +0200 Subject: [PATCH 02/89] feat(utils): :sparkles: add helpers to configure the cache path Introduce `get_user_cache_dir()` (honoring `XDG_CACHE_HOME`, falling back to `~/.cache/rocrate-validator`) and 
`get_default_http_cache_path()`, plus the `USER_CACHE_DIR_NAME` / `USER_CACHE_FILE_NAME` constants, so the HTTP cache can be located under a stable, user-level directory instead of the previous `/tmp` prefix. --- rocrate_validator/constants.py | 4 ++++ rocrate_validator/utils/paths.py | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/rocrate_validator/constants.py b/rocrate_validator/constants.py index 93aad322a..42934e260 100644 --- a/rocrate_validator/constants.py +++ b/rocrate_validator/constants.py @@ -89,3 +89,7 @@ # Http Cache Settings DEFAULT_HTTP_CACHE_MAX_AGE = 300 # in seconds DEFAULT_HTTP_CACHE_PATH_PREFIX = '/tmp/rocrate_validator_cache' +# Directory name used under the user's cache root for the persistent HTTP cache +USER_CACHE_DIR_NAME = "rocrate-validator" +# Filename (without extension) of the persistent HTTP cache under the user cache dir +USER_CACHE_FILE_NAME = "http_cache" diff --git a/rocrate_validator/utils/paths.py b/rocrate_validator/utils/paths.py index 9903f6c8d..44fddc907 100644 --- a/rocrate_validator/utils/paths.py +++ b/rocrate_validator/utils/paths.py @@ -72,6 +72,33 @@ def get_profiles_path() -> Path: return Path(CURRENT_DIR).parent / constants.DEFAULT_PROFILES_PATH +def get_user_cache_dir() -> Path: + """ + Get the user-level cache directory for rocrate-validator. + + Honors the XDG Base Directory Specification: + - Uses ``$XDG_CACHE_HOME/rocrate-validator`` when ``XDG_CACHE_HOME`` is set + - Falls back to ``~/.cache/rocrate-validator`` otherwise + + :return: The path to the cache directory (not guaranteed to exist) + """ + xdg = os.environ.get("XDG_CACHE_HOME") + base = Path(xdg) if xdg else Path.home() / ".cache" + return base / constants.USER_CACHE_DIR_NAME + + +def get_default_http_cache_path() -> Path: + """ + Get the default persistent HTTP cache path under the user cache directory. 
+ + The returned path is the cache *name* expected by ``requests_cache`` + (i.e., without the ``.sqlite`` suffix added by the backend). + + :return: The default persistent HTTP cache name path + """ + return get_user_cache_dir() / constants.USER_CACHE_FILE_NAME + + def list_matching_file_paths( directory: str = '.', serialization_format: constants.RDF_SERIALIZATION_FORMATS_TYPES = "turtle") -> list[str]: From 57eceb517faea883d7b3342ed54dbd78f9d8b261 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 11:33:21 +0200 Subject: [PATCH 03/89] feat(core): :sparkles: support offline mode when downloading remote RO-Crates --- rocrate_validator/services.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index 530667ea1..256dea787 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -151,8 +151,26 @@ def __extract_and_validate_rocrate__(rocrate_path: Path): logger.debug("RO-Crate is a remote RO-Crate") # create a temp folder to store the downloaded RO-Crate with tempfile.NamedTemporaryFile(delete=False) as tmp_file: - # download the remote RO-Crate - with HttpRequester().get(rocrate_path.uri, stream=True, allow_redirects=True) as r: + requester = HttpRequester() + offline = bool(getattr(settings, "offline", False)) + # In offline mode, the cache is the only source of truth. Otherwise, + # bypass the cache to refresh the stored copy so that subsequent + # offline runs validate against the latest known remote state. + if offline: + response = requester.get(rocrate_path.uri, stream=True, allow_redirects=True) + else: + response = requester.fetch_fresh(rocrate_path.uri, stream=True, allow_redirects=True) + with response as r: + if r.status_code >= 400: + if offline and r.status_code == 504: + raise FileNotFoundError( + f"Remote RO-Crate '{rocrate_path.uri}' is not available in the HTTP cache. 
" + f"Validate it online first, or run " + f"`rocrate-validator cache warm --crate '{rocrate_path.uri}'`." + ) + raise FileNotFoundError( + f"Failed to download remote RO-Crate '{rocrate_path.uri}' (status {r.status_code})." + ) with open(tmp_file.name, "wb") as f: shutil.copyfileobj(r.raw, f) logger.debug("RO-Crate downloaded to temporary file: %s", tmp_file.name) From 6974912afdde3fb02b318efe3b0660f28755bdff Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 11:38:37 +0200 Subject: [PATCH 04/89] test(utils): :white_check_mark: add unit tests for HttpRequester offline mode Cover the offline/no-cache flags, fetch_fresh, has_cached, clear_cache, cache_info, reset and the _OfflineFallbackSession 504 behavior, plus the standardized cache-outcome log messages. --- tests/unit/test_http_requester_offline.py | 257 ++++++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 tests/unit/test_http_requester_offline.py diff --git a/tests/unit/test_http_requester_offline.py b/tests/unit/test_http_requester_offline.py new file mode 100644 index 000000000..96b35b2fa --- /dev/null +++ b/tests/unit/test_http_requester_offline.py @@ -0,0 +1,257 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Unit tests for the HttpRequester offline-mode extensions.""" + +from __future__ import annotations + +import io +from unittest.mock import MagicMock, patch + +import pytest +import urllib3 + +from rocrate_validator.utils import http as http_module +from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, + HttpRequester) + + +def _build_urllib3_response(body: bytes = b'{"ok": true}', + status: int = 200, + content_type: str = "application/json") -> urllib3.HTTPResponse: + return urllib3.HTTPResponse( + body=io.BytesIO(body), + headers={"Content-Type": content_type, "Content-Length": str(len(body))}, + status=status, + preload_content=False, + decode_content=False, + ) + + +@pytest.fixture +def mock_network(monkeypatch): + """Route every outbound HTTP call to a fake urllib3 response.""" + from requests.adapters import HTTPAdapter + + def fake_send(self, request, **kwargs): + raw = _build_urllib3_response() + response = self.build_response(request, raw) + return response + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + + +@pytest.fixture(autouse=True) +def _reset_singleton(): + HttpRequester.reset() + yield + HttpRequester.reset() + + +def _initialize(cache_path, offline=False, cache_max_age=-1): + HttpRequester.reset() + return HttpRequester.initialize_cache( + cache_path=str(cache_path), + cache_max_age=cache_max_age, + offline=offline, + ) + + +def test_initialize_offline_sets_only_if_cached(tmp_path): + requester = _initialize(tmp_path / "cache", offline=True) + assert requester.offline is True + assert getattr(requester.session.settings, "only_if_cached", False) is True + + +def test_offline_cache_miss_returns_504(tmp_path): + requester = _initialize(tmp_path / "cache", offline=True) + response = requester.get("https://example.org/missing") + assert response.status_code == OFFLINE_CACHE_MISS_STATUS + + +def test_online_unknown_url_is_not_cached(tmp_path): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + 
assert requester.has_cached("https://example.org/anything") is False + + +def test_has_cached_returns_true_after_successful_fetch(tmp_path, mock_network): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + url = "https://example.org/ctx" + assert requester.has_cached(url) is False + response = requester.get(url) + assert response.status_code == 200 + assert requester.has_cached(url) is True + + +def test_offline_serves_cached_response_populated_online(tmp_path, mock_network): + cache_path = tmp_path / "cache" + requester = _initialize(cache_path, offline=False, cache_max_age=60) + url = "https://example.org/ctx" + requester.get(url) + HttpRequester.reset() + # Re-open the cache in offline mode and confirm the hit. + requester = _initialize(cache_path, offline=True) + response = requester.get(url) + assert response.status_code == 200 + assert response.content == b'{"ok": true}' + + +def test_fetch_fresh_bypasses_cache_when_online(tmp_path): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + session_mock = MagicMock() + fresh_response = MagicMock() + fresh_response.status_code = 200 + fresh_response.from_cache = False + session_mock.get.return_value = fresh_response + requester.session = session_mock + result = requester.fetch_fresh("https://example.org/fresh", allow_redirects=True) + assert result is fresh_response + session_mock.get.assert_called_once() + kwargs = session_mock.get.call_args.kwargs + assert kwargs.get("force_refresh") is True + assert kwargs.get("allow_redirects") is True + + +def test_fetch_fresh_falls_back_when_force_refresh_unsupported(tmp_path): + """Older requests_cache versions lack force_refresh; fall back to refresh.""" + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + + class _LegacySession: + def __init__(self): + self.calls: list[dict] = [] + + def get(self, url, **kwargs): + self.calls.append(kwargs) + if "force_refresh" in kwargs: + raise 
TypeError("unexpected keyword argument 'force_refresh'") + fake = MagicMock() + fake.status_code = 200 + fake.from_cache = False + return fake + + legacy = _LegacySession() + requester.session = legacy + response = requester.fetch_fresh("https://example.org/fresh") + assert response.status_code == 200 + assert len(legacy.calls) == 2 + assert "refresh" in legacy.calls[1] + + +def test_fetch_fresh_in_offline_does_not_refresh(tmp_path): + requester = _initialize(tmp_path / "cache", offline=True) + session_mock = MagicMock() + cached_response = MagicMock() + cached_response.status_code = 200 + cached_response.from_cache = True + session_mock.get.return_value = cached_response + requester.session = session_mock + result = requester.fetch_fresh("https://example.org/x") + assert result is cached_response + assert "force_refresh" not in session_mock.get.call_args.kwargs + assert "refresh" not in session_mock.get.call_args.kwargs + + +def test_clear_cache_empties_backend(tmp_path, mock_network): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + requester.get("https://example.org/a") + requester.get("https://example.org/b") + assert requester.cache_info()["entries"] >= 2 + requester.clear_cache() + assert requester.cache_info()["entries"] == 0 + + +def test_cache_info_reports_metadata(tmp_path): + cache_path = tmp_path / "cache" + requester = _initialize(cache_path, offline=False, cache_max_age=60) + info = requester.cache_info() + assert info["backend"] == "SQLiteCache" + assert info["path"].endswith(".sqlite") + assert info["permanent"] is True + assert info["offline"] is False + assert info["entries"] == 0 + + +class _RecordCollector: + """Context manager that attaches a capturing handler to the http logger.""" + + def __init__(self): + self.records: list = [] + + def __enter__(self): + import logging as _logging + + from rocrate_validator.utils import http as http_module + self.records.clear() + self.handler = _logging.Handler() + 
self.handler.setLevel(_logging.WARNING) + self.handler.emit = lambda record: self.records.append(record) # type: ignore[assignment] + # Force initialization of the underlying logger via the proxy. + http_module.logger.warning # noqa: B018 + self._target = http_module.logger._instance + self._target.addHandler(self.handler) + self._previous_level = self._target.level + self._target.setLevel(_logging.WARNING) + return self + + def __exit__(self, exc_type, exc, tb): + self._target.removeHandler(self.handler) + self._target.setLevel(self._previous_level) + return False + + def messages(self) -> list[str]: + return [r.getMessage() for r in self.records] + + +def test_offline_prefix_logs_remote_then_cache(tmp_path, mock_network): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + with _RecordCollector() as collector: + requester.get("https://example.org/ctx") + requester.get("https://example.org/ctx") + messages = [m for m in collector.messages() if "CachedHttpRequester:" in m] + assert any("fetched from remote" in m for m in messages) + assert any("served from HTTP cache" in m for m in messages) + + +def test_offline_prefix_logs_cache_miss_in_offline_mode(tmp_path): + requester = _initialize(tmp_path / "cache", offline=True) + with _RecordCollector() as collector: + requester.get("https://example.org/unknown") + messages = [m for m in collector.messages() if "CachedHttpRequester:" in m] + assert any("not available in HTTP cache" in m for m in messages) + + +def test_offline_prefix_logs_fetch_fresh_as_refresh(tmp_path, mock_network): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + # Populate the cache first. 
+ requester.get("https://example.org/x") + with _RecordCollector() as collector: + requester.fetch_fresh("https://example.org/x") + messages = [m for m in collector.messages() if "CachedHttpRequester:" in m] + assert any("cache refresh" in m for m in messages) + + +def test_offline_without_requests_cache_uses_fallback_session(tmp_path, monkeypatch): + """When requests_cache is unavailable, offline mode falls back to a 504 stub.""" + original_import = __import__ + + def fake_import(name, globals=None, locals=None, fromlist=(), level=0): + if name == "requests_cache" or (fromlist and "CachedSession" in fromlist and name.endswith("requests_cache")): + raise ImportError("simulated missing dependency") + return original_import(name, globals, locals, fromlist, level) + + with patch("builtins.__import__", side_effect=fake_import): + requester = _initialize(tmp_path / "cache", offline=True) + assert isinstance(requester.session, http_module._OfflineFallbackSession) + response = requester.get("https://example.org/whatever") + assert response.status_code == OFFLINE_CACHE_MISS_STATUS From 949ec6f2bcb0814f0f113c8c6c67e548d5f1cbc7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 11:42:25 +0200 Subject: [PATCH 05/89] feat(utils): :sparkles: add HTTP cache warm-up from profile artifacts Introduce `cache_warmup` helpers that discover external artifacts declared by profile descriptors via `prof:hasResource`/`prof:hasArtifact` and prefetch them so subsequent offline runs resolve every required resource from the local HTTP cache. Add the `ROCRATE_VALIDATOR_AUTO_WARM` environment variable to toggle automatic warm-up. 
--- rocrate_validator/constants.py | 2 + rocrate_validator/models.py | 7 + rocrate_validator/utils/cache_warmup.py | 228 ++++++++++++++++++++++++ 3 files changed, 237 insertions(+) create mode 100644 rocrate_validator/utils/cache_warmup.py diff --git a/rocrate_validator/constants.py b/rocrate_validator/constants.py index 42934e260..6984eeffe 100644 --- a/rocrate_validator/constants.py +++ b/rocrate_validator/constants.py @@ -93,3 +93,5 @@ USER_CACHE_DIR_NAME = "rocrate-validator" # Filename (without extension) of the persistent HTTP cache under the user cache dir USER_CACHE_FILE_NAME = "http_cache" +# Environment variable to disable automatic warm-up of the HTTP cache +AUTO_WARM_ENV_VAR = "ROCRATE_VALIDATOR_AUTO_WARM" diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 78607f2a2..b6d0b4d92 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -381,6 +381,13 @@ def name(self): """ return self.label or f"Profile {self.uri}" + @property + def profile_specification_graph(self) -> Graph: + """ + The RDF graph of the profile specification. + """ + return self._profile_specification_graph # type: ignore + @property def profile_node(self): return self._profile_node diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py new file mode 100644 index 000000000..e22f5d737 --- /dev/null +++ b/rocrate_validator/utils/cache_warmup.py @@ -0,0 +1,228 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Helpers to populate the HTTP cache with resources referenced by profile +descriptors. + +Profiles describe their external resources using the W3C Profiles Vocabulary +(``prof:hasResource`` / ``prof:hasArtifact``). The URLs declared there are the +ones the validator needs to resolve at runtime (JSON-LD contexts, ontologies, +schemas, ...). By discovering them dynamically we can warm the cache so that +subsequent offline runs find every required resource locally. +""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence + +from rocrate_validator import constants +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, + HttpRequester) + +if TYPE_CHECKING: + from rocrate_validator.models import Profile, ValidationSettings + +# set up logging +logger = logging.getLogger(__name__) + +# Guard to prevent multiple warm-up attempts in the same run. +# This is not a thread-safe mechanism. +__profiles_loaded = False + + +# SPARQL query returning every artifact URL declared in a profile descriptor. +# We intentionally do not filter by role: any resource the profile declares is +# considered a candidate for warm-up (Vocabulary, Constraints, Schema, ...). +_CACHEABLE_URLS_SPARQL = """ +PREFIX prof: <http://www.w3.org/ns/dx/prof/> +SELECT DISTINCT ?artifact +WHERE { + ?profile prof:hasResource ?resource . + ?resource prof:hasArtifact ?artifact . +} +""" + + +@dataclass +class WarmUpResult: + """Outcome of a warm-up operation.""" + url: str + status: str # "ok", "skipped", "failed" + detail: Optional[str] = None + + +def discover_profile_cacheable_urls(profile: "Profile") -> List[str]: + """ + Return the list of HTTP(S) URLs declared by ``profile`` as cacheable + artifacts. 
Returns an empty list when the profile has no declared + artifacts or cannot be parsed. + """ + graph = profile.profile_specification_graph + if graph is None: + logger.debug( + "Profile %s has no specification graph loaded", getattr(profile, "identifier", "?")) + return [] + urls: List[str] = [] + try: + for row in graph.query(_CACHEABLE_URLS_SPARQL): + artifact = row.artifact + if artifact is None: + continue + value = str(artifact) + if value.lower().startswith(("http://", "https://")) and value not in urls: + urls.append(value) + except Exception as e: + logger.debug("Failed to query cacheable URLs for profile %s: %s", + getattr(profile, "identifier", "?"), e) + return urls + + +def discover_cacheable_urls_from_profiles(profiles: Iterable["Profile"]) -> List[str]: + """ + Aggregate cacheable URLs from the given profiles, preserving order and + removing duplicates. + """ + seen: set[str] = set() + result: List[str] = [] + for profile in profiles: + for url in discover_profile_cacheable_urls(profile): + if url not in seen: + seen.add(url) + result.append(url) + return result + + +def warm_up_urls(urls: Sequence[str]) -> List[WarmUpResult]: + """ + Fetch each URL so that its response is stored in the HTTP cache. + + Already-cached URLs are skipped. Failures (including HTTP errors and + offline cache misses) are reported but do not raise. 
+ """ + requester = HttpRequester() + results: List[WarmUpResult] = [] + offline = bool(getattr(requester, "offline", False)) + for url in urls: + try: + if requester.has_cached(url): + results.append(WarmUpResult(url=url, status="skipped", detail="already cached")) + continue + if offline: + response = requester.get(url) + else: + response = requester.fetch_fresh(url) + status_code = getattr(response, "status_code", None) + if status_code is None: + results.append(WarmUpResult(url=url, status="failed", detail="no status code")) + elif status_code == OFFLINE_CACHE_MISS_STATUS and offline: + results.append(WarmUpResult(url=url, status="failed", detail="offline cache miss")) + elif status_code >= 400: + results.append(WarmUpResult(url=url, status="failed", detail=f"HTTP {status_code}")) + else: + results.append(WarmUpResult(url=url, status="ok", detail=f"HTTP {status_code}")) + except Exception as e: + logger.debug("Warm-up failed for %s: %s", url, e) + results.append(WarmUpResult(url=url, status="failed", detail=str(e))) + return results + + +def auto_warm_up_for_settings(settings: "ValidationSettings") -> Optional[List[WarmUpResult]]: + """ + Perform a best-effort synchronous warm-up triggered by + ``ValidationSettings.__post_init__``. + + The warm-up is skipped when: + + - offline mode is enabled (nothing to fetch from the network); + - the cache path is not persistent (auto warm-up only makes sense when + the cache survives the run); + - the environment variable ``ROCRATE_VALIDATOR_AUTO_WARM`` is set to a + value disabling the feature (``0``, ``false``, ``no``, ``off``). 
+ """ + if getattr(settings, "offline", False): + return None + if getattr(settings, "cache_path", None) is None: + return None + env_value = os.environ.get(constants.AUTO_WARM_ENV_VAR, "1").strip().lower() + if env_value in {"0", "false", "no", "off"}: + logger.debug("Auto warm-up disabled via %s=%s", constants.AUTO_WARM_ENV_VAR, env_value) + return None + + profile_identifier = getattr(settings, "profile_identifier", None) + if not profile_identifier: + return None + + profile = _find_profile(profile_identifier, settings) + if profile is None: + return None + urls = discover_profile_cacheable_urls(profile) + if not urls: + return None + requester = HttpRequester() + urls_to_fetch = [u for u in urls if not requester.has_cached(u)] + if not urls_to_fetch: + logger.debug("Auto warm-up: all %d resources already cached for profile %s", + len(urls), profile_identifier) + return [] + results = warm_up_urls(urls_to_fetch) + ok = sum(1 for r in results if r.status == "ok") + logger.info("Auto warm-up: pre-loaded %d/%d resources for profile %s", + ok, len(urls_to_fetch), profile_identifier) + return results + + +def _find_profile(identifier, settings) -> Optional["Profile"]: + """ + Look up a loaded profile by identifier. Accepts either a string or a list + (the settings sometimes store a list of identifiers). + """ + # Import here to avoid a circular import with models.py. + from rocrate_validator.models import Profile + from rocrate_validator.utils.paths import get_profiles_path + + # Load profiles to ensure the requested one is available and its graph is parsed. 
+ global __profiles_loaded + if not __profiles_loaded: + profiles_path = getattr(settings, "profiles_path", None) or get_profiles_path() + extra_profiles_path = getattr(settings, "extra_profiles_path", None) + try: + Profile.load_profiles( + profiles_path=profiles_path, + publicID=None, + extra_profiles_path=extra_profiles_path, + ) + __profiles_loaded = True + except Exception as e: + logger.debug("Unable to preload profiles for auto warm-up: %s", e) + return None + + if isinstance(identifier, (list, tuple)): + if not identifier: + return None + identifier = identifier[0] + try: + return Profile.get_by_identifier(identifier) + except Exception: + # Fall back to scanning all loaded profiles. + try: + for profile in Profile.all(): + if getattr(profile, "identifier", None) == identifier: + return profile + except Exception: + return None + return None From eedabf11097246741f511fd8352f6b1038f25f6d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 11:47:50 +0200 Subject: [PATCH 06/89] feat(utils): :sparkles: add cache-aware JSON-LD document loader Introduce `install_document_loader()` that patches rdflib's `source_to_json` so remote `@context` resolution goes through HttpRequester, benefiting from the HTTP cache and honoring offline mode (raising OfflineCacheMissError on offline cache misses). The install is idempotent and reversible via `uninstall_document_loader()` for tests. --- rocrate_validator/utils/document_loader.py | 138 +++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 rocrate_validator/utils/document_loader.py diff --git a/rocrate_validator/utils/document_loader.py b/rocrate_validator/utils/document_loader.py new file mode 100644 index 000000000..0242fac67 --- /dev/null +++ b/rocrate_validator/utils/document_loader.py @@ -0,0 +1,138 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +JSON-LD document loader that routes remote ``@context`` resolution through +``HttpRequester``. + +``rdflib``'s built-in JSON-LD parser fetches remote contexts via ``urllib``, +which bypasses the HTTP cache managed by this project. Installing the loader +ensures every remote context resolution benefits from the cache and honors +offline mode. +""" + +from __future__ import annotations + +import json +import threading +from typing import Any, Optional, Tuple + +from rdflib.plugins.shared.jsonld import context as jsonld_context +from rdflib.plugins.shared.jsonld import util as jsonld_util + +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, + HttpRequester, OfflineCacheMissError) + +logger = logging.getLogger(__name__) + +_install_lock = threading.Lock() +_installed = False +_original_source_to_json = jsonld_util.source_to_json + + +def _patched_source_to_json(source, fragment_id=None, extract_all_scripts=False): + # Only intercept remote URL strings; let the original handle everything else. + if isinstance(source, str) and source.lower().startswith(("http://", "https://")): + try: + return _fetch_json_ld(source), None + except OfflineCacheMissError: + raise + except Exception as e: + logger.debug("Custom JSON-LD loader failed for %s: %s; falling back", source, e) + return _original_source_to_json(source, fragment_id, extract_all_scripts) + + +def install_document_loader() -> bool: + """ + Install the custom JSON-LD document loader. Idempotent. 
+ + Returns ``True`` when the loader is active after the call, ``False`` when + installation raised an unexpected error (which is logged). + """ + global _installed + + with _install_lock: + if _installed: + return True + + try: + jsonld_util.source_to_json = _patched_source_to_json + # The context module imports source_to_json at module import time, + # so it must be patched separately. + jsonld_context.source_to_json = _patched_source_to_json # type: ignore[attr-defined] + except Exception as e: + logger.error("Failed to install JSON-LD document loader: %s", e) + return False + + _installed = True + logger.debug("JSON-LD document loader installed") + return True + + +def uninstall_document_loader() -> bool: + """ + Restore the original JSON-LD document loader. Primarily intended for tests. + + Returns ``True`` when the loader is no longer active after the call, + ``False`` when uninstallation raised an unexpected error (which is logged). + """ + global _installed + with _install_lock: + if not _installed: + return True + + try: + jsonld_util.source_to_json = _original_source_to_json + jsonld_context.source_to_json = _original_source_to_json # type: ignore[attr-defined] + except Exception as e: + logger.error("Failed to uninstall JSON-LD document loader: %s", e) + return False + + _installed = False + return True + + +def _fetch_json_ld(url: str) -> Any: + """ + Fetch a JSON-LD document through ``HttpRequester``. + + Raises ``OfflineCacheMissError`` when running offline and the document + is not available in the cache. Raises ``RuntimeError`` for other + non-successful responses. 
+ """ + requester = HttpRequester() + headers = {"Accept": "application/ld+json, application/json, */*;q=0.1"} + response = requester.get(url, headers=headers, allow_redirects=True) + status = getattr(response, "status_code", None) + if status == OFFLINE_CACHE_MISS_STATUS and getattr(requester, "offline", False): + raise OfflineCacheMissError(url) + if status is None or status >= 400: + raise RuntimeError(f"Unable to retrieve JSON-LD document from {url} (status {status})") + try: + return response.json() + except ValueError: + return json.loads(response.text) + + +def resolve_remote_document(url: str) -> Tuple[Optional[dict], Optional[str]]: + """ + Resolve a remote JSON-LD document, returning ``(json, content_type)``. + + Exposed primarily for tests and warm-up routines that need to reuse the + loader's semantics (offline handling, cache integration) without wiring + through rdflib. + """ + data = _fetch_json_ld(url) + return data, "application/ld+json" From 9ef1ff5a5e96548f0a01c4d11600751d3c56e169 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 11:49:31 +0200 Subject: [PATCH 07/89] feat(models): :sparkles: add offline and no-cache options to ValidationSettings Expose `offline` and `no_cache` flags on ValidationSettings and default `cache_path` to the persistent user HTTP cache so consecutive online/ offline runs share the same store. Validate that `offline` and `no_cache` are mutually exclusive. Install the JSON-LD document loader so rdflib's remote `@context` resolution goes through the cache. 
--- rocrate_validator/models.py | 60 ++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index b6d0b4d92..f6bba6d13 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -57,9 +57,14 @@ from rocrate_validator.events import Event, EventType, Publisher, Subscriber from rocrate_validator.rocrate import ROCrate from rocrate_validator.utils import log as logging +from rocrate_validator.utils.cache_warmup import auto_warm_up_for_settings from rocrate_validator.utils.collections import MapIndex, MultiIndexMap +from rocrate_validator.utils.document_loader import install_document_loader from rocrate_validator.utils.http import HttpRequester -from rocrate_validator.utils.paths import get_profiles_path +from rocrate_validator.utils.paths import ( + get_default_http_cache_path, + get_profiles_path, +) from rocrate_validator.utils.python_helpers import ( get_requirement_name_from_file, ) @@ -267,7 +272,7 @@ def __init__( self._profile_node = None # init property to store the RDF graph of the profile specification - self._profile_specification_graph = None + self._profile_specification_graph: Optional[Graph] = None # check if the profile specification file exists spec_file = self.profile_specification_file_path @@ -2637,22 +2642,63 @@ class ValidationSettings: metadata_dict: dict = None #: Verbose output verbose: bool = False - #: Cache max age in seconds + #: Cache max age in seconds (negative values mean "never expire") cache_max_age: Optional[int] = DEFAULT_HTTP_CACHE_MAX_AGE #: Cache path cache_path: Optional[Path] = None + #: Flag to enable offline mode: HTTP requests are served only from the cache + offline: bool = False + #: Flag to disable the HTTP cache entirely: every request hits the network + no_cache: bool = False def __post_init__(self): # if requirement_severity is a str, convert to Severity if isinstance(self.requirement_severity, 
str): self.requirement_severity = Severity[self.requirement_severity] + # Offline mode needs the cache to serve responses, so it cannot be + # combined with an explicit cache disable. + if self.offline and self.no_cache: + raise ValueError( + "Offline mode requires the HTTP cache to be enabled; " + "no_cache=True is incompatible with offline=True." + ) + # Default to the persistent user cache whenever caching is enabled so that + # consecutive runs (online then offline) share the same HTTP cache: this + # is what lets the offline mode find the resources fetched online. + if self.cache_path is None and not self.no_cache: + default_path = get_default_http_cache_path() + default_path.parent.mkdir(parents=True, exist_ok=True) + self.cache_path = default_path + logger.debug("Cache path not set: defaulting to persistent user cache %s", self.cache_path) + if self.offline and self.cache_path is None: + logger.warning( + "Offline mode enabled without a persistent cache path: " + "all HTTP-backed resources will fail unless pre-populated." + ) + # Reset any previously initialized singleton so new settings take effect. + HttpRequester.reset() # initialize the HTTP cache - HttpRequester.initialize_cache(cache_path=self.cache_path, cache_max_age=self.cache_max_age) + HttpRequester.initialize_cache( + cache_path=str(self.cache_path) if self.cache_path is not None else None, + cache_max_age=self.cache_max_age, + offline=self.offline, + no_cache=self.no_cache, + ) logger.debug( - "HTTP cache initialized at %s with max age %s seconds", - self.cache_path, - self.cache_max_age, + "HTTP cache initialized at %s with max age %s seconds (offline=%s, no_cache=%s)", + self.cache_path, self.cache_max_age, self.offline, self.no_cache, ) + # Install the JSON-LD document loader so context resolution goes through the cache. 
+ try: + install_document_loader() + except Exception as e: + logger.debug("Could not install JSON-LD document loader: %s", e) + # Best-effort synchronous warm-up of profile-declared URLs. + if not self.offline: + try: + auto_warm_up_for_settings(self) + except Exception as e: + logger.debug("Auto warm-up skipped: %s", e) def to_dict(self): """ From f8b99bcaf3fda0970934a89ac4d32380a7b1319a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 11:55:21 +0200 Subject: [PATCH 08/89] feat(cli): :sparkles: add `--offline` flag and refine `--no-cache` help for the `validate` command --- rocrate_validator/cli/commands/validate.py | 54 ++++++++++++++++++++-- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index e574e6674..0683780d1 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -223,10 +223,22 @@ def validate_uri(ctx, param, value): '-nc', '--no-cache', is_flag=True, - help="Disable the HTTP cache", + help=( + "Disable the HTTP cache entirely: every request goes to the network " + "and nothing is persisted. Incompatible with [bold]--offline[/bold]." + ), + default=False, + show_default=True, +) +@click.option( + '--offline', + is_flag=True, + help=( + "Offline mode: HTTP requests are served only from the cache. " + "Pre-populate the cache with [bold]rocrate-validator cache warm[/bold]." 
+ ), default=False, show_default=True, - hidden=True ) @click.pass_context def validate(ctx, @@ -249,7 +261,8 @@ def validate(ctx, output_line_width: Optional[int] = None, cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, cache_path: Optional[Path] = None, - no_cache: bool = False): + no_cache: bool = False, + offline: bool = False): """ [magenta]rocrate-validator:[/magenta] Validate a RO-Crate against a profile """ @@ -277,10 +290,36 @@ def validate(ctx, logger.debug("cache_max_age: %s", cache_max_age) logger.debug("cache_path: %s", os.path.abspath(cache_path) if cache_path else None) logger.debug("no_cache: %s", no_cache) + logger.debug("offline: %s", offline) + + # --no-cache and --offline are contradictory: offline mode requires a cache + # to serve requests from, while no-cache disables caching entirely. + if no_cache and offline: + raise click.UsageError( + "The --no-cache and --offline flags are mutually exclusive: " + "offline mode relies on the HTTP cache to serve resources." + ) if rocrate_uri: logger.debug("rocrate_path: %s", os.path.abspath(rocrate_uri)) + # Warn the user when a remote RO-Crate is about to be validated in offline mode: + # the cached copy (if any) will be used, and it may be out of sync with the remote. 
+ if offline and isinstance(rocrate_uri, str) and rocrate_uri.split(":", 1)[0].lower() in ("http", "https", "ftp"): + console.print( + Padding( + Rule( + "[bold yellow]WARNING:[/bold yellow] " + "[bold]The target RO-Crate is remote and offline mode is enabled.[/bold]\n" + "The cached version of the RO-Crate will be used if available.\n" + "The cached copy may be out of sync with the version currently published remotely.", + align="center", + style="bold yellow", + ), + (1, 2, 0, 2), + ) + ) + # Parse the skip_checks option logger.debug("skip_checks: %s", skip_checks) # Parse the skip_checks option @@ -314,8 +353,13 @@ def validate(ctx, "abort_on_first": fail_fast, "skip_checks": skip_checks_list, "metadata_only": metadata_only, - "cache_max_age": cache_max_age if not no_cache else -1, - "cache_path": cache_path + "cache_max_age": cache_max_age, + "cache_path": cache_path, + "offline": offline, + "no_cache": no_cache, + # When offline is requested, remote crate fetching must use the cache + # instead of the "disable download" short-circuit. + "disable_remote_crate_download": False if offline else True, } # Print the application header From e296f10f006135e123e081ceef65fde61c12a2b2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 11:58:10 +0200 Subject: [PATCH 09/89] feat(cli): :sparkles: add `cache` subcommand to manage the HTTP cache Introduce `rocrate-validator cache` with `info`, `reset` and `warm` subcommands to inspect, clear and pre-populate the persistent HTTP cache used by offline validation. `warm` discovers cacheable URLs from profile descriptors and can also prefetch remote RO-Crates. 
--- rocrate_validator/cli/__init__.py | 4 +- rocrate_validator/cli/commands/cache.py | 341 ++++++++++++++++++++++++ 2 files changed, 343 insertions(+), 2 deletions(-) create mode 100644 rocrate_validator/cli/commands/cache.py diff --git a/rocrate_validator/cli/__init__.py b/rocrate_validator/cli/__init__.py index 22283ba1b..ec84cfba4 100644 --- a/rocrate_validator/cli/__init__.py +++ b/rocrate_validator/cli/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rocrate_validator.cli.commands import profiles, validate +from rocrate_validator.cli.commands import cache, profiles, validate from rocrate_validator.cli.main import cli -__all__ = ["cli", "profiles", "validate"] +__all__ = ["cli", "cache", "profiles", "validate"] diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py new file mode 100644 index 000000000..94390cd31 --- /dev/null +++ b/rocrate_validator/cli/commands/cache.py @@ -0,0 +1,341 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +``rocrate-validator cache`` subcommand: inspect, warm and reset the HTTP cache +used by the validator. 
+""" + +from __future__ import annotations + +import shutil +import tempfile +from pathlib import Path +from typing import List, Optional + +from rich.table import Table + +from rocrate_validator.cli.commands.errors import handle_error +from rocrate_validator.cli.main import cli, click +from rocrate_validator.models import Profile +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.cache_warmup import ( + WarmUpResult, discover_cacheable_urls_from_profiles, warm_up_urls) +from rocrate_validator.utils.http import HttpRequester +from rocrate_validator.utils.paths import (get_default_http_cache_path, + get_profiles_path) + +logger = logging.getLogger(__name__) + + +def _resolve_cache_path(cache_path: Optional[Path]) -> Path: + """Return the effective cache path, creating the parent directory.""" + if cache_path is None: + path = get_default_http_cache_path() + else: + path = Path(cache_path) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def _reset_requester(cache_path: Path, offline: bool = False) -> None: + """Re-initialize the HttpRequester singleton with the given cache path.""" + HttpRequester.reset() + HttpRequester.initialize_cache( + cache_path=str(cache_path), + cache_max_age=-1, + offline=offline, + ) + + +@cli.group("cache") +@click.pass_context +def cache(ctx): + """ + [magenta]rocrate-validator:[/magenta] Manage the HTTP cache + """ + + +@cache.command("info") +@click.option( + "--cache-path", + type=click.Path(), + default=None, + show_default=False, + help="Path to the HTTP cache directory (defaults to the user cache dir)", +) +@click.pass_context +def cache_info(ctx, cache_path: Optional[Path] = None): + """ + Display information about the HTTP cache. 
+ """ + console = ctx.obj['console'] + try: + resolved = _resolve_cache_path(cache_path) + _reset_requester(resolved) + info = HttpRequester().cache_info() + table = Table(title="HTTP Cache", show_lines=False) + table.add_column("Property", style="bold") + table.add_column("Value") + table.add_row("Path", str(info.get("path") or resolved)) + table.add_row("Backend", str(info.get("backend") or "—")) + table.add_row("Persistent", "yes" if info.get("permanent") else "no") + table.add_row("Offline mode", "yes" if info.get("offline") else "no") + table.add_row("Entries", str(info.get("entries", 0))) + size = info.get("size_bytes", 0) or 0 + table.add_row("Size", _format_bytes(size)) + console.print(table) + except Exception as e: + handle_error(e, console) + + +@cache.command("reset") +@click.option( + "--cache-path", + type=click.Path(), + default=None, + show_default=False, + help="Path to the HTTP cache directory (defaults to the user cache dir)", +) +@click.option( + "-y", + "--yes", + is_flag=True, + default=False, + help="Do not prompt for confirmation before removing cache entries", +) +@click.pass_context +def cache_reset(ctx, cache_path: Optional[Path] = None, yes: bool = False): + """ + Remove every entry from the HTTP cache. 
+ """ + console = ctx.obj['console'] + interactive = ctx.obj.get('interactive', False) + exit_code = 0 + try: + resolved = _resolve_cache_path(cache_path) + _reset_requester(resolved) + info = HttpRequester().cache_info() + entries = info.get("entries", 0) + size = _format_bytes(info.get("size_bytes", 0) or 0) + console.print( + f"[bold]HTTP cache:[/bold] {info.get('path') or resolved} " + f"([cyan]{entries}[/cyan] entries, {size})" + ) + if entries == 0: + console.print("[green]Cache is already empty.[/green]") + return + if not yes: + if not interactive: + console.print( + "[yellow]Use --yes to remove entries in non-interactive mode.[/yellow]" + ) + exit_code = 1 + else: + confirm = click.confirm( + f"Remove all {entries} cached entries?", default=False + ) + if not confirm: + console.print("Aborted.") + else: + HttpRequester().clear_cache() + console.print("[green]HTTP cache cleared.[/green]") + else: + HttpRequester().clear_cache() + console.print("[green]HTTP cache cleared.[/green]") + except Exception as e: + handle_error(e, console) + return + if exit_code: + ctx.exit(exit_code) + + +@cache.command("warm") +@click.option( + "--cache-path", + type=click.Path(), + default=None, + show_default=False, + help="Path to the HTTP cache directory (defaults to the user cache dir)", +) +@click.option( + "--profiles-path", + type=click.Path(exists=True), + default=None, + show_default=False, + help="Path containing the profile definitions", +) +@click.option( + "--extra-profiles-path", + type=click.Path(exists=True), + default=None, + show_default=False, + help="Path containing additional user profile definitions", +) +@click.option( + "-p", + "--profile-identifier", + multiple=True, + type=click.STRING, + default=None, + show_default=False, + metavar="Profile-ID", + help="Identifier of a profile to warm (may be given multiple times)", +) +@click.option( + "--all-profiles", + is_flag=True, + default=False, + help="Warm cacheable URLs declared by every installed 
profile", +) +@click.option( + "--crate", + multiple=True, + type=click.STRING, + default=None, + metavar="URI", + help="URL of a remote RO-Crate to download and cache (may be given multiple times)", +) +@click.pass_context +def cache_warm( + ctx, + cache_path: Optional[Path] = None, + profiles_path: Optional[Path] = None, + extra_profiles_path: Optional[Path] = None, + profile_identifier: Optional[List[str]] = None, + all_profiles: bool = False, + crate: Optional[List[str]] = None, +): + """ + Pre-populate the HTTP cache with resources declared by profiles and with + optional remote RO-Crate URLs. + """ + console = ctx.obj['console'] + exit_with_failure = False + try: + resolved_cache = _resolve_cache_path(cache_path) + _reset_requester(resolved_cache, offline=False) + profiles_dir = Path(profiles_path) if profiles_path else get_profiles_path() + extra_dir = Path(extra_profiles_path) if extra_profiles_path else None + + requested_ids = list(profile_identifier or []) + urls: List[str] = [] + profile_scope: Optional[str] = None + + if all_profiles or requested_ids or not crate: + Profile.load_profiles( + profiles_path=profiles_dir, + extra_profiles_path=extra_dir, + ) + loaded_profiles = list(Profile.all()) + if requested_ids: + selected = [] + missing = [] + for ident in requested_ids: + profile = Profile.get_by_identifier(ident) + if profile is None: + missing.append(ident) + else: + selected.append(profile) + if missing: + console.print( + f"[yellow]Profile(s) not found and skipped:[/yellow] {', '.join(missing)}" + ) + profile_scope = f"profiles: {', '.join(p.identifier for p in selected)}" + urls = discover_cacheable_urls_from_profiles(selected) + else: + profile_scope = "all installed profiles" + urls = discover_cacheable_urls_from_profiles(loaded_profiles) + + results: List[WarmUpResult] = [] + if urls: + console.print( + f"[bold]Warming cache for {profile_scope}[/bold] " + f"([cyan]{len(urls)}[/cyan] URL(s))..." 
+ ) + results.extend(warm_up_urls(urls)) + + if crate: + console.print( + f"[bold]Fetching remote RO-Crates[/bold] ([cyan]{len(crate)}[/cyan] URL(s))..." + ) + results.extend(_warm_remote_crates(list(crate))) + + if not results: + console.print("[yellow]Nothing to warm up.[/yellow]") + return + + table = Table(title="Warm-up results", show_lines=False) + table.add_column("URL", overflow="fold") + table.add_column("Status") + table.add_column("Detail") + ok = 0 + failed = 0 + for r in results: + colour = {"ok": "green", "skipped": "cyan", "failed": "red"}.get(r.status, "white") + table.add_row(r.url, f"[{colour}]{r.status}[/{colour}]", r.detail or "") + if r.status == "ok": + ok += 1 + elif r.status == "failed": + failed += 1 + console.print(table) + console.print( + f"[bold]Summary:[/bold] {ok} cached, {failed} failed, " + f"{len(results) - ok - failed} skipped" + ) + exit_with_failure = failed > 0 + except Exception as e: + handle_error(e, console) + return + if exit_with_failure: + ctx.exit(1) + + +def _warm_remote_crates(urls: List[str]) -> List[WarmUpResult]: + """ + Download each remote RO-Crate URL via ``HttpRequester.fetch_fresh`` + so that its response is stored in the cache. + """ + requester = HttpRequester() + results: List[WarmUpResult] = [] + for url in urls: + try: + response = requester.fetch_fresh(url, stream=True, allow_redirects=True) + status = getattr(response, "status_code", None) + if status is None: + results.append(WarmUpResult(url=url, status="failed", detail="no status code")) + continue + if status >= 400: + results.append(WarmUpResult(url=url, status="failed", detail=f"HTTP {status}")) + continue + # Consume the response body so that the cache backend stores it. 
+ with tempfile.TemporaryFile() as tmp: + shutil.copyfileobj(response.raw, tmp) + results.append(WarmUpResult(url=url, status="ok", detail=f"HTTP {status}")) + except Exception as e: + logger.debug("Remote crate warm-up failed for %s: %s", url, e) + results.append(WarmUpResult(url=url, status="failed", detail=str(e))) + return results + + +def _format_bytes(size: int) -> str: + if size <= 0: + return "0 B" + units = ["B", "KiB", "MiB", "GiB", "TiB"] + idx = 0 + value = float(size) + while value >= 1024 and idx < len(units) - 1: + value /= 1024 + idx += 1 + return f"{value:.2f} {units[idx]}" From 9b3eba660011fbd2b7321c9ca429c7380c01ddd5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 11:59:22 +0200 Subject: [PATCH 10/89] test(utils): :white_check_mark: add unit tests for profile URL discovery and cache warm-up --- tests/unit/test_cache_warmup.py | 175 ++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 tests/unit/test_cache_warmup.py diff --git a/tests/unit/test_cache_warmup.py b/tests/unit/test_cache_warmup.py new file mode 100644 index 000000000..7d53e5d52 --- /dev/null +++ b/tests/unit/test_cache_warmup.py @@ -0,0 +1,175 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Unit tests for profile URL discovery and cache warm-up.""" + +from __future__ import annotations + +import io +from pathlib import Path + +import pytest +import urllib3 + +from rocrate_validator.models import Profile +from rocrate_validator.utils.cache_warmup import ( + auto_warm_up_for_settings, discover_cacheable_urls_from_profiles, + discover_profile_cacheable_urls, warm_up_urls) +from rocrate_validator.utils.http import HttpRequester +from rocrate_validator.utils.paths import get_profiles_path + + +PROFILE_TTL_TEMPLATE = """ +@prefix dct: . +@prefix prof: . +@prefix role: . +@prefix rdfs: . + + + a prof:Profile ; + rdfs:label "Sample profile" ; + prof:hasResource [ + a prof:ResourceDescriptor ; + prof:hasRole role:Vocabulary ; + prof:hasArtifact ; + ] ; + prof:hasResource [ + a prof:ResourceDescriptor ; + prof:hasRole role:Specification ; + prof:hasArtifact ; + ] ; + prof:hasResource [ + a prof:ResourceDescriptor ; + prof:hasArtifact "not-a-url" ; + ] ; + prof:hasToken "sample" ; +. 
+""" + + +@pytest.fixture(autouse=True) +def _reset_requester(): + HttpRequester.reset() + yield + HttpRequester.reset() + + +@pytest.fixture +def sample_profile(tmp_path): + profile_dir = tmp_path / "sample" + profile_dir.mkdir() + (profile_dir / "profile.ttl").write_text(PROFILE_TTL_TEMPLATE) + return Profile( + profiles_base_path=tmp_path, + profile_path=profile_dir, + ) + + +@pytest.fixture +def mock_network(monkeypatch): + from requests.adapters import HTTPAdapter + + def fake_send(self, request, **kwargs): + raw = urllib3.HTTPResponse( + body=io.BytesIO(b'{"ok": true}'), + headers={"Content-Type": "application/json", "Content-Length": "12"}, + status=200, + preload_content=False, + decode_content=False, + ) + return self.build_response(request, raw) + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + + +def test_discover_urls_returns_all_declared_artifacts(sample_profile): + urls = discover_profile_cacheable_urls(sample_profile) + # Both declared roles are included; the non-URL artifact is dropped. + assert "https://example.org/ctx/v1" in urls + assert "https://example.org/spec/v1/index.html" in urls + assert all(u.lower().startswith("http") for u in urls) + assert len(urls) == 2 + + +def test_discover_urls_on_multiple_profiles_deduplicates(sample_profile, tmp_path): + other_dir = tmp_path / "sample_other" + other_dir.mkdir() + (other_dir / "profile.ttl").write_text( + PROFILE_TTL_TEMPLATE + .replace("", + "") + .replace('prof:hasToken "sample"', 'prof:hasToken "other"') + ) + other_profile = Profile(profiles_base_path=tmp_path, profile_path=other_dir) + aggregated = discover_cacheable_urls_from_profiles([sample_profile, other_profile]) + # Both profiles share the same two artifacts; the result should be deduped. 
+ assert len(aggregated) == 2 + + +def test_warm_up_urls_skips_already_cached(tmp_path, mock_network): + HttpRequester.initialize_cache( + cache_path=str(tmp_path / "cache"), + cache_max_age=60, + ) + urls = ["https://example.org/a", "https://example.org/b"] + first = warm_up_urls(urls) + assert [r.status for r in first] == ["ok", "ok"] + second = warm_up_urls(urls) + assert [r.status for r in second] == ["skipped", "skipped"] + + +def test_warm_up_reports_offline_cache_miss(tmp_path): + HttpRequester.initialize_cache( + cache_path=str(tmp_path / "cache"), + cache_max_age=-1, + offline=True, + ) + results = warm_up_urls(["https://example.org/missing"]) + assert results[0].status == "failed" + assert "offline" in (results[0].detail or "").lower() + + +def test_auto_warm_up_noop_when_offline(tmp_path): + class _Settings: + offline = True + cache_path = tmp_path / "cache" + profile_identifier = "ro-crate-1.1" + profiles_path = get_profiles_path() + extra_profiles_path = None + + assert auto_warm_up_for_settings(_Settings()) is None + + +def test_auto_warm_up_disabled_via_env(monkeypatch, tmp_path): + monkeypatch.setenv("ROCRATE_VALIDATOR_AUTO_WARM", "0") + + class _Settings: + offline = False + cache_path = tmp_path / "cache" + profile_identifier = "ro-crate-1.1" + profiles_path = get_profiles_path() + extra_profiles_path = None + + assert auto_warm_up_for_settings(_Settings()) is None + + +def test_auto_warm_up_noop_when_no_cache_path(): + class _Settings: + offline = False + cache_path = None + profile_identifier = "ro-crate-1.1" + profiles_path = get_profiles_path() + extra_profiles_path = None + + assert auto_warm_up_for_settings(_Settings()) is None From 72e29094fe348587263437119ea8efc66ec78067 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 12:00:14 +0200 Subject: [PATCH 11/89] test(utils): :white_check_mark: add unit tests for the JSON-LD document loader --- tests/unit/test_document_loader.py | 156 +++++++++++++++++++++++++++++ 1 file 
changed, 156 insertions(+) create mode 100644 tests/unit/test_document_loader.py diff --git a/tests/unit/test_document_loader.py b/tests/unit/test_document_loader.py new file mode 100644 index 000000000..85f03db44 --- /dev/null +++ b/tests/unit/test_document_loader.py @@ -0,0 +1,156 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for the JSON-LD document loader.""" + +from __future__ import annotations + +import io + +import pytest +import urllib3 + +from rocrate_validator.utils import document_loader +from rocrate_validator.utils.document_loader import (install_document_loader, + resolve_remote_document, + uninstall_document_loader) +from rocrate_validator.utils.http import HttpRequester, OfflineCacheMissError + + +def _urllib3_response(payload: bytes = b'{"@context": {"name": "https://schema.org/name"}}', + status: int = 200) -> urllib3.HTTPResponse: + return urllib3.HTTPResponse( + body=io.BytesIO(payload), + headers={ + "Content-Type": "application/ld+json", + "Content-Length": str(len(payload)), + }, + status=status, + preload_content=False, + decode_content=False, + ) + + +@pytest.fixture +def mock_network(monkeypatch): + from requests.adapters import HTTPAdapter + + def fake_send(self, request, **kwargs): + raw = _urllib3_response() + return self.build_response(request, raw) + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + + +@pytest.fixture(autouse=True) +def _cleanup(): + uninstall_document_loader() 
+ HttpRequester.reset() + yield + uninstall_document_loader() + HttpRequester.reset() + + +def test_install_is_idempotent(tmp_path): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=-1) + assert install_document_loader() is True + assert install_document_loader() is True + assert document_loader._installed is True + + +def test_install_returns_false_on_error(tmp_path, monkeypatch): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=-1) + from rdflib.plugins.shared.jsonld import util as jsonld_util + + class _FrozenModule: + def __setattr__(self, _name, _value): + raise RuntimeError("boom") + + monkeypatch.setattr(document_loader, "jsonld_util", _FrozenModule()) + assert install_document_loader() is False + assert document_loader._installed is False + # Original module must remain untouched on failure. + assert jsonld_util.source_to_json is document_loader._original_source_to_json + + +def test_uninstall_returns_true_when_not_installed(): + assert uninstall_document_loader() is True + + +def test_uninstall_returns_false_on_error(tmp_path, monkeypatch): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=-1) + assert install_document_loader() is True + + class _FrozenModule: + def __setattr__(self, _name, _value): + raise RuntimeError("boom") + + monkeypatch.setattr(document_loader, "jsonld_util", _FrozenModule()) + assert uninstall_document_loader() is False + assert document_loader._installed is True + + +def test_resolve_remote_document_uses_http_requester(tmp_path, mock_network): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=60) + payload, content_type = resolve_remote_document("https://example.org/context") + assert payload == {"@context": {"name": "https://schema.org/name"}} + assert content_type == "application/ld+json" + assert HttpRequester().has_cached("https://example.org/context") is True + + +def 
test_resolve_raises_offline_cache_miss(tmp_path): + HttpRequester.initialize_cache( + cache_path=str(tmp_path / "cache"), + cache_max_age=-1, + offline=True, + ) + with pytest.raises(OfflineCacheMissError): + resolve_remote_document("https://example.org/never-cached") + + +def test_patched_source_to_json_routes_http_urls(tmp_path, mock_network): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=60) + install_document_loader() + from rdflib.plugins.shared.jsonld import util as jsonld_util + doc, _ = jsonld_util.source_to_json("https://example.org/context") + assert doc == {"@context": {"name": "https://schema.org/name"}} + + +def test_patched_source_to_json_ignores_non_http(tmp_path): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=60) + install_document_loader() + from rdflib.plugins.shared.jsonld import util as jsonld_util + file_path = tmp_path / "context.jsonld" + file_path.write_text('{"@context": {"foo": "https://example.org/foo"}}') + doc, _ = jsonld_util.source_to_json(str(file_path)) + assert doc == {"@context": {"foo": "https://example.org/foo"}} + + +def test_resolve_maps_http_error_to_runtime(tmp_path, monkeypatch): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=60) + + class _StubResponse: + status_code = 500 + text = "" + + def json(self): + raise ValueError + + monkeypatch.setattr( + HttpRequester(), + "get", + lambda *_, **__: _StubResponse(), + ) + with pytest.raises(RuntimeError): + resolve_remote_document("https://example.org/broken") From ed6e58e9d1a236156065528124eaa9005642a9b9 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 12:00:55 +0200 Subject: [PATCH 12/89] test(integration): :white_check_mark: add integration tests for offline mode, auto warm-up and cache CLI --- tests/integration/test_offline_mode.py | 407 +++++++++++++++++++++++++ 1 file changed, 407 insertions(+) create mode 100644 
tests/integration/test_offline_mode.py diff --git a/tests/integration/test_offline_mode.py b/tests/integration/test_offline_mode.py new file mode 100644 index 000000000..6c48bf588 --- /dev/null +++ b/tests/integration/test_offline_mode.py @@ -0,0 +1,407 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Integration tests for offline mode, auto warm-up and the cache CLI.""" + +from __future__ import annotations + +import io + +import pytest +import urllib3 +from click.testing import CliRunner + +from rocrate_validator.cli.main import cli +from rocrate_validator.models import ValidationSettings +from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, + HttpRequester) +from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER +from tests.ro_crates import ValidROC + + +def _urllib3_response(payload: bytes = b'{"@context": {}}', + status: int = 200, + content_type: str = "application/ld+json") -> urllib3.HTTPResponse: + return urllib3.HTTPResponse( + body=io.BytesIO(payload), + headers={ + "Content-Type": content_type, + "Content-Length": str(len(payload)), + }, + status=status, + preload_content=False, + decode_content=False, + ) + + +@pytest.fixture +def network_interceptor(monkeypatch): + """ + Intercept every outbound HTTP call and record the requested URLs so tests + can assert whether the cache was actually consulted. 
+ """ + from requests.adapters import HTTPAdapter + + recorder = {"calls": []} + + def fake_send(self, request, **kwargs): + recorder["calls"].append(request.url) + return self.build_response(request, _urllib3_response()) + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + return recorder + + +@pytest.fixture(autouse=True) +def _clean_singleton(monkeypatch): + monkeypatch.setenv("ROCRATE_VALIDATOR_AUTO_WARM", "0") + HttpRequester.reset() + yield + HttpRequester.reset() + + +@pytest.fixture +def cli_runner() -> CliRunner: + return CliRunner() + + +def test_offline_flag_configures_cache(tmp_path): + settings = ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + cache_path=tmp_path / "cache", + ) + info = HttpRequester().cache_info() + assert info["offline"] is True + assert info["permanent"] is True + assert settings.offline is True + + +def test_offline_default_path_is_persistent(tmp_path, monkeypatch): + monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path / "xdg")) + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + cache_path=None, + ) + info = HttpRequester().cache_info() + assert info["offline"] is True + assert info["permanent"] is True + assert str(tmp_path / "xdg") in str(info["path"]) + + +def test_offline_cache_miss_yields_504_response(tmp_path): + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + cache_path=tmp_path / "cache", + ) + response = HttpRequester().get("https://example.org/never") + assert response.status_code == OFFLINE_CACHE_MISS_STATUS + + +def test_online_then_offline_share_default_cache(tmp_path, network_interceptor, monkeypatch): + """Reproduce the common user workflow: validate online without passing a + cache path, then validate offline without passing a cache path. Both runs + must share the same persistent XDG cache so the offline run finds every + resource fetched online. 
+ """ + monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path / "xdg")) + url = "https://example.org/ctx" + + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=False, + cache_max_age=60, + ) + online_info = HttpRequester().cache_info() + assert online_info["permanent"] is True + assert str(tmp_path / "xdg") in str(online_info["path"]) + HttpRequester().get(url) + assert HttpRequester().has_cached(url) is True + + HttpRequester.reset() + + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + ) + offline_info = HttpRequester().cache_info() + assert offline_info["path"] == online_info["path"] + assert HttpRequester().has_cached(url) is True + response = HttpRequester().get(url) + assert response.status_code == 200 + + +def test_offline_reuses_cached_response(tmp_path, network_interceptor): + cache_path = tmp_path / "cache" + # First: online run populates the cache. + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=False, + cache_path=cache_path, + cache_max_age=60, + ) + url = "https://example.org/context" + response = HttpRequester().get(url) + assert response.status_code == 200 + assert HttpRequester().has_cached(url) is True + pre_calls = len(network_interceptor["calls"]) + + # Second: offline run must not hit the network but still get the cached doc. + HttpRequester.reset() + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + cache_path=cache_path, + ) + response = HttpRequester().get(url) + assert response.status_code == 200 + assert response.content == b'{"@context": {}}' + # No new network traffic in offline mode. 
+ assert len(network_interceptor["calls"]) == pre_calls + + +def test_no_cache_disables_cache_backend(tmp_path, network_interceptor): + """no_cache=True must skip the cache and hit the network every call.""" + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=False, + no_cache=True, + ) + requester = HttpRequester() + info = requester.cache_info() + assert info["backend"] is None + assert requester.has_cached("https://example.org/any") is False + # Two identical requests must both hit the network. + requester.get("https://example.org/any") + requester.get("https://example.org/any") + assert network_interceptor["calls"].count("https://example.org/any") == 2 + + +def test_negative_cache_max_age_means_never_expire(tmp_path, network_interceptor): + """cache_max_age<0 must enable the cache with no expiration.""" + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=False, + cache_max_age=-1, + cache_path=tmp_path / "cache", + ) + requester = HttpRequester() + info = requester.cache_info() + assert info["backend"] is not None + url = "https://example.org/any" + requester.get(url) + # Second call must be served from the cache. 
+ requester.get(url) + assert network_interceptor["calls"].count(url) == 1 + + +def test_offline_with_disabled_cache_raises(): + with pytest.raises(ValueError, match="Offline mode requires the HTTP cache"): + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + no_cache=True, + ) + + +def test_cli_no_cache_and_offline_rejected(cli_runner): + result = cli_runner.invoke( + cli, + [ + "-y", + "validate", + str(ValidROC().wrroc_paper_long_date), + "--no-paging", + "--no-cache", + "--offline", + ], + ) + assert result.exit_code != 0, result.output + assert "mutually exclusive" in result.output.lower() + + +def test_cli_no_cache_disables_cache_backend(cli_runner, tmp_path, network_interceptor): + """The --no-cache flag must skip the cache and hit the network on every call.""" + result = cli_runner.invoke( + cli, + [ + "-y", + "validate", + str(ValidROC().wrroc_paper_long_date), + "--no-paging", + "--no-cache", + "--skip-checks", SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + ], + ) + # The validation itself may pass or fail depending on upstream checks; we + # only require that no cache file was written (ephemeral session). + assert "Traceback" not in result.output, result.output + info = HttpRequester().cache_info() + assert info["backend"] is None + + +def test_cli_cache_info(cli_runner, tmp_path): + result = cli_runner.invoke( + cli, + ["cache", "info", "--cache-path", str(tmp_path / "cache")], + ) + assert result.exit_code == 0, result.output + assert "HTTP Cache" in result.output or "Entries" in result.output + + +def test_cli_cache_reset_noninteractive_requires_yes(cli_runner, tmp_path, network_interceptor): + cache_path = tmp_path / "cache" + # Populate the cache so the reset has something to do. 
+ HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=60) + HttpRequester().get("https://example.org/ctx") + assert HttpRequester().cache_info()["entries"] >= 1 + HttpRequester.reset() + + # Without --yes in non-interactive mode, reset must abort. + result = cli_runner.invoke( + cli, + ["-y", "cache", "reset", "--cache-path", str(cache_path)], + ) + assert result.exit_code == 1, result.output + # Cache should still contain the entry. + HttpRequester.reset() + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=3600) + assert HttpRequester().cache_info()["entries"] >= 1 + + +def test_cli_cache_reset_yes_clears_entries(cli_runner, tmp_path, network_interceptor): + cache_path = tmp_path / "cache" + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=60) + HttpRequester().get("https://example.org/ctx") + HttpRequester().get("https://example.org/other") + assert HttpRequester().cache_info()["entries"] >= 2 + HttpRequester.reset() + + result = cli_runner.invoke( + cli, + ["-y", "cache", "reset", "--cache-path", str(cache_path), "--yes"], + ) + assert result.exit_code == 0, result.output + + HttpRequester.reset() + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=-1) + assert HttpRequester().cache_info()["entries"] == 0 + + +def test_cli_cache_warm_populates_profile_urls(cli_runner, tmp_path, network_interceptor): + cache_path = tmp_path / "cache" + result = cli_runner.invoke( + cli, + [ + "-y", + "cache", "warm", + "--cache-path", str(cache_path), + "--profile-identifier", "ro-crate-1.1", + ], + ) + assert result.exit_code == 0, result.output + assert any("w3id.org" in c for c in network_interceptor["calls"]), \ + f"No expected URL fetched. Calls: {network_interceptor['calls']}" + # The URL must now be cached for offline use. 
+ HttpRequester.reset() + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=3600, offline=True) + assert HttpRequester().has_cached("https://w3id.org/ro/crate/1.1/context") is True + + +def test_cli_cache_warm_crate_caches_remote_archive(cli_runner, tmp_path, network_interceptor): + cache_path = tmp_path / "cache" + crate_url = "https://example.org/my-crate.zip" + result = cli_runner.invoke( + cli, + [ + "-y", + "cache", "warm", + "--cache-path", str(cache_path), + "--crate", crate_url, + ], + ) + assert result.exit_code == 0, result.output + HttpRequester.reset() + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=3600, offline=True) + assert HttpRequester().has_cached(crate_url) is True + + +def test_cli_validate_offline_warns_when_remote(cli_runner, tmp_path, network_interceptor): + """In offline mode with a remote URI the validator must emit a warning.""" + # Pre-populate the cache so the remote crate resolves in offline mode. + cache_path = tmp_path / "cache" + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=60) + HttpRequester().get("https://example.org/fake-crate.zip") + HttpRequester.reset() + + # We intentionally do not actually run the full validation here; the CLI + # will fail because the cached body is not a valid ZIP, but the warning is + # emitted before that point. 
+ result = cli_runner.invoke( + cli, + [ + "-y", + "validate", + "https://example.org/fake-crate.zip", + "--no-paging", + "--offline", + "--cache-path", str(cache_path), + ], + ) + assert "offline mode is enabled" in result.output.lower() \ + or "cached version" in result.output.lower(), result.output + + +def test_cli_validate_offline_on_local_crate_succeeds(cli_runner, tmp_path): + """Validating a local crate in offline mode must work without network access.""" + cache_path = tmp_path / "cache" + result = cli_runner.invoke( + cli, + [ + "-y", + "validate", + str(ValidROC().wrroc_paper_long_date), + "--no-paging", + "--offline", + "--cache-path", str(cache_path), + "--skip-checks", SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + ], + ) + # The validation may report issues for locally missing contexts; what we + # require is that no uncaught network-related exception aborts the run. + assert result.exit_code in (0, 1), result.output + assert "Traceback" not in result.output + + +def test_auto_warm_up_skipped_when_offline(tmp_path, network_interceptor, monkeypatch): + """Auto warm-up must not run when offline mode is active.""" + monkeypatch.setenv("ROCRATE_VALIDATOR_AUTO_WARM", "1") + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + cache_path=tmp_path / "cache", + ) + assert network_interceptor["calls"] == [] + + +def test_auto_warm_up_disabled_via_env(tmp_path, network_interceptor, monkeypatch): + monkeypatch.setenv("ROCRATE_VALIDATOR_AUTO_WARM", "0") + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=False, + cache_path=tmp_path / "cache", + ) + assert network_interceptor["calls"] == [] From 463b3161d3ea9b8e74cee46468d025b79fccbe97 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 12:04:52 +0200 Subject: [PATCH 13/89] =?UTF-8?q?fix(utils):=20:loud=5Fsound:=20fix=20log?= =?UTF-8?q?=20level=C2=A0message?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- rocrate_validator/utils/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 3f0f0b343..6a548ce2b 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -57,7 +57,7 @@ def _log_cache_outcome(method: str, url: str, response, *, offline: bool, forced outcome = "fetched from remote (no cache backend)" # Emitted at WARNING for now, pending a downgrade to DEBUG once the feature stabilizes. - logger.warning("CachedHttpRequester: %s %s %s", method, url, outcome) + logger.debug("CachedHttpRequester: %s %s %s", method, url, outcome) class OfflineCacheMissError(RuntimeError): From 7a56e54fba91c7315622859e7ace131b1eea8829 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 12:34:38 +0200 Subject: [PATCH 14/89] style(tests): :rotating_light: fix linter warning F401 --- tests/unit/test_cache_warmup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit/test_cache_warmup.py b/tests/unit/test_cache_warmup.py index 7d53e5d52..3b591b962 100644 --- a/tests/unit/test_cache_warmup.py +++ b/tests/unit/test_cache_warmup.py @@ -17,7 +17,6 @@ from __future__ import annotations import io -from pathlib import Path import pytest import urllib3 @@ -29,7 +28,6 @@ from rocrate_validator.utils.http import HttpRequester from rocrate_validator.utils.paths import get_profiles_path - PROFILE_TTL_TEMPLATE = """ @prefix dct: . @prefix prof: . From 0ee0abb2a771953af02df3ad7951359e33ebd2b0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 12:39:02 +0200 Subject: [PATCH 15/89] test(conftest): :wrench: update test configuration Redirect XDG_CACHE_HOME to a session-scoped tmp dir so tests never touch the developer's real ~/.cache, and default ROCRATE_VALIDATOR_AUTO_WARM=0 per test to prevent unintended network calls. Tests that need warm-up opt in explicitly. 
--- tests/conftest.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 704123b12..778b62efa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,7 @@ # and add it to the system path import os +import pytest from pytest import fixture from rocrate_validator.utils import log as logging @@ -48,6 +49,41 @@ SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER = check_local_data_entity_existence.identifier +@pytest.fixture(scope="session", autouse=True) +def _session_isolated_xdg(tmp_path_factory): + """ + Redirect the XDG user cache to a per-session temporary directory so that + tests do not write to, or read from, the developer's real ~/.cache. The + directory is shared across tests in the same session so that HTTP responses + fetched by one test remain available to subsequent ones (mirroring the + behavior users see in practice and preserving the HTTP-cache hit pattern + the existing test suite relies on). + """ + xdg_dir = tmp_path_factory.mktemp("rocrate_validator_xdg") + previous_xdg = os.environ.get("XDG_CACHE_HOME") + os.environ["XDG_CACHE_HOME"] = str(xdg_dir) + try: + yield xdg_dir + finally: + if previous_xdg is None: + os.environ.pop("XDG_CACHE_HOME", None) + else: + os.environ["XDG_CACHE_HOME"] = previous_xdg + + +@pytest.fixture(autouse=True) +def _per_test_auto_warm(monkeypatch): + """ + Disable the synchronous HTTP cache auto warm-up by default so tests do not + hit the network unexpectedly. Tests that exercise the warm-up opt in by + setting the environment variable before instantiating ValidationSettings. 
+ """ + monkeypatch.setenv( + "ROCRATE_VALIDATOR_AUTO_WARM", + os.environ.get("ROCRATE_VALIDATOR_AUTO_WARM", "0"), + ) + + @fixture def skip_data_entity_existence_check_identifier(): return SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER From 4c5497c606e6c5f821d60e727935c15e87d5efbd Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 23 Apr 2026 14:08:33 +0200 Subject: [PATCH 16/89] chore: :loud_sound: adjust cache outcome log level --- rocrate_validator/utils/http.py | 1 - tests/unit/test_http_requester_offline.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 6a548ce2b..73a02bdd8 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -56,7 +56,6 @@ def _log_cache_outcome(method: str, url: str, response, *, offline: bool, forced # No from_cache attribute: plain requests.Session or offline fallback stub. outcome = "fetched from remote (no cache backend)" - # Emitted at WARNING for now, pending a downgrade to DEBUG once the feature stabilizes. logger.debug("CachedHttpRequester: %s %s %s", method, url, outcome) diff --git a/tests/unit/test_http_requester_offline.py b/tests/unit/test_http_requester_offline.py index 96b35b2fa..de1491328 100644 --- a/tests/unit/test_http_requester_offline.py +++ b/tests/unit/test_http_requester_offline.py @@ -194,14 +194,14 @@ def __enter__(self): from rocrate_validator.utils import http as http_module self.records.clear() self.handler = _logging.Handler() - self.handler.setLevel(_logging.WARNING) + self.handler.setLevel(_logging.DEBUG) self.handler.emit = lambda record: self.records.append(record) # type: ignore[assignment] # Force initialization of the underlying logger via the proxy. 
http_module.logger.warning # noqa: B018 self._target = http_module.logger._instance self._target.addHandler(self.handler) self._previous_level = self._target.level - self._target.setLevel(_logging.WARNING) + self._target.setLevel(_logging.DEBUG) return self def __exit__(self, exc_type, exc, tb): From 7b3e8bc8e9ef3f8995092cfcb83935e2853105e9 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 27 Apr 2026 17:06:05 +0200 Subject: [PATCH 17/89] fix(models): :bug: forward `extra_profiles_path` when computing validation statistics --- rocrate_validator/models.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index ffaed96a7..60614a7ec 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1638,7 +1638,10 @@ def __initialise__(cls, validation_settings: ValidationSettings): # extract the validation settings severity_validation = validation_settings.requirement_severity profiles: list[Profile] = Profile.load_profiles( - validation_settings.profiles_path, severity=severity_validation) + validation_settings.profiles_path, + extra_profiles_path=validation_settings.extra_profiles_path, + severity=severity_validation, + allow_requirement_check_override=validation_settings.allow_requirement_check_override) profile: Profile = Profile.find_in_list(profiles, validation_settings.profile_identifier) target_profile_identifier = profile.identifier # initialize the profiles list From f3a7e6de535dd9cd8a3c132356c77c9b16300d24 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 27 Apr 2026 17:10:18 +0200 Subject: [PATCH 18/89] test(models): :white_check_mark: cover profile resolution from extra_profiles_path --- tests/unit/test_cli_internals.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/unit/test_cli_internals.py b/tests/unit/test_cli_internals.py index 2f90bcb88..ae4762ce3 100644 --- a/tests/unit/test_cli_internals.py +++
b/tests/unit/test_cli_internals.py @@ -81,3 +81,22 @@ def test_compute_stats(fake_profiles_path): _.requirement.profile.identifier == "a"} logger.error(stats) + + +def test_compute_stats_resolves_profile_from_extra_profiles_path(fake_profiles_path): + # ValidationStatistics.__initialise__ used to call Profile.load_profiles + # without forwarding extra_profiles_path, so any profile that lived only + # under --extra-profiles-path raised ProfileNotFound. + settings = ValidationSettings.parse({ + "profiles_path": DEFAULT_PROFILES_PATH, + "extra_profiles_path": fake_profiles_path, + "profile_identifier": "a", + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + "requirement_severity": "REQUIRED", + }) + + stats = ValidationStatistics.__initialise__(validation_settings=settings) + + assert any(p.identifier == "a" for p in stats["profiles"]), \ + "Profile 'a' from extra_profiles_path was not resolved by ValidationStatistics" From 50448145303ae484c0270e9b13e17b837fd6f6a4 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 28 Apr 2026 13:02:19 +0200 Subject: [PATCH 19/89] fix(shacl): :bug: derive NodeShape level from nested PropertyShapes When a NodeShape lives in the profile root and does not declare sh:severity, fall back to the most severe level among its PropertyShapes instead of defaulting to REQUIRED. Also corrects the return type annotation of __compute_requirement_level__ from LevelCollection to RequirementLevel. 
--- rocrate_validator/requirements/shacl/checks.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 012c87810..87e54bbc6 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -23,6 +23,7 @@ Requirement, RequirementCheck, RequirementCheckValidationEvent, + RequirementLevel, SkipRequirementCheck, ValidationContext, ) @@ -105,13 +106,28 @@ def description(self) -> str: return self._shape.parent.description return f"Check for {self._shape.name}" if self._shape.name else "SHACL validation check" - def __compute_requirement_level__(self) -> LevelCollection: + def __compute_requirement_level__(self) -> RequirementLevel: if self._shape and self._shape.get_declared_level(): return self._shape.get_declared_level() if self.requirement and self.requirement.requirement_level_from_path: return self.requirement.requirement_level_from_path + # When the shape file lives in the profile root and the NodeShape + # itself does not declare sh:severity, derive the level from the + # most severe nested PropertyShape instead of defaulting to REQUIRED. 
+ derived = self.__derive_level_from_properties__() + if derived: + return derived return LevelCollection.REQUIRED + def __derive_level_from_properties__(self) -> Optional[RequirementLevel]: + properties = getattr(self._shape, "properties", None) + if not properties: + return None + declared_levels = [lvl for lvl in (p.get_declared_level() for p in properties) if lvl] + if not declared_levels: + return None + return max(declared_levels, key=lambda lvl: lvl.severity.value) + @property def level(self) -> str: if not self._level: From a306f7f4b60c13558368020ead5706b7f50b7d3d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 28 Apr 2026 14:26:51 +0200 Subject: [PATCH 20/89] fix(core): :bug: filter failed requirements/checks by configured severity `ValidationResult.failed_requirements` and `failed_checks` now consider only issues whose severity is at least the configured `requirement_severity`, matching the behavior already documented for the validation context. Previously every recorded issue was returned, regardless of the threshold. --- rocrate_validator/models.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index ffaed96a7..eea58b078 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -2250,17 +2250,21 @@ def add_issue(self, @property def failed_requirements(self) -> Collection[Requirement]: """ - Get the requirements that failed + Get the requirements that failed at or above the configured `requirement_severity`. """ - return set(issue.check.requirement for issue in self._issues) + min_severity = self.context.requirement_severity + return set(issue.check.requirement for issue in self._issues + if issue.severity >= min_severity) # --- Checks --- @property def failed_checks(self) -> Collection[RequirementCheck]: """ - Get the checks that failed + Get the checks that failed at or above the configured `requirement_severity`. 
""" - return set(issue.check for issue in self._issues) + min_severity = self.context.requirement_severity + return set(issue.check for issue in self._issues + if issue.severity >= min_severity) def get_failed_checks_by_requirement(self, requirement: Requirement) -> Collection[RequirementCheck]: """ From bb514b3dd102777c99c5ac9d6c1bba340996fe5f Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 28 Apr 2026 14:35:01 +0200 Subject: [PATCH 21/89] test(shacl): :white_check_mark: cover NodeShape level derivation from nested PropertyShapes --- tests/unit/requirements/test_shacl_checks.py | 104 ++++++++++++++++++- 1 file changed, 101 insertions(+), 3 deletions(-) diff --git a/tests/unit/requirements/test_shacl_checks.py b/tests/unit/requirements/test_shacl_checks.py index 808f77bc3..f1c3fac0d 100644 --- a/tests/unit/requirements/test_shacl_checks.py +++ b/tests/unit/requirements/test_shacl_checks.py @@ -17,17 +17,20 @@ from rdflib import BNode, Graph, Namespace, URIRef from rocrate_validator.constants import SHACL_NS +from rocrate_validator.models import LevelCollection from rocrate_validator.requirements.shacl.checks import SHACLCheck -from rocrate_validator.requirements.shacl.models import Shape, ShapesRegistry +from rocrate_validator.requirements.shacl.models import (NodeShape, + PropertyShape, Shape, + ShapesRegistry) from rocrate_validator.requirements.shacl.utils import resolve_parent_shape logger = logging.getLogger(__name__) class MockRequirement: - def __init__(self): + def __init__(self, requirement_level_from_path=None): self.profile = None - self.requirement_level_from_path = None + self.requirement_level_from_path = requirement_level_from_path class MockParentShape: @@ -220,3 +223,98 @@ def test_resolve_parent_shape_with_property_bnode(): assert result is not None, "Should resolve parent shape for property BNode" assert result.key == shape.key + + +def _make_property(graph: Graph, severity_term: str = None) -> PropertyShape: + """Build a 
PropertyShape on a fresh BNode, optionally setting sh:severity.""" + prop = PropertyShape(BNode(), graph) + if severity_term is not None: + prop.severity = severity_term + return prop + + +def test_derive_level_picks_most_stringent_declared_property_severity(): + """ + Flat NodeShape with no declared severity inherits the highest severity + declared by its nested properties. + """ + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.add_property(_make_property(g, f"{SHACL_NS}Info")) + shape.add_property(_make_property(g, f"{SHACL_NS}Warning")) + shape.add_property(_make_property(g, f"{SHACL_NS}Info")) + + check = SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.RECOMMENDED + + +def test_derive_level_with_uniform_property_severity(): + """When every property declares the same severity, derive that severity.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.add_property(_make_property(g, f"{SHACL_NS}Info")) + shape.add_property(_make_property(g, f"{SHACL_NS}Info")) + + check = SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.OPTIONAL + + +def test_derive_level_ignores_properties_without_declared_severity(): + """Properties without sh:severity are skipped; only declared ones drive the result.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.add_property(_make_property(g)) # no severity declared + shape.add_property(_make_property(g, f"{SHACL_NS}Warning")) + + check = SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.RECOMMENDED + + +def test_derive_level_falls_back_to_required_when_no_property_declares_severity(): + """If no nested property declares a severity, fall back to REQUIRED.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.add_property(_make_property(g)) + shape.add_property(_make_property(g)) + + check = 
SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.REQUIRED + + +def test_shape_declared_severity_takes_precedence_over_derivation(): + """An explicit severity on the NodeShape wins over property-based derivation.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.severity = f"{SHACL_NS}Warning" + shape.add_property(_make_property(g, f"{SHACL_NS}Violation")) + + check = SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.RECOMMENDED + + +def test_path_based_level_takes_precedence_over_derivation(): + """When the requirement file is in a must/should/may folder the path level wins.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.add_property(_make_property(g, f"{SHACL_NS}Info")) + + check = SHACLCheck( + MockRequirement(requirement_level_from_path=LevelCollection.SHOULD), shape + ) + + assert check.level == LevelCollection.SHOULD + + +def test_derive_level_for_node_shape_without_properties(): + """A NodeShape with no nested properties falls back to REQUIRED.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + + check = SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.REQUIRED From 254fb8894f3187b42d83ed29bd6e58dafb8d3732 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 28 Apr 2026 14:37:09 +0200 Subject: [PATCH 22/89] fix(shacl): :bug: drop sub-threshold pyshacl violations at the source Skip sh:ValidationResult nodes whose check severity is below the requested `requirement_severity` while iterating pyshacl results, so they never become Issues. This makes every consumer of `ValidationResult._issues` consistent with the threshold (not just `failed_requirements`/`failed_checks`) and avoids materializing non-actionable violations. 
--- rocrate_validator/requirements/shacl/checks.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 87e54bbc6..f0b969645 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -282,6 +282,18 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): if requirementCheck is None: logger.warning("No check instance found for shape: %s", shape.key) continue + # Drop violations whose check severity is below the requested + # `requirement_severity`: pyshacl still emits sh:ValidationResult + # nodes for sh:Warning / sh:Info, but they are not actionable at a + # stricter validation level. + if requirementCheck.severity < shacl_context.settings.requirement_severity: + logger.debug( + "Dropping violation for check %s: severity %s below requested %s", + requirementCheck.identifier, + requirementCheck.severity, + shacl_context.settings.requirement_severity, + ) + continue if ( not shacl_context.settings.skip_checks or requirementCheck.identifier not in shacl_context.settings.skip_checks From 9f486746f6c97ddffe24f4b9ea6e2715e0beecb3 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 29 Apr 2026 23:47:42 +0200 Subject: [PATCH 23/89] feat(model): :sparkles: feat(core): :sparkles: add SourceSnippet class and RequirementCheck.get_source_snippet method --- rocrate_validator/models.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 78607f2a2..d394b9d63 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1374,6 +1374,19 @@ def ok_file(p: Path) -> bool: return requirements +@dataclass(frozen=True) +class SourceSnippet: + """ + A snippet of source code backing a :class:`RequirementCheck`. + :ivar language: language tag for syntax highlighting (e.g. 
``"python"``, ``"turtle"``). + :ivar code: the source code as text. + :ivar source_path: path to the file the snippet was extracted from, when available. + """ + language: str + code: str + source_path: Optional[Path] = None + + @total_ordering class RequirementCheck(ABC): @@ -1472,6 +1485,14 @@ def hidden(self) -> bool: def execute_check(self, context: ValidationContext) -> bool: raise NotImplementedError() + def get_source_snippet(self) -> Optional[SourceSnippet]: + """ + Return the source code that implements this check, or ``None`` if the + backing source cannot be extracted for this check kind. + Concrete subclasses should override this method. + """ + return None + def to_dict(self, with_requirement: bool = True, with_profile: bool = True) -> dict: result = { "identifier": self.identifier, From fe740e440b58fd0e7fad1f9b2e9c1694461ba2d5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 29 Apr 2026 23:55:30 +0200 Subject: [PATCH 24/89] feat(Python): :sparkles: implement the `get_source_snippet` method for Python checks --- rocrate_validator/requirements/python/__init__.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index aaeb49d8e..b15d3ee2a 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -21,7 +21,7 @@ from rocrate_validator.models import (LevelCollection, Profile, Requirement, RequirementCheck, RequirementLevel, RequirementLoader, Severity, - ValidationContext) + SourceSnippet, ValidationContext) from rocrate_validator.utils.python_helpers import get_classes_from_file # set up logging @@ -63,6 +63,19 @@ def execute_check(self, context: ValidationContext) -> bool: return True return self._check_function(self, context) + def get_source_snippet(self) -> Optional[SourceSnippet]: + try: + code = inspect.getsource(self._check_function) + except 
(OSError, TypeError) as e: + logger.debug("Unable to read source for check %s: %s", self.identifier, e) + return None + source_file = inspect.getsourcefile(self._check_function) + return SourceSnippet( + language="python", + code=code, + source_path=Path(source_file) if source_file else None, + ) + class PyRequirement(Requirement): """ From 25bc7b4614732dcad513ec46e826ad6ed49a85f0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 30 Apr 2026 00:38:38 +0200 Subject: [PATCH 25/89] feat(SHACL): :sparkles: implement the `get_source_snippet` method for SHACL checks --- .../requirements/shacl/checks.py | 57 ++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index d82d900ef..7d9a42838 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -16,7 +16,7 @@ from timeit import default_timer as timer from typing import Optional -from rdflib import Literal, Namespace +from rdflib import RDF, BNode, Literal, Namespace from rocrate_validator.constants import SHACL_NS from rocrate_validator.errors import ROCrateMetadataNotFoundError @@ -28,10 +28,11 @@ RequirementCheckValidationEvent, RequirementLevel, SkipRequirementCheck, + SourceSnippet, ValidationContext, ) from rocrate_validator.requirements.shacl.models import Shape, ShapesRegistry -from rocrate_validator.requirements.shacl.utils import make_uris_relative, resolve_parent_shape +from rocrate_validator.requirements.shacl.utils import build_node_subgraph, make_uris_relative, resolve_parent_shape from rocrate_validator.requirements.shacl.validator import ( SHACLValidationAlreadyProcessed, SHACLValidationContext, @@ -175,6 +176,58 @@ def level(self) -> str: def severity(self) -> str: return self.level.severity + def get_source_snippet(self) -> Optional[SourceSnippet]: + if self._shape is None: + return None + try: + graph = 
self._shape.graph + # build a subgraph containing all the triples related to the shape + subgraph = build_node_subgraph(graph, self._shape.node) + # identify the owner of the shape + owner = self._shape + while getattr(owner, "parent", None) is not None: + owner = owner.parent + # if the shape is not a root shape, include the triples linking the owner to the shape + if owner is not self._shape: + shacl = Namespace(SHACL_NS) + target_predicates = ( + RDF.type, + shacl.targetClass, + shacl.targetNode, + shacl.targetSubjectsOf, + shacl.targetObjectsOf, + shacl.target, + ) + for predicate in target_predicates: + for triple in graph.triples((owner.node, predicate, None)): + subgraph.add(triple) + # follow BNode objects (e.g. sh:target referencing an inline SPARQL target) + _, _, obj = triple + if isinstance(obj, BNode): + subgraph += build_node_subgraph(graph, obj) + # link the owner to the property so the relationship is preserved in the serialization + subgraph.add((owner.node, shacl.property, self._shape.node)) + + # copy bindings so the serialized snippet uses the same prefix declarations as the source file + for prefix, namespace in graph.namespaces(): + subgraph.bind(prefix, namespace, replace=True) + # serialize the subgraph to Turtle format + code = subgraph.serialize(format="turtle") + except Exception as e: + logger.debug("Unable to serialize SHACL shape for check %s: %s", self.identifier, e) + return None + # if the code is bytes, decode it to string + if isinstance(code, bytes): + code = code.decode("utf-8") + # use the shape source file as the source path for the snippet if available + source_path = self.requirement.path if self.requirement else None + # build the source snippet for the check + return SourceSnippet( + language="turtle", + code=code, + source_path=source_path, + ) + def execute_check(self, context: ValidationContext): logger.debug("Starting check %s", self) try: From 23164b74c68ac692808328b3d31928a98560c982 Mon Sep 17 00:00:00 2001 From: 
Marco Enrico Piras Date: Thu, 30 Apr 2026 00:44:34 +0200 Subject: [PATCH 26/89] refactor(SHACL): :recycle: rewrite `build_node_subgraph` as an iterative BNode traversal --- rocrate_validator/requirements/shacl/utils.py | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 6ef1dfef3..3f2e0cac8 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -21,10 +21,10 @@ from rdflib import RDF, BNode, Graph, Namespace from rdflib.term import Node -from rocrate_validator.utils import log as logging -from rocrate_validator.constants import RDF_SYNTAX_NS, SHACL_NS +from rocrate_validator.constants import SHACL_NS from rocrate_validator.errors import BadSyntaxError from rocrate_validator.models import Severity +from rocrate_validator.utils import log as logging if TYPE_CHECKING: from rocrate_validator.requirements.shacl.models import Shape @@ -34,24 +34,23 @@ def build_node_subgraph(graph: Graph, node: Node) -> Graph: - shape_graph = Graph() - shape_graph += graph.triples((node, None, None)) - - # add BNodes - for _, _, o in shape_graph: - shape_graph += graph.triples((o, None, None)) - - # Use the triples method to get all triples that are part of a list - RDF = Namespace(RDF_SYNTAX_NS) - first_predicate = RDF.first - rest_predicate = RDF.rest - shape_graph += graph.triples((None, first_predicate, None)) - shape_graph += graph.triples((None, rest_predicate, None)) - for _, _, object in shape_graph: - shape_graph += graph.triples((object, None, None)) - - # return the subgraph - return shape_graph + """ + Build a subgraph with every triple reachable from ``node`` by following BNode objects. 
+ """ + subgraph = Graph() + visited: set = set() + stack: list = [node] + while stack: + current = stack.pop() + if current in visited: + continue + visited.add(current) + for triple in graph.triples((current, None, None)): + subgraph.add(triple) + _, _, obj = triple + if isinstance(obj, BNode) and obj not in visited: + stack.append(obj) + return subgraph def map_severity(shacl_severity: str) -> Severity: From f3fb7f322244f3d92ddf26a905da8cef38f50e92 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 30 Apr 2026 00:48:01 +0200 Subject: [PATCH 27/89] feat(cli): extend CLI command to describe requirement checks --- rocrate_validator/cli/commands/profiles.py | 210 ++++++++++++++++++--- 1 file changed, 185 insertions(+), 25 deletions(-) diff --git a/rocrate_validator/cli/commands/profiles.py b/rocrate_validator/cli/commands/profiles.py index df4a91e67..7fc826b83 100644 --- a/rocrate_validator/cli/commands/profiles.py +++ b/rocrate_validator/cli/commands/profiles.py @@ -12,20 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import re import sys from pathlib import Path +from typing import Optional from rich.align import Align from rich.markdown import Markdown from rich.padding import Padding from rich.panel import Panel +from rich.syntax import Syntax from rich.table import Table from rocrate_validator import services from rocrate_validator.cli.commands.errors import handle_error from rocrate_validator.cli.main import cli, click from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER -from rocrate_validator.models import (LevelCollection, RequirementLevel, +from rocrate_validator.models import (LevelCollection, Profile, + RequirementCheck, RequirementLevel, Severity) from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.colors import get_severity_color @@ -158,11 +162,13 @@ def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFA '-v', '--verbose', is_flag=True, - help="Show detailed list of requirements", + help="Show detailed list of requirements (or, when a check identifier is given, " + "show the source code of the check)", default=False, show_default=True ) @click.argument("profile-identifier", type=click.STRING, default=DEFAULT_PROFILE_IDENTIFIER, required=True) +@click.argument("check-identifier", type=click.STRING, required=False, default=None) @click.option( '--no-paging', is_flag=True, @@ -174,11 +180,19 @@ def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFA @click.pass_context def describe_profile(ctx, profile_identifier: str = DEFAULT_PROFILE_IDENTIFIER, + check_identifier: Optional[str] = None, profiles_path: Path = DEFAULT_PROFILES_PATH, extra_profiles_path: Path = None, verbose: bool = False, no_paging: bool = False): """ - Show a profile + Show a profile, or — when CHECK_IDENTIFIER is given — show a single requirement check. + + \b + The check identifier accepts either form: + * relative: <requirement>.<check> (e.g. "1.2") + * full: <profile>_<requirement>.<check> (e.g. 
"ro-crate-1.1_1.2") + + With -v on a single check, the source code of the check is shown. """ # Get the console console = ctx.obj['console'] @@ -197,6 +211,14 @@ def describe_profile(ctx, profile = services.get_profile(profile_identifier, profiles_path=profiles_path, extra_profiles_path=extra_profiles_path) + # Single-check view + if check_identifier: + check = __resolve_check__(profile, check_identifier) + with console.pager(pager=pager, styles=not console.no_color) if enable_pager else console: + console.print(get_app_header_rule()) + __describe_check__(console, profile, check, verbose=verbose) + return + # Set the subheader title subheader_title = f"[bold][cyan]Profile:[/cyan] [magenta italic]{profile.identifier}[/magenta italic][/bold]" @@ -237,6 +259,9 @@ def describe_profile(ctx, title_align="left", border_style="cyan"), (0, 1, 0, 1))) console.print(Padding(table, (1, 1))) + except click.ClickException: + # Let click format usage errors natively (e.g., BadParameter from check resolution) + raise except Exception as e: handle_error(e, console) @@ -313,28 +338,7 @@ def __verbose_describe_profile__(profile): color = get_severity_color(check.severity) level_info = f"[{color}]{check.severity.name}[/{color}]" levels_list.add(level_info) - override = None - # Uncomment the following lines to show the overridden checks - # if check.overridden_by: - # logger.debug("Check %s is overridden by: %s", check.identifier, check.overridden_by) - # override = "[overridden by: " - # for co in check.overridden_by: - # severity_color = get_severity_color(co.severity) - # override += f"[bold][magenta]{co.requirement.profile.identifier}[/magenta] "\ - # f"[{severity_color}]{co.relative_identifier}[/{severity_color}][/bold]" - # if co != check.overridden_by[-1]: - # override += ", " - # override += "]" - if check.overrides: - logger.debug("Check %s overrides: %s", check.identifier, check.overrides) - override = "[" + "overrides: " - for co in check.overrides: - severity_color = 
get_severity_color(co.severity) - override += f"[bold][magenta]{co.requirement.profile.identifier}[/magenta] " - f"[{severity_color}]{co.relative_identifier}[/{severity_color}][/bold]" - if co != check.overrides[-1]: - override += ", " - override += "]" + override = __format_overrides__(check.overrides, label="overrides") if check.overrides else None description_table = Table(show_header=False, show_footer=False, show_lines=False, show_edge=False) if override: @@ -367,3 +371,159 @@ def __verbose_describe_profile__(profile): for row in table_rows: table.add_row(*row) return table + + +_CHECK_ID_RE = re.compile(r"^(?P<req>\d+)\.(?P<check>\d+)$") + + +def __resolve_check__(profile: Profile, check_identifier: str) -> RequirementCheck: + """ + Resolve a check identifier to a RequirementCheck instance. + Accepts either the relative form ``<requirement>.<check>`` or the full form + ``<profile>_<requirement>.<check>``. + """ + raw = check_identifier.strip() + relative = raw + prefix = f"{profile.identifier}_" + if "_" in raw: + if not raw.startswith(prefix): + raise click.BadParameter( + f"Check identifier '{raw}' does not belong to profile '{profile.identifier}'.", + param_hint="CHECK_IDENTIFIER", + ) + relative = raw[len(prefix):] + + match = _CHECK_ID_RE.match(relative) + if not match: + raise click.BadParameter( + f"Invalid check identifier '{check_identifier}'. " + f"Expected '<requirement>.<check>' (e.g. '1.2') or " + f"'<profile>_<requirement>.<check>' (e.g. '{profile.identifier}_1.2').", + param_hint="CHECK_IDENTIFIER", + ) + req_number = int(match.group("req")) + check_number = int(match.group("check")) + + requirement = next( + (r for r in profile.requirements if not r.hidden and r.order_number == req_number), + None, + ) + if requirement is None: + raise click.BadParameter( + f"No requirement #{req_number} in profile '{profile.identifier}'. 
" + f"Run `rocrate-validator profiles describe {profile.identifier}` to list requirements.", + param_hint="CHECK_IDENTIFIER", + ) + check = next( + (c for c in requirement.get_checks() if c.order_number == check_number), + None, + ) + if check is None: + raise click.BadParameter( + f"No check #{check_number} in requirement #{req_number} of profile " + f"'{profile.identifier}'. Run `rocrate-validator profiles describe " + f"{profile.identifier} -v` to list checks.", + param_hint="CHECK_IDENTIFIER", + ) + return check + + +def __format_overrides__(checks: list, label: str) -> str: + """ + Format an "overrides" / "overridden by" Rich-styled string for a list of checks. + """ + parts = [] + for co in checks: + severity_color = get_severity_color(co.severity) + parts.append( + f"[bold][magenta]{co.requirement.profile.identifier}[/magenta] " + f"[{severity_color}]{co.relative_identifier}[/{severity_color}][/bold]" + ) + return f"[bold red]{label}:[/bold red] " + ", ".join(parts) + + +def __describe_check__(console, profile: Profile, check: RequirementCheck, verbose: bool = False) -> None: + """ + Render a single requirement check. 
+ """ + severity_color = get_severity_color(check.severity) + requirement = check.requirement + + header = ( + f"[bold cyan]Profile:[/bold cyan] " + f"[italic magenta]{profile.identifier}[/italic magenta]\n" + f"[bold cyan]Identifier:[/bold cyan] " + f"[italic green]{check.identifier}[/italic green]\n" + f"[bold cyan]Name:[/bold cyan] [italic]{check.name}[/italic]\n" + f"[bold cyan]Severity:[/bold cyan] " + f"[bold {severity_color}]{check.severity.name}[/bold {severity_color}]\n" + f"[bold cyan]Requirement:[/bold cyan] " + f"[italic]#{requirement.order_number} — {requirement.name}[/italic]" + ) + if requirement.path: + header += ( + "\n[bold cyan]Source file:[/bold cyan] " + f"[italic green]{shorten_path(requirement.path)}[/italic green]" + ) + + title = f"[bold][cyan]Check:[/cyan] [magenta italic]{check.identifier}[/magenta italic][/bold]" + console.print(Padding( + Panel(header, title=title, padding=(1, 1, 1, 1), title_align="left", border_style="cyan"), + (0, 1, 0, 1), + )) + + description_panel = Panel( + Markdown(check.description.strip()), + title="[bold cyan]Description[/bold cyan]", + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ) + console.print(Padding(description_panel, (1, 1, 0, 1))) + + if check.overrides: + overrides_text = __format_overrides__(check.overrides, label="overrides") + console.print(Padding(Panel( + overrides_text, + title="[bold cyan]Overrides[/bold cyan]", + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), (1, 1, 0, 1))) + if check.overridden_by: + overridden_text = __format_overrides__(check.overridden_by, label="overridden by") + console.print(Padding(Panel( + overridden_text, + title="[bold cyan]Overridden by[/bold cyan]", + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), (1, 1, 0, 1))) + + if verbose: + snippet = check.get_source_snippet() + if snippet is None: + console.print(Padding(Panel( + "[italic]Source code not available for this 
check kind.[/italic]", + title="[bold cyan]Source[/bold cyan]", + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), (1, 1, 0, 1))) + else: + source_title = f"[bold cyan]Source ({snippet.language})[/bold cyan]" + if snippet.source_path: + source_title += f': [italic green]"{snippet.source_path.name}"[/italic green]' + console.print(Padding(Panel( + Syntax( + snippet.code, + snippet.language, + theme="ansi_dark", + line_numbers=False, + word_wrap=True, + ), + title=source_title, + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), (1, 1, 1, 1))) From 887e2a808d743a603f574b11e45d86b8e9060615 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 30 Apr 2026 11:15:52 +0200 Subject: [PATCH 28/89] test(cli): :white_check_mark: add unit tests for the `describe profile ` subcommand --- tests/test_cli.py | 144 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index fc897e098..028f72705 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -19,6 +19,10 @@ from click.testing import CliRunner from pytest import fixture +from rocrate_validator import services +from rocrate_validator.requirements.python import PyFunctionCheck +from rocrate_validator.requirements.shacl.checks import SHACLCheck + from rocrate_validator.utils import log as logging from rocrate_validator.cli.main import cli from rocrate_validator.utils.versioning import get_version @@ -157,3 +161,143 @@ def test_extra_profiles_list(cli_runner: CliRunner, fake_profiles_path: Path): assert result.exit_code == 0 # assert "Available profiles:" in result.output assert "Profile A" in result.output # Check for a known extra profile + + +# Profile used for `profiles describe` tests. 
+_DESCRIBE_TEST_PROFILE = "ro-crate-1.1" + + +def _first_visible_check(): + """Return the first non-hidden (Python-backed) check of the test profile.""" + profile = services.get_profile(_DESCRIBE_TEST_PROFILE) + for requirement in profile.requirements: + if requirement.hidden: + continue + for check in requirement.get_checks(): + if isinstance(check, PyFunctionCheck): + return profile, requirement, check + raise RuntimeError("No Python-backed check found in test profile") + + +def _first_shacl_check(): + """Return the first non-hidden SHACL-backed check of the test profile.""" + profile = services.get_profile(_DESCRIBE_TEST_PROFILE) + for requirement in profile.requirements: + if requirement.hidden: + continue + for check in requirement.get_checks(): + if isinstance(check, SHACLCheck): + return profile, requirement, check + raise RuntimeError("No SHACL-backed check found in test profile") + + +def test_profiles_describe_default(cli_runner: CliRunner): + """The default describe view (no check id) shows the profile compact view.""" + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, "--no-paging"]) + assert result.exit_code == 0 + assert _DESCRIBE_TEST_PROFILE in result.output + assert "Profile Requirements" in result.output + + +def test_profiles_describe_verbose(cli_runner: CliRunner): + """The verbose describe view (no check id) shows individual check identifiers.""" + _, _, check = _first_visible_check() + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, "-v", "--no-paging"]) + assert result.exit_code == 0 + assert check.identifier in result.output + + +def test_describe_check_relative_id(cli_runner: CliRunner): + """Resolving a check by '.' 
renders the single-check view.""" + _, requirement, check = _first_visible_check() + relative = f"{requirement.order_number}.{check.order_number}" + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "--no-paging"]) + assert result.exit_code == 0, result.output + assert check.identifier in result.output + assert check.severity.name in result.output + + +def test_describe_check_full_id(cli_runner: CliRunner): + """Resolving a check by full '_.'.""" + _, _, check = _first_visible_check() + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, check.identifier, "--no-paging"]) + assert result.exit_code == 0, result.output + assert check.identifier in result.output + + +def test_describe_check_unknown(cli_runner: CliRunner): + """An out-of-range check id produces a usage error with a hint.""" + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, "99.99", "--no-paging"]) + assert result.exit_code == 2 + assert "No requirement #99" in result.output + + +def test_describe_check_bad_format(cli_runner: CliRunner): + """A non-numeric check id is rejected with a format hint.""" + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, "not-an-id", "--no-paging"]) + assert result.exit_code == 2 + assert "Invalid check identifier" in result.output + + +def test_describe_check_profile_mismatch(cli_runner: CliRunner): + """A full id whose prefix doesn't match the requested profile is rejected.""" + result = cli_runner.invoke( + cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, "some-other-profile_1.1", "--no-paging"] + ) + assert result.exit_code == 2 + assert "does not belong to profile" in result.output + + +def test_describe_check_verbose_python(cli_runner: CliRunner): + """Verbose single-check view on a Python-backed check shows the function source.""" + _, requirement, check = _first_visible_check() + relative = 
f"{requirement.order_number}.{check.order_number}" + result = cli_runner.invoke( + cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"] + ) + assert result.exit_code == 0, result.output + assert "Source" in result.output + # The decorated check function is what gets serialized + assert "@check" in result.output + + +def test_describe_check_verbose_shacl(cli_runner: CliRunner): + """Verbose single-check view on a SHACL-backed check shows turtle source.""" + _, requirement, check = _first_shacl_check() + relative = f"{requirement.order_number}.{check.order_number}" + result = cli_runner.invoke( + cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"] + ) + assert result.exit_code == 0, result.output + assert "Source" in result.output + # SHACL serialized as turtle should contain a sh: prefix and a NodeShape/PropertyShape declaration + assert "sh:" in result.output + + +def test_describe_check_verbose_shacl_includes_target(cli_runner: CliRunner): + """For nested PropertyShape checks, the snippet must include the owning NodeShape's target.""" + profile = services.get_profile(_DESCRIBE_TEST_PROFILE) + nested = None + for requirement in profile.requirements: + if requirement.hidden: + continue + for check in requirement.get_checks(): + if isinstance(check, SHACLCheck) and getattr(check._shape, "parent", None) is not None: + nested = (requirement, check) + break + if nested: + break + if nested is None: + # No nested PropertyShape check available in this profile; nothing to assert here. + return + requirement, check = nested + relative = f"{requirement.order_number}.{check.order_number}" + result = cli_runner.invoke( + cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"] + ) + assert result.exit_code == 0, result.output + # The snippet must surface the owning shape's target declaration so the user can see + # what the property check applies to. 
+ assert any(t in result.output for t in ("sh:targetClass", "sh:targetNode", + "sh:targetSubjectsOf", "sh:targetObjectsOf", + "sh:target ")) From f1c0cfdd1a58793c87abf510696068ce1b29e64e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 6 May 2026 13:23:50 +0200 Subject: [PATCH 29/89] fix(shacl): :bug: build property shape subgraph by reachability Rebuild `ShapesList.get_shape_property_graph` to include only triples reachable from the target property shape (constraints and RDF lists used by sh:and/sh:or/sh:xone) plus the `sh:property` link triple, instead of subtracting sibling properties from the node-shape graph. The previous subtractive approach could leak shared blank nodes and risked breaking sibling constructs in the merged shapes graph. --- rocrate_validator/requirements/shacl/utils.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 4200bd08c..6ef1dfef3 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -190,20 +190,23 @@ def get_shape_graph(self, shape_node: Node) -> Graph: def get_shape_property_graph(self, shape_node: Node, shape_property: Node) -> Graph: """ - Get the subgraph of the given shape node excluding the given property + Get the subgraph of a property shape nested inside a node shape. + + Includes only triples reachable from `shape_property` (its constraints + and any RDF lists used by `sh:and`/`sh:or`/`sh:xone`), plus the link + triple `(shape_node, sh:property, shape_property)`. Nothing reachable + only via sibling properties is included, so subtracting this graph + from the merged shapes graph cannot break sibling constructs. 
""" node_graph = self.get_shape_graph(shape_node) assert node_graph is not None, "The shape graph cannot be None" property_graph = Graph() - shacl_ns = Namespace(SHACL_NS) - nested_properties_to_exclude = [o for (_, _, o) in node_graph.triples( - (shape_node, shacl_ns.property, None)) if o != shape_property] - triples_to_exclude = [(s, _, o) for (s, _, o) in node_graph.triples((None, None, None)) - if s in nested_properties_to_exclude - or o in nested_properties_to_exclude] + for s, p, o in __extract_related_triples__(node_graph, shape_property): + property_graph.add((s, p, o)) - property_graph += node_graph - triples_to_exclude + shacl_ns = Namespace(SHACL_NS) + property_graph.add((shape_node, shacl_ns.property, shape_property)) return property_graph From 4f7ec5a65fbe890d368514f06ab156b96d9d76d1 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 6 May 2026 15:34:18 +0200 Subject: [PATCH 30/89] test(utils): :white_check_mark: add unit tests for the `get_shape_property_graph` method --- tests/unit/requirements/test_shacl_utils.py | 199 ++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 tests/unit/requirements/test_shacl_utils.py diff --git a/tests/unit/requirements/test_shacl_utils.py b/tests/unit/requirements/test_shacl_utils.py new file mode 100644 index 000000000..6a64c7cc0 --- /dev/null +++ b/tests/unit/requirements/test_shacl_utils.py @@ -0,0 +1,199 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +Unit tests for ``ShapesList.get_shape_property_graph``. + +The method must return a subgraph that: +* contains every triple reachable from the property shape (its constraints + and any RDF lists used by ``sh:and``/``sh:or``/``sh:xone``); +* contains the link triple ``(shape_node, sh:property, shape_property)``; +* does NOT contain triples that belong only to sibling property shapes +""" + +import pytest +from rdflib import RDF, BNode, Graph, Literal, Namespace, URIRef +from rdflib.collection import Collection + +from rocrate_validator.constants import SHACL_NS +from rocrate_validator.requirements.shacl.utils import load_shapes_from_graph + +SH = Namespace(SHACL_NS) +EX = Namespace("http://example.org/") + + +def _build_two_property_shape() -> tuple[Graph, URIRef, URIRef, URIRef]: + """ + Build a NodeShape with two sibling property shapes. + + Returns ``(graph, node_shape, prop_a, prop_b)``. + + Each property shape is a BNode owning its own ``sh:path``, + ``sh:datatype``, ``sh:minCount`` constraints. 
+ """ + g = Graph() + g.bind("sh", SH) + g.bind("ex", EX) + + node_shape = EX.PersonShape + g.add((node_shape, RDF.type, SH.NodeShape)) + g.add((node_shape, SH.targetClass, EX.Person)) + + prop_a = BNode("propA") + g.add((node_shape, SH.property, prop_a)) + g.add((prop_a, SH.path, EX.name)) + g.add((prop_a, SH.datatype, EX.stringType)) + g.add((prop_a, SH.minCount, Literal(1))) + + prop_b = BNode("propB") + g.add((node_shape, SH.property, prop_b)) + g.add((prop_b, SH.path, EX.age)) + g.add((prop_b, SH.datatype, EX.intType)) + g.add((prop_b, SH.minCount, Literal(0))) + + return g, node_shape, prop_a, prop_b + + +def test_returns_link_triple_to_target_property(): + """The link ``(node_shape, sh:property, shape_property)`` must be present.""" + g, node_shape, prop_a, prop_b = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + + # The link to the prop_a shape must be present + assert (node_shape, SH.property, prop_a) in pg + # but not the link to the prop_b shape. + assert (node_shape, SH.property, prop_b) not in pg + + +def test_includes_all_constraints_of_target_property(): + """All triples whose subject is the target property shape must be included.""" + g, node_shape, prop_a, _ = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + + assert (prop_a, SH.path, EX.name) in pg + assert (prop_a, SH.datatype, EX.stringType) in pg + assert (prop_a, SH.minCount, Literal(1)) in pg + + +def test_excludes_sibling_property_link_and_constraints(): + """ + Sibling property shapes and their link triples must not appear in the + returned subgraph. This is the regression the new implementation fixes. 
+ """ + g, node_shape, prop_a, prop_b = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + + # Sibling link triple must not be present. + assert (node_shape, SH.property, prop_b) not in pg + # Sibling constraints must not be present. + assert (prop_b, SH.path, EX.age) not in pg + assert (prop_b, SH.datatype, EX.intType) not in pg + assert (prop_b, SH.minCount, Literal(0)) not in pg + + +def test_subtraction_preserves_sibling_property_link(): + """ + Subtracting the returned subgraph from the merged shapes graph must + leave the sibling property's link to the parent NodeShape intact + """ + g, node_shape, prop_a, prop_b = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + remaining = shapes_list.shapes_graph - pg + + # The sibling property is still linked to the NodeShape. + assert (node_shape, SH.property, prop_b) in remaining + # And so are its constraints. + assert (prop_b, SH.path, EX.age) in remaining + + +def test_does_not_include_unrelated_node_shape_triples(): + """ + Triples on the parent NodeShape that are not the target link must + not be pulled in (e.g. ``sh:targetClass``). + """ + g, node_shape, prop_a, _ = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + + assert (node_shape, SH.targetClass, EX.Person) not in pg + assert (node_shape, RDF.type, SH.NodeShape) not in pg + + +def test_includes_rdf_list_triples_for_sh_or(): + """ + When the property shape uses ``sh:or`` (an RDF list), the list spine + (``rdf:first``/``rdf:rest``) and every list member must be reachable + in the returned subgraph. 
+ """ + g = Graph() + node_shape = EX.SomeShape + g.add((node_shape, RDF.type, SH.NodeShape)) + + prop = BNode("prop") + g.add((node_shape, SH.property, prop)) + g.add((prop, SH.path, EX.something)) + + member_a = BNode("memberA") + g.add((member_a, SH.datatype, EX.t1)) + member_b = BNode("memberB") + g.add((member_b, SH.datatype, EX.t2)) + + list_head = BNode("listHead") + Collection(g, list_head, [member_a, member_b]) + g.add((prop, SH["or"], list_head)) + + shapes_list = load_shapes_from_graph(g) + pg = shapes_list.get_shape_property_graph(node_shape, prop) + + # The sh:or link is reachable from the property. + assert (prop, SH["or"], list_head) in pg + # Both list members and their constraints are reachable. + assert (member_a, SH.datatype, EX.t1) in pg + assert (member_b, SH.datatype, EX.t2) in pg + # The RDF list spine is included so the list can be re-walked. + list_spine_subjects = {s for s, _, _ in pg.triples((None, RDF.first, None))} + assert list_head in list_spine_subjects + + +def test_only_target_link_present_when_node_has_multiple_properties(): + """ + The graph must contain exactly one ``sh:property`` triple originating + from the parent NodeShape — the one pointing at the target property. 
+ """ + g, node_shape, prop_a, _ = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + + property_links = list(pg.triples((node_shape, SH.property, None))) + assert len(property_links) == 1 + assert property_links[0] == (node_shape, SH.property, prop_a) + + +def test_unknown_shape_node_raises(): + """A shape node not in the registry should raise ``KeyError``.""" + g, _, prop_a, _ = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + with pytest.raises(KeyError): + shapes_list.get_shape_property_graph(EX.UnknownShape, prop_a) From e43364b863dfea5f406902f19aec92673824aeac Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 6 May 2026 17:07:34 +0200 Subject: [PATCH 31/89] refactor(models): :recycle: introduce pre/post internal validation hooks on Validator --- rocrate_validator/models.py | 50 ++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 60614a7ec..18bc83ba1 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1143,6 +1143,32 @@ def __get_requirement_loader__(cls, profile: Profile, requirement_path: Path) -> setattr(profile, loader_instance_name, loader_instance) return loader_instance + @staticmethod + def __get_requirement_classes__() -> list[Type[Requirement]]: + + # Ensure known requirement modules are imported so subclasses are registered. 
+ for requirement_type in ("python", "shacl"): + module_name = f"rocrate_validator.requirements.{requirement_type}" + try: + importlib.import_module(module_name) + except Exception: + logger.debug( + "Unable to import requirement module: %s", + module_name, + exc_info=True, + ) + + def all_subclasses( + base_class: Type[Requirement], + ) -> list[Type[Requirement]]: + result: list[Type[Requirement]] = [] + for subcls in base_class.__subclasses__(): + result.append(subcls) + result.extend(all_subclasses(subcls)) + return result + + return all_subclasses(Requirement) + @staticmethod def load_requirements(profile: Profile, severity: Severity = None) -> list[Requirement]: """ @@ -2682,7 +2708,8 @@ def __do_validate__(self, # register the current context self.__current_context__ = context - try: + # initialize the requirement types + self.__invoke_pre_validation_hooks__(context) # set the profiles to validate against profiles = context.profiles @@ -2720,14 +2747,31 @@ def __do_validate__(self, self.notify(ProfileValidationEvent(EventType.PROFILE_VALIDATION_END, profile=profile)) if terminate: break - self.notify(ValidationEvent(EventType.VALIDATION_END, - validation_result=context.result)) + # finalize the requirement types + self.__invoke_post_validation_hooks__(context) + # notify the end of the validation + self.notify(ValidationEvent(EventType.VALIDATION_END, validation_result=context.result)) + # return the validation result return context.result finally: # clear the current context self.__current_context__ = None + def __invoke_pre_validation_hooks__(self, context: ValidationContext): + logger.debug("Initializing requirement types: starting...") + requirements_types = RequirementLoader.__get_requirement_classes__() + for requirement_type in requirements_types: + requirement_type.initialize(context) + logger.debug("Initializing requirement types: completed") + + def __invoke_post_validation_hooks__(self, context: ValidationContext): + logger.debug("Finalizing 
requirement types: starting...") + requirements_types = RequirementLoader.__get_requirement_classes__() + for requirement_type in requirements_types: + requirement_type.finalize(context) + logger.debug("Finalizing requirement types: completed") + def notify(self, event: Union[Event, EventType]): """ Override notify to update statistics """ assert self.__current_context__ is not None, "No current validation context" From 439c68fbb81b8347fd4a87de3a5d8246c39401c6 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 6 May 2026 17:28:29 +0200 Subject: [PATCH 32/89] test(model): :white_check_mark: add unit tests for the pre/post internal validation hooks --- tests/unit/test_requirement_lifecycle.py | 140 +++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 tests/unit/test_requirement_lifecycle.py diff --git a/tests/unit/test_requirement_lifecycle.py b/tests/unit/test_requirement_lifecycle.py new file mode 100644 index 000000000..15c6d224c --- /dev/null +++ b/tests/unit/test_requirement_lifecycle.py @@ -0,0 +1,140 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from rocrate_validator import services +from rocrate_validator.models import RequirementLoader, Severity, ValidationContext, ValidationSettings +from tests.ro_crates import InvalidRootDataEntity + + +class _RequirementTypeSpy: + """Stand-in for a Requirement subclass that records lifecycle hook calls.""" + + def __init__(self, name: str, timeline: list): + self.__name__ = name + self._timeline = timeline + self.calls: list[tuple[str, ValidationContext]] = [] + + def initialize(self, context: ValidationContext) -> None: + self.calls.append(("initialize", context)) + self._timeline.append(("initialize", self.__name__)) + + def finalize(self, context: ValidationContext) -> None: + self.calls.append(("finalize", context)) + self._timeline.append(("finalize", self.__name__)) + + +@pytest.fixture +def validation_settings(): + return ValidationSettings( + rocrate_uri=str(InvalidRootDataEntity().invalid_root_type), + requirement_severity=Severity.OPTIONAL, + abort_on_first=False, + ) + + +@pytest.fixture +def lifecycle_spies(monkeypatch): + """ + Replace the registered requirement classes with two spy stand-ins. + + Returns (spies, timeline) where timeline records the global ordering + of hook invocations across all spies. + """ + timeline: list[tuple[str, str]] = [] + spies = [ + _RequirementTypeSpy("SpyTypeA", timeline), + _RequirementTypeSpy("SpyTypeB", timeline), + ] + monkeypatch.setattr( + RequirementLoader, + "__get_requirement_classes__", + staticmethod(lambda: spies), + ) + return spies, timeline + + +def test_initialize_and_finalize_called_once_per_requirement_type(lifecycle_spies, validation_settings): + """ + Check that each requirement type's initialize and + finalize hooks are called exactly once per validation run. 
+ """ + spies, _ = lifecycle_spies + + services.validate(validation_settings) + + for spy in spies: + events = [evt for evt, _ in spy.calls] + assert events == ["initialize", "finalize"], ( + f"{spy.__name__} expected exactly one initialize then one finalize, got {events}" + ) + + +def test_lifecycle_hooks_receive_the_same_validation_context(lifecycle_spies, validation_settings): + """ + Check that all lifecycle hooks receive the same ValidationContext instance. + This ensures that the context is properly shared across all requirements. + """ + spies, _ = lifecycle_spies + + services.validate(validation_settings) + + contexts = [ctx for spy in spies for _, ctx in spy.calls] + assert contexts, "No lifecycle hook was invoked" + first = contexts[0] + assert isinstance(first, ValidationContext) + assert all(ctx is first for ctx in contexts), ( + "All initialize/finalize invocations must share the same ValidationContext" + ) + + +def test_all_initialize_hooks_run_before_any_finalize_hook(lifecycle_spies, validation_settings): + """ + Check that all initialize hooks are called before any finalize hook is called. + This ensures that the context is fully initialized before any requirement starts finalizing. + """ + _, timeline = lifecycle_spies + + services.validate(validation_settings) + + init_indices = [i for i, (evt, _) in enumerate(timeline) if evt == "initialize"] + finalize_indices = [i for i, (evt, _) in enumerate(timeline) if evt == "finalize"] + assert init_indices and finalize_indices, "Lifecycle hooks were not all triggered" + assert max(init_indices) < min(finalize_indices), ( + f"Expected every initialize to precede every finalize, got timeline {timeline}" + ) + + +def test_lifecycle_hooks_invoked_exactly_once_per_validation_run(lifecycle_spies, validation_settings): + """ + Run validation multiple times and check that each spy receives exactly one + initialize+finalize pair per run. 
+ """ + + # extract spies from fixture + spies, _ = lifecycle_spies + + # run validation multiple times and + # check that each spy receives exactly one initialize+finalize pair per run + runs = 3 + for _ in range(runs): + services.validate(validation_settings) + + for spy in spies: + events = [evt for evt, _ in spy.calls] + assert events == ["initialize", "finalize"] * runs, ( + f"{spy.__name__} should receive exactly one initialize+finalize " + f"pair per validation run (got {events} across {runs} runs)" + ) From bcb5cac7cf26225adc710236756433cb81f170d5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 6 May 2026 17:31:15 +0200 Subject: [PATCH 33/89] fix(shacl): :bug: evaluate inherited shapes for zero-shape target profiles Override `SHACLRequirement.finalize` to force one pyshacl run on the merged shapes graph when the target profile contributes no SHACL checks of its own (e.g. extension profiles that purely inherit or only deactivate). Without it, the main loop never triggers a SHACL run and inherited shapes are silently skipped. 
--- .../requirements/shacl/requirements.py | 70 ++++++++++++++++++- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index e85bc3ec3..3133c11cd 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -19,8 +19,14 @@ from rocrate_validator.utils import log as logging from rocrate_validator.constants import VALIDATOR_NS -from rocrate_validator.models import (Profile, Requirement, RequirementCheck, - RequirementLevel, RequirementLoader) +from rocrate_validator.models import ( + Profile, + Requirement, + RequirementCheck, + RequirementLevel, + RequirementLoader, + ValidationContext, +) from rocrate_validator.requirements.shacl.checks import SHACLCheck from rocrate_validator.requirements.shacl.models import Shape, ShapesRegistry @@ -82,9 +88,67 @@ def hidden(self) -> bool: return True return False + @classmethod + def finalize(cls, context: ValidationContext) -> None: + """ " + Finalize the SHACL requirement by ensuring that a SHACL validation run is triggered for the target profile + if it has no shapes of its own (e.g. an extension profile that purely inherits or only deactivates). + + SHACL is normally driven by the first execute_check of a check + belonging to the target profile (see SHACLValidationContextManager). + If the target has zero SHACL checks of its own (e.g. an extension + profile that purely inherits or only deactivates), no pyshacl run + is ever triggered and inherited shapes are never evaluated. + Force one final run on the merged shapes graph in that case. 
+ """ + + logger.debug("Starting %s requirement finalization for context %s", cls.__name__, context) + + # extract profiles and target profile from context + profiles = context.profiles + + from rocrate_validator.requirements.shacl.checks import SHACLCheck + from rocrate_validator.requirements.shacl.validator import SHACLValidationContext + + target = next((p for p in profiles if p.identifier == context.settings.profile_identifier), None) + if target is None: + return + + shacl_context = SHACLValidationContext.get_instance(context) + # If pyshacl already ran for the target during the main loop there is + # nothing to do. + if shacl_context.get_validation_result(target) is not None: + return + + # Pick any SHACLCheck across the loaded profiles to drive the run; the + # check identity is only used for logging inside __do_execute_check__, + # the actual validation is graph-wide. + runner = next( + (c for p in profiles for r in p.requirements for c in r.get_checks() if isinstance(c, SHACLCheck)), + None, + ) + if runner is None: + return + + # Make sure the target's shapes (if any) are in the merged registry + # and switch the current profile so violations are attributed under + # the target profile in the report. 
+ shacl_context.__set_current_validation_profile__(target) + shacl_context._current_validation_profile = target + try: + runner.__do_execute_check__(shacl_context) + except Exception as e: + logger.warning("Forced SHACL run for zero-shape target profile %s failed: %s", target.identifier, e) + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + finally: + shacl_context.__unset_current_validation_profile__() + + # do finalization logic here (empty for now) + logger.debug("Completed %s requirement finalization for context %s", cls.__name__, context) -class SHACLRequirementLoader(RequirementLoader): +class SHACLRequirementLoader(RequirementLoader): def __init__(self, profile: Profile): super().__init__(profile) self._shape_registry = ShapesRegistry.get_instance(profile) From 93e2b0bba3f6db0724b1e72410d0a0a0d14713ec Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 6 May 2026 17:39:56 +0200 Subject: [PATCH 34/89] test(shacl): :white_check_mark: regression test for zero-shape target profile Add the `c-wrapper` profile fixture (pure inheritance from `c`, no own shapes) and a test asserting that validation still drives a pyshacl run on the merged shapes graph so inherited shapes are evaluated for the target profile. --- .../data/profiles/fake/c-wrapper/profile.ttl | 30 ++++++++++++++++ tests/unit/requirements/test_profiles.py | 34 ++++++++++++++++--- 2 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 tests/data/profiles/fake/c-wrapper/profile.ttl diff --git a/tests/data/profiles/fake/c-wrapper/profile.ttl b/tests/data/profiles/fake/c-wrapper/profile.ttl new file mode 100644 index 000000000..c44d1241b --- /dev/null +++ b/tests/data/profiles/fake/c-wrapper/profile.ttl @@ -0,0 +1,30 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dct: . +@prefix prof: . +@prefix rdfs: . + +# A "wrapper" extension profile that inherits from c without declaring any +# SHACL shape of its own. Used to exercise the zero-shape target profile +# path: pyshacl must still run for inherited shapes to be evaluated. + + a prof:Profile ; + rdfs:label "Profile C-wrapper" ; + rdfs:comment """Pure inheritance from Profile C, no own checks."""@en ; + dct:publisher ; + prof:isProfileOf ; + prof:isTransitiveProfileOf , ; + prof:hasToken "c-wrapper" ; +. diff --git a/tests/unit/requirements/test_profiles.py b/tests/unit/requirements/test_profiles.py index 424b6419d..e9f33795e 100644 --- a/tests/unit/requirements/test_profiles.py +++ b/tests/unit/requirements/test_profiles.py @@ -16,13 +16,11 @@ import os import pytest +from requirements.shacl.checks import SHACLCheck from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER -from rocrate_validator.errors import (DuplicateRequirementCheck, - InvalidProfilePath, - ProfileSpecificationError) -from rocrate_validator.models import (Profile, ValidationContext, - ValidationSettings, Validator) +from rocrate_validator.errors import DuplicateRequirementCheck, InvalidProfilePath, ProfileSpecificationError +from rocrate_validator.models import Profile, ValidationContext, ValidationSettings, Validator from tests.ro_crates import InvalidFileDescriptorEntity, ValidROC # set up logging @@ -297,6 +295,32 @@ def test_load_valid_profile_with_override_on_inherited_profile(fake_profiles_pat assert len(requirements_checks) == 3, "The number of requirements should be 2" 
+def test_zero_shape_target_profile_triggers_pyshacl_run(fake_profiles_path: str): + """Regression test for the 0-shape profile bug: + when the target profile has no SHACL checks of its own, + Validator must still drive a single pyshacl run + on the merged shapes graph so inherited shapes get evaluated. + Without the fix in `Validator.__ensure_target_shacl_run__`, + no SHACLCheck would be recorded as executed for the wrapper target.""" + + settings = ValidationSettings(**{ + "profiles_path": fake_profiles_path, + "profile_identifier": "c-wrapper", + "rocrate_uri": ValidROC().wrroc_paper, + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + "disable_check_for_duplicates": True, + }) + result = Validator(settings).validate() + + executed_shacl = [c for c in result.executed_checks + if isinstance(c, SHACLCheck)] + assert executed_shacl, ( + "Expected at least one inherited SHACLCheck to be executed for the " + "c-wrapper target. None recorded — the zero-shape pyshacl run was " + "skipped.") + + def test_profile_parents(check_overriding_profiles_path: str): """Test the order of the loaded profiles.""" logger.debug("The profiles path: %r", check_overriding_profiles_path) From 04d05525e3dbd4f74e5ae81c07424a667aff64dd Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 6 May 2026 18:04:06 +0200 Subject: [PATCH 35/89] fix(model): :rotating_light: add missing methods and reformat code --- rocrate_validator/models.py | 124 +++++++++++++++++++++++++----------- 1 file changed, 87 insertions(+), 37 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 18bc83ba1..bfc9459f4 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -16,6 +16,7 @@ import bisect import enum +import importlib import inspect import json import re @@ -25,35 +26,43 @@ from datetime import datetime, timezone from functools import total_ordering from pathlib import Path -from typing import Optional, Protocol, 
Tuple, Union +from typing import Optional, Protocol, Tuple, Type, Union from urllib.error import HTTPError import enum_tools from rdflib import RDF, RDFS, Graph, Namespace, URIRef from rocrate_validator import __version__ -from rocrate_validator.constants import (DEFAULT_HTTP_CACHE_MAX_AGE, - DEFAULT_ONTOLOGY_FILE, - DEFAULT_PROFILE_IDENTIFIER, - DEFAULT_PROFILE_README_FILE, - IGNORED_PROFILE_DIRECTORIES, - JSON_OUTPUT_FORMAT_VERSION, PROF_NS, - PROFILE_FILE_EXTENSIONS, - PROFILE_SPECIFICATION_FILE, - ROCRATE_METADATA_FILE, SCHEMA_ORG_NS) -from rocrate_validator.errors import (DuplicateRequirementCheck, - InvalidProfilePath, ProfileNotFound, - ProfileSpecificationError, - ProfileSpecificationNotFound, - ROCrateMetadataNotFoundError) +from rocrate_validator.constants import ( + DEFAULT_HTTP_CACHE_MAX_AGE, + DEFAULT_ONTOLOGY_FILE, + DEFAULT_PROFILE_IDENTIFIER, + DEFAULT_PROFILE_README_FILE, + IGNORED_PROFILE_DIRECTORIES, + JSON_OUTPUT_FORMAT_VERSION, + PROF_NS, + PROFILE_FILE_EXTENSIONS, + PROFILE_SPECIFICATION_FILE, + ROCRATE_METADATA_FILE, + SCHEMA_ORG_NS, +) +from rocrate_validator.errors import ( + DuplicateRequirementCheck, + InvalidProfilePath, + ProfileNotFound, + ProfileSpecificationError, + ProfileSpecificationNotFound, + ROCrateMetadataNotFoundError, +) from rocrate_validator.events import Event, EventType, Publisher, Subscriber from rocrate_validator.rocrate import ROCrate from rocrate_validator.utils import log as logging from rocrate_validator.utils.collections import MapIndex, MultiIndexMap from rocrate_validator.utils.http import HttpRequester from rocrate_validator.utils.paths import get_profiles_path -from rocrate_validator.utils.python_helpers import \ - get_requirement_name_from_file +from rocrate_validator.utils.python_helpers import ( + get_requirement_name_from_file, +) from rocrate_validator.utils.uri import URI # set the default profiles path @@ -1108,9 +1117,36 @@ def to_dict(self, with_profile: bool = True, with_checks: bool = True) -> dict: 
result["checks"] = [_.to_dict(with_requirement=False, with_profile=False) for _ in self._checks] return result + @classmethod + def initialize(cls, context: ValidationContext) -> None: + logger.debug( + "Starting %s requirement initialization for context %s", + cls.__name__, + context, + ) + # do initialization logic here (empty for now) + logger.debug( + "Completed %s requirement initialization for context %s", + cls.__name__, + context, + ) + + @classmethod + def finalize(cls, context: ValidationContext) -> None: + logger.debug( + "Starting %s requirement finalization for context %s", + cls.__name__, + context, + ) + # do finalization logic here (empty for now) + logger.debug( + "Completed %s requirement finalization for context %s", + cls.__name__, + context, + ) -class RequirementLoader: +class RequirementLoader: def __init__(self, profile: Profile): self._profile = profile @@ -1129,7 +1165,6 @@ def __get_requirement_type__(requirement_path: Path) -> str: @classmethod def __get_requirement_loader__(cls, profile: Profile, requirement_path: Path) -> RequirementLoader: - import importlib requirement_type = cls.__get_requirement_type__(requirement_path) loader_instance_name = f"_{requirement_type}_loader_instance" loader_instance = getattr(profile, loader_instance_name, None) @@ -1174,16 +1209,21 @@ def load_requirements(profile: Profile, severity: Severity = None) -> list[Requi """ Load the requirements related to the profile """ - def ok_file(p: Path) -> bool: - return p.is_file() \ - and p.suffix in PROFILE_FILE_EXTENSIONS \ - and not p.name == DEFAULT_ONTOLOGY_FILE \ - and not p.name == PROFILE_SPECIFICATION_FILE \ - and not p.name.startswith('.') \ - and not p.name.startswith('_') - files = sorted((p for p in profile.path.rglob('*.*') if ok_file(p)), - key=lambda x: (not x.suffix == '.py', x)) + def ok_file(p: Path) -> bool: + return ( + p.is_file() + and p.suffix in PROFILE_FILE_EXTENSIONS + and not p.name == DEFAULT_ONTOLOGY_FILE + and not p.name == 
PROFILE_SPECIFICATION_FILE + and not p.name.startswith(".") + and not p.name.startswith("_") + ) + + files = sorted( + (p for p in profile.path.rglob("*.*") if ok_file(p)), + key=lambda x: (not x.suffix == ".py", x), + ) # set the requirement level corresponding to the severity requirement_level = LevelCollection.get(severity.name) @@ -1195,23 +1235,33 @@ def ok_file(p: Path) -> bool: if requirement_level_from_path < requirement_level: continue except ValueError: - logger.debug("The requirement level could not be determined from the path: %s", requirement_path) + logger.debug( + "The requirement level could not be determined from the path: %s", + requirement_path, + ) requirement_loader = RequirementLoader.__get_requirement_loader__(profile, requirement_path) for requirement in requirement_loader.load( - profile, requirement_level, - requirement_path, publicID=profile.publicID): + profile, + requirement_level, + requirement_path, + publicID=profile.publicID, + ): requirements.append(requirement) # sort the requirements by severity - requirements = sorted(requirements, - key=lambda x: (-x.severity_from_path.value, x.path.name, x.name) - if x.severity_from_path is not None else (0, x.path.name, x.name), - reverse=False) + requirements = sorted( + requirements, + key=lambda x: ( + (-x.severity_from_path.value, x.path.name, x.name) + if x.severity_from_path is not None + else (0, x.path.name, x.name) + ), + reverse=False, + ) # assign order numbers to requirements for i, requirement in enumerate(requirements): requirement._order_number = i + 1 # log and return the requirements - logger.debug("Profile %s loaded %s requirements: %s", - profile.identifier, len(requirements), requirements) + logger.debug("Profile %s loaded %s requirements: %s", profile.identifier, len(requirements), requirements) return requirements From 4c2fbbd85103a0d8953af6105c00f145020e5176 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 6 May 2026 18:07:22 +0200 Subject: [PATCH 36/89] 
fix: :rotating_light: reformat code to fix linter warnings --- rocrate_validator/models.py | 783 +++++++++++------- .../requirements/shacl/requirements.py | 30 +- tests/unit/requirements/test_profiles.py | 86 +- 3 files changed, 549 insertions(+), 350 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index bfc9459f4..3c6e1544a 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -102,13 +102,13 @@ def get(name: str) -> Severity: @total_ordering @dataclass class RequirementLevel: - """ Represents a requirement level. A requirement has a name and a severity level of type :class:`.Severity`. It implements the comparison operators to allow ordering of the requirement levels. """ + name: str severity: Severity @@ -130,7 +130,7 @@ def __hash__(self) -> int: return hash((self.name, self.severity)) def __repr__(self) -> str: - return f'RequirementLevel(name={self.name}, severity={self.severity})' + return f"RequirementLevel(name={self.name}, severity={self.severity})" def __str__(self) -> str: return self.name @@ -157,36 +157,39 @@ class LevelCollection: are to be interpreted as described in **RFC 2119**. 
""" + #: The requirement level OPTIONAL is mapped to the OPTIONAL severity level - OPTIONAL = RequirementLevel('OPTIONAL', Severity.OPTIONAL) + OPTIONAL = RequirementLevel("OPTIONAL", Severity.OPTIONAL) #: The requirement level MAY is mapped to the OPTIONAL severity level - MAY = RequirementLevel('MAY', Severity.OPTIONAL) + MAY = RequirementLevel("MAY", Severity.OPTIONAL) #: The requirement level REQUIRED is mapped to the REQUIRED severity level - REQUIRED = RequirementLevel('REQUIRED', Severity.REQUIRED) + REQUIRED = RequirementLevel("REQUIRED", Severity.REQUIRED) #: The requirement level SHOULD is mapped to the RECOMMENDED severity level - SHOULD = RequirementLevel('SHOULD', Severity.RECOMMENDED) + SHOULD = RequirementLevel("SHOULD", Severity.RECOMMENDED) #: The requirement level SHOULD NOT is mapped to the RECOMMENDED severity level - SHOULD_NOT = RequirementLevel('SHOULD_NOT', Severity.RECOMMENDED) + SHOULD_NOT = RequirementLevel("SHOULD_NOT", Severity.RECOMMENDED) #: The requirement level RECOMMENDED is mapped to the RECOMMENDED severity level - RECOMMENDED = RequirementLevel('RECOMMENDED', Severity.RECOMMENDED) + RECOMMENDED = RequirementLevel("RECOMMENDED", Severity.RECOMMENDED) #: The requirement level MUST is mapped to the REQUIRED severity level - MUST = RequirementLevel('MUST', Severity.REQUIRED) + MUST = RequirementLevel("MUST", Severity.REQUIRED) #: The requirement level MUST_NOT is mapped to the REQUIRED severity level - MUST_NOT = RequirementLevel('MUST_NOT', Severity.REQUIRED) + MUST_NOT = RequirementLevel("MUST_NOT", Severity.REQUIRED) #: The requirement level SHALL is mapped to the REQUIRED severity level - SHALL = RequirementLevel('SHALL', Severity.REQUIRED) + SHALL = RequirementLevel("SHALL", Severity.REQUIRED) #: The requirement level SHALL_NOT is mapped to the REQUIRED severity level - SHALL_NOT = RequirementLevel('SHALL_NOT', Severity.REQUIRED) + SHALL_NOT = RequirementLevel("SHALL_NOT", Severity.REQUIRED) def __init__(self): raise 
NotImplementedError(f"{type(self)} can't be instantiated") @staticmethod def all() -> list[RequirementLevel]: - return [level for name, level in inspect.getmembers(LevelCollection) - if not inspect.isroutine(level) - and not inspect.isdatadescriptor(level) and not name.startswith('__')] + return [ + level + for name, level in inspect.getmembers(LevelCollection) + if not inspect.isroutine(level) and not inspect.isdatadescriptor(level) and not name.startswith("__") + ] @staticmethod def get(name: str) -> RequirementLevel: @@ -198,7 +201,6 @@ def get(name: str) -> RequirementLevel: @total_ordering class Profile: - """ RO-Crate Validator profile. @@ -206,19 +208,25 @@ class Profile: """ # store the map of profiles: profile URI -> Profile instance - __profiles_map: MultiIndexMap = \ - MultiIndexMap("uri", indexes=[ - MapIndex("name"), MapIndex("token", unique=False), MapIndex("identifier", unique=True), - MapIndex("token_path", unique=False) - ]) - - def __init__(self, - profiles_base_path: Path, - profile_path: Path, - requirements: Optional[list[Requirement]] = None, - identifier: str = None, - publicID: Optional[str] = None, - severity: Severity = Severity.REQUIRED): + __profiles_map: MultiIndexMap = MultiIndexMap( + "uri", + indexes=[ + MapIndex("name"), + MapIndex("token", unique=False), + MapIndex("identifier", unique=True), + MapIndex("token_path", unique=False), + ], + ) + + def __init__( + self, + profiles_base_path: Path, + profile_path: Path, + requirements: Optional[list[Requirement]] = None, + identifier: str = None, + publicID: Optional[str] = None, + severity: Severity = Severity.REQUIRED, + ): """ Initialize the Profile instance @@ -283,11 +291,10 @@ def __init__(self, if existing_profile.path != profile_path: # if the profile already exists, log a warning logger.warning( - "Profile with identifier %s at %s is being overridden " - "by the profile loaded from %s.", + "Profile with identifier %s at %s is being overridden by the profile loaded from %s.", 
existing_profile.identifier, existing_profile.path, - profile_path + profile_path, ) # add the existing profile as an override self.__add_override__(existing_profile) @@ -295,17 +302,24 @@ def __init__(self, # add the profile to the profiles map self.__profiles_map.add( self._profile_node.toPython(), - self, token=self.token, - name=self.name, identifier=self.identifier, - token_path=self.__extract_token_from_path__() + self, + token=self.token, + name=self.name, + identifier=self.identifier, + token_path=self.__extract_token_from_path__(), ) # add the profile to the profiles map else: raise ProfileSpecificationError( - message=f"Profile specification file {spec_file} must contain exactly one profile") + message=f"Profile specification file {spec_file} must contain exactly one profile" + ) def __get_specification_property__( - self, property: str, namespace: Namespace, - pop_first: bool = True, as_Python_object: bool = True) -> Union[str, list[Union[str, URIRef]]]: + self, + property: str, + namespace: Namespace, + pop_first: bool = True, + as_Python_object: bool = True, + ) -> Union[str, list[Union[str, URIRef]]]: assert self._profile_specification_graph is not None, "Profile specification graph not loaded" values = list(self._profile_specification_graph.objects(self._profile_node, namespace[property])) if values and as_Python_object: @@ -493,23 +507,22 @@ def requirements(self) -> list[Requirement]: The list of requirements of the profile. 
""" if not self._requirements: - self._requirements = \ - RequirementLoader.load_requirements(self, severity=self.severity) + self._requirements = RequirementLoader.load_requirements(self, severity=self.severity) return self._requirements - def get_requirements( - self, severity: Severity = Severity.REQUIRED, - exact_match: bool = False) -> list[Requirement]: + def get_requirements(self, severity: Severity = Severity.REQUIRED, exact_match: bool = False) -> list[Requirement]: """ Get the requirements of the profile with the given severity level. If the exact_match flag is set to `True`, only the requirements with the exact severity level are returned; otherwise, the requirements with severity level greater than or equal to the given severity level are returned. """ - return [requirement for requirement in self.requirements - if (not exact_match and - (not requirement.severity_from_path or requirement.severity_from_path >= severity)) or - (exact_match and requirement.severity_from_path == severity)] + return [ + requirement + for requirement in self.requirements + if (not exact_match and (not requirement.severity_from_path or requirement.severity_from_path >= severity)) + or (exact_match and requirement.severity_from_path == severity) + ] def get_requirement(self, name: str) -> Optional[Requirement]: """ @@ -564,9 +577,7 @@ def remove_requirement(self, requirement: Requirement): self._requirements.remove(requirement) def __eq__(self, other: object) -> bool: - return isinstance(other, Profile) \ - and self.identifier == other.identifier \ - and self.path == other.path + return isinstance(other, Profile) and self.identifier == other.identifier and self.path == other.path def __lt__(self, other: object) -> bool: if not isinstance(other, Profile): @@ -582,10 +593,9 @@ def __hash__(self) -> int: def __repr__(self) -> str: return ( - f'Profile(identifier={self.identifier}, ' - f'name={self.name}, ' - f'path={self.path}, ' if self.path else '' - 
f'requirements={self.requirements})' + f"Profile(identifier={self.identifier}, name={self.name}, path={self.path}, " + if self.path + else f"requirements={self.requirements})" ) def __str__(self) -> str: @@ -596,7 +606,7 @@ def to_dict(self) -> dict: "identifier": self.identifier, "uri": self.uri, "name": self.name, - "description": self.description + "description": self.description, } @staticmethod @@ -610,12 +620,16 @@ def __extract_version_from_token__(token: str) -> Optional[str]: return None def __get_consistent_version__(self, candidate_token: str) -> str: - candidates = {_ for _ in [ - self.__get_specification_property__("version", SCHEMA_ORG_NS), - self.__extract_version_from_token__(candidate_token), - self.__extract_version_from_token__(str(self.path.relative_to(self._profiles_base_path))), - self.__extract_version_from_token__(str(self.uri)) - ] if _ is not None} + candidates = { + _ + for _ in [ + self.__get_specification_property__("version", SCHEMA_ORG_NS), + self.__extract_version_from_token__(candidate_token), + self.__extract_version_from_token__(str(self.path.relative_to(self._profiles_base_path))), + self.__extract_version_from_token__(str(self.uri)), + ] + if _ is not None + } if len(candidates) > 1: raise ProfileSpecificationError(f"Inconsistent versions found: {candidates}") logger.debug("Candidate versions: %s", candidates) @@ -630,7 +644,7 @@ def __extract_token_from_path__(self) -> str: # Remove the base path from the identifier identifier = identifier.replace(f"{base_path}/", "") # Replace slashes with hyphens - identifier = identifier.replace('/', '-') + identifier = identifier.replace("/", "-") return identifier def __init_token_version__(self) -> Tuple[str, str, str]: @@ -652,10 +666,13 @@ def __init_token_version__(self) -> Tuple[str, str, str]: return candidate_token, version @classmethod - def __load_profile_path__(cls, profiles_base_path: str, - profile_path: Union[str, Path], - publicID: Optional[str] = None, - severity: Severity = 
Severity.REQUIRED) -> Profile: + def __load_profile_path__( + cls, + profiles_base_path: str, + profile_path: Union[str, Path], + publicID: Optional[str] = None, + severity: Severity = Severity.REQUIRED, + ) -> Profile: # if the path is a string, convert it to a Path if isinstance(profile_path, str): profile_path = Path(profile_path) @@ -663,14 +680,21 @@ def __load_profile_path__(cls, profiles_base_path: str, if not profile_path.is_dir(): raise InvalidProfilePath(profile_path) # create a new profile - profile = Profile(profiles_base_path=profiles_base_path, - profile_path=profile_path, publicID=publicID, severity=severity) + profile = Profile( + profiles_base_path=profiles_base_path, + profile_path=profile_path, + publicID=publicID, + severity=severity, + ) logger.debug("Loaded profile: %s", profile) return profile @classmethod - def __load_profiles_paths__(cls, profiles_path: Union[str, Path] = None, - extra_profiles_path: Union[str, Path] = None) -> list[Tuple[Path, Path]]: + def __load_profiles_paths__( + cls, + profiles_path: Union[str, Path] = None, + extra_profiles_path: Union[str, Path] = None, + ) -> list[Tuple[Path, Path]]: """ Load the paths of the profiles from the given profiles path and extra profiles path. 
@@ -698,33 +722,47 @@ def __load_profiles_paths__(cls, profiles_path: Union[str, Path] = None, if not root_profile_directory.is_dir(): raise InvalidProfilePath(root_profile_directory) # if the path is a directory, get the profile directories - result.extend([(root_profile_directory, p.parent) - for p in root_profile_directory.rglob('*.*') if p.name == PROFILE_SPECIFICATION_FILE]) + result.extend( + [ + (root_profile_directory, p.parent) + for p in root_profile_directory.rglob("*.*") + if p.name == PROFILE_SPECIFICATION_FILE + ] + ) # return the list of profile directories return result @classmethod - def load_profiles(cls, - profiles_path: Union[str, Path], - extra_profiles_path: Union[str, Path] = None, - publicID: Optional[str] = None, - severity: Severity = Severity.REQUIRED, - allow_requirement_check_override: bool = True) -> list[Profile]: + def load_profiles( + cls, + profiles_path: Union[str, Path], + extra_profiles_path: Union[str, Path] = None, + publicID: Optional[str] = None, + severity: Severity = Severity.REQUIRED, + allow_requirement_check_override: bool = True, + ) -> list[Profile]: # initialize the profiles list profiles = [] # calculate the list of profiles path as the subdirectories of the profiles path # where the profile specification file is present - profiles_paths = cls.__load_profiles_paths__(profiles_path, - extra_profiles_path) + profiles_paths = cls.__load_profiles_paths__(profiles_path, extra_profiles_path) # iterate through the directories and load the profiles for root_profile_path, profile_path in profiles_paths: - logger.debug("Checking profile path: %s %s %r", profile_path, - profile_path.is_dir(), IGNORED_PROFILE_DIRECTORIES) + logger.debug( + "Checking profile path: %s %s %r", + profile_path, + profile_path.is_dir(), + IGNORED_PROFILE_DIRECTORIES, + ) # check if the profile path is a directory and not in the ignored directories if profile_path.is_dir() and profile_path not in IGNORED_PROFILE_DIRECTORIES: profile = 
Profile.__load_profile_path__( - root_profile_path, profile_path, publicID=publicID, severity=severity) + root_profile_path, + profile_path, + publicID=publicID, + severity=severity, + ) # if the profile overrides another profile, # remove the overridden profiles from the list of profiles # to avoid duplicates and ensure that the most specific profile is used @@ -760,7 +798,10 @@ def load_profiles(cls, # order profiles according to the number of profiles they depend on: # i.e, first the profiles that do not depend on any other profile # then the profiles that depend on the previous ones, and so on - return sorted(profiles, key=lambda x: f"{len(x.inherited_profiles)}_{x.identifier}") + return sorted( + profiles, + key=lambda x: f"{len(x.inherited_profiles)}_{x.identifier}", + ) @classmethod def get_by_identifier(cls, identifier: str) -> Profile: @@ -838,8 +879,7 @@ def all(cls) -> list[Profile]: return cls.__profiles_map.values() @classmethod - def find_in_list(cls, profiles: Collection[Profile], - profile_identifier: str) -> Optional[Profile]: + def find_in_list(cls, profiles: Collection[Profile], profile_identifier: str) -> Optional[Profile]: """ Find a profile with the given identifier in the given list of profiles @@ -852,8 +892,10 @@ def find_in_list(cls, profiles: Collection[Profile], :return: the profile if found, None otherwise :rtype: Optional[Profile] """ - profile = next((p for p in profiles if p.identifier == profile_identifier), None) or \ - next((p for p in profiles if str(p.identifier).replace(f"-{p.version}", '') == profile_identifier), None) + profile = next((p for p in profiles if p.identifier == profile_identifier), None) or next( + (p for p in profiles if str(p.identifier).replace(f"-{p.version}", "") == profile_identifier), + None, + ) if not profile: raise ProfileNotFound(profile_identifier) return profile @@ -875,12 +917,14 @@ class Requirement(ABC): A requirement is a named set of checks that can be used to validate an RO-Crate. 
""" - def __init__(self, - profile: Profile, - name: str = "", - description: Optional[str] = None, - path: Optional[Path] = None, - initialize_checks: bool = True): + def __init__( + self, + profile: Profile, + name: str = "", + description: Optional[str] = None, + path: Optional[Path] = None, + initialize_checks: bool = True, + ): """ Initialize the Requirement instance @@ -956,7 +1000,10 @@ def requirement_level_from_path(self) -> RequirementLevel: try: self._level_from_path = LevelCollection.get(self._path.parent.name) except ValueError: - logger.debug("The requirement level could not be determined from the path: %s", self._path) + logger.debug( + "The requirement level could not be determined from the path: %s", + self._path, + ) return self._level_from_path @property @@ -966,8 +1013,9 @@ def profile(self) -> Profile: @property def description(self) -> str: if not self._description: - self._description = self.__class__.__doc__.strip( - ) if self.__class__.__doc__ else f"Profile Requirement {self.name}" + self._description = ( + self.__class__.__doc__.strip() if self.__class__.__doc__ else f"Profile Requirement {self.name}" + ) return self._description @property @@ -1014,45 +1062,74 @@ def _do_validate_(self, context: ValidationContext) -> bool: :meta private: """ - logger.debug("Validating Requirement %s with %s checks", self.name, len(self._checks)) + logger.debug( + "Validating Requirement %s with %s checks", + self.name, + len(self._checks), + ) - logger.debug("Running %s checks for Requirement '%s'", len(self._checks), self.name) + logger.debug( + "Running %s checks for Requirement '%s'", + len(self._checks), + self.name, + ) all_passed = True checks_to_perform = [ - _ for _ in self._checks - if not context.settings.skip_checks - or _.identifier not in context.settings.skip_checks + _ + for _ in self._checks + if not context.settings.skip_checks or _.identifier not in context.settings.skip_checks ] for check in checks_to_perform: try: if check.overridden 
and not check.requirement.profile.identifier == context.profile_identifier: - logger.debug("Skipping check '%s' because overridden by '%r'", - check.identifier, [_.identifier for _ in check.overridden_by]) + logger.debug( + "Skipping check '%s' because overridden by '%r'", + check.identifier, + [_.identifier for _ in check.overridden_by], + ) continue # Determine whether to skip event notification for inherited profiles skip_event_notify = False - if check.requirement.profile.identifier != context.profile_identifier and \ - context.settings.disable_inherited_profiles_issue_reporting: - logger.debug("Inherited profiles reporting disabled. " - "Skipping requirement %s as it belongs to an inherited profile %s", - check.requirement.identifier, check.requirement.profile.identifier) + if ( + check.requirement.profile.identifier != context.profile_identifier + and context.settings.disable_inherited_profiles_issue_reporting + ): + logger.debug( + "Inherited profiles reporting disabled. " + "Skipping requirement %s as it belongs to an inherited profile %s", + check.requirement.identifier, + check.requirement.profile.identifier, + ) skip_event_notify = True # Notify the start of the check execution if not skip_event_notify is set to True if not skip_event_notify: - context.validator.notify(RequirementCheckValidationEvent( - EventType.REQUIREMENT_CHECK_VALIDATION_START, check)) + context.validator.notify( + RequirementCheckValidationEvent(EventType.REQUIREMENT_CHECK_VALIDATION_START, check) + ) # Execute the check check_result = check.execute_check(context) logger.debug("Result of check %s: %s", check.identifier, check_result) context.result._add_executed_check(check, check_result) # Notify the end of the check execution if not skip_event_notify is set to True if not skip_event_notify: - context.validator.notify(RequirementCheckValidationEvent( - EventType.REQUIREMENT_CHECK_VALIDATION_END, check, validation_result=check_result)) - logger.debug("Ran check '%s'. 
Got result %s", check.identifier, check_result) + context.validator.notify( + RequirementCheckValidationEvent( + EventType.REQUIREMENT_CHECK_VALIDATION_END, + check, + validation_result=check_result, + ) + ) + logger.debug( + "Ran check '%s'. Got result %s", + check.identifier, + check_result, + ) # Ensure the check result is a boolean if not isinstance(check_result, bool): - logger.warning("Ignoring the check %s as it returned the value %r instead of a boolean", check.name) + logger.warning( + "Ignoring the check %s as it returned the value %r instead of a boolean", + check.name, + ) raise RuntimeError(f"Ignoring invalid result from check {check.name}") # Aggregate the check result all_passed = all_passed and check_result @@ -1070,15 +1147,17 @@ def _do_validate_(self, context: ValidationContext) -> bool: logger.exception(e) skipped_checks = set(self._checks) - set(checks_to_perform) context.result.skipped_checks.update(skipped_checks) - logger.debug("Checks for Requirement '%s' completed. Checks passed? %s", self.name, all_passed) + logger.debug( + "Checks for Requirement '%s' completed. Checks passed? 
%s", + self.name, + all_passed, + ) return all_passed def __eq__(self, other: object) -> bool: if not isinstance(other, Requirement): raise TypeError(f"Cannot compare {type(self)} with {type(other)}") - return self.name == other.name \ - and self.description == other.description \ - and self.path == other.path + return self.name == other.name and self.description == other.description and self.path == other.path def __ne__(self, other: object) -> bool: return not self.__eq__(other) @@ -1089,16 +1168,20 @@ def __hash__(self): def __lt__(self, other: object) -> bool: if not isinstance(other, Requirement): raise ValueError(f"Cannot compare Requirement with {type(other)}") - return (self._order_number, self.name) < (other._order_number, other.name) + return (self._order_number, self.name) < ( + other._order_number, + other.name, + ) def __repr__(self): return ( - f'ProfileRequirement(' - f'_order_number={self._order_number}, ' - f'name={self.name}, ' - f'description={self.description}' - f', path={self.path}, ' if self.path else '' - ')' + f"ProfileRequirement(" + f"_order_number={self._order_number}, " + f"name={self.name}, " + f"description={self.description}" + f", path={self.path}, " + if self.path + else ")" ) def __str__(self) -> str: @@ -1109,7 +1192,7 @@ def to_dict(self, with_profile: bool = True, with_checks: bool = True) -> dict: "identifier": self.identifier, "name": self.name, "description": self.description, - "order": self.order_number + "order": self.order_number, } if with_profile: result["profile"] = self.profile.to_dict() @@ -1268,12 +1351,14 @@ def ok_file(p: Path) -> bool: @total_ordering class RequirementCheck(ABC): - def __init__(self, - requirement: Requirement, - name: str, - level: Optional[RequirementLevel] = LevelCollection.REQUIRED, - description: Optional[str] = None, - hidden: Optional[bool] = None): + def __init__( + self, + requirement: Requirement, + name: str, + level: Optional[RequirementLevel] = LevelCollection.REQUIRED, + 
description: Optional[str] = None, + hidden: Optional[bool] = None, + ): self._requirement: Requirement = requirement self._order_number = 0 self._name = name @@ -1317,9 +1402,7 @@ def requirement(self) -> Requirement: @property def level(self) -> RequirementLevel: - return self._level or \ - self.requirement.requirement_level_from_path or \ - LevelCollection.REQUIRED + return self._level or self.requirement.requirement_level_from_path or LevelCollection.REQUIRED @property def severity(self) -> Severity: @@ -1364,7 +1447,7 @@ def to_dict(self, with_requirement: bool = True, with_profile: bool = True) -> d "order": self.order_number, "name": self.name, "description": self.description, - "severity": self.severity.name + "severity": self.severity.name, } if with_requirement: result["requirement"] = self.requirement.to_dict(with_profile=with_profile, with_checks=False) @@ -1378,7 +1461,10 @@ def __eq__(self, other: object) -> bool: def __lt__(self, other: object) -> bool: if not isinstance(other, RequirementCheck): raise ValueError(f"Cannot compare RequirementCheck with {type(other)}") - return (self.requirement, self.identifier) < (other.requirement, other.identifier) + return (self.requirement, self.identifier) < ( + other.requirement, + other.identifier, + ) def __ne__(self, other: object) -> bool: return not self.__eq__(other) @@ -1394,12 +1480,14 @@ class CheckIssue: during the validation process. 
""" - def __init__(self, - check: RequirementCheck, - message: Optional[str] = None, - violatingProperty: Optional[str] = None, - violatingEntity: Optional[str] = None, - value: Optional[str] = None): + def __init__( + self, + check: RequirementCheck, + message: Optional[str] = None, + violatingProperty: Optional[str] = None, + violatingEntity: Optional[str] = None, + value: Optional[str] = None, + ): self._message = message self._check: RequirementCheck = check self._violatingProperty = violatingProperty @@ -1464,9 +1552,7 @@ def violatingPropertyValue(self) -> Optional[str]: return self._propertyValue def __eq__(self, other: object) -> bool: - return isinstance(other, CheckIssue) and \ - self._check == other._check and \ - self._message == other._message + return isinstance(other, CheckIssue) and self._check == other._check and self._message == other._message def __lt__(self, other: object) -> bool: if not isinstance(other, CheckIssue): @@ -1477,34 +1563,43 @@ def __hash__(self) -> int: return hash((self._check, self._message)) def __repr__(self) -> str: - return f'CheckIssue(severity={self.severity}, check={self.check}, message={self.message})' + return f"CheckIssue(severity={self.severity}, check={self.check}, message={self.message})" def __str__(self) -> str: - return f"Issue of severity {self.severity.name} with check \"{self.check.identifier}\": {self.message}" - - def to_dict(self, with_check: bool = True, - with_requirement: bool = True, with_profile: bool = True) -> dict: + return f'Issue of severity {self.severity.name} with check "{self.check.identifier}": {self.message}' + + def to_dict( + self, + with_check: bool = True, + with_requirement: bool = True, + with_profile: bool = True, + ) -> dict: result = { "severity": self.severity.name, "message": self.message, "violatingEntity": self.violatingEntity, "violatingProperty": self.violatingProperty, - "violatingPropertyValue": self.violatingPropertyValue + "violatingPropertyValue": 
self.violatingPropertyValue, } if with_check: result["check"] = self.check.to_dict(with_requirement=with_requirement, with_profile=with_profile) return result - def to_json(self, - with_checks: bool = True, - with_requirements: bool = True, - with_profile: bool = True) -> str: + def to_json( + self, + with_checks: bool = True, + with_requirements: bool = True, + with_profile: bool = True, + ) -> str: return json.dumps( self.to_dict( with_check=with_checks, with_requirement=with_requirements, - with_profile=with_profile - ), indent=4, cls=CustomEncoder) + with_profile=with_profile, + ), + indent=4, + cls=CustomEncoder, + ) class ValidationStatisticsListener(Protocol): @@ -1521,9 +1616,12 @@ class ValidationStatistics(Subscriber): Computes and stores statistical metrics about the RO-Crate validation process. """ - def __init__(self, settings: Union[dict, ValidationSettings], - context: Optional[ValidationContext] = None, - skip_initialization: bool = False): + def __init__( + self, + settings: Union[dict, ValidationSettings], + context: Optional[ValidationContext] = None, + skip_initialization: bool = False, + ): if isinstance(settings, dict): settings = ValidationSettings.parse(settings) self._settings = settings @@ -1717,7 +1815,8 @@ def __initialise__(cls, validation_settings: ValidationSettings): validation_settings.profiles_path, extra_profiles_path=validation_settings.extra_profiles_path, severity=severity_validation, - allow_requirement_check_override=validation_settings.allow_requirement_check_override) + allow_requirement_check_override=validation_settings.allow_requirement_check_override, + ) profile: Profile = Profile.find_in_list(profiles, validation_settings.profile_identifier) target_profile_identifier = profile.identifier # initialize the profiles list @@ -1734,7 +1833,11 @@ def __initialise__(cls, validation_settings: ValidationSettings): requirements: set[Requirement] = set() # Initialize the counters - for severity in (Severity.REQUIRED, 
Severity.RECOMMENDED, Severity.OPTIONAL): + for severity in ( + Severity.REQUIRED, + Severity.RECOMMENDED, + Severity.OPTIONAL, + ): checks_by_severity[severity] = set() # Process the requirements and checks @@ -1748,26 +1851,23 @@ def __initialise__(cls, validation_settings: ValidationSettings): continue requirement_checks_count = 0 - for severity in (Severity.REQUIRED, Severity.RECOMMENDED, Severity.OPTIONAL): + for severity in ( + Severity.REQUIRED, + Severity.RECOMMENDED, + Severity.OPTIONAL, + ): logger.debug( - f"Checking requirement: {requirement} severity: {severity} {severity < severity_validation}") + f"Checking requirement: {requirement} severity: {severity} {severity < severity_validation}" + ) # skip requirements with lower severity if severity < severity_validation: continue # count the checks requirement_checks = [ _ - for _ in requirement.get_checks_by_level( - LevelCollection.get(severity.name) - ) - if ( - not validation_settings.skip_checks - or _.identifier not in validation_settings.skip_checks - ) - and ( - not _.overridden - or _.requirement.profile.identifier == target_profile_identifier - ) + for _ in requirement.get_checks_by_level(LevelCollection.get(severity.name)) + if (not validation_settings.skip_checks or _.identifier not in validation_settings.skip_checks) + and (not _.overridden or _.requirement.profile.identifier == target_profile_identifier) ] num_checks = len(requirement_checks) requirement_checks_count += num_checks @@ -1787,7 +1887,11 @@ def __initialise__(cls, validation_settings: ValidationSettings): requirements.add(requirement) # log processed requirements - logger.debug("Processed requirements %r: %r", len(processed_requirements), processed_requirements) + logger.debug( + "Processed requirements %r: %r", + len(processed_requirements), + processed_requirements, + ) # Prepare the result result = { @@ -1805,7 +1909,7 @@ def __initialise__(cls, validation_settings: ValidationSettings): "finished_at": None, 
"validated_profiles": [], "validated_requirements": [], - "validated_checks": [] + "validated_checks": [], } logger.debug(result) return result @@ -1823,9 +1927,10 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None) -> None: logger.debug("Requirement check validation start") elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_END: target_profile = ctx.target_validation_profile - if not event.requirement_check.requirement.hidden and \ - (not event.requirement_check.overridden - or target_profile.identifier == event.requirement_check.requirement.profile.identifier): + if not event.requirement_check.requirement.hidden and ( + not event.requirement_check.overridden + or target_profile.identifier == event.requirement_check.requirement.profile.identifier + ): if event.validation_result is not None: if event.validation_result: self._stats["passed_checks"].append(event.requirement_check) @@ -1834,10 +1939,15 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None) -> None: self._stats["validated_checks"].append(event.requirement_check) self.notify_listeners() else: - logger.debug("Requirement check validation result is None: %s", - event.requirement_check.identifier) + logger.debug( + "Requirement check validation result is None: %s", + event.requirement_check.identifier, + ) else: - logger.debug("Skipping requirement check validation: %s", event.requirement_check.identifier) + logger.debug( + "Skipping requirement check validation: %s", + event.requirement_check.identifier, + ) elif event.event_type == EventType.REQUIREMENT_VALIDATION_END: if not event.requirement.hidden: if event.validation_result: @@ -1863,12 +1973,10 @@ def to_dict(self) -> dict: "started_at": self.started_at.isoformat() if self.started_at else None, "finished_at": self.finished_at.isoformat() if self.finished_at else None, "duration": self.duration, - # Profile details "profile": self.profile.identifier if self.profile else None, "profiles": 
[p.identifier for p in self.profiles], "severity": self.severity.name if self.severity else None, - # Computed totals "total_requirements": self.total_requirements, "total_passed_requirements": len(self.passed_requirements), @@ -1877,43 +1985,45 @@ def to_dict(self) -> dict: "total_passed_checks": len(self.passed_checks), "total_failed_checks": len(self.failed_checks), "total_checks_by_severity": {k.name: len(v) for k, v in self.checks_by_severity.items()}, - # Requirements involved "requirements": { "count": self.total_requirements, "passed": { "count": len(self.passed_requirements), - "percentage": (len(self.passed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 else 0.0, - "identifiers": sorted([r.identifier for r in self.passed_requirements]) + "percentage": ( + (len(self.passed_requirements) / self.total_requirements * 100) + if self.total_requirements > 0 + else 0.0 + ), + "identifiers": sorted([r.identifier for r in self.passed_requirements]), }, "failed": { "count": len(self.failed_requirements), - "percentage": (len(self.failed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 else 0.0, - "identifiers": sorted([r.identifier for r in self.failed_requirements]) + "percentage": ( + (len(self.failed_requirements) / self.total_requirements * 100) + if self.total_requirements > 0 + else 0.0 + ), + "identifiers": sorted([r.identifier for r in self.failed_requirements]), }, - "identifiers": sorted([r.identifier for r in self.requirements]) + "identifiers": sorted([r.identifier for r in self.requirements]), }, - # Checks involved "checks": { "count": self.total_checks, "passed": { "count": len(self.passed_checks), - "percentage": (len(self.passed_checks) / self.total_checks * 100) - if self.total_checks > 0 else 0.0, - "identifiers": sorted([c.identifier for c in self.passed_checks]) + "percentage": (len(self.passed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, + "identifiers": 
sorted([c.identifier for c in self.passed_checks]), }, "failed": { "count": len(self.failed_checks), - "percentage": (len(self.failed_checks) / self.total_checks * 100) - if self.total_checks > 0 else 0.0, - "identifiers": sorted([c.identifier for c in self.failed_checks]) + "percentage": (len(self.failed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, + "identifiers": sorted([c.identifier for c in self.failed_checks]), }, "identifiers": sorted([c.identifier for c in self.checks]), - "by_severity": {k.name: len(v) for k, v in self._stats.get("checks_by_severity", {}).items()} - } + "by_severity": {k.name: len(v) for k, v in self._stats.get("checks_by_severity", {}).items()}, + }, } def to_json(self) -> str: @@ -1953,10 +2063,8 @@ def to_dict(self) -> dict: "started_at": self.started_at.isoformat() if self.started_at else None, "finished_at": self.finished_at.isoformat() if self.finished_at else None, "duration": self.duration, - # Profiles involved "profiles": [p.identifier for p in self.profiles], - # Computed totals "total_requirements": self.total_requirements, "total_passed_requirements": len(self.passed_requirements), @@ -1965,40 +2073,43 @@ def to_dict(self) -> dict: "total_passed_checks": len(self.passed_checks), "total_failed_checks": len(self.failed_checks), "total_checks_by_severity": {k.name: len(v) for k, v in self.checks_by_severity.items()}, - # Requirements involved "requirements": { "count": self.total_requirements, "passed": { "count": len(self.passed_requirements), - "percentage": (len(self.passed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 else 0.0, - "identifiers": [r.identifier for r in self.passed_requirements] + "percentage": ( + (len(self.passed_requirements) / self.total_requirements * 100) + if self.total_requirements > 0 + else 0.0 + ), + "identifiers": [r.identifier for r in self.passed_requirements], }, "failed": { "count": len(self.failed_requirements), - "percentage": 
(len(self.failed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 else 0.0, - "identifiers": [r.identifier for r in self.failed_requirements] + "percentage": ( + (len(self.failed_requirements) / self.total_requirements * 100) + if self.total_requirements > 0 + else 0.0 + ), + "identifiers": [r.identifier for r in self.failed_requirements], }, - "identifiers": [r.identifier for r in self.requirements] + "identifiers": [r.identifier for r in self.requirements], }, # Checks involved "checks": { "count": self.total_checks, "passed": { "count": len(self.passed_checks), - "percentage": (len(self.passed_checks) / self.total_checks * 100) - if self.total_checks > 0 else 0.0, - "identifiers": [c.identifier for c in self.passed_checks] + "percentage": (len(self.passed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, + "identifiers": [c.identifier for c in self.passed_checks], }, "failed": { "count": len(self.failed_checks), - "percentage": (len(self.failed_checks) / self.total_checks * 100) - if self.total_checks > 0 else 0.0, - "identifiers": [c.identifier for c in self.failed_checks] + "percentage": (len(self.failed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, + "identifiers": [c.identifier for c in self.failed_checks], }, - "identifiers": [c.identifier for c in self.checks] + "identifiers": [c.identifier for c in self.checks], }, } @@ -2116,7 +2227,7 @@ def __compute_averall_stats__(self): "passed_checks": set(), "started_at": None, "finished_at": None, - "duration": 0.0 + "duration": 0.0, } # Aggregate statistics from each ValidationStatistics instance @@ -2137,10 +2248,12 @@ def __compute_averall_stats__(self): result["passed_checks"].update(stats.passed_checks) # Aggregate started_at and finished_at - result["started_at"] = min(result["started_at"], stats.started_at) \ - if result["started_at"] else stats.started_at - result["finished_at"] = max(result["finished_at"], stats.finished_at) \ - if 
result["finished_at"] else stats.finished_at + result["started_at"] = ( + min(result["started_at"], stats.started_at) if result["started_at"] else stats.started_at + ) + result["finished_at"] = ( + max(result["finished_at"], stats.finished_at) if result["finished_at"] else stats.finished_at + ) # Aggregate duration result["duration"] += stats.duration or 0.0 @@ -2148,8 +2261,9 @@ def __compute_averall_stats__(self): result["profiles"] = sorted(result["profiles"], key=lambda p: p.identifier) result["requirements"] = sorted(result["requirements"], key=lambda r: r.identifier) result["checks"] = sorted(result["checks"], key=lambda c: c.identifier) - result["checks_by_severity"] = {k: sorted(v, key=lambda c: c.identifier) - for k, v in result["checks_by_severity"].items()} + result["checks_by_severity"] = { + k: sorted(v, key=lambda c: c.identifier) for k, v in result["checks_by_severity"].items() + } result["failed_requirements"] = sorted(result["failed_requirements"], key=lambda r: r.identifier) result["failed_checks"] = sorted(result["failed_checks"], key=lambda c: c.identifier) result["passed_requirements"] = sorted(result["passed_requirements"], key=lambda r: r.identifier) @@ -2217,6 +2331,7 @@ def statistics(self) -> ValidationStatistics: The validation statistics """ return self._statistics + # --- Checks --- @property @@ -2277,9 +2392,7 @@ def get_issues(self, min_severity: Optional[Severity] = None) -> list[CheckIssue min_severity = min_severity or self.context.requirement_severity return [issue for issue in self._issues if issue.severity >= min_severity] - def get_issues_by_check(self, - check: RequirementCheck, - min_severity: Severity = None) -> list[CheckIssue]: + def get_issues_by_check(self, check: RequirementCheck, min_severity: Severity = None) -> list[CheckIssue]: """ Get the issues found during the validation for a specific check with a severity greater than or equal to `min_severity` @@ -2304,12 +2417,14 @@ def passed(self, min_severity: 
Optional[Severity] = None) -> bool: min_severity = min_severity or self.context.requirement_severity return not any(issue.severity >= min_severity for issue in self._issues) - def add_issue(self, - message: str, - check: RequirementCheck, - violatingEntity: Optional[str] = None, - violatingProperty: Optional[str] = None, - violatingPropertyValue: Optional[str] = None) -> CheckIssue: + def add_issue( + self, + message: str, + check: RequirementCheck, + violatingEntity: Optional[str] = None, + violatingProperty: Optional[str] = None, + violatingPropertyValue: Optional[str] = None, + ) -> CheckIssue: """ Add an issue to the validation result @@ -2320,8 +2435,13 @@ def add_issue(self, violatingProperty(Optional[str]): The property that caused the issue (if any) violatingPropertyValue(Optional[str]): The value of the violatingProperty (if any) """ - c = CheckIssue(check, message, violatingProperty=violatingProperty, - violatingEntity=violatingEntity, value=violatingPropertyValue) + c = CheckIssue( + check, + message, + violatingProperty=violatingProperty, + violatingEntity=violatingEntity, + value=violatingPropertyValue, + ) bisect.insort(self._issues, c) return c @@ -2348,13 +2468,14 @@ def get_failed_checks_by_requirement(self, requirement: Requirement) -> Collecti return [check for check in self.failed_checks if check.requirement == requirement] def get_failed_checks_by_requirement_and_severity( - self, requirement: Requirement, severity: Severity) -> Collection[RequirementCheck]: + self, requirement: Requirement, severity: Severity + ) -> Collection[RequirementCheck]: """ Get the checks that failed for a specific requirement and severity """ - return [check for check in self.failed_checks - if check.requirement == requirement - and check.severity == severity] + return [ + check for check in self.failed_checks if check.requirement == requirement and check.severity == severity + ] def __str__(self) -> str: return f"Validation result: passed={len(self.failed_checks) == 
0}, {len(self._issues)} issues" @@ -2371,17 +2492,20 @@ def to_dict(self) -> dict: """ Convert the ValidationResult to a dictionary """ - allowed_properties = ["profile_identifier", "enable_profile_inheritance", - "requirement_severity", "abort_on_first"] - validation_settings = {key: value for key, value in self.validation_settings.to_dict().items() - if key in allowed_properties} + allowed_properties = [ + "profile_identifier", + "enable_profile_inheritance", + "requirement_severity", + "abort_on_first", + ] + validation_settings = { + key: value for key, value in self.validation_settings.to_dict().items() if key in allowed_properties + } result = { - "meta": { - "version": JSON_OUTPUT_FORMAT_VERSION - }, + "meta": {"version": JSON_OUTPUT_FORMAT_VERSION}, "validation_settings": validation_settings, "passed": self.passed(self.context.settings.requirement_severity), - "issues": [issue.to_dict() for issue in self.issues] + "issues": [issue.to_dict() for issue in self.issues], } # add validator version to the settings result["validation_settings"]["rocrate_validator_version"] = __version__ @@ -2423,6 +2547,7 @@ class ValidationSettings: It includes the following attributes: """ + #: The URI of the RO-Crate rocrate_uri: URI #: The relative root path of the RO-Crate @@ -2480,20 +2605,23 @@ def __post_init__(self): self.requirement_severity = Severity[self.requirement_severity] # initialize the HTTP cache HttpRequester.initialize_cache(cache_path=self.cache_path, cache_max_age=self.cache_max_age) - logger.debug("HTTP cache initialized at %s with max age %s seconds", - self.cache_path, self.cache_max_age) + logger.debug( + "HTTP cache initialized at %s with max age %s seconds", + self.cache_path, + self.cache_max_age, + ) def to_dict(self): """ Convert the ValidationSettings to a dictionary """ result = asdict(self) - result['rocrate_uri'] = str(self.rocrate_uri) - result.pop('metadata_dict', None) # exclude metadata_dict from the dict representation + 
result["rocrate_uri"] = str(self.rocrate_uri) + result.pop("metadata_dict", None) # exclude metadata_dict from the dict representation # Remove disable_crate_download from the dict representation - result.pop('disable_remote_crate_download', None) + result.pop("disable_remote_crate_download", None) # Remove requirement_severity_only from the dict representation - result.pop('requirement_severity_only', None) + result.pop("requirement_severity_only", None) return result @property @@ -2540,8 +2668,12 @@ def parse(cls, settings: Union[dict, ValidationSettings]) -> ValidationSettings: class ValidationEvent(Event): - def __init__(self, event_type: EventType, - validation_result: Optional[ValidationResult] = None, message: Optional[str] = None): + def __init__( + self, + event_type: EventType, + validation_result: Optional[ValidationResult] = None, + message: Optional[str] = None, + ): super().__init__(event_type, message) self._validation_result = validation_result @@ -2551,8 +2683,16 @@ def validation_result(self) -> Optional[ValidationResult]: class ProfileValidationEvent(Event): - def __init__(self, event_type: EventType, profile: Profile, message: Optional[str] = None): - assert event_type in (EventType.PROFILE_VALIDATION_START, EventType.PROFILE_VALIDATION_END) + def __init__( + self, + event_type: EventType, + profile: Profile, + message: Optional[str] = None, + ): + assert event_type in ( + EventType.PROFILE_VALIDATION_START, + EventType.PROFILE_VALIDATION_END, + ) super().__init__(event_type, message) self._profile = profile @@ -2579,12 +2719,17 @@ def __hash__(self) -> int: class RequirementValidationEvent(Event): - def __init__(self, - event_type: EventType, - requirement: Requirement, - validation_result: Optional[bool] = None, - message: Optional[str] = None): - assert event_type in (EventType.REQUIREMENT_VALIDATION_START, EventType.REQUIREMENT_VALIDATION_END) + def __init__( + self, + event_type: EventType, + requirement: Requirement, + validation_result: 
Optional[bool] = None, + message: Optional[str] = None, + ): + assert event_type in ( + EventType.REQUIREMENT_VALIDATION_START, + EventType.REQUIREMENT_VALIDATION_END, + ) super().__init__(event_type, message) self._requirement = requirement self._validation_result = validation_result @@ -2616,10 +2761,17 @@ def __hash__(self) -> int: class RequirementCheckValidationEvent(Event): - def __init__(self, event_type: EventType, - requirement_check: RequirementCheck, - validation_result: Optional[bool] = None, message: Optional[str] = None): - assert event_type in (EventType.REQUIREMENT_CHECK_VALIDATION_START, EventType.REQUIREMENT_CHECK_VALIDATION_END) + def __init__( + self, + event_type: EventType, + requirement_check: RequirementCheck, + validation_result: Optional[bool] = None, + message: Optional[str] = None, + ): + assert event_type in ( + EventType.REQUIREMENT_CHECK_VALIDATION_START, + EventType.REQUIREMENT_CHECK_VALIDATION_END, + ) super().__init__(event_type, message) self._requirement_check = requirement_check self._validation_result = validation_result @@ -2636,8 +2788,9 @@ def __str__(self) -> str: return f"RequirementCheckValidationEvent({self.event_type}, {self.requirement_check})" def __repr__(self) -> str: - return f"RequirementCheckValidationEvent(event_type={self.event_type}, " \ - f"requirement_check={self.requirement_check})" + return ( + f"RequirementCheckValidationEvent(event_type={self.event_type}, requirement_check={self.requirement_check})" + ) def __eq__(self, other: object) -> bool: if not isinstance(other, RequirementCheckValidationEvent): @@ -2710,7 +2863,8 @@ def detect_rocrate_profiles(self) -> list[Profile]: context.profiles_path, extra_profiles_path=context.extra_profiles_path, publicID=context.publicID, - severity=context.requirement_severity) + severity=context.requirement_severity, + ) profiles = [p for p in available_profiles if p.uri in candidate_profiles_uris] # get the candidate profiles for profile in profiles: @@ -2719,7 
+2873,11 @@ def detect_rocrate_profiles(self) -> list[Profile]: for inherited_profile in inherited_profiles: if inherited_profile in candidate_profiles: candidate_profiles.remove(inherited_profile) - logger.debug("%d Candidate Profiles found: %s", len(candidate_profiles), candidate_profiles) + logger.debug( + "%d Candidate Profiles found: %s", + len(candidate_profiles), + candidate_profiles, + ) # unmatched candidate profiles unmatched_profiles = candidate_profiles_uris.difference(set(p.uri for p in profiles)) logger.debug("Unmatched Candidate Profiles URIs: %s", unmatched_profiles) @@ -2745,13 +2903,11 @@ def validate_requirements(self, requirements: list[Requirement]) -> ValidationRe """ Validates the RO-Crate against the specified subset of the profile requirements """ - assert all(isinstance(requirement, Requirement) for requirement in requirements), \ - "Invalid requirement type" + assert all(isinstance(requirement, Requirement) for requirement in requirements), "Invalid requirement type" # perform the requirements validation return self.__do_validate__(requirements) - def __do_validate__(self, - requirements: Optional[list[Requirement]] = None) -> ValidationResult: + def __do_validate__(self, requirements: Optional[list[Requirement]] = None) -> ValidationResult: # initialize the validation context context = ValidationContext(self, self.validation_settings) @@ -2761,31 +2917,59 @@ def __do_validate__(self, # initialize the requirement types self.__invoke_pre_validation_hooks__(context) + try: # set the profiles to validate against profiles = context.profiles assert len(profiles) > 0, "No profiles to validate" self.notify(EventType.VALIDATION_START) for profile in profiles: - logger.debug("Validating profile %s (id: %s)", profile.name, profile.identifier) + logger.debug( + "Validating profile %s (id: %s)", + profile.name, + profile.identifier, + ) # set the target profile in the context context._target_validation_profile = profile 
self.notify(ProfileValidationEvent(EventType.PROFILE_VALIDATION_START, profile=profile)) # perform the requirements validation requirements = profile.get_requirements( - context.requirement_severity, exact_match=context.requirement_severity_only) - logger.debug("Validating profile %s with %s requirements", profile.identifier, len(requirements)) - logger.debug("For profile %s, validating these %s requirements: %s", - profile.identifier, len(requirements), requirements) + context.requirement_severity, + exact_match=context.requirement_severity_only, + ) + logger.debug( + "Validating profile %s with %s requirements", + profile.identifier, + len(requirements), + ) + logger.debug( + "For profile %s, validating these %s requirements: %s", + profile.identifier, + len(requirements), + requirements, + ) terminate = False for requirement in requirements: if not requirement.overridden: - self.notify(RequirementValidationEvent( - EventType.REQUIREMENT_VALIDATION_START, requirement=requirement)) + self.notify( + RequirementValidationEvent( + EventType.REQUIREMENT_VALIDATION_START, + requirement=requirement, + ) + ) passed = requirement._do_validate_(context) - logger.debug("Requirement %s passed: %s", requirement.identifier, passed) + logger.debug( + "Requirement %s passed: %s", + requirement.identifier, + passed, + ) if not requirement.overridden: - self.notify(RequirementValidationEvent( - EventType.REQUIREMENT_VALIDATION_END, requirement=requirement, validation_result=passed)) + self.notify( + RequirementValidationEvent( + EventType.REQUIREMENT_VALIDATION_END, + requirement=requirement, + validation_result=passed, + ) + ) if passed: logger.debug("Validation Requirement passed") else: @@ -2823,7 +3007,7 @@ def __invoke_post_validation_hooks__(self, context: ValidationContext): logger.debug("Finalizing requirement types: completed") def notify(self, event: Union[Event, EventType]): - """ Override notify to update statistics """ + """Override notify to update statistics""" 
assert self.__current_context__ is not None, "No current validation context" result: ValidationResult = self.__current_context__.result if isinstance(event, EventType): @@ -2857,8 +3041,10 @@ def __init__(self, validator: Validator, settings: ValidationSettings): if settings.metadata_dict: self._rocrate = ROCrate.from_metadata_dict(settings.metadata_dict) else: - self._rocrate = ROCrate.new_instance(settings.rocrate_uri, - relative_root_path=settings.rocrate_relative_root_path) + self._rocrate = ROCrate.new_instance( + settings.rocrate_uri, + relative_root_path=settings.rocrate_relative_root_path, + ) assert isinstance(self._rocrate, ROCrate), "Invalid RO-Crate instance" @property @@ -2997,8 +3183,11 @@ def rel_fd_path(self) -> Path: def __load_data_graph__(self) -> Graph: data_graph = Graph() logger.debug("Loading RO-Crate metadata of: %s", self.ro_crate.uri) - _ = data_graph.parse(data=self.ro_crate.metadata.as_dict(), - format="json-ld", publicID=self.publicID) + _ = data_graph.parse( + data=self.ro_crate.metadata.as_dict(), + format="json-ld", + publicID=self.publicID, + ) logger.debug("RO-Crate metadata loaded: %s", data_graph) return data_graph @@ -3083,7 +3272,8 @@ def __load_profiles__(self) -> list[Profile]: extra_profiles_path=self.settings.extra_profiles_path, publicID=self.publicID, severity=self.requirement_severity, - allow_requirement_check_override=self.allow_requirement_check_override) + allow_requirement_check_override=self.allow_requirement_check_override, + ) # Check if the target profile is in the list of profiles profile = Profile.get_by_identifier(self.profile_identifier) @@ -3104,7 +3294,8 @@ def __load_profiles__(self) -> list[Profile]: logger.exception(e) raise ProfileNotFound( self.profile_identifier, - message=f"Profile '{self.profile_identifier}' not found in '{self.profiles_path}'") from e + message=f"Profile '{self.profile_identifier}' not found in '{self.profiles_path}'", + ) from e # if the inheritance is enabled, return only the 
target profile if not self.inheritance_enabled: diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index 3133c11cd..f5245a0ed 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -35,16 +35,11 @@ class SHACLRequirement(Requirement): - - def __init__(self, - shape: Shape, - profile: Profile, - path: Path): + def __init__(self, shape: Shape, profile: Profile, path: Path): self._shape = shape - super().__init__(profile, - shape.name if shape.name else "", - shape.description if shape.description else "", - path) + super().__init__( + profile, shape.name if shape.name else "", shape.description if shape.description else "", path + ) # init checks self._checks = self.__init_checks__() # assign check IDs @@ -65,8 +60,15 @@ def __init_checks__(self) -> list[RequirementCheck]: # check if the shape has nested properties has_properties = hasattr(self.shape, "properties") and len(self.shape.properties) > 0 # create a check for the shape itself, hidden if the shape has nested properties - checks.append(SHACLCheck(self, self.shape, name=f"Check {self.shape.name}" if has_properties else None, - hidden=has_properties, root=True)) + checks.append( + SHACLCheck( + self, + self.shape, + name=f"Check {self.shape.name}" if has_properties else None, + hidden=has_properties, + root=True, + ) + ) # create a check for each property if the shape has nested properties if has_properties: for prop in self.shape.properties: @@ -159,9 +161,9 @@ def __init__(self, profile: Profile): def shapes_registry(self) -> ShapesRegistry: return self._shape_registry - def load(self, profile: Profile, - requirement_level: RequirementLevel, - file_path: Path, publicID: Optional[str] = None) -> list[Requirement]: + def load( + self, profile: Profile, requirement_level: RequirementLevel, file_path: Path, publicID: Optional[str] = None + ) -> list[Requirement]: assert 
file_path is not None, "The file path cannot be None" shapes: list[Shape] = self.shapes_registry.load_shapes(file_path, publicID) logger.debug("Loaded %s shapes: %s", len(shapes), shapes) diff --git a/tests/unit/requirements/test_profiles.py b/tests/unit/requirements/test_profiles.py index e9f33795e..d0dad53f3 100644 --- a/tests/unit/requirements/test_profiles.py +++ b/tests/unit/requirements/test_profiles.py @@ -16,11 +16,11 @@ import os import pytest -from requirements.shacl.checks import SHACLCheck from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER from rocrate_validator.errors import DuplicateRequirementCheck, InvalidProfilePath, ProfileSpecificationError from rocrate_validator.models import Profile, ValidationContext, ValidationSettings, Validator +from rocrate_validator.requirements.shacl.checks import SHACLCheck from tests.ro_crates import InvalidFileDescriptorEntity, ValidROC # set up logging @@ -55,7 +55,7 @@ def test_load_invalid_profile_from_validation_context(fake_profiles_path: str): "profiles_path": "/tmp/random_path_xxx", "profile_identifier": DEFAULT_PROFILE_IDENTIFIER, "rocrate_uri": ValidROC().wrroc_paper, - "enable_profile_inheritance": False + "enable_profile_inheritance": False, } settings = ValidationSettings(**settings) @@ -77,7 +77,7 @@ def test_load_valid_profile_without_inheritance_from_validation_context(fake_pro "profiles_path": fake_profiles_path, "profile_identifier": "c", "rocrate_uri": ValidROC().wrroc_paper, - "enable_profile_inheritance": False + "enable_profile_inheritance": False, } settings = ValidationSettings(**settings) @@ -126,7 +126,8 @@ def test_profile_spec_properties(fake_profiles_path: str): assert profile.version == "1.0.0", "The profile version should be 1.0.0" assert profile.is_profile_of == ["https://w3id.org/a"], "The profileOf property should be ['a']" assert profile.is_transitive_profile_of == [ - "https://w3id.org/a"], "The transitiveProfileOf property should be ['a']" + "https://w3id.org/a" + ], 
"The transitiveProfileOf property should be ['a']" def test_profiles_loading_free_folder_structure(profiles_with_free_folder_structure_path: str): @@ -204,8 +205,9 @@ def __perform_test__(profile_identifier: str, expected_inherited_profiles: list[ # The number of profiles should be 1 profiles_names = [_.token for _ in profile.inherited_profiles] - assert profiles_names == expected_inherited_profiles, \ - f"The number of profiles should be {expected_inherited_profiles}" + assert ( + profiles_names == expected_inherited_profiles + ), f"The number of profiles should be {expected_inherited_profiles}" # Test the inheritance mode with 1 profile __perform_test__("a", []) @@ -250,7 +252,7 @@ def test_load_invalid_profile_with_override_on_same_profile(fake_profiles_path: "profile_identifier": "invalid-duplicated-shapes", "rocrate_uri": ValidROC().wrroc_paper, "enable_profile_inheritance": True, - "allow_requirement_check_override": False + "allow_requirement_check_override": False, } settings = ValidationSettings(**settings) @@ -273,7 +275,7 @@ def test_load_valid_profile_with_override_on_inherited_profile(fake_profiles_pat "profile_identifier": "c-overridden", "rocrate_uri": ValidROC().wrroc_paper, "enable_profile_inheritance": True, - "allow_requirement_check_override": True + "allow_requirement_check_override": True, } settings = ValidationSettings(**settings) @@ -296,29 +298,31 @@ def test_load_valid_profile_with_override_on_inherited_profile(fake_profiles_pat def test_zero_shape_target_profile_triggers_pyshacl_run(fake_profiles_path: str): - """Regression test for the 0-shape profile bug: - when the target profile has no SHACL checks of its own, + """Regression test for the 0-shape profile bug: + when the target profile has no SHACL checks of its own, Validator must still drive a single pyshacl run - on the merged shapes graph so inherited shapes get evaluated. 
- Without the fix in `Validator.__ensure_target_shacl_run__`, + on the merged shapes graph so inherited shapes get evaluated. + Without the fix in `Validator.__ensure_target_shacl_run__`, no SHACLCheck would be recorded as executed for the wrapper target.""" - - settings = ValidationSettings(**{ - "profiles_path": fake_profiles_path, - "profile_identifier": "c-wrapper", - "rocrate_uri": ValidROC().wrroc_paper, - "enable_profile_inheritance": True, - "allow_requirement_check_override": True, - "disable_check_for_duplicates": True, - }) + + settings = ValidationSettings( + **{ + "profiles_path": fake_profiles_path, + "profile_identifier": "c-wrapper", + "rocrate_uri": ValidROC().wrroc_paper, + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + "disable_check_for_duplicates": True, + } + ) result = Validator(settings).validate() - executed_shacl = [c for c in result.executed_checks - if isinstance(c, SHACLCheck)] + executed_shacl = [c for c in result.executed_checks if isinstance(c, SHACLCheck)] assert executed_shacl, ( "Expected at least one inherited SHACLCheck to be executed for the " "c-wrapper target. None recorded — the zero-shape pyshacl run was " - "skipped.") + "skipped." 
+ ) def test_profile_parents(check_overriding_profiles_path: str): @@ -390,29 +394,31 @@ def test_profile_check_overriding(check_overriding_profiles_path: str): def check_profile(profile, check, inherited_profiles, overridden_by, override): # Check inherited profiles - assert len(profile.inherited_profiles) == len(inherited_profiles), \ - f"The number of inherited profiles should be {len(inherited_profiles)}" + assert len(profile.inherited_profiles) == len( + inherited_profiles + ), f"The number of inherited profiles should be {len(inherited_profiles)}" inherited_profiles_tokens = [_.token for _ in profile.inherited_profiles] - assert set(inherited_profiles_tokens) == set(inherited_profiles), \ - f"The inherited profiles should be {inherited_profiles}" + assert set(inherited_profiles_tokens) == set( + inherited_profiles + ), f"The inherited profiles should be {inherited_profiles}" # Check overridden status - logger.debug("%r overridden by: %r", check.identifier, [ - _.requirement.profile.identifier for _ in check.overridden_by]) - assert check.overridden == (len(overridden_by) > 0), \ - f"The check overridden status should be {len(overridden_by) > 0}" - assert len(check.overridden_by) == len(overridden_by), \ - f"The number of overridden checks should be {len(overridden_by)}" + logger.debug( + "%r overridden by: %r", check.identifier, [_.requirement.profile.identifier for _ in check.overridden_by] + ) + assert check.overridden == ( + len(overridden_by) > 0 + ), f"The check overridden status should be {len(overridden_by) > 0}" + assert len(check.overridden_by) == len( + overridden_by + ), f"The number of overridden checks should be {len(overridden_by)}" overridden_by_tokens = [_.requirement.profile.identifier for _ in check.overridden_by] - assert set(overridden_by_tokens) == set(overridden_by), \ - f"The overridden checks should be {overridden_by}" + assert set(overridden_by_tokens) == set(overridden_by), f"The overridden checks should be {overridden_by}" # Check 
override status - assert len(check.overrides) == len(override), \ - f"The number of overridden checks should be {len(override)}" + assert len(check.overrides) == len(override), f"The number of overridden checks should be {len(override)}" override_tokens = [_.requirement.profile.identifier for _ in check.overrides] - assert set(override_tokens) == set(override), \ - f"The overridden checks should be {override}" + assert set(override_tokens) == set(override), f"The overridden checks should be {override}" # Check the number of requirements and checks of each profile for profile in profiles: From 03a32fc3761ad428d1726437cf268d414954eb28 Mon Sep 17 00:00:00 2001 From: simleo Date: Thu, 7 May 2026 12:34:01 +0200 Subject: [PATCH 37/89] fix error message for bad position in HowToStep --- .../profiles/provenance-run-crate/must/1_howtostep.ttl | 4 ++-- .../profiles/provenance-run-crate/test_provrc_howtostep.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rocrate_validator/profiles/provenance-run-crate/must/1_howtostep.ttl b/rocrate_validator/profiles/provenance-run-crate/must/1_howtostep.ttl index c74f6ef71..cd86b44f4 100644 --- a/rocrate_validator/profiles/provenance-run-crate/must/1_howtostep.ttl +++ b/rocrate_validator/profiles/provenance-run-crate/must/1_howtostep.ttl @@ -52,14 +52,14 @@ provenance-run-crate:ProvRCHowToStepRequired a sh:NodeShape ; sh:property [ a sh:PropertyShape ; sh:name "HowToStep position type" ; - sh:description "If specified, position must be a string representing an integer" ; + sh:description "If specified, position must be an integer or a string representing an integer" ; sh:path schema:position ; sh:or ( [ sh:datatype xsd:string ; ] [ sh:datatype xsd:integer ; ] ) ; sh:pattern "\\d+" ; - sh:message "If specified, position must be a string representing an integer" ; + sh:message "If specified, position must be an integer or a string representing an integer" ; ] ; sh:property [ a sh:PropertyShape ; diff --git 
a/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py b/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py index a8cf1cce8..41654474d 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py @@ -97,14 +97,14 @@ def test_provrc_howtostep_no_position(): def test_provrc_howtostep_bad_position(): """\ Test a Provenance Run Crate where a HowToStep has a position that is not - a string representing an integer. + an integer or a string representing an integer. """ do_entity_test( InvalidProvRC().howtostep_bad_position, Severity.REQUIRED, False, ["ProvRC HowToStep MUST"], - ["If specified, position must be a string representing an integer"], + ["If specified, position must be an integer or a string representing an integer"], profile_identifier="provenance-run-crate" ) From a9783ce0ac2df6ea6c601a2b09b55225f635a16d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 8 May 2026 11:03:11 +0200 Subject: [PATCH 38/89] feat(model): :sparkles: extend Profile model to compute descendants --- rocrate_validator/models.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 3c6e1544a..ac4144b5a 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -456,6 +456,14 @@ def siblings(self) -> list[Profile]: """ return self.get_sibling_profiles(self) + @property + def descendants(self) -> list[Profile]: + """ + The list of profiles that are descendants of this profile + (i.e., profiles that have this profile among their inherited profiles). 
+ """ + return self.get_descendants(self) + @property def readme_file_path(self) -> Path: """ @@ -868,6 +876,20 @@ def get_sibling_profiles(cls, profile: Profile) -> list[Profile]: """ return [p for p in cls.__profiles_map.values() if profile in p.parents] + @classmethod + def get_descendants(cls, profile: Profile) -> list[Profile]: + """ + Get the transitive descendants of the given profile (any profile + that has `profile` among its `inherited_profiles`). + + :param profile: the profile + :type profile: Profile + + :return: the list of descendant profiles + :rtype: list[Profile] + """ + return [p for p in cls.__profiles_map.values() if profile in p.inherited_profiles] + @classmethod def all(cls) -> list[Profile]: """ From dd84c324f93defba9c934590e8e758bb65e9eb06 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 8 May 2026 11:50:15 +0200 Subject: [PATCH 39/89] feat(checks): :sparkles: support `deactivated` flag on RequirementCheck for Python and SHACL --- rocrate_validator/models.py | 6 +++ .../requirements/python/__init__.py | 20 ++++++++-- .../requirements/shacl/checks.py | 39 ++++++++++++++++++- .../requirements/shacl/models.py | 12 +++++- 4 files changed, 71 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index ac4144b5a..e4752b1e6 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1380,6 +1380,7 @@ def __init__( level: Optional[RequirementLevel] = LevelCollection.REQUIRED, description: Optional[str] = None, hidden: Optional[bool] = None, + deactivated: bool = False, ): self._requirement: Requirement = requirement self._order_number = 0 @@ -1387,6 +1388,7 @@ def __init__( self._level = level self._description = description self._hidden = hidden + self._deactivated = deactivated @property def order_number(self) -> int: @@ -1452,6 +1454,10 @@ def overrides(self) -> list[RequirementCheck]: def overridden(self) -> bool: return len(self.overridden_by) > 0 + @property + def 
deactivated(self) -> bool: + return self._deactivated + @property def hidden(self) -> bool: if self._hidden is not None: diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index b23c4a9d4..aaeb49d8e 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -38,11 +38,12 @@ def __init__(self, name: str, check_function: Callable[[RequirementCheck, ValidationContext], bool], description: Optional[str] = None, - level: Optional[LevelCollection] = LevelCollection.REQUIRED): + level: Optional[LevelCollection] = LevelCollection.REQUIRED, + deactivated: bool = False): """ check_function: a function that accepts an instance of PyFunctionCheck and a ValidationContext. """ - super().__init__(requirement, name, description=description, level=level) + super().__init__(requirement, name, description=description, level=level, deactivated=deactivated) sig = inspect.signature(check_function) if len(sig.parameters) != 2: @@ -115,11 +116,13 @@ def __init_checks__(self): f"Getting severity from path: {self.severity_from_path}") severity = self.severity_from_path or Severity.REQUIRED logger.debug("Severity log: %r", severity) + deactivated = bool(getattr(member, "deactivated", False)) check = self.requirement_check_class(self, check_name, member, description=check_description, - level=LevelCollection.get(severity.name) if severity else None) + level=LevelCollection.get(severity.name) if severity else None, + deactivated=deactivated) self._checks.append(check) logger.debug("Added check: %s %r", check_name, check) @@ -159,7 +162,9 @@ def decorator(cls): return decorator -def check(name: Optional[str] = None, severity: Optional[Severity] = None): +def check(name: Optional[str] = None, + severity: Optional[Severity] = None, + deactivated: bool = False): """ A decorator to mark a function as a check. 
@@ -178,6 +183,12 @@ def check(name: Optional[str] = None, severity: Optional[Severity] = None): :param severity: the severity level :type severity: Optional[Severity] + :param deactivated: when True, the check is skipped during validation. + Mirrors SHACL's ``sh:deactivated``: an extension profile may redeclare + a check with the same name as one in a parent profile and set this + flag to disable the inherited check. + :type deactivated: bool + :return: the decorated function :rtype: Callable """ @@ -193,6 +204,7 @@ def decorator(func): func.check = True func.name = check_name func.severity = severity + func.deactivated = deactivated return func return decorator diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 012c87810..063a850c3 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -16,6 +16,9 @@ from timeit import default_timer as timer from typing import Optional +from rdflib import Literal, Namespace + +from rocrate_validator.constants import SHACL_NS from rocrate_validator.errors import ROCrateMetadataNotFoundError from rocrate_validator.events import EventType from rocrate_validator.models import ( @@ -26,7 +29,7 @@ SkipRequirementCheck, ValidationContext, ) -from rocrate_validator.requirements.shacl.models import Shape +from rocrate_validator.requirements.shacl.models import Shape, ShapesRegistry from rocrate_validator.requirements.shacl.utils import make_uris_relative, resolve_parent_shape from rocrate_validator.requirements.shacl.validator import ( SHACLValidationAlreadyProcessed, @@ -40,6 +43,9 @@ logger = logging.getLogger(__name__) +_SH = Namespace(SHACL_NS) +_TRUE_LITERALS = (Literal(True), Literal("true", datatype=None)) + class SHACLCheck(RequirementCheck): """ @@ -97,6 +103,37 @@ def shape(self) -> Shape: def root(self) -> bool: return self._root + @property + def deactivated(self) -> bool: + if self._deactivated: + 
return True + shape = self._shape + if shape is None: + return False + # Same-profile deactivation (cases B & C): the shape itself carries + # `sh:deactivated true`, possibly because it was redeclared in an + # extension profile via override-by-name. + for value in shape.graph.objects(subject=shape.node, predicate=_SH.deactivated): + if isinstance(value, Literal) and bool(value.toPython()): + return True + # Cross-profile deactivation (case A): a descendant profile may add + # ` sh:deactivated true` to its own shapes graph, + # without redeclaring the shape. Scan only profiles that inherit + # (transitively) from the shape's owning profile, so unrelated + # profiles loaded in the same process can't influence the result. + # Validator.__do_validate__ pre-loads the shape graphs. + from rocrate_validator.models import Profile + + owning_profile = self.requirement.profile + for profile in Profile.get_descendants(owning_profile): + try: + registry = ShapesRegistry.get_instance(profile) + except Exception: + continue + if registry.is_node_deactivated(shape.node): + return True + return False + @property def description(self) -> str: if self._shape.description: diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 7b61bf622..4da019ad2 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -17,7 +17,7 @@ from pathlib import Path from typing import Optional, Union -from rdflib import Graph, Namespace, URIRef +from rdflib import Graph, Literal, Namespace, URIRef from rdflib.term import Node from rocrate_validator.constants import SHACL_NS @@ -344,6 +344,16 @@ def shapes_graph(self) -> Graph: g += self._shapes_graph return g + def is_node_deactivated(self, node: Node) -> bool: + """Return True if the underlying shapes graph asserts + ` sh:deactivated true`. 
Avoids the copy made by `shapes_graph` + so it is safe to call from hot paths.""" + deactivated = Namespace(SHACL_NS).deactivated + for value in self._shapes_graph.objects(subject=node, predicate=deactivated): + if isinstance(value, Literal) and bool(value.toPython()): + return True + return False + def load_shapes(self, shapes_path: Union[str, Path], publicID: Optional[str] = None) -> list[Shape]: """ Load the shapes from the graph From 4196f0e8f57db44c4b7aad97b0a38f8e2fa6a15e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 8 May 2026 11:56:38 +0200 Subject: [PATCH 40/89] feat(model): :sparkles: skip deactivated Python checks --- rocrate_validator/models.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index e4752b1e6..9a6212854 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -1110,6 +1110,10 @@ def _do_validate_(self, context: ValidationContext) -> bool: [_.identifier for _ in check.overridden_by], ) continue + if check.deactivated: + logger.debug("Skipping check '%s' because deactivated", check.identifier) + context.result._add_skipped_check(check) + continue # Determine whether to skip event notification for inherited profiles skip_event_notify = False if ( @@ -2949,6 +2953,12 @@ def __do_validate__(self, requirements: Optional[list[Requirement]] = None) -> V # set the profiles to validate against profiles = context.profiles assert len(profiles) > 0, "No profiles to validate" + # Pre-load every profile's requirements so all shape graphs are + # populated before the validation loop runs. This lets a check + # see `sh:deactivated true` triples declared by descendant + # profiles that have not yet been visited. 
+ for p in profiles: + _ = p.requirements self.notify(EventType.VALIDATION_START) for profile in profiles: logger.debug( From 71540b1dba2445a19d3964131e182b0f43bfd7da Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 8 May 2026 11:57:04 +0200 Subject: [PATCH 41/89] chore(model): :art: reformat code --- .../requirements/shacl/checks.py | 14 ++-- rocrate_validator/services.py | 71 ++++++++++--------- 2 files changed, 48 insertions(+), 37 deletions(-) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 063a850c3..ca95b18b6 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -366,12 +366,16 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): # all together and not profile by profile if requirementCheck.identifier not in failed_requirement_checks_notified: shacl_context.result._add_executed_check(requirementCheck, False) - if requirementCheck.identifier not in failed_requirement_checks_notified and \ - requirementCheck.requirement.profile != shacl_context.current_validation_profile: + if ( + requirementCheck.identifier not in failed_requirement_checks_notified + and requirementCheck.requirement.profile != shacl_context.current_validation_profile + ): failed_requirement_checks_notified.append(requirementCheck.identifier) - shacl_context.validator.notify(RequirementCheckValidationEvent( - EventType.REQUIREMENT_CHECK_VALIDATION_END, - requirementCheck, validation_result=False)) + shacl_context.validator.notify( + RequirementCheckValidationEvent( + EventType.REQUIREMENT_CHECK_VALIDATION_END, requirementCheck, validation_result=False + ) + ) logger.debug( "Added failed check to the context: %s", requirementCheck.identifier, diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index 5e2461087..530667ea1 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -12,6 +12,7 
@@ # See the License for the specific language governing permissions and # limitations under the License. + import shutil import tempfile import zipfile @@ -20,8 +21,7 @@ from rocrate_validator.utils import log as logging from rocrate_validator.events import Subscriber -from rocrate_validator.models import (Profile, Severity, ValidationResult, - ValidationSettings, Validator) +from rocrate_validator.models import Profile, Severity, ValidationResult, ValidationSettings, Validator from rocrate_validator.utils.uri import URI from rocrate_validator.utils.paths import get_profiles_path from rocrate_validator.utils.http import HttpRequester @@ -43,9 +43,8 @@ def detect_profiles(settings: Union[dict, ValidationSettings]) -> list[Profile]: def validate_metadata_as_dict( - metadata_dict: dict, - settings: Union[dict, ValidationSettings], - subscribers: Optional[list[Subscriber]] = None) -> ValidationResult: + metadata_dict: dict, settings: Union[dict, ValidationSettings], subscribers: Optional[list[Subscriber]] = None +) -> ValidationResult: """ Validate the RO-Crate metadata only against a profile and return the validation result. 
""" @@ -62,8 +61,9 @@ def validate_metadata_as_dict( return validate(settings, subscribers) -def validate(settings: Union[dict, ValidationSettings], - subscribers: Optional[list[Subscriber]] = None) -> ValidationResult: +def validate( + settings: Union[dict, ValidationSettings], subscribers: Optional[list[Subscriber]] = None +) -> ValidationResult: """ Validate a RO-Crate against a profile and return the validation result @@ -85,8 +85,9 @@ def validate(settings: Union[dict, ValidationSettings], return result -def __initialise_validator__(settings: Union[dict, ValidationSettings], - subscribers: Optional[list[Subscriber]] = None) -> Validator: +def __initialise_validator__( + settings: Union[dict, ValidationSettings], subscribers: Optional[list[Subscriber]] = None +) -> Validator: """ Validate a RO-Crate against a profile """ @@ -146,13 +147,13 @@ def __extract_and_validate_rocrate__(rocrate_path: Path): # i.e., if the RO-Crate is a URL. If so, download the RO-Crate # and extract it to a temporary directory. We support either http or https # or ftp protocols to download the remote RO-Crate. - if rocrate_path.scheme in ('http', 'https', 'ftp'): + if rocrate_path.scheme in ("http", "https", "ftp"): logger.debug("RO-Crate is a remote RO-Crate") # create a temp folder to store the downloaded RO-Crate with tempfile.NamedTemporaryFile(delete=False) as tmp_file: # download the remote RO-Crate with HttpRequester().get(rocrate_path.uri, stream=True, allow_redirects=True) as r: - with open(tmp_file.name, 'wb') as f: + with open(tmp_file.name, "wb") as f: shutil.copyfileobj(r.raw, f) logger.debug("RO-Crate downloaded to temporary file: %s", tmp_file.name) # continue with the validation process by extracting the RO-Crate and validating it @@ -171,15 +172,16 @@ def __extract_and_validate_rocrate__(rocrate_path: Path): return __init_validator__(settings) else: raise ValueError( - f"Invalid RO-Crate URI: {rocrate_path}. 
" - "It MUST be a local directory or a ZIP file (local or remote).") + f"Invalid RO-Crate URI: {rocrate_path}. It MUST be a local directory or a ZIP file (local or remote)." + ) -def get_profiles(profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Optional[Path] = None, - severity=Severity.OPTIONAL, - allow_requirement_check_override: bool = - ValidationSettings.allow_requirement_check_override) -> list[Profile]: +def get_profiles( + profiles_path: Path = DEFAULT_PROFILES_PATH, + extra_profiles_path: Optional[Path] = None, + severity=Severity.OPTIONAL, + allow_requirement_check_override: bool = ValidationSettings.allow_requirement_check_override, +) -> list[Profile]: """ Get the list of profiles supported by the package. The profile source path can be overridden by specifying ``profiles_path``. @@ -203,20 +205,23 @@ def get_profiles(profiles_path: Path = DEFAULT_PROFILES_PATH, :return: the list of profiles :rtype: list[Profile] """ - profiles = Profile.load_profiles(profiles_path, - extra_profiles_path=extra_profiles_path, - severity=severity, - allow_requirement_check_override=allow_requirement_check_override) + profiles = Profile.load_profiles( + profiles_path, + extra_profiles_path=extra_profiles_path, + severity=severity, + allow_requirement_check_override=allow_requirement_check_override, + ) logger.debug("Profiles loaded: %s", profiles) return profiles -def get_profile(profile_identifier: str, - profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Optional[Path] = None, - severity=Severity.OPTIONAL, - allow_requirement_check_override: bool = - ValidationSettings.allow_requirement_check_override) -> Profile: +def get_profile( + profile_identifier: str, + profiles_path: Path = DEFAULT_PROFILES_PATH, + extra_profiles_path: Optional[Path] = None, + severity=Severity.OPTIONAL, + allow_requirement_check_override: bool = ValidationSettings.allow_requirement_check_override, +) -> Profile: """ Get the profile with the given identifier. 
The profile source path can be overridden through ``profiles_path``. @@ -245,8 +250,10 @@ def get_profile(profile_identifier: str, :rtype: Profile """ - profiles = get_profiles(profiles_path, - extra_profiles_path=extra_profiles_path, - severity=severity, - allow_requirement_check_override=allow_requirement_check_override) + profiles = get_profiles( + profiles_path, + extra_profiles_path=extra_profiles_path, + severity=severity, + allow_requirement_check_override=allow_requirement_check_override, + ) return Profile.find_in_list(profiles, profile_identifier) From 1acd74dd8bd5e5a9168a7d7119434d23a1a5d3ae Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 8 May 2026 11:59:49 +0200 Subject: [PATCH 42/89] test(checks): :white_check_mark: cover `deactivated` flag on RequirementCheck --- .../fake/c-deactivated-direct/profile.ttl | 28 ++++ .../profiles/fake/c-deactivated/profile.ttl | 28 ++++ .../profiles/fake/c-deactivated/shape_c.ttl | 41 +++++ tests/unit/requirements/test_profiles.py | 153 +++++++++++++++++- 4 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 tests/data/profiles/fake/c-deactivated-direct/profile.ttl create mode 100644 tests/data/profiles/fake/c-deactivated/profile.ttl create mode 100644 tests/data/profiles/fake/c-deactivated/shape_c.ttl diff --git a/tests/data/profiles/fake/c-deactivated-direct/profile.ttl b/tests/data/profiles/fake/c-deactivated-direct/profile.ttl new file mode 100644 index 000000000..695b2e4bf --- /dev/null +++ b/tests/data/profiles/fake/c-deactivated-direct/profile.ttl @@ -0,0 +1,28 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dct: . +@prefix prof: . +@prefix role: . +@prefix rdfs: . + + + a prof:Profile ; + rdfs:label "Profile C4" ; + rdfs:comment """Comment for Profile C4 (deactivates Profile C's ShapeC by IRI, no override-by-name)."""@en ; + dct:publisher ; + prof:isProfileOf ; + prof:isTransitiveProfileOf , ; + prof:hasToken "c-deactivated-direct" ; +. diff --git a/tests/data/profiles/fake/c-deactivated/profile.ttl b/tests/data/profiles/fake/c-deactivated/profile.ttl new file mode 100644 index 000000000..8d62ab379 --- /dev/null +++ b/tests/data/profiles/fake/c-deactivated/profile.ttl @@ -0,0 +1,28 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dct: . +@prefix prof: . +@prefix role: . +@prefix rdfs: . + + + a prof:Profile ; + rdfs:label "Profile C3" ; + rdfs:comment """Comment for Profile C3 (deactivates the inherited check from Profile C)."""@en ; + dct:publisher ; + prof:isProfileOf ; + prof:isTransitiveProfileOf , ; + prof:hasToken "c-deactivated" ; +. 
diff --git a/tests/data/profiles/fake/c-deactivated/shape_c.ttl b/tests/data/profiles/fake/c-deactivated/shape_c.ttl new file mode 100644 index 000000000..26cdb9bd1 --- /dev/null +++ b/tests/data/profiles/fake/c-deactivated/shape_c.ttl @@ -0,0 +1,41 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix dct: . +@prefix rdf: . +@prefix schema_org: . +@prefix sh: . +@prefix xml1: . +@prefix xsd: . + + +# Same NodeShape + PropertyShape names as the inherited Shape C in profile c, +# so the existing override-by-name mechanism wires this shape to the parent's. +# The PropertyShape is marked sh:deactivated true: pyshacl skips it during +# validation, and the override surfaces via RequirementCheck.deactivated. +ro:ShapeC + a sh:NodeShape ; + sh:name "The Shape C" ; + sh:description "Deactivates the inherited Shape C check from profile c." ; + sh:targetNode ro:ro-crate-metadata.json ; + sh:property [ + a sh:PropertyShape ; + sh:name "Check Metadata File Descriptor entity existence" ; + sh:description "Check disabled by extension profile" ; + sh:path rdf:type ; + sh:minCount 1 ; + sh:deactivated true ; + sh:message "Disabled" ; + ] . 
diff --git a/tests/unit/requirements/test_profiles.py b/tests/unit/requirements/test_profiles.py index d0dad53f3..bb1f46469 100644 --- a/tests/unit/requirements/test_profiles.py +++ b/tests/unit/requirements/test_profiles.py @@ -16,11 +16,13 @@ import os import pytest +from rdflib import Literal, Namespace -from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER +from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER, SHACL_NS from rocrate_validator.errors import DuplicateRequirementCheck, InvalidProfilePath, ProfileSpecificationError from rocrate_validator.models import Profile, ValidationContext, ValidationSettings, Validator from rocrate_validator.requirements.shacl.checks import SHACLCheck +from rocrate_validator.requirements.shacl.models import ShapesRegistry from tests.ro_crates import InvalidFileDescriptorEntity, ValidROC # set up logging @@ -468,3 +470,152 @@ def check_profile(profile, check, inherited_profiles, overridden_by, override): # Check the profile 'x' elif profile.token == "x": check_profile(profile, check, ["a", "b", "d"], [], ["d"]) + + +def test_python_check_decorator_sets_deactivated_flag(): + """The @check decorator must propagate the `deactivated` flag onto the + decorated function so that PyRequirement.__init_checks__ can read it.""" + from rocrate_validator.requirements.python import check + + @check(name="off", deactivated=True) + def disabled(self, ctx): # noqa: ANN001 + return False + + @check(name="on") + def enabled(self, ctx): # noqa: ANN001 + return True + + assert disabled.deactivated is True + assert enabled.deactivated is False + + +def test_shacl_shape_with_deactivated_marks_check_skipped(fake_profiles_path: str): + """A child profile that overrides an inherited NodeShape by `sh:name` and + sets `sh:deactivated true` should produce a check whose `deactivated` + property is True; the parent's check should be marked as `overridden`.""" + settings = { + "profiles_path": fake_profiles_path, + 
"profile_identifier": "c-deactivated", + "rocrate_uri": ValidROC().wrroc_paper, + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + } + + settings = ValidationSettings(**settings) + validator = Validator(settings) + context = ValidationContext(validator, validator.validation_settings) + + profiles = context.profiles + profile_tokens = sorted(p.token for p in profiles) + # Inheritance chain: a <- c <- c-deactivated + assert profile_tokens == ["a", "c", "c-deactivated"] + + target = next(p for p in profiles if p.token == "c-deactivated") + parent_c = next(p for p in profiles if p.token == "c") + + # The PropertyShape carries `sh:deactivated true`; the matching check is + # the second one (the first is the hidden NodeShape root check). + target_property_check = target.requirements[0].get_checks()[1] + parent_property_check = parent_c.requirements[0].get_checks()[1] + + assert ( + target_property_check.deactivated is True + ), "The deactivated property should reflect sh:deactivated true on the PropertyShape" + + # The parent property check is overridden by the child's (same sh:name). + overridden_by_tokens = [c.requirement.profile.token for c in parent_property_check.overridden_by] + assert "c-deactivated" in overridden_by_tokens, "The parent check should be reported as overridden by c-deactivated" + assert parent_property_check.overridden is True + + # Default state for a non-deactivated check. + assert parent_property_check.deactivated is False + + +def test_shacl_check_deactivated_via_cross_profile_triple(fake_profiles_path: str): + """A child profile that adds ` sh:deactivated true` to its + own shapes graph (without redeclaring the shape) should cause the parent's + check to report `deactivated=True`. 
Verifies the cross-profile lookup in + SHACLCheck.deactivated and the pre-load pass in Validator.""" + + settings = ValidationSettings( + **{ + "profiles_path": fake_profiles_path, + "profile_identifier": "c-deactivated-direct", + "rocrate_uri": ValidROC().wrroc_paper, + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + } + ) + validator = Validator(settings) + context = ValidationContext(validator, validator.validation_settings) + + profiles = context.profiles + profile_tokens = sorted(p.token for p in profiles) + assert profile_tokens == ["a", "c", "c-deactivated-direct"] + + target = next(p for p in profiles if p.token == "c-deactivated-direct") + parent_c = next(p for p in profiles if p.token == "c") + + # Trigger lazy loading of every profile's shape graph (the Validator + # would do this in __do_validate__; we replay it here for the unit test). + for p in profiles: + _ = p.requirements + + parent_shape_check = next(c for c in parent_c.requirements[0].get_checks() if isinstance(c, SHACLCheck)) + assert parent_shape_check.deactivated is False, "Sanity check: the parent shape should not be deactivated yet" + + # Simulate what a child-profile shape file would contribute: a single + # ` sh:deactivated true` triple in its own shapes graph. + target_registry = ShapesRegistry.get_instance(target) + sh = Namespace(SHACL_NS) + target_registry._shapes_graph.add((parent_shape_check.shape.node, sh.deactivated, Literal(True))) + + assert ( + parent_shape_check.deactivated is True + ), "The parent's check must read sh:deactivated true from the child's shapes graph" + + +def test_shacl_check_deactivation_scoped_to_descendants(fake_profiles_path: str): + """A `sh:deactivated true` triple declared by a profile that does NOT + inherit from the shape's owning profile must be ignored. 
Otherwise + unrelated profiles loaded in the same process could spuriously deactivate + one another's checks.""" + + settings = ValidationSettings( + **{ + "profiles_path": fake_profiles_path, + "profile_identifier": "c", + "rocrate_uri": ValidROC().wrroc_paper, + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + } + ) + validator = Validator(settings) + context = ValidationContext(validator, validator.validation_settings) + + # Force population of the global profile registry by listing all profiles + # under the fake_profiles_path, then resolve the specific ones we need. + all_profiles = Profile.load_profiles(profiles_path=fake_profiles_path) + parent_c = next(p for p in all_profiles if p.token == "c") + # Profile "b" is a descendant of "a" but NOT of "c" — unrelated. + profile_b = next(p for p in all_profiles if p.token == "b") + assert parent_c not in profile_b.inherited_profiles + assert profile_b not in Profile.get_descendants(parent_c) + + # Trigger lazy loading. + for p in all_profiles: + _ = p.requirements + _ = context.profiles # warm context too + + parent_shape_check = next(c for c in parent_c.requirements[0].get_checks() if isinstance(c, SHACLCheck)) + assert parent_shape_check.deactivated is False + + # Inject a deactivation triple into an unrelated profile's registry. 
+ sh = Namespace(SHACL_NS) + ShapesRegistry.get_instance(profile_b)._shapes_graph.add( + (parent_shape_check.shape.node, sh.deactivated, Literal(True)) + ) + + assert ( + parent_shape_check.deactivated is False + ), "An unrelated profile's deactivation triple must not affect the check" From 0b6bff7fa74e51b08ef42dd7f2f3cf0d4e0f2384 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 8 May 2026 12:24:49 +0200 Subject: [PATCH 43/89] docs(profiles): :sparkles: document check override-by-name and deactivation --- docs/11_writing_a_profile.rst | 148 ++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/docs/11_writing_a_profile.rst b/docs/11_writing_a_profile.rst index e46873509..20a27b03e 100644 --- a/docs/11_writing_a_profile.rst +++ b/docs/11_writing_a_profile.rst @@ -94,6 +94,154 @@ These instructions assume you are familiar with code development using Python an #. When your profile & tests are written, open a pull request to contribute it back to the repository! +Overriding inherited checks +--------------------------- + +When a profile inherits from another profile (via ``prof:isProfileOf`` / +``prof:isTransitiveProfileOf``), it automatically receives every check +declared by its ancestors. The validator additionally supports +**override-by-name**: a child profile can replace an inherited check by +declaring a new check with the **same name**. + +This allows an extension profile to *redefine* the content of an inherited +check — for example, to make a constraint stricter or looser, change its +severity, or, as described in the next section, fully deactivate it. + +Override-by-name is enabled by default. It can be disabled via the +``allow_requirement_check_override`` validation setting (CLI / API), which +will raise an error on duplicate check names instead. + +SHACL checks +^^^^^^^^^^^^ + +Each SHACL ``NodeShape`` / ``PropertyShape`` becomes a check whose name is +its ``sh:name``. 
To override an inherited check, declare a shape in the +extension profile with the **same** ``sh:name`` as the inherited one: + +.. code-block:: turtle + + # Parent profile + ro:ShapeC + a sh:NodeShape ; + sh:name "The Shape C" ; + sh:targetNode ro:ro-crate-metadata.json ; + sh:property [ + a sh:PropertyShape ; + sh:name "Check Metadata File Descriptor entity existence" ; + sh:path rdf:type ; + sh:minCount 1 ; + sh:message "Missing entity" ; + ] . + +.. code-block:: turtle + + # Extension profile — overrides the inherited PropertyShape by sh:name + ro:ShapeC + a sh:NodeShape ; + sh:name "The Shape C" ; + sh:targetNode ro:ro-crate-metadata.json ; + sh:property [ + a sh:PropertyShape ; + sh:name "Check Metadata File Descriptor entity existence" ; + sh:path rdf:type ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:message "Stricter override from extension profile" ; + ] . + +Both top-level shapes and ``PropertyShape`` entries nested inside a parent +``NodeShape`` (i.e., declared inline, without an absolute IRI) can be +overridden this way. + +Python checks +^^^^^^^^^^^^^ + +Python checks declared via the ``@check`` decorator are matched by their +``name`` argument. To override an inherited Python check, declare a new +function with the same ``name`` in the extension profile: + +.. code-block:: python + + # In the extension profile's checks module + from rocrate_validator.requirements.python import check + + @check(name="Check Metadata File Descriptor entity existence") + def overridden_check(self, ctx): + # New implementation that replaces the inherited one + ... + +Deactivating inherited checks +----------------------------- + +A child profile can also **fully deactivate** a check inherited from one of +its ancestors. A deactivated check is skipped during validation and +reported as such in the validation result. 
This is useful when an extension +profile relaxes the parent's expectations, or replaces a coarse-grained +check with a more specific one declared elsewhere in the same profile. + +SHACL checks +^^^^^^^^^^^^ + +Two complementary mechanisms are supported, depending on whether the shape +to disable has an absolute IRI of its own. + +**Shape with an absolute IRI** (e.g. a top-level ``NodeShape`` or a named +``PropertyShape``): reference the shape by IRI from the extension profile +and mark it as deactivated, without redeclaring it. + +.. code-block:: turtle + + # Extension profile + ro:ShapeC sh:deactivated true . + +**Nested ``PropertyShape`` without an absolute IRI** (a property declared +inline inside a parent ``NodeShape``): use the override-by-name mechanism +described in the previous section. Declare a new ``PropertyShape`` in the +extension profile with the same ``sh:name`` as the one to disable, and set +``sh:deactivated true`` on it. This overrides the parent's +``PropertyShape``, and the validator reports the resulting check as +deactivated. + +.. code-block:: turtle + + # Extension profile — disables the inherited PropertyShape by sh:name + ro:ShapeC + a sh:NodeShape ; + sh:name "The Shape C" ; + sh:targetNode ro:ro-crate-metadata.json ; + sh:property [ + a sh:PropertyShape ; + sh:name "Check Metadata File Descriptor entity existence" ; + sh:path rdf:type ; + sh:deactivated true ; + ] . + +.. note:: + + Cross-profile deactivation is scoped to the transitive + descendants of the shape's owning profile: a ``sh:deactivated true`` triple declared by a profile + that does not inherit (directly or transitively) from the shape's + owning profile is ignored. This prevents unrelated profiles loaded in + the same process from interfering with one another. + +Python checks +^^^^^^^^^^^^^ + +The ``@check`` decorator accepts a ``deactivated`` flag, mirroring SHACL's +``sh:deactivated``.
Combined with override-by-name, an extension profile +can disable an inherited Python check by redeclaring it with +``deactivated=True``: + +.. code-block:: python + + from rocrate_validator.requirements.python import check + + @check(name="Check Metadata File Descriptor entity existence", + deactivated=True) + def disabled(self, ctx): + # Body is irrelevant — the check is skipped during validation. + return True + Running validator & tests during profile development ---------------------------------------------------- From d1eabba92ea0867effb7f0938edd3f5212e6923a Mon Sep 17 00:00:00 2001 From: Florian Wetzels Date: Mon, 11 May 2026 16:28:26 +0200 Subject: [PATCH 44/89] started working on ISA process fix --- .../profiles/isa-ro-crate/3_process.ttl | 25 ++++++++++++++++++- .../profiles/isa-ro-crate/ontology.ttl | 5 ++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl index 40b6fdc7e..29751e1f8 100644 --- a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl @@ -24,6 +24,29 @@ @prefix validator: . @prefix xsd: . +ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA processes within the RO-Crate" ; + sh:description "A Process has type LabProcess and is attached to a Study or Assay." ; + sh:targetClass bioschemas:LabProcess ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Process ; + # The condition: need to be attached to a Study or Assay + sh:condition [ + sh:property [ + sh:path [ sh:inversePath schema:about ] ; + sh:or ( + [sh:class isa-ro-crate:Study] + [sh:class isa-ro-crate:Assay] + ) ; + ] ; + ] ; + ] +. 
+ # check process must have name @@ -33,7 +56,7 @@ isa-ro-crate:ProcessMustHaveName a sh:NodeShape ; sh:name "Process MUST have name" ; sh:description "A Process MUST have a name" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; diff --git a/rocrate_validator/profiles/isa-ro-crate/ontology.ttl b/rocrate_validator/profiles/isa-ro-crate/ontology.ttl index 8e48b75bc..7b42b8279 100644 --- a/rocrate_validator/profiles/isa-ro-crate/ontology.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/ontology.ttl @@ -36,6 +36,11 @@ isa-ro-crate:Assay rdf:type owl:Class ; rdfs:subClassOf schema:Dataset ; rdfs:label "Assay"@en . +# Process +isa-ro-crate:Process rdf:type owl:Class ; + rdfs:subClassOf bioschemas:LabProcess ; + rdfs:label "Process"@en . + isa-ro-crate:Parameter rdf:type owl:Class ; rdfs:subClassOf schema:PropertyValue ; rdfs:label "Parameter"@en . From c3e2d5f6e3611161b073205834492c91715e5689 Mon Sep 17 00:00:00 2001 From: Florian Wetzels Date: Mon, 11 May 2026 16:33:54 +0200 Subject: [PATCH 45/89] Process class for all process checks --- rocrate_validator/profiles/isa-ro-crate/3_process.ttl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl index 29751e1f8..f2e27cb0e 100644 --- a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl @@ -74,7 +74,7 @@ isa-ro-crate:ProcessMustHaveName a sh:NodeShape ; isa-ro-crate:ProcessMustBeReferencedFromDataset a sh:NodeShape ; sh:name "Process MUST be directly referenced from a dataset" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; @@ -90,7 +90,7 @@ isa-ro-crate:ProcessMustBeReferencedFromDataset a sh:NodeShape ; isa-ro-crate:ProcessShouldHaveObject a sh:NodeShape ; sh:name 
"Process SHOULD have an object" ; sh:description "A Process SHOULD have an object" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; sh:path schema:object ; @@ -116,7 +116,7 @@ isa-ro-crate:ProcessShouldHaveObject a sh:NodeShape ; isa-ro-crate:ProcessShouldHaveResult a sh:NodeShape ; sh:name "Process SHOULD have a result" ; sh:description "A Process SHOULD have a result" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; sh:path schema:result ; @@ -142,7 +142,7 @@ isa-ro-crate:ProcessShouldHaveResult a sh:NodeShape ; isa-ro-crate:ProcessShouldHaveProtocol a sh:NodeShape ; sh:name "Process SHOULD have a protocol" ; sh:description "A Process SHOULD have a protocol" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; sh:path bioschemas-prop:executesLabProtocol ; @@ -164,7 +164,7 @@ isa-ro-crate:ProcessShouldHaveProtocol a sh:NodeShape ; isa-ro-crate:ProcessShouldHaveParamValue a sh:NodeShape ; sh:name "Process SHOULD have a parameter value" ; sh:description "A Process SHOULD have a parameter value" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; sh:path bioschemas-prop:parameterValue ; From c5e29e4cf360516d4e6f7373393ad736746b0067 Mon Sep 17 00:00:00 2001 From: Florian Wetzels Date: Tue, 12 May 2026 12:40:18 +0200 Subject: [PATCH 46/89] fixed non-determinism with manual assay/study definition in process --- .../profiles/isa-ro-crate/1_study.ttl | 2 +- .../profiles/isa-ro-crate/2_assay.ttl | 2 +- .../profiles/isa-ro-crate/3_process.ttl | 47 +++++++++++++++++-- 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/rocrate_validator/profiles/isa-ro-crate/1_study.ttl b/rocrate_validator/profiles/isa-ro-crate/1_study.ttl index dc5ab239b..5207f7b02 100644 --- 
a/rocrate_validator/profiles/isa-ro-crate/1_study.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/1_study.ttl @@ -60,7 +60,7 @@ # ] . # Find studies and add isa-ro-crate:Study type to them, for easier retrieval for checks -ro-crate:FindStudies a sh:NodeShape, validator:HiddenShape; +isa-ro-crate:FindStudies a sh:NodeShape, validator:HiddenShape; sh:name "Identify Studies within the RO-Crate" ; sh:description "A Study has type Dataset and additionalType 'Study'." ; sh:targetClass schema:Dataset ; diff --git a/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl b/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl index 8158498de..806fea23a 100644 --- a/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl @@ -52,7 +52,7 @@ # sh:object isa-ro-crate:Assay ; # ] . -ro-crate:FindAssays a sh:NodeShape, validator:HiddenShape; +isa-ro-crate:FindAssays a sh:NodeShape, validator:HiddenShape; sh:name "Identify Assays within the RO-Crate" ; sh:description "An Assay has type Dataset and additionalType 'Assay'." ; sh:targetClass schema:Dataset ; diff --git a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl index f2e27cb0e..ddd4fd6e5 100644 --- a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl @@ -24,7 +24,7 @@ @prefix validator: . @prefix xsd: . -ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; +isa-ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; sh:name "Identify ISA processes within the RO-Crate" ; sh:description "A Process has type LabProcess and is attached to a Study or Assay." 
; sh:targetClass bioschemas:LabProcess ; @@ -38,15 +38,52 @@ ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; sh:condition [ sh:property [ sh:path [ sh:inversePath schema:about ] ; - sh:or ( - [sh:class isa-ro-crate:Study] - [sh:class isa-ro-crate:Assay] - ) ; + # sh:qualifiedValueShape [ + # sh:class isa-ro-crate:Assay + # ] ; + sh:qualifiedValueShape [ + sh:class schema:Dataset ; + sh:property [ + sh:path schema:additionalType ; + sh:or ( + [sh:hasValue "Study"] + [sh:hasValue "Assay"] + ) ; + ] ; + ] ; + sh:qualifiedMinCount 1 ; ] ; ] ; ] . +# isa-ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; +# sh:name "Identify ISA processes within the RO-Crate" ; +# sh:description "A Process has type LabProcess and is attached to a Study or Assay." ; +# sh:target [ +# a sh:SPARQLTarget ; +# sh:select """ +# PREFIX bioschemas: +# PREFIX isa-ro-crate: +# PREFIX schema: + +# SELECT ?this +# WHERE { +# ?this a bioschemas:LabProcess . +# ?assay schema:about ?this . +# ?assay a isa-ro-crate:Assay . +# } +# """ ; +# ] ; +# # Expand data graph with triples from the file data entity +# sh:rule [ +# a sh:TripleRule ; +# sh:subject sh:this ; +# sh:predicate rdf:type ; +# sh:object isa-ro-crate:Process ; +# ] +# . 
+ # check process must have name From e3fda59beb614572b9b7cd8da4222ad3bf34761f Mon Sep 17 00:00:00 2001 From: Florian Wetzels Date: Wed, 13 May 2026 14:24:58 +0200 Subject: [PATCH 47/89] added classes for most ISA types --- .../profiles/isa-ro-crate/1_study.ttl | 1 + .../profiles/isa-ro-crate/2_assay.ttl | 1 + .../profiles/isa-ro-crate/3_process.ttl | 17 ++--- .../profiles/isa-ro-crate/4_protocol.ttl | 30 +++++++- .../profiles/isa-ro-crate/5_sample.ttl | 43 ++++++++++- .../profiles/isa-ro-crate/6_data.ttl | 29 ++++++- .../profiles/isa-ro-crate/7_person.ttl | 67 +++++++++++++++-- .../profiles/isa-ro-crate/8_article.ttl | 33 +++++++- .../profiles/isa-ro-crate/ontology.ttl | 25 +++++++ .../profiles/isa-ro-crate/test_3_process.py | 75 ++++++++++--------- .../profiles/isa-ro-crate/test_4_protocol.py | 40 ++++++++++ 11 files changed, 297 insertions(+), 64 deletions(-) diff --git a/rocrate_validator/profiles/isa-ro-crate/1_study.ttl b/rocrate_validator/profiles/isa-ro-crate/1_study.ttl index 5207f7b02..b0c2b53e4 100644 --- a/rocrate_validator/profiles/isa-ro-crate/1_study.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/1_study.ttl @@ -64,6 +64,7 @@ isa-ro-crate:FindStudies a sh:NodeShape, validator:HiddenShape; sh:name "Identify Studies within the RO-Crate" ; sh:description "A Study has type Dataset and additionalType 'Study'." ; sh:targetClass schema:Dataset ; + sh:order 1 ; # Expand data graph with triples from the file data entity sh:rule [ a sh:TripleRule ; diff --git a/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl b/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl index 806fea23a..9a92f8090 100644 --- a/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl @@ -56,6 +56,7 @@ isa-ro-crate:FindAssays a sh:NodeShape, validator:HiddenShape; sh:name "Identify Assays within the RO-Crate" ; sh:description "An Assay has type Dataset and additionalType 'Assay'." 
; sh:targetClass schema:Dataset ; + sh:order 1 ; # Expand data graph with triples from the file data entity sh:rule [ a sh:TripleRule ; diff --git a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl index ddd4fd6e5..5805ce1f5 100644 --- a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl @@ -28,6 +28,7 @@ isa-ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; sh:name "Identify ISA processes within the RO-Crate" ; sh:description "A Process has type LabProcess and is attached to a Study or Assay." ; sh:targetClass bioschemas:LabProcess ; + sh:order 2 ; # Expand data graph with triples from the file data entity sh:rule [ a sh:TripleRule ; @@ -38,18 +39,11 @@ isa-ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; sh:condition [ sh:property [ sh:path [ sh:inversePath schema:about ] ; - # sh:qualifiedValueShape [ - # sh:class isa-ro-crate:Assay - # ] ; sh:qualifiedValueShape [ - sh:class schema:Dataset ; - sh:property [ - sh:path schema:additionalType ; - sh:or ( - [sh:hasValue "Study"] - [sh:hasValue "Assay"] - ) ; - ] ; + sh:or( + [sh:class isa-ro-crate:Assay] + [sh:class isa-ro-crate:Study] + ) ] ; sh:qualifiedMinCount 1 ; ] ; @@ -60,6 +54,7 @@ isa-ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; # isa-ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; # sh:name "Identify ISA processes within the RO-Crate" ; # sh:description "A Process has type LabProcess and is attached to a Study or Assay." ; +# sh:order 3 ; # sh:target [ # a sh:SPARQLTarget ; # sh:select """ diff --git a/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl b/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl index 6f5ef82e2..3079cdffa 100644 --- a/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl @@ -24,14 +24,36 @@ @prefix validator: . 
@prefix xsd: . - +isa-ro-crate:FindISAProtocols a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA protocols within the RO-Crate" ; + sh:description "A Protocol has type LabProtocol and is attached to an ISA process." ; + sh:targetClass bioschemas:LabProtocol ; + sh:order 3 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Protocol ; + # The condition: need to be executed by at least one ISA Process + sh:condition [ + sh:property [ + sh:path [ sh:inversePath bioschemas-prop:executesLabProtocol ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Process ; + ] ; + sh:qualifiedMinCount 1 ; + ] ; + ] ; + ] ; +. # check protocol attributes must have correct types isa-ro-crate:ProtocolShouldHaveNameOfCorrectType a sh:NodeShape ; sh:name "Protocol SHOULD have name" ; sh:description "A Protocol SHOULD have a name" ; - sh:targetClass bioschemas:LabProtocol ; + sh:targetClass isa-ro-crate:Protocol ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; @@ -57,7 +79,7 @@ isa-ro-crate:ProtocolShouldHaveNameOfCorrectType a sh:NodeShape ; isa-ro-crate:ProtocolShouldHaveDescriptionOfCorrectType a sh:NodeShape ; sh:name "Protocol SHOULD have description" ; sh:description "A Protocol SHOULD have a description" ; - sh:targetClass bioschemas:LabProtocol ; + sh:targetClass isa-ro-crate:Protocol ; sh:property [ a sh:PropertyShape ; sh:path schema:description ; @@ -83,7 +105,7 @@ isa-ro-crate:ProtocolShouldHaveDescriptionOfCorrectType a sh:NodeShape ; isa-ro-crate:ProtocolShouldHaveIntendedUse a sh:NodeShape ; sh:name "Protocol SHOULD have intended use" ; sh:description "A Protocol SHOULD have an intended use" ; - sh:targetClass bioschemas:LabProtocol ; + sh:targetClass isa-ro-crate:Protocol ; sh:property [ a sh:PropertyShape ; sh:path bioschemas-prop:intendedUse ; diff --git a/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl
b/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl index 5b6bf2f5f..dcdc9b576 100644 --- a/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl @@ -24,14 +24,51 @@ @prefix validator: . @prefix xsd: . - +isa-ro-crate:FindISASamples a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA samples within the RO-Crate" ; + sh:description "A Sample has type Sample and is attached to an ISA process." ; + sh:targetClass bioschemas:Sample ; + sh:order 3 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Sample ; + # The condition: need to be the object or the result of an ISA Process + sh:condition [ + a sh:NodeShape ; + sh:name "Sample is attached to a process" ; + sh:or( + [ + sh:property [ + sh:path [ sh:inversePath schema:object ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Process + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:path [ sh:inversePath schema:result ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Process + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + ) ; + ] ; + ] ; +.
# check Sample attributes must have correct types isa-ro-crate:SampledMustHaveName a sh:NodeShape ; sh:name "Sample MUST have name" ; sh:description "A Sample MUST have a name" ; - sh:targetClass bioschemas:Sample ; + sh:targetClass isa-ro-crate:Sample ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; @@ -50,7 +87,7 @@ isa-ro-crate:SampledMustHaveName a sh:NodeShape ; isa-ro-crate:SampleShouldHaveAdditionalPropertyOfCorrectType a sh:NodeShape ; sh:name "Sample SHOULD have additional properties" ; sh:description "A Sample SHOULD have at least one additional property" ; - sh:targetClass bioschemas:Sample ; + sh:targetClass isa-ro-crate:Sample ; sh:property [ a sh:PropertyShape ; sh:path bioschemas-prop:additionalProperty ; diff --git a/rocrate_validator/profiles/isa-ro-crate/6_data.ttl b/rocrate_validator/profiles/isa-ro-crate/6_data.ttl index 3bc9da6ad..3a8d1948d 100644 --- a/rocrate_validator/profiles/isa-ro-crate/6_data.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/6_data.ttl @@ -24,14 +24,39 @@ @prefix validator: . @prefix xsd: . - +isa-ro-crate:FindISAFiles a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA files within the RO-Crate" ; + sh:description "A data file has type File/MediaObject and is attached to an ISA Study or Assay." ; + sh:targetClass schema:MediaObject ; + sh:order 2 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Data ; + # The condition: need to be attached to a Study or Assay + sh:condition [ + sh:property [ + sh:path [ sh:inversePath schema:hasPart ] ; + sh:qualifiedValueShape [ + sh:or( + [sh:class isa-ro-crate:Assay] + [sh:class isa-ro-crate:Study] + ) + ] ; + sh:qualifiedMinCount 1 ; + ] ; + ] ; + ] ; +. 
# check File attributes must have correct types isa-ro-crate:FileMustHaveName a sh:NodeShape ; sh:name "File MUST have name" ; sh:description "A File MUST have a name" ; - sh:targetClass schema:MediaObject ; + sh:targetClass isa-ro-crate:Data ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; diff --git a/rocrate_validator/profiles/isa-ro-crate/7_person.ttl b/rocrate_validator/profiles/isa-ro-crate/7_person.ttl index fe9976f60..d01f50841 100644 --- a/rocrate_validator/profiles/isa-ro-crate/7_person.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/7_person.ttl @@ -24,14 +24,67 @@ @prefix validator: . @prefix xsd: . - +isa-ro-crate:FindISAPerson a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA persons within the RO-Crate" ; + sh:description "A Person has type Person and is attached to an Investigation, Study, Assay, Process, or Article." ; + sh:targetClass schema:Person ; + sh:order 3 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Person ; + # The condition: need to be attached to an Investigation, Study, Assay, Process, or Article + sh:condition [ + a sh:NodeShape; + sh:name "Person is attached to an Investigation, Study, Assay, Process, or Article" ; + sh:or ( + [ + sh:property [ + sh:name "Person is creator of a Study, Assay, or RootDataEntity" ; + sh:path [ sh:inversePath schema:creator ] ; + sh:qualifiedValueShape [ + sh:or( + [sh:class isa-ro-crate:Assay] + [sh:class isa-ro-crate:Study] + [sh:class ro-crate:RootDataEntity] + ) + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "Person is author of an Article" ; + sh:path [ sh:inversePath schema:author ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Article + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "Person is agent of a Process" ; + sh:path [ sh:inversePath schema:agent ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Process + ] ; +
sh:qualifiedMinCount 1 ; + ] + ] + ) ; + ] ; + ] ; +. # check Person attributes must have correct types isa-ro-crate:PersonMustHaveGivenName a sh:NodeShape ; sh:name "Person MUST have a given name" ; sh:description "A Person MUST have a given name" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:givenName ; @@ -50,7 +103,7 @@ isa-ro-crate:PersonMustHaveGivenName a sh:NodeShape ; isa-ro-crate:PersonShouldHaveAffiliationOfCorrectType a sh:NodeShape ; sh:name "Person SHOULD have affiliation" ; sh:description "A Person SHOULD have at least one affiliation" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:affiliation ; @@ -72,7 +125,7 @@ isa-ro-crate:PersonShouldHaveAffiliationOfCorrectType a sh:NodeShape ; isa-ro-crate:PersonShouldHaveJobTitleOfCorrectType a sh:NodeShape ; sh:name "Person SHOULD have job title" ; sh:description "A Person SHOULD have at least one job title" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:jobTitle ; @@ -94,7 +147,7 @@ isa-ro-crate:PersonShouldHaveJobTitleOfCorrectType a sh:NodeShape ; isa-ro-crate:PersonShouldHaveEmailOfCorrectType a sh:NodeShape ; sh:name "Person SHOULD have email" ; sh:description "A Person SHOULD have at least one email" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:email ; @@ -119,7 +172,7 @@ isa-ro-crate:PersonShouldHaveEmailOfCorrectType a sh:NodeShape ; isa-ro-crate:PersonShouldHaveFamilyNameOfCorrectType a sh:NodeShape ; sh:name "Person SHOULD have family name" ; sh:description "A Person SHOULD have at least one family name" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:familyName ; @@ -144,7 +197,7 @@ 
isa-ro-crate:PersonShouldHaveFamilyNameOfCorrectType a sh:NodeShape ; isa-ro-crate:PersonShouldHaveIdentifierOfCorrectType a sh:NodeShape ; sh:name "Person SHOULD have identifier" ; sh:description "A Person SHOULD have at least one identifier" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:identifier ; diff --git a/rocrate_validator/profiles/isa-ro-crate/8_article.ttl b/rocrate_validator/profiles/isa-ro-crate/8_article.ttl index 176122490..2aac05a27 100644 --- a/rocrate_validator/profiles/isa-ro-crate/8_article.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/8_article.ttl @@ -24,11 +24,38 @@ @prefix validator: . @prefix xsd: . +isa-ro-crate:FindISAPublication a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA publications within the RO-Crate" ; + sh:description "A Publication has type ScholarlyArticle and is attached to a Study or Assay." ; + sh:targetClass schema:ScholarlyArticle ; + sh:order 2 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Article ; + # The condition: need to be cited (schema:citation) by the root data entity or a Study + sh:condition [ + sh:property [ + sh:path [ sh:inversePath schema:citation ] ; + sh:qualifiedValueShape [ + sh:or( + [sh:class ro-crate:RootDataEntity] + [sh:class isa-ro-crate:Study] + ) + ] ; + sh:qualifiedMinCount 1 ; + ] ; + ] ; + ]; +.
+ isa-ro-crate:ArticleMustHaveHeadline a sh:NodeShape ; sh:name "Article MUST have a headline" ; sh:description "An Article MUST have a headline" ; - sh:targetClass schema:ScholarlyArticle ; + sh:targetClass isa-ro-crate:Article ; sh:property [ a sh:PropertyShape ; sh:path schema:headline ; @@ -47,7 +74,7 @@ isa-ro-crate:ArticleMustHaveHeadline a sh:NodeShape ; isa-ro-crate:ArticleMustHaveIdentifier a sh:NodeShape ; sh:name "Article MUST have an identifier" ; sh:description "An Article MUST have an identifier" ; - sh:targetClass schema:ScholarlyArticle ; + sh:targetClass isa-ro-crate:Article ; sh:property [ a sh:PropertyShape ; sh:path schema:identifier ; @@ -69,7 +96,7 @@ isa-ro-crate:ArticleMustHaveIdentifier a sh:NodeShape ; isa-ro-crate:ArticleShouldHaveAuthorOfCorrectType a sh:NodeShape ; sh:name "Article SHOULD have author" ; sh:description "An Article SHOULD have at least one author" ; - sh:targetClass schema:ScholarlyArticle ; + sh:targetClass isa-ro-crate:Article ; sh:property [ a sh:PropertyShape ; sh:path schema:author ; diff --git a/rocrate_validator/profiles/isa-ro-crate/ontology.ttl b/rocrate_validator/profiles/isa-ro-crate/ontology.ttl index 7b42b8279..db7411fbe 100644 --- a/rocrate_validator/profiles/isa-ro-crate/ontology.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/ontology.ttl @@ -41,6 +41,31 @@ isa-ro-crate:Process rdf:type owl:Class ; rdfs:subClassOf bioschemas:LabProcess ; rdfs:label "Process"@en . +# Protocol +isa-ro-crate:Protocol rdf:type owl:Class ; + rdfs:subClassOf bioschemas:LabProtocol ; + rdfs:label "Protocol"@en . + +# Person +isa-ro-crate:Person rdf:type owl:Class ; + rdfs:subClassOf schema:Person ; + rdfs:label "Person"@en . + +# Article +isa-ro-crate:Article rdf:type owl:Class ; + rdfs:subClassOf schema:ScholarlyArticle ; + rdfs:label "Article"@en . + +# Sample +isa-ro-crate:Sample rdf:type owl:Class ; + rdfs:subClassOf bioschemas:Sample ; + rdfs:label "Sample"@en . 
+ +# Data +isa-ro-crate:Data rdf:type owl:Class ; + rdfs:subClassOf schema:MediaObject ; + rdfs:label "Data"@en . + isa-ro-crate:Parameter rdf:type owl:Class ; rdfs:subClassOf schema:PropertyValue ; rdfs:label "Parameter"@en . diff --git a/tests/integration/profiles/isa-ro-crate/test_3_process.py b/tests/integration/profiles/isa-ro-crate/test_3_process.py index 7c1234cd3..2ac4dbff4 100644 --- a/tests/integration/profiles/isa-ro-crate/test_3_process.py +++ b/tests/integration/profiles/isa-ro-crate/test_3_process.py @@ -16,6 +16,7 @@ import logging from rocrate_validator.models import Severity +# from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER from tests.ro_crates import ValidROC from tests.shared import do_entity_test, SPARQL_PREFIXES @@ -55,40 +56,46 @@ def test_isa_process_name(): ) -def test_isa_process_not_correctly_referenced_from_dataset(): - """ - Test an ISA RO-Crate where a Process is referenced from a Dataset with wrong property. - """ - sparql = ( - SPARQL_PREFIXES - + """ - PREFIX bioschemas: - DELETE { - ?dataset schema:about ?process . - } - INSERT { - ?dataset schema:mentions ?process . - } - WHERE { - ?dataset a schema:Dataset . - ?dataset schema:about ?process. - } - """ - ) - - do_entity_test( - rocrate_path=ValidROC().isa_ro_crate, - requirement_severity=Severity.REQUIRED, - expected_validation_result=False, - expected_triggered_requirements=[ - "Process MUST be directly referenced from a dataset" - ], - expected_triggered_issues=[ - "Process MUST be directly referenced in about on a Dataset" - ], - profile_identifier="isa-ro-crate", - rocrate_entity_mod_sparql=sparql, - ) +# def test_isa_process_not_correctly_referenced_from_dataset(): +# """ +# Test an ISA RO-Crate where an invalid Process is referenced from a Dataset with wrong property. +# Such processes should be ignored, meaning the validation should pass. 
+# """ +# sparql = ( +# SPARQL_PREFIXES +# + """ +# PREFIX schema: +# PREFIX bioschemas: +# PREFIX bioschemas-prop: +# DELETE { +# ?dataset schema:about ?process . +# ?process schema:name ?name . +# } +# INSERT { +# ?dataset schema:mentions ?process . +# } +# WHERE { +# ?dataset a schema:Dataset . +# ?dataset schema:about ?process. +# ?process schema:name ?name . +# } +# """ +# ) + +# do_entity_test( +# rocrate_path=ValidROC().isa_ro_crate, +# requirement_severity=Severity.REQUIRED, +# expected_validation_result=True, +# # expected_triggered_requirements=[ +# # "Process MUST be directly referenced from a dataset" +# # ], +# # expected_triggered_issues=[ +# # "Process MUST be directly referenced in about on a Dataset" +# # ], +# profile_identifier="isa-ro-crate", +# rocrate_entity_mod_sparql=sparql, +# skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], +# ) def test_isa_process_no_object(): diff --git a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py index c48885b51..7e5f1d7af 100644 --- a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py +++ b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py @@ -16,6 +16,7 @@ import logging from rocrate_validator.models import Severity +# from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER from tests.ro_crates import ValidROC from tests.shared import do_entity_test, SPARQL_PREFIXES @@ -182,6 +183,45 @@ def test_isa_protocol_no_intendedUse(): rocrate_entity_mod_sparql=sparql, ) +# def test_isa_protocol_not_correctly_referenced_from_process(): +# """ +# Test an ISA RO-Crate where an invalid Protocol is referenced from a process with wrong property. +# Such protocols should be ignored, meaning the validation should pass. +# """ +# sparql = ( +# SPARQL_PREFIXES +# + """ +# PREFIX bioschemas: +# PREFIX bioschemas-prop: +# DELETE { +# ?process bioschemas-prop:executesProtocol ?protocol . 
+# ?protocol schema:name ?name . +# } +# INSERT { +# ?dataset schema:mentions ?protocol . +# } +# WHERE { +# ?dataset a schema:Dataset . +# ?dataset schema:additionalType "Assay" . +# ?process a bioschemas:LabProcess . +# ?protocol a bioschemas:LabProtocol . +# ?process bioschemas-prop:executesProtocol ?protocol . +# ?protocol schema:name ?name . +# } +# """ +# ) + +# do_entity_test( +# rocrate_path=ValidROC().isa_ro_crate, +# requirement_severity=Severity.RECOMMENDED, +# expected_validation_result=True, +# # expected_triggered_requirements=["Protocol SHOULD have intended use"], +# # expected_triggered_issues=["Protocol intended use MUST be of type string or DefinedTerm"], +# profile_identifier="isa-ro-crate", +# rocrate_entity_mod_sparql=sparql, +# skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], +# ) + def test_isa_protocol_intendedUse_incorrect_type(): """ From 3ff42458b83683ede983ab94b80745a96e4ee396 Mon Sep 17 00:00:00 2001 From: Florian Wetzels Date: Wed, 13 May 2026 16:39:58 +0200 Subject: [PATCH 48/89] finalized rdf classes for ISA types --- .../profiles/isa-ro-crate/10_definedterm.ttl | 104 ++++++++++++++++- .../isa-ro-crate/11_propertyvalue.ttl | 106 +++++++++++++++++- .../profiles/isa-ro-crate/1_study.ttl | 38 ------- .../profiles/isa-ro-crate/2_assay.ttl | 30 ----- .../profiles/isa-ro-crate/9_comment.ttl | 34 +++++- .../profiles/isa-ro-crate/ontology.ttl | 17 ++- 6 files changed, 253 insertions(+), 76 deletions(-) diff --git a/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl b/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl index e481ade1f..83bb43386 100644 --- a/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl @@ -24,11 +24,111 @@ @prefix validator: . @prefix xsd: . 
+isa-ro-crate:FindISAOntologyAnnotation a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA OntologyAnnotations within the RO-Crate" ; + sh:description "An OntologyAnnotation has type DefinedTerm and is attached to an Assay, Protocol, Person, or Article." ; + sh:targetClass schema:DefinedTerm ; + sh:order 4 ; + # Expand the data graph: add the isa-ro-crate:OntologyAnnotation type to matching DefinedTerm entities + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:OntologyAnnotation ; + # The condition: need to be attached to an Assay, Protocol, Person, or Article + sh:condition [ + a sh:NodeShape; + sh:name "OntologyAnnotation is attached to an Assay, Protocol, Person, or Article" ; + sh:or ( + [ + sh:property [ + sh:name "OntologyAnnotation is measurementMethod of an Assay" ; + sh:path [ sh:inversePath schema:measurementMethod ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Assay ; + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is measurementTechnique of an Assay" ; + sh:path [ sh:inversePath schema:measurementTechnique ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Assay + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is intendedUse of a Protocol" ; + sh:path [ sh:inversePath schema:intendedUse ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is computationalTool of a Protocol" ; + sh:path [ sh:inversePath schema:computationalTool ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is labEquipment of a Protocol" ; + sh:path [ sh:inversePath schema:labEquipment ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is reagent of a Protocol" ; + sh:path [ 
sh:inversePath schema:reagent ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is job title of a Person" ; + sh:path [ sh:inversePath schema:jobTitle ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Person + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is status of a Publication" ; + sh:path [ sh:inversePath schema:creativeWorkStatus ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Article + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + ) ; + ] ; + ] ; +. isa-ro-crate:DefinedTermMustHaveName a sh:NodeShape ; sh:name "DefinedTerm MUST have a name" ; sh:description "A DefinedTerm MUST have a name" ; - sh:targetClass schema:DefinedTerm ; + sh:targetClass isa-ro-crate:OntologyAnnotation ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; @@ -47,7 +147,7 @@ isa-ro-crate:DefinedTermMustHaveName a sh:NodeShape ; isa-ro-crate:DefinedTermShouldHaveTermCodeOfCorrectType a sh:NodeShape ; sh:name "DefinedTerm SHOULD have termCode of correct type" ; sh:description "A DefinedTerm SHOULD have at least one termCode of correct type" ; - sh:targetClass schema:DefinedTerm ; + sh:targetClass isa-ro-crate:OntologyAnnotation ; sh:property [ a sh:PropertyShape ; sh:path schema:termCode ; diff --git a/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl b/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl index b0c8c990b..ba9d108a3 100644 --- a/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl @@ -24,11 +24,111 @@ @prefix validator: . @prefix xsd: . +isa-ro-crate:FindISAPropertyValue a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA PropertyValues within the RO-Crate" ; + sh:description "An ISA PropertyValue has type PropertyValue and is attached to an Assay, Process, Protocol, Sample, Person, or Article." 
; + sh:targetClass schema:PropertyValue ; + sh:order 4 ; + # Expand the data graph: add the isa-ro-crate:PropertyValue type to matching PropertyValue entities + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:PropertyValue ; + # The condition: need to be attached to an Assay, Process, Protocol, Sample, Person, or Article + sh:condition [ + a sh:NodeShape; + sh:name "PropertyValue is attached to an Assay, Process, Protocol, Sample, Person, or Article" ; + sh:or ( + [ + sh:property [ + sh:name "PropertyValue is variableMeasured of an Assay" ; + sh:path [ sh:inversePath schema:variableMeasured ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Assay ; + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is parameterValue of a Process" ; + sh:path [ sh:inversePath bioschemas-prop:parameterValue ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Process + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is computationalTool of a Protocol" ; + sh:path [ sh:inversePath schema:computationalTool ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is labEquipment of a Protocol" ; + sh:path [ sh:inversePath schema:labEquipment ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is reagent of a Protocol" ; + sh:path [ sh:inversePath schema:reagent ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is characteristic of a Sample" ; + sh:path [ sh:inversePath bioschemas-prop:additionalProperty ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Sample + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is identifier of a Person" ; + sh:path [ sh:inversePath schema:identifier ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Person + ] ; + 
sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is identifier of a Publication" ; + sh:path [ sh:inversePath schema:identifier ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Article + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + ) ; + ] ; + ] ; +. ro-crate:FindPropertyValueSubtypes a sh:NodeShape, validator:HiddenShape; sh:name "Identify PropertyValue subtypes within the RO-Crate" ; sh:description "A PropertyValue has type Parameter, Characteristic, Factor or Component if additionalType is set accordingly." ; - sh:targetClass schema:PropertyValue ; + sh:targetClass isa-ro-crate:PropertyValue ; # Expand data graph with triples from the file data entity sh:rule [ a sh:TripleRule ; @@ -87,7 +187,7 @@ ro-crate:FindPropertyValueSubtypes a sh:NodeShape, validator:HiddenShape; isa-ro-crate:PropertyValueMustHaveName a sh:NodeShape ; sh:name "PropertyValue MUST have a name" ; sh:description "A PropertyValue MUST have a name" ; - sh:targetClass schema:PropertyValue ; + sh:targetClass isa-ro-crate:PropertyValue ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; @@ -106,7 +206,7 @@ isa-ro-crate:PropertyValueMustHaveName a sh:NodeShape ; isa-ro-crate:PropertyValueShouldHaveValueOfCorrectType a sh:NodeShape ; sh:name "PropertyValue SHOULD have value of correct type" ; sh:description "A PropertyValue SHOULD have at least one value of correct type" ; - sh:targetClass schema:PropertyValue ; + sh:targetClass isa-ro-crate:PropertyValue ; sh:property [ a sh:PropertyShape ; sh:path schema:value ; diff --git a/rocrate_validator/profiles/isa-ro-crate/1_study.ttl b/rocrate_validator/profiles/isa-ro-crate/1_study.ttl index b0c2b53e4..11d5ec2ff 100644 --- a/rocrate_validator/profiles/isa-ro-crate/1_study.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/1_study.ttl @@ -23,43 +23,6 @@ @prefix validator: . @prefix xsd: . 
-# class:study -# isTypeDataset -# isAdditionalType"Study - - -# check study must have identifier -# check study must have name -# check study must have description -# check study should have about - -# check study must be pointed to by investigation through hasPart - -# # Find studies and add isa-ro-crate:Study type to them, for easier retrieval for checks -# ro-crate:FindStudies a sh:NodeShape, validator:HiddenShape; -# sh:name "Identify Studies within the RO-Crate" ; -# sh:description "A Study has type Dataset and additionalType 'Study'." ; -# sh:target [ -# a sh:SPARQLTarget ; -# sh:prefixes ro-crate:sparqlPrefixes ; -# sh:select """ -# SELECT ?this -# WHERE { -# ?this a schema:Dataset . -# ?this schema:additionalType "Study" . -# } -# """ -# ] ; - -# # Expand data graph with triples from the file data entity -# sh:rule [ -# a sh:TripleRule ; -# sh:subject sh:this ; -# sh:predicate rdf:type ; -# sh:object isa-ro-crate:Study ; -# ] . - -# Find studies and add isa-ro-crate:Study type to them, for easier retrieval for checks isa-ro-crate:FindStudies a sh:NodeShape, validator:HiddenShape; sh:name "Identify Studies within the RO-Crate" ; sh:description "A Study has type Dataset and additionalType 'Study'." ; @@ -81,7 +44,6 @@ isa-ro-crate:FindStudies a sh:NodeShape, validator:HiddenShape; ] . -# WIP isa-ro-crate:StudyMustHaveBaseDescriptors a sh:NodeShape ; sh:name "Study MUST have base properties" ; sh:description "A Study MUST have identifier, name and description" ; diff --git a/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl b/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl index 9a92f8090..475627afc 100644 --- a/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl @@ -23,35 +23,6 @@ @prefix validator: . @prefix xsd: . 
- -# check assay must have name - -# check assay must be pointed to by investigation through hasPart - -# Find assays and add isa-ro-crate:Assay type to them, for easier retrieval for checks -# ro-crate:FindAssays a sh:NodeShape, validator:HiddenShape; -# sh:name "Identify Assays within the RO-Crate" ; -# sh:description "An Assay has type Dataset and additionalType 'Assay'." ; -# sh:target [ -# a sh:SPARQLTarget ; -# sh:prefixes ro-crate:sparqlPrefixes ; -# sh:select """ -# SELECT ?this -# WHERE { -# ?this a schema:Dataset . -# ?this schema:additionalType "Assay" . -# } -# """ -# ] ; - -# # Expand data graph with triples from the file data entity -# sh:rule [ -# a sh:TripleRule ; -# sh:subject sh:this ; -# sh:predicate rdf:type ; -# sh:object isa-ro-crate:Assay ; -# ] . - isa-ro-crate:FindAssays a sh:NodeShape, validator:HiddenShape; sh:name "Identify Assays within the RO-Crate" ; sh:description "An Assay has type Dataset and additionalType 'Assay'." ; @@ -73,7 +44,6 @@ isa-ro-crate:FindAssays a sh:NodeShape, validator:HiddenShape; ] . -# WIP isa-ro-crate:AssayMustHaveBaseDescriptors a sh:NodeShape ; sh:name "Assay MUST have base properties" ; sh:description "An Assay MUST have identifier" ; diff --git a/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl b/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl index 4d64e1198..306ab76f9 100644 --- a/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl @@ -24,11 +24,41 @@ @prefix validator: . @prefix xsd: . +isa-ro-crate:FindISAComment a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA comments within the RO-Crate" ; + sh:description "A Comment has type Comment and is attached to an Investigation, Study, Assay, Protocol, Data file, or Publication." 
; + sh:targetClass schema:Comment ; + sh:order 3 ; + # Expand the data graph: add the isa-ro-crate:Comment type to matching Comment entities + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Comment ; + # The condition: need to be attached to an Investigation, Study, Assay, Protocol, Data file, or Publication + sh:condition [ + sh:property [ + sh:path [ sh:inversePath schema:comment ] ; + sh:qualifiedValueShape [ + sh:or( + [sh:class ro-crate:RootDataEntity] + [sh:class isa-ro-crate:Study] + [sh:class isa-ro-crate:Assay] + [sh:class isa-ro-crate:Protocol] + [sh:class isa-ro-crate:Data] + [sh:class isa-ro-crate:Article] + ) + ] ; + sh:qualifiedMinCount 1 ; + ] ; + ] ; + ]; +. isa-ro-crate:CommentShouldHaveName a sh:NodeShape ; sh:name "Comment SHOULD have name" ; sh:description "A Comment SHOULD have at least one name" ; - sh:targetClass schema:Comment ; + sh:targetClass isa-ro-crate:Comment ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; @@ -53,7 +83,7 @@ isa-ro-crate:CommentShouldHaveName a sh:NodeShape ; isa-ro-crate:CommentShouldHaveText a sh:NodeShape ; sh:name "Comment SHOULD have text" ; sh:description "A Comment SHOULD have at least one text" ; - sh:targetClass schema:Comment ; + sh:targetClass isa-ro-crate:Comment ; sh:property [ a sh:PropertyShape ; sh:path schema:text ; diff --git a/rocrate_validator/profiles/isa-ro-crate/ontology.ttl b/rocrate_validator/profiles/isa-ro-crate/ontology.ttl index db7411fbe..4c7b5d926 100644 --- a/rocrate_validator/profiles/isa-ro-crate/ontology.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/ontology.ttl @@ -64,7 +64,22 @@ isa-ro-crate:Sample rdf:type owl:Class ; # Data isa-ro-crate:Data rdf:type owl:Class ; rdfs:subClassOf schema:MediaObject ; - rdfs:label "Data"@en . + rdfs:label "Data"@en . + +# Comment +isa-ro-crate:Comment rdf:type owl:Class ; + rdfs:subClassOf schema:Comment ; + rdfs:label "Comment"@en . 
+ +# OntologyAnnotation +isa-ro-crate:OntologyAnnotation rdf:type owl:Class ; + rdfs:subClassOf schema:DefinedTerm ; + rdfs:label "OntologyAnnotation"@en . + +# PropertyValue +isa-ro-crate:PropertyValue rdf:type owl:Class ; + rdfs:subClassOf schema:PropertyValue ; + rdfs:label "PropertyValue"@en . isa-ro-crate:Parameter rdf:type owl:Class ; rdfs:subClassOf schema:PropertyValue ; From 8da4b606b7f1931e617f6de10e3ddcb5b8f409a4 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 13 May 2026 18:28:39 +0200 Subject: [PATCH 49/89] fix(shacl): :bug: reference `owner.graph` in `get_source_snippet` --- rocrate_validator/requirements/shacl/checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 7d9a42838..1ce62ab0b 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -199,12 +199,12 @@ def get_source_snippet(self) -> Optional[SourceSnippet]: shacl.target, ) for predicate in target_predicates: - for triple in graph.triples((owner.node, predicate, None)): + for triple in owner.graph.triples((owner.node, predicate, None)): subgraph.add(triple) # follow BNode objects (e.g. sh:target referencing an inline SPARQL target) _, _, obj = triple if isinstance(obj, BNode): - subgraph += build_node_subgraph(graph, obj) + subgraph += build_node_subgraph(owner.graph, obj) # link the owner to the property so the relationship is preserved in the serialization subgraph.add((owner.node, shacl.property, self._shape.node)) From 419fece51539530abc2a218c13f8a9d6cc2164fb Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 13 May 2026 19:01:45 +0200 Subject: [PATCH 50/89] feat(constants): :wrench: default HTTP cache to never expire Switch DEFAULT_HTTP_CACHE_MAX_AGE from 300s to -1 so cached HTTP resources (JSON-LD contexts, profile artifacts, etc.) persist indefinitely by default. 
The `-1` sentinel is already supported throughout the cache stack and is the value used by `cache warm`. Users can still opt into a finite TTL via `--cache-max-age`. Note: this does not affect remote RO-Crates downloaded for validation, which are always re-fetched online via `fetch_fresh` (and the cached copy overwritten) so that subsequent offline runs validate against the latest known remote state. The `max_age` setting only governs the regular cached session used for other HTTP-backed resources. --- rocrate_validator/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocrate_validator/constants.py b/rocrate_validator/constants.py index 6984eeffe..c717219f7 100644 --- a/rocrate_validator/constants.py +++ b/rocrate_validator/constants.py @@ -87,7 +87,7 @@ JSON_OUTPUT_FORMAT_VERSION = "0.2" # Http Cache Settings -DEFAULT_HTTP_CACHE_MAX_AGE = 300 # in seconds +DEFAULT_HTTP_CACHE_MAX_AGE = -1 # in seconds; negative means "never expire" DEFAULT_HTTP_CACHE_PATH_PREFIX = '/tmp/rocrate_validator_cache' # Directory name used under the user's cache root for the persistent HTTP cache USER_CACHE_DIR_NAME = "rocrate-validator" From 757b86af39aab58dd00da98cdcf960817a4476fe Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 15 May 2026 13:25:27 +0200 Subject: [PATCH 51/89] fix(validation): :mute: report offline cache misses once per URL --- rocrate_validator/models.py | 33 ++++++++++++++++--- .../requirements/shacl/requirements.py | 12 +++++-- rocrate_validator/utils/http.py | 16 +++++++++ 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index f6bba6d13..bbb681a75 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -60,7 +60,7 @@ from rocrate_validator.utils.cache_warmup import auto_warm_up_for_settings from rocrate_validator.utils.collections import MapIndex, MultiIndexMap from rocrate_validator.utils.document_loader import 
install_document_loader -from rocrate_validator.utils.http import HttpRequester +from rocrate_validator.utils.http import HttpRequester, find_offline_cache_miss from rocrate_validator.utils.paths import ( get_default_http_cache_path, get_profiles_path, @@ -1179,10 +1179,13 @@ def _do_validate_(self, context: ValidationContext) -> bool: continue except Exception as e: # Ignore the fact that the check failed as far as the validation result is concerned. - logger.warning("Unexpected error during check %s. Exception: %s", check, e) - logger.warning("Consider reporting this as a bug.") - if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + if context.maybe_warn_offline_cache_miss(e): + logger.debug("Offline cache miss during check %s: %s", check, e) + else: + logger.warning("Unexpected error during check %s. Exception: %s", check, e) + logger.warning("Consider reporting this as a bug.") + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) skipped_checks = set(self._checks) - set(checks_to_perform) context.result.skipped_checks.update(skipped_checks) logger.debug( @@ -3131,6 +3134,8 @@ def __init__(self, validator: Validator, settings: ValidationSettings): self._result = None # additional properties for the context self._properties = {} + # URLs already reported as missing from the HTTP cache during this run + self._offline_cache_misses_warned: set[str] = set() # initialize the ROCrate object if settings.metadata_dict: @@ -3466,3 +3471,21 @@ def get_profile_by_identifier(self, identifier: str) -> list[Profile]: if p.identifier == identifier: return p raise ProfileNotFound(identifier) + + def maybe_warn_offline_cache_miss(self, exc: BaseException) -> bool: + """ + If ``exc`` (or any cause/context in its chain) is an + :class:`OfflineCacheMissError`, emit a single user-facing warning + for the missing URL — but only the first time that URL is seen + during this validation run — and return ``True``. 
+ + Returns ``False`` when the exception is unrelated to offline cache + misses, so callers can fall back to their generic handling. + """ + miss = find_offline_cache_miss(exc) + if miss is None: + return False + if miss.url not in self._offline_cache_misses_warned: + self._offline_cache_misses_warned.add(miss.url) + logger.warning("%s", miss) + return True diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index f5245a0ed..9db999d65 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -140,9 +140,15 @@ def finalize(cls, context: ValidationContext) -> None: try: runner.__do_execute_check__(shacl_context) except Exception as e: - logger.warning("Forced SHACL run for zero-shape target profile %s failed: %s", target.identifier, e) - if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + if context.maybe_warn_offline_cache_miss(e): + logger.debug( + "Forced SHACL run for zero-shape target profile %s skipped due to offline cache miss: %s", + target.identifier, e, + ) + else: + logger.warning("Forced SHACL run for zero-shape target profile %s failed: %s", target.identifier, e) + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) finally: shacl_context.__unset_current_validation_profile__() diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 73a02bdd8..a5e1d71f8 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -71,6 +71,22 @@ def __init__(self, url: str): self.url = url +def find_offline_cache_miss(exc: BaseException) -> Optional[OfflineCacheMissError]: + """ + Walk the chain of an exception (``__cause__``/``__context__``) looking + for an :class:`OfflineCacheMissError`. Returns the first match, or + ``None`` if the chain does not contain one. 
+ """ + seen: set[int] = set() + current: Optional[BaseException] = exc + while current is not None and id(current) not in seen: + seen.add(id(current)) + if isinstance(current, OfflineCacheMissError): + return current + current = current.__cause__ or current.__context__ + return None + + class HttpRequester: """ A singleton class to handle HTTP requests. From 79268323e9fb00070cb6f445ee53d00735fc687c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 15 May 2026 13:38:29 +0200 Subject: [PATCH 52/89] fix(cli/cache): :sparkles: resolve profile tokens in `cache warm` Mirror the fallback used by `validate`: when -p is given a token with no exact identifier match, pick the highest-version profile sharing that token. Emit a note listing the chosen identifier and available alternatives only when more than one version matched, so the common single-version case stays quiet. --- rocrate_validator/cli/commands/cache.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 94390cd31..12ff57c30 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -242,12 +242,35 @@ def cache_warm( if requested_ids: selected = [] missing = [] + # (requested, resolved, all candidates) for tokens that matched + # more than one versioned profile — we warn so the user knows + # which one was picked and how to opt for a different version. + ambiguous_fallbacks = [] for ident in requested_ids: profile = Profile.get_by_identifier(ident) + if profile is None: + # Mirror the fallback used by `validate`: if no exact + # identifier match, treat the value as a token and + # pick the highest-version profile sharing it. 
+ candidates = Profile.get_by_token(ident) or [] + if candidates: + profile = max(candidates, key=lambda p: p.version) + if len(candidates) > 1: + ambiguous_fallbacks.append((ident, profile, candidates)) if profile is None: missing.append(ident) else: selected.append(profile) + for requested, resolved, candidates in ambiguous_fallbacks: + other_versions = sorted( + p.identifier for p in candidates if p.identifier != resolved.identifier + ) + console.print( + f"[yellow]Note:[/yellow] '{requested}' matched multiple profiles; " + f"using [cyan]{resolved.identifier}[/cyan] (highest version). " + f"Pass the full identifier to pick a different one " + f"(available: {', '.join(other_versions)})." + ) if missing: console.print( f"[yellow]Profile(s) not found and skipped:[/yellow] {', '.join(missing)}" From 6925d5592d6b9f0e797515256c8b8f8d5e554b18 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 15 May 2026 13:52:14 +0200 Subject: [PATCH 53/89] feat(cli/cache): :sparkles: allow caching of explicit URLs via 'cache warm' --- rocrate_validator/cli/commands/cache.py | 29 ++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 12ff57c30..3ac96146d 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -207,6 +207,15 @@ def cache_reset(ctx, cache_path: Optional[Path] = None, yes: bool = False): metavar="URI", help="URL of a remote RO-Crate to download and cache (may be given multiple times)", ) +@click.option( + "-u", + "--url", + multiple=True, + type=click.STRING, + default=None, + metavar="URL", + help="Arbitrary URL to fetch and cache (may be given multiple times)", +) @click.pass_context def cache_warm( ctx, @@ -216,12 +225,21 @@ def cache_warm( profile_identifier: Optional[List[str]] = None, all_profiles: bool = False, crate: Optional[List[str]] = None, + url: Optional[List[str]] = None, ): """ 
Pre-populate the HTTP cache with resources declared by profiles and with optional remote RO-Crate URLs. """ console = ctx.obj['console'] + explicit_urls = list(url or []) + invalid_urls = [u for u in explicit_urls if not u.lower().startswith(("http://", "https://"))] + if invalid_urls: + raise click.BadParameter( + f"expected an http(s):// address; got: {', '.join(invalid_urls)}", + param_hint="'--url' / '-u'", + ) + exit_with_failure = False try: resolved_cache = _resolve_cache_path(cache_path) @@ -233,7 +251,10 @@ def cache_warm( urls: List[str] = [] profile_scope: Optional[str] = None - if all_profiles or requested_ids or not crate: + # Only fall back to "warm all profiles" when the user gave no other + # source (no -p, no --crate, no --url, no --all-profiles). + any_explicit_source = bool(crate or explicit_urls or requested_ids or all_profiles) + if all_profiles or requested_ids or not any_explicit_source: Profile.load_profiles( profiles_path=profiles_dir, extra_profiles_path=extra_dir, @@ -295,6 +316,12 @@ def cache_warm( ) results.extend(_warm_remote_crates(list(crate))) + if explicit_urls: + console.print( + f"[bold]Fetching explicit URLs[/bold] ([cyan]{len(explicit_urls)}[/cyan] URL(s))..." 
+ ) + results.extend(warm_up_urls(explicit_urls)) + if not results: console.print("[yellow]Nothing to warm up.[/yellow]") return From f9b43ae276ad86bcb9177759636fc0609c58ba26 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 15 May 2026 14:23:55 +0200 Subject: [PATCH 54/89] feat(cli/cache): :sparkles: add `cache list` (alias `ls`) --- rocrate_validator/cli/commands/cache.py | 195 +++++++++++++++++++++++- 1 file changed, 191 insertions(+), 4 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 3ac96146d..966b992b2 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -19,8 +19,11 @@ from __future__ import annotations +import copy as _copy +import json import shutil import tempfile +from datetime import datetime from pathlib import Path from typing import List, Optional @@ -30,11 +33,9 @@ from rocrate_validator.cli.main import cli, click from rocrate_validator.models import Profile from rocrate_validator.utils import log as logging -from rocrate_validator.utils.cache_warmup import ( - WarmUpResult, discover_cacheable_urls_from_profiles, warm_up_urls) +from rocrate_validator.utils.cache_warmup import WarmUpResult, discover_cacheable_urls_from_profiles, warm_up_urls from rocrate_validator.utils.http import HttpRequester -from rocrate_validator.utils.paths import (get_default_http_cache_path, - get_profiles_path) +from rocrate_validator.utils.paths import get_default_http_cache_path, get_profiles_path logger = logging.getLogger(__name__) @@ -100,6 +101,102 @@ def cache_info(ctx, cache_path: Optional[Path] = None): handle_error(e, console) +@cache.command("list") +@click.option( + "--cache-path", + type=click.Path(), + default=None, + show_default=False, + help="Path to the HTTP cache directory (defaults to the user cache dir)", +) +@click.option( + "--url", + "url_filter", + type=click.STRING, + default=None, + metavar="SUBSTRING", + help="Show only 
entries whose URL contains SUBSTRING (case-insensitive)", +) +@click.option( + "--sort", + "sort_by", + type=click.Choice(["url", "size", "created"], case_sensitive=False), + default="created", + show_default=True, + help="Field to sort entries by", +) +@click.option( + "--order", + "sort_order", + type=click.Choice(["asc", "desc"], case_sensitive=False), + default=None, + show_default=False, + help="Sort direction (default: desc for size/created, asc for url)", +) +@click.option( + "--json", + "as_json", + is_flag=True, + default=False, + help="Print entries as JSON (size in bytes, datetimes as ISO 8601)", +) +@click.pass_context +def cache_list( + ctx, + cache_path: Optional[Path] = None, + url_filter: Optional[str] = None, + sort_by: str = "created", + sort_order: Optional[str] = None, + as_json: bool = False, +): + """ + List entries currently stored in the HTTP cache (alias: `ls`). + """ + console = ctx.obj['console'] + try: + resolved = _resolve_cache_path(cache_path) + _reset_requester(resolved) + entries = _collect_cache_entries( + url_filter=url_filter, + sort_by=sort_by.lower(), + sort_order=sort_order.lower() if sort_order else None, + ) + + if as_json: + click.echo(json.dumps([_entry_to_dict(e) for e in entries], indent=2)) + return + + if not entries: + if url_filter: + console.print(f"[yellow]No entries match URL filter:[/yellow] {url_filter}") + else: + console.print("[yellow]Cache is empty.[/yellow]") + return + + table = Table(title=f"HTTP Cache entries ({len(entries)})", show_lines=False) + table.add_column("URL", overflow="fold") + table.add_column("Status", justify="right") + table.add_column("Size", justify="right") + table.add_column("Content-Type") + table.add_column("Created") + table.add_column("Expires") + total = 0 + for e in entries: + total += e["size"] + table.add_row( + e["url"], + str(e["status"] if e["status"] is not None else "—"), + _format_bytes(e["size"]), + e["content_type"] or "—", + _format_dt(e["created_at"]), + 
_format_expires(e["expires"], e["is_expired"]), + ) + console.print(table) + console.print(f"[bold]Total:[/bold] {len(entries)} entries, {_format_bytes(total)}") + except Exception as e: + handle_error(e, console) + + @cache.command("reset") @click.option( "--cache-path", @@ -389,3 +486,93 @@ def _format_bytes(size: int) -> str: value /= 1024 idx += 1 return f"{value:.2f} {units[idx]}" + + +def _format_dt(value: Optional[datetime]) -> str: + if value is None: + return "—" + return value.strftime("%Y-%m-%d %H:%M:%SZ") if value.tzinfo else value.strftime("%Y-%m-%d %H:%M:%S") + + +def _format_expires(value: Optional[datetime], is_expired: bool) -> str: + if value is None: + return "never" + formatted = _format_dt(value) + return f"[red]{formatted} (expired)[/red]" if is_expired else formatted + + +_DEFAULT_SORT_ORDER = {"url": "asc", "size": "desc", "created": "desc"} + + +def _collect_cache_entries( + url_filter: Optional[str] = None, + sort_by: str = "size", + sort_order: Optional[str] = None, +) -> List[dict]: + """ + Read every cached response and return a list of plain dicts. Filtering + and sorting happen here so the CLI rendering paths (table / JSON) share + the same data shape. + + ``sort_order`` is one of ``"asc"``/``"desc"`` or ``None`` to use the + field's natural default (URLs sort ascending; size and timestamps sort + descending so the largest/most recent come first). 
+ """ + cache = getattr(HttpRequester().session, "cache", None) + if cache is None: + return [] + needle = url_filter.lower() if url_filter else None + entries: List[dict] = [] + responses = getattr(cache, "responses", None) or {} + for key in list(responses): + try: + resp = responses[key] + except Exception as exc: + logger.debug("Skipping unreadable cache entry %s: %s", key, exc) + continue + url = getattr(resp, "url", "") or "" + if needle and needle not in url.lower(): + continue + entries.append({ + "key": key, + "url": url, + "status": getattr(resp, "status_code", None), + "size": int(getattr(resp, "size", 0) or 0), + "content_type": (getattr(resp, "headers", {}) or {}).get("Content-Type"), + "created_at": getattr(resp, "created_at", None), + "expires": getattr(resp, "expires", None), + "is_expired": bool(getattr(resp, "is_expired", False)), + }) + effective_order = sort_order or _DEFAULT_SORT_ORDER.get(sort_by, "desc") + reverse = effective_order == "desc" + if sort_by == "url": + entries.sort(key=lambda e: e["url"].lower(), reverse=reverse) + elif sort_by == "created": + entries.sort(key=lambda e: e["created_at"] or datetime.min, reverse=reverse) + else: # "size" + entries.sort(key=lambda e: e["size"], reverse=reverse) + return entries + + +def _entry_to_dict(entry: dict) -> dict: + """JSON-safe view of an entry produced by ``_collect_cache_entries``.""" + def _iso(value: Optional[datetime]) -> Optional[str]: + return value.isoformat() if value is not None else None + return { + "url": entry["url"], + "status": entry["status"], + "size_bytes": entry["size"], + "content_type": entry["content_type"], + "created_at": _iso(entry["created_at"]), + "expires": _iso(entry["expires"]), + "is_expired": entry["is_expired"], + } + + +# Shell-style alias: `cache ls` runs the same callback as `cache list`. +# A shallow copy gives the alias its own name and hides it from --help so +# the command appears only once in the listing. 
+_cache_ls_alias = _copy.copy(cache_list) +_cache_ls_alias.name = "ls" +_cache_ls_alias.hidden = True +cache.add_command(_cache_ls_alias) From c0bea7f3bd7f25e198bf3b74e91044568df48bc7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 15 May 2026 14:46:56 +0200 Subject: [PATCH 55/89] test(conftest): :wrench: force wide terminal for Rich-based CLI assertions --- tests/conftest.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 778b62efa..d14ff836a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -49,6 +49,30 @@ SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER = check_local_data_entity_existence.identifier +@pytest.fixture(scope="session", autouse=True) +def _session_wide_terminal(): + """ + Force Rich (and other ``os.get_terminal_size`` consumers) to render with + a wide terminal. ``click.testing.CliRunner`` captures stdout into a + StringIO, so Rich falls back to its 80-column default and truncates + table cells / wraps panel rows — breaking ``"substring" in result.output`` + assertions in a non-deterministic way. Setting ``COLUMNS`` early keeps + the rendered output predictable across machines and CI. 
+ """ + previous_columns = os.environ.get("COLUMNS") + previous_lines = os.environ.get("LINES") + os.environ["COLUMNS"] = "200" + os.environ["LINES"] = "50" + try: + yield + finally: + for name, prev in (("COLUMNS", previous_columns), ("LINES", previous_lines)): + if prev is None: + os.environ.pop(name, None) + else: + os.environ[name] = prev + + @pytest.fixture(scope="session", autouse=True) def _session_isolated_xdg(tmp_path_factory): """ From 0419466d0c214d9b16566654a468cd28a3b482f5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 15 May 2026 14:49:21 +0200 Subject: [PATCH 56/89] test(validation): :white_check_mark: cover offline cache-miss warning de-duplication --- tests/unit/test_offline_cache_miss_warning.py | 131 ++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 tests/unit/test_offline_cache_miss_warning.py diff --git a/tests/unit/test_offline_cache_miss_warning.py b/tests/unit/test_offline_cache_miss_warning.py new file mode 100644 index 000000000..eae79b10e --- /dev/null +++ b/tests/unit/test_offline_cache_miss_warning.py @@ -0,0 +1,131 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest + +from rocrate_validator import models as models_module +from rocrate_validator.models import ValidationContext +from rocrate_validator.utils.http import OfflineCacheMissError, find_offline_cache_miss + + +# ---------- find_offline_cache_miss ---------- +def test_find_offline_cache_miss_direct(): + exc = OfflineCacheMissError("https://example.org/x") + assert find_offline_cache_miss(exc) is exc + + +def test_find_offline_cache_miss_walks_cause_chain(): + inner = OfflineCacheMissError("https://example.org/x") + try: + try: + raise inner + except OfflineCacheMissError as e: + raise RuntimeError("wrapped") from e + except Exception as outer: + found = find_offline_cache_miss(outer) + assert found is inner + + +def test_find_offline_cache_miss_walks_context_chain(): + # `raise` inside `except` without `from` populates __context__. + try: + try: + raise OfflineCacheMissError("https://example.org/y") + except OfflineCacheMissError: + raise RuntimeError("wrapped via context") + except Exception as outer: + found = find_offline_cache_miss(outer) + assert isinstance(found, OfflineCacheMissError) + assert found.url == "https://example.org/y" + + +def test_find_offline_cache_miss_returns_none_for_unrelated(): + assert find_offline_cache_miss(ValueError("nope")) is None + + +def test_find_offline_cache_miss_handles_cyclic_chain(): + # Two exceptions referencing each other must not loop forever. 
+ a = RuntimeError("a") + b = RuntimeError("b") + a.__context__ = b + b.__context__ = a + assert find_offline_cache_miss(a) is None + + +# ---------- ValidationContext.maybe_warn_offline_cache_miss ---------- +@pytest.fixture +def bare_context(): + """A ValidationContext with only the state needed by the dedup helper.""" + ctx = ValidationContext.__new__(ValidationContext) + ctx._offline_cache_misses_warned = set() + return ctx + + +@pytest.fixture +def mock_logger(monkeypatch): + """ + Replace the module-level logger in ``rocrate_validator.models`` with a + MagicMock. The project's custom logger sets ``propagate=False``, so + pytest's ``caplog`` does not see its records — observing the mock is + both simpler and more precise. + """ + fake = MagicMock() + monkeypatch.setattr(models_module, "logger", fake) + return fake + + +def test_maybe_warn_returns_false_for_unrelated_exception(bare_context, mock_logger): + assert bare_context.maybe_warn_offline_cache_miss(ValueError("nope")) is False + mock_logger.warning.assert_not_called() + + +def test_maybe_warn_emits_once_per_url(bare_context, mock_logger): + url = "https://example.org/ctx" + for _ in range(3): + assert bare_context.maybe_warn_offline_cache_miss(OfflineCacheMissError(url)) is True + assert mock_logger.warning.call_count == 1 + # The bare miss exception is logged via "%s" so it stringifies and the + # URL appears verbatim in the formatted message. 
+ args, _ = mock_logger.warning.call_args + assert url in str(args[1]) + + +def test_maybe_warn_emits_once_per_distinct_url(bare_context, mock_logger): + url_a = "https://example.org/a" + url_b = "https://example.org/b" + bare_context.maybe_warn_offline_cache_miss(OfflineCacheMissError(url_a)) + bare_context.maybe_warn_offline_cache_miss(OfflineCacheMissError(url_b)) + bare_context.maybe_warn_offline_cache_miss(OfflineCacheMissError(url_a)) + assert mock_logger.warning.call_count == 2 + logged = " ".join(str(call.args[1]) for call in mock_logger.warning.call_args_list) + assert url_a in logged + assert url_b in logged + + +def test_maybe_warn_dedups_when_miss_is_wrapped(bare_context, mock_logger): + url = "https://example.org/ctx" + try: + raise RuntimeError("wrapped") from OfflineCacheMissError(url) + except RuntimeError as wrapped_exc: + wrapped = wrapped_exc + # First call: direct miss; warning emitted. + assert bare_context.maybe_warn_offline_cache_miss(OfflineCacheMissError(url)) is True + # Second call: same URL but reached via a wrapper exception. Must still + # be recognized through the __cause__ chain and dedup'd against the first. 
+ assert bare_context.maybe_warn_offline_cache_miss(wrapped) is True + assert mock_logger.warning.call_count == 1 From b6873facfb74ce26c891ac20166085cac215f73c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 15 May 2026 14:52:40 +0200 Subject: [PATCH 57/89] test(cli/cache): :white_check_mark: add CLI tests for 'cache warm --url' and 'cache list' --- tests/test_cli_cache.py | 402 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 402 insertions(+) create mode 100644 tests/test_cli_cache.py diff --git a/tests/test_cli_cache.py b/tests/test_cli_cache.py new file mode 100644 index 000000000..860835e01 --- /dev/null +++ b/tests/test_cli_cache.py @@ -0,0 +1,402 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +CLI tests for the ``rocrate-validator cache`` subcommands: + +* ``cache warm`` profile-token fallback (mirrors ``validate``). +* ``cache warm -u/--url`` arbitrary URL warming. +* ``cache list`` / ``cache ls`` entry listing with filter/sort/--json. 
+""" + +from __future__ import annotations + +import io +import json + +import pytest +import urllib3 +from click.testing import CliRunner + +from rocrate_validator.cli.main import cli +from rocrate_validator.models import Profile +from rocrate_validator.utils.http import HttpRequester + + +# ---------- shared fixtures ---------- +@pytest.fixture +def cli_runner() -> CliRunner: + return CliRunner() + + +@pytest.fixture(autouse=True) +def _reset_requester(): + HttpRequester.reset() + yield + HttpRequester.reset() + + +@pytest.fixture +def mock_network(monkeypatch): + """Route every outbound HTTP call to a fake successful response.""" + from requests.adapters import HTTPAdapter + + def fake_send(self, request, **kwargs): + raw = urllib3.HTTPResponse( + body=io.BytesIO(b'{"ok": true}'), + headers={"Content-Type": "application/json", "Content-Length": "12"}, + status=200, + preload_content=False, + decode_content=False, + ) + return self.build_response(request, raw) + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + + +@pytest.fixture +def tmp_cache(tmp_path): + """Path passed via ``--cache-path`` to keep tests off the user cache.""" + return tmp_path / "cache" + + +def _make_profile_stub(identifier: str, version: str, token: str): + """Lightweight stand-in for a Profile used only by token fallback tests.""" + + class _Stub: + pass + + stub = _Stub() + stub.identifier = identifier + stub.version = version + stub.token = token + return stub + + +# ==================================================================== +# cache warm: profile-token fallback +# ==================================================================== +def test_warm_token_resolves_to_single_versioned_profile( + cli_runner, + mock_network, + tmp_cache, + monkeypatch, +): + """`-p process-run-crate` should resolve to the only versioned variant.""" + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "-p", "process-run-crate"], + ) + assert result.exit_code 
== 0, result.output + assert "process-run-crate-0.5" in result.output + # Single-version token must resolve silently — no "Note:" line. + assert "Note:" not in result.output + assert "not found and skipped" not in result.output + + +def test_warm_token_with_multiple_versions_emits_note( + cli_runner, + mock_network, + tmp_cache, + monkeypatch, +): + """When a token matches more than one version, the picked identifier and + the alternatives must appear in a one-line Note.""" + candidates = [ + _make_profile_stub("fakeprof-0.1", "0.1", "fakeprof"), + _make_profile_stub("fakeprof-0.2", "0.2", "fakeprof"), + ] + + real_by_id = Profile.get_by_identifier + real_by_token = Profile.get_by_token + + def fake_by_id(ident): + if ident == "fakeprof": + return None + return real_by_id(ident) + + def fake_by_token(tok): + if tok == "fakeprof": + return candidates + return real_by_token(tok) + + monkeypatch.setattr(Profile, "get_by_identifier", staticmethod(fake_by_id)) + monkeypatch.setattr(Profile, "get_by_token", staticmethod(fake_by_token)) + # Skip URL discovery entirely — the test cares about the resolver, not + # what's warmed. 
+ monkeypatch.setattr( + "rocrate_validator.cli.commands.cache.discover_cacheable_urls_from_profiles", + lambda profiles: [], + ) + + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "-p", "fakeprof"], + ) + assert result.exit_code == 0, result.output + assert "Note:" in result.output + assert "fakeprof-0.2" in result.output # picked (highest version) + assert "fakeprof-0.1" in result.output # listed as alternative + + +def test_warm_unknown_profile_still_reported_as_missing( + cli_runner, + mock_network, + tmp_cache, +): + """A profile id that matches neither identifier nor token must end up + in the existing 'Profile(s) not found and skipped' message.""" + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "-p", "definitely-not-a-profile"], + ) + assert result.exit_code == 0, result.output + assert "not found and skipped" in result.output + assert "definitely-not-a-profile" in result.output + + +# ==================================================================== +# cache warm: -u / --url +# ==================================================================== +def test_warm_url_alone_does_not_fall_back_to_all_profiles( + cli_runner, + mock_network, + tmp_cache, + monkeypatch, +): + """``cache warm -u `` with no -p must warm only the URL — not every + installed profile (which is the default when no explicit source is + given).""" + seen = {"profile_calls": 0} + + def fake_discover(profiles): + seen["profile_calls"] += 1 + return [] + + monkeypatch.setattr( + "rocrate_validator.cli.commands.cache.discover_cacheable_urls_from_profiles", + fake_discover, + ) + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "-u", "https://example.org/a"], + ) + assert result.exit_code == 0, result.output + assert seen["profile_calls"] == 0 + assert "Fetching explicit URLs" in result.output + assert "https://example.org/a" in result.output + + +def 
test_warm_url_invalid_value_is_rejected(cli_runner, tmp_cache): + """Non-http(s) values must trip Click's parameter validation and exit 2.""" + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "-u", "notaurl"], + ) + assert result.exit_code == 2 + assert "http(s)" in result.output + assert "notaurl" in result.output + + +def test_warm_url_combined_with_profile_warms_both( + cli_runner, + mock_network, + tmp_cache, + monkeypatch, +): + """``-p -u `` must warm the profile URLs *and* the extra + URL in the same invocation.""" + # Make the profile contribute a single deterministic URL. + monkeypatch.setattr( + "rocrate_validator.cli.commands.cache.discover_cacheable_urls_from_profiles", + lambda profiles: ["https://example.org/from-profile"], + ) + result = cli_runner.invoke( + cli, + [ + "cache", + "warm", + "--cache-path", + str(tmp_cache), + "-p", + "ro-crate-1.1", + "-u", + "https://example.org/explicit", + ], + ) + assert result.exit_code == 0, result.output + assert "Warming cache for profiles" in result.output + assert "Fetching explicit URLs" in result.output + assert "https://example.org/from-profile" in result.output + assert "https://example.org/explicit" in result.output + + +# ==================================================================== +# cache list / ls +# ==================================================================== +def _warm_some(cli_runner, tmp_cache, urls): + args = ["cache", "warm", "--cache-path", str(tmp_cache)] + for u in urls: + args += ["-u", u] + return cli_runner.invoke(cli, args) + + +def test_list_reports_empty_cache(cli_runner, tmp_cache): + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache)], + ) + assert result.exit_code == 0, result.output + assert "Cache is empty" in result.output + + +def test_list_shows_warmed_entries(cli_runner, mock_network, tmp_cache): + urls = [ + "https://example.org/alpha", + "https://example.org/beta", + 
"https://example.org/gamma", + ] + warm = _warm_some(cli_runner, tmp_cache, urls) + assert warm.exit_code == 0, warm.output + + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache)], + ) + assert result.exit_code == 0, result.output + for u in urls: + # URLs are wrapped/folded in the Rich table, so check a stable token. + assert u.rsplit("/", 1)[1] in result.output + assert "Total:" in result.output + + +def test_list_url_filter_narrows_results(cli_runner, mock_network, tmp_cache): + urls = [ + "https://example.org/keep-me", + "https://example.org/other", + ] + _warm_some(cli_runner, tmp_cache, urls) + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--url", "keep-me"], + ) + assert result.exit_code == 0, result.output + assert "keep-me" in result.output + # The filter is case-insensitive substring on URL; "other" must be absent. + assert "/other" not in result.output + + +def test_list_filter_with_no_match_message(cli_runner, mock_network, tmp_cache): + _warm_some(cli_runner, tmp_cache, ["https://example.org/only"]) + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--url", "no-such-fragment"], + ) + assert result.exit_code == 0, result.output + assert "No entries match" in result.output + + +def test_list_json_output_is_well_formed(cli_runner, mock_network, tmp_cache): + urls = [ + "https://example.org/a", + "https://example.org/b", + ] + _warm_some(cli_runner, tmp_cache, urls) + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--json"], + ) + assert result.exit_code == 0, result.output + payload = json.loads(result.output) + assert isinstance(payload, list) + assert {e["url"] for e in payload} == set(urls) + sample = payload[0] + # Every entry must carry the documented fields. 
+ assert {"url", "status", "size_bytes", "content_type", "created_at", "expires", "is_expired"} <= set(sample) + assert isinstance(sample["size_bytes"], int) + + +def test_list_sort_by_url_asc_then_desc(cli_runner, mock_network, tmp_cache): + """`--sort url` defaults to asc; `--order desc` must reverse it.""" + _warm_some( + cli_runner, + tmp_cache, + [ + "https://example.org/c", + "https://example.org/a", + "https://example.org/b", + ], + ) + + asc = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--sort", "url", "--json"], + ) + assert asc.exit_code == 0, asc.output + asc_urls = [e["url"] for e in json.loads(asc.output)] + assert asc_urls == sorted(asc_urls) + + desc = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--sort", "url", "--order", "desc", "--json"], + ) + assert desc.exit_code == 0, desc.output + desc_urls = [e["url"] for e in json.loads(desc.output)] + assert desc_urls == sorted(desc_urls, reverse=True) + + +def test_list_default_sort_is_created_desc(cli_runner, mock_network, tmp_cache): + """No --sort flag: entries come back ordered by created_at, most recent + first (the documented default).""" + _warm_some( + cli_runner, + tmp_cache, + [ + "https://example.org/first", + "https://example.org/second", + "https://example.org/third", + ], + ) + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--json"], + ) + assert result.exit_code == 0, result.output + created = [e["created_at"] for e in json.loads(result.output)] + # Each entry has a timestamp (mocked response goes through requests_cache); + # the sequence must be monotonically non-increasing. 
+ assert all(a >= b for a, b in zip(created, created[1:])) + + +def test_list_invalid_order_is_rejected(cli_runner, tmp_cache): + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--order", "sideways"], + ) + assert result.exit_code == 2 + assert "'sideways'" in result.output + + +def test_ls_alias_runs_the_same_command(cli_runner, mock_network, tmp_cache): + _warm_some(cli_runner, tmp_cache, ["https://example.org/x"]) + list_result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--json"], + ) + ls_result = cli_runner.invoke( + cli, + ["cache", "ls", "--cache-path", str(tmp_cache), "--json"], + ) + assert list_result.exit_code == ls_result.exit_code == 0 + assert json.loads(list_result.output) == json.loads(ls_result.output) From 392df1a61095ce3143d71f31a945661312d932a0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 15 May 2026 15:24:33 +0200 Subject: [PATCH 58/89] refactor(cli/cache): :art: drop Status column from `cache list` table --- rocrate_validator/cli/commands/cache.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 966b992b2..5492af2eb 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -175,7 +175,6 @@ def cache_list( table = Table(title=f"HTTP Cache entries ({len(entries)})", show_lines=False) table.add_column("URL", overflow="fold") - table.add_column("Status", justify="right") table.add_column("Size", justify="right") table.add_column("Content-Type") table.add_column("Created") @@ -185,7 +184,6 @@ def cache_list( total += e["size"] table.add_row( e["url"], - str(e["status"] if e["status"] is not None else "—"), _format_bytes(e["size"]), e["content_type"] or "—", _format_dt(e["created_at"]), From 1568ef6a1de5bc2ba6768c69e5a89658b8616196 Mon Sep 17 00:00:00 2001 From: Florian Wetzels Date: Mon, 18 May 2026 14:31:46 +0200 Subject: 
[PATCH 59/89] Added tests for objects with ISA types that are not connected to ISA part of the crate --- .../isa-ro-crate/test_10_definedterm.py | 46 +++++++++++ .../profiles/isa-ro-crate/test_3_process.py | 80 +++++++++---------- .../profiles/isa-ro-crate/test_4_protocol.py | 74 ++++++++--------- .../profiles/isa-ro-crate/test_5_sample.py | 36 +++++++++ .../profiles/isa-ro-crate/test_6_data.py | 36 +++++++++ .../profiles/isa-ro-crate/test_7_person.py | 39 +++++++++ .../profiles/isa-ro-crate/test_8_article.py | 35 ++++++++ .../profiles/isa-ro-crate/test_9_comment.py | 36 +++++++++ 8 files changed, 305 insertions(+), 77 deletions(-) diff --git a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py index 9f129b2c5..6835d1b27 100644 --- a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py +++ b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py @@ -31,6 +31,8 @@ def test_isa_defined_term_name(): sparql = ( SPARQL_PREFIXES + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: DELETE { ?defined_term schema:name ?name . } @@ -146,3 +148,47 @@ def test_isa_defined_term_termCode_of_incorrect_type(): profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) + + +def test_isa_term_not_correctly_referenced(): + """ + Test an ISA RO-Crate where an invalid defined term is not correctly referenced. + Such defined terms should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?protocol bioschemas-prop:intendedUse ?term . + ?person schema:jobTitle ?term . + ?assay schema:measurementTechnique ?term . + ?assay schema:measurementMethod ?term . + ?term schema:name ?name . + } + INSERT { + ?assay schema:mentions ?term . + } + WHERE { + ?protocol a bioschemas:LabProtocol . + ?person a schema:Person . + ?assay a schema:Dataset . + ?term a schema:DefinedTerm . 
+ ?term schema:name ?name . + ?protocol bioschemas-prop:intendedUse ?term . + ?person schema:jobTitle ?term . + ?assay schema:measurementTechnique ?term . + ?assay schema:measurementMethod ?term . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) \ No newline at end of file diff --git a/tests/integration/profiles/isa-ro-crate/test_3_process.py b/tests/integration/profiles/isa-ro-crate/test_3_process.py index 2ac4dbff4..d7d9d15fb 100644 --- a/tests/integration/profiles/isa-ro-crate/test_3_process.py +++ b/tests/integration/profiles/isa-ro-crate/test_3_process.py @@ -56,46 +56,46 @@ def test_isa_process_name(): ) -# def test_isa_process_not_correctly_referenced_from_dataset(): -# """ -# Test an ISA RO-Crate where an invalid Process is referenced from a Dataset with wrong property. -# Such processes should be ignored, meaning the validation should pass. -# """ -# sparql = ( -# SPARQL_PREFIXES -# + """ -# PREFIX schema: -# PREFIX bioschemas: -# PREFIX bioschemas-prop: -# DELETE { -# ?dataset schema:about ?process . -# ?process schema:name ?name . -# } -# INSERT { -# ?dataset schema:mentions ?process . -# } -# WHERE { -# ?dataset a schema:Dataset . -# ?dataset schema:about ?process. -# ?process schema:name ?name . 
-# } -# """ -# ) - -# do_entity_test( -# rocrate_path=ValidROC().isa_ro_crate, -# requirement_severity=Severity.REQUIRED, -# expected_validation_result=True, -# # expected_triggered_requirements=[ -# # "Process MUST be directly referenced from a dataset" -# # ], -# # expected_triggered_issues=[ -# # "Process MUST be directly referenced in about on a Dataset" -# # ], -# profile_identifier="isa-ro-crate", -# rocrate_entity_mod_sparql=sparql, -# skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], -# ) +def test_isa_process_not_correctly_referenced_from_dataset(): + """ + Test an ISA RO-Crate where an invalid Process is not correctly referenced. + Such processes should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX schema: + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?dataset schema:about ?process . + ?process schema:name ?name . + } + INSERT { + ?dataset schema:mentions ?process . + } + WHERE { + ?dataset a schema:Dataset . + ?dataset schema:about ?process. + ?process schema:name ?name . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + # expected_triggered_requirements=[ + # "Process MUST be directly referenced from a dataset" + # ], + # expected_triggered_issues=[ + # "Process MUST be directly referenced in about on a Dataset" + # ], + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) def test_isa_process_no_object(): diff --git a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py index 7e5f1d7af..eed5feff7 100644 --- a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py +++ b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py @@ -183,44 +183,44 @@ def test_isa_protocol_no_intendedUse(): rocrate_entity_mod_sparql=sparql, ) -# def test_isa_protocol_not_correctly_referenced_from_process(): -# """ -# Test an ISA RO-Crate where an invalid Protocol is referenced from a process with wrong property. -# Such protocols should be ignored, meaning the validation should pass. -# """ -# sparql = ( -# SPARQL_PREFIXES -# + """ -# PREFIX bioschemas: -# PREFIX bioschemas-prop: -# DELETE { -# ?process bioschemas-prop:executesProtocol ?protocol . -# ?protocol schema:name ?name . -# } -# INSERT { -# ?dataset schema:mentions ?protocol . -# } -# WHERE { -# ?dataset a schema:Dataset . -# ?dataset schema:additionalType "Assay" . -# ?process a bioschemas:LabProcess . -# ?protocol a bioschemas:LabProtocol . -# ?process bioschemas-prop:executesProtocol ?protocol . -# ?protocol schema:name ?name . 
-# } -# """ -# ) -# do_entity_test( -# rocrate_path=ValidROC().isa_ro_crate, -# requirement_severity=Severity.RECOMMENDED, -# expected_validation_result=True, -# # expected_triggered_requirements=["Protocol SHOULD have intended use"], -# # expected_triggered_issues=["Protocol intended use MUST be of type string or DefinedTerm"], -# profile_identifier="isa-ro-crate", -# rocrate_entity_mod_sparql=sparql, -# skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], -# ) +def test_isa_protocol_not_correctly_referenced_from_process(): + """ + Test an ISA RO-Crate where an invalid Protocol is not correctly referenced. + Such protocols should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?process bioschemas-prop:executesLabProtocol ?protocol . + ?protocol schema:description ?description . + } + INSERT { + ?process schema:mentions ?protocol . + ?protocol schema:description 42 . + } + WHERE { + ?process a bioschemas:LabProcess . + ?protocol a bioschemas:LabProtocol . + ?process bioschemas-prop:executesLabProtocol ?protocol . + ?protocol schema:description ?description . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + # expected_triggered_requirements=["Protocol SHOULD have intended use"], + # expected_triggered_issues=["Protocol intended use MUST be of type string or DefinedTerm"], + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) def test_isa_protocol_intendedUse_incorrect_type(): diff --git a/tests/integration/profiles/isa-ro-crate/test_5_sample.py b/tests/integration/profiles/isa-ro-crate/test_5_sample.py index 305ccca0f..bc2714d0e 100644 --- a/tests/integration/profiles/isa-ro-crate/test_5_sample.py +++ b/tests/integration/profiles/isa-ro-crate/test_5_sample.py @@ -55,6 +55,42 @@ def test_isa_sample_name(): ) +def test_isa_sample_not_correctly_referenced_from_process(): + """ + Test an ISA RO-Crate where an invalid Sample is not correctly referenced. + Such samples should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?process schema:object ?sample . + ?sample schema:name ?name . + } + INSERT { + ?process schema:mentions ?sample . + } + WHERE { + ?process a bioschemas:LabProcess . + ?sample a bioschemas:Sample . + ?process schema:object ?sample . + ?sample schema:name ?name . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) + + def test_isa_sample_name_of_incorrect_type(): """ Test an ISA RO-Crate where a sample name has wrong type. 
diff --git a/tests/integration/profiles/isa-ro-crate/test_6_data.py b/tests/integration/profiles/isa-ro-crate/test_6_data.py index 1f19f0ecf..bb79248db 100644 --- a/tests/integration/profiles/isa-ro-crate/test_6_data.py +++ b/tests/integration/profiles/isa-ro-crate/test_6_data.py @@ -54,6 +54,42 @@ def test_isa_file_name(): ) +def test_isa_data_not_correctly_referenced_from_process(): + """ + Test an ISA RO-Crate where an invalid data file is not correctly referenced. + Such files should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?dataset schema:hasPart ?file . + ?file schema:name ?name . + } + INSERT { + ?dataset schema:mentions ?file . + } + WHERE { + ?dataset a schema:Dataset . + ?file a schema:MediaObject . + ?dataset schema:hasPart ?file . + ?file schema:name ?name . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) + + def test_isa_file_name_of_incorrect_type(): """ Test an ISA RO-Crate where a file name has wrong type. diff --git a/tests/integration/profiles/isa-ro-crate/test_7_person.py b/tests/integration/profiles/isa-ro-crate/test_7_person.py index 9135b633a..c7263ed6b 100644 --- a/tests/integration/profiles/isa-ro-crate/test_7_person.py +++ b/tests/integration/profiles/isa-ro-crate/test_7_person.py @@ -54,6 +54,45 @@ def test_isa_person_given_name(): ) +def test_isa_person_not_correctly_referenced(): + """ + Test an ISA RO-Crate where an invalid person is not correctly referenced. + Such persons should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?dataset schema:creator ?person . + ?person schema:givenName ?name . 
+ ?article schema:author ?person . + } + INSERT { + ?dataset schema:mentions ?person . + } + WHERE { + ?dataset a schema:Dataset . + ?person a schema:Person . + ?article a schema:ScholarlyArticle . + ?article schema:author ?person . + ?dataset schema:creator ?person . + ?person schema:givenName ?name . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) + + def test_isa_person_given_name_of_incorrect_type(): """ Test an ISA RO-Crate where a person given name has wrong type. diff --git a/tests/integration/profiles/isa-ro-crate/test_8_article.py b/tests/integration/profiles/isa-ro-crate/test_8_article.py index fac95cbb6..31041f667 100644 --- a/tests/integration/profiles/isa-ro-crate/test_8_article.py +++ b/tests/integration/profiles/isa-ro-crate/test_8_article.py @@ -54,6 +54,41 @@ def test_isa_article_headline(): ) +def test_isa_publication_not_correctly_referenced(): + """ + Test an ISA RO-Crate where an invalid publication is not correctly referenced. + Such publications should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?dataset schema:citation ?publication . + ?publication schema:headline ?headline . + } + INSERT { + ?dataset schema:mentions ?publication . + } + WHERE { + ?dataset a schema:Dataset . + ?publication a schema:ScholarlyArticle . + ?publication schema:headline ?headline . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) + + def test_isa_article_headline_of_incorrect_type(): """ Test an ISA RO-Crate where an article headline has wrong type. diff --git a/tests/integration/profiles/isa-ro-crate/test_9_comment.py b/tests/integration/profiles/isa-ro-crate/test_9_comment.py index c2b7db4c5..8a1b5290c 100644 --- a/tests/integration/profiles/isa-ro-crate/test_9_comment.py +++ b/tests/integration/profiles/isa-ro-crate/test_9_comment.py @@ -81,3 +81,39 @@ def test_isa_comment_text_of_incorrect_type(): profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) + + +def test_isa_comment_not_correctly_referenced(): + """ + Test an ISA RO-Crate where an invalid comment is not correctly referenced. + Such comments should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?publication schema:comment ?comment . + ?comment schema:text ?text . + } + INSERT { + ?publication schema:mentions ?comment . + ?comment schema:text 42 . + } + WHERE { + ?publication a schema:ScholarlyArticle . + ?comment a schema:Comment . + ?comment schema:text ?text . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) \ No newline at end of file From 1f5439a4b13cdf2ee9c74c751de73ccdceb5c925 Mon Sep 17 00:00:00 2001 From: Florian Wetzels Date: Mon, 18 May 2026 14:37:59 +0200 Subject: [PATCH 60/89] Fixed some comments and descriptions in ISA profile --- .../profiles/isa-ro-crate/10_definedterm.ttl | 2 +- .../isa-ro-crate/11_propertyvalue.ttl | 4 +-- .../profiles/isa-ro-crate/3_process.ttl | 28 ------------------- .../profiles/isa-ro-crate/4_protocol.ttl | 2 +- .../profiles/isa-ro-crate/5_sample.ttl | 2 +- .../profiles/isa-ro-crate/7_person.ttl | 2 +- .../profiles/isa-ro-crate/8_article.ttl | 4 +-- .../profiles/isa-ro-crate/9_comment.ttl | 2 +- 8 files changed, 9 insertions(+), 37 deletions(-) diff --git a/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl b/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl index 83bb43386..3755a6ebb 100644 --- a/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl @@ -35,7 +35,7 @@ isa-ro-crate:FindISAOntologyAnnotation a sh:NodeShape, validator:HiddenShape; sh:subject sh:this ; sh:predicate rdf:type ; sh:object isa-ro-crate:OntologyAnnotation ; - # The condition: need to be attached to a Study or Assay + # The condition: need to be attached to a Assay, Protocol, Person, or Article sh:condition [ a sh:NodeShape; sh:name: "OntologyAnnotation is attached to an Assay, Protocol, Person, or Article" ; diff --git a/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl b/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl index ba9d108a3..6bdee423d 100644 --- a/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl +++ 
b/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl @@ -35,10 +35,10 @@ isa-ro-crate:FindISAPropertyValue a sh:NodeShape, validator:HiddenShape; sh:subject sh:this ; sh:predicate rdf:type ; sh:object isa-ro-crate:PropertyValue ; - # The condition: need to be attached to a Study or Assay + # The condition: need to be attached to an Assay, Process, Protocol, Sample, Person, or Article sh:condition [ a sh:NodeShape; - sh:name: "PropertyValue is attached to an Assay" ; + sh:name: "PropertyValue is attached to an Assay, Process, Protocol, Sample, Person, or Article" ; sh:or ( [ sh:property [ diff --git a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl index 5805ce1f5..3d71bf39a 100644 --- a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl @@ -51,34 +51,6 @@ isa-ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; ] . -# isa-ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; -# sh:name "Identify ISA processes within the RO-Crate" ; -# sh:description "A Process has type LabProcess and is attached to a Study or Assay." ; -# sh:order 3 ; -# sh:target [ -# a sh:SPARQLTarget ; -# sh:select """ -# PREFIX bioschemas: -# PREFIX isa-ro-crate: -# PREFIX schema: - -# SELECT ?this -# WHERE { -# ?this a bioschemas:LabProcess . -# ?assay schema:about ?this . -# ?assay a isa-ro-crate:Assay . -# } -# """ ; -# ] ; -# # Expand data graph with triples from the file data entity -# sh:rule [ -# a sh:TripleRule ; -# sh:subject sh:this ; -# sh:predicate rdf:type ; -# sh:object isa-ro-crate:Process ; -# ] -# . 
- # check process must have name diff --git a/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl b/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl index 3079cdffa..64e762242 100644 --- a/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl @@ -35,7 +35,7 @@ isa-ro-crate:FindISAProtocols a sh:NodeShape, validator:HiddenShape; sh:subject sh:this ; sh:predicate rdf:type ; sh:object isa-ro-crate:Protocol ; - # The condition: need to be attached to a Study or Assay + # The condition: need to be attached to a Process sh:condition [ sh:property [ sh:path [ sh:inversePath bioschemas-prop:executesLabProtocol ] ; diff --git a/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl b/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl index dcdc9b576..5bfab5038 100644 --- a/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl @@ -35,7 +35,7 @@ isa-ro-crate:FindISASamples a sh:NodeShape, validator:HiddenShape; sh:subject sh:this ; sh:predicate rdf:type ; sh:object isa-ro-crate:Sample ; - # The condition: need to be attached to a Study or Assay + # The condition: need to be attached to a Process sh:condition [ a sh:NodeShape ; sh:name "Sample is attached to a process" ; diff --git a/rocrate_validator/profiles/isa-ro-crate/7_person.ttl b/rocrate_validator/profiles/isa-ro-crate/7_person.ttl index d01f50841..9e2855265 100644 --- a/rocrate_validator/profiles/isa-ro-crate/7_person.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/7_person.ttl @@ -35,7 +35,7 @@ isa-ro-crate:FindISAPerson a sh:NodeShape, validator:HiddenShape; sh:subject sh:this ; sh:predicate rdf:type ; sh:object isa-ro-crate:Person ; - # The condition: need to be attached to a Study or Assay + # The condition: need to be attached to an Investigation, Study, Assay, Process, or Article sh:condition [ a sh:NodeShape; sh:name: "Person is attached to an Investigation, Study, Assay, Process, or 
Article" ; diff --git a/rocrate_validator/profiles/isa-ro-crate/8_article.ttl b/rocrate_validator/profiles/isa-ro-crate/8_article.ttl index 2aac05a27..c9deffb53 100644 --- a/rocrate_validator/profiles/isa-ro-crate/8_article.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/8_article.ttl @@ -26,7 +26,7 @@ isa-ro-crate:FindISAPublication a sh:NodeShape, validator:HiddenShape; sh:name "Identify ISA publications within the RO-Crate" ; - sh:description "A Publication has type ScholarlyArticle and is attached to a Study or Assay." ; + sh:description "A Publication has type ScholarlyArticle and is attached to a Study or Investigation." ; sh:targetClass schema:ScholarlyArticle ; sh:order 2 ; # Expand data graph with triples from the file data entity @@ -35,7 +35,7 @@ isa-ro-crate:FindISAPublication a sh:NodeShape, validator:HiddenShape; sh:subject sh:this ; sh:predicate rdf:type ; sh:object isa-ro-crate:Article ; - # The condition: need to be attached to a Study or Assay + # The condition: need to be attached to a Study or Investigation sh:condition [ sh:property [ sh:path [ sh:inversePath schema:citation ] ; diff --git a/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl b/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl index 306ab76f9..d02c0eb3a 100644 --- a/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl @@ -35,7 +35,7 @@ isa-ro-crate:FindISAComment a sh:NodeShape, validator:HiddenShape; sh:subject sh:this ; sh:predicate rdf:type ; sh:object isa-ro-crate:Comment ; - # The condition: need to be attached to a Study or Assay + # The condition: need to be attached to an Investigation, Study, Assay, Protocol, Data file, or Publication sh:condition [ sh:property [ sh:path [ sh:inversePath schema:comment ] ; From 31caaa4cd79046f685b77d6f77f6b4faa9eeeafe Mon Sep 17 00:00:00 2001 From: Florian Wetzels Date: Mon, 18 May 2026 14:57:48 +0200 Subject: [PATCH 61/89] minor formatting fixes --- 
tests/integration/profiles/isa-ro-crate/test_10_definedterm.py | 2 +- tests/integration/profiles/isa-ro-crate/test_9_comment.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py index 6835d1b27..74719843b 100644 --- a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py +++ b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py @@ -191,4 +191,4 @@ def test_isa_term_not_correctly_referenced(): profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, disable_inherited_profiles_issue_reporting=True, - ) \ No newline at end of file + ) diff --git a/tests/integration/profiles/isa-ro-crate/test_9_comment.py b/tests/integration/profiles/isa-ro-crate/test_9_comment.py index 8a1b5290c..53ddcf6b5 100644 --- a/tests/integration/profiles/isa-ro-crate/test_9_comment.py +++ b/tests/integration/profiles/isa-ro-crate/test_9_comment.py @@ -116,4 +116,4 @@ def test_isa_comment_not_correctly_referenced(): profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, disable_inherited_profiles_issue_reporting=True, - ) \ No newline at end of file + ) From 0b8289b8f0934f3d3db12efc926eed9a11237cc7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 19 May 2026 16:57:29 +0200 Subject: [PATCH 62/89] refactor(errors): :recycle: accept str, Path or URI in ROCrateInvalidURIError --- rocrate_validator/errors.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/rocrate_validator/errors.py b/rocrate_validator/errors.py index daf58fe4c..74b1ed794 100644 --- a/rocrate_validator/errors.py +++ b/rocrate_validator/errors.py @@ -12,7 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Optional +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, Optional, Union + +if TYPE_CHECKING: + # Imported only for type-checking to avoid a circular import: + # rocrate_validator.utils.uri imports this module at runtime. + from rocrate_validator.utils.uri import URI class ROCValidatorError(Exception): @@ -243,29 +251,34 @@ def __repr__(self): class ROCrateInvalidURIError(ROCValidatorError): """Raised when an invalid URI is provided.""" - def __init__(self, uri: str, message: Optional[str] = None): + def __init__(self, uri: Union[str, Path, URI], message: Optional[str] = None): self._uri = uri self._message = message or self.default_error_message(uri) @property - def uri(self) -> Optional[str]: - """The invalid URI.""" + def uri(self) -> Union[str, Path, URI]: + """The invalid URI, as originally provided (str, Path, or URI).""" return self._uri @property - def message(self) -> Optional[str]: + def uri_string(self) -> str: + """The invalid URI normalised to its string form.""" + return str(self._uri) + + @property + def message(self) -> str: """The error message.""" return self._message def __str__(self) -> str: return self._message - def __repr__(self): + def __repr__(self) -> str: return f"ROCrateInvalidURIError({self._uri!r})" @classmethod - def default_error_message(cls, uri: str) -> str: - return f"\"{uri}\" is not a valid RO-Crate URI. "\ + def default_error_message(cls, uri: Union[str, Path, URI]) -> str: + return f"\"{str(uri)}\" is not a valid RO-Crate URI. "\ "It MUST be either a local path to the RO-Crate root directory or a local/remote RO-Crate ZIP file." 
From 62f89c0cc6fcdc5cda9ad4b12b2272b31e6e965d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 19 May 2026 16:59:53 +0200 Subject: [PATCH 63/89] feat(uri): :sparkles: add `is_external_reference()` scheme detector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a purely syntactic helper that detects whether a value carries an explicit URI/IRI scheme per RFC 3986/3987, accepting both authority-based forms (http://...) and scheme-only forms (urn:, doi:, arcp://) as required by RO-Crate 1.1 §4.2.2. Scheme-only input (no authority/path/query/fragment) is rejected as semantically unusable. --- rocrate_validator/utils/uri.py | 43 +++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index 3d92c8abb..5a05fa0e0 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -15,7 +15,7 @@ import re from pathlib import Path from typing import Optional, Union -from urllib.parse import ParseResult, parse_qsl, urlparse +from urllib.parse import ParseResult, parse_qsl, urlparse, urlsplit from rocrate_validator import errors from rocrate_validator.utils import log as logging @@ -25,6 +25,47 @@ logger = logging.getLogger(__name__) +# RFC 3986 §3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +# Require length >= 2 to disambiguate from Windows drive letters +# (e.g. ``C:\path``). RFC 3986 allows single-character schemes but no +# IANA-registered scheme is one character long, so this is an acceptable +# trade-off. +_SCHEME_RE = re.compile(r"^[A-Za-z][A-Za-z0-9+\-.]+$") + + +def is_external_reference(value: object) -> bool: + """ + Check if `value` is an external reference (i.e. has a URI scheme). + + Return True if *value* has an explicit URI or IRI scheme (RFC 3986 for URI, and RFC 3987 for IRIs). 
+ Both authority-based forms (``http://...``) + and scheme-only forms (``urn:...``, ``doi:...``, ``arcp://...``) are + accepted, as required by RO-Crate 1.1 §4.2.2. + + The check is purely syntactic: the scheme is not verified against + the IANA registry and the hier-part is not resolved. + """ + if not isinstance(value, str) or not value: + return False + + try: + parts = urlsplit(value) + except ValueError: + # urlsplit can raise on malformed IPv6 literals, invalid ports, etc. + return False + + # Scheme must conform to RFC 3986 (and be at least 2 chars long). + if not _SCHEME_RE.match(parts.scheme): + return False + + # Reject scheme-only input (``urn:``, ``doi:``): syntactically valid + # per the grammar but semantically unusable as an identifier. + if not (parts.netloc or parts.path or parts.query or parts.fragment): + return False + + return True + + class URI: REMOTE_SUPPORTED_SCHEMA = ('http', 'https', 'ftp') From 469bbe55f0cafd3472391fee59492c1850126f78 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 19 May 2026 17:08:43 +0200 Subject: [PATCH 64/89] feat(uri): :sparkles: classify remote schemes and report granular availability Replace the single REMOTE_SUPPORTED_SCHEMA tuple with purpose-specific scheme sets (natively-checkable, supported RO-Crate roots, known remote) and add AvailabilityStatus to distinguish AVAILABLE, UNAVAILABLE, UNAUTHORIZED and UNCHECKABLE outcomes. --- rocrate_validator/utils/uri.py | 125 +++++++++++++++++++++++++++++---- 1 file changed, 112 insertions(+), 13 deletions(-) diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index 5a05fa0e0..efb79219f 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import enum import re from pathlib import Path from typing import Optional, Union @@ -25,6 +26,15 @@ logger = logging.getLogger(__name__) +class AvailabilityStatus(enum.Enum): + """Outcome of a URI availability check.""" + + AVAILABLE = "available" + UNAVAILABLE = "unavailable" + UNAUTHORIZED = "unauthorized" + UNCHECKABLE = "uncheckable" + + # RFC 3986 §3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) # Require length >= 2 to disambiguate from Windows drive letters # (e.g. ``C:\path``). RFC 3986 allows single-character schemes but no @@ -67,18 +77,62 @@ def is_external_reference(value: object) -> bool: class URI: - - REMOTE_SUPPORTED_SCHEMA = ('http', 'https', 'ftp') + # Schemes that the validator can fetch natively to verify availability. + # Anything outside this set is treated as remote but un-checkable. + NATIVELY_CHECKABLE_SCHEMES = ("http", "https") + + # Schemes accepted as RO-Crate root URIs (the loading code can only + # handle these as crate locations). + SUPPORTED_ROCRATE_SCHEMES = ("http", "https", "ftp", "file") + + # Well-known remote schemes commonly used to reference data resources + # (used to distinguish "recognized but un-checkable" from "unknown"). + KNOWN_REMOTE_SCHEMES = ( + # Web + "http", + "https", + # FTP family + "ftp", + "ftps", + "sftp", + # Remote shell / transfer + "scp", + "ssh", + "rsync", + # Cloud object stores + "s3", + "gs", + "abfs", + "abfss", + "wasb", + "wasbs", + # WebDAV + "dav", + "davs", + # Research / big-data filesystems + "irods", + "hdfs", + ) + + # Backwards-compatible alias kept for callers that still inspect it. 
+ REMOTE_SUPPORTED_SCHEMA = SUPPORTED_ROCRATE_SCHEMES[:-1] # http, https, ftp def __init__(self, uri: Union[str, Path]): + if uri is None or (isinstance(uri, str) and not uri.strip()): + raise ValueError("Invalid URI: empty value") self._uri = uri = str(uri) try: - # map local path to URI with file scheme - if not re.match(r'^\w+://', uri): + # Inputs that are not external references are assumed to be local + # paths, so the ``file://`` scheme is added explicitly. The + # detection covers both authority-based schemes (``http://``, + # ``scp://``) and scheme-only ones (``urn:``, ``doi:``), as + # defined by RFC 3986. + if not is_external_reference(uri): uri = f"file://{uri}" # parse the value to extract the scheme self._parse_result = urlparse(uri) - assert self.scheme in self.REMOTE_SUPPORTED_SCHEMA + ('file',), "Invalid URI scheme" + if not self.scheme: + raise ValueError("URI has no scheme") except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.debug(e) @@ -127,10 +181,23 @@ def as_path(self) -> Path: return Path(self._uri) def is_remote_resource(self) -> bool: - return self.scheme in self.REMOTE_SUPPORTED_SCHEMA + """Return True for any well-formed URI whose scheme is not `file`.""" + return bool(self.scheme) and self.scheme != "file" def is_local_resource(self) -> bool: - return not self.is_remote_resource() + return self.scheme == "file" + + def is_natively_checkable(self) -> bool: + """Return True if availability can be verified via a native request.""" + return self.scheme in self.NATIVELY_CHECKABLE_SCHEMES + + def is_known_remote_scheme(self) -> bool: + """Return True if the scheme is one of the well-known remote schemes.""" + return self.scheme in self.KNOWN_REMOTE_SCHEMES + + def has_supported_rocrate_scheme(self) -> bool: + """Return True if the scheme is supported as an RO-Crate root URI.""" + return self.scheme in self.SUPPORTED_ROCRATE_SCHEMES def is_local_directory(self) -> bool: return self.is_local_resource() and 
self.as_path().is_dir() @@ -138,17 +205,46 @@ def is_local_directory(self) -> bool: def is_local_file(self) -> bool: return self.is_local_resource() and self.as_path().is_file() - def is_available(self) -> bool: - """Check if the resource is available""" + def check_availability(self) -> AvailabilityStatus: + """ + Inspect the resource availability with as much detail as possible. + + Distinguishes: + - AVAILABLE: confirmed reachable + - UNAUTHORIZED: reachable but protected (HTTP 401/403) + - UNAVAILABLE: confirmed not reachable + - UNCHECKABLE: scheme has no native check (e.g. scp://, s3://) + """ if self.is_remote_resource(): + if not self.is_natively_checkable(): + logger.debug( + "Cannot natively verify availability for URI '%s' (scheme '%s')", + self._uri, + self.scheme, + ) + return AvailabilityStatus.UNCHECKABLE try: response = HttpRequester().head(self._uri, allow_redirects=True) - return response.status_code in (200, 302) + if response.status_code in (200, 302): + return AvailabilityStatus.AVAILABLE + if response.status_code in (401, 403): + return AvailabilityStatus.UNAUTHORIZED + return AvailabilityStatus.UNAVAILABLE except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.debug(e) - return False - return Path(self._uri).exists() + return AvailabilityStatus.UNAVAILABLE + return AvailabilityStatus.AVAILABLE if Path(self._uri).exists() else AvailabilityStatus.UNAVAILABLE + + def is_available(self) -> bool: + """ + Return True only when the resource is confirmed available. + + Resources that cannot be verified (unsupported scheme, auth-protected) + return False here; callers that need to distinguish those cases should + use :meth:`check_availability` instead. 
+ """ + return self.check_availability() == AvailabilityStatus.AVAILABLE def __str__(self): return self._uri @@ -179,6 +275,9 @@ def validate_rocrate_uri(uri: Union[str, Path, URI], silent: bool = False) -> bo try: # parse the value to extract the scheme uri = URI(str(uri)) if isinstance(uri, str) or isinstance(uri, Path) else uri + # restrict RO-Crate roots to schemes the loader can actually handle + if not uri.has_supported_rocrate_scheme(): + raise errors.ROCrateInvalidURIError(uri) # check if the URI is a remote resource or local directory or local file if not uri.is_remote_resource() and not uri.is_local_directory() and not uri.is_local_file(): raise errors.ROCrateInvalidURIError(uri) @@ -187,7 +286,7 @@ def validate_rocrate_uri(uri: Union[str, Path, URI], silent: bool = False) -> bo raise errors.ROCrateInvalidURIError(uri) # check if the resource is available if not uri.is_available(): - raise errors.ROCrateInvalidURIError(uri, message=f"The RO-crate at the URI \"{uri}\" is not available") + raise errors.ROCrateInvalidURIError(uri, message=f'The RO-crate at the URI "{uri}" is not available') return True except ValueError as e: logger.error(e) From 76e92a4289d09e920037e1b27edff19afcc9858c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 19 May 2026 17:19:03 +0200 Subject: [PATCH 65/89] feat(rocrate): :sparkles: add check_availability() with AvailabilityStatus on entities Replace ROCrateEntity.is_available() with check_availability() returning AvailabilityStatus, resolve @id via is_external_reference(), and report non-natively-checkable remote schemes as UNCHECKABLE. is_available() becomes a boolean wrapper. 
--- rocrate_validator/rocrate.py | 97 +++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 40 deletions(-) diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index a5affdb1a..0d9d4c6bb 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -30,7 +30,7 @@ from rocrate_validator.errors import ROCrateInvalidURIError from rocrate_validator.utils.uri import validate_rocrate_uri from rocrate_validator.utils.http import HttpRequester -from rocrate_validator.utils.uri import URI +from rocrate_validator.utils.uri import URI, AvailabilityStatus, is_external_reference # set up logging logger = logging.getLogger(__name__) @@ -140,8 +140,17 @@ def id_as_path(self) -> Path: @classmethod def get_id_as_uri(cls, entity_id: str, ro_crate: ROCrate) -> URI: assert entity_id, "Entity ID cannot be None" - if entity_id.startswith("http"): + # Per RO-Crate 1.1 § 4.2.2, an `@id` is either a relative URI path or + # an external URI/IRI (RFC 3986/3987). External references are used + # as-is (without resolving them against the crate URI) so the entity + # is classified as remote/web-based; this covers both authority-based + # forms (``http://``, ``scp://``) and scheme-only ones (``urn:``, + # ``doi:``, ``arcp:``). + if is_external_reference(entity_id): return URI(entity_id) + # Otherwise the `@id` is a relative path: if the RO-Crate itself is + # remote, resolve it against the crate URI so the entity is still + # classified as remote/web-based. if ro_crate.uri.is_remote_resource(): if entity_id.startswith("./"): return URI(f"{ro_crate.uri}/{entity_id[2:]}") @@ -208,58 +217,66 @@ def raw_data(self) -> object: def is_local(self) -> bool: return not self.is_remote() - def is_available(self) -> bool: + def check_availability(self) -> AvailabilityStatus: + """ + Return a fine-grained availability status for this entity. + + This is the primary check; :meth:`is_available` is the boolean + shortcut built on top of it. 
The status distinguishes definitely + unavailable resources, auth-protected ones, and remote URIs whose + scheme the validator cannot natively check (scp://, s3://, ...). + """ try: - # check if the entity points to an external file - if self.id.startswith("http"): + entity_uri = self.id_as_uri + # Remote entities with a scheme we can natively reach are checked + # by inspecting the remote response status. + if entity_uri.is_natively_checkable(): logger.debug("Checking the availability of a remote entity") - return self.ro_crate.get_external_file_size(self.id) > 0 - - # check if the entity is part of the local RO-Crate + return entity_uri.check_availability() + + # Remote entities with a non-natively-checkable scheme cannot be + # verified (scp://, sftp://, s3://, ...): report UNCHECKABLE so + # callers can warn without invalidating the validation. + if entity_uri.is_remote_resource(): + logger.debug( + "Cannot natively verify availability for entity '%s' (scheme '%s')", + self.id, + entity_uri.scheme, + ) + return AvailabilityStatus.UNCHECKABLE + + # Local entity: locate it inside the (local or remote) RO-Crate. 
if self.ro_crate.uri.is_local_resource(): - # check if the file exists in the local file system if isinstance(self.ro_crate, ROCrateLocalFolder): - logger.debug( - "Checking the availability of a local entity in a local folder" - ) - return self.ro_crate.has_file( - self.id_as_path - ) or self.ro_crate.has_directory(self.id_as_path) - # check if the file exists in the local zip file + found = self.ro_crate.has_file(self.id_as_path) or self.ro_crate.has_directory(self.id_as_path) + return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE if isinstance(self.ro_crate, ROCrateLocalZip): - logger.debug( - "Checking the availability of a local entity in a local zip file" - ) - # Skip the check for the root of a ZIP archive if self.id == "./": - logger.debug( - "Skipping the check for the presence of the Data Entity '%s' within the RO-Crate " - "as it is the root of a ZIP archive", - self.id, - ) - return True - return self.ro_crate.has_directory( + return AvailabilityStatus.AVAILABLE + found = self.ro_crate.has_directory(unquote(str(self.id))) or self.ro_crate.has_file( unquote(str(self.id)) - ) or self.ro_crate.has_file(unquote(str(self.id))) + ) + return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE - # check if the entity is part of the remote RO-Crate - logger.debug( - "Checking the availability of a remote entity in a remote RO-Crate" - ) if self.ro_crate.uri.is_remote_resource(): if self.id == "./": - return self.ro_crate.get_file_size(Path(self.id_as_uri())) > 0 - return self.ro_crate.has_directory( - unquote(str(self.id)) - ) or self.ro_crate.has_file(unquote(str(self.id))) + found = self.ro_crate.get_file_size(Path(self.id_as_uri())) > 0 + else: + found = self.ro_crate.has_directory(unquote(str(self.id))) or self.ro_crate.has_file( + unquote(str(self.id)) + ) + return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE except Exception as e: if logger.isEnabledFor(logging.DEBUG): 
logger.exception(e) - return False + return AvailabilityStatus.UNAVAILABLE - raise ROCrateInvalidURIError( - uri=self.id, message="Could not determine the availability of the entity" - ) + # Fallthrough: the crate URI is neither a recognized local nor a + # remote resource — the entity location cannot be determined. + raise ROCrateInvalidURIError(uri=self.id, message="Could not determine the availability of the entity") + + def is_available(self) -> bool: + return self.check_availability() == AvailabilityStatus.AVAILABLE def get_size(self) -> int: try: From 75d0442f0bd8197cd6f1a8318f16e59b8692e642 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 19 May 2026 17:20:41 +0200 Subject: [PATCH 66/89] refactor(checks): :recycle: document web-data-entity exclusion --- .../profiles/ro-crate/must/4_data_entity_metadata.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py index 1abcbcfa0..d96d8ee03 100644 --- a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py @@ -38,11 +38,13 @@ def check_availability(self, context: ValidationContext) -> bool: return True # Perform the check result = True + # Web-based Data Entities (absolute URIs with any scheme other than `file`, + # e.g. http://, https://, ftp://, scp://, s3://, ...) are not required to + # be part of the local payload per the RO-Crate specification. 
for entity in context.ro_crate.metadata.get_data_entities(exclude_web_data_entities=True): assert entity.id is not None, "Entity has no @id" logger.debug("Ensure the presence of the Data Entity '%s' within the RO-Crate", entity.id) try: - logger.debug("Ensure the presence of the Data Entity '%s' within the RO-Crate", entity.id) if entity.has_local_identifier(): logger.debug( "Ignoring the Data Entity '%s' as it is a local entity with a local identifier. " From 0196dc9cfb9eb41b300b2f57b0ea123290db067b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 19 May 2026 17:29:13 +0200 Subject: [PATCH 67/89] feat(checks): :sparkles: handle UNAUTHORIZED/UNCHECKABLE web data entities as warnings Use AvailabilityStatus in the SHOULD web-data-entity check so auth-protected and non-natively-checkable resources are reported as recommendation-level issues, not validation failures; skip them in the contentSize check. --- .../should/5_web_data_entity_metadata.py | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py index 13ef914aa..424269361 100644 --- a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py @@ -16,6 +16,7 @@ from rocrate_validator.models import ValidationContext from rocrate_validator.requirements.python import (PyFunctionCheck, check, requirement) +from rocrate_validator.utils.uri import AvailabilityStatus # set up logging logger = logging.getLogger(__name__) @@ -32,13 +33,37 @@ class WebDataEntityRecommendedChecker(PyFunctionCheck): def check_availability(self, context: ValidationContext) -> bool: """ Check if the Web-based Data Entity is directly downloadable - by a simple retrieval (e.g. HTTP GET) permitting redirection and HTTP/HTTPS URIs + by a simple retrieval (e.g. 
HTTP GET) permitting redirection and HTTP/HTTPS URIs. + + Resources that cannot be natively retrieved by the validator (e.g. + `scp://`, `s3://`, `sftp://`) or that are protected by an authorization + mechanism (HTTP 401/403) are reported as recommendation-level issues + and logged as warnings, without invalidating the validation. """ result = True for entity in context.ro_crate.metadata.get_web_data_entities(): assert entity.id is not None, "Entity has no @id" try: - if not entity.is_available(): + status = entity.check_availability() + if status == AvailabilityStatus.AVAILABLE: + continue + if status == AvailabilityStatus.UNAUTHORIZED: + msg = ( + f"Web-based Data Entity {entity.id} is protected by an " + f"authorization mechanism; availability could not be verified" + ) + logger.warning(msg) + context.result.add_issue(msg, self) + elif status == AvailabilityStatus.UNCHECKABLE: + scheme = entity.id_as_uri.scheme + msg = ( + f"Web-based Data Entity {entity.id} uses scheme " + f"'{scheme}' which is not natively supported by the " + f"validator; availability could not be verified" + ) + logger.warning(msg) + context.result.add_issue(msg, self) + else: context.result.add_issue( f'Web-based Data Entity {entity.id} is not available', self) result = False @@ -59,6 +84,12 @@ def check_content_size(self, context: ValidationContext) -> bool: result = True for entity in context.ro_crate.metadata.get_web_data_entities(): assert entity.id is not None, "Entity has no @id" + # Skip entities whose scheme the validator cannot natively fetch + # (e.g. scp://, s3://): without retrieving the content there is + # no actual size to compare `contentSize` against. Reachability + # is then checked separately via `is_available()` below. 
+ if not entity.id_as_uri.is_natively_checkable(): + continue if entity.is_available(): content_size = entity.get_property("contentSize") if content_size and int(content_size) != context.ro_crate.get_external_file_size(entity.id): From f5d0d8f4b6de5fee7c65372b91c01c45ea06a32d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 19 May 2026 17:38:04 +0200 Subject: [PATCH 68/89] test(uri): :white_check_mark: add tests for scheme classification and rocrate-root checks --- tests/unit/test_uri.py | 53 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_uri.py b/tests/unit/test_uri.py index 45ceaf7a7..1b806d214 100644 --- a/tests/unit/test_uri.py +++ b/tests/unit/test_uri.py @@ -25,11 +25,52 @@ def test_valid_url(): uri = URI("http://example.com") assert uri.is_remote_resource() + assert uri.is_natively_checkable() + assert uri.has_supported_rocrate_scheme() + + +def test_uri_with_unknown_scheme_is_accepted_but_not_supported_as_rocrate_root(): + # Schemes outside the natively-supported set are valid URIs (they may + # appear as Data Entity identifiers, e.g. scp://, s3://) but they are + # not accepted as RO-Crate root URIs. + uri = URI("httpx:///example.com") + assert uri.is_remote_resource() + assert not uri.is_natively_checkable() + assert not uri.has_supported_rocrate_scheme() def test_invalid_url(): + # A bare token without any scheme/path separator is not a valid URI. 
with pytest.raises(ValueError): - URI("httpx:///example.com") + URI("") + + +def test_scp_uri_is_remote(): + uri = URI("scp://transfer.example.org//data/A.0.0") + assert uri.is_remote_resource() + assert uri.is_known_remote_scheme() + assert not uri.is_natively_checkable() + + +def test_s3_uri_is_remote(): + uri = URI("s3://bucket/key/path") + assert uri.is_remote_resource() + assert uri.is_known_remote_scheme() + assert not uri.is_natively_checkable() + + +@pytest.mark.parametrize("uri_str,expected_scheme", [ + # Scheme-only (no authority) absolute URIs are valid per RFC 3986 and + # accepted by RO-Crate 1.1 § 4.2.2 as Data Entity `@id` values. + ("urn:doi:10.5281/zenodo.1234", "urn"), + ("doi:10.5281/zenodo.1234", "doi"), + ("arcp://name,foo/bar", "arcp"), +]) +def test_scheme_only_absolute_uri_is_remote(uri_str, expected_scheme): + uri = URI(uri_str) + assert uri.scheme == expected_scheme + assert uri.is_remote_resource() + assert not uri.is_natively_checkable() def test_url_with_query_params(): @@ -131,10 +172,12 @@ def test_rocrate_uri_remote_valid(): def test_rocrate_uri_remote_invalid(): - - with pytest.raises(ValueError) as excinfo: - URI("httpx:///example.com") - assert str(excinfo.value) == "Invalid URI: httpx:///example.com" + # An unknown scheme is a valid URI but cannot be used as an RO-Crate root. 
+ uri = URI("httpx:///example.com") + assert not validate_rocrate_uri(uri, silent=True), \ + f"The URI {uri} should not be accepted as an RO-Crate root" + with pytest.raises(ROCrateInvalidURIError): + validate_rocrate_uri(uri, silent=False) # Test with an invalid remote URL uri = URI("https:///example.com") From e8c170fe082049a9eee6ffbeead8e5830a2e49f9 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 19 May 2026 17:42:00 +0200 Subject: [PATCH 69/89] test(rocrate): :white_check_mark: test data entities with external references are classified as remote --- tests/unit/test_rocrate.py | 68 ++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/tests/unit/test_rocrate.py b/tests/unit/test_rocrate.py index d91ce3f4e..27e552ee9 100644 --- a/tests/unit/test_rocrate.py +++ b/tests/unit/test_rocrate.py @@ -556,20 +556,74 @@ def test_entity_path_from_identifier(): quoted_entity_id = "pics/2017-06-11%2012.56.14.jpg" path = ROCrateEntity.get_path_from_identifier(quoted_entity_id, rocrate_path=rocrate_path) logger.debug(f"Quoted Entity Path: {path}") - assert str(path) == f"{rocrate_path}/pics/2017-06-11%2012.56.14.jpg", \ + assert str(path) == f"{rocrate_path}/pics/2017-06-11%2012.56.14.jpg", ( "Path should be pics/2017-06-11%2012.56.14.jpg" + ) # Test quoted entity id which does not exist within the ro-crate quoted_entity_id = "pics/2018-06-11%2012.56.14.jpg" - path = ROCrateEntity.get_path_from_identifier( - quoted_entity_id, rocrate_path=rocrate_path, decode=True) + path = ROCrateEntity.get_path_from_identifier(quoted_entity_id, rocrate_path=rocrate_path, decode=True) logger.debug(f"Quoted Entity Path: {path}") - assert str(path) == f"{rocrate_path}/pics/2018-06-11 12.56.14.jpg", \ - "Path should be pics/2018-06-11 12.56.14.jpg" + assert str(path) == f"{rocrate_path}/pics/2018-06-11 12.56.14.jpg", "Path should be pics/2018-06-11 12.56.14.jpg" # Test unquoted entity id which exists within the ro-crate 
unquoted_entity_id = "pics/2017-06-11 12.56.14.jpg" path = ROCrateEntity.get_path_from_identifier(unquoted_entity_id, rocrate_path=rocrate_path) logger.debug(f"Unquoted Entity Path: {path}") - assert str(path) == f"{rocrate_path}/pics/2017-06-11 12.56.14.jpg", \ - "Path should be pics/2017-06-11 12.56.14.jpg" + assert str(path) == f"{rocrate_path}/pics/2017-06-11 12.56.14.jpg", "Path should be pics/2017-06-11 12.56.14.jpg" + + +def _metadata_dict_with_id(entity_id: str) -> dict: + """Build a minimal RO-Crate metadata dict referencing a single data entity.""" + return { + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, + "about": {"@id": "./"}, + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test crate", + "hasPart": [{"@id": entity_id}], + }, + {"@id": entity_id, "@type": "File", "name": "remote-file"}, + ], + } + + +@pytest.mark.parametrize( + "entity_id", + [ + # authority-based absolute URIs (with a `//` authority component) + "scp://transfer.example.org//data/A.0.0", + "sftp://user@host/path/to/file", + "s3://bucket/key", + "https://example.org/data.txt", + "arcp://name,foo/bar", + # scheme-only absolute URIs (no authority; RO-Crate 1.1 § 4.2.2 + RFC 3986) + "urn:doi:10.5281/zenodo.1234", + "doi:10.5281/zenodo.1234", + ], +) +def test_absolute_uri_data_entity_is_classified_as_remote(entity_id): + """ + Data entities whose @id is an absolute URI (any non-file scheme, with or + without authority) MUST be recognized as remote (web-based) data entities + so that the must/4 payload check is skipped for them. + + Regression test for issue #176. 
+ """ + crate = ROCrate.from_metadata_dict(_metadata_dict_with_id(entity_id)) + entity = crate.metadata.get_entity(entity_id) + assert entity is not None, "Entity should be present in the metadata" + assert entity.is_remote(), f"Entity with absolute URI '{entity_id}' should be classified as remote" + assert entity in crate.metadata.get_web_data_entities(), ( + f"Entity '{entity_id}' should be listed as a web data entity" + ) + assert entity not in crate.metadata.get_data_entities(exclude_web_data_entities=True), ( + f"Entity '{entity_id}' should be excluded from local-only data entities" + ) From 2762f4b194b57d1efed7477f293ec4b375118f83 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 19 May 2026 17:50:35 +0200 Subject: [PATCH 70/89] test(checks): :white_check_mark: test external-reference data entities pass the REQUIRED payload check --- .../ro-crate/test_data_entity_metadata.py | 110 ++++++++++++++---- 1 file changed, 88 insertions(+), 22 deletions(-) diff --git a/tests/integration/profiles/ro-crate/test_data_entity_metadata.py b/tests/integration/profiles/ro-crate/test_data_entity_metadata.py index 01b1a5a09..baf00774d 100644 --- a/tests/integration/profiles/ro-crate/test_data_entity_metadata.py +++ b/tests/integration/profiles/ro-crate/test_data_entity_metadata.py @@ -12,9 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import json import logging -from rocrate_validator import models +import pytest + +from rocrate_validator import models, services from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER from tests.ro_crates import InvalidDataEntity, ValidROC from tests.shared import do_entity_test @@ -34,7 +37,7 @@ def test_missing_data_entity_reference(): models.Severity.REQUIRED, False, ["Data Entity: REQUIRED properties"], - ["sort-and-change-case.ga", "foo/xxx"] + ["sort-and-change-case.ga", "foo/xxx"], ) @@ -44,7 +47,7 @@ def test_data_entity_must_be_directly_linked(): paths.direct_hasPart_data_entity_reference, models.Severity.REQUIRED, True, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -54,7 +57,7 @@ def test_data_entity_not_linked(): paths.dataset_not_linked_to_root, models.Severity.REQUIRED, False, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -64,7 +67,7 @@ def test_data_entity_must_be_indirectly_linked(): paths.indirect_hasPart_data_entity_reference, models.Severity.REQUIRED, True, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -75,7 +78,7 @@ def test_directory_data_entity_wo_trailing_slash(): models.Severity.RECOMMENDED, False, ["Directory Data Entity: RECOMMENDED value restriction"], - ["Every Data Entity Directory URI SHOULD end with `/`"] + ["Every Data Entity Directory URI SHOULD end with `/`"], ) @@ -86,7 +89,7 @@ def test_missing_data_entity_encoding_format(): models.Severity.RECOMMENDED, False, ["File Data Entity: RECOMMENDED properties"], - ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"] + ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"], ) @@ -97,7 +100,7 @@ def test_invalid_data_entity_encoding_format_pronom(): 
models.Severity.RECOMMENDED, False, ["File Data Entity: RECOMMENDED properties"], - ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"] + ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"], ) @@ -108,7 +111,7 @@ def test_invalid_data_entity_encoding_format_ctx_website_type(): models.Severity.RECOMMENDED, False, ["File Data Entity: RECOMMENDED properties"], - ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"] + ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"], ) @@ -119,7 +122,7 @@ def test_invalid_data_entity_encoding_format_ctx_website_name(): models.Severity.RECOMMENDED, False, ["WebSite RECOMMENDED Properties"], - ["A WebSite MUST have a `name` property"] + ["A WebSite MUST have a `name` property"], ) @@ -129,7 +132,7 @@ def test_valid_data_entity_encoding_format_pronom(): paths.valid_encoding_format_pronom, models.Severity.RECOMMENDED, True, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -139,7 +142,7 @@ def test_valid_data_entity_encoding_format_ctx_website(): paths.valid_encoding_format_ctx_entity, models.Severity.RECOMMENDED, True, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -150,7 +153,7 @@ def test_missing_file_data_entity_with_quoted_name(): models.Severity.REQUIRED, False, ["Data Entity: REQUIRED resource availability"], - ["The RO-Crate does not include the Data Entity 'pics/2017-06-11%2012.56.14.jpg' as part of its payload"] + ["The RO-Crate does not include the Data Entity 'pics/2017-06-11%2012.56.14.jpg' as part of its payload"], ) @@ -161,7 +164,7 @@ def test_missing_file_data_entity_with_unquoted_name(): models.Severity.REQUIRED, False, ["Data Entity: REQUIRED resource availability"], - ["The RO-Crate does not include the Data Entity 'pics/2017-06-11 12.56.14.jpg' as part of its 
payload"] + ["The RO-Crate does not include the Data Entity 'pics/2017-06-11 12.56.14.jpg' as part of its payload"], ) @@ -172,7 +175,7 @@ def test_missing_dataset_entity_with_quoted_name(): models.Severity.REQUIRED, False, ["Data Entity: REQUIRED resource availability"], - ["The RO-Crate does not include the Data Entity 'data%20set/' as part of its payload"] + ["The RO-Crate does not include the Data Entity 'data%20set/' as part of its payload"], ) @@ -183,7 +186,7 @@ def test_missing_dataset_entity_with_unquoted_name(): models.Severity.REQUIRED, False, ["Data Entity: REQUIRED resource availability"], - ["The RO-Crate does not include the Data Entity 'data set/' as part of its payload"] + ["The RO-Crate does not include the Data Entity 'data set/' as part of its payload"], ) @@ -194,15 +197,78 @@ def test_missing_absolute_path_data_entity(): models.Severity.RECOMMENDED, False, ["Data Entity: RECOMMENDED resource availability"], - ["Data Entity file:///tmp/test.txt is not available"] + ["Data Entity file:///tmp/test.txt is not available"], ) def test_valid_rocrate_with_data_entities(): """""" - do_entity_test( - ValidROC().rocrate_with_data_entities, - models.Severity.REQUIRED, - True, - profile_identifier="ro-crate" + do_entity_test(ValidROC().rocrate_with_data_entities, models.Severity.REQUIRED, True, profile_identifier="ro-crate") + + +@pytest.mark.parametrize( + "remote_entity_id", + [ + "scp://transfer.example.org//data/A.0.0", + "sftp://user@host/path/to/file", + "s3://bucket/key", + ], +) +def test_remote_data_entity_does_not_fail_required_check(tmp_path, remote_entity_id): + """Regression test for issue #176. + + A Data Entity whose `@id` is an absolute URI with a non-file scheme (e.g. + ``scp://``, ``sftp://``, ``s3://``) MUST NOT trigger the + "Data Entity: REQUIRED resource availability" violation: per the RO-Crate + spec, any absolute-URI Data Entity is web-based and is not required to be + part of the local payload. 
+ """ + crate_dir = tmp_path / "crate-with-remote-entity" + crate_dir.mkdir() + metadata = { + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, + "about": {"@id": "./"}, + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Crate with remote entity", + "description": "Regression fixture for issue #176", + "datePublished": "2026-05-15T07:30:50+00:00", + "license": {"@id": "https://spdx.org/licenses/CC0-1.0"}, + "hasPart": [{"@id": remote_entity_id}], + }, + { + "@id": remote_entity_id, + "@type": "File", + "name": "Remote file", + "contentSize": 16, + "dateModified": "2026-05-15T07:30:50+00:00", + "sdDatePublished": "2026-05-15T07:31:03+00:00", + }, + {"@id": "https://spdx.org/licenses/CC0-1.0", "@type": "CreativeWork", "name": "CC0"}, + ], + } + (crate_dir / "ro-crate-metadata.json").write_text(json.dumps(metadata)) + + result = services.validate( + models.ValidationSettings( + rocrate_uri=crate_dir, + requirement_severity=models.Severity.REQUIRED, + profile_identifier="ro-crate", + ) + ) + assert result.passed(), ( + f"RO-Crate with remote entity '{remote_entity_id}' should pass REQUIRED " + f"validation; got issues: {[i.message for i in result.get_issues()]}" ) + # And the specific must/4 violation must NOT be among the issues. 
+ for issue in result.get_issues(): + assert "as part of its payload" not in (issue.message or ""), ( + f"Unexpected payload violation raised for remote entity: {issue.message}" + ) From 63acb6aeab349130b79f2d86a8aa0ec2538286e9 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 29 May 2026 11:07:38 +0200 Subject: [PATCH 71/89] fix(uri): :bug: treat file:// URIs with non-local authority as remote --- rocrate_validator/utils/uri.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index efb79219f..2111f584c 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -114,6 +114,10 @@ class URI: "hdfs", ) + # ``file://`` authorities that denote the local machine (RFC 8089 §2): + # an empty authority (``file:///path``) or the special ``localhost`` host. + LOCAL_FILE_AUTHORITIES = ("", "localhost") + # Backwards-compatible alias kept for callers that still inspect it. REMOTE_SUPPORTED_SCHEMA = SUPPORTED_ROCRATE_SCHEMES[:-1] # http, https, ftp @@ -123,12 +127,20 @@ def __init__(self, uri: Union[str, Path]): self._uri = uri = str(uri) try: # Inputs that are not external references are assumed to be local - # paths, so the ``file://`` scheme is added explicitly. The + # paths, so the ``file:`` scheme is added explicitly. The # detection covers both authority-based schemes (``http://``, # ``scp://``) and scheme-only ones (``urn:``, ``doi:``), as # defined by RFC 3986. + # + # The authority-less ``file:`` form (no ``//``) is used on purpose: + # ``file://data/x`` would parse ``data`` as the authority (host), + # while ``file:data/x`` keeps ``data/x`` as the path with an empty + # authority. This way a local path never gains a spurious host and + # the authority remains a reliable signal to tell a local file + # (``file:///path``) from a remote one (``file://host/path``, + # RFC 8089). 
if not is_external_reference(uri): - uri = f"file://{uri}" + uri = f"file:{uri}" # parse the value to extract the scheme self._parse_result = urlparse(uri) if not self.scheme: @@ -181,11 +193,23 @@ def as_path(self) -> Path: return Path(self._uri) def is_remote_resource(self) -> bool: - """Return True for any well-formed URI whose scheme is not `file`.""" - return bool(self.scheme) and self.scheme != "file" + """ + Return True for any well-formed URI that points to a non-local resource. + + Schemes other than ``file`` (``http``, ``scp``, ``s3``, ...) are always + remote. A ``file://`` URI is remote when it carries an explicit, + non-local authority (host) — e.g. ``file://hostname/path`` per + RFC 8089: the referenced file lives on another machine and is therefore + not part of the local RO-Crate payload. + """ + if not self.scheme: + return False + if self.scheme == "file": + return self.get_netloc().lower() not in self.LOCAL_FILE_AUTHORITIES + return True def is_local_resource(self) -> bool: - return self.scheme == "file" + return self.scheme == "file" and self.get_netloc().lower() in self.LOCAL_FILE_AUTHORITIES def is_natively_checkable(self) -> bool: """Return True if availability can be verified via a native request.""" From 379d297accf108ff508545b15b383028a584b4f7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 29 May 2026 11:11:31 +0200 Subject: [PATCH 72/89] test(cli): :wrench: fix config to support small terminals windows --- tests/test_cli.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index fc897e098..d6c6209bf 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -31,7 +31,11 @@ @fixture def cli_runner() -> CliRunner: - return CliRunner() + # Force a wide terminal: the CLI renders output through Rich, which wraps + # and truncates tables/panels to the terminal width (defaulting to 80 + # columns when stdout is captured). 
Pinning COLUMNS keeps the rendered + # output deterministic regardless of the environment's actual width. + return CliRunner(env={"COLUMNS": "200"}) def test_version(cli_runner: CliRunner): @@ -134,7 +138,11 @@ def test_validate_with_invalid_profiles_path_dir(cli_runner: CliRunner): ) assert result.exit_code == 2 # logger.debug(result.output) - assert re.search(f"Path '{dummy_profiles_path}' does not exist.", result.output) + # On narrow terminals the Rich error panel wraps the message across lines + # and inserts box-drawing borders (│) between words; strip those and + # collapse whitespace so the match does not depend on terminal width. + normalized_output = re.sub(r"[\s│]+", " ", result.output) + assert re.search(f"Path '{dummy_profiles_path}' does not exist.", normalized_output) def test_profiles_list(cli_runner: CliRunner): From 45b2f49a57cdb623063f679cbd0be035eb40087c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 29 May 2026 11:15:54 +0200 Subject: [PATCH 73/89] test(uri): :white_check_mark: add tests for file:// URIs with non-local authority --- tests/unit/test_rocrate.py | 30 ++++++++++++++++++++++++++++++ tests/unit/test_uri.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/tests/unit/test_rocrate.py b/tests/unit/test_rocrate.py index 27e552ee9..69e0b4fe7 100644 --- a/tests/unit/test_rocrate.py +++ b/tests/unit/test_rocrate.py @@ -604,6 +604,9 @@ def _metadata_dict_with_id(entity_id: str) -> dict: "s3://bucket/key", "https://example.org/data.txt", "arcp://name,foo/bar", + # `file://` URIs with a (non-local) authority denote files living on + # another host (RFC 8089), so they are remote too. 
+ "file://gs02r3b58-ib0/scratch/tmp/5190874/tmp_rf_samples_slt86rc0", # scheme-only absolute URIs (no authority; RO-Crate 1.1 § 4.2.2 + RFC 3986) "urn:doi:10.5281/zenodo.1234", "doi:10.5281/zenodo.1234", @@ -627,3 +630,30 @@ def test_absolute_uri_data_entity_is_classified_as_remote(entity_id): assert entity not in crate.metadata.get_data_entities(exclude_web_data_entities=True), ( f"Entity '{entity_id}' should be excluded from local-only data entities" ) + + +@pytest.mark.parametrize( + "entity_id", + [ + # `file://` URIs without an authority (RFC 8089) or with the special + # `localhost` authority refer to the local machine, so they describe + # local payload members that the must/4 check must still verify. + "file:///absolute/path/to/file.txt", + "file://localhost/absolute/path/to/file.txt", + ], +) +def test_local_file_uri_data_entity_is_not_remote(entity_id): + """ + `file://` Data Entity identifiers that point to the local machine (empty or + `localhost` authority) MUST NOT be treated as remote/web-based: only + `file://host/...` URIs with a real host are remote (issue #176 follow-up). 
+ """ + crate = ROCrate.from_metadata_dict(_metadata_dict_with_id(entity_id)) + entity = crate.metadata.get_entity(entity_id) + assert entity is not None, "Entity should be present in the metadata" + assert not entity.is_remote(), ( + f"Entity with local file URI '{entity_id}' should NOT be classified as remote" + ) + assert entity not in crate.metadata.get_web_data_entities(), ( + f"Entity '{entity_id}' should not be listed as a web data entity" + ) diff --git a/tests/unit/test_uri.py b/tests/unit/test_uri.py index 1b806d214..d6d4990db 100644 --- a/tests/unit/test_uri.py +++ b/tests/unit/test_uri.py @@ -73,6 +73,39 @@ def test_scheme_only_absolute_uri_is_remote(uri_str, expected_scheme): assert not uri.is_natively_checkable() +def test_file_uri_with_remote_host_is_remote(): + # A `file://` URI carrying a (non-local) authority points to a file on + # another host (RFC 8089) and must be treated as remote, not as a local + # payload member (regression for issue #176 with `file://` schemes). + uri = URI("file://gs02r3b58-ib0/scratch/tmp/5190874/tmp_rf_samples_slt86rc0") + assert uri.scheme == "file" + assert uri.is_remote_resource() + assert not uri.is_local_resource() + assert not uri.is_natively_checkable() + + +@pytest.mark.parametrize("uri_str", [ + "file:///absolute/path/file.txt", + "file://localhost/absolute/path/file.txt", +]) +def test_file_uri_to_local_host_is_local(uri_str): + # An empty or `localhost` authority denotes the local machine. + uri = URI(uri_str) + assert uri.scheme == "file" + assert uri.is_local_resource() + assert not uri.is_remote_resource() + + +@pytest.mark.parametrize("path", ["README.md", "data/file.txt", "./", "/abs/dir"]) +def test_local_path_never_gains_a_spurious_host(path): + # Plain filesystem paths are normalized to authority-less `file:` URIs, so + # the first path segment is never mistaken for a remote host. 
+ uri = URI(path) + assert uri.is_local_resource() + assert not uri.is_remote_resource() + assert uri.get_netloc() == "" + + def test_url_with_query_params(): uri = URI("http://example.com?param1=value1&param2=value2") assert uri.get_query_param("param1") == "value1" From 33012b6bfb9521e713f4722cfd9be6b21bdaa769 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 09:10:52 +0200 Subject: [PATCH 74/89] fix(cli/cache): :bug: avoid stream=True when fetching remote crates --- rocrate_validator/cli/commands/cache.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py index 5492af2eb..83ac8035a 100644 --- a/rocrate_validator/cli/commands/cache.py +++ b/rocrate_validator/cli/commands/cache.py @@ -21,8 +21,6 @@ import copy as _copy import json -import shutil -import tempfile from datetime import datetime from pathlib import Path from typing import List, Optional @@ -456,7 +454,7 @@ def _warm_remote_crates(urls: List[str]) -> List[WarmUpResult]: results: List[WarmUpResult] = [] for url in urls: try: - response = requester.fetch_fresh(url, stream=True, allow_redirects=True) + response = requester.fetch_fresh(url, allow_redirects=True) status = getattr(response, "status_code", None) if status is None: results.append(WarmUpResult(url=url, status="failed", detail="no status code")) @@ -464,9 +462,8 @@ def _warm_remote_crates(urls: List[str]) -> List[WarmUpResult]: if status >= 400: results.append(WarmUpResult(url=url, status="failed", detail=f"HTTP {status}")) continue - # Consume the response body so that the cache backend stores it. - with tempfile.TemporaryFile() as tmp: - shutil.copyfileobj(response.raw, tmp) + # Touch the body so the cache backend stores the full response. 
+ _ = response.content results.append(WarmUpResult(url=url, status="ok", detail=f"HTTP {status}")) except Exception as e: logger.debug("Remote crate warm-up failed for %s: %s", url, e) From 64cc050665d13ee7a92908feee0687c398754c4e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 09:50:37 +0200 Subject: [PATCH 75/89] test(cli/cache): :white_check_mark: add regression test for 'cache warm --crate' --- tests/test_cli_cache.py | 62 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tests/test_cli_cache.py b/tests/test_cli_cache.py index 860835e01..380b20bfc 100644 --- a/tests/test_cli_cache.py +++ b/tests/test_cli_cache.py @@ -242,6 +242,68 @@ def test_warm_url_combined_with_profile_warms_both( assert "https://example.org/explicit" in result.output +# ==================================================================== +# cache warm: --crate (remote RO-Crate) +# ==================================================================== +@pytest.fixture +def mock_network_gzip(monkeypatch): + """ + Same as ``mock_network``, but returns a ``Content-Encoding: gzip`` body. + This encoded response is required to reproduce the warm-crate bug. + """ + import gzip + + from requests.adapters import HTTPAdapter + + body = gzip.compress(b'{"@context": "https://w3id.org/ro/crate/1.2/context"}') + + def fake_send(self, request, **kwargs): + raw = urllib3.HTTPResponse( + body=io.BytesIO(body), + headers={ + "Content-Type": "application/json", + "Content-Encoding": "gzip", + "Content-Length": str(len(body)), + }, + status=200, + preload_content=False, + decode_content=False, + ) + return self.build_response(request, raw) + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + + +def test_warm_crate_caches_remote_metadata(cli_runner, mock_network_gzip, tmp_cache): + """ + Regression: ``cache warm --crate `` must consume the body via + ``response.content`` rather than streaming ``response.raw``. 
+ + With ``stream=True`` + ``shutil.copyfileobj(response.raw, ...)`` the warm-up + crashed with urllib3's "Calling read(decode_content=False) is not supported + after read(decode_content=True) was called": requests_cache buffers the + streamed body (decode_content=True) to store it, after which a raw read + (decode_content=False) is rejected. The body must therefore be touched in a + way that goes through the already-decoded content. + """ + url = "https://example.org/ro-crate/ro-crate-metadata.json" + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "--crate", url], + ) + assert result.exit_code == 0, result.output + assert "Fetching remote RO-Crates" in result.output + assert "Summary: 1 cached, 0 failed, 0 skipped" in result.output + + # The fetched crate must actually be retrievable from the cache afterwards. + listed = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache)], + ) + assert listed.exit_code == 0, listed.output + assert "ro-crate-metadata.json" in listed.output + + # ==================================================================== # cache list / ls # ==================================================================== From 39414e0b4176173648f4337bea200c48c5fc9a95 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 10:37:15 +0200 Subject: [PATCH 76/89] chore(release): :arrow_up: update dependencies in `poetry.lock` file --- poetry.lock | 750 ++++++++++++++++++++++++++-------------------------- 1 file changed, 380 insertions(+), 370 deletions(-) diff --git a/poetry.lock b/poetry.lock index e750af1dc..202f58df2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -161,23 +161,21 @@ lxml = ["lxml"] [[package]] name = "bleach" -version = "6.3.0" +version = "4.1.0" description = "An easy safelist-based HTML-sanitizing tool." 
optional = false -python-versions = ">=3.10" +python-versions = ">=3.6" groups = ["docs"] files = [ - {file = "bleach-6.3.0-py3-none-any.whl", hash = "sha256:fe10ec77c93ddf3d13a73b035abaac7a9f5e436513864ccdad516693213c65d6"}, - {file = "bleach-6.3.0.tar.gz", hash = "sha256:6f3b91b1c0a02bb9a78b5a454c92506aa0fdf197e1d5e114d2e00c6f64306d22"}, + {file = "bleach-4.1.0-py2.py3-none-any.whl", hash = "sha256:4d2651ab93271d1129ac9cbc679f524565cc8a1b791909c4a51eac4446a15994"}, + {file = "bleach-4.1.0.tar.gz", hash = "sha256:0900d8b37eba61a802ee40ac0061f8c2b5dee29c1927dd1d233e075ebf5a71da"}, ] [package.dependencies] -tinycss2 = {version = ">=1.1.0,<1.5", optional = true, markers = "extra == \"css\""} +packaging = "*" +six = ">=1.9.0" webencodings = "*" -[package.extras] -css = ["tinycss2 (>=1.1.0,<1.5)"] - [[package]] name = "cachecontrol" version = "0.14.4" @@ -230,14 +228,14 @@ ujson = ["ujson (>=5.10.0)"] [[package]] name = "certifi" -version = "2026.2.25" +version = "2026.5.20" description = "Python package for providing Mozilla's CA Bundle." 
optional = false python-versions = ">=3.7" groups = ["main", "docs"] files = [ - {file = "certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"}, - {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"}, + {file = "certifi-2026.5.20-py3-none-any.whl", hash = "sha256:3c52e209ba0a4ad7aebe60436a4ab349c39e1e602e8c134221e546902ad25897"}, + {file = "certifi-2026.5.20.tar.gz", hash = "sha256:69dea482ab64caa7b9f6aba1c6bf48bb6a5448d1c0f1b17ab42ad8c763a5344d"}, ] [[package]] @@ -338,52 +336,6 @@ files = [ [package.dependencies] pycparser = {version = "*", markers = "implementation_name != \"PyPy\""} -[[package]] -name = "chardet" -version = "7.4.3" -description = "Universal character encoding detector" -optional = false -python-versions = ">=3.10" -groups = ["docs"] -files = [ - {file = "chardet-7.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0c79b13c9908ac7dfe0a74116ebc9a0f28b2319d23c32f3dfcdfbe1279c7eaf"}, - {file = "chardet-7.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bba8bea1b28d927b3e99e47deafe53658d34497c0a891d95ff1ba8ff6663f01c"}, - {file = "chardet-7.4.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23163921dccf3103ce59540b0443c106d2c0a0ff2e0503e05196f5e6fdea453f"}, - {file = "chardet-7.4.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cfb54563fe5f130da17c44c6a4e2e8052ba628e5ab4eab7ef8190f736f0f8f72"}, - {file = "chardet-7.4.3-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3990fffcc6a6045f2234ab72752ad037e3b2d48c72037f244d42738db397eb75"}, - {file = "chardet-7.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:c7116b0452994734ccff35e154b44240090eb0f4f74b9106292668133557c175"}, - {file = "chardet-7.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:25a862cddc6a9ac07023e808aedd297115345fbaabc2690479481ddc0f980e09"}, - {file = "chardet-7.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7005c88da26fd95d8abb8acbe6281d833e9a9181b03cf49b4546c4555389bd97"}, - {file = "chardet-7.4.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc50f28bad067393cce0af9091052c3b8df7a23115afd8ba7b2e0947f0cef1f8"}, - {file = "chardet-7.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c3da294de1a681097848ab58bd3f2771a674f8039d2d87a5538b28856b815e9"}, - {file = "chardet-7.4.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c45e116dd51b66226a53ade3f9f635e870de5399b90e00ce45dcc311093bf4"}, - {file = "chardet-7.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:ccc1f83ab4bcfb901cf39e0c4ba6bc6e726fc6264735f10e24ceb5cb47387578"}, - {file = "chardet-7.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:75d3c65cc16bddf40b8da1fd25ba84fca5f8070f2b14e86083653c1c85aee971"}, - {file = "chardet-7.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:29af5999f654e8729d251f1724a62b538b1262d9292cccaefddf8a02aae1ef6a"}, - {file = "chardet-7.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:626f00299ad62dfe937058a09572beed442ccc7b58f87aa667949b20fd3db235"}, - {file = "chardet-7.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9a4904dd5f071b7a7d7f50b4a67a86db3c902d243bf31708f1d5cde2f68239cb"}, - {file = "chardet-7.4.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5d2879598bc220689e8ce509fe9c3f37ad2fca53a36be9c9bd91abdd91dd364f"}, - {file = "chardet-7.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:4b2799bd58e7245cfa8d4ab2e8ad1d76a5c3a5b1f32318eb6acca4c69a3e7101"}, - {file = "chardet-7.4.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:a9e4486df251b8962e86ea9f139ca235aa6e0542a00f7844c9a04160afb99aa9"}, - {file = "chardet-7.4.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4fbff1907925b0c5a1064cffb5e040cd5e338585c9c552625f30de6bc2f3107a"}, - {file = "chardet-7.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:365135eaf37ba65a828f8e668eb0a8c38c479dcbec724dc25f4dfd781049c357"}, - {file = "chardet-7.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfc134b70c846c21ead8e43ada3ae1a805fff732f6922f8abcf2ff27b8f6493d"}, - {file = "chardet-7.4.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9acd9988a93e09390f3cd231201ea7166c415eb8da1b735928990ffc05cb9fbb"}, - {file = "chardet-7.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:e1b98790c284ff813f18f7cf7de5f05ea2435a080030c7f1a8318f3a4f80b131"}, - {file = "chardet-7.4.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d892d3dcd652fdef53e3d6327d39b17c0df40a899dfc919abaeb64c974497531"}, - {file = "chardet-7.4.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:acc46d1b8b7d5783216afe15db56d1c179b9a40e5a1558bc13164c4fd20674c4"}, - {file = "chardet-7.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ac3bf11c645734a1701a3804e43eabd98851838192267d08c353a834ab79fea"}, - {file = "chardet-7.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e3bd9f936e04bae89c254262af08d9e5b98f805175ba1e29d454e6cba3107b7"}, - {file = "chardet-7.4.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:27cc23da03630cdecc9aa81a895aa86629c211f995cd57651f0fbc280717bf93"}, - {file = "chardet-7.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:b95c934b9ad59e2ba8abb9be49df70d3ad1b0d95d864b9fdb7588d4fa8bd921c"}, - {file = "chardet-7.4.3-cp314-cp314t-macosx_10_15_x86_64.whl", hash = 
"sha256:c77867f0c1cb8bd819502249fcdc500364aedb07881e11b743726fa2148e7b6e"}, - {file = "chardet-7.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cf1efeaf65a6ef2f5b9cc3a1df6f08ba2831b369ccaa4c7018eaf90aa757bb11"}, - {file = "chardet-7.4.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f3504c139a2ad544077dd2d9e412cd08b01786843d76997cd43bb6de311723c"}, - {file = "chardet-7.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457f619882ba66327d4d8d14c6c342269bdb1e4e1c38e8117df941d14d351b04"}, - {file = "chardet-7.4.3-py3-none-any.whl", hash = "sha256:1173b74051570cf08099d7429d92e4882d375ad4217f92a6e5240ccfb26f231e"}, - {file = "chardet-7.4.3.tar.gz", hash = "sha256:cc1d4eb92a4ec1c2df3b490836ffa46922e599d34ce0bb75cf41fd2bf6303d56"}, -] - [[package]] name = "charset-normalizer" version = "3.4.7" @@ -525,14 +477,14 @@ files = [ [[package]] name = "click" -version = "8.3.2" +version = "8.4.1" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.10" groups = ["main"] files = [ - {file = "click-8.3.2-py3-none-any.whl", hash = "sha256:1924d2c27c5653561cd2cae4548d1406039cb79b858b747cfea24924bbc1616d"}, - {file = "click-8.3.2.tar.gz", hash = "sha256:14162b8b3b3550a7d479eafa77dfd3c38d9dc8951f6f69c78913a8f9a7540fd5"}, + {file = "click-8.4.1-py3-none-any.whl", hash = "sha256:482be17c6991b8c19c5429a1e995d9b0efdbb63172824c41f99965dc0ade8ec2"}, + {file = "click-8.4.1.tar.gz", hash = "sha256:918b5633eddf6b41c32d4f454bf0de810065c74e3f7dbf8ee5452f8be88d3e96"}, ] [package.dependencies] @@ -586,118 +538,118 @@ test = ["pytest"] [[package]] name = "coverage" -version = "7.13.5" +version = "7.14.1" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.10" groups = ["test"] files = [ - {file = "coverage-7.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5"}, - {file = "coverage-7.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf"}, - {file = "coverage-7.13.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8"}, - {file = "coverage-7.13.5-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4"}, - {file = "coverage-7.13.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d"}, - {file = "coverage-7.13.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930"}, - {file = "coverage-7.13.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d"}, - {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40"}, - {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878"}, - {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400"}, - {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0"}, - {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0"}, - {file = "coverage-7.13.5-cp310-cp310-win32.whl", hash = 
"sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58"}, - {file = "coverage-7.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e"}, - {file = "coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d"}, - {file = "coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587"}, - {file = "coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642"}, - {file = "coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b"}, - {file = "coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686"}, - {file = "coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743"}, - {file = "coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75"}, - {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209"}, - {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a"}, - {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e"}, - {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = 
"sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd"}, - {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8"}, - {file = "coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf"}, - {file = "coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9"}, - {file = "coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028"}, - {file = "coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01"}, - {file = "coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422"}, - {file = "coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f"}, - {file = "coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5"}, - {file = "coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376"}, - {file = "coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256"}, - {file = "coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c"}, - {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5"}, - {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09"}, - {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9"}, - {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf"}, - {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c"}, - {file = "coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf"}, - {file = "coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810"}, - {file = "coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de"}, - {file = "coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1"}, - {file = "coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3"}, - {file = "coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26"}, - {file = "coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3"}, - {file = "coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b"}, - {file = 
"coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a"}, - {file = "coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969"}, - {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161"}, - {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15"}, - {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1"}, - {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6"}, - {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17"}, - {file = "coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85"}, - {file = "coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b"}, - {file = "coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664"}, - {file = "coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d"}, - {file = "coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0"}, - {file = "coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806"}, - 
{file = "coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3"}, - {file = "coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9"}, - {file = "coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd"}, - {file = "coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606"}, - {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e"}, - {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0"}, - {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87"}, - {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479"}, - {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2"}, - {file = "coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a"}, - {file = "coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819"}, - {file = "coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911"}, - {file = "coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = 
"sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f"}, - {file = "coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e"}, - {file = "coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a"}, - {file = "coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510"}, - {file = "coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247"}, - {file = "coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6"}, - {file = "coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0"}, - {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882"}, - {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740"}, - {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16"}, - {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0"}, - {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0"}, - {file = "coverage-7.13.5-cp314-cp314-win32.whl", hash = 
"sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc"}, - {file = "coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633"}, - {file = "coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8"}, - {file = "coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b"}, - {file = "coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c"}, - {file = "coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9"}, - {file = "coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29"}, - {file = "coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607"}, - {file = "coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90"}, - {file = "coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3"}, - {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab"}, - {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562"}, - {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = 
"sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2"}, - {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea"}, - {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a"}, - {file = "coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215"}, - {file = "coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43"}, - {file = "coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45"}, - {file = "coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61"}, - {file = "coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179"}, + {file = "coverage-7.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3e3680291c4a1d0dadfa84a2c459576a4af5133abb617905714339a0c73138cf"}, + {file = "coverage-7.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a5274669f37f2343635a347b91a60777621341ab3378e9c6ac9335eee704bddf"}, + {file = "coverage-7.14.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cfe5a5fec635799ef33428f1e5e61bafa45a92a96190ba731561ba558ccc214d"}, + {file = "coverage-7.14.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:62a9f70b52e0b5a95cfef4a5c5641b06983cadc5e538a3feeb5c00211f523ac2"}, + {file = "coverage-7.14.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c18ebc343e15be53049b3a2dce38fe82d58f37e20ab9094b3a39c0aa4f6bb47"}, + {file = 
"coverage-7.14.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b84ffdf877644e7096aa936991efeed873f7f3df57b9cd001312b7668ab08550"}, + {file = "coverage-7.14.1-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e854312c4103f2ad4c0dc023b69b77ebfd2c89db5f86c4c94dc2353f9a92167e"}, + {file = "coverage-7.14.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c643734307300234fafa36bf2a040a7235f8f177ea1fd6ec1423aea6fb7b929f"}, + {file = "coverage-7.14.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:84ac9499e48700399a5dd0ea7085b5091961fec52c68d66b4ec0d3cf7f4441b1"}, + {file = "coverage-7.14.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:7f02d09f70776579b926d889a4c9c235070a1f47c40458aeaca563fae5acfdb5"}, + {file = "coverage-7.14.1-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:ce66d8e46da2bb5ee313a745cbd2e391d319176c1f7a9451bfcd3a2fb920859b"}, + {file = "coverage-7.14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c912c259304cfb5ee584481cfb7ce1ff932b4d61e6c9140b8f19cb7b5ed82332"}, + {file = "coverage-7.14.1-cp310-cp310-win32.whl", hash = "sha256:1238cb94638e610e972c60dac68e813f868dc7d6e982535270558443058d9d59"}, + {file = "coverage-7.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:fc459e5d73be2d6332fcfe8dbf3d8994671fe33c700f4565988ecfa511547253"}, + {file = "coverage-7.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:478b5bcd63c2e1357c5c7e16c070690df7b07f676b1c114d7b93e533c664309f"}, + {file = "coverage-7.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a24a81f9715ee42ef59a316cc11611c98fe23920f7c81861315c9f3ff4a230f4"}, + {file = "coverage-7.14.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:196a13319ad88d6d8ef5ab489ec4f44ddde2143c0c7d5b27786f6c3ffd56a7e1"}, + {file = "coverage-7.14.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:3d452fd08b5c72c5167c93e6867b5c08500bd40f2a21e1e854a500550b6cc36f"}, + {file = "coverage-7.14.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23bf7fa51ac02e07fc7c96849b82946da47ae862dc8f86d183b2a4864fc38129"}, + {file = "coverage-7.14.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bcaa50684dcaadfa599ac48f81103c756d791cfd85c97203d2217c593d48b860"}, + {file = "coverage-7.14.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4ea1c034f95c9b056e856b794630b17f9fa3d57e4800ff1e503d3be0f9c9078c"}, + {file = "coverage-7.14.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c7e057326434e441306226fbeb5d1aaf14a2637efe97ba668306635835f32ad7"}, + {file = "coverage-7.14.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:59baf88468dbc8d63b1887afd92bda52e40bb1561696e5819670601403810cec"}, + {file = "coverage-7.14.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d34d75f892b3ab73ba11cab5442cce7b3e168fd64162b16f0e1e0d09c508edef"}, + {file = "coverage-7.14.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:3a56abc20a472baf0304c455721bc601477440d28ecfde8a03dde79ede07e0df"}, + {file = "coverage-7.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6a3cb83d1552c0cd1b4906655b6a33fd4a8473229633a901c6b73bf86914dee9"}, + {file = "coverage-7.14.1-cp311-cp311-win32.whl", hash = "sha256:10274a1fbeb8ec5d72966e17bb198a3104257aca4ac09d98667c5f8aca8c8548"}, + {file = "coverage-7.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:87ebdf787d4888e3f3f2d523eadc6e18c6d18c6d0eb173801a189641627fb37e"}, + {file = "coverage-7.14.1-cp311-cp311-win_arm64.whl", hash = "sha256:dd34767fa19848d35659ffc0a75314f58c7af3f1cd87ec521e8292a1238398a3"}, + {file = "coverage-7.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a06c76364a9360e33d6d23769aefdf7f66f38e2ffb60ceb1baaa4989d83b695c"}, + {file = 
"coverage-7.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fad54e871165f6ec2f536063ac74c3104508a12963e64072ba44bd822de52b0c"}, + {file = "coverage-7.14.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:84b535f00655ecafe1d929d1fb00ed5d6fa3051ea643ab2c161a3887b86f294b"}, + {file = "coverage-7.14.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6b6b0853b895fe0e98cbfc580d1ec3393d9302b4b1e96a77b3f5c91fdab899e6"}, + {file = "coverage-7.14.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:442cc9c952b2df400cda54bb04ab87330cf2cd08a8692cbbea36773531eb6f37"}, + {file = "coverage-7.14.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8270544c361ed405a27a060dbc9ed2c124b084d96dfdc2d9a2510482aef981ad"}, + {file = "coverage-7.14.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:48b283b1dd6372e8de2a7a9a4c4d5dc06f4d4fd209b876f3c88a7a205a0c8f84"}, + {file = "coverage-7.14.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5b0c99ba93a07d56f6df340bb79be53202a082b2fdb81bfe6190b741a3470d54"}, + {file = "coverage-7.14.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e471bc5769ff073b058cfadb0d736b56ce067c8560eabeb0da88462df98c23e7"}, + {file = "coverage-7.14.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f497a1ea81d4cd7c10ddcaa685135b9aabd291af3d55775a9ddf3cb7a364cdd9"}, + {file = "coverage-7.14.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2222be86d0b54f5dd5a38f45f17f315f737245e857bf0bdedc70734f84a13c02"}, + {file = "coverage-7.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:85e85586565842f6932abebd4c18bcb1074223dc0b3576e7d173ca710622813a"}, + {file = "coverage-7.14.1-cp312-cp312-win32.whl", hash = "sha256:4a28fd227808366b196a75476dced2eb35b351d6766ba9c858dc93319e87f4f1"}, + {file = 
"coverage-7.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:54acdb6674a4661768d7bf7db32dfb9f46ab1d764f8aba6df75ce1a6a088724e"}, + {file = "coverage-7.14.1-cp312-cp312-win_arm64.whl", hash = "sha256:99cd41ff91afd94896fea3bc002706b6ae4ce95727d06e4a0f39c0a8d8bd8b1a"}, + {file = "coverage-7.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:be9f2c802dcfce3f71298303aa5dad0dce440a76c52f2f60dacd8656dab78793"}, + {file = "coverage-7.14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6223a72fd0e4c7156353ec0f08a5f93623e1d3034d0e2683b9bb8ea674131b1d"}, + {file = "coverage-7.14.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7279d2110a28cebc738b6459ecda2771735a4c18465fbbd36b3288fe5ed92247"}, + {file = "coverage-7.14.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9eeb3fcbc13ba40dfbdb22d01d196a28e9cef9ed4c29b60061a1e0e823a9929d"}, + {file = "coverage-7.14.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f0cfc27c539f07cf5c0a4cfe211d0b6cae039f8f40526dbaa71944e64b50a7b"}, + {file = "coverage-7.14.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:221c70f316241a78e77e607c227cefc8808d4e08f28d99c04f35694690e940be"}, + {file = "coverage-7.14.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:da028256b04ec30e5e0114b6f76172938c313991f0a2d3d894271315cf5d5e43"}, + {file = "coverage-7.14.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76a085d7005236a767e3426148b2c407e53ad61695c562f8a81da2d373324901"}, + {file = "coverage-7.14.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b553d04b5e778a8e56d57eb134aff42a92718ecba45e79c4764ecfa40efd92ff"}, + {file = "coverage-7.14.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:46f714d2fb8ae2f4f29f23ada7f1e79b759fff5a70f94a1dac23af204c3ec9e4"}, + {file = 
"coverage-7.14.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:1896f5e19ff3f0431c7ce2172adc54890fd97f86b59ced8ca1649145d9ffe35d"}, + {file = "coverage-7.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:62fd185ef9df3c33d1c8178c5af105f762afbad96038de9a4ae100aa6297ca33"}, + {file = "coverage-7.14.1-cp313-cp313-win32.whl", hash = "sha256:ab4af6352741a604c431c6072fce5bee33bf0f20dc7a56618d6bf6bb89e9810c"}, + {file = "coverage-7.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:7af486dabe8954d03b087f0021540897afe084f04e16ff5579e08cc46f871416"}, + {file = "coverage-7.14.1-cp313-cp313-win_arm64.whl", hash = "sha256:2224f89ffd0c5605ccce1ed7a584da162bc7c55f601ab1c946bc9de31a486b42"}, + {file = "coverage-7.14.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:de286598cc65d2b489411174b1faec2f5a7775fb3201fd925db2a76b4030f37d"}, + {file = "coverage-7.14.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:042c46ded7c288aeb07cf14a28b6c1e10b78fcba40171c3fa1e939377eeef0b5"}, + {file = "coverage-7.14.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f4ddbe407477f04c45115d1a4e5bc480f753553b534d338d4c3358b1cdd0ea52"}, + {file = "coverage-7.14.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d13e6725992e2d2fd7d81d4f5241952d13740121dfd501da09201be39b2c003a"}, + {file = "coverage-7.14.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f747dc8edcfe740130f28f32f3995e955494285717e86ee25af51db2219df08a"}, + {file = "coverage-7.14.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ced2f09ef276fd58611a1ef502164ad266d2b75174e5a40cabbdb4033f9f6cf2"}, + {file = "coverage-7.14.1-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b84800013769a78ccb9ef4659402e26d06867e337b61ec365f77ad008adea80e"}, + {file = 
"coverage-7.14.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ea8cd6ca0ee9f616aaef3afc6882e32c2cbf18b00d96313ffd76af650574034d"}, + {file = "coverage-7.14.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:aa5e304a873fabddc11e484e9b6b738bd38bd7bed17b09aa84eecf5332e8b8bb"}, + {file = "coverage-7.14.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:5a1c5215be81035e629d5bc756650634d0bf31991038db7a0eccb90f025ce16d"}, + {file = "coverage-7.14.1-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:79058c47dae6788504b5effb319961bcd72d7240551464b91d474bc0ed186d69"}, + {file = "coverage-7.14.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:370c5afae3fa0658e11694a32b24c2778f6bc2d17718121f94ee185e69f26b54"}, + {file = "coverage-7.14.1-cp313-cp313t-win32.whl", hash = "sha256:3758dd0a7f1fa57365ef2e781df0f0731d38b6e3772259d13dae4bd8a958d4b1"}, + {file = "coverage-7.14.1-cp313-cp313t-win_amd64.whl", hash = "sha256:6ff665fb023a77386fe11685190cee1f60a7d635994a30d9b0a061533d470fce"}, + {file = "coverage-7.14.1-cp313-cp313t-win_arm64.whl", hash = "sha256:17a5a241e5997621a956a7f402a7433ef4221e5152809b785bec79e2323799f1"}, + {file = "coverage-7.14.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d5ed429d0b8edaac649e889b4ffcedb6c80b06629a3f93050e3dddfb99235bee"}, + {file = "coverage-7.14.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8011224a62280e50dab346960c03cf47aca1a1e09e608c0fb33fd6e0cc8e9500"}, + {file = "coverage-7.14.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:12c42ec1e14f553c4f817e989365982e646e27211f10a0f717855b94a79c8906"}, + {file = "coverage-7.14.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:06144cd511cf2624873a035c5069cf297144f6e77a73ee3d7a55b605ec5efb42"}, + {file = "coverage-7.14.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:a311d8e1da24be5c1ccf85cbfb06315dbaa1703d5a1eab3f6432c72b837917c8"}, + {file = "coverage-7.14.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c79cead5b5bc584d9c71451cb984d0e3a84e0c0937379c8efcbf27c8d661b851"}, + {file = "coverage-7.14.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:dcbf65f1f66a26cdd88c35cf68fb4729c5d1cd2e88added72420541dfb212034"}, + {file = "coverage-7.14.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fd86572566fb40189a8260446158235159bc7a82dfbc87a3b39cf4fb57fcec1c"}, + {file = "coverage-7.14.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:7771b601718fdde84832c3a434ca9bbf4ae9adbc49d84198b4110700c3c77c36"}, + {file = "coverage-7.14.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:39b21e212c55af06fa375e3dbf90a8a8e38792f3a910c580066d23563830ddd5"}, + {file = "coverage-7.14.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:f2302660e32562a532b442480121aef8aa61a5bdb20b30bf0adab29f10a5a4b4"}, + {file = "coverage-7.14.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:03a6f93c1ec3b7f2e77b5dbcc5573a2c21f12529a5c6bbe0f16f72303cc2fa4d"}, + {file = "coverage-7.14.1-cp314-cp314-win32.whl", hash = "sha256:8a3ce026d73290f42f08dafecbd82c193a74df280461fbf97300fec51fd133ee"}, + {file = "coverage-7.14.1-cp314-cp314-win_amd64.whl", hash = "sha256:114c95ef29302423b87d159075805f4ab973254a2638a5d7d046c94887cc87d7"}, + {file = "coverage-7.14.1-cp314-cp314-win_arm64.whl", hash = "sha256:a07891c3f4805442b31b71e84ba3cf29ed1aa9a428284e06deeb4b23e5b46343"}, + {file = "coverage-7.14.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1101a5ebb083aecb625ebb6209d4105b58f647b093cb2dc8122d7b33f743cfe1"}, + {file = "coverage-7.14.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:851b9e1e4e8a4608e77c79714b2e77c0970d2ed7202a05e92ae407817481887b"}, + {file = "coverage-7.14.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", 
hash = "sha256:d5b89cdfb2ee051b71e8c3c70bd81a9eff81100f736a269136fe1a68efe00474"}, + {file = "coverage-7.14.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0177614a0370f227888b4e436a7c55686d6a9f90eb1ade2b624ba685a1686e86"}, + {file = "coverage-7.14.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2d69af5dea2de76fc485a83032a630523f985198b7e25be901ec60181587b01e"}, + {file = "coverage-7.14.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:35ab22d91de736e8966b980dc355cbcdd2c6dbbcfe275f9a2991bc8a91b3df65"}, + {file = "coverage-7.14.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:357d4e32935c36588aaba057d734fa32428c360c9fc2e4442afbf1b646beee6e"}, + {file = "coverage-7.14.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:51bd64741cc6fa065abd300ede1afe5a5291ece9c31da8b24884deda48bcc3f8"}, + {file = "coverage-7.14.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:9132cd363a68a4c3daa7c8704a654b1e39d3360f6f5b8ddd470608a945236c07"}, + {file = "coverage-7.14.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:07c6290b1697b862c0478eab545eec949a0d0e4d6d03497f446d706da3b4f2de"}, + {file = "coverage-7.14.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:5ea0c297e27133853b4d8a3eb799bff5a2dbd9f2f41537a240d337ac9b4df890"}, + {file = "coverage-7.14.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:01b7733daad0237daa01ef80fe2dfceffc911e6a17fa7b55d14aa8214eaaaecd"}, + {file = "coverage-7.14.1-cp314-cp314t-win32.whl", hash = "sha256:6adc5a36984624a70bf11d7184e20fa0a49aa7c47ffab43804106a1a695ea22e"}, + {file = "coverage-7.14.1-cp314-cp314t-win_amd64.whl", hash = "sha256:ddf799247318f34dbcd2efa8c95a8d0642674e926bb1774cf9b63dfd2a389d1c"}, + {file = "coverage-7.14.1-cp314-cp314t-win_arm64.whl", hash = 
"sha256:145986fe66647eb489f18d9a997567a3fd358584c4b5a808769113abc07466af"}, + {file = "coverage-7.14.1-py3-none-any.whl", hash = "sha256:a252f21c27e38347e60111a3266b03827422a7d5525951aceee313aa68bab1d2"}, + {file = "coverage-7.14.1.tar.gz", hash = "sha256:30c08f7d90415aa98b3c990385dea2939b0da55f38515e5b369b83655f8523be"}, ] [package.dependencies] @@ -706,30 +658,6 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli ; python_full_version <= \"3.11.0a6\""] -[[package]] -name = "cssutils" -version = "2.14.0" -description = "A CSS Cascading Style Sheets library for Python" -optional = false -python-versions = ">=3.10" -groups = ["docs"] -files = [ - {file = "cssutils-2.14.0-py3-none-any.whl", hash = "sha256:79ad979e4a383f39f0b3f0ca82ee3f1b01065da9fa02701b63bfed38ac76eb91"}, - {file = "cssutils-2.14.0.tar.gz", hash = "sha256:c33256f0cbc215ad405b647117ace63c9e22af96fe42dcb7861742a591e6464c"}, -] - -[package.dependencies] -encutils = "*" -more_itertools = "*" - -[package.extras] -check = ["pytest-checkdocs (>=2.14)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] -cover = ["pytest-cov"] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=3.4)"] -test = ["cssselect", "importlib_resources ; python_version < \"3.9\"", "jaraco.test (>=5.1)", "lxml ; python_version < \"3.11\"", "pytest (>=6,!=8.1.*)"] -type = ["pytest-mypy (>=1.0.1) ; platform_python_implementation != \"PyPy\""] - [[package]] name = "debugpy" version = "1.8.20" @@ -772,14 +700,14 @@ files = [ [[package]] name = "decorator" -version = "5.2.1" +version = "5.3.1" description = "Decorators for Humans" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a"}, - {file = "decorator-5.2.1.tar.gz", hash = 
"sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"}, + {file = "decorator-5.3.1-py3-none-any.whl", hash = "sha256:f47fe6fdbd2edd623ecfe36875d37aba411624e2670dd395dddae1358689bb3c"}, + {file = "decorator-5.3.1.tar.gz", hash = "sha256:4cbcdd55a6efadb9dbea26b858f4fb3264567b52d69ca0d25b721b553f60ea82"}, ] [[package]] @@ -796,19 +724,19 @@ files = [ [[package]] name = "dict2css" -version = "0.3.0.post1" +version = "0.6.0" description = "A μ-library for constructing cascading style sheets from Python dictionaries." optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" groups = ["docs"] files = [ - {file = "dict2css-0.3.0.post1-py3-none-any.whl", hash = "sha256:f006a6b774c3e31869015122ae82c491fd25e7de4a75607a62aa3e798f837e0d"}, - {file = "dict2css-0.3.0.post1.tar.gz", hash = "sha256:89c544c21c4ca7472c3fffb9d37d3d926f606329afdb751dc1de67a411b70719"}, + {file = "dict2css-0.6.0-py3-none-any.whl", hash = "sha256:5251f1df1c78ffdf09313657a7f88add0ad219127d9aeb18fb343b052d6bfbbe"}, + {file = "dict2css-0.6.0.tar.gz", hash = "sha256:143e55cb71c98a88c79f2c41e08a5fa4d875659275756f794e31ccd69936ce88"}, ] [package.dependencies] -cssutils = ">=2.2.0" domdf-python-tools = ">=2.2.0" +tinycss2 = ">=1.2.1" [[package]] name = "dill" @@ -858,21 +786,6 @@ typing-extensions = ">=3.7.4.1" all = ["pytz (>=2019.1)"] dates = ["pytz (>=2019.1)"] -[[package]] -name = "encutils" -version = "1.0.0" -description = "" -optional = false -python-versions = ">=3.10" -groups = ["docs"] -files = [ - {file = "encutils-1.0.0-py3-none-any.whl", hash = "sha256:605297da19a23d1b2da7d3b9bd75513acc979e9facf03aa7ec7ba04b5f567a79"}, - {file = "encutils-1.0.0.tar.gz", hash = "sha256:38eca5af18cebabd8be43c17f14c9d3fbba83cc5f7ac8e3ab1c86e24c4b2b91a"}, -] - -[package.dependencies] -chardet = "*" - [[package]] name = "enum-tools" version = "0.12.0" @@ -1022,18 +935,18 @@ files = [ [[package]] name = "idna" -version = "3.11" +version = "3.17" description = "Internationalized Domain 
Names in Applications (IDNA)" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main", "docs"] files = [ - {file = "idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"}, - {file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"}, + {file = "idna-3.17-py3-none-any.whl", hash = "sha256:466e48829084efe2548012b855df21540b96f2e20e51bd124c851536556a592c"}, + {file = "idna-3.17.tar.gz", hash = "sha256:5eb0cb53bc467c12eadcf6de83163ad8527cec9416f44b9b61b19caedad2b87f"}, ] [package.extras] -all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] +all = ["mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] [[package]] name = "imagesize" @@ -1193,49 +1106,15 @@ test-extra = ["curio", "ipython[test]", "jupyter_ai", "matplotlib (!=3.2.0)", "n [[package]] name = "ipython" -version = "9.10.1" +version = "9.14.0" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.11" groups = ["dev"] -markers = "python_version == \"3.11\"" -files = [ - {file = "ipython-9.10.1-py3-none-any.whl", hash = "sha256:82d18ae9fb9164ded080c71ef92a182ee35ee7db2395f67616034bebb020a232"}, - {file = "ipython-9.10.1.tar.gz", hash = "sha256:e170e9b2a44312484415bdb750492699bf329233b03f2557a9692cce6466ada4"}, -] - -[package.dependencies] -colorama = {version = ">=0.4.4", markers = "sys_platform == \"win32\""} -decorator = ">=4.3.2" -ipython-pygments-lexers = ">=1.0.0" -jedi = ">=0.18.1" -matplotlib-inline = ">=0.1.5" -pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""} -prompt_toolkit = ">=3.0.41,<3.1.0" -pygments = ">=2.11.0" -stack_data = ">=0.6.0" -traitlets = ">=5.13.0" -typing_extensions = {version = ">=4.6", markers = "python_version < \"3.12\""} - -[package.extras] -all = ["argcomplete (>=3.0)", 
"ipython[doc,matplotlib,terminal,test,test-extra]"] -black = ["black"] -doc = ["docrepr", "exceptiongroup", "intersphinx_registry", "ipykernel", "ipython[matplotlib,test]", "setuptools (>=70.0)", "sphinx (>=8.0)", "sphinx-rtd-theme (>=0.1.8)", "sphinx_toml (==0.0.4)", "typing_extensions"] -matplotlib = ["matplotlib (>3.9)"] -test = ["packaging (>=20.1.0)", "pytest (>=7.0.0)", "pytest-asyncio (>=1.0.0)", "setuptools (>=61.2)", "testpath (>=0.2)"] -test-extra = ["curio", "ipykernel (>6.30)", "ipython[matplotlib]", "ipython[test]", "jupyter_ai", "nbclient", "nbformat", "numpy (>=1.27)", "pandas (>2.1)", "trio (>=0.1.0)"] - -[[package]] -name = "ipython" -version = "9.12.0" -description = "IPython: Productive Interactive Computing" -optional = false -python-versions = ">=3.12" -groups = ["dev"] -markers = "python_version >= \"3.12\"" +markers = "python_version >= \"3.11\"" files = [ - {file = "ipython-9.12.0-py3-none-any.whl", hash = "sha256:0f2701e8ee86e117e37f50563205d36feaa259d2e08d4a6bc6b6d74b18ce128d"}, - {file = "ipython-9.12.0.tar.gz", hash = "sha256:01daa83f504b693ba523b5a407246cabde4eb4513285a3c6acaff11a66735ee4"}, + {file = "ipython-9.14.0-py3-none-any.whl", hash = "sha256:8fd984a3372c14b12790b084ba6b5cff5678c0cb063244a0034f06a51f20d6c2"}, + {file = "ipython-9.14.0.tar.gz", hash = "sha256:6f27ff0f1d9ea050e0551f71568bc4b34d8aba579e8f111c5b4175f44ac6b4aa"}, ] [package.dependencies] @@ -1246,9 +1125,11 @@ jedi = ">=0.18.2" matplotlib-inline = ">=0.1.6" pexpect = {version = ">4.6", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""} prompt_toolkit = ">=3.0.41,<3.1.0" +psutil = {version = ">=7", markers = "sys_platform != \"emscripten\""} pygments = ">=2.14.0" stack_data = ">=0.6.0" traitlets = ">=5.13.0" +typing_extensions = {version = ">=4.6", markers = "python_version < \"3.12\""} [package.extras] all = ["argcomplete (>=3.0)", "ipython[doc,matplotlib,terminal,test,test-extra]", "types-decorator"] @@ -1305,23 +1186,22 @@ plugins = 
["setuptools"] [[package]] name = "jedi" -version = "0.19.2" +version = "0.20.0" description = "An autocompletion tool for Python that can be used for text editors." optional = false -python-versions = ">=3.6" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9"}, - {file = "jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0"}, + {file = "jedi-0.20.0-py2.py3-none-any.whl", hash = "sha256:7bdd9c2634f56713299976f4cbd59cb3fa92165cc5e05ea811fb253480728b67"}, + {file = "jedi-0.20.0.tar.gz", hash = "sha256:c3f4ccbd276696f4b19c54618d4fb18f9fc24b0aef02acf704b23f487daa1011"}, ] [package.dependencies] -parso = ">=0.8.4,<0.9.0" +parso = ">=0.8.6,<0.9.0" [package.extras] -docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alabaster (==0.7.12)", "babel (==2.9.1)", "chardet (==4.0.0)", "commonmark (==0.8.1)", "docutils (==0.17.1)", "future (==0.18.2)", "idna (==2.10)", "imagesize (==1.2.0)", "mock (==1.0.1)", "packaging (==20.9)", "pyparsing (==2.4.7)", "pytz (==2021.1)", "readthedocs-sphinx-ext (==2.1.4)", "recommonmark (==0.5.0)", "requests (==2.25.1)", "six (==1.15.0)", "snowballstemmer (==2.1.0)", "sphinx (==1.8.5)", "sphinx-rtd-theme (==0.4.3)", "sphinxcontrib-serializinghtml (==1.1.4)", "sphinxcontrib-websupport (==1.2.4)", "urllib3 (==1.26.4)"] -qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] -testing = ["Django", "attrs", "colorama", "docopt", "pytest (<9.0.0)"] +dev = ["Django", "attrs", "colorama", "docopt", "flake8 (==7.1.2)", "pytest (<9.0.0)", "types-setuptools (==80.9.0.20250529)", "typing-extensions", "zuban (==0.7.0)"] +docs = ["Jinja2 (==3.1.6)", "MarkupSafe (==3.0.3)", "Pygments (==2.20.0)", "Sphinx (==9.1.0)", "alabaster (==1.0.0)", "babel (==2.18.0)", "certifi (==2026.4.22)", "charset-normalizer (==3.4.7)", "docutils (==0.22.4)", 
"idna (==3.13)", "imagesize (==2.0.0)", "iniconfig (==2.3.0)", "packaging (==26.2)", "pluggy (==1.6.0)", "pytest (==9.0.3)", "requests (==2.33.1)", "roman-numerals (==4.1.0)", "snowballstemmer (==3.0.1)", "sphinx-rtd-theme (==3.1.0)", "sphinxcontrib-applehelp (==2.0.0)", "sphinxcontrib-devhelp (==2.0.0)", "sphinxcontrib-htmlhelp (==2.1.0)", "sphinxcontrib-jquery (==4.1)", "sphinxcontrib-jsmath (==1.0.1)", "sphinxcontrib-qthelp (==2.0.0)", "sphinxcontrib-serializinghtml (==2.0.0)", "urllib3 (==2.6.3)"] [[package]] name = "jinja2" @@ -1560,21 +1440,21 @@ files = [ [[package]] name = "matplotlib-inline" -version = "0.2.1" +version = "0.2.2" description = "Inline Matplotlib backend for Jupyter" optional = false python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76"}, - {file = "matplotlib_inline-0.2.1.tar.gz", hash = "sha256:e1ee949c340d771fc39e241ea75683deb94762c8fa5f2927ec57c83c4dffa9fe"}, + {file = "matplotlib_inline-0.2.2-py3-none-any.whl", hash = "sha256:3c821cf1c209f59fb2d2d64abbf5b23b67bcb2210d663f9918dd851c6da1fcf6"}, + {file = "matplotlib_inline-0.2.2.tar.gz", hash = "sha256:72f3fe8fce36b70d4a5b612f899090cd0401deddc4ea90e1572b9f4bfb058c79"}, ] [package.dependencies] traitlets = "*" [package.extras] -test = ["flake8", "nbdime", "nbval", "notebook", "pytest"] +test = ["flake8", "matplotlib", "nbdime", "nbval", "notebook", "pytest"] [[package]] name = "mccabe" @@ -1590,14 +1470,14 @@ files = [ [[package]] name = "mdit-py-plugins" -version = "0.5.0" +version = "0.6.1" description = "Collection of plugins for markdown-it-py" optional = false python-versions = ">=3.10" groups = ["docs"] files = [ - {file = "mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f"}, - {file = "mdit_py_plugins-0.5.0.tar.gz", hash = 
"sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6"}, + {file = "mdit_py_plugins-0.6.1-py3-none-any.whl", hash = "sha256:214c82fb2ac524472ab6a5bcab1de80f73b50443e187f401bfd77efbc7c6481d"}, + {file = "mdit_py_plugins-0.6.1.tar.gz", hash = "sha256:a2bca0f039f39dbd35fb74ae1b5f998608c437463371f0ff7f49a19a17a114d0"}, ] [package.dependencies] @@ -1606,7 +1486,7 @@ markdown-it-py = ">=2.0.0,<5.0.0" [package.extras] code-style = ["pre-commit"] rtd = ["myst-parser", "sphinx-book-theme"] -testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions", "pytest-timeout"] [[package]] name = "mdurl" @@ -1622,31 +1502,19 @@ files = [ [[package]] name = "mistune" -version = "3.2.0" +version = "3.2.1" description = "A sane and fast Markdown parser with useful plugins and renderers" optional = false python-versions = ">=3.8" groups = ["docs"] files = [ - {file = "mistune-3.2.0-py3-none-any.whl", hash = "sha256:febdc629a3c78616b94393c6580551e0e34cc289987ec6c35ed3f4be42d0eee1"}, - {file = "mistune-3.2.0.tar.gz", hash = "sha256:708487c8a8cdd99c9d90eb3ed4c3ed961246ff78ac82f03418f5183ab70e398a"}, + {file = "mistune-3.2.1-py3-none-any.whl", hash = "sha256:78cdb0ba5e938053ccf63651b352508d2efa9411dc8810bfb05f2dc5140c0048"}, + {file = "mistune-3.2.1.tar.gz", hash = "sha256:7c8e5501d38bac1582e067e46c8343f17d57ea1aaa735823f3aba1fd59c88a28"}, ] [package.dependencies] typing-extensions = {version = "*", markers = "python_version < \"3.11\""} -[[package]] -name = "more-itertools" -version = "11.0.2" -description = "More routines for operating on iterables, beyond itertools" -optional = false -python-versions = ">=3.10" -groups = ["docs"] -files = [ - {file = "more_itertools-11.0.2-py3-none-any.whl", hash = "sha256:6e35b35f818b01f691643c6c611bc0902f2e92b46c18fffa77ae1e7c46e912e4"}, - {file = "more_itertools-11.0.2.tar.gz", hash = 
"sha256:392a9e1e362cbc106a2457d37cabf9b36e5e12efd4ebff1654630e76597df804"}, -] - [[package]] name = "msgpack" version = "1.1.2" @@ -1893,14 +1761,14 @@ rdflib = ">=7.1.4" [[package]] name = "packaging" -version = "26.1" +version = "26.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" groups = ["main", "dev", "docs", "test"] files = [ - {file = "packaging-26.1-py3-none-any.whl", hash = "sha256:5d9c0669c6285e491e0ced2eee587eaf67b670d94a19e94e3984a481aba6802f"}, - {file = "packaging-26.1.tar.gz", hash = "sha256:f042152b681c4bfac5cae2742a55e103d27ab2ec0f3d88037136b6bfe7c9c5de"}, + {file = "packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e"}, + {file = "packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661"}, ] [[package]] @@ -1917,14 +1785,14 @@ files = [ [[package]] name = "parso" -version = "0.8.6" +version = "0.8.7" description = "A Python Parser" optional = false python-versions = ">=3.6" groups = ["dev"] files = [ - {file = "parso-0.8.6-py2.py3-none-any.whl", hash = "sha256:2c549f800b70a5c4952197248825584cb00f033b29c692671d3bf08bf380baff"}, - {file = "parso-0.8.6.tar.gz", hash = "sha256:2b9a0332696df97d454fa67b81618fd69c35a7b90327cbe6ba5c92d2c68a7bfd"}, + {file = "parso-0.8.7-py2.py3-none-any.whl", hash = "sha256:a8926eb2a1b915486941fdbd31e86a4baf88fe8c210f25f2f35ecec5b574ca1c"}, + {file = "parso-0.8.7.tar.gz", hash = "sha256:eaaac4c9fdd5e9e8852dc778d2d7405897ec510f2a298071453e5e3a07914bb1"}, ] [package.extras] @@ -1964,14 +1832,14 @@ docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst [[package]] name = "platformdirs" -version = "4.9.6" +version = "4.10.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.10" groups = ["main", "dev", "docs"] files = [ - {file = "platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917"}, - {file = "platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a"}, + {file = "platformdirs-4.10.0-py3-none-any.whl", hash = "sha256:fb516cdb12eb0d857d0cd85a7c57cea4d060bee4578d6cf5a14dfdf8cbf8784a"}, + {file = "platformdirs-4.10.0.tar.gz", hash = "sha256:31e761a6a0ca04faf7353ea759bdba55652be214725111e5aac52dfa29d4bef7"}, ] [[package]] @@ -2540,14 +2408,14 @@ typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""} [[package]] name = "requests" -version = "2.33.1" +version = "2.34.2" description = "Python HTTP for Humans." optional = false python-versions = ">=3.10" groups = ["main", "docs"] files = [ - {file = "requests-2.33.1-py3-none-any.whl", hash = "sha256:4e6d1ef462f3626a1f0a0a9c42dd93c63bad33f9f1c1937509b8c5c8718ab56a"}, - {file = "requests-2.33.1.tar.gz", hash = "sha256:18817f8c57c6263968bc123d237e3b8b08ac046f5456bd1e307ee8f4250d3517"}, + {file = "requests-2.34.2-py3-none-any.whl", hash = "sha256:2a0d60c172f83ac6ab31e4554906c0f3b3588d37b5cb939b1c061f4907e278e0"}, + {file = "requests-2.34.2.tar.gz", hash = "sha256:f288924cae4e29463698d6d60bc6a4da69c89185ad1e0bcc4104f584e960b9ed"}, ] [package.dependencies] @@ -2562,14 +2430,14 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<8)"] [[package]] name = "requests-cache" -version = "1.3.1" +version = "1.3.2" description = "A persistent cache for python requests" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "requests_cache-1.3.1-py3-none-any.whl", hash = "sha256:43a67448c3b2964c631ac7027b84607f2f63438e28104b68ad2211f32d9f606c"}, - {file = "requests_cache-1.3.1.tar.gz", hash = "sha256:784e9d07f72db4fe234830a065230c59eb446489528f271ba288c640897e47c4"}, + {file = 
"requests_cache-1.3.2-py3-none-any.whl", hash = "sha256:c52666c76b08daa94d05a99327dd24afc46f405abc044e8c2267b540f90673d0"}, + {file = "requests_cache-1.3.2.tar.gz", hash = "sha256:bdc3680931f98a1dea509d339ea6b45cea526945b47b250ce63ffd2744ee0b14"}, ] [package.dependencies] @@ -2581,7 +2449,7 @@ url-normalize = ">=2.0" urllib3 = ">=1.25.5" [package.extras] -all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "orjson (>=3.0)", "pymongo (>=3)", "pyyaml (>=6.0.1)", "redis (>=3)", "ujson (>=5.4)"] +all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "orjson (>=3.0) ; python_version < \"3.14\"", "pymongo (>=3)", "pyyaml (>=6.0.1)", "redis (>=3)", "ujson (>=5.4)"] dynamodb = ["boto3 (>=1.15)", "botocore (>=1.18)"] mongodb = ["pymongo (>=3)"] redis = ["redis (>=3)"] @@ -2610,14 +2478,14 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] [[package]] name = "rich-click" -version = "1.9.7" +version = "1.9.8" description = "Format click help output nicely with rich" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "rich_click-1.9.7-py3-none-any.whl", hash = "sha256:2f99120fca78f536e07b114d3b60333bc4bb2a0969053b1250869bcdc1b5351b"}, - {file = "rich_click-1.9.7.tar.gz", hash = "sha256:022997c1e30731995bdbc8ec2f82819340d42543237f033a003c7b1f843fc5dc"}, + {file = "rich_click-1.9.8-py3-none-any.whl", hash = "sha256:12873865396e6927835d4eabb1cc3996edcd65b7ac9b2391a29eca4f335a2f93"}, + {file = "rich_click-1.9.8.tar.gz", hash = "sha256:4008f921da88b5d91646c134ec881c1500e5a6b3f093e90e8f29400e09608371"}, ] [package.dependencies] @@ -2627,8 +2495,8 @@ rich = ">=12" typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} [package.extras] -dev = ["inline-snapshot (>=0.24)", "jsonschema (>=4)", "mypy (>=1.14.1)", "nodeenv (>=1.9.1)", "packaging (>=25)", "pre-commit (>=3.5)", "pytest (>=8.3.5)", "pytest-cov (>=5)", "rich-codex (>=1.2.11)", "ruff (>=0.12.4)", "typer (>=0.15)", "types-setuptools (>=75.8.0.20250110)"] 
-docs = ["markdown-include (>=0.8.1)", "mike (>=2.1.3)", "mkdocs-github-admonitions-plugin (>=0.1.1)", "mkdocs-glightbox (>=0.4)", "mkdocs-include-markdown-plugin (>=7.1.7) ; python_version >= \"3.9\"", "mkdocs-material-extensions (>=1.3.1)", "mkdocs-material[imaging] (>=9.5.18,<9.6.0)", "mkdocs-redirects (>=1.2.2)", "mkdocs-rss-plugin (>=1.15)", "mkdocs[docs] (>=1.6.1)", "mkdocstrings[python] (>=0.26.1)", "rich-codex (>=1.2.11)", "typer (>=0.15)"] +dev = ["inline-snapshot (>=0.24)", "jsonschema (>=4)", "mypy (>=1.14.1)", "nodeenv (>=1.9.1)", "packaging (>=25)", "pre-commit (>=3.5)", "pytest (>=8.3.5)", "pytest-cov (>=5)", "rich-codex (>=1.2.11)", "ruff (>=0.12.4)", "typer (>=0.15,<0.26)", "types-setuptools (>=75.8.0.20250110)"] +docs = ["markdown-include (>=0.8.1)", "mike (>=2.1.3)", "mkdocs-github-admonitions-plugin (>=0.1.1)", "mkdocs-glightbox (>=0.4)", "mkdocs-include-markdown-plugin (>=7.1.7) ; python_version >= \"3.9\"", "mkdocs-material-extensions (>=1.3.1)", "mkdocs-material[imaging] (>=9.5.18,<9.6.0)", "mkdocs-redirects (>=1.2.2)", "mkdocs-rss-plugin (>=1.15)", "mkdocs[docs] (>=1.6.1)", "mkdocstrings[python] (>=0.26.1)", "rich-codex (>=1.2.11)", "typer (>=0.15,<0.26)"] [[package]] name = "roman-numerals" @@ -2666,6 +2534,7 @@ description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.10" groups = ["docs"] +markers = "python_version == \"3.10\"" files = [ {file = "rpds_py-0.30.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:679ae98e00c0e8d68a7fda324e16b90fd5260945b45d3b824c892cec9eea3288"}, {file = "rpds_py-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4cc2206b76b4f576934f0ed374b10d7ca5f457858b157ca52064bdfc26b9fc00"}, @@ -2784,6 +2653,147 @@ files = [ {file = "rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84"}, ] +[[package]] +name = "rpds-py" +version = "2026.5.1" +description = "Python bindings to Rust's persistent data 
structures (rpds)" +optional = false +python-versions = ">=3.11" +groups = ["docs"] +markers = "python_version >= \"3.11\"" +files = [ + {file = "rpds_py-2026.5.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3397a5ed7174dc2786bb214030232fc36fe8e5584fec43a9952cc542b1a12036"}, + {file = "rpds_py-2026.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:99ab6ba7bfa2cb0f96a04e3652355bf04e3f51aceb1e943b8541dab7ba4828cc"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0efbe45632665e53e3db8fe1e5692db58fc5cb9bab4459d570b83efefe11164"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:01d17b29c0c23d82b1f4751147ec49cf451f1fc2554eb9ef5f957e55d2656ead"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7559f72b94ae52659086c595dfa017cde03155f7832071d30959049052cb3ece"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e25b7088f9ccbfc0dfcaa52bf969300ca229e10ecf758974ebcbb080a4b37bb"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:613fc4ee9eaef26dc5840666214dd6fbcebcf32f46e76f4abc473059f4e13dda"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:85264a90ff4c05c1568dd65f5921c837614b67c60358fb4c17df3b7f2e90690a"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe71bca7d547acb17027c7fd1624ff8aae623499c498d3e7011182c4de5c25e0"}, + {file = "rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05fa4f41f37ec97c9c260441a940450a192f78d774d2b097eee1379f1e1246a"}, + {file = "rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df1d2a1996755b24b9ecee92cb4d36c28f86f464a6a173349c26bab41e94b8c2"}, + {file = "rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:8895840ac4809e5f60c88fd07617cd71326e73d6e5a8aa783c5c0f7c24985de2"}, + {file = "rpds_py-2026.5.1-cp311-cp311-win32.whl", hash = "sha256:3684a59b158a7683aaeb8e25352e9a9dd2122cec78f2d8530266e4f91b4c7b3f"}, + {file = "rpds_py-2026.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:7bd530e6a530bb3ea892f194fafa455f3516ac25ecf7143fd33c09be62b0470a"}, + {file = "rpds_py-2026.5.1-cp311-cp311-win_arm64.whl", hash = "sha256:0a5ae4dbe43c1076983b72616496919872ae7bbe7a1e21cc48336bc3154d130b"}, + {file = "rpds_py-2026.5.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3abe24a66e57adcfa645d718063a5fa5103ecc71ddbf26d78af8f9368018ff1d"}, + {file = "rpds_py-2026.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:58b1d94308ddf0b1982f61f2eb54bf92997c9ece8a8093ef014250f4a517906c"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa92420128dadce7f54bd73ba1825a273e9268fe9e35dbf7e6362890efa4e08"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca653c6546386227cd9800d1bef6a348099acf8db4250341da6d90f663d6dfcb"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66c93681c4729e4e3ecba31b8179fae083ff3118841672835140338b4b9867c1"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40ff257542e04796880e011e15cd4dc21c2599975df2aaa8f2c8495ca574e1a5"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6825cc329b290e93c5f6a9be2393118a763f6ccf6abd83704e0c102ca583644"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:de42116e69cb53b911cc34aee5ab98f36c597b822545045d49e938818b99e5e4"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0f920015df2a504bebaba6d4c31ccf3fcf942f92655c086da30b671aad19aa6"}, + {file = 
"rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0408a24e44feb919423dc6d9da677cb5cddb894d2ca9e763967d156d9c60fab4"}, + {file = "rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cea68bcd53467561ae2f96a6bdad1544299ba97b5b0ddcd5ac3d376e5c781c24"}, + {file = "rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4be8b1d2a705cc37d08256004e1d07de143fa0075c8e85a3df020b776f62b732"}, + {file = "rpds_py-2026.5.1-cp312-cp312-win32.whl", hash = "sha256:6736718bd4fc49cbcb538ba30516fdbef161522acefb739657d48b97bd864fed"}, + {file = "rpds_py-2026.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:0a7d1eec967df0e9b22614a5e177622e0c89611d03727fa0cb48e45028907870"}, + {file = "rpds_py-2026.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:1841d067089e117142d79b98aa0df2f08b52f2ecc1819dd2700636c0db74a473"}, + {file = "rpds_py-2026.5.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:efef4ac29c6ff495531eb17ee705b62841ecaa291b7c7077e848ea03e237164d"}, + {file = "rpds_py-2026.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c39f5b67a8a2e67179ada2a954227d670fe65fa9098457f698f56ddf248709b3"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5c30f3f04eef4fbd362226a6f31d7c8895ca4fbb6e0b790f6890a98d8da8559"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:277f6c82f0580848796c7ecc8a7173aa3bfb928e4ff831261c2f60a81dc270db"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63c2c4c213f1a4e3f3de28ecab029dbdee976324e729c0d7a55211be72576b02"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3350ec808fb538fe71a1f94dfaa0e29c598dfad805ce49f0caec5ae3183c652b"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:b1b964e3ab599e718dc46c018d104b1ebc007cbc6567d827c94a687fca56d77e"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:19cb09fab7b7fc96b2a6e28f2e34b72a3705ff27b37edb77455316e5d3f3dc9b"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abe76bcdba31e576cb83eeb8797aa0d882b738fef6dc65d0601fc753806a5b46"}, + {file = "rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8bff7073db3899158fff55ebf57b113a67030af26f80a18978f9f0aa60250ddf"}, + {file = "rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8ba264fa49be666cd9cc56bf34ec7002fb3d27a4aee5bcb4d43d0d18feb1bb6f"}, + {file = "rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4860b603ddda0475a8885499b3729e90229d480105b42651962a5397d995fa89"}, + {file = "rpds_py-2026.5.1-cp313-cp313-win32.whl", hash = "sha256:7944270ae71383f6e2657dd7d5ce4eeb4ac2d0059a6738f0510583d462ab4842"}, + {file = "rpds_py-2026.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:88647f43a73c4e01be19b04ceef0c8d3a1958153604d13c773becd8016f2a0cf"}, + {file = "rpds_py-2026.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:453895624ecf7db7063b1004e44037522bbaef9ff6a945e59bc71662d7a03abd"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:b4e4bc98639ec915f512fde3aa7a95e0041d95d9c3cc86eea841fa63cb1e8600"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cacedb7a6e167680acba45ad5716e89067d225dc80da0d7040cae8c81d4572fa"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68700371c5d7ae1412862ddfa719090925c93ecf351c566d66f09d04b136ea00"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:296c799becfa849c779c8725494fe9ed94959ed886787df4364b058465bad7f0"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:d3858b908218ee108d0bbfb2095ccc237648053c9bf98affad7cb079acaf1d97"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4fb8d2e7cb2f850b169806d61d1b991738acec96500a75c30f49caf064ce7cef"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b74c10ed6a8f190f4287f53bcfea348b92a84a9c9f70d30183d1e6172d580d"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:b9a6528956191c48c52294a592dbd4a8386d7048bdb25c0efcb6b966466c6d83"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:af03e34e860047bc7a352b842856fcf78798fbb81132cc98bd2f907ab4eb9cd2"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fea6e836d10abbe191d557d33bd58bd5987725fe63aa1eefe557d230209855bd"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:fc0c0f878ea770a0a8a462456c5ad36fc9fe6358e6b76fdadc7f17575e0b8bf1"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e0b360f316d966b048b085857630b3cc51f3db2f07b06f440eac8f695374d1e3"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-win32.whl", hash = "sha256:a2999883eedf72fdfb7520b92c7d4ec2572a71ff40239377aa604cc529eecafc"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e07be2a9d7122bd6e82dea89814ef8dc893feb1aae97fec1630f3263bbb30e55"}, + {file = "rpds_py-2026.5.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:1f2c391c3059798093b65df23aca2cac150460ae9c630d99dec83d703d9485b9"}, + {file = "rpds_py-2026.5.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:413b424f7c4ee65ab5e5be91f5731be0f8b41a1ee2b12dfe810d716312e95a78"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c595a1d9255dce0599e13130d1440ab2506654f2b50294226ee06402f8fef63"}, + {file = 
"rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1c27c5f6102eac8c03e7595a00827a53b271ba40a53b59ff8709170e0855ea4a"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c7fcf61d44cacecaf3aea542b0e053db77972a4573e7ceda16fb2b399161195"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c817a189d4ee14290420e5ff051e4dd6baa13f3edf84685071dee07a6d538ee"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21846aac0ed2e0589f38c12dc44e77bb64e494b771eadbcf169cba00566ba7ba"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b317c87a13f769a4e787819bd508aaa5d69aa09b0880de9af6d3a8a54571cdec"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce87129d9f2c14fa6c4a8601fb80eb4488c80d38a20cd13758ef11123e14995d"}, + {file = "rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9cdddb6c1207d284d94fd1530adf57fbd797fe7c4b8704ba85f49414f2557e7d"}, + {file = "rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:4e237e139f94d3c036fd28eb9f564c99055476ff4ff05cd42be55ce349b5aa02"}, + {file = "rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ed0954b524873214369184a9c82b0eaa45a3fbb9a798cd95b17e0d98499e7ea0"}, + {file = "rpds_py-2026.5.1-cp314-cp314-win32.whl", hash = "sha256:2d88621d6a7d4dfa633d21abe90f280bb205274e16b1d1e61c6ad4640b2453b7"}, + {file = "rpds_py-2026.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:cef8ac28d26f4dda3533060c20fbf80a325458fa9fd23ea72a73cdfa8e978838"}, + {file = "rpds_py-2026.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:eaaea962c68cdc68d4a533ba985ab8e9484277910bbfaa2ab3ef7732667bfed8"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = 
"sha256:21942f52dbbd5f8758bf021213d28bd45c39e873e65e2407faf5f1846f5761ad"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f414556f6e3958300ff941e40c9f97e3dc9774ddd1b3434c475d73dd354bbed3"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef1013a8625c74043210190b246f5b1551e09757c1f356c6e4160ef96c5bc081"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cc68e231a77a5f0d774ae278a1f8e55c0456501820847c1e4efb3829f3441df6"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9baffb505aff33acc69b422a19f77806680f3c8632227d79f48de8a810d1c2c5"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8d2f912928d426e8cfa396f7f3f8d29a59e6689c86dcca3c420730c1096322b"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90f628283be835db980c941767d41c9a27b5239e54ba0a9c1335247e82406964"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:1ebb2f0ab7e16132995a72de805170e0203df0c3dd22e1ef1cd1fdd90bd7a131"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f3df3d16ded76f1f8c9cdebd0e1ea55fdf4c23b812de189814da7cf229c22a81"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9af8905b8f854990e40d5206aa5ac58d9b0fe0b7f351ff2bb086c20f6c8c6a47"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:036a36a87fb1cd3b214d11c4b3c4f7d2ddad933625dca1c900b56a057c07740a"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:62ae3853454fe9ef283a03c96c2d835d39e84b14643a9d62c82ef0fb87d702ca"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-win32.whl", hash = "sha256:6c3d771a46ec18b12af06ce36243a9a80b07a5d0515236332d90863ca8bb326a"}, + 
{file = "rpds_py-2026.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c93c629be4636cf54337bd5f06c104d55e42ced54d681f6fe21ae510a65116f6"}, + {file = "rpds_py-2026.5.1-cp315-cp315-macosx_10_12_x86_64.whl", hash = "sha256:3574b55c604b8f75dacb007136508bbc0db406e626301778096a133327e7f2fb"}, + {file = "rpds_py-2026.5.1-cp315-cp315-macosx_11_0_arm64.whl", hash = "sha256:94068eb3ae6d43f5a786b7db96a406a34e6d5c24489feef32fd6e8946ea7b291"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a5b10e8ce894825f380a8f1b6444cf73c294dfea62afbb2d13e3a9e630cec1"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fc09f82e63d4bcd58149572f857a431bae851dc747e313c3b5bdf7abb907fda8"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e10464d17df3b582745c25cec695cb9558bca2cb6ddb631aee1787fc72c767b2"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba05adbf15d994c38ec0b7ab32e858e5110c21e9009a00a86545fd220f84e038"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77c004fdc7b891967106f78ddfd7b076bfe6813c6139c6fff6aed3bcaa960b26"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_31_riscv64.whl", hash = "sha256:83bcf894486c9d78dd290d3c0124ff6dd8875d3025e2090a8ec49fcc37c55fdd"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c3df104083952a0e0c6f10de33e440eabe98fb6317d23e1a58c68f6df08d01b9"}, + {file = "rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:980450826cf22e133c57e0835070bdd0dd3f73b9b708c3ce223def2cb9469e14"}, + {file = "rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_i686.whl", hash = "sha256:205dde846f24332ab0c1188699a043b8d165b79bb84529ce272c45048ff6be01"}, + {file = "rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = 
"sha256:3966b82dd563176396df030f3dd52a6e54cb69b718e95e78bd555ed3d1e0185d"}, + {file = "rpds_py-2026.5.1-cp315-cp315-win32.whl", hash = "sha256:7818f8d0a415be74d2be3590b0a1c1f463a642f4d0217e7d10602dceef5b79aa"}, + {file = "rpds_py-2026.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:b3cc20c0d800af78fd0fac68086e28c1856cec51ea528bb81ea851aa40d39325"}, + {file = "rpds_py-2026.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:3609e9939a8a76cd904cf98a3f1f13b5dc7e150adeaee89e0ea09652ea213e16"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-macosx_10_12_x86_64.whl", hash = "sha256:5d333a7127d4b307601ac37792bee01bb95c867cbfacf21b6375b804d6bbd723"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-macosx_11_0_arm64.whl", hash = "sha256:b5f077b44a4f7808520f66dae234988d867deb9aed9be5da057ce9ba831b2a41"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d8f9b7b78c9538fc9e04e82ec0e888ff0c3cffcfad152c77e57cd09351a98a"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e3a8ae58895ac107ed934a6bf51e5846f95c53b9b940c2c6d310838fd5846358"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0957cf3c2b8632ec7aaebffebea8005b353cc2a237b6e2ae3c2cac0820704cfb"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c396c1304de421050b3681ea70f371874b54d41b0151e96109758144c231e30b"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad1bff7f666b9598e573815affd666aac6a13a585dde336f843e33350c7fadc"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_31_riscv64.whl", hash = "sha256:656a042550878f12d45752452d47094b7cfe5ad1e9d7b87b5a22ad3ae5ff8015"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73c4bd4f70294737b5206a3e8e30ccadbf8a60301831c8ea23eec5dbeea1ecfa"}, + {file = 
"rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:43bca78665423cabae77146f2fe7ce55272b6c8d55d82cca83effd42c7e13972"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_i686.whl", hash = "sha256:42d0f20e85e549c870749d0e247f0c10d318a45b7e9676d575d2dcb04a1b2e66"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:b1be5c35683684d5331b93600c210e8367c254683d8a6df6bd21bd2da3a334fb"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-win32.whl", hash = "sha256:75808f6c38ce7749bb68cc2770161aae5045e6c6f6781a9782e74b93304399df"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:90bd6630002a1c7f09e7843dd79f0d24f3d2897cc25a753480917865d14f15b3"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:edf2765d84e42447f112ad877af8fe1db0089aaec5b28e88d6eab45e7fe99cea"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ad3773236e95f7f33991eb125224b7da66f206504d032a253a02da7e134519fb"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a04df86b3f0fade39ec8fd0e0aab089b1da9fbd2b48df778a57ef96f5e7d38df"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6142dbd80c4df62a5d899f0d616d417f84e0bc8d32526c8e5589019d75d028a7"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b35217adefe87f2fe4db7e9766cabe84744bfe9616d9667be18988928c7f2dc"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b95d5e11fc712b752081183a55a244c03cd00570489edd7014d8899f8ceb8162"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:141c9498daf2ace9eda35d2b0e376f9ea8b058d84f2aef4f96fccfd449a2f251"}, + {file = 
"rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:6f249f8b860a200ad35193af961183ebe9132710484e6f6ce0cf89fd83c63a9a"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e4abbf391a70be864920858bf360f4fb380577c9a0f732438a1996726e2c195b"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:c74005a7bb87752acf351c93897ec63ad77a07a0da7ecad9c050e32e7286ba34"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:8213afbe8a3a906fb9acb2014423fe3359ee783d0bf90995f70623a3217bfa6c"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:8c43a8a973270fd173bf48cdf80bbe66312421cba68d40845034f174f2389049"}, + {file = "rpds_py-2026.5.1.tar.gz", hash = "sha256:07b24fea40541e28570e5b795a4a38fbdcd12550c06bd0748005ecc8116ca256"}, +] + [[package]] name = "ruamel-yaml" version = "0.19.1" @@ -2816,26 +2826,26 @@ files = [ [[package]] name = "snowballstemmer" -version = "3.0.1" -description = "This package provides 32 stemmers for 30 languages generated from Snowball algorithms." +version = "3.1.0" +description = "This package provides 36 stemmers for 34 languages generated from Snowball algorithms." 
optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*" +python-versions = ">=3.3" groups = ["docs"] files = [ - {file = "snowballstemmer-3.0.1-py3-none-any.whl", hash = "sha256:6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064"}, - {file = "snowballstemmer-3.0.1.tar.gz", hash = "sha256:6d5eeeec8e9f84d4d56b847692bacf79bc2c8e90c7f80ca4444ff8b6f2e52895"}, + {file = "snowballstemmer-3.1.0-py3-none-any.whl", hash = "sha256:17e6d1da216aa07db6dad37139ea70cf13c4b2e9a096f6e64a9648fc657d3154"}, + {file = "snowballstemmer-3.1.0.tar.gz", hash = "sha256:fd9e34526b23340cd23ffea6c9f9760974ecc2c2ac9e1d81401443ccdb2a801f"}, ] [[package]] name = "soupsieve" -version = "2.8.3" +version = "2.8.4" description = "A modern CSS selector implementation for Beautiful Soup." optional = false python-versions = ">=3.9" groups = ["docs"] files = [ - {file = "soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95"}, - {file = "soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349"}, + {file = "soupsieve-2.8.4-py3-none-any.whl", hash = "sha256:e7e6b0769c8f51ed59acab6e994b00621096cfb1c640a7509295987388fbaf65"}, + {file = "soupsieve-2.8.4.tar.gz", hash = "sha256:e121fd02e975c695e4e9e8774a5ee35d74714b59307868dcc5319ad2d9e3328e"}, ] [[package]] @@ -3273,21 +3283,21 @@ widechars = ["wcwidth"] [[package]] name = "tinycss2" -version = "1.4.0" +version = "1.5.1" description = "A tiny CSS parser" optional = false -python-versions = ">=3.8" +python-versions = ">=3.10" groups = ["docs"] files = [ - {file = "tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289"}, - {file = "tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7"}, + {file = "tinycss2-1.5.1-py3-none-any.whl", hash = "sha256:3415ba0f5839c062696996998176c4a3751d18b7edaaeeb658c9ce21ec150661"}, + {file = 
"tinycss2-1.5.1.tar.gz", hash = "sha256:d339d2b616ba90ccce58da8495a78f46e55d4d25f9fd71dfd526f07e7d53f957"}, ] [package.dependencies] webencodings = ">=0.4" [package.extras] -doc = ["sphinx", "sphinx_rtd_theme"] +doc = ["furo", "sphinx"] test = ["pytest", "ruff"] [[package]] @@ -3362,51 +3372,51 @@ files = [ [[package]] name = "tomlkit" -version = "0.14.0" +version = "0.15.0" description = "Style preserving TOML library" optional = false python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680"}, - {file = "tomlkit-0.14.0.tar.gz", hash = "sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064"}, + {file = "tomlkit-0.15.0-py3-none-any.whl", hash = "sha256:4dbc8f0fc024412b57ced8757ac7461305126a648ff8c2c807fcb8e133a78738"}, + {file = "tomlkit-0.15.0.tar.gz", hash = "sha256:7d1a9ecba3086638211b13814ea79c90dd54dd11993564376f3aa92271f5c7a3"}, ] [[package]] name = "tornado" -version = "6.5.5" +version = "6.5.6" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
optional = false python-versions = ">=3.9" groups = ["dev", "docs"] files = [ - {file = "tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa"}, - {file = "tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521"}, - {file = "tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5"}, - {file = "tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07"}, - {file = "tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e"}, - {file = "tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca"}, - {file = "tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7"}, - {file = "tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b"}, - {file = "tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6"}, - {file = "tornado-6.5.5.tar.gz", hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9"}, + {file = "tornado-6.5.6-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:65fcfaafb079435c2c19dc9e07c0f1cf0fa9051759ed0a7d0a3ba7ea7f64919c"}, + {file = "tornado-6.5.6-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:38bc01b4acacded2de63ae78023548e41ebe6fbed3ec05a796d7ae3ad893887e"}, + {file = "tornado-6.5.6-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:b942e6a137fda31ff54bf8e6e2c8d1c37f1f50583f3ed53fb840b53b9601d104"}, + {file = "tornado-6.5.6-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8666946e70171b8c3f1fc9b7876fac492e84822c4c7f3746f4e8f8bc9ac92a79"}, + {file = "tornado-6.5.6-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1c34cfab7ad6d104f052f55de06d39bbafc5885cfeb4da688803308dbcfa90b7"}, + {file = "tornado-6.5.6-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:385f35e4e22fb52551dfcda4cdc8c30c61c2c001aef5ddad99cdfe116952efd3"}, + {file = "tornado-6.5.6-cp39-abi3-win32.whl", hash = "sha256:db475f1b67b2809b10bb16264829087724ca8d24fe4ed47f7b8675cae453ef86"}, + {file = "tornado-6.5.6-cp39-abi3-win_amd64.whl", hash = "sha256:6739bf1e8eb09230f1280ddbd3236f0309db70f2c551a8dbc40f62babdf82f79"}, + {file = "tornado-6.5.6-cp39-abi3-win_arm64.whl", hash = "sha256:2543597b24a695d72338a9a77818362d72387c03ae173f1f169eadc5c91466ac"}, + {file = "tornado-6.5.6.tar.gz", hash = "sha256:9a365179fe8ff6b8766f602c0f67c185d778193e9bdd828b19f0b6ed7764177d"}, ] [[package]] name = "traitlets" -version = "5.14.3" +version = "5.15.0" description = "Traitlets Python configuration system" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev", "docs"] files = [ - {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, - {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, + {file = "traitlets-5.15.0-py3-none-any.whl", hash = "sha256:fb36a18867a6803deab09f3c5e0fa81bb7b26a5c9e82501c9933f759166eff40"}, + {file = "traitlets-5.15.0.tar.gz", hash = "sha256:4fead733f81cf1c4c938e06f8ca4633896833c9d89eff878159457f4d4392971"}, ] [package.extras] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] -test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", 
"pytest-mypy-testing"] +test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "mypy (>=1.7.0,<1.19) ; platform_python_implementation == \"PyPy\"", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] [[package]] name = "typing-extensions" @@ -3423,34 +3433,34 @@ markers = {dev = "python_version < \"3.12\"", test = "python_version == \"3.10\" [[package]] name = "typos" -version = "1.45.1" +version = "1.47.0" description = "Source Code Spelling Correction" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "typos-1.45.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f3cd3d7e7e35f971e04974c7b34563dc1efb101841be3a39fec36c51f3d6ca2d"}, - {file = "typos-1.45.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:be6f26c580915e63df107f88bc766f131efe5f7d01d41c7bad83e6f9e5fe42be"}, - {file = "typos-1.45.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cd6a6ccbb1fc4fb8f0d9fee0201642d7a7560bd1661ebbefb9eac2da1ae4a5c"}, - {file = "typos-1.45.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d33c7750a29524dff020a17f356ed079227f36f43ec57f193e9681606a35749b"}, - {file = "typos-1.45.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:745b0584eeead4593858671113fceed3c28b8ca67bdc7a517120127aa509c6a6"}, - {file = "typos-1.45.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e962d414fb92ad31dc4c930fc5d07ac9e4b55fdd4f42688468040fc5649d92da"}, - {file = "typos-1.45.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:f39afdfcc2d159705f3ffb11162e13e8affd994d07836738c8d2a592194604ab"}, - {file = "typos-1.45.1-py3-none-win32.whl", hash = "sha256:212fdbb7b90d40522fe77efb69c15f7063c146812df01d5605e5d7816a3f37d3"}, - {file = "typos-1.45.1-py3-none-win_amd64.whl", hash = "sha256:67a56bd1f06184f3761883f4f75dd3cc196f939180de595d0980164d4a19d363"}, - {file = "typos-1.45.1.tar.gz", hash = "sha256:a1ac7ab02e74d4c4a2f8525b1529e1ce6261051df3229701836175fb91bb0583"}, + {file = 
"typos-1.47.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:c4104785d237fd2256023ba4339e404ed2db58888af703eb0726a1441a8e85d7"}, + {file = "typos-1.47.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:cb95b6fde16fe5bab11788bc14d3d9ec49dcbeec9517378e2fca9e283e6b7822"}, + {file = "typos-1.47.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:653eba984d2cc55eb47d50771761bb7e0d6e52771c2489fd76b1f86fbed49a2a"}, + {file = "typos-1.47.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be447ce8400154e4ae515cc9ecef99532cee6b29271ba3adbe3487304cd2c3c2"}, + {file = "typos-1.47.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9856752e08face132c7d08de875567675f2c54e3e04096d6ebad09c6430e16f0"}, + {file = "typos-1.47.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:438b8579e440ff88baf51cb577b2eb4514d065509ba41a10981e1ea9048a519b"}, + {file = "typos-1.47.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:19cfe53c33ac7d0c5f029fb97939b082139b63fbbb88350ab7e6df28be31629e"}, + {file = "typos-1.47.0-py3-none-win32.whl", hash = "sha256:2de87acff0b6857ce693059a6291227eec999284e16a87162178c847236bafca"}, + {file = "typos-1.47.0-py3-none-win_amd64.whl", hash = "sha256:05c1547e3dbbb6fe8a861b56cb98e9922cd5d20170ee2e7e649faa1605dfdb49"}, + {file = "typos-1.47.0.tar.gz", hash = "sha256:f00d98b8338abd6016f968fb7a3911c911010c17c333c2e102e8893b1c97db8f"}, ] [[package]] name = "url-normalize" -version = "2.2.1" +version = "3.0.0" description = "URL normalization for Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.10" groups = ["main"] files = [ - {file = "url_normalize-2.2.1-py3-none-any.whl", hash = "sha256:3deb687587dc91f7b25c9ae5162ffc0f057ae85d22b1e15cf5698311247f567b"}, - {file = "url_normalize-2.2.1.tar.gz", hash = "sha256:74a540a3b6eba1d95bdc610c24f2c0141639f3ba903501e61a52a8730247ff37"}, + {file = "url_normalize-3.0.0-py3-none-any.whl", hash = 
"sha256:95234bd359f86831c1fd87c248877f2a6887db2f3b5087120083f2fffcba4889"}, + {file = "url_normalize-3.0.0.tar.gz", hash = "sha256:0552cbf2831a32a28994a13d29bca58a60e10ff6c0380e343ec6d1c2a0d232d8"}, ] [package.dependencies] @@ -3461,14 +3471,14 @@ dev = ["mypy", "pre-commit", "pytest", "pytest-cov", "pytest-socket", "ruff"] [[package]] name = "urllib3" -version = "2.6.3" +version = "2.7.0" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main", "docs"] files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897"}, + {file = "urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c"}, ] [package.extras] @@ -3479,14 +3489,14 @@ zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] [[package]] name = "wcwidth" -version = "0.6.0" +version = "0.7.0" description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = ">=3.8" groups = ["main", "dev"] files = [ - {file = "wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad"}, - {file = "wcwidth-0.6.0.tar.gz", hash = "sha256:cdc4e4262d6ef9a1a57e018384cbeb1208d8abbc64176027e2c2455c81313159"}, + {file = "wcwidth-0.7.0-py3-none-any.whl", hash = "sha256:5d69154c429a82910e241c738cd0e2976fac8a2dd47a1a805f4afed1c0f136f2"}, + {file = "wcwidth-0.7.0.tar.gz", hash = "sha256:90e3a7ea092341c44b99562e75d09e4d5160fe7a3974c6fb842a101a95e7eed0"}, ] [[package]] @@ -3503,24 +3513,24 @@ files = [ [[package]] name = "zipp" -version = "3.23.1" +version = 
"4.1.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "python_version < \"3.12\"" files = [ - {file = "zipp-3.23.1-py3-none-any.whl", hash = "sha256:0b3596c50a5c700c9cb40ba8d86d9f2cc4807e9bedb06bcdf7fac85633e444dc"}, - {file = "zipp-3.23.1.tar.gz", hash = "sha256:32120e378d32cd9714ad503c1d024619063ec28aad2248dc6672ad13edfa5110"}, + {file = "zipp-4.1.0-py3-none-any.whl", hash = "sha256:25ad4e16390cd314347dd8f1de67a2ac538ae658ed4ab9db16029c07c188e97f"}, + {file = "zipp-4.1.0.tar.gz", hash = "sha256:4cb57381f544315db7688e976e922a2b18cdb513d21cc194eb42232ba2a3e602"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +check = ["pytest-checkdocs (>=2.14)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=2.2)"] +enabler = ["pytest-enabler (>=3.4)"] test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] -type = ["pytest-mypy"] +type = ["pytest-mypy (>=1.0.1) ; platform_python_implementation != \"PyPy\""] [metadata] lock-version = "2.1" From 7569fff722c18e5d6e5d1d863ae062b7658d3ecd Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 10:38:38 +0200 Subject: [PATCH 77/89] chore(release): :bookmark: bump version to 0.10.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 794420b08..81044e45f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "roc-validator" -version = "0.9.0" +version = "0.10.0" description = "A Python package to validate RO-Crates" authors = [ "Marco Enrico Piras ", From 
e10c3b5228c1186cfd2e8b68ed745be7f418c0d7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 11:50:01 +0200 Subject: [PATCH 78/89] docs: :memo: add documentation for cache and offline mode features --- docs/0_toc.rst | 1 + docs/2_usage_cli.rst | 6 ++ docs/3_usage_api.rst | 6 ++ docs/5_offline_mode.rst | 154 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 167 insertions(+) create mode 100644 docs/5_offline_mode.rst diff --git a/docs/0_toc.rst b/docs/0_toc.rst index 7a6b1c48a..e397d56e5 100644 --- a/docs/0_toc.rst +++ b/docs/0_toc.rst @@ -21,6 +21,7 @@ 2_usage_cli 3_usage_api 4_how_it_works + 5_offline_mode .. toctree:: :maxdepth: 5 diff --git a/docs/2_usage_cli.rst b/docs/2_usage_cli.rst index 729cdcf5c..e12413beb 100644 --- a/docs/2_usage_cli.rst +++ b/docs/2_usage_cli.rst @@ -31,3 +31,9 @@ CLI Validation :parser: myst_parser.sphinx_ :start-line: 93 :end-line: 120 + +.. seealso:: + + To validate without network access and manage the HTTP cache from the + command line (the ``--offline`` and ``--no-cache`` flags and the ``cache`` + subcommand), see :ref:`offline_mode`. diff --git a/docs/3_usage_api.rst b/docs/3_usage_api.rst index bb1e9b660..d9026e752 100644 --- a/docs/3_usage_api.rst +++ b/docs/3_usage_api.rst @@ -30,6 +30,12 @@ Programmatic Validation :start-line: 121 :end-line: 162 +.. seealso:: + + To resolve resources from a local cache or run validation without network + access (the ``offline`` / ``no_cache`` settings of ``ValidationSettings``), + see :ref:`offline_mode`. + Metadata-only Validation ------------------------ diff --git a/docs/5_offline_mode.rst b/docs/5_offline_mode.rst new file mode 100644 index 000000000..b8e870a4b --- /dev/null +++ b/docs/5_offline_mode.rst @@ -0,0 +1,154 @@ +.. + Copyright (c) 2024 CRS4 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +.. _offline_mode: + +Offline Mode and HTTP Caching +============================= + +To resolve remote resources — JSON-LD ``@context`` documents, profile artifacts +and, optionally, remote RO-Crates — the validator performs HTTP requests. These +requests go through a **persistent HTTP cache**, which makes validation faster +and reproducible and enables an **offline mode** where requests are served +exclusively from the cache. + +This page covers how to use offline mode and manage the cache both from the +:ref:`command line <offline_mode_cli>` and through the +:ref:`Python API <offline_mode_api>`. + + +How caching works +----------------- + +Every HTTP-backed resource fetched during validation is stored in a persistent +cache (by default under the user cache directory, shared across runs). On the +first online validation against a profile, the resources it declares are cached +automatically, so a later run can reuse the same cache without any network +access. + +Offline mode (``--offline`` / ``offline=True``) forbids network access +altogether: every request must be satisfied by the cache, otherwise the affected +resource is reported as a cache miss. For this reason offline mode requires the +cache to be enabled and cannot be combined with the cache-disabling options. + + +.. _offline_mode_cli: + +Command-line usage +------------------ + +Offline validation +~~~~~~~~~~~~~~~~~~~ + +Pass ``--offline`` to the ``validate`` command to forbid any network access: +every HTTP request must then be satisfied by the cache. + +.. 
code-block:: bash + + rocrate-validator validate --offline path/to/ro-crate + +Related options: + +- ``--cache-path PATH`` — use a specific cache directory. By default a persistent + directory under the user cache dir is used, so entries are shared across runs. +- ``--cache-max-age SECONDS`` — maximum age of cached entries; ``-1`` (the + default) means entries never expire. +- ``--no-cache`` / ``-nc`` — disable the cache entirely: every request hits the + network and nothing is persisted. This flag is **mutually exclusive** with + ``--offline``, since offline mode needs the cache to serve requests. + +Managing the cache +~~~~~~~~~~~~~~~~~~ + +The ``cache`` subcommand inspects and manages the HTTP cache: + +.. code-block:: bash + + # Show the cache location, backend, size and offline status + rocrate-validator cache info + + # List cached entries (alias: `ls`); filter, sort or emit JSON + rocrate-validator cache list + rocrate-validator cache list --url w3id.org --sort size + rocrate-validator cache list --json + + # Remove every cached entry + rocrate-validator cache reset --yes + +Pre-populating the cache (warm-up) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Before going offline you can pre-fetch everything you will need with +``cache warm``: + +.. code-block:: bash + + # Warm the resources declared by every installed profile + rocrate-validator cache warm --all-profiles + + # Warm only specific profiles + rocrate-validator cache warm -p ro-crate-1.1 -p workflow-ro-crate-1.0 + + # Also fetch and cache remote RO-Crates or arbitrary URLs + rocrate-validator cache warm --crate https://example.org/crate.zip + rocrate-validator cache warm -u https://w3id.org/ro/crate/1.1/context + +When invoked without any source option, ``cache warm`` defaults to warming all +installed profiles. A summary table reports which URLs were cached, skipped or +failed; the command exits with a non-zero status if any URL fails. + + +.. 
_offline_mode_api: + +Programmatic usage +------------------ + +The same offline behaviour can be enabled programmatically through +``ValidationSettings``: + +.. code-block:: python + + from rocrate_validator import services, models + + settings = services.ValidationSettings( + rocrate_uri='/path/to/ro-crate', + profile_identifier='ro-crate-1.1', + # Serve every HTTP request from the cache; uncached resources fail. + offline=True, + # Optional: use a dedicated cache directory (defaults to the user cache). + # cache_path='/tmp/rocv-cache', + # Optional: maximum age of cached entries; -1 (default) = never expire. + # cache_max_age=-1, + ) + + result = services.validate(settings) + +The cache-related settings are: + +- ``offline`` (``bool``, default ``False``) — when ``True``, HTTP requests are + served only from the cache; uncached resources raise a cache-miss error. +- ``no_cache`` (``bool``, default ``False``) — disable the cache entirely. It is + **incompatible** with ``offline=True`` and raises ``ValueError`` if combined. +- ``cache_path`` (``Path``, optional) — cache directory; defaults to the + persistent user cache so online and offline runs share the same entries. +- ``cache_max_age`` (``int``, optional) — maximum entry age in seconds; ``-1`` + means entries never expire. + +When ``offline`` is ``False``, the resources declared by the selected profiles +are warmed up automatically before validation, so that a later offline run +reusing the same cache succeeds without network access. To pre-populate the cache +explicitly (e.g. in a CI pipeline), use the ``rocrate-validator cache warm`` +command described in :ref:`offline_mode_cli`. 
From c5bf1f56f176b5a1cc0235f51332cb27195b83e0 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 12:34:41 +0200 Subject: [PATCH 79/89] docs: :books: update copyright years and refactor acknowledgements section --- docs/0_toc.rst | 8 +++++++- docs/10_api.rst | 4 ++-- docs/1_installation.rst | 2 +- docs/2_usage_cli.rst | 2 +- docs/3_usage_api.rst | 2 +- docs/4_how_it_works.rst | 2 +- docs/5_offline_mode.rst | 2 +- docs/ack.rst | 23 ++++++----------------- docs/conf.py | 2 +- docs/index.rst | 2 +- 10 files changed, 22 insertions(+), 27 deletions(-) diff --git a/docs/0_toc.rst b/docs/0_toc.rst index e397d56e5..a1f299bb7 100644 --- a/docs/0_toc.rst +++ b/docs/0_toc.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -31,3 +31,9 @@ 11_writing_a_profile 10_api genindex + +.. toctree:: + :maxdepth: 1 + :caption: About + + ack diff --git a/docs/10_api.rst b/docs/10_api.rst index f327e407f..715d3ca41 100644 --- a/docs/10_api.rst +++ b/docs/10_api.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -135,7 +135,7 @@ Python Check API ====================== Requirement Class ----------- +----------------- .. autoclass:: rocrate_validator.requirements.python.PyRequirement :members: diff --git a/docs/1_installation.rst b/docs/1_installation.rst index c8be2703b..6a476edf9 100644 --- a/docs/1_installation.rst +++ b/docs/1_installation.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/docs/2_usage_cli.rst b/docs/2_usage_cli.rst index e12413beb..62951f5b1 100644 --- a/docs/2_usage_cli.rst +++ b/docs/2_usage_cli.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/docs/3_usage_api.rst b/docs/3_usage_api.rst index d9026e752..bc3f57c1d 100644 --- a/docs/3_usage_api.rst +++ b/docs/3_usage_api.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/docs/4_how_it_works.rst b/docs/4_how_it_works.rst index 51bd7e950..7801eaed1 100644 --- a/docs/4_how_it_works.rst +++ b/docs/4_how_it_works.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/docs/5_offline_mode.rst b/docs/5_offline_mode.rst index b8e870a4b..b65e6831f 100644 --- a/docs/5_offline_mode.rst +++ b/docs/5_offline_mode.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/docs/ack.rst b/docs/ack.rst index 3a66255d1..cc486da36 100644 --- a/docs/ack.rst +++ b/docs/ack.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,22 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. -.. _acknowledgements: - -.. 
toctree:: - :maxdepth: 5 - :caption: Contents: - Acknowledgements ================ -This work has been partially funded by the following sources: - -.. image:: https://raw.githubusercontent.com/crs4/rocrate-validator/develop/docs/img/eu-logo/EN_Co-fundedbytheEU_RGB_POS.png - :alt: Co-funded by the EU - :width: 250px - :align: right - -- the `BY-COVID `_ project (HORIZON Europe grant agreement number 101046203); -- the `LIFEMap `_ project, funded by the Italian Ministry of Health (Piano Operative Salute, Trajectory 3). - +.. Body sourced from the README so it stays in sync with it (the README +.. "## Acknowledgements" heading is skipped: the page title above replaces it). +.. include:: ../README.md + :parser: myst_parser.sphinx_ + :start-line: 182 diff --git a/docs/conf.py b/docs/conf.py index 965240cbf..55be6d2b4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,7 +31,7 @@ # Set project metadata project = 'rocrate-validator' -copyright = '2024, CRS4' +copyright = '2024-2026, CRS4' author = 'Marco Enrico Piras, Luca Pireddu, Simone Leo' release = __version__ diff --git a/docs/index.rst b/docs/index.rst index 21e58d0d8..143c4fdae 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. From 15f2259eed7f393692408b360e6188cbd43083f4 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 12:37:31 +0200 Subject: [PATCH 80/89] chore(docs): update configuration to suppress warnings --- docs/conf.py | 14 ++++++++++++++ docs/requirements.txt | 1 + 2 files changed, 15 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 55be6d2b4..872323e38 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -68,6 +68,20 @@ 'sphinx_copybutton', ] +# Only auto-generate section labels for the top two heading levels: deeper +# subsections (e.g. 
the repeated "SHACL checks" / "Python checks" headings) +# would otherwise produce duplicate-label warnings within the same document. +autosectionlabel_maxdepth = 2 + +# Warnings raised while embedding the Markdown README into the Sphinx pages. +# The README is the canonical GitHub document: its slices intentionally start +# below H1 (myst.header) and use GitHub-relative anchor links that span pages +# (myst.xref_missing). These are expected when including it here. +suppress_warnings = [ + 'myst.header', + 'myst.xref_missing', +] + templates_path = ['_templates'] # exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'experiments', 'ontologies', 'tests', 'logs', 'examples', 'debug'] # List of patterns, relative to source directory, that match files and diff --git a/docs/requirements.txt b/docs/requirements.txt index 22ad40616..8d92b0be1 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,3 +5,4 @@ enum-tools==0.12.0 sphinx-toolbox==3.8.1 myst-parser==4.0.0 sphinx_rtd_theme==3 +sphinx-copybutton==0.5.2 From af0fe9b89a7966fbf35e8ca192011ea685619b7e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 12:45:30 +0200 Subject: [PATCH 81/89] chore: :bookmark: update changelog (v0.10.0) --- CHANGELOG.md | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34299c010..2a97ae233 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,85 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [0.10.0] - 2026-06-01 + +Full changelog: https://github.com/crs4/rocrate-validator/compare/0.9.0...0.10.0 + +### ✨ Added + +- feat(cli): add offline mode with an `--offline` flag and an HTTP `cache` subcommand to validate RO-Crates without network access ([e296f10](https://github.com/crs4/rocrate-validator/commit/e296f10), [f8b99bc](https://github.com/crs4/rocrate-validator/commit/f8b99bc)) +- feat(utils): add a cache-aware JSON-LD document loader and HTTP cache warm-up from profile artifacts ([eedabf1](https://github.com/crs4/rocrate-validator/commit/eedabf1), [949ec6f](https://github.com/crs4/rocrate-validator/commit/949ec6f)) +- feat(utils): add offline mode and cache management to `HttpRequester`, with configurable cache path ([c92d88c](https://github.com/crs4/rocrate-validator/commit/c92d88c), [7c9e825](https://github.com/crs4/rocrate-validator/commit/7c9e825)) +- feat(core): support offline mode when downloading remote RO-Crates ([57eceb5](https://github.com/crs4/rocrate-validator/commit/57eceb5)) +- feat(cli/cache): add `cache list` (alias `ls`) and allow caching of explicit URLs via `cache warm` ([f9b43ae](https://github.com/crs4/rocrate-validator/commit/f9b43ae), [6925d55](https://github.com/crs4/rocrate-validator/commit/6925d55)) +- feat(cli): extend the `describe` command to describe individual requirement checks ([f3fb7f3](https://github.com/crs4/rocrate-validator/commit/f3fb7f3)) +- feat(model): add a `SourceSnippet` class and `RequirementCheck.get_source_snippet`, implemented for both SHACL and Python checks ([9f48674](https://github.com/crs4/rocrate-validator/commit/9f48674), [25bc7b4](https://github.com/crs4/rocrate-validator/commit/25bc7b4), [fe740e4](https://github.com/crs4/rocrate-validator/commit/fe740e4)) +- feat(checks): support a `deactivated` flag on `RequirementCheck` to override/deactivate checks by name for Python and SHACL ([dd84c32](https://github.com/crs4/rocrate-validator/commit/dd84c32), 
[4196f0e](https://github.com/crs4/rocrate-validator/commit/4196f0e)) +- feat(rocrate): add `check_availability()` with `AvailabilityStatus` on entities and granular remote-scheme classification ([76e92a4](https://github.com/crs4/rocrate-validator/commit/76e92a4), [469bbe5](https://github.com/crs4/rocrate-validator/commit/469bbe5)) +- feat(checks): handle `UNAUTHORIZED`/`UNCHECKABLE` web data entities as warnings ([0196dc9](https://github.com/crs4/rocrate-validator/commit/0196dc9)) +- feat(uri): add an `is_external_reference()` scheme detector ([62f89c0](https://github.com/crs4/rocrate-validator/commit/62f89c0)) +- feat(model): extend the `Profile` model to compute descendants ([a9783ce](https://github.com/crs4/rocrate-validator/commit/a9783ce)) +- ISA profile: add RDF classes for ISA types and a dedicated `Process` class for process checks ([3ff4245](https://github.com/crs4/rocrate-validator/commit/3ff4245), [e3fda59](https://github.com/crs4/rocrate-validator/commit/e3fda59), [c3e2d5f](https://github.com/crs4/rocrate-validator/commit/c3e2d5f)) + +### 🔧 Changed + +- refactor(models): introduce pre/post internal validation hooks on `Validator` ([e43364b](https://github.com/crs4/rocrate-validator/commit/e43364b)) +- refactor(SHACL): rewrite `build_node_subgraph` as an iterative BNode traversal ([23164b7](https://github.com/crs4/rocrate-validator/commit/23164b7)) +- refactor(errors): accept `str`, `Path` or `URI` in `ROCrateInvalidURIError` ([0b8289b](https://github.com/crs4/rocrate-validator/commit/0b8289b)) +- refactor(cli/cache): drop the `Status` column from the `cache list` table ([392df1a](https://github.com/crs4/rocrate-validator/commit/392df1a)) +- feat(constants): default the HTTP cache to never expire ([419fece](https://github.com/crs4/rocrate-validator/commit/419fece)) + +### 🐛 Fixed + +- fix(shacl): build property shape subgraphs by reachability and derive `NodeShape` level from nested `PropertyShape`s 
([f1c0cfd](https://github.com/crs4/rocrate-validator/commit/f1c0cfd), [50448145](https://github.com/crs4/rocrate-validator/commit/5044814)) +- fix(shacl): evaluate inherited shapes for zero-shape target profiles ([bcb5cac](https://github.com/crs4/rocrate-validator/commit/bcb5cac)) +- fix(shacl): drop sub-threshold PySHACL violations at the source ([254fb88](https://github.com/crs4/rocrate-validator/commit/254fb88)) +- fix(core): filter failed requirements/checks by the configured severity ([a306f7f](https://github.com/crs4/rocrate-validator/commit/a306f7f)) +- fix(models): forward `extra_profiles_path` when computing validation statistics ([7b3e8bc](https://github.com/crs4/rocrate-validator/commit/7b3e8bc)) +- fix(uri): treat `file://` URIs with a non-local authority as remote ([63acb6a](https://github.com/crs4/rocrate-validator/commit/63acb6a)) +- fix(cli/cache): resolve profile tokens in `cache warm` and avoid `stream=True` when fetching remote crates ([7926832](https://github.com/crs4/rocrate-validator/commit/7926832), [33012b6](https://github.com/crs4/rocrate-validator/commit/33012b6)) +- fix(validation): report offline cache misses once per URL ([757b86a](https://github.com/crs4/rocrate-validator/commit/757b86a)) +- fix(ISA): correct the error message for a bad position in `HowToStep` ([03a32fc](https://github.com/crs4/rocrate-validator/commit/03a32fc)) + +### 📚 Documentation + +- docs: add a dedicated documentation page for the cache and offline mode features ([e10c3b5](https://github.com/crs4/rocrate-validator/commit/e10c3b5)) +- docs(profiles): document check override-by-name and deactivation ([0b6bff7](https://github.com/crs4/rocrate-validator/commit/0b6bff7)) + +## [0.9.0] - 2026-04-20 + +Full changelog: https://github.com/crs4/rocrate-validator/compare/0.8.1...0.9.0 + +### ✨ Added + +- feat(profiles/isa): add the ISA RO-Crate profile, with checks and tests for Investigation, Study, Assay, Process, Protocol, Sample, Data, Person and PropertyValue entities 
([852fb23](https://github.com/crs4/rocrate-validator/commit/852fb23), [d62e214](https://github.com/crs4/rocrate-validator/commit/d62e214), [727b6f0](https://github.com/crs4/rocrate-validator/commit/727b6f0)) +- feat(cli): add CLI options to configure the HTTP cache (`--cache-path`, `--cache-max-age`) ([564230f](https://github.com/crs4/rocrate-validator/commit/564230f)) +- feat(model): enable cache configuration in `ValidationSettings` ([b2b47ba](https://github.com/crs4/rocrate-validator/commit/b2b47ba)) +- feat(utils): extend the `HttpRequester` constructor to support cache configuration parameters ([2f2a873](https://github.com/crs4/rocrate-validator/commit/2f2a873)) +- feat(ro-crate): refine the constraint enforcing metadata descriptor existence ([2c6ea76](https://github.com/crs4/rocrate-validator/commit/2c6ea76)) +- feat(file-descriptor): add an internal remote-context retrieval method supporting the alternate `Link` header ([f8b0e55](https://github.com/crs4/rocrate-validator/commit/f8b0e55)) + +### 🔧 Changed + +- refactor(ro-crate): relax the `ROCrateMetadataFileDescriptor` class definition ([61ddbb5](https://github.com/crs4/rocrate-validator/commit/61ddbb5)) +- refactor(file-descriptor): route checks through the new remote-context retrieval method ([0ce2619](https://github.com/crs4/rocrate-validator/commit/0ce2619)) +- chore(utils): increase the session cache max age to 300 seconds ([36ca0ac](https://github.com/crs4/rocrate-validator/commit/36ca0ac)) +- ci(gh-actions): update outdated GitHub Actions ([d565c5d](https://github.com/crs4/rocrate-validator/commit/d565c5d)) + +### 🐛 Fixed + +- fix(ro-crate): target metadata descriptor shapes by class and select the candidate descriptor via SPARQL ([8219f27](https://github.com/crs4/rocrate-validator/commit/8219f27), [39bd761](https://github.com/crs4/rocrate-validator/commit/39bd761), [1a91aa4](https://github.com/crs4/rocrate-validator/commit/1a91aa4)) +- fix(shacl): extract `@base` from the JSON-LD document for 
ontology parsing ([57f5c54](https://github.com/crs4/rocrate-validator/commit/57f5c54)) +- fix(SHACL-core): improve SHACL violation parsing with better error handling ([90a9f06](https://github.com/crs4/rocrate-validator/commit/90a9f06)) +- fix(file-descriptor): accept `application/json` and treat the `Link` header case-insensitively for remote context retrieval ([fe5ba1c](https://github.com/crs4/rocrate-validator/commit/fe5ba1c)) +- fix(file-descriptor): refine the compacted JSON-LD key validation logic ([45a7017](https://github.com/crs4/rocrate-validator/commit/45a7017)) +- fix(core): allow terms defined by context prefixes ([5fe8171](https://github.com/crs4/rocrate-validator/commit/5fe8171)) +- fix(core): fix output formatting ([523fbf4](https://github.com/crs4/rocrate-validator/commit/523fbf4)) + +### 📚 Documentation + +- docs(cli): document the `-1` value for no cache expiration in the `--cache-max-age` help ([c5848bc](https://github.com/crs4/rocrate-validator/commit/c5848bc)) + ## [0.8.1] - 2026-02-18 Full changelog: https://github.com/crs4/rocrate-validator/compare/0.8.0...0.8.1 From 46b74d75878d4b7de601141ab589e2f164cc9886 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 16:40:35 +0200 Subject: [PATCH 82/89] chore(typos): :wrench: exclude hexadecimal identifiers from spell checking --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 81044e45f..277814026 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -115,5 +115,8 @@ filterwarnings = [ "ignore::DeprecationWarning:rdflib.plugins.parsers.jsonld", ] +[tool.typos.default] +extend-ignore-re = ["[0-9a-f]{7,40}"] # Ignore long hexadecimal strings, which are often used as identifiers (e.g., Git commit hashes, UUIDs) and are not typically misspelled words. 
+ [tool.typos.files] extend-exclude = ["tests/data", "docs/diagrams", "*.json", "*.html", "*__init__.py"] From 5847d4ac6001f0058e30aa59204a6d67ee0b916a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 16:55:50 +0200 Subject: [PATCH 83/89] chore(release): :arrow_up: update dependencies in `poetry.lock` file --- poetry.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 202f58df2..ef545a0f4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.4.1 and should not be changed by hand. [[package]] name = "alabaster" @@ -1028,7 +1028,7 @@ pfzy = ">=0.3.1,<0.4.0" prompt-toolkit = ">=3.0.1,<4.0.0" [package.extras] -docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] +docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] [[package]] name = "ipykernel" @@ -1235,7 +1235,7 @@ files = [ [package.dependencies] attrs = ">=22.2.0" -jsonschema-specifications = ">=2023.03.6" +jsonschema-specifications = ">=2023.3.6" referencing = ">=0.28.4" rpds-py = ">=0.25.0" @@ -1828,7 +1828,7 @@ files = [ ] [package.extras] -docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] +docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] [[package]] name = "platformdirs" @@ -2023,8 +2023,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0" colorama = 
{version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.2", markers = "python_version < \"3.11\""}, - {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, {version = ">=0.3.6", markers = "python_version == \"3.11\""}, + {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, ] isort = ">=4.2.5,<5.13 || >5.13,<7" mccabe = ">=0.6,<0.8" From ded02794e84a908b036438bae86eb47a37a01e7c Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 1 Jun 2026 17:09:05 +0200 Subject: [PATCH 84/89] =?UTF-8?q?=20chore:=20=F0=9F=94=A7=20update=20obsol?= =?UTF-8?q?ete=20GitHub=20actions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 🚀 crate-ci/typos: v1.41.0 → v1.47.0 - 📦 actions/upload-artifact: v4 → v7 - 📥 actions/download-artifact: v4 → v8 - 🖊️ sigstore/gh-action-sigstore-python: v3.0.0 → v3.3.0 --- .github/workflows/release.yaml | 16 ++++++++-------- .github/workflows/testing.yaml | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index ccf71abee..5b089fbcc 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -92,7 +92,7 @@ jobs: - name: 🏗️ Build a binary wheel and a source tarball run: poetry build - name: 📦 Store the distribution packages - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: python-package-distributions path: | @@ -111,7 +111,7 @@ jobs: id-token: write # IMPORTANT: mandatory for trusted publishing steps: - name: ⬇️ Download all the distribution packages - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: python-package-distributions path: dist/ @@ -132,7 +132,7 @@ jobs: id-token: write # IMPORTANT: mandatory for trusted publishing steps: - name: ⬇️ Download all the dists - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: python-package-distributions path: 
dist/ @@ -151,18 +151,18 @@ jobs: steps: - name: ⬇️ Download all the distribution packages - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: python-package-distributions path: dist/ - name: 🖊️ Sign the dists with Sigstore - uses: sigstore/gh-action-sigstore-python@v3.0.0 + uses: sigstore/gh-action-sigstore-python@v3.3.0 with: inputs: >- ./dist/*.tar.gz ./dist/*.whl - name: 📦 Store the signed distribution packages - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: python-package-signatures path: dist/*.json @@ -177,12 +177,12 @@ jobs: id-token: write # IMPORTANT: mandatory for sigstore steps: - name: ⬇️ Download all the distribution packages - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: python-package-distributions path: dist/ - name: ⬇️ Download all the distribution signatures - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: python-package-signatures path: dist/ diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yaml index 97b68d503..dd1c3c137 100644 --- a/.github/workflows/testing.yaml +++ b/.github/workflows/testing.yaml @@ -56,7 +56,7 @@ jobs: - name: ⌛ Lint Python code run: flake8 -v rocrate_validator tests - name: ⌛ Spell check code and profiles (covers Python and SHACL) - uses: crate-ci/typos@v1.41.0 + uses: crate-ci/typos@v1.47.0 # Runs the tests test: From 9a82482dfc2f3a74a811ed2ca02434a15fce90e4 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 9 Jun 2026 12:40:51 +0200 Subject: [PATCH 85/89] refactor(http): extract session teardown into _close_session helper --- rocrate_validator/utils/http.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index a5e1d71f8..be4abeee8 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -357,8 +357,19 @@ 
def initialize_cache(cls, :param no_cache: When ``True``, disable the HTTP cache entirely and use a plain ``requests.Session``. Incompatible with ``offline``. """ - return cls(cache_max_age=cache_max_age, cache_path=cache_path, - offline=offline, no_cache=no_cache) + def _close_session(self) -> None: + """Close the current session and remove its cache file if it is temporary.""" + session = getattr(self, "session", None) + if session is not None and hasattr(session, "close"): + try: + session.close() + except Exception as e: + logger.debug("Error closing previous session: %s", e) + if getattr(self, "permanent_cache", True) is False: + try: + self.cleanup() + except Exception as e: + logger.debug("Error cleaning up previous cache: %s", e) @classmethod def reset(cls) -> None: @@ -369,17 +380,7 @@ def reset(cls) -> None: with cls._lock: instance = cls._instance if instance is not None: - try: - session = getattr(instance, "session", None) - if session is not None and hasattr(session, "close"): - session.close() - except Exception as e: - logger.debug("Error closing previous session: %s", e) - if getattr(instance, "permanent_cache", True) is False: - try: - instance.cleanup() - except Exception as e: - logger.debug("Error cleaning up previous cache: %s", e) + instance._close_session() cls._instance = None From 80b15c952270a49dc596fd37f3635226c4eb4d2b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 9 Jun 2026 12:47:09 +0200 Subject: [PATCH 86/89] feat(http): reconfigure existing HttpRequester singleton instead of recreating it --- rocrate_validator/utils/http.py | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index be4abeee8..b239a3a1a 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -357,6 +357,18 @@ def initialize_cache(cls, :param no_cache: When ``True``, disable the HTTP cache entirely and use a plain 
``requests.Session``. Incompatible with ``offline``. """ + with cls._lock: + instance = cls._instance + if instance is None: + return cls(cache_max_age=cache_max_age, cache_path=cache_path, + offline=offline, no_cache=no_cache) + # Re-apply the configuration without recreating the instance: + # we keep the same singleton in place and only rebuild its underlying session, + # rather than dropping and recreating the object (as ``reset`` does). + instance._reconfigure(cache_max_age=cache_max_age, cache_path=cache_path, + offline=offline, no_cache=no_cache) + return instance + def _close_session(self) -> None: """Close the current session and remove its cache file if it is temporary.""" session = getattr(self, "session", None) @@ -371,6 +383,30 @@ def _close_session(self) -> None: except Exception as e: logger.debug("Error cleaning up previous cache: %s", e) + def _reconfigure(self, + cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, + cache_path: Optional[str] = None, + offline: bool = False, + no_cache: bool = False) -> None: + """ + Rebuild the underlying session with new cache settings while preserving + the singleton instance (and any attributes set on it, e.g. test patches). + """ + with self._lock: + self._close_session() + try: + self.cache_max_age = int(cache_max_age) + except ValueError: + raise TypeError("cache_max_age must be an integer") + self.cache_path_prefix = cache_path + self.offline = bool(offline) + self.no_cache = bool(no_cache) + self.permanent_cache = cache_path is not None + # ``__initialize_session__`` asserts the instance is not yet initialized. 
+ self._initialized = False + self.__initialize_session__(cache_max_age, cache_path) + self._initialized = True + @classmethod def reset(cls) -> None: """ From c9b4cc2f190573ddecca65df90f8980fa625f6eb Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 9 Jun 2026 12:48:11 +0200 Subject: [PATCH 87/89] fix(http): resolve session method lazily in HTTP wrapper --- rocrate_validator/utils/http.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index b239a3a1a..fa825bfa5 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -232,9 +232,13 @@ def __getattr__(self, name): """ if name.upper() in {"GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH"}: method = name.lower() - session_method = getattr(self.session, method) def _wrapped(url, *args, **kwargs): + # Resolve the session method lazily, at call time, so the wrapper + # always targets the current session. This keeps the wrapper valid + # after the session is rebuilt in place (see ``_reconfigure``) and + # avoids holding a reference to a closed session. + session_method = getattr(self.session, method) response = session_method(url, *args, **kwargs) _log_cache_outcome(method.upper(), url, response, offline=self.offline) return response From 38482fc6a611721e4e74cc408d86ed41b83c3eed Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 9 Jun 2026 12:49:55 +0200 Subject: [PATCH 88/89] fix(models): reconfigure HTTP requester in place instead of resetting it --- rocrate_validator/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index 08e44d707..cb69fe77d 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -2699,9 +2699,9 @@ def __post_init__(self): "Offline mode enabled without a persistent cache path: " "all HTTP-backed resources will fail unless pre-populated." 
) - # Reset any previously initialized singleton so new settings take effect. - HttpRequester.reset() - # initialize the HTTP cache + # Re-apply the cache settings to the HTTP requester. ``initialize_cache`` + # reconfigures the existing singleton in place (rather than dropping it), + # so new settings take effect without discarding state set on the instance. HttpRequester.initialize_cache( cache_path=str(self.cache_path) if self.cache_path is not None else None, cache_max_age=self.cache_max_age, From 877ea8d98cadf3f43827cfe454ec306f88761708 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 9 Jun 2026 16:16:43 +0200 Subject: [PATCH 89/89] test(http): add unit tests --- tests/unit/test_http_requester_reconfigure.py | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 tests/unit/test_http_requester_reconfigure.py diff --git a/tests/unit/test_http_requester_reconfigure.py b/tests/unit/test_http_requester_reconfigure.py new file mode 100644 index 000000000..46e05a184 --- /dev/null +++ b/tests/unit/test_http_requester_reconfigure.py @@ -0,0 +1,154 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest + +from rocrate_validator.utils.http import HttpRequester + + +@pytest.fixture(autouse=True) +def _reset_singleton(): + HttpRequester.reset() + yield + HttpRequester.reset() + + +def _initialize(cache_path, offline=False, cache_max_age=-1): + return HttpRequester.initialize_cache( + cache_path=str(cache_path), + cache_max_age=cache_max_age, + offline=offline, + ) + + +def _fake_session(status_code=200): + """A session-like mock whose ``get`` returns a sentinel response.""" + session = MagicMock() + response = MagicMock(status_code=status_code, from_cache=False) + session.get.return_value = response + return session, response + + +def test_initialize_cache_creates_instance_when_absent(tmp_path): + assert HttpRequester._instance is None + requester = _initialize(tmp_path / "cache") + assert isinstance(requester, HttpRequester) + assert HttpRequester._instance is requester + + +def test_initialize_cache_reuses_existing_instance(tmp_path): + first = _initialize(tmp_path / "cache-1") + second = _initialize(tmp_path / "cache-2") + # The singleton is reconfigured in place rather than recreated. + assert second is first + + +def test_reconfigure_applies_new_settings(tmp_path): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + assert requester.offline is False + + same = _initialize(tmp_path / "cache", offline=True, cache_max_age=-1) + assert same is requester + assert same.offline is True + # Offline mode is enforced on the freshly rebuilt session. 
+ assert getattr(same.session.settings, "only_if_cached", False) is True + + +def test_reconfigure_rebuilds_underlying_session(tmp_path): + requester = _initialize(tmp_path / "cache-1", cache_max_age=60) + old_session = requester.session + _initialize(tmp_path / "cache-2", cache_max_age=60) + assert requester.session is not old_session + + +def test_reconfigure_preserves_instance_attributes(tmp_path): + """Regression: reconfiguring the cache must not discard state set on the + singleton (e.g. methods patched by tests).""" + requester = _initialize(tmp_path / "cache-1", cache_max_age=60) + sentinel = object() + requester.custom_marker = sentinel + + _initialize(tmp_path / "cache-2", cache_max_age=60) + + assert requester.custom_marker is sentinel + + +def test_method_wrapper_targets_current_session(tmp_path): + """The ``__getattr__`` HTTP wrappers resolve the session at call time, so a + wrapper obtained before a session swap still hits the live session.""" + requester = _initialize(tmp_path / "cache", cache_max_age=60) + + first_session, _ = _fake_session() + requester.session = first_session + wrapper = requester.get # captured before swapping the session + + second_session, expected = _fake_session(status_code=201) + requester.session = second_session + + result = wrapper("https://example.org/x") + + assert result is expected + second_session.get.assert_called_once() + first_session.get.assert_not_called() + + +def test_pinned_wrapper_survives_reconfigure(tmp_path): + """Mimics how ``pytest.monkeypatch`` teardown leaves a method wrapper pinned + as an instance attribute: after a reconfigure rebuilds the session, that + wrapper must still target the live session, not a closed one.""" + requester = _initialize(tmp_path / "cache-1", cache_max_age=60) + requester.get = requester.get # pin the wrapper as an instance attribute + + _initialize(tmp_path / "cache-2", cache_max_age=60) # rebuilds the session + + mock_session, expected = _fake_session() + requester.session 
= mock_session + + result = requester.get("https://example.org/x") + + assert result is expected + mock_session.get.assert_called_once() + + +def test_reset_drops_instance(tmp_path): + requester = _initialize(tmp_path / "cache", cache_max_age=60) + HttpRequester.reset() + assert HttpRequester._instance is None + # A subsequent initialization yields a brand-new instance. + assert _initialize(tmp_path / "cache", cache_max_age=60) is not requester + + +def test_validation_settings_preserves_singleton(tmp_path): + """Constructing ``ValidationSettings`` reconfigures the cache in place and + must not drop the existing requester (nor any state held on it).""" + from rocrate_validator.models import ValidationSettings + from rocrate_validator.utils.uri import URI + + requester = _initialize(tmp_path / "cache", cache_max_age=60) + marker = object() + requester.custom_marker = marker + + # ``offline=True`` keeps the construction self-contained (no warm-up/network). + ValidationSettings( + rocrate_uri=URI("."), + offline=True, + cache_path=tmp_path / "cache", + ) + + assert HttpRequester._instance is requester + assert requester.custom_marker is marker