diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index ccf71abee..5b089fbcc 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -92,7 +92,7 @@ jobs: - name: ๐Ÿ—๏ธ Build a binary wheel and a source tarball run: poetry build - name: ๐Ÿ“ฆ Store the distribution packages - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: python-package-distributions path: | @@ -111,7 +111,7 @@ jobs: id-token: write # IMPORTANT: mandatory for trusted publishing steps: - name: โฌ‡๏ธ Download all the distribution packages - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: python-package-distributions path: dist/ @@ -132,7 +132,7 @@ jobs: id-token: write # IMPORTANT: mandatory for trusted publishing steps: - name: โฌ‡๏ธ Download all the dists - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: python-package-distributions path: dist/ @@ -151,18 +151,18 @@ jobs: steps: - name: โฌ‡๏ธ Download all the distribution packages - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: python-package-distributions path: dist/ - name: ๐Ÿ–Š๏ธ Sign the dists with Sigstore - uses: sigstore/gh-action-sigstore-python@v3.0.0 + uses: sigstore/gh-action-sigstore-python@v3.3.0 with: inputs: >- ./dist/*.tar.gz ./dist/*.whl - name: ๐Ÿ“ฆ Store the signed distribution packages - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: python-package-signatures path: dist/*.json @@ -177,12 +177,12 @@ jobs: id-token: write # IMPORTANT: mandatory for sigstore steps: - name: โฌ‡๏ธ Download all the distribution packages - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: python-package-distributions path: dist/ - name: โฌ‡๏ธ Download all the distribution signatures - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: 
python-package-signatures path: dist/ diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yaml index 97b68d503..dd1c3c137 100644 --- a/.github/workflows/testing.yaml +++ b/.github/workflows/testing.yaml @@ -56,7 +56,7 @@ jobs: - name: โŒ› Lint Python code run: flake8 -v rocrate_validator tests - name: โŒ› Spell check code and profiles (covers Python and SHACL) - uses: crate-ci/typos@v1.41.0 + uses: crate-ci/typos@v1.47.0 # Runs the tests test: diff --git a/CHANGELOG.md b/CHANGELOG.md index 34299c010..2a97ae233 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,85 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.10.0] - 2026-06-01 + +Full changelog: https://github.com/crs4/rocrate-validator/compare/0.9.0...0.10.0 + +### โœจ Added + +- feat(cli): add offline mode with an `--offline` flag and an HTTP `cache` subcommand to validate RO-Crates without network access ([e296f10](https://github.com/crs4/rocrate-validator/commit/e296f10), [f8b99bc](https://github.com/crs4/rocrate-validator/commit/f8b99bc)) +- feat(utils): add a cache-aware JSON-LD document loader and HTTP cache warm-up from profile artifacts ([eedabf1](https://github.com/crs4/rocrate-validator/commit/eedabf1), [949ec6f](https://github.com/crs4/rocrate-validator/commit/949ec6f)) +- feat(utils): add offline mode and cache management to `HttpRequester`, with configurable cache path ([c92d88c](https://github.com/crs4/rocrate-validator/commit/c92d88c), [7c9e825](https://github.com/crs4/rocrate-validator/commit/7c9e825)) +- feat(core): support offline mode when downloading remote RO-Crates ([57eceb5](https://github.com/crs4/rocrate-validator/commit/57eceb5)) +- feat(cli/cache): add `cache list` (alias `ls`) and allow caching of explicit URLs via `cache warm` 
([f9b43ae](https://github.com/crs4/rocrate-validator/commit/f9b43ae), [6925d55](https://github.com/crs4/rocrate-validator/commit/6925d55)) +- feat(cli): extend the `describe` command to describe individual requirement checks ([f3fb7f3](https://github.com/crs4/rocrate-validator/commit/f3fb7f3)) +- feat(model): add a `SourceSnippet` class and `RequirementCheck.get_source_snippet`, implemented for both SHACL and Python checks ([9f48674](https://github.com/crs4/rocrate-validator/commit/9f48674), [25bc7b4](https://github.com/crs4/rocrate-validator/commit/25bc7b4), [fe740e4](https://github.com/crs4/rocrate-validator/commit/fe740e4)) +- feat(checks): support a `deactivated` flag on `RequirementCheck` to override/deactivate checks by name for Python and SHACL ([dd84c32](https://github.com/crs4/rocrate-validator/commit/dd84c32), [4196f0e](https://github.com/crs4/rocrate-validator/commit/4196f0e)) +- feat(rocrate): add `check_availability()` with `AvailabilityStatus` on entities and granular remote-scheme classification ([76e92a4](https://github.com/crs4/rocrate-validator/commit/76e92a4), [469bbe5](https://github.com/crs4/rocrate-validator/commit/469bbe5)) +- feat(checks): handle `UNAUTHORIZED`/`UNCHECKABLE` web data entities as warnings ([0196dc9](https://github.com/crs4/rocrate-validator/commit/0196dc9)) +- feat(uri): add an `is_external_reference()` scheme detector ([62f89c0](https://github.com/crs4/rocrate-validator/commit/62f89c0)) +- feat(model): extend the `Profile` model to compute descendants ([a9783ce](https://github.com/crs4/rocrate-validator/commit/a9783ce)) +- ISA profile: add RDF classes for ISA types and a dedicated `Process` class for process checks ([3ff4245](https://github.com/crs4/rocrate-validator/commit/3ff4245), [e3fda59](https://github.com/crs4/rocrate-validator/commit/e3fda59), [c3e2d5f](https://github.com/crs4/rocrate-validator/commit/c3e2d5f)) + +### ๐Ÿ”ง Changed + +- refactor(models): introduce pre/post internal validation hooks on `Validator` 
([e43364b](https://github.com/crs4/rocrate-validator/commit/e43364b)) +- refactor(SHACL): rewrite `build_node_subgraph` as an iterative BNode traversal ([23164b7](https://github.com/crs4/rocrate-validator/commit/23164b7)) +- refactor(errors): accept `str`, `Path` or `URI` in `ROCrateInvalidURIError` ([0b8289b](https://github.com/crs4/rocrate-validator/commit/0b8289b)) +- refactor(cli/cache): drop the `Status` column from the `cache list` table ([392df1a](https://github.com/crs4/rocrate-validator/commit/392df1a)) +- feat(constants): default the HTTP cache to never expire ([419fece](https://github.com/crs4/rocrate-validator/commit/419fece)) + +### 🐛 Fixed + +- fix(shacl): build property shape subgraphs by reachability and derive `NodeShape` level from nested `PropertyShape`s ([f1c0cfd](https://github.com/crs4/rocrate-validator/commit/f1c0cfd), [5044814](https://github.com/crs4/rocrate-validator/commit/5044814)) +- fix(shacl): evaluate inherited shapes for zero-shape target profiles ([bcb5cac](https://github.com/crs4/rocrate-validator/commit/bcb5cac)) +- fix(shacl): drop sub-threshold PySHACL violations at the source ([254fb88](https://github.com/crs4/rocrate-validator/commit/254fb88)) +- fix(core): filter failed requirements/checks by the configured severity ([a306f7f](https://github.com/crs4/rocrate-validator/commit/a306f7f)) +- fix(models): forward `extra_profiles_path` when computing validation statistics ([7b3e8bc](https://github.com/crs4/rocrate-validator/commit/7b3e8bc)) +- fix(uri): treat `file://` URIs with a non-local authority as remote ([63acb6a](https://github.com/crs4/rocrate-validator/commit/63acb6a)) +- fix(cli/cache): resolve profile tokens in `cache warm` and avoid `stream=True` when fetching remote crates ([7926832](https://github.com/crs4/rocrate-validator/commit/7926832), [33012b6](https://github.com/crs4/rocrate-validator/commit/33012b6)) +- fix(validation): report offline cache misses once per URL 
([757b86a](https://github.com/crs4/rocrate-validator/commit/757b86a)) +- fix(ISA): correct the error message for a bad position in `HowToStep` ([03a32fc](https://github.com/crs4/rocrate-validator/commit/03a32fc)) + +### ๐Ÿ“š Documentation + +- docs: add a dedicated documentation page for the cache and offline mode features ([e10c3b5](https://github.com/crs4/rocrate-validator/commit/e10c3b5)) +- docs(profiles): document check override-by-name and deactivation ([0b6bff7](https://github.com/crs4/rocrate-validator/commit/0b6bff7)) + +## [0.9.0] - 2026-04-20 + +Full changelog: https://github.com/crs4/rocrate-validator/compare/0.8.1...0.9.0 + +### โœจ Added + +- feat(profiles/isa): add the ISA RO-Crate profile, with checks and tests for Investigation, Study, Assay, Process, Protocol, Sample, Data, Person and PropertyValue entities ([852fb23](https://github.com/crs4/rocrate-validator/commit/852fb23), [d62e214](https://github.com/crs4/rocrate-validator/commit/d62e214), [727b6f0](https://github.com/crs4/rocrate-validator/commit/727b6f0)) +- feat(cli): add CLI options to configure the HTTP cache (`--cache-path`, `--cache-max-age`) ([564230f](https://github.com/crs4/rocrate-validator/commit/564230f)) +- feat(model): enable cache configuration in `ValidationSettings` ([b2b47ba](https://github.com/crs4/rocrate-validator/commit/b2b47ba)) +- feat(utils): extend the `HttpRequester` constructor to support cache configuration parameters ([2f2a873](https://github.com/crs4/rocrate-validator/commit/2f2a873)) +- feat(ro-crate): refine the constraint enforcing metadata descriptor existence ([2c6ea76](https://github.com/crs4/rocrate-validator/commit/2c6ea76)) +- feat(file-descriptor): add an internal remote-context retrieval method supporting the alternate `Link` header ([f8b0e55](https://github.com/crs4/rocrate-validator/commit/f8b0e55)) + +### ๐Ÿ”ง Changed + +- refactor(ro-crate): relax the `ROCrateMetadataFileDescriptor` class definition 
([61ddbb5](https://github.com/crs4/rocrate-validator/commit/61ddbb5)) +- refactor(file-descriptor): route checks through the new remote-context retrieval method ([0ce2619](https://github.com/crs4/rocrate-validator/commit/0ce2619)) +- chore(utils): increase the session cache max age to 300 seconds ([36ca0ac](https://github.com/crs4/rocrate-validator/commit/36ca0ac)) +- ci(gh-actions): update outdated GitHub Actions ([d565c5d](https://github.com/crs4/rocrate-validator/commit/d565c5d)) + +### ๐Ÿ› Fixed + +- fix(ro-crate): target metadata descriptor shapes by class and select the candidate descriptor via SPARQL ([8219f27](https://github.com/crs4/rocrate-validator/commit/8219f27), [39bd761](https://github.com/crs4/rocrate-validator/commit/39bd761), [1a91aa4](https://github.com/crs4/rocrate-validator/commit/1a91aa4)) +- fix(shacl): extract `@base` from the JSON-LD document for ontology parsing ([57f5c54](https://github.com/crs4/rocrate-validator/commit/57f5c54)) +- fix(SHACL-core): improve SHACL violation parsing with better error handling ([90a9f06](https://github.com/crs4/rocrate-validator/commit/90a9f06)) +- fix(file-descriptor): accept `application/json` and treat the `Link` header case-insensitively for remote context retrieval ([fe5ba1c](https://github.com/crs4/rocrate-validator/commit/fe5ba1c)) +- fix(file-descriptor): refine the compacted JSON-LD key validation logic ([45a7017](https://github.com/crs4/rocrate-validator/commit/45a7017)) +- fix(core): allow terms defined by context prefixes ([5fe8171](https://github.com/crs4/rocrate-validator/commit/5fe8171)) +- fix(core): fix output formatting ([523fbf4](https://github.com/crs4/rocrate-validator/commit/523fbf4)) + +### ๐Ÿ“š Documentation + +- docs(cli): document the `-1` value for no cache expiration in the `--cache-max-age` help ([c5848bc](https://github.com/crs4/rocrate-validator/commit/c5848bc)) + ## [0.8.1] - 2026-02-18 Full changelog: https://github.com/crs4/rocrate-validator/compare/0.8.0...0.8.1 diff --git 
a/docs/0_toc.rst b/docs/0_toc.rst index 7a6b1c48a..a1f299bb7 100644 --- a/docs/0_toc.rst +++ b/docs/0_toc.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ 2_usage_cli 3_usage_api 4_how_it_works + 5_offline_mode .. toctree:: :maxdepth: 5 @@ -30,3 +31,9 @@ 11_writing_a_profile 10_api genindex + +.. toctree:: + :maxdepth: 1 + :caption: About + + ack diff --git a/docs/10_api.rst b/docs/10_api.rst index f327e407f..715d3ca41 100644 --- a/docs/10_api.rst +++ b/docs/10_api.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -135,7 +135,7 @@ Python Check API ====================== Requirement Class ----------- +----------------- .. autoclass:: rocrate_validator.requirements.python.PyRequirement :members: diff --git a/docs/11_writing_a_profile.rst b/docs/11_writing_a_profile.rst index e46873509..20a27b03e 100644 --- a/docs/11_writing_a_profile.rst +++ b/docs/11_writing_a_profile.rst @@ -94,6 +94,154 @@ These instructions assume you are familiar with code development using Python an #. When your profile & tests are written, open a pull request to contribute it back to the repository! +Overriding inherited checks +--------------------------- + +When a profile inherits from another profile (via ``prof:isProfileOf`` / +``prof:isTransitiveProfileOf``), it automatically receives every check +declared by its ancestors. The validator additionally supports +**override-by-name**: a child profile can replace an inherited check by +declaring a new check with the **same name**. 
+ +This allows an extension profile to *redefine* the content of an inherited +check — for example, to make a constraint stricter or looser, change its +severity, or, as described in the next section, fully deactivate it. + +Override-by-name is enabled by default. It can be disabled via the +``allow_requirement_check_override`` validation setting (CLI / API); when +disabled, duplicate check names raise an error instead. + +SHACL checks +^^^^^^^^^^^^ + +Each SHACL ``NodeShape`` / ``PropertyShape`` becomes a check whose name is +its ``sh:name``. To override an inherited check, declare a shape in the +extension profile with the **same** ``sh:name`` as the inherited one: + +.. code-block:: turtle + + # Parent profile + ro:ShapeC + a sh:NodeShape ; + sh:name "The Shape C" ; + sh:targetNode ro:ro-crate-metadata.json ; + sh:property [ + a sh:PropertyShape ; + sh:name "Check Metadata File Descriptor entity existence" ; + sh:path rdf:type ; + sh:minCount 1 ; + sh:message "Missing entity" ; + ] . + +.. code-block:: turtle + + # Extension profile — overrides the inherited PropertyShape by sh:name + ro:ShapeC + a sh:NodeShape ; + sh:name "The Shape C" ; + sh:targetNode ro:ro-crate-metadata.json ; + sh:property [ + a sh:PropertyShape ; + sh:name "Check Metadata File Descriptor entity existence" ; + sh:path rdf:type ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:message "Stricter override from extension profile" ; + ] . + +Both top-level shapes and ``PropertyShape`` entries nested inside a parent +``NodeShape`` (i.e., declared inline, without an absolute IRI) can be +overridden this way. + +Python checks +^^^^^^^^^^^^^ + +Python checks declared via the ``@check`` decorator are matched by their +``name`` argument. To override an inherited Python check, declare a new +function with the same ``name`` in the extension profile: + +.. 
code-block:: python + + # In the extension profile's checks module + from rocrate_validator.requirements.python import check + + @check(name="Check Metadata File Descriptor entity existence") + def overridden_check(self, ctx): + # New implementation that replaces the inherited one + ... + +Deactivating inherited checks +----------------------------- + +A child profile can also **fully deactivate** a check inherited from one of +its ancestors. A deactivated check is skipped during validation and +reported as such in the validation result. This is useful when an extension +profile relaxes the parent's expectations, or replaces a coarse-grained +check with a more specific one declared elsewhere in the same profile. + +SHACL checks +^^^^^^^^^^^^ + +Two complementary mechanisms are supported, depending on whether the shape +to disable has an absolute IRI of its own. + +**Shape with an absolute IRI** (e.g. a top-level ``NodeShape`` or a named +``PropertyShape``): reference the shape by IRI from the extension profile +and mark it as deactivated, without redeclaring it. + +.. code-block:: turtle + + # Extension profile + ro:ShapeC sh:deactivated true . + +**Nested** ``PropertyShape`` **without an absolute IRI** (a property declared +inline inside a parent ``NodeShape``): use the override-by-name mechanism +described in the previous section. Declare a new ``PropertyShape`` in the +extension profile with the same ``sh:name`` as the one to disable, and set +``sh:deactivated true`` on it. This overrides the parent's +``PropertyShape``, and the validator reports the resulting check as +deactivated. + +.. code-block:: turtle + + # Extension profile — disables the inherited PropertyShape by sh:name + ro:ShapeC + a sh:NodeShape ; + sh:name "The Shape C" ; + sh:targetNode ro:ro-crate-metadata.json ; + sh:property [ + a sh:PropertyShape ; + sh:name "Check Metadata File Descriptor entity existence" ; + sh:path rdf:type ; + sh:deactivated true ; + ] . + +.. 
note:: + + Cross-profile deactivation is scoped to the shape's transitive + descendants: a ``sh:deactivated true`` triple declared by a profile + that does not inherit (directly or transitively) from the shape's + owning profile is ignored. This prevents unrelated profiles loaded in + the same process from interfering with one another. + +Python checks +^^^^^^^^^^^^^ + +The ``@check`` decorator accepts a ``deactivated`` flag, mirroring SHACL's +``sh:deactivated``. Combined with override-by-name, an extension profile +can disable an inherited Python check by redeclaring it with +``deactivated=True``: + +.. code-block:: python + + from rocrate_validator.requirements.python import check + + @check(name="Check Metadata File Descriptor entity existence", + deactivated=True) + def disabled(self, ctx): + # Body is irrelevant โ€” the check is skipped during validation. + return True + Running validator & tests during profile development ---------------------------------------------------- diff --git a/docs/1_installation.rst b/docs/1_installation.rst index c8be2703b..6a476edf9 100644 --- a/docs/1_installation.rst +++ b/docs/1_installation.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/docs/2_usage_cli.rst b/docs/2_usage_cli.rst index 729cdcf5c..62951f5b1 100644 --- a/docs/2_usage_cli.rst +++ b/docs/2_usage_cli.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -31,3 +31,9 @@ CLI Validation :parser: myst_parser.sphinx_ :start-line: 93 :end-line: 120 + +.. 
seealso:: + + To validate without network access and manage the HTTP cache from the + command line (the ``--offline`` and ``--no-cache`` flags and the ``cache`` + subcommand), see :ref:`offline_mode`. diff --git a/docs/3_usage_api.rst b/docs/3_usage_api.rst index bb1e9b660..bc3f57c1d 100644 --- a/docs/3_usage_api.rst +++ b/docs/3_usage_api.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -30,6 +30,12 @@ Programmatic Validation :start-line: 121 :end-line: 162 +.. seealso:: + + To resolve resources from a local cache or run validation without network + access (the ``offline`` / ``no_cache`` settings of ``ValidationSettings``), + see :ref:`offline_mode`. + Metadata-only Validation ------------------------ diff --git a/docs/4_how_it_works.rst b/docs/4_how_it_works.rst index 51bd7e950..7801eaed1 100644 --- a/docs/4_how_it_works.rst +++ b/docs/4_how_it_works.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/docs/5_offline_mode.rst b/docs/5_offline_mode.rst new file mode 100644 index 000000000..b65e6831f --- /dev/null +++ b/docs/5_offline_mode.rst @@ -0,0 +1,154 @@ +.. + Copyright (c) 2024-2026 CRS4 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +.. 
_offline_mode: + +Offline Mode and HTTP Caching +============================= + +To resolve remote resources — JSON-LD ``@context`` documents, profile artifacts +and, optionally, remote RO-Crates — the validator performs HTTP requests. These +requests go through a **persistent HTTP cache**, which makes validation faster +and reproducible and enables an **offline mode** where requests are served +exclusively from the cache. + +This page covers how to use offline mode and manage the cache both from the +:ref:`command line <offline_mode_cli>` and through the +:ref:`Python API <offline_mode_api>`. + + +How caching works +----------------- + +Every HTTP-backed resource fetched during validation is stored in a persistent +cache (by default under the user cache directory, shared across runs). On the +first online validation against a profile, the resources it declares are cached +automatically, so a later run can reuse the same cache without any network +access. + +Offline mode (``--offline`` / ``offline=True``) forbids network access +altogether: every request must be satisfied by the cache, otherwise the affected +resource is reported as a cache miss. For this reason offline mode requires the +cache to be enabled and cannot be combined with the cache-disabling options. + + +.. _offline_mode_cli: + +Command-line usage +------------------ + +Offline validation +~~~~~~~~~~~~~~~~~~~ + +Pass ``--offline`` to the ``validate`` command to forbid any network access: +every HTTP request must then be satisfied by the cache. + +.. code-block:: bash + + rocrate-validator validate --offline path/to/ro-crate + +Related options: + +- ``--cache-path PATH`` — use a specific cache directory. By default a persistent + directory under the user cache dir is used, so entries are shared across runs. +- ``--cache-max-age SECONDS`` — maximum age of cached entries; ``-1`` (the + default) means entries never expire. 
+- ``--no-cache`` / ``-nc`` — disable the cache entirely: every request hits the + network and nothing is persisted. This flag is **mutually exclusive** with + ``--offline``, since offline mode needs the cache to serve requests. + +Managing the cache +~~~~~~~~~~~~~~~~~~ + +The ``cache`` subcommand inspects and manages the HTTP cache: + +.. code-block:: bash + + # Show the cache location, backend, size and offline status + rocrate-validator cache info + + # List cached entries (alias: `ls`); filter, sort or emit JSON + rocrate-validator cache list + rocrate-validator cache list --url w3id.org --sort size + rocrate-validator cache list --json + + # Remove every cached entry + rocrate-validator cache reset --yes + +Pre-populating the cache (warm-up) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Before going offline you can pre-fetch everything you will need with +``cache warm``: + +.. code-block:: bash + + # Warm the resources declared by every installed profile + rocrate-validator cache warm --all-profiles + + # Warm only specific profiles + rocrate-validator cache warm -p ro-crate-1.1 -p workflow-ro-crate-1.0 + + # Also fetch and cache remote RO-Crates or arbitrary URLs + rocrate-validator cache warm --crate https://example.org/crate.zip + rocrate-validator cache warm -u https://w3id.org/ro/crate/1.1/context + +When invoked without any source option, ``cache warm`` defaults to warming all +installed profiles. A summary table reports which URLs were cached, skipped or +failed; the command exits with a non-zero status if any URL fails. + + +.. _offline_mode_api: + +Programmatic usage +------------------ + +The same offline behaviour can be enabled programmatically through +``ValidationSettings``: + +.. code-block:: python + + from rocrate_validator import services, models + + settings = services.ValidationSettings( + rocrate_uri='/path/to/ro-crate', + profile_identifier='ro-crate-1.1', + # Serve every HTTP request from the cache; uncached resources fail. 
+ offline=True, + # Optional: use a dedicated cache directory (defaults to the user cache). + # cache_path='/tmp/rocv-cache', + # Optional: maximum age of cached entries; -1 (default) = never expire. + # cache_max_age=-1, + ) + + result = services.validate(settings) + +The cache-related settings are: + +- ``offline`` (``bool``, default ``False``) — when ``True``, HTTP requests are + served only from the cache; uncached resources raise a cache-miss error. +- ``no_cache`` (``bool``, default ``False``) — disable the cache entirely. It is + **incompatible** with ``offline=True`` and raises ``ValueError`` if combined. +- ``cache_path`` (``Path``, optional) — cache directory; defaults to the + persistent user cache so online and offline runs share the same entries. +- ``cache_max_age`` (``int``, optional) — maximum entry age in seconds; ``-1`` + means entries never expire. + +When ``offline`` is ``False``, the resources declared by the selected profiles +are warmed up automatically before validation, so that a later offline run +reusing the same cache succeeds without network access. To pre-populate the cache +explicitly (e.g. in a CI pipeline), use the ``rocrate-validator cache warm`` +command described in :ref:`offline_mode_cli`. diff --git a/docs/ack.rst b/docs/ack.rst index 3a66255d1..cc486da36 100644 --- a/docs/ack.rst +++ b/docs/ack.rst @@ -1,5 +1,5 @@ .. - Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,22 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. -.. _acknowledgements: - -.. toctree:: - :maxdepth: 5 - :caption: Contents: - Acknowledgements ================ -This work has been partially funded by the following sources: - -.. 
image:: https://raw.githubusercontent.com/crs4/rocrate-validator/develop/docs/img/eu-logo/EN_Co-fundedbytheEU_RGB_POS.png - :alt: Co-funded by the EU - :width: 250px - :align: right - -- the `BY-COVID `_ project (HORIZON Europe grant agreement number 101046203); -- the `LIFEMap `_ project, funded by the Italian Ministry of Health (Piano Operative Salute, Trajectory 3). - +.. Body sourced from the README so it stays in sync with it (the README +.. "## Acknowledgements" heading is skipped: the page title above replaces it). +.. include:: ../README.md + :parser: myst_parser.sphinx_ + :start-line: 182 diff --git a/docs/conf.py b/docs/conf.py index 965240cbf..872323e38 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,7 +31,7 @@ # Set project metadata project = 'rocrate-validator' -copyright = '2024, CRS4' +copyright = '2024-2026, CRS4' author = 'Marco Enrico Piras, Luca Pireddu, Simone Leo' release = __version__ @@ -68,6 +68,20 @@ 'sphinx_copybutton', ] +# Only auto-generate section labels for the top two heading levels: deeper +# subsections (e.g. the repeated "SHACL checks" / "Python checks" headings) +# would otherwise produce duplicate-label warnings within the same document. +autosectionlabel_maxdepth = 2 + +# Warnings raised while embedding the Markdown README into the Sphinx pages. +# The README is the canonical GitHub document: its slices intentionally start +# below H1 (myst.header) and use GitHub-relative anchor links that span pages +# (myst.xref_missing). These are expected when including it here. +suppress_warnings = [ + 'myst.header', + 'myst.xref_missing', +] + templates_path = ['_templates'] # exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'experiments', 'ontologies', 'tests', 'logs', 'examples', 'debug'] # List of patterns, relative to source directory, that match files and diff --git a/docs/index.rst b/docs/index.rst index 21e58d0d8..143c4fdae 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,5 +1,5 @@ .. 
- Copyright (c) 2024 CRS4 + Copyright (c) 2024-2026 CRS4 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/docs/requirements.txt b/docs/requirements.txt index 22ad40616..8d92b0be1 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,3 +5,4 @@ enum-tools==0.12.0 sphinx-toolbox==3.8.1 myst-parser==4.0.0 sphinx_rtd_theme==3 +sphinx-copybutton==0.5.2 diff --git a/poetry.lock b/poetry.lock index e750af1dc..ef545a0f4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.4.1 and should not be changed by hand. [[package]] name = "alabaster" @@ -161,23 +161,21 @@ lxml = ["lxml"] [[package]] name = "bleach" -version = "6.3.0" +version = "4.1.0" description = "An easy safelist-based HTML-sanitizing tool." optional = false -python-versions = ">=3.10" +python-versions = ">=3.6" groups = ["docs"] files = [ - {file = "bleach-6.3.0-py3-none-any.whl", hash = "sha256:fe10ec77c93ddf3d13a73b035abaac7a9f5e436513864ccdad516693213c65d6"}, - {file = "bleach-6.3.0.tar.gz", hash = "sha256:6f3b91b1c0a02bb9a78b5a454c92506aa0fdf197e1d5e114d2e00c6f64306d22"}, + {file = "bleach-4.1.0-py2.py3-none-any.whl", hash = "sha256:4d2651ab93271d1129ac9cbc679f524565cc8a1b791909c4a51eac4446a15994"}, + {file = "bleach-4.1.0.tar.gz", hash = "sha256:0900d8b37eba61a802ee40ac0061f8c2b5dee29c1927dd1d233e075ebf5a71da"}, ] [package.dependencies] -tinycss2 = {version = ">=1.1.0,<1.5", optional = true, markers = "extra == \"css\""} +packaging = "*" +six = ">=1.9.0" webencodings = "*" -[package.extras] -css = ["tinycss2 (>=1.1.0,<1.5)"] - [[package]] name = "cachecontrol" version = "0.14.4" @@ -230,14 +228,14 @@ ujson = ["ujson (>=5.10.0)"] [[package]] name = "certifi" -version = "2026.2.25" +version = "2026.5.20" description = "Python package for providing 
Mozilla's CA Bundle." optional = false python-versions = ">=3.7" groups = ["main", "docs"] files = [ - {file = "certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"}, - {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"}, + {file = "certifi-2026.5.20-py3-none-any.whl", hash = "sha256:3c52e209ba0a4ad7aebe60436a4ab349c39e1e602e8c134221e546902ad25897"}, + {file = "certifi-2026.5.20.tar.gz", hash = "sha256:69dea482ab64caa7b9f6aba1c6bf48bb6a5448d1c0f1b17ab42ad8c763a5344d"}, ] [[package]] @@ -338,52 +336,6 @@ files = [ [package.dependencies] pycparser = {version = "*", markers = "implementation_name != \"PyPy\""} -[[package]] -name = "chardet" -version = "7.4.3" -description = "Universal character encoding detector" -optional = false -python-versions = ">=3.10" -groups = ["docs"] -files = [ - {file = "chardet-7.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0c79b13c9908ac7dfe0a74116ebc9a0f28b2319d23c32f3dfcdfbe1279c7eaf"}, - {file = "chardet-7.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bba8bea1b28d927b3e99e47deafe53658d34497c0a891d95ff1ba8ff6663f01c"}, - {file = "chardet-7.4.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23163921dccf3103ce59540b0443c106d2c0a0ff2e0503e05196f5e6fdea453f"}, - {file = "chardet-7.4.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cfb54563fe5f130da17c44c6a4e2e8052ba628e5ab4eab7ef8190f736f0f8f72"}, - {file = "chardet-7.4.3-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3990fffcc6a6045f2234ab72752ad037e3b2d48c72037f244d42738db397eb75"}, - {file = "chardet-7.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:c7116b0452994734ccff35e154b44240090eb0f4f74b9106292668133557c175"}, - {file = "chardet-7.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:25a862cddc6a9ac07023e808aedd297115345fbaabc2690479481ddc0f980e09"}, - {file = "chardet-7.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7005c88da26fd95d8abb8acbe6281d833e9a9181b03cf49b4546c4555389bd97"}, - {file = "chardet-7.4.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc50f28bad067393cce0af9091052c3b8df7a23115afd8ba7b2e0947f0cef1f8"}, - {file = "chardet-7.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c3da294de1a681097848ab58bd3f2771a674f8039d2d87a5538b28856b815e9"}, - {file = "chardet-7.4.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c45e116dd51b66226a53ade3f9f635e870de5399b90e00ce45dcc311093bf4"}, - {file = "chardet-7.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:ccc1f83ab4bcfb901cf39e0c4ba6bc6e726fc6264735f10e24ceb5cb47387578"}, - {file = "chardet-7.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:75d3c65cc16bddf40b8da1fd25ba84fca5f8070f2b14e86083653c1c85aee971"}, - {file = "chardet-7.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:29af5999f654e8729d251f1724a62b538b1262d9292cccaefddf8a02aae1ef6a"}, - {file = "chardet-7.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:626f00299ad62dfe937058a09572beed442ccc7b58f87aa667949b20fd3db235"}, - {file = "chardet-7.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9a4904dd5f071b7a7d7f50b4a67a86db3c902d243bf31708f1d5cde2f68239cb"}, - {file = "chardet-7.4.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5d2879598bc220689e8ce509fe9c3f37ad2fca53a36be9c9bd91abdd91dd364f"}, - {file = "chardet-7.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:4b2799bd58e7245cfa8d4ab2e8ad1d76a5c3a5b1f32318eb6acca4c69a3e7101"}, - {file = "chardet-7.4.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:a9e4486df251b8962e86ea9f139ca235aa6e0542a00f7844c9a04160afb99aa9"}, - {file = "chardet-7.4.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4fbff1907925b0c5a1064cffb5e040cd5e338585c9c552625f30de6bc2f3107a"}, - {file = "chardet-7.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:365135eaf37ba65a828f8e668eb0a8c38c479dcbec724dc25f4dfd781049c357"}, - {file = "chardet-7.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfc134b70c846c21ead8e43ada3ae1a805fff732f6922f8abcf2ff27b8f6493d"}, - {file = "chardet-7.4.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9acd9988a93e09390f3cd231201ea7166c415eb8da1b735928990ffc05cb9fbb"}, - {file = "chardet-7.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:e1b98790c284ff813f18f7cf7de5f05ea2435a080030c7f1a8318f3a4f80b131"}, - {file = "chardet-7.4.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d892d3dcd652fdef53e3d6327d39b17c0df40a899dfc919abaeb64c974497531"}, - {file = "chardet-7.4.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:acc46d1b8b7d5783216afe15db56d1c179b9a40e5a1558bc13164c4fd20674c4"}, - {file = "chardet-7.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ac3bf11c645734a1701a3804e43eabd98851838192267d08c353a834ab79fea"}, - {file = "chardet-7.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e3bd9f936e04bae89c254262af08d9e5b98f805175ba1e29d454e6cba3107b7"}, - {file = "chardet-7.4.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:27cc23da03630cdecc9aa81a895aa86629c211f995cd57651f0fbc280717bf93"}, - {file = "chardet-7.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:b95c934b9ad59e2ba8abb9be49df70d3ad1b0d95d864b9fdb7588d4fa8bd921c"}, - {file = "chardet-7.4.3-cp314-cp314t-macosx_10_15_x86_64.whl", hash = 
"sha256:c77867f0c1cb8bd819502249fcdc500364aedb07881e11b743726fa2148e7b6e"}, - {file = "chardet-7.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cf1efeaf65a6ef2f5b9cc3a1df6f08ba2831b369ccaa4c7018eaf90aa757bb11"}, - {file = "chardet-7.4.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f3504c139a2ad544077dd2d9e412cd08b01786843d76997cd43bb6de311723c"}, - {file = "chardet-7.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457f619882ba66327d4d8d14c6c342269bdb1e4e1c38e8117df941d14d351b04"}, - {file = "chardet-7.4.3-py3-none-any.whl", hash = "sha256:1173b74051570cf08099d7429d92e4882d375ad4217f92a6e5240ccfb26f231e"}, - {file = "chardet-7.4.3.tar.gz", hash = "sha256:cc1d4eb92a4ec1c2df3b490836ffa46922e599d34ce0bb75cf41fd2bf6303d56"}, -] - [[package]] name = "charset-normalizer" version = "3.4.7" @@ -525,14 +477,14 @@ files = [ [[package]] name = "click" -version = "8.3.2" +version = "8.4.1" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.10" groups = ["main"] files = [ - {file = "click-8.3.2-py3-none-any.whl", hash = "sha256:1924d2c27c5653561cd2cae4548d1406039cb79b858b747cfea24924bbc1616d"}, - {file = "click-8.3.2.tar.gz", hash = "sha256:14162b8b3b3550a7d479eafa77dfd3c38d9dc8951f6f69c78913a8f9a7540fd5"}, + {file = "click-8.4.1-py3-none-any.whl", hash = "sha256:482be17c6991b8c19c5429a1e995d9b0efdbb63172824c41f99965dc0ade8ec2"}, + {file = "click-8.4.1.tar.gz", hash = "sha256:918b5633eddf6b41c32d4f454bf0de810065c74e3f7dbf8ee5452f8be88d3e96"}, ] [package.dependencies] @@ -586,118 +538,118 @@ test = ["pytest"] [[package]] name = "coverage" -version = "7.13.5" +version = "7.14.1" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.10" groups = ["test"] files = [ - {file = "coverage-7.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5"}, - {file = "coverage-7.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf"}, - {file = "coverage-7.13.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8"}, - {file = "coverage-7.13.5-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4"}, - {file = "coverage-7.13.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d"}, - {file = "coverage-7.13.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930"}, - {file = "coverage-7.13.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d"}, - {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40"}, - {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878"}, - {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400"}, - {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0"}, - {file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0"}, - {file = "coverage-7.13.5-cp310-cp310-win32.whl", hash = 
"sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58"}, - {file = "coverage-7.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e"}, - {file = "coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d"}, - {file = "coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587"}, - {file = "coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642"}, - {file = "coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b"}, - {file = "coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686"}, - {file = "coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743"}, - {file = "coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75"}, - {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209"}, - {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a"}, - {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e"}, - {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = 
"sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd"}, - {file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8"}, - {file = "coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf"}, - {file = "coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9"}, - {file = "coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028"}, - {file = "coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01"}, - {file = "coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422"}, - {file = "coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f"}, - {file = "coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5"}, - {file = "coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376"}, - {file = "coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256"}, - {file = "coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c"}, - {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5"}, - {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09"}, - {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9"}, - {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf"}, - {file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c"}, - {file = "coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf"}, - {file = "coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810"}, - {file = "coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de"}, - {file = "coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1"}, - {file = "coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3"}, - {file = "coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26"}, - {file = "coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3"}, - {file = "coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b"}, - {file = 
"coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a"}, - {file = "coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969"}, - {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161"}, - {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15"}, - {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1"}, - {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6"}, - {file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17"}, - {file = "coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85"}, - {file = "coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b"}, - {file = "coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664"}, - {file = "coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d"}, - {file = "coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0"}, - {file = "coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806"}, - 
{file = "coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3"}, - {file = "coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9"}, - {file = "coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd"}, - {file = "coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606"}, - {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e"}, - {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0"}, - {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87"}, - {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479"}, - {file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2"}, - {file = "coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a"}, - {file = "coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819"}, - {file = "coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911"}, - {file = "coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = 
"sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f"}, - {file = "coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e"}, - {file = "coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a"}, - {file = "coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510"}, - {file = "coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247"}, - {file = "coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6"}, - {file = "coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0"}, - {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882"}, - {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740"}, - {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16"}, - {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0"}, - {file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0"}, - {file = "coverage-7.13.5-cp314-cp314-win32.whl", hash = 
"sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc"}, - {file = "coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633"}, - {file = "coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8"}, - {file = "coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b"}, - {file = "coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c"}, - {file = "coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9"}, - {file = "coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29"}, - {file = "coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607"}, - {file = "coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90"}, - {file = "coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3"}, - {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab"}, - {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562"}, - {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = 
"sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2"}, - {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea"}, - {file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a"}, - {file = "coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215"}, - {file = "coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43"}, - {file = "coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45"}, - {file = "coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61"}, - {file = "coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179"}, + {file = "coverage-7.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3e3680291c4a1d0dadfa84a2c459576a4af5133abb617905714339a0c73138cf"}, + {file = "coverage-7.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a5274669f37f2343635a347b91a60777621341ab3378e9c6ac9335eee704bddf"}, + {file = "coverage-7.14.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cfe5a5fec635799ef33428f1e5e61bafa45a92a96190ba731561ba558ccc214d"}, + {file = "coverage-7.14.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:62a9f70b52e0b5a95cfef4a5c5641b06983cadc5e538a3feeb5c00211f523ac2"}, + {file = "coverage-7.14.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c18ebc343e15be53049b3a2dce38fe82d58f37e20ab9094b3a39c0aa4f6bb47"}, + {file = 
"coverage-7.14.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b84ffdf877644e7096aa936991efeed873f7f3df57b9cd001312b7668ab08550"}, + {file = "coverage-7.14.1-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e854312c4103f2ad4c0dc023b69b77ebfd2c89db5f86c4c94dc2353f9a92167e"}, + {file = "coverage-7.14.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c643734307300234fafa36bf2a040a7235f8f177ea1fd6ec1423aea6fb7b929f"}, + {file = "coverage-7.14.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:84ac9499e48700399a5dd0ea7085b5091961fec52c68d66b4ec0d3cf7f4441b1"}, + {file = "coverage-7.14.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:7f02d09f70776579b926d889a4c9c235070a1f47c40458aeaca563fae5acfdb5"}, + {file = "coverage-7.14.1-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:ce66d8e46da2bb5ee313a745cbd2e391d319176c1f7a9451bfcd3a2fb920859b"}, + {file = "coverage-7.14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c912c259304cfb5ee584481cfb7ce1ff932b4d61e6c9140b8f19cb7b5ed82332"}, + {file = "coverage-7.14.1-cp310-cp310-win32.whl", hash = "sha256:1238cb94638e610e972c60dac68e813f868dc7d6e982535270558443058d9d59"}, + {file = "coverage-7.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:fc459e5d73be2d6332fcfe8dbf3d8994671fe33c700f4565988ecfa511547253"}, + {file = "coverage-7.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:478b5bcd63c2e1357c5c7e16c070690df7b07f676b1c114d7b93e533c664309f"}, + {file = "coverage-7.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a24a81f9715ee42ef59a316cc11611c98fe23920f7c81861315c9f3ff4a230f4"}, + {file = "coverage-7.14.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:196a13319ad88d6d8ef5ab489ec4f44ddde2143c0c7d5b27786f6c3ffd56a7e1"}, + {file = "coverage-7.14.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:3d452fd08b5c72c5167c93e6867b5c08500bd40f2a21e1e854a500550b6cc36f"}, + {file = "coverage-7.14.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23bf7fa51ac02e07fc7c96849b82946da47ae862dc8f86d183b2a4864fc38129"}, + {file = "coverage-7.14.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bcaa50684dcaadfa599ac48f81103c756d791cfd85c97203d2217c593d48b860"}, + {file = "coverage-7.14.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4ea1c034f95c9b056e856b794630b17f9fa3d57e4800ff1e503d3be0f9c9078c"}, + {file = "coverage-7.14.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c7e057326434e441306226fbeb5d1aaf14a2637efe97ba668306635835f32ad7"}, + {file = "coverage-7.14.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:59baf88468dbc8d63b1887afd92bda52e40bb1561696e5819670601403810cec"}, + {file = "coverage-7.14.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d34d75f892b3ab73ba11cab5442cce7b3e168fd64162b16f0e1e0d09c508edef"}, + {file = "coverage-7.14.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:3a56abc20a472baf0304c455721bc601477440d28ecfde8a03dde79ede07e0df"}, + {file = "coverage-7.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6a3cb83d1552c0cd1b4906655b6a33fd4a8473229633a901c6b73bf86914dee9"}, + {file = "coverage-7.14.1-cp311-cp311-win32.whl", hash = "sha256:10274a1fbeb8ec5d72966e17bb198a3104257aca4ac09d98667c5f8aca8c8548"}, + {file = "coverage-7.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:87ebdf787d4888e3f3f2d523eadc6e18c6d18c6d0eb173801a189641627fb37e"}, + {file = "coverage-7.14.1-cp311-cp311-win_arm64.whl", hash = "sha256:dd34767fa19848d35659ffc0a75314f58c7af3f1cd87ec521e8292a1238398a3"}, + {file = "coverage-7.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a06c76364a9360e33d6d23769aefdf7f66f38e2ffb60ceb1baaa4989d83b695c"}, + {file = 
"coverage-7.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fad54e871165f6ec2f536063ac74c3104508a12963e64072ba44bd822de52b0c"}, + {file = "coverage-7.14.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:84b535f00655ecafe1d929d1fb00ed5d6fa3051ea643ab2c161a3887b86f294b"}, + {file = "coverage-7.14.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6b6b0853b895fe0e98cbfc580d1ec3393d9302b4b1e96a77b3f5c91fdab899e6"}, + {file = "coverage-7.14.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:442cc9c952b2df400cda54bb04ab87330cf2cd08a8692cbbea36773531eb6f37"}, + {file = "coverage-7.14.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8270544c361ed405a27a060dbc9ed2c124b084d96dfdc2d9a2510482aef981ad"}, + {file = "coverage-7.14.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:48b283b1dd6372e8de2a7a9a4c4d5dc06f4d4fd209b876f3c88a7a205a0c8f84"}, + {file = "coverage-7.14.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5b0c99ba93a07d56f6df340bb79be53202a082b2fdb81bfe6190b741a3470d54"}, + {file = "coverage-7.14.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e471bc5769ff073b058cfadb0d736b56ce067c8560eabeb0da88462df98c23e7"}, + {file = "coverage-7.14.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f497a1ea81d4cd7c10ddcaa685135b9aabd291af3d55775a9ddf3cb7a364cdd9"}, + {file = "coverage-7.14.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2222be86d0b54f5dd5a38f45f17f315f737245e857bf0bdedc70734f84a13c02"}, + {file = "coverage-7.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:85e85586565842f6932abebd4c18bcb1074223dc0b3576e7d173ca710622813a"}, + {file = "coverage-7.14.1-cp312-cp312-win32.whl", hash = "sha256:4a28fd227808366b196a75476dced2eb35b351d6766ba9c858dc93319e87f4f1"}, + {file = 
"coverage-7.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:54acdb6674a4661768d7bf7db32dfb9f46ab1d764f8aba6df75ce1a6a088724e"}, + {file = "coverage-7.14.1-cp312-cp312-win_arm64.whl", hash = "sha256:99cd41ff91afd94896fea3bc002706b6ae4ce95727d06e4a0f39c0a8d8bd8b1a"}, + {file = "coverage-7.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:be9f2c802dcfce3f71298303aa5dad0dce440a76c52f2f60dacd8656dab78793"}, + {file = "coverage-7.14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6223a72fd0e4c7156353ec0f08a5f93623e1d3034d0e2683b9bb8ea674131b1d"}, + {file = "coverage-7.14.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7279d2110a28cebc738b6459ecda2771735a4c18465fbbd36b3288fe5ed92247"}, + {file = "coverage-7.14.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9eeb3fcbc13ba40dfbdb22d01d196a28e9cef9ed4c29b60061a1e0e823a9929d"}, + {file = "coverage-7.14.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f0cfc27c539f07cf5c0a4cfe211d0b6cae039f8f40526dbaa71944e64b50a7b"}, + {file = "coverage-7.14.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:221c70f316241a78e77e607c227cefc8808d4e08f28d99c04f35694690e940be"}, + {file = "coverage-7.14.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:da028256b04ec30e5e0114b6f76172938c313991f0a2d3d894271315cf5d5e43"}, + {file = "coverage-7.14.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76a085d7005236a767e3426148b2c407e53ad61695c562f8a81da2d373324901"}, + {file = "coverage-7.14.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b553d04b5e778a8e56d57eb134aff42a92718ecba45e79c4764ecfa40efd92ff"}, + {file = "coverage-7.14.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:46f714d2fb8ae2f4f29f23ada7f1e79b759fff5a70f94a1dac23af204c3ec9e4"}, + {file = 
"coverage-7.14.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:1896f5e19ff3f0431c7ce2172adc54890fd97f86b59ced8ca1649145d9ffe35d"}, + {file = "coverage-7.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:62fd185ef9df3c33d1c8178c5af105f762afbad96038de9a4ae100aa6297ca33"}, + {file = "coverage-7.14.1-cp313-cp313-win32.whl", hash = "sha256:ab4af6352741a604c431c6072fce5bee33bf0f20dc7a56618d6bf6bb89e9810c"}, + {file = "coverage-7.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:7af486dabe8954d03b087f0021540897afe084f04e16ff5579e08cc46f871416"}, + {file = "coverage-7.14.1-cp313-cp313-win_arm64.whl", hash = "sha256:2224f89ffd0c5605ccce1ed7a584da162bc7c55f601ab1c946bc9de31a486b42"}, + {file = "coverage-7.14.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:de286598cc65d2b489411174b1faec2f5a7775fb3201fd925db2a76b4030f37d"}, + {file = "coverage-7.14.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:042c46ded7c288aeb07cf14a28b6c1e10b78fcba40171c3fa1e939377eeef0b5"}, + {file = "coverage-7.14.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f4ddbe407477f04c45115d1a4e5bc480f753553b534d338d4c3358b1cdd0ea52"}, + {file = "coverage-7.14.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d13e6725992e2d2fd7d81d4f5241952d13740121dfd501da09201be39b2c003a"}, + {file = "coverage-7.14.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f747dc8edcfe740130f28f32f3995e955494285717e86ee25af51db2219df08a"}, + {file = "coverage-7.14.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ced2f09ef276fd58611a1ef502164ad266d2b75174e5a40cabbdb4033f9f6cf2"}, + {file = "coverage-7.14.1-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b84800013769a78ccb9ef4659402e26d06867e337b61ec365f77ad008adea80e"}, + {file = 
"coverage-7.14.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ea8cd6ca0ee9f616aaef3afc6882e32c2cbf18b00d96313ffd76af650574034d"}, + {file = "coverage-7.14.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:aa5e304a873fabddc11e484e9b6b738bd38bd7bed17b09aa84eecf5332e8b8bb"}, + {file = "coverage-7.14.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:5a1c5215be81035e629d5bc756650634d0bf31991038db7a0eccb90f025ce16d"}, + {file = "coverage-7.14.1-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:79058c47dae6788504b5effb319961bcd72d7240551464b91d474bc0ed186d69"}, + {file = "coverage-7.14.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:370c5afae3fa0658e11694a32b24c2778f6bc2d17718121f94ee185e69f26b54"}, + {file = "coverage-7.14.1-cp313-cp313t-win32.whl", hash = "sha256:3758dd0a7f1fa57365ef2e781df0f0731d38b6e3772259d13dae4bd8a958d4b1"}, + {file = "coverage-7.14.1-cp313-cp313t-win_amd64.whl", hash = "sha256:6ff665fb023a77386fe11685190cee1f60a7d635994a30d9b0a061533d470fce"}, + {file = "coverage-7.14.1-cp313-cp313t-win_arm64.whl", hash = "sha256:17a5a241e5997621a956a7f402a7433ef4221e5152809b785bec79e2323799f1"}, + {file = "coverage-7.14.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d5ed429d0b8edaac649e889b4ffcedb6c80b06629a3f93050e3dddfb99235bee"}, + {file = "coverage-7.14.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8011224a62280e50dab346960c03cf47aca1a1e09e608c0fb33fd6e0cc8e9500"}, + {file = "coverage-7.14.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:12c42ec1e14f553c4f817e989365982e646e27211f10a0f717855b94a79c8906"}, + {file = "coverage-7.14.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:06144cd511cf2624873a035c5069cf297144f6e77a73ee3d7a55b605ec5efb42"}, + {file = "coverage-7.14.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:a311d8e1da24be5c1ccf85cbfb06315dbaa1703d5a1eab3f6432c72b837917c8"}, + {file = "coverage-7.14.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c79cead5b5bc584d9c71451cb984d0e3a84e0c0937379c8efcbf27c8d661b851"}, + {file = "coverage-7.14.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:dcbf65f1f66a26cdd88c35cf68fb4729c5d1cd2e88added72420541dfb212034"}, + {file = "coverage-7.14.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fd86572566fb40189a8260446158235159bc7a82dfbc87a3b39cf4fb57fcec1c"}, + {file = "coverage-7.14.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:7771b601718fdde84832c3a434ca9bbf4ae9adbc49d84198b4110700c3c77c36"}, + {file = "coverage-7.14.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:39b21e212c55af06fa375e3dbf90a8a8e38792f3a910c580066d23563830ddd5"}, + {file = "coverage-7.14.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:f2302660e32562a532b442480121aef8aa61a5bdb20b30bf0adab29f10a5a4b4"}, + {file = "coverage-7.14.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:03a6f93c1ec3b7f2e77b5dbcc5573a2c21f12529a5c6bbe0f16f72303cc2fa4d"}, + {file = "coverage-7.14.1-cp314-cp314-win32.whl", hash = "sha256:8a3ce026d73290f42f08dafecbd82c193a74df280461fbf97300fec51fd133ee"}, + {file = "coverage-7.14.1-cp314-cp314-win_amd64.whl", hash = "sha256:114c95ef29302423b87d159075805f4ab973254a2638a5d7d046c94887cc87d7"}, + {file = "coverage-7.14.1-cp314-cp314-win_arm64.whl", hash = "sha256:a07891c3f4805442b31b71e84ba3cf29ed1aa9a428284e06deeb4b23e5b46343"}, + {file = "coverage-7.14.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1101a5ebb083aecb625ebb6209d4105b58f647b093cb2dc8122d7b33f743cfe1"}, + {file = "coverage-7.14.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:851b9e1e4e8a4608e77c79714b2e77c0970d2ed7202a05e92ae407817481887b"}, + {file = "coverage-7.14.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", 
hash = "sha256:d5b89cdfb2ee051b71e8c3c70bd81a9eff81100f736a269136fe1a68efe00474"}, + {file = "coverage-7.14.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0177614a0370f227888b4e436a7c55686d6a9f90eb1ade2b624ba685a1686e86"}, + {file = "coverage-7.14.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2d69af5dea2de76fc485a83032a630523f985198b7e25be901ec60181587b01e"}, + {file = "coverage-7.14.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:35ab22d91de736e8966b980dc355cbcdd2c6dbbcfe275f9a2991bc8a91b3df65"}, + {file = "coverage-7.14.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:357d4e32935c36588aaba057d734fa32428c360c9fc2e4442afbf1b646beee6e"}, + {file = "coverage-7.14.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:51bd64741cc6fa065abd300ede1afe5a5291ece9c31da8b24884deda48bcc3f8"}, + {file = "coverage-7.14.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:9132cd363a68a4c3daa7c8704a654b1e39d3360f6f5b8ddd470608a945236c07"}, + {file = "coverage-7.14.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:07c6290b1697b862c0478eab545eec949a0d0e4d6d03497f446d706da3b4f2de"}, + {file = "coverage-7.14.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:5ea0c297e27133853b4d8a3eb799bff5a2dbd9f2f41537a240d337ac9b4df890"}, + {file = "coverage-7.14.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:01b7733daad0237daa01ef80fe2dfceffc911e6a17fa7b55d14aa8214eaaaecd"}, + {file = "coverage-7.14.1-cp314-cp314t-win32.whl", hash = "sha256:6adc5a36984624a70bf11d7184e20fa0a49aa7c47ffab43804106a1a695ea22e"}, + {file = "coverage-7.14.1-cp314-cp314t-win_amd64.whl", hash = "sha256:ddf799247318f34dbcd2efa8c95a8d0642674e926bb1774cf9b63dfd2a389d1c"}, + {file = "coverage-7.14.1-cp314-cp314t-win_arm64.whl", hash = 
"sha256:145986fe66647eb489f18d9a997567a3fd358584c4b5a808769113abc07466af"}, + {file = "coverage-7.14.1-py3-none-any.whl", hash = "sha256:a252f21c27e38347e60111a3266b03827422a7d5525951aceee313aa68bab1d2"}, + {file = "coverage-7.14.1.tar.gz", hash = "sha256:30c08f7d90415aa98b3c990385dea2939b0da55f38515e5b369b83655f8523be"}, ] [package.dependencies] @@ -706,30 +658,6 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli ; python_full_version <= \"3.11.0a6\""] -[[package]] -name = "cssutils" -version = "2.14.0" -description = "A CSS Cascading Style Sheets library for Python" -optional = false -python-versions = ">=3.10" -groups = ["docs"] -files = [ - {file = "cssutils-2.14.0-py3-none-any.whl", hash = "sha256:79ad979e4a383f39f0b3f0ca82ee3f1b01065da9fa02701b63bfed38ac76eb91"}, - {file = "cssutils-2.14.0.tar.gz", hash = "sha256:c33256f0cbc215ad405b647117ace63c9e22af96fe42dcb7861742a591e6464c"}, -] - -[package.dependencies] -encutils = "*" -more_itertools = "*" - -[package.extras] -check = ["pytest-checkdocs (>=2.14)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] -cover = ["pytest-cov"] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=3.4)"] -test = ["cssselect", "importlib_resources ; python_version < \"3.9\"", "jaraco.test (>=5.1)", "lxml ; python_version < \"3.11\"", "pytest (>=6,!=8.1.*)"] -type = ["pytest-mypy (>=1.0.1) ; platform_python_implementation != \"PyPy\""] - [[package]] name = "debugpy" version = "1.8.20" @@ -772,14 +700,14 @@ files = [ [[package]] name = "decorator" -version = "5.2.1" +version = "5.3.1" description = "Decorators for Humans" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a"}, - {file = "decorator-5.2.1.tar.gz", hash = 
"sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"}, + {file = "decorator-5.3.1-py3-none-any.whl", hash = "sha256:f47fe6fdbd2edd623ecfe36875d37aba411624e2670dd395dddae1358689bb3c"}, + {file = "decorator-5.3.1.tar.gz", hash = "sha256:4cbcdd55a6efadb9dbea26b858f4fb3264567b52d69ca0d25b721b553f60ea82"}, ] [[package]] @@ -796,19 +724,19 @@ files = [ [[package]] name = "dict2css" -version = "0.3.0.post1" +version = "0.6.0" description = "A ฮผ-library for constructing cascading style sheets from Python dictionaries." optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" groups = ["docs"] files = [ - {file = "dict2css-0.3.0.post1-py3-none-any.whl", hash = "sha256:f006a6b774c3e31869015122ae82c491fd25e7de4a75607a62aa3e798f837e0d"}, - {file = "dict2css-0.3.0.post1.tar.gz", hash = "sha256:89c544c21c4ca7472c3fffb9d37d3d926f606329afdb751dc1de67a411b70719"}, + {file = "dict2css-0.6.0-py3-none-any.whl", hash = "sha256:5251f1df1c78ffdf09313657a7f88add0ad219127d9aeb18fb343b052d6bfbbe"}, + {file = "dict2css-0.6.0.tar.gz", hash = "sha256:143e55cb71c98a88c79f2c41e08a5fa4d875659275756f794e31ccd69936ce88"}, ] [package.dependencies] -cssutils = ">=2.2.0" domdf-python-tools = ">=2.2.0" +tinycss2 = ">=1.2.1" [[package]] name = "dill" @@ -858,21 +786,6 @@ typing-extensions = ">=3.7.4.1" all = ["pytz (>=2019.1)"] dates = ["pytz (>=2019.1)"] -[[package]] -name = "encutils" -version = "1.0.0" -description = "" -optional = false -python-versions = ">=3.10" -groups = ["docs"] -files = [ - {file = "encutils-1.0.0-py3-none-any.whl", hash = "sha256:605297da19a23d1b2da7d3b9bd75513acc979e9facf03aa7ec7ba04b5f567a79"}, - {file = "encutils-1.0.0.tar.gz", hash = "sha256:38eca5af18cebabd8be43c17f14c9d3fbba83cc5f7ac8e3ab1c86e24c4b2b91a"}, -] - -[package.dependencies] -chardet = "*" - [[package]] name = "enum-tools" version = "0.12.0" @@ -1022,18 +935,18 @@ files = [ [[package]] name = "idna" -version = "3.11" +version = "3.17" description = "Internationalized Domain 
Names in Applications (IDNA)" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main", "docs"] files = [ - {file = "idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"}, - {file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"}, + {file = "idna-3.17-py3-none-any.whl", hash = "sha256:466e48829084efe2548012b855df21540b96f2e20e51bd124c851536556a592c"}, + {file = "idna-3.17.tar.gz", hash = "sha256:5eb0cb53bc467c12eadcf6de83163ad8527cec9416f44b9b61b19caedad2b87f"}, ] [package.extras] -all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] +all = ["mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] [[package]] name = "imagesize" @@ -1115,7 +1028,7 @@ pfzy = ">=0.3.1,<0.4.0" prompt-toolkit = ">=3.0.1,<4.0.0" [package.extras] -docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] +docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] [[package]] name = "ipykernel" @@ -1193,49 +1106,15 @@ test-extra = ["curio", "ipython[test]", "jupyter_ai", "matplotlib (!=3.2.0)", "n [[package]] name = "ipython" -version = "9.10.1" +version = "9.14.0" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.11" groups = ["dev"] -markers = "python_version == \"3.11\"" -files = [ - {file = "ipython-9.10.1-py3-none-any.whl", hash = "sha256:82d18ae9fb9164ded080c71ef92a182ee35ee7db2395f67616034bebb020a232"}, - {file = "ipython-9.10.1.tar.gz", hash = "sha256:e170e9b2a44312484415bdb750492699bf329233b03f2557a9692cce6466ada4"}, -] - -[package.dependencies] -colorama = {version = ">=0.4.4", markers = "sys_platform == 
\"win32\""} -decorator = ">=4.3.2" -ipython-pygments-lexers = ">=1.0.0" -jedi = ">=0.18.1" -matplotlib-inline = ">=0.1.5" -pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""} -prompt_toolkit = ">=3.0.41,<3.1.0" -pygments = ">=2.11.0" -stack_data = ">=0.6.0" -traitlets = ">=5.13.0" -typing_extensions = {version = ">=4.6", markers = "python_version < \"3.12\""} - -[package.extras] -all = ["argcomplete (>=3.0)", "ipython[doc,matplotlib,terminal,test,test-extra]"] -black = ["black"] -doc = ["docrepr", "exceptiongroup", "intersphinx_registry", "ipykernel", "ipython[matplotlib,test]", "setuptools (>=70.0)", "sphinx (>=8.0)", "sphinx-rtd-theme (>=0.1.8)", "sphinx_toml (==0.0.4)", "typing_extensions"] -matplotlib = ["matplotlib (>3.9)"] -test = ["packaging (>=20.1.0)", "pytest (>=7.0.0)", "pytest-asyncio (>=1.0.0)", "setuptools (>=61.2)", "testpath (>=0.2)"] -test-extra = ["curio", "ipykernel (>6.30)", "ipython[matplotlib]", "ipython[test]", "jupyter_ai", "nbclient", "nbformat", "numpy (>=1.27)", "pandas (>2.1)", "trio (>=0.1.0)"] - -[[package]] -name = "ipython" -version = "9.12.0" -description = "IPython: Productive Interactive Computing" -optional = false -python-versions = ">=3.12" -groups = ["dev"] -markers = "python_version >= \"3.12\"" +markers = "python_version >= \"3.11\"" files = [ - {file = "ipython-9.12.0-py3-none-any.whl", hash = "sha256:0f2701e8ee86e117e37f50563205d36feaa259d2e08d4a6bc6b6d74b18ce128d"}, - {file = "ipython-9.12.0.tar.gz", hash = "sha256:01daa83f504b693ba523b5a407246cabde4eb4513285a3c6acaff11a66735ee4"}, + {file = "ipython-9.14.0-py3-none-any.whl", hash = "sha256:8fd984a3372c14b12790b084ba6b5cff5678c0cb063244a0034f06a51f20d6c2"}, + {file = "ipython-9.14.0.tar.gz", hash = "sha256:6f27ff0f1d9ea050e0551f71568bc4b34d8aba579e8f111c5b4175f44ac6b4aa"}, ] [package.dependencies] @@ -1246,9 +1125,11 @@ jedi = ">=0.18.2" matplotlib-inline = ">=0.1.6" pexpect = {version = ">4.6", markers = "sys_platform != 
\"win32\" and sys_platform != \"emscripten\""} prompt_toolkit = ">=3.0.41,<3.1.0" +psutil = {version = ">=7", markers = "sys_platform != \"emscripten\""} pygments = ">=2.14.0" stack_data = ">=0.6.0" traitlets = ">=5.13.0" +typing_extensions = {version = ">=4.6", markers = "python_version < \"3.12\""} [package.extras] all = ["argcomplete (>=3.0)", "ipython[doc,matplotlib,terminal,test,test-extra]", "types-decorator"] @@ -1305,23 +1186,22 @@ plugins = ["setuptools"] [[package]] name = "jedi" -version = "0.19.2" +version = "0.20.0" description = "An autocompletion tool for Python that can be used for text editors." optional = false -python-versions = ">=3.6" +python-versions = ">=3.10" groups = ["dev"] files = [ - {file = "jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9"}, - {file = "jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0"}, + {file = "jedi-0.20.0-py2.py3-none-any.whl", hash = "sha256:7bdd9c2634f56713299976f4cbd59cb3fa92165cc5e05ea811fb253480728b67"}, + {file = "jedi-0.20.0.tar.gz", hash = "sha256:c3f4ccbd276696f4b19c54618d4fb18f9fc24b0aef02acf704b23f487daa1011"}, ] [package.dependencies] -parso = ">=0.8.4,<0.9.0" +parso = ">=0.8.6,<0.9.0" [package.extras] -docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alabaster (==0.7.12)", "babel (==2.9.1)", "chardet (==4.0.0)", "commonmark (==0.8.1)", "docutils (==0.17.1)", "future (==0.18.2)", "idna (==2.10)", "imagesize (==1.2.0)", "mock (==1.0.1)", "packaging (==20.9)", "pyparsing (==2.4.7)", "pytz (==2021.1)", "readthedocs-sphinx-ext (==2.1.4)", "recommonmark (==0.5.0)", "requests (==2.25.1)", "six (==1.15.0)", "snowballstemmer (==2.1.0)", "sphinx (==1.8.5)", "sphinx-rtd-theme (==0.4.3)", "sphinxcontrib-serializinghtml (==1.1.4)", "sphinxcontrib-websupport (==1.2.4)", "urllib3 (==1.26.4)"] -qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] 
-testing = ["Django", "attrs", "colorama", "docopt", "pytest (<9.0.0)"] +dev = ["Django", "attrs", "colorama", "docopt", "flake8 (==7.1.2)", "pytest (<9.0.0)", "types-setuptools (==80.9.0.20250529)", "typing-extensions", "zuban (==0.7.0)"] +docs = ["Jinja2 (==3.1.6)", "MarkupSafe (==3.0.3)", "Pygments (==2.20.0)", "Sphinx (==9.1.0)", "alabaster (==1.0.0)", "babel (==2.18.0)", "certifi (==2026.4.22)", "charset-normalizer (==3.4.7)", "docutils (==0.22.4)", "idna (==3.13)", "imagesize (==2.0.0)", "iniconfig (==2.3.0)", "packaging (==26.2)", "pluggy (==1.6.0)", "pytest (==9.0.3)", "requests (==2.33.1)", "roman-numerals (==4.1.0)", "snowballstemmer (==3.0.1)", "sphinx-rtd-theme (==3.1.0)", "sphinxcontrib-applehelp (==2.0.0)", "sphinxcontrib-devhelp (==2.0.0)", "sphinxcontrib-htmlhelp (==2.1.0)", "sphinxcontrib-jquery (==4.1)", "sphinxcontrib-jsmath (==1.0.1)", "sphinxcontrib-qthelp (==2.0.0)", "sphinxcontrib-serializinghtml (==2.0.0)", "urllib3 (==2.6.3)"] [[package]] name = "jinja2" @@ -1355,7 +1235,7 @@ files = [ [package.dependencies] attrs = ">=22.2.0" -jsonschema-specifications = ">=2023.03.6" +jsonschema-specifications = ">=2023.3.6" referencing = ">=0.28.4" rpds-py = ">=0.25.0" @@ -1560,21 +1440,21 @@ files = [ [[package]] name = "matplotlib-inline" -version = "0.2.1" +version = "0.2.2" description = "Inline Matplotlib backend for Jupyter" optional = false python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76"}, - {file = "matplotlib_inline-0.2.1.tar.gz", hash = "sha256:e1ee949c340d771fc39e241ea75683deb94762c8fa5f2927ec57c83c4dffa9fe"}, + {file = "matplotlib_inline-0.2.2-py3-none-any.whl", hash = "sha256:3c821cf1c209f59fb2d2d64abbf5b23b67bcb2210d663f9918dd851c6da1fcf6"}, + {file = "matplotlib_inline-0.2.2.tar.gz", hash = "sha256:72f3fe8fce36b70d4a5b612f899090cd0401deddc4ea90e1572b9f4bfb058c79"}, ] [package.dependencies] traitlets = 
"*" [package.extras] -test = ["flake8", "nbdime", "nbval", "notebook", "pytest"] +test = ["flake8", "matplotlib", "nbdime", "nbval", "notebook", "pytest"] [[package]] name = "mccabe" @@ -1590,14 +1470,14 @@ files = [ [[package]] name = "mdit-py-plugins" -version = "0.5.0" +version = "0.6.1" description = "Collection of plugins for markdown-it-py" optional = false python-versions = ">=3.10" groups = ["docs"] files = [ - {file = "mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f"}, - {file = "mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6"}, + {file = "mdit_py_plugins-0.6.1-py3-none-any.whl", hash = "sha256:214c82fb2ac524472ab6a5bcab1de80f73b50443e187f401bfd77efbc7c6481d"}, + {file = "mdit_py_plugins-0.6.1.tar.gz", hash = "sha256:a2bca0f039f39dbd35fb74ae1b5f998608c437463371f0ff7f49a19a17a114d0"}, ] [package.dependencies] @@ -1606,7 +1486,7 @@ markdown-it-py = ">=2.0.0,<5.0.0" [package.extras] code-style = ["pre-commit"] rtd = ["myst-parser", "sphinx-book-theme"] -testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions", "pytest-timeout"] [[package]] name = "mdurl" @@ -1622,31 +1502,19 @@ files = [ [[package]] name = "mistune" -version = "3.2.0" +version = "3.2.1" description = "A sane and fast Markdown parser with useful plugins and renderers" optional = false python-versions = ">=3.8" groups = ["docs"] files = [ - {file = "mistune-3.2.0-py3-none-any.whl", hash = "sha256:febdc629a3c78616b94393c6580551e0e34cc289987ec6c35ed3f4be42d0eee1"}, - {file = "mistune-3.2.0.tar.gz", hash = "sha256:708487c8a8cdd99c9d90eb3ed4c3ed961246ff78ac82f03418f5183ab70e398a"}, + {file = "mistune-3.2.1-py3-none-any.whl", hash = "sha256:78cdb0ba5e938053ccf63651b352508d2efa9411dc8810bfb05f2dc5140c0048"}, + {file = "mistune-3.2.1.tar.gz", hash = 
"sha256:7c8e5501d38bac1582e067e46c8343f17d57ea1aaa735823f3aba1fd59c88a28"}, ] [package.dependencies] typing-extensions = {version = "*", markers = "python_version < \"3.11\""} -[[package]] -name = "more-itertools" -version = "11.0.2" -description = "More routines for operating on iterables, beyond itertools" -optional = false -python-versions = ">=3.10" -groups = ["docs"] -files = [ - {file = "more_itertools-11.0.2-py3-none-any.whl", hash = "sha256:6e35b35f818b01f691643c6c611bc0902f2e92b46c18fffa77ae1e7c46e912e4"}, - {file = "more_itertools-11.0.2.tar.gz", hash = "sha256:392a9e1e362cbc106a2457d37cabf9b36e5e12efd4ebff1654630e76597df804"}, -] - [[package]] name = "msgpack" version = "1.1.2" @@ -1893,14 +1761,14 @@ rdflib = ">=7.1.4" [[package]] name = "packaging" -version = "26.1" +version = "26.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" groups = ["main", "dev", "docs", "test"] files = [ - {file = "packaging-26.1-py3-none-any.whl", hash = "sha256:5d9c0669c6285e491e0ced2eee587eaf67b670d94a19e94e3984a481aba6802f"}, - {file = "packaging-26.1.tar.gz", hash = "sha256:f042152b681c4bfac5cae2742a55e103d27ab2ec0f3d88037136b6bfe7c9c5de"}, + {file = "packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e"}, + {file = "packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661"}, ] [[package]] @@ -1917,14 +1785,14 @@ files = [ [[package]] name = "parso" -version = "0.8.6" +version = "0.8.7" description = "A Python Parser" optional = false python-versions = ">=3.6" groups = ["dev"] files = [ - {file = "parso-0.8.6-py2.py3-none-any.whl", hash = "sha256:2c549f800b70a5c4952197248825584cb00f033b29c692671d3bf08bf380baff"}, - {file = "parso-0.8.6.tar.gz", hash = "sha256:2b9a0332696df97d454fa67b81618fd69c35a7b90327cbe6ba5c92d2c68a7bfd"}, + {file = "parso-0.8.7-py2.py3-none-any.whl", hash = 
"sha256:a8926eb2a1b915486941fdbd31e86a4baf88fe8c210f25f2f35ecec5b574ca1c"}, + {file = "parso-0.8.7.tar.gz", hash = "sha256:eaaac4c9fdd5e9e8852dc778d2d7405897ec510f2a298071453e5e3a07914bb1"}, ] [package.extras] @@ -1960,18 +1828,18 @@ files = [ ] [package.extras] -docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] +docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"] [[package]] name = "platformdirs" -version = "4.9.6" +version = "4.10.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.10" groups = ["main", "dev", "docs"] files = [ - {file = "platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917"}, - {file = "platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a"}, + {file = "platformdirs-4.10.0-py3-none-any.whl", hash = "sha256:fb516cdb12eb0d857d0cd85a7c57cea4d060bee4578d6cf5a14dfdf8cbf8784a"}, + {file = "platformdirs-4.10.0.tar.gz", hash = "sha256:31e761a6a0ca04faf7353ea759bdba55652be214725111e5aac52dfa29d4bef7"}, ] [[package]] @@ -2155,8 +2023,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.2", markers = "python_version < \"3.11\""}, - {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, {version = ">=0.3.6", markers = "python_version == \"3.11\""}, + {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, ] isort = ">=4.2.5,<5.13 || >5.13,<7" mccabe = ">=0.6,<0.8" @@ -2540,14 +2408,14 @@ typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""} 
[[package]] name = "requests" -version = "2.33.1" +version = "2.34.2" description = "Python HTTP for Humans." optional = false python-versions = ">=3.10" groups = ["main", "docs"] files = [ - {file = "requests-2.33.1-py3-none-any.whl", hash = "sha256:4e6d1ef462f3626a1f0a0a9c42dd93c63bad33f9f1c1937509b8c5c8718ab56a"}, - {file = "requests-2.33.1.tar.gz", hash = "sha256:18817f8c57c6263968bc123d237e3b8b08ac046f5456bd1e307ee8f4250d3517"}, + {file = "requests-2.34.2-py3-none-any.whl", hash = "sha256:2a0d60c172f83ac6ab31e4554906c0f3b3588d37b5cb939b1c061f4907e278e0"}, + {file = "requests-2.34.2.tar.gz", hash = "sha256:f288924cae4e29463698d6d60bc6a4da69c89185ad1e0bcc4104f584e960b9ed"}, ] [package.dependencies] @@ -2562,14 +2430,14 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<8)"] [[package]] name = "requests-cache" -version = "1.3.1" +version = "1.3.2" description = "A persistent cache for python requests" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "requests_cache-1.3.1-py3-none-any.whl", hash = "sha256:43a67448c3b2964c631ac7027b84607f2f63438e28104b68ad2211f32d9f606c"}, - {file = "requests_cache-1.3.1.tar.gz", hash = "sha256:784e9d07f72db4fe234830a065230c59eb446489528f271ba288c640897e47c4"}, + {file = "requests_cache-1.3.2-py3-none-any.whl", hash = "sha256:c52666c76b08daa94d05a99327dd24afc46f405abc044e8c2267b540f90673d0"}, + {file = "requests_cache-1.3.2.tar.gz", hash = "sha256:bdc3680931f98a1dea509d339ea6b45cea526945b47b250ce63ffd2744ee0b14"}, ] [package.dependencies] @@ -2581,7 +2449,7 @@ url-normalize = ">=2.0" urllib3 = ">=1.25.5" [package.extras] -all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "orjson (>=3.0)", "pymongo (>=3)", "pyyaml (>=6.0.1)", "redis (>=3)", "ujson (>=5.4)"] +all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "orjson (>=3.0) ; python_version < \"3.14\"", "pymongo (>=3)", "pyyaml (>=6.0.1)", "redis (>=3)", "ujson (>=5.4)"] dynamodb = ["boto3 (>=1.15)", "botocore 
(>=1.18)"] mongodb = ["pymongo (>=3)"] redis = ["redis (>=3)"] @@ -2610,14 +2478,14 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] [[package]] name = "rich-click" -version = "1.9.7" +version = "1.9.8" description = "Format click help output nicely with rich" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "rich_click-1.9.7-py3-none-any.whl", hash = "sha256:2f99120fca78f536e07b114d3b60333bc4bb2a0969053b1250869bcdc1b5351b"}, - {file = "rich_click-1.9.7.tar.gz", hash = "sha256:022997c1e30731995bdbc8ec2f82819340d42543237f033a003c7b1f843fc5dc"}, + {file = "rich_click-1.9.8-py3-none-any.whl", hash = "sha256:12873865396e6927835d4eabb1cc3996edcd65b7ac9b2391a29eca4f335a2f93"}, + {file = "rich_click-1.9.8.tar.gz", hash = "sha256:4008f921da88b5d91646c134ec881c1500e5a6b3f093e90e8f29400e09608371"}, ] [package.dependencies] @@ -2627,8 +2495,8 @@ rich = ">=12" typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} [package.extras] -dev = ["inline-snapshot (>=0.24)", "jsonschema (>=4)", "mypy (>=1.14.1)", "nodeenv (>=1.9.1)", "packaging (>=25)", "pre-commit (>=3.5)", "pytest (>=8.3.5)", "pytest-cov (>=5)", "rich-codex (>=1.2.11)", "ruff (>=0.12.4)", "typer (>=0.15)", "types-setuptools (>=75.8.0.20250110)"] -docs = ["markdown-include (>=0.8.1)", "mike (>=2.1.3)", "mkdocs-github-admonitions-plugin (>=0.1.1)", "mkdocs-glightbox (>=0.4)", "mkdocs-include-markdown-plugin (>=7.1.7) ; python_version >= \"3.9\"", "mkdocs-material-extensions (>=1.3.1)", "mkdocs-material[imaging] (>=9.5.18,<9.6.0)", "mkdocs-redirects (>=1.2.2)", "mkdocs-rss-plugin (>=1.15)", "mkdocs[docs] (>=1.6.1)", "mkdocstrings[python] (>=0.26.1)", "rich-codex (>=1.2.11)", "typer (>=0.15)"] +dev = ["inline-snapshot (>=0.24)", "jsonschema (>=4)", "mypy (>=1.14.1)", "nodeenv (>=1.9.1)", "packaging (>=25)", "pre-commit (>=3.5)", "pytest (>=8.3.5)", "pytest-cov (>=5)", "rich-codex (>=1.2.11)", "ruff (>=0.12.4)", "typer (>=0.15,<0.26)", "types-setuptools (>=75.8.0.20250110)"] 
+docs = ["markdown-include (>=0.8.1)", "mike (>=2.1.3)", "mkdocs-github-admonitions-plugin (>=0.1.1)", "mkdocs-glightbox (>=0.4)", "mkdocs-include-markdown-plugin (>=7.1.7) ; python_version >= \"3.9\"", "mkdocs-material-extensions (>=1.3.1)", "mkdocs-material[imaging] (>=9.5.18,<9.6.0)", "mkdocs-redirects (>=1.2.2)", "mkdocs-rss-plugin (>=1.15)", "mkdocs[docs] (>=1.6.1)", "mkdocstrings[python] (>=0.26.1)", "rich-codex (>=1.2.11)", "typer (>=0.15,<0.26)"] [[package]] name = "roman-numerals" @@ -2666,6 +2534,7 @@ description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.10" groups = ["docs"] +markers = "python_version == \"3.10\"" files = [ {file = "rpds_py-0.30.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:679ae98e00c0e8d68a7fda324e16b90fd5260945b45d3b824c892cec9eea3288"}, {file = "rpds_py-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4cc2206b76b4f576934f0ed374b10d7ca5f457858b157ca52064bdfc26b9fc00"}, @@ -2784,6 +2653,147 @@ files = [ {file = "rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84"}, ] +[[package]] +name = "rpds-py" +version = "2026.5.1" +description = "Python bindings to Rust's persistent data structures (rpds)" +optional = false +python-versions = ">=3.11" +groups = ["docs"] +markers = "python_version >= \"3.11\"" +files = [ + {file = "rpds_py-2026.5.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3397a5ed7174dc2786bb214030232fc36fe8e5584fec43a9952cc542b1a12036"}, + {file = "rpds_py-2026.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:99ab6ba7bfa2cb0f96a04e3652355bf04e3f51aceb1e943b8541dab7ba4828cc"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0efbe45632665e53e3db8fe1e5692db58fc5cb9bab4459d570b83efefe11164"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:01d17b29c0c23d82b1f4751147ec49cf451f1fc2554eb9ef5f957e55d2656ead"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7559f72b94ae52659086c595dfa017cde03155f7832071d30959049052cb3ece"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e25b7088f9ccbfc0dfcaa52bf969300ca229e10ecf758974ebcbb080a4b37bb"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:613fc4ee9eaef26dc5840666214dd6fbcebcf32f46e76f4abc473059f4e13dda"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:85264a90ff4c05c1568dd65f5921c837614b67c60358fb4c17df3b7f2e90690a"}, + {file = "rpds_py-2026.5.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe71bca7d547acb17027c7fd1624ff8aae623499c498d3e7011182c4de5c25e0"}, + {file = "rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05fa4f41f37ec97c9c260441a940450a192f78d774d2b097eee1379f1e1246a"}, + {file = "rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df1d2a1996755b24b9ecee92cb4d36c28f86f464a6a173349c26bab41e94b8c2"}, + {file = "rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8895840ac4809e5f60c88fd07617cd71326e73d6e5a8aa783c5c0f7c24985de2"}, + {file = "rpds_py-2026.5.1-cp311-cp311-win32.whl", hash = "sha256:3684a59b158a7683aaeb8e25352e9a9dd2122cec78f2d8530266e4f91b4c7b3f"}, + {file = "rpds_py-2026.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:7bd530e6a530bb3ea892f194fafa455f3516ac25ecf7143fd33c09be62b0470a"}, + {file = "rpds_py-2026.5.1-cp311-cp311-win_arm64.whl", hash = "sha256:0a5ae4dbe43c1076983b72616496919872ae7bbe7a1e21cc48336bc3154d130b"}, + {file = "rpds_py-2026.5.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3abe24a66e57adcfa645d718063a5fa5103ecc71ddbf26d78af8f9368018ff1d"}, + {file = "rpds_py-2026.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:58b1d94308ddf0b1982f61f2eb54bf92997c9ece8a8093ef014250f4a517906c"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa92420128dadce7f54bd73ba1825a273e9268fe9e35dbf7e6362890efa4e08"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca653c6546386227cd9800d1bef6a348099acf8db4250341da6d90f663d6dfcb"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66c93681c4729e4e3ecba31b8179fae083ff3118841672835140338b4b9867c1"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40ff257542e04796880e011e15cd4dc21c2599975df2aaa8f2c8495ca574e1a5"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6825cc329b290e93c5f6a9be2393118a763f6ccf6abd83704e0c102ca583644"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:de42116e69cb53b911cc34aee5ab98f36c597b822545045d49e938818b99e5e4"}, + {file = "rpds_py-2026.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0f920015df2a504bebaba6d4c31ccf3fcf942f92655c086da30b671aad19aa6"}, + {file = "rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0408a24e44feb919423dc6d9da677cb5cddb894d2ca9e763967d156d9c60fab4"}, + {file = "rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cea68bcd53467561ae2f96a6bdad1544299ba97b5b0ddcd5ac3d376e5c781c24"}, + {file = "rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4be8b1d2a705cc37d08256004e1d07de143fa0075c8e85a3df020b776f62b732"}, + {file = "rpds_py-2026.5.1-cp312-cp312-win32.whl", hash = "sha256:6736718bd4fc49cbcb538ba30516fdbef161522acefb739657d48b97bd864fed"}, + {file = "rpds_py-2026.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:0a7d1eec967df0e9b22614a5e177622e0c89611d03727fa0cb48e45028907870"}, + {file = 
"rpds_py-2026.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:1841d067089e117142d79b98aa0df2f08b52f2ecc1819dd2700636c0db74a473"}, + {file = "rpds_py-2026.5.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:efef4ac29c6ff495531eb17ee705b62841ecaa291b7c7077e848ea03e237164d"}, + {file = "rpds_py-2026.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c39f5b67a8a2e67179ada2a954227d670fe65fa9098457f698f56ddf248709b3"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5c30f3f04eef4fbd362226a6f31d7c8895ca4fbb6e0b790f6890a98d8da8559"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:277f6c82f0580848796c7ecc8a7173aa3bfb928e4ff831261c2f60a81dc270db"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63c2c4c213f1a4e3f3de28ecab029dbdee976324e729c0d7a55211be72576b02"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3350ec808fb538fe71a1f94dfaa0e29c598dfad805ce49f0caec5ae3183c652b"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1b964e3ab599e718dc46c018d104b1ebc007cbc6567d827c94a687fca56d77e"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:19cb09fab7b7fc96b2a6e28f2e34b72a3705ff27b37edb77455316e5d3f3dc9b"}, + {file = "rpds_py-2026.5.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abe76bcdba31e576cb83eeb8797aa0d882b738fef6dc65d0601fc753806a5b46"}, + {file = "rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8bff7073db3899158fff55ebf57b113a67030af26f80a18978f9f0aa60250ddf"}, + {file = "rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8ba264fa49be666cd9cc56bf34ec7002fb3d27a4aee5bcb4d43d0d18feb1bb6f"}, + {file = "rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:4860b603ddda0475a8885499b3729e90229d480105b42651962a5397d995fa89"}, + {file = "rpds_py-2026.5.1-cp313-cp313-win32.whl", hash = "sha256:7944270ae71383f6e2657dd7d5ce4eeb4ac2d0059a6738f0510583d462ab4842"}, + {file = "rpds_py-2026.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:88647f43a73c4e01be19b04ceef0c8d3a1958153604d13c773becd8016f2a0cf"}, + {file = "rpds_py-2026.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:453895624ecf7db7063b1004e44037522bbaef9ff6a945e59bc71662d7a03abd"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:b4e4bc98639ec915f512fde3aa7a95e0041d95d9c3cc86eea841fa63cb1e8600"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cacedb7a6e167680acba45ad5716e89067d225dc80da0d7040cae8c81d4572fa"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68700371c5d7ae1412862ddfa719090925c93ecf351c566d66f09d04b136ea00"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:296c799becfa849c779c8725494fe9ed94959ed886787df4364b058465bad7f0"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3858b908218ee108d0bbfb2095ccc237648053c9bf98affad7cb079acaf1d97"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4fb8d2e7cb2f850b169806d61d1b991738acec96500a75c30f49caf064ce7cef"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b74c10ed6a8f190f4287f53bcfea348b92a84a9c9f70d30183d1e6172d580d"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:b9a6528956191c48c52294a592dbd4a8386d7048bdb25c0efcb6b966466c6d83"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:af03e34e860047bc7a352b842856fcf78798fbb81132cc98bd2f907ab4eb9cd2"}, + {file = 
"rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fea6e836d10abbe191d557d33bd58bd5987725fe63aa1eefe557d230209855bd"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:fc0c0f878ea770a0a8a462456c5ad36fc9fe6358e6b76fdadc7f17575e0b8bf1"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e0b360f316d966b048b085857630b3cc51f3db2f07b06f440eac8f695374d1e3"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-win32.whl", hash = "sha256:a2999883eedf72fdfb7520b92c7d4ec2572a71ff40239377aa604cc529eecafc"}, + {file = "rpds_py-2026.5.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e07be2a9d7122bd6e82dea89814ef8dc893feb1aae97fec1630f3263bbb30e55"}, + {file = "rpds_py-2026.5.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:1f2c391c3059798093b65df23aca2cac150460ae9c630d99dec83d703d9485b9"}, + {file = "rpds_py-2026.5.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:413b424f7c4ee65ab5e5be91f5731be0f8b41a1ee2b12dfe810d716312e95a78"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c595a1d9255dce0599e13130d1440ab2506654f2b50294226ee06402f8fef63"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1c27c5f6102eac8c03e7595a00827a53b271ba40a53b59ff8709170e0855ea4a"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c7fcf61d44cacecaf3aea542b0e053db77972a4573e7ceda16fb2b399161195"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c817a189d4ee14290420e5ff051e4dd6baa13f3edf84685071dee07a6d538ee"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21846aac0ed2e0589f38c12dc44e77bb64e494b771eadbcf169cba00566ba7ba"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = 
"sha256:b317c87a13f769a4e787819bd508aaa5d69aa09b0880de9af6d3a8a54571cdec"}, + {file = "rpds_py-2026.5.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce87129d9f2c14fa6c4a8601fb80eb4488c80d38a20cd13758ef11123e14995d"}, + {file = "rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9cdddb6c1207d284d94fd1530adf57fbd797fe7c4b8704ba85f49414f2557e7d"}, + {file = "rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:4e237e139f94d3c036fd28eb9f564c99055476ff4ff05cd42be55ce349b5aa02"}, + {file = "rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ed0954b524873214369184a9c82b0eaa45a3fbb9a798cd95b17e0d98499e7ea0"}, + {file = "rpds_py-2026.5.1-cp314-cp314-win32.whl", hash = "sha256:2d88621d6a7d4dfa633d21abe90f280bb205274e16b1d1e61c6ad4640b2453b7"}, + {file = "rpds_py-2026.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:cef8ac28d26f4dda3533060c20fbf80a325458fa9fd23ea72a73cdfa8e978838"}, + {file = "rpds_py-2026.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:eaaea962c68cdc68d4a533ba985ab8e9484277910bbfaa2ab3ef7732667bfed8"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:21942f52dbbd5f8758bf021213d28bd45c39e873e65e2407faf5f1846f5761ad"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f414556f6e3958300ff941e40c9f97e3dc9774ddd1b3434c475d73dd354bbed3"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef1013a8625c74043210190b246f5b1551e09757c1f356c6e4160ef96c5bc081"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cc68e231a77a5f0d774ae278a1f8e55c0456501820847c1e4efb3829f3441df6"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9baffb505aff33acc69b422a19f77806680f3c8632227d79f48de8a810d1c2c5"}, + {file = 
"rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8d2f912928d426e8cfa396f7f3f8d29a59e6689c86dcca3c420730c1096322b"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90f628283be835db980c941767d41c9a27b5239e54ba0a9c1335247e82406964"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:1ebb2f0ab7e16132995a72de805170e0203df0c3dd22e1ef1cd1fdd90bd7a131"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f3df3d16ded76f1f8c9cdebd0e1ea55fdf4c23b812de189814da7cf229c22a81"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9af8905b8f854990e40d5206aa5ac58d9b0fe0b7f351ff2bb086c20f6c8c6a47"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:036a36a87fb1cd3b214d11c4b3c4f7d2ddad933625dca1c900b56a057c07740a"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:62ae3853454fe9ef283a03c96c2d835d39e84b14643a9d62c82ef0fb87d702ca"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-win32.whl", hash = "sha256:6c3d771a46ec18b12af06ce36243a9a80b07a5d0515236332d90863ca8bb326a"}, + {file = "rpds_py-2026.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c93c629be4636cf54337bd5f06c104d55e42ced54d681f6fe21ae510a65116f6"}, + {file = "rpds_py-2026.5.1-cp315-cp315-macosx_10_12_x86_64.whl", hash = "sha256:3574b55c604b8f75dacb007136508bbc0db406e626301778096a133327e7f2fb"}, + {file = "rpds_py-2026.5.1-cp315-cp315-macosx_11_0_arm64.whl", hash = "sha256:94068eb3ae6d43f5a786b7db96a406a34e6d5c24489feef32fd6e8946ea7b291"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a5b10e8ce894825f380a8f1b6444cf73c294dfea62afbb2d13e3a9e630cec1"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:fc09f82e63d4bcd58149572f857a431bae851dc747e313c3b5bdf7abb907fda8"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e10464d17df3b582745c25cec695cb9558bca2cb6ddb631aee1787fc72c767b2"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba05adbf15d994c38ec0b7ab32e858e5110c21e9009a00a86545fd220f84e038"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77c004fdc7b891967106f78ddfd7b076bfe6813c6139c6fff6aed3bcaa960b26"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_31_riscv64.whl", hash = "sha256:83bcf894486c9d78dd290d3c0124ff6dd8875d3025e2090a8ec49fcc37c55fdd"}, + {file = "rpds_py-2026.5.1-cp315-cp315-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c3df104083952a0e0c6f10de33e440eabe98fb6317d23e1a58c68f6df08d01b9"}, + {file = "rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:980450826cf22e133c57e0835070bdd0dd3f73b9b708c3ce223def2cb9469e14"}, + {file = "rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_i686.whl", hash = "sha256:205dde846f24332ab0c1188699a043b8d165b79bb84529ce272c45048ff6be01"}, + {file = "rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:3966b82dd563176396df030f3dd52a6e54cb69b718e95e78bd555ed3d1e0185d"}, + {file = "rpds_py-2026.5.1-cp315-cp315-win32.whl", hash = "sha256:7818f8d0a415be74d2be3590b0a1c1f463a642f4d0217e7d10602dceef5b79aa"}, + {file = "rpds_py-2026.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:b3cc20c0d800af78fd0fac68086e28c1856cec51ea528bb81ea851aa40d39325"}, + {file = "rpds_py-2026.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:3609e9939a8a76cd904cf98a3f1f13b5dc7e150adeaee89e0ea09652ea213e16"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-macosx_10_12_x86_64.whl", hash = "sha256:5d333a7127d4b307601ac37792bee01bb95c867cbfacf21b6375b804d6bbd723"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-macosx_11_0_arm64.whl", hash = 
"sha256:b5f077b44a4f7808520f66dae234988d867deb9aed9be5da057ce9ba831b2a41"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d8f9b7b78c9538fc9e04e82ec0e888ff0c3cffcfad152c77e57cd09351a98a"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e3a8ae58895ac107ed934a6bf51e5846f95c53b9b940c2c6d310838fd5846358"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0957cf3c2b8632ec7aaebffebea8005b353cc2a237b6e2ae3c2cac0820704cfb"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c396c1304de421050b3681ea70f371874b54d41b0151e96109758144c231e30b"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad1bff7f666b9598e573815affd666aac6a13a585dde336f843e33350c7fadc"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_31_riscv64.whl", hash = "sha256:656a042550878f12d45752452d47094b7cfe5ad1e9d7b87b5a22ad3ae5ff8015"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73c4bd4f70294737b5206a3e8e30ccadbf8a60301831c8ea23eec5dbeea1ecfa"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:43bca78665423cabae77146f2fe7ce55272b6c8d55d82cca83effd42c7e13972"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_i686.whl", hash = "sha256:42d0f20e85e549c870749d0e247f0c10d318a45b7e9676d575d2dcb04a1b2e66"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:b1be5c35683684d5331b93600c210e8367c254683d8a6df6bd21bd2da3a334fb"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-win32.whl", hash = "sha256:75808f6c38ce7749bb68cc2770161aae5045e6c6f6781a9782e74b93304399df"}, + {file = "rpds_py-2026.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:90bd6630002a1c7f09e7843dd79f0d24f3d2897cc25a753480917865d14f15b3"}, + {file = 
"rpds_py-2026.5.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:edf2765d84e42447f112ad877af8fe1db0089aaec5b28e88d6eab45e7fe99cea"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ad3773236e95f7f33991eb125224b7da66f206504d032a253a02da7e134519fb"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a04df86b3f0fade39ec8fd0e0aab089b1da9fbd2b48df778a57ef96f5e7d38df"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6142dbd80c4df62a5d899f0d616d417f84e0bc8d32526c8e5589019d75d028a7"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b35217adefe87f2fe4db7e9766cabe84744bfe9616d9667be18988928c7f2dc"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b95d5e11fc712b752081183a55a244c03cd00570489edd7014d8899f8ceb8162"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:141c9498daf2ace9eda35d2b0e376f9ea8b058d84f2aef4f96fccfd449a2f251"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:6f249f8b860a200ad35193af961183ebe9132710484e6f6ce0cf89fd83c63a9a"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e4abbf391a70be864920858bf360f4fb380577c9a0f732438a1996726e2c195b"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:c74005a7bb87752acf351c93897ec63ad77a07a0da7ecad9c050e32e7286ba34"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:8213afbe8a3a906fb9acb2014423fe3359ee783d0bf90995f70623a3217bfa6c"}, + {file = "rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:8c43a8a973270fd173bf48cdf80bbe66312421cba68d40845034f174f2389049"}, + 
{file = "rpds_py-2026.5.1.tar.gz", hash = "sha256:07b24fea40541e28570e5b795a4a38fbdcd12550c06bd0748005ecc8116ca256"}, +] + [[package]] name = "ruamel-yaml" version = "0.19.1" @@ -2816,26 +2826,26 @@ files = [ [[package]] name = "snowballstemmer" -version = "3.0.1" -description = "This package provides 32 stemmers for 30 languages generated from Snowball algorithms." +version = "3.1.0" +description = "This package provides 36 stemmers for 34 languages generated from Snowball algorithms." optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*" +python-versions = ">=3.3" groups = ["docs"] files = [ - {file = "snowballstemmer-3.0.1-py3-none-any.whl", hash = "sha256:6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064"}, - {file = "snowballstemmer-3.0.1.tar.gz", hash = "sha256:6d5eeeec8e9f84d4d56b847692bacf79bc2c8e90c7f80ca4444ff8b6f2e52895"}, + {file = "snowballstemmer-3.1.0-py3-none-any.whl", hash = "sha256:17e6d1da216aa07db6dad37139ea70cf13c4b2e9a096f6e64a9648fc657d3154"}, + {file = "snowballstemmer-3.1.0.tar.gz", hash = "sha256:fd9e34526b23340cd23ffea6c9f9760974ecc2c2ac9e1d81401443ccdb2a801f"}, ] [[package]] name = "soupsieve" -version = "2.8.3" +version = "2.8.4" description = "A modern CSS selector implementation for Beautiful Soup." 
optional = false python-versions = ">=3.9" groups = ["docs"] files = [ - {file = "soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95"}, - {file = "soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349"}, + {file = "soupsieve-2.8.4-py3-none-any.whl", hash = "sha256:e7e6b0769c8f51ed59acab6e994b00621096cfb1c640a7509295987388fbaf65"}, + {file = "soupsieve-2.8.4.tar.gz", hash = "sha256:e121fd02e975c695e4e9e8774a5ee35d74714b59307868dcc5319ad2d9e3328e"}, ] [[package]] @@ -3273,21 +3283,21 @@ widechars = ["wcwidth"] [[package]] name = "tinycss2" -version = "1.4.0" +version = "1.5.1" description = "A tiny CSS parser" optional = false -python-versions = ">=3.8" +python-versions = ">=3.10" groups = ["docs"] files = [ - {file = "tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289"}, - {file = "tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7"}, + {file = "tinycss2-1.5.1-py3-none-any.whl", hash = "sha256:3415ba0f5839c062696996998176c4a3751d18b7edaaeeb658c9ce21ec150661"}, + {file = "tinycss2-1.5.1.tar.gz", hash = "sha256:d339d2b616ba90ccce58da8495a78f46e55d4d25f9fd71dfd526f07e7d53f957"}, ] [package.dependencies] webencodings = ">=0.4" [package.extras] -doc = ["sphinx", "sphinx_rtd_theme"] +doc = ["furo", "sphinx"] test = ["pytest", "ruff"] [[package]] @@ -3362,51 +3372,51 @@ files = [ [[package]] name = "tomlkit" -version = "0.14.0" +version = "0.15.0" description = "Style preserving TOML library" optional = false python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680"}, - {file = "tomlkit-0.14.0.tar.gz", hash = "sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064"}, + {file = "tomlkit-0.15.0-py3-none-any.whl", 
hash = "sha256:4dbc8f0fc024412b57ced8757ac7461305126a648ff8c2c807fcb8e133a78738"}, + {file = "tomlkit-0.15.0.tar.gz", hash = "sha256:7d1a9ecba3086638211b13814ea79c90dd54dd11993564376f3aa92271f5c7a3"}, ] [[package]] name = "tornado" -version = "6.5.5" +version = "6.5.6" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." optional = false python-versions = ">=3.9" groups = ["dev", "docs"] files = [ - {file = "tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa"}, - {file = "tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521"}, - {file = "tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5"}, - {file = "tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07"}, - {file = "tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e"}, - {file = "tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca"}, - {file = "tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7"}, - {file = "tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b"}, - {file = "tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6"}, - {file = "tornado-6.5.5.tar.gz", hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9"}, + {file = 
"tornado-6.5.6-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:65fcfaafb079435c2c19dc9e07c0f1cf0fa9051759ed0a7d0a3ba7ea7f64919c"}, + {file = "tornado-6.5.6-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:38bc01b4acacded2de63ae78023548e41ebe6fbed3ec05a796d7ae3ad893887e"}, + {file = "tornado-6.5.6-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b942e6a137fda31ff54bf8e6e2c8d1c37f1f50583f3ed53fb840b53b9601d104"}, + {file = "tornado-6.5.6-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8666946e70171b8c3f1fc9b7876fac492e84822c4c7f3746f4e8f8bc9ac92a79"}, + {file = "tornado-6.5.6-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1c34cfab7ad6d104f052f55de06d39bbafc5885cfeb4da688803308dbcfa90b7"}, + {file = "tornado-6.5.6-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:385f35e4e22fb52551dfcda4cdc8c30c61c2c001aef5ddad99cdfe116952efd3"}, + {file = "tornado-6.5.6-cp39-abi3-win32.whl", hash = "sha256:db475f1b67b2809b10bb16264829087724ca8d24fe4ed47f7b8675cae453ef86"}, + {file = "tornado-6.5.6-cp39-abi3-win_amd64.whl", hash = "sha256:6739bf1e8eb09230f1280ddbd3236f0309db70f2c551a8dbc40f62babdf82f79"}, + {file = "tornado-6.5.6-cp39-abi3-win_arm64.whl", hash = "sha256:2543597b24a695d72338a9a77818362d72387c03ae173f1f169eadc5c91466ac"}, + {file = "tornado-6.5.6.tar.gz", hash = "sha256:9a365179fe8ff6b8766f602c0f67c185d778193e9bdd828b19f0b6ed7764177d"}, ] [[package]] name = "traitlets" -version = "5.14.3" +version = "5.15.0" description = "Traitlets Python configuration system" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev", "docs"] files = [ - {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, - {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, + {file = "traitlets-5.15.0-py3-none-any.whl", hash = 
"sha256:fb36a18867a6803deab09f3c5e0fa81bb7b26a5c9e82501c9933f759166eff40"}, + {file = "traitlets-5.15.0.tar.gz", hash = "sha256:4fead733f81cf1c4c938e06f8ca4633896833c9d89eff878159457f4d4392971"}, ] [package.extras] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] -test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] +test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "mypy (>=1.7.0,<1.19) ; platform_python_implementation == \"PyPy\"", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] [[package]] name = "typing-extensions" @@ -3423,34 +3433,34 @@ markers = {dev = "python_version < \"3.12\"", test = "python_version == \"3.10\" [[package]] name = "typos" -version = "1.45.1" +version = "1.47.0" description = "Source Code Spelling Correction" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "typos-1.45.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f3cd3d7e7e35f971e04974c7b34563dc1efb101841be3a39fec36c51f3d6ca2d"}, - {file = "typos-1.45.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:be6f26c580915e63df107f88bc766f131efe5f7d01d41c7bad83e6f9e5fe42be"}, - {file = "typos-1.45.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cd6a6ccbb1fc4fb8f0d9fee0201642d7a7560bd1661ebbefb9eac2da1ae4a5c"}, - {file = "typos-1.45.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d33c7750a29524dff020a17f356ed079227f36f43ec57f193e9681606a35749b"}, - {file = "typos-1.45.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:745b0584eeead4593858671113fceed3c28b8ca67bdc7a517120127aa509c6a6"}, - {file = "typos-1.45.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e962d414fb92ad31dc4c930fc5d07ac9e4b55fdd4f42688468040fc5649d92da"}, - {file = "typos-1.45.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:f39afdfcc2d159705f3ffb11162e13e8affd994d07836738c8d2a592194604ab"}, - {file = 
"typos-1.45.1-py3-none-win32.whl", hash = "sha256:212fdbb7b90d40522fe77efb69c15f7063c146812df01d5605e5d7816a3f37d3"}, - {file = "typos-1.45.1-py3-none-win_amd64.whl", hash = "sha256:67a56bd1f06184f3761883f4f75dd3cc196f939180de595d0980164d4a19d363"}, - {file = "typos-1.45.1.tar.gz", hash = "sha256:a1ac7ab02e74d4c4a2f8525b1529e1ce6261051df3229701836175fb91bb0583"}, + {file = "typos-1.47.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:c4104785d237fd2256023ba4339e404ed2db58888af703eb0726a1441a8e85d7"}, + {file = "typos-1.47.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:cb95b6fde16fe5bab11788bc14d3d9ec49dcbeec9517378e2fca9e283e6b7822"}, + {file = "typos-1.47.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:653eba984d2cc55eb47d50771761bb7e0d6e52771c2489fd76b1f86fbed49a2a"}, + {file = "typos-1.47.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be447ce8400154e4ae515cc9ecef99532cee6b29271ba3adbe3487304cd2c3c2"}, + {file = "typos-1.47.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9856752e08face132c7d08de875567675f2c54e3e04096d6ebad09c6430e16f0"}, + {file = "typos-1.47.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:438b8579e440ff88baf51cb577b2eb4514d065509ba41a10981e1ea9048a519b"}, + {file = "typos-1.47.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:19cfe53c33ac7d0c5f029fb97939b082139b63fbbb88350ab7e6df28be31629e"}, + {file = "typos-1.47.0-py3-none-win32.whl", hash = "sha256:2de87acff0b6857ce693059a6291227eec999284e16a87162178c847236bafca"}, + {file = "typos-1.47.0-py3-none-win_amd64.whl", hash = "sha256:05c1547e3dbbb6fe8a861b56cb98e9922cd5d20170ee2e7e649faa1605dfdb49"}, + {file = "typos-1.47.0.tar.gz", hash = "sha256:f00d98b8338abd6016f968fb7a3911c911010c17c333c2e102e8893b1c97db8f"}, ] [[package]] name = "url-normalize" -version = "2.2.1" +version = "3.0.0" description = "URL normalization for Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.10" 
groups = ["main"] files = [ - {file = "url_normalize-2.2.1-py3-none-any.whl", hash = "sha256:3deb687587dc91f7b25c9ae5162ffc0f057ae85d22b1e15cf5698311247f567b"}, - {file = "url_normalize-2.2.1.tar.gz", hash = "sha256:74a540a3b6eba1d95bdc610c24f2c0141639f3ba903501e61a52a8730247ff37"}, + {file = "url_normalize-3.0.0-py3-none-any.whl", hash = "sha256:95234bd359f86831c1fd87c248877f2a6887db2f3b5087120083f2fffcba4889"}, + {file = "url_normalize-3.0.0.tar.gz", hash = "sha256:0552cbf2831a32a28994a13d29bca58a60e10ff6c0380e343ec6d1c2a0d232d8"}, ] [package.dependencies] @@ -3461,14 +3471,14 @@ dev = ["mypy", "pre-commit", "pytest", "pytest-cov", "pytest-socket", "ruff"] [[package]] name = "urllib3" -version = "2.6.3" +version = "2.7.0" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main", "docs"] files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897"}, + {file = "urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c"}, ] [package.extras] @@ -3479,14 +3489,14 @@ zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] [[package]] name = "wcwidth" -version = "0.6.0" +version = "0.7.0" description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = ">=3.8" groups = ["main", "dev"] files = [ - {file = "wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad"}, - {file = "wcwidth-0.6.0.tar.gz", hash = "sha256:cdc4e4262d6ef9a1a57e018384cbeb1208d8abbc64176027e2c2455c81313159"}, + {file 
= "wcwidth-0.7.0-py3-none-any.whl", hash = "sha256:5d69154c429a82910e241c738cd0e2976fac8a2dd47a1a805f4afed1c0f136f2"}, + {file = "wcwidth-0.7.0.tar.gz", hash = "sha256:90e3a7ea092341c44b99562e75d09e4d5160fe7a3974c6fb842a101a95e7eed0"}, ] [[package]] @@ -3503,24 +3513,24 @@ files = [ [[package]] name = "zipp" -version = "3.23.1" +version = "4.1.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "python_version < \"3.12\"" files = [ - {file = "zipp-3.23.1-py3-none-any.whl", hash = "sha256:0b3596c50a5c700c9cb40ba8d86d9f2cc4807e9bedb06bcdf7fac85633e444dc"}, - {file = "zipp-3.23.1.tar.gz", hash = "sha256:32120e378d32cd9714ad503c1d024619063ec28aad2248dc6672ad13edfa5110"}, + {file = "zipp-4.1.0-py3-none-any.whl", hash = "sha256:25ad4e16390cd314347dd8f1de67a2ac538ae658ed4ab9db16029c07c188e97f"}, + {file = "zipp-4.1.0.tar.gz", hash = "sha256:4cb57381f544315db7688e976e922a2b18cdb513d21cc194eb42232ba2a3e602"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +check = ["pytest-checkdocs (>=2.14)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=2.2)"] +enabler = ["pytest-enabler (>=3.4)"] test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] -type = ["pytest-mypy"] +type = ["pytest-mypy (>=1.0.1) ; platform_python_implementation != \"PyPy\""] [metadata] lock-version = "2.1" diff --git a/pyproject.toml b/pyproject.toml index 794420b08..277814026 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "roc-validator" -version = "0.9.0" +version = "0.10.0" description = "A Python package to validate 
RO-Crates" authors = [ "Marco Enrico Piras ", @@ -115,5 +115,8 @@ filterwarnings = [ "ignore::DeprecationWarning:rdflib.plugins.parsers.jsonld", ] +[tool.typos.default] +extend-ignore-re = ["[0-9a-f]{7,40}"] # Ignore long hexadecimal strings, which are often used as identifiers (e.g., Git commit hashes, UUIDs) and are not typically misspelled words. + [tool.typos.files] extend-exclude = ["tests/data", "docs/diagrams", "*.json", "*.html", "*__init__.py"] diff --git a/rocrate_validator/cli/__init__.py b/rocrate_validator/cli/__init__.py index 22283ba1b..ec84cfba4 100644 --- a/rocrate_validator/cli/__init__.py +++ b/rocrate_validator/cli/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rocrate_validator.cli.commands import profiles, validate +from rocrate_validator.cli.commands import cache, profiles, validate from rocrate_validator.cli.main import cli -__all__ = ["cli", "profiles", "validate"] +__all__ = ["cli", "cache", "profiles", "validate"] diff --git a/rocrate_validator/cli/commands/cache.py b/rocrate_validator/cli/commands/cache.py new file mode 100644 index 000000000..83ac8035a --- /dev/null +++ b/rocrate_validator/cli/commands/cache.py @@ -0,0 +1,573 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +``rocrate-validator cache`` subcommand: inspect, warm and reset the HTTP cache +used by the validator. 
+""" + +from __future__ import annotations + +import copy as _copy +import json +from datetime import datetime +from pathlib import Path +from typing import List, Optional + +from rich.table import Table + +from rocrate_validator.cli.commands.errors import handle_error +from rocrate_validator.cli.main import cli, click +from rocrate_validator.models import Profile +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.cache_warmup import WarmUpResult, discover_cacheable_urls_from_profiles, warm_up_urls +from rocrate_validator.utils.http import HttpRequester +from rocrate_validator.utils.paths import get_default_http_cache_path, get_profiles_path + +logger = logging.getLogger(__name__) + + +def _resolve_cache_path(cache_path: Optional[Path]) -> Path: + """Return the effective cache path, creating the parent directory.""" + if cache_path is None: + path = get_default_http_cache_path() + else: + path = Path(cache_path) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def _reset_requester(cache_path: Path, offline: bool = False) -> None: + """Re-initialize the HttpRequester singleton with the given cache path.""" + HttpRequester.reset() + HttpRequester.initialize_cache( + cache_path=str(cache_path), + cache_max_age=-1, + offline=offline, + ) + + +@cli.group("cache") +@click.pass_context +def cache(ctx): + """ + [magenta]rocrate-validator:[/magenta] Manage the HTTP cache + """ + + +@cache.command("info") +@click.option( + "--cache-path", + type=click.Path(), + default=None, + show_default=False, + help="Path to the HTTP cache directory (defaults to the user cache dir)", +) +@click.pass_context +def cache_info(ctx, cache_path: Optional[Path] = None): + """ + Display information about the HTTP cache. 
+ """ + console = ctx.obj['console'] + try: + resolved = _resolve_cache_path(cache_path) + _reset_requester(resolved) + info = HttpRequester().cache_info() + table = Table(title="HTTP Cache", show_lines=False) + table.add_column("Property", style="bold") + table.add_column("Value") + table.add_row("Path", str(info.get("path") or resolved)) + table.add_row("Backend", str(info.get("backend") or "โ€”")) + table.add_row("Persistent", "yes" if info.get("permanent") else "no") + table.add_row("Offline mode", "yes" if info.get("offline") else "no") + table.add_row("Entries", str(info.get("entries", 0))) + size = info.get("size_bytes", 0) or 0 + table.add_row("Size", _format_bytes(size)) + console.print(table) + except Exception as e: + handle_error(e, console) + + +@cache.command("list") +@click.option( + "--cache-path", + type=click.Path(), + default=None, + show_default=False, + help="Path to the HTTP cache directory (defaults to the user cache dir)", +) +@click.option( + "--url", + "url_filter", + type=click.STRING, + default=None, + metavar="SUBSTRING", + help="Show only entries whose URL contains SUBSTRING (case-insensitive)", +) +@click.option( + "--sort", + "sort_by", + type=click.Choice(["url", "size", "created"], case_sensitive=False), + default="created", + show_default=True, + help="Field to sort entries by", +) +@click.option( + "--order", + "sort_order", + type=click.Choice(["asc", "desc"], case_sensitive=False), + default=None, + show_default=False, + help="Sort direction (default: desc for size/created, asc for url)", +) +@click.option( + "--json", + "as_json", + is_flag=True, + default=False, + help="Print entries as JSON (size in bytes, datetimes as ISO 8601)", +) +@click.pass_context +def cache_list( + ctx, + cache_path: Optional[Path] = None, + url_filter: Optional[str] = None, + sort_by: str = "created", + sort_order: Optional[str] = None, + as_json: bool = False, +): + """ + List entries currently stored in the HTTP cache (alias: `ls`). 
+ """ + console = ctx.obj['console'] + try: + resolved = _resolve_cache_path(cache_path) + _reset_requester(resolved) + entries = _collect_cache_entries( + url_filter=url_filter, + sort_by=sort_by.lower(), + sort_order=sort_order.lower() if sort_order else None, + ) + + if as_json: + click.echo(json.dumps([_entry_to_dict(e) for e in entries], indent=2)) + return + + if not entries: + if url_filter: + console.print(f"[yellow]No entries match URL filter:[/yellow] {url_filter}") + else: + console.print("[yellow]Cache is empty.[/yellow]") + return + + table = Table(title=f"HTTP Cache entries ({len(entries)})", show_lines=False) + table.add_column("URL", overflow="fold") + table.add_column("Size", justify="right") + table.add_column("Content-Type") + table.add_column("Created") + table.add_column("Expires") + total = 0 + for e in entries: + total += e["size"] + table.add_row( + e["url"], + _format_bytes(e["size"]), + e["content_type"] or "โ€”", + _format_dt(e["created_at"]), + _format_expires(e["expires"], e["is_expired"]), + ) + console.print(table) + console.print(f"[bold]Total:[/bold] {len(entries)} entries, {_format_bytes(total)}") + except Exception as e: + handle_error(e, console) + + +@cache.command("reset") +@click.option( + "--cache-path", + type=click.Path(), + default=None, + show_default=False, + help="Path to the HTTP cache directory (defaults to the user cache dir)", +) +@click.option( + "-y", + "--yes", + is_flag=True, + default=False, + help="Do not prompt for confirmation before removing cache entries", +) +@click.pass_context +def cache_reset(ctx, cache_path: Optional[Path] = None, yes: bool = False): + """ + Remove every entry from the HTTP cache. 
+ """ + console = ctx.obj['console'] + interactive = ctx.obj.get('interactive', False) + exit_code = 0 + try: + resolved = _resolve_cache_path(cache_path) + _reset_requester(resolved) + info = HttpRequester().cache_info() + entries = info.get("entries", 0) + size = _format_bytes(info.get("size_bytes", 0) or 0) + console.print( + f"[bold]HTTP cache:[/bold] {info.get('path') or resolved} " + f"([cyan]{entries}[/cyan] entries, {size})" + ) + if entries == 0: + console.print("[green]Cache is already empty.[/green]") + return + if not yes: + if not interactive: + console.print( + "[yellow]Use --yes to remove entries in non-interactive mode.[/yellow]" + ) + exit_code = 1 + else: + confirm = click.confirm( + f"Remove all {entries} cached entries?", default=False + ) + if not confirm: + console.print("Aborted.") + else: + HttpRequester().clear_cache() + console.print("[green]HTTP cache cleared.[/green]") + else: + HttpRequester().clear_cache() + console.print("[green]HTTP cache cleared.[/green]") + except Exception as e: + handle_error(e, console) + return + if exit_code: + ctx.exit(exit_code) + + +@cache.command("warm") +@click.option( + "--cache-path", + type=click.Path(), + default=None, + show_default=False, + help="Path to the HTTP cache directory (defaults to the user cache dir)", +) +@click.option( + "--profiles-path", + type=click.Path(exists=True), + default=None, + show_default=False, + help="Path containing the profile definitions", +) +@click.option( + "--extra-profiles-path", + type=click.Path(exists=True), + default=None, + show_default=False, + help="Path containing additional user profile definitions", +) +@click.option( + "-p", + "--profile-identifier", + multiple=True, + type=click.STRING, + default=None, + show_default=False, + metavar="Profile-ID", + help="Identifier of a profile to warm (may be given multiple times)", +) +@click.option( + "--all-profiles", + is_flag=True, + default=False, + help="Warm cacheable URLs declared by every installed 
profile", +) +@click.option( + "--crate", + multiple=True, + type=click.STRING, + default=None, + metavar="URI", + help="URL of a remote RO-Crate to download and cache (may be given multiple times)", +) +@click.option( + "-u", + "--url", + multiple=True, + type=click.STRING, + default=None, + metavar="URL", + help="Arbitrary URL to fetch and cache (may be given multiple times)", +) +@click.pass_context +def cache_warm( + ctx, + cache_path: Optional[Path] = None, + profiles_path: Optional[Path] = None, + extra_profiles_path: Optional[Path] = None, + profile_identifier: Optional[List[str]] = None, + all_profiles: bool = False, + crate: Optional[List[str]] = None, + url: Optional[List[str]] = None, +): + """ + Pre-populate the HTTP cache with resources declared by profiles and with + optional remote RO-Crate URLs. + """ + console = ctx.obj['console'] + explicit_urls = list(url or []) + invalid_urls = [u for u in explicit_urls if not u.lower().startswith(("http://", "https://"))] + if invalid_urls: + raise click.BadParameter( + f"expected an http(s):// address; got: {', '.join(invalid_urls)}", + param_hint="'--url' / '-u'", + ) + + exit_with_failure = False + try: + resolved_cache = _resolve_cache_path(cache_path) + _reset_requester(resolved_cache, offline=False) + profiles_dir = Path(profiles_path) if profiles_path else get_profiles_path() + extra_dir = Path(extra_profiles_path) if extra_profiles_path else None + + requested_ids = list(profile_identifier or []) + urls: List[str] = [] + profile_scope: Optional[str] = None + + # Only fall back to "warm all profiles" when the user gave no other + # source (no -p, no --crate, no --url, no --all-profiles). 
+ any_explicit_source = bool(crate or explicit_urls or requested_ids or all_profiles) + if all_profiles or requested_ids or not any_explicit_source: + Profile.load_profiles( + profiles_path=profiles_dir, + extra_profiles_path=extra_dir, + ) + loaded_profiles = list(Profile.all()) + if requested_ids: + selected = [] + missing = [] + # (requested, resolved, all candidates) for tokens that matched + # more than one versioned profile โ€” we warn so the user knows + # which one was picked and how to opt for a different version. + ambiguous_fallbacks = [] + for ident in requested_ids: + profile = Profile.get_by_identifier(ident) + if profile is None: + # Mirror the fallback used by `validate`: if no exact + # identifier match, treat the value as a token and + # pick the highest-version profile sharing it. + candidates = Profile.get_by_token(ident) or [] + if candidates: + profile = max(candidates, key=lambda p: p.version) + if len(candidates) > 1: + ambiguous_fallbacks.append((ident, profile, candidates)) + if profile is None: + missing.append(ident) + else: + selected.append(profile) + for requested, resolved, candidates in ambiguous_fallbacks: + other_versions = sorted( + p.identifier for p in candidates if p.identifier != resolved.identifier + ) + console.print( + f"[yellow]Note:[/yellow] '{requested}' matched multiple profiles; " + f"using [cyan]{resolved.identifier}[/cyan] (highest version). " + f"Pass the full identifier to pick a different one " + f"(available: {', '.join(other_versions)})." 
+ ) + if missing: + console.print( + f"[yellow]Profile(s) not found and skipped:[/yellow] {', '.join(missing)}" + ) + profile_scope = f"profiles: {', '.join(p.identifier for p in selected)}" + urls = discover_cacheable_urls_from_profiles(selected) + else: + profile_scope = "all installed profiles" + urls = discover_cacheable_urls_from_profiles(loaded_profiles) + + results: List[WarmUpResult] = [] + if urls: + console.print( + f"[bold]Warming cache for {profile_scope}[/bold] " + f"([cyan]{len(urls)}[/cyan] URL(s))..." + ) + results.extend(warm_up_urls(urls)) + + if crate: + console.print( + f"[bold]Fetching remote RO-Crates[/bold] ([cyan]{len(crate)}[/cyan] URL(s))..." + ) + results.extend(_warm_remote_crates(list(crate))) + + if explicit_urls: + console.print( + f"[bold]Fetching explicit URLs[/bold] ([cyan]{len(explicit_urls)}[/cyan] URL(s))..." + ) + results.extend(warm_up_urls(explicit_urls)) + + if not results: + console.print("[yellow]Nothing to warm up.[/yellow]") + return + + table = Table(title="Warm-up results", show_lines=False) + table.add_column("URL", overflow="fold") + table.add_column("Status") + table.add_column("Detail") + ok = 0 + failed = 0 + for r in results: + colour = {"ok": "green", "skipped": "cyan", "failed": "red"}.get(r.status, "white") + table.add_row(r.url, f"[{colour}]{r.status}[/{colour}]", r.detail or "") + if r.status == "ok": + ok += 1 + elif r.status == "failed": + failed += 1 + console.print(table) + console.print( + f"[bold]Summary:[/bold] {ok} cached, {failed} failed, " + f"{len(results) - ok - failed} skipped" + ) + exit_with_failure = failed > 0 + except Exception as e: + handle_error(e, console) + return + if exit_with_failure: + ctx.exit(1) + + +def _warm_remote_crates(urls: List[str]) -> List[WarmUpResult]: + """ + Download each remote RO-Crate URL via ``HttpRequester.fetch_fresh`` + so that its response is stored in the cache. 
+ """ + requester = HttpRequester() + results: List[WarmUpResult] = [] + for url in urls: + try: + response = requester.fetch_fresh(url, allow_redirects=True) + status = getattr(response, "status_code", None) + if status is None: + results.append(WarmUpResult(url=url, status="failed", detail="no status code")) + continue + if status >= 400: + results.append(WarmUpResult(url=url, status="failed", detail=f"HTTP {status}")) + continue + # Touch the body so the cache backend stores the full response. + _ = response.content + results.append(WarmUpResult(url=url, status="ok", detail=f"HTTP {status}")) + except Exception as e: + logger.debug("Remote crate warm-up failed for %s: %s", url, e) + results.append(WarmUpResult(url=url, status="failed", detail=str(e))) + return results + + +def _format_bytes(size: int) -> str: + if size <= 0: + return "0 B" + units = ["B", "KiB", "MiB", "GiB", "TiB"] + idx = 0 + value = float(size) + while value >= 1024 and idx < len(units) - 1: + value /= 1024 + idx += 1 + return f"{value:.2f} {units[idx]}" + + +def _format_dt(value: Optional[datetime]) -> str: + if value is None: + return "โ€”" + return value.strftime("%Y-%m-%d %H:%M:%SZ") if value.tzinfo else value.strftime("%Y-%m-%d %H:%M:%S") + + +def _format_expires(value: Optional[datetime], is_expired: bool) -> str: + if value is None: + return "never" + formatted = _format_dt(value) + return f"[red]{formatted} (expired)[/red]" if is_expired else formatted + + +_DEFAULT_SORT_ORDER = {"url": "asc", "size": "desc", "created": "desc"} + + +def _collect_cache_entries( + url_filter: Optional[str] = None, + sort_by: str = "size", + sort_order: Optional[str] = None, +) -> List[dict]: + """ + Read every cached response and return a list of plain dicts. Filtering + and sorting happen here so the CLI rendering paths (table / JSON) share + the same data shape. 
+ + ``sort_order`` is one of ``"asc"``/``"desc"`` or ``None`` to use the + field's natural default (URLs sort ascending; size and timestamps sort + descending so the largest/most recent come first). + """ + cache = getattr(HttpRequester().session, "cache", None) + if cache is None: + return [] + needle = url_filter.lower() if url_filter else None + entries: List[dict] = [] + responses = getattr(cache, "responses", None) or {} + for key in list(responses): + try: + resp = responses[key] + except Exception as exc: + logger.debug("Skipping unreadable cache entry %s: %s", key, exc) + continue + url = getattr(resp, "url", "") or "" + if needle and needle not in url.lower(): + continue + entries.append({ + "key": key, + "url": url, + "status": getattr(resp, "status_code", None), + "size": int(getattr(resp, "size", 0) or 0), + "content_type": (getattr(resp, "headers", {}) or {}).get("Content-Type"), + "created_at": getattr(resp, "created_at", None), + "expires": getattr(resp, "expires", None), + "is_expired": bool(getattr(resp, "is_expired", False)), + }) + effective_order = sort_order or _DEFAULT_SORT_ORDER.get(sort_by, "desc") + reverse = effective_order == "desc" + if sort_by == "url": + entries.sort(key=lambda e: e["url"].lower(), reverse=reverse) + elif sort_by == "created": + entries.sort(key=lambda e: e["created_at"] or datetime.min, reverse=reverse) + else: # "size" + entries.sort(key=lambda e: e["size"], reverse=reverse) + return entries + + +def _entry_to_dict(entry: dict) -> dict: + """JSON-safe view of an entry produced by ``_collect_cache_entries``.""" + def _iso(value: Optional[datetime]) -> Optional[str]: + return value.isoformat() if value is not None else None + return { + "url": entry["url"], + "status": entry["status"], + "size_bytes": entry["size"], + "content_type": entry["content_type"], + "created_at": _iso(entry["created_at"]), + "expires": _iso(entry["expires"]), + "is_expired": entry["is_expired"], + } + + +# Shell-style alias: `cache ls` runs 
the same callback as `cache list`. +# A shallow copy gives the alias its own name and hides it from --help so +# the command appears only once in the listing. +_cache_ls_alias = _copy.copy(cache_list) +_cache_ls_alias.name = "ls" +_cache_ls_alias.hidden = True +cache.add_command(_cache_ls_alias) diff --git a/rocrate_validator/cli/commands/profiles.py b/rocrate_validator/cli/commands/profiles.py index df4a91e67..7fc826b83 100644 --- a/rocrate_validator/cli/commands/profiles.py +++ b/rocrate_validator/cli/commands/profiles.py @@ -12,20 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re import sys from pathlib import Path +from typing import Optional from rich.align import Align from rich.markdown import Markdown from rich.padding import Padding from rich.panel import Panel +from rich.syntax import Syntax from rich.table import Table from rocrate_validator import services from rocrate_validator.cli.commands.errors import handle_error from rocrate_validator.cli.main import cli, click from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER -from rocrate_validator.models import (LevelCollection, RequirementLevel, +from rocrate_validator.models import (LevelCollection, Profile, + RequirementCheck, RequirementLevel, Severity) from rocrate_validator.utils import log as logging from rocrate_validator.utils.io_helpers.colors import get_severity_color @@ -158,11 +162,13 @@ def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFA '-v', '--verbose', is_flag=True, - help="Show detailed list of requirements", + help="Show detailed list of requirements (or, when a check identifier is given, " + "show the source code of the check)", default=False, show_default=True ) @click.argument("profile-identifier", type=click.STRING, default=DEFAULT_PROFILE_IDENTIFIER, required=True) +@click.argument("check-identifier", type=click.STRING, required=False, default=None) @click.option( 
'--no-paging', is_flag=True, @@ -174,11 +180,19 @@ def list_profiles(ctx, no_paging: bool = False): # , profiles_path: Path = DEFA @click.pass_context def describe_profile(ctx, profile_identifier: str = DEFAULT_PROFILE_IDENTIFIER, + check_identifier: Optional[str] = None, profiles_path: Path = DEFAULT_PROFILES_PATH, extra_profiles_path: Path = None, verbose: bool = False, no_paging: bool = False): """ - Show a profile + Show a profile, or โ€” when CHECK_IDENTIFIER is given โ€” show a single requirement check. + + \b + The check identifier accepts either form: + * relative: . (e.g. "1.2") + * full: _. (e.g. "ro-crate-1.1_1.2") + + With -v on a single check, the source code of the check is shown. """ # Get the console console = ctx.obj['console'] @@ -197,6 +211,14 @@ def describe_profile(ctx, profile = services.get_profile(profile_identifier, profiles_path=profiles_path, extra_profiles_path=extra_profiles_path) + # Single-check view + if check_identifier: + check = __resolve_check__(profile, check_identifier) + with console.pager(pager=pager, styles=not console.no_color) if enable_pager else console: + console.print(get_app_header_rule()) + __describe_check__(console, profile, check, verbose=verbose) + return + # Set the subheader title subheader_title = f"[bold][cyan]Profile:[/cyan] [magenta italic]{profile.identifier}[/magenta italic][/bold]" @@ -237,6 +259,9 @@ def describe_profile(ctx, title_align="left", border_style="cyan"), (0, 1, 0, 1))) console.print(Padding(table, (1, 1))) + except click.ClickException: + # Let click format usage errors natively (e.g., BadParameter from check resolution) + raise except Exception as e: handle_error(e, console) @@ -313,28 +338,7 @@ def __verbose_describe_profile__(profile): color = get_severity_color(check.severity) level_info = f"[{color}]{check.severity.name}[/{color}]" levels_list.add(level_info) - override = None - # Uncomment the following lines to show the overridden checks - # if check.overridden_by: - # 
logger.debug("Check %s is overridden by: %s", check.identifier, check.overridden_by) - # override = "[overridden by: " - # for co in check.overridden_by: - # severity_color = get_severity_color(co.severity) - # override += f"[bold][magenta]{co.requirement.profile.identifier}[/magenta] "\ - # f"[{severity_color}]{co.relative_identifier}[/{severity_color}][/bold]" - # if co != check.overridden_by[-1]: - # override += ", " - # override += "]" - if check.overrides: - logger.debug("Check %s overrides: %s", check.identifier, check.overrides) - override = "[" + "overrides: " - for co in check.overrides: - severity_color = get_severity_color(co.severity) - override += f"[bold][magenta]{co.requirement.profile.identifier}[/magenta] " - f"[{severity_color}]{co.relative_identifier}[/{severity_color}][/bold]" - if co != check.overrides[-1]: - override += ", " - override += "]" + override = __format_overrides__(check.overrides, label="overrides") if check.overrides else None description_table = Table(show_header=False, show_footer=False, show_lines=False, show_edge=False) if override: @@ -367,3 +371,159 @@ def __verbose_describe_profile__(profile): for row in table_rows: table.add_row(*row) return table + + +_CHECK_ID_RE = re.compile(r"^(?P\d+)\.(?P\d+)$") + + +def __resolve_check__(profile: Profile, check_identifier: str) -> RequirementCheck: + """ + Resolve a check identifier to a RequirementCheck instance. + Accepts either the relative form ``.`` or the full form + ``_.``. + """ + raw = check_identifier.strip() + relative = raw + prefix = f"{profile.identifier}_" + if "_" in raw: + if not raw.startswith(prefix): + raise click.BadParameter( + f"Check identifier '{raw}' does not belong to profile '{profile.identifier}'.", + param_hint="CHECK_IDENTIFIER", + ) + relative = raw[len(prefix):] + + match = _CHECK_ID_RE.match(relative) + if not match: + raise click.BadParameter( + f"Invalid check identifier '{check_identifier}'. " + f"Expected '.' (e.g. '1.2') or " + f"'_.' (e.g. 
'{profile.identifier}_1.2').", + param_hint="CHECK_IDENTIFIER", + ) + req_number = int(match.group("req")) + check_number = int(match.group("check")) + + requirement = next( + (r for r in profile.requirements if not r.hidden and r.order_number == req_number), + None, + ) + if requirement is None: + raise click.BadParameter( + f"No requirement #{req_number} in profile '{profile.identifier}'. " + f"Run `rocrate-validator profiles describe {profile.identifier}` to list requirements.", + param_hint="CHECK_IDENTIFIER", + ) + check = next( + (c for c in requirement.get_checks() if c.order_number == check_number), + None, + ) + if check is None: + raise click.BadParameter( + f"No check #{check_number} in requirement #{req_number} of profile " + f"'{profile.identifier}'. Run `rocrate-validator profiles describe " + f"{profile.identifier} -v` to list checks.", + param_hint="CHECK_IDENTIFIER", + ) + return check + + +def __format_overrides__(checks: list, label: str) -> str: + """ + Format an "overrides" / "overridden by" Rich-styled string for a list of checks. + """ + parts = [] + for co in checks: + severity_color = get_severity_color(co.severity) + parts.append( + f"[bold][magenta]{co.requirement.profile.identifier}[/magenta] " + f"[{severity_color}]{co.relative_identifier}[/{severity_color}][/bold]" + ) + return f"[bold red]{label}:[/bold red] " + ", ".join(parts) + + +def __describe_check__(console, profile: Profile, check: RequirementCheck, verbose: bool = False) -> None: + """ + Render a single requirement check. 
+ """ + severity_color = get_severity_color(check.severity) + requirement = check.requirement + + header = ( + f"[bold cyan]Profile:[/bold cyan] " + f"[italic magenta]{profile.identifier}[/italic magenta]\n" + f"[bold cyan]Identifier:[/bold cyan] " + f"[italic green]{check.identifier}[/italic green]\n" + f"[bold cyan]Name:[/bold cyan] [italic]{check.name}[/italic]\n" + f"[bold cyan]Severity:[/bold cyan] " + f"[bold {severity_color}]{check.severity.name}[/bold {severity_color}]\n" + f"[bold cyan]Requirement:[/bold cyan] " + f"[italic]#{requirement.order_number} โ€” {requirement.name}[/italic]" + ) + if requirement.path: + header += ( + "\n[bold cyan]Source file:[/bold cyan] " + f"[italic green]{shorten_path(requirement.path)}[/italic green]" + ) + + title = f"[bold][cyan]Check:[/cyan] [magenta italic]{check.identifier}[/magenta italic][/bold]" + console.print(Padding( + Panel(header, title=title, padding=(1, 1, 1, 1), title_align="left", border_style="cyan"), + (0, 1, 0, 1), + )) + + description_panel = Panel( + Markdown(check.description.strip()), + title="[bold cyan]Description[/bold cyan]", + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ) + console.print(Padding(description_panel, (1, 1, 0, 1))) + + if check.overrides: + overrides_text = __format_overrides__(check.overrides, label="overrides") + console.print(Padding(Panel( + overrides_text, + title="[bold cyan]Overrides[/bold cyan]", + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), (1, 1, 0, 1))) + if check.overridden_by: + overridden_text = __format_overrides__(check.overridden_by, label="overridden by") + console.print(Padding(Panel( + overridden_text, + title="[bold cyan]Overridden by[/bold cyan]", + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), (1, 1, 0, 1))) + + if verbose: + snippet = check.get_source_snippet() + if snippet is None: + console.print(Padding(Panel( + "[italic]Source code not available for this 
check kind.[/italic]", + title="[bold cyan]Source[/bold cyan]", + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), (1, 1, 0, 1))) + else: + source_title = f"[bold cyan]Source ({snippet.language})[/bold cyan]" + if snippet.source_path: + source_title += f': [italic green]"{snippet.source_path.name}"[/italic green]' + console.print(Padding(Panel( + Syntax( + snippet.code, + snippet.language, + theme="ansi_dark", + line_numbers=False, + word_wrap=True, + ), + title=source_title, + title_align="left", + border_style="bright_black", + padding=(1, 1, 1, 1), + ), (1, 1, 1, 1))) diff --git a/rocrate_validator/cli/commands/validate.py b/rocrate_validator/cli/commands/validate.py index e574e6674..0683780d1 100644 --- a/rocrate_validator/cli/commands/validate.py +++ b/rocrate_validator/cli/commands/validate.py @@ -223,10 +223,22 @@ def validate_uri(ctx, param, value): '-nc', '--no-cache', is_flag=True, - help="Disable the HTTP cache", + help=( + "Disable the HTTP cache entirely: every request goes to the network " + "and nothing is persisted. Incompatible with [bold]--offline[/bold]." + ), + default=False, + show_default=True, +) +@click.option( + '--offline', + is_flag=True, + help=( + "Offline mode: HTTP requests are served only from the cache. " + "Pre-populate the cache with [bold]rocrate-validator cache warm[/bold]." 
+ ), default=False, show_default=True, - hidden=True ) @click.pass_context def validate(ctx, @@ -249,7 +261,8 @@ def validate(ctx, output_line_width: Optional[int] = None, cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, cache_path: Optional[Path] = None, - no_cache: bool = False): + no_cache: bool = False, + offline: bool = False): """ [magenta]rocrate-validator:[/magenta] Validate a RO-Crate against a profile """ @@ -277,10 +290,36 @@ def validate(ctx, logger.debug("cache_max_age: %s", cache_max_age) logger.debug("cache_path: %s", os.path.abspath(cache_path) if cache_path else None) logger.debug("no_cache: %s", no_cache) + logger.debug("offline: %s", offline) + + # --no-cache and --offline are contradictory: offline mode requires a cache + # to serve requests from, while no-cache disables caching entirely. + if no_cache and offline: + raise click.UsageError( + "The --no-cache and --offline flags are mutually exclusive: " + "offline mode relies on the HTTP cache to serve resources." + ) if rocrate_uri: logger.debug("rocrate_path: %s", os.path.abspath(rocrate_uri)) + # Warn the user when a remote RO-Crate is about to be validated in offline mode: + # the cached copy (if any) will be used, and it may be out of sync with the remote. 
+ if offline and isinstance(rocrate_uri, str) and rocrate_uri.split(":", 1)[0].lower() in ("http", "https", "ftp"): + console.print( + Padding( + Rule( + "[bold yellow]WARNING:[/bold yellow] " + "[bold]The target RO-Crate is remote and offline mode is enabled.[/bold]\n" + "The cached version of the RO-Crate will be used if available.\n" + "The cached copy may be out of sync with the version currently published remotely.", + align="center", + style="bold yellow", + ), + (1, 2, 0, 2), + ) + ) + # Parse the skip_checks option logger.debug("skip_checks: %s", skip_checks) # Parse the skip_checks option @@ -314,8 +353,13 @@ def validate(ctx, "abort_on_first": fail_fast, "skip_checks": skip_checks_list, "metadata_only": metadata_only, - "cache_max_age": cache_max_age if not no_cache else -1, - "cache_path": cache_path + "cache_max_age": cache_max_age, + "cache_path": cache_path, + "offline": offline, + "no_cache": no_cache, + # When offline is requested, remote crate fetching must use the cache + # instead of the "disable download" short-circuit. 
+ "disable_remote_crate_download": False if offline else True, } # Print the application header diff --git a/rocrate_validator/constants.py b/rocrate_validator/constants.py index 93aad322a..c717219f7 100644 --- a/rocrate_validator/constants.py +++ b/rocrate_validator/constants.py @@ -87,5 +87,11 @@ JSON_OUTPUT_FORMAT_VERSION = "0.2" # Http Cache Settings -DEFAULT_HTTP_CACHE_MAX_AGE = 300 # in seconds +DEFAULT_HTTP_CACHE_MAX_AGE = -1 # in seconds; negative means "never expire" DEFAULT_HTTP_CACHE_PATH_PREFIX = '/tmp/rocrate_validator_cache' +# Directory name used under the user's cache root for the persistent HTTP cache +USER_CACHE_DIR_NAME = "rocrate-validator" +# Filename (without extension) of the persistent HTTP cache under the user cache dir +USER_CACHE_FILE_NAME = "http_cache" +# Environment variable to disable automatic warm-up of the HTTP cache +AUTO_WARM_ENV_VAR = "ROCRATE_VALIDATOR_AUTO_WARM" diff --git a/rocrate_validator/errors.py b/rocrate_validator/errors.py index daf58fe4c..74b1ed794 100644 --- a/rocrate_validator/errors.py +++ b/rocrate_validator/errors.py @@ -12,7 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, Optional, Union + +if TYPE_CHECKING: + # Imported only for type-checking to avoid a circular import: + # rocrate_validator.utils.uri imports this module at runtime. 
+ from rocrate_validator.utils.uri import URI class ROCValidatorError(Exception): @@ -243,29 +251,34 @@ def __repr__(self): class ROCrateInvalidURIError(ROCValidatorError): """Raised when an invalid URI is provided.""" - def __init__(self, uri: str, message: Optional[str] = None): + def __init__(self, uri: Union[str, Path, URI], message: Optional[str] = None): self._uri = uri self._message = message or self.default_error_message(uri) @property - def uri(self) -> Optional[str]: - """The invalid URI.""" + def uri(self) -> Union[str, Path, URI]: + """The invalid URI, as originally provided (str, Path, or URI).""" return self._uri @property - def message(self) -> Optional[str]: + def uri_string(self) -> str: + """The invalid URI normalised to its string form.""" + return str(self._uri) + + @property + def message(self) -> str: """The error message.""" return self._message def __str__(self) -> str: return self._message - def __repr__(self): + def __repr__(self) -> str: return f"ROCrateInvalidURIError({self._uri!r})" @classmethod - def default_error_message(cls, uri: str) -> str: - return f"\"{uri}\" is not a valid RO-Crate URI. "\ + def default_error_message(cls, uri: Union[str, Path, URI]) -> str: + return f"\"{str(uri)}\" is not a valid RO-Crate URI. "\ "It MUST be either a local path to the RO-Crate root directory or a local/remote RO-Crate ZIP file." 
diff --git a/rocrate_validator/models.py b/rocrate_validator/models.py index ffaed96a7..cb69fe77d 100644 --- a/rocrate_validator/models.py +++ b/rocrate_validator/models.py @@ -16,6 +16,7 @@ import bisect import enum +import importlib import inspect import json import re @@ -25,35 +26,48 @@ from datetime import datetime, timezone from functools import total_ordering from pathlib import Path -from typing import Optional, Protocol, Tuple, Union +from typing import Optional, Protocol, Tuple, Type, Union from urllib.error import HTTPError import enum_tools from rdflib import RDF, RDFS, Graph, Namespace, URIRef from rocrate_validator import __version__ -from rocrate_validator.constants import (DEFAULT_HTTP_CACHE_MAX_AGE, - DEFAULT_ONTOLOGY_FILE, - DEFAULT_PROFILE_IDENTIFIER, - DEFAULT_PROFILE_README_FILE, - IGNORED_PROFILE_DIRECTORIES, - JSON_OUTPUT_FORMAT_VERSION, PROF_NS, - PROFILE_FILE_EXTENSIONS, - PROFILE_SPECIFICATION_FILE, - ROCRATE_METADATA_FILE, SCHEMA_ORG_NS) -from rocrate_validator.errors import (DuplicateRequirementCheck, - InvalidProfilePath, ProfileNotFound, - ProfileSpecificationError, - ProfileSpecificationNotFound, - ROCrateMetadataNotFoundError) +from rocrate_validator.constants import ( + DEFAULT_HTTP_CACHE_MAX_AGE, + DEFAULT_ONTOLOGY_FILE, + DEFAULT_PROFILE_IDENTIFIER, + DEFAULT_PROFILE_README_FILE, + IGNORED_PROFILE_DIRECTORIES, + JSON_OUTPUT_FORMAT_VERSION, + PROF_NS, + PROFILE_FILE_EXTENSIONS, + PROFILE_SPECIFICATION_FILE, + ROCRATE_METADATA_FILE, + SCHEMA_ORG_NS, +) +from rocrate_validator.errors import ( + DuplicateRequirementCheck, + InvalidProfilePath, + ProfileNotFound, + ProfileSpecificationError, + ProfileSpecificationNotFound, + ROCrateMetadataNotFoundError, +) from rocrate_validator.events import Event, EventType, Publisher, Subscriber from rocrate_validator.rocrate import ROCrate from rocrate_validator.utils import log as logging +from rocrate_validator.utils.cache_warmup import auto_warm_up_for_settings from 
rocrate_validator.utils.collections import MapIndex, MultiIndexMap -from rocrate_validator.utils.http import HttpRequester -from rocrate_validator.utils.paths import get_profiles_path -from rocrate_validator.utils.python_helpers import \ - get_requirement_name_from_file +from rocrate_validator.utils.document_loader import install_document_loader +from rocrate_validator.utils.http import HttpRequester, find_offline_cache_miss +from rocrate_validator.utils.paths import ( + get_default_http_cache_path, + get_profiles_path, +) +from rocrate_validator.utils.python_helpers import ( + get_requirement_name_from_file, +) from rocrate_validator.utils.uri import URI # set the default profiles path @@ -93,13 +107,13 @@ def get(name: str) -> Severity: @total_ordering @dataclass class RequirementLevel: - """ Represents a requirement level. A requirement has a name and a severity level of type :class:`.Severity`. It implements the comparison operators to allow ordering of the requirement levels. """ + name: str severity: Severity @@ -121,7 +135,7 @@ def __hash__(self) -> int: return hash((self.name, self.severity)) def __repr__(self) -> str: - return f'RequirementLevel(name={self.name}, severity={self.severity})' + return f"RequirementLevel(name={self.name}, severity={self.severity})" def __str__(self) -> str: return self.name @@ -148,36 +162,39 @@ class LevelCollection: are to be interpreted as described in **RFC 2119**. 
""" + #: The requirement level OPTIONAL is mapped to the OPTIONAL severity level - OPTIONAL = RequirementLevel('OPTIONAL', Severity.OPTIONAL) + OPTIONAL = RequirementLevel("OPTIONAL", Severity.OPTIONAL) #: The requirement level MAY is mapped to the OPTIONAL severity level - MAY = RequirementLevel('MAY', Severity.OPTIONAL) + MAY = RequirementLevel("MAY", Severity.OPTIONAL) #: The requirement level REQUIRED is mapped to the REQUIRED severity level - REQUIRED = RequirementLevel('REQUIRED', Severity.REQUIRED) + REQUIRED = RequirementLevel("REQUIRED", Severity.REQUIRED) #: The requirement level SHOULD is mapped to the RECOMMENDED severity level - SHOULD = RequirementLevel('SHOULD', Severity.RECOMMENDED) + SHOULD = RequirementLevel("SHOULD", Severity.RECOMMENDED) #: The requirement level SHOULD NOT is mapped to the RECOMMENDED severity level - SHOULD_NOT = RequirementLevel('SHOULD_NOT', Severity.RECOMMENDED) + SHOULD_NOT = RequirementLevel("SHOULD_NOT", Severity.RECOMMENDED) #: The requirement level RECOMMENDED is mapped to the RECOMMENDED severity level - RECOMMENDED = RequirementLevel('RECOMMENDED', Severity.RECOMMENDED) + RECOMMENDED = RequirementLevel("RECOMMENDED", Severity.RECOMMENDED) #: The requirement level MUST is mapped to the REQUIRED severity level - MUST = RequirementLevel('MUST', Severity.REQUIRED) + MUST = RequirementLevel("MUST", Severity.REQUIRED) #: The requirement level MUST_NOT is mapped to the REQUIRED severity level - MUST_NOT = RequirementLevel('MUST_NOT', Severity.REQUIRED) + MUST_NOT = RequirementLevel("MUST_NOT", Severity.REQUIRED) #: The requirement level SHALL is mapped to the REQUIRED severity level - SHALL = RequirementLevel('SHALL', Severity.REQUIRED) + SHALL = RequirementLevel("SHALL", Severity.REQUIRED) #: The requirement level SHALL_NOT is mapped to the REQUIRED severity level - SHALL_NOT = RequirementLevel('SHALL_NOT', Severity.REQUIRED) + SHALL_NOT = RequirementLevel("SHALL_NOT", Severity.REQUIRED) def __init__(self): raise 
NotImplementedError(f"{type(self)} can't be instantiated") @staticmethod def all() -> list[RequirementLevel]: - return [level for name, level in inspect.getmembers(LevelCollection) - if not inspect.isroutine(level) - and not inspect.isdatadescriptor(level) and not name.startswith('__')] + return [ + level + for name, level in inspect.getmembers(LevelCollection) + if not inspect.isroutine(level) and not inspect.isdatadescriptor(level) and not name.startswith("__") + ] @staticmethod def get(name: str) -> RequirementLevel: @@ -189,7 +206,6 @@ def get(name: str) -> RequirementLevel: @total_ordering class Profile: - """ RO-Crate Validator profile. @@ -197,19 +213,25 @@ class Profile: """ # store the map of profiles: profile URI -> Profile instance - __profiles_map: MultiIndexMap = \ - MultiIndexMap("uri", indexes=[ - MapIndex("name"), MapIndex("token", unique=False), MapIndex("identifier", unique=True), - MapIndex("token_path", unique=False) - ]) - - def __init__(self, - profiles_base_path: Path, - profile_path: Path, - requirements: Optional[list[Requirement]] = None, - identifier: str = None, - publicID: Optional[str] = None, - severity: Severity = Severity.REQUIRED): + __profiles_map: MultiIndexMap = MultiIndexMap( + "uri", + indexes=[ + MapIndex("name"), + MapIndex("token", unique=False), + MapIndex("identifier", unique=True), + MapIndex("token_path", unique=False), + ], + ) + + def __init__( + self, + profiles_base_path: Path, + profile_path: Path, + requirements: Optional[list[Requirement]] = None, + identifier: str = None, + publicID: Optional[str] = None, + severity: Severity = Severity.REQUIRED, + ): """ Initialize the Profile instance @@ -250,7 +272,7 @@ def __init__(self, self._profile_node = None # init property to store the RDF graph of the profile specification - self._profile_specification_graph = None + self._profile_specification_graph: Optional[Graph] = None # check if the profile specification file exists spec_file = 
self.profile_specification_file_path @@ -274,11 +296,10 @@ def __init__(self, if existing_profile.path != profile_path: # if the profile already exists, log a warning logger.warning( - "Profile with identifier %s at %s is being overridden " - "by the profile loaded from %s.", + "Profile with identifier %s at %s is being overridden by the profile loaded from %s.", existing_profile.identifier, existing_profile.path, - profile_path + profile_path, ) # add the existing profile as an override self.__add_override__(existing_profile) @@ -286,17 +307,24 @@ def __init__(self, # add the profile to the profiles map self.__profiles_map.add( self._profile_node.toPython(), - self, token=self.token, - name=self.name, identifier=self.identifier, - token_path=self.__extract_token_from_path__() + self, + token=self.token, + name=self.name, + identifier=self.identifier, + token_path=self.__extract_token_from_path__(), ) # add the profile to the profiles map else: raise ProfileSpecificationError( - message=f"Profile specification file {spec_file} must contain exactly one profile") + message=f"Profile specification file {spec_file} must contain exactly one profile" + ) def __get_specification_property__( - self, property: str, namespace: Namespace, - pop_first: bool = True, as_Python_object: bool = True) -> Union[str, list[Union[str, URIRef]]]: + self, + property: str, + namespace: Namespace, + pop_first: bool = True, + as_Python_object: bool = True, + ) -> Union[str, list[Union[str, URIRef]]]: assert self._profile_specification_graph is not None, "Profile specification graph not loaded" values = list(self._profile_specification_graph.objects(self._profile_node, namespace[property])) if values and as_Python_object: @@ -358,6 +386,13 @@ def name(self): """ return self.label or f"Profile {self.uri}" + @property + def profile_specification_graph(self) -> Graph: + """ + The RDF graph of the profile specification. 
+ """ + return self._profile_specification_graph # type: ignore + @property def profile_node(self): return self._profile_node @@ -433,6 +468,14 @@ def siblings(self) -> list[Profile]: """ return self.get_sibling_profiles(self) + @property + def descendants(self) -> list[Profile]: + """ + The list of profiles that are descendants of this profile + (i.e., profiles that have this profile among their inherited profiles). + """ + return self.get_descendants(self) + @property def readme_file_path(self) -> Path: """ @@ -484,23 +527,22 @@ def requirements(self) -> list[Requirement]: The list of requirements of the profile. """ if not self._requirements: - self._requirements = \ - RequirementLoader.load_requirements(self, severity=self.severity) + self._requirements = RequirementLoader.load_requirements(self, severity=self.severity) return self._requirements - def get_requirements( - self, severity: Severity = Severity.REQUIRED, - exact_match: bool = False) -> list[Requirement]: + def get_requirements(self, severity: Severity = Severity.REQUIRED, exact_match: bool = False) -> list[Requirement]: """ Get the requirements of the profile with the given severity level. If the exact_match flag is set to `True`, only the requirements with the exact severity level are returned; otherwise, the requirements with severity level greater than or equal to the given severity level are returned. 
""" - return [requirement for requirement in self.requirements - if (not exact_match and - (not requirement.severity_from_path or requirement.severity_from_path >= severity)) or - (exact_match and requirement.severity_from_path == severity)] + return [ + requirement + for requirement in self.requirements + if (not exact_match and (not requirement.severity_from_path or requirement.severity_from_path >= severity)) + or (exact_match and requirement.severity_from_path == severity) + ] def get_requirement(self, name: str) -> Optional[Requirement]: """ @@ -555,9 +597,7 @@ def remove_requirement(self, requirement: Requirement): self._requirements.remove(requirement) def __eq__(self, other: object) -> bool: - return isinstance(other, Profile) \ - and self.identifier == other.identifier \ - and self.path == other.path + return isinstance(other, Profile) and self.identifier == other.identifier and self.path == other.path def __lt__(self, other: object) -> bool: if not isinstance(other, Profile): @@ -573,10 +613,9 @@ def __hash__(self) -> int: def __repr__(self) -> str: return ( - f'Profile(identifier={self.identifier}, ' - f'name={self.name}, ' - f'path={self.path}, ' if self.path else '' - f'requirements={self.requirements})' + f"Profile(identifier={self.identifier}, name={self.name}, path={self.path}, " + if self.path + else f"requirements={self.requirements})" ) def __str__(self) -> str: @@ -587,7 +626,7 @@ def to_dict(self) -> dict: "identifier": self.identifier, "uri": self.uri, "name": self.name, - "description": self.description + "description": self.description, } @staticmethod @@ -601,12 +640,16 @@ def __extract_version_from_token__(token: str) -> Optional[str]: return None def __get_consistent_version__(self, candidate_token: str) -> str: - candidates = {_ for _ in [ - self.__get_specification_property__("version", SCHEMA_ORG_NS), - self.__extract_version_from_token__(candidate_token), - 
self.__extract_version_from_token__(str(self.path.relative_to(self._profiles_base_path))), - self.__extract_version_from_token__(str(self.uri)) - ] if _ is not None} + candidates = { + _ + for _ in [ + self.__get_specification_property__("version", SCHEMA_ORG_NS), + self.__extract_version_from_token__(candidate_token), + self.__extract_version_from_token__(str(self.path.relative_to(self._profiles_base_path))), + self.__extract_version_from_token__(str(self.uri)), + ] + if _ is not None + } if len(candidates) > 1: raise ProfileSpecificationError(f"Inconsistent versions found: {candidates}") logger.debug("Candidate versions: %s", candidates) @@ -621,7 +664,7 @@ def __extract_token_from_path__(self) -> str: # Remove the base path from the identifier identifier = identifier.replace(f"{base_path}/", "") # Replace slashes with hyphens - identifier = identifier.replace('/', '-') + identifier = identifier.replace("/", "-") return identifier def __init_token_version__(self) -> Tuple[str, str, str]: @@ -643,10 +686,13 @@ def __init_token_version__(self) -> Tuple[str, str, str]: return candidate_token, version @classmethod - def __load_profile_path__(cls, profiles_base_path: str, - profile_path: Union[str, Path], - publicID: Optional[str] = None, - severity: Severity = Severity.REQUIRED) -> Profile: + def __load_profile_path__( + cls, + profiles_base_path: str, + profile_path: Union[str, Path], + publicID: Optional[str] = None, + severity: Severity = Severity.REQUIRED, + ) -> Profile: # if the path is a string, convert it to a Path if isinstance(profile_path, str): profile_path = Path(profile_path) @@ -654,14 +700,21 @@ def __load_profile_path__(cls, profiles_base_path: str, if not profile_path.is_dir(): raise InvalidProfilePath(profile_path) # create a new profile - profile = Profile(profiles_base_path=profiles_base_path, - profile_path=profile_path, publicID=publicID, severity=severity) + profile = Profile( + profiles_base_path=profiles_base_path, + 
profile_path=profile_path, + publicID=publicID, + severity=severity, + ) logger.debug("Loaded profile: %s", profile) return profile @classmethod - def __load_profiles_paths__(cls, profiles_path: Union[str, Path] = None, - extra_profiles_path: Union[str, Path] = None) -> list[Tuple[Path, Path]]: + def __load_profiles_paths__( + cls, + profiles_path: Union[str, Path] = None, + extra_profiles_path: Union[str, Path] = None, + ) -> list[Tuple[Path, Path]]: """ Load the paths of the profiles from the given profiles path and extra profiles path. @@ -689,33 +742,47 @@ def __load_profiles_paths__(cls, profiles_path: Union[str, Path] = None, if not root_profile_directory.is_dir(): raise InvalidProfilePath(root_profile_directory) # if the path is a directory, get the profile directories - result.extend([(root_profile_directory, p.parent) - for p in root_profile_directory.rglob('*.*') if p.name == PROFILE_SPECIFICATION_FILE]) + result.extend( + [ + (root_profile_directory, p.parent) + for p in root_profile_directory.rglob("*.*") + if p.name == PROFILE_SPECIFICATION_FILE + ] + ) # return the list of profile directories return result @classmethod - def load_profiles(cls, - profiles_path: Union[str, Path], - extra_profiles_path: Union[str, Path] = None, - publicID: Optional[str] = None, - severity: Severity = Severity.REQUIRED, - allow_requirement_check_override: bool = True) -> list[Profile]: + def load_profiles( + cls, + profiles_path: Union[str, Path], + extra_profiles_path: Union[str, Path] = None, + publicID: Optional[str] = None, + severity: Severity = Severity.REQUIRED, + allow_requirement_check_override: bool = True, + ) -> list[Profile]: # initialize the profiles list profiles = [] # calculate the list of profiles path as the subdirectories of the profiles path # where the profile specification file is present - profiles_paths = cls.__load_profiles_paths__(profiles_path, - extra_profiles_path) + profiles_paths = cls.__load_profiles_paths__(profiles_path, 
extra_profiles_path) # iterate through the directories and load the profiles for root_profile_path, profile_path in profiles_paths: - logger.debug("Checking profile path: %s %s %r", profile_path, - profile_path.is_dir(), IGNORED_PROFILE_DIRECTORIES) + logger.debug( + "Checking profile path: %s %s %r", + profile_path, + profile_path.is_dir(), + IGNORED_PROFILE_DIRECTORIES, + ) # check if the profile path is a directory and not in the ignored directories if profile_path.is_dir() and profile_path not in IGNORED_PROFILE_DIRECTORIES: profile = Profile.__load_profile_path__( - root_profile_path, profile_path, publicID=publicID, severity=severity) + root_profile_path, + profile_path, + publicID=publicID, + severity=severity, + ) # if the profile overrides another profile, # remove the overridden profiles from the list of profiles # to avoid duplicates and ensure that the most specific profile is used @@ -751,7 +818,10 @@ def load_profiles(cls, # order profiles according to the number of profiles they depend on: # i.e, first the profiles that do not depend on any other profile # then the profiles that depend on the previous ones, and so on - return sorted(profiles, key=lambda x: f"{len(x.inherited_profiles)}_{x.identifier}") + return sorted( + profiles, + key=lambda x: f"{len(x.inherited_profiles)}_{x.identifier}", + ) @classmethod def get_by_identifier(cls, identifier: str) -> Profile: @@ -818,6 +888,20 @@ def get_sibling_profiles(cls, profile: Profile) -> list[Profile]: """ return [p for p in cls.__profiles_map.values() if profile in p.parents] + @classmethod + def get_descendants(cls, profile: Profile) -> list[Profile]: + """ + Get the transitive descendants of the given profile (any profile + that has `profile` among its `inherited_profiles`). 
+ + :param profile: the profile + :type profile: Profile + + :return: the list of descendant profiles + :rtype: list[Profile] + """ + return [p for p in cls.__profiles_map.values() if profile in p.inherited_profiles] + @classmethod def all(cls) -> list[Profile]: """ @@ -829,8 +913,7 @@ def all(cls) -> list[Profile]: return cls.__profiles_map.values() @classmethod - def find_in_list(cls, profiles: Collection[Profile], - profile_identifier: str) -> Optional[Profile]: + def find_in_list(cls, profiles: Collection[Profile], profile_identifier: str) -> Optional[Profile]: """ Find a profile with the given identifier in the given list of profiles @@ -843,8 +926,10 @@ def find_in_list(cls, profiles: Collection[Profile], :return: the profile if found, None otherwise :rtype: Optional[Profile] """ - profile = next((p for p in profiles if p.identifier == profile_identifier), None) or \ - next((p for p in profiles if str(p.identifier).replace(f"-{p.version}", '') == profile_identifier), None) + profile = next((p for p in profiles if p.identifier == profile_identifier), None) or next( + (p for p in profiles if str(p.identifier).replace(f"-{p.version}", "") == profile_identifier), + None, + ) if not profile: raise ProfileNotFound(profile_identifier) return profile @@ -866,12 +951,14 @@ class Requirement(ABC): A requirement is a named set of checks that can be used to validate an RO-Crate. 
""" - def __init__(self, - profile: Profile, - name: str = "", - description: Optional[str] = None, - path: Optional[Path] = None, - initialize_checks: bool = True): + def __init__( + self, + profile: Profile, + name: str = "", + description: Optional[str] = None, + path: Optional[Path] = None, + initialize_checks: bool = True, + ): """ Initialize the Requirement instance @@ -947,7 +1034,10 @@ def requirement_level_from_path(self) -> RequirementLevel: try: self._level_from_path = LevelCollection.get(self._path.parent.name) except ValueError: - logger.debug("The requirement level could not be determined from the path: %s", self._path) + logger.debug( + "The requirement level could not be determined from the path: %s", + self._path, + ) return self._level_from_path @property @@ -957,8 +1047,9 @@ def profile(self) -> Profile: @property def description(self) -> str: if not self._description: - self._description = self.__class__.__doc__.strip( - ) if self.__class__.__doc__ else f"Profile Requirement {self.name}" + self._description = ( + self.__class__.__doc__.strip() if self.__class__.__doc__ else f"Profile Requirement {self.name}" + ) return self._description @property @@ -1005,45 +1096,78 @@ def _do_validate_(self, context: ValidationContext) -> bool: :meta private: """ - logger.debug("Validating Requirement %s with %s checks", self.name, len(self._checks)) + logger.debug( + "Validating Requirement %s with %s checks", + self.name, + len(self._checks), + ) - logger.debug("Running %s checks for Requirement '%s'", len(self._checks), self.name) + logger.debug( + "Running %s checks for Requirement '%s'", + len(self._checks), + self.name, + ) all_passed = True checks_to_perform = [ - _ for _ in self._checks - if not context.settings.skip_checks - or _.identifier not in context.settings.skip_checks + _ + for _ in self._checks + if not context.settings.skip_checks or _.identifier not in context.settings.skip_checks ] for check in checks_to_perform: try: if check.overridden 
and not check.requirement.profile.identifier == context.profile_identifier: - logger.debug("Skipping check '%s' because overridden by '%r'", - check.identifier, [_.identifier for _ in check.overridden_by]) + logger.debug( + "Skipping check '%s' because overridden by '%r'", + check.identifier, + [_.identifier for _ in check.overridden_by], + ) + continue + if check.deactivated: + logger.debug("Skipping check '%s' because deactivated", check.identifier) + context.result._add_skipped_check(check) continue # Determine whether to skip event notification for inherited profiles skip_event_notify = False - if check.requirement.profile.identifier != context.profile_identifier and \ - context.settings.disable_inherited_profiles_issue_reporting: - logger.debug("Inherited profiles reporting disabled. " - "Skipping requirement %s as it belongs to an inherited profile %s", - check.requirement.identifier, check.requirement.profile.identifier) + if ( + check.requirement.profile.identifier != context.profile_identifier + and context.settings.disable_inherited_profiles_issue_reporting + ): + logger.debug( + "Inherited profiles reporting disabled. 
" + "Skipping requirement %s as it belongs to an inherited profile %s", + check.requirement.identifier, + check.requirement.profile.identifier, + ) skip_event_notify = True # Notify the start of the check execution if not skip_event_notify is set to True if not skip_event_notify: - context.validator.notify(RequirementCheckValidationEvent( - EventType.REQUIREMENT_CHECK_VALIDATION_START, check)) + context.validator.notify( + RequirementCheckValidationEvent(EventType.REQUIREMENT_CHECK_VALIDATION_START, check) + ) # Execute the check check_result = check.execute_check(context) logger.debug("Result of check %s: %s", check.identifier, check_result) context.result._add_executed_check(check, check_result) # Notify the end of the check execution if not skip_event_notify is set to True if not skip_event_notify: - context.validator.notify(RequirementCheckValidationEvent( - EventType.REQUIREMENT_CHECK_VALIDATION_END, check, validation_result=check_result)) - logger.debug("Ran check '%s'. Got result %s", check.identifier, check_result) + context.validator.notify( + RequirementCheckValidationEvent( + EventType.REQUIREMENT_CHECK_VALIDATION_END, + check, + validation_result=check_result, + ) + ) + logger.debug( + "Ran check '%s'. Got result %s", + check.identifier, + check_result, + ) # Ensure the check result is a boolean if not isinstance(check_result, bool): - logger.warning("Ignoring the check %s as it returned the value %r instead of a boolean", check.name) + logger.warning( + "Ignoring the check %s as it returned the value %r instead of a boolean", + check.name, + ) raise RuntimeError(f"Ignoring invalid result from check {check.name}") # Aggregate the check result all_passed = all_passed and check_result @@ -1055,21 +1179,26 @@ def _do_validate_(self, context: ValidationContext) -> bool: continue except Exception as e: # Ignore the fact that the check failed as far as the validation result is concerned. - logger.warning("Unexpected error during check %s. 
Exception: %s", check, e) - logger.warning("Consider reporting this as a bug.") - if logger.isEnabledFor(logging.DEBUG): - logger.exception(e) + if context.maybe_warn_offline_cache_miss(e): + logger.debug("Offline cache miss during check %s: %s", check, e) + else: + logger.warning("Unexpected error during check %s. Exception: %s", check, e) + logger.warning("Consider reporting this as a bug.") + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) skipped_checks = set(self._checks) - set(checks_to_perform) context.result.skipped_checks.update(skipped_checks) - logger.debug("Checks for Requirement '%s' completed. Checks passed? %s", self.name, all_passed) + logger.debug( + "Checks for Requirement '%s' completed. Checks passed? %s", + self.name, + all_passed, + ) return all_passed def __eq__(self, other: object) -> bool: if not isinstance(other, Requirement): raise TypeError(f"Cannot compare {type(self)} with {type(other)}") - return self.name == other.name \ - and self.description == other.description \ - and self.path == other.path + return self.name == other.name and self.description == other.description and self.path == other.path def __ne__(self, other: object) -> bool: return not self.__eq__(other) @@ -1080,16 +1209,20 @@ def __hash__(self): def __lt__(self, other: object) -> bool: if not isinstance(other, Requirement): raise ValueError(f"Cannot compare Requirement with {type(other)}") - return (self._order_number, self.name) < (other._order_number, other.name) + return (self._order_number, self.name) < ( + other._order_number, + other.name, + ) def __repr__(self): return ( - f'ProfileRequirement(' - f'_order_number={self._order_number}, ' - f'name={self.name}, ' - f'description={self.description}' - f', path={self.path}, ' if self.path else '' - ')' + f"ProfileRequirement(" + f"_order_number={self._order_number}, " + f"name={self.name}, " + f"description={self.description}" + f", path={self.path}, " + if self.path + else ")" ) def __str__(self) -> str: 
@@ -1100,7 +1233,7 @@ def to_dict(self, with_profile: bool = True, with_checks: bool = True) -> dict: "identifier": self.identifier, "name": self.name, "description": self.description, - "order": self.order_number + "order": self.order_number, } if with_profile: result["profile"] = self.profile.to_dict() @@ -1108,9 +1241,36 @@ def to_dict(self, with_profile: bool = True, with_checks: bool = True) -> dict: result["checks"] = [_.to_dict(with_requirement=False, with_profile=False) for _ in self._checks] return result + @classmethod + def initialize(cls, context: ValidationContext) -> None: + logger.debug( + "Starting %s requirement initialization for context %s", + cls.__name__, + context, + ) + # do initialization logic here (empty for now) + logger.debug( + "Completed %s requirement initialization for context %s", + cls.__name__, + context, + ) + + @classmethod + def finalize(cls, context: ValidationContext) -> None: + logger.debug( + "Starting %s requirement finalization for context %s", + cls.__name__, + context, + ) + # do finalization logic here (empty for now) + logger.debug( + "Completed %s requirement finalization for context %s", + cls.__name__, + context, + ) -class RequirementLoader: +class RequirementLoader: def __init__(self, profile: Profile): self._profile = profile @@ -1129,7 +1289,6 @@ def __get_requirement_type__(requirement_path: Path) -> str: @classmethod def __get_requirement_loader__(cls, profile: Profile, requirement_path: Path) -> RequirementLoader: - import importlib requirement_type = cls.__get_requirement_type__(requirement_path) loader_instance_name = f"_{requirement_type}_loader_instance" loader_instance = getattr(profile, loader_instance_name, None) @@ -1143,21 +1302,52 @@ def __get_requirement_loader__(cls, profile: Profile, requirement_path: Path) -> setattr(profile, loader_instance_name, loader_instance) return loader_instance + @staticmethod + def __get_requirement_classes__() -> list[Type[Requirement]]: + + # Ensure known 
requirement modules are imported so subclasses are registered. + for requirement_type in ("python", "shacl"): + module_name = f"rocrate_validator.requirements.{requirement_type}" + try: + importlib.import_module(module_name) + except Exception: + logger.debug( + "Unable to import requirement module: %s", + module_name, + exc_info=True, + ) + + def all_subclasses( + base_class: Type[Requirement], + ) -> list[Type[Requirement]]: + result: list[Type[Requirement]] = [] + for subcls in base_class.__subclasses__(): + result.append(subcls) + result.extend(all_subclasses(subcls)) + return result + + return all_subclasses(Requirement) + @staticmethod def load_requirements(profile: Profile, severity: Severity = None) -> list[Requirement]: """ Load the requirements related to the profile """ - def ok_file(p: Path) -> bool: - return p.is_file() \ - and p.suffix in PROFILE_FILE_EXTENSIONS \ - and not p.name == DEFAULT_ONTOLOGY_FILE \ - and not p.name == PROFILE_SPECIFICATION_FILE \ - and not p.name.startswith('.') \ - and not p.name.startswith('_') - files = sorted((p for p in profile.path.rglob('*.*') if ok_file(p)), - key=lambda x: (not x.suffix == '.py', x)) + def ok_file(p: Path) -> bool: + return ( + p.is_file() + and p.suffix in PROFILE_FILE_EXTENSIONS + and not p.name == DEFAULT_ONTOLOGY_FILE + and not p.name == PROFILE_SPECIFICATION_FILE + and not p.name.startswith(".") + and not p.name.startswith("_") + ) + + files = sorted( + (p for p in profile.path.rglob("*.*") if ok_file(p)), + key=lambda x: (not x.suffix == ".py", x), + ) # set the requirement level corresponding to the severity requirement_level = LevelCollection.get(severity.name) @@ -1169,41 +1359,68 @@ def ok_file(p: Path) -> bool: if requirement_level_from_path < requirement_level: continue except ValueError: - logger.debug("The requirement level could not be determined from the path: %s", requirement_path) + logger.debug( + "The requirement level could not be determined from the path: %s", + 
requirement_path, + ) requirement_loader = RequirementLoader.__get_requirement_loader__(profile, requirement_path) for requirement in requirement_loader.load( - profile, requirement_level, - requirement_path, publicID=profile.publicID): + profile, + requirement_level, + requirement_path, + publicID=profile.publicID, + ): requirements.append(requirement) # sort the requirements by severity - requirements = sorted(requirements, - key=lambda x: (-x.severity_from_path.value, x.path.name, x.name) - if x.severity_from_path is not None else (0, x.path.name, x.name), - reverse=False) + requirements = sorted( + requirements, + key=lambda x: ( + (-x.severity_from_path.value, x.path.name, x.name) + if x.severity_from_path is not None + else (0, x.path.name, x.name) + ), + reverse=False, + ) # assign order numbers to requirements for i, requirement in enumerate(requirements): requirement._order_number = i + 1 # log and return the requirements - logger.debug("Profile %s loaded %s requirements: %s", - profile.identifier, len(requirements), requirements) + logger.debug("Profile %s loaded %s requirements: %s", profile.identifier, len(requirements), requirements) return requirements +@dataclass(frozen=True) +class SourceSnippet: + """ + A snippet of source code backing a :class:`RequirementCheck`. + :ivar language: language tag for syntax highlighting (e.g. ``"python"``, ``"turtle"``). + :ivar code: the source code as text. + :ivar source_path: path to the file the snippet was extracted from, when available. 
+ """ + language: str + code: str + source_path: Optional[Path] = None + + @total_ordering class RequirementCheck(ABC): - def __init__(self, - requirement: Requirement, - name: str, - level: Optional[RequirementLevel] = LevelCollection.REQUIRED, - description: Optional[str] = None, - hidden: Optional[bool] = None): + def __init__( + self, + requirement: Requirement, + name: str, + level: Optional[RequirementLevel] = LevelCollection.REQUIRED, + description: Optional[str] = None, + hidden: Optional[bool] = None, + deactivated: bool = False, + ): self._requirement: Requirement = requirement self._order_number = 0 self._name = name self._level = level self._description = description self._hidden = hidden + self._deactivated = deactivated @property def order_number(self) -> int: @@ -1241,9 +1458,7 @@ def requirement(self) -> Requirement: @property def level(self) -> RequirementLevel: - return self._level or \ - self.requirement.requirement_level_from_path or \ - LevelCollection.REQUIRED + return self._level or self.requirement.requirement_level_from_path or LevelCollection.REQUIRED @property def severity(self) -> Severity: @@ -1271,6 +1486,10 @@ def overrides(self) -> list[RequirementCheck]: def overridden(self) -> bool: return len(self.overridden_by) > 0 + @property + def deactivated(self) -> bool: + return self._deactivated + @property def hidden(self) -> bool: if self._hidden is not None: @@ -1281,6 +1500,14 @@ def hidden(self) -> bool: def execute_check(self, context: ValidationContext) -> bool: raise NotImplementedError() + def get_source_snippet(self) -> Optional[SourceSnippet]: + """ + Return the source code that implements this check, or ``None`` if the + backing source cannot be extracted for this check kind. + Concrete subclasses should override this method. 
+ """ + return None + def to_dict(self, with_requirement: bool = True, with_profile: bool = True) -> dict: result = { "identifier": self.identifier, @@ -1288,7 +1515,7 @@ def to_dict(self, with_requirement: bool = True, with_profile: bool = True) -> d "order": self.order_number, "name": self.name, "description": self.description, - "severity": self.severity.name + "severity": self.severity.name, } if with_requirement: result["requirement"] = self.requirement.to_dict(with_profile=with_profile, with_checks=False) @@ -1302,7 +1529,10 @@ def __eq__(self, other: object) -> bool: def __lt__(self, other: object) -> bool: if not isinstance(other, RequirementCheck): raise ValueError(f"Cannot compare RequirementCheck with {type(other)}") - return (self.requirement, self.identifier) < (other.requirement, other.identifier) + return (self.requirement, self.identifier) < ( + other.requirement, + other.identifier, + ) def __ne__(self, other: object) -> bool: return not self.__eq__(other) @@ -1318,12 +1548,14 @@ class CheckIssue: during the validation process. 
""" - def __init__(self, - check: RequirementCheck, - message: Optional[str] = None, - violatingProperty: Optional[str] = None, - violatingEntity: Optional[str] = None, - value: Optional[str] = None): + def __init__( + self, + check: RequirementCheck, + message: Optional[str] = None, + violatingProperty: Optional[str] = None, + violatingEntity: Optional[str] = None, + value: Optional[str] = None, + ): self._message = message self._check: RequirementCheck = check self._violatingProperty = violatingProperty @@ -1388,9 +1620,7 @@ def violatingPropertyValue(self) -> Optional[str]: return self._propertyValue def __eq__(self, other: object) -> bool: - return isinstance(other, CheckIssue) and \ - self._check == other._check and \ - self._message == other._message + return isinstance(other, CheckIssue) and self._check == other._check and self._message == other._message def __lt__(self, other: object) -> bool: if not isinstance(other, CheckIssue): @@ -1401,34 +1631,43 @@ def __hash__(self) -> int: return hash((self._check, self._message)) def __repr__(self) -> str: - return f'CheckIssue(severity={self.severity}, check={self.check}, message={self.message})' + return f"CheckIssue(severity={self.severity}, check={self.check}, message={self.message})" def __str__(self) -> str: - return f"Issue of severity {self.severity.name} with check \"{self.check.identifier}\": {self.message}" - - def to_dict(self, with_check: bool = True, - with_requirement: bool = True, with_profile: bool = True) -> dict: + return f'Issue of severity {self.severity.name} with check "{self.check.identifier}": {self.message}' + + def to_dict( + self, + with_check: bool = True, + with_requirement: bool = True, + with_profile: bool = True, + ) -> dict: result = { "severity": self.severity.name, "message": self.message, "violatingEntity": self.violatingEntity, "violatingProperty": self.violatingProperty, - "violatingPropertyValue": self.violatingPropertyValue + "violatingPropertyValue": 
self.violatingPropertyValue, } if with_check: result["check"] = self.check.to_dict(with_requirement=with_requirement, with_profile=with_profile) return result - def to_json(self, - with_checks: bool = True, - with_requirements: bool = True, - with_profile: bool = True) -> str: + def to_json( + self, + with_checks: bool = True, + with_requirements: bool = True, + with_profile: bool = True, + ) -> str: return json.dumps( self.to_dict( with_check=with_checks, with_requirement=with_requirements, - with_profile=with_profile - ), indent=4, cls=CustomEncoder) + with_profile=with_profile, + ), + indent=4, + cls=CustomEncoder, + ) class ValidationStatisticsListener(Protocol): @@ -1445,9 +1684,12 @@ class ValidationStatistics(Subscriber): Computes and stores statistical metrics about the RO-Crate validation process. """ - def __init__(self, settings: Union[dict, ValidationSettings], - context: Optional[ValidationContext] = None, - skip_initialization: bool = False): + def __init__( + self, + settings: Union[dict, ValidationSettings], + context: Optional[ValidationContext] = None, + skip_initialization: bool = False, + ): if isinstance(settings, dict): settings = ValidationSettings.parse(settings) self._settings = settings @@ -1638,7 +1880,11 @@ def __initialise__(cls, validation_settings: ValidationSettings): # extract the validation settings severity_validation = validation_settings.requirement_severity profiles: list[Profile] = Profile.load_profiles( - validation_settings.profiles_path, severity=severity_validation) + validation_settings.profiles_path, + extra_profiles_path=validation_settings.extra_profiles_path, + severity=severity_validation, + allow_requirement_check_override=validation_settings.allow_requirement_check_override, + ) profile: Profile = Profile.find_in_list(profiles, validation_settings.profile_identifier) target_profile_identifier = profile.identifier # initialize the profiles list @@ -1655,7 +1901,11 @@ def __initialise__(cls, validation_settings: 
ValidationSettings): requirements: set[Requirement] = set() # Initialize the counters - for severity in (Severity.REQUIRED, Severity.RECOMMENDED, Severity.OPTIONAL): + for severity in ( + Severity.REQUIRED, + Severity.RECOMMENDED, + Severity.OPTIONAL, + ): checks_by_severity[severity] = set() # Process the requirements and checks @@ -1669,26 +1919,23 @@ def __initialise__(cls, validation_settings: ValidationSettings): continue requirement_checks_count = 0 - for severity in (Severity.REQUIRED, Severity.RECOMMENDED, Severity.OPTIONAL): + for severity in ( + Severity.REQUIRED, + Severity.RECOMMENDED, + Severity.OPTIONAL, + ): logger.debug( - f"Checking requirement: {requirement} severity: {severity} {severity < severity_validation}") + f"Checking requirement: {requirement} severity: {severity} {severity < severity_validation}" + ) # skip requirements with lower severity if severity < severity_validation: continue # count the checks requirement_checks = [ _ - for _ in requirement.get_checks_by_level( - LevelCollection.get(severity.name) - ) - if ( - not validation_settings.skip_checks - or _.identifier not in validation_settings.skip_checks - ) - and ( - not _.overridden - or _.requirement.profile.identifier == target_profile_identifier - ) + for _ in requirement.get_checks_by_level(LevelCollection.get(severity.name)) + if (not validation_settings.skip_checks or _.identifier not in validation_settings.skip_checks) + and (not _.overridden or _.requirement.profile.identifier == target_profile_identifier) ] num_checks = len(requirement_checks) requirement_checks_count += num_checks @@ -1708,7 +1955,11 @@ def __initialise__(cls, validation_settings: ValidationSettings): requirements.add(requirement) # log processed requirements - logger.debug("Processed requirements %r: %r", len(processed_requirements), processed_requirements) + logger.debug( + "Processed requirements %r: %r", + len(processed_requirements), + processed_requirements, + ) # Prepare the result result = { @@ 
-1726,7 +1977,7 @@ def __initialise__(cls, validation_settings: ValidationSettings): "finished_at": None, "validated_profiles": [], "validated_requirements": [], - "validated_checks": [] + "validated_checks": [], } logger.debug(result) return result @@ -1744,9 +1995,10 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None) -> None: logger.debug("Requirement check validation start") elif event.event_type == EventType.REQUIREMENT_CHECK_VALIDATION_END: target_profile = ctx.target_validation_profile - if not event.requirement_check.requirement.hidden and \ - (not event.requirement_check.overridden - or target_profile.identifier == event.requirement_check.requirement.profile.identifier): + if not event.requirement_check.requirement.hidden and ( + not event.requirement_check.overridden + or target_profile.identifier == event.requirement_check.requirement.profile.identifier + ): if event.validation_result is not None: if event.validation_result: self._stats["passed_checks"].append(event.requirement_check) @@ -1755,10 +2007,15 @@ def update(self, event: Event, ctx: Optional[ValidationContext] = None) -> None: self._stats["validated_checks"].append(event.requirement_check) self.notify_listeners() else: - logger.debug("Requirement check validation result is None: %s", - event.requirement_check.identifier) + logger.debug( + "Requirement check validation result is None: %s", + event.requirement_check.identifier, + ) else: - logger.debug("Skipping requirement check validation: %s", event.requirement_check.identifier) + logger.debug( + "Skipping requirement check validation: %s", + event.requirement_check.identifier, + ) elif event.event_type == EventType.REQUIREMENT_VALIDATION_END: if not event.requirement.hidden: if event.validation_result: @@ -1784,12 +2041,10 @@ def to_dict(self) -> dict: "started_at": self.started_at.isoformat() if self.started_at else None, "finished_at": self.finished_at.isoformat() if self.finished_at else None, "duration": 
self.duration, - # Profile details "profile": self.profile.identifier if self.profile else None, "profiles": [p.identifier for p in self.profiles], "severity": self.severity.name if self.severity else None, - # Computed totals "total_requirements": self.total_requirements, "total_passed_requirements": len(self.passed_requirements), @@ -1798,43 +2053,45 @@ def to_dict(self) -> dict: "total_passed_checks": len(self.passed_checks), "total_failed_checks": len(self.failed_checks), "total_checks_by_severity": {k.name: len(v) for k, v in self.checks_by_severity.items()}, - # Requirements involved "requirements": { "count": self.total_requirements, "passed": { "count": len(self.passed_requirements), - "percentage": (len(self.passed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 else 0.0, - "identifiers": sorted([r.identifier for r in self.passed_requirements]) + "percentage": ( + (len(self.passed_requirements) / self.total_requirements * 100) + if self.total_requirements > 0 + else 0.0 + ), + "identifiers": sorted([r.identifier for r in self.passed_requirements]), }, "failed": { "count": len(self.failed_requirements), - "percentage": (len(self.failed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 else 0.0, - "identifiers": sorted([r.identifier for r in self.failed_requirements]) + "percentage": ( + (len(self.failed_requirements) / self.total_requirements * 100) + if self.total_requirements > 0 + else 0.0 + ), + "identifiers": sorted([r.identifier for r in self.failed_requirements]), }, - "identifiers": sorted([r.identifier for r in self.requirements]) + "identifiers": sorted([r.identifier for r in self.requirements]), }, - # Checks involved "checks": { "count": self.total_checks, "passed": { "count": len(self.passed_checks), - "percentage": (len(self.passed_checks) / self.total_checks * 100) - if self.total_checks > 0 else 0.0, - "identifiers": sorted([c.identifier for c in self.passed_checks]) + 
"percentage": (len(self.passed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, + "identifiers": sorted([c.identifier for c in self.passed_checks]), }, "failed": { "count": len(self.failed_checks), - "percentage": (len(self.failed_checks) / self.total_checks * 100) - if self.total_checks > 0 else 0.0, - "identifiers": sorted([c.identifier for c in self.failed_checks]) + "percentage": (len(self.failed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, + "identifiers": sorted([c.identifier for c in self.failed_checks]), }, "identifiers": sorted([c.identifier for c in self.checks]), - "by_severity": {k.name: len(v) for k, v in self._stats.get("checks_by_severity", {}).items()} - } + "by_severity": {k.name: len(v) for k, v in self._stats.get("checks_by_severity", {}).items()}, + }, } def to_json(self) -> str: @@ -1874,10 +2131,8 @@ def to_dict(self) -> dict: "started_at": self.started_at.isoformat() if self.started_at else None, "finished_at": self.finished_at.isoformat() if self.finished_at else None, "duration": self.duration, - # Profiles involved "profiles": [p.identifier for p in self.profiles], - # Computed totals "total_requirements": self.total_requirements, "total_passed_requirements": len(self.passed_requirements), @@ -1886,40 +2141,43 @@ def to_dict(self) -> dict: "total_passed_checks": len(self.passed_checks), "total_failed_checks": len(self.failed_checks), "total_checks_by_severity": {k.name: len(v) for k, v in self.checks_by_severity.items()}, - # Requirements involved "requirements": { "count": self.total_requirements, "passed": { "count": len(self.passed_requirements), - "percentage": (len(self.passed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 else 0.0, - "identifiers": [r.identifier for r in self.passed_requirements] + "percentage": ( + (len(self.passed_requirements) / self.total_requirements * 100) + if self.total_requirements > 0 + else 0.0 + ), + "identifiers": 
[r.identifier for r in self.passed_requirements], }, "failed": { "count": len(self.failed_requirements), - "percentage": (len(self.failed_requirements) / self.total_requirements * 100) - if self.total_requirements > 0 else 0.0, - "identifiers": [r.identifier for r in self.failed_requirements] + "percentage": ( + (len(self.failed_requirements) / self.total_requirements * 100) + if self.total_requirements > 0 + else 0.0 + ), + "identifiers": [r.identifier for r in self.failed_requirements], }, - "identifiers": [r.identifier for r in self.requirements] + "identifiers": [r.identifier for r in self.requirements], }, # Checks involved "checks": { "count": self.total_checks, "passed": { "count": len(self.passed_checks), - "percentage": (len(self.passed_checks) / self.total_checks * 100) - if self.total_checks > 0 else 0.0, - "identifiers": [c.identifier for c in self.passed_checks] + "percentage": (len(self.passed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, + "identifiers": [c.identifier for c in self.passed_checks], }, "failed": { "count": len(self.failed_checks), - "percentage": (len(self.failed_checks) / self.total_checks * 100) - if self.total_checks > 0 else 0.0, - "identifiers": [c.identifier for c in self.failed_checks] + "percentage": (len(self.failed_checks) / self.total_checks * 100) if self.total_checks > 0 else 0.0, + "identifiers": [c.identifier for c in self.failed_checks], }, - "identifiers": [c.identifier for c in self.checks] + "identifiers": [c.identifier for c in self.checks], }, } @@ -2037,7 +2295,7 @@ def __compute_averall_stats__(self): "passed_checks": set(), "started_at": None, "finished_at": None, - "duration": 0.0 + "duration": 0.0, } # Aggregate statistics from each ValidationStatistics instance @@ -2058,10 +2316,12 @@ def __compute_averall_stats__(self): result["passed_checks"].update(stats.passed_checks) # Aggregate started_at and finished_at - result["started_at"] = min(result["started_at"], stats.started_at) \ - if 
result["started_at"] else stats.started_at - result["finished_at"] = max(result["finished_at"], stats.finished_at) \ - if result["finished_at"] else stats.finished_at + result["started_at"] = ( + min(result["started_at"], stats.started_at) if result["started_at"] else stats.started_at + ) + result["finished_at"] = ( + max(result["finished_at"], stats.finished_at) if result["finished_at"] else stats.finished_at + ) # Aggregate duration result["duration"] += stats.duration or 0.0 @@ -2069,8 +2329,9 @@ def __compute_averall_stats__(self): result["profiles"] = sorted(result["profiles"], key=lambda p: p.identifier) result["requirements"] = sorted(result["requirements"], key=lambda r: r.identifier) result["checks"] = sorted(result["checks"], key=lambda c: c.identifier) - result["checks_by_severity"] = {k: sorted(v, key=lambda c: c.identifier) - for k, v in result["checks_by_severity"].items()} + result["checks_by_severity"] = { + k: sorted(v, key=lambda c: c.identifier) for k, v in result["checks_by_severity"].items() + } result["failed_requirements"] = sorted(result["failed_requirements"], key=lambda r: r.identifier) result["failed_checks"] = sorted(result["failed_checks"], key=lambda c: c.identifier) result["passed_requirements"] = sorted(result["passed_requirements"], key=lambda r: r.identifier) @@ -2138,6 +2399,7 @@ def statistics(self) -> ValidationStatistics: The validation statistics """ return self._statistics + # --- Checks --- @property @@ -2198,9 +2460,7 @@ def get_issues(self, min_severity: Optional[Severity] = None) -> list[CheckIssue min_severity = min_severity or self.context.requirement_severity return [issue for issue in self._issues if issue.severity >= min_severity] - def get_issues_by_check(self, - check: RequirementCheck, - min_severity: Severity = None) -> list[CheckIssue]: + def get_issues_by_check(self, check: RequirementCheck, min_severity: Severity = None) -> list[CheckIssue]: """ Get the issues found during the validation for a specific check 
with a severity greater than or equal to `min_severity` @@ -2225,12 +2485,14 @@ def passed(self, min_severity: Optional[Severity] = None) -> bool: min_severity = min_severity or self.context.requirement_severity return not any(issue.severity >= min_severity for issue in self._issues) - def add_issue(self, - message: str, - check: RequirementCheck, - violatingEntity: Optional[str] = None, - violatingProperty: Optional[str] = None, - violatingPropertyValue: Optional[str] = None) -> CheckIssue: + def add_issue( + self, + message: str, + check: RequirementCheck, + violatingEntity: Optional[str] = None, + violatingProperty: Optional[str] = None, + violatingPropertyValue: Optional[str] = None, + ) -> CheckIssue: """ Add an issue to the validation result @@ -2241,8 +2503,13 @@ def add_issue(self, violatingProperty(Optional[str]): The property that caused the issue (if any) violatingPropertyValue(Optional[str]): The value of the violatingProperty (if any) """ - c = CheckIssue(check, message, violatingProperty=violatingProperty, - violatingEntity=violatingEntity, value=violatingPropertyValue) + c = CheckIssue( + check, + message, + violatingProperty=violatingProperty, + violatingEntity=violatingEntity, + value=violatingPropertyValue, + ) bisect.insort(self._issues, c) return c @@ -2250,17 +2517,21 @@ def add_issue(self, @property def failed_requirements(self) -> Collection[Requirement]: """ - Get the requirements that failed + Get the requirements that failed at or above the configured `requirement_severity`. """ - return set(issue.check.requirement for issue in self._issues) + min_severity = self.context.requirement_severity + return set(issue.check.requirement for issue in self._issues + if issue.severity >= min_severity) # --- Checks --- @property def failed_checks(self) -> Collection[RequirementCheck]: """ - Get the checks that failed + Get the checks that failed at or above the configured `requirement_severity`. 
""" - return set(issue.check for issue in self._issues) + min_severity = self.context.requirement_severity + return set(issue.check for issue in self._issues + if issue.severity >= min_severity) def get_failed_checks_by_requirement(self, requirement: Requirement) -> Collection[RequirementCheck]: """ @@ -2269,13 +2540,14 @@ def get_failed_checks_by_requirement(self, requirement: Requirement) -> Collecti return [check for check in self.failed_checks if check.requirement == requirement] def get_failed_checks_by_requirement_and_severity( - self, requirement: Requirement, severity: Severity) -> Collection[RequirementCheck]: + self, requirement: Requirement, severity: Severity + ) -> Collection[RequirementCheck]: """ Get the checks that failed for a specific requirement and severity """ - return [check for check in self.failed_checks - if check.requirement == requirement - and check.severity == severity] + return [ + check for check in self.failed_checks if check.requirement == requirement and check.severity == severity + ] def __str__(self) -> str: return f"Validation result: passed={len(self.failed_checks) == 0}, {len(self._issues)} issues" @@ -2292,17 +2564,20 @@ def to_dict(self) -> dict: """ Convert the ValidationResult to a dictionary """ - allowed_properties = ["profile_identifier", "enable_profile_inheritance", - "requirement_severity", "abort_on_first"] - validation_settings = {key: value for key, value in self.validation_settings.to_dict().items() - if key in allowed_properties} + allowed_properties = [ + "profile_identifier", + "enable_profile_inheritance", + "requirement_severity", + "abort_on_first", + ] + validation_settings = { + key: value for key, value in self.validation_settings.to_dict().items() if key in allowed_properties + } result = { - "meta": { - "version": JSON_OUTPUT_FORMAT_VERSION - }, + "meta": {"version": JSON_OUTPUT_FORMAT_VERSION}, "validation_settings": validation_settings, "passed": 
self.passed(self.context.settings.requirement_severity), - "issues": [issue.to_dict() for issue in self.issues] + "issues": [issue.to_dict() for issue in self.issues], } # add validator version to the settings result["validation_settings"]["rocrate_validator_version"] = __version__ @@ -2344,6 +2619,7 @@ class ValidationSettings: It includes the following attributes: """ + #: The URI of the RO-Crate rocrate_uri: URI #: The relative root path of the RO-Crate @@ -2390,31 +2666,75 @@ class ValidationSettings: metadata_dict: dict = None #: Verbose output verbose: bool = False - #: Cache max age in seconds + #: Cache max age in seconds (negative values mean "never expire") cache_max_age: Optional[int] = DEFAULT_HTTP_CACHE_MAX_AGE #: Cache path cache_path: Optional[Path] = None + #: Flag to enable offline mode: HTTP requests are served only from the cache + offline: bool = False + #: Flag to disable the HTTP cache entirely: every request hits the network + no_cache: bool = False def __post_init__(self): # if requirement_severity is a str, convert to Severity if isinstance(self.requirement_severity, str): self.requirement_severity = Severity[self.requirement_severity] - # initialize the HTTP cache - HttpRequester.initialize_cache(cache_path=self.cache_path, cache_max_age=self.cache_max_age) - logger.debug("HTTP cache initialized at %s with max age %s seconds", - self.cache_path, self.cache_max_age) + # Offline mode needs the cache to serve responses, so it cannot be + # combined with an explicit cache disable. + if self.offline and self.no_cache: + raise ValueError( + "Offline mode requires the HTTP cache to be enabled; " + "no_cache=True is incompatible with offline=True." + ) + # Default to the persistent user cache whenever caching is enabled so that + # consecutive runs (online then offline) share the same HTTP cache: this + # is what lets the offline mode find the resources fetched online. 
+ if self.cache_path is None and not self.no_cache: + default_path = get_default_http_cache_path() + default_path.parent.mkdir(parents=True, exist_ok=True) + self.cache_path = default_path + logger.debug("Cache path not set: defaulting to persistent user cache %s", self.cache_path) + if self.offline and self.cache_path is None: + logger.warning( + "Offline mode enabled without a persistent cache path: " + "all HTTP-backed resources will fail unless pre-populated." + ) + # Re-apply the cache settings to the HTTP requester. ``initialize_cache`` + # reconfigures the existing singleton in place (rather than dropping it), + # so new settings take effect without discarding state set on the instance. + HttpRequester.initialize_cache( + cache_path=str(self.cache_path) if self.cache_path is not None else None, + cache_max_age=self.cache_max_age, + offline=self.offline, + no_cache=self.no_cache, + ) + logger.debug( + "HTTP cache initialized at %s with max age %s seconds (offline=%s, no_cache=%s)", + self.cache_path, self.cache_max_age, self.offline, self.no_cache, + ) + # Install the JSON-LD document loader so context resolution goes through the cache. + try: + install_document_loader() + except Exception as e: + logger.debug("Could not install JSON-LD document loader: %s", e) + # Best-effort synchronous warm-up of profile-declared URLs. 
+ if not self.offline: + try: + auto_warm_up_for_settings(self) + except Exception as e: + logger.debug("Auto warm-up skipped: %s", e) def to_dict(self): """ Convert the ValidationSettings to a dictionary """ result = asdict(self) - result['rocrate_uri'] = str(self.rocrate_uri) - result.pop('metadata_dict', None) # exclude metadata_dict from the dict representation + result["rocrate_uri"] = str(self.rocrate_uri) + result.pop("metadata_dict", None) # exclude metadata_dict from the dict representation # Remove disable_crate_download from the dict representation - result.pop('disable_remote_crate_download', None) + result.pop("disable_remote_crate_download", None) # Remove requirement_severity_only from the dict representation - result.pop('requirement_severity_only', None) + result.pop("requirement_severity_only", None) return result @property @@ -2461,8 +2781,12 @@ def parse(cls, settings: Union[dict, ValidationSettings]) -> ValidationSettings: class ValidationEvent(Event): - def __init__(self, event_type: EventType, - validation_result: Optional[ValidationResult] = None, message: Optional[str] = None): + def __init__( + self, + event_type: EventType, + validation_result: Optional[ValidationResult] = None, + message: Optional[str] = None, + ): super().__init__(event_type, message) self._validation_result = validation_result @@ -2472,8 +2796,16 @@ def validation_result(self) -> Optional[ValidationResult]: class ProfileValidationEvent(Event): - def __init__(self, event_type: EventType, profile: Profile, message: Optional[str] = None): - assert event_type in (EventType.PROFILE_VALIDATION_START, EventType.PROFILE_VALIDATION_END) + def __init__( + self, + event_type: EventType, + profile: Profile, + message: Optional[str] = None, + ): + assert event_type in ( + EventType.PROFILE_VALIDATION_START, + EventType.PROFILE_VALIDATION_END, + ) super().__init__(event_type, message) self._profile = profile @@ -2500,12 +2832,17 @@ def __hash__(self) -> int: class 
RequirementValidationEvent(Event): - def __init__(self, - event_type: EventType, - requirement: Requirement, - validation_result: Optional[bool] = None, - message: Optional[str] = None): - assert event_type in (EventType.REQUIREMENT_VALIDATION_START, EventType.REQUIREMENT_VALIDATION_END) + def __init__( + self, + event_type: EventType, + requirement: Requirement, + validation_result: Optional[bool] = None, + message: Optional[str] = None, + ): + assert event_type in ( + EventType.REQUIREMENT_VALIDATION_START, + EventType.REQUIREMENT_VALIDATION_END, + ) super().__init__(event_type, message) self._requirement = requirement self._validation_result = validation_result @@ -2537,10 +2874,17 @@ def __hash__(self) -> int: class RequirementCheckValidationEvent(Event): - def __init__(self, event_type: EventType, - requirement_check: RequirementCheck, - validation_result: Optional[bool] = None, message: Optional[str] = None): - assert event_type in (EventType.REQUIREMENT_CHECK_VALIDATION_START, EventType.REQUIREMENT_CHECK_VALIDATION_END) + def __init__( + self, + event_type: EventType, + requirement_check: RequirementCheck, + validation_result: Optional[bool] = None, + message: Optional[str] = None, + ): + assert event_type in ( + EventType.REQUIREMENT_CHECK_VALIDATION_START, + EventType.REQUIREMENT_CHECK_VALIDATION_END, + ) super().__init__(event_type, message) self._requirement_check = requirement_check self._validation_result = validation_result @@ -2557,8 +2901,9 @@ def __str__(self) -> str: return f"RequirementCheckValidationEvent({self.event_type}, {self.requirement_check})" def __repr__(self) -> str: - return f"RequirementCheckValidationEvent(event_type={self.event_type}, " \ - f"requirement_check={self.requirement_check})" + return ( + f"RequirementCheckValidationEvent(event_type={self.event_type}, requirement_check={self.requirement_check})" + ) def __eq__(self, other: object) -> bool: if not isinstance(other, RequirementCheckValidationEvent): @@ -2631,7 +2976,8 @@ 
def detect_rocrate_profiles(self) -> list[Profile]: context.profiles_path, extra_profiles_path=context.extra_profiles_path, publicID=context.publicID, - severity=context.requirement_severity) + severity=context.requirement_severity, + ) profiles = [p for p in available_profiles if p.uri in candidate_profiles_uris] # get the candidate profiles for profile in profiles: @@ -2640,7 +2986,11 @@ def detect_rocrate_profiles(self) -> list[Profile]: for inherited_profile in inherited_profiles: if inherited_profile in candidate_profiles: candidate_profiles.remove(inherited_profile) - logger.debug("%d Candidate Profiles found: %s", len(candidate_profiles), candidate_profiles) + logger.debug( + "%d Candidate Profiles found: %s", + len(candidate_profiles), + candidate_profiles, + ) # unmatched candidate profiles unmatched_profiles = candidate_profiles_uris.difference(set(p.uri for p in profiles)) logger.debug("Unmatched Candidate Profiles URIs: %s", unmatched_profiles) @@ -2666,46 +3016,79 @@ def validate_requirements(self, requirements: list[Requirement]) -> ValidationRe """ Validates the RO-Crate against the specified subset of the profile requirements """ - assert all(isinstance(requirement, Requirement) for requirement in requirements), \ - "Invalid requirement type" + assert all(isinstance(requirement, Requirement) for requirement in requirements), "Invalid requirement type" # perform the requirements validation return self.__do_validate__(requirements) - def __do_validate__(self, - requirements: Optional[list[Requirement]] = None) -> ValidationResult: + def __do_validate__(self, requirements: Optional[list[Requirement]] = None) -> ValidationResult: # initialize the validation context context = ValidationContext(self, self.validation_settings) # register the current context self.__current_context__ = context - try: + # initialize the requirement types + self.__invoke_pre_validation_hooks__(context) + try: # set the profiles to validate against profiles = context.profiles 
assert len(profiles) > 0, "No profiles to validate" + # Pre-load every profile's requirements so all shape graphs are + # populated before the validation loop runs. This lets a check + # see `sh:deactivated true` triples declared by descendant + # profiles that have not yet been visited. + for p in profiles: + _ = p.requirements self.notify(EventType.VALIDATION_START) for profile in profiles: - logger.debug("Validating profile %s (id: %s)", profile.name, profile.identifier) + logger.debug( + "Validating profile %s (id: %s)", + profile.name, + profile.identifier, + ) # set the target profile in the context context._target_validation_profile = profile self.notify(ProfileValidationEvent(EventType.PROFILE_VALIDATION_START, profile=profile)) # perform the requirements validation requirements = profile.get_requirements( - context.requirement_severity, exact_match=context.requirement_severity_only) - logger.debug("Validating profile %s with %s requirements", profile.identifier, len(requirements)) - logger.debug("For profile %s, validating these %s requirements: %s", - profile.identifier, len(requirements), requirements) + context.requirement_severity, + exact_match=context.requirement_severity_only, + ) + logger.debug( + "Validating profile %s with %s requirements", + profile.identifier, + len(requirements), + ) + logger.debug( + "For profile %s, validating these %s requirements: %s", + profile.identifier, + len(requirements), + requirements, + ) terminate = False for requirement in requirements: if not requirement.overridden: - self.notify(RequirementValidationEvent( - EventType.REQUIREMENT_VALIDATION_START, requirement=requirement)) + self.notify( + RequirementValidationEvent( + EventType.REQUIREMENT_VALIDATION_START, + requirement=requirement, + ) + ) passed = requirement._do_validate_(context) - logger.debug("Requirement %s passed: %s", requirement.identifier, passed) + logger.debug( + "Requirement %s passed: %s", + requirement.identifier, + passed, + ) if not 
requirement.overridden: - self.notify(RequirementValidationEvent( - EventType.REQUIREMENT_VALIDATION_END, requirement=requirement, validation_result=passed)) + self.notify( + RequirementValidationEvent( + EventType.REQUIREMENT_VALIDATION_END, + requirement=requirement, + validation_result=passed, + ) + ) if passed: logger.debug("Validation Requirement passed") else: @@ -2717,16 +3100,33 @@ def __do_validate__(self, self.notify(ProfileValidationEvent(EventType.PROFILE_VALIDATION_END, profile=profile)) if terminate: break - self.notify(ValidationEvent(EventType.VALIDATION_END, - validation_result=context.result)) + # finalize the requirement types + self.__invoke_post_validation_hooks__(context) + # notify the end of the validation + self.notify(ValidationEvent(EventType.VALIDATION_END, validation_result=context.result)) + # return the validation result return context.result finally: # clear the current context self.__current_context__ = None + def __invoke_pre_validation_hooks__(self, context: ValidationContext): + logger.debug("Initializing requirement types: starting...") + requirements_types = RequirementLoader.__get_requirement_classes__() + for requirement_type in requirements_types: + requirement_type.initialize(context) + logger.debug("Initializing requirement types: completed") + + def __invoke_post_validation_hooks__(self, context: ValidationContext): + logger.debug("Finalizing requirement types: starting...") + requirements_types = RequirementLoader.__get_requirement_classes__() + for requirement_type in requirements_types: + requirement_type.finalize(context) + logger.debug("Finalizing requirement types: completed") + def notify(self, event: Union[Event, EventType]): - """ Override notify to update statistics """ + """Override notify to update statistics""" assert self.__current_context__ is not None, "No current validation context" result: ValidationResult = self.__current_context__.result if isinstance(event, EventType): @@ -2755,13 +3155,17 @@ def 
__init__(self, validator: Validator, settings: ValidationSettings): self._result = None # additional properties for the context self._properties = {} + # URLs already reported as missing from the HTTP cache during this run + self._offline_cache_misses_warned: set[str] = set() # initialize the ROCrate object if settings.metadata_dict: self._rocrate = ROCrate.from_metadata_dict(settings.metadata_dict) else: - self._rocrate = ROCrate.new_instance(settings.rocrate_uri, - relative_root_path=settings.rocrate_relative_root_path) + self._rocrate = ROCrate.new_instance( + settings.rocrate_uri, + relative_root_path=settings.rocrate_relative_root_path, + ) assert isinstance(self._rocrate, ROCrate), "Invalid RO-Crate instance" @property @@ -2900,8 +3304,11 @@ def rel_fd_path(self) -> Path: def __load_data_graph__(self) -> Graph: data_graph = Graph() logger.debug("Loading RO-Crate metadata of: %s", self.ro_crate.uri) - _ = data_graph.parse(data=self.ro_crate.metadata.as_dict(), - format="json-ld", publicID=self.publicID) + _ = data_graph.parse( + data=self.ro_crate.metadata.as_dict(), + format="json-ld", + publicID=self.publicID, + ) logger.debug("RO-Crate metadata loaded: %s", data_graph) return data_graph @@ -2986,7 +3393,8 @@ def __load_profiles__(self) -> list[Profile]: extra_profiles_path=self.settings.extra_profiles_path, publicID=self.publicID, severity=self.requirement_severity, - allow_requirement_check_override=self.allow_requirement_check_override) + allow_requirement_check_override=self.allow_requirement_check_override, + ) # Check if the target profile is in the list of profiles profile = Profile.get_by_identifier(self.profile_identifier) @@ -3007,7 +3415,8 @@ def __load_profiles__(self) -> list[Profile]: logger.exception(e) raise ProfileNotFound( self.profile_identifier, - message=f"Profile '{self.profile_identifier}' not found in '{self.profiles_path}'") from e + message=f"Profile '{self.profile_identifier}' not found in '{self.profiles_path}'", + ) from e # if 
the inheritance is enabled, return only the target profile if not self.inheritance_enabled: @@ -3083,3 +3492,21 @@ def get_profile_by_identifier(self, identifier: str) -> list[Profile]: if p.identifier == identifier: return p raise ProfileNotFound(identifier) + + def maybe_warn_offline_cache_miss(self, exc: BaseException) -> bool: + """ + If ``exc`` (or any cause/context in its chain) is an + :class:`OfflineCacheMissError`, emit a single user-facing warning + for the missing URL — but only the first time that URL is seen + during this validation run — and return ``True``. + + Returns ``False`` when the exception is unrelated to offline cache + misses, so callers can fall back to their generic handling. + """ + miss = find_offline_cache_miss(exc) + if miss is None: + return False + if miss.url not in self._offline_cache_misses_warned: + self._offline_cache_misses_warned.add(miss.url) + logger.warning("%s", miss) + return True diff --git a/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl b/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl index e481ade1f..3755a6ebb 100644 --- a/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/10_definedterm.ttl @@ -24,11 +24,111 @@ @prefix validator: . @prefix xsd: . +isa-ro-crate:FindISAOntologyAnnotation a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA OntologyAnnotations within the RO-Crate" ; + sh:description "An OntologyAnnotation has type DefinedTerm and is attached to an Assay, Protocol, Person, or Article."
; + sh:targetClass schema:DefinedTerm ; + sh:order 4 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:OntologyAnnotation ; + # The condition: need to be attached to an Assay, Protocol, Person, or Article + sh:condition [ + a sh:NodeShape; + sh:name "OntologyAnnotation is attached to an Assay, Protocol, Person, or Article" ; + sh:or ( + [ + sh:property [ + sh:name "OntologyAnnotation is measurementMethod of an Assay" ; + sh:path [ sh:inversePath schema:measurementMethod ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Assay ; + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is measurementTechnique of an Assay" ; + sh:path [ sh:inversePath schema:measurementTechnique ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Assay + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is intendedUse of a Protocol" ; + sh:path [ sh:inversePath schema:intendedUse ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is computationalTool of a Protocol" ; + sh:path [ sh:inversePath schema:computationalTool ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is labEquipment of a Protocol" ; + sh:path [ sh:inversePath schema:labEquipment ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is reagent of a Protocol" ; + sh:path [ sh:inversePath schema:reagent ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is job title of a Person" ; + sh:path [ sh:inversePath
schema:jobTitle ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Person + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "OntologyAnnotation is status of a Publication" ; + sh:path [ sh:inversePath schema:creativeWorkStatus ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Publication + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + ) ; + ] ; + ] ; +. isa-ro-crate:DefinedTermMustHaveName a sh:NodeShape ; sh:name "DefinedTerm MUST have a name" ; sh:description "A DefinedTerm MUST have a name" ; - sh:targetClass schema:DefinedTerm ; + sh:targetClass isa-ro-crate:OntologyAnnotation ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; @@ -47,7 +147,7 @@ isa-ro-crate:DefinedTermMustHaveName a sh:NodeShape ; isa-ro-crate:DefinedTermShouldHaveTermCodeOfCorrectType a sh:NodeShape ; sh:name "DefinedTerm SHOULD have termCode of correct type" ; sh:description "A DefinedTerm SHOULD have at least one termCode of correct type" ; - sh:targetClass schema:DefinedTerm ; + sh:targetClass isa-ro-crate:OntologyAnnotation ; sh:property [ a sh:PropertyShape ; sh:path schema:termCode ; diff --git a/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl b/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl index b0c8c990b..6bdee423d 100644 --- a/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/11_propertyvalue.ttl @@ -24,11 +24,111 @@ @prefix validator: . @prefix xsd: . +isa-ro-crate:FindISAPropertyValue a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA PropertyValues within the RO-Crate" ; + sh:description "An ISA PropertyValue has type PropertyValue and is attached to an Assay, Process, Protocol, Sample, Person, or Article." 
; + sh:targetClass schema:PropertyValue ; + sh:order 4 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:PropertyValue ; + # The condition: need to be attached to an Assay, Process, Protocol, Sample, Person, or Article + sh:condition [ + a sh:NodeShape; + sh:name "PropertyValue is attached to an Assay, Process, Protocol, Sample, Person, or Article" ; + sh:or ( + [ + sh:property [ + sh:name "PropertyValue is variableMeasured of an Assay" ; + sh:path [ sh:inversePath schema:variableMeasured ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Assay ; + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is parameterValue of a Process" ; + sh:path [ sh:inversePath bioschemas-prop:parameterValue ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Process + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is computationalTool of a Protocol" ; + sh:path [ sh:inversePath schema:computationalTool ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is labEquipment of a Protocol" ; + sh:path [ sh:inversePath schema:labEquipment ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is reagent of a Protocol" ; + sh:path [ sh:inversePath schema:reagent ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Protocol + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is characteristic of a Sample" ; + sh:path [ sh:inversePath bioschemas-prop:additionalProperty ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Sample + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is identifier of a Person" ; + sh:path [ sh:inversePath
schema:identifier ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Person + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "PropertyValue is identifier of a Publication" ; + sh:path [ sh:inversePath schema:identifier ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Publication + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + ) ; + ] ; + ] ; +. ro-crate:FindPropertyValueSubtypes a sh:NodeShape, validator:HiddenShape; sh:name "Identify PropertyValue subtypes within the RO-Crate" ; sh:description "A PropertyValue has type Parameter, Characteristic, Factor or Component if additionalType is set accordingly." ; - sh:targetClass schema:PropertyValue ; + sh:targetClass isa-ro-crate:PropertyValue ; # Expand data graph with triples from the file data entity sh:rule [ a sh:TripleRule ; @@ -87,7 +187,7 @@ ro-crate:FindPropertyValueSubtypes a sh:NodeShape, validator:HiddenShape; isa-ro-crate:PropertyValueMustHaveName a sh:NodeShape ; sh:name "PropertyValue MUST have a name" ; sh:description "A PropertyValue MUST have a name" ; - sh:targetClass schema:PropertyValue ; + sh:targetClass isa-ro-crate:PropertyValue ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; @@ -106,7 +206,7 @@ isa-ro-crate:PropertyValueMustHaveName a sh:NodeShape ; isa-ro-crate:PropertyValueShouldHaveValueOfCorrectType a sh:NodeShape ; sh:name "PropertyValue SHOULD have value of correct type" ; sh:description "A PropertyValue SHOULD have at least one value of correct type" ; - sh:targetClass schema:PropertyValue ; + sh:targetClass isa-ro-crate:PropertyValue ; sh:property [ a sh:PropertyShape ; sh:path schema:value ; diff --git a/rocrate_validator/profiles/isa-ro-crate/1_study.ttl b/rocrate_validator/profiles/isa-ro-crate/1_study.ttl index dc5ab239b..11d5ec2ff 100644 --- a/rocrate_validator/profiles/isa-ro-crate/1_study.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/1_study.ttl @@ -23,47 +23,11 @@ @prefix validator: . @prefix xsd: . 
-# class:study -# isTypeDataset -# isAdditionalType"Study - - -# check study must have identifier -# check study must have name -# check study must have description -# check study should have about - -# check study must be pointed to by investigation through hasPart - -# # Find studies and add isa-ro-crate:Study type to them, for easier retrieval for checks -# ro-crate:FindStudies a sh:NodeShape, validator:HiddenShape; -# sh:name "Identify Studies within the RO-Crate" ; -# sh:description "A Study has type Dataset and additionalType 'Study'." ; -# sh:target [ -# a sh:SPARQLTarget ; -# sh:prefixes ro-crate:sparqlPrefixes ; -# sh:select """ -# SELECT ?this -# WHERE { -# ?this a schema:Dataset . -# ?this schema:additionalType "Study" . -# } -# """ -# ] ; - -# # Expand data graph with triples from the file data entity -# sh:rule [ -# a sh:TripleRule ; -# sh:subject sh:this ; -# sh:predicate rdf:type ; -# sh:object isa-ro-crate:Study ; -# ] . - -# Find studies and add isa-ro-crate:Study type to them, for easier retrieval for checks -ro-crate:FindStudies a sh:NodeShape, validator:HiddenShape; +isa-ro-crate:FindStudies a sh:NodeShape, validator:HiddenShape; sh:name "Identify Studies within the RO-Crate" ; sh:description "A Study has type Dataset and additionalType 'Study'." ; sh:targetClass schema:Dataset ; + sh:order 1 ; # Expand data graph with triples from the file data entity sh:rule [ a sh:TripleRule ; @@ -80,7 +44,6 @@ ro-crate:FindStudies a sh:NodeShape, validator:HiddenShape; ] . -# WIP isa-ro-crate:StudyMustHaveBaseDescriptors a sh:NodeShape ; sh:name "Study MUST have base properties" ; sh:description "A Study MUST have identifier, name and description" ; diff --git a/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl b/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl index 8158498de..475627afc 100644 --- a/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/2_assay.ttl @@ -23,39 +23,11 @@ @prefix validator: . 
@prefix xsd: . - -# check assay must have name - -# check assay must be pointed to by investigation through hasPart - -# Find assays and add isa-ro-crate:Assay type to them, for easier retrieval for checks -# ro-crate:FindAssays a sh:NodeShape, validator:HiddenShape; -# sh:name "Identify Assays within the RO-Crate" ; -# sh:description "An Assay has type Dataset and additionalType 'Assay'." ; -# sh:target [ -# a sh:SPARQLTarget ; -# sh:prefixes ro-crate:sparqlPrefixes ; -# sh:select """ -# SELECT ?this -# WHERE { -# ?this a schema:Dataset . -# ?this schema:additionalType "Assay" . -# } -# """ -# ] ; - -# # Expand data graph with triples from the file data entity -# sh:rule [ -# a sh:TripleRule ; -# sh:subject sh:this ; -# sh:predicate rdf:type ; -# sh:object isa-ro-crate:Assay ; -# ] . - -ro-crate:FindAssays a sh:NodeShape, validator:HiddenShape; +isa-ro-crate:FindAssays a sh:NodeShape, validator:HiddenShape; sh:name "Identify Assays within the RO-Crate" ; sh:description "An Assay has type Dataset and additionalType 'Assay'." ; sh:targetClass schema:Dataset ; + sh:order 1 ; # Expand data graph with triples from the file data entity sh:rule [ a sh:TripleRule ; @@ -72,7 +44,6 @@ ro-crate:FindAssays a sh:NodeShape, validator:HiddenShape; ] . -# WIP isa-ro-crate:AssayMustHaveBaseDescriptors a sh:NodeShape ; sh:name "Assay MUST have base properties" ; sh:description "An Assay MUST have identifier" ; diff --git a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl index 40b6fdc7e..3d71bf39a 100644 --- a/rocrate_validator/profiles/isa-ro-crate/3_process.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/3_process.ttl @@ -24,6 +24,33 @@ @prefix validator: . @prefix xsd: . +isa-ro-crate:FindISAProcesses a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA processes within the RO-Crate" ; + sh:description "A Process has type LabProcess and is attached to a Study or Assay." 
; + sh:targetClass bioschemas:LabProcess ; + sh:order 2 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Process ; + # The condition: need to be attached to a Study or Assay + sh:condition [ + sh:property [ + sh:path [ sh:inversePath schema:about ] ; + sh:qualifiedValueShape [ + sh:or( + [sh:class isa-ro-crate:Assay] + [sh:class isa-ro-crate:Study] + ) + ] ; + sh:qualifiedMinCount 1 ; + ] ; + ] ; + ] +. + # check process must have name @@ -33,7 +60,7 @@ isa-ro-crate:ProcessMustHaveName a sh:NodeShape ; sh:name "Process MUST have name" ; sh:description "A Process MUST have a name" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; @@ -51,7 +78,7 @@ isa-ro-crate:ProcessMustHaveName a sh:NodeShape ; isa-ro-crate:ProcessMustBeReferencedFromDataset a sh:NodeShape ; sh:name "Process MUST be directly referenced from a dataset" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; @@ -67,7 +94,7 @@ isa-ro-crate:ProcessMustBeReferencedFromDataset a sh:NodeShape ; isa-ro-crate:ProcessShouldHaveObject a sh:NodeShape ; sh:name "Process SHOULD have an object" ; sh:description "A Process SHOULD have an object" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; sh:path schema:object ; @@ -93,7 +120,7 @@ isa-ro-crate:ProcessShouldHaveObject a sh:NodeShape ; isa-ro-crate:ProcessShouldHaveResult a sh:NodeShape ; sh:name "Process SHOULD have a result" ; sh:description "A Process SHOULD have a result" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; sh:path schema:result ; @@ -119,7 +146,7 @@ isa-ro-crate:ProcessShouldHaveResult a sh:NodeShape ; isa-ro-crate:ProcessShouldHaveProtocol 
a sh:NodeShape ; sh:name "Process SHOULD have a protocol" ; sh:description "A Process SHOULD have a protocol" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; sh:path bioschemas-prop:executesLabProtocol ; @@ -141,7 +168,7 @@ isa-ro-crate:ProcessShouldHaveProtocol a sh:NodeShape ; isa-ro-crate:ProcessShouldHaveParamValue a sh:NodeShape ; sh:name "Process SHOULD have a parameter value" ; sh:description "A Process SHOULD have a parameter value" ; - sh:targetClass bioschemas:LabProcess ; + sh:targetClass isa-ro-crate:Process ; sh:property [ a sh:PropertyShape ; sh:path bioschemas-prop:parameterValue ; diff --git a/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl b/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl index 6f5ef82e2..64e762242 100644 --- a/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/4_protocol.ttl @@ -24,14 +24,36 @@ @prefix validator: . @prefix xsd: . - +isa-ro-crate:FindISAProtocols a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA protocols within the RO-Crate" ; + sh:description "A Protocol has type LabProtocol and is attached to an ISA process." ; + sh:targetClass bioschemas:LabProtocol ; + sh:order 3 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Protocol ; + # The condition: need to be attached to a Process + sh:condition [ + sh:property [ + sh:path [ sh:inversePath bioschemas-prop:executesLabProtocol ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Process ; + ] ; + sh:qualifiedMinCount 1 ; + ] ; + ] ; + ] ; +. 
# check protocol attributes must have correct types isa-ro-crate:ProtocolShouldHaveNameOfCorrectType a sh:NodeShape ; sh:name "Protocol SHOULD have name" ; sh:description "A Protocol SHOULD have a name" ; - sh:targetClass bioschemas:LabProtocol ; + sh:targetClass isa-ro-crate:Protocol ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; @@ -57,7 +79,7 @@ isa-ro-crate:ProtocolShouldHaveNameOfCorrectType a sh:NodeShape ; isa-ro-crate:ProtocolShouldHaveDescriptionOfCorrectType a sh:NodeShape ; sh:name "Protocol SHOULD have description" ; sh:description "A Protocol SHOULD have a description" ; - sh:targetClass bioschemas:LabProtocol ; + sh:targetClass isa-ro-crate:Protocol ; sh:property [ a sh:PropertyShape ; sh:path schema:description ; @@ -83,7 +105,7 @@ isa-ro-crate:ProtocolShouldHaveDescriptionOfCorrectType a sh:NodeShape ; isa-ro-crate:ProtocolShouldHaveIntendedUse a sh:NodeShape ; sh:name "Protocol SHOULD have intended use" ; sh:description "A Protocol SHOULD have an intended use" ; - sh:targetClass bioschemas:LabProtocol ; + sh:targetClass isa-ro-crate:Protocol ; sh:property [ a sh:PropertyShape ; sh:path bioschemas-prop:intendedUse ; diff --git a/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl b/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl index 5b6bf2f5f..5bfab5038 100644 --- a/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/5_sample.ttl @@ -24,14 +24,51 @@ @prefix validator: . @prefix xsd: . - +isa-ro-crate:FindISASamples a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA samples within the RO-Crate" ; + sh:description "A Sample has type Sample and is attached to an ISA process." 
; + sh:targetClass bioschemas:Sample ; + sh:order 3 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Sample ; + # The condition: need to be attached to a Process + sh:condition [ + a sh:NodeShape ; + sh:name "Sample is attached to a process" ; + sh:or( + [ + sh:property [ + sh:path [ sh:inversePath schema:object ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Process + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:path [ sh:inversePath schema:result ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Process + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + ) ; + ] ; + ] ; +. # check Sample attributes must have correct types isa-ro-crate:SampledMustHaveName a sh:NodeShape ; sh:name "Sample MUST have name" ; sh:description "A Sample MUST have a name" ; - sh:targetClass bioschemas:Sample ; + sh:targetClass isa-ro-crate:Sample ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; @@ -50,7 +87,7 @@ isa-ro-crate:SampledMustHaveName a sh:NodeShape ; isa-ro-crate:SampleShouldHaveAdditionalPropertyOfCorrectType a sh:NodeShape ; sh:name "Sample SHOULD have additional properties" ; sh:description "A Sample SHOULD have at least one additional property" ; - sh:targetClass bioschemas:Sample ; + sh:targetClass isa-ro-crate:Sample ; sh:property [ a sh:PropertyShape ; sh:path bioschemas-prop:additionalProperty ; diff --git a/rocrate_validator/profiles/isa-ro-crate/6_data.ttl b/rocrate_validator/profiles/isa-ro-crate/6_data.ttl index 3bc9da6ad..3a8d1948d 100644 --- a/rocrate_validator/profiles/isa-ro-crate/6_data.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/6_data.ttl @@ -24,14 +24,39 @@ @prefix validator: . @prefix xsd: . 
- +isa-ro-crate:FindISAFiles a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA files within the RO-Crate" ; + sh:description "A data file has type File/MediaObject and is attached to an ISA Study or Assay." ; + sh:targetClass schema:MediaObject ; + sh:order 2 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Data ; + # The condition: need to be attached to a Study or Assay + sh:condition [ + sh:property [ + sh:path [ sh:inversePath schema:hasPart ] ; + sh:qualifiedValueShape [ + sh:or( + [sh:class isa-ro-crate:Assay] + [sh:class isa-ro-crate:Study] + ) + ] ; + sh:qualifiedMinCount 1 ; + ] ; + ] ; + ] ; +. # check File attributes must have correct types isa-ro-crate:FileMustHaveName a sh:NodeShape ; sh:name "File MUST have name" ; sh:description "A File MUST have a name" ; - sh:targetClass schema:MediaObject ; + sh:targetClass isa-ro-crate:Data ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; diff --git a/rocrate_validator/profiles/isa-ro-crate/7_person.ttl b/rocrate_validator/profiles/isa-ro-crate/7_person.ttl index fe9976f60..9e2855265 100644 --- a/rocrate_validator/profiles/isa-ro-crate/7_person.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/7_person.ttl @@ -24,14 +24,67 @@ @prefix validator: . @prefix xsd: . - +isa-ro-crate:FindISAPerson a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA persons within the RO-Crate" ; + sh:description "A Person has type Person and is attached to an Investigation, Study, Assay, Process, or Article." 
; + sh:targetClass schema:Person ; + sh:order 3 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Person ; + # The condition: need to be attached to an Investigation, Study, Assay, Process, or Article + sh:condition [ + a sh:NodeShape; + sh:name "Person is attached to an Investigation, Study, Assay, Process, or Article" ; + sh:or ( + [ + sh:property [ + sh:name "Person is creator of a Study, Assay, or RootDataEntity" ; + sh:path [ sh:inversePath schema:creator ] ; + sh:qualifiedValueShape [ + sh:or( + [sh:class isa-ro-crate:Assay] + [sh:class isa-ro-crate:Study] + [sh:class ro-crate:RootDataEntity] + ) + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "Person is author of an Article" ; + sh:path [ sh:inversePath schema:author ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Article + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + [ + sh:property [ + sh:name "Person is agent of a Process" ; + sh:path [ sh:inversePath schema:agent ] ; + sh:qualifiedValueShape [ + sh:class isa-ro-crate:Process + ] ; + sh:qualifiedMinCount 1 ; + ] + ] + ) ; + ] ; + ] ; +.
# check Person attributes must have correct types isa-ro-crate:PersonMustHaveGivenName a sh:NodeShape ; sh:name "Person MUST have a given name" ; sh:description "A Person MUST have a given name" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:givenName ; @@ -50,7 +103,7 @@ isa-ro-crate:PersonMustHaveGivenName a sh:NodeShape ; isa-ro-crate:PersonShouldHaveAffiliationOfCorrectType a sh:NodeShape ; sh:name "Person SHOULD have affiliation" ; sh:description "A Person SHOULD have at least one affiliation" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:affiliation ; @@ -72,7 +125,7 @@ isa-ro-crate:PersonShouldHaveAffiliationOfCorrectType a sh:NodeShape ; isa-ro-crate:PersonShouldHaveJobTitleOfCorrectType a sh:NodeShape ; sh:name "Person SHOULD have job title" ; sh:description "A Person SHOULD have at least one job title" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:jobTitle ; @@ -94,7 +147,7 @@ isa-ro-crate:PersonShouldHaveJobTitleOfCorrectType a sh:NodeShape ; isa-ro-crate:PersonShouldHaveEmailOfCorrectType a sh:NodeShape ; sh:name "Person SHOULD have email" ; sh:description "A Person SHOULD have at least one email" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:email ; @@ -119,7 +172,7 @@ isa-ro-crate:PersonShouldHaveEmailOfCorrectType a sh:NodeShape ; isa-ro-crate:PersonShouldHaveFamilyNameOfCorrectType a sh:NodeShape ; sh:name "Person SHOULD have family name" ; sh:description "A Person SHOULD have at least one family name" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:familyName ; @@ -144,7 +197,7 @@ isa-ro-crate:PersonShouldHaveFamilyNameOfCorrectType a sh:NodeShape ; 
isa-ro-crate:PersonShouldHaveIdentifierOfCorrectType a sh:NodeShape ; sh:name "Person SHOULD have identifier" ; sh:description "A Person SHOULD have at least one identifier" ; - sh:targetClass schema:Person ; + sh:targetClass isa-ro-crate:Person ; sh:property [ a sh:PropertyShape ; sh:path schema:identifier ; diff --git a/rocrate_validator/profiles/isa-ro-crate/8_article.ttl b/rocrate_validator/profiles/isa-ro-crate/8_article.ttl index 176122490..c9deffb53 100644 --- a/rocrate_validator/profiles/isa-ro-crate/8_article.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/8_article.ttl @@ -24,11 +24,38 @@ @prefix validator: . @prefix xsd: . +isa-ro-crate:FindISAPublication a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA publications within the RO-Crate" ; + sh:description "A Publication has type ScholarlyArticle and is attached to a Study or Investigation." ; + sh:targetClass schema:ScholarlyArticle ; + sh:order 2 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Article ; + # The condition: need to be attached to a Study or Investigation + sh:condition [ + sh:property [ + sh:path [ sh:inversePath schema:citation ] ; + sh:qualifiedValueShape [ + sh:or( + [sh:class ro-crate:RootDataEntity] + [sh:class isa-ro-crate:Study] + ) + ] ; + sh:qualifiedMinCount 1 ; + ] ; + ] ; + ]; +. 
+ isa-ro-crate:ArticleMustHaveHeadline a sh:NodeShape ; sh:name "Article MUST have a headline" ; sh:description "An Article MUST have a headline" ; - sh:targetClass schema:ScholarlyArticle ; + sh:targetClass isa-ro-crate:Article ; sh:property [ a sh:PropertyShape ; sh:path schema:headline ; @@ -47,7 +74,7 @@ isa-ro-crate:ArticleMustHaveHeadline a sh:NodeShape ; isa-ro-crate:ArticleMustHaveIdentifier a sh:NodeShape ; sh:name "Article MUST have an identifier" ; sh:description "An Article MUST have an identifier" ; - sh:targetClass schema:ScholarlyArticle ; + sh:targetClass isa-ro-crate:Article ; sh:property [ a sh:PropertyShape ; sh:path schema:identifier ; @@ -69,7 +96,7 @@ isa-ro-crate:ArticleMustHaveIdentifier a sh:NodeShape ; isa-ro-crate:ArticleShouldHaveAuthorOfCorrectType a sh:NodeShape ; sh:name "Article SHOULD have author" ; sh:description "An Article SHOULD have at least one author" ; - sh:targetClass schema:ScholarlyArticle ; + sh:targetClass isa-ro-crate:Article ; sh:property [ a sh:PropertyShape ; sh:path schema:author ; diff --git a/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl b/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl index 4d64e1198..d02c0eb3a 100644 --- a/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/9_comment.ttl @@ -24,11 +24,41 @@ @prefix validator: . @prefix xsd: . +isa-ro-crate:FindISAComment a sh:NodeShape, validator:HiddenShape; + sh:name "Identify ISA comments within the RO-Crate" ; + sh:description "A Comment has type Comment and is attached to an Investigation, Study, Assay, Protocol, Data file, or Publication." 
; + sh:targetClass schema:Comment ; + sh:order 3 ; + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object isa-ro-crate:Comment ; + # The condition: need to be attached to a Investigation, Study, Assay, Protocol, Data file, or Publication + sh:condition [ + sh:property [ + sh:path [ sh:inversePath schema:comment ] ; + sh:qualifiedValueShape [ + sh:or( + [sh:class ro-crate:RootDataEntity] + [sh:class isa-ro-crate:Study] + [sh:class isa-ro-crate:Assay] + [sh:class isa-ro-crate:Protocol] + [sh:class isa-ro-crate:Data] + [sh:class isa-ro-crate:Article] + ) + ] ; + sh:qualifiedMinCount 1 ; + ] ; + ] ; + ]; +. isa-ro-crate:CommentShouldHaveName a sh:NodeShape ; sh:name "Comment SHOULD have name" ; sh:description "A Comment SHOULD have at least one name" ; - sh:targetClass schema:Comment ; + sh:targetClass isa-ro-crate:Comment ; sh:property [ a sh:PropertyShape ; sh:path schema:name ; @@ -53,7 +83,7 @@ isa-ro-crate:CommentShouldHaveName a sh:NodeShape ; isa-ro-crate:CommentShouldHaveText a sh:NodeShape ; sh:name "Comment SHOULD have text" ; sh:description "A Comment SHOULD have at least one text" ; - sh:targetClass schema:Comment ; + sh:targetClass isa-ro-crate:Comment ; sh:property [ a sh:PropertyShape ; sh:path schema:text ; diff --git a/rocrate_validator/profiles/isa-ro-crate/ontology.ttl b/rocrate_validator/profiles/isa-ro-crate/ontology.ttl index 8e48b75bc..4c7b5d926 100644 --- a/rocrate_validator/profiles/isa-ro-crate/ontology.ttl +++ b/rocrate_validator/profiles/isa-ro-crate/ontology.ttl @@ -36,6 +36,51 @@ isa-ro-crate:Assay rdf:type owl:Class ; rdfs:subClassOf schema:Dataset ; rdfs:label "Assay"@en . +# Process +isa-ro-crate:Process rdf:type owl:Class ; + rdfs:subClassOf bioschemas:LabProcess ; + rdfs:label "Process"@en . + +# Protocol +isa-ro-crate:Protocol rdf:type owl:Class ; + rdfs:subClassOf bioschemas:LabProtocol ; + rdfs:label "Protocol"@en . 
+ +# Person +isa-ro-crate:Person rdf:type owl:Class ; + rdfs:subClassOf schema:Person ; + rdfs:label "Person"@en . + +# Article +isa-ro-crate:Article rdf:type owl:Class ; + rdfs:subClassOf schema:ScholarlyArticle ; + rdfs:label "Article"@en . + +# Sample +isa-ro-crate:Sample rdf:type owl:Class ; + rdfs:subClassOf bioschemas:Sample ; + rdfs:label "Sample"@en . + +# Data (specializes schema:MediaObject) +isa-ro-crate:Data rdf:type owl:Class ; + rdfs:subClassOf schema:MediaObject ; + rdfs:label "Data"@en . + +# Comment (specializes schema:Comment, the class targeted by the FindISAComment shape) +isa-ro-crate:Comment rdf:type owl:Class ; + rdfs:subClassOf schema:Comment ; + rdfs:label "Comment"@en . + +# OntologyAnnotation +isa-ro-crate:OntologyAnnotation rdf:type owl:Class ; + rdfs:subClassOf schema:DefinedTerm ; + rdfs:label "OntologyAnnotation"@en . + +# PropertyValue +isa-ro-crate:PropertyValue rdf:type owl:Class ; + rdfs:subClassOf schema:PropertyValue ; + rdfs:label "PropertyValue"@en . + isa-ro-crate:Parameter rdf:type owl:Class ; rdfs:subClassOf schema:PropertyValue ; rdfs:label "Parameter"@en . 
diff --git a/rocrate_validator/profiles/provenance-run-crate/must/1_howtostep.ttl b/rocrate_validator/profiles/provenance-run-crate/must/1_howtostep.ttl index c74f6ef71..cd86b44f4 100644 --- a/rocrate_validator/profiles/provenance-run-crate/must/1_howtostep.ttl +++ b/rocrate_validator/profiles/provenance-run-crate/must/1_howtostep.ttl @@ -52,14 +52,14 @@ provenance-run-crate:ProvRCHowToStepRequired a sh:NodeShape ; sh:property [ a sh:PropertyShape ; sh:name "HowToStep position type" ; - sh:description "If specified, position must be a string representing an integer" ; + sh:description "If specified, position must be an integer or a string representing an integer" ; sh:path schema:position ; sh:or ( [ sh:datatype xsd:string ; ] [ sh:datatype xsd:integer ; ] ) ; sh:pattern "\\d+" ; - sh:message "If specified, position must be a string representing an integer" ; + sh:message "If specified, position must be an integer or a string representing an integer" ; ] ; sh:property [ a sh:PropertyShape ; diff --git a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py index 1abcbcfa0..d96d8ee03 100644 --- a/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/must/4_data_entity_metadata.py @@ -38,11 +38,13 @@ def check_availability(self, context: ValidationContext) -> bool: return True # Perform the check result = True + # Web-based Data Entities (absolute URIs with any scheme other than `file`, + # e.g. http://, https://, ftp://, scp://, s3://, ...) are not required to + # be part of the local payload per the RO-Crate specification. 
for entity in context.ro_crate.metadata.get_data_entities(exclude_web_data_entities=True): assert entity.id is not None, "Entity has no @id" logger.debug("Ensure the presence of the Data Entity '%s' within the RO-Crate", entity.id) try: - logger.debug("Ensure the presence of the Data Entity '%s' within the RO-Crate", entity.id) if entity.has_local_identifier(): logger.debug( "Ignoring the Data Entity '%s' as it is a local entity with a local identifier. " diff --git a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py index 13ef914aa..424269361 100644 --- a/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py +++ b/rocrate_validator/profiles/ro-crate/should/5_web_data_entity_metadata.py @@ -16,6 +16,7 @@ from rocrate_validator.models import ValidationContext from rocrate_validator.requirements.python import (PyFunctionCheck, check, requirement) +from rocrate_validator.utils.uri import AvailabilityStatus # set up logging logger = logging.getLogger(__name__) @@ -32,13 +33,37 @@ class WebDataEntityRecommendedChecker(PyFunctionCheck): def check_availability(self, context: ValidationContext) -> bool: """ Check if the Web-based Data Entity is directly downloadable - by a simple retrieval (e.g. HTTP GET) permitting redirection and HTTP/HTTPS URIs + by a simple retrieval (e.g. HTTP GET) permitting redirection and HTTP/HTTPS URIs. + + Resources that cannot be natively retrieved by the validator (e.g. + `scp://`, `s3://`, `sftp://`) or that are protected by an authorization + mechanism (HTTP 401/403) are reported as recommendation-level issues + and logged as warnings, without invalidating the validation. 
""" result = True for entity in context.ro_crate.metadata.get_web_data_entities(): assert entity.id is not None, "Entity has no @id" try: - if not entity.is_available(): + status = entity.check_availability() + if status == AvailabilityStatus.AVAILABLE: + continue + if status == AvailabilityStatus.UNAUTHORIZED: + msg = ( + f"Web-based Data Entity {entity.id} is protected by an " + f"authorization mechanism; availability could not be verified" + ) + logger.warning(msg) + context.result.add_issue(msg, self) + elif status == AvailabilityStatus.UNCHECKABLE: + scheme = entity.id_as_uri.scheme + msg = ( + f"Web-based Data Entity {entity.id} uses scheme " + f"'{scheme}' which is not natively supported by the " + f"validator; availability could not be verified" + ) + logger.warning(msg) + context.result.add_issue(msg, self) + else: context.result.add_issue( f'Web-based Data Entity {entity.id} is not available', self) result = False @@ -59,6 +84,12 @@ def check_content_size(self, context: ValidationContext) -> bool: result = True for entity in context.ro_crate.metadata.get_web_data_entities(): assert entity.id is not None, "Entity has no @id" + # Skip entities whose scheme the validator cannot natively fetch + # (e.g. scp://, s3://): without retrieving the content there is + # no actual size to compare `contentSize` against. Reachability + # is then checked separately via `is_available()` below. 
+ if not entity.id_as_uri.is_natively_checkable(): + continue if entity.is_available(): content_size = entity.get_property("contentSize") if content_size and int(content_size) != context.ro_crate.get_external_file_size(entity.id): diff --git a/rocrate_validator/requirements/python/__init__.py b/rocrate_validator/requirements/python/__init__.py index b23c4a9d4..b15d3ee2a 100644 --- a/rocrate_validator/requirements/python/__init__.py +++ b/rocrate_validator/requirements/python/__init__.py @@ -21,7 +21,7 @@ from rocrate_validator.models import (LevelCollection, Profile, Requirement, RequirementCheck, RequirementLevel, RequirementLoader, Severity, - ValidationContext) + SourceSnippet, ValidationContext) from rocrate_validator.utils.python_helpers import get_classes_from_file # set up logging @@ -38,11 +38,12 @@ def __init__(self, name: str, check_function: Callable[[RequirementCheck, ValidationContext], bool], description: Optional[str] = None, - level: Optional[LevelCollection] = LevelCollection.REQUIRED): + level: Optional[LevelCollection] = LevelCollection.REQUIRED, + deactivated: bool = False): """ check_function: a function that accepts an instance of PyFunctionCheck and a ValidationContext. 
""" - super().__init__(requirement, name, description=description, level=level) + super().__init__(requirement, name, description=description, level=level, deactivated=deactivated) sig = inspect.signature(check_function) if len(sig.parameters) != 2: @@ -62,6 +63,19 @@ def execute_check(self, context: ValidationContext) -> bool: return True return self._check_function(self, context) + def get_source_snippet(self) -> Optional[SourceSnippet]: + try: + code = inspect.getsource(self._check_function) + except (OSError, TypeError) as e: + logger.debug("Unable to read source for check %s: %s", self.identifier, e) + return None + source_file = inspect.getsourcefile(self._check_function) + return SourceSnippet( + language="python", + code=code, + source_path=Path(source_file) if source_file else None, + ) + class PyRequirement(Requirement): """ @@ -115,11 +129,13 @@ def __init_checks__(self): f"Getting severity from path: {self.severity_from_path}") severity = self.severity_from_path or Severity.REQUIRED logger.debug("Severity log: %r", severity) + deactivated = bool(getattr(member, "deactivated", False)) check = self.requirement_check_class(self, check_name, member, description=check_description, - level=LevelCollection.get(severity.name) if severity else None) + level=LevelCollection.get(severity.name) if severity else None, + deactivated=deactivated) self._checks.append(check) logger.debug("Added check: %s %r", check_name, check) @@ -159,7 +175,9 @@ def decorator(cls): return decorator -def check(name: Optional[str] = None, severity: Optional[Severity] = None): +def check(name: Optional[str] = None, + severity: Optional[Severity] = None, + deactivated: bool = False): """ A decorator to mark a function as a check. @@ -178,6 +196,12 @@ def check(name: Optional[str] = None, severity: Optional[Severity] = None): :param severity: the severity level :type severity: Optional[Severity] + :param deactivated: when True, the check is skipped during validation. 
+ Mirrors SHACL's ``sh:deactivated``: an extension profile may redeclare + a check with the same name as one in a parent profile and set this + flag to disable the inherited check. + :type deactivated: bool + :return: the decorated function :rtype: Callable """ @@ -193,6 +217,7 @@ def decorator(func): func.check = True func.name = check_name func.severity = severity + func.deactivated = deactivated return func return decorator diff --git a/rocrate_validator/requirements/shacl/checks.py b/rocrate_validator/requirements/shacl/checks.py index 012c87810..1ce62ab0b 100644 --- a/rocrate_validator/requirements/shacl/checks.py +++ b/rocrate_validator/requirements/shacl/checks.py @@ -16,6 +16,9 @@ from timeit import default_timer as timer from typing import Optional +from rdflib import RDF, BNode, Literal, Namespace + +from rocrate_validator.constants import SHACL_NS from rocrate_validator.errors import ROCrateMetadataNotFoundError from rocrate_validator.events import EventType from rocrate_validator.models import ( @@ -23,11 +26,13 @@ Requirement, RequirementCheck, RequirementCheckValidationEvent, + RequirementLevel, SkipRequirementCheck, + SourceSnippet, ValidationContext, ) -from rocrate_validator.requirements.shacl.models import Shape -from rocrate_validator.requirements.shacl.utils import make_uris_relative, resolve_parent_shape +from rocrate_validator.requirements.shacl.models import Shape, ShapesRegistry +from rocrate_validator.requirements.shacl.utils import build_node_subgraph, make_uris_relative, resolve_parent_shape from rocrate_validator.requirements.shacl.validator import ( SHACLValidationAlreadyProcessed, SHACLValidationContext, @@ -40,6 +45,9 @@ logger = logging.getLogger(__name__) +_SH = Namespace(SHACL_NS) +_TRUE_LITERALS = (Literal(True), Literal("true", datatype=None)) + class SHACLCheck(RequirementCheck): """ @@ -97,6 +105,37 @@ def shape(self) -> Shape: def root(self) -> bool: return self._root + @property + def deactivated(self) -> bool: + if 
self._deactivated: + return True + shape = self._shape + if shape is None: + return False + # Same-profile deactivation (cases B & C): the shape itself carries + # `sh:deactivated true`, possibly because it was redeclared in an + # extension profile via override-by-name. + for value in shape.graph.objects(subject=shape.node, predicate=_SH.deactivated): + if isinstance(value, Literal) and bool(value.toPython()): + return True + # Cross-profile deactivation (case A): a descendant profile may add + # ` sh:deactivated true` to its own shapes graph, + # without redeclaring the shape. Scan only profiles that inherit + # (transitively) from the shape's owning profile, so unrelated + # profiles loaded in the same process can't influence the result. + # Validator.__do_validate__ pre-loads the shape graphs. + from rocrate_validator.models import Profile + + owning_profile = self.requirement.profile + for profile in Profile.get_descendants(owning_profile): + try: + registry = ShapesRegistry.get_instance(profile) + except Exception: + continue + if registry.is_node_deactivated(shape.node): + return True + return False + @property def description(self) -> str: if self._shape.description: @@ -105,13 +144,28 @@ def description(self) -> str: return self._shape.parent.description return f"Check for {self._shape.name}" if self._shape.name else "SHACL validation check" - def __compute_requirement_level__(self) -> LevelCollection: + def __compute_requirement_level__(self) -> RequirementLevel: if self._shape and self._shape.get_declared_level(): return self._shape.get_declared_level() if self.requirement and self.requirement.requirement_level_from_path: return self.requirement.requirement_level_from_path + # When the shape file lives in the profile root and the NodeShape + # itself does not declare sh:severity, derive the level from the + # most severe nested PropertyShape instead of defaulting to REQUIRED. 
+ derived = self.__derive_level_from_properties__() + if derived: + return derived return LevelCollection.REQUIRED + def __derive_level_from_properties__(self) -> Optional[RequirementLevel]: + properties = getattr(self._shape, "properties", None) + if not properties: + return None + declared_levels = [lvl for lvl in (p.get_declared_level() for p in properties) if lvl] + if not declared_levels: + return None + return max(declared_levels, key=lambda lvl: lvl.severity.value) + @property def level(self) -> str: if not self._level: @@ -122,6 +176,58 @@ def level(self) -> str: def severity(self) -> str: return self.level.severity + def get_source_snippet(self) -> Optional[SourceSnippet]: + if self._shape is None: + return None + try: + graph = self._shape.graph + # build a subgraph containing all the triples related to the shape + subgraph = build_node_subgraph(graph, self._shape.node) + # identify the owner of the shape + owner = self._shape + while getattr(owner, "parent", None) is not None: + owner = owner.parent + # if the shape is not a root shape, include the triples linking the owner to the shape + if owner is not self._shape: + shacl = Namespace(SHACL_NS) + target_predicates = ( + RDF.type, + shacl.targetClass, + shacl.targetNode, + shacl.targetSubjectsOf, + shacl.targetObjectsOf, + shacl.target, + ) + for predicate in target_predicates: + for triple in owner.graph.triples((owner.node, predicate, None)): + subgraph.add(triple) + # follow BNode objects (e.g. 
sh:target referencing an inline SPARQL target) + _, _, obj = triple + if isinstance(obj, BNode): + subgraph += build_node_subgraph(owner.graph, obj) + # link the owner to the property so the relationship is preserved in the serialization + subgraph.add((owner.node, shacl.property, self._shape.node)) + + # copy bindings so the serialized snippet uses the same prefix declarations as the source file + for prefix, namespace in graph.namespaces(): + subgraph.bind(prefix, namespace, replace=True) + # serialize the subgraph to Turtle format + code = subgraph.serialize(format="turtle") + except Exception as e: + logger.debug("Unable to serialize SHACL shape for check %s: %s", self.identifier, e) + return None + # if the code is bytes, decode it to string + if isinstance(code, bytes): + code = code.decode("utf-8") + # use the shape source file as the source path for the snippet if available + source_path = self.requirement.path if self.requirement else None + # build the source snippet for the check + return SourceSnippet( + language="turtle", + code=code, + source_path=source_path, + ) + def execute_check(self, context: ValidationContext): logger.debug("Starting check %s", self) try: @@ -266,6 +372,18 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): if requirementCheck is None: logger.warning("No check instance found for shape: %s", shape.key) continue + # Drop violations whose check severity is below the requested + # `requirement_severity`: pyshacl still emits sh:ValidationResult + # nodes for sh:Warning / sh:Info, but they are not actionable at a + # stricter validation level. 
+ if requirementCheck.severity < shacl_context.settings.requirement_severity: + logger.debug( + "Dropping violation for check %s: severity %s below requested %s", + requirementCheck.identifier, + requirementCheck.severity, + shacl_context.settings.requirement_severity, + ) + continue if ( not shacl_context.settings.skip_checks or requirementCheck.identifier not in shacl_context.settings.skip_checks @@ -329,12 +447,16 @@ def __do_execute_check__(self, shacl_context: SHACLValidationContext): # all together and not profile by profile if requirementCheck.identifier not in failed_requirement_checks_notified: shacl_context.result._add_executed_check(requirementCheck, False) - if requirementCheck.identifier not in failed_requirement_checks_notified and \ - requirementCheck.requirement.profile != shacl_context.current_validation_profile: + if ( + requirementCheck.identifier not in failed_requirement_checks_notified + and requirementCheck.requirement.profile != shacl_context.current_validation_profile + ): failed_requirement_checks_notified.append(requirementCheck.identifier) - shacl_context.validator.notify(RequirementCheckValidationEvent( - EventType.REQUIREMENT_CHECK_VALIDATION_END, - requirementCheck, validation_result=False)) + shacl_context.validator.notify( + RequirementCheckValidationEvent( + EventType.REQUIREMENT_CHECK_VALIDATION_END, requirementCheck, validation_result=False + ) + ) logger.debug( "Added failed check to the context: %s", requirementCheck.identifier, diff --git a/rocrate_validator/requirements/shacl/models.py b/rocrate_validator/requirements/shacl/models.py index 7b61bf622..4da019ad2 100644 --- a/rocrate_validator/requirements/shacl/models.py +++ b/rocrate_validator/requirements/shacl/models.py @@ -17,7 +17,7 @@ from pathlib import Path from typing import Optional, Union -from rdflib import Graph, Namespace, URIRef +from rdflib import Graph, Literal, Namespace, URIRef from rdflib.term import Node from rocrate_validator.constants import SHACL_NS @@ 
-344,6 +344,16 @@ def shapes_graph(self) -> Graph: g += self._shapes_graph return g + def is_node_deactivated(self, node: Node) -> bool: + """Return True if the underlying shapes graph asserts + ` sh:deactivated true`. Avoids the copy made by `shapes_graph` + so it is safe to call from hot paths.""" + deactivated = Namespace(SHACL_NS).deactivated + for value in self._shapes_graph.objects(subject=node, predicate=deactivated): + if isinstance(value, Literal) and bool(value.toPython()): + return True + return False + def load_shapes(self, shapes_path: Union[str, Path], publicID: Optional[str] = None) -> list[Shape]: """ Load the shapes from the graph diff --git a/rocrate_validator/requirements/shacl/requirements.py b/rocrate_validator/requirements/shacl/requirements.py index e85bc3ec3..9db999d65 100644 --- a/rocrate_validator/requirements/shacl/requirements.py +++ b/rocrate_validator/requirements/shacl/requirements.py @@ -19,8 +19,14 @@ from rocrate_validator.utils import log as logging from rocrate_validator.constants import VALIDATOR_NS -from rocrate_validator.models import (Profile, Requirement, RequirementCheck, - RequirementLevel, RequirementLoader) +from rocrate_validator.models import ( + Profile, + Requirement, + RequirementCheck, + RequirementLevel, + RequirementLoader, + ValidationContext, +) from rocrate_validator.requirements.shacl.checks import SHACLCheck from rocrate_validator.requirements.shacl.models import Shape, ShapesRegistry @@ -29,16 +35,11 @@ class SHACLRequirement(Requirement): - - def __init__(self, - shape: Shape, - profile: Profile, - path: Path): + def __init__(self, shape: Shape, profile: Profile, path: Path): self._shape = shape - super().__init__(profile, - shape.name if shape.name else "", - shape.description if shape.description else "", - path) + super().__init__( + profile, shape.name if shape.name else "", shape.description if shape.description else "", path + ) # init checks self._checks = self.__init_checks__() # assign check IDs 
-59,8 +60,15 @@ def __init_checks__(self) -> list[RequirementCheck]: # check if the shape has nested properties has_properties = hasattr(self.shape, "properties") and len(self.shape.properties) > 0 # create a check for the shape itself, hidden if the shape has nested properties - checks.append(SHACLCheck(self, self.shape, name=f"Check {self.shape.name}" if has_properties else None, - hidden=has_properties, root=True)) + checks.append( + SHACLCheck( + self, + self.shape, + name=f"Check {self.shape.name}" if has_properties else None, + hidden=has_properties, + root=True, + ) + ) # create a check for each property if the shape has nested properties if has_properties: for prop in self.shape.properties: @@ -82,9 +90,73 @@ def hidden(self) -> bool: return True return False + @classmethod + def finalize(cls, context: ValidationContext) -> None: + """ + Finalize the SHACL requirement by ensuring that a SHACL validation run is triggered for the target profile + if it has no shapes of its own (e.g. an extension profile that purely inherits or only deactivates). + + SHACL is normally driven by the first execute_check of a check + belonging to the target profile (see SHACLValidationContextManager). + If the target has zero SHACL checks of its own (e.g. an extension + profile that purely inherits or only deactivates), no pyshacl run + is ever triggered and inherited shapes are never evaluated. + Force one final run on the merged shapes graph in that case. 
+ """ + + logger.debug("Starting %s requirement finalization for context %s", cls.__name__, context) + + # extract profiles and target profile from context + profiles = context.profiles + + from rocrate_validator.requirements.shacl.checks import SHACLCheck + from rocrate_validator.requirements.shacl.validator import SHACLValidationContext + + target = next((p for p in profiles if p.identifier == context.settings.profile_identifier), None) + if target is None: + return + + shacl_context = SHACLValidationContext.get_instance(context) + # If pyshacl already ran for the target during the main loop there is + # nothing to do. + if shacl_context.get_validation_result(target) is not None: + return + + # Pick any SHACLCheck across the loaded profiles to drive the run; the + # check identity is only used for logging inside __do_execute_check__, + # the actual validation is graph-wide. + runner = next( + (c for p in profiles for r in p.requirements for c in r.get_checks() if isinstance(c, SHACLCheck)), + None, + ) + if runner is None: + return + + # Make sure the target's shapes (if any) are in the merged registry + # and switch the current profile so violations are attributed under + # the target profile in the report. 
+ shacl_context.__set_current_validation_profile__(target) + shacl_context._current_validation_profile = target + try: + runner.__do_execute_check__(shacl_context) + except Exception as e: + if context.maybe_warn_offline_cache_miss(e): + logger.debug( + "Forced SHACL run for zero-shape target profile %s skipped due to offline cache miss: %s", + target.identifier, e, + ) + else: + logger.warning("Forced SHACL run for zero-shape target profile %s failed: %s", target.identifier, e) + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + finally: + shacl_context.__unset_current_validation_profile__() + + # do finalization logic here (empty for now) + logger.debug("Completed %s requirement finalization for context %s", cls.__name__, context) -class SHACLRequirementLoader(RequirementLoader): +class SHACLRequirementLoader(RequirementLoader): def __init__(self, profile: Profile): super().__init__(profile) self._shape_registry = ShapesRegistry.get_instance(profile) @@ -95,9 +167,9 @@ def __init__(self, profile: Profile): def shapes_registry(self) -> ShapesRegistry: return self._shape_registry - def load(self, profile: Profile, - requirement_level: RequirementLevel, - file_path: Path, publicID: Optional[str] = None) -> list[Requirement]: + def load( + self, profile: Profile, requirement_level: RequirementLevel, file_path: Path, publicID: Optional[str] = None + ) -> list[Requirement]: assert file_path is not None, "The file path cannot be None" shapes: list[Shape] = self.shapes_registry.load_shapes(file_path, publicID) logger.debug("Loaded %s shapes: %s", len(shapes), shapes) diff --git a/rocrate_validator/requirements/shacl/utils.py b/rocrate_validator/requirements/shacl/utils.py index 4200bd08c..3f2e0cac8 100644 --- a/rocrate_validator/requirements/shacl/utils.py +++ b/rocrate_validator/requirements/shacl/utils.py @@ -21,10 +21,10 @@ from rdflib import RDF, BNode, Graph, Namespace from rdflib.term import Node -from rocrate_validator.utils import log as logging 
-from rocrate_validator.constants import RDF_SYNTAX_NS, SHACL_NS +from rocrate_validator.constants import SHACL_NS from rocrate_validator.errors import BadSyntaxError from rocrate_validator.models import Severity +from rocrate_validator.utils import log as logging if TYPE_CHECKING: from rocrate_validator.requirements.shacl.models import Shape @@ -34,24 +34,23 @@ def build_node_subgraph(graph: Graph, node: Node) -> Graph: - shape_graph = Graph() - shape_graph += graph.triples((node, None, None)) - - # add BNodes - for _, _, o in shape_graph: - shape_graph += graph.triples((o, None, None)) - - # Use the triples method to get all triples that are part of a list - RDF = Namespace(RDF_SYNTAX_NS) - first_predicate = RDF.first - rest_predicate = RDF.rest - shape_graph += graph.triples((None, first_predicate, None)) - shape_graph += graph.triples((None, rest_predicate, None)) - for _, _, object in shape_graph: - shape_graph += graph.triples((object, None, None)) - - # return the subgraph - return shape_graph + """ + Build a subgraph with every triple reachable from ``node`` by following BNode objects. + """ + subgraph = Graph() + visited: set = set() + stack: list = [node] + while stack: + current = stack.pop() + if current in visited: + continue + visited.add(current) + for triple in graph.triples((current, None, None)): + subgraph.add(triple) + _, _, obj = triple + if isinstance(obj, BNode) and obj not in visited: + stack.append(obj) + return subgraph def map_severity(shacl_severity: str) -> Severity: @@ -190,20 +189,23 @@ def get_shape_graph(self, shape_node: Node) -> Graph: def get_shape_property_graph(self, shape_node: Node, shape_property: Node) -> Graph: """ - Get the subgraph of the given shape node excluding the given property + Get the subgraph of a property shape nested inside a node shape. 
+ + Includes only triples reachable from `shape_property` (its constraints + and any RDF lists used by `sh:and`/`sh:or`/`sh:xone`), plus the link + triple `(shape_node, sh:property, shape_property)`. Nothing reachable + only via sibling properties is included, so subtracting this graph + from the merged shapes graph cannot break sibling constructs. """ node_graph = self.get_shape_graph(shape_node) assert node_graph is not None, "The shape graph cannot be None" property_graph = Graph() - shacl_ns = Namespace(SHACL_NS) - nested_properties_to_exclude = [o for (_, _, o) in node_graph.triples( - (shape_node, shacl_ns.property, None)) if o != shape_property] - triples_to_exclude = [(s, _, o) for (s, _, o) in node_graph.triples((None, None, None)) - if s in nested_properties_to_exclude - or o in nested_properties_to_exclude] + for s, p, o in __extract_related_triples__(node_graph, shape_property): + property_graph.add((s, p, o)) - property_graph += node_graph - triples_to_exclude + shacl_ns = Namespace(SHACL_NS) + property_graph.add((shape_node, shacl_ns.property, shape_property)) return property_graph diff --git a/rocrate_validator/rocrate.py b/rocrate_validator/rocrate.py index a5affdb1a..0d9d4c6bb 100644 --- a/rocrate_validator/rocrate.py +++ b/rocrate_validator/rocrate.py @@ -30,7 +30,7 @@ from rocrate_validator.errors import ROCrateInvalidURIError from rocrate_validator.utils.uri import validate_rocrate_uri from rocrate_validator.utils.http import HttpRequester -from rocrate_validator.utils.uri import URI +from rocrate_validator.utils.uri import URI, AvailabilityStatus, is_external_reference # set up logging logger = logging.getLogger(__name__) @@ -140,8 +140,17 @@ def id_as_path(self) -> Path: @classmethod def get_id_as_uri(cls, entity_id: str, ro_crate: ROCrate) -> URI: assert entity_id, "Entity ID cannot be None" - if entity_id.startswith("http"): + # Per RO-Crate 1.1 ยง 4.2.2, an `@id` is either a relative URI path or + # an external URI/IRI (RFC 3986/3987). 
External references are used + # as-is (without resolving them against the crate URI) so the entity + # is classified as remote/web-based; this covers both authority-based + # forms (``http://``, ``scp://``) and scheme-only ones (``urn:``, + # ``doi:``, ``arcp:``). + if is_external_reference(entity_id): return URI(entity_id) + # Otherwise the `@id` is a relative path: if the RO-Crate itself is + # remote, resolve it against the crate URI so the entity is still + # classified as remote/web-based. if ro_crate.uri.is_remote_resource(): if entity_id.startswith("./"): return URI(f"{ro_crate.uri}/{entity_id[2:]}") @@ -208,58 +217,66 @@ def raw_data(self) -> object: def is_local(self) -> bool: return not self.is_remote() - def is_available(self) -> bool: + def check_availability(self) -> AvailabilityStatus: + """ + Return a fine-grained availability status for this entity. + + This is the primary check; :meth:`is_available` is the boolean + shortcut built on top of it. The status distinguishes definitely + unavailable resources, auth-protected ones, and remote URIs whose + scheme the validator cannot natively check (scp://, s3://, ...). + """ try: - # check if the entity points to an external file - if self.id.startswith("http"): + entity_uri = self.id_as_uri + # Remote entities with a scheme we can natively reach are checked + # by inspecting the remote response status. + if entity_uri.is_natively_checkable(): logger.debug("Checking the availability of a remote entity") - return self.ro_crate.get_external_file_size(self.id) > 0 - - # check if the entity is part of the local RO-Crate + return entity_uri.check_availability() + + # Remote entities with a non-natively-checkable scheme cannot be + # verified (scp://, sftp://, s3://, ...): report UNCHECKABLE so + # callers can warn without invalidating the validation. 
+ if entity_uri.is_remote_resource(): + logger.debug( + "Cannot natively verify availability for entity '%s' (scheme '%s')", + self.id, + entity_uri.scheme, + ) + return AvailabilityStatus.UNCHECKABLE + + # Local entity: locate it inside the (local or remote) RO-Crate. if self.ro_crate.uri.is_local_resource(): - # check if the file exists in the local file system if isinstance(self.ro_crate, ROCrateLocalFolder): - logger.debug( - "Checking the availability of a local entity in a local folder" - ) - return self.ro_crate.has_file( - self.id_as_path - ) or self.ro_crate.has_directory(self.id_as_path) - # check if the file exists in the local zip file + found = self.ro_crate.has_file(self.id_as_path) or self.ro_crate.has_directory(self.id_as_path) + return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE if isinstance(self.ro_crate, ROCrateLocalZip): - logger.debug( - "Checking the availability of a local entity in a local zip file" - ) - # Skip the check for the root of a ZIP archive if self.id == "./": - logger.debug( - "Skipping the check for the presence of the Data Entity '%s' within the RO-Crate " - "as it is the root of a ZIP archive", - self.id, - ) - return True - return self.ro_crate.has_directory( + return AvailabilityStatus.AVAILABLE + found = self.ro_crate.has_directory(unquote(str(self.id))) or self.ro_crate.has_file( unquote(str(self.id)) - ) or self.ro_crate.has_file(unquote(str(self.id))) + ) + return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE - # check if the entity is part of the remote RO-Crate - logger.debug( - "Checking the availability of a remote entity in a remote RO-Crate" - ) if self.ro_crate.uri.is_remote_resource(): if self.id == "./": - return self.ro_crate.get_file_size(Path(self.id_as_uri())) > 0 - return self.ro_crate.has_directory( - unquote(str(self.id)) - ) or self.ro_crate.has_file(unquote(str(self.id))) + found = self.ro_crate.get_file_size(Path(self.id_as_uri())) > 0 + else: 
+ found = self.ro_crate.has_directory(unquote(str(self.id))) or self.ro_crate.has_file( + unquote(str(self.id)) + ) + return AvailabilityStatus.AVAILABLE if found else AvailabilityStatus.UNAVAILABLE except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.exception(e) - return False + return AvailabilityStatus.UNAVAILABLE - raise ROCrateInvalidURIError( - uri=self.id, message="Could not determine the availability of the entity" - ) + # Fallthrough: the crate URI is neither a recognized local nor a + # remote resource — the entity location cannot be determined. + raise ROCrateInvalidURIError(uri=self.id, message="Could not determine the availability of the entity") + + def is_available(self) -> bool: + return self.check_availability() == AvailabilityStatus.AVAILABLE def get_size(self) -> int: try: diff --git a/rocrate_validator/services.py b/rocrate_validator/services.py index 5e2461087..256dea787 100644 --- a/rocrate_validator/services.py +++ b/rocrate_validator/services.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License.
+ import shutil import tempfile import zipfile @@ -20,8 +21,7 @@ from rocrate_validator.utils import log as logging from rocrate_validator.events import Subscriber -from rocrate_validator.models import (Profile, Severity, ValidationResult, - ValidationSettings, Validator) +from rocrate_validator.models import Profile, Severity, ValidationResult, ValidationSettings, Validator from rocrate_validator.utils.uri import URI from rocrate_validator.utils.paths import get_profiles_path from rocrate_validator.utils.http import HttpRequester @@ -43,9 +43,8 @@ def detect_profiles(settings: Union[dict, ValidationSettings]) -> list[Profile]: def validate_metadata_as_dict( - metadata_dict: dict, - settings: Union[dict, ValidationSettings], - subscribers: Optional[list[Subscriber]] = None) -> ValidationResult: + metadata_dict: dict, settings: Union[dict, ValidationSettings], subscribers: Optional[list[Subscriber]] = None +) -> ValidationResult: """ Validate the RO-Crate metadata only against a profile and return the validation result. """ @@ -62,8 +61,9 @@ def validate_metadata_as_dict( return validate(settings, subscribers) -def validate(settings: Union[dict, ValidationSettings], - subscribers: Optional[list[Subscriber]] = None) -> ValidationResult: +def validate( + settings: Union[dict, ValidationSettings], subscribers: Optional[list[Subscriber]] = None +) -> ValidationResult: """ Validate a RO-Crate against a profile and return the validation result @@ -85,8 +85,9 @@ def validate(settings: Union[dict, ValidationSettings], return result -def __initialise_validator__(settings: Union[dict, ValidationSettings], - subscribers: Optional[list[Subscriber]] = None) -> Validator: +def __initialise_validator__( + settings: Union[dict, ValidationSettings], subscribers: Optional[list[Subscriber]] = None +) -> Validator: """ Validate a RO-Crate against a profile """ @@ -146,13 +147,31 @@ def __extract_and_validate_rocrate__(rocrate_path: Path): # i.e., if the RO-Crate is a URL. 
If so, download the RO-Crate # and extract it to a temporary directory. We support either http or https # or ftp protocols to download the remote RO-Crate. - if rocrate_path.scheme in ('http', 'https', 'ftp'): + if rocrate_path.scheme in ("http", "https", "ftp"): logger.debug("RO-Crate is a remote RO-Crate") # create a temp folder to store the downloaded RO-Crate with tempfile.NamedTemporaryFile(delete=False) as tmp_file: - # download the remote RO-Crate - with HttpRequester().get(rocrate_path.uri, stream=True, allow_redirects=True) as r: - with open(tmp_file.name, 'wb') as f: + requester = HttpRequester() + offline = bool(getattr(settings, "offline", False)) + # In offline mode, the cache is the only source of truth. Otherwise, + # bypass the cache to refresh the stored copy so that subsequent + # offline runs validate against the latest known remote state. + if offline: + response = requester.get(rocrate_path.uri, stream=True, allow_redirects=True) + else: + response = requester.fetch_fresh(rocrate_path.uri, stream=True, allow_redirects=True) + with response as r: + if r.status_code >= 400: + if offline and r.status_code == 504: + raise FileNotFoundError( + f"Remote RO-Crate '{rocrate_path.uri}' is not available in the HTTP cache. " + f"Validate it online first, or run " + f"`rocrate-validator cache warm --crate '{rocrate_path.uri}'`." + ) + raise FileNotFoundError( + f"Failed to download remote RO-Crate '{rocrate_path.uri}' (status {r.status_code})." + ) + with open(tmp_file.name, "wb") as f: shutil.copyfileobj(r.raw, f) logger.debug("RO-Crate downloaded to temporary file: %s", tmp_file.name) # continue with the validation process by extracting the RO-Crate and validating it @@ -171,15 +190,16 @@ def __extract_and_validate_rocrate__(rocrate_path: Path): return __init_validator__(settings) else: raise ValueError( - f"Invalid RO-Crate URI: {rocrate_path}. " - "It MUST be a local directory or a ZIP file (local or remote).") + f"Invalid RO-Crate URI: {rocrate_path}. 
It MUST be a local directory or a ZIP file (local or remote)." + ) -def get_profiles(profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Optional[Path] = None, - severity=Severity.OPTIONAL, - allow_requirement_check_override: bool = - ValidationSettings.allow_requirement_check_override) -> list[Profile]: +def get_profiles( + profiles_path: Path = DEFAULT_PROFILES_PATH, + extra_profiles_path: Optional[Path] = None, + severity=Severity.OPTIONAL, + allow_requirement_check_override: bool = ValidationSettings.allow_requirement_check_override, +) -> list[Profile]: """ Get the list of profiles supported by the package. The profile source path can be overridden by specifying ``profiles_path``. @@ -203,20 +223,23 @@ def get_profiles(profiles_path: Path = DEFAULT_PROFILES_PATH, :return: the list of profiles :rtype: list[Profile] """ - profiles = Profile.load_profiles(profiles_path, - extra_profiles_path=extra_profiles_path, - severity=severity, - allow_requirement_check_override=allow_requirement_check_override) + profiles = Profile.load_profiles( + profiles_path, + extra_profiles_path=extra_profiles_path, + severity=severity, + allow_requirement_check_override=allow_requirement_check_override, + ) logger.debug("Profiles loaded: %s", profiles) return profiles -def get_profile(profile_identifier: str, - profiles_path: Path = DEFAULT_PROFILES_PATH, - extra_profiles_path: Optional[Path] = None, - severity=Severity.OPTIONAL, - allow_requirement_check_override: bool = - ValidationSettings.allow_requirement_check_override) -> Profile: +def get_profile( + profile_identifier: str, + profiles_path: Path = DEFAULT_PROFILES_PATH, + extra_profiles_path: Optional[Path] = None, + severity=Severity.OPTIONAL, + allow_requirement_check_override: bool = ValidationSettings.allow_requirement_check_override, +) -> Profile: """ Get the profile with the given identifier. The profile source path can be overridden through ``profiles_path``. 
@@ -245,8 +268,10 @@ def get_profile(profile_identifier: str, :rtype: Profile """ - profiles = get_profiles(profiles_path, - extra_profiles_path=extra_profiles_path, - severity=severity, - allow_requirement_check_override=allow_requirement_check_override) + profiles = get_profiles( + profiles_path, + extra_profiles_path=extra_profiles_path, + severity=severity, + allow_requirement_check_override=allow_requirement_check_override, + ) return Profile.find_in_list(profiles, profile_identifier) diff --git a/rocrate_validator/utils/cache_warmup.py b/rocrate_validator/utils/cache_warmup.py new file mode 100644 index 000000000..e22f5d737 --- /dev/null +++ b/rocrate_validator/utils/cache_warmup.py @@ -0,0 +1,228 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Helpers to populate the HTTP cache with resources referenced by profile +descriptors. + +Profiles describe their external resources using the W3C Profiles Vocabulary +(``prof:hasResource`` / ``prof:hasArtifact``). The URLs declared there are the +ones the validator needs to resolve at runtime (JSON-LD contexts, ontologies, +schemas, ...). By discovering them dynamically we can warm the cache so that +subsequent offline runs find every required resource locally. 
+""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence + +from rocrate_validator import constants +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, + HttpRequester) + +if TYPE_CHECKING: + from rocrate_validator.models import Profile, ValidationSettings + +# set up logging +logger = logging.getLogger(__name__) + +# Guard to prevent multiple warm-up attempts in the same run. +# This is not a thread-safe mechanism. +__profiles_loaded = False + + +# SPARQL query returning every artifact URL declared in a profile descriptor. +# We intentionally do not filter by role: any resource the profile declares is +# considered a candidate for warm-up (Vocabulary, Constraints, Schema, ...). +_CACHEABLE_URLS_SPARQL = """ +PREFIX prof: +SELECT DISTINCT ?artifact +WHERE { + ?profile prof:hasResource ?resource . + ?resource prof:hasArtifact ?artifact . +} +""" + + +@dataclass +class WarmUpResult: + """Outcome of a warm-up operation.""" + url: str + status: str # "ok", "skipped", "failed" + detail: Optional[str] = None + + +def discover_profile_cacheable_urls(profile: "Profile") -> List[str]: + """ + Return the list of HTTP(S) URLs declared by ``profile`` as cacheable + artifacts. Returns an empty list when the profile has no declared + artifacts or cannot be parsed. 
+ """ + graph = profile.profile_specification_graph + if graph is None: + logger.debug( + "Profile %s has no specification graph loaded", getattr(profile, "identifier", "?")) + return [] + urls: List[str] = [] + try: + for row in graph.query(_CACHEABLE_URLS_SPARQL): + artifact = row.artifact + if artifact is None: + continue + value = str(artifact) + if value.lower().startswith(("http://", "https://")) and value not in urls: + urls.append(value) + except Exception as e: + logger.debug("Failed to query cacheable URLs for profile %s: %s", + getattr(profile, "identifier", "?"), e) + return urls + + +def discover_cacheable_urls_from_profiles(profiles: Iterable["Profile"]) -> List[str]: + """ + Aggregate cacheable URLs from the given profiles, preserving order and + removing duplicates. + """ + seen: set[str] = set() + result: List[str] = [] + for profile in profiles: + for url in discover_profile_cacheable_urls(profile): + if url not in seen: + seen.add(url) + result.append(url) + return result + + +def warm_up_urls(urls: Sequence[str]) -> List[WarmUpResult]: + """ + Fetch each URL so that its response is stored in the HTTP cache. + + Already-cached URLs are skipped. Failures (including HTTP errors and + offline cache misses) are reported but do not raise. 
+ """ + requester = HttpRequester() + results: List[WarmUpResult] = [] + offline = bool(getattr(requester, "offline", False)) + for url in urls: + try: + if requester.has_cached(url): + results.append(WarmUpResult(url=url, status="skipped", detail="already cached")) + continue + if offline: + response = requester.get(url) + else: + response = requester.fetch_fresh(url) + status_code = getattr(response, "status_code", None) + if status_code is None: + results.append(WarmUpResult(url=url, status="failed", detail="no status code")) + elif status_code == OFFLINE_CACHE_MISS_STATUS and offline: + results.append(WarmUpResult(url=url, status="failed", detail="offline cache miss")) + elif status_code >= 400: + results.append(WarmUpResult(url=url, status="failed", detail=f"HTTP {status_code}")) + else: + results.append(WarmUpResult(url=url, status="ok", detail=f"HTTP {status_code}")) + except Exception as e: + logger.debug("Warm-up failed for %s: %s", url, e) + results.append(WarmUpResult(url=url, status="failed", detail=str(e))) + return results + + +def auto_warm_up_for_settings(settings: "ValidationSettings") -> Optional[List[WarmUpResult]]: + """ + Perform a best-effort synchronous warm-up triggered by + ``ValidationSettings.__post_init__``. + + The warm-up is skipped when: + + - offline mode is enabled (nothing to fetch from the network); + - the cache path is not persistent (auto warm-up only makes sense when + the cache survives the run); + - the environment variable ``ROCRATE_VALIDATOR_AUTO_WARM`` is set to a + value disabling the feature (``0``, ``false``, ``no``, ``off``). 
+ """ + if getattr(settings, "offline", False): + return None + if getattr(settings, "cache_path", None) is None: + return None + env_value = os.environ.get(constants.AUTO_WARM_ENV_VAR, "1").strip().lower() + if env_value in {"0", "false", "no", "off"}: + logger.debug("Auto warm-up disabled via %s=%s", constants.AUTO_WARM_ENV_VAR, env_value) + return None + + profile_identifier = getattr(settings, "profile_identifier", None) + if not profile_identifier: + return None + + profile = _find_profile(profile_identifier, settings) + if profile is None: + return None + urls = discover_profile_cacheable_urls(profile) + if not urls: + return None + requester = HttpRequester() + urls_to_fetch = [u for u in urls if not requester.has_cached(u)] + if not urls_to_fetch: + logger.debug("Auto warm-up: all %d resources already cached for profile %s", + len(urls), profile_identifier) + return [] + results = warm_up_urls(urls_to_fetch) + ok = sum(1 for r in results if r.status == "ok") + logger.info("Auto warm-up: pre-loaded %d/%d resources for profile %s", + ok, len(urls_to_fetch), profile_identifier) + return results + + +def _find_profile(identifier, settings) -> Optional["Profile"]: + """ + Look up a loaded profile by identifier. Accepts either a string or a list + (the settings sometimes store a list of identifiers). + """ + # Import here to avoid a circular import with models.py. + from rocrate_validator.models import Profile + from rocrate_validator.utils.paths import get_profiles_path + + # Load profiles to ensure the requested one is available and its graph is parsed. 
+ global __profiles_loaded + if not __profiles_loaded: + profiles_path = getattr(settings, "profiles_path", None) or get_profiles_path() + extra_profiles_path = getattr(settings, "extra_profiles_path", None) + try: + Profile.load_profiles( + profiles_path=profiles_path, + publicID=None, + extra_profiles_path=extra_profiles_path, + ) + __profiles_loaded = True + except Exception as e: + logger.debug("Unable to preload profiles for auto warm-up: %s", e) + return None + + if isinstance(identifier, (list, tuple)): + if not identifier: + return None + identifier = identifier[0] + try: + return Profile.get_by_identifier(identifier) + except Exception: + # Fall back to scanning all loaded profiles. + try: + for profile in Profile.all(): + if getattr(profile, "identifier", None) == identifier: + return profile + except Exception: + return None + return None diff --git a/rocrate_validator/utils/document_loader.py b/rocrate_validator/utils/document_loader.py new file mode 100644 index 000000000..0242fac67 --- /dev/null +++ b/rocrate_validator/utils/document_loader.py @@ -0,0 +1,138 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +JSON-LD document loader that routes remote ``@context`` resolution through +``HttpRequester``. + +``rdflib``'s built-in JSON-LD parser fetches remote contexts via ``urllib``, +which bypasses the HTTP cache managed by this project. 
Installing the loader +ensures every remote context resolution benefits from the cache and honors +offline mode. +""" + +from __future__ import annotations + +import json +import threading +from typing import Any, Optional, Tuple + +from rdflib.plugins.shared.jsonld import context as jsonld_context +from rdflib.plugins.shared.jsonld import util as jsonld_util + +from rocrate_validator.utils import log as logging +from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, + HttpRequester, OfflineCacheMissError) + +logger = logging.getLogger(__name__) + +_install_lock = threading.Lock() +_installed = False +_original_source_to_json = jsonld_util.source_to_json + + +def _patched_source_to_json(source, fragment_id=None, extract_all_scripts=False): + # Only intercept remote URL strings; let the original handle everything else. + if isinstance(source, str) and source.lower().startswith(("http://", "https://")): + try: + return _fetch_json_ld(source), None + except OfflineCacheMissError: + raise + except Exception as e: + logger.debug("Custom JSON-LD loader failed for %s: %s; falling back", source, e) + return _original_source_to_json(source, fragment_id, extract_all_scripts) + + +def install_document_loader() -> bool: + """ + Install the custom JSON-LD document loader. Idempotent. + + Returns ``True`` when the loader is active after the call, ``False`` when + installation raised an unexpected error (which is logged). + """ + global _installed + + with _install_lock: + if _installed: + return True + + try: + jsonld_util.source_to_json = _patched_source_to_json + # The context module imports source_to_json at module import time, + # so it must be patched separately. 
+ jsonld_context.source_to_json = _patched_source_to_json # type: ignore[attr-defined] + except Exception as e: + logger.error("Failed to install JSON-LD document loader: %s", e) + return False + + _installed = True + logger.debug("JSON-LD document loader installed") + return True + + +def uninstall_document_loader() -> bool: + """ + Restore the original JSON-LD document loader. Primarily intended for tests. + + Returns ``True`` when the loader is no longer active after the call, + ``False`` when uninstallation raised an unexpected error (which is logged). + """ + global _installed + with _install_lock: + if not _installed: + return True + + try: + jsonld_util.source_to_json = _original_source_to_json + jsonld_context.source_to_json = _original_source_to_json # type: ignore[attr-defined] + except Exception as e: + logger.error("Failed to uninstall JSON-LD document loader: %s", e) + return False + + _installed = False + return True + + +def _fetch_json_ld(url: str) -> Any: + """ + Fetch a JSON-LD document through ``HttpRequester``. + + Raises ``OfflineCacheMissError`` when running offline and the document + is not available in the cache. Raises ``RuntimeError`` for other + non-successful responses. + """ + requester = HttpRequester() + headers = {"Accept": "application/ld+json, application/json, */*;q=0.1"} + response = requester.get(url, headers=headers, allow_redirects=True) + status = getattr(response, "status_code", None) + if status == OFFLINE_CACHE_MISS_STATUS and getattr(requester, "offline", False): + raise OfflineCacheMissError(url) + if status is None or status >= 400: + raise RuntimeError(f"Unable to retrieve JSON-LD document from {url} (status {status})") + try: + return response.json() + except ValueError: + return json.loads(response.text) + + +def resolve_remote_document(url: str) -> Tuple[Optional[dict], Optional[str]]: + """ + Resolve a remote JSON-LD document, returning ``(json, content_type)``. 
+ + Exposed primarily for tests and warm-up routines that need to reuse the + loader's semantics (offline handling, cache integration) without wiring + through rdflib. + """ + data = _fetch_json_ld(url) + return data, "application/ld+json" diff --git a/rocrate_validator/utils/http.py b/rocrate_validator/utils/http.py index 197588dc4..fa825bfa5 100644 --- a/rocrate_validator/utils/http.py +++ b/rocrate_validator/utils/http.py @@ -19,7 +19,7 @@ import random import string import threading -from typing import Optional +from typing import Any, Optional import requests @@ -30,9 +30,70 @@ logger = logging.getLogger(__name__) +# HTTP status code used to signal a cache miss in offline mode. +# 504 is what requests_cache uses when only_if_cached is set and +# no cached response is available. +OFFLINE_CACHE_MISS_STATUS = 504 + + +def _log_cache_outcome(method: str, url: str, response, *, offline: bool, forced_refresh: bool = False) -> None: + """ + Emit a standardized ``CachedHttpRequester: ...`` message describing whether ``url`` was + served from the HTTP cache or fetched from the remote server. + """ + from_cache = getattr(response, "from_cache", None) + status = getattr(response, "status_code", None) + + if offline and status == OFFLINE_CACHE_MISS_STATUS: + outcome = "not available in HTTP cache (offline cache miss)" + elif from_cache is True: + outcome = "served from HTTP cache" + elif forced_refresh: + outcome = "fetched from remote (cache refresh)" + elif from_cache is False: + outcome = "fetched from remote" + else: + # No from_cache attribute: plain requests.Session or offline fallback stub. 
+ outcome = "fetched from remote (no cache backend)" + + logger.debug("CachedHttpRequester: %s %s %s", method, url, outcome) + + +class OfflineCacheMissError(RuntimeError): + """Raised when an HTTP resource is not available in the cache while offline.""" + + def __init__(self, url: str): + super().__init__( + f"Resource '{url}' is not available in the HTTP cache and " + f"the validator is running in offline mode. Run online once, or use " + f"`rocrate-validator cache warm` to pre-populate the cache." + ) + self.url = url + + +def find_offline_cache_miss(exc: BaseException) -> Optional[OfflineCacheMissError]: + """ + Walk the chain of an exception (``__cause__``/``__context__``) looking + for an :class:`OfflineCacheMissError`. Returns the first match, or + ``None`` if the chain does not contain one. + """ + seen: set[int] = set() + current: Optional[BaseException] = exc + while current is not None and id(current) not in seen: + seen.add(id(current)) + if isinstance(current, OfflineCacheMissError): + return current + current = current.__cause__ or current.__context__ + return None + + class HttpRequester: """ - A singleton class to handle HTTP requests + A singleton class to handle HTTP requests. + + The session is backed by ``requests_cache`` when available. The requester + supports an offline mode in which only cached responses are served + (cache misses yield a 504 response instead of hitting the network). 
""" _instance = None _lock = threading.Lock() @@ -50,7 +111,9 @@ def __new__(cls, *args, **kwargs) -> HttpRequester: def __init__(self, cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - cache_path: Optional[str] = None): + cache_path: Optional[str] = None, + offline: bool = False, + no_cache: bool = False): logger.debug(f"Initializing instance of {self.__class__.__name__} {self}") # check if the instance is already initialized if not hasattr(self, "_initialized"): @@ -66,6 +129,8 @@ def __init__(self, except ValueError: raise TypeError("cache_max_age must be an integer") self.cache_path_prefix = cache_path + self.offline = bool(offline) + self.no_cache = bool(no_cache) # flag to indicate if the cache is permanent or temporary self.permanent_cache = cache_path is not None # initialize the session @@ -83,7 +148,7 @@ def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = # check if requests_cache is installed # and set up the cached session try: - if cache_max_age >= 0: + if not self.no_cache: from requests_cache import CachedSession # If cache_path is not provided, use the default path prefix @@ -96,15 +161,25 @@ def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = else: logger.debug(f"Using provided cache path: {cache_path}") self.permanent_cache = True + # Negative cache_max_age means "never expire" (as documented in the CLI); + # offline mode also forces never-expire so stale entries remain usable. 
+ expire_after = -1 if (self.offline or cache_max_age < 0) else cache_max_age # Initialize the session with a cache self.session = CachedSession( # Cache name with random suffix - cache_name=cache_path, - expire_after=cache_max_age, # Cache expiration time in seconds + cache_name=str(cache_path), + expire_after=expire_after, # Cache expiration time in seconds backend='sqlite', # Use SQLite backend allowable_methods=('GET',), # Cache GET allowable_codes=(200, 302, 404) # Cache responses with these status codes ) + # Apply offline policy: only return cached responses. + if self.offline: + try: + self.session.settings.only_if_cached = True + except AttributeError: + # Older requests_cache versions expose the flag on the session directly. + setattr(self.session, "only_if_cached", True) except ImportError: logger.warning("requests_cache is not installed. Using requests instead.") except Exception as e: @@ -114,8 +189,15 @@ def __initialize_session__(self, cache_max_age: int, cache_path: Optional[str] = # use requests instead of requests_cache # and create a new session if not self.session: - logger.debug("Cache disabled: using requests instead of requests_cache") - self.session = requests.Session() + if self.offline: + logger.warning( + "Offline mode requested but requests_cache is not available: " + "HTTP requests will be blocked." + ) + self.session = _OfflineFallbackSession() + else: + logger.debug("Cache disabled: using requests instead of requests_cache") + self.session = requests.Session() def __del__(self): """ @@ -127,7 +209,7 @@ def __del__(self): def cleanup(self): """ - Destructor to clean up the cache file used by CachedSession. + Remove the SQLite cache file when the cache is marked as temporary. """ logger.debug(f"Deleting instance of {self.__class__.__name__}") if self.session and hasattr(self.session, 'cache') and self.session.cache: @@ -142,23 +224,243 @@ def cleanup(self): def __getattr__(self, name): """ - Delegate HTTP methods to the session object. 
+ Delegate HTTP methods to the session object, wrapping the call with + cache-outcome logging. :param name: The name of the method to call. - :return: The method from the session object. + :return: A callable that proxies to the session method. """ if name.upper() in {"GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH"}: - return getattr(self.session, name.lower()) + method = name.lower() + + def _wrapped(url, *args, **kwargs): + # Resolve the session method lazily, at call time, so the wrapper + # always targets the current session. This keeps the wrapper valid + # after the session is rebuilt in place (see ``_reconfigure``) and + # avoids holding a reference to a closed session. + session_method = getattr(self.session, method) + response = session_method(url, *args, **kwargs) + _log_cache_outcome(method.upper(), url, response, offline=self.offline) + return response + + return _wrapped raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") + def fetch_fresh(self, url: str, **kwargs) -> requests.Response: + """ + Fetch ``url`` bypassing the HTTP cache and store the fresh response. + + Used for resources that must always reflect the current remote state + while still being available offline afterwards (e.g., a remote RO-Crate + whose cached copy must be refreshed on every online run). + + In offline mode, the cache is consulted as usual (no network traffic). + + :param url: The URL to fetch. + :return: The HTTP response. + """ + if self.offline: + response = self.session.get(url, **kwargs) + else: + # ``force_refresh=True`` tells requests_cache to bypass the cache + # entirely and overwrite the stored entry with the new response. + # Older requests_cache versions only understand ``refresh=True`` + # (revalidation), and plain ``requests.Session`` accepts neither. 
+ response = None + for flag in ("force_refresh", "refresh"): + try: + response = self.session.get(url, **{flag: True}, **kwargs) + break + except TypeError: + continue + if response is None: + response = self.session.get(url, **kwargs) + _log_cache_outcome("GET", url, response, offline=self.offline, forced_refresh=not self.offline) + return response + + def has_cached(self, url: str) -> bool: + """ + Check whether ``url`` is already present in the HTTP cache. + + Returns ``False`` when the underlying session does not implement a cache. + """ + cache = getattr(self.session, "cache", None) + if cache is None: + return False + contains = getattr(cache, "contains", None) + try: + if contains is not None: + return bool(contains(url=url)) + # Fallback for older requests_cache versions. + return bool(cache.has_url(url)) + except Exception as e: + logger.debug("Cache lookup failed for %s: %s", url, e) + return False + + def clear_cache(self) -> None: + """ + Remove every entry from the HTTP cache. + """ + cache = getattr(self.session, "cache", None) + if cache is None: + logger.debug("No cache backend to clear") + return + try: + cache.clear() + logger.info("HTTP cache cleared") + except Exception as e: + logger.error("Failed to clear HTTP cache: %s", e) + raise + + def cache_info(self) -> dict[str, Any]: + """ + Return metadata about the current HTTP cache backend. 
+ """ + info: dict[str, Any] = { + "backend": None, + "path": None, + "permanent": getattr(self, "permanent_cache", False), + "offline": getattr(self, "offline", False), + "entries": 0, + "size_bytes": 0, + } + cache = getattr(self.session, "cache", None) + if cache is None: + return info + info["backend"] = cache.__class__.__name__ + cache_name = getattr(cache, "cache_name", None) or getattr(cache, "db_path", None) + if cache_name: + info["path"] = f"{cache_name}.sqlite" if not str(cache_name).endswith(".sqlite") else str(cache_name) + try: + info["entries"] = len(cache.responses) + except Exception: + try: + info["entries"] = sum(1 for _ in cache.urls()) + except Exception as e: + logger.debug("Unable to count cache entries: %s", e) + if info["path"] and os.path.exists(info["path"]): + try: + info["size_bytes"] = os.path.getsize(info["path"]) + except OSError: + pass + return info + @classmethod def initialize_cache(cls, cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, - cache_path: Optional[str] = None) -> HttpRequester: + cache_path: Optional[str] = None, + offline: bool = False, + no_cache: bool = False) -> HttpRequester: """ Initialize the HttpRequester singleton with cache settings. - :param max_age: The maximum age of the cache in seconds. + :param cache_max_age: Maximum age of cached responses in seconds. + Negative values mean "never expire". :param cache_path: The path to the cache directory. + :param offline: When ``True``, only cached responses are served. + :param no_cache: When ``True``, disable the HTTP cache entirely and + use a plain ``requests.Session``. Incompatible with ``offline``. 
+ """ + with cls._lock: + instance = cls._instance + if instance is None: + return cls(cache_max_age=cache_max_age, cache_path=cache_path, + offline=offline, no_cache=no_cache) + # Re-apply the configuration without recreating the instance: + # we keep the same singleton in place and only rebuild its underlying session, + # rather than dropping and recreating the object (as ``reset`` does). + instance._reconfigure(cache_max_age=cache_max_age, cache_path=cache_path, + offline=offline, no_cache=no_cache) + return instance + + def _close_session(self) -> None: + """Close the current session and remove its cache file if it is temporary.""" + session = getattr(self, "session", None) + if session is not None and hasattr(session, "close"): + try: + session.close() + except Exception as e: + logger.debug("Error closing previous session: %s", e) + if getattr(self, "permanent_cache", True) is False: + try: + self.cleanup() + except Exception as e: + logger.debug("Error cleaning up previous cache: %s", e) + + def _reconfigure(self, + cache_max_age: int = constants.DEFAULT_HTTP_CACHE_MAX_AGE, + cache_path: Optional[str] = None, + offline: bool = False, + no_cache: bool = False) -> None: + """ + Rebuild the underlying session with new cache settings while preserving + the singleton instance (and any attributes set on it, e.g. test patches). + """ + with self._lock: + self._close_session() + try: + self.cache_max_age = int(cache_max_age) + except ValueError: + raise TypeError("cache_max_age must be an integer") + self.cache_path_prefix = cache_path + self.offline = bool(offline) + self.no_cache = bool(no_cache) + self.permanent_cache = cache_path is not None + # ``__initialize_session__`` asserts the instance is not yet initialized. 
+ self._initialized = False + self.__initialize_session__(cache_max_age, cache_path) + self._initialized = True + + @classmethod + def reset(cls) -> None: """ - return cls(cache_max_age=cache_max_age, cache_path=cache_path) + Drop the singleton instance. Primarily intended for tests and the + ``cache`` CLI subcommand that reconfigures the cache on the fly. + """ + with cls._lock: + instance = cls._instance + if instance is not None: + instance._close_session() + cls._instance = None + + +class _OfflineFallbackSession: + """ + Minimal session used when offline mode is requested but no HTTP cache + backend is available. Every request yields a 504 response to signal a + cache miss, mirroring the behavior of ``requests_cache`` in offline mode. + """ + + cache = None + + def _offline_response(self, url: str) -> requests.Response: + response = requests.Response() + response.status_code = OFFLINE_CACHE_MISS_STATUS + response.reason = "Offline: no HTTP cache backend available" + response.url = url + # response._content = b"" + return response + + def get(self, url, **_kwargs): + return self._offline_response(url) + + def head(self, url, **_kwargs): + return self._offline_response(url) + + def post(self, url, **_kwargs): + return self._offline_response(url) + + def put(self, url, **_kwargs): + return self._offline_response(url) + + def delete(self, url, **_kwargs): + return self._offline_response(url) + + def options(self, url, **_kwargs): + return self._offline_response(url) + + def patch(self, url, **_kwargs): + return self._offline_response(url) + + def close(self): + pass diff --git a/rocrate_validator/utils/paths.py b/rocrate_validator/utils/paths.py index 9903f6c8d..44fddc907 100644 --- a/rocrate_validator/utils/paths.py +++ b/rocrate_validator/utils/paths.py @@ -72,6 +72,33 @@ def get_profiles_path() -> Path: return Path(CURRENT_DIR).parent / constants.DEFAULT_PROFILES_PATH +def get_user_cache_dir() -> Path: + """ + Get the user-level cache directory for 
rocrate-validator. + + Honors the XDG Base Directory Specification: + - Uses ``$XDG_CACHE_HOME/rocrate-validator`` when ``XDG_CACHE_HOME`` is set + - Falls back to ``~/.cache/rocrate-validator`` otherwise + + :return: The path to the cache directory (not guaranteed to exist) + """ + xdg = os.environ.get("XDG_CACHE_HOME") + base = Path(xdg) if xdg else Path.home() / ".cache" + return base / constants.USER_CACHE_DIR_NAME + + +def get_default_http_cache_path() -> Path: + """ + Get the default persistent HTTP cache path under the user cache directory. + + The returned path is the cache *name* expected by ``requests_cache`` + (i.e., without the ``.sqlite`` suffix added by the backend). + + :return: The default persistent HTTP cache name path + """ + return get_user_cache_dir() / constants.USER_CACHE_FILE_NAME + + def list_matching_file_paths( directory: str = '.', serialization_format: constants.RDF_SERIALIZATION_FORMATS_TYPES = "turtle") -> list[str]: diff --git a/rocrate_validator/utils/uri.py b/rocrate_validator/utils/uri.py index 3d92c8abb..2111f584c 100644 --- a/rocrate_validator/utils/uri.py +++ b/rocrate_validator/utils/uri.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import enum import re from pathlib import Path from typing import Optional, Union -from urllib.parse import ParseResult, parse_qsl, urlparse +from urllib.parse import ParseResult, parse_qsl, urlparse, urlsplit from rocrate_validator import errors from rocrate_validator.utils import log as logging @@ -25,19 +26,125 @@ logger = logging.getLogger(__name__) +class AvailabilityStatus(enum.Enum): + """Outcome of a URI availability check.""" + + AVAILABLE = "available" + UNAVAILABLE = "unavailable" + UNAUTHORIZED = "unauthorized" + UNCHECKABLE = "uncheckable" + + +# RFC 3986 §3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +# Require length >= 2 to disambiguate from Windows drive letters +# (e.g. ``C:\path``).
RFC 3986 allows single-character schemes but no +# IANA-registered scheme is one character long, so this is an acceptable +# trade-off. +_SCHEME_RE = re.compile(r"^[A-Za-z][A-Za-z0-9+\-.]+$") + + +def is_external_reference(value: object) -> bool: + """ + Check if `value` is an external reference (i.e. has a URI scheme). + + Return True if *value* has an explicit URI or IRI scheme (RFC 3986 for URI, and RFC 3987 for IRIs). + Both authority-based forms (``http://...``) + and scheme-only forms (``urn:...``, ``doi:...``, ``arcp://...``) are + accepted, as required by RO-Crate 1.1 §4.2.2. + + The check is purely syntactic: the scheme is not verified against + the IANA registry and the hier-part is not resolved. + """ + if not isinstance(value, str) or not value: + return False + + try: + parts = urlsplit(value) + except ValueError: + # urlsplit can raise on malformed IPv6 literals, invalid ports, etc. + return False + + # Scheme must conform to RFC 3986 (and be at least 2 chars long). + if not _SCHEME_RE.match(parts.scheme): + return False + + # Reject scheme-only input (``urn:``, ``doi:``): syntactically valid + # per the grammar but semantically unusable as an identifier. + if not (parts.netloc or parts.path or parts.query or parts.fragment): + return False + + return True + + class URI: + # Schemes that the validator can fetch natively to verify availability. + # Anything outside this set is treated as remote but un-checkable. + NATIVELY_CHECKABLE_SCHEMES = ("http", "https") + + # Schemes accepted as RO-Crate root URIs (the loading code can only + # handle these as crate locations). + SUPPORTED_ROCRATE_SCHEMES = ("http", "https", "ftp", "file") - REMOTE_SUPPORTED_SCHEMA = ('http', 'https', 'ftp') + # Well-known remote schemes commonly used to reference data resources + # (used to distinguish "recognized but un-checkable" from "unknown").
+ KNOWN_REMOTE_SCHEMES = ( + # Web + "http", + "https", + # FTP family + "ftp", + "ftps", + "sftp", + # Remote shell / transfer + "scp", + "ssh", + "rsync", + # Cloud object stores + "s3", + "gs", + "abfs", + "abfss", + "wasb", + "wasbs", + # WebDAV + "dav", + "davs", + # Research / big-data filesystems + "irods", + "hdfs", + ) + + # ``file://`` authorities that denote the local machine (RFC 8089 §2): + # an empty authority (``file:///path``) or the special ``localhost`` host. + LOCAL_FILE_AUTHORITIES = ("", "localhost") + + # Backwards-compatible alias kept for callers that still inspect it. + REMOTE_SUPPORTED_SCHEMA = SUPPORTED_ROCRATE_SCHEMES[:-1] # http, https, ftp def __init__(self, uri: Union[str, Path]): + if uri is None or (isinstance(uri, str) and not uri.strip()): + raise ValueError("Invalid URI: empty value") self._uri = uri = str(uri) try: - # map local path to URI with file scheme - if not re.match(r'^\w+://', uri): - uri = f"file://{uri}" + # Inputs that are not external references are assumed to be local + # paths, so the ``file:`` scheme is added explicitly. The + # detection covers both authority-based schemes (``http://``, + # ``scp://``) and scheme-only ones (``urn:``, ``doi:``), as + # defined by RFC 3986. + # + # The authority-less ``file:`` form (no ``//``) is used on purpose: + # ``file://data/x`` would parse ``data`` as the authority (host), + # while ``file:data/x`` keeps ``data/x`` as the path with an empty + # authority. This way a local path never gains a spurious host and + # the authority remains a reliable signal to tell a local file + # (``file:///path``) from a remote one (``file://host/path``, + # RFC 8089).
+ if not is_external_reference(uri): + uri = f"file:{uri}" # parse the value to extract the scheme self._parse_result = urlparse(uri) - assert self.scheme in self.REMOTE_SUPPORTED_SCHEMA + ('file',), "Invalid URI scheme" + if not self.scheme: + raise ValueError("URI has no scheme") except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.debug(e) @@ -86,10 +193,35 @@ def as_path(self) -> Path: return Path(self._uri) def is_remote_resource(self) -> bool: - return self.scheme in self.REMOTE_SUPPORTED_SCHEMA + """ + Return True for any well-formed URI that points to a non-local resource. + + Schemes other than ``file`` (``http``, ``scp``, ``s3``, ...) are always + remote. A ``file://`` URI is remote when it carries an explicit, + non-local authority (host) — e.g. ``file://hostname/path`` per + RFC 8089: the referenced file lives on another machine and is therefore + not part of the local RO-Crate payload. + """ + if not self.scheme: + return False + if self.scheme == "file": + return self.get_netloc().lower() not in self.LOCAL_FILE_AUTHORITIES + return True def is_local_resource(self) -> bool: - return not self.is_remote_resource() + return self.scheme == "file" and self.get_netloc().lower() in self.LOCAL_FILE_AUTHORITIES + + def is_natively_checkable(self) -> bool: + """Return True if availability can be verified via a native request.""" + return self.scheme in self.NATIVELY_CHECKABLE_SCHEMES + + def is_known_remote_scheme(self) -> bool: + """Return True if the scheme is one of the well-known remote schemes.""" + return self.scheme in self.KNOWN_REMOTE_SCHEMES + + def has_supported_rocrate_scheme(self) -> bool: + """Return True if the scheme is supported as an RO-Crate root URI.""" + return self.scheme in self.SUPPORTED_ROCRATE_SCHEMES def is_local_directory(self) -> bool: return self.is_local_resource() and self.as_path().is_dir() @@ -97,17 +229,46 @@ def is_local_directory(self) -> bool: def is_local_file(self) -> bool: return self.is_local_resource()
and self.as_path().is_file() - def is_available(self) -> bool: - """Check if the resource is available""" + def check_availability(self) -> AvailabilityStatus: + """ + Inspect the resource availability with as much detail as possible. + + Distinguishes: + - AVAILABLE: confirmed reachable + - UNAUTHORIZED: reachable but protected (HTTP 401/403) + - UNAVAILABLE: confirmed not reachable + - UNCHECKABLE: scheme has no native check (e.g. scp://, s3://) + """ if self.is_remote_resource(): + if not self.is_natively_checkable(): + logger.debug( + "Cannot natively verify availability for URI '%s' (scheme '%s')", + self._uri, + self.scheme, + ) + return AvailabilityStatus.UNCHECKABLE try: response = HttpRequester().head(self._uri, allow_redirects=True) - return response.status_code in (200, 302) + if response.status_code in (200, 302): + return AvailabilityStatus.AVAILABLE + if response.status_code in (401, 403): + return AvailabilityStatus.UNAUTHORIZED + return AvailabilityStatus.UNAVAILABLE except Exception as e: if logger.isEnabledFor(logging.DEBUG): logger.debug(e) - return False - return Path(self._uri).exists() + return AvailabilityStatus.UNAVAILABLE + return AvailabilityStatus.AVAILABLE if Path(self._uri).exists() else AvailabilityStatus.UNAVAILABLE + + def is_available(self) -> bool: + """ + Return True only when the resource is confirmed available. + + Resources that cannot be verified (unsupported scheme, auth-protected) + return False here; callers that need to distinguish those cases should + use :meth:`check_availability` instead. 
+ """ + return self.check_availability() == AvailabilityStatus.AVAILABLE def __str__(self): return self._uri @@ -138,6 +299,9 @@ def validate_rocrate_uri(uri: Union[str, Path, URI], silent: bool = False) -> bo try: # parse the value to extract the scheme uri = URI(str(uri)) if isinstance(uri, str) or isinstance(uri, Path) else uri + # restrict RO-Crate roots to schemes the loader can actually handle + if not uri.has_supported_rocrate_scheme(): + raise errors.ROCrateInvalidURIError(uri) # check if the URI is a remote resource or local directory or local file if not uri.is_remote_resource() and not uri.is_local_directory() and not uri.is_local_file(): raise errors.ROCrateInvalidURIError(uri) @@ -146,7 +310,7 @@ def validate_rocrate_uri(uri: Union[str, Path, URI], silent: bool = False) -> bo raise errors.ROCrateInvalidURIError(uri) # check if the resource is available if not uri.is_available(): - raise errors.ROCrateInvalidURIError(uri, message=f"The RO-crate at the URI \"{uri}\" is not available") + raise errors.ROCrateInvalidURIError(uri, message=f'The RO-crate at the URI "{uri}" is not available') return True except ValueError as e: logger.error(e) diff --git a/tests/conftest.py b/tests/conftest.py index 704123b12..d14ff836a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,7 @@ # and add it to the system path import os +import pytest from pytest import fixture from rocrate_validator.utils import log as logging @@ -48,6 +49,65 @@ SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER = check_local_data_entity_existence.identifier +@pytest.fixture(scope="session", autouse=True) +def _session_wide_terminal(): + """ + Force Rich (and other ``os.get_terminal_size`` consumers) to render with + a wide terminal. ``click.testing.CliRunner`` captures stdout into a + StringIO, so Rich falls back to its 80-column default and truncates + table cells / wraps panel rows — breaking ``"substring" in result.output`` + assertions in a non-deterministic way.
Setting ``COLUMNS`` early keeps + the rendered output predictable across machines and CI. + """ + previous_columns = os.environ.get("COLUMNS") + previous_lines = os.environ.get("LINES") + os.environ["COLUMNS"] = "200" + os.environ["LINES"] = "50" + try: + yield + finally: + for name, prev in (("COLUMNS", previous_columns), ("LINES", previous_lines)): + if prev is None: + os.environ.pop(name, None) + else: + os.environ[name] = prev + + +@pytest.fixture(scope="session", autouse=True) +def _session_isolated_xdg(tmp_path_factory): + """ + Redirect the XDG user cache to a per-session temporary directory so that + tests do not write to, or read from, the developer's real ~/.cache. The + directory is shared across tests in the same session so that HTTP responses + fetched by one test remain available to subsequent ones (mirroring the + behavior users see in practice and preserving the HTTP-cache hit pattern + the existing test suite relies on). + """ + xdg_dir = tmp_path_factory.mktemp("rocrate_validator_xdg") + previous_xdg = os.environ.get("XDG_CACHE_HOME") + os.environ["XDG_CACHE_HOME"] = str(xdg_dir) + try: + yield xdg_dir + finally: + if previous_xdg is None: + os.environ.pop("XDG_CACHE_HOME", None) + else: + os.environ["XDG_CACHE_HOME"] = previous_xdg + + +@pytest.fixture(autouse=True) +def _per_test_auto_warm(monkeypatch): + """ + Disable the synchronous HTTP cache auto warm-up by default so tests do not + hit the network unexpectedly. Tests that exercise the warm-up opt in by + setting the environment variable before instantiating ValidationSettings. 
+ """ + monkeypatch.setenv( + "ROCRATE_VALIDATOR_AUTO_WARM", + os.environ.get("ROCRATE_VALIDATOR_AUTO_WARM", "0"), + ) + + @fixture def skip_data_entity_existence_check_identifier(): return SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER diff --git a/tests/data/profiles/fake/c-deactivated-direct/profile.ttl b/tests/data/profiles/fake/c-deactivated-direct/profile.ttl new file mode 100644 index 000000000..695b2e4bf --- /dev/null +++ b/tests/data/profiles/fake/c-deactivated-direct/profile.ttl @@ -0,0 +1,28 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dct: . +@prefix prof: . +@prefix role: . +@prefix rdfs: . + + + a prof:Profile ; + rdfs:label "Profile C4" ; + rdfs:comment """Comment for Profile C4 (deactivates Profile C's ShapeC by IRI, no override-by-name)."""@en ; + dct:publisher ; + prof:isProfileOf ; + prof:isTransitiveProfileOf , ; + prof:hasToken "c-deactivated-direct" ; +. diff --git a/tests/data/profiles/fake/c-deactivated/profile.ttl b/tests/data/profiles/fake/c-deactivated/profile.ttl new file mode 100644 index 000000000..8d62ab379 --- /dev/null +++ b/tests/data/profiles/fake/c-deactivated/profile.ttl @@ -0,0 +1,28 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dct: . +@prefix prof: . +@prefix role: . +@prefix rdfs: . + + + a prof:Profile ; + rdfs:label "Profile C3" ; + rdfs:comment """Comment for Profile C3 (deactivates the inherited check from Profile C)."""@en ; + dct:publisher ; + prof:isProfileOf ; + prof:isTransitiveProfileOf , ; + prof:hasToken "c-deactivated" ; +. diff --git a/tests/data/profiles/fake/c-deactivated/shape_c.ttl b/tests/data/profiles/fake/c-deactivated/shape_c.ttl new file mode 100644 index 000000000..26cdb9bd1 --- /dev/null +++ b/tests/data/profiles/fake/c-deactivated/shape_c.ttl @@ -0,0 +1,41 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix dct: . +@prefix rdf: . +@prefix schema_org: . +@prefix sh: . +@prefix xml1: . +@prefix xsd: . + + +# Same NodeShape + PropertyShape names as the inherited Shape C in profile c, +# so the existing override-by-name mechanism wires this shape to the parent's. 
+# The PropertyShape is marked sh:deactivated true: pyshacl skips it during +# validation, and the override surfaces via RequirementCheck.deactivated. +ro:ShapeC + a sh:NodeShape ; + sh:name "The Shape C" ; + sh:description "Deactivates the inherited Shape C check from profile c." ; + sh:targetNode ro:ro-crate-metadata.json ; + sh:property [ + a sh:PropertyShape ; + sh:name "Check Metadata File Descriptor entity existence" ; + sh:description "Check disabled by extension profile" ; + sh:path rdf:type ; + sh:minCount 1 ; + sh:deactivated true ; + sh:message "Disabled" ; + ] . diff --git a/tests/data/profiles/fake/c-wrapper/profile.ttl b/tests/data/profiles/fake/c-wrapper/profile.ttl new file mode 100644 index 000000000..c44d1241b --- /dev/null +++ b/tests/data/profiles/fake/c-wrapper/profile.ttl @@ -0,0 +1,30 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dct: . +@prefix prof: . +@prefix rdfs: . + +# A "wrapper" extension profile that inherits from c without declaring any +# SHACL shape of its own. Used to exercise the zero-shape target profile +# path: pyshacl must still run for inherited shapes to be evaluated. + + a prof:Profile ; + rdfs:label "Profile C-wrapper" ; + rdfs:comment """Pure inheritance from Profile C, no own checks."""@en ; + dct:publisher ; + prof:isProfileOf ; + prof:isTransitiveProfileOf , ; + prof:hasToken "c-wrapper" ; +. 
diff --git a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py index 9f129b2c5..74719843b 100644 --- a/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py +++ b/tests/integration/profiles/isa-ro-crate/test_10_definedterm.py @@ -31,6 +31,8 @@ def test_isa_defined_term_name(): sparql = ( SPARQL_PREFIXES + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: DELETE { ?defined_term schema:name ?name . } @@ -146,3 +148,47 @@ def test_isa_defined_term_termCode_of_incorrect_type(): profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) + + +def test_isa_term_not_correctly_referenced(): + """ + Test an ISA RO-Crate where an invalid defined term is not correctly referenced. + Such defined terms should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?protocol bioschemas-prop:intendedUse ?term . + ?person schema:jobTitle ?term . + ?assay schema:measurementTechnique ?term . + ?assay schema:measurementMethod ?term . + ?term schema:name ?name . + } + INSERT { + ?assay schema:mentions ?term . + } + WHERE { + ?protocol a bioschemas:LabProtocol . + ?person a schema:Person . + ?assay a schema:Dataset . + ?term a schema:DefinedTerm . + ?term schema:name ?name . + ?protocol bioschemas-prop:intendedUse ?term . + ?person schema:jobTitle ?term . + ?assay schema:measurementTechnique ?term . + ?assay schema:measurementMethod ?term . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) diff --git a/tests/integration/profiles/isa-ro-crate/test_3_process.py b/tests/integration/profiles/isa-ro-crate/test_3_process.py index 7c1234cd3..d7d9d15fb 100644 --- a/tests/integration/profiles/isa-ro-crate/test_3_process.py +++ b/tests/integration/profiles/isa-ro-crate/test_3_process.py @@ -16,6 +16,7 @@ import logging from rocrate_validator.models import Severity +# from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER from tests.ro_crates import ValidROC from tests.shared import do_entity_test, SPARQL_PREFIXES @@ -57,14 +58,18 @@ def test_isa_process_name(): def test_isa_process_not_correctly_referenced_from_dataset(): """ - Test an ISA RO-Crate where a Process is referenced from a Dataset with wrong property. + Test an ISA RO-Crate where an invalid Process is not correctly referenced. + Such processes should be ignored, meaning the validation should pass. """ sparql = ( SPARQL_PREFIXES + """ + PREFIX schema: PREFIX bioschemas: + PREFIX bioschemas-prop: DELETE { ?dataset schema:about ?process . + ?process schema:name ?name . } INSERT { ?dataset schema:mentions ?process . @@ -72,6 +77,7 @@ def test_isa_process_not_correctly_referenced_from_dataset(): WHERE { ?dataset a schema:Dataset . ?dataset schema:about ?process. + ?process schema:name ?name . 
} """ ) @@ -79,15 +85,16 @@ def test_isa_process_not_correctly_referenced_from_dataset(): do_entity_test( rocrate_path=ValidROC().isa_ro_crate, requirement_severity=Severity.REQUIRED, - expected_validation_result=False, - expected_triggered_requirements=[ - "Process MUST be directly referenced from a dataset" - ], - expected_triggered_issues=[ - "Process MUST be directly referenced in about on a Dataset" - ], + expected_validation_result=True, + # expected_triggered_requirements=[ + # "Process MUST be directly referenced from a dataset" + # ], + # expected_triggered_issues=[ + # "Process MUST be directly referenced in about on a Dataset" + # ], profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, ) diff --git a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py index c48885b51..eed5feff7 100644 --- a/tests/integration/profiles/isa-ro-crate/test_4_protocol.py +++ b/tests/integration/profiles/isa-ro-crate/test_4_protocol.py @@ -16,6 +16,7 @@ import logging from rocrate_validator.models import Severity +# from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER from tests.ro_crates import ValidROC from tests.shared import do_entity_test, SPARQL_PREFIXES @@ -183,6 +184,45 @@ def test_isa_protocol_no_intendedUse(): ) +def test_isa_protocol_not_correctly_referenced_from_process(): + """ + Test an ISA RO-Crate where an invalid Protocol is not correctly referenced. + Such protocols should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?process bioschemas-prop:executesLabProtocol ?protocol . + ?protocol schema:description ?description . + } + INSERT { + ?process schema:mentions ?protocol . + ?protocol schema:description 42 . + } + WHERE { + ?process a bioschemas:LabProcess . + ?protocol a bioschemas:LabProtocol . 
+ ?process bioschemas-prop:executesLabProtocol ?protocol . + ?protocol schema:description ?description . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + # expected_triggered_requirements=["Protocol SHOULD have intended use"], + # expected_triggered_issues=["Protocol intended use MUST be of type string or DefinedTerm"], + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) + + def test_isa_protocol_intendedUse_incorrect_type(): """ Test an ISA RO-Crate where a Protocol has an intended use with the wrong type. diff --git a/tests/integration/profiles/isa-ro-crate/test_5_sample.py b/tests/integration/profiles/isa-ro-crate/test_5_sample.py index 305ccca0f..bc2714d0e 100644 --- a/tests/integration/profiles/isa-ro-crate/test_5_sample.py +++ b/tests/integration/profiles/isa-ro-crate/test_5_sample.py @@ -55,6 +55,42 @@ def test_isa_sample_name(): ) +def test_isa_sample_not_correctly_referenced_from_process(): + """ + Test an ISA RO-Crate where an invalid Sample is not correctly referenced. + Such samples should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?process schema:object ?sample . + ?sample schema:name ?name . + } + INSERT { + ?process schema:mentions ?sample . + } + WHERE { + ?process a bioschemas:LabProcess . + ?sample a bioschemas:Sample . + ?process schema:object ?sample . + ?sample schema:name ?name . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) + + def test_isa_sample_name_of_incorrect_type(): """ Test an ISA RO-Crate where a sample name has wrong type. diff --git a/tests/integration/profiles/isa-ro-crate/test_6_data.py b/tests/integration/profiles/isa-ro-crate/test_6_data.py index 1f19f0ecf..bb79248db 100644 --- a/tests/integration/profiles/isa-ro-crate/test_6_data.py +++ b/tests/integration/profiles/isa-ro-crate/test_6_data.py @@ -54,6 +54,42 @@ def test_isa_file_name(): ) +def test_isa_data_not_correctly_referenced_from_process(): + """ + Test an ISA RO-Crate where an invalid data file is not correctly referenced. + Such files should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?dataset schema:hasPart ?file . + ?file schema:name ?name . + } + INSERT { + ?dataset schema:mentions ?file . + } + WHERE { + ?dataset a schema:Dataset . + ?file a schema:MediaObject . + ?dataset schema:hasPart ?file . + ?file schema:name ?name . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) + + def test_isa_file_name_of_incorrect_type(): """ Test an ISA RO-Crate where a file name has wrong type. 
diff --git a/tests/integration/profiles/isa-ro-crate/test_7_person.py b/tests/integration/profiles/isa-ro-crate/test_7_person.py index 9135b633a..c7263ed6b 100644 --- a/tests/integration/profiles/isa-ro-crate/test_7_person.py +++ b/tests/integration/profiles/isa-ro-crate/test_7_person.py @@ -54,6 +54,45 @@ def test_isa_person_given_name(): ) +def test_isa_person_not_correctly_referenced(): + """ + Test an ISA RO-Crate where an invalid person is not correctly referenced. + Such persons should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?dataset schema:creator ?person . + ?person schema:givenName ?name . + ?article schema:author ?person . + } + INSERT { + ?dataset schema:mentions ?person . + } + WHERE { + ?dataset a schema:Dataset . + ?person a schema:Person . + ?article a schema:ScholarlyArticle . + ?article schema:author ?person . + ?dataset schema:creator ?person . + ?person schema:givenName ?name . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) + + def test_isa_person_given_name_of_incorrect_type(): """ Test an ISA RO-Crate where a person given name has wrong type. diff --git a/tests/integration/profiles/isa-ro-crate/test_8_article.py b/tests/integration/profiles/isa-ro-crate/test_8_article.py index fac95cbb6..31041f667 100644 --- a/tests/integration/profiles/isa-ro-crate/test_8_article.py +++ b/tests/integration/profiles/isa-ro-crate/test_8_article.py @@ -54,6 +54,41 @@ def test_isa_article_headline(): ) +def test_isa_publication_not_correctly_referenced(): + """ + Test an ISA RO-Crate where an invalid publication is not correctly referenced. + Such publications should be ignored, meaning the validation should pass. 
+ """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?dataset schema:citation ?publication . + ?publication schema:headline ?headline . + } + INSERT { + ?dataset schema:mentions ?publication . + } + WHERE { + ?dataset a schema:Dataset . + ?publication a schema:ScholarlyArticle . + ?publication schema:headline ?headline . + } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) + + def test_isa_article_headline_of_incorrect_type(): """ Test an ISA RO-Crate where an article headline has wrong type. diff --git a/tests/integration/profiles/isa-ro-crate/test_9_comment.py b/tests/integration/profiles/isa-ro-crate/test_9_comment.py index c2b7db4c5..53ddcf6b5 100644 --- a/tests/integration/profiles/isa-ro-crate/test_9_comment.py +++ b/tests/integration/profiles/isa-ro-crate/test_9_comment.py @@ -81,3 +81,39 @@ def test_isa_comment_text_of_incorrect_type(): profile_identifier="isa-ro-crate", rocrate_entity_mod_sparql=sparql, ) + + +def test_isa_comment_not_correctly_referenced(): + """ + Test an ISA RO-Crate where an invalid comment is not correctly referenced. + Such comments should be ignored, meaning the validation should pass. + """ + sparql = ( + SPARQL_PREFIXES + + """ + PREFIX bioschemas: + PREFIX bioschemas-prop: + DELETE { + ?publication schema:comment ?comment . + ?comment schema:text ?text . + } + INSERT { + ?publication schema:mentions ?comment . + ?comment schema:text 42 . + } + WHERE { + ?publication a schema:ScholarlyArticle . + ?comment a schema:Comment . + ?comment schema:text ?text . 
+ } + """ + ) + + do_entity_test( + rocrate_path=ValidROC().isa_ro_crate, + requirement_severity=Severity.REQUIRED, + expected_validation_result=True, + profile_identifier="isa-ro-crate", + rocrate_entity_mod_sparql=sparql, + disable_inherited_profiles_issue_reporting=True, + ) diff --git a/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py b/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py index a8cf1cce8..41654474d 100644 --- a/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py +++ b/tests/integration/profiles/provenance-run-crate/test_provrc_howtostep.py @@ -97,14 +97,14 @@ def test_provrc_howtostep_no_position(): def test_provrc_howtostep_bad_position(): """\ Test a Provenance Run Crate where a HowToStep has a position that is not - a string representing an integer. + an integer or a string representing an integer. """ do_entity_test( InvalidProvRC().howtostep_bad_position, Severity.REQUIRED, False, ["ProvRC HowToStep MUST"], - ["If specified, position must be a string representing an integer"], + ["If specified, position must be an integer or a string representing an integer"], profile_identifier="provenance-run-crate" ) diff --git a/tests/integration/profiles/ro-crate/test_data_entity_metadata.py b/tests/integration/profiles/ro-crate/test_data_entity_metadata.py index 01b1a5a09..baf00774d 100644 --- a/tests/integration/profiles/ro-crate/test_data_entity_metadata.py +++ b/tests/integration/profiles/ro-crate/test_data_entity_metadata.py @@ -12,9 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import json import logging -from rocrate_validator import models +import pytest + +from rocrate_validator import models, services from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER from tests.ro_crates import InvalidDataEntity, ValidROC from tests.shared import do_entity_test @@ -34,7 +37,7 @@ def test_missing_data_entity_reference(): models.Severity.REQUIRED, False, ["Data Entity: REQUIRED properties"], - ["sort-and-change-case.ga", "foo/xxx"] + ["sort-and-change-case.ga", "foo/xxx"], ) @@ -44,7 +47,7 @@ def test_data_entity_must_be_directly_linked(): paths.direct_hasPart_data_entity_reference, models.Severity.REQUIRED, True, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -54,7 +57,7 @@ def test_data_entity_not_linked(): paths.dataset_not_linked_to_root, models.Severity.REQUIRED, False, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -64,7 +67,7 @@ def test_data_entity_must_be_indirectly_linked(): paths.indirect_hasPart_data_entity_reference, models.Severity.REQUIRED, True, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -75,7 +78,7 @@ def test_directory_data_entity_wo_trailing_slash(): models.Severity.RECOMMENDED, False, ["Directory Data Entity: RECOMMENDED value restriction"], - ["Every Data Entity Directory URI SHOULD end with `/`"] + ["Every Data Entity Directory URI SHOULD end with `/`"], ) @@ -86,7 +89,7 @@ def test_missing_data_entity_encoding_format(): models.Severity.RECOMMENDED, False, ["File Data Entity: RECOMMENDED properties"], - ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"] + ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"], ) @@ -97,7 +100,7 @@ def test_invalid_data_entity_encoding_format_pronom(): 
models.Severity.RECOMMENDED, False, ["File Data Entity: RECOMMENDED properties"], - ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"] + ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"], ) @@ -108,7 +111,7 @@ def test_invalid_data_entity_encoding_format_ctx_website_type(): models.Severity.RECOMMENDED, False, ["File Data Entity: RECOMMENDED properties"], - ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"] + ["Missing or invalid `encodingFormat` linked to the `File Data Entity`"], ) @@ -119,7 +122,7 @@ def test_invalid_data_entity_encoding_format_ctx_website_name(): models.Severity.RECOMMENDED, False, ["WebSite RECOMMENDED Properties"], - ["A WebSite MUST have a `name` property"] + ["A WebSite MUST have a `name` property"], ) @@ -129,7 +132,7 @@ def test_valid_data_entity_encoding_format_pronom(): paths.valid_encoding_format_pronom, models.Severity.RECOMMENDED, True, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -139,7 +142,7 @@ def test_valid_data_entity_encoding_format_ctx_website(): paths.valid_encoding_format_ctx_entity, models.Severity.RECOMMENDED, True, - skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER] + skip_checks=[SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER], ) @@ -150,7 +153,7 @@ def test_missing_file_data_entity_with_quoted_name(): models.Severity.REQUIRED, False, ["Data Entity: REQUIRED resource availability"], - ["The RO-Crate does not include the Data Entity 'pics/2017-06-11%2012.56.14.jpg' as part of its payload"] + ["The RO-Crate does not include the Data Entity 'pics/2017-06-11%2012.56.14.jpg' as part of its payload"], ) @@ -161,7 +164,7 @@ def test_missing_file_data_entity_with_unquoted_name(): models.Severity.REQUIRED, False, ["Data Entity: REQUIRED resource availability"], - ["The RO-Crate does not include the Data Entity 'pics/2017-06-11 12.56.14.jpg' as part of its 
payload"] + ["The RO-Crate does not include the Data Entity 'pics/2017-06-11 12.56.14.jpg' as part of its payload"], ) @@ -172,7 +175,7 @@ def test_missing_dataset_entity_with_quoted_name(): models.Severity.REQUIRED, False, ["Data Entity: REQUIRED resource availability"], - ["The RO-Crate does not include the Data Entity 'data%20set/' as part of its payload"] + ["The RO-Crate does not include the Data Entity 'data%20set/' as part of its payload"], ) @@ -183,7 +186,7 @@ def test_missing_dataset_entity_with_unquoted_name(): models.Severity.REQUIRED, False, ["Data Entity: REQUIRED resource availability"], - ["The RO-Crate does not include the Data Entity 'data set/' as part of its payload"] + ["The RO-Crate does not include the Data Entity 'data set/' as part of its payload"], ) @@ -194,15 +197,78 @@ def test_missing_absolute_path_data_entity(): models.Severity.RECOMMENDED, False, ["Data Entity: RECOMMENDED resource availability"], - ["Data Entity file:///tmp/test.txt is not available"] + ["Data Entity file:///tmp/test.txt is not available"], ) def test_valid_rocrate_with_data_entities(): """""" - do_entity_test( - ValidROC().rocrate_with_data_entities, - models.Severity.REQUIRED, - True, - profile_identifier="ro-crate" + do_entity_test(ValidROC().rocrate_with_data_entities, models.Severity.REQUIRED, True, profile_identifier="ro-crate") + + +@pytest.mark.parametrize( + "remote_entity_id", + [ + "scp://transfer.example.org//data/A.0.0", + "sftp://user@host/path/to/file", + "s3://bucket/key", + ], +) +def test_remote_data_entity_does_not_fail_required_check(tmp_path, remote_entity_id): + """Regression test for issue #176. + + A Data Entity whose `@id` is an absolute URI with a non-file scheme (e.g. + ``scp://``, ``sftp://``, ``s3://``) MUST NOT trigger the + "Data Entity: REQUIRED resource availability" violation: per the RO-Crate + spec, any absolute-URI Data Entity is web-based and is not required to be + part of the local payload. 
+ """ + crate_dir = tmp_path / "crate-with-remote-entity" + crate_dir.mkdir() + metadata = { + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, + "about": {"@id": "./"}, + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Crate with remote entity", + "description": "Regression fixture for issue #176", + "datePublished": "2026-05-15T07:30:50+00:00", + "license": {"@id": "https://spdx.org/licenses/CC0-1.0"}, + "hasPart": [{"@id": remote_entity_id}], + }, + { + "@id": remote_entity_id, + "@type": "File", + "name": "Remote file", + "contentSize": 16, + "dateModified": "2026-05-15T07:30:50+00:00", + "sdDatePublished": "2026-05-15T07:31:03+00:00", + }, + {"@id": "https://spdx.org/licenses/CC0-1.0", "@type": "CreativeWork", "name": "CC0"}, + ], + } + (crate_dir / "ro-crate-metadata.json").write_text(json.dumps(metadata)) + + result = services.validate( + models.ValidationSettings( + rocrate_uri=crate_dir, + requirement_severity=models.Severity.REQUIRED, + profile_identifier="ro-crate", + ) + ) + assert result.passed(), ( + f"RO-Crate with remote entity '{remote_entity_id}' should pass REQUIRED " + f"validation; got issues: {[i.message for i in result.get_issues()]}" ) + # And the specific must/4 violation must NOT be among the issues. + for issue in result.get_issues(): + assert "as part of its payload" not in (issue.message or ""), ( + f"Unexpected payload violation raised for remote entity: {issue.message}" + ) diff --git a/tests/integration/test_offline_mode.py b/tests/integration/test_offline_mode.py new file mode 100644 index 000000000..6c48bf588 --- /dev/null +++ b/tests/integration/test_offline_mode.py @@ -0,0 +1,407 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Integration tests for offline mode, auto warm-up and the cache CLI.""" + +from __future__ import annotations + +import io + +import pytest +import urllib3 +from click.testing import CliRunner + +from rocrate_validator.cli.main import cli +from rocrate_validator.models import ValidationSettings +from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, + HttpRequester) +from tests.conftest import SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER +from tests.ro_crates import ValidROC + + +def _urllib3_response(payload: bytes = b'{"@context": {}}', + status: int = 200, + content_type: str = "application/ld+json") -> urllib3.HTTPResponse: + return urllib3.HTTPResponse( + body=io.BytesIO(payload), + headers={ + "Content-Type": content_type, + "Content-Length": str(len(payload)), + }, + status=status, + preload_content=False, + decode_content=False, + ) + + +@pytest.fixture +def network_interceptor(monkeypatch): + """ + Intercept every outbound HTTP call and record the requested URLs so tests + can assert whether the cache was actually consulted. 
+ """ + from requests.adapters import HTTPAdapter + + recorder = {"calls": []} + + def fake_send(self, request, **kwargs): + recorder["calls"].append(request.url) + return self.build_response(request, _urllib3_response()) + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + return recorder + + +@pytest.fixture(autouse=True) +def _clean_singleton(monkeypatch): + monkeypatch.setenv("ROCRATE_VALIDATOR_AUTO_WARM", "0") + HttpRequester.reset() + yield + HttpRequester.reset() + + +@pytest.fixture +def cli_runner() -> CliRunner: + return CliRunner() + + +def test_offline_flag_configures_cache(tmp_path): + settings = ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + cache_path=tmp_path / "cache", + ) + info = HttpRequester().cache_info() + assert info["offline"] is True + assert info["permanent"] is True + assert settings.offline is True + + +def test_offline_default_path_is_persistent(tmp_path, monkeypatch): + monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path / "xdg")) + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + cache_path=None, + ) + info = HttpRequester().cache_info() + assert info["offline"] is True + assert info["permanent"] is True + assert str(tmp_path / "xdg") in str(info["path"]) + + +def test_offline_cache_miss_yields_504_response(tmp_path): + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + cache_path=tmp_path / "cache", + ) + response = HttpRequester().get("https://example.org/never") + assert response.status_code == OFFLINE_CACHE_MISS_STATUS + + +def test_online_then_offline_share_default_cache(tmp_path, network_interceptor, monkeypatch): + """Reproduce the common user workflow: validate online without passing a + cache path, then validate offline without passing a cache path. Both runs + must share the same persistent XDG cache so the offline run finds every + resource fetched online. 
+ """ + monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path / "xdg")) + url = "https://example.org/ctx" + + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=False, + cache_max_age=60, + ) + online_info = HttpRequester().cache_info() + assert online_info["permanent"] is True + assert str(tmp_path / "xdg") in str(online_info["path"]) + HttpRequester().get(url) + assert HttpRequester().has_cached(url) is True + + HttpRequester.reset() + + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + ) + offline_info = HttpRequester().cache_info() + assert offline_info["path"] == online_info["path"] + assert HttpRequester().has_cached(url) is True + response = HttpRequester().get(url) + assert response.status_code == 200 + + +def test_offline_reuses_cached_response(tmp_path, network_interceptor): + cache_path = tmp_path / "cache" + # First: online run populates the cache. + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=False, + cache_path=cache_path, + cache_max_age=60, + ) + url = "https://example.org/context" + response = HttpRequester().get(url) + assert response.status_code == 200 + assert HttpRequester().has_cached(url) is True + pre_calls = len(network_interceptor["calls"]) + + # Second: offline run must not hit the network but still get the cached doc. + HttpRequester.reset() + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + cache_path=cache_path, + ) + response = HttpRequester().get(url) + assert response.status_code == 200 + assert response.content == b'{"@context": {}}' + # No new network traffic in offline mode. 
+ assert len(network_interceptor["calls"]) == pre_calls + + +def test_no_cache_disables_cache_backend(tmp_path, network_interceptor): + """no_cache=True must skip the cache and hit the network every call.""" + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=False, + no_cache=True, + ) + requester = HttpRequester() + info = requester.cache_info() + assert info["backend"] is None + assert requester.has_cached("https://example.org/any") is False + # Two identical requests must both hit the network. + requester.get("https://example.org/any") + requester.get("https://example.org/any") + assert network_interceptor["calls"].count("https://example.org/any") == 2 + + +def test_negative_cache_max_age_means_never_expire(tmp_path, network_interceptor): + """cache_max_age<0 must enable the cache with no expiration.""" + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=False, + cache_max_age=-1, + cache_path=tmp_path / "cache", + ) + requester = HttpRequester() + info = requester.cache_info() + assert info["backend"] is not None + url = "https://example.org/any" + requester.get(url) + # Second call must be served from the cache. 
+ requester.get(url) + assert network_interceptor["calls"].count(url) == 1 + + +def test_offline_with_disabled_cache_raises(): + with pytest.raises(ValueError, match="Offline mode requires the HTTP cache"): + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + no_cache=True, + ) + + +def test_cli_no_cache_and_offline_rejected(cli_runner): + result = cli_runner.invoke( + cli, + [ + "-y", + "validate", + str(ValidROC().wrroc_paper_long_date), + "--no-paging", + "--no-cache", + "--offline", + ], + ) + assert result.exit_code != 0, result.output + assert "mutually exclusive" in result.output.lower() + + +def test_cli_no_cache_disables_cache_backend(cli_runner, tmp_path, network_interceptor): + """The --no-cache flag must skip the cache and hit the network on every call.""" + result = cli_runner.invoke( + cli, + [ + "-y", + "validate", + str(ValidROC().wrroc_paper_long_date), + "--no-paging", + "--no-cache", + "--skip-checks", SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + ], + ) + # The validation itself may pass or fail depending on upstream checks; we + # only require that no cache file was written (ephemeral session). + assert "Traceback" not in result.output, result.output + info = HttpRequester().cache_info() + assert info["backend"] is None + + +def test_cli_cache_info(cli_runner, tmp_path): + result = cli_runner.invoke( + cli, + ["cache", "info", "--cache-path", str(tmp_path / "cache")], + ) + assert result.exit_code == 0, result.output + assert "HTTP Cache" in result.output or "Entries" in result.output + + +def test_cli_cache_reset_noninteractive_requires_yes(cli_runner, tmp_path, network_interceptor): + cache_path = tmp_path / "cache" + # Populate the cache so the reset has something to do. 
+ HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=60) + HttpRequester().get("https://example.org/ctx") + assert HttpRequester().cache_info()["entries"] >= 1 + HttpRequester.reset() + + # Without --yes in non-interactive mode, reset must abort. + result = cli_runner.invoke( + cli, + ["-y", "cache", "reset", "--cache-path", str(cache_path)], + ) + assert result.exit_code == 1, result.output + # Cache should still contain the entry. + HttpRequester.reset() + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=3600) + assert HttpRequester().cache_info()["entries"] >= 1 + + +def test_cli_cache_reset_yes_clears_entries(cli_runner, tmp_path, network_interceptor): + cache_path = tmp_path / "cache" + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=60) + HttpRequester().get("https://example.org/ctx") + HttpRequester().get("https://example.org/other") + assert HttpRequester().cache_info()["entries"] >= 2 + HttpRequester.reset() + + result = cli_runner.invoke( + cli, + ["-y", "cache", "reset", "--cache-path", str(cache_path), "--yes"], + ) + assert result.exit_code == 0, result.output + + HttpRequester.reset() + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=-1) + assert HttpRequester().cache_info()["entries"] == 0 + + +def test_cli_cache_warm_populates_profile_urls(cli_runner, tmp_path, network_interceptor): + cache_path = tmp_path / "cache" + result = cli_runner.invoke( + cli, + [ + "-y", + "cache", "warm", + "--cache-path", str(cache_path), + "--profile-identifier", "ro-crate-1.1", + ], + ) + assert result.exit_code == 0, result.output + assert any("w3id.org" in c for c in network_interceptor["calls"]), \ + f"No expected URL fetched. Calls: {network_interceptor['calls']}" + # The URL must now be cached for offline use. 
+ HttpRequester.reset() + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=3600, offline=True) + assert HttpRequester().has_cached("https://w3id.org/ro/crate/1.1/context") is True + + +def test_cli_cache_warm_crate_caches_remote_archive(cli_runner, tmp_path, network_interceptor): + cache_path = tmp_path / "cache" + crate_url = "https://example.org/my-crate.zip" + result = cli_runner.invoke( + cli, + [ + "-y", + "cache", "warm", + "--cache-path", str(cache_path), + "--crate", crate_url, + ], + ) + assert result.exit_code == 0, result.output + HttpRequester.reset() + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=3600, offline=True) + assert HttpRequester().has_cached(crate_url) is True + + +def test_cli_validate_offline_warns_when_remote(cli_runner, tmp_path, network_interceptor): + """In offline mode with a remote URI the validator must emit a warning.""" + # Pre-populate the cache so the remote crate resolves in offline mode. + cache_path = tmp_path / "cache" + HttpRequester.initialize_cache(cache_path=str(cache_path), cache_max_age=60) + HttpRequester().get("https://example.org/fake-crate.zip") + HttpRequester.reset() + + # We intentionally do not actually run the full validation here; the CLI + # will fail because the cached body is not a valid ZIP, but the warning is + # emitted before that point. 
+ result = cli_runner.invoke( + cli, + [ + "-y", + "validate", + "https://example.org/fake-crate.zip", + "--no-paging", + "--offline", + "--cache-path", str(cache_path), + ], + ) + assert "offline mode is enabled" in result.output.lower() \ + or "cached version" in result.output.lower(), result.output + + +def test_cli_validate_offline_on_local_crate_succeeds(cli_runner, tmp_path): + """Validating a local crate in offline mode must work without network access.""" + cache_path = tmp_path / "cache" + result = cli_runner.invoke( + cli, + [ + "-y", + "validate", + str(ValidROC().wrroc_paper_long_date), + "--no-paging", + "--offline", + "--cache-path", str(cache_path), + "--skip-checks", SKIP_LOCAL_DATA_ENTITY_EXISTENCE_CHECK_IDENTIFIER, + ], + ) + # The validation may report issues for locally missing contexts; what we + # require is that no uncaught network-related exception aborts the run. + assert result.exit_code in (0, 1), result.output + assert "Traceback" not in result.output + + +def test_auto_warm_up_skipped_when_offline(tmp_path, network_interceptor, monkeypatch): + """Auto warm-up must not run when offline mode is active.""" + monkeypatch.setenv("ROCRATE_VALIDATOR_AUTO_WARM", "1") + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=True, + cache_path=tmp_path / "cache", + ) + assert network_interceptor["calls"] == [] + + +def test_auto_warm_up_disabled_via_env(tmp_path, network_interceptor, monkeypatch): + monkeypatch.setenv("ROCRATE_VALIDATOR_AUTO_WARM", "0") + ValidationSettings( + rocrate_uri=str(ValidROC().wrroc_paper_long_date), + offline=False, + cache_path=tmp_path / "cache", + ) + assert network_interceptor["calls"] == [] diff --git a/tests/test_cli.py b/tests/test_cli.py index fc897e098..519802e55 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -19,6 +19,10 @@ from click.testing import CliRunner from pytest import fixture +from rocrate_validator import services +from rocrate_validator.requirements.python 
import PyFunctionCheck +from rocrate_validator.requirements.shacl.checks import SHACLCheck + from rocrate_validator.utils import log as logging from rocrate_validator.cli.main import cli from rocrate_validator.utils.versioning import get_version @@ -31,7 +35,11 @@ @fixture def cli_runner() -> CliRunner: - return CliRunner() + # Force a wide terminal: the CLI renders output through Rich, which wraps + # and truncates tables/panels to the terminal width (defaulting to 80 + # columns when stdout is captured). Pinning COLUMNS keeps the rendered + # output deterministic regardless of the environment's actual width. + return CliRunner(env={"COLUMNS": "200"}) def test_version(cli_runner: CliRunner): @@ -134,7 +142,11 @@ def test_validate_with_invalid_profiles_path_dir(cli_runner: CliRunner): ) assert result.exit_code == 2 # logger.debug(result.output) - assert re.search(f"Path '{dummy_profiles_path}' does not exist.", result.output) + # On narrow terminals the Rich error panel wraps the message across lines + # and inserts box-drawing borders (โ”‚) between words; strip those and + # collapse whitespace so the match does not depend on terminal width. + normalized_output = re.sub(r"[\sโ”‚]+", " ", result.output) + assert re.search(f"Path '{dummy_profiles_path}' does not exist.", normalized_output) def test_profiles_list(cli_runner: CliRunner): @@ -157,3 +169,143 @@ def test_extra_profiles_list(cli_runner: CliRunner, fake_profiles_path: Path): assert result.exit_code == 0 # assert "Available profiles:" in result.output assert "Profile A" in result.output # Check for a known extra profile + + +# Profile used for `profiles describe` tests. 
+_DESCRIBE_TEST_PROFILE = "ro-crate-1.1" + + +def _first_visible_check(): + """Return the first non-hidden (Python-backed) check of the test profile.""" + profile = services.get_profile(_DESCRIBE_TEST_PROFILE) + for requirement in profile.requirements: + if requirement.hidden: + continue + for check in requirement.get_checks(): + if isinstance(check, PyFunctionCheck): + return profile, requirement, check + raise RuntimeError("No Python-backed check found in test profile") + + +def _first_shacl_check(): + """Return the first non-hidden SHACL-backed check of the test profile.""" + profile = services.get_profile(_DESCRIBE_TEST_PROFILE) + for requirement in profile.requirements: + if requirement.hidden: + continue + for check in requirement.get_checks(): + if isinstance(check, SHACLCheck): + return profile, requirement, check + raise RuntimeError("No SHACL-backed check found in test profile") + + +def test_profiles_describe_default(cli_runner: CliRunner): + """The default describe view (no check id) shows the profile compact view.""" + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, "--no-paging"]) + assert result.exit_code == 0 + assert _DESCRIBE_TEST_PROFILE in result.output + assert "Profile Requirements" in result.output + + +def test_profiles_describe_verbose(cli_runner: CliRunner): + """The verbose describe view (no check id) shows individual check identifiers.""" + _, _, check = _first_visible_check() + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, "-v", "--no-paging"]) + assert result.exit_code == 0 + assert check.identifier in result.output + + +def test_describe_check_relative_id(cli_runner: CliRunner): + """Resolving a check by '.' 
renders the single-check view.""" + _, requirement, check = _first_visible_check() + relative = f"{requirement.order_number}.{check.order_number}" + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "--no-paging"]) + assert result.exit_code == 0, result.output + assert check.identifier in result.output + assert check.severity.name in result.output + + +def test_describe_check_full_id(cli_runner: CliRunner): + """Resolving a check by full '_.'.""" + _, _, check = _first_visible_check() + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, check.identifier, "--no-paging"]) + assert result.exit_code == 0, result.output + assert check.identifier in result.output + + +def test_describe_check_unknown(cli_runner: CliRunner): + """An out-of-range check id produces a usage error with a hint.""" + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, "99.99", "--no-paging"]) + assert result.exit_code == 2 + assert "No requirement #99" in result.output + + +def test_describe_check_bad_format(cli_runner: CliRunner): + """A non-numeric check id is rejected with a format hint.""" + result = cli_runner.invoke(cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, "not-an-id", "--no-paging"]) + assert result.exit_code == 2 + assert "Invalid check identifier" in result.output + + +def test_describe_check_profile_mismatch(cli_runner: CliRunner): + """A full id whose prefix doesn't match the requested profile is rejected.""" + result = cli_runner.invoke( + cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, "some-other-profile_1.1", "--no-paging"] + ) + assert result.exit_code == 2 + assert "does not belong to profile" in result.output + + +def test_describe_check_verbose_python(cli_runner: CliRunner): + """Verbose single-check view on a Python-backed check shows the function source.""" + _, requirement, check = _first_visible_check() + relative = 
f"{requirement.order_number}.{check.order_number}" + result = cli_runner.invoke( + cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"] + ) + assert result.exit_code == 0, result.output + assert "Source" in result.output + # The decorated check function is what gets serialized + assert "@check" in result.output + + +def test_describe_check_verbose_shacl(cli_runner: CliRunner): + """Verbose single-check view on a SHACL-backed check shows turtle source.""" + _, requirement, check = _first_shacl_check() + relative = f"{requirement.order_number}.{check.order_number}" + result = cli_runner.invoke( + cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"] + ) + assert result.exit_code == 0, result.output + assert "Source" in result.output + # SHACL serialized as turtle should contain a sh: prefix and a NodeShape/PropertyShape declaration + assert "sh:" in result.output + + +def test_describe_check_verbose_shacl_includes_target(cli_runner: CliRunner): + """For nested PropertyShape checks, the snippet must include the owning NodeShape's target.""" + profile = services.get_profile(_DESCRIBE_TEST_PROFILE) + nested = None + for requirement in profile.requirements: + if requirement.hidden: + continue + for check in requirement.get_checks(): + if isinstance(check, SHACLCheck) and getattr(check._shape, "parent", None) is not None: + nested = (requirement, check) + break + if nested: + break + if nested is None: + # No nested PropertyShape check available in this profile; nothing to assert here. + return + requirement, check = nested + relative = f"{requirement.order_number}.{check.order_number}" + result = cli_runner.invoke( + cli, ["profiles", "describe", _DESCRIBE_TEST_PROFILE, relative, "-v", "--no-paging"] + ) + assert result.exit_code == 0, result.output + # The snippet must surface the owning shape's target declaration so the user can see + # what the property check applies to. 
+ assert any(t in result.output for t in ("sh:targetClass", "sh:targetNode", + "sh:targetSubjectsOf", "sh:targetObjectsOf", + "sh:target ")) diff --git a/tests/test_cli_cache.py b/tests/test_cli_cache.py new file mode 100644 index 000000000..380b20bfc --- /dev/null +++ b/tests/test_cli_cache.py @@ -0,0 +1,464 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +CLI tests for the ``rocrate-validator cache`` subcommands: + +* ``cache warm`` profile-token fallback (mirrors ``validate``). +* ``cache warm -u/--url`` arbitrary URL warming. +* ``cache list`` / ``cache ls`` entry listing with filter/sort/--json. 
+""" + +from __future__ import annotations + +import io +import json + +import pytest +import urllib3 +from click.testing import CliRunner + +from rocrate_validator.cli.main import cli +from rocrate_validator.models import Profile +from rocrate_validator.utils.http import HttpRequester + + +# ---------- shared fixtures ---------- +@pytest.fixture +def cli_runner() -> CliRunner: + return CliRunner() + + +@pytest.fixture(autouse=True) +def _reset_requester(): + HttpRequester.reset() + yield + HttpRequester.reset() + + +@pytest.fixture +def mock_network(monkeypatch): + """Route every outbound HTTP call to a fake successful response.""" + from requests.adapters import HTTPAdapter + + def fake_send(self, request, **kwargs): + raw = urllib3.HTTPResponse( + body=io.BytesIO(b'{"ok": true}'), + headers={"Content-Type": "application/json", "Content-Length": "12"}, + status=200, + preload_content=False, + decode_content=False, + ) + return self.build_response(request, raw) + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + + +@pytest.fixture +def tmp_cache(tmp_path): + """Path passed via ``--cache-path`` to keep tests off the user cache.""" + return tmp_path / "cache" + + +def _make_profile_stub(identifier: str, version: str, token: str): + """Lightweight stand-in for a Profile used only by token fallback tests.""" + + class _Stub: + pass + + stub = _Stub() + stub.identifier = identifier + stub.version = version + stub.token = token + return stub + + +# ==================================================================== +# cache warm: profile-token fallback +# ==================================================================== +def test_warm_token_resolves_to_single_versioned_profile( + cli_runner, + mock_network, + tmp_cache, + monkeypatch, +): + """`-p process-run-crate` should resolve to the only versioned variant.""" + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "-p", "process-run-crate"], + ) + assert result.exit_code 
== 0, result.output + assert "process-run-crate-0.5" in result.output + # Single-version token must resolve silently โ€” no "Note:" line. + assert "Note:" not in result.output + assert "not found and skipped" not in result.output + + +def test_warm_token_with_multiple_versions_emits_note( + cli_runner, + mock_network, + tmp_cache, + monkeypatch, +): + """When a token matches more than one version, the picked identifier and + the alternatives must appear in a one-line Note.""" + candidates = [ + _make_profile_stub("fakeprof-0.1", "0.1", "fakeprof"), + _make_profile_stub("fakeprof-0.2", "0.2", "fakeprof"), + ] + + real_by_id = Profile.get_by_identifier + real_by_token = Profile.get_by_token + + def fake_by_id(ident): + if ident == "fakeprof": + return None + return real_by_id(ident) + + def fake_by_token(tok): + if tok == "fakeprof": + return candidates + return real_by_token(tok) + + monkeypatch.setattr(Profile, "get_by_identifier", staticmethod(fake_by_id)) + monkeypatch.setattr(Profile, "get_by_token", staticmethod(fake_by_token)) + # Skip URL discovery entirely โ€” the test cares about the resolver, not + # what's warmed. 
+ monkeypatch.setattr( + "rocrate_validator.cli.commands.cache.discover_cacheable_urls_from_profiles", + lambda profiles: [], + ) + + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "-p", "fakeprof"], + ) + assert result.exit_code == 0, result.output + assert "Note:" in result.output + assert "fakeprof-0.2" in result.output # picked (highest version) + assert "fakeprof-0.1" in result.output # listed as alternative + + +def test_warm_unknown_profile_still_reported_as_missing( + cli_runner, + mock_network, + tmp_cache, +): + """A profile id that matches neither identifier nor token must end up + in the existing 'Profile(s) not found and skipped' message.""" + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "-p", "definitely-not-a-profile"], + ) + assert result.exit_code == 0, result.output + assert "not found and skipped" in result.output + assert "definitely-not-a-profile" in result.output + + +# ==================================================================== +# cache warm: -u / --url +# ==================================================================== +def test_warm_url_alone_does_not_fall_back_to_all_profiles( + cli_runner, + mock_network, + tmp_cache, + monkeypatch, +): + """``cache warm -u `` with no -p must warm only the URL โ€” not every + installed profile (which is the default when no explicit source is + given).""" + seen = {"profile_calls": 0} + + def fake_discover(profiles): + seen["profile_calls"] += 1 + return [] + + monkeypatch.setattr( + "rocrate_validator.cli.commands.cache.discover_cacheable_urls_from_profiles", + fake_discover, + ) + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "-u", "https://example.org/a"], + ) + assert result.exit_code == 0, result.output + assert seen["profile_calls"] == 0 + assert "Fetching explicit URLs" in result.output + assert "https://example.org/a" in result.output + + +def 
test_warm_url_invalid_value_is_rejected(cli_runner, tmp_cache): + """Non-http(s) values must trip Click's parameter validation and exit 2.""" + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "-u", "notaurl"], + ) + assert result.exit_code == 2 + assert "http(s)" in result.output + assert "notaurl" in result.output + + +def test_warm_url_combined_with_profile_warms_both( + cli_runner, + mock_network, + tmp_cache, + monkeypatch, +): + """``-p -u `` must warm the profile URLs *and* the extra + URL in the same invocation.""" + # Make the profile contribute a single deterministic URL. + monkeypatch.setattr( + "rocrate_validator.cli.commands.cache.discover_cacheable_urls_from_profiles", + lambda profiles: ["https://example.org/from-profile"], + ) + result = cli_runner.invoke( + cli, + [ + "cache", + "warm", + "--cache-path", + str(tmp_cache), + "-p", + "ro-crate-1.1", + "-u", + "https://example.org/explicit", + ], + ) + assert result.exit_code == 0, result.output + assert "Warming cache for profiles" in result.output + assert "Fetching explicit URLs" in result.output + assert "https://example.org/from-profile" in result.output + assert "https://example.org/explicit" in result.output + + +# ==================================================================== +# cache warm: --crate (remote RO-Crate) +# ==================================================================== +@pytest.fixture +def mock_network_gzip(monkeypatch): + """ + Same as ``mock_network``, but returns a ``Content-Encoding: gzip`` body. + This encoded response is required to reproduce the warm-crate bug. 
+ """ + import gzip + + from requests.adapters import HTTPAdapter + + body = gzip.compress(b'{"@context": "https://w3id.org/ro/crate/1.2/context"}') + + def fake_send(self, request, **kwargs): + raw = urllib3.HTTPResponse( + body=io.BytesIO(body), + headers={ + "Content-Type": "application/json", + "Content-Encoding": "gzip", + "Content-Length": str(len(body)), + }, + status=200, + preload_content=False, + decode_content=False, + ) + return self.build_response(request, raw) + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + + +def test_warm_crate_caches_remote_metadata(cli_runner, mock_network_gzip, tmp_cache): + """ + Regression: ``cache warm --crate `` must consume the body via + ``response.content`` rather than streaming ``response.raw``. + + With ``stream=True`` + ``shutil.copyfileobj(response.raw, ...)`` the warm-up + crashed with urllib3's "Calling read(decode_content=False) is not supported + after read(decode_content=True) was called": requests_cache buffers the + streamed body (decode_content=True) to store it, after which a raw read + (decode_content=False) is rejected. The body must therefore be touched in a + way that goes through the already-decoded content. + """ + url = "https://example.org/ro-crate/ro-crate-metadata.json" + result = cli_runner.invoke( + cli, + ["cache", "warm", "--cache-path", str(tmp_cache), "--crate", url], + ) + assert result.exit_code == 0, result.output + assert "Fetching remote RO-Crates" in result.output + assert "Summary: 1 cached, 0 failed, 0 skipped" in result.output + + # The fetched crate must actually be retrievable from the cache afterwards. 
+ listed = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache)], + ) + assert listed.exit_code == 0, listed.output + assert "ro-crate-metadata.json" in listed.output + + +# ==================================================================== +# cache list / ls +# ==================================================================== +def _warm_some(cli_runner, tmp_cache, urls): + args = ["cache", "warm", "--cache-path", str(tmp_cache)] + for u in urls: + args += ["-u", u] + return cli_runner.invoke(cli, args) + + +def test_list_reports_empty_cache(cli_runner, tmp_cache): + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache)], + ) + assert result.exit_code == 0, result.output + assert "Cache is empty" in result.output + + +def test_list_shows_warmed_entries(cli_runner, mock_network, tmp_cache): + urls = [ + "https://example.org/alpha", + "https://example.org/beta", + "https://example.org/gamma", + ] + warm = _warm_some(cli_runner, tmp_cache, urls) + assert warm.exit_code == 0, warm.output + + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache)], + ) + assert result.exit_code == 0, result.output + for u in urls: + # URLs are wrapped/folded in the Rich table, so check a stable token. + assert u.rsplit("/", 1)[1] in result.output + assert "Total:" in result.output + + +def test_list_url_filter_narrows_results(cli_runner, mock_network, tmp_cache): + urls = [ + "https://example.org/keep-me", + "https://example.org/other", + ] + _warm_some(cli_runner, tmp_cache, urls) + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--url", "keep-me"], + ) + assert result.exit_code == 0, result.output + assert "keep-me" in result.output + # The filter is case-insensitive substring on URL; "other" must be absent. 
+ assert "/other" not in result.output + + +def test_list_filter_with_no_match_message(cli_runner, mock_network, tmp_cache): + _warm_some(cli_runner, tmp_cache, ["https://example.org/only"]) + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--url", "no-such-fragment"], + ) + assert result.exit_code == 0, result.output + assert "No entries match" in result.output + + +def test_list_json_output_is_well_formed(cli_runner, mock_network, tmp_cache): + urls = [ + "https://example.org/a", + "https://example.org/b", + ] + _warm_some(cli_runner, tmp_cache, urls) + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--json"], + ) + assert result.exit_code == 0, result.output + payload = json.loads(result.output) + assert isinstance(payload, list) + assert {e["url"] for e in payload} == set(urls) + sample = payload[0] + # Every entry must carry the documented fields. + assert {"url", "status", "size_bytes", "content_type", "created_at", "expires", "is_expired"} <= set(sample) + assert isinstance(sample["size_bytes"], int) + + +def test_list_sort_by_url_asc_then_desc(cli_runner, mock_network, tmp_cache): + """`--sort url` defaults to asc; `--order desc` must reverse it.""" + _warm_some( + cli_runner, + tmp_cache, + [ + "https://example.org/c", + "https://example.org/a", + "https://example.org/b", + ], + ) + + asc = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--sort", "url", "--json"], + ) + assert asc.exit_code == 0, asc.output + asc_urls = [e["url"] for e in json.loads(asc.output)] + assert asc_urls == sorted(asc_urls) + + desc = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--sort", "url", "--order", "desc", "--json"], + ) + assert desc.exit_code == 0, desc.output + desc_urls = [e["url"] for e in json.loads(desc.output)] + assert desc_urls == sorted(desc_urls, reverse=True) + + +def test_list_default_sort_is_created_desc(cli_runner, 
mock_network, tmp_cache): + """No --sort flag: entries come back ordered by created_at, most recent + first (the documented default).""" + _warm_some( + cli_runner, + tmp_cache, + [ + "https://example.org/first", + "https://example.org/second", + "https://example.org/third", + ], + ) + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--json"], + ) + assert result.exit_code == 0, result.output + created = [e["created_at"] for e in json.loads(result.output)] + # Each entry has a timestamp (mocked response goes through requests_cache); + # the sequence must be monotonically non-increasing. + assert all(a >= b for a, b in zip(created, created[1:])) + + +def test_list_invalid_order_is_rejected(cli_runner, tmp_cache): + result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--order", "sideways"], + ) + assert result.exit_code == 2 + assert "'sideways'" in result.output + + +def test_ls_alias_runs_the_same_command(cli_runner, mock_network, tmp_cache): + _warm_some(cli_runner, tmp_cache, ["https://example.org/x"]) + list_result = cli_runner.invoke( + cli, + ["cache", "list", "--cache-path", str(tmp_cache), "--json"], + ) + ls_result = cli_runner.invoke( + cli, + ["cache", "ls", "--cache-path", str(tmp_cache), "--json"], + ) + assert list_result.exit_code == ls_result.exit_code == 0 + assert json.loads(list_result.output) == json.loads(ls_result.output) diff --git a/tests/unit/requirements/test_profiles.py b/tests/unit/requirements/test_profiles.py index 424b6419d..bb1f46469 100644 --- a/tests/unit/requirements/test_profiles.py +++ b/tests/unit/requirements/test_profiles.py @@ -16,13 +16,13 @@ import os import pytest +from rdflib import Literal, Namespace -from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER -from rocrate_validator.errors import (DuplicateRequirementCheck, - InvalidProfilePath, - ProfileSpecificationError) -from rocrate_validator.models import (Profile, ValidationContext, 
- ValidationSettings, Validator) +from rocrate_validator.constants import DEFAULT_PROFILE_IDENTIFIER, SHACL_NS +from rocrate_validator.errors import DuplicateRequirementCheck, InvalidProfilePath, ProfileSpecificationError +from rocrate_validator.models import Profile, ValidationContext, ValidationSettings, Validator +from rocrate_validator.requirements.shacl.checks import SHACLCheck +from rocrate_validator.requirements.shacl.models import ShapesRegistry from tests.ro_crates import InvalidFileDescriptorEntity, ValidROC # set up logging @@ -57,7 +57,7 @@ def test_load_invalid_profile_from_validation_context(fake_profiles_path: str): "profiles_path": "/tmp/random_path_xxx", "profile_identifier": DEFAULT_PROFILE_IDENTIFIER, "rocrate_uri": ValidROC().wrroc_paper, - "enable_profile_inheritance": False + "enable_profile_inheritance": False, } settings = ValidationSettings(**settings) @@ -79,7 +79,7 @@ def test_load_valid_profile_without_inheritance_from_validation_context(fake_pro "profiles_path": fake_profiles_path, "profile_identifier": "c", "rocrate_uri": ValidROC().wrroc_paper, - "enable_profile_inheritance": False + "enable_profile_inheritance": False, } settings = ValidationSettings(**settings) @@ -128,7 +128,8 @@ def test_profile_spec_properties(fake_profiles_path: str): assert profile.version == "1.0.0", "The profile version should be 1.0.0" assert profile.is_profile_of == ["https://w3id.org/a"], "The profileOf property should be ['a']" assert profile.is_transitive_profile_of == [ - "https://w3id.org/a"], "The transitiveProfileOf property should be ['a']" + "https://w3id.org/a" + ], "The transitiveProfileOf property should be ['a']" def test_profiles_loading_free_folder_structure(profiles_with_free_folder_structure_path: str): @@ -206,8 +207,9 @@ def __perform_test__(profile_identifier: str, expected_inherited_profiles: list[ # The number of profiles should be 1 profiles_names = [_.token for _ in profile.inherited_profiles] - assert profiles_names == 
expected_inherited_profiles, \ - f"The number of profiles should be {expected_inherited_profiles}" + assert ( + profiles_names == expected_inherited_profiles + ), f"The number of profiles should be {expected_inherited_profiles}" # Test the inheritance mode with 1 profile __perform_test__("a", []) @@ -252,7 +254,7 @@ def test_load_invalid_profile_with_override_on_same_profile(fake_profiles_path: "profile_identifier": "invalid-duplicated-shapes", "rocrate_uri": ValidROC().wrroc_paper, "enable_profile_inheritance": True, - "allow_requirement_check_override": False + "allow_requirement_check_override": False, } settings = ValidationSettings(**settings) @@ -275,7 +277,7 @@ def test_load_valid_profile_with_override_on_inherited_profile(fake_profiles_pat "profile_identifier": "c-overridden", "rocrate_uri": ValidROC().wrroc_paper, "enable_profile_inheritance": True, - "allow_requirement_check_override": True + "allow_requirement_check_override": True, } settings = ValidationSettings(**settings) @@ -297,6 +299,34 @@ def test_load_valid_profile_with_override_on_inherited_profile(fake_profiles_pat assert len(requirements_checks) == 3, "The number of requirements should be 2" +def test_zero_shape_target_profile_triggers_pyshacl_run(fake_profiles_path: str): + """Regression test for the 0-shape profile bug: + when the target profile has no SHACL checks of its own, + Validator must still drive a single pyshacl run + on the merged shapes graph so inherited shapes get evaluated. 
+ Without the fix in `Validator.__ensure_target_shacl_run__`, + no SHACLCheck would be recorded as executed for the wrapper target.""" + + settings = ValidationSettings( + **{ + "profiles_path": fake_profiles_path, + "profile_identifier": "c-wrapper", + "rocrate_uri": ValidROC().wrroc_paper, + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + "disable_check_for_duplicates": True, + } + ) + result = Validator(settings).validate() + + executed_shacl = [c for c in result.executed_checks if isinstance(c, SHACLCheck)] + assert executed_shacl, ( + "Expected at least one inherited SHACLCheck to be executed for the " + "c-wrapper target. None recorded โ€” the zero-shape pyshacl run was " + "skipped." + ) + + def test_profile_parents(check_overriding_profiles_path: str): """Test the order of the loaded profiles.""" logger.debug("The profiles path: %r", check_overriding_profiles_path) @@ -366,29 +396,31 @@ def test_profile_check_overriding(check_overriding_profiles_path: str): def check_profile(profile, check, inherited_profiles, overridden_by, override): # Check inherited profiles - assert len(profile.inherited_profiles) == len(inherited_profiles), \ - f"The number of inherited profiles should be {len(inherited_profiles)}" + assert len(profile.inherited_profiles) == len( + inherited_profiles + ), f"The number of inherited profiles should be {len(inherited_profiles)}" inherited_profiles_tokens = [_.token for _ in profile.inherited_profiles] - assert set(inherited_profiles_tokens) == set(inherited_profiles), \ - f"The inherited profiles should be {inherited_profiles}" + assert set(inherited_profiles_tokens) == set( + inherited_profiles + ), f"The inherited profiles should be {inherited_profiles}" # Check overridden status - logger.debug("%r overridden by: %r", check.identifier, [ - _.requirement.profile.identifier for _ in check.overridden_by]) - assert check.overridden == (len(overridden_by) > 0), \ - f"The check overridden status should be 
{len(overridden_by) > 0}" - assert len(check.overridden_by) == len(overridden_by), \ - f"The number of overridden checks should be {len(overridden_by)}" + logger.debug( + "%r overridden by: %r", check.identifier, [_.requirement.profile.identifier for _ in check.overridden_by] + ) + assert check.overridden == ( + len(overridden_by) > 0 + ), f"The check overridden status should be {len(overridden_by) > 0}" + assert len(check.overridden_by) == len( + overridden_by + ), f"The number of overridden checks should be {len(overridden_by)}" overridden_by_tokens = [_.requirement.profile.identifier for _ in check.overridden_by] - assert set(overridden_by_tokens) == set(overridden_by), \ - f"The overridden checks should be {overridden_by}" + assert set(overridden_by_tokens) == set(overridden_by), f"The overridden checks should be {overridden_by}" # Check override status - assert len(check.overrides) == len(override), \ - f"The number of overridden checks should be {len(override)}" + assert len(check.overrides) == len(override), f"The number of overridden checks should be {len(override)}" override_tokens = [_.requirement.profile.identifier for _ in check.overrides] - assert set(override_tokens) == set(override), \ - f"The overridden checks should be {override}" + assert set(override_tokens) == set(override), f"The overridden checks should be {override}" # Check the number of requirements and checks of each profile for profile in profiles: @@ -438,3 +470,152 @@ def check_profile(profile, check, inherited_profiles, overridden_by, override): # Check the profile 'x' elif profile.token == "x": check_profile(profile, check, ["a", "b", "d"], [], ["d"]) + + +def test_python_check_decorator_sets_deactivated_flag(): + """The @check decorator must propagate the `deactivated` flag onto the + decorated function so that PyRequirement.__init_checks__ can read it.""" + from rocrate_validator.requirements.python import check + + @check(name="off", deactivated=True) + def disabled(self, ctx): # 
noqa: ANN001 + return False + + @check(name="on") + def enabled(self, ctx): # noqa: ANN001 + return True + + assert disabled.deactivated is True + assert enabled.deactivated is False + + +def test_shacl_shape_with_deactivated_marks_check_skipped(fake_profiles_path: str): + """A child profile that overrides an inherited NodeShape by `sh:name` and + sets `sh:deactivated true` should produce a check whose `deactivated` + property is True; the parent's check should be marked as `overridden`.""" + settings = { + "profiles_path": fake_profiles_path, + "profile_identifier": "c-deactivated", + "rocrate_uri": ValidROC().wrroc_paper, + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + } + + settings = ValidationSettings(**settings) + validator = Validator(settings) + context = ValidationContext(validator, validator.validation_settings) + + profiles = context.profiles + profile_tokens = sorted(p.token for p in profiles) + # Inheritance chain: a <- c <- c-deactivated + assert profile_tokens == ["a", "c", "c-deactivated"] + + target = next(p for p in profiles if p.token == "c-deactivated") + parent_c = next(p for p in profiles if p.token == "c") + + # The PropertyShape carries `sh:deactivated true`; the matching check is + # the second one (the first is the hidden NodeShape root check). + target_property_check = target.requirements[0].get_checks()[1] + parent_property_check = parent_c.requirements[0].get_checks()[1] + + assert ( + target_property_check.deactivated is True + ), "The deactivated property should reflect sh:deactivated true on the PropertyShape" + + # The parent property check is overridden by the child's (same sh:name). + overridden_by_tokens = [c.requirement.profile.token for c in parent_property_check.overridden_by] + assert "c-deactivated" in overridden_by_tokens, "The parent check should be reported as overridden by c-deactivated" + assert parent_property_check.overridden is True + + # Default state for a non-deactivated check. 
+ assert parent_property_check.deactivated is False + + +def test_shacl_check_deactivated_via_cross_profile_triple(fake_profiles_path: str): + """A child profile that adds `<parent-shape> sh:deactivated true` to its + own shapes graph (without redeclaring the shape) should cause the parent's + check to report `deactivated=True`. Verifies the cross-profile lookup in + SHACLCheck.deactivated and the pre-load pass in Validator.""" + + settings = ValidationSettings( + **{ + "profiles_path": fake_profiles_path, + "profile_identifier": "c-deactivated-direct", + "rocrate_uri": ValidROC().wrroc_paper, + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + } + ) + validator = Validator(settings) + context = ValidationContext(validator, validator.validation_settings) + + profiles = context.profiles + profile_tokens = sorted(p.token for p in profiles) + assert profile_tokens == ["a", "c", "c-deactivated-direct"] + + target = next(p for p in profiles if p.token == "c-deactivated-direct") + parent_c = next(p for p in profiles if p.token == "c") + + # Trigger lazy loading of every profile's shape graph (the Validator + # would do this in __do_validate__; we replay it here for the unit test). + for p in profiles: + _ = p.requirements + + parent_shape_check = next(c for c in parent_c.requirements[0].get_checks() if isinstance(c, SHACLCheck)) + assert parent_shape_check.deactivated is False, "Sanity check: the parent shape should not be deactivated yet" + + # Simulate what a child-profile shape file would contribute: a single + # `<parent-shape> sh:deactivated true` triple in its own shapes graph. 
+ target_registry = ShapesRegistry.get_instance(target) + sh = Namespace(SHACL_NS) + target_registry._shapes_graph.add((parent_shape_check.shape.node, sh.deactivated, Literal(True))) + + assert ( + parent_shape_check.deactivated is True + ), "The parent's check must read sh:deactivated true from the child's shapes graph" + + +def test_shacl_check_deactivation_scoped_to_descendants(fake_profiles_path: str): + """A `sh:deactivated true` triple declared by a profile that does NOT + inherit from the shape's owning profile must be ignored. Otherwise + unrelated profiles loaded in the same process could spuriously deactivate + one another's checks.""" + + settings = ValidationSettings( + **{ + "profiles_path": fake_profiles_path, + "profile_identifier": "c", + "rocrate_uri": ValidROC().wrroc_paper, + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + } + ) + validator = Validator(settings) + context = ValidationContext(validator, validator.validation_settings) + + # Force population of the global profile registry by listing all profiles + # under the fake_profiles_path, then resolve the specific ones we need. + all_profiles = Profile.load_profiles(profiles_path=fake_profiles_path) + parent_c = next(p for p in all_profiles if p.token == "c") + # Profile "b" is a descendant of "a" but NOT of "c" — unrelated. + profile_b = next(p for p in all_profiles if p.token == "b") + assert parent_c not in profile_b.inherited_profiles + assert profile_b not in Profile.get_descendants(parent_c) + + # Trigger lazy loading. + for p in all_profiles: + _ = p.requirements + _ = context.profiles # warm context too + + parent_shape_check = next(c for c in parent_c.requirements[0].get_checks() if isinstance(c, SHACLCheck)) + assert parent_shape_check.deactivated is False + + # Inject a deactivation triple into an unrelated profile's registry. 
+ sh = Namespace(SHACL_NS) + ShapesRegistry.get_instance(profile_b)._shapes_graph.add( + (parent_shape_check.shape.node, sh.deactivated, Literal(True)) + ) + + assert ( + parent_shape_check.deactivated is False + ), "An unrelated profile's deactivation triple must not affect the check" diff --git a/tests/unit/requirements/test_shacl_checks.py b/tests/unit/requirements/test_shacl_checks.py index 808f77bc3..f1c3fac0d 100644 --- a/tests/unit/requirements/test_shacl_checks.py +++ b/tests/unit/requirements/test_shacl_checks.py @@ -17,17 +17,20 @@ from rdflib import BNode, Graph, Namespace, URIRef from rocrate_validator.constants import SHACL_NS +from rocrate_validator.models import LevelCollection from rocrate_validator.requirements.shacl.checks import SHACLCheck -from rocrate_validator.requirements.shacl.models import Shape, ShapesRegistry +from rocrate_validator.requirements.shacl.models import (NodeShape, + PropertyShape, Shape, + ShapesRegistry) from rocrate_validator.requirements.shacl.utils import resolve_parent_shape logger = logging.getLogger(__name__) class MockRequirement: - def __init__(self): + def __init__(self, requirement_level_from_path=None): self.profile = None - self.requirement_level_from_path = None + self.requirement_level_from_path = requirement_level_from_path class MockParentShape: @@ -220,3 +223,98 @@ def test_resolve_parent_shape_with_property_bnode(): assert result is not None, "Should resolve parent shape for property BNode" assert result.key == shape.key + + +def _make_property(graph: Graph, severity_term: str = None) -> PropertyShape: + """Build a PropertyShape on a fresh BNode, optionally setting sh:severity.""" + prop = PropertyShape(BNode(), graph) + if severity_term is not None: + prop.severity = severity_term + return prop + + +def test_derive_level_picks_most_stringent_declared_property_severity(): + """ + Flat NodeShape with no declared severity inherits the highest severity + declared by its nested properties. 
+ """ + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.add_property(_make_property(g, f"{SHACL_NS}Info")) + shape.add_property(_make_property(g, f"{SHACL_NS}Warning")) + shape.add_property(_make_property(g, f"{SHACL_NS}Info")) + + check = SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.RECOMMENDED + + +def test_derive_level_with_uniform_property_severity(): + """When every property declares the same severity, derive that severity.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.add_property(_make_property(g, f"{SHACL_NS}Info")) + shape.add_property(_make_property(g, f"{SHACL_NS}Info")) + + check = SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.OPTIONAL + + +def test_derive_level_ignores_properties_without_declared_severity(): + """Properties without sh:severity are skipped; only declared ones drive the result.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.add_property(_make_property(g)) # no severity declared + shape.add_property(_make_property(g, f"{SHACL_NS}Warning")) + + check = SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.RECOMMENDED + + +def test_derive_level_falls_back_to_required_when_no_property_declares_severity(): + """If no nested property declares a severity, fall back to REQUIRED.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.add_property(_make_property(g)) + shape.add_property(_make_property(g)) + + check = SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.REQUIRED + + +def test_shape_declared_severity_takes_precedence_over_derivation(): + """An explicit severity on the NodeShape wins over property-based derivation.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.severity = f"{SHACL_NS}Warning" + 
shape.add_property(_make_property(g, f"{SHACL_NS}Violation")) + + check = SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.RECOMMENDED + + +def test_path_based_level_takes_precedence_over_derivation(): + """When the requirement file is in a must/should/may folder the path level wins.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + shape.add_property(_make_property(g, f"{SHACL_NS}Info")) + + check = SHACLCheck( + MockRequirement(requirement_level_from_path=LevelCollection.SHOULD), shape + ) + + assert check.level == LevelCollection.SHOULD + + +def test_derive_level_for_node_shape_without_properties(): + """A NodeShape with no nested properties falls back to REQUIRED.""" + g = Graph() + shape = NodeShape(URIRef("http://example.org/NodeShape"), g) + + check = SHACLCheck(MockRequirement(), shape) + + assert check.level == LevelCollection.REQUIRED diff --git a/tests/unit/requirements/test_shacl_utils.py b/tests/unit/requirements/test_shacl_utils.py new file mode 100644 index 000000000..6a64c7cc0 --- /dev/null +++ b/tests/unit/requirements/test_shacl_utils.py @@ -0,0 +1,199 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Unit tests for ``ShapesList.get_shape_property_graph``. 
+ +The method must return a subgraph that: +* contains every triple reachable from the property shape (its constraints + and any RDF lists used by ``sh:and``/``sh:or``/``sh:xone``); +* contains the link triple ``(shape_node, sh:property, shape_property)``; +* does NOT contain triples that belong only to sibling property shapes +""" + +import pytest +from rdflib import RDF, BNode, Graph, Literal, Namespace, URIRef +from rdflib.collection import Collection + +from rocrate_validator.constants import SHACL_NS +from rocrate_validator.requirements.shacl.utils import load_shapes_from_graph + +SH = Namespace(SHACL_NS) +EX = Namespace("http://example.org/") + + +def _build_two_property_shape() -> tuple[Graph, URIRef, URIRef, URIRef]: + """ + Build a NodeShape with two sibling property shapes. + + Returns ``(graph, node_shape, prop_a, prop_b)``. + + Each property shape is a BNode owning its own ``sh:path``, + ``sh:datatype``, ``sh:minCount`` constraints. + """ + g = Graph() + g.bind("sh", SH) + g.bind("ex", EX) + + node_shape = EX.PersonShape + g.add((node_shape, RDF.type, SH.NodeShape)) + g.add((node_shape, SH.targetClass, EX.Person)) + + prop_a = BNode("propA") + g.add((node_shape, SH.property, prop_a)) + g.add((prop_a, SH.path, EX.name)) + g.add((prop_a, SH.datatype, EX.stringType)) + g.add((prop_a, SH.minCount, Literal(1))) + + prop_b = BNode("propB") + g.add((node_shape, SH.property, prop_b)) + g.add((prop_b, SH.path, EX.age)) + g.add((prop_b, SH.datatype, EX.intType)) + g.add((prop_b, SH.minCount, Literal(0))) + + return g, node_shape, prop_a, prop_b + + +def test_returns_link_triple_to_target_property(): + """The link ``(node_shape, sh:property, shape_property)`` must be present.""" + g, node_shape, prop_a, prop_b = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + + # The link to the prop_a shape must be present + assert (node_shape, SH.property, prop_a) in pg + # but not the link 
to the prop_b shape. + assert (node_shape, SH.property, prop_b) not in pg + + +def test_includes_all_constraints_of_target_property(): + """All triples whose subject is the target property shape must be included.""" + g, node_shape, prop_a, _ = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + + assert (prop_a, SH.path, EX.name) in pg + assert (prop_a, SH.datatype, EX.stringType) in pg + assert (prop_a, SH.minCount, Literal(1)) in pg + + +def test_excludes_sibling_property_link_and_constraints(): + """ + Sibling property shapes and their link triples must not appear in the + returned subgraph. This is the regression the new implementation fixes. + """ + g, node_shape, prop_a, prop_b = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + + # Sibling link triple must not be present. + assert (node_shape, SH.property, prop_b) not in pg + # Sibling constraints must not be present. + assert (prop_b, SH.path, EX.age) not in pg + assert (prop_b, SH.datatype, EX.intType) not in pg + assert (prop_b, SH.minCount, Literal(0)) not in pg + + +def test_subtraction_preserves_sibling_property_link(): + """ + Subtracting the returned subgraph from the merged shapes graph must + leave the sibling property's link to the parent NodeShape intact + """ + g, node_shape, prop_a, prop_b = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + remaining = shapes_list.shapes_graph - pg + + # The sibling property is still linked to the NodeShape. + assert (node_shape, SH.property, prop_b) in remaining + # And so are its constraints. + assert (prop_b, SH.path, EX.age) in remaining + + +def test_does_not_include_unrelated_node_shape_triples(): + """ + Triples on the parent NodeShape that are not the target link must + not be pulled in (e.g. 
``sh:targetClass``). + """ + g, node_shape, prop_a, _ = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + + assert (node_shape, SH.targetClass, EX.Person) not in pg + assert (node_shape, RDF.type, SH.NodeShape) not in pg + + +def test_includes_rdf_list_triples_for_sh_or(): + """ + When the property shape uses ``sh:or`` (an RDF list), the list spine + (``rdf:first``/``rdf:rest``) and every list member must be reachable + in the returned subgraph. + """ + g = Graph() + node_shape = EX.SomeShape + g.add((node_shape, RDF.type, SH.NodeShape)) + + prop = BNode("prop") + g.add((node_shape, SH.property, prop)) + g.add((prop, SH.path, EX.something)) + + member_a = BNode("memberA") + g.add((member_a, SH.datatype, EX.t1)) + member_b = BNode("memberB") + g.add((member_b, SH.datatype, EX.t2)) + + list_head = BNode("listHead") + Collection(g, list_head, [member_a, member_b]) + g.add((prop, SH["or"], list_head)) + + shapes_list = load_shapes_from_graph(g) + pg = shapes_list.get_shape_property_graph(node_shape, prop) + + # The sh:or link is reachable from the property. + assert (prop, SH["or"], list_head) in pg + # Both list members and their constraints are reachable. + assert (member_a, SH.datatype, EX.t1) in pg + assert (member_b, SH.datatype, EX.t2) in pg + # The RDF list spine is included so the list can be re-walked. + list_spine_subjects = {s for s, _, _ in pg.triples((None, RDF.first, None))} + assert list_head in list_spine_subjects + + +def test_only_target_link_present_when_node_has_multiple_properties(): + """ + The graph must contain exactly one ``sh:property`` triple originating + from the parent NodeShape — the one pointing at the target property. 
+ """ + g, node_shape, prop_a, _ = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + pg = shapes_list.get_shape_property_graph(node_shape, prop_a) + + property_links = list(pg.triples((node_shape, SH.property, None))) + assert len(property_links) == 1 + assert property_links[0] == (node_shape, SH.property, prop_a) + + +def test_unknown_shape_node_raises(): + """A shape node not in the registry should raise ``KeyError``.""" + g, _, prop_a, _ = _build_two_property_shape() + shapes_list = load_shapes_from_graph(g) + + with pytest.raises(KeyError): + shapes_list.get_shape_property_graph(EX.UnknownShape, prop_a) diff --git a/tests/unit/test_cache_warmup.py b/tests/unit/test_cache_warmup.py new file mode 100644 index 000000000..3b591b962 --- /dev/null +++ b/tests/unit/test_cache_warmup.py @@ -0,0 +1,173 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for profile URL discovery and cache warm-up.""" + +from __future__ import annotations + +import io + +import pytest +import urllib3 + +from rocrate_validator.models import Profile +from rocrate_validator.utils.cache_warmup import ( + auto_warm_up_for_settings, discover_cacheable_urls_from_profiles, + discover_profile_cacheable_urls, warm_up_urls) +from rocrate_validator.utils.http import HttpRequester +from rocrate_validator.utils.paths import get_profiles_path + +PROFILE_TTL_TEMPLATE = """ +@prefix dct: . +@prefix prof: . +@prefix role: . 
+@prefix rdfs: . + + + a prof:Profile ; + rdfs:label "Sample profile" ; + prof:hasResource [ + a prof:ResourceDescriptor ; + prof:hasRole role:Vocabulary ; + prof:hasArtifact ; + ] ; + prof:hasResource [ + a prof:ResourceDescriptor ; + prof:hasRole role:Specification ; + prof:hasArtifact ; + ] ; + prof:hasResource [ + a prof:ResourceDescriptor ; + prof:hasArtifact "not-a-url" ; + ] ; + prof:hasToken "sample" ; +. +""" + + +@pytest.fixture(autouse=True) +def _reset_requester(): + HttpRequester.reset() + yield + HttpRequester.reset() + + +@pytest.fixture +def sample_profile(tmp_path): + profile_dir = tmp_path / "sample" + profile_dir.mkdir() + (profile_dir / "profile.ttl").write_text(PROFILE_TTL_TEMPLATE) + return Profile( + profiles_base_path=tmp_path, + profile_path=profile_dir, + ) + + +@pytest.fixture +def mock_network(monkeypatch): + from requests.adapters import HTTPAdapter + + def fake_send(self, request, **kwargs): + raw = urllib3.HTTPResponse( + body=io.BytesIO(b'{"ok": true}'), + headers={"Content-Type": "application/json", "Content-Length": "12"}, + status=200, + preload_content=False, + decode_content=False, + ) + return self.build_response(request, raw) + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + + +def test_discover_urls_returns_all_declared_artifacts(sample_profile): + urls = discover_profile_cacheable_urls(sample_profile) + # Both declared roles are included; the non-URL artifact is dropped. 
+ assert "https://example.org/ctx/v1" in urls + assert "https://example.org/spec/v1/index.html" in urls + assert all(u.lower().startswith("http") for u in urls) + assert len(urls) == 2 + + +def test_discover_urls_on_multiple_profiles_deduplicates(sample_profile, tmp_path): + other_dir = tmp_path / "sample_other" + other_dir.mkdir() + (other_dir / "profile.ttl").write_text( + PROFILE_TTL_TEMPLATE + .replace("", + "") + .replace('prof:hasToken "sample"', 'prof:hasToken "other"') + ) + other_profile = Profile(profiles_base_path=tmp_path, profile_path=other_dir) + aggregated = discover_cacheable_urls_from_profiles([sample_profile, other_profile]) + # Both profiles share the same two artifacts; the result should be deduped. + assert len(aggregated) == 2 + + +def test_warm_up_urls_skips_already_cached(tmp_path, mock_network): + HttpRequester.initialize_cache( + cache_path=str(tmp_path / "cache"), + cache_max_age=60, + ) + urls = ["https://example.org/a", "https://example.org/b"] + first = warm_up_urls(urls) + assert [r.status for r in first] == ["ok", "ok"] + second = warm_up_urls(urls) + assert [r.status for r in second] == ["skipped", "skipped"] + + +def test_warm_up_reports_offline_cache_miss(tmp_path): + HttpRequester.initialize_cache( + cache_path=str(tmp_path / "cache"), + cache_max_age=-1, + offline=True, + ) + results = warm_up_urls(["https://example.org/missing"]) + assert results[0].status == "failed" + assert "offline" in (results[0].detail or "").lower() + + +def test_auto_warm_up_noop_when_offline(tmp_path): + class _Settings: + offline = True + cache_path = tmp_path / "cache" + profile_identifier = "ro-crate-1.1" + profiles_path = get_profiles_path() + extra_profiles_path = None + + assert auto_warm_up_for_settings(_Settings()) is None + + +def test_auto_warm_up_disabled_via_env(monkeypatch, tmp_path): + monkeypatch.setenv("ROCRATE_VALIDATOR_AUTO_WARM", "0") + + class _Settings: + offline = False + cache_path = tmp_path / "cache" + profile_identifier = 
"ro-crate-1.1" + profiles_path = get_profiles_path() + extra_profiles_path = None + + assert auto_warm_up_for_settings(_Settings()) is None + + +def test_auto_warm_up_noop_when_no_cache_path(): + class _Settings: + offline = False + cache_path = None + profile_identifier = "ro-crate-1.1" + profiles_path = get_profiles_path() + extra_profiles_path = None + + assert auto_warm_up_for_settings(_Settings()) is None diff --git a/tests/unit/test_cli_internals.py b/tests/unit/test_cli_internals.py index 2f90bcb88..ae4762ce3 100644 --- a/tests/unit/test_cli_internals.py +++ b/tests/unit/test_cli_internals.py @@ -81,3 +81,22 @@ def test_compute_stats(fake_profiles_path): _.requirement.profile.identifier == "a"} logger.error(stats) + + +def test_compute_stats_resolves_profile_from_extra_profiles_path(fake_profiles_path): + # ValidationStatistics.__initialise__ used to call Profile.load_profiles + # without forwarding extra_profiles_path, so any profile that lived only + # under --extra-profiles-path raised ProfileNotFound. + settings = ValidationSettings.parse({ + "profiles_path": DEFAULT_PROFILES_PATH, + "extra_profiles_path": fake_profiles_path, + "profile_identifier": "a", + "enable_profile_inheritance": True, + "allow_requirement_check_override": True, + "requirement_severity": "REQUIRED", + }) + + stats = ValidationStatistics.__initialise__(validation_settings=settings) + + assert any(p.identifier == "a" for p in stats["profiles"]), \ + "Profile 'a' from extra_profiles_path was not resolved by ValidationStatistics" diff --git a/tests/unit/test_document_loader.py b/tests/unit/test_document_loader.py new file mode 100644 index 000000000..85f03db44 --- /dev/null +++ b/tests/unit/test_document_loader.py @@ -0,0 +1,156 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for the JSON-LD document loader.""" + +from __future__ import annotations + +import io + +import pytest +import urllib3 + +from rocrate_validator.utils import document_loader +from rocrate_validator.utils.document_loader import (install_document_loader, + resolve_remote_document, + uninstall_document_loader) +from rocrate_validator.utils.http import HttpRequester, OfflineCacheMissError + + +def _urllib3_response(payload: bytes = b'{"@context": {"name": "https://schema.org/name"}}', + status: int = 200) -> urllib3.HTTPResponse: + return urllib3.HTTPResponse( + body=io.BytesIO(payload), + headers={ + "Content-Type": "application/ld+json", + "Content-Length": str(len(payload)), + }, + status=status, + preload_content=False, + decode_content=False, + ) + + +@pytest.fixture +def mock_network(monkeypatch): + from requests.adapters import HTTPAdapter + + def fake_send(self, request, **kwargs): + raw = _urllib3_response() + return self.build_response(request, raw) + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + + +@pytest.fixture(autouse=True) +def _cleanup(): + uninstall_document_loader() + HttpRequester.reset() + yield + uninstall_document_loader() + HttpRequester.reset() + + +def test_install_is_idempotent(tmp_path): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=-1) + assert install_document_loader() is True + assert install_document_loader() is True + assert document_loader._installed is True + + +def test_install_returns_false_on_error(tmp_path, monkeypatch): + 
HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=-1) + from rdflib.plugins.shared.jsonld import util as jsonld_util + + class _FrozenModule: + def __setattr__(self, _name, _value): + raise RuntimeError("boom") + + monkeypatch.setattr(document_loader, "jsonld_util", _FrozenModule()) + assert install_document_loader() is False + assert document_loader._installed is False + # Original module must remain untouched on failure. + assert jsonld_util.source_to_json is document_loader._original_source_to_json + + +def test_uninstall_returns_true_when_not_installed(): + assert uninstall_document_loader() is True + + +def test_uninstall_returns_false_on_error(tmp_path, monkeypatch): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=-1) + assert install_document_loader() is True + + class _FrozenModule: + def __setattr__(self, _name, _value): + raise RuntimeError("boom") + + monkeypatch.setattr(document_loader, "jsonld_util", _FrozenModule()) + assert uninstall_document_loader() is False + assert document_loader._installed is True + + +def test_resolve_remote_document_uses_http_requester(tmp_path, mock_network): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=60) + payload, content_type = resolve_remote_document("https://example.org/context") + assert payload == {"@context": {"name": "https://schema.org/name"}} + assert content_type == "application/ld+json" + assert HttpRequester().has_cached("https://example.org/context") is True + + +def test_resolve_raises_offline_cache_miss(tmp_path): + HttpRequester.initialize_cache( + cache_path=str(tmp_path / "cache"), + cache_max_age=-1, + offline=True, + ) + with pytest.raises(OfflineCacheMissError): + resolve_remote_document("https://example.org/never-cached") + + +def test_patched_source_to_json_routes_http_urls(tmp_path, mock_network): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=60) + 
install_document_loader() + from rdflib.plugins.shared.jsonld import util as jsonld_util + doc, _ = jsonld_util.source_to_json("https://example.org/context") + assert doc == {"@context": {"name": "https://schema.org/name"}} + + +def test_patched_source_to_json_ignores_non_http(tmp_path): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=60) + install_document_loader() + from rdflib.plugins.shared.jsonld import util as jsonld_util + file_path = tmp_path / "context.jsonld" + file_path.write_text('{"@context": {"foo": "https://example.org/foo"}}') + doc, _ = jsonld_util.source_to_json(str(file_path)) + assert doc == {"@context": {"foo": "https://example.org/foo"}} + + +def test_resolve_maps_http_error_to_runtime(tmp_path, monkeypatch): + HttpRequester.initialize_cache(cache_path=str(tmp_path / "cache"), cache_max_age=60) + + class _StubResponse: + status_code = 500 + text = "" + + def json(self): + raise ValueError + + monkeypatch.setattr( + HttpRequester(), + "get", + lambda *_, **__: _StubResponse(), + ) + with pytest.raises(RuntimeError): + resolve_remote_document("https://example.org/broken") diff --git a/tests/unit/test_http_requester_offline.py b/tests/unit/test_http_requester_offline.py new file mode 100644 index 000000000..de1491328 --- /dev/null +++ b/tests/unit/test_http_requester_offline.py @@ -0,0 +1,257 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Unit tests for the HttpRequester offline-mode extensions.""" + +from __future__ import annotations + +import io +from unittest.mock import MagicMock, patch + +import pytest +import urllib3 + +from rocrate_validator.utils import http as http_module +from rocrate_validator.utils.http import (OFFLINE_CACHE_MISS_STATUS, + HttpRequester) + + +def _build_urllib3_response(body: bytes = b'{"ok": true}', + status: int = 200, + content_type: str = "application/json") -> urllib3.HTTPResponse: + return urllib3.HTTPResponse( + body=io.BytesIO(body), + headers={"Content-Type": content_type, "Content-Length": str(len(body))}, + status=status, + preload_content=False, + decode_content=False, + ) + + +@pytest.fixture +def mock_network(monkeypatch): + """Route every outbound HTTP call to a fake urllib3 response.""" + from requests.adapters import HTTPAdapter + + def fake_send(self, request, **kwargs): + raw = _build_urllib3_response() + response = self.build_response(request, raw) + return response + + monkeypatch.setattr(HTTPAdapter, "send", fake_send) + + +@pytest.fixture(autouse=True) +def _reset_singleton(): + HttpRequester.reset() + yield + HttpRequester.reset() + + +def _initialize(cache_path, offline=False, cache_max_age=-1): + HttpRequester.reset() + return HttpRequester.initialize_cache( + cache_path=str(cache_path), + cache_max_age=cache_max_age, + offline=offline, + ) + + +def test_initialize_offline_sets_only_if_cached(tmp_path): + requester = _initialize(tmp_path / "cache", offline=True) + assert requester.offline is True + assert getattr(requester.session.settings, "only_if_cached", False) is True + + +def test_offline_cache_miss_returns_504(tmp_path): + requester = _initialize(tmp_path / "cache", offline=True) + response = requester.get("https://example.org/missing") + assert response.status_code == OFFLINE_CACHE_MISS_STATUS + + +def test_online_unknown_url_is_not_cached(tmp_path): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + 
assert requester.has_cached("https://example.org/anything") is False + + +def test_has_cached_returns_true_after_successful_fetch(tmp_path, mock_network): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + url = "https://example.org/ctx" + assert requester.has_cached(url) is False + response = requester.get(url) + assert response.status_code == 200 + assert requester.has_cached(url) is True + + +def test_offline_serves_cached_response_populated_online(tmp_path, mock_network): + cache_path = tmp_path / "cache" + requester = _initialize(cache_path, offline=False, cache_max_age=60) + url = "https://example.org/ctx" + requester.get(url) + HttpRequester.reset() + # Re-open the cache in offline mode and confirm the hit. + requester = _initialize(cache_path, offline=True) + response = requester.get(url) + assert response.status_code == 200 + assert response.content == b'{"ok": true}' + + +def test_fetch_fresh_bypasses_cache_when_online(tmp_path): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + session_mock = MagicMock() + fresh_response = MagicMock() + fresh_response.status_code = 200 + fresh_response.from_cache = False + session_mock.get.return_value = fresh_response + requester.session = session_mock + result = requester.fetch_fresh("https://example.org/fresh", allow_redirects=True) + assert result is fresh_response + session_mock.get.assert_called_once() + kwargs = session_mock.get.call_args.kwargs + assert kwargs.get("force_refresh") is True + assert kwargs.get("allow_redirects") is True + + +def test_fetch_fresh_falls_back_when_force_refresh_unsupported(tmp_path): + """Older requests_cache versions lack force_refresh; fall back to refresh.""" + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + + class _LegacySession: + def __init__(self): + self.calls: list[dict] = [] + + def get(self, url, **kwargs): + self.calls.append(kwargs) + if "force_refresh" in kwargs: + raise 
TypeError("unexpected keyword argument 'force_refresh'") + fake = MagicMock() + fake.status_code = 200 + fake.from_cache = False + return fake + + legacy = _LegacySession() + requester.session = legacy + response = requester.fetch_fresh("https://example.org/fresh") + assert response.status_code == 200 + assert len(legacy.calls) == 2 + assert "refresh" in legacy.calls[1] + + +def test_fetch_fresh_in_offline_does_not_refresh(tmp_path): + requester = _initialize(tmp_path / "cache", offline=True) + session_mock = MagicMock() + cached_response = MagicMock() + cached_response.status_code = 200 + cached_response.from_cache = True + session_mock.get.return_value = cached_response + requester.session = session_mock + result = requester.fetch_fresh("https://example.org/x") + assert result is cached_response + assert "force_refresh" not in session_mock.get.call_args.kwargs + assert "refresh" not in session_mock.get.call_args.kwargs + + +def test_clear_cache_empties_backend(tmp_path, mock_network): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + requester.get("https://example.org/a") + requester.get("https://example.org/b") + assert requester.cache_info()["entries"] >= 2 + requester.clear_cache() + assert requester.cache_info()["entries"] == 0 + + +def test_cache_info_reports_metadata(tmp_path): + cache_path = tmp_path / "cache" + requester = _initialize(cache_path, offline=False, cache_max_age=60) + info = requester.cache_info() + assert info["backend"] == "SQLiteCache" + assert info["path"].endswith(".sqlite") + assert info["permanent"] is True + assert info["offline"] is False + assert info["entries"] == 0 + + +class _RecordCollector: + """Context manager that attaches a capturing handler to the http logger.""" + + def __init__(self): + self.records: list = [] + + def __enter__(self): + import logging as _logging + + from rocrate_validator.utils import http as http_module + self.records.clear() + self.handler = _logging.Handler() + 
self.handler.setLevel(_logging.DEBUG) + self.handler.emit = lambda record: self.records.append(record) # type: ignore[assignment] + # Force initialization of the underlying logger via the proxy. + http_module.logger.warning # noqa: B018 + self._target = http_module.logger._instance + self._target.addHandler(self.handler) + self._previous_level = self._target.level + self._target.setLevel(_logging.DEBUG) + return self + + def __exit__(self, exc_type, exc, tb): + self._target.removeHandler(self.handler) + self._target.setLevel(self._previous_level) + return False + + def messages(self) -> list[str]: + return [r.getMessage() for r in self.records] + + +def test_offline_prefix_logs_remote_then_cache(tmp_path, mock_network): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + with _RecordCollector() as collector: + requester.get("https://example.org/ctx") + requester.get("https://example.org/ctx") + messages = [m for m in collector.messages() if "CachedHttpRequester:" in m] + assert any("fetched from remote" in m for m in messages) + assert any("served from HTTP cache" in m for m in messages) + + +def test_offline_prefix_logs_cache_miss_in_offline_mode(tmp_path): + requester = _initialize(tmp_path / "cache", offline=True) + with _RecordCollector() as collector: + requester.get("https://example.org/unknown") + messages = [m for m in collector.messages() if "CachedHttpRequester:" in m] + assert any("not available in HTTP cache" in m for m in messages) + + +def test_offline_prefix_logs_fetch_fresh_as_refresh(tmp_path, mock_network): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + # Populate the cache first. 
+ requester.get("https://example.org/x") + with _RecordCollector() as collector: + requester.fetch_fresh("https://example.org/x") + messages = [m for m in collector.messages() if "CachedHttpRequester:" in m] + assert any("cache refresh" in m for m in messages) + + +def test_offline_without_requests_cache_uses_fallback_session(tmp_path, monkeypatch): + """When requests_cache is unavailable, offline mode falls back to a 504 stub.""" + original_import = __import__ + + def fake_import(name, globals=None, locals=None, fromlist=(), level=0): + if name == "requests_cache" or (fromlist and "CachedSession" in fromlist and name.endswith("requests_cache")): + raise ImportError("simulated missing dependency") + return original_import(name, globals, locals, fromlist, level) + + with patch("builtins.__import__", side_effect=fake_import): + requester = _initialize(tmp_path / "cache", offline=True) + assert isinstance(requester.session, http_module._OfflineFallbackSession) + response = requester.get("https://example.org/whatever") + assert response.status_code == OFFLINE_CACHE_MISS_STATUS diff --git a/tests/unit/test_http_requester_reconfigure.py b/tests/unit/test_http_requester_reconfigure.py new file mode 100644 index 000000000..46e05a184 --- /dev/null +++ b/tests/unit/test_http_requester_reconfigure.py @@ -0,0 +1,154 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest + +from rocrate_validator.utils.http import HttpRequester + + +@pytest.fixture(autouse=True) +def _reset_singleton(): + HttpRequester.reset() + yield + HttpRequester.reset() + + +def _initialize(cache_path, offline=False, cache_max_age=-1): + return HttpRequester.initialize_cache( + cache_path=str(cache_path), + cache_max_age=cache_max_age, + offline=offline, + ) + + +def _fake_session(status_code=200): + """A session-like mock whose ``get`` returns a sentinel response.""" + session = MagicMock() + response = MagicMock(status_code=status_code, from_cache=False) + session.get.return_value = response + return session, response + + +def test_initialize_cache_creates_instance_when_absent(tmp_path): + assert HttpRequester._instance is None + requester = _initialize(tmp_path / "cache") + assert isinstance(requester, HttpRequester) + assert HttpRequester._instance is requester + + +def test_initialize_cache_reuses_existing_instance(tmp_path): + first = _initialize(tmp_path / "cache-1") + second = _initialize(tmp_path / "cache-2") + # The singleton is reconfigured in place rather than recreated. + assert second is first + + +def test_reconfigure_applies_new_settings(tmp_path): + requester = _initialize(tmp_path / "cache", offline=False, cache_max_age=60) + assert requester.offline is False + + same = _initialize(tmp_path / "cache", offline=True, cache_max_age=-1) + assert same is requester + assert same.offline is True + # Offline mode is enforced on the freshly rebuilt session. 
+ assert getattr(same.session.settings, "only_if_cached", False) is True + + +def test_reconfigure_rebuilds_underlying_session(tmp_path): + requester = _initialize(tmp_path / "cache-1", cache_max_age=60) + old_session = requester.session + _initialize(tmp_path / "cache-2", cache_max_age=60) + assert requester.session is not old_session + + +def test_reconfigure_preserves_instance_attributes(tmp_path): + """Regression: reconfiguring the cache must not discard state set on the + singleton (e.g. methods patched by tests).""" + requester = _initialize(tmp_path / "cache-1", cache_max_age=60) + sentinel = object() + requester.custom_marker = sentinel + + _initialize(tmp_path / "cache-2", cache_max_age=60) + + assert requester.custom_marker is sentinel + + +def test_method_wrapper_targets_current_session(tmp_path): + """The ``__getattr__`` HTTP wrappers resolve the session at call time, so a + wrapper obtained before a session swap still hits the live session.""" + requester = _initialize(tmp_path / "cache", cache_max_age=60) + + first_session, _ = _fake_session() + requester.session = first_session + wrapper = requester.get # captured before swapping the session + + second_session, expected = _fake_session(status_code=201) + requester.session = second_session + + result = wrapper("https://example.org/x") + + assert result is expected + second_session.get.assert_called_once() + first_session.get.assert_not_called() + + +def test_pinned_wrapper_survives_reconfigure(tmp_path): + """Mimics how ``pytest.monkeypatch`` teardown leaves a method wrapper pinned + as an instance attribute: after a reconfigure rebuilds the session, that + wrapper must still target the live session, not a closed one.""" + requester = _initialize(tmp_path / "cache-1", cache_max_age=60) + requester.get = requester.get # pin the wrapper as an instance attribute + + _initialize(tmp_path / "cache-2", cache_max_age=60) # rebuilds the session + + mock_session, expected = _fake_session() + requester.session 
= mock_session + + result = requester.get("https://example.org/x") + + assert result is expected + mock_session.get.assert_called_once() + + +def test_reset_drops_instance(tmp_path): + requester = _initialize(tmp_path / "cache", cache_max_age=60) + HttpRequester.reset() + assert HttpRequester._instance is None + # A subsequent initialization yields a brand-new instance. + assert _initialize(tmp_path / "cache", cache_max_age=60) is not requester + + +def test_validation_settings_preserves_singleton(tmp_path): + """Constructing ``ValidationSettings`` reconfigures the cache in place and + must not drop the existing requester (nor any state held on it).""" + from rocrate_validator.models import ValidationSettings + from rocrate_validator.utils.uri import URI + + requester = _initialize(tmp_path / "cache", cache_max_age=60) + marker = object() + requester.custom_marker = marker + + # ``offline=True`` keeps the construction self-contained (no warm-up/network). + ValidationSettings( + rocrate_uri=URI("."), + offline=True, + cache_path=tmp_path / "cache", + ) + + assert HttpRequester._instance is requester + assert requester.custom_marker is marker diff --git a/tests/unit/test_offline_cache_miss_warning.py b/tests/unit/test_offline_cache_miss_warning.py new file mode 100644 index 000000000..eae79b10e --- /dev/null +++ b/tests/unit/test_offline_cache_miss_warning.py @@ -0,0 +1,131 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest + +from rocrate_validator import models as models_module +from rocrate_validator.models import ValidationContext +from rocrate_validator.utils.http import OfflineCacheMissError, find_offline_cache_miss + + +# ---------- find_offline_cache_miss ---------- +def test_find_offline_cache_miss_direct(): + exc = OfflineCacheMissError("https://example.org/x") + assert find_offline_cache_miss(exc) is exc + + +def test_find_offline_cache_miss_walks_cause_chain(): + inner = OfflineCacheMissError("https://example.org/x") + try: + try: + raise inner + except OfflineCacheMissError as e: + raise RuntimeError("wrapped") from e + except Exception as outer: + found = find_offline_cache_miss(outer) + assert found is inner + + +def test_find_offline_cache_miss_walks_context_chain(): + # `raise` inside `except` without `from` populates __context__. + try: + try: + raise OfflineCacheMissError("https://example.org/y") + except OfflineCacheMissError: + raise RuntimeError("wrapped via context") + except Exception as outer: + found = find_offline_cache_miss(outer) + assert isinstance(found, OfflineCacheMissError) + assert found.url == "https://example.org/y" + + +def test_find_offline_cache_miss_returns_none_for_unrelated(): + assert find_offline_cache_miss(ValueError("nope")) is None + + +def test_find_offline_cache_miss_handles_cyclic_chain(): + # Two exceptions referencing each other must not loop forever. 
+ a = RuntimeError("a") + b = RuntimeError("b") + a.__context__ = b + b.__context__ = a + assert find_offline_cache_miss(a) is None + + +# ---------- ValidationContext.maybe_warn_offline_cache_miss ---------- +@pytest.fixture +def bare_context(): + """A ValidationContext with only the state needed by the dedup helper.""" + ctx = ValidationContext.__new__(ValidationContext) + ctx._offline_cache_misses_warned = set() + return ctx + + +@pytest.fixture +def mock_logger(monkeypatch): + """ + Replace the module-level logger in ``rocrate_validator.models`` with a + MagicMock. The project's custom logger sets ``propagate=False``, so + pytest's ``caplog`` does not see its records — observing the mock is + both simpler and more precise. + """ + fake = MagicMock() + monkeypatch.setattr(models_module, "logger", fake) + return fake + + +def test_maybe_warn_returns_false_for_unrelated_exception(bare_context, mock_logger): + assert bare_context.maybe_warn_offline_cache_miss(ValueError("nope")) is False + mock_logger.warning.assert_not_called() + + +def test_maybe_warn_emits_once_per_url(bare_context, mock_logger): + url = "https://example.org/ctx" + for _ in range(3): + assert bare_context.maybe_warn_offline_cache_miss(OfflineCacheMissError(url)) is True + assert mock_logger.warning.call_count == 1 + # The bare miss exception is logged via "%s" so it stringifies and the + # URL appears verbatim in the formatted message. 
+ args, _ = mock_logger.warning.call_args + assert url in str(args[1]) + + +def test_maybe_warn_emits_once_per_distinct_url(bare_context, mock_logger): + url_a = "https://example.org/a" + url_b = "https://example.org/b" + bare_context.maybe_warn_offline_cache_miss(OfflineCacheMissError(url_a)) + bare_context.maybe_warn_offline_cache_miss(OfflineCacheMissError(url_b)) + bare_context.maybe_warn_offline_cache_miss(OfflineCacheMissError(url_a)) + assert mock_logger.warning.call_count == 2 + logged = " ".join(str(call.args[1]) for call in mock_logger.warning.call_args_list) + assert url_a in logged + assert url_b in logged + + +def test_maybe_warn_dedups_when_miss_is_wrapped(bare_context, mock_logger): + url = "https://example.org/ctx" + try: + raise RuntimeError("wrapped") from OfflineCacheMissError(url) + except RuntimeError as wrapped_exc: + wrapped = wrapped_exc + # First call: direct miss; warning emitted. + assert bare_context.maybe_warn_offline_cache_miss(OfflineCacheMissError(url)) is True + # Second call: same URL but reached via a wrapper exception. Must still + # be recognized through the __cause__ chain and dedup'd against the first. + assert bare_context.maybe_warn_offline_cache_miss(wrapped) is True + assert mock_logger.warning.call_count == 1 diff --git a/tests/unit/test_requirement_lifecycle.py b/tests/unit/test_requirement_lifecycle.py new file mode 100644 index 000000000..15c6d224c --- /dev/null +++ b/tests/unit/test_requirement_lifecycle.py @@ -0,0 +1,140 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from rocrate_validator import services +from rocrate_validator.models import RequirementLoader, Severity, ValidationContext, ValidationSettings +from tests.ro_crates import InvalidRootDataEntity + + +class _RequirementTypeSpy: + """Stand-in for a Requirement subclass that records lifecycle hook calls.""" + + def __init__(self, name: str, timeline: list): + self.__name__ = name + self._timeline = timeline + self.calls: list[tuple[str, ValidationContext]] = [] + + def initialize(self, context: ValidationContext) -> None: + self.calls.append(("initialize", context)) + self._timeline.append(("initialize", self.__name__)) + + def finalize(self, context: ValidationContext) -> None: + self.calls.append(("finalize", context)) + self._timeline.append(("finalize", self.__name__)) + + +@pytest.fixture +def validation_settings(): + return ValidationSettings( + rocrate_uri=str(InvalidRootDataEntity().invalid_root_type), + requirement_severity=Severity.OPTIONAL, + abort_on_first=False, + ) + + +@pytest.fixture +def lifecycle_spies(monkeypatch): + """ + Replace the registered requirement classes with two spy stand-ins. + + Returns (spies, timeline) where timeline records the global ordering + of hook invocations across all spies. + """ + timeline: list[tuple[str, str]] = [] + spies = [ + _RequirementTypeSpy("SpyTypeA", timeline), + _RequirementTypeSpy("SpyTypeB", timeline), + ] + monkeypatch.setattr( + RequirementLoader, + "__get_requirement_classes__", + staticmethod(lambda: spies), + ) + return spies, timeline + + +def test_initialize_and_finalize_called_once_per_requirement_type(lifecycle_spies, validation_settings): + """ + Check that each requirement type's initialize and + finalize hooks are called exactly once per validation run. 
+ """ + spies, _ = lifecycle_spies + + services.validate(validation_settings) + + for spy in spies: + events = [evt for evt, _ in spy.calls] + assert events == ["initialize", "finalize"], ( + f"{spy.__name__} expected exactly one initialize then one finalize, got {events}" + ) + + +def test_lifecycle_hooks_receive_the_same_validation_context(lifecycle_spies, validation_settings): + """ + Check that all lifecycle hooks receive the same ValidationContext instance. + This ensures that the context is properly shared across all requirements. + """ + spies, _ = lifecycle_spies + + services.validate(validation_settings) + + contexts = [ctx for spy in spies for _, ctx in spy.calls] + assert contexts, "No lifecycle hook was invoked" + first = contexts[0] + assert isinstance(first, ValidationContext) + assert all(ctx is first for ctx in contexts), ( + "All initialize/finalize invocations must share the same ValidationContext" + ) + + +def test_all_initialize_hooks_run_before_any_finalize_hook(lifecycle_spies, validation_settings): + """ + Check that all initialize hooks are called before any finalize hook is called. + This ensures that the context is fully initialized before any requirement starts finalizing. + """ + _, timeline = lifecycle_spies + + services.validate(validation_settings) + + init_indices = [i for i, (evt, _) in enumerate(timeline) if evt == "initialize"] + finalize_indices = [i for i, (evt, _) in enumerate(timeline) if evt == "finalize"] + assert init_indices and finalize_indices, "Lifecycle hooks were not all triggered" + assert max(init_indices) < min(finalize_indices), ( + f"Expected every initialize to precede every finalize, got timeline {timeline}" + ) + + +def test_lifecycle_hooks_invoked_exactly_once_per_validation_run(lifecycle_spies, validation_settings): + """ + Run validation multiple times and check that each spy receives exactly one + initialize+finalize pair per run. 
+ """ + + # extract spies from fixture + spies, _ = lifecycle_spies + + # run validation multiple times and + # check that each spy receives exactly one initialize+finalize pair per run + runs = 3 + for _ in range(runs): + services.validate(validation_settings) + + for spy in spies: + events = [evt for evt, _ in spy.calls] + assert events == ["initialize", "finalize"] * runs, ( + f"{spy.__name__} should receive exactly one initialize+finalize " + f"pair per validation run (got {events} across {runs} runs)" + ) diff --git a/tests/unit/test_rocrate.py b/tests/unit/test_rocrate.py index d91ce3f4e..69e0b4fe7 100644 --- a/tests/unit/test_rocrate.py +++ b/tests/unit/test_rocrate.py @@ -556,20 +556,104 @@ def test_entity_path_from_identifier(): quoted_entity_id = "pics/2017-06-11%2012.56.14.jpg" path = ROCrateEntity.get_path_from_identifier(quoted_entity_id, rocrate_path=rocrate_path) logger.debug(f"Quoted Entity Path: {path}") - assert str(path) == f"{rocrate_path}/pics/2017-06-11%2012.56.14.jpg", \ + assert str(path) == f"{rocrate_path}/pics/2017-06-11%2012.56.14.jpg", ( "Path should be pics/2017-06-11%2012.56.14.jpg" + ) # Test quoted entity id which does not exist within the ro-crate quoted_entity_id = "pics/2018-06-11%2012.56.14.jpg" - path = ROCrateEntity.get_path_from_identifier( - quoted_entity_id, rocrate_path=rocrate_path, decode=True) + path = ROCrateEntity.get_path_from_identifier(quoted_entity_id, rocrate_path=rocrate_path, decode=True) logger.debug(f"Quoted Entity Path: {path}") - assert str(path) == f"{rocrate_path}/pics/2018-06-11 12.56.14.jpg", \ - "Path should be pics/2018-06-11 12.56.14.jpg" + assert str(path) == f"{rocrate_path}/pics/2018-06-11 12.56.14.jpg", "Path should be pics/2018-06-11 12.56.14.jpg" # Test unquoted entity id which exists within the ro-crate unquoted_entity_id = "pics/2017-06-11 12.56.14.jpg" path = ROCrateEntity.get_path_from_identifier(unquoted_entity_id, rocrate_path=rocrate_path) logger.debug(f"Unquoted Entity Path: {path}") - 
+ assert str(path) == f"{rocrate_path}/pics/2017-06-11 12.56.14.jpg", "Path should be pics/2017-06-11 12.56.14.jpg" + + +def _metadata_dict_with_id(entity_id: str) -> dict: + """Build a minimal RO-Crate metadata dict referencing a single data entity.""" + return { + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, + "about": {"@id": "./"}, + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Test crate", + "hasPart": [{"@id": entity_id}], + }, + {"@id": entity_id, "@type": "File", "name": "remote-file"}, + ], + } + + +@pytest.mark.parametrize( + "entity_id", + [ + # authority-based absolute URIs (with a `//` authority component) + "scp://transfer.example.org//data/A.0.0", + "sftp://user@host/path/to/file", + "s3://bucket/key", + "https://example.org/data.txt", + "arcp://name,foo/bar", + # `file://` URIs with a (non-local) authority denote files living on + # another host (RFC 8089), so they are remote too. + "file://gs02r3b58-ib0/scratch/tmp/5190874/tmp_rf_samples_slt86rc0", + # scheme-only absolute URIs (no authority; RO-Crate 1.1 § 4.2.2 + RFC 3986) + "urn:doi:10.5281/zenodo.1234", + "doi:10.5281/zenodo.1234", + ], +) +def test_absolute_uri_data_entity_is_classified_as_remote(entity_id): + """ + Data entities whose @id is an absolute URI (any non-file scheme, with or + without authority) MUST be recognized as remote (web-based) data entities + so that the must/4 payload check is skipped for them. + + Regression test for issue #176. 
+ """ + crate = ROCrate.from_metadata_dict(_metadata_dict_with_id(entity_id)) + entity = crate.metadata.get_entity(entity_id) + assert entity is not None, "Entity should be present in the metadata" + assert entity.is_remote(), f"Entity with absolute URI '{entity_id}' should be classified as remote" + assert entity in crate.metadata.get_web_data_entities(), ( + f"Entity '{entity_id}' should be listed as a web data entity" + ) + assert entity not in crate.metadata.get_data_entities(exclude_web_data_entities=True), ( + f"Entity '{entity_id}' should be excluded from local-only data entities" + ) + + +@pytest.mark.parametrize( + "entity_id", + [ + # `file://` URIs without an authority (RFC 8089) or with the special + # `localhost` authority refer to the local machine, so they describe + # local payload members that the must/4 check must still verify. + "file:///absolute/path/to/file.txt", + "file://localhost/absolute/path/to/file.txt", + ], +) +def test_local_file_uri_data_entity_is_not_remote(entity_id): + """ + `file://` Data Entity identifiers that point to the local machine (empty or + `localhost` authority) MUST NOT be treated as remote/web-based: only + `file://<host>/...` URIs with a real host are remote (issue #176 follow-up). 
+ """ + crate = ROCrate.from_metadata_dict(_metadata_dict_with_id(entity_id)) + entity = crate.metadata.get_entity(entity_id) + assert entity is not None, "Entity should be present in the metadata" + assert not entity.is_remote(), ( + f"Entity with local file URI '{entity_id}' should NOT be classified as remote" + ) + assert entity not in crate.metadata.get_web_data_entities(), ( + f"Entity '{entity_id}' should not be listed as a web data entity" + ) diff --git a/tests/unit/test_uri.py b/tests/unit/test_uri.py index 45ceaf7a7..d6d4990db 100644 --- a/tests/unit/test_uri.py +++ b/tests/unit/test_uri.py @@ -25,11 +25,85 @@ def test_valid_url(): uri = URI("http://example.com") assert uri.is_remote_resource() + assert uri.is_natively_checkable() + assert uri.has_supported_rocrate_scheme() + + +def test_uri_with_unknown_scheme_is_accepted_but_not_supported_as_rocrate_root(): + # Schemes outside the natively-supported set are valid URIs (they may + # appear as Data Entity identifiers, e.g. scp://, s3://) but they are + # not accepted as RO-Crate root URIs. + uri = URI("httpx:///example.com") + assert uri.is_remote_resource() + assert not uri.is_natively_checkable() + assert not uri.has_supported_rocrate_scheme() def test_invalid_url(): + # An empty string is not a valid URI. with pytest.raises(ValueError): - URI("httpx:///example.com") + URI("") + + +def test_scp_uri_is_remote(): + uri = URI("scp://transfer.example.org//data/A.0.0") + assert uri.is_remote_resource() + assert uri.is_known_remote_scheme() + assert not uri.is_natively_checkable() + + +def test_s3_uri_is_remote(): + uri = URI("s3://bucket/key/path") + assert uri.is_remote_resource() + assert uri.is_known_remote_scheme() + assert not uri.is_natively_checkable() + + +@pytest.mark.parametrize("uri_str,expected_scheme", [ + # Scheme-only (no authority) absolute URIs are valid per RFC 3986 and + # accepted by RO-Crate 1.1 § 4.2.2 as Data Entity `@id` values. 
+ ("urn:doi:10.5281/zenodo.1234", "urn"), + ("doi:10.5281/zenodo.1234", "doi"), + ("arcp://name,foo/bar", "arcp"), +]) +def test_scheme_only_absolute_uri_is_remote(uri_str, expected_scheme): + uri = URI(uri_str) + assert uri.scheme == expected_scheme + assert uri.is_remote_resource() + assert not uri.is_natively_checkable() + + +def test_file_uri_with_remote_host_is_remote(): + # A `file://` URI carrying a (non-local) authority points to a file on + # another host (RFC 8089) and must be treated as remote, not as a local + # payload member (regression for issue #176 with `file://` schemes). + uri = URI("file://gs02r3b58-ib0/scratch/tmp/5190874/tmp_rf_samples_slt86rc0") + assert uri.scheme == "file" + assert uri.is_remote_resource() + assert not uri.is_local_resource() + assert not uri.is_natively_checkable() + + +@pytest.mark.parametrize("uri_str", [ + "file:///absolute/path/file.txt", + "file://localhost/absolute/path/file.txt", +]) +def test_file_uri_to_local_host_is_local(uri_str): + # An empty or `localhost` authority denotes the local machine. + uri = URI(uri_str) + assert uri.scheme == "file" + assert uri.is_local_resource() + assert not uri.is_remote_resource() + + +@pytest.mark.parametrize("path", ["README.md", "data/file.txt", "./", "/abs/dir"]) +def test_local_path_never_gains_a_spurious_host(path): + # Plain filesystem paths are normalized to authority-less `file:` URIs, so + # the first path segment is never mistaken for a remote host. + uri = URI(path) + assert uri.is_local_resource() + assert not uri.is_remote_resource() + assert uri.get_netloc() == "" def test_url_with_query_params(): @@ -131,10 +205,12 @@ def test_rocrate_uri_remote_valid(): def test_rocrate_uri_remote_invalid(): - - with pytest.raises(ValueError) as excinfo: - URI("httpx:///example.com") - assert str(excinfo.value) == "Invalid URI: httpx:///example.com" + # An unknown scheme is a valid URI but cannot be used as an RO-Crate root. 
+ uri = URI("httpx:///example.com") + assert not validate_rocrate_uri(uri, silent=True), \ + f"The URI {uri} should not be accepted as an RO-Crate root" + with pytest.raises(ROCrateInvalidURIError): + validate_rocrate_uri(uri, silent=False) # Test with an invalid remote URL uri = URI("https:///example.com")