Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 28 additions & 34 deletions .github/bump_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Infer semver bump from towncrier fragment types and update version."""

import json
import re
import subprocess
import sys
Expand Down Expand Up @@ -116,49 +117,42 @@ def update_file(path: Path, new_version: str):
print(f" Updated {path}")


def sync_release_manifest_versions(manifest_dir: Path, new_version: str):
if not manifest_dir.exists():
def sync_bundle_versions(bundle_path: Path, new_version: str):
if not bundle_path.exists():
return

for manifest_path in sorted(manifest_dir.glob("*.json")):
country_id = manifest_path.stem
text = manifest_path.read_text()
updated = text
updated, bundle_id_replacements = re.subn(
r'("bundle_id"\s*:\s*")[^"]+(")',
rf"\g<1>{country_id}-{new_version}\g<2>",
updated,
count=1,
bundle = json.loads(bundle_path.read_text())
required = ["bundle_version", "policyengine_version", "packages"]
missing = [field for field in required if field not in bundle]
if missing:
print(
f"Could not update {bundle_path}: missing fields {', '.join(missing)}",
file=sys.stderr,
)
updated, policyengine_version_replacements = re.subn(
r'("policyengine_version"\s*:\s*")[^"]+(")',
rf"\g<1>{new_version}\g<2>",
updated,
count=1,
sys.exit(1)
bundle["bundle_version"] = new_version
bundle["policyengine_version"] = new_version
try:
bundle["packages"]["policyengine"]["version"] = new_version
except KeyError:
print(
f"Could not update {bundle_path}: missing packages.policyengine.version",
file=sys.stderr,
)
missing_fields = []
if bundle_id_replacements == 0:
missing_fields.append("bundle_id")
if policyengine_version_replacements == 0:
missing_fields.append("policyengine_version")
if missing_fields:
print(
f"Could not update {manifest_path}: missing fields "
f"{', '.join(missing_fields)}",
file=sys.stderr,
)
sys.exit(1)
if updated != text:
manifest_path.write_text(updated)
print(f" Updated {manifest_path}")
sys.exit(1)
for country_id, data_release in bundle.get("data_releases", {}).items():
if isinstance(data_release, dict):
data_release["policyengine_version"] = new_version
data_release["bundle_id"] = f"{country_id}-{new_version}"
bundle_path.write_text(json.dumps(bundle, indent=2, sort_keys=True) + "\n")
print(f" Updated {bundle_path}")


def main():
root = Path(__file__).resolve().parent.parent
pyproject = root / "pyproject.toml"
changelog = root / "CHANGELOG.md"
changelog_dir = root / "changelog.d"
manifest_dir = root / "src" / "policyengine" / "data" / "release_manifests"
bundle_path = root / "src" / "policyengine" / "data" / "bundle" / "manifest.json"

current = get_current_version(pyproject, changelog, root)
bump = infer_bump(changelog_dir)
Expand All @@ -167,7 +161,7 @@ def main():
print(f"Version: {current} -> {new} ({bump})")

update_file(pyproject, new)
sync_release_manifest_versions(manifest_dir, new)
sync_bundle_versions(bundle_path, new)


if __name__ == "__main__":
Expand Down
42 changes: 42 additions & 0 deletions .github/workflows/pr_code_changes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ on:
paths:
- src/**
- tests/**
- scripts/**
- .github/**
- changelog.d/**
- pyproject.toml
- src/policyengine/data/bundle/manifest.json
workflow_dispatch:

jobs:
Expand Down Expand Up @@ -73,6 +76,45 @@ jobs:
run: uv pip install --system . h5py
- name: Smoke-import core modules
run: python -c "import policyengine; from policyengine.core import Dataset, Policy, Simulation; from policyengine.outputs import aggregate, poverty, inequality; print('import OK')"
BundleVerification:
name: Verify bundle metadata
runs-on: ubuntu-latest
env:
POLICYENGINE_SKIP_COUNTRY_IMPORTS: "1"
steps:
- uses: actions/checkout@v6
- name: Install uv
uses: astral-sh/setup-uv@v8.1.0
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.13'
- name: Check derived bundle metadata
run: python scripts/bundle.py check
- name: Install bundle package scaffold
run: |
uv pip install -e ".[models]" --system
python - <<'PY'
import subprocess
import sys

from policyengine import bundle

requirements = [
requirement
for requirement in bundle.bundle_install_requirements(
countries=["us", "uk"]
)
if not requirement.startswith("policyengine==")
]
subprocess.check_call(
[sys.executable, "-m", "pip", "install", *requirements]
)
PY
- name: Check installed package consistency
run: python -m pip check
- name: Verify bundle packages
run: policyengine bundle verify --country us --country uk --packages-only --json
Test:
runs-on: macos-latest
strategy:
Expand Down
21 changes: 18 additions & 3 deletions .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,15 +117,17 @@ jobs:
python-version: '3.13'
- name: Build changelog
run: pip install yaml-changelog towncrier && make changelog
- name: Generate derived bundle metadata
run: python scripts/bundle.py generate
- name: Preview changelog update
run: ".github/get-changelog-diff.sh"
- name: Install package for TRO regeneration
run: pip install -e . h5py
- name: Regenerate bundled TRACE TROs
env:
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
run: python scripts/generate_trace_tros.py
- name: Update changelog and TROs
run: python scripts/bundle.py generate --include-tros
- name: Update changelog, bundle metadata, and TROs
uses: EndBug/add-and-commit@v9
with:
add: "."
Expand Down Expand Up @@ -154,6 +156,15 @@ jobs:
run: ".github/publish-git-tag.sh"
- name: Build package
run: python -m build
- name: Export bundle release assets
run: python scripts/export_bundle_release_assets.py --dist-dir dist
- name: Verify bundle package metadata
env:
POLICYENGINE_SKIP_COUNTRY_IMPORTS: "1"
run: |
VERSION=$(python .github/fetch_version.py)
policyengine bundle verify --country us --country uk --packages-only --json \
> "dist/policyengine-bundle-$VERSION.verification.json"
- name: Publish a Python distribution to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
Expand All @@ -166,4 +177,8 @@ jobs:
gh release create "$VERSION" \
--title "v$VERSION" \
--notes "See [CHANGELOG.md](https://github.com/PolicyEngine/policyengine.py/blob/main/CHANGELOG.md) for details." \
--latest
--latest \
"dist/policyengine-bundle-$VERSION.json" \
"dist/policyengine-bundle-$VERSION.constraints.txt" \
"dist/policyengine-bundle-$VERSION.citation.txt" \
"dist/policyengine-bundle-$VERSION.verification.json"
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,19 @@ pip install policyengine[uk] # UK model only
pip install policyengine[us] # US model only
```

For a certified package-plus-dataset bundle, use the bundle installer as the
single setup command:

```bash
uvx --from policyengine policyengine bundle install
```

This installs the bundled package scaffold with pip, downloads the certified US
and UK datasets into `./data`, and writes a local receipt that can be checked
with `policyengine bundle status`. When run from `uvx` or `pipx`, the installer
creates or reuses `./.venv`; inside an existing virtualenv or conda environment,
it installs into the active environment.

### For development

```bash
Expand All @@ -111,6 +124,7 @@ uv pip install -e .[dev] # install with dev dependencies (pytest, ruff, m
| **Library user** | `pip install policyengine` | Using the package in your own code |
| **UK only** | `pip install policyengine[uk]` | Only need UK simulations |
| **US only** | `pip install policyengine[us]` | Only need US simulations |
| **Certified bundle** | `uvx --from policyengine policyengine bundle install` | Reproducible model-plus-data setup |
| **Developer** | `uv pip install -e .[dev]` | Contributing to the package |

### Common commands
Expand Down
1 change: 1 addition & 0 deletions changelog.d/bundle-system.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add PolicyEngine bundle metadata and CLI support for installing, inspecting, and verifying a cited package-plus-dataset release.
100 changes: 100 additions & 0 deletions docs/bundles.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# PolicyEngine bundles

A PolicyEngine bundle is the exact set of first-party packages and certified
datasets that belong to a `policyengine` release. The bundle version is always
the same as the `policyengine` version.

Regular package installation remains standard pip:

```bash
pip install "policyengine==4.19.1"
pip install "policyengine[us]==4.19.1"
pip install "policyengine[uk]==4.19.1"
```

For a certified model-plus-data install, run the bundle installer as the single
setup command:

```bash
uvx --from policyengine==4.19.1 policyengine bundle install 4.19.1
```

With no version pin, `uvx` uses the newest published `policyengine` release:

```bash
uvx --from policyengine policyengine bundle install
```

When run from `uvx` or `pipx`, the installer creates or reuses `./.venv`.
Inside an existing virtualenv or conda environment, it installs into that
active environment. It then installs the exact bundled package scaffold with
pip, downloads the certified US and UK datasets into `./data`, moves any
dataset files it replaces into
`./data/.policyengine-bundle-backups/<timestamp>/`, and writes a receipt to
`./data/.policyengine-bundle-receipt.json` that records the target Python.

Country-specific and package-only installs are supported:

```bash
uvx --from policyengine policyengine bundle install --country uk
uvx --from policyengine policyengine bundle install --no-datasets
```

Use `--yes` for CI/CD. Without `--yes`, the installer asks for confirmation before downloading datasets.

The canonical bundle manifest is `src/policyengine/data/bundle/manifest.json`.
Derived artifacts are:

- `pyproject.toml` extras
- `src/policyengine/data/bundle/{country}.trace.tro.jsonld`
- GitHub release assets exported from the bundle manifest

Inspect or verify a local setup with:

```bash
uvx --from policyengine policyengine bundle status --data-dir ./data
uvx --from policyengine policyengine bundle verify 4.19.1 --data-dir ./data
policyengine bundle manifest 4.19.1
```

`status` and `verify` read the receipt and inspect the Python environment that
`install` targeted. Use `--venv` or `--python` only to inspect a different
target explicitly.

## Bundle-only PRs

To update the package versions pinned in the bundle, run:

```bash
python scripts/bundle.py update-packages \
--core 3.27.0 \
--us 1.730.0 \
--uk 2.91.0 \
--us-data 1.118.0
```

To certify a new data release from a data-producer manifest, run:

```bash
python scripts/bundle.py certify-data \
--country uk \
--data-producer populace \
--manifest-uri hf://dataset/policyengine/populace-uk-private@<release>/releases/<release>/release_manifest.json
```

Use `python scripts/bundle.py generate` to regenerate derived bundle metadata,
and `python scripts/bundle.py generate --include-tros` when TRACE TRO sidecars
should also be regenerated. Private data releases require `HUGGING_FACE_TOKEN`
or `HF_TOKEN` for TRO regeneration.

This updates bundle metadata and creates a patch changelog fragment. Do not bump
the `policyengine` version manually in the PR; the existing release workflow
bumps the package and bundle versions together after merge.

CI checks derived bundle metadata, installs the package scaffold from the
bundle manifest, runs `pip check`, and verifies the packaged bundle metadata
with lightweight URI checks. Dataset downloads are handled by
`policyengine bundle install`, so certified UK data can be pinned by manifest
version and downloaded from Hugging Face even when the matching
`policyengine-uk-data` package is not published to PyPI.
12 changes: 6 additions & 6 deletions docs/data-publishing-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ pairing does not:
`policyengine.py` release. **These are operational aliases, not
a scientific citation surface** — release bundles remain the
thing papers cite.
4. **Simpler refresh mechanics.** `refresh_release_bundle(country,
...)` becomes "fetch channel → read manifest → write the
certified release manifest" with no sha256 juggling.
4. **Simpler certification mechanics.** `certify_data_release(country,
data_producer=...)` becomes "fetch producer manifest → validate → write
the certified bundle data release" with no sha256 juggling.

Notably absent from that list compared to earlier drafts: **no
claim of org-independent build identity**, **no claim of
Expand Down Expand Up @@ -145,9 +145,9 @@ different things to four different audiences.

- The certification process (who signs off, what validations, what
compatibility checks) — unchanged.
- `src/policyengine/data/release_manifests/{country}.json` remains
the shipped record of what a given `policyengine.py` release
guarantees.
- `src/policyengine/data/bundle/manifest.json` remains the source record of
what a given `policyengine.py` release guarantees and is packaged directly
into the wheel.
- The staged `provisional → certified → retired` lifecycle —
unchanged.
- `*.trace.tro.jsonld` sidecars — unchanged (shorter to build
Expand Down
Loading
Loading