From c98504d9c58833c5ce937fa565bee12facfe513e Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 18 Jun 2026 12:47:16 -0400 Subject: [PATCH] fix: prefer entity key in TOML file when restoring Prior to this commit, we would always assume the entity key would match the directory naming, but that is not a hard requirement. Furthermore, it looks like something in the backup code is adding hashes to the directories unnecessarily, so this was coming up quite often. --- .../applets/backup_restore/zipper.py | 54 +++++++++++++++---- ...8ca126.toml => section1-extra-8ca126.toml} | 2 +- ...97-f1e9-4e8f-87f0-d5a3c26083e2-extra.toml} | 2 +- .../component_versions/v2/block.xml | 0 4 files changed, 46 insertions(+), 12 deletions(-) rename tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/{section1-8ca126.toml => section1-extra-8ca126.toml} (80%) rename tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/{c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2.toml => c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2-extra.toml} (64%) rename tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/{c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2 => c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2-extra}/component_versions/v2/block.xml (100%) diff --git a/src/openedx_content/applets/backup_restore/zipper.py b/src/openedx_content/applets/backup_restore/zipper.py index 3261836c3..9dfefb309 100644 --- a/src/openedx_content/applets/backup_restore/zipper.py +++ b/src/openedx_content/applets/backup_restore/zipper.py @@ -4,6 +4,7 @@ """ import hashlib import time +import tomllib import zipfile from collections import defaultdict from dataclasses import asdict, dataclass @@ -282,7 +283,7 @@ def create_zip(self, path: str) -> None: Exception: If the learning package cannot be found or if the zip creation fails. """ - with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zipf: + with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9) as zipf: # Add the package.toml file package_toml_content: str = toml_learning_package( self.learning_package, self.utc_now, user=self.user, origin_server=self.origin_server @@ -1060,7 +1061,17 @@ def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]: "collections": [], } - for path in file_paths: + # This is going to map static file directory roots to the appropriate + # entity keys. + comp_paths_to_keys = {} + + # The ordering of the file processing is important because we need to + # ensure that TOML files for a given component are processed before the + # static files for that component. '.' sorts before '/', so "foo.toml" + # will sort before "foo/component_versions/v1/static/figure1.webp" or + # any other subdirectory of the "foo" component. Processing the TOML + # first allows us to map the directory to a entity key. + for path in sorted(file_paths): if path.endswith("/"): # Skip directories continue @@ -1073,21 +1084,44 @@ def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]: if path.endswith(".toml"): # Component entity TOML files organized["components"].append(path) + component_toml_str = self._read_file_from_zip(path) + component_toml = tomllib.loads(component_toml_str) + entity_key = component_toml['entity']['key'] + comp_path = path[:-5] # removes the ".toml" at the end + + # This maps the root path of a component, e.g."entities/xblock.v1/html/my_component_a822bb" + # to the actual key, e.g. "xblock.v1:html:my_component". The last part of the key will + # often correlate to the directory name, but does not have to (a hash is sometimes added). + comp_paths_to_keys[comp_path] = entity_key + else: # Component static files # Path structure: entities////component_versions//static/... - # Example: entities/xblock.v1/html/my_component_123456/component_versions/v1/static/... - component_key = Path(path).parts[1:4] # e.g., ['xblock.v1', 'html', 'my_component_123456'] + # Example: entities/xblock.v1/html/my_component_a822bb/component_versions/v1/static/... + + # e.g. 'entities/xblock.v1/html/my_component_a822bb' + component_root_path = '/'.join(Path(path).parts[0:4]) + + try: + component_identifier = comp_paths_to_keys[component_root_path] + except KeyError: + self.errors.append( + { + "file": path, + "errors": "Could not find destination entity key for component static file." + } + ) + continue + num_version = Path(path).parts[5] if len(Path(path).parts) > 5 else "v1" # e.g., 'v1' - if len(component_key) == 3: - component_identifier = ":".join(component_key) - component_identifier += f":{num_version}" - organized["component_static_files"][component_identifier].append(path) - else: - self.errors.append({"file": path, "errors": "Invalid component static file path structure."}) + + component_identifier += f":{num_version}" + organized["component_static_files"][component_identifier].append(path) + elif path.startswith("collections/") and path.endswith(".toml"): # Collection TOML files organized["collections"].append(path) + return organized def _get_versions_to_write( diff --git a/tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/section1-8ca126.toml b/tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/section1-extra-8ca126.toml similarity index 80% rename from tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/section1-8ca126.toml rename to tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/section1-extra-8ca126.toml index 00c34d430..855902ab3 100644 --- a/tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/section1-8ca126.toml +++ b/tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/section1-extra-8ca126.toml @@ -1,6 +1,6 @@ [entity] can_stand_alone = true -key = "section1-8ca126" +key = "section1-8ca126" # This intentionally does not match the filename created = 2025-09-04T22:51:40.919872Z [entity.draft] diff --git a/tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2.toml b/tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2-extra.toml similarity index 64% rename from tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2.toml rename to tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2-extra.toml index ca759834e..bf88a740f 100644 --- a/tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2.toml +++ b/tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2-extra.toml @@ -1,6 +1,6 @@ [entity] can_stand_alone = true -key = "xblock.v1:html:c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2" +key = "xblock.v1:html:c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2" # This intentionally does not match the filename created = 2025-10-06T16:59:34.160314Z [entity.draft] diff --git a/tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2/component_versions/v2/block.xml b/tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2-extra/component_versions/v2/block.xml similarity index 100% rename from tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2/component_versions/v2/block.xml rename to tests/openedx_content/applets/backup_restore/fixtures/library_backup/entities/xblock.v1/html/c22b9f97-f1e9-4e8f-87f0-d5a3c26083e2-extra/component_versions/v2/block.xml