From a0be7e0858c20f3f353e81297a4551e5cf09ea4d Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Tue, 16 Jun 2026 19:07:49 +0000 Subject: [PATCH 1/2] Truncate - length > width --- pyiceberg/transforms.py | 4 ++++ tests/test_transforms.py | 13 ++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 739e18a6e6..9ffae05e73 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -812,6 +812,10 @@ def project(self, name: str, pred: BoundPredicate) -> UnboundPredicate | None: if isinstance(pred, BoundLiteralPredicate): return _truncate_number(name, pred, self.transform(field_type)) elif isinstance(field_type, (BinaryType, StringType)): + if isinstance(pred, BoundNotStartsWith) and len(pred.literal.value) > self.width: + # A prefix longer than the width can't be projected: the truncated partition + # holds both matching and non-matching rows, so it cannot be pruned. + return None if isinstance(pred, BoundLiteralPredicate): return _truncate_array(name, pred, self.transform(field_type)) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 96500907cf..128a42bc8d 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -1026,10 +1026,21 @@ def test_projection_truncate_string_starts_with(bound_reference_str: BoundRefere def test_projection_truncate_string_not_starts_with(bound_reference_str: BoundReference) -> None: + # shorter than width: projects to not-starts-with on the untruncated prefix assert TruncateTransform(2).project( - "name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello")) + "name", BoundNotStartsWith(term=bound_reference_str, literal=literal("h")) + ) == NotStartsWith(term="name", literal=literal("h")) + + # equal to width: projects to not-starts-with on the full prefix + assert TruncateTransform(2).project( + "name", BoundNotStartsWith(term=bound_reference_str, literal=literal("he")) ) == NotStartsWith(term="name", literal=literal("he")) + # longer than width: can't be projected, so the partition is always read + assert ( + TruncateTransform(2).project("name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello"))) is None + ) + def _test_projection(lhs: UnboundPredicate | None, rhs: UnboundPredicate | None) -> None: assert type(lhs) is type(lhs), f"Different classes: {type(lhs)} != {type(rhs)}" From e73ca38a190d439a76e5b8b342e98304ddc7ecfd Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Tue, 16 Jun 2026 19:28:37 +0000 Subject: [PATCH 2/2] linters --- tests/test_transforms.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 128a42bc8d..0e7836e293 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -1037,9 +1037,7 @@ def test_projection_truncate_string_not_starts_with(bound_reference_str: BoundRe ) == NotStartsWith(term="name", literal=literal("he")) # longer than width: can't be projected, so the partition is always read - assert ( - TruncateTransform(2).project("name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello"))) is None - ) + assert TruncateTransform(2).project("name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello"))) is None def _test_projection(lhs: UnboundPredicate | None, rhs: UnboundPredicate | None) -> None: