Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pyiceberg/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,10 @@ def project(self, name: str, pred: BoundPredicate) -> UnboundPredicate | None:
if isinstance(pred, BoundLiteralPredicate):
return _truncate_number(name, pred, self.transform(field_type))
elif isinstance(field_type, (BinaryType, StringType)):
if isinstance(pred, BoundNotStartsWith) and len(pred.literal.value) > self.width:
# A prefix longer than the width can't be projected: the truncated partition
# holds both matching and non-matching rows, so it cannot be pruned.
return None
if isinstance(pred, BoundLiteralPredicate):
return _truncate_array(name, pred, self.transform(field_type))

Expand Down
11 changes: 10 additions & 1 deletion tests/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1026,10 +1026,19 @@ def test_projection_truncate_string_starts_with(bound_reference_str: BoundRefere


def test_projection_truncate_string_not_starts_with(bound_reference_str: BoundReference) -> None:
    """Check NotStartsWith projection through a width-2 truncate transform.

    Covers prefixes that are shorter than, equal to, and longer than the
    truncation width. Only the first two are expected to project to a
    NotStartsWith predicate; the last one is expected to yield None.
    """
    # shorter than width: projects to not-starts-with on the untruncated prefix
    assert TruncateTransform(2).project(
        "name", BoundNotStartsWith(term=bound_reference_str, literal=literal("h"))
    ) == NotStartsWith(term="name", literal=literal("h"))

    # equal to width: projects to not-starts-with on the full prefix
    assert TruncateTransform(2).project(
        "name", BoundNotStartsWith(term=bound_reference_str, literal=literal("he"))
    ) == NotStartsWith(term="name", literal=literal("he"))

    # longer than width: can't be projected, so the partition is always read
    assert (
        TruncateTransform(2).project(
            "name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello"))
        )
        is None
    )


def _test_projection(lhs: UnboundPredicate | None, rhs: UnboundPredicate | None) -> None:
assert type(lhs) is type(lhs), f"Different classes: {type(lhs)} != {type(rhs)}"
Expand Down