diff --git a/pyiceberg/expressions/visitors.py b/pyiceberg/expressions/visitors.py index f3cfabf1a5..c11fc93e3b 100644 --- a/pyiceberg/expressions/visitors.py +++ b/pyiceberg/expressions/visitors.py @@ -1844,31 +1844,37 @@ def visit_is_nan(self, term: BoundTerm) -> BooleanExpression: def visit_not_nan(self, term: BoundTerm) -> BooleanExpression: val = term.eval(self.struct) - if isinstance(val, SupportsFloat) and not math.isnan(val): - return self.visit_true() - else: + # Mirror _ExpressionEvaluator.visit_not_nan (val == val): only NaN fails not-NaN. + # A null (and any non-float value) is not NaN, so the predicate holds. + if isinstance(val, SupportsFloat) and math.isnan(val): return self.visit_false() + else: + return self.visit_true() def visit_less_than(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression: - if term.eval(self.struct) < literal.value: + value = term.eval(self.struct) + if value is not None and value < literal.value: return self.visit_true() else: return self.visit_false() def visit_less_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression: - if term.eval(self.struct) <= literal.value: + value = term.eval(self.struct) + if value is not None and value <= literal.value: return self.visit_true() else: return self.visit_false() def visit_greater_than(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression: - if term.eval(self.struct) > literal.value: + value = term.eval(self.struct) + if value is not None and value > literal.value: return self.visit_true() else: return self.visit_false() def visit_greater_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression: - if term.eval(self.struct) >= literal.value: + value = term.eval(self.struct) + if value is not None and value >= literal.value: return self.visit_true() else: return self.visit_false() diff --git a/tests/expressions/test_residual_evaluator.py b/tests/expressions/test_residual_evaluator.py index ba0a0da2e5..63122cc11c 100644 --- a/tests/expressions/test_residual_evaluator.py +++ b/tests/expressions/test_residual_evaluator.py @@ -28,6 +28,7 @@ IsNaN, IsNull, LessThan, + LessThanOrEqual, NotIn, NotNaN, NotNull, @@ -211,7 +212,7 @@ def test_is_not_nan() -> None: res_eval = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema) residual = res_eval.residual_for(Record(None)) - assert residual == AlwaysFalse() + assert residual == AlwaysTrue() residual = res_eval.residual_for(Record(2)) assert residual == AlwaysTrue() @@ -223,12 +224,26 @@ def test_is_not_nan() -> None: res_eval = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema) residual = res_eval.residual_for(Record(None)) - assert residual == AlwaysFalse() + assert residual == AlwaysTrue() residual = res_eval.residual_for(Record(2)) assert residual == AlwaysTrue() +def test_comparison_residual_with_null_partition_value() -> None: + # Regression test for https://github.com/apache/iceberg-python/issues/3498 + # A nullable identity-partitioned column whose partition value is None must not raise a + # TypeError when compared against a literal; it should behave like row evaluation, where a + # null value never satisfies an ordering predicate. + schema = Schema(NestedField(50, "x", IntegerType(), required=False), NestedField(51, "hour", IntegerType())) + spec = PartitionSpec(PartitionField(50, 1050, IdentityTransform(), "x_part")) + + for predicate in (LessThan("x", 1), LessThanOrEqual("x", 1), GreaterThan("x", 1), GreaterThanOrEqual("x", 1)): + res_eval = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema) + residual = res_eval.residual_for(Record(None)) + assert residual == AlwaysFalse(), f"null partition value should not match {predicate}" + + def test_not_in_timestamp() -> None: schema = Schema(NestedField(50, "ts", TimestampType()), NestedField(51, "dateint", IntegerType()))