diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index fb5622c99f7a13..be3ab8e7c30cb5 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -116,6 +116,8 @@ PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, int64_t); PyAPI_FUNC(_PyStackRef) _PyCompactLong_Add(PyLongObject *left, PyLongObject *right); PyAPI_FUNC(_PyStackRef) _PyCompactLong_Multiply(PyLongObject *left, PyLongObject *right); PyAPI_FUNC(_PyStackRef) _PyCompactLong_Subtract(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(_PyStackRef) _PyCompactLong_AddWide(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(_PyStackRef) _PyCompactLong_SubtractWide(PyLongObject *left, PyLongObject *right); // Export for 'binascii' shared extension. PyAPI_DATA(unsigned char) _PyLong_DigitValue[256]; @@ -346,6 +348,102 @@ _PyLong_CheckExactAndCompact(PyObject *op) return PyLong_CheckExact(op) && _PyLong_IsCompact((const PyLongObject *)op); } +/* Max number of digits a PyLong can have and still fit in int64_t. + * 30-bit builds: ceil(64/30) = 3. 15-bit builds: ceil(64/15) = 5. */ +#define _PY_LONG_MAX_DIGITS_FOR_INT64 ((64 + PyLong_SHIFT - 1) / PyLong_SHIFT) + +/* Return 1 if v fits in int64_t. Does not require exact type. */ +static inline int +_PyLong_FitsInt64(const PyLongObject *v) +{ + uintptr_t tag = v->long_value.lv_tag; + /* Fast path: digit count is strictly below the max — always fits. */ + if (tag < ((uintptr_t)_PY_LONG_MAX_DIGITS_FOR_INT64 << NON_SIZE_BITS)) { + return 1; + } + Py_ssize_t ndigits = (Py_ssize_t)(tag >> NON_SIZE_BITS); + if (ndigits > _PY_LONG_MAX_DIGITS_FOR_INT64) { + return 0; + } + /* ndigits == _PY_LONG_MAX_DIGITS_FOR_INT64: check the top digit. 
*/ + unsigned int shift = PyLong_SHIFT * (unsigned int)(ndigits - 1); + uint64_t top = (uint64_t)v->long_value.ob_digit[ndigits - 1]; + if ((tag & SIGN_MASK) == SIGN_NEGATIVE) { + uint64_t max_top = ((uint64_t)INT64_MAX + 1) >> shift; + if (top < max_top) { + return 1; + } + if (top > max_top) { + return 0; + } + /* top == max_top: only INT64_MIN has all lower digits == 0. */ + for (Py_ssize_t i = 0; i < ndigits - 1; i++) { + if (v->long_value.ob_digit[i] != 0) { + return 0; + } + } + return 1; + } + uint64_t max_top = (uint64_t)INT64_MAX >> shift; + return top <= max_top; +} + +/* Extract an exact int to int64_t without raising. + * Returns true and writes *out on success; returns false if out of range. + * Never sets a Python exception. */ +static inline bool +_PyLong_TryAsInt64Exact(PyLongObject *v, int64_t *out) +{ + assert(PyLong_CheckExact((PyObject *)v)); + uintptr_t tag = v->long_value.lv_tag; + int sign = 1 - (int)(tag & SIGN_MASK); + /* Compact (0 or 1 digit): fast, branchless extraction. */ + if (tag < (2u << NON_SIZE_BITS)) { + *out = (int64_t)(sign * (Py_ssize_t)v->long_value.ob_digit[0]); + return true; + } + Py_ssize_t ndigits = (Py_ssize_t)(tag >> NON_SIZE_BITS); + if (ndigits > _PY_LONG_MAX_DIGITS_FOR_INT64) { + return false; + } + uint64_t abs_val = 0; +#if PyLong_SHIFT == 30 + if (ndigits == 2) { + /* Most common non-compact case on 64-bit builds. */ + abs_val = (uint64_t)v->long_value.ob_digit[0] | + ((uint64_t)v->long_value.ob_digit[1] << 30); + *out = sign < 0 ? -(int64_t)abs_val : (int64_t)abs_val; + return true; + } +#endif + unsigned int shift = 0; + for (Py_ssize_t i = 0; i < ndigits - 1; i++) { + abs_val |= (uint64_t)v->long_value.ob_digit[i] << shift; + shift += PyLong_SHIFT; + } + uint64_t top = (uint64_t)v->long_value.ob_digit[ndigits - 1]; + /* Prevent UB from an oversized shift when at the maximum digit count. 
*/ + if (ndigits == _PY_LONG_MAX_DIGITS_FOR_INT64 && (top >> (64 - shift)) != 0) { + return false; + } + abs_val |= top << shift; + if (abs_val <= (uint64_t)INT64_MAX) { + *out = sign < 0 ? -(int64_t)abs_val : (int64_t)abs_val; + return true; + } + if (sign < 0 && abs_val == (uint64_t)INT64_MAX + 1) { + *out = INT64_MIN; + return true; + } + return false; +} + +static inline int +_PyLong_CheckExactAndFitsInt64(PyObject *op) +{ + return PyLong_CheckExact(op) && _PyLong_FitsInt64((const PyLongObject *)op); +} + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index d2e29a1b95ede2..7d124f093d096c 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -44,6 +44,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 2; case BINARY_OP_ADD_INT: return 2; + case BINARY_OP_ADD_INT_WIDE: + return 2; case BINARY_OP_ADD_UNICODE: return 2; case BINARY_OP_EXTEND: @@ -72,6 +74,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 2; case BINARY_OP_SUBTRACT_INT: return 2; + case BINARY_OP_SUBTRACT_INT_WIDE: + return 2; case BINARY_SLICE: return 3; case BUILD_INTERPOLATION: @@ -545,6 +549,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case BINARY_OP_ADD_INT: return 1; + case BINARY_OP_ADD_INT_WIDE: + return 1; case BINARY_OP_ADD_UNICODE: return 1; case BINARY_OP_EXTEND: @@ -573,6 +579,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case BINARY_OP_SUBTRACT_INT: return 1; + case BINARY_OP_SUBTRACT_INT_WIDE: + return 1; case BINARY_SLICE: return 1; case BUILD_INTERPOLATION: @@ -1115,6 +1123,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP] = { true, INSTR_FMT_IBC0000, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, 
[BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, + [BINARY_OP_ADD_INT_WIDE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_LOCAL_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1129,6 +1138,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_SUBSCR_USTR_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, + [BINARY_OP_SUBTRACT_INT_WIDE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_INTERPOLATION] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1573,6 +1583,7 @@ const char *_PyOpcode_OpName[267] = { [BINARY_OP] = "BINARY_OP", [BINARY_OP_ADD_FLOAT] = "BINARY_OP_ADD_FLOAT", [BINARY_OP_ADD_INT] = "BINARY_OP_ADD_INT", + [BINARY_OP_ADD_INT_WIDE] = "BINARY_OP_ADD_INT_WIDE", [BINARY_OP_ADD_UNICODE] = "BINARY_OP_ADD_UNICODE", [BINARY_OP_EXTEND] = "BINARY_OP_EXTEND", [BINARY_OP_INPLACE_ADD_UNICODE] = "BINARY_OP_INPLACE_ADD_UNICODE", @@ -1587,6 +1598,7 @@ const char *_PyOpcode_OpName[267] = { [BINARY_OP_SUBSCR_USTR_INT] = "BINARY_OP_SUBSCR_USTR_INT", [BINARY_OP_SUBTRACT_FLOAT] = "BINARY_OP_SUBTRACT_FLOAT", [BINARY_OP_SUBTRACT_INT] = "BINARY_OP_SUBTRACT_INT", + [BINARY_OP_SUBTRACT_INT_WIDE] = "BINARY_OP_SUBTRACT_INT_WIDE", [BINARY_SLICE] = "BINARY_SLICE", 
[BUILD_INTERPOLATION] = "BUILD_INTERPOLATION", [BUILD_LIST] = "BUILD_LIST", @@ -1856,8 +1868,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [125] = 125, [126] = 126, [127] = 127, - [219] = 219, - [220] = 220, [221] = 221, [222] = 222, [223] = 223, @@ -1873,6 +1883,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [BINARY_OP] = BINARY_OP, [BINARY_OP_ADD_FLOAT] = BINARY_OP, [BINARY_OP_ADD_INT] = BINARY_OP, + [BINARY_OP_ADD_INT_WIDE] = BINARY_OP, [BINARY_OP_ADD_UNICODE] = BINARY_OP, [BINARY_OP_EXTEND] = BINARY_OP, [BINARY_OP_INPLACE_ADD_UNICODE] = BINARY_OP, @@ -1887,6 +1898,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [BINARY_OP_SUBSCR_USTR_INT] = BINARY_OP, [BINARY_OP_SUBTRACT_FLOAT] = BINARY_OP, [BINARY_OP_SUBTRACT_INT] = BINARY_OP, + [BINARY_OP_SUBTRACT_INT_WIDE] = BINARY_OP, [BINARY_SLICE] = BINARY_SLICE, [BUILD_INTERPOLATION] = BUILD_INTERPOLATION, [BUILD_LIST] = BUILD_LIST, @@ -2117,8 +2129,6 @@ const uint8_t _PyOpcode_Deopt[256] = { case 125: \ case 126: \ case 127: \ - case 219: \ - case 220: \ case 221: \ case 222: \ case 223: \ diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index 758542720acf31..877e89fae02be9 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -133,94 +133,96 @@ extern "C" { #define RESUME 128 #define BINARY_OP_ADD_FLOAT 129 #define BINARY_OP_ADD_INT 130 -#define BINARY_OP_ADD_UNICODE 131 -#define BINARY_OP_EXTEND 132 -#define BINARY_OP_MULTIPLY_FLOAT 133 -#define BINARY_OP_MULTIPLY_INT 134 -#define BINARY_OP_SUBSCR_DICT 135 -#define BINARY_OP_SUBSCR_GETITEM 136 -#define BINARY_OP_SUBSCR_LIST_INT 137 -#define BINARY_OP_SUBSCR_LIST_SLICE 138 -#define BINARY_OP_SUBSCR_STR_INT 139 -#define BINARY_OP_SUBSCR_TUPLE_INT 140 -#define BINARY_OP_SUBSCR_USTR_INT 141 -#define BINARY_OP_SUBTRACT_FLOAT 142 -#define BINARY_OP_SUBTRACT_INT 143 -#define CALL_ALLOC_AND_ENTER_INIT 144 -#define CALL_BOUND_METHOD_EXACT_ARGS 145 -#define CALL_BOUND_METHOD_GENERAL 146 -#define CALL_BUILTIN_CLASS 147 -#define CALL_BUILTIN_FAST 148 -#define 
CALL_BUILTIN_FAST_WITH_KEYWORDS 149 -#define CALL_BUILTIN_O 150 -#define CALL_EX_NON_PY_GENERAL 151 -#define CALL_EX_PY 152 -#define CALL_ISINSTANCE 153 -#define CALL_KW_BOUND_METHOD 154 -#define CALL_KW_NON_PY 155 -#define CALL_KW_PY 156 -#define CALL_LEN 157 -#define CALL_LIST_APPEND 158 -#define CALL_METHOD_DESCRIPTOR_FAST 159 -#define CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 160 -#define CALL_METHOD_DESCRIPTOR_NOARGS 161 -#define CALL_METHOD_DESCRIPTOR_O 162 -#define CALL_NON_PY_GENERAL 163 -#define CALL_PY_EXACT_ARGS 164 -#define CALL_PY_GENERAL 165 -#define CALL_STR_1 166 -#define CALL_TUPLE_1 167 -#define CALL_TYPE_1 168 -#define COMPARE_OP_FLOAT 169 -#define COMPARE_OP_INT 170 -#define COMPARE_OP_STR 171 -#define CONTAINS_OP_DICT 172 -#define CONTAINS_OP_SET 173 -#define FOR_ITER_GEN 174 -#define FOR_ITER_LIST 175 -#define FOR_ITER_RANGE 176 -#define FOR_ITER_TUPLE 177 -#define FOR_ITER_VIRTUAL 178 -#define GET_ITER_SELF 179 -#define GET_ITER_VIRTUAL 180 -#define JUMP_BACKWARD_JIT 181 -#define JUMP_BACKWARD_NO_JIT 182 -#define LOAD_ATTR_CLASS 183 -#define LOAD_ATTR_CLASS_WITH_METACLASS_CHECK 184 -#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 185 -#define LOAD_ATTR_INSTANCE_VALUE 186 -#define LOAD_ATTR_METHOD_LAZY_DICT 187 -#define LOAD_ATTR_METHOD_NO_DICT 188 -#define LOAD_ATTR_METHOD_WITH_VALUES 189 -#define LOAD_ATTR_MODULE 190 -#define LOAD_ATTR_NONDESCRIPTOR_NO_DICT 191 -#define LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 192 -#define LOAD_ATTR_PROPERTY 193 -#define LOAD_ATTR_SLOT 194 -#define LOAD_ATTR_WITH_HINT 195 -#define LOAD_GLOBAL_BUILTIN 196 -#define LOAD_GLOBAL_MODULE 197 -#define LOAD_SUPER_ATTR_ATTR 198 -#define LOAD_SUPER_ATTR_METHOD 199 -#define RESUME_CHECK 200 -#define RESUME_CHECK_JIT 201 -#define SEND_ASYNC_GEN 202 -#define SEND_GEN 203 -#define SEND_VIRTUAL 204 -#define STORE_ATTR_INSTANCE_VALUE 205 -#define STORE_ATTR_SLOT 206 -#define STORE_ATTR_WITH_HINT 207 -#define STORE_SUBSCR_DICT 208 -#define STORE_SUBSCR_LIST_INT 209 -#define 
TO_BOOL_ALWAYS_TRUE 210 -#define TO_BOOL_BOOL 211 -#define TO_BOOL_INT 212 -#define TO_BOOL_LIST 213 -#define TO_BOOL_NONE 214 -#define TO_BOOL_STR 215 -#define UNPACK_SEQUENCE_LIST 216 -#define UNPACK_SEQUENCE_TUPLE 217 -#define UNPACK_SEQUENCE_TWO_TUPLE 218 +#define BINARY_OP_ADD_INT_WIDE 131 +#define BINARY_OP_ADD_UNICODE 132 +#define BINARY_OP_EXTEND 133 +#define BINARY_OP_MULTIPLY_FLOAT 134 +#define BINARY_OP_MULTIPLY_INT 135 +#define BINARY_OP_SUBSCR_DICT 136 +#define BINARY_OP_SUBSCR_GETITEM 137 +#define BINARY_OP_SUBSCR_LIST_INT 138 +#define BINARY_OP_SUBSCR_LIST_SLICE 139 +#define BINARY_OP_SUBSCR_STR_INT 140 +#define BINARY_OP_SUBSCR_TUPLE_INT 141 +#define BINARY_OP_SUBSCR_USTR_INT 142 +#define BINARY_OP_SUBTRACT_FLOAT 143 +#define BINARY_OP_SUBTRACT_INT 144 +#define BINARY_OP_SUBTRACT_INT_WIDE 145 +#define CALL_ALLOC_AND_ENTER_INIT 146 +#define CALL_BOUND_METHOD_EXACT_ARGS 147 +#define CALL_BOUND_METHOD_GENERAL 148 +#define CALL_BUILTIN_CLASS 149 +#define CALL_BUILTIN_FAST 150 +#define CALL_BUILTIN_FAST_WITH_KEYWORDS 151 +#define CALL_BUILTIN_O 152 +#define CALL_EX_NON_PY_GENERAL 153 +#define CALL_EX_PY 154 +#define CALL_ISINSTANCE 155 +#define CALL_KW_BOUND_METHOD 156 +#define CALL_KW_NON_PY 157 +#define CALL_KW_PY 158 +#define CALL_LEN 159 +#define CALL_LIST_APPEND 160 +#define CALL_METHOD_DESCRIPTOR_FAST 161 +#define CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 162 +#define CALL_METHOD_DESCRIPTOR_NOARGS 163 +#define CALL_METHOD_DESCRIPTOR_O 164 +#define CALL_NON_PY_GENERAL 165 +#define CALL_PY_EXACT_ARGS 166 +#define CALL_PY_GENERAL 167 +#define CALL_STR_1 168 +#define CALL_TUPLE_1 169 +#define CALL_TYPE_1 170 +#define COMPARE_OP_FLOAT 171 +#define COMPARE_OP_INT 172 +#define COMPARE_OP_STR 173 +#define CONTAINS_OP_DICT 174 +#define CONTAINS_OP_SET 175 +#define FOR_ITER_GEN 176 +#define FOR_ITER_LIST 177 +#define FOR_ITER_RANGE 178 +#define FOR_ITER_TUPLE 179 +#define FOR_ITER_VIRTUAL 180 +#define GET_ITER_SELF 181 +#define GET_ITER_VIRTUAL 182 +#define 
JUMP_BACKWARD_JIT 183 +#define JUMP_BACKWARD_NO_JIT 184 +#define LOAD_ATTR_CLASS 185 +#define LOAD_ATTR_CLASS_WITH_METACLASS_CHECK 186 +#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 187 +#define LOAD_ATTR_INSTANCE_VALUE 188 +#define LOAD_ATTR_METHOD_LAZY_DICT 189 +#define LOAD_ATTR_METHOD_NO_DICT 190 +#define LOAD_ATTR_METHOD_WITH_VALUES 191 +#define LOAD_ATTR_MODULE 192 +#define LOAD_ATTR_NONDESCRIPTOR_NO_DICT 193 +#define LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 194 +#define LOAD_ATTR_PROPERTY 195 +#define LOAD_ATTR_SLOT 196 +#define LOAD_ATTR_WITH_HINT 197 +#define LOAD_GLOBAL_BUILTIN 198 +#define LOAD_GLOBAL_MODULE 199 +#define LOAD_SUPER_ATTR_ATTR 200 +#define LOAD_SUPER_ATTR_METHOD 201 +#define RESUME_CHECK 202 +#define RESUME_CHECK_JIT 203 +#define SEND_ASYNC_GEN 204 +#define SEND_GEN 205 +#define SEND_VIRTUAL 206 +#define STORE_ATTR_INSTANCE_VALUE 207 +#define STORE_ATTR_SLOT 208 +#define STORE_ATTR_WITH_HINT 209 +#define STORE_SUBSCR_DICT 210 +#define STORE_SUBSCR_LIST_INT 211 +#define TO_BOOL_ALWAYS_TRUE 212 +#define TO_BOOL_BOOL 213 +#define TO_BOOL_INT 214 +#define TO_BOOL_LIST 215 +#define TO_BOOL_NONE 216 +#define TO_BOOL_STR 217 +#define UNPACK_SEQUENCE_LIST 218 +#define UNPACK_SEQUENCE_TUPLE 219 +#define UNPACK_SEQUENCE_TWO_TUPLE 220 #define INSTRUMENTED_END_FOR 233 #define INSTRUMENTED_POP_ITER 234 #define INSTRUMENTED_END_SEND 235 diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index 183d0af30acf43..149d05f2d71c4c 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -18,7 +18,9 @@ BINARY_OP=( "BINARY_OP_MULTIPLY_INT", "BINARY_OP_ADD_INT", + "BINARY_OP_ADD_INT_WIDE", "BINARY_OP_SUBTRACT_INT", + "BINARY_OP_SUBTRACT_INT_WIDE", "BINARY_OP_MULTIPLY_FLOAT", "BINARY_OP_ADD_FLOAT", "BINARY_OP_SUBTRACT_FLOAT", @@ -135,95 +137,97 @@ _specialized_opmap = frozendict( BINARY_OP_ADD_FLOAT=129, BINARY_OP_ADD_INT=130, - BINARY_OP_ADD_UNICODE=131, - BINARY_OP_EXTEND=132, + BINARY_OP_ADD_INT_WIDE=131, + BINARY_OP_ADD_UNICODE=132, + 
BINARY_OP_EXTEND=133, BINARY_OP_INPLACE_ADD_UNICODE=3, - BINARY_OP_MULTIPLY_FLOAT=133, - BINARY_OP_MULTIPLY_INT=134, - BINARY_OP_SUBSCR_DICT=135, - BINARY_OP_SUBSCR_GETITEM=136, - BINARY_OP_SUBSCR_LIST_INT=137, - BINARY_OP_SUBSCR_LIST_SLICE=138, - BINARY_OP_SUBSCR_STR_INT=139, - BINARY_OP_SUBSCR_TUPLE_INT=140, - BINARY_OP_SUBSCR_USTR_INT=141, - BINARY_OP_SUBTRACT_FLOAT=142, - BINARY_OP_SUBTRACT_INT=143, - CALL_ALLOC_AND_ENTER_INIT=144, - CALL_BOUND_METHOD_EXACT_ARGS=145, - CALL_BOUND_METHOD_GENERAL=146, - CALL_BUILTIN_CLASS=147, - CALL_BUILTIN_FAST=148, - CALL_BUILTIN_FAST_WITH_KEYWORDS=149, - CALL_BUILTIN_O=150, - CALL_EX_NON_PY_GENERAL=151, - CALL_EX_PY=152, - CALL_ISINSTANCE=153, - CALL_KW_BOUND_METHOD=154, - CALL_KW_NON_PY=155, - CALL_KW_PY=156, - CALL_LEN=157, - CALL_LIST_APPEND=158, - CALL_METHOD_DESCRIPTOR_FAST=159, - CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS=160, - CALL_METHOD_DESCRIPTOR_NOARGS=161, - CALL_METHOD_DESCRIPTOR_O=162, - CALL_NON_PY_GENERAL=163, - CALL_PY_EXACT_ARGS=164, - CALL_PY_GENERAL=165, - CALL_STR_1=166, - CALL_TUPLE_1=167, - CALL_TYPE_1=168, - COMPARE_OP_FLOAT=169, - COMPARE_OP_INT=170, - COMPARE_OP_STR=171, - CONTAINS_OP_DICT=172, - CONTAINS_OP_SET=173, - FOR_ITER_GEN=174, - FOR_ITER_LIST=175, - FOR_ITER_RANGE=176, - FOR_ITER_TUPLE=177, - FOR_ITER_VIRTUAL=178, - GET_ITER_SELF=179, - GET_ITER_VIRTUAL=180, - JUMP_BACKWARD_JIT=181, - JUMP_BACKWARD_NO_JIT=182, - LOAD_ATTR_CLASS=183, - LOAD_ATTR_CLASS_WITH_METACLASS_CHECK=184, - LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN=185, - LOAD_ATTR_INSTANCE_VALUE=186, - LOAD_ATTR_METHOD_LAZY_DICT=187, - LOAD_ATTR_METHOD_NO_DICT=188, - LOAD_ATTR_METHOD_WITH_VALUES=189, - LOAD_ATTR_MODULE=190, - LOAD_ATTR_NONDESCRIPTOR_NO_DICT=191, - LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES=192, - LOAD_ATTR_PROPERTY=193, - LOAD_ATTR_SLOT=194, - LOAD_ATTR_WITH_HINT=195, - LOAD_GLOBAL_BUILTIN=196, - LOAD_GLOBAL_MODULE=197, - LOAD_SUPER_ATTR_ATTR=198, - LOAD_SUPER_ATTR_METHOD=199, - RESUME_CHECK=200, - RESUME_CHECK_JIT=201, - 
SEND_ASYNC_GEN=202, - SEND_GEN=203, - SEND_VIRTUAL=204, - STORE_ATTR_INSTANCE_VALUE=205, - STORE_ATTR_SLOT=206, - STORE_ATTR_WITH_HINT=207, - STORE_SUBSCR_DICT=208, - STORE_SUBSCR_LIST_INT=209, - TO_BOOL_ALWAYS_TRUE=210, - TO_BOOL_BOOL=211, - TO_BOOL_INT=212, - TO_BOOL_LIST=213, - TO_BOOL_NONE=214, - TO_BOOL_STR=215, - UNPACK_SEQUENCE_LIST=216, - UNPACK_SEQUENCE_TUPLE=217, - UNPACK_SEQUENCE_TWO_TUPLE=218, + BINARY_OP_MULTIPLY_FLOAT=134, + BINARY_OP_MULTIPLY_INT=135, + BINARY_OP_SUBSCR_DICT=136, + BINARY_OP_SUBSCR_GETITEM=137, + BINARY_OP_SUBSCR_LIST_INT=138, + BINARY_OP_SUBSCR_LIST_SLICE=139, + BINARY_OP_SUBSCR_STR_INT=140, + BINARY_OP_SUBSCR_TUPLE_INT=141, + BINARY_OP_SUBSCR_USTR_INT=142, + BINARY_OP_SUBTRACT_FLOAT=143, + BINARY_OP_SUBTRACT_INT=144, + BINARY_OP_SUBTRACT_INT_WIDE=145, + CALL_ALLOC_AND_ENTER_INIT=146, + CALL_BOUND_METHOD_EXACT_ARGS=147, + CALL_BOUND_METHOD_GENERAL=148, + CALL_BUILTIN_CLASS=149, + CALL_BUILTIN_FAST=150, + CALL_BUILTIN_FAST_WITH_KEYWORDS=151, + CALL_BUILTIN_O=152, + CALL_EX_NON_PY_GENERAL=153, + CALL_EX_PY=154, + CALL_ISINSTANCE=155, + CALL_KW_BOUND_METHOD=156, + CALL_KW_NON_PY=157, + CALL_KW_PY=158, + CALL_LEN=159, + CALL_LIST_APPEND=160, + CALL_METHOD_DESCRIPTOR_FAST=161, + CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS=162, + CALL_METHOD_DESCRIPTOR_NOARGS=163, + CALL_METHOD_DESCRIPTOR_O=164, + CALL_NON_PY_GENERAL=165, + CALL_PY_EXACT_ARGS=166, + CALL_PY_GENERAL=167, + CALL_STR_1=168, + CALL_TUPLE_1=169, + CALL_TYPE_1=170, + COMPARE_OP_FLOAT=171, + COMPARE_OP_INT=172, + COMPARE_OP_STR=173, + CONTAINS_OP_DICT=174, + CONTAINS_OP_SET=175, + FOR_ITER_GEN=176, + FOR_ITER_LIST=177, + FOR_ITER_RANGE=178, + FOR_ITER_TUPLE=179, + FOR_ITER_VIRTUAL=180, + GET_ITER_SELF=181, + GET_ITER_VIRTUAL=182, + JUMP_BACKWARD_JIT=183, + JUMP_BACKWARD_NO_JIT=184, + LOAD_ATTR_CLASS=185, + LOAD_ATTR_CLASS_WITH_METACLASS_CHECK=186, + LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN=187, + LOAD_ATTR_INSTANCE_VALUE=188, + LOAD_ATTR_METHOD_LAZY_DICT=189, + 
LOAD_ATTR_METHOD_NO_DICT=190, + LOAD_ATTR_METHOD_WITH_VALUES=191, + LOAD_ATTR_MODULE=192, + LOAD_ATTR_NONDESCRIPTOR_NO_DICT=193, + LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES=194, + LOAD_ATTR_PROPERTY=195, + LOAD_ATTR_SLOT=196, + LOAD_ATTR_WITH_HINT=197, + LOAD_GLOBAL_BUILTIN=198, + LOAD_GLOBAL_MODULE=199, + LOAD_SUPER_ATTR_ATTR=200, + LOAD_SUPER_ATTR_METHOD=201, + RESUME_CHECK=202, + RESUME_CHECK_JIT=203, + SEND_ASYNC_GEN=204, + SEND_GEN=205, + SEND_VIRTUAL=206, + STORE_ATTR_INSTANCE_VALUE=207, + STORE_ATTR_SLOT=208, + STORE_ATTR_WITH_HINT=209, + STORE_SUBSCR_DICT=210, + STORE_SUBSCR_LIST_INT=211, + TO_BOOL_ALWAYS_TRUE=212, + TO_BOOL_BOOL=213, + TO_BOOL_INT=214, + TO_BOOL_LIST=215, + TO_BOOL_NONE=216, + TO_BOOL_STR=217, + UNPACK_SEQUENCE_LIST=218, + UNPACK_SEQUENCE_TUPLE=219, + UNPACK_SEQUENCE_TWO_TUPLE=220, ) opmap = frozendict( diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index b48a8812a1a2d1..6b9835716a684e 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -1140,6 +1140,53 @@ def test_small_ints(self): self.assertIs(i - i, 0) self.assertIs(0 * i, 0) + @support.cpython_only + def test_int64_boundary_add(self): + # Verify _PyLong_TryAsInt64Exact handles INT64 extremes correctly. 
+ INT64_MAX = (1 << 63) - 1 + INT64_MIN = -(1 << 63) + + # INT64_MAX: addition that stays within range + self.assertEqual(INT64_MAX + 0, INT64_MAX) + self.assertEqual(INT64_MAX + (-1), INT64_MAX - 1) + + # INT64_MIN: extraction must succeed and arithmetic must be correct + self.assertEqual(INT64_MIN + 0, INT64_MIN) + self.assertEqual(INT64_MIN + 1, INT64_MIN + 1) + + # Adding two values that overflow int64 falls back to slow path + self.assertEqual(INT64_MAX + 1, 1 << 63) + self.assertEqual(INT64_MIN + (-1), INT64_MIN - 1) + + # Values just outside int64 range are not handled by the fast path + beyond_max = INT64_MAX + 2 + beyond_min = INT64_MIN - 2 + self.assertEqual(beyond_max + 1, INT64_MAX + 3) + self.assertEqual(beyond_min + (-1), INT64_MIN - 3) + + def test_int64_boundary_sub(self): + # Verify _PyLong_TryAsInt64Exact handles INT64 extremes for subtraction. + INT64_MAX = (1 << 63) - 1 + INT64_MIN = -(1 << 63) + + # INT64_MIN: subtraction that stays within range + self.assertEqual(INT64_MIN - 0, INT64_MIN) + self.assertEqual(INT64_MIN - (-1), INT64_MIN + 1) + + # INT64_MAX: extraction must succeed and arithmetic must be correct + self.assertEqual(INT64_MAX - 0, INT64_MAX) + self.assertEqual(INT64_MAX - 1, INT64_MAX - 1) + + # Subtracting values that overflow int64 falls back to slow path + self.assertEqual(INT64_MIN - 1, INT64_MIN - 1) + self.assertEqual(INT64_MAX - (-1), 1 << 63) + + # Values just outside int64 range are not handled by the fast path + beyond_max = INT64_MAX + 2 + beyond_min = INT64_MIN - 2 + self.assertEqual(beyond_max - 1, INT64_MAX + 1) + self.assertEqual(beyond_min - (-1), INT64_MIN - 1) + def test_bit_length(self): tiny = 1e-10 for x in range(-65000, 65000): diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 7946550ec0db63..952f668c9d7bae 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1375,20 +1375,35 @@ def binary_op_add_int(): self.assert_specialized(binary_op_add_int, "BINARY_OP_ADD_INT") 
self.assert_no_opcode(binary_op_add_int, "BINARY_OP") - def binary_op_int_non_compact(): + # Wide (non-compact) ints in the int64 range now specialize add. + def binary_op_int_wide_add(): for _ in range(_testinternalcapi.SPECIALIZATION_THRESHOLD): a, b = 10000000000, 1 c = a + b self.assertEqual(c, 10000000001) + + binary_op_int_wide_add() + self.assert_specialized(binary_op_int_wide_add, "BINARY_OP_ADD_INT_WIDE") + + # Wide (non-compact) ints in the int64 range now specialize subtract. + def binary_op_int_wide_sub(): + for _ in range(_testinternalcapi.SPECIALIZATION_THRESHOLD): + a, b = 10000000000, 1 c = a - b self.assertEqual(c, 9999999999) + + binary_op_int_wide_sub() + self.assert_specialized(binary_op_int_wide_sub, "BINARY_OP_SUBTRACT_INT_WIDE") + + # Multiply is still compact-only. + def binary_op_int_non_compact_mul(): + for _ in range(_testinternalcapi.SPECIALIZATION_THRESHOLD): + a, b = 10000000000, 1 c = a * b self.assertEqual(c, 10000000000) - binary_op_int_non_compact() - self.assert_no_opcode(binary_op_int_non_compact, "BINARY_OP_ADD_INT") - self.assert_no_opcode(binary_op_int_non_compact, "BINARY_OP_SUBTRACT_INT") - self.assert_no_opcode(binary_op_int_non_compact, "BINARY_OP_MULTIPLY_INT") + binary_op_int_non_compact_mul() + self.assert_no_opcode(binary_op_int_non_compact_mul, "BINARY_OP_MULTIPLY_INT") def binary_op_add_unicode(): for _ in range(_testinternalcapi.SPECIALIZATION_THRESHOLD): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-10-17-40-41.gh-issue-151289.xZhM7W.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-10-17-40-41.gh-issue-151289.xZhM7W.rst new file mode 100644 index 00000000000000..9aa766aca7ad30 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-10-17-40-41.gh-issue-151289.xZhM7W.rst @@ -0,0 +1,3 @@ +Improve performance of binary ``int`` add and subtract for wide exact +integers that fit in ``int64_t`` while keeping the existing compact-int +specialization unchanged. 
diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index 11dfcc68eb2dac..4bcf3a36fc1c8d 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -241,6 +241,85 @@ DISPATCH(); } + TARGET(BINARY_OP_ADD_INT_WIDE) { + #if _Py_TAIL_CALL_INTERP + int opcode = BINARY_OP_ADD_INT_WIDE; + (void)(opcode); + #endif + _Py_CODEUNIT* const this_instr = next_instr; + (void)this_instr; + frame->instr_ptr = next_instr; + next_instr += 6; + INSTRUCTION_STATS(BINARY_OP_ADD_INT_WIDE); + opcode = BINARY_OP_ADD_INT_WIDE; + static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5, "incorrect cache size"); + _PyStackRef value; + _PyStackRef left; + _PyStackRef right; + _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + // _GUARD_TOS_INT_WIDE + { + value = stack_pointer[-1]; + PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); + if (!_PyLong_CheckExactAndFitsInt64(value_o)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + } + // _GUARD_NOS_INT_WIDE + { + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + if (!_PyLong_CheckExactAndFitsInt64(left_o)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + } + /* Skip 5 cache entries */ + // _BINARY_OP_ADD_INT_WIDE + { + right = value; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + _PyFrame_SetStackPointer(frame, stack_pointer); + res = _PyCompactLong_AddWide((PyLongObject *)left_o, (PyLongObject *)right_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (PyStackRef_IsNull(res)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + l = 
left; + r = right; + if (PyStackRef_IsError(res)) { + JUMP_TO_LABEL(pop_2_error); + } + } + // _POP_TOP_INT + { + value = r; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + // _POP_TOP_INT + { + value = l; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + stack_pointer[-2] = res; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + DISPATCH(); + } + TARGET(BINARY_OP_ADD_UNICODE) { #if _Py_TAIL_CALL_INTERP int opcode = BINARY_OP_ADD_UNICODE; @@ -1377,6 +1456,82 @@ DISPATCH(); } + TARGET(BINARY_OP_SUBTRACT_INT_WIDE) { + #if _Py_TAIL_CALL_INTERP + int opcode = BINARY_OP_SUBTRACT_INT_WIDE; + (void)(opcode); + #endif + _Py_CODEUNIT* const this_instr = next_instr; + (void)this_instr; + frame->instr_ptr = next_instr; + next_instr += 6; + INSTRUCTION_STATS(BINARY_OP_SUBTRACT_INT_WIDE); + opcode = BINARY_OP_SUBTRACT_INT_WIDE; + static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5, "incorrect cache size"); + _PyStackRef value; + _PyStackRef left; + _PyStackRef right; + _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + // _GUARD_TOS_INT_WIDE + { + value = stack_pointer[-1]; + PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); + if (!_PyLong_CheckExactAndFitsInt64(value_o)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + } + // _GUARD_NOS_INT_WIDE + { + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + if (!_PyLong_CheckExactAndFitsInt64(left_o)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + } + /* Skip 5 cache entries */ + // _BINARY_OP_SUBTRACT_INT_WIDE + { + right = value; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + 
assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + _PyFrame_SetStackPointer(frame, stack_pointer); + res = _PyCompactLong_SubtractWide((PyLongObject *)left_o, (PyLongObject *)right_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (PyStackRef_IsNull(res)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + l = left; + r = right; + } + // _POP_TOP_INT + { + value = r; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + // _POP_TOP_INT + { + value = l; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + stack_pointer[-2] = res; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + DISPATCH(); + } + TARGET(BINARY_SLICE) { #if _Py_TAIL_CALL_INTERP int opcode = BINARY_SLICE; diff --git a/Modules/_testinternalcapi/test_targets.h b/Modules/_testinternalcapi/test_targets.h index 1a7eb9169fc837..bed11bc1dd0cb1 100644 --- a/Modules/_testinternalcapi/test_targets.h +++ b/Modules/_testinternalcapi/test_targets.h @@ -131,6 +131,7 @@ static void *opcode_targets_table[256] = { &&TARGET_RESUME, &&TARGET_BINARY_OP_ADD_FLOAT, &&TARGET_BINARY_OP_ADD_INT, + &&TARGET_BINARY_OP_ADD_INT_WIDE, &&TARGET_BINARY_OP_ADD_UNICODE, &&TARGET_BINARY_OP_EXTEND, &&TARGET_BINARY_OP_MULTIPLY_FLOAT, @@ -144,6 +145,7 @@ static void *opcode_targets_table[256] = { &&TARGET_BINARY_OP_SUBSCR_USTR_INT, &&TARGET_BINARY_OP_SUBTRACT_FLOAT, &&TARGET_BINARY_OP_SUBTRACT_INT, + &&TARGET_BINARY_OP_SUBTRACT_INT_WIDE, &&TARGET_CALL_ALLOC_AND_ENTER_INIT, &&TARGET_CALL_BOUND_METHOD_EXACT_ARGS, &&TARGET_CALL_BOUND_METHOD_GENERAL, @@ -231,8 +233,6 @@ static void *opcode_targets_table[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, 
&&TARGET_INSTRUMENTED_END_FOR, &&TARGET_INSTRUMENTED_POP_ITER, &&TARGET_INSTRUMENTED_END_SEND, @@ -478,8 +478,8 @@ static void *opcode_tracing_targets_table[256] = { &&TARGET_TRACE_RECORD, &&TARGET_TRACE_RECORD, &&TARGET_TRACE_RECORD, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_TRACE_RECORD, + &&TARGET_TRACE_RECORD, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, @@ -534,6 +534,7 @@ static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_stop_tracing(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_ADD_FLOAT(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_ADD_INT(TAIL_CALL_PARAMS); +static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_ADD_INT_WIDE(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_ADD_UNICODE(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_EXTEND(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_INPLACE_ADD_UNICODE(TAIL_CALL_PARAMS); @@ -548,6 +549,7 @@ static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_SUBSCR_TUPLE_INT(TAIL_ static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_SUBSCR_USTR_INT(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_SUBTRACT_FLOAT(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_SUBTRACT_INT(TAIL_CALL_PARAMS); +static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_SUBTRACT_INT_WIDE(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_SLICE(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BUILD_INTERPOLATION(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BUILD_LIST(TAIL_CALL_PARAMS); @@ -780,6 +782,7 @@ static py_tail_call_funcptr instruction_funcptr_handler_table[256] = { [BINARY_OP] = _TAIL_CALL_BINARY_OP, [BINARY_OP_ADD_FLOAT] = _TAIL_CALL_BINARY_OP_ADD_FLOAT, 
[BINARY_OP_ADD_INT] = _TAIL_CALL_BINARY_OP_ADD_INT, + [BINARY_OP_ADD_INT_WIDE] = _TAIL_CALL_BINARY_OP_ADD_INT_WIDE, [BINARY_OP_ADD_UNICODE] = _TAIL_CALL_BINARY_OP_ADD_UNICODE, [BINARY_OP_EXTEND] = _TAIL_CALL_BINARY_OP_EXTEND, [BINARY_OP_INPLACE_ADD_UNICODE] = _TAIL_CALL_BINARY_OP_INPLACE_ADD_UNICODE, @@ -794,6 +797,7 @@ static py_tail_call_funcptr instruction_funcptr_handler_table[256] = { [BINARY_OP_SUBSCR_USTR_INT] = _TAIL_CALL_BINARY_OP_SUBSCR_USTR_INT, [BINARY_OP_SUBTRACT_FLOAT] = _TAIL_CALL_BINARY_OP_SUBTRACT_FLOAT, [BINARY_OP_SUBTRACT_INT] = _TAIL_CALL_BINARY_OP_SUBTRACT_INT, + [BINARY_OP_SUBTRACT_INT_WIDE] = _TAIL_CALL_BINARY_OP_SUBTRACT_INT_WIDE, [BINARY_SLICE] = _TAIL_CALL_BINARY_SLICE, [BUILD_INTERPOLATION] = _TAIL_CALL_BUILD_INTERPOLATION, [BUILD_LIST] = _TAIL_CALL_BUILD_LIST, @@ -1019,8 +1023,6 @@ static py_tail_call_funcptr instruction_funcptr_handler_table[256] = { [125] = _TAIL_CALL_UNKNOWN_OPCODE, [126] = _TAIL_CALL_UNKNOWN_OPCODE, [127] = _TAIL_CALL_UNKNOWN_OPCODE, - [219] = _TAIL_CALL_UNKNOWN_OPCODE, - [220] = _TAIL_CALL_UNKNOWN_OPCODE, [221] = _TAIL_CALL_UNKNOWN_OPCODE, [222] = _TAIL_CALL_UNKNOWN_OPCODE, [223] = _TAIL_CALL_UNKNOWN_OPCODE, @@ -1038,6 +1040,7 @@ static py_tail_call_funcptr instruction_funcptr_tracing_table[256] = { [BINARY_OP] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_ADD_FLOAT] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_ADD_INT] = _TAIL_CALL_TRACE_RECORD, + [BINARY_OP_ADD_INT_WIDE] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_ADD_UNICODE] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_EXTEND] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_INPLACE_ADD_UNICODE] = _TAIL_CALL_TRACE_RECORD, @@ -1052,6 +1055,7 @@ static py_tail_call_funcptr instruction_funcptr_tracing_table[256] = { [BINARY_OP_SUBSCR_USTR_INT] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_SUBTRACT_FLOAT] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_SUBTRACT_INT] = _TAIL_CALL_TRACE_RECORD, + [BINARY_OP_SUBTRACT_INT_WIDE] = _TAIL_CALL_TRACE_RECORD, [BINARY_SLICE] = _TAIL_CALL_TRACE_RECORD, [BUILD_INTERPOLATION] = 
_TAIL_CALL_TRACE_RECORD, [BUILD_LIST] = _TAIL_CALL_TRACE_RECORD, @@ -1277,8 +1281,6 @@ static py_tail_call_funcptr instruction_funcptr_tracing_table[256] = { [125] = _TAIL_CALL_UNKNOWN_OPCODE, [126] = _TAIL_CALL_UNKNOWN_OPCODE, [127] = _TAIL_CALL_UNKNOWN_OPCODE, - [219] = _TAIL_CALL_UNKNOWN_OPCODE, - [220] = _TAIL_CALL_UNKNOWN_OPCODE, [221] = _TAIL_CALL_UNKNOWN_OPCODE, [222] = _TAIL_CALL_UNKNOWN_OPCODE, [223] = _TAIL_CALL_UNKNOWN_OPCODE, diff --git a/Objects/longobject.c b/Objects/longobject.c index 6e6011cb19aab5..3f9f331e6bf2a3 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3871,6 +3871,85 @@ _PyCompactLong_Add(PyLongObject *a, PyLongObject *b) return medium_from_stwodigits(v); } +static inline bool +_Py_i64_add_overflow(int64_t a, int64_t b, int64_t *out) +{ +#if defined(__GNUC__) || defined(__clang__) + return __builtin_add_overflow(a, b, out); +#else + if ((b > 0 && a > INT64_MAX - b) || (b < 0 && a < INT64_MIN - b)) { + return true; + } + *out = a + b; + return false; +#endif +} + +/* Build a _PyStackRef from an int64 arithmetic result. + * Returns PyStackRef_ERROR with MemoryError set on OOM; never PyStackRef_NULL. */ +static inline _PyStackRef +_wide_op_result(int64_t v) +{ + if (IS_SMALL_INT(v)) { + return PyStackRef_FromPyObjectBorrow(get_small_int((sdigit)v)); + } + assert(v != 0); + if (is_medium_int(v)) { + PyLongObject *result = (PyLongObject *)_Py_FREELIST_POP(PyLongObject, ints); + if (result == NULL) { + result = PyObject_Malloc(sizeof(PyLongObject)); + if (result == NULL) { + PyErr_NoMemory(); + return PyStackRef_ERROR; + } + _PyObject_Init((PyObject *)result, &PyLong_Type); + _PyLong_InitTag(result); + } + digit abs_v = v < 0 ? (digit)(-(sdigit)v) : (digit)(sdigit)v; + _PyLong_SetSignAndDigitCount(result, v < 0 ?
-1 : 1, 1); + result->long_value.ob_digit[0] = abs_v; + return PyStackRef_FromPyObjectStealMortal((PyObject *)result); + } + PyObject *result = (PyObject *)_PyLong_FromLarge(v); + if (result == NULL) { + return PyStackRef_ERROR; + } + return PyStackRef_FromPyObjectStealMortal(result); +} + +/* Exact int -> int64_t helper for the wide int fast path. + * Keeps the exact-type check local to this translation unit. */ +static inline bool +_PyLong_CheckExactAndTryAsInt64(PyObject *op, int64_t *out) +{ + return PyLong_CheckExact(op) && + _PyLong_TryAsInt64Exact((PyLongObject *)op, out); +} + +/* Wide variant: operands are exact ints in the full int64 range (may be + * non-compact). Returns PyStackRef_NULL (without raising) when an input is + * out of int64 range or the sum overflows int64. Returns PyStackRef_ERROR + * only on OOM. */ +_PyStackRef +_PyCompactLong_AddWide(PyLongObject *a, PyLongObject *b) +{ + /* Fast path: both compact — avoids int64 extraction overhead. */ + if (_PyLong_BothAreCompact(a, b)) { + stwodigits v = medium_value(a) + medium_value(b); + return medium_from_stwodigits(v); + } + int64_t va, vb; + if (!_PyLong_CheckExactAndTryAsInt64((PyObject *)a, &va) || + !_PyLong_CheckExactAndTryAsInt64((PyObject *)b, &vb)) { + return PyStackRef_NULL; + } + int64_t v; + if (_Py_i64_add_overflow(va, vb, &v)) { + return PyStackRef_NULL; + } + return _wide_op_result(v); +} + static PyObject * long_add_method(PyObject *a, PyObject *b) { @@ -3878,7 +3956,6 @@ long_add_method(PyObject *a, PyObject *b) return (PyObject*)long_add((PyLongObject*)a, (PyLongObject*)b); } - static PyLongObject * long_sub(PyLongObject *a, PyLongObject *b) { @@ -3916,6 +3993,39 @@ _PyCompactLong_Subtract(PyLongObject *a, PyLongObject *b) return medium_from_stwodigits(v); } +static inline bool +_Py_i64_sub_overflow(int64_t a, int64_t b, int64_t *out) +{ +#if defined(__GNUC__) || defined(__clang__) + return __builtin_sub_overflow(a, b, out); +#else + if ((b < 0 && a > INT64_MAX + b) || (b > 0 
&& a < INT64_MIN + b)) { + return true; + } + *out = a - b; + return false; +#endif +} + +_PyStackRef +_PyCompactLong_SubtractWide(PyLongObject *a, PyLongObject *b) +{ + if (_PyLong_BothAreCompact(a, b)) { + stwodigits v = medium_value(a) - medium_value(b); + return medium_from_stwodigits(v); + } + int64_t va, vb; + if (!_PyLong_CheckExactAndTryAsInt64((PyObject *)a, &va) || + !_PyLong_CheckExactAndTryAsInt64((PyObject *)b, &vb)) { + return PyStackRef_NULL; + } + int64_t v; + if (_Py_i64_sub_overflow(va, vb, &v)) { + return PyStackRef_NULL; + } + return _wide_op_result(v); +} + static PyObject * long_sub_method(PyObject *a, PyObject *b) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 993d231751409b..8009cf2a9ab7dd 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -616,7 +616,9 @@ dummy_func( family(BINARY_OP, INLINE_CACHE_ENTRIES_BINARY_OP) = { BINARY_OP_MULTIPLY_INT, BINARY_OP_ADD_INT, + BINARY_OP_ADD_INT_WIDE, BINARY_OP_SUBTRACT_INT, + BINARY_OP_SUBTRACT_INT_WIDE, BINARY_OP_MULTIPLY_FLOAT, BINARY_OP_ADD_FLOAT, BINARY_OP_SUBTRACT_FLOAT, @@ -642,6 +644,16 @@ dummy_func( EXIT_IF(!_PyLong_CheckExactAndCompact(value_o)); } + tier1 op(_GUARD_NOS_INT_WIDE, (left, unused -- left, unused)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + EXIT_IF(!_PyLong_CheckExactAndFitsInt64(left_o)); + } + + tier1 op(_GUARD_TOS_INT_WIDE, (value -- value)) { + PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); + EXIT_IF(!_PyLong_CheckExactAndFitsInt64(value_o)); + } + op(_GUARD_NOS_OVERFLOWED, (left, unused -- left, unused)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); assert(Py_TYPE(left_o) == &PyLong_Type); @@ -684,6 +696,25 @@ dummy_func( INPUTS_DEAD(); } + tier1 op(_BINARY_OP_ADD_INT_WIDE, (left, right -- res, l, r)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + + STAT_INC(BINARY_OP, hit); 
+ res = _PyCompactLong_AddWide((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + l = left; + r = right; + INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsError(res)); + } + pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res, l, r)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -699,15 +730,41 @@ dummy_func( INPUTS_DEAD(); } + tier1 op(_BINARY_OP_SUBTRACT_INT_WIDE, (left, right -- res, l, r)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + + STAT_INC(BINARY_OP, hit); + res = _PyCompactLong_SubtractWide((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + l = left; + r = right; + INPUTS_DEAD(); + /* PyStackRef_ERROR (OOM) must raise, not be pushed as the result. */ + ERROR_IF(PyStackRef_IsError(res)); + } + macro(BINARY_OP_MULTIPLY_INT) = _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_MULTIPLY_INT + _POP_TOP_INT + _POP_TOP_INT; macro(BINARY_OP_ADD_INT) = _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_ADD_INT + _POP_TOP_INT + _POP_TOP_INT; + macro(BINARY_OP_ADD_INT_WIDE) = + _GUARD_TOS_INT_WIDE + _GUARD_NOS_INT_WIDE + unused/5 + _BINARY_OP_ADD_INT_WIDE + _POP_TOP_INT + _POP_TOP_INT; + macro(BINARY_OP_SUBTRACT_INT) = _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_SUBTRACT_INT + _POP_TOP_INT + _POP_TOP_INT; + macro(BINARY_OP_SUBTRACT_INT_WIDE) = + _GUARD_TOS_INT_WIDE + _GUARD_NOS_INT_WIDE + unused/5 + _BINARY_OP_SUBTRACT_INT_WIDE + _POP_TOP_INT + _POP_TOP_INT; + // Inplace compact int ops: mutate the uniquely-referenced operand // when possible. The op handles decref of TARGET internally so // the following _POP_TOP_INT becomes _POP_TOP_NOP.
Tier 2 only. diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 94384d5db3c107..49a962b62a5c98 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -241,6 +241,85 @@ DISPATCH(); } + TARGET(BINARY_OP_ADD_INT_WIDE) { + #if _Py_TAIL_CALL_INTERP + int opcode = BINARY_OP_ADD_INT_WIDE; + (void)(opcode); + #endif + _Py_CODEUNIT* const this_instr = next_instr; + (void)this_instr; + frame->instr_ptr = next_instr; + next_instr += 6; + INSTRUCTION_STATS(BINARY_OP_ADD_INT_WIDE); + opcode = BINARY_OP_ADD_INT_WIDE; + static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5, "incorrect cache size"); + _PyStackRef value; + _PyStackRef left; + _PyStackRef right; + _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + // _GUARD_TOS_INT_WIDE + { + value = stack_pointer[-1]; + PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); + if (!_PyLong_CheckExactAndFitsInt64(value_o)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + } + // _GUARD_NOS_INT_WIDE + { + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + if (!_PyLong_CheckExactAndFitsInt64(left_o)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + } + /* Skip 5 cache entries */ + // _BINARY_OP_ADD_INT_WIDE + { + right = value; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + _PyFrame_SetStackPointer(frame, stack_pointer); + res = _PyCompactLong_AddWide((PyLongObject *)left_o, (PyLongObject *)right_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (PyStackRef_IsNull(res)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + l = left; + r = right; + if 
(PyStackRef_IsError(res)) { + JUMP_TO_LABEL(pop_2_error); + } + } + // _POP_TOP_INT + { + value = r; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + // _POP_TOP_INT + { + value = l; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + stack_pointer[-2] = res; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + DISPATCH(); + } + TARGET(BINARY_OP_ADD_UNICODE) { #if _Py_TAIL_CALL_INTERP int opcode = BINARY_OP_ADD_UNICODE; @@ -1377,6 +1456,82 @@ DISPATCH(); } + TARGET(BINARY_OP_SUBTRACT_INT_WIDE) { + #if _Py_TAIL_CALL_INTERP + int opcode = BINARY_OP_SUBTRACT_INT_WIDE; + (void)(opcode); + #endif + _Py_CODEUNIT* const this_instr = next_instr; + (void)this_instr; + frame->instr_ptr = next_instr; + next_instr += 6; + INSTRUCTION_STATS(BINARY_OP_SUBTRACT_INT_WIDE); + opcode = BINARY_OP_SUBTRACT_INT_WIDE; + static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5, "incorrect cache size"); + _PyStackRef value; + _PyStackRef left; + _PyStackRef right; + _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + // _GUARD_TOS_INT_WIDE + { + value = stack_pointer[-1]; + PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); + if (!_PyLong_CheckExactAndFitsInt64(value_o)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + } + // _GUARD_NOS_INT_WIDE + { + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + if (!_PyLong_CheckExactAndFitsInt64(left_o)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + } + /* Skip 5 cache entries */ + // _BINARY_OP_SUBTRACT_INT_WIDE + { + right = value; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + 
assert(PyLong_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + _PyFrame_SetStackPointer(frame, stack_pointer); + res = _PyCompactLong_SubtractWide((PyLongObject *)left_o, (PyLongObject *)right_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (PyStackRef_IsNull(res)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } + l = left; + r = right; + } + // _POP_TOP_INT + { + value = r; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + // _POP_TOP_INT + { + value = l; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + stack_pointer[-2] = res; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + DISPATCH(); + } + TARGET(BINARY_SLICE) { #if _Py_TAIL_CALL_INTERP int opcode = BINARY_SLICE; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 1a7eb9169fc837..bed11bc1dd0cb1 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -131,6 +131,7 @@ static void *opcode_targets_table[256] = { &&TARGET_RESUME, &&TARGET_BINARY_OP_ADD_FLOAT, &&TARGET_BINARY_OP_ADD_INT, + &&TARGET_BINARY_OP_ADD_INT_WIDE, &&TARGET_BINARY_OP_ADD_UNICODE, &&TARGET_BINARY_OP_EXTEND, &&TARGET_BINARY_OP_MULTIPLY_FLOAT, @@ -144,6 +145,7 @@ static void *opcode_targets_table[256] = { &&TARGET_BINARY_OP_SUBSCR_USTR_INT, &&TARGET_BINARY_OP_SUBTRACT_FLOAT, &&TARGET_BINARY_OP_SUBTRACT_INT, + &&TARGET_BINARY_OP_SUBTRACT_INT_WIDE, &&TARGET_CALL_ALLOC_AND_ENTER_INIT, &&TARGET_CALL_BOUND_METHOD_EXACT_ARGS, &&TARGET_CALL_BOUND_METHOD_GENERAL, @@ -231,8 +233,6 @@ static void *opcode_targets_table[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_INSTRUMENTED_END_FOR, &&TARGET_INSTRUMENTED_POP_ITER, &&TARGET_INSTRUMENTED_END_SEND, @@ -478,8 +478,8 @@ static void 
*opcode_tracing_targets_table[256] = { &&TARGET_TRACE_RECORD, &&TARGET_TRACE_RECORD, &&TARGET_TRACE_RECORD, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_TRACE_RECORD, + &&TARGET_TRACE_RECORD, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, @@ -534,6 +534,7 @@ static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_stop_tracing(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_ADD_FLOAT(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_ADD_INT(TAIL_CALL_PARAMS); +static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_ADD_INT_WIDE(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_ADD_UNICODE(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_EXTEND(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_INPLACE_ADD_UNICODE(TAIL_CALL_PARAMS); @@ -548,6 +549,7 @@ static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_SUBSCR_TUPLE_INT(TAIL_ static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_SUBSCR_USTR_INT(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_SUBTRACT_FLOAT(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_SUBTRACT_INT(TAIL_CALL_PARAMS); +static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_OP_SUBTRACT_INT_WIDE(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BINARY_SLICE(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BUILD_INTERPOLATION(TAIL_CALL_PARAMS); static PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_BUILD_LIST(TAIL_CALL_PARAMS); @@ -780,6 +782,7 @@ static py_tail_call_funcptr instruction_funcptr_handler_table[256] = { [BINARY_OP] = _TAIL_CALL_BINARY_OP, [BINARY_OP_ADD_FLOAT] = _TAIL_CALL_BINARY_OP_ADD_FLOAT, [BINARY_OP_ADD_INT] = _TAIL_CALL_BINARY_OP_ADD_INT, + [BINARY_OP_ADD_INT_WIDE] = _TAIL_CALL_BINARY_OP_ADD_INT_WIDE, 
[BINARY_OP_ADD_UNICODE] = _TAIL_CALL_BINARY_OP_ADD_UNICODE, [BINARY_OP_EXTEND] = _TAIL_CALL_BINARY_OP_EXTEND, [BINARY_OP_INPLACE_ADD_UNICODE] = _TAIL_CALL_BINARY_OP_INPLACE_ADD_UNICODE, @@ -794,6 +797,7 @@ static py_tail_call_funcptr instruction_funcptr_handler_table[256] = { [BINARY_OP_SUBSCR_USTR_INT] = _TAIL_CALL_BINARY_OP_SUBSCR_USTR_INT, [BINARY_OP_SUBTRACT_FLOAT] = _TAIL_CALL_BINARY_OP_SUBTRACT_FLOAT, [BINARY_OP_SUBTRACT_INT] = _TAIL_CALL_BINARY_OP_SUBTRACT_INT, + [BINARY_OP_SUBTRACT_INT_WIDE] = _TAIL_CALL_BINARY_OP_SUBTRACT_INT_WIDE, [BINARY_SLICE] = _TAIL_CALL_BINARY_SLICE, [BUILD_INTERPOLATION] = _TAIL_CALL_BUILD_INTERPOLATION, [BUILD_LIST] = _TAIL_CALL_BUILD_LIST, @@ -1019,8 +1023,6 @@ static py_tail_call_funcptr instruction_funcptr_handler_table[256] = { [125] = _TAIL_CALL_UNKNOWN_OPCODE, [126] = _TAIL_CALL_UNKNOWN_OPCODE, [127] = _TAIL_CALL_UNKNOWN_OPCODE, - [219] = _TAIL_CALL_UNKNOWN_OPCODE, - [220] = _TAIL_CALL_UNKNOWN_OPCODE, [221] = _TAIL_CALL_UNKNOWN_OPCODE, [222] = _TAIL_CALL_UNKNOWN_OPCODE, [223] = _TAIL_CALL_UNKNOWN_OPCODE, @@ -1038,6 +1040,7 @@ static py_tail_call_funcptr instruction_funcptr_tracing_table[256] = { [BINARY_OP] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_ADD_FLOAT] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_ADD_INT] = _TAIL_CALL_TRACE_RECORD, + [BINARY_OP_ADD_INT_WIDE] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_ADD_UNICODE] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_EXTEND] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_INPLACE_ADD_UNICODE] = _TAIL_CALL_TRACE_RECORD, @@ -1052,6 +1055,7 @@ static py_tail_call_funcptr instruction_funcptr_tracing_table[256] = { [BINARY_OP_SUBSCR_USTR_INT] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_SUBTRACT_FLOAT] = _TAIL_CALL_TRACE_RECORD, [BINARY_OP_SUBTRACT_INT] = _TAIL_CALL_TRACE_RECORD, + [BINARY_OP_SUBTRACT_INT_WIDE] = _TAIL_CALL_TRACE_RECORD, [BINARY_SLICE] = _TAIL_CALL_TRACE_RECORD, [BUILD_INTERPOLATION] = _TAIL_CALL_TRACE_RECORD, [BUILD_LIST] = _TAIL_CALL_TRACE_RECORD, @@ -1277,8 +1281,6 @@ static py_tail_call_funcptr 
instruction_funcptr_tracing_table[256] = { [125] = _TAIL_CALL_UNKNOWN_OPCODE, [126] = _TAIL_CALL_UNKNOWN_OPCODE, [127] = _TAIL_CALL_UNKNOWN_OPCODE, - [219] = _TAIL_CALL_UNKNOWN_OPCODE, - [220] = _TAIL_CALL_UNKNOWN_OPCODE, [221] = _TAIL_CALL_UNKNOWN_OPCODE, [222] = _TAIL_CALL_UNKNOWN_OPCODE, [223] = _TAIL_CALL_UNKNOWN_OPCODE, diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index edb4c644bccbf6..d262eb9d6a1365 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -383,27 +383,12 @@ dummy_func(void) { ADD_OP(emit_op, oparg, 0); } - op(_BINARY_OP_ADD_INT, (left, right -- res, l, r)) { - if (PyJitRef_IsUnique(left)) { - REPLACE_OP(this_instr, _BINARY_OP_ADD_INT_INPLACE, 0, 0); - } - else if (PyJitRef_IsUnique(right)) { - REPLACE_OP(this_instr, _BINARY_OP_ADD_INT_INPLACE_RIGHT, 0, 0); - } - // Result may be a unique compact int or a cached small int - // at runtime. Mark as unique; inplace ops verify at runtime. - res = PyJitRef_MakeUnique(sym_new_compact_int(ctx)); - l = left; - r = right; - REPLACE_OPCODE_IF_EVALUATES_PURE(left, right, res); - } - - op(_BINARY_OP_SUBTRACT_INT, (left, right -- res, l, r)) { + op(_BINARY_OP_MULTIPLY_INT, (left, right -- res, l, r)) { if (PyJitRef_IsUnique(left)) { - REPLACE_OP(this_instr, _BINARY_OP_SUBTRACT_INT_INPLACE, 0, 0); + REPLACE_OP(this_instr, _BINARY_OP_MULTIPLY_INT_INPLACE, 0, 0); } else if (PyJitRef_IsUnique(right)) { - REPLACE_OP(this_instr, _BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT, 0, 0); + REPLACE_OP(this_instr, _BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT, 0, 0); } res = PyJitRef_MakeUnique(sym_new_compact_int(ctx)); l = left; @@ -411,13 +396,15 @@ dummy_func(void) { REPLACE_OPCODE_IF_EVALUATES_PURE(left, right, res); } - op(_BINARY_OP_MULTIPLY_INT, (left, right -- res, l, r)) { + op(_BINARY_OP_ADD_INT, (left, right -- res, l, r)) { if (PyJitRef_IsUnique(left)) { - REPLACE_OP(this_instr, _BINARY_OP_MULTIPLY_INT_INPLACE, 0, 0); + REPLACE_OP(this_instr, _BINARY_OP_ADD_INT_INPLACE, 0, 
0); } else if (PyJitRef_IsUnique(right)) { - REPLACE_OP(this_instr, _BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT, 0, 0); + REPLACE_OP(this_instr, _BINARY_OP_ADD_INT_INPLACE_RIGHT, 0, 0); } + // Result may be a unique compact int or a cached small int + // at runtime. Mark as unique; inplace ops verify at runtime. res = PyJitRef_MakeUnique(sym_new_compact_int(ctx)); l = left; r = right; @@ -460,6 +447,19 @@ dummy_func(void) { res = PyJitRef_MakeUnique(sym_new_type(ctx, &PyFloat_Type)); } + op(_BINARY_OP_SUBTRACT_INT, (left, right -- res, l, r)) { + if (PyJitRef_IsUnique(left)) { + REPLACE_OP(this_instr, _BINARY_OP_SUBTRACT_INT_INPLACE, 0, 0); + } + else if (PyJitRef_IsUnique(right)) { + REPLACE_OP(this_instr, _BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT, 0, 0); + } + res = PyJitRef_MakeUnique(sym_new_compact_int(ctx)); + l = left; + r = right; + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right, res); + } + op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res, l, r)) { if (PyJitRef_IsUnique(left)) { ADD_OP(_BINARY_OP_MULTIPLY_FLOAT_INPLACE, 0, 0); diff --git a/Python/record_functions.c.h b/Python/record_functions.c.h index 98abe3d0505e20..6f8f9b09fd10f8 100644 --- a/Python/record_functions.c.h +++ b/Python/record_functions.c.h @@ -118,7 +118,9 @@ const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = { [TO_BOOL_ALWAYS_TRUE] = {1, {_RECORD_TOS_TYPE_INDEX}}, [BINARY_OP_MULTIPLY_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, [BINARY_OP_ADD_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_ADD_INT_WIDE] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, [BINARY_OP_SUBTRACT_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_SUBTRACT_INT_WIDE] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, [BINARY_OP_MULTIPLY_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, [BINARY_OP_ADD_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, [BINARY_OP_SUBTRACT_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, diff --git 
a/Python/specialize.c b/Python/specialize.c index 2ff0a9d0072cec..92bf1b7ad28889 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2354,6 +2354,10 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in specialize(instr, BINARY_OP_ADD_INT); return; } + if (_PyLong_CheckExactAndFitsInt64(lhs) && _PyLong_CheckExactAndFitsInt64(rhs)) { + specialize(instr, BINARY_OP_ADD_INT_WIDE); + return; + } if (PyFloat_CheckExact(lhs)) { specialize(instr, BINARY_OP_ADD_FLOAT); return; @@ -2382,6 +2386,10 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in specialize(instr, BINARY_OP_SUBTRACT_INT); return; } + if (_PyLong_CheckExactAndFitsInt64(lhs) && _PyLong_CheckExactAndFitsInt64(rhs)) { + specialize(instr, BINARY_OP_SUBTRACT_INT_WIDE); + return; + } if (PyFloat_CheckExact(lhs)) { specialize(instr, BINARY_OP_SUBTRACT_FLOAT); return; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 22a321b4953de7..77fa77ccc15103 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -721,6 +721,8 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_Wrap", "PyStackRef_Unwrap", "_PyLong_CheckExactAndCompact", + "_PyLong_CheckExactAndFitsInt64", + "_PyLong_FitsInt64", "_PyExecutor_FromExit", "_PyJit_TryInitializeTracing", "_Py_unset_eval_breaker_bit",