From 0fee38e5779427181fa2cda6270ed94df26b7e32 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 8 Apr 2026 18:24:44 +0200 Subject: [PATCH 1/2] Optimize __init__ in JIT: resolve init function and eliminate redundant type guards - _CHECK_AND_ALLOCATE_OBJECT: resolve __init__ from type's _spec_cache so the optimizer can follow into __init__ bodies - _GUARD_TYPE_VERSION_LOCKED: add optimizer handler to track type version and NOP redundant guards on the same object - Add test_guard_type_version_locked_removed Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_capi/test_opt.py | 23 ++++++++++++++++++++++ Python/optimizer_bytecodes.c | 35 ++++++++++++++++++++++++++++++++-- Python/optimizer_cases.c.h | 34 +++++++++++++++++++++++++++++++-- 3 files changed, 88 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 8d3da8b5a22968..e621092a3138fc 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1518,6 +1518,29 @@ class Foo: Foo.attr = 0 self.assertFalse(ex.is_valid()) + def test_guard_type_version_locked_removed(self): + """ + Verify that redundant _GUARD_TYPE_VERSION_LOCKED guards are + eliminated for sequential STORE_ATTR_INSTANCE_VALUE in __init__. + """ + + class Foo: + def __init__(self): + self.a = 1 + self.b = 2 + self.c = 3 + + def thing(n): + for _ in range(n): + Foo() + + res, ex = self._run_with_optimizer(thing, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + opnames = list(iter_opnames(ex)) + guard_locked_count = opnames.count("_GUARD_TYPE_VERSION_LOCKED") + # Only the first store needs the guard; the rest should be NOPed. + self.assertEqual(guard_locked_count, 1) + def test_type_version_doesnt_segfault(self): """ Tests that setting a type version doesn't cause a segfault when later looking at the stack. 
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 2fd235a2dda149..ab708329ffef25 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -134,6 +134,21 @@ dummy_func(void) { assert(!PyJitRef_IsUnique(value)); } + op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) { + assert(type_version); + if (sym_matches_type_version(owner, type_version)) { + ADD_OP(_NOP, 0, 0); + } else { + PyTypeObject *type = _PyType_LookupByVersion(type_version); + if (type) { + if (sym_set_type_version(owner, type_version)) { + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + _Py_BloomFilter_Add(dependencies, type); + } + } + } + } + op(_STORE_ATTR_INSTANCE_VALUE, (offset/1, value, owner -- o)) { (void)offset; (void)value; @@ -1027,9 +1042,25 @@ dummy_func(void) { } op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) { - (void)type_version; (void)args; - callable = sym_new_not_null(ctx); + PyTypeObject *type = _PyType_LookupByVersion(type_version); + if (type) { + PyHeapTypeObject *cls = (PyHeapTypeObject *)type; + PyObject *init = FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); + if (init != NULL && PyFunction_Check(init)) { + // Record the __init__ function so _CREATE_INIT_FRAME can + // resolve the code object and continue optimizing. 
+ callable = sym_new_const(ctx, init); + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + _Py_BloomFilter_Add(dependencies, type); + } + else { + callable = sym_new_not_null(ctx); + } + } + else { + callable = sym_new_not_null(ctx); + } self_or_null = sym_new_not_null(ctx); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 8634189c707510..0deb43c5a40601 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2386,6 +2386,21 @@ } case _GUARD_TYPE_VERSION_LOCKED: { + JitOptRef owner; + owner = stack_pointer[-1]; + uint32_t type_version = (uint32_t)this_instr->operand0; + assert(type_version); + if (sym_matches_type_version(owner, type_version)) { + ADD_OP(_NOP, 0, 0); + } else { + PyTypeObject *type = _PyType_LookupByVersion(type_version); + if (type) { + if (sym_set_type_version(owner, type_version)) { + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + _Py_BloomFilter_Add(dependencies, type); + } + } + } break; } @@ -3776,9 +3791,24 @@ self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; uint32_t type_version = (uint32_t)this_instr->operand0; - (void)type_version; (void)args; - callable = sym_new_not_null(ctx); + PyTypeObject *type = _PyType_LookupByVersion(type_version); + if (type) { + PyHeapTypeObject *cls = (PyHeapTypeObject *)type; + PyObject *init = FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); + if (init != NULL && PyFunction_Check(init)) { + callable = sym_new_const(ctx, init); + stack_pointer[-2 - oparg] = callable; + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + _Py_BloomFilter_Add(dependencies, type); + } + else { + callable = sym_new_not_null(ctx); + } + } + else { + callable = sym_new_not_null(ctx); + } self_or_null = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = callable; stack_pointer[-1 - oparg] = self_or_null; From 83fd02cdac496084263bc89e4db8dec7bd31e829 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Thu, 9 Apr 2026 00:06:33 +0200 Subject: [PATCH 
2/2] add tests --- Lib/test/test_capi/test_opt.py | 95 ++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index e621092a3138fc..d566e825c03e8d 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1562,6 +1562,101 @@ def fn(a): fn(A()) + def test_init_resolves_callable(self): + """ + _CHECK_AND_ALLOCATE_OBJECT should resolve __init__ to a constant, + enabling the optimizer to trace into the init frame and eliminate + redundant function version and arg count checks. + """ + class MyPoint: + def __init__(self, x, y): + self.x = x + self.y = y + + def testfunc(n): + total = 0.0 + for _ in range(n): + p = MyPoint(1.0, 2.0) + total += p.x + return total + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertAlmostEqual(res, TIER2_THRESHOLD * 1.0) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + # The __init__ call should be traced through via _PUSH_FRAME + self.assertIn("_PUSH_FRAME", uops) + # __init__ resolution eliminates function version and arg checks + self.assertNotIn("_CHECK_FUNCTION_VERSION", uops) + self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops) + + def test_guard_type_version_locked_propagates(self): + """ + Type info recorded by the first type-version guard on an object + should be reused: the trace keeps a single _GUARD_TYPE_VERSION. + """ + class Item: + def __init__(self, val): + self.val = val + + def get(self): + return self.val + + def get2(self): + return self.val + 1 + + def testfunc(n): + item = Item(42) + total = 0 + for _ in range(n): + # Two method calls on the same object — the second + # should benefit from type info set by the first. 
+ total += item.get() + item.get2() + return total + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD * (42 + 43)) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + # Both methods should be traced through + self.assertEqual(uops.count("_PUSH_FRAME"), 2) + # Type version propagation: one guard covers both method lookups + self.assertEqual(uops.count("_GUARD_TYPE_VERSION"), 1) + # Function checks eliminated (type info resolves the callable) + self.assertNotIn("_CHECK_FUNCTION_VERSION", uops) + self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops) + + def test_method_chain_guard_elimination(self): + """ + Calling two methods on the same object should share the outer + type guard — only one _GUARD_TYPE_VERSION for the two lookups. + """ + class Calc: + def __init__(self, val): + self.val = val + + def add(self, x): + self.val += x + return self + + def testfunc(n): + c = Calc(0) + for _ in range(n): + c.add(1).add(2) + return c.val + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD * 3) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + # Both add() calls should be inlined + push_count = uops.count("_PUSH_FRAME") + self.assertEqual(push_count, 2) + # Only one outer type version guard for the two method lookups + # on the same object c (the second lookup reuses type info) + guard_version_count = uops.count("_GUARD_TYPE_VERSION") + self.assertEqual(guard_version_count, 1) + def test_func_guards_removed_or_reduced(self): def testfunc(n): for i in range(n):