Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,29 @@ class Foo:
Foo.attr = 0
self.assertFalse(ex.is_valid())

def test_guard_type_version_locked_removed(self):
    """
    Back-to-back STORE_ATTR_INSTANCE_VALUE stores in __init__ should
    leave a single _GUARD_TYPE_VERSION_LOCKED in the trace; the
    repeated guards on the same object are redundant.
    """

    class Foo:
        def __init__(self):
            self.a = 1
            self.b = 2
            self.c = 3

    def thing(n):
        for _ in range(n):
            Foo()

    _, executor = self._run_with_optimizer(thing, TIER2_THRESHOLD)
    self.assertIsNotNone(executor)
    # The first store keeps its guard; the guards of the following
    # stores are expected to have been replaced by _NOP.
    locked_guards = [
        name
        for name in iter_opnames(executor)
        if name == "_GUARD_TYPE_VERSION_LOCKED"
    ]
    self.assertEqual(len(locked_guards), 1)

def test_type_version_doesnt_segfault(self):
"""
Tests that setting a type version doesn't cause a segfault when later looking at the stack.
Expand All @@ -1539,6 +1562,101 @@ def fn(a):

fn(A())

def test_init_resolves_callable(self):
    """
    _CHECK_AND_ALLOCATE_OBJECT should resolve __init__ to a constant.

    The optimizer already traces into the __init__ frame; what this
    test covers is that the resolved callable is propagated through
    the frame, so the redundant function version and argument count
    checks are eliminated.
    """
    class MyPoint:
        def __init__(self, x, y):
            self.x = x
            self.y = y

    def testfunc(n):
        total = 0.0
        for _ in range(n):
            p = MyPoint(1.0, 2.0)
            total += p.x
        return total

    res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
    self.assertAlmostEqual(res, TIER2_THRESHOLD * 1.0)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    # The __init__ frame is still pushed inside the trace.
    self.assertIn("_PUSH_FRAME", uops)
    # With __init__ known as a constant, the function version and
    # exact-args checks are redundant and must be gone.
    self.assertNotIn("_CHECK_FUNCTION_VERSION", uops)
    self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops)

def test_guard_type_version_locked_propagates(self):
    """
    After _GUARD_TYPE_VERSION_LOCKED the symbol should carry the type
    version, so later accesses to the same type can reuse it.
    """
    class Item:
        def __init__(self, val):
            self.val = val

        def get(self):
            return self.val

        def get2(self):
            return self.val + 1

    def testfunc(n):
        item = Item(42)
        total = 0
        for _ in range(n):
            # Two method calls on one object: the second call is
            # expected to reuse the type info established by the first.
            total += item.get() + item.get2()
        return total

    result, executor = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
    self.assertEqual(result, TIER2_THRESHOLD * (42 + 43))
    self.assertIsNotNone(executor)
    names = get_opnames(executor)
    # Each of the two methods gets its own pushed frame.
    self.assertEqual(names.count("_PUSH_FRAME"), 2)
    # A single type version guard serves both method lookups.
    self.assertEqual(names.count("_GUARD_TYPE_VERSION"), 1)
    # The callable is resolved from type info, so neither function
    # check may remain in the trace.
    for redundant in ("_CHECK_FUNCTION_VERSION", "_CHECK_FUNCTION_EXACT_ARGS"):
        self.assertNotIn(redundant, names)

def test_method_chain_guard_elimination(self):
    """
    Two chained method calls on one object should share the outer type
    guard: a single _GUARD_TYPE_VERSION covers both lookups.
    """
    class Calc:
        def __init__(self, val):
            self.val = val

        def add(self, x):
            self.val += x
            return self

    def testfunc(n):
        c = Calc(0)
        for _ in range(n):
            c.add(1).add(2)
        return c.val

    result, executor = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
    self.assertEqual(result, TIER2_THRESHOLD * 3)
    self.assertIsNotNone(executor)
    names = get_opnames(executor)
    # Each add() call contributes one pushed frame.
    self.assertEqual(names.count("_PUSH_FRAME"), 2)
    # The lookup on the returned object reuses the type info from the
    # first lookup on c, so only one outer guard is expected.
    self.assertEqual(names.count("_GUARD_TYPE_VERSION"), 1)

def test_func_guards_removed_or_reduced(self):
def testfunc(n):
for i in range(n):
Expand Down
35 changes: 33 additions & 2 deletions Python/optimizer_bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,21 @@ dummy_func(void) {
assert(!PyJitRef_IsUnique(value));
}

op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) {
    // Optimizer rule: when `owner` is already known to have
    // `type_version`, the guard is replaced by _NOP. Otherwise the
    // version is recorded on `owner` and, if that succeeds, the type is
    // watched and added to this executor's dependencies.
    assert(type_version);
    if (sym_matches_type_version(owner, type_version)) {
        // NOTE(review): a reviewer objected to removing this uop. On
        // free-threaded builds it also unlocks the object, so it is
        // side-effecting. The suggested direction is to split it into
        // _GUARD_TYPE_VERSION plus a separate unlock uop (compare
        // _LOCK_OBJECT) instead of emitting _NOP. Unresolved here.
        ADD_OP(_NOP, 0, 0);
    }
    else {
        PyTypeObject *type = _PyType_LookupByVersion(type_version);
        if (type) {
            if (sym_set_type_version(owner, type_version)) {
                PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
                _Py_BloomFilter_Add(dependencies, type);
            }
        }
    }
}

op(_STORE_ATTR_INSTANCE_VALUE, (offset/1, value, owner -- o)) {
(void)offset;
(void)value;
Expand Down Expand Up @@ -1027,9 +1042,25 @@ dummy_func(void) {
}

op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
    // Optimizer rule: if the type for `type_version` can be found and
    // has a Python function cached as its __init__, `callable` becomes
    // that function as a constant; otherwise it is only known non-NULL.
    (void)args;
    PyObject *init = NULL;
    PyTypeObject *type = _PyType_LookupByVersion(type_version);
    // Defensive: _spec_cache lives in PyHeapTypeObject, so only read it
    // from heap types.
    if (type != NULL && PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
        PyHeapTypeObject *cls = (PyHeapTypeObject *)type;
        init = FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
    }
    if (init != NULL && PyFunction_Check(init)) {
        // Propagate the __init__ function so _CREATE_INIT_FRAME can
        // resolve the code object and continue optimizing.
        callable = sym_new_const(ctx, init);
        // The constant depends on the type, so watch the type and
        // record it as a dependency of this executor.
        PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
        _Py_BloomFilter_Add(dependencies, type);
    }
    else {
        callable = sym_new_not_null(ctx);
    }
    self_or_null = sym_new_not_null(ctx);
}

Expand Down
34 changes: 32 additions & 2 deletions Python/optimizer_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading