gh-115999: Add free-threaded specialization for UNPACK_SEQUENCE (#126600)

Add free-threaded specialization for `UNPACK_SEQUENCE` opcode.
`UNPACK_SEQUENCE_TUPLE` and `UNPACK_SEQUENCE_TWO_TUPLE` are already thread-safe since tuples are immutable.
`UNPACK_SEQUENCE_LIST` is not thread-safe because of the nature of lists (there is nothing preventing another thread from adding items to, or removing them from, the list while the instruction is executing). To achieve thread safety, we add a critical section to the implementation of `UNPACK_SEQUENCE_LIST`, specifically around the parts where we check the size of the list and push items onto the stack.


---------

Co-authored-by: Matt Page <mpage@meta.com>
Co-authored-by: mpage <mpage@cs.stanford.edu>
This commit is contained in:
Kirill Podoprigora 2024-11-22 19:00:35 +02:00 committed by GitHub
parent 5ba67af006
commit 27486c3365
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 103 additions and 30 deletions

View File

@ -1223,7 +1223,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = {
[UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG },
[UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
[UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
[UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
[WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },

View File

@ -112,7 +112,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_UNPACK_SEQUENCE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_UNPACK_SEQUENCE_TWO_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
[_UNPACK_SEQUENCE_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
[_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
[_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
[_UNPACK_EX] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_STORE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_DELETE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,

View File

@ -1339,6 +1339,37 @@ class TestSpecializer(TestBase):
self.assert_specialized(to_bool_str, "TO_BOOL_STR")
self.assert_no_opcode(to_bool_str, "TO_BOOL")
@cpython_only
@requires_specialization_ft
def test_unpack_sequence(self):
def f():
for _ in range(100):
a, b = 1, 2
self.assertEqual(a, 1)
self.assertEqual(b, 2)
f()
self.assert_specialized(f, "UNPACK_SEQUENCE_TWO_TUPLE")
self.assert_no_opcode(f, "UNPACK_SEQUENCE")
def g():
for _ in range(100):
a, = 1,
self.assertEqual(a, 1)
g()
self.assert_specialized(g, "UNPACK_SEQUENCE_TUPLE")
self.assert_no_opcode(g, "UNPACK_SEQUENCE")
def x():
for _ in range(100):
a, b = [1, 2]
self.assertEqual(a, 1)
self.assertEqual(b, 2)
x()
self.assert_specialized(x, "UNPACK_SEQUENCE_LIST")
self.assert_no_opcode(x, "UNPACK_SEQUENCE")
if __name__ == "__main__":
unittest.main()

View File

@ -1381,7 +1381,7 @@ dummy_func(
};
specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) {
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_UnpackSequence(seq, next_instr, oparg);
@ -1389,7 +1389,7 @@ dummy_func(
}
OPCODE_DEFERRED_INC(UNPACK_SEQUENCE);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
(void)seq;
(void)counter;
}
@ -1429,12 +1429,24 @@ dummy_func(
inst(UNPACK_SEQUENCE_LIST, (unused/1, seq -- values[oparg])) {
PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq);
DEOPT_IF(!PyList_CheckExact(seq_o));
DEOPT_IF(PyList_GET_SIZE(seq_o) != oparg);
#ifdef Py_GIL_DISABLED
PyCriticalSection cs;
PyCriticalSection_Begin(&cs, seq_o);
#endif
if (PyList_GET_SIZE(seq_o) != oparg) {
#ifdef Py_GIL_DISABLED
PyCriticalSection_End(&cs);
#endif
DEOPT_IF(true);
}
STAT_INC(UNPACK_SEQUENCE, hit);
PyObject **items = _PyList_ITEMS(seq_o);
for (int i = oparg; --i >= 0; ) {
*values++ = PyStackRef_FromPyObjectNew(items[i]);
}
#ifdef Py_GIL_DISABLED
PyCriticalSection_End(&cs);
#endif
DECREF_INPUTS();
}
@ -2525,7 +2537,7 @@ dummy_func(
}
OPCODE_DEFERRED_INC(CONTAINS_OP);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}
macro(CONTAINS_OP) = _SPECIALIZE_CONTAINS_OP + _CONTAINS_OP;

View File

@ -1711,15 +1711,33 @@
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
#ifdef Py_GIL_DISABLED
PyCriticalSection cs;
_PyFrame_SetStackPointer(frame, stack_pointer);
PyCriticalSection_Begin(&cs, seq_o);
stack_pointer = _PyFrame_GetStackPointer(frame);
#endif
if (PyList_GET_SIZE(seq_o) != oparg) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
#ifdef Py_GIL_DISABLED
_PyFrame_SetStackPointer(frame, stack_pointer);
PyCriticalSection_End(&cs);
stack_pointer = _PyFrame_GetStackPointer(frame);
#endif
if (true) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
}
STAT_INC(UNPACK_SEQUENCE, hit);
PyObject **items = _PyList_ITEMS(seq_o);
for (int i = oparg; --i >= 0; ) {
*values++ = PyStackRef_FromPyObjectNew(items[i]);
}
#ifdef Py_GIL_DISABLED
_PyFrame_SetStackPointer(frame, stack_pointer);
PyCriticalSection_End(&cs);
stack_pointer = _PyFrame_GetStackPointer(frame);
#endif
PyStackRef_CLOSE(seq);
stack_pointer += -1 + oparg;
assert(WITHIN_STACK_BOUNDS());

View File

@ -3405,7 +3405,7 @@
}
OPCODE_DEFERRED_INC(CONTAINS_OP);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}
// _CONTAINS_OP
{
@ -7994,7 +7994,7 @@
seq = stack_pointer[-1];
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -8004,7 +8004,7 @@
}
OPCODE_DEFERRED_INC(UNPACK_SEQUENCE);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
(void)seq;
(void)counter;
}
@ -8035,12 +8035,30 @@
values = &stack_pointer[-1];
PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq);
DEOPT_IF(!PyList_CheckExact(seq_o), UNPACK_SEQUENCE);
DEOPT_IF(PyList_GET_SIZE(seq_o) != oparg, UNPACK_SEQUENCE);
#ifdef Py_GIL_DISABLED
PyCriticalSection cs;
_PyFrame_SetStackPointer(frame, stack_pointer);
PyCriticalSection_Begin(&cs, seq_o);
stack_pointer = _PyFrame_GetStackPointer(frame);
#endif
if (PyList_GET_SIZE(seq_o) != oparg) {
#ifdef Py_GIL_DISABLED
_PyFrame_SetStackPointer(frame, stack_pointer);
PyCriticalSection_End(&cs);
stack_pointer = _PyFrame_GetStackPointer(frame);
#endif
DEOPT_IF(true, UNPACK_SEQUENCE);
}
STAT_INC(UNPACK_SEQUENCE, hit);
PyObject **items = _PyList_ITEMS(seq_o);
for (int i = oparg; --i >= 0; ) {
*values++ = PyStackRef_FromPyObjectNew(items[i]);
}
#ifdef Py_GIL_DISABLED
_PyFrame_SetStackPointer(frame, stack_pointer);
PyCriticalSection_End(&cs);
stack_pointer = _PyFrame_GetStackPointer(frame);
#endif
PyStackRef_CLOSE(seq);
stack_pointer += -1 + oparg;
assert(WITHIN_STACK_BOUNDS());

View File

@ -2487,39 +2487,33 @@ _Py_Specialize_UnpackSequence(_PyStackRef seq_st, _Py_CODEUNIT *instr, int oparg
{
PyObject *seq = PyStackRef_AsPyObjectBorrow(seq_st);
assert(ENABLE_SPECIALIZATION);
assert(ENABLE_SPECIALIZATION_FT);
assert(_PyOpcode_Caches[UNPACK_SEQUENCE] ==
INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE);
_PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)(instr + 1);
if (PyTuple_CheckExact(seq)) {
if (PyTuple_GET_SIZE(seq) != oparg) {
SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR);
goto failure;
unspecialize(instr);
return;
}
if (PyTuple_GET_SIZE(seq) == 2) {
instr->op.code = UNPACK_SEQUENCE_TWO_TUPLE;
goto success;
specialize(instr, UNPACK_SEQUENCE_TWO_TUPLE);
return;
}
instr->op.code = UNPACK_SEQUENCE_TUPLE;
goto success;
specialize(instr, UNPACK_SEQUENCE_TUPLE);
return;
}
if (PyList_CheckExact(seq)) {
if (PyList_GET_SIZE(seq) != oparg) {
SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR);
goto failure;
unspecialize(instr);
return;
}
instr->op.code = UNPACK_SEQUENCE_LIST;
goto success;
specialize(instr, UNPACK_SEQUENCE_LIST);
return;
}
SPECIALIZATION_FAIL(UNPACK_SEQUENCE, unpack_sequence_fail_kind(seq));
failure:
STAT_INC(UNPACK_SEQUENCE, failure);
instr->op.code = UNPACK_SEQUENCE;
cache->counter = adaptive_counter_backoff(cache->counter);
return;
success:
STAT_INC(UNPACK_SEQUENCE, success);
cache->counter = adaptive_counter_cooldown();
unspecialize(instr);
}
#ifdef Py_STATS