gh-115999: Implement thread-local bytecode and enable specialization for BINARY_OP (#123926)

Each thread specializes a thread-local copy of the bytecode, created on the first RESUME, in free-threaded builds. All copies of the bytecode for a code object are stored in the co_tlbc array on the code object. Each thread reserves a globally unique index identifying its copy of the bytecode in all co_tlbc arrays at thread creation, and releases the index at thread destruction. The first entry in every co_tlbc array always points to the "main" copy of the bytecode that is stored at the end of the code object. This ensures that no bytecode is copied for programs that do not use threads.

Thread-local bytecode can be disabled at runtime by providing either -X tlbc=0 or PYTHON_TLBC=0. Disabling thread-local bytecode also disables specialization.

Concurrent modifications to the bytecode made by the specializing interpreter and instrumentation use atomics, with specialization taking care not to overwrite an instruction that was instrumented concurrently.
This commit is contained in:
mpage 2024-11-04 11:13:32 -08:00 committed by GitHub
parent e5a4b402ae
commit 2e95c5ba3b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
44 changed files with 1510 additions and 255 deletions

View File

@ -72,6 +72,24 @@ typedef struct {
uint8_t *per_instruction_tools;
} _PyCoMonitoringData;
#ifdef Py_GIL_DISABLED

/* Each thread specializes a thread-local copy of the bytecode in free-threaded
 * builds. These copies are stored on the code object in a `_PyCodeArray`. The
 * first entry in the array always points to the "main" copy of the bytecode
 * that is stored at the end of the code object.
 */
typedef struct {
    // Capacity of `entries` (number of allocated slots).
    Py_ssize_t size;
    // Trailing over-allocated array of per-thread bytecode copies, indexed by
    // each thread's tlbc index. entries[0] is the "main" copy embedded at the
    // end of the code object; other slots may be NULL until the owning thread
    // creates its copy. The [1] is the pre-C99 trailing-array idiom; the real
    // length is `size`.
    char *entries[1];
} _PyCodeArray;

// Expands to the co_tlbc member of PyCodeObject only in free-threaded builds,
// so non-free-threaded builds pay no size cost.
#define _PyCode_DEF_THREAD_LOCAL_BYTECODE() \
    _PyCodeArray *co_tlbc;
#else
#define _PyCode_DEF_THREAD_LOCAL_BYTECODE()
#endif
// To avoid repeating ourselves in deepfreeze.py, all PyCodeObject members are
// defined in this macro:
#define _PyCode_DEF(SIZE) { \
@ -138,6 +156,7 @@ typedef struct {
Type is a void* to keep the format private in codeobject.c to force \
people to go through the proper APIs. */ \
void *co_extra; \
_PyCode_DEF_THREAD_LOCAL_BYTECODE() \
char co_code_adaptive[(SIZE)]; \
}

View File

@ -183,6 +183,7 @@ typedef struct PyConfig {
int cpu_count;
#ifdef Py_GIL_DISABLED
int enable_gil;
int tlbc_enabled;
#endif
/* --- Path configuration inputs ------------ */

View File

@ -174,6 +174,18 @@ _PyEval_IsGILEnabled(PyThreadState *tstate)
extern int _PyEval_EnableGILTransient(PyThreadState *tstate);
extern int _PyEval_EnableGILPermanent(PyThreadState *tstate);
extern int _PyEval_DisableGIL(PyThreadState *state);
/* Return the bytecode that the thread in `tstate` should execute for `co`.
 *
 * The lock-free fast path returns the thread's existing copy; when no copy
 * exists yet, fall back to _PyCode_GetTLBC(), which creates one.
 */
static inline _Py_CODEUNIT *
_PyEval_GetExecutableCode(PyThreadState *tstate, PyCodeObject *co)
{
    _Py_CODEUNIT *tlbc = _PyCode_GetTLBCFast(tstate, co);
    return (tlbc != NULL) ? tlbc : _PyCode_GetTLBC(co);
}
#endif
extern void _PyEval_DeactivateOpCache(void);

View File

@ -11,6 +11,7 @@ extern "C" {
#include "pycore_stackref.h" // _PyStackRef
#include "pycore_lock.h" // PyMutex
#include "pycore_backoff.h" // _Py_BackoffCounter
#include "pycore_tstate.h" // _PyThreadStateImpl
/* Each instruction in a code object is a fixed-width value,
@ -313,11 +314,17 @@ extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
/** API for executors */
extern void _PyCode_Clear_Executors(PyCodeObject *code);
#ifdef Py_GIL_DISABLED
// gh-115999 tracks progress on addressing this.
#define ENABLE_SPECIALIZATION 0
// Use this to enable specialization families once they are thread-safe. All
// uses will be replaced with ENABLE_SPECIALIZATION once all families are
// thread-safe.
#define ENABLE_SPECIALIZATION_FT 1
#else
#define ENABLE_SPECIALIZATION 1
#define ENABLE_SPECIALIZATION_FT ENABLE_SPECIALIZATION
#endif
/* Specialization functions */
@ -600,6 +607,40 @@ struct _PyCode8 _PyCode_DEF(8);
PyAPI_DATA(const struct _PyCode8) _Py_InitCleanup;
#ifdef Py_GIL_DISABLED
// Return a pointer to the thread-local bytecode for the current thread, if it
// exists.
//
// Returns NULL when this thread's slot in the code object's co_tlbc array is
// out of range or not yet populated; callers then take the slow path
// (_PyCode_GetTLBC), which creates the copy.
static inline _Py_CODEUNIT *
_PyCode_GetTLBCFast(PyThreadState *tstate, PyCodeObject *co)
{
    // Acquire load so the entries of the array are visible; presumably paired
    // with a release store wherever co_tlbc is (re)published (not shown here).
    _PyCodeArray *code = _Py_atomic_load_ptr_acquire(&co->co_tlbc);
    int32_t idx = ((_PyThreadStateImpl*) tstate)->tlbc_index;
    if (idx < code->size && code->entries[idx] != NULL) {
        return (_Py_CODEUNIT *) code->entries[idx];
    }
    return NULL;
}
// Return a pointer to the thread-local bytecode for the current thread,
// creating it if necessary.
extern _Py_CODEUNIT *_PyCode_GetTLBC(PyCodeObject *co);
// Reserve an index for the current thread into thread-local bytecode
// arrays
//
// Returns the reserved index or -1 on error.
extern int32_t _Py_ReserveTLBCIndex(PyInterpreterState *interp);
// Release the current thread's index into thread-local bytecode arrays
extern void _Py_ClearTLBCIndex(_PyThreadStateImpl *tstate);
// Free all TLBC copies not associated with live threads.
//
// Returns 0 on success or -1 on error.
extern int _Py_ClearUnusedTLBC(PyInterpreterState *interp);
#endif
#ifdef __cplusplus
}
#endif

View File

@ -68,6 +68,10 @@ typedef struct _PyInterpreterFrame {
PyObject *f_locals; /* Strong reference, may be NULL. Only valid if not on C stack */
PyFrameObject *frame_obj; /* Strong reference, may be NULL. Only valid if not on C stack */
_Py_CODEUNIT *instr_ptr; /* Instruction currently executing (or about to begin) */
#ifdef Py_GIL_DISABLED
/* Index of thread-local bytecode containing instr_ptr. */
int32_t tlbc_index;
#endif
_PyStackRef *stackpointer;
uint16_t return_offset; /* Only relevant during a function call */
char owner;
@ -76,7 +80,7 @@ typedef struct _PyInterpreterFrame {
} _PyInterpreterFrame;
#define _PyInterpreterFrame_LASTI(IF) \
((int)((IF)->instr_ptr - _PyCode_CODE(_PyFrame_GetCode(IF))))
((int)((IF)->instr_ptr - _PyFrame_GetBytecode((IF))))
static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) {
PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable);
@ -84,6 +88,19 @@ static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) {
return (PyCodeObject *)executable;
}
// Return the start of the bytecode that frame `f` is executing.
//
// In free-threaded builds instr_ptr points into a thread-local copy of the
// bytecode located via the frame's tlbc_index; otherwise it is the bytecode
// embedded in the code object itself.
static inline _Py_CODEUNIT *
_PyFrame_GetBytecode(_PyInterpreterFrame *f)
{
#ifdef Py_GIL_DISABLED
    PyCodeObject *co = _PyFrame_GetCode(f);
    _PyCodeArray *tlbc = _Py_atomic_load_ptr_acquire(&co->co_tlbc);
    // The frame's index must refer to a valid slot in the current array.
    assert(f->tlbc_index >= 0 && f->tlbc_index < tlbc->size);
    return (_Py_CODEUNIT *)tlbc->entries[f->tlbc_index];
#else
    return _PyCode_CODE(_PyFrame_GetCode(f));
#endif
}
static inline PyFunctionObject *_PyFrame_GetFunction(_PyInterpreterFrame *f) {
PyObject *func = PyStackRef_AsPyObjectBorrow(f->f_funcobj);
assert(PyFunction_Check(func));
@ -144,13 +161,33 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *
#endif
}
#ifdef Py_GIL_DISABLED
// Point frame->instr_ptr at the copy of `code`'s bytecode appropriate for the
// thread in `tstate`, and record which copy was chosen in frame->tlbc_index.
static inline void
_PyFrame_InitializeTLBC(PyThreadState *tstate, _PyInterpreterFrame *frame,
                        PyCodeObject *code)
{
    _Py_CODEUNIT *tlbc = _PyCode_GetTLBCFast(tstate, code);
    if (tlbc == NULL) {
        // No thread-local bytecode exists for this thread yet; use the main
        // thread's copy, deferring thread-local bytecode creation to the
        // execution of RESUME.
        frame->instr_ptr = _PyCode_CODE(code);
        frame->tlbc_index = 0;
    }
    else {
        frame->instr_ptr = tlbc;
        frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
    }
}
#endif
/* Consumes reference to func and locals.
Does not initialize frame->previous, which happens
when frame is linked into the frame stack.
*/
static inline void
_PyFrame_Initialize(
_PyInterpreterFrame *frame, _PyStackRef func,
PyThreadState *tstate, _PyInterpreterFrame *frame, _PyStackRef func,
PyObject *locals, PyCodeObject *code, int null_locals_from, _PyInterpreterFrame *previous)
{
frame->previous = previous;
@ -162,7 +199,12 @@ _PyFrame_Initialize(
frame->f_locals = locals;
frame->stackpointer = frame->localsplus + code->co_nlocalsplus;
frame->frame_obj = NULL;
#ifdef Py_GIL_DISABLED
_PyFrame_InitializeTLBC(tstate, frame, code);
#else
(void)tstate;
frame->instr_ptr = _PyCode_CODE(code);
#endif
frame->return_offset = 0;
frame->owner = FRAME_OWNED_BY_THREAD;
@ -224,7 +266,8 @@ _PyFrame_IsIncomplete(_PyInterpreterFrame *frame)
return true;
}
return frame->owner != FRAME_OWNED_BY_GENERATOR &&
frame->instr_ptr < _PyCode_CODE(_PyFrame_GetCode(frame)) + _PyFrame_GetCode(frame)->_co_firsttraceable;
frame->instr_ptr < _PyFrame_GetBytecode(frame) +
_PyFrame_GetCode(frame)->_co_firsttraceable;
}
static inline _PyInterpreterFrame *
@ -315,7 +358,8 @@ _PyFrame_PushUnchecked(PyThreadState *tstate, _PyStackRef func, int null_locals_
_PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->datastack_top;
tstate->datastack_top += code->co_framesize;
assert(tstate->datastack_top < tstate->datastack_limit);
_PyFrame_Initialize(new_frame, func, NULL, code, null_locals_from, previous);
_PyFrame_Initialize(tstate, new_frame, func, NULL, code, null_locals_from,
previous);
return new_frame;
}
@ -339,7 +383,11 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
assert(stackdepth <= code->co_stacksize);
frame->stackpointer = frame->localsplus + code->co_nlocalsplus + stackdepth;
frame->frame_obj = NULL;
#ifdef Py_GIL_DISABLED
_PyFrame_InitializeTLBC(tstate, frame, code);
#else
frame->instr_ptr = _PyCode_CODE(code);
#endif
frame->owner = FRAME_OWNED_BY_THREAD;
frame->return_offset = 0;

View File

@ -389,6 +389,10 @@ extern int _PyGC_VisitStackRef(union _PyStackRef *ref, visitproc visit, void *ar
} \
} while (0)
#ifdef Py_GIL_DISABLED
extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp,
gcvisitobjects_t callback, void *arg);
#endif
#ifdef __cplusplus
}

View File

@ -0,0 +1,56 @@
#ifndef Py_INTERNAL_INDEX_POOL_H
#define Py_INTERNAL_INDEX_POOL_H

#include "Python.h"

#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#ifdef Py_GIL_DISABLED

// This contains code for allocating unique indices in an array. It is used by
// the free-threaded build to assign each thread a globally unique index into
// each code object's thread-local bytecode array.

// A min-heap of indices
typedef struct _PyIndexHeap {
    int32_t *values;

    // Number of items stored in values
    Py_ssize_t size;

    // Maximum number of items that can be stored in values
    Py_ssize_t capacity;
} _PyIndexHeap;

// An unbounded pool of indices. Indices are allocated starting from 0. They
// may be released back to the pool once they are no longer in use.
typedef struct _PyIndexPool {
    // Serializes concurrent access to the pool's state.
    PyMutex mutex;

    // Min heap of indices available for allocation
    _PyIndexHeap free_indices;

    // Next index to allocate if no free indices are available
    int32_t next_index;
} _PyIndexPool;

// Allocate the smallest available index. Returns -1 on error.
extern int32_t _PyIndexPool_AllocIndex(_PyIndexPool *indices);

// Release `index` back to the pool
extern void _PyIndexPool_FreeIndex(_PyIndexPool *indices, int32_t index);

// Release any resources owned by the pool itself.
extern void _PyIndexPool_Fini(_PyIndexPool *indices);

#endif // Py_GIL_DISABLED

#ifdef __cplusplus
}
#endif
#endif // !Py_INTERNAL_INDEX_POOL_H

View File

@ -26,6 +26,7 @@ extern "C" {
#include "pycore_genobject.h" // _PyGen_FetchStopIterationValue
#include "pycore_global_objects.h"// struct _Py_interp_cached_objects
#include "pycore_import.h" // struct _import_state
#include "pycore_index_pool.h" // _PyIndexPool
#include "pycore_instruments.h" // _PY_MONITORING_EVENTS
#include "pycore_list.h" // struct _Py_list_state
#include "pycore_mimalloc.h" // struct _mimalloc_interp_state
@ -222,6 +223,7 @@ struct _is {
struct _brc_state brc; // biased reference counting state
struct _Py_unique_id_pool unique_ids; // object ids for per-thread refcounts
PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS];
_PyIndexPool tlbc_indices;
#endif
// Per-interpreter state for the obmalloc allocator. For the main

View File

@ -42,6 +42,9 @@ typedef struct _PyThreadStateImpl {
int is_finalized;
} refcounts;
// Index to use to retrieve thread-local bytecode for this thread
int32_t tlbc_index;
// When >1, code objects do not immortalize their non-string constants.
int suppress_co_const_immortalization;
#endif
@ -52,7 +55,6 @@ typedef struct _PyThreadStateImpl {
} _PyThreadStateImpl;
#ifdef __cplusplus
}
#endif

View File

@ -193,106 +193,107 @@ extern "C" {
#define _LOAD_ATTR_SLOT_1 423
#define _LOAD_ATTR_WITH_HINT 424
#define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
#define _LOAD_BYTECODE 425
#define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT
#define _LOAD_CONST LOAD_CONST
#define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL
#define _LOAD_CONST_INLINE 425
#define _LOAD_CONST_INLINE_BORROW 426
#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 427
#define _LOAD_CONST_INLINE_WITH_NULL 428
#define _LOAD_CONST_INLINE 426
#define _LOAD_CONST_INLINE_BORROW 427
#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 428
#define _LOAD_CONST_INLINE_WITH_NULL 429
#define _LOAD_DEREF LOAD_DEREF
#define _LOAD_FAST 429
#define _LOAD_FAST_0 430
#define _LOAD_FAST_1 431
#define _LOAD_FAST_2 432
#define _LOAD_FAST_3 433
#define _LOAD_FAST_4 434
#define _LOAD_FAST_5 435
#define _LOAD_FAST_6 436
#define _LOAD_FAST_7 437
#define _LOAD_FAST 430
#define _LOAD_FAST_0 431
#define _LOAD_FAST_1 432
#define _LOAD_FAST_2 433
#define _LOAD_FAST_3 434
#define _LOAD_FAST_4 435
#define _LOAD_FAST_5 436
#define _LOAD_FAST_6 437
#define _LOAD_FAST_7 438
#define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
#define _LOAD_FAST_CHECK LOAD_FAST_CHECK
#define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
#define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
#define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
#define _LOAD_GLOBAL 438
#define _LOAD_GLOBAL_BUILTINS 439
#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 440
#define _LOAD_GLOBAL_MODULE 441
#define _LOAD_GLOBAL_MODULE_FROM_KEYS 442
#define _LOAD_GLOBAL 439
#define _LOAD_GLOBAL_BUILTINS 440
#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 441
#define _LOAD_GLOBAL_MODULE 442
#define _LOAD_GLOBAL_MODULE_FROM_KEYS 443
#define _LOAD_LOCALS LOAD_LOCALS
#define _LOAD_NAME LOAD_NAME
#define _LOAD_SMALL_INT 443
#define _LOAD_SMALL_INT_0 444
#define _LOAD_SMALL_INT_1 445
#define _LOAD_SMALL_INT_2 446
#define _LOAD_SMALL_INT_3 447
#define _LOAD_SMALL_INT 444
#define _LOAD_SMALL_INT_0 445
#define _LOAD_SMALL_INT_1 446
#define _LOAD_SMALL_INT_2 447
#define _LOAD_SMALL_INT_3 448
#define _LOAD_SPECIAL LOAD_SPECIAL
#define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
#define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
#define _MAKE_CALLARGS_A_TUPLE 448
#define _MAKE_CALLARGS_A_TUPLE 449
#define _MAKE_CELL MAKE_CELL
#define _MAKE_FUNCTION MAKE_FUNCTION
#define _MAKE_WARM 449
#define _MAKE_WARM 450
#define _MAP_ADD MAP_ADD
#define _MATCH_CLASS MATCH_CLASS
#define _MATCH_KEYS MATCH_KEYS
#define _MATCH_MAPPING MATCH_MAPPING
#define _MATCH_SEQUENCE MATCH_SEQUENCE
#define _MAYBE_EXPAND_METHOD 450
#define _MAYBE_EXPAND_METHOD_KW 451
#define _MONITOR_CALL 452
#define _MONITOR_JUMP_BACKWARD 453
#define _MONITOR_RESUME 454
#define _MAYBE_EXPAND_METHOD 451
#define _MAYBE_EXPAND_METHOD_KW 452
#define _MONITOR_CALL 453
#define _MONITOR_JUMP_BACKWARD 454
#define _MONITOR_RESUME 455
#define _NOP NOP
#define _POP_EXCEPT POP_EXCEPT
#define _POP_JUMP_IF_FALSE 455
#define _POP_JUMP_IF_TRUE 456
#define _POP_JUMP_IF_FALSE 456
#define _POP_JUMP_IF_TRUE 457
#define _POP_TOP POP_TOP
#define _POP_TOP_LOAD_CONST_INLINE_BORROW 457
#define _POP_TOP_LOAD_CONST_INLINE_BORROW 458
#define _PUSH_EXC_INFO PUSH_EXC_INFO
#define _PUSH_FRAME 458
#define _PUSH_FRAME 459
#define _PUSH_NULL PUSH_NULL
#define _PY_FRAME_GENERAL 459
#define _PY_FRAME_KW 460
#define _QUICKEN_RESUME 461
#define _REPLACE_WITH_TRUE 462
#define _PY_FRAME_GENERAL 460
#define _PY_FRAME_KW 461
#define _QUICKEN_RESUME 462
#define _REPLACE_WITH_TRUE 463
#define _RESUME_CHECK RESUME_CHECK
#define _RETURN_GENERATOR RETURN_GENERATOR
#define _RETURN_VALUE RETURN_VALUE
#define _SAVE_RETURN_OFFSET 463
#define _SEND 464
#define _SEND_GEN_FRAME 465
#define _SAVE_RETURN_OFFSET 464
#define _SEND 465
#define _SEND_GEN_FRAME 466
#define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
#define _SET_ADD SET_ADD
#define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
#define _SET_UPDATE SET_UPDATE
#define _START_EXECUTOR 466
#define _STORE_ATTR 467
#define _STORE_ATTR_INSTANCE_VALUE 468
#define _STORE_ATTR_SLOT 469
#define _STORE_ATTR_WITH_HINT 470
#define _START_EXECUTOR 467
#define _STORE_ATTR 468
#define _STORE_ATTR_INSTANCE_VALUE 469
#define _STORE_ATTR_SLOT 470
#define _STORE_ATTR_WITH_HINT 471
#define _STORE_DEREF STORE_DEREF
#define _STORE_FAST 471
#define _STORE_FAST_0 472
#define _STORE_FAST_1 473
#define _STORE_FAST_2 474
#define _STORE_FAST_3 475
#define _STORE_FAST_4 476
#define _STORE_FAST_5 477
#define _STORE_FAST_6 478
#define _STORE_FAST_7 479
#define _STORE_FAST 472
#define _STORE_FAST_0 473
#define _STORE_FAST_1 474
#define _STORE_FAST_2 475
#define _STORE_FAST_3 476
#define _STORE_FAST_4 477
#define _STORE_FAST_5 478
#define _STORE_FAST_6 479
#define _STORE_FAST_7 480
#define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
#define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
#define _STORE_GLOBAL STORE_GLOBAL
#define _STORE_NAME STORE_NAME
#define _STORE_SLICE 480
#define _STORE_SUBSCR 481
#define _STORE_SLICE 481
#define _STORE_SUBSCR 482
#define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT
#define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT
#define _SWAP SWAP
#define _TIER2_RESUME_CHECK 482
#define _TO_BOOL 483
#define _TIER2_RESUME_CHECK 483
#define _TO_BOOL 484
#define _TO_BOOL_BOOL TO_BOOL_BOOL
#define _TO_BOOL_INT TO_BOOL_INT
#define _TO_BOOL_LIST TO_BOOL_LIST
@ -302,13 +303,13 @@ extern "C" {
#define _UNARY_NEGATIVE UNARY_NEGATIVE
#define _UNARY_NOT UNARY_NOT
#define _UNPACK_EX UNPACK_EX
#define _UNPACK_SEQUENCE 484
#define _UNPACK_SEQUENCE 485
#define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST
#define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE
#define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE
#define _WITH_EXCEPT_START WITH_EXCEPT_START
#define _YIELD_VALUE YIELD_VALUE
#define MAX_UOP_ID 484
#define MAX_UOP_ID 485
#ifdef __cplusplus
}

View File

@ -289,7 +289,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_FATAL_ERROR] = 0,
[_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG,
[_DEOPT] = 0,
[_ERROR_POP_N] = HAS_ARG_FLAG,
[_ERROR_POP_N] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG,
[_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG,
};

View File

@ -1274,6 +1274,11 @@ def requires_specialization(test):
_opcode.ENABLE_SPECIALIZATION, "requires specialization")(test)
def requires_specialization_ft(test):
    """Skip *test* unless free-threading-safe specialization is enabled."""
    skip_unless = unittest.skipUnless(
        _opcode.ENABLE_SPECIALIZATION_FT, "requires specialization")
    return skip_unless(test)
#=======================================================================
# Check for the presence of docstrings.

View File

@ -100,6 +100,7 @@ class CAPITests(unittest.TestCase):
options.append(("run_presite", str | None, None))
if sysconfig.get_config_var('Py_GIL_DISABLED'):
options.append(("enable_gil", int, None))
options.append(("tlbc_enabled", int, None))
if support.MS_WINDOWS:
options.extend((
("legacy_windows_stdio", bool, None),

View File

@ -7,7 +7,8 @@ import os
import _opcode
from test.support import script_helper, requires_specialization, import_helper
from test.support import (script_helper, requires_specialization,
import_helper, Py_GIL_DISABLED)
_testinternalcapi = import_helper.import_module("_testinternalcapi")
@ -34,6 +35,7 @@ def clear_executors(func):
@requires_specialization
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"),
"Requires optimizer infrastructure")
class TestOptimizerAPI(unittest.TestCase):
@ -138,6 +140,7 @@ def get_opnames(ex):
@requires_specialization
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"),
"Requires optimizer infrastructure")
class TestExecutorInvalidation(unittest.TestCase):
@ -219,6 +222,7 @@ class TestExecutorInvalidation(unittest.TestCase):
@requires_specialization
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"),
"Requires optimizer infrastructure")
@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.")
@ -586,6 +590,7 @@ class TestUops(unittest.TestCase):
@requires_specialization
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"),
"Requires optimizer infrastructure")
@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.")

View File

@ -12,6 +12,7 @@ import unittest
from test import support
from test.support import os_helper
from test.support import force_not_colorized
from test.support import threading_helper
from test.support.script_helper import (
spawn_python, kill_python, assert_python_ok, assert_python_failure,
interpreter_requires_environment
@ -1068,6 +1069,57 @@ class CmdLineTest(unittest.TestCase):
out = res.out.strip().decode("utf-8")
return tuple(int(i) for i in out.split())
@unittest.skipUnless(support.Py_GIL_DISABLED,
                     "PYTHON_TLBC and -X tlbc"
                     " only supported in Py_GIL_DISABLED builds")
@threading_helper.requires_working_threading()
def test_disable_thread_local_bytecode(self):
    """Disabling TLBC via -X tlbc=0 or PYTHON_TLBC=0 must not break
    multithreaded execution."""
    code = """if 1:
        import threading

        def test(x, y):
            return x + y

        t = threading.Thread(target=test, args=(1,2))
        t.start()
        t.join()"""
    assert_python_ok("-W", "always", "-X", "tlbc=0", "-c", code)
    assert_python_ok("-W", "always", "-c", code, PYTHON_TLBC="0")
@unittest.skipUnless(support.Py_GIL_DISABLED,
                     "PYTHON_TLBC and -X tlbc"
                     " only supported in Py_GIL_DISABLED builds")
@threading_helper.requires_working_threading()
def test_enable_thread_local_bytecode(self):
    """Explicitly enabling TLBC must work via both the -X option and the
    environment variable."""
    code = """if 1:
        import threading

        def test(x, y):
            return x + y

        t = threading.Thread(target=test, args=(1,2))
        t.start()
        t.join()"""
    # The functionality of thread-local bytecode is tested more extensively
    # in test_thread_local_bytecode
    assert_python_ok("-W", "always", "-X", "tlbc=1", "-c", code)
    assert_python_ok("-W", "always", "-c", code, PYTHON_TLBC="1")
@unittest.skipUnless(support.Py_GIL_DISABLED,
                     "PYTHON_TLBC and -X tlbc"
                     " only supported in Py_GIL_DISABLED builds")
def test_invalid_thread_local_bytecode(self):
    """Missing and out-of-range TLBC values must fail interpreter startup
    with a clear diagnostic, for both -X tlbc and PYTHON_TLBC."""
    rc, out, err = assert_python_failure("-X", "tlbc")
    self.assertIn(b"tlbc=n: n is missing or invalid", err)
    rc, out, err = assert_python_failure("-X", "tlbc=foo")
    self.assertIn(b"tlbc=n: n is missing or invalid", err)
    rc, out, err = assert_python_failure("-X", "tlbc=-1")
    self.assertIn(b"tlbc=n: n is missing or invalid", err)
    rc, out, err = assert_python_failure("-X", "tlbc=2")
    self.assertIn(b"tlbc=n: n is missing or invalid", err)
    rc, out, err = assert_python_failure(PYTHON_TLBC="foo")
    self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err)
    rc, out, err = assert_python_failure(PYTHON_TLBC="-1")
    self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err)
    rc, out, err = assert_python_failure(PYTHON_TLBC="2")
    self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err)
@unittest.skipIf(interpreter_requires_environment(),
'Cannot run -I tests when PYTHON env vars are required.')

View File

@ -10,7 +10,8 @@ import sys
import types
import unittest
from test.support import (captured_stdout, requires_debug_ranges,
requires_specialization, cpython_only)
requires_specialization, requires_specialization_ft,
cpython_only)
from test.support.bytecode_helper import BytecodeTestCase
import opcode
@ -1261,7 +1262,7 @@ class DisTests(DisTestBase):
self.do_disassembly_compare(got, dis_load_test_quickened_code)
@cpython_only
@requires_specialization
@requires_specialization_ft
def test_binary_specialize(self):
binary_op_quicken = """\
0 RESUME_CHECK 0
@ -1281,6 +1282,9 @@ class DisTests(DisTestBase):
got = self.get_disassembly(co_unicode, adaptive=True)
self.do_disassembly_compare(got, binary_op_quicken % "BINARY_OP_ADD_UNICODE 0 (+)")
@cpython_only
@requires_specialization
def test_binary_subscr_specialize(self):
binary_subscr_quicken = """\
0 RESUME_CHECK 0

View File

@ -644,6 +644,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
CONFIG_COMPAT['run_presite'] = None
if support.Py_GIL_DISABLED:
CONFIG_COMPAT['enable_gil'] = -1
CONFIG_COMPAT['tlbc_enabled'] = GET_DEFAULT_CONFIG
if MS_WINDOWS:
CONFIG_COMPAT.update({
'legacy_windows_stdio': False,

View File

@ -1094,7 +1094,14 @@ class SysModuleTest(unittest.TestCase):
# While we could imagine a Python session where the number of
# multiple buffer objects would exceed the sharing of references,
# it is unlikely to happen in a normal test run.
self.assertLess(a, sys.gettotalrefcount())
#
# In free-threaded builds each code object owns an array of
# pointers to copies of the bytecode. When the number of
# code objects is a large fraction of the total number of
# references, this can cause the total number of allocated
# blocks to exceed the total number of references.
if not support.Py_GIL_DISABLED:
self.assertLess(a, sys.gettotalrefcount())
except AttributeError:
# gettotalrefcount() not available
pass
@ -1613,7 +1620,10 @@ class SizeofTest(unittest.TestCase):
def func():
return sys._getframe()
x = func()
INTERPRETER_FRAME = '9PhcP'
if support.Py_GIL_DISABLED:
INTERPRETER_FRAME = '10PhcP'
else:
INTERPRETER_FRAME = '9PhcP'
check(x, size('3PiccPP' + INTERPRETER_FRAME + 'P'))
# function
def func(): pass

View File

@ -0,0 +1,198 @@
"""Tests for thread-local bytecode."""
import dis
import textwrap
import unittest
from test import support
from test.support import cpython_only, import_helper, requires_specialization_ft
from test.support.script_helper import assert_python_ok
from test.support.threading_helper import requires_working_threading
# Skip this test if the _testinternalcapi module isn't available
_testinternalcapi = import_helper.import_module("_testinternalcapi")
@cpython_only
@requires_working_threading()
@unittest.skipUnless(support.Py_GIL_DISABLED, "only in free-threaded builds")
class TLBCTests(unittest.TestCase):
    @requires_specialization_ft
    def test_new_threads_start_with_unspecialized_code(self):
        """A new thread's copy of the bytecode must not inherit the
        specializations made by another thread."""
        code = textwrap.dedent("""
        import dis
        import queue
        import threading

        from _testinternalcapi import get_tlbc

        def all_opnames(bc):
            return {i.opname for i in dis._get_instructions_bytes(bc)}

        def f(a, b, q=None):
            if q is not None:
                q.put(get_tlbc(f))
            return a + b

        for _ in range(100):
            # specialize
            f(1, 2)

        q = queue.Queue()
        t = threading.Thread(target=f, args=('a', 'b', q))
        t.start()
        t.join()

        assert "BINARY_OP_ADD_INT" in all_opnames(get_tlbc(f))
        assert "BINARY_OP_ADD_INT" not in all_opnames(q.get())
        """)
        assert_python_ok("-X", "tlbc=1", "-c", code)

    @requires_specialization_ft
    def test_threads_specialize_independently(self):
        """Each thread specializes its own copy: int specialization in the
        main thread, unicode specialization in the worker."""
        code = textwrap.dedent("""
        import dis
        import queue
        import threading

        from _testinternalcapi import get_tlbc

        def all_opnames(bc):
            return {i.opname for i in dis._get_instructions_bytes(bc)}

        def f(a, b):
            return a + b

        def g(a, b, q=None):
            for _ in range(100):
                f(a, b)
            if q is not None:
                q.put(get_tlbc(f))

        # specialize in main thread
        g(1, 2)

        # specialize in other thread
        q = queue.Queue()
        t = threading.Thread(target=g, args=('a', 'b', q))
        t.start()
        t.join()

        assert "BINARY_OP_ADD_INT" in all_opnames(get_tlbc(f))
        t_opnames = all_opnames(q.get())
        assert "BINARY_OP_ADD_INT" not in t_opnames
        assert "BINARY_OP_ADD_UNICODE" in t_opnames
        """)
        assert_python_ok("-X", "tlbc=1", "-c", code)

    def test_reuse_tlbc_across_threads_different_lifetimes(self):
        """Threads with non-overlapping lifetimes should end up with the
        same tlbc id (indices are released and reused)."""
        code = textwrap.dedent("""
        import queue
        import threading

        from _testinternalcapi import get_tlbc_id

        def f(a, b, q=None):
            if q is not None:
                q.put(get_tlbc_id(f))
            return a + b

        q = queue.Queue()
        tlbc_ids = []
        for _ in range(3):
            t = threading.Thread(target=f, args=('a', 'b', q))
            t.start()
            t.join()
            tlbc_ids.append(q.get())

        assert tlbc_ids[0] == tlbc_ids[1]
        assert tlbc_ids[1] == tlbc_ids[2]
        """)
        assert_python_ok("-X", "tlbc=1", "-c", code)

    def test_no_copies_if_tlbc_disabled(self):
        """With tlbc=0 every thread must share the main copy of the
        bytecode (same tlbc id everywhere)."""
        code = textwrap.dedent("""
        import queue
        import threading

        from _testinternalcapi import get_tlbc_id

        def f(a, b, q=None):
            if q is not None:
                q.put(get_tlbc_id(f))
            return a + b

        q = queue.Queue()
        threads = []
        for _ in range(3):
            t = threading.Thread(target=f, args=('a', 'b', q))
            t.start()
            threads.append(t)

        tlbc_ids = []
        for t in threads:
            t.join()
            tlbc_ids.append(q.get())

        main_tlbc_id = get_tlbc_id(f)
        assert main_tlbc_id is not None
        assert tlbc_ids[0] == main_tlbc_id
        assert tlbc_ids[1] == main_tlbc_id
        assert tlbc_ids[2] == main_tlbc_id
        """)
        assert_python_ok("-X", "tlbc=0", "-c", code)

    def test_no_specialization_if_tlbc_disabled(self):
        """Disabling thread-local bytecode also disables specialization."""
        code = textwrap.dedent("""
        import dis
        import queue
        import threading

        from _testinternalcapi import get_tlbc

        def all_opnames(f):
            bc = get_tlbc(f)
            return {i.opname for i in dis._get_instructions_bytes(bc)}

        def f(a, b):
            return a + b

        for _ in range(100):
            f(1, 2)

        assert "BINARY_OP_ADD_INT" not in all_opnames(f)
        """)
        assert_python_ok("-X", "tlbc=0", "-c", code)

    def test_generator_throw(self):
        """gen.throw() across threads: the tlbc id observed when a generator
        is resumed in one thread must differ from the id observed in
        another thread."""
        code = textwrap.dedent("""
        import queue
        import threading

        from _testinternalcapi import get_tlbc_id

        def g():
            try:
                yield
            except:
                yield get_tlbc_id(g)

        def f(q):
            gen = g()
            next(gen)
            q.put(gen.throw(ValueError))

        q = queue.Queue()
        t = threading.Thread(target=f, args=(q,))
        t.start()
        t.join()

        gen = g()
        next(gen)
        main_id = gen.throw(ValueError)
        assert main_id != q.get()
        """)
        assert_python_ok("-X", "tlbc=1", "-c", code)
if __name__ == "__main__":
    # Allow running this test file directly.
    unittest.main()

View File

@ -460,6 +460,7 @@ PYTHON_OBJS= \
Python/hashtable.o \
Python/import.o \
Python/importdl.o \
Python/index_pool.o \
Python/initconfig.o \
Python/interpconfig.o \
Python/instrumentation.o \
@ -1228,6 +1229,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_hashtable.h \
$(srcdir)/Include/internal/pycore_import.h \
$(srcdir)/Include/internal/pycore_importdl.h \
$(srcdir)/Include/internal/pycore_index_pool.h \
$(srcdir)/Include/internal/pycore_initconfig.h \
$(srcdir)/Include/internal/pycore_instruments.h \
$(srcdir)/Include/internal/pycore_instruction_sequence.h \

View File

@ -422,6 +422,9 @@ _opcode_exec(PyObject *m) {
if (PyModule_AddIntMacro(m, ENABLE_SPECIALIZATION) < 0) {
return -1;
}
if (PyModule_AddIntMacro(m, ENABLE_SPECIALIZATION_FT) < 0) {
return -1;
}
return 0;
}

View File

@ -14,6 +14,7 @@
#include "pycore_bitutils.h" // _Py_bswap32()
#include "pycore_bytesobject.h" // _PyBytes_Find()
#include "pycore_ceval.h" // _PyEval_AddPendingCall()
#include "pycore_code.h" // _PyCode_GetTLBCFast()
#include "pycore_compile.h" // _PyCompile_CodeGen()
#include "pycore_context.h" // _PyContext_NewHamtForTests()
#include "pycore_dict.h" // _PyManagedDictPointer_GetValues()
@ -1963,6 +1964,48 @@ get_py_thread_id(PyObject *self, PyObject *Py_UNUSED(ignored))
Py_BUILD_ASSERT(sizeof(unsigned long long) >= sizeof(tid));
return PyLong_FromUnsignedLongLong(tid);
}
// Extract the code object from `obj`, which must be either a code object or
// a function. Returns a borrowed reference, or NULL with TypeError set.
static PyCodeObject *
get_code(PyObject *obj)
{
    if (PyFunction_Check(obj)) {
        return (PyCodeObject *)PyFunction_GetCode(obj);
    }
    if (PyCode_Check(obj)) {
        return (PyCodeObject *)obj;
    }
    PyErr_Format(PyExc_TypeError,
                 "expected function or code object, got %s",
                 Py_TYPE(obj)->tp_name);
    return NULL;
}
// Return the calling thread's bytecode for the function/code object `obj` as
// a bytes object, or None when no thread-local copy exists for this thread.
static PyObject *
get_tlbc(PyObject *Py_UNUSED(module), PyObject *obj)
{
    PyCodeObject *code = get_code(obj);
    if (code == NULL) {
        return NULL;
    }
    PyThreadState *tstate = PyThreadState_GET();
    _Py_CODEUNIT *units = _PyCode_GetTLBCFast(tstate, code);
    if (units == NULL) {
        Py_RETURN_NONE;
    }
    return PyBytes_FromStringAndSize((const char *)units,
                                     _PyCode_NBYTES(code));
}
// Return the address of the calling thread's bytecode for `obj` as a Python
// int (usable as an identity token), or None when no thread-local copy
// exists for this thread.
static PyObject *
get_tlbc_id(PyObject *Py_UNUSED(module), PyObject *obj)
{
    PyCodeObject *code = get_code(obj);
    if (code == NULL) {
        return NULL;
    }
    PyThreadState *tstate = PyThreadState_GET();
    _Py_CODEUNIT *units = _PyCode_GetTLBCFast(tstate, code);
    if (units == NULL) {
        Py_RETURN_NONE;
    }
    return PyLong_FromVoidPtr(units);
}
#endif
static PyObject *
@ -2022,7 +2065,6 @@ identify_type_slot_wrappers(PyObject *self, PyObject *Py_UNUSED(ignored))
return _PyType_GetSlotWrapperNames();
}
static PyMethodDef module_functions[] = {
{"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@ -2110,6 +2152,8 @@ static PyMethodDef module_functions[] = {
#ifdef Py_GIL_DISABLED
{"py_thread_id", get_py_thread_id, METH_NOARGS},
{"get_tlbc", get_tlbc, METH_O, NULL},
{"get_tlbc_id", get_tlbc_id, METH_O, NULL},
#endif
#ifdef _Py_TIER2
{"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},

View File

@ -6,17 +6,22 @@
#include "pycore_code.h" // _PyCodeConstructor
#include "pycore_frame.h" // FRAME_SPECIALS_SIZE
#include "pycore_hashtable.h" // _Py_hashtable_t
#include "pycore_index_pool.h" // _PyIndexPool
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_interp.h" // PyInterpreterState.co_extra_freefuncs
#include "pycore_object.h" // _PyObject_SetDeferredRefcount
#include "pycore_object_stack.h"
#include "pycore_opcode_metadata.h" // _PyOpcode_Deopt, _PyOpcode_Caches
#include "pycore_opcode_utils.h" // RESUME_AT_FUNC_START
#include "pycore_pymem.h" // _PyMem_FreeDelayed
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_setobject.h" // _PySet_NextEntry()
#include "pycore_tuple.h" // _PyTuple_ITEMS()
#include "pycore_uniqueid.h" // _PyObject_AssignUniqueId()
#include "clinic/codeobject.c.h"
#define INITIAL_SPECIALIZED_CODE_SIZE 16
static const char *
code_event_name(PyCodeEvent event) {
switch (event) {
@ -440,9 +445,15 @@ _PyCode_Validate(struct _PyCodeConstructor *con)
return 0;
}
extern void _PyCode_Quicken(PyCodeObject *code);
extern void
_PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, PyObject *consts,
int enable_counters);
static void
#ifdef Py_GIL_DISABLED
static _PyCodeArray * _PyCodeArray_New(Py_ssize_t size);
#endif
static int
init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
{
int nlocalsplus = (int)PyTuple_GET_SIZE(con->localsplusnames);
@ -505,14 +516,27 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
memcpy(_PyCode_CODE(co), PyBytes_AS_STRING(con->code),
PyBytes_GET_SIZE(con->code));
#ifdef Py_GIL_DISABLED
co->co_tlbc = _PyCodeArray_New(INITIAL_SPECIALIZED_CODE_SIZE);
if (co->co_tlbc == NULL) {
return -1;
}
co->co_tlbc->entries[0] = co->co_code_adaptive;
#endif
int entry_point = 0;
while (entry_point < Py_SIZE(co) &&
_PyCode_CODE(co)[entry_point].op.code != RESUME) {
entry_point++;
}
co->_co_firsttraceable = entry_point;
_PyCode_Quicken(co);
#ifdef Py_GIL_DISABLED
_PyCode_Quicken(_PyCode_CODE(co), Py_SIZE(co), co->co_consts,
interp->config.tlbc_enabled);
#else
_PyCode_Quicken(_PyCode_CODE(co), Py_SIZE(co), co->co_consts, 1);
#endif
notify_code_watchers(PY_CODE_EVENT_CREATE, co);
return 0;
}
static int
@ -667,7 +691,12 @@ _PyCode_New(struct _PyCodeConstructor *con)
PyErr_NoMemory();
return NULL;
}
init_code(co, con);
if (init_code(co, con) < 0) {
Py_DECREF(co);
return NULL;
}
#ifdef Py_GIL_DISABLED
co->_co_unique_id = _PyObject_AssignUniqueId((PyObject *)co);
_PyObject_GC_TRACK(co);
@ -1871,6 +1900,17 @@ code_dealloc(PyCodeObject *co)
PyObject_ClearWeakRefs((PyObject*)co);
}
free_monitoring_data(co->_co_monitoring);
#ifdef Py_GIL_DISABLED
// The first element always points to the mutable bytecode at the end of
// the code object, which will be freed when the code object is freed.
for (Py_ssize_t i = 1; i < co->co_tlbc->size; i++) {
char *entry = co->co_tlbc->entries[i];
if (entry != NULL) {
PyMem_Free(entry);
}
}
PyMem_Free(co->co_tlbc);
#endif
PyObject_Free(co);
}
@ -2646,5 +2686,270 @@ _PyCode_Fini(PyInterpreterState *interp)
_Py_hashtable_destroy(state->constants);
state->constants = NULL;
}
_PyIndexPool_Fini(&interp->tlbc_indices);
#endif
}
#ifdef Py_GIL_DISABLED
// Thread-local bytecode (TLBC)
//
// Each thread specializes a thread-local copy of the bytecode, created on the
// first RESUME, in free-threaded builds. All copies of the bytecode for a code
// object are stored in the `co_tlbc` array. Threads reserve a globally unique
// index identifying its copy of the bytecode in all `co_tlbc` arrays at thread
// creation and release the index at thread destruction. The first entry in
// every `co_tlbc` array always points to the "main" copy of the bytecode that
// is stored at the end of the code object. This ensures that no bytecode is
// copied for programs that do not use threads.
//
// Thread-local bytecode can be disabled at runtime by providing either `-X
// tlbc=0` or `PYTHON_TLBC=0`. Disabling thread-local bytecode also disables
// specialization. All threads share the main copy of the bytecode when
// thread-local bytecode is disabled.
//
// Concurrent modifications to the bytecode made by the specializing
// interpreter and instrumentation use atomics, with specialization taking care
// not to overwrite an instruction that was instrumented concurrently.
/* Reserve a globally unique TLBC index for a new thread. When TLBC is
 * disabled, every thread shares the main copy of the bytecode at index 0. */
int32_t
_Py_ReserveTLBCIndex(PyInterpreterState *interp)
{
    if (!interp->config.tlbc_enabled) {
        return 0;
    }
    return _PyIndexPool_AllocIndex(&interp->tlbc_indices);
}
/* Return a dying thread's TLBC index to the pool. Nothing was reserved
 * when TLBC is disabled, so there is nothing to release in that case. */
void
_Py_ClearTLBCIndex(_PyThreadStateImpl *tstate)
{
    PyInterpreterState *interp = ((PyThreadState *)tstate)->interp;
    if (!interp->config.tlbc_enabled) {
        return;
    }
    _PyIndexPool_FreeIndex(&interp->tlbc_indices, tstate->tlbc_index);
}
/* Allocate a zero-initialized _PyCodeArray with room for `size` entries.
 * Returns NULL with MemoryError set on allocation failure. */
static _PyCodeArray *
_PyCodeArray_New(Py_ssize_t size)
{
    size_t nbytes = offsetof(_PyCodeArray, entries) + sizeof(void *) * size;
    _PyCodeArray *arr = PyMem_Calloc(1, nbytes);
    if (arr == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    arr->size = size;
    return arr;
}
/* Copy the de-optimized (base) bytecode of `co` into `dst`, then apply the
 * initial quickening pass to the copy. Only instruction words are written;
 * inline cache slots keep the zeros the caller's calloc provided. */
static void
copy_code(_Py_CODEUNIT *dst, PyCodeObject *co)
{
    int ncodeunits = (int) Py_SIZE(co);
    int off = 0;
    while (off < ncodeunits) {
        dst[off] = _Py_GetBaseCodeUnit(co, off);
        off += _PyInstruction_GetLength(co, off);
    }
    _PyCode_Quicken(dst, ncodeunits, co->co_consts, 1);
}
/* Return the smallest power of two that is >= both `initial` and `limit`,
 * or 0 if the doubling overflows. `initial` must itself be a power of two. */
static Py_ssize_t
get_pow2_greater(Py_ssize_t initial, Py_ssize_t limit)
{
    assert(!(initial & (initial - 1)));
    Py_ssize_t result;
    for (result = initial; result != 0 && result < limit; result <<= 1) {
        /* keep doubling */
    }
    return result;
}
// Create this thread's copy of the bytecode for `co` at index `idx`, growing
// the `co_tlbc` array first if `idx` does not fit. Must be called with the
// code object's critical section held. Returns NULL with MemoryError set on
// failure.
static _Py_CODEUNIT *
create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx)
{
    _PyCodeArray *tlbc = co->co_tlbc;
    if (idx >= tlbc->size) {
        Py_ssize_t new_size = get_pow2_greater(tlbc->size, idx + 1);
        if (!new_size) {
            // get_pow2_greater returns 0 on overflow
            PyErr_NoMemory();
            return NULL;
        }
        _PyCodeArray *new_tlbc = _PyCodeArray_New(new_size);
        if (new_tlbc == NULL) {
            return NULL;
        }
        memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *));
        // Publish the new array with release semantics so lock-free readers
        // observe fully-initialized entries.
        _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc);
        // The old array may still be in use by concurrent readers; defer
        // freeing it instead of freeing immediately.
        _PyMem_FreeDelayed(tlbc);
        tlbc = new_tlbc;
    }
    char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co));
    if (bc == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    copy_code((_Py_CODEUNIT *) bc, co);
    assert(tlbc->entries[idx] == NULL);
    tlbc->entries[idx] = bc;
    return (_Py_CODEUNIT *) bc;
}
/* Return the calling thread's bytecode copy for `co`, creating it on first
 * use. Must be called with the code object's critical section held. */
static _Py_CODEUNIT *
get_tlbc_lock_held(PyCodeObject *co)
{
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
    int32_t idx = tstate->tlbc_index;
    _PyCodeArray *tlbc = co->co_tlbc;
    if (idx >= tlbc->size || tlbc->entries[idx] == NULL) {
        return create_tlbc_lock_held(co, idx);
    }
    return (_Py_CODEUNIT *)tlbc->entries[idx];
}
// Return the calling thread's copy of the bytecode for `co`, creating it if
// necessary. Returns NULL with an exception set on allocation failure.
_Py_CODEUNIT *
_PyCode_GetTLBC(PyCodeObject *co)
{
    _Py_CODEUNIT *result;
    // Serialize against other threads creating copies or growing co_tlbc.
    Py_BEGIN_CRITICAL_SECTION(co);
    result = get_tlbc_lock_held(co);
    Py_END_CRITICAL_SECTION();
    return result;
}
// My kingdom for a bitset
// Byte-per-flag set used to record which TLBC indices are currently in use.
struct flag_set {
    uint8_t *flags;     // one byte per index; nonzero means "in use"
    Py_ssize_t size;    // number of entries in `flags`
};
// Return nonzero if `idx` is set. Indices beyond `size` are implicitly unset.
static inline int
flag_is_set(struct flag_set *flags, Py_ssize_t idx)
{
    assert(idx >= 0);
    return (idx < flags->size) && flags->flags[idx];
}
// Set the flag for each tlbc index in use
// Returns 0 on success, -1 on allocation failure (no exception set here;
// the caller reports the memory error).
static int
get_indices_in_use(PyInterpreterState *interp, struct flag_set *in_use)
{
    // The world must be stopped so the thread list and each thread's
    // tlbc_index cannot change while we scan them.
    assert(interp->stoptheworld.world_stopped);
    assert(in_use->flags == NULL);
    // First pass: find the largest index so we know how many flags to allocate.
    int32_t max_index = 0;
    for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
        int32_t idx = ((_PyThreadStateImpl *) p)->tlbc_index;
        if (idx > max_index) {
            max_index = idx;
        }
    }
    in_use->size = (size_t) max_index + 1;
    in_use->flags = PyMem_Calloc(in_use->size, sizeof(*in_use->flags));
    if (in_use->flags == NULL) {
        return -1;
    }
    // Second pass: mark every index owned by a live thread.
    for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
        in_use->flags[((_PyThreadStateImpl *) p)->tlbc_index] = 1;
    }
    return 0;
}
// Scratch state threaded through the GC heap visitor in _Py_ClearUnusedTLBC.
struct get_code_args {
    _PyObjectStack code_objs;       // code objects found to have unused copies
    struct flag_set indices_in_use; // one flag per TLBC index currently owned
    int err;                        // set to -1 if the visitor fails
};
/* Release the scratch state used while scanning for unused bytecode.
 * PyMem_Free(NULL) is a documented no-op, so no NULL guard is needed;
 * the pointer is reset so the struct can be safely cleared again. */
static void
clear_get_code_args(struct get_code_args *args)
{
    PyMem_Free(args->indices_in_use.flags);
    args->indices_in_use.flags = NULL;
    _PyObjectStack_Clear(&args->code_objs);
}
/* Return nonzero if a bytecode copy exists at `idx` but no live thread
 * owns that index. Index 0 (the main copy) is never considered. */
static inline int
is_bytecode_unused(_PyCodeArray *tlbc, Py_ssize_t idx,
                   struct flag_set *indices_in_use)
{
    assert(idx > 0 && idx < tlbc->size);
    if (tlbc->entries[idx] == NULL) {
        return 0;
    }
    return !flag_is_set(indices_in_use, idx);
}
/* GC heap visitor: push onto args->code_objs each code object that owns at
 * least one bytecode copy no thread is using. Returns 1 to continue the
 * traversal, 0 (with args->err set) to abort it on allocation failure. */
static int
get_code_with_unused_tlbc(PyObject *obj, struct get_code_args *args)
{
    if (!PyCode_Check(obj)) {
        return 1;
    }
    _PyCodeArray *tlbc = ((PyCodeObject *) obj)->co_tlbc;
    // Entry 0 is the main copy embedded in the code object; skip it.
    for (Py_ssize_t idx = 1; idx < tlbc->size; idx++) {
        if (!is_bytecode_unused(tlbc, idx, &args->indices_in_use)) {
            continue;
        }
        if (_PyObjectStack_Push(&args->code_objs, obj) < 0) {
            args->err = -1;
            return 0;
        }
        // One hit is enough; the code object is pushed at most once.
        break;
    }
    return 1;
}
/* Free every bytecode copy of `co` whose index is not owned by any live
 * thread. Entry 0 is the main copy stored inside the code object itself
 * and is never freed here. */
static void
free_unused_bytecode(PyCodeObject *co, struct flag_set *indices_in_use)
{
    _PyCodeArray *tlbc = co->co_tlbc;
    for (Py_ssize_t idx = 1; idx < tlbc->size; idx++) {
        if (!is_bytecode_unused(tlbc, idx, indices_in_use)) {
            continue;
        }
        PyMem_Free(tlbc->entries[idx]);
        tlbc->entries[idx] = NULL;
    }
}
// Free all thread-local bytecode copies that no live thread is using.
// Stops the world so thread TLBC indices are stable and the heap walk is
// safe. Returns 0 on success, -1 with MemoryError set on failure.
int
_Py_ClearUnusedTLBC(PyInterpreterState *interp)
{
    struct get_code_args args = {
        .code_objs = {NULL},
        .indices_in_use = {NULL, 0},
        .err = 0,
    };
    _PyEval_StopTheWorld(interp);
    // Collect in-use tlbc indices
    if (get_indices_in_use(interp, &args.indices_in_use) < 0) {
        goto err;
    }
    // Collect code objects that have bytecode not in use by any thread
    _PyGC_VisitObjectsWorldStopped(
        interp, (gcvisitobjects_t)get_code_with_unused_tlbc, &args);
    if (args.err < 0) {
        goto err;
    }
    // Free unused bytecode. This must happen outside of gc_visit_heaps; it is
    // unsafe to allocate or free any mimalloc managed memory when it's
    // running.
    PyObject *obj;
    while ((obj = _PyObjectStack_Pop(&args.code_objs)) != NULL) {
        free_unused_bytecode((PyCodeObject*) obj, &args.indices_in_use);
    }
    _PyEval_StartTheWorld(interp);
    clear_get_code_args(&args);
    return 0;
err:
    // Both failure paths above are out-of-memory conditions; restart the
    // world before raising so other threads are not left suspended.
    _PyEval_StartTheWorld(interp);
    clear_get_code_args(&args);
    PyErr_NoMemory();
    return -1;
}
#endif

View File

@ -1651,7 +1651,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno, void *Py_UNUSED(ignore
}
/* Finally set the new lasti and return OK. */
f->f_lineno = 0;
f->f_frame->instr_ptr = _PyCode_CODE(code) + best_addr;
f->f_frame->instr_ptr = _PyFrame_GetBytecode(f->f_frame) + best_addr;
return 0;
}
@ -1867,10 +1867,11 @@ PyTypeObject PyFrame_Type = {
};
static void
init_frame(_PyInterpreterFrame *frame, PyFunctionObject *func, PyObject *locals)
init_frame(PyThreadState *tstate, _PyInterpreterFrame *frame,
PyFunctionObject *func, PyObject *locals)
{
PyCodeObject *code = (PyCodeObject *)func->func_code;
_PyFrame_Initialize(frame, PyStackRef_FromPyObjectNew(func),
_PyFrame_Initialize(tstate, frame, PyStackRef_FromPyObjectNew(func),
Py_XNewRef(locals), code, 0, NULL);
}
@ -1922,7 +1923,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code,
Py_DECREF(func);
return NULL;
}
init_frame((_PyInterpreterFrame *)f->_f_frame_data, func, locals);
init_frame(tstate, (_PyInterpreterFrame *)f->_f_frame_data, func, locals);
f->f_frame = (_PyInterpreterFrame *)f->_f_frame_data;
f->f_frame->owner = FRAME_OWNED_BY_FRAME_OBJECT;
// This frame needs to be "complete", so pretend that the first RESUME ran:
@ -1941,7 +1942,8 @@ frame_init_get_vars(_PyInterpreterFrame *frame)
// here:
PyCodeObject *co = _PyFrame_GetCode(frame);
int lasti = _PyInterpreterFrame_LASTI(frame);
if (!(lasti < 0 && _PyCode_CODE(co)->op.code == COPY_FREE_VARS
if (!(lasti < 0
&& _PyFrame_GetBytecode(frame)->op.code == COPY_FREE_VARS
&& PyStackRef_FunctionCheck(frame->f_funcobj)))
{
/* Free vars are initialized */
@ -1957,7 +1959,7 @@ frame_init_get_vars(_PyInterpreterFrame *frame)
frame->localsplus[offset + i] = PyStackRef_FromPyObjectNew(o);
}
// COPY_FREE_VARS doesn't have inline CACHEs, either:
frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame));
frame->instr_ptr = _PyFrame_GetBytecode(frame);
}

View File

@ -11638,9 +11638,10 @@ super_descr_get(PyObject *self, PyObject *obj, PyObject *type)
}
static int
super_init_without_args(_PyInterpreterFrame *cframe, PyCodeObject *co,
PyTypeObject **type_p, PyObject **obj_p)
super_init_without_args(_PyInterpreterFrame *cframe, PyTypeObject **type_p,
PyObject **obj_p)
{
PyCodeObject *co = _PyFrame_GetCode(cframe);
if (co->co_argcount == 0) {
PyErr_SetString(PyExc_RuntimeError,
"super(): no arguments");
@ -11740,7 +11741,7 @@ super_init_impl(PyObject *self, PyTypeObject *type, PyObject *obj) {
"super(): no current frame");
return -1;
}
int res = super_init_without_args(frame, _PyFrame_GetCode(frame), &type, &obj);
int res = super_init_without_args(frame, &type, &obj);
if (res < 0) {
return -1;

View File

@ -222,6 +222,7 @@
<ClCompile Include="..\Python\hashtable.c" />
<ClCompile Include="..\Python\import.c" />
<ClCompile Include="..\Python\importdl.c" />
<ClCompile Include="..\Python\index_pool.c" />
<ClCompile Include="..\Python\initconfig.c" />
<ClCompile Include="..\Python\instruction_sequence.c" />
<ClCompile Include="..\Python\interpconfig.c" />

View File

@ -232,6 +232,9 @@
<ClCompile Include="..\Python\importdl.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Python\index_pool.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Python\initconfig.c">
<Filter>Source Files</Filter>
</ClCompile>

View File

@ -255,6 +255,7 @@
<ClInclude Include="..\Include\internal\pycore_identifier.h" />
<ClInclude Include="..\Include\internal\pycore_import.h" />
<ClInclude Include="..\Include\internal\pycore_importdl.h" />
<ClInclude Include="..\Include\internal\pycore_index_pool.h" />
<ClInclude Include="..\Include\internal\pycore_initconfig.h" />
<ClInclude Include="..\Include\internal\pycore_instruction_sequence.h" />
<ClInclude Include="..\Include\internal\pycore_interp.h" />
@ -614,6 +615,7 @@
<ClCompile Include="..\Python\hashtable.c" />
<ClCompile Include="..\Python\import.c" />
<ClCompile Include="..\Python\importdl.c" />
<ClCompile Include="..\Python\index_pool.c" />
<ClCompile Include="..\Python\initconfig.c" />
<ClCompile Include="..\Python\interpconfig.c" />
<ClCompile Include="..\Python\intrinsics.c" />

View File

@ -687,6 +687,9 @@
<ClInclude Include="..\Include\internal\pycore_importdl.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_index_pool.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_initconfig.h">
<Filter>Include\internal</Filter>
</ClInclude>
@ -1373,6 +1376,9 @@
<ClCompile Include="..\Python\importdl.c">
<Filter>Python</Filter>
</ClCompile>
<ClCompile Include="..\Python\index_pool.c">
<Filter>Python</Filter>
</ClCompile>
<ClCompile Include="..\Python\initconfig.c">
<Filter>Python</Filter>
</ClCompile>

View File

@ -168,11 +168,11 @@ dummy_func(
}
op(_QUICKEN_RESUME, (--)) {
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (tstate->tracing == 0 && this_instr->op.code == RESUME) {
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK);
}
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}
tier1 op(_MAYBE_INSTRUMENT, (--)) {
@ -190,7 +190,26 @@ dummy_func(
}
}
op(_LOAD_BYTECODE, (--)) {
#ifdef Py_GIL_DISABLED
if (frame->tlbc_index !=
((_PyThreadStateImpl *)tstate)->tlbc_index) {
_Py_CODEUNIT *bytecode =
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
ERROR_IF(bytecode == NULL, error);
int off = this_instr - _PyFrame_GetBytecode(frame);
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
frame->instr_ptr = bytecode + off;
// Make sure this_instr gets reset correctly for any uops that
// follow
next_instr = frame->instr_ptr;
DISPATCH();
}
#endif
}
macro(RESUME) =
_LOAD_BYTECODE +
_MAYBE_INSTRUMENT +
_QUICKEN_RESUME +
_CHECK_PERIODIC_IF_NOT_YIELD_FROM;
@ -204,6 +223,10 @@ dummy_func(
uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
DEOPT_IF(eval_breaker != version);
#ifdef Py_GIL_DISABLED
DEOPT_IF(frame->tlbc_index !=
((_PyThreadStateImpl *)tstate)->tlbc_index);
#endif
}
op(_MONITOR_RESUME, (--)) {
@ -217,6 +240,7 @@ dummy_func(
}
macro(INSTRUMENTED_RESUME) =
_LOAD_BYTECODE +
_MAYBE_INSTRUMENT +
_CHECK_PERIODIC_IF_NOT_YIELD_FROM +
_MONITOR_RESUME;
@ -682,8 +706,8 @@ dummy_func(
};
specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) {
assert(frame->stackpointer == NULL);
#if ENABLE_SPECIALIZATION
assert(frame->stackpointer == NULL);
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinarySubscr(container, sub, next_instr);
@ -1236,7 +1260,7 @@ dummy_func(
if (oparg) {
PyObject *lasti = PyStackRef_AsPyObjectBorrow(values[0]);
if (PyLong_Check(lasti)) {
frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + PyLong_AsLong(lasti);
frame->instr_ptr = _PyFrame_GetBytecode(frame) + PyLong_AsLong(lasti);
assert(!_PyErr_Occurred(tstate));
}
else {
@ -2671,9 +2695,7 @@ dummy_func(
assert(PyStackRef_BoolCheck(cond));
int flag = PyStackRef_Is(cond, PyStackRef_False);
DEAD(cond);
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
JUMPBY(oparg * flag);
}
@ -2681,9 +2703,7 @@ dummy_func(
assert(PyStackRef_BoolCheck(cond));
int flag = PyStackRef_Is(cond, PyStackRef_True);
DEAD(cond);
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
JUMPBY(oparg * flag);
}
@ -3697,7 +3717,7 @@ dummy_func(
op(_CREATE_INIT_FRAME, (init[1], self[1], args[oparg] -- init_frame: _PyInterpreterFrame *)) {
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
/* Push self onto stack of shim */
shim->localsplus[0] = PyStackRef_DUP(self[0]);
DEAD(init);
@ -4593,7 +4613,7 @@ dummy_func(
}
specializing op(_SPECIALIZE_BINARY_OP, (counter/1, lhs, rhs -- lhs, rhs)) {
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
@ -4601,7 +4621,7 @@ dummy_func(
}
OPCODE_DEFERRED_INC(BINARY_OP);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
assert(NB_ADD <= oparg);
assert(oparg <= NB_INPLACE_XOR);
}
@ -4632,7 +4652,7 @@ dummy_func(
int original_opcode = 0;
if (tstate->tracing) {
PyCodeObject *code = _PyFrame_GetCode(frame);
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyCode_CODE(code))].original_opcode;
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyFrame_GetBytecode(frame))].original_opcode;
next_instr = this_instr;
} else {
original_opcode = _Py_call_instrumentation_line(
@ -4687,9 +4707,7 @@ dummy_func(
assert(PyStackRef_BoolCheck(cond));
int flag = PyStackRef_Is(cond, PyStackRef_True);
int offset = flag * oparg;
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
}
@ -4698,9 +4716,7 @@ dummy_func(
assert(PyStackRef_BoolCheck(cond));
int flag = PyStackRef_Is(cond, PyStackRef_False);
int offset = flag * oparg;
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
}
@ -4715,9 +4731,7 @@ dummy_func(
PyStackRef_CLOSE(value_stackref);
offset = 0;
}
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
}
@ -4815,7 +4829,7 @@ dummy_func(
tier2 op(_EXIT_TRACE, (exit_p/4 --)) {
_PyExitData *exit = (_PyExitData *)exit_p;
PyCodeObject *code = _PyFrame_GetCode(frame);
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
_Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
#if defined(Py_DEBUG) && !defined(_Py_JIT)
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
if (lltrace >= 2) {
@ -4823,7 +4837,7 @@ dummy_func(
_PyUOpPrint(&next_uop[-1]);
printf(", exit %u, temp %d, target %d -> %s]\n",
exit - current_executor->exits, exit->temperature.value_and_backoff,
(int)(target - _PyCode_CODE(code)),
(int)(target - _PyFrame_GetBytecode(frame)),
_PyOpcode_OpName[target->op.code]);
}
#endif
@ -4933,7 +4947,7 @@ dummy_func(
_PyUOpPrint(&next_uop[-1]);
printf(", exit %u, temp %d, target %d -> %s]\n",
exit - current_executor->exits, exit->temperature.value_and_backoff,
(int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))),
(int)(target - _PyFrame_GetBytecode(frame)),
_PyOpcode_OpName[target->op.code]);
}
#endif
@ -4995,7 +5009,7 @@ dummy_func(
}
tier2 op(_ERROR_POP_N, (target/2, unused[oparg] --)) {
frame->instr_ptr = ((_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive) + target;
frame->instr_ptr = _PyFrame_GetBytecode(frame) + target;
SYNC_SP();
GOTO_UNWIND();
}

View File

@ -189,7 +189,7 @@ lltrace_instruction(_PyInterpreterFrame *frame,
dump_stack(frame, stack_pointer);
const char *opname = _PyOpcode_OpName[opcode];
assert(opname != NULL);
int offset = (int)(next_instr - _PyCode_CODE(_PyFrame_GetCode(frame)));
int offset = (int)(next_instr - _PyFrame_GetBytecode(frame));
if (OPCODE_HAS_ARG((int)_PyOpcode_Deopt[opcode])) {
printf("%d: %s %d\n", offset * 2, opname, oparg);
}
@ -841,6 +841,19 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
/* Because this avoids the RESUME,
* we need to update instrumentation */
#ifdef Py_GIL_DISABLED
/* Load thread-local bytecode */
if (frame->tlbc_index != ((_PyThreadStateImpl *)tstate)->tlbc_index) {
_Py_CODEUNIT *bytecode =
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
if (bytecode == NULL) {
goto error;
}
ptrdiff_t off = frame->instr_ptr - _PyFrame_GetBytecode(frame);
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
frame->instr_ptr = bytecode + off;
}
#endif
_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
monitor_throw(tstate, frame, frame->instr_ptr);
/* TO DO -- Monitor throw entry. */
@ -983,7 +996,7 @@ exception_unwind:
Python main loop. */
PyObject *exc = _PyErr_GetRaisedException(tstate);
PUSH(PyStackRef_FromPyObjectSteal(exc));
next_instr = _PyCode_CODE(_PyFrame_GetCode(frame)) + handler;
next_instr = _PyFrame_GetBytecode(frame) + handler;
if (monitor_handled(tstate, frame, next_instr, exc) < 0) {
goto exception_unwind;
@ -1045,6 +1058,8 @@ enter_tier_two:
#undef ENABLE_SPECIALIZATION
#define ENABLE_SPECIALIZATION 0
#undef ENABLE_SPECIALIZATION_FT
#define ENABLE_SPECIALIZATION_FT 0
#ifdef Py_DEBUG
#define DPRINTF(level, ...) \
@ -1139,7 +1154,7 @@ exit_to_tier1_dynamic:
goto goto_to_tier1;
exit_to_tier1:
assert(next_uop[-1].format == UOP_FORMAT_TARGET);
next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
next_instr = next_uop[-1].target + _PyFrame_GetBytecode(frame);
goto_to_tier1:
#ifdef Py_DEBUG
if (lltrace >= 2) {
@ -1764,7 +1779,7 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func,
if (frame == NULL) {
goto fail;
}
_PyFrame_Initialize(frame, func, locals, code, 0, previous);
_PyFrame_Initialize(tstate, frame, func, locals, code, 0, previous);
if (initialize_locals(tstate, func_obj, frame->localsplus, args, argcount, kwnames)) {
assert(frame->owner == FRAME_OWNED_BY_THREAD);
clear_thread_frame(tstate, frame);

View File

@ -151,7 +151,7 @@ GETITEM(PyObject *v, Py_ssize_t i) {
/* Code access macros */
/* The integer overflow is checked by an assertion below. */
#define INSTR_OFFSET() ((int)(next_instr - _PyCode_CODE(_PyFrame_GetCode(frame))))
#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
#define NEXTOPARG() do { \
_Py_CODEUNIT word = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
opcode = word.op.code; \
@ -301,14 +301,6 @@ GETITEM(PyObject *v, Py_ssize_t i) {
#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
backoff_counter_triggers(forge_backoff_counter((COUNTER)))
#ifdef Py_GIL_DISABLED
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
do { \
/* gh-115999 tracks progress on addressing this. */ \
static_assert(0, "The specializing interpreter is not yet thread-safe"); \
} while (0);
#define PAUSE_ADAPTIVE_COUNTER(COUNTER) ((void)COUNTER)
#else
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
do { \
(COUNTER) = advance_backoff_counter((COUNTER)); \
@ -318,6 +310,18 @@ GETITEM(PyObject *v, Py_ssize_t i) {
do { \
(COUNTER) = pause_backoff_counter((COUNTER)); \
} while (0);
#ifdef ENABLE_SPECIALIZATION_FT
/* Multiple threads may execute these concurrently if thread-local bytecode is
* disabled and they all execute the main copy of the bytecode. Specialization
* is disabled in that case so the value is unused, but the RMW cycle should be
* free of data races.
*/
#define RECORD_BRANCH_TAKEN(bitset, flag) \
FT_ATOMIC_STORE_UINT16_RELAXED( \
bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
#else
#define RECORD_BRANCH_TAKEN(bitset, flag)
#endif
#define UNBOUNDLOCAL_ERROR_MSG \

View File

@ -41,6 +41,8 @@
/* _QUICKEN_RESUME is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
/* _LOAD_BYTECODE is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
case _RESUME_CHECK: {
#if defined(__EMSCRIPTEN__)
if (_Py_emscripten_signal_clock == 0) {
@ -56,6 +58,13 @@
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
#ifdef Py_GIL_DISABLED
if (frame->tlbc_index !=
((_PyThreadStateImpl *)tstate)->tlbc_index) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
#endif
break;
}
@ -4480,8 +4489,8 @@
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
stack_pointer = _PyFrame_GetStackPointer(frame);
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
/* Push self onto stack of shim */
shim->localsplus[0] = PyStackRef_DUP(self[0]);
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -5683,7 +5692,9 @@
PyObject *exit_p = (PyObject *)CURRENT_OPERAND();
_PyExitData *exit = (_PyExitData *)exit_p;
PyCodeObject *code = _PyFrame_GetCode(frame);
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
_PyFrame_SetStackPointer(frame, stack_pointer);
_Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
stack_pointer = _PyFrame_GetStackPointer(frame);
#if defined(Py_DEBUG) && !defined(_Py_JIT)
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
if (lltrace >= 2) {
@ -5692,7 +5703,7 @@
_PyUOpPrint(&next_uop[-1]);
printf(", exit %u, temp %d, target %d -> %s]\n",
exit - current_executor->exits, exit->temperature.value_and_backoff,
(int)(target - _PyCode_CODE(code)),
(int)(target - _PyFrame_GetBytecode(frame)),
_PyOpcode_OpName[target->op.code]);
stack_pointer = _PyFrame_GetStackPointer(frame);
}
@ -5878,7 +5889,7 @@
_PyUOpPrint(&next_uop[-1]);
printf(", exit %u, temp %d, target %d -> %s]\n",
exit - current_executor->exits, exit->temperature.value_and_backoff,
(int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))),
(int)(target - _PyFrame_GetBytecode(frame)),
_PyOpcode_OpName[target->op.code]);
stack_pointer = _PyFrame_GetStackPointer(frame);
}
@ -5956,9 +5967,11 @@
case _ERROR_POP_N: {
oparg = CURRENT_OPARG();
uint32_t target = (uint32_t)CURRENT_OPERAND();
frame->instr_ptr = ((_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive) + target;
stack_pointer += -oparg;
assert(WITHIN_STACK_BOUNDS());
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->instr_ptr = _PyFrame_GetBytecode(frame) + target;
stack_pointer = _PyFrame_GetStackPointer(frame);
GOTO_UNWIND();
break;
}

View File

@ -63,7 +63,8 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame)
// This may be a newly-created generator or coroutine frame. Since it's
// dead anyways, just pretend that the first RESUME ran:
PyCodeObject *code = _PyFrame_GetCode(frame);
frame->instr_ptr = _PyCode_CODE(code) + code->_co_firsttraceable + 1;
frame->instr_ptr =
_PyFrame_GetBytecode(frame) + code->_co_firsttraceable + 1;
}
assert(!_PyFrame_IsIncomplete(frame));
assert(f->f_back == NULL);

View File

@ -1953,16 +1953,22 @@ custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area,
}
void
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
_PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp,
gcvisitobjects_t callback, void *arg)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
struct custom_visitor_args wrapper = {
.callback = callback,
.arg = arg,
};
_PyEval_StopTheWorld(interp);
gc_visit_heaps(interp, &custom_visitor_wrapper, &wrapper.base);
}
void
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyEval_StopTheWorld(interp);
_PyGC_VisitObjectsWorldStopped(interp, callback, arg);
_PyEval_StartTheWorld(interp);
}

View File

@ -25,7 +25,7 @@
lhs = stack_pointer[-2];
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -35,7 +35,7 @@
}
OPCODE_DEFERRED_INC(BINARY_OP);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
assert(NB_ADD <= oparg);
assert(oparg <= NB_INPLACE_XOR);
}
@ -435,8 +435,8 @@
container = stack_pointer[-2];
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
assert(frame->stackpointer == NULL);
#if ENABLE_SPECIALIZATION
assert(frame->stackpointer == NULL);
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -1066,8 +1066,8 @@
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
stack_pointer = _PyFrame_GetStackPointer(frame);
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
/* Push self onto stack of shim */
shim->localsplus[0] = PyStackRef_DUP(self[0]);
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -4711,7 +4711,9 @@
int original_opcode = 0;
if (tstate->tracing) {
PyCodeObject *code = _PyFrame_GetCode(frame);
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyCode_CODE(code))].original_opcode;
_PyFrame_SetStackPointer(frame, stack_pointer);
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyFrame_GetBytecode(frame))].original_opcode;
stack_pointer = _PyFrame_GetStackPointer(frame);
next_instr = this_instr;
} else {
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -4759,9 +4761,7 @@
assert(PyStackRef_BoolCheck(cond));
int flag = PyStackRef_Is(cond, PyStackRef_False);
int offset = flag * oparg;
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
DISPATCH();
}
@ -4782,9 +4782,7 @@
PyStackRef_CLOSE(value_stackref);
offset = 0;
}
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
DISPATCH();
}
@ -4822,9 +4820,7 @@
assert(PyStackRef_BoolCheck(cond));
int flag = PyStackRef_Is(cond, PyStackRef_True);
int offset = flag * oparg;
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
DISPATCH();
}
@ -4834,6 +4830,28 @@
(void)this_instr;
next_instr += 1;
INSTRUCTION_STATS(INSTRUMENTED_RESUME);
// _LOAD_BYTECODE
{
#ifdef Py_GIL_DISABLED
if (frame->tlbc_index !=
((_PyThreadStateImpl *)tstate)->tlbc_index) {
_PyFrame_SetStackPointer(frame, stack_pointer);
_Py_CODEUNIT *bytecode =
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
stack_pointer = _PyFrame_GetStackPointer(frame);
if (bytecode == NULL) goto error;
_PyFrame_SetStackPointer(frame, stack_pointer);
int off = this_instr - _PyFrame_GetBytecode(frame);
stack_pointer = _PyFrame_GetStackPointer(frame);
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
frame->instr_ptr = bytecode + off;
// Make sure this_instr gets reset correctly for any uops that
// follow
next_instr = frame->instr_ptr;
DISPATCH();
}
#endif
}
// _MAYBE_INSTRUMENT
{
if (tstate->tracing == 0) {
@ -6646,9 +6664,7 @@
cond = stack_pointer[-1];
assert(PyStackRef_BoolCheck(cond));
int flag = PyStackRef_Is(cond, PyStackRef_False);
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
JUMPBY(oparg * flag);
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
@ -6680,9 +6696,7 @@
cond = b;
assert(PyStackRef_BoolCheck(cond));
int flag = PyStackRef_Is(cond, PyStackRef_True);
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
JUMPBY(oparg * flag);
}
stack_pointer += -1;
@ -6715,9 +6729,7 @@
cond = b;
assert(PyStackRef_BoolCheck(cond));
int flag = PyStackRef_Is(cond, PyStackRef_False);
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
JUMPBY(oparg * flag);
}
stack_pointer += -1;
@ -6735,9 +6747,7 @@
cond = stack_pointer[-1];
assert(PyStackRef_BoolCheck(cond));
int flag = PyStackRef_Is(cond, PyStackRef_True);
#if ENABLE_SPECIALIZATION
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
#endif
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
JUMPBY(oparg * flag);
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
@ -6832,7 +6842,11 @@
if (oparg) {
PyObject *lasti = PyStackRef_AsPyObjectBorrow(values[0]);
if (PyLong_Check(lasti)) {
frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + PyLong_AsLong(lasti);
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->instr_ptr = _PyFrame_GetBytecode(frame) + PyLong_AsLong(lasti);
stack_pointer = _PyFrame_GetStackPointer(frame);
assert(!_PyErr_Occurred(tstate));
}
else {
@ -6844,6 +6858,8 @@
Py_DECREF(exc);
goto error;
}
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());
}
assert(exc && PyExceptionInstance_Check(exc));
stack_pointer += -1;
@ -6871,6 +6887,28 @@
PREDICTED(RESUME);
_Py_CODEUNIT* const this_instr = next_instr - 1;
(void)this_instr;
// _LOAD_BYTECODE
{
#ifdef Py_GIL_DISABLED
if (frame->tlbc_index !=
((_PyThreadStateImpl *)tstate)->tlbc_index) {
_PyFrame_SetStackPointer(frame, stack_pointer);
_Py_CODEUNIT *bytecode =
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
stack_pointer = _PyFrame_GetStackPointer(frame);
if (bytecode == NULL) goto error;
_PyFrame_SetStackPointer(frame, stack_pointer);
int off = this_instr - _PyFrame_GetBytecode(frame);
stack_pointer = _PyFrame_GetStackPointer(frame);
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
frame->instr_ptr = bytecode + off;
// Make sure this_instr gets reset correctly for any uops that
// follow
next_instr = frame->instr_ptr;
DISPATCH();
}
#endif
}
// _MAYBE_INSTRUMENT
{
if (tstate->tracing == 0) {
@ -6890,11 +6928,11 @@
}
// _QUICKEN_RESUME
{
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (tstate->tracing == 0 && this_instr->op.code == RESUME) {
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK);
}
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}
// _CHECK_PERIODIC_IF_NOT_YIELD_FROM
{
@ -6925,6 +6963,10 @@
uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
DEOPT_IF(eval_breaker != version, RESUME);
#ifdef Py_GIL_DISABLED
DEOPT_IF(frame->tlbc_index !=
((_PyThreadStateImpl *)tstate)->tlbc_index, RESUME);
#endif
DISPATCH();
}

193
Python/index_pool.c Normal file
View File

@ -0,0 +1,193 @@
#include <stdbool.h>
#include "Python.h"
#include "pycore_index_pool.h"
#include "pycore_lock.h"
#ifdef Py_GIL_DISABLED
// Exchange the heap entries at positions i and j.
static inline void
swap(int32_t *values, Py_ssize_t i, Py_ssize_t j)
{
    int32_t held = values[i];
    values[i] = values[j];
    values[j] = held;
}
// Swap the heap entries at i and j if they violate the min-heap order.
// Returns true when a swap was performed; false when either index is out
// of range or the two entries are already correctly ordered.
static bool
heap_try_swap(_PyIndexHeap *heap, Py_ssize_t i, Py_ssize_t j)
{
    if (i < 0 || j < 0 || i >= heap->size || j >= heap->size) {
        return false;
    }
    // The entry at the smaller index is closer to the root and must not
    // exceed the deeper entry.
    Py_ssize_t shallow = (i <= j) ? i : j;
    Py_ssize_t deep = (i <= j) ? j : i;
    if (heap->values[shallow] <= heap->values[deep]) {
        return false;
    }
    swap(heap->values, i, j);
    return true;
}
// Index of the parent of heap node i (the root is node 0).
static inline Py_ssize_t
parent(Py_ssize_t i)
{
    return (i - 1) / 2;
}
// Index of the left child of heap node i.
static inline Py_ssize_t
left_child(Py_ssize_t i)
{
    return 2 * i + 1;
}
// Index of the right child of heap node i.
static inline Py_ssize_t
right_child(Py_ssize_t i)
{
    return 2 * i + 2;
}
// Insert `val` into the min-heap. The caller must have ensured there is
// room (see heap_ensure_capacity).
static void
heap_add(_PyIndexHeap *heap, int32_t val)
{
    assert(heap->size < heap->capacity);
    // Append at the first free slot, then sift the new entry up towards
    // the root until the min-heap invariant is restored.
    Py_ssize_t cur = heap->size;
    heap->values[cur] = val;
    heap->size++;
    while (cur > 0 && heap_try_swap(heap, cur, parent(cur))) {
        cur = parent(cur);
    }
}
// Return the index of the child of node i holding the smaller value, or
// -1 if node i has no children.
static Py_ssize_t
heap_min_child(_PyIndexHeap *heap, Py_ssize_t i)
{
    Py_ssize_t left = left_child(i);
    Py_ssize_t right = right_child(i);
    if (left < heap->size) {
        if (right < heap->size) {
            // Fix: the stored values are int32_t; they were previously
            // declared as Py_ssize_t (harmless widening, but misleading).
            int32_t lval = heap->values[left];
            int32_t rval = heap->values[right];
            return lval < rval ? left : right;
        }
        return left;
    }
    if (right < heap->size) {
        // Presumably unreachable for a well-formed heap (heap_add fills
        // slots left-to-right), but kept to mirror the original logic.
        return right;
    }
    return -1;
}
// Remove and return the smallest value in the heap. The heap must be
// non-empty.
static int32_t
heap_pop(_PyIndexHeap *heap)
{
    assert(heap->size > 0);
    // The minimum lives at the root. Move the last entry into the root
    // slot, shrink, then sift it down to restore the heap invariant.
    int32_t result = heap->values[0];
    heap->size--;
    heap->values[0] = heap->values[heap->size];
    Py_ssize_t cur = 0;
    while (cur < heap->size) {
        Py_ssize_t child = heap_min_child(heap, cur);
        if (child < 0 || !heap_try_swap(heap, cur, child)) {
            break;
        }
        cur = child;
    }
    return result;
}
// Grow the heap so it can hold at least `limit` entries. Returns 0 on
// success, -1 on allocation failure or capacity overflow. Existing
// entries are preserved.
static int
heap_ensure_capacity(_PyIndexHeap *heap, Py_ssize_t limit)
{
    assert(limit > 0);
    if (heap->capacity > limit) {
        return 0;
    }
    // Grow geometrically, starting at 1024 for a fresh heap.
    // NOTE(review): the `new_capacity` guard treats a wrapped (<= 0)
    // value as failure, but signed left-shift overflow is UB in C —
    // consider an explicit pre-shift bound check. TODO confirm.
    Py_ssize_t new_capacity = heap->capacity ? heap->capacity : 1024;
    while (new_capacity && new_capacity < limit) {
        new_capacity <<= 1;
    }
    if (!new_capacity) {
        return -1;
    }
    int32_t *new_values = PyMem_RawCalloc(new_capacity, sizeof(int32_t));
    if (new_values == NULL) {
        return -1;
    }
    if (heap->values != NULL) {
        // Bug fix: memcpy's size argument is in bytes, not elements.
        // Copying only `heap->capacity` bytes would silently drop three
        // quarters of the stored indices on growth.
        memcpy(new_values, heap->values, heap->capacity * sizeof(int32_t));
        PyMem_RawFree(heap->values);
    }
    heap->values = new_values;
    heap->capacity = new_capacity;
    return 0;
}
// Release the heap's backing storage. Size and capacity are poisoned to
// -1 so that any use after finalization trips the assertions in the
// other helpers.
static void
heap_fini(_PyIndexHeap *heap)
{
    if (heap->values != NULL) {
        PyMem_RawFree(heap->values);
        heap->values = NULL;
    }
    heap->size = -1;
    heap->capacity = -1;
}
#define LOCK_POOL(pool) PyMutex_LockFlags(&pool->mutex, _Py_LOCK_DONT_DETACH)
#define UNLOCK_POOL(pool) PyMutex_Unlock(&pool->mutex)
// Allocate a globally unique index from the pool, reusing the smallest
// previously freed index when one is available. Returns -1 (with
// MemoryError set) on allocation failure.
int32_t
_PyIndexPool_AllocIndex(_PyIndexPool *pool)
{
    LOCK_POOL(pool);
    int32_t index;
    _PyIndexHeap *free_indices = &pool->free_indices;
    if (free_indices->size > 0) {
        // Reuse the smallest previously released index.
        index = heap_pop(free_indices);
    }
    else {
        // Nothing to reuse; hand out a fresh index. Before doing so, grow
        // the heap so that it can always hold every index allocated so
        // far. Freeing happens during thread destruction, where handling
        // an allocation failure is awkward / impossible. Pre-growing here
        // shifts any allocation (and thus any failure) to index
        // allocation, i.e. thread creation, where we are better equipped
        // to deal with it.
        if (heap_ensure_capacity(free_indices, pool->next_index + 1) < 0) {
            UNLOCK_POOL(pool);
            PyErr_NoMemory();
            return -1;
        }
        index = pool->next_index++;
    }
    UNLOCK_POOL(pool);
    return index;
}
// Return `index` to the pool for reuse by a later _PyIndexPool_AllocIndex.
// Cannot fail: the heap's capacity was pre-grown when the index was
// allocated (see the comment in _PyIndexPool_AllocIndex).
void
_PyIndexPool_FreeIndex(_PyIndexPool *pool, int32_t index)
{
    LOCK_POOL(pool);
    heap_add(&pool->free_indices, index);
    UNLOCK_POOL(pool);
}
// Free all memory owned by the pool. The pool must not be used afterwards.
void
_PyIndexPool_Fini(_PyIndexPool *pool)
{
    heap_fini(&pool->free_indices);
}
#endif // Py_GIL_DISABLED

View File

@ -134,6 +134,7 @@ static const PyConfigSpec PYCONFIG_SPEC[] = {
SPEC(dump_refs_file, WSTR_OPT, READ_ONLY, NO_SYS),
#ifdef Py_GIL_DISABLED
SPEC(enable_gil, INT, READ_ONLY, NO_SYS),
SPEC(tlbc_enabled, INT, READ_ONLY, NO_SYS),
#endif
SPEC(faulthandler, BOOL, READ_ONLY, NO_SYS),
SPEC(filesystem_encoding, WSTR, READ_ONLY, NO_SYS),
@ -315,8 +316,13 @@ The following implementation-specific options are available:\n\
"\
-X showrefcount: output the total reference count and number of used\n\
memory blocks when the program finishes or after each statement in\n\
the interactive interpreter; only works on debug builds\n\
-X tracemalloc[=N]: trace Python memory allocations; N sets a traceback limit\n\
the interactive interpreter; only works on debug builds\n"
#ifdef Py_GIL_DISABLED
"-X tlbc=[0|1]: enable (1) or disable (0) thread-local bytecode. Also\n\
PYTHON_TLBC\n"
#endif
"\
-X tracemalloc[=N]: trace Python memory allocations; N sets a traceback limit\n \
of N frames (default: 1); also PYTHONTRACEMALLOC=N\n\
-X utf8[=0|1]: enable (1) or disable (0) UTF-8 mode; also PYTHONUTF8\n\
-X warn_default_encoding: enable opt-in EncodingWarning for 'encoding=None';\n\
@ -400,6 +406,9 @@ static const char usage_envvars[] =
#ifdef Py_STATS
"PYTHONSTATS : turns on statistics gathering (-X pystats)\n"
#endif
#ifdef Py_GIL_DISABLED
"PYTHON_TLBC : when set to 0, disables thread-local bytecode (-X tlbc)\n"
#endif
"PYTHONTRACEMALLOC: trace Python memory allocations (-X tracemalloc)\n"
"PYTHONUNBUFFERED: disable stdout/stderr buffering (-u)\n"
"PYTHONUTF8 : control the UTF-8 mode (-X utf8)\n"
@ -979,6 +988,7 @@ _PyConfig_InitCompatConfig(PyConfig *config)
config->cpu_count = -1;
#ifdef Py_GIL_DISABLED
config->enable_gil = _PyConfig_GIL_DEFAULT;
config->tlbc_enabled = 1;
#endif
}
@ -1862,6 +1872,36 @@ error:
"n must be greater than 0");
}
/* Read the thread-local-bytecode setting from the PYTHON_TLBC environment
 * variable and the -X tlbc command-line option. The -X option is read
 * second and therefore takes precedence when both are given. Only the
 * values 0 and 1 are accepted. A no-op on builds with the GIL, where the
 * setting does not exist. */
static PyStatus
config_init_tlbc(PyConfig *config)
{
#ifdef Py_GIL_DISABLED
    // Environment variable: PYTHON_TLBC=0|1
    const char *env = config_get_env(config, "PYTHON_TLBC");
    if (env) {
        int enabled;
        if (_Py_str_to_int(env, &enabled) < 0 || (enabled < 0) || (enabled > 1)) {
            return _PyStatus_ERR(
                "PYTHON_TLBC=N: N is missing or invalid");
        }
        config->tlbc_enabled = enabled;
    }
    // Command line: -X tlbc=0|1 (overrides the environment variable)
    const wchar_t *xoption = config_get_xoption(config, L"tlbc");
    if (xoption) {
        int enabled;
        const wchar_t *sep = wcschr(xoption, L'=');
        if (!sep || (config_wstr_to_int(sep + 1, &enabled) < 0) || (enabled < 0) || (enabled > 1)) {
            return _PyStatus_ERR(
                "-X tlbc=n: n is missing or invalid");
        }
        config->tlbc_enabled = enabled;
    }
    return _PyStatus_OK();
#else
    return _PyStatus_OK();
#endif
}
static PyStatus
config_init_perf_profiling(PyConfig *config)
{
@ -2111,6 +2151,11 @@ config_read_complex_options(PyConfig *config)
}
#endif
status = config_init_tlbc(config);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
return _PyStatus_OK();
}

View File

@ -44,10 +44,24 @@
#define UNLOCK_CODE() Py_END_CRITICAL_SECTION()
// Apply `func` to every copy of `code`'s bytecode in its co_tlbc array.
// NULL entries (indices with no materialized thread-local copy) are
// skipped. Fix: the loop bound previously re-evaluated the `code` macro
// argument instead of using the local `co` binding — a classic
// multiple-evaluation hazard; the argument is now evaluated exactly once.
#define MODIFY_BYTECODE(code, func, ...)                       \
    do {                                                       \
        PyCodeObject *co = (code);                             \
        for (Py_ssize_t i = 0; i < co->co_tlbc->size; i++) {   \
            char *bc = co->co_tlbc->entries[i];                \
            if (bc == NULL) {                                  \
                continue;                                      \
            }                                                  \
            (func)((_Py_CODEUNIT *)bc, __VA_ARGS__);           \
        }                                                      \
    } while (0)
#else
#define LOCK_CODE(code)
#define UNLOCK_CODE()
#define MODIFY_BYTECODE(code, func, ...) \
(func)(_PyCode_CODE(code), __VA_ARGS__)
#endif
@ -309,7 +323,8 @@ _PyInstruction_GetLength(PyCodeObject *code, int offset)
{
ASSERT_WORLD_STOPPED_OR_LOCKED(code);
int opcode = _PyCode_CODE(code)[offset].op.code;
int opcode =
FT_ATOMIC_LOAD_UINT8_RELAXED(_PyCode_CODE(code)[offset].op.code);
assert(opcode != 0);
assert(opcode != RESERVED);
if (opcode == INSTRUMENTED_LINE) {
@ -578,7 +593,9 @@ sanity_check_instrumentation(PyCodeObject *code)
_Py_CODEUNIT
_Py_GetBaseCodeUnit(PyCodeObject *code, int i)
{
_Py_CODEUNIT inst = _PyCode_CODE(code)[i];
_Py_CODEUNIT *src_instr = _PyCode_CODE(code) + i;
_Py_CODEUNIT inst = {
.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t *)src_instr)};
int opcode = inst.op.code;
if (opcode < MIN_INSTRUMENTED_OPCODE) {
inst.op.code = _PyOpcode_Deopt[opcode];
@ -614,21 +631,22 @@ _Py_GetBaseCodeUnit(PyCodeObject *code, int i)
}
static void
de_instrument(PyCodeObject *code, int i, int event)
de_instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i,
int event)
{
assert(event != PY_MONITORING_EVENT_INSTRUCTION);
assert(event != PY_MONITORING_EVENT_LINE);
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
_Py_CODEUNIT *instr = &bytecode[i];
uint8_t *opcode_ptr = &instr->op.code;
int opcode = *opcode_ptr;
assert(opcode != ENTER_EXECUTOR);
if (opcode == INSTRUMENTED_LINE) {
opcode_ptr = &code->_co_monitoring->lines[i].original_opcode;
opcode_ptr = &monitoring->lines[i].original_opcode;
opcode = *opcode_ptr;
}
if (opcode == INSTRUMENTED_INSTRUCTION) {
opcode_ptr = &code->_co_monitoring->per_instruction_opcodes[i];
opcode_ptr = &monitoring->per_instruction_opcodes[i];
opcode = *opcode_ptr;
}
int deinstrumented = DE_INSTRUMENT[opcode];
@ -644,65 +662,68 @@ de_instrument(PyCodeObject *code, int i, int event)
}
static void
de_instrument_line(PyCodeObject *code, int i)
de_instrument_line(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring,
int i)
{
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
_Py_CODEUNIT *instr = &bytecode[i];
int opcode = instr->op.code;
if (opcode != INSTRUMENTED_LINE) {
return;
}
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
int original_opcode = lines->original_opcode;
if (original_opcode == INSTRUMENTED_INSTRUCTION) {
lines->original_opcode = code->_co_monitoring->per_instruction_opcodes[i];
lines->original_opcode = monitoring->per_instruction_opcodes[i];
}
CHECK(original_opcode != 0);
CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
instr->op.code = original_opcode;
FT_ATOMIC_STORE_UINT8(instr->op.code, original_opcode);
if (_PyOpcode_Caches[original_opcode]) {
instr[1].counter = adaptive_counter_warmup();
FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff,
adaptive_counter_warmup().value_and_backoff);
}
assert(instr->op.code != INSTRUMENTED_LINE);
}
static void
de_instrument_per_instruction(PyCodeObject *code, int i)
de_instrument_per_instruction(_Py_CODEUNIT *bytecode,
_PyCoMonitoringData *monitoring, int i)
{
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
_Py_CODEUNIT *instr = &bytecode[i];
uint8_t *opcode_ptr = &instr->op.code;
int opcode = *opcode_ptr;
if (opcode == INSTRUMENTED_LINE) {
opcode_ptr = &code->_co_monitoring->lines[i].original_opcode;
opcode_ptr = &monitoring->lines[i].original_opcode;
opcode = *opcode_ptr;
}
if (opcode != INSTRUMENTED_INSTRUCTION) {
return;
}
int original_opcode = code->_co_monitoring->per_instruction_opcodes[i];
int original_opcode = monitoring->per_instruction_opcodes[i];
CHECK(original_opcode != 0);
CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
*opcode_ptr = original_opcode;
FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, original_opcode);
if (_PyOpcode_Caches[original_opcode]) {
instr[1].counter = adaptive_counter_warmup();
FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff,
adaptive_counter_warmup().value_and_backoff);
}
assert(*opcode_ptr != INSTRUMENTED_INSTRUCTION);
assert(instr->op.code != INSTRUMENTED_INSTRUCTION);
}
static void
instrument(PyCodeObject *code, int i)
instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i)
{
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
_Py_CODEUNIT *instr = &bytecode[i];
uint8_t *opcode_ptr = &instr->op.code;
int opcode =*opcode_ptr;
if (opcode == INSTRUMENTED_LINE) {
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
opcode_ptr = &lines->original_opcode;
opcode = *opcode_ptr;
}
if (opcode == INSTRUMENTED_INSTRUCTION) {
opcode_ptr = &code->_co_monitoring->per_instruction_opcodes[i];
opcode_ptr = &monitoring->per_instruction_opcodes[i];
opcode = *opcode_ptr;
CHECK(opcode != INSTRUMENTED_INSTRUCTION && opcode != INSTRUMENTED_LINE);
CHECK(opcode == _PyOpcode_Deopt[opcode]);
@ -716,52 +737,52 @@ instrument(PyCodeObject *code, int i)
if (_PyOpcode_Caches[deopt]) {
FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff,
adaptive_counter_warmup().value_and_backoff);
instr[1].counter = adaptive_counter_warmup();
}
}
}
static void
instrument_line(PyCodeObject *code, int i)
instrument_line(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i)
{
uint8_t *opcode_ptr = &_PyCode_CODE(code)[i].op.code;
uint8_t *opcode_ptr = &bytecode[i].op.code;
int opcode = *opcode_ptr;
if (opcode == INSTRUMENTED_LINE) {
return;
}
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
lines->original_opcode = _PyOpcode_Deopt[opcode];
CHECK(lines->original_opcode > 0);
*opcode_ptr = INSTRUMENTED_LINE;
FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_LINE);
}
static void
instrument_per_instruction(PyCodeObject *code, int i)
instrument_per_instruction(_Py_CODEUNIT *bytecode,
_PyCoMonitoringData *monitoring, int i)
{
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
_Py_CODEUNIT *instr = &bytecode[i];
uint8_t *opcode_ptr = &instr->op.code;
int opcode = *opcode_ptr;
if (opcode == INSTRUMENTED_LINE) {
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
opcode_ptr = &lines->original_opcode;
opcode = *opcode_ptr;
}
if (opcode == INSTRUMENTED_INSTRUCTION) {
assert(code->_co_monitoring->per_instruction_opcodes[i] > 0);
assert(monitoring->per_instruction_opcodes[i] > 0);
return;
}
CHECK(opcode != 0);
if (is_instrumented(opcode)) {
code->_co_monitoring->per_instruction_opcodes[i] = opcode;
monitoring->per_instruction_opcodes[i] = opcode;
}
else {
assert(opcode != 0);
assert(_PyOpcode_Deopt[opcode] != 0);
assert(_PyOpcode_Deopt[opcode] != RESUME);
code->_co_monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode];
monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode];
}
assert(code->_co_monitoring->per_instruction_opcodes[i] > 0);
*opcode_ptr = INSTRUMENTED_INSTRUCTION;
assert(monitoring->per_instruction_opcodes[i] > 0);
FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_INSTRUCTION);
}
static void
@ -773,19 +794,19 @@ remove_tools(PyCodeObject * code, int offset, int event, int tools)
assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event));
assert(opcode_has_event(_Py_GetBaseCodeUnit(code, offset).op.code));
_PyCoMonitoringData *monitoring = code->_co_monitoring;
bool should_de_instrument;
if (monitoring && monitoring->tools) {
monitoring->tools[offset] &= ~tools;
if (monitoring->tools[offset] == 0) {
de_instrument(code, offset, event);
}
should_de_instrument = (monitoring->tools[offset] == 0);
}
else {
/* Single tool */
uint8_t single_tool = code->_co_monitoring->active_monitors.tools[event];
assert(_Py_popcount32(single_tool) <= 1);
if (((single_tool & tools) == single_tool)) {
de_instrument(code, offset, event);
}
should_de_instrument = ((single_tool & tools) == single_tool);
}
if (should_de_instrument) {
MODIFY_BYTECODE(code, de_instrument, monitoring, offset, event);
}
}
@ -804,22 +825,23 @@ remove_line_tools(PyCodeObject * code, int offset, int tools)
{
ASSERT_WORLD_STOPPED_OR_LOCKED(code);
assert(code->_co_monitoring);
if (code->_co_monitoring->line_tools)
_PyCoMonitoringData *monitoring = code->_co_monitoring;
assert(monitoring);
bool should_de_instrument;
if (monitoring->line_tools)
{
uint8_t *toolsptr = &code->_co_monitoring->line_tools[offset];
uint8_t *toolsptr = &monitoring->line_tools[offset];
*toolsptr &= ~tools;
if (*toolsptr == 0 ) {
de_instrument_line(code, offset);
}
should_de_instrument = (*toolsptr == 0);
}
else {
/* Single tool */
uint8_t single_tool = code->_co_monitoring->active_monitors.tools[PY_MONITORING_EVENT_LINE];
uint8_t single_tool = monitoring->active_monitors.tools[PY_MONITORING_EVENT_LINE];
assert(_Py_popcount32(single_tool) <= 1);
if (((single_tool & tools) == single_tool)) {
de_instrument_line(code, offset);
}
should_de_instrument = ((single_tool & tools) == single_tool);
}
if (should_de_instrument) {
MODIFY_BYTECODE(code, de_instrument_line, monitoring, offset);
}
}
@ -841,7 +863,7 @@ add_tools(PyCodeObject * code, int offset, int event, int tools)
assert(_Py_popcount32(tools) == 1);
assert(tools_is_subset_for_event(code, event, tools));
}
instrument(code, offset);
MODIFY_BYTECODE(code, instrument, code->_co_monitoring, offset);
}
static void
@ -858,7 +880,7 @@ add_line_tools(PyCodeObject * code, int offset, int tools)
/* Single tool */
assert(_Py_popcount32(tools) == 1);
}
instrument_line(code, offset);
MODIFY_BYTECODE(code, instrument_line, code->_co_monitoring, offset);
}
@ -876,7 +898,7 @@ add_per_instruction_tools(PyCodeObject * code, int offset, int tools)
/* Single tool */
assert(_Py_popcount32(tools) == 1);
}
instrument_per_instruction(code, offset);
MODIFY_BYTECODE(code, instrument_per_instruction, code->_co_monitoring, offset);
}
@ -885,21 +907,22 @@ remove_per_instruction_tools(PyCodeObject * code, int offset, int tools)
{
ASSERT_WORLD_STOPPED_OR_LOCKED(code);
_PyCoMonitoringData *monitoring = code->_co_monitoring;
assert(code->_co_monitoring);
bool should_de_instrument;
if (code->_co_monitoring->per_instruction_tools) {
uint8_t *toolsptr = &code->_co_monitoring->per_instruction_tools[offset];
*toolsptr &= ~tools;
if (*toolsptr == 0) {
de_instrument_per_instruction(code, offset);
}
should_de_instrument = (*toolsptr == 0);
}
else {
/* Single tool */
uint8_t single_tool = code->_co_monitoring->active_monitors.tools[PY_MONITORING_EVENT_INSTRUCTION];
assert(_Py_popcount32(single_tool) <= 1);
if (((single_tool & tools) == single_tool)) {
de_instrument_per_instruction(code, offset);
}
should_de_instrument = ((single_tool & tools) == single_tool);
}
if (should_de_instrument) {
MODIFY_BYTECODE(code, de_instrument_per_instruction, monitoring, offset);
}
}
@ -1087,7 +1110,7 @@ call_instrumentation_vector(
PyCodeObject *code = _PyFrame_GetCode(frame);
assert(args[1] == NULL);
args[1] = (PyObject *)code;
int offset = (int)(instr - _PyCode_CODE(code));
int offset = (int)(instr - _PyFrame_GetBytecode(frame));
/* Offset visible to user should be the offset in bytes, as that is the
* convention for APIs involving code offsets. */
int bytes_offset = offset * (int)sizeof(_Py_CODEUNIT);
@ -1173,8 +1196,7 @@ _Py_call_instrumentation_jump(
assert(event == PY_MONITORING_EVENT_JUMP ||
event == PY_MONITORING_EVENT_BRANCH);
assert(frame->instr_ptr == instr);
PyCodeObject *code = _PyFrame_GetCode(frame);
int to = (int)(target - _PyCode_CODE(code));
int to = (int)(target - _PyFrame_GetBytecode(frame));
PyObject *to_obj = PyLong_FromLong(to * (int)sizeof(_Py_CODEUNIT));
if (to_obj == NULL) {
return NULL;
@ -1240,7 +1262,8 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
PyCodeObject *code = _PyFrame_GetCode(frame);
assert(tstate->tracing == 0);
assert(debug_check_sanity(tstate->interp, code));
int i = (int)(instr - _PyCode_CODE(code));
_Py_CODEUNIT *bytecode = _PyFrame_GetBytecode(frame);
int i = (int)(instr - bytecode);
_PyCoMonitoringData *monitoring = code->_co_monitoring;
_PyCoLineInstrumentationData *line_data = &monitoring->lines[i];
@ -1256,10 +1279,10 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
line = compute_line(code, i, line_delta);
assert(line >= 0);
assert(prev != NULL);
int prev_index = (int)(prev - _PyCode_CODE(code));
int prev_index = (int)(prev - bytecode);
int prev_line = _Py_Instrumentation_GetLine(code, prev_index);
if (prev_line == line) {
int prev_opcode = _PyCode_CODE(code)[prev_index].op.code;
int prev_opcode = bytecode[prev_index].op.code;
/* RESUME and INSTRUMENTED_RESUME are needed for the operation of
* instrumentation, so must never be hidden by an INSTRUMENTED_LINE.
*/
@ -1359,7 +1382,7 @@ int
_Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* frame, _Py_CODEUNIT *instr)
{
PyCodeObject *code = _PyFrame_GetCode(frame);
int offset = (int)(instr - _PyCode_CODE(code));
int offset = (int)(instr - _PyFrame_GetBytecode(frame));
_PyCoMonitoringData *instrumentation_data = code->_co_monitoring;
assert(instrumentation_data->per_instruction_opcodes);
int next_opcode = instrumentation_data->per_instruction_opcodes[offset];

View File

@ -17,6 +17,8 @@
/* _QUICKEN_RESUME is not a viable micro-op for tier 2 */
/* _LOAD_BYTECODE is not a viable micro-op for tier 2 */
case _RESUME_CHECK: {
break;
}

View File

@ -1513,6 +1513,11 @@ new_threadstate(PyInterpreterState *interp, int whence)
PyMem_RawFree(new_tstate);
return NULL;
}
int32_t tlbc_idx = _Py_ReserveTLBCIndex(interp);
if (tlbc_idx < 0) {
PyMem_RawFree(new_tstate);
return NULL;
}
#endif
/* We serialize concurrent creation to protect global state. */
@ -1555,6 +1560,7 @@ new_threadstate(PyInterpreterState *interp, int whence)
#ifdef Py_GIL_DISABLED
// Must be called with lock unlocked to avoid lock ordering deadlocks.
_Py_qsbr_register(tstate, interp, qsbr_idx);
tstate->tlbc_index = tlbc_idx;
#endif
return (PyThreadState *)tstate;
@ -1706,6 +1712,10 @@ PyThreadState_Clear(PyThreadState *tstate)
// Remove ourself from the biased reference counting table of threads.
_Py_brc_remove_thread(tstate);
// Release our thread-local copies of the bytecode for reuse by another
// thread
_Py_ClearTLBCIndex((_PyThreadStateImpl *)tstate);
#endif
// Merge our queue of pointers to be freed into the interpreter queue.

View File

@ -24,6 +24,25 @@ extern const char *_PyUOpName(int index);
* ./adaptive.md
*/
#ifdef Py_GIL_DISABLED
/* Atomically store `opcode` into `instr`, returning from the enclosing
 * function if instrumentation won a concurrent race. Specialization must
 * never overwrite an INSTRUMENTED_* opcode, so we check first and then
 * confirm with a compare-exchange that the opcode did not change under
 * us in between. */
#define SET_OPCODE_OR_RETURN(instr, opcode)                               \
    do {                                                                  \
        uint8_t old_op = _Py_atomic_load_uint8_relaxed(&(instr)->op.code); \
        if (old_op >= MIN_INSTRUMENTED_OPCODE) {                          \
            /* Lost race with instrumentation */                          \
            return;                                                       \
        }                                                                 \
        if (!_Py_atomic_compare_exchange_uint8(&(instr)->op.code, &old_op, \
                                               (opcode))) {               \
            /* Lost race with instrumentation */                          \
            assert(old_op >= MIN_INSTRUMENTED_OPCODE);                    \
            return;                                                       \
        }                                                                 \
    } while (0)
#else
/* With the GIL there is no concurrent instrumentation; a plain store
 * suffices. */
#define SET_OPCODE_OR_RETURN(instr, opcode) (instr)->op.code = (opcode)
#endif
#ifdef Py_STATS
GCStats _py_gc_stats[NUM_GENERATIONS] = { 0 };
static PyStats _Py_stats_struct = { .gc_stats = _py_gc_stats };
@ -436,16 +455,25 @@ do { \
# define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
#endif
// Initialize warmup counters and insert superinstructions. This cannot fail.
// Initialize warmup counters and optimize instructions. This cannot fail.
void
_PyCode_Quicken(PyCodeObject *code)
_PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, PyObject *consts,
int enable_counters)
{
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
_Py_BackoffCounter jump_counter, adaptive_counter;
if (enable_counters) {
jump_counter = initial_jump_backoff_counter();
adaptive_counter = adaptive_counter_warmup();
}
else {
jump_counter = initial_unreachable_backoff_counter();
adaptive_counter = initial_unreachable_backoff_counter();
}
int opcode = 0;
int oparg = 0;
_Py_CODEUNIT *instructions = _PyCode_CODE(code);
/* The last code unit cannot have a cache, so we don't need to check it */
for (int i = 0; i < Py_SIZE(code)-1; i++) {
for (Py_ssize_t i = 0; i < size-1; i++) {
opcode = instructions[i].op.code;
int caches = _PyOpcode_Caches[opcode];
oparg = (oparg << 8) | instructions[i].op.arg;
@ -453,7 +481,7 @@ _PyCode_Quicken(PyCodeObject *code)
// The initial value depends on the opcode
switch (opcode) {
case JUMP_BACKWARD:
instructions[i + 1].counter = initial_jump_backoff_counter();
instructions[i + 1].counter = jump_counter;
break;
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
@ -462,7 +490,7 @@ _PyCode_Quicken(PyCodeObject *code)
instructions[i + 1].cache = 0x5555; // Alternating 0, 1 bits
break;
default:
instructions[i + 1].counter = adaptive_counter_warmup();
instructions[i + 1].counter = adaptive_counter;
break;
}
i += caches;
@ -471,7 +499,7 @@ _PyCode_Quicken(PyCodeObject *code)
/* We can't do this in the bytecode compiler as
* marshalling can intern strings and make them immortal. */
PyObject *obj = PyTuple_GET_ITEM(code->co_consts, oparg);
PyObject *obj = PyTuple_GET_ITEM(consts, oparg);
if (_Py_IsImmortal(obj)) {
instructions[i].op.code = LOAD_CONST_IMMORTAL;
}
@ -480,7 +508,7 @@ _PyCode_Quicken(PyCodeObject *code)
oparg = 0;
}
}
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}
#define SIMPLE_FUNCTION 0
@ -2243,9 +2271,10 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
{
PyObject *lhs = PyStackRef_AsPyObjectBorrow(lhs_st);
PyObject *rhs = PyStackRef_AsPyObjectBorrow(rhs_st);
assert(ENABLE_SPECIALIZATION);
assert(ENABLE_SPECIALIZATION_FT);
assert(_PyOpcode_Caches[BINARY_OP] == INLINE_CACHE_ENTRIES_BINARY_OP);
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)(instr + 1);
uint8_t specialized_op;
switch (oparg) {
case NB_ADD:
case NB_INPLACE_ADD:
@ -2256,18 +2285,18 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
_Py_CODEUNIT next = instr[INLINE_CACHE_ENTRIES_BINARY_OP + 1];
bool to_store = (next.op.code == STORE_FAST);
if (to_store && PyStackRef_AsPyObjectBorrow(locals[next.op.arg]) == lhs) {
instr->op.code = BINARY_OP_INPLACE_ADD_UNICODE;
specialized_op = BINARY_OP_INPLACE_ADD_UNICODE;
goto success;
}
instr->op.code = BINARY_OP_ADD_UNICODE;
specialized_op = BINARY_OP_ADD_UNICODE;
goto success;
}
if (PyLong_CheckExact(lhs)) {
instr->op.code = BINARY_OP_ADD_INT;
specialized_op = BINARY_OP_ADD_INT;
goto success;
}
if (PyFloat_CheckExact(lhs)) {
instr->op.code = BINARY_OP_ADD_FLOAT;
specialized_op = BINARY_OP_ADD_FLOAT;
goto success;
}
break;
@ -2277,11 +2306,11 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
break;
}
if (PyLong_CheckExact(lhs)) {
instr->op.code = BINARY_OP_MULTIPLY_INT;
specialized_op = BINARY_OP_MULTIPLY_INT;
goto success;
}
if (PyFloat_CheckExact(lhs)) {
instr->op.code = BINARY_OP_MULTIPLY_FLOAT;
specialized_op = BINARY_OP_MULTIPLY_FLOAT;
goto success;
}
break;
@ -2291,22 +2320,23 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
break;
}
if (PyLong_CheckExact(lhs)) {
instr->op.code = BINARY_OP_SUBTRACT_INT;
specialized_op = BINARY_OP_SUBTRACT_INT;
goto success;
}
if (PyFloat_CheckExact(lhs)) {
instr->op.code = BINARY_OP_SUBTRACT_FLOAT;
specialized_op = BINARY_OP_SUBTRACT_FLOAT;
goto success;
}
break;
}
SPECIALIZATION_FAIL(BINARY_OP, binary_op_fail_kind(oparg, lhs, rhs));
STAT_INC(BINARY_OP, failure);
instr->op.code = BINARY_OP;
SET_OPCODE_OR_RETURN(instr, BINARY_OP);
cache->counter = adaptive_counter_backoff(cache->counter);
return;
success:
STAT_INC(BINARY_OP, success);
SET_OPCODE_OR_RETURN(instr, specialized_op);
cache->counter = adaptive_counter_cooldown();
}

View File

@ -2174,6 +2174,11 @@ sys__clear_internal_caches_impl(PyObject *module)
#ifdef _Py_TIER2
PyInterpreterState *interp = _PyInterpreterState_GET();
_Py_Executors_InvalidateAll(interp, 0);
#endif
#ifdef Py_GIL_DISABLED
if (_Py_ClearUnusedTLBC(_PyInterpreterState_GET()) < 0) {
return NULL;
}
#endif
PyType_ClearCache();
Py_RETURN_NONE;

View File

@ -77,6 +77,10 @@ def _managed_dict_offset():
else:
return -3 * _sizeof_void_p()
def _interp_frame_has_tlbc_index():
    # Detect free-threaded builds: _PyInterpreterFrame carries a
    # "tlbc_index" (thread-local bytecode index) field only in those builds.
    frame_type = gdb.lookup_type("_PyInterpreterFrame")
    for field in frame_type.fields():
        if field.name == "tlbc_index":
            return True
    return False
Py_TPFLAGS_INLINE_VALUES = (1 << 2)
Py_TPFLAGS_MANAGED_DICT = (1 << 4)
@ -105,6 +109,8 @@ FRAME_INFO_OPTIMIZED_OUT = '(frame information optimized out)'
UNABLE_READ_INFO_PYTHON_FRAME = 'Unable to read information on python frame'
EVALFRAME = '_PyEval_EvalFrameDefault'
INTERP_FRAME_HAS_TLBC_INDEX = _interp_frame_has_tlbc_index()
class NullPyObjectPtr(RuntimeError):
pass
@ -693,6 +699,16 @@ def parse_location_table(firstlineno, linetable):
yield addr, end_addr, line
addr = end_addr
class PyCodeArrayPtr:
    """Wrapper around a gdb.Value pointing at a _PyCodeArray."""

    def __init__(self, gdbval):
        # Raw gdb.Value for the array; fields accessed by subscripting.
        self._gdbval = gdbval

    def get_entry(self, index):
        """Return the bytecode pointer stored at `index`, bounds-checked."""
        size = self._gdbval["size"]
        assert index >= 0 and index < size
        return self._gdbval["entries"][index]
class PyCodeObjectPtr(PyObjectPtr):
"""
Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
@ -1085,7 +1101,12 @@ class PyFramePtr:
def _f_lasti(self):
codeunit_p = gdb.lookup_type("_Py_CODEUNIT").pointer()
instr_ptr = self._gdbval["instr_ptr"]
first_instr = self._f_code().field("co_code_adaptive").cast(codeunit_p)
if INTERP_FRAME_HAS_TLBC_INDEX:
tlbc_index = self._gdbval["tlbc_index"]
code_arr = PyCodeArrayPtr(self._f_code().field("co_tlbc"))
first_instr = code_arr.get_entry(tlbc_index).cast(codeunit_p)
else:
first_instr = self._f_code().field("co_code_adaptive").cast(codeunit_p)
return int(instr_ptr - first_instr)
def is_shim(self):