mirror of
https://github.com/python/cpython.git
synced 2024-11-21 21:09:37 +01:00
gh-115999: Implement thread-local bytecode and enable specialization for BINARY_OP
(#123926)
Each thread specializes a thread-local copy of the bytecode, created on the first RESUME, in free-threaded builds. All copies of the bytecode for a code object are stored in the co_tlbc array on the code object. Each thread reserves a globally unique index identifying its copy of the bytecode in all co_tlbc arrays at thread creation and releases the index at thread destruction. The first entry in every co_tlbc array always points to the "main" copy of the bytecode that is stored at the end of the code object. This ensures that no bytecode is copied for programs that do not use threads. Thread-local bytecode can be disabled at runtime by providing either -X tlbc=0 or PYTHON_TLBC=0. Disabling thread-local bytecode also disables specialization. Concurrent modifications to the bytecode made by the specializing interpreter and instrumentation use atomics, with specialization taking care not to overwrite an instruction that was instrumented concurrently.
This commit is contained in:
parent
e5a4b402ae
commit
2e95c5ba3b
@ -72,6 +72,24 @@ typedef struct {
|
||||
uint8_t *per_instruction_tools;
|
||||
} _PyCoMonitoringData;
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
||||
/* Each thread specializes a thread-local copy of the bytecode in free-threaded
|
||||
* builds. These copies are stored on the code object in a `_PyCodeArray`. The
|
||||
* first entry in the array always points to the "main" copy of the bytecode
|
||||
* that is stored at the end of the code object.
|
||||
*/
|
||||
typedef struct {
|
||||
// Bound used by readers to validate an index before touching `entries`
// (lookups check `idx < size`).
Py_ssize_t size;
|
||||
// Pointers to the bytecode copies; readers cast a slot to `_Py_CODEUNIT *`.
// A slot may be NULL, which lookups treat as "no copy for this index".
// NOTE(review): declared with length 1 but indexed up to `size - 1`, so the
// struct is presumably over-allocated by its creator -- confirm in
// codeobject.c.
char *entries[1];
|
||||
} _PyCodeArray;
|
||||
|
||||
#define _PyCode_DEF_THREAD_LOCAL_BYTECODE() \
|
||||
_PyCodeArray *co_tlbc;
|
||||
#else
|
||||
#define _PyCode_DEF_THREAD_LOCAL_BYTECODE()
|
||||
#endif
|
||||
|
||||
// To avoid repeating ourselves in deepfreeze.py, all PyCodeObject members are
|
||||
// defined in this macro:
|
||||
#define _PyCode_DEF(SIZE) { \
|
||||
@ -138,6 +156,7 @@ typedef struct {
|
||||
Type is a void* to keep the format private in codeobject.c to force \
|
||||
people to go through the proper APIs. */ \
|
||||
void *co_extra; \
|
||||
_PyCode_DEF_THREAD_LOCAL_BYTECODE() \
|
||||
char co_code_adaptive[(SIZE)]; \
|
||||
}
|
||||
|
||||
|
@ -183,6 +183,7 @@ typedef struct PyConfig {
|
||||
int cpu_count;
|
||||
#ifdef Py_GIL_DISABLED
|
||||
int enable_gil;
|
||||
int tlbc_enabled;
|
||||
#endif
|
||||
|
||||
/* --- Path configuration inputs ------------ */
|
||||
|
@ -174,6 +174,18 @@ _PyEval_IsGILEnabled(PyThreadState *tstate)
|
||||
extern int _PyEval_EnableGILTransient(PyThreadState *tstate);
|
||||
extern int _PyEval_EnableGILPermanent(PyThreadState *tstate);
|
||||
extern int _PyEval_DisableGIL(PyThreadState *state);
|
||||
|
||||
|
||||
// Return the bytecode that `tstate` should execute for `co`.
//
// Tries the inline lookup (_PyCode_GetTLBCFast) first and only falls back to
// the out-of-line _PyCode_GetTLBC, which is declared as creating the
// thread-local copy if necessary, when that lookup misses.
// NOTE(review): whether _PyCode_GetTLBC can return NULL on failure is not
// visible from this header; callers should confirm before dereferencing.
static inline _Py_CODEUNIT *
|
||||
_PyEval_GetExecutableCode(PyThreadState *tstate, PyCodeObject *co)
|
||||
{
|
||||
_Py_CODEUNIT *bc = _PyCode_GetTLBCFast(tstate, co);
|
||||
if (bc != NULL) {
|
||||
// Fast path: this thread already has a copy.
return bc;
|
||||
}
|
||||
return _PyCode_GetTLBC(co);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
extern void _PyEval_DeactivateOpCache(void);
|
||||
|
@ -11,6 +11,7 @@ extern "C" {
|
||||
#include "pycore_stackref.h" // _PyStackRef
|
||||
#include "pycore_lock.h" // PyMutex
|
||||
#include "pycore_backoff.h" // _Py_BackoffCounter
|
||||
#include "pycore_tstate.h" // _PyThreadStateImpl
|
||||
|
||||
|
||||
/* Each instruction in a code object is a fixed-width value,
|
||||
@ -313,11 +314,17 @@ extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
|
||||
/** API for executors */
|
||||
extern void _PyCode_Clear_Executors(PyCodeObject *code);
|
||||
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// gh-115999 tracks progress on addressing this.
|
||||
#define ENABLE_SPECIALIZATION 0
|
||||
// Use this to enable specialization families once they are thread-safe. All
|
||||
// uses will be replaced with ENABLE_SPECIALIZATION once all families are
|
||||
// thread-safe.
|
||||
#define ENABLE_SPECIALIZATION_FT 1
|
||||
#else
|
||||
#define ENABLE_SPECIALIZATION 1
|
||||
#define ENABLE_SPECIALIZATION_FT ENABLE_SPECIALIZATION
|
||||
#endif
|
||||
|
||||
/* Specialization functions */
|
||||
@ -600,6 +607,40 @@ struct _PyCode8 _PyCode_DEF(8);
|
||||
|
||||
PyAPI_DATA(const struct _PyCode8) _Py_InitCleanup;
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
||||
// Return a pointer to the thread-local bytecode for the current thread, if it
|
||||
// exists.
|
||||
// Lookup only: never allocates. Returns NULL when `co` has no bytecode copy
// stored at this thread's index.
static inline _Py_CODEUNIT *
|
||||
_PyCode_GetTLBCFast(PyThreadState *tstate, PyCodeObject *co)
|
||||
{
|
||||
// Acquire-load of the array pointer.
// NOTE(review): presumably pairs with a release store made when the array is
// replaced -- confirm in codeobject.c.
_PyCodeArray *code = _Py_atomic_load_ptr_acquire(&co->co_tlbc);
|
||||
// `tstate` is treated as a _PyThreadStateImpl to reach its tlbc_index.
int32_t idx = ((_PyThreadStateImpl*) tstate)->tlbc_index;
|
||||
// Miss when the index lies beyond this array's size or the slot is empty.
// NOTE(review): only the upper bound is checked; a negative tlbc_index (-1 is
// the documented error value of _Py_ReserveTLBCIndex) would read before
// `entries` -- confirm this is never reached with an unreserved index.
if (idx < code->size && code->entries[idx] != NULL) {
|
||||
return (_Py_CODEUNIT *) code->entries[idx];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Return a pointer to the thread-local bytecode for the current thread,
|
||||
// creating it if necessary.
|
||||
extern _Py_CODEUNIT *_PyCode_GetTLBC(PyCodeObject *co);
|
||||
|
||||
// Reserve an index for the current thread into thread-local bytecode
|
||||
// arrays
|
||||
//
|
||||
// Returns the reserved index or -1 on error.
|
||||
extern int32_t _Py_ReserveTLBCIndex(PyInterpreterState *interp);
|
||||
|
||||
// Release the current thread's index into thread-local bytecode arrays
|
||||
extern void _Py_ClearTLBCIndex(_PyThreadStateImpl *tstate);
|
||||
|
||||
// Free all TLBC copies not associated with live threads.
|
||||
//
|
||||
// Returns 0 on success or -1 on error.
|
||||
extern int _Py_ClearUnusedTLBC(PyInterpreterState *interp);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -68,6 +68,10 @@ typedef struct _PyInterpreterFrame {
|
||||
PyObject *f_locals; /* Strong reference, may be NULL. Only valid if not on C stack */
|
||||
PyFrameObject *frame_obj; /* Strong reference, may be NULL. Only valid if not on C stack */
|
||||
_Py_CODEUNIT *instr_ptr; /* Instruction currently executing (or about to begin) */
|
||||
#ifdef Py_GIL_DISABLED
|
||||
/* Index of thread-local bytecode containing instr_ptr. */
|
||||
int32_t tlbc_index;
|
||||
#endif
|
||||
_PyStackRef *stackpointer;
|
||||
uint16_t return_offset; /* Only relevant during a function call */
|
||||
char owner;
|
||||
@ -76,7 +80,7 @@ typedef struct _PyInterpreterFrame {
|
||||
} _PyInterpreterFrame;
|
||||
|
||||
#define _PyInterpreterFrame_LASTI(IF) \
|
||||
((int)((IF)->instr_ptr - _PyCode_CODE(_PyFrame_GetCode(IF))))
|
||||
((int)((IF)->instr_ptr - _PyFrame_GetBytecode((IF))))
|
||||
|
||||
static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) {
|
||||
PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable);
|
||||
@ -84,6 +88,19 @@ static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) {
|
||||
return (PyCodeObject *)executable;
|
||||
}
|
||||
|
||||
// Return the start of the bytecode array that frame `f` is executing.
//
// With Py_GIL_DISABLED this is the entry of the code object's co_tlbc array
// selected by f->tlbc_index; otherwise it is the code object's own bytecode
// (_PyCode_CODE).
static inline _Py_CODEUNIT *
|
||||
_PyFrame_GetBytecode(_PyInterpreterFrame *f)
|
||||
{
|
||||
#ifdef Py_GIL_DISABLED
|
||||
PyCodeObject *co = _PyFrame_GetCode(f);
|
||||
_PyCodeArray *tlbc = _Py_atomic_load_ptr_acquire(&co->co_tlbc);
|
||||
// Debug-build check only: the frame's index must fall inside the array.
assert(f->tlbc_index >= 0 && f->tlbc_index < tlbc->size);
|
||||
return (_Py_CODEUNIT *)tlbc->entries[f->tlbc_index];
|
||||
#else
|
||||
return _PyCode_CODE(_PyFrame_GetCode(f));
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline PyFunctionObject *_PyFrame_GetFunction(_PyInterpreterFrame *f) {
|
||||
PyObject *func = PyStackRef_AsPyObjectBorrow(f->f_funcobj);
|
||||
assert(PyFunction_Check(func));
|
||||
@ -144,13 +161,33 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// Set frame->instr_ptr and frame->tlbc_index for a frame that is about to run
// `code` on thread `tstate`. Performs a lookup only; it never creates a
// thread-local copy itself.
static inline void
|
||||
_PyFrame_InitializeTLBC(PyThreadState *tstate, _PyInterpreterFrame *frame,
|
||||
PyCodeObject *code)
|
||||
{
|
||||
_Py_CODEUNIT *tlbc = _PyCode_GetTLBCFast(tstate, code);
|
||||
if (tlbc == NULL) {
|
||||
// No thread-local bytecode exists for this thread yet; use the main
|
||||
// thread's copy, deferring thread-local bytecode creation to the
|
||||
// execution of RESUME.
|
||||
frame->instr_ptr = _PyCode_CODE(code);
|
||||
// Index 0 is the array entry that points at the "main" copy.
frame->tlbc_index = 0;
|
||||
}
|
||||
else {
|
||||
// A copy already exists: start in it and record this thread's index so
// that _PyFrame_GetBytecode resolves to the same array.
frame->instr_ptr = tlbc;
|
||||
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Consumes reference to func and locals.
|
||||
Does not initialize frame->previous, which happens
|
||||
when frame is linked into the frame stack.
|
||||
*/
|
||||
static inline void
|
||||
_PyFrame_Initialize(
|
||||
_PyInterpreterFrame *frame, _PyStackRef func,
|
||||
PyThreadState *tstate, _PyInterpreterFrame *frame, _PyStackRef func,
|
||||
PyObject *locals, PyCodeObject *code, int null_locals_from, _PyInterpreterFrame *previous)
|
||||
{
|
||||
frame->previous = previous;
|
||||
@ -162,7 +199,12 @@ _PyFrame_Initialize(
|
||||
frame->f_locals = locals;
|
||||
frame->stackpointer = frame->localsplus + code->co_nlocalsplus;
|
||||
frame->frame_obj = NULL;
|
||||
#ifdef Py_GIL_DISABLED
|
||||
_PyFrame_InitializeTLBC(tstate, frame, code);
|
||||
#else
|
||||
(void)tstate;
|
||||
frame->instr_ptr = _PyCode_CODE(code);
|
||||
#endif
|
||||
frame->return_offset = 0;
|
||||
frame->owner = FRAME_OWNED_BY_THREAD;
|
||||
|
||||
@ -224,7 +266,8 @@ _PyFrame_IsIncomplete(_PyInterpreterFrame *frame)
|
||||
return true;
|
||||
}
|
||||
return frame->owner != FRAME_OWNED_BY_GENERATOR &&
|
||||
frame->instr_ptr < _PyCode_CODE(_PyFrame_GetCode(frame)) + _PyFrame_GetCode(frame)->_co_firsttraceable;
|
||||
frame->instr_ptr < _PyFrame_GetBytecode(frame) +
|
||||
_PyFrame_GetCode(frame)->_co_firsttraceable;
|
||||
}
|
||||
|
||||
static inline _PyInterpreterFrame *
|
||||
@ -315,7 +358,8 @@ _PyFrame_PushUnchecked(PyThreadState *tstate, _PyStackRef func, int null_locals_
|
||||
_PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->datastack_top;
|
||||
tstate->datastack_top += code->co_framesize;
|
||||
assert(tstate->datastack_top < tstate->datastack_limit);
|
||||
_PyFrame_Initialize(new_frame, func, NULL, code, null_locals_from, previous);
|
||||
_PyFrame_Initialize(tstate, new_frame, func, NULL, code, null_locals_from,
|
||||
previous);
|
||||
return new_frame;
|
||||
}
|
||||
|
||||
@ -339,7 +383,11 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
|
||||
assert(stackdepth <= code->co_stacksize);
|
||||
frame->stackpointer = frame->localsplus + code->co_nlocalsplus + stackdepth;
|
||||
frame->frame_obj = NULL;
|
||||
#ifdef Py_GIL_DISABLED
|
||||
_PyFrame_InitializeTLBC(tstate, frame, code);
|
||||
#else
|
||||
frame->instr_ptr = _PyCode_CODE(code);
|
||||
#endif
|
||||
frame->owner = FRAME_OWNED_BY_THREAD;
|
||||
frame->return_offset = 0;
|
||||
|
||||
|
@ -389,6 +389,10 @@ extern int _PyGC_VisitStackRef(union _PyStackRef *ref, visitproc visit, void *ar
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp,
|
||||
gcvisitobjects_t callback, void *arg);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
56
Include/internal/pycore_index_pool.h
Normal file
56
Include/internal/pycore_index_pool.h
Normal file
@ -0,0 +1,56 @@
|
||||
#ifndef Py_INTERNAL_INDEX_POOL_H
|
||||
#define Py_INTERNAL_INDEX_POOL_H
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef Py_BUILD_CORE
|
||||
# error "this header requires Py_BUILD_CORE define"
|
||||
#endif
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
||||
// This contains code for allocating unique indices in an array. It is used by
|
||||
// the free-threaded build to assign each thread a globally unique index into
|
||||
// each code object's thread-local bytecode array.
|
||||
|
||||
// A min-heap of indices
|
||||
typedef struct _PyIndexHeap {
|
||||
// Storage for the heap's indices.
// NOTE(review): allocation and ownership of this buffer live in the
// implementation file, which is not visible here.
int32_t *values;
|
||||
|
||||
// Number of items stored in values
|
||||
Py_ssize_t size;
|
||||
|
||||
// Maximum number of items that can be stored in values
|
||||
Py_ssize_t capacity;
|
||||
} _PyIndexHeap;
|
||||
|
||||
// An unbounded pool of indices. Indices are allocated starting from 0. They
|
||||
// may be released back to the pool once they are no longer in use.
|
||||
typedef struct _PyIndexPool {
|
||||
// NOTE(review): presumably guards free_indices and next_index against
// concurrent allocation/release -- confirm in the implementation.
PyMutex mutex;
|
||||
|
||||
// Min heap of indices available for allocation
|
||||
_PyIndexHeap free_indices;
|
||||
|
||||
// Next index to allocate if no free indices are available
|
||||
int32_t next_index;
|
||||
} _PyIndexPool;
|
||||
|
||||
// Allocate the smallest available index. Returns -1 on error.
|
||||
extern int32_t _PyIndexPool_AllocIndex(_PyIndexPool *indices);
|
||||
|
||||
// Release `index` back to the pool
|
||||
extern void _PyIndexPool_FreeIndex(_PyIndexPool *indices, int32_t index);
|
||||
|
||||
extern void _PyIndexPool_Fini(_PyIndexPool *indices);
|
||||
|
||||
#endif // Py_GIL_DISABLED
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // !Py_INTERNAL_INDEX_POOL_H
|
@ -26,6 +26,7 @@ extern "C" {
|
||||
#include "pycore_genobject.h" // _PyGen_FetchStopIterationValue
|
||||
#include "pycore_global_objects.h"// struct _Py_interp_cached_objects
|
||||
#include "pycore_import.h" // struct _import_state
|
||||
#include "pycore_index_pool.h" // _PyIndexPool
|
||||
#include "pycore_instruments.h" // _PY_MONITORING_EVENTS
|
||||
#include "pycore_list.h" // struct _Py_list_state
|
||||
#include "pycore_mimalloc.h" // struct _mimalloc_interp_state
|
||||
@ -222,6 +223,7 @@ struct _is {
|
||||
struct _brc_state brc; // biased reference counting state
|
||||
struct _Py_unique_id_pool unique_ids; // object ids for per-thread refcounts
|
||||
PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS];
|
||||
_PyIndexPool tlbc_indices;
|
||||
#endif
|
||||
|
||||
// Per-interpreter state for the obmalloc allocator. For the main
|
||||
|
@ -42,6 +42,9 @@ typedef struct _PyThreadStateImpl {
|
||||
int is_finalized;
|
||||
} refcounts;
|
||||
|
||||
// Index to use to retrieve thread-local bytecode for this thread
|
||||
int32_t tlbc_index;
|
||||
|
||||
// When >1, code objects do not immortalize their non-string constants.
|
||||
int suppress_co_const_immortalization;
|
||||
#endif
|
||||
@ -52,7 +55,6 @@ typedef struct _PyThreadStateImpl {
|
||||
|
||||
} _PyThreadStateImpl;
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
123
Include/internal/pycore_uop_ids.h
generated
123
Include/internal/pycore_uop_ids.h
generated
@ -193,106 +193,107 @@ extern "C" {
|
||||
#define _LOAD_ATTR_SLOT_1 423
|
||||
#define _LOAD_ATTR_WITH_HINT 424
|
||||
#define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
|
||||
#define _LOAD_BYTECODE 425
|
||||
#define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT
|
||||
#define _LOAD_CONST LOAD_CONST
|
||||
#define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL
|
||||
#define _LOAD_CONST_INLINE 425
|
||||
#define _LOAD_CONST_INLINE_BORROW 426
|
||||
#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 427
|
||||
#define _LOAD_CONST_INLINE_WITH_NULL 428
|
||||
#define _LOAD_CONST_INLINE 426
|
||||
#define _LOAD_CONST_INLINE_BORROW 427
|
||||
#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 428
|
||||
#define _LOAD_CONST_INLINE_WITH_NULL 429
|
||||
#define _LOAD_DEREF LOAD_DEREF
|
||||
#define _LOAD_FAST 429
|
||||
#define _LOAD_FAST_0 430
|
||||
#define _LOAD_FAST_1 431
|
||||
#define _LOAD_FAST_2 432
|
||||
#define _LOAD_FAST_3 433
|
||||
#define _LOAD_FAST_4 434
|
||||
#define _LOAD_FAST_5 435
|
||||
#define _LOAD_FAST_6 436
|
||||
#define _LOAD_FAST_7 437
|
||||
#define _LOAD_FAST 430
|
||||
#define _LOAD_FAST_0 431
|
||||
#define _LOAD_FAST_1 432
|
||||
#define _LOAD_FAST_2 433
|
||||
#define _LOAD_FAST_3 434
|
||||
#define _LOAD_FAST_4 435
|
||||
#define _LOAD_FAST_5 436
|
||||
#define _LOAD_FAST_6 437
|
||||
#define _LOAD_FAST_7 438
|
||||
#define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
|
||||
#define _LOAD_FAST_CHECK LOAD_FAST_CHECK
|
||||
#define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
|
||||
#define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
|
||||
#define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
|
||||
#define _LOAD_GLOBAL 438
|
||||
#define _LOAD_GLOBAL_BUILTINS 439
|
||||
#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 440
|
||||
#define _LOAD_GLOBAL_MODULE 441
|
||||
#define _LOAD_GLOBAL_MODULE_FROM_KEYS 442
|
||||
#define _LOAD_GLOBAL 439
|
||||
#define _LOAD_GLOBAL_BUILTINS 440
|
||||
#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 441
|
||||
#define _LOAD_GLOBAL_MODULE 442
|
||||
#define _LOAD_GLOBAL_MODULE_FROM_KEYS 443
|
||||
#define _LOAD_LOCALS LOAD_LOCALS
|
||||
#define _LOAD_NAME LOAD_NAME
|
||||
#define _LOAD_SMALL_INT 443
|
||||
#define _LOAD_SMALL_INT_0 444
|
||||
#define _LOAD_SMALL_INT_1 445
|
||||
#define _LOAD_SMALL_INT_2 446
|
||||
#define _LOAD_SMALL_INT_3 447
|
||||
#define _LOAD_SMALL_INT 444
|
||||
#define _LOAD_SMALL_INT_0 445
|
||||
#define _LOAD_SMALL_INT_1 446
|
||||
#define _LOAD_SMALL_INT_2 447
|
||||
#define _LOAD_SMALL_INT_3 448
|
||||
#define _LOAD_SPECIAL LOAD_SPECIAL
|
||||
#define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
|
||||
#define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
|
||||
#define _MAKE_CALLARGS_A_TUPLE 448
|
||||
#define _MAKE_CALLARGS_A_TUPLE 449
|
||||
#define _MAKE_CELL MAKE_CELL
|
||||
#define _MAKE_FUNCTION MAKE_FUNCTION
|
||||
#define _MAKE_WARM 449
|
||||
#define _MAKE_WARM 450
|
||||
#define _MAP_ADD MAP_ADD
|
||||
#define _MATCH_CLASS MATCH_CLASS
|
||||
#define _MATCH_KEYS MATCH_KEYS
|
||||
#define _MATCH_MAPPING MATCH_MAPPING
|
||||
#define _MATCH_SEQUENCE MATCH_SEQUENCE
|
||||
#define _MAYBE_EXPAND_METHOD 450
|
||||
#define _MAYBE_EXPAND_METHOD_KW 451
|
||||
#define _MONITOR_CALL 452
|
||||
#define _MONITOR_JUMP_BACKWARD 453
|
||||
#define _MONITOR_RESUME 454
|
||||
#define _MAYBE_EXPAND_METHOD 451
|
||||
#define _MAYBE_EXPAND_METHOD_KW 452
|
||||
#define _MONITOR_CALL 453
|
||||
#define _MONITOR_JUMP_BACKWARD 454
|
||||
#define _MONITOR_RESUME 455
|
||||
#define _NOP NOP
|
||||
#define _POP_EXCEPT POP_EXCEPT
|
||||
#define _POP_JUMP_IF_FALSE 455
|
||||
#define _POP_JUMP_IF_TRUE 456
|
||||
#define _POP_JUMP_IF_FALSE 456
|
||||
#define _POP_JUMP_IF_TRUE 457
|
||||
#define _POP_TOP POP_TOP
|
||||
#define _POP_TOP_LOAD_CONST_INLINE_BORROW 457
|
||||
#define _POP_TOP_LOAD_CONST_INLINE_BORROW 458
|
||||
#define _PUSH_EXC_INFO PUSH_EXC_INFO
|
||||
#define _PUSH_FRAME 458
|
||||
#define _PUSH_FRAME 459
|
||||
#define _PUSH_NULL PUSH_NULL
|
||||
#define _PY_FRAME_GENERAL 459
|
||||
#define _PY_FRAME_KW 460
|
||||
#define _QUICKEN_RESUME 461
|
||||
#define _REPLACE_WITH_TRUE 462
|
||||
#define _PY_FRAME_GENERAL 460
|
||||
#define _PY_FRAME_KW 461
|
||||
#define _QUICKEN_RESUME 462
|
||||
#define _REPLACE_WITH_TRUE 463
|
||||
#define _RESUME_CHECK RESUME_CHECK
|
||||
#define _RETURN_GENERATOR RETURN_GENERATOR
|
||||
#define _RETURN_VALUE RETURN_VALUE
|
||||
#define _SAVE_RETURN_OFFSET 463
|
||||
#define _SEND 464
|
||||
#define _SEND_GEN_FRAME 465
|
||||
#define _SAVE_RETURN_OFFSET 464
|
||||
#define _SEND 465
|
||||
#define _SEND_GEN_FRAME 466
|
||||
#define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
|
||||
#define _SET_ADD SET_ADD
|
||||
#define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
|
||||
#define _SET_UPDATE SET_UPDATE
|
||||
#define _START_EXECUTOR 466
|
||||
#define _STORE_ATTR 467
|
||||
#define _STORE_ATTR_INSTANCE_VALUE 468
|
||||
#define _STORE_ATTR_SLOT 469
|
||||
#define _STORE_ATTR_WITH_HINT 470
|
||||
#define _START_EXECUTOR 467
|
||||
#define _STORE_ATTR 468
|
||||
#define _STORE_ATTR_INSTANCE_VALUE 469
|
||||
#define _STORE_ATTR_SLOT 470
|
||||
#define _STORE_ATTR_WITH_HINT 471
|
||||
#define _STORE_DEREF STORE_DEREF
|
||||
#define _STORE_FAST 471
|
||||
#define _STORE_FAST_0 472
|
||||
#define _STORE_FAST_1 473
|
||||
#define _STORE_FAST_2 474
|
||||
#define _STORE_FAST_3 475
|
||||
#define _STORE_FAST_4 476
|
||||
#define _STORE_FAST_5 477
|
||||
#define _STORE_FAST_6 478
|
||||
#define _STORE_FAST_7 479
|
||||
#define _STORE_FAST 472
|
||||
#define _STORE_FAST_0 473
|
||||
#define _STORE_FAST_1 474
|
||||
#define _STORE_FAST_2 475
|
||||
#define _STORE_FAST_3 476
|
||||
#define _STORE_FAST_4 477
|
||||
#define _STORE_FAST_5 478
|
||||
#define _STORE_FAST_6 479
|
||||
#define _STORE_FAST_7 480
|
||||
#define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
|
||||
#define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
|
||||
#define _STORE_GLOBAL STORE_GLOBAL
|
||||
#define _STORE_NAME STORE_NAME
|
||||
#define _STORE_SLICE 480
|
||||
#define _STORE_SUBSCR 481
|
||||
#define _STORE_SLICE 481
|
||||
#define _STORE_SUBSCR 482
|
||||
#define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT
|
||||
#define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT
|
||||
#define _SWAP SWAP
|
||||
#define _TIER2_RESUME_CHECK 482
|
||||
#define _TO_BOOL 483
|
||||
#define _TIER2_RESUME_CHECK 483
|
||||
#define _TO_BOOL 484
|
||||
#define _TO_BOOL_BOOL TO_BOOL_BOOL
|
||||
#define _TO_BOOL_INT TO_BOOL_INT
|
||||
#define _TO_BOOL_LIST TO_BOOL_LIST
|
||||
@ -302,13 +303,13 @@ extern "C" {
|
||||
#define _UNARY_NEGATIVE UNARY_NEGATIVE
|
||||
#define _UNARY_NOT UNARY_NOT
|
||||
#define _UNPACK_EX UNPACK_EX
|
||||
#define _UNPACK_SEQUENCE 484
|
||||
#define _UNPACK_SEQUENCE 485
|
||||
#define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST
|
||||
#define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE
|
||||
#define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE
|
||||
#define _WITH_EXCEPT_START WITH_EXCEPT_START
|
||||
#define _YIELD_VALUE YIELD_VALUE
|
||||
#define MAX_UOP_ID 484
|
||||
#define MAX_UOP_ID 485
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
2
Include/internal/pycore_uop_metadata.h
generated
2
Include/internal/pycore_uop_metadata.h
generated
@ -289,7 +289,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
|
||||
[_FATAL_ERROR] = 0,
|
||||
[_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG,
|
||||
[_DEOPT] = 0,
|
||||
[_ERROR_POP_N] = HAS_ARG_FLAG,
|
||||
[_ERROR_POP_N] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG,
|
||||
[_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG,
|
||||
};
|
||||
|
||||
|
@ -1274,6 +1274,11 @@ def requires_specialization(test):
|
||||
_opcode.ENABLE_SPECIALIZATION, "requires specialization")(test)
|
||||
|
||||
|
||||
def requires_specialization_ft(test):
|
||||
# Decorator: skip `test` unless _opcode.ENABLE_SPECIALIZATION_FT is truthy.
# Same shape as requires_specialization, but keyed on the _FT flag.
return unittest.skipUnless(
|
||||
_opcode.ENABLE_SPECIALIZATION_FT, "requires specialization")(test)
|
||||
|
||||
|
||||
#=======================================================================
|
||||
# Check for the presence of docstrings.
|
||||
|
||||
|
@ -100,6 +100,7 @@ class CAPITests(unittest.TestCase):
|
||||
options.append(("run_presite", str | None, None))
|
||||
if sysconfig.get_config_var('Py_GIL_DISABLED'):
|
||||
options.append(("enable_gil", int, None))
|
||||
options.append(("tlbc_enabled", int, None))
|
||||
if support.MS_WINDOWS:
|
||||
options.extend((
|
||||
("legacy_windows_stdio", bool, None),
|
||||
|
@ -7,7 +7,8 @@ import os
|
||||
|
||||
import _opcode
|
||||
|
||||
from test.support import script_helper, requires_specialization, import_helper
|
||||
from test.support import (script_helper, requires_specialization,
|
||||
import_helper, Py_GIL_DISABLED)
|
||||
|
||||
_testinternalcapi = import_helper.import_module("_testinternalcapi")
|
||||
|
||||
@ -34,6 +35,7 @@ def clear_executors(func):
|
||||
|
||||
|
||||
@requires_specialization
|
||||
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
|
||||
@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"),
|
||||
"Requires optimizer infrastructure")
|
||||
class TestOptimizerAPI(unittest.TestCase):
|
||||
@ -138,6 +140,7 @@ def get_opnames(ex):
|
||||
|
||||
|
||||
@requires_specialization
|
||||
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
|
||||
@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"),
|
||||
"Requires optimizer infrastructure")
|
||||
class TestExecutorInvalidation(unittest.TestCase):
|
||||
@ -219,6 +222,7 @@ class TestExecutorInvalidation(unittest.TestCase):
|
||||
|
||||
|
||||
@requires_specialization
|
||||
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
|
||||
@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"),
|
||||
"Requires optimizer infrastructure")
|
||||
@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.")
|
||||
@ -586,6 +590,7 @@ class TestUops(unittest.TestCase):
|
||||
|
||||
|
||||
@requires_specialization
|
||||
@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
|
||||
@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"),
|
||||
"Requires optimizer infrastructure")
|
||||
@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.")
|
||||
|
@ -12,6 +12,7 @@ import unittest
|
||||
from test import support
|
||||
from test.support import os_helper
|
||||
from test.support import force_not_colorized
|
||||
from test.support import threading_helper
|
||||
from test.support.script_helper import (
|
||||
spawn_python, kill_python, assert_python_ok, assert_python_failure,
|
||||
interpreter_requires_environment
|
||||
@ -1068,6 +1069,57 @@ class CmdLineTest(unittest.TestCase):
|
||||
out = res.out.strip().decode("utf-8")
|
||||
return tuple(int(i) for i in out.split())
|
||||
|
||||
@unittest.skipUnless(support.Py_GIL_DISABLED,
|
||||
"PYTHON_TLBC and -X tlbc"
|
||||
" only supported in Py_GIL_DISABLED builds")
|
||||
@threading_helper.requires_working_threading()
|
||||
def test_disable_thread_local_bytecode(self):
|
||||
# Child script: call a small function on a second thread and join it.
code = """if 1:
|
||||
import threading
|
||||
def test(x, y):
|
||||
return x + y
|
||||
t = threading.Thread(target=test, args=(1,2))
|
||||
t.start()
|
||||
t.join()"""
|
||||
# The script must exit successfully with thread-local bytecode switched off,
# first through the -X option and then through the environment variable.
assert_python_ok("-W", "always", "-X", "tlbc=0", "-c", code)
|
||||
assert_python_ok("-W", "always", "-c", code, PYTHON_TLBC="0")
|
||||
|
||||
@unittest.skipUnless(support.Py_GIL_DISABLED,
|
||||
"PYTHON_TLBC and -X tlbc"
|
||||
" only supported in Py_GIL_DISABLED builds")
|
||||
@threading_helper.requires_working_threading()
|
||||
def test_enable_thread_local_bytecode(self):
|
||||
# Child script: call a small function on a second thread and join it.
code = """if 1:
|
||||
import threading
|
||||
def test(x, y):
|
||||
return x + y
|
||||
t = threading.Thread(target=test, args=(1,2))
|
||||
t.start()
|
||||
t.join()"""
|
||||
# The functionality of thread-local bytecode is tested more extensively
|
||||
# in test_thread_local_bytecode
|
||||
# Here we only check that explicitly enabling it (option, then environment
# variable) still lets the script exit successfully.
assert_python_ok("-W", "always", "-X", "tlbc=1", "-c", code)
|
||||
assert_python_ok("-W", "always", "-c", code, PYTHON_TLBC="1")
|
||||
|
||||
@unittest.skipUnless(support.Py_GIL_DISABLED,
|
||||
"PYTHON_TLBC and -X tlbc"
|
||||
" only supported in Py_GIL_DISABLED builds")
|
||||
def test_invalid_thread_local_bytecode(self):
|
||||
# -X tlbc: a missing value, a non-numeric value and the out-of-range values
# -1 and 2 must each make the interpreter fail with the diagnostic below.
rc, out, err = assert_python_failure("-X", "tlbc")
|
||||
self.assertIn(b"tlbc=n: n is missing or invalid", err)
|
||||
rc, out, err = assert_python_failure("-X", "tlbc=foo")
|
||||
self.assertIn(b"tlbc=n: n is missing or invalid", err)
|
||||
rc, out, err = assert_python_failure("-X", "tlbc=-1")
|
||||
self.assertIn(b"tlbc=n: n is missing or invalid", err)
|
||||
rc, out, err = assert_python_failure("-X", "tlbc=2")
|
||||
self.assertIn(b"tlbc=n: n is missing or invalid", err)
|
||||
# PYTHON_TLBC: the same invalid values supplied through the environment; the
# expected message names the variable instead of the option.
rc, out, err = assert_python_failure(PYTHON_TLBC="foo")
|
||||
self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err)
|
||||
rc, out, err = assert_python_failure(PYTHON_TLBC="-1")
|
||||
self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err)
|
||||
rc, out, err = assert_python_failure(PYTHON_TLBC="2")
|
||||
self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err)
|
||||
|
||||
|
||||
@unittest.skipIf(interpreter_requires_environment(),
|
||||
'Cannot run -I tests when PYTHON env vars are required.')
|
||||
|
@ -10,7 +10,8 @@ import sys
|
||||
import types
|
||||
import unittest
|
||||
from test.support import (captured_stdout, requires_debug_ranges,
|
||||
requires_specialization, cpython_only)
|
||||
requires_specialization, requires_specialization_ft,
|
||||
cpython_only)
|
||||
from test.support.bytecode_helper import BytecodeTestCase
|
||||
|
||||
import opcode
|
||||
@ -1261,7 +1262,7 @@ class DisTests(DisTestBase):
|
||||
self.do_disassembly_compare(got, dis_load_test_quickened_code)
|
||||
|
||||
@cpython_only
|
||||
@requires_specialization
|
||||
@requires_specialization_ft
|
||||
def test_binary_specialize(self):
|
||||
binary_op_quicken = """\
|
||||
0 RESUME_CHECK 0
|
||||
@ -1281,6 +1282,9 @@ class DisTests(DisTestBase):
|
||||
got = self.get_disassembly(co_unicode, adaptive=True)
|
||||
self.do_disassembly_compare(got, binary_op_quicken % "BINARY_OP_ADD_UNICODE 0 (+)")
|
||||
|
||||
@cpython_only
|
||||
@requires_specialization
|
||||
def test_binary_subscr_specialize(self):
|
||||
binary_subscr_quicken = """\
|
||||
0 RESUME_CHECK 0
|
||||
|
||||
|
@ -644,6 +644,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||
CONFIG_COMPAT['run_presite'] = None
|
||||
if support.Py_GIL_DISABLED:
|
||||
CONFIG_COMPAT['enable_gil'] = -1
|
||||
CONFIG_COMPAT['tlbc_enabled'] = GET_DEFAULT_CONFIG
|
||||
if MS_WINDOWS:
|
||||
CONFIG_COMPAT.update({
|
||||
'legacy_windows_stdio': False,
|
||||
|
@ -1094,6 +1094,13 @@ class SysModuleTest(unittest.TestCase):
|
||||
# While we could imagine a Python session where the number of
|
||||
# multiple buffer objects would exceed the sharing of references,
|
||||
# it is unlikely to happen in a normal test run.
|
||||
#
|
||||
# In free-threaded builds each code object owns an array of
|
||||
# pointers to copies of the bytecode. When the number of
|
||||
# code objects is a large fraction of the total number of
|
||||
# references, this can cause the total number of allocated
|
||||
# blocks to exceed the total number of references.
|
||||
if not support.Py_GIL_DISABLED:
|
||||
self.assertLess(a, sys.gettotalrefcount())
|
||||
except AttributeError:
|
||||
# gettotalrefcount() not available
|
||||
@ -1613,6 +1620,9 @@ class SizeofTest(unittest.TestCase):
|
||||
def func():
|
||||
return sys._getframe()
|
||||
x = func()
|
||||
if support.Py_GIL_DISABLED:
|
||||
INTERPRETER_FRAME = '10PhcP'
|
||||
else:
|
||||
INTERPRETER_FRAME = '9PhcP'
|
||||
check(x, size('3PiccPP' + INTERPRETER_FRAME + 'P'))
|
||||
# function
|
||||
|
198
Lib/test/test_thread_local_bytecode.py
Normal file
198
Lib/test/test_thread_local_bytecode.py
Normal file
@ -0,0 +1,198 @@
|
||||
"""Tests for thread-local bytecode."""
|
||||
import dis
|
||||
import textwrap
|
||||
import unittest
|
||||
|
||||
from test import support
|
||||
from test.support import cpython_only, import_helper, requires_specialization_ft
|
||||
from test.support.script_helper import assert_python_ok
|
||||
from test.support.threading_helper import requires_working_threading
|
||||
|
||||
# Skip this test if the _testinternalcapi module isn't available
|
||||
_testinternalcapi = import_helper.import_module("_testinternalcapi")
|
||||
|
||||
|
||||
@cpython_only
|
||||
@requires_working_threading()
|
||||
@unittest.skipUnless(support.Py_GIL_DISABLED, "only in free-threaded builds")
|
||||
class TLBCTests(unittest.TestCase):
|
||||
@requires_specialization_ft
|
||||
def test_new_threads_start_with_unspecialized_code(self):
|
||||
code = textwrap.dedent("""
|
||||
import dis
|
||||
import queue
|
||||
import threading
|
||||
|
||||
from _testinternalcapi import get_tlbc
|
||||
|
||||
def all_opnames(bc):
|
||||
return {i.opname for i in dis._get_instructions_bytes(bc)}
|
||||
|
||||
def f(a, b, q=None):
|
||||
if q is not None:
|
||||
q.put(get_tlbc(f))
|
||||
return a + b
|
||||
|
||||
for _ in range(100):
|
||||
# specialize
|
||||
f(1, 2)
|
||||
|
||||
q = queue.Queue()
|
||||
t = threading.Thread(target=f, args=('a', 'b', q))
|
||||
t.start()
|
||||
t.join()
|
||||
|
||||
assert "BINARY_OP_ADD_INT" in all_opnames(get_tlbc(f))
|
||||
assert "BINARY_OP_ADD_INT" not in all_opnames(q.get())
|
||||
""")
|
||||
assert_python_ok("-X", "tlbc=1", "-c", code)
|
||||
|
||||
@requires_specialization_ft
|
||||
def test_threads_specialize_independently(self):
|
||||
code = textwrap.dedent("""
|
||||
import dis
|
||||
import queue
|
||||
import threading
|
||||
|
||||
from _testinternalcapi import get_tlbc
|
||||
|
||||
def all_opnames(bc):
|
||||
return {i.opname for i in dis._get_instructions_bytes(bc)}
|
||||
|
||||
def f(a, b):
|
||||
return a + b
|
||||
|
||||
def g(a, b, q=None):
|
||||
for _ in range(100):
|
||||
f(a, b)
|
||||
if q is not None:
|
||||
q.put(get_tlbc(f))
|
||||
|
||||
# specialize in main thread
|
||||
g(1, 2)
|
||||
|
||||
# specialize in other thread
|
||||
q = queue.Queue()
|
||||
t = threading.Thread(target=g, args=('a', 'b', q))
|
||||
t.start()
|
||||
t.join()
|
||||
|
||||
assert "BINARY_OP_ADD_INT" in all_opnames(get_tlbc(f))
|
||||
t_opnames = all_opnames(q.get())
|
||||
assert "BINARY_OP_ADD_INT" not in t_opnames
|
||||
assert "BINARY_OP_ADD_UNICODE" in t_opnames
|
||||
""")
|
||||
assert_python_ok("-X", "tlbc=1", "-c", code)
|
||||
|
||||
def test_reuse_tlbc_across_threads_different_lifetimes(self):
|
||||
code = textwrap.dedent("""
|
||||
import queue
|
||||
import threading
|
||||
|
||||
from _testinternalcapi import get_tlbc_id
|
||||
|
||||
def f(a, b, q=None):
|
||||
if q is not None:
|
||||
q.put(get_tlbc_id(f))
|
||||
return a + b
|
||||
|
||||
q = queue.Queue()
|
||||
tlbc_ids = []
|
||||
for _ in range(3):
|
||||
t = threading.Thread(target=f, args=('a', 'b', q))
|
||||
t.start()
|
||||
t.join()
|
||||
tlbc_ids.append(q.get())
|
||||
|
||||
assert tlbc_ids[0] == tlbc_ids[1]
|
||||
assert tlbc_ids[1] == tlbc_ids[2]
|
||||
""")
|
||||
assert_python_ok("-X", "tlbc=1", "-c", code)
|
||||
|
||||
def test_no_copies_if_tlbc_disabled(self):
|
||||
code = textwrap.dedent("""
|
||||
import queue
|
||||
import threading
|
||||
|
||||
from _testinternalcapi import get_tlbc_id
|
||||
|
||||
def f(a, b, q=None):
|
||||
if q is not None:
|
||||
q.put(get_tlbc_id(f))
|
||||
return a + b
|
||||
|
||||
q = queue.Queue()
|
||||
threads = []
|
||||
for _ in range(3):
|
||||
t = threading.Thread(target=f, args=('a', 'b', q))
|
||||
t.start()
|
||||
threads.append(t)
|
||||
|
||||
tlbc_ids = []
|
||||
for t in threads:
|
||||
t.join()
|
||||
tlbc_ids.append(q.get())
|
||||
|
||||
main_tlbc_id = get_tlbc_id(f)
|
||||
assert main_tlbc_id is not None
|
||||
assert tlbc_ids[0] == main_tlbc_id
|
||||
assert tlbc_ids[1] == main_tlbc_id
|
||||
assert tlbc_ids[2] == main_tlbc_id
|
||||
""")
|
||||
assert_python_ok("-X", "tlbc=0", "-c", code)
|
||||
|
||||
def test_no_specialization_if_tlbc_disabled(self):
|
||||
code = textwrap.dedent("""
|
||||
import dis
|
||||
import queue
|
||||
import threading
|
||||
|
||||
from _testinternalcapi import get_tlbc
|
||||
|
||||
def all_opnames(f):
|
||||
bc = get_tlbc(f)
|
||||
return {i.opname for i in dis._get_instructions_bytes(bc)}
|
||||
|
||||
def f(a, b):
|
||||
return a + b
|
||||
|
||||
for _ in range(100):
|
||||
f(1, 2)
|
||||
|
||||
assert "BINARY_OP_ADD_INT" not in all_opnames(f)
|
||||
""")
|
||||
assert_python_ok("-X", "tlbc=0", "-c", code)
|
||||
|
||||
def test_generator_throw(self):
|
||||
code = textwrap.dedent("""
|
||||
import queue
|
||||
import threading
|
||||
|
||||
from _testinternalcapi import get_tlbc_id
|
||||
|
||||
def g():
|
||||
try:
|
||||
yield
|
||||
except:
|
||||
yield get_tlbc_id(g)
|
||||
|
||||
def f(q):
|
||||
gen = g()
|
||||
next(gen)
|
||||
q.put(gen.throw(ValueError))
|
||||
|
||||
q = queue.Queue()
|
||||
t = threading.Thread(target=f, args=(q,))
|
||||
t.start()
|
||||
t.join()
|
||||
|
||||
gen = g()
|
||||
next(gen)
|
||||
main_id = gen.throw(ValueError)
|
||||
assert main_id != q.get()
|
||||
""")
|
||||
assert_python_ok("-X", "tlbc=1", "-c", code)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
@ -460,6 +460,7 @@ PYTHON_OBJS= \
|
||||
Python/hashtable.o \
|
||||
Python/import.o \
|
||||
Python/importdl.o \
|
||||
Python/index_pool.o \
|
||||
Python/initconfig.o \
|
||||
Python/interpconfig.o \
|
||||
Python/instrumentation.o \
|
||||
@ -1228,6 +1229,7 @@ PYTHON_HEADERS= \
|
||||
$(srcdir)/Include/internal/pycore_hashtable.h \
|
||||
$(srcdir)/Include/internal/pycore_import.h \
|
||||
$(srcdir)/Include/internal/pycore_importdl.h \
|
||||
$(srcdir)/Include/internal/pycore_index_pool.h \
|
||||
$(srcdir)/Include/internal/pycore_initconfig.h \
|
||||
$(srcdir)/Include/internal/pycore_instruments.h \
|
||||
$(srcdir)/Include/internal/pycore_instruction_sequence.h \
|
||||
|
@ -422,6 +422,9 @@ _opcode_exec(PyObject *m) {
|
||||
if (PyModule_AddIntMacro(m, ENABLE_SPECIALIZATION) < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (PyModule_AddIntMacro(m, ENABLE_SPECIALIZATION_FT) < 0) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "pycore_bitutils.h" // _Py_bswap32()
|
||||
#include "pycore_bytesobject.h" // _PyBytes_Find()
|
||||
#include "pycore_ceval.h" // _PyEval_AddPendingCall()
|
||||
#include "pycore_code.h" // _PyCode_GetTLBCFast()
|
||||
#include "pycore_compile.h" // _PyCompile_CodeGen()
|
||||
#include "pycore_context.h" // _PyContext_NewHamtForTests()
|
||||
#include "pycore_dict.h" // _PyManagedDictPointer_GetValues()
|
||||
@ -1963,6 +1964,48 @@ get_py_thread_id(PyObject *self, PyObject *Py_UNUSED(ignored))
|
||||
Py_BUILD_ASSERT(sizeof(unsigned long long) >= sizeof(tid));
|
||||
return PyLong_FromUnsignedLongLong(tid);
|
||||
}
|
||||
|
||||
static PyCodeObject *
|
||||
get_code(PyObject *obj)
|
||||
{
|
||||
if (PyCode_Check(obj)) {
|
||||
return (PyCodeObject *)obj;
|
||||
}
|
||||
else if (PyFunction_Check(obj)) {
|
||||
return (PyCodeObject *)PyFunction_GetCode(obj);
|
||||
}
|
||||
return (PyCodeObject *)PyErr_Format(
|
||||
PyExc_TypeError, "expected function or code object, got %s",
|
||||
Py_TYPE(obj)->tp_name);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
get_tlbc(PyObject *Py_UNUSED(module), PyObject *obj)
|
||||
{
|
||||
PyCodeObject *code = get_code(obj);
|
||||
if (code == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
_Py_CODEUNIT *bc = _PyCode_GetTLBCFast(PyThreadState_GET(), code);
|
||||
if (bc == NULL) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
return PyBytes_FromStringAndSize((const char *)bc, _PyCode_NBYTES(code));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
get_tlbc_id(PyObject *Py_UNUSED(module), PyObject *obj)
|
||||
{
|
||||
PyCodeObject *code = get_code(obj);
|
||||
if (code == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
_Py_CODEUNIT *bc = _PyCode_GetTLBCFast(PyThreadState_GET(), code);
|
||||
if (bc == NULL) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
return PyLong_FromVoidPtr(bc);
|
||||
}
|
||||
#endif
|
||||
|
||||
static PyObject *
|
||||
@ -2022,7 +2065,6 @@ identify_type_slot_wrappers(PyObject *self, PyObject *Py_UNUSED(ignored))
|
||||
return _PyType_GetSlotWrapperNames();
|
||||
}
|
||||
|
||||
|
||||
static PyMethodDef module_functions[] = {
|
||||
{"get_configs", get_configs, METH_NOARGS},
|
||||
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
|
||||
@ -2110,6 +2152,8 @@ static PyMethodDef module_functions[] = {
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
{"py_thread_id", get_py_thread_id, METH_NOARGS},
|
||||
{"get_tlbc", get_tlbc, METH_O, NULL},
|
||||
{"get_tlbc_id", get_tlbc_id, METH_O, NULL},
|
||||
#endif
|
||||
#ifdef _Py_TIER2
|
||||
{"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},
|
||||
|
@ -6,17 +6,22 @@
|
||||
#include "pycore_code.h" // _PyCodeConstructor
|
||||
#include "pycore_frame.h" // FRAME_SPECIALS_SIZE
|
||||
#include "pycore_hashtable.h" // _Py_hashtable_t
|
||||
#include "pycore_index_pool.h" // _PyIndexPool
|
||||
#include "pycore_initconfig.h" // _PyStatus_OK()
|
||||
#include "pycore_interp.h" // PyInterpreterState.co_extra_freefuncs
|
||||
#include "pycore_object.h" // _PyObject_SetDeferredRefcount
|
||||
#include "pycore_object_stack.h"
|
||||
#include "pycore_opcode_metadata.h" // _PyOpcode_Deopt, _PyOpcode_Caches
|
||||
#include "pycore_opcode_utils.h" // RESUME_AT_FUNC_START
|
||||
#include "pycore_pymem.h" // _PyMem_FreeDelayed
|
||||
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
||||
#include "pycore_setobject.h" // _PySet_NextEntry()
|
||||
#include "pycore_tuple.h" // _PyTuple_ITEMS()
|
||||
#include "pycore_uniqueid.h" // _PyObject_AssignUniqueId()
|
||||
#include "clinic/codeobject.c.h"
|
||||
|
||||
#define INITIAL_SPECIALIZED_CODE_SIZE 16
|
||||
|
||||
static const char *
|
||||
code_event_name(PyCodeEvent event) {
|
||||
switch (event) {
|
||||
@ -440,9 +445,15 @@ _PyCode_Validate(struct _PyCodeConstructor *con)
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern void _PyCode_Quicken(PyCodeObject *code);
|
||||
extern void
|
||||
_PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, PyObject *consts,
|
||||
int enable_counters);
|
||||
|
||||
static void
|
||||
#ifdef Py_GIL_DISABLED
|
||||
static _PyCodeArray * _PyCodeArray_New(Py_ssize_t size);
|
||||
#endif
|
||||
|
||||
static int
|
||||
init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
|
||||
{
|
||||
int nlocalsplus = (int)PyTuple_GET_SIZE(con->localsplusnames);
|
||||
@ -505,14 +516,27 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
|
||||
|
||||
memcpy(_PyCode_CODE(co), PyBytes_AS_STRING(con->code),
|
||||
PyBytes_GET_SIZE(con->code));
|
||||
#ifdef Py_GIL_DISABLED
|
||||
co->co_tlbc = _PyCodeArray_New(INITIAL_SPECIALIZED_CODE_SIZE);
|
||||
if (co->co_tlbc == NULL) {
|
||||
return -1;
|
||||
}
|
||||
co->co_tlbc->entries[0] = co->co_code_adaptive;
|
||||
#endif
|
||||
int entry_point = 0;
|
||||
while (entry_point < Py_SIZE(co) &&
|
||||
_PyCode_CODE(co)[entry_point].op.code != RESUME) {
|
||||
entry_point++;
|
||||
}
|
||||
co->_co_firsttraceable = entry_point;
|
||||
_PyCode_Quicken(co);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
_PyCode_Quicken(_PyCode_CODE(co), Py_SIZE(co), co->co_consts,
|
||||
interp->config.tlbc_enabled);
|
||||
#else
|
||||
_PyCode_Quicken(_PyCode_CODE(co), Py_SIZE(co), co->co_consts, 1);
|
||||
#endif
|
||||
notify_code_watchers(PY_CODE_EVENT_CREATE, co);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
@ -667,7 +691,12 @@ _PyCode_New(struct _PyCodeConstructor *con)
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
init_code(co, con);
|
||||
|
||||
if (init_code(co, con) < 0) {
|
||||
Py_DECREF(co);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
co->_co_unique_id = _PyObject_AssignUniqueId((PyObject *)co);
|
||||
_PyObject_GC_TRACK(co);
|
||||
@ -1871,6 +1900,17 @@ code_dealloc(PyCodeObject *co)
|
||||
PyObject_ClearWeakRefs((PyObject*)co);
|
||||
}
|
||||
free_monitoring_data(co->_co_monitoring);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// The first element always points to the mutable bytecode at the end of
|
||||
// the code object, which will be freed when the code object is freed.
|
||||
for (Py_ssize_t i = 1; i < co->co_tlbc->size; i++) {
|
||||
char *entry = co->co_tlbc->entries[i];
|
||||
if (entry != NULL) {
|
||||
PyMem_Free(entry);
|
||||
}
|
||||
}
|
||||
PyMem_Free(co->co_tlbc);
|
||||
#endif
|
||||
PyObject_Free(co);
|
||||
}
|
||||
|
||||
@ -2646,5 +2686,270 @@ _PyCode_Fini(PyInterpreterState *interp)
|
||||
_Py_hashtable_destroy(state->constants);
|
||||
state->constants = NULL;
|
||||
}
|
||||
_PyIndexPool_Fini(&interp->tlbc_indices);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
||||
// Thread-local bytecode (TLBC)
|
||||
//
|
||||
// Each thread specializes a thread-local copy of the bytecode, created on the
|
||||
// first RESUME, in free-threaded builds. All copies of the bytecode for a code
|
||||
// object are stored in the `co_tlbc` array. Threads reserve a globally unique
|
||||
// index identifying their copy of the bytecode in all `co_tlbc` arrays at thread
|
||||
// creation and release the index at thread destruction. The first entry in
|
||||
// every `co_tlbc` array always points to the "main" copy of the bytecode that
|
||||
// is stored at the end of the code object. This ensures that no bytecode is
|
||||
// copied for programs that do not use threads.
|
||||
//
|
||||
// Thread-local bytecode can be disabled at runtime by providing either `-X
|
||||
// tlbc=0` or `PYTHON_TLBC=0`. Disabling thread-local bytecode also disables
|
||||
// specialization. All threads share the main copy of the bytecode when
|
||||
// thread-local bytecode is disabled.
|
||||
//
|
||||
// Concurrent modifications to the bytecode made by the specializing
|
||||
// interpreter and instrumentation use atomics, with specialization taking care
|
||||
// not to overwrite an instruction that was instrumented concurrently.
|
||||
|
||||
int32_t
|
||||
_Py_ReserveTLBCIndex(PyInterpreterState *interp)
|
||||
{
|
||||
if (interp->config.tlbc_enabled) {
|
||||
return _PyIndexPool_AllocIndex(&interp->tlbc_indices);
|
||||
}
|
||||
// All threads share the main copy of the bytecode when TLBC is disabled
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
_Py_ClearTLBCIndex(_PyThreadStateImpl *tstate)
|
||||
{
|
||||
PyInterpreterState *interp = ((PyThreadState *)tstate)->interp;
|
||||
if (interp->config.tlbc_enabled) {
|
||||
_PyIndexPool_FreeIndex(&interp->tlbc_indices, tstate->tlbc_index);
|
||||
}
|
||||
}
|
||||
|
||||
static _PyCodeArray *
|
||||
_PyCodeArray_New(Py_ssize_t size)
|
||||
{
|
||||
_PyCodeArray *arr = PyMem_Calloc(
|
||||
1, offsetof(_PyCodeArray, entries) + sizeof(void *) * size);
|
||||
if (arr == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
arr->size = size;
|
||||
return arr;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_code(_Py_CODEUNIT *dst, PyCodeObject *co)
|
||||
{
|
||||
int code_len = (int) Py_SIZE(co);
|
||||
for (int i = 0; i < code_len; i += _PyInstruction_GetLength(co, i)) {
|
||||
dst[i] = _Py_GetBaseCodeUnit(co, i);
|
||||
}
|
||||
_PyCode_Quicken(dst, code_len, co->co_consts, 1);
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
get_pow2_greater(Py_ssize_t initial, Py_ssize_t limit)
|
||||
{
|
||||
// initial must be a power of two
|
||||
assert(!(initial & (initial - 1)));
|
||||
Py_ssize_t res = initial;
|
||||
while (res && res < limit) {
|
||||
res <<= 1;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static _Py_CODEUNIT *
|
||||
create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx)
|
||||
{
|
||||
_PyCodeArray *tlbc = co->co_tlbc;
|
||||
if (idx >= tlbc->size) {
|
||||
Py_ssize_t new_size = get_pow2_greater(tlbc->size, idx + 1);
|
||||
if (!new_size) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
_PyCodeArray *new_tlbc = _PyCodeArray_New(new_size);
|
||||
if (new_tlbc == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *));
|
||||
_Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc);
|
||||
_PyMem_FreeDelayed(tlbc);
|
||||
tlbc = new_tlbc;
|
||||
}
|
||||
char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co));
|
||||
if (bc == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
copy_code((_Py_CODEUNIT *) bc, co);
|
||||
assert(tlbc->entries[idx] == NULL);
|
||||
tlbc->entries[idx] = bc;
|
||||
return (_Py_CODEUNIT *) bc;
|
||||
}
|
||||
|
||||
static _Py_CODEUNIT *
|
||||
get_tlbc_lock_held(PyCodeObject *co)
|
||||
{
|
||||
_PyCodeArray *tlbc = co->co_tlbc;
|
||||
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
|
||||
int32_t idx = tstate->tlbc_index;
|
||||
if (idx < tlbc->size && tlbc->entries[idx] != NULL) {
|
||||
return (_Py_CODEUNIT *)tlbc->entries[idx];
|
||||
}
|
||||
return create_tlbc_lock_held(co, idx);
|
||||
}
|
||||
|
||||
_Py_CODEUNIT *
|
||||
_PyCode_GetTLBC(PyCodeObject *co)
|
||||
{
|
||||
_Py_CODEUNIT *result;
|
||||
Py_BEGIN_CRITICAL_SECTION(co);
|
||||
result = get_tlbc_lock_held(co);
|
||||
Py_END_CRITICAL_SECTION();
|
||||
return result;
|
||||
}
|
||||
|
||||
// My kingdom for a bitset
|
||||
struct flag_set {
|
||||
uint8_t *flags;
|
||||
Py_ssize_t size;
|
||||
};
|
||||
|
||||
static inline int
|
||||
flag_is_set(struct flag_set *flags, Py_ssize_t idx)
|
||||
{
|
||||
assert(idx >= 0);
|
||||
return (idx < flags->size) && flags->flags[idx];
|
||||
}
|
||||
|
||||
// Set the flag for each tlbc index in use
|
||||
static int
|
||||
get_indices_in_use(PyInterpreterState *interp, struct flag_set *in_use)
|
||||
{
|
||||
assert(interp->stoptheworld.world_stopped);
|
||||
assert(in_use->flags == NULL);
|
||||
int32_t max_index = 0;
|
||||
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
|
||||
int32_t idx = ((_PyThreadStateImpl *) p)->tlbc_index;
|
||||
if (idx > max_index) {
|
||||
max_index = idx;
|
||||
}
|
||||
}
|
||||
in_use->size = (size_t) max_index + 1;
|
||||
in_use->flags = PyMem_Calloc(in_use->size, sizeof(*in_use->flags));
|
||||
if (in_use->flags == NULL) {
|
||||
return -1;
|
||||
}
|
||||
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
|
||||
in_use->flags[((_PyThreadStateImpl *) p)->tlbc_index] = 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct get_code_args {
|
||||
_PyObjectStack code_objs;
|
||||
struct flag_set indices_in_use;
|
||||
int err;
|
||||
};
|
||||
|
||||
static void
|
||||
clear_get_code_args(struct get_code_args *args)
|
||||
{
|
||||
if (args->indices_in_use.flags != NULL) {
|
||||
PyMem_Free(args->indices_in_use.flags);
|
||||
args->indices_in_use.flags = NULL;
|
||||
}
|
||||
_PyObjectStack_Clear(&args->code_objs);
|
||||
}
|
||||
|
||||
static inline int
|
||||
is_bytecode_unused(_PyCodeArray *tlbc, Py_ssize_t idx,
|
||||
struct flag_set *indices_in_use)
|
||||
{
|
||||
assert(idx > 0 && idx < tlbc->size);
|
||||
return tlbc->entries[idx] != NULL && !flag_is_set(indices_in_use, idx);
|
||||
}
|
||||
|
||||
static int
|
||||
get_code_with_unused_tlbc(PyObject *obj, struct get_code_args *args)
|
||||
{
|
||||
if (!PyCode_Check(obj)) {
|
||||
return 1;
|
||||
}
|
||||
PyCodeObject *co = (PyCodeObject *) obj;
|
||||
_PyCodeArray *tlbc = co->co_tlbc;
|
||||
// The first index always points at the main copy of the bytecode embedded
|
||||
// in the code object.
|
||||
for (Py_ssize_t i = 1; i < tlbc->size; i++) {
|
||||
if (is_bytecode_unused(tlbc, i, &args->indices_in_use)) {
|
||||
if (_PyObjectStack_Push(&args->code_objs, obj) < 0) {
|
||||
args->err = -1;
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
free_unused_bytecode(PyCodeObject *co, struct flag_set *indices_in_use)
|
||||
{
|
||||
_PyCodeArray *tlbc = co->co_tlbc;
|
||||
// The first index always points at the main copy of the bytecode embedded
|
||||
// in the code object.
|
||||
for (Py_ssize_t i = 1; i < tlbc->size; i++) {
|
||||
if (is_bytecode_unused(tlbc, i, indices_in_use)) {
|
||||
PyMem_Free(tlbc->entries[i]);
|
||||
tlbc->entries[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
_Py_ClearUnusedTLBC(PyInterpreterState *interp)
|
||||
{
|
||||
struct get_code_args args = {
|
||||
.code_objs = {NULL},
|
||||
.indices_in_use = {NULL, 0},
|
||||
.err = 0,
|
||||
};
|
||||
_PyEval_StopTheWorld(interp);
|
||||
// Collect in-use tlbc indices
|
||||
if (get_indices_in_use(interp, &args.indices_in_use) < 0) {
|
||||
goto err;
|
||||
}
|
||||
// Collect code objects that have bytecode not in use by any thread
|
||||
_PyGC_VisitObjectsWorldStopped(
|
||||
interp, (gcvisitobjects_t)get_code_with_unused_tlbc, &args);
|
||||
if (args.err < 0) {
|
||||
goto err;
|
||||
}
|
||||
// Free unused bytecode. This must happen outside of gc_visit_heaps; it is
|
||||
// unsafe to allocate or free any mimalloc managed memory when it's
|
||||
// running.
|
||||
PyObject *obj;
|
||||
while ((obj = _PyObjectStack_Pop(&args.code_objs)) != NULL) {
|
||||
free_unused_bytecode((PyCodeObject*) obj, &args.indices_in_use);
|
||||
}
|
||||
_PyEval_StartTheWorld(interp);
|
||||
clear_get_code_args(&args);
|
||||
return 0;
|
||||
|
||||
err:
|
||||
_PyEval_StartTheWorld(interp);
|
||||
clear_get_code_args(&args);
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1651,7 +1651,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno, void *Py_UNUSED(ignore
|
||||
}
|
||||
/* Finally set the new lasti and return OK. */
|
||||
f->f_lineno = 0;
|
||||
f->f_frame->instr_ptr = _PyCode_CODE(code) + best_addr;
|
||||
f->f_frame->instr_ptr = _PyFrame_GetBytecode(f->f_frame) + best_addr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1867,10 +1867,11 @@ PyTypeObject PyFrame_Type = {
|
||||
};
|
||||
|
||||
static void
|
||||
init_frame(_PyInterpreterFrame *frame, PyFunctionObject *func, PyObject *locals)
|
||||
init_frame(PyThreadState *tstate, _PyInterpreterFrame *frame,
|
||||
PyFunctionObject *func, PyObject *locals)
|
||||
{
|
||||
PyCodeObject *code = (PyCodeObject *)func->func_code;
|
||||
_PyFrame_Initialize(frame, PyStackRef_FromPyObjectNew(func),
|
||||
_PyFrame_Initialize(tstate, frame, PyStackRef_FromPyObjectNew(func),
|
||||
Py_XNewRef(locals), code, 0, NULL);
|
||||
}
|
||||
|
||||
@ -1922,7 +1923,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code,
|
||||
Py_DECREF(func);
|
||||
return NULL;
|
||||
}
|
||||
init_frame((_PyInterpreterFrame *)f->_f_frame_data, func, locals);
|
||||
init_frame(tstate, (_PyInterpreterFrame *)f->_f_frame_data, func, locals);
|
||||
f->f_frame = (_PyInterpreterFrame *)f->_f_frame_data;
|
||||
f->f_frame->owner = FRAME_OWNED_BY_FRAME_OBJECT;
|
||||
// This frame needs to be "complete", so pretend that the first RESUME ran:
|
||||
@ -1941,7 +1942,8 @@ frame_init_get_vars(_PyInterpreterFrame *frame)
|
||||
// here:
|
||||
PyCodeObject *co = _PyFrame_GetCode(frame);
|
||||
int lasti = _PyInterpreterFrame_LASTI(frame);
|
||||
if (!(lasti < 0 && _PyCode_CODE(co)->op.code == COPY_FREE_VARS
|
||||
if (!(lasti < 0
|
||||
&& _PyFrame_GetBytecode(frame)->op.code == COPY_FREE_VARS
|
||||
&& PyStackRef_FunctionCheck(frame->f_funcobj)))
|
||||
{
|
||||
/* Free vars are initialized */
|
||||
@ -1957,7 +1959,7 @@ frame_init_get_vars(_PyInterpreterFrame *frame)
|
||||
frame->localsplus[offset + i] = PyStackRef_FromPyObjectNew(o);
|
||||
}
|
||||
// COPY_FREE_VARS doesn't have inline CACHEs, either:
|
||||
frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame));
|
||||
frame->instr_ptr = _PyFrame_GetBytecode(frame);
|
||||
}
|
||||
|
||||
|
||||
|
@ -11638,9 +11638,10 @@ super_descr_get(PyObject *self, PyObject *obj, PyObject *type)
|
||||
}
|
||||
|
||||
static int
|
||||
super_init_without_args(_PyInterpreterFrame *cframe, PyCodeObject *co,
|
||||
PyTypeObject **type_p, PyObject **obj_p)
|
||||
super_init_without_args(_PyInterpreterFrame *cframe, PyTypeObject **type_p,
|
||||
PyObject **obj_p)
|
||||
{
|
||||
PyCodeObject *co = _PyFrame_GetCode(cframe);
|
||||
if (co->co_argcount == 0) {
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"super(): no arguments");
|
||||
@ -11740,7 +11741,7 @@ super_init_impl(PyObject *self, PyTypeObject *type, PyObject *obj) {
|
||||
"super(): no current frame");
|
||||
return -1;
|
||||
}
|
||||
int res = super_init_without_args(frame, _PyFrame_GetCode(frame), &type, &obj);
|
||||
int res = super_init_without_args(frame, &type, &obj);
|
||||
|
||||
if (res < 0) {
|
||||
return -1;
|
||||
|
@ -222,6 +222,7 @@
|
||||
<ClCompile Include="..\Python\hashtable.c" />
|
||||
<ClCompile Include="..\Python\import.c" />
|
||||
<ClCompile Include="..\Python\importdl.c" />
|
||||
<ClCompile Include="..\Python\index_pool.c" />
|
||||
<ClCompile Include="..\Python\initconfig.c" />
|
||||
<ClCompile Include="..\Python\instruction_sequence.c" />
|
||||
<ClCompile Include="..\Python\interpconfig.c" />
|
||||
|
@ -232,6 +232,9 @@
|
||||
<ClCompile Include="..\Python\importdl.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\index_pool.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\initconfig.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
@ -255,6 +255,7 @@
|
||||
<ClInclude Include="..\Include\internal\pycore_identifier.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_import.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_importdl.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_index_pool.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_initconfig.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_instruction_sequence.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_interp.h" />
|
||||
@ -614,6 +615,7 @@
|
||||
<ClCompile Include="..\Python\hashtable.c" />
|
||||
<ClCompile Include="..\Python\import.c" />
|
||||
<ClCompile Include="..\Python\importdl.c" />
|
||||
<ClCompile Include="..\Python\index_pool.c" />
|
||||
<ClCompile Include="..\Python\initconfig.c" />
|
||||
<ClCompile Include="..\Python\interpconfig.c" />
|
||||
<ClCompile Include="..\Python\intrinsics.c" />
|
||||
|
@ -687,6 +687,9 @@
|
||||
<ClInclude Include="..\Include\internal\pycore_importdl.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_index_pool.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_initconfig.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
@ -1373,6 +1376,9 @@
|
||||
<ClCompile Include="..\Python\importdl.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\index_pool.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\initconfig.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
|
@ -168,11 +168,11 @@ dummy_func(
|
||||
}
|
||||
|
||||
op(_QUICKEN_RESUME, (--)) {
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
if (tstate->tracing == 0 && this_instr->op.code == RESUME) {
|
||||
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK);
|
||||
}
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
}
|
||||
|
||||
tier1 op(_MAYBE_INSTRUMENT, (--)) {
|
||||
@ -190,7 +190,26 @@ dummy_func(
|
||||
}
|
||||
}
|
||||
|
||||
op(_LOAD_BYTECODE, (--)) {
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index) {
|
||||
_Py_CODEUNIT *bytecode =
|
||||
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
|
||||
ERROR_IF(bytecode == NULL, error);
|
||||
int off = this_instr - _PyFrame_GetBytecode(frame);
|
||||
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
|
||||
frame->instr_ptr = bytecode + off;
|
||||
// Make sure this_instr gets reset correctly for any uops that
|
||||
// follow
|
||||
next_instr = frame->instr_ptr;
|
||||
DISPATCH();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
macro(RESUME) =
|
||||
_LOAD_BYTECODE +
|
||||
_MAYBE_INSTRUMENT +
|
||||
_QUICKEN_RESUME +
|
||||
_CHECK_PERIODIC_IF_NOT_YIELD_FROM;
|
||||
@ -204,6 +223,10 @@ dummy_func(
|
||||
uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
|
||||
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
|
||||
DEOPT_IF(eval_breaker != version);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
DEOPT_IF(frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index);
|
||||
#endif
|
||||
}
|
||||
|
||||
op(_MONITOR_RESUME, (--)) {
|
||||
@ -217,6 +240,7 @@ dummy_func(
|
||||
}
|
||||
|
||||
macro(INSTRUMENTED_RESUME) =
|
||||
_LOAD_BYTECODE +
|
||||
_MAYBE_INSTRUMENT +
|
||||
_CHECK_PERIODIC_IF_NOT_YIELD_FROM +
|
||||
_MONITOR_RESUME;
|
||||
@ -682,8 +706,8 @@ dummy_func(
|
||||
};
|
||||
|
||||
specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) {
|
||||
assert(frame->stackpointer == NULL);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
assert(frame->stackpointer == NULL);
|
||||
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
|
||||
next_instr = this_instr;
|
||||
_Py_Specialize_BinarySubscr(container, sub, next_instr);
|
||||
@ -1236,7 +1260,7 @@ dummy_func(
|
||||
if (oparg) {
|
||||
PyObject *lasti = PyStackRef_AsPyObjectBorrow(values[0]);
|
||||
if (PyLong_Check(lasti)) {
|
||||
frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + PyLong_AsLong(lasti);
|
||||
frame->instr_ptr = _PyFrame_GetBytecode(frame) + PyLong_AsLong(lasti);
|
||||
assert(!_PyErr_Occurred(tstate));
|
||||
}
|
||||
else {
|
||||
@ -2671,9 +2695,7 @@ dummy_func(
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
DEAD(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
|
||||
@ -2681,9 +2703,7 @@ dummy_func(
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
DEAD(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
|
||||
@ -3697,7 +3717,7 @@ dummy_func(
|
||||
op(_CREATE_INIT_FRAME, (init[1], self[1], args[oparg] -- init_frame: _PyInterpreterFrame *)) {
|
||||
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
|
||||
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
|
||||
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
|
||||
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
|
||||
/* Push self onto stack of shim */
|
||||
shim->localsplus[0] = PyStackRef_DUP(self[0]);
|
||||
DEAD(init);
|
||||
@ -4593,7 +4613,7 @@ dummy_func(
|
||||
}
|
||||
|
||||
specializing op(_SPECIALIZE_BINARY_OP, (counter/1, lhs, rhs -- lhs, rhs)) {
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
|
||||
next_instr = this_instr;
|
||||
_Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
|
||||
@ -4601,7 +4621,7 @@ dummy_func(
|
||||
}
|
||||
OPCODE_DEFERRED_INC(BINARY_OP);
|
||||
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
assert(NB_ADD <= oparg);
|
||||
assert(oparg <= NB_INPLACE_XOR);
|
||||
}
|
||||
@ -4632,7 +4652,7 @@ dummy_func(
|
||||
int original_opcode = 0;
|
||||
if (tstate->tracing) {
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyCode_CODE(code))].original_opcode;
|
||||
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyFrame_GetBytecode(frame))].original_opcode;
|
||||
next_instr = this_instr;
|
||||
} else {
|
||||
original_opcode = _Py_call_instrumentation_line(
|
||||
@ -4687,9 +4707,7 @@ dummy_func(
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
@ -4698,9 +4716,7 @@ dummy_func(
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
@ -4715,9 +4731,7 @@ dummy_func(
|
||||
PyStackRef_CLOSE(value_stackref);
|
||||
offset = 0;
|
||||
}
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
@ -4815,7 +4829,7 @@ dummy_func(
|
||||
tier2 op(_EXIT_TRACE, (exit_p/4 --)) {
|
||||
_PyExitData *exit = (_PyExitData *)exit_p;
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
|
||||
_Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
|
||||
#if defined(Py_DEBUG) && !defined(_Py_JIT)
|
||||
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
|
||||
if (lltrace >= 2) {
|
||||
@ -4823,7 +4837,7 @@ dummy_func(
|
||||
_PyUOpPrint(&next_uop[-1]);
|
||||
printf(", exit %u, temp %d, target %d -> %s]\n",
|
||||
exit - current_executor->exits, exit->temperature.value_and_backoff,
|
||||
(int)(target - _PyCode_CODE(code)),
|
||||
(int)(target - _PyFrame_GetBytecode(frame)),
|
||||
_PyOpcode_OpName[target->op.code]);
|
||||
}
|
||||
#endif
|
||||
@ -4933,7 +4947,7 @@ dummy_func(
|
||||
_PyUOpPrint(&next_uop[-1]);
|
||||
printf(", exit %u, temp %d, target %d -> %s]\n",
|
||||
exit - current_executor->exits, exit->temperature.value_and_backoff,
|
||||
(int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))),
|
||||
(int)(target - _PyFrame_GetBytecode(frame)),
|
||||
_PyOpcode_OpName[target->op.code]);
|
||||
}
|
||||
#endif
|
||||
@ -4995,7 +5009,7 @@ dummy_func(
|
||||
}
|
||||
|
||||
tier2 op(_ERROR_POP_N, (target/2, unused[oparg] --)) {
|
||||
frame->instr_ptr = ((_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive) + target;
|
||||
frame->instr_ptr = _PyFrame_GetBytecode(frame) + target;
|
||||
SYNC_SP();
|
||||
GOTO_UNWIND();
|
||||
}
|
||||
|
@ -189,7 +189,7 @@ lltrace_instruction(_PyInterpreterFrame *frame,
|
||||
dump_stack(frame, stack_pointer);
|
||||
const char *opname = _PyOpcode_OpName[opcode];
|
||||
assert(opname != NULL);
|
||||
int offset = (int)(next_instr - _PyCode_CODE(_PyFrame_GetCode(frame)));
|
||||
int offset = (int)(next_instr - _PyFrame_GetBytecode(frame));
|
||||
if (OPCODE_HAS_ARG((int)_PyOpcode_Deopt[opcode])) {
|
||||
printf("%d: %s %d\n", offset * 2, opname, oparg);
|
||||
}
|
||||
@ -841,6 +841,19 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
|
||||
}
|
||||
/* Because this avoids the RESUME,
|
||||
* we need to update instrumentation */
|
||||
#ifdef Py_GIL_DISABLED
|
||||
/* Load thread-local bytecode */
|
||||
if (frame->tlbc_index != ((_PyThreadStateImpl *)tstate)->tlbc_index) {
|
||||
_Py_CODEUNIT *bytecode =
|
||||
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
|
||||
if (bytecode == NULL) {
|
||||
goto error;
|
||||
}
|
||||
ptrdiff_t off = frame->instr_ptr - _PyFrame_GetBytecode(frame);
|
||||
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
|
||||
frame->instr_ptr = bytecode + off;
|
||||
}
|
||||
#endif
|
||||
_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
|
||||
monitor_throw(tstate, frame, frame->instr_ptr);
|
||||
/* TO DO -- Monitor throw entry. */
|
||||
@ -983,7 +996,7 @@ exception_unwind:
|
||||
Python main loop. */
|
||||
PyObject *exc = _PyErr_GetRaisedException(tstate);
|
||||
PUSH(PyStackRef_FromPyObjectSteal(exc));
|
||||
next_instr = _PyCode_CODE(_PyFrame_GetCode(frame)) + handler;
|
||||
next_instr = _PyFrame_GetBytecode(frame) + handler;
|
||||
|
||||
if (monitor_handled(tstate, frame, next_instr, exc) < 0) {
|
||||
goto exception_unwind;
|
||||
@ -1045,6 +1058,8 @@ enter_tier_two:
|
||||
|
||||
#undef ENABLE_SPECIALIZATION
|
||||
#define ENABLE_SPECIALIZATION 0
|
||||
#undef ENABLE_SPECIALIZATION_FT
|
||||
#define ENABLE_SPECIALIZATION_FT 0
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
#define DPRINTF(level, ...) \
|
||||
@ -1139,7 +1154,7 @@ exit_to_tier1_dynamic:
|
||||
goto goto_to_tier1;
|
||||
exit_to_tier1:
|
||||
assert(next_uop[-1].format == UOP_FORMAT_TARGET);
|
||||
next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
|
||||
next_instr = next_uop[-1].target + _PyFrame_GetBytecode(frame);
|
||||
goto_to_tier1:
|
||||
#ifdef Py_DEBUG
|
||||
if (lltrace >= 2) {
|
||||
@ -1764,7 +1779,7 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func,
|
||||
if (frame == NULL) {
|
||||
goto fail;
|
||||
}
|
||||
_PyFrame_Initialize(frame, func, locals, code, 0, previous);
|
||||
_PyFrame_Initialize(tstate, frame, func, locals, code, 0, previous);
|
||||
if (initialize_locals(tstate, func_obj, frame->localsplus, args, argcount, kwnames)) {
|
||||
assert(frame->owner == FRAME_OWNED_BY_THREAD);
|
||||
clear_thread_frame(tstate, frame);
|
||||
|
@ -151,7 +151,7 @@ GETITEM(PyObject *v, Py_ssize_t i) {
|
||||
/* Code access macros */
|
||||
|
||||
/* The integer overflow is checked by an assertion below. */
|
||||
#define INSTR_OFFSET() ((int)(next_instr - _PyCode_CODE(_PyFrame_GetCode(frame))))
|
||||
#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
|
||||
#define NEXTOPARG() do { \
|
||||
_Py_CODEUNIT word = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
|
||||
opcode = word.op.code; \
|
||||
@ -301,14 +301,6 @@ GETITEM(PyObject *v, Py_ssize_t i) {
|
||||
#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
|
||||
backoff_counter_triggers(forge_backoff_counter((COUNTER)))
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
|
||||
do { \
|
||||
/* gh-115999 tracks progress on addressing this. */ \
|
||||
static_assert(0, "The specializing interpreter is not yet thread-safe"); \
|
||||
} while (0);
|
||||
#define PAUSE_ADAPTIVE_COUNTER(COUNTER) ((void)COUNTER)
|
||||
#else
|
||||
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
|
||||
do { \
|
||||
(COUNTER) = advance_backoff_counter((COUNTER)); \
|
||||
@ -318,6 +310,18 @@ GETITEM(PyObject *v, Py_ssize_t i) {
|
||||
do { \
|
||||
(COUNTER) = pause_backoff_counter((COUNTER)); \
|
||||
} while (0);
|
||||
|
||||
#ifdef ENABLE_SPECIALIZATION_FT
|
||||
/* Multiple threads may execute these concurrently if thread-local bytecode is
|
||||
* disabled and they all execute the main copy of the bytecode. Specialization
|
||||
* is disabled in that case so the value is unused, but the RMW cycle should be
|
||||
* free of data races.
|
||||
*/
|
||||
#define RECORD_BRANCH_TAKEN(bitset, flag) \
|
||||
FT_ATOMIC_STORE_UINT16_RELAXED( \
|
||||
bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
|
||||
#else
|
||||
#define RECORD_BRANCH_TAKEN(bitset, flag)
|
||||
#endif
|
||||
|
||||
#define UNBOUNDLOCAL_ERROR_MSG \
|
||||
|
23
Python/executor_cases.c.h
generated
23
Python/executor_cases.c.h
generated
@ -41,6 +41,8 @@
|
||||
|
||||
/* _QUICKEN_RESUME is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
|
||||
|
||||
/* _LOAD_BYTECODE is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
|
||||
|
||||
case _RESUME_CHECK: {
|
||||
#if defined(__EMSCRIPTEN__)
|
||||
if (_Py_emscripten_signal_clock == 0) {
|
||||
@ -56,6 +58,13 @@
|
||||
UOP_STAT_INC(uopcode, miss);
|
||||
JUMP_TO_JUMP_TARGET();
|
||||
}
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index) {
|
||||
UOP_STAT_INC(uopcode, miss);
|
||||
JUMP_TO_JUMP_TARGET();
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
@ -4480,8 +4489,8 @@
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
|
||||
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
|
||||
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
|
||||
/* Push self onto stack of shim */
|
||||
shim->localsplus[0] = PyStackRef_DUP(self[0]);
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
@ -5683,7 +5692,9 @@
|
||||
PyObject *exit_p = (PyObject *)CURRENT_OPERAND();
|
||||
_PyExitData *exit = (_PyExitData *)exit_p;
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
_Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
#if defined(Py_DEBUG) && !defined(_Py_JIT)
|
||||
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
|
||||
if (lltrace >= 2) {
|
||||
@ -5692,7 +5703,7 @@
|
||||
_PyUOpPrint(&next_uop[-1]);
|
||||
printf(", exit %u, temp %d, target %d -> %s]\n",
|
||||
exit - current_executor->exits, exit->temperature.value_and_backoff,
|
||||
(int)(target - _PyCode_CODE(code)),
|
||||
(int)(target - _PyFrame_GetBytecode(frame)),
|
||||
_PyOpcode_OpName[target->op.code]);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
}
|
||||
@ -5878,7 +5889,7 @@
|
||||
_PyUOpPrint(&next_uop[-1]);
|
||||
printf(", exit %u, temp %d, target %d -> %s]\n",
|
||||
exit - current_executor->exits, exit->temperature.value_and_backoff,
|
||||
(int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))),
|
||||
(int)(target - _PyFrame_GetBytecode(frame)),
|
||||
_PyOpcode_OpName[target->op.code]);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
}
|
||||
@ -5956,9 +5967,11 @@
|
||||
case _ERROR_POP_N: {
|
||||
oparg = CURRENT_OPARG();
|
||||
uint32_t target = (uint32_t)CURRENT_OPERAND();
|
||||
frame->instr_ptr = ((_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive) + target;
|
||||
stack_pointer += -oparg;
|
||||
assert(WITHIN_STACK_BOUNDS());
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
frame->instr_ptr = _PyFrame_GetBytecode(frame) + target;
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
GOTO_UNWIND();
|
||||
break;
|
||||
}
|
||||
|
@ -63,7 +63,8 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame)
|
||||
// This may be a newly-created generator or coroutine frame. Since it's
|
||||
// dead anyways, just pretend that the first RESUME ran:
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
frame->instr_ptr = _PyCode_CODE(code) + code->_co_firsttraceable + 1;
|
||||
frame->instr_ptr =
|
||||
_PyFrame_GetBytecode(frame) + code->_co_firsttraceable + 1;
|
||||
}
|
||||
assert(!_PyFrame_IsIncomplete(frame));
|
||||
assert(f->f_back == NULL);
|
||||
|
@ -1953,16 +1953,22 @@ custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area,
|
||||
}
|
||||
|
||||
void
|
||||
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
|
||||
_PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp,
|
||||
gcvisitobjects_t callback, void *arg)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
struct custom_visitor_args wrapper = {
|
||||
.callback = callback,
|
||||
.arg = arg,
|
||||
};
|
||||
|
||||
_PyEval_StopTheWorld(interp);
|
||||
gc_visit_heaps(interp, &custom_visitor_wrapper, &wrapper.base);
|
||||
}
|
||||
|
||||
void
|
||||
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
_PyEval_StopTheWorld(interp);
|
||||
_PyGC_VisitObjectsWorldStopped(interp, callback, arg);
|
||||
_PyEval_StartTheWorld(interp);
|
||||
}
|
||||
|
||||
|
100
Python/generated_cases.c.h
generated
100
Python/generated_cases.c.h
generated
@ -25,7 +25,7 @@
|
||||
lhs = stack_pointer[-2];
|
||||
uint16_t counter = read_u16(&this_instr[1].cache);
|
||||
(void)counter;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
|
||||
next_instr = this_instr;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
@ -35,7 +35,7 @@
|
||||
}
|
||||
OPCODE_DEFERRED_INC(BINARY_OP);
|
||||
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
assert(NB_ADD <= oparg);
|
||||
assert(oparg <= NB_INPLACE_XOR);
|
||||
}
|
||||
@ -435,8 +435,8 @@
|
||||
container = stack_pointer[-2];
|
||||
uint16_t counter = read_u16(&this_instr[1].cache);
|
||||
(void)counter;
|
||||
assert(frame->stackpointer == NULL);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
assert(frame->stackpointer == NULL);
|
||||
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
|
||||
next_instr = this_instr;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
@ -1066,8 +1066,8 @@
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
|
||||
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
|
||||
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
|
||||
/* Push self onto stack of shim */
|
||||
shim->localsplus[0] = PyStackRef_DUP(self[0]);
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
@ -4711,7 +4711,9 @@
|
||||
int original_opcode = 0;
|
||||
if (tstate->tracing) {
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyCode_CODE(code))].original_opcode;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyFrame_GetBytecode(frame))].original_opcode;
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
next_instr = this_instr;
|
||||
} else {
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
@ -4759,9 +4761,7 @@
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
DISPATCH();
|
||||
}
|
||||
@ -4782,9 +4782,7 @@
|
||||
PyStackRef_CLOSE(value_stackref);
|
||||
offset = 0;
|
||||
}
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
DISPATCH();
|
||||
}
|
||||
@ -4822,9 +4820,7 @@
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
DISPATCH();
|
||||
}
|
||||
@ -4834,6 +4830,28 @@
|
||||
(void)this_instr;
|
||||
next_instr += 1;
|
||||
INSTRUCTION_STATS(INSTRUMENTED_RESUME);
|
||||
// _LOAD_BYTECODE
|
||||
{
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index) {
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
_Py_CODEUNIT *bytecode =
|
||||
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
if (bytecode == NULL) goto error;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
int off = this_instr - _PyFrame_GetBytecode(frame);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
|
||||
frame->instr_ptr = bytecode + off;
|
||||
// Make sure this_instr gets reset correctly for any uops that
|
||||
// follow
|
||||
next_instr = frame->instr_ptr;
|
||||
DISPATCH();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
// _MAYBE_INSTRUMENT
|
||||
{
|
||||
if (tstate->tracing == 0) {
|
||||
@ -6646,9 +6664,7 @@
|
||||
cond = stack_pointer[-1];
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
stack_pointer += -1;
|
||||
assert(WITHIN_STACK_BOUNDS());
|
||||
@ -6680,9 +6696,7 @@
|
||||
cond = b;
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
stack_pointer += -1;
|
||||
@ -6715,9 +6729,7 @@
|
||||
cond = b;
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
stack_pointer += -1;
|
||||
@ -6735,9 +6747,7 @@
|
||||
cond = stack_pointer[-1];
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
stack_pointer += -1;
|
||||
assert(WITHIN_STACK_BOUNDS());
|
||||
@ -6832,7 +6842,11 @@
|
||||
if (oparg) {
|
||||
PyObject *lasti = PyStackRef_AsPyObjectBorrow(values[0]);
|
||||
if (PyLong_Check(lasti)) {
|
||||
frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + PyLong_AsLong(lasti);
|
||||
stack_pointer += -1;
|
||||
assert(WITHIN_STACK_BOUNDS());
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
frame->instr_ptr = _PyFrame_GetBytecode(frame) + PyLong_AsLong(lasti);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
assert(!_PyErr_Occurred(tstate));
|
||||
}
|
||||
else {
|
||||
@ -6844,6 +6858,8 @@
|
||||
Py_DECREF(exc);
|
||||
goto error;
|
||||
}
|
||||
stack_pointer += 1;
|
||||
assert(WITHIN_STACK_BOUNDS());
|
||||
}
|
||||
assert(exc && PyExceptionInstance_Check(exc));
|
||||
stack_pointer += -1;
|
||||
@ -6871,6 +6887,28 @@
|
||||
PREDICTED(RESUME);
|
||||
_Py_CODEUNIT* const this_instr = next_instr - 1;
|
||||
(void)this_instr;
|
||||
// _LOAD_BYTECODE
|
||||
{
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index) {
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
_Py_CODEUNIT *bytecode =
|
||||
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
if (bytecode == NULL) goto error;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
int off = this_instr - _PyFrame_GetBytecode(frame);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
|
||||
frame->instr_ptr = bytecode + off;
|
||||
// Make sure this_instr gets reset correctly for any uops that
|
||||
// follow
|
||||
next_instr = frame->instr_ptr;
|
||||
DISPATCH();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
// _MAYBE_INSTRUMENT
|
||||
{
|
||||
if (tstate->tracing == 0) {
|
||||
@ -6890,11 +6928,11 @@
|
||||
}
|
||||
// _QUICKEN_RESUME
|
||||
{
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
if (tstate->tracing == 0 && this_instr->op.code == RESUME) {
|
||||
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK);
|
||||
}
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
}
|
||||
// _CHECK_PERIODIC_IF_NOT_YIELD_FROM
|
||||
{
|
||||
@ -6925,6 +6963,10 @@
|
||||
uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
|
||||
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
|
||||
DEOPT_IF(eval_breaker != version, RESUME);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
DEOPT_IF(frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index, RESUME);
|
||||
#endif
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
|
193
Python/index_pool.c
Normal file
193
Python/index_pool.c
Normal file
@ -0,0 +1,193 @@
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
#include "pycore_index_pool.h"
|
||||
#include "pycore_lock.h"
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
||||
static inline void
|
||||
swap(int32_t *values, Py_ssize_t i, Py_ssize_t j)
|
||||
{
|
||||
int32_t tmp = values[i];
|
||||
values[i] = values[j];
|
||||
values[j] = tmp;
|
||||
}
|
||||
|
||||
static bool
|
||||
heap_try_swap(_PyIndexHeap *heap, Py_ssize_t i, Py_ssize_t j)
|
||||
{
|
||||
if (i < 0 || i >= heap->size) {
|
||||
return 0;
|
||||
}
|
||||
if (j < 0 || j >= heap->size) {
|
||||
return 0;
|
||||
}
|
||||
if (i <= j) {
|
||||
if (heap->values[i] <= heap->values[j]) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else if (heap->values[j] <= heap->values[i]) {
|
||||
return 0;
|
||||
}
|
||||
swap(heap->values, i, j);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline Py_ssize_t
|
||||
parent(Py_ssize_t i)
|
||||
{
|
||||
return (i - 1) / 2;
|
||||
}
|
||||
|
||||
static inline Py_ssize_t
|
||||
left_child(Py_ssize_t i)
|
||||
{
|
||||
return 2 * i + 1;
|
||||
}
|
||||
|
||||
static inline Py_ssize_t
|
||||
right_child(Py_ssize_t i)
|
||||
{
|
||||
return 2 * i + 2;
|
||||
}
|
||||
|
||||
static void
|
||||
heap_add(_PyIndexHeap *heap, int32_t val)
|
||||
{
|
||||
assert(heap->size < heap->capacity);
|
||||
// Add val to end
|
||||
heap->values[heap->size] = val;
|
||||
heap->size++;
|
||||
// Sift up
|
||||
for (Py_ssize_t cur = heap->size - 1; cur > 0; cur = parent(cur)) {
|
||||
if (!heap_try_swap(heap, cur, parent(cur))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
heap_min_child(_PyIndexHeap *heap, Py_ssize_t i)
|
||||
{
|
||||
if (left_child(i) < heap->size) {
|
||||
if (right_child(i) < heap->size) {
|
||||
Py_ssize_t lval = heap->values[left_child(i)];
|
||||
Py_ssize_t rval = heap->values[right_child(i)];
|
||||
return lval < rval ? left_child(i) : right_child(i);
|
||||
}
|
||||
return left_child(i);
|
||||
}
|
||||
else if (right_child(i) < heap->size) {
|
||||
return right_child(i);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
heap_pop(_PyIndexHeap *heap)
|
||||
{
|
||||
assert(heap->size > 0);
|
||||
// Pop smallest and replace with the last element
|
||||
int32_t result = heap->values[0];
|
||||
heap->values[0] = heap->values[heap->size - 1];
|
||||
heap->size--;
|
||||
// Sift down
|
||||
for (Py_ssize_t cur = 0; cur < heap->size;) {
|
||||
Py_ssize_t min_child = heap_min_child(heap, cur);
|
||||
if (min_child > -1 && heap_try_swap(heap, cur, min_child)) {
|
||||
cur = min_child;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static int
|
||||
heap_ensure_capacity(_PyIndexHeap *heap, Py_ssize_t limit)
|
||||
{
|
||||
assert(limit > 0);
|
||||
if (heap->capacity > limit) {
|
||||
return 0;
|
||||
}
|
||||
Py_ssize_t new_capacity = heap->capacity ? heap->capacity : 1024;
|
||||
while (new_capacity && new_capacity < limit) {
|
||||
new_capacity <<= 1;
|
||||
}
|
||||
if (!new_capacity) {
|
||||
return -1;
|
||||
}
|
||||
int32_t *new_values = PyMem_RawCalloc(new_capacity, sizeof(int32_t));
|
||||
if (new_values == NULL) {
|
||||
return -1;
|
||||
}
|
||||
if (heap->values != NULL) {
|
||||
memcpy(new_values, heap->values, heap->capacity);
|
||||
PyMem_RawFree(heap->values);
|
||||
}
|
||||
heap->values = new_values;
|
||||
heap->capacity = new_capacity;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
heap_fini(_PyIndexHeap *heap)
|
||||
{
|
||||
if (heap->values != NULL) {
|
||||
PyMem_RawFree(heap->values);
|
||||
heap->values = NULL;
|
||||
}
|
||||
heap->size = -1;
|
||||
heap->capacity = -1;
|
||||
}
|
||||
|
||||
#define LOCK_POOL(pool) PyMutex_LockFlags(&pool->mutex, _Py_LOCK_DONT_DETACH)
|
||||
#define UNLOCK_POOL(pool) PyMutex_Unlock(&pool->mutex)
|
||||
|
||||
int32_t
|
||||
_PyIndexPool_AllocIndex(_PyIndexPool *pool)
|
||||
{
|
||||
LOCK_POOL(pool);
|
||||
int32_t index;
|
||||
_PyIndexHeap *free_indices = &pool->free_indices;
|
||||
if (free_indices->size == 0) {
|
||||
// No free indices. Make sure the heap can always store all of the
|
||||
// indices that have been allocated to avoid having to allocate memory
|
||||
// (which can fail) when freeing an index. Freeing indices happens when
|
||||
// threads are being destroyed, which makes error handling awkward /
|
||||
// impossible. This arrangement shifts handling of allocation failures
|
||||
// to when indices are allocated, which happens at thread creation,
|
||||
// where we are better equipped to deal with failure.
|
||||
if (heap_ensure_capacity(free_indices, pool->next_index + 1) < 0) {
|
||||
UNLOCK_POOL(pool);
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
index = pool->next_index++;
|
||||
}
|
||||
else {
|
||||
index = heap_pop(free_indices);
|
||||
}
|
||||
UNLOCK_POOL(pool);
|
||||
return index;
|
||||
}
|
||||
|
||||
void
|
||||
_PyIndexPool_FreeIndex(_PyIndexPool *pool, int32_t index)
|
||||
{
|
||||
LOCK_POOL(pool);
|
||||
heap_add(&pool->free_indices, index);
|
||||
UNLOCK_POOL(pool);
|
||||
}
|
||||
|
||||
void
|
||||
_PyIndexPool_Fini(_PyIndexPool *pool)
|
||||
{
|
||||
heap_fini(&pool->free_indices);
|
||||
}
|
||||
|
||||
#endif // Py_GIL_DISABLED
|
@ -134,6 +134,7 @@ static const PyConfigSpec PYCONFIG_SPEC[] = {
|
||||
SPEC(dump_refs_file, WSTR_OPT, READ_ONLY, NO_SYS),
|
||||
#ifdef Py_GIL_DISABLED
|
||||
SPEC(enable_gil, INT, READ_ONLY, NO_SYS),
|
||||
SPEC(tlbc_enabled, INT, READ_ONLY, NO_SYS),
|
||||
#endif
|
||||
SPEC(faulthandler, BOOL, READ_ONLY, NO_SYS),
|
||||
SPEC(filesystem_encoding, WSTR, READ_ONLY, NO_SYS),
|
||||
@ -315,7 +316,12 @@ The following implementation-specific options are available:\n\
|
||||
"\
|
||||
-X showrefcount: output the total reference count and number of used\n\
|
||||
memory blocks when the program finishes or after each statement in\n\
|
||||
the interactive interpreter; only works on debug builds\n\
|
||||
the interactive interpreter; only works on debug builds\n"
|
||||
#ifdef Py_GIL_DISABLED
|
||||
"-X tlbc=[0|1]: enable (1) or disable (0) thread-local bytecode. Also\n\
|
||||
PYTHON_TLBC\n"
|
||||
#endif
|
||||
"\
|
||||
-X tracemalloc[=N]: trace Python memory allocations; N sets a traceback limit\n \
|
||||
of N frames (default: 1); also PYTHONTRACEMALLOC=N\n\
|
||||
-X utf8[=0|1]: enable (1) or disable (0) UTF-8 mode; also PYTHONUTF8\n\
|
||||
@ -400,6 +406,9 @@ static const char usage_envvars[] =
|
||||
#ifdef Py_STATS
|
||||
"PYTHONSTATS : turns on statistics gathering (-X pystats)\n"
|
||||
#endif
|
||||
#ifdef Py_GIL_DISABLED
|
||||
"PYTHON_TLBC : when set to 0, disables thread-local bytecode (-X tlbc)\n"
|
||||
#endif
|
||||
"PYTHONTRACEMALLOC: trace Python memory allocations (-X tracemalloc)\n"
|
||||
"PYTHONUNBUFFERED: disable stdout/stderr buffering (-u)\n"
|
||||
"PYTHONUTF8 : control the UTF-8 mode (-X utf8)\n"
|
||||
@ -979,6 +988,7 @@ _PyConfig_InitCompatConfig(PyConfig *config)
|
||||
config->cpu_count = -1;
|
||||
#ifdef Py_GIL_DISABLED
|
||||
config->enable_gil = _PyConfig_GIL_DEFAULT;
|
||||
config->tlbc_enabled = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1862,6 +1872,36 @@ error:
|
||||
"n must be greater than 0");
|
||||
}
|
||||
|
||||
static PyStatus
|
||||
config_init_tlbc(PyConfig *config)
|
||||
{
|
||||
#ifdef Py_GIL_DISABLED
|
||||
const char *env = config_get_env(config, "PYTHON_TLBC");
|
||||
if (env) {
|
||||
int enabled;
|
||||
if (_Py_str_to_int(env, &enabled) < 0 || (enabled < 0) || (enabled > 1)) {
|
||||
return _PyStatus_ERR(
|
||||
"PYTHON_TLBC=N: N is missing or invalid");
|
||||
}
|
||||
config->tlbc_enabled = enabled;
|
||||
}
|
||||
|
||||
const wchar_t *xoption = config_get_xoption(config, L"tlbc");
|
||||
if (xoption) {
|
||||
int enabled;
|
||||
const wchar_t *sep = wcschr(xoption, L'=');
|
||||
if (!sep || (config_wstr_to_int(sep + 1, &enabled) < 0) || (enabled < 0) || (enabled > 1)) {
|
||||
return _PyStatus_ERR(
|
||||
"-X tlbc=n: n is missing or invalid");
|
||||
}
|
||||
config->tlbc_enabled = enabled;
|
||||
}
|
||||
return _PyStatus_OK();
|
||||
#else
|
||||
return _PyStatus_OK();
|
||||
#endif
|
||||
}
|
||||
|
||||
static PyStatus
|
||||
config_init_perf_profiling(PyConfig *config)
|
||||
{
|
||||
@ -2111,6 +2151,11 @@ config_read_complex_options(PyConfig *config)
|
||||
}
|
||||
#endif
|
||||
|
||||
status = config_init_tlbc(config);
|
||||
if (_PyStatus_EXCEPTION(status)) {
|
||||
return status;
|
||||
}
|
||||
|
||||
return _PyStatus_OK();
|
||||
}
|
||||
|
||||
|
@ -44,10 +44,24 @@
|
||||
|
||||
#define UNLOCK_CODE() Py_END_CRITICAL_SECTION()
|
||||
|
||||
/* Call `func` once for every existing copy of the bytecode of `code`,
 * passing the copy (as a _Py_CODEUNIT *) followed by the remaining macro
 * arguments. Entries of co_tlbc that are NULL are skipped.
 *
 * `code` is evaluated exactly once. The macro's own locals carry an `mb_`
 * prefix and trailing underscore so that they are unlikely to capture
 * identifiers appearing in the forwarded arguments.
 */
#define MODIFY_BYTECODE(code, func, ...)                                    \
    do {                                                                    \
        PyCodeObject *mb_co_ = (code);                                      \
        for (Py_ssize_t mb_idx_ = 0;                                        \
             mb_idx_ < mb_co_->co_tlbc->size;                               \
             mb_idx_++)                                                     \
        {                                                                   \
            char *mb_bc_ = mb_co_->co_tlbc->entries[mb_idx_];               \
            if (mb_bc_ == NULL) {                                           \
                continue;                                                   \
            }                                                               \
            (func)((_Py_CODEUNIT *)mb_bc_, __VA_ARGS__);                    \
        }                                                                   \
    } while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define LOCK_CODE(code)
|
||||
#define UNLOCK_CODE()
|
||||
#define MODIFY_BYTECODE(code, func, ...) \
|
||||
(func)(_PyCode_CODE(code), __VA_ARGS__)
|
||||
|
||||
#endif
|
||||
|
||||
@ -309,7 +323,8 @@ _PyInstruction_GetLength(PyCodeObject *code, int offset)
|
||||
{
|
||||
ASSERT_WORLD_STOPPED_OR_LOCKED(code);
|
||||
|
||||
int opcode = _PyCode_CODE(code)[offset].op.code;
|
||||
int opcode =
|
||||
FT_ATOMIC_LOAD_UINT8_RELAXED(_PyCode_CODE(code)[offset].op.code);
|
||||
assert(opcode != 0);
|
||||
assert(opcode != RESERVED);
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
@ -578,7 +593,9 @@ sanity_check_instrumentation(PyCodeObject *code)
|
||||
_Py_CODEUNIT
|
||||
_Py_GetBaseCodeUnit(PyCodeObject *code, int i)
|
||||
{
|
||||
_Py_CODEUNIT inst = _PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *src_instr = _PyCode_CODE(code) + i;
|
||||
_Py_CODEUNIT inst = {
|
||||
.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t *)src_instr)};
|
||||
int opcode = inst.op.code;
|
||||
if (opcode < MIN_INSTRUMENTED_OPCODE) {
|
||||
inst.op.code = _PyOpcode_Deopt[opcode];
|
||||
@ -614,21 +631,22 @@ _Py_GetBaseCodeUnit(PyCodeObject *code, int i)
|
||||
}
|
||||
|
||||
static void
|
||||
de_instrument(PyCodeObject *code, int i, int event)
|
||||
de_instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i,
|
||||
int event)
|
||||
{
|
||||
assert(event != PY_MONITORING_EVENT_INSTRUCTION);
|
||||
assert(event != PY_MONITORING_EVENT_LINE);
|
||||
|
||||
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *instr = &bytecode[i];
|
||||
uint8_t *opcode_ptr = &instr->op.code;
|
||||
int opcode = *opcode_ptr;
|
||||
assert(opcode != ENTER_EXECUTOR);
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
opcode_ptr = &code->_co_monitoring->lines[i].original_opcode;
|
||||
opcode_ptr = &monitoring->lines[i].original_opcode;
|
||||
opcode = *opcode_ptr;
|
||||
}
|
||||
if (opcode == INSTRUMENTED_INSTRUCTION) {
|
||||
opcode_ptr = &code->_co_monitoring->per_instruction_opcodes[i];
|
||||
opcode_ptr = &monitoring->per_instruction_opcodes[i];
|
||||
opcode = *opcode_ptr;
|
||||
}
|
||||
int deinstrumented = DE_INSTRUMENT[opcode];
|
||||
@ -644,65 +662,68 @@ de_instrument(PyCodeObject *code, int i, int event)
|
||||
}
|
||||
|
||||
static void
|
||||
de_instrument_line(PyCodeObject *code, int i)
|
||||
de_instrument_line(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring,
|
||||
int i)
|
||||
{
|
||||
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *instr = &bytecode[i];
|
||||
int opcode = instr->op.code;
|
||||
if (opcode != INSTRUMENTED_LINE) {
|
||||
return;
|
||||
}
|
||||
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
|
||||
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
|
||||
int original_opcode = lines->original_opcode;
|
||||
if (original_opcode == INSTRUMENTED_INSTRUCTION) {
|
||||
lines->original_opcode = code->_co_monitoring->per_instruction_opcodes[i];
|
||||
lines->original_opcode = monitoring->per_instruction_opcodes[i];
|
||||
}
|
||||
CHECK(original_opcode != 0);
|
||||
CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
|
||||
instr->op.code = original_opcode;
|
||||
FT_ATOMIC_STORE_UINT8(instr->op.code, original_opcode);
|
||||
if (_PyOpcode_Caches[original_opcode]) {
|
||||
instr[1].counter = adaptive_counter_warmup();
|
||||
FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff,
|
||||
adaptive_counter_warmup().value_and_backoff);
|
||||
}
|
||||
assert(instr->op.code != INSTRUMENTED_LINE);
|
||||
}
|
||||
|
||||
static void
|
||||
de_instrument_per_instruction(PyCodeObject *code, int i)
|
||||
de_instrument_per_instruction(_Py_CODEUNIT *bytecode,
|
||||
_PyCoMonitoringData *monitoring, int i)
|
||||
{
|
||||
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *instr = &bytecode[i];
|
||||
uint8_t *opcode_ptr = &instr->op.code;
|
||||
int opcode = *opcode_ptr;
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
opcode_ptr = &code->_co_monitoring->lines[i].original_opcode;
|
||||
opcode_ptr = &monitoring->lines[i].original_opcode;
|
||||
opcode = *opcode_ptr;
|
||||
}
|
||||
if (opcode != INSTRUMENTED_INSTRUCTION) {
|
||||
return;
|
||||
}
|
||||
int original_opcode = code->_co_monitoring->per_instruction_opcodes[i];
|
||||
int original_opcode = monitoring->per_instruction_opcodes[i];
|
||||
CHECK(original_opcode != 0);
|
||||
CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
|
||||
*opcode_ptr = original_opcode;
|
||||
FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, original_opcode);
|
||||
if (_PyOpcode_Caches[original_opcode]) {
|
||||
instr[1].counter = adaptive_counter_warmup();
|
||||
FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff,
|
||||
adaptive_counter_warmup().value_and_backoff);
|
||||
}
|
||||
assert(*opcode_ptr != INSTRUMENTED_INSTRUCTION);
|
||||
assert(instr->op.code != INSTRUMENTED_INSTRUCTION);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
instrument(PyCodeObject *code, int i)
|
||||
instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i)
|
||||
{
|
||||
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *instr = &bytecode[i];
|
||||
uint8_t *opcode_ptr = &instr->op.code;
|
||||
int opcode =*opcode_ptr;
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
|
||||
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
|
||||
opcode_ptr = &lines->original_opcode;
|
||||
opcode = *opcode_ptr;
|
||||
}
|
||||
if (opcode == INSTRUMENTED_INSTRUCTION) {
|
||||
opcode_ptr = &code->_co_monitoring->per_instruction_opcodes[i];
|
||||
opcode_ptr = &monitoring->per_instruction_opcodes[i];
|
||||
opcode = *opcode_ptr;
|
||||
CHECK(opcode != INSTRUMENTED_INSTRUCTION && opcode != INSTRUMENTED_LINE);
|
||||
CHECK(opcode == _PyOpcode_Deopt[opcode]);
|
||||
@ -716,52 +737,52 @@ instrument(PyCodeObject *code, int i)
|
||||
if (_PyOpcode_Caches[deopt]) {
|
||||
FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff,
|
||||
adaptive_counter_warmup().value_and_backoff);
|
||||
instr[1].counter = adaptive_counter_warmup();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
instrument_line(PyCodeObject *code, int i)
|
||||
instrument_line(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i)
|
||||
{
|
||||
uint8_t *opcode_ptr = &_PyCode_CODE(code)[i].op.code;
|
||||
uint8_t *opcode_ptr = &bytecode[i].op.code;
|
||||
int opcode = *opcode_ptr;
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
return;
|
||||
}
|
||||
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
|
||||
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
|
||||
lines->original_opcode = _PyOpcode_Deopt[opcode];
|
||||
CHECK(lines->original_opcode > 0);
|
||||
*opcode_ptr = INSTRUMENTED_LINE;
|
||||
FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_LINE);
|
||||
}
|
||||
|
||||
static void
|
||||
instrument_per_instruction(PyCodeObject *code, int i)
|
||||
instrument_per_instruction(_Py_CODEUNIT *bytecode,
|
||||
_PyCoMonitoringData *monitoring, int i)
|
||||
{
|
||||
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *instr = &bytecode[i];
|
||||
uint8_t *opcode_ptr = &instr->op.code;
|
||||
int opcode = *opcode_ptr;
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
|
||||
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
|
||||
opcode_ptr = &lines->original_opcode;
|
||||
opcode = *opcode_ptr;
|
||||
}
|
||||
if (opcode == INSTRUMENTED_INSTRUCTION) {
|
||||
assert(code->_co_monitoring->per_instruction_opcodes[i] > 0);
|
||||
assert(monitoring->per_instruction_opcodes[i] > 0);
|
||||
return;
|
||||
}
|
||||
CHECK(opcode != 0);
|
||||
if (is_instrumented(opcode)) {
|
||||
code->_co_monitoring->per_instruction_opcodes[i] = opcode;
|
||||
monitoring->per_instruction_opcodes[i] = opcode;
|
||||
}
|
||||
else {
|
||||
assert(opcode != 0);
|
||||
assert(_PyOpcode_Deopt[opcode] != 0);
|
||||
assert(_PyOpcode_Deopt[opcode] != RESUME);
|
||||
code->_co_monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode];
|
||||
monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode];
|
||||
}
|
||||
assert(code->_co_monitoring->per_instruction_opcodes[i] > 0);
|
||||
*opcode_ptr = INSTRUMENTED_INSTRUCTION;
|
||||
assert(monitoring->per_instruction_opcodes[i] > 0);
|
||||
FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_INSTRUCTION);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -773,19 +794,19 @@ remove_tools(PyCodeObject * code, int offset, int event, int tools)
|
||||
assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event));
|
||||
assert(opcode_has_event(_Py_GetBaseCodeUnit(code, offset).op.code));
|
||||
_PyCoMonitoringData *monitoring = code->_co_monitoring;
|
||||
bool should_de_instrument;
|
||||
if (monitoring && monitoring->tools) {
|
||||
monitoring->tools[offset] &= ~tools;
|
||||
if (monitoring->tools[offset] == 0) {
|
||||
de_instrument(code, offset, event);
|
||||
}
|
||||
should_de_instrument = (monitoring->tools[offset] == 0);
|
||||
}
|
||||
else {
|
||||
/* Single tool */
|
||||
uint8_t single_tool = code->_co_monitoring->active_monitors.tools[event];
|
||||
assert(_Py_popcount32(single_tool) <= 1);
|
||||
if (((single_tool & tools) == single_tool)) {
|
||||
de_instrument(code, offset, event);
|
||||
should_de_instrument = ((single_tool & tools) == single_tool);
|
||||
}
|
||||
if (should_de_instrument) {
|
||||
MODIFY_BYTECODE(code, de_instrument, monitoring, offset, event);
|
||||
}
|
||||
}
|
||||
|
||||
@ -804,22 +825,23 @@ remove_line_tools(PyCodeObject * code, int offset, int tools)
|
||||
{
|
||||
ASSERT_WORLD_STOPPED_OR_LOCKED(code);
|
||||
|
||||
assert(code->_co_monitoring);
|
||||
if (code->_co_monitoring->line_tools)
|
||||
_PyCoMonitoringData *monitoring = code->_co_monitoring;
|
||||
assert(monitoring);
|
||||
bool should_de_instrument;
|
||||
if (monitoring->line_tools)
|
||||
{
|
||||
uint8_t *toolsptr = &code->_co_monitoring->line_tools[offset];
|
||||
uint8_t *toolsptr = &monitoring->line_tools[offset];
|
||||
*toolsptr &= ~tools;
|
||||
if (*toolsptr == 0 ) {
|
||||
de_instrument_line(code, offset);
|
||||
}
|
||||
should_de_instrument = (*toolsptr == 0);
|
||||
}
|
||||
else {
|
||||
/* Single tool */
|
||||
uint8_t single_tool = code->_co_monitoring->active_monitors.tools[PY_MONITORING_EVENT_LINE];
|
||||
uint8_t single_tool = monitoring->active_monitors.tools[PY_MONITORING_EVENT_LINE];
|
||||
assert(_Py_popcount32(single_tool) <= 1);
|
||||
if (((single_tool & tools) == single_tool)) {
|
||||
de_instrument_line(code, offset);
|
||||
should_de_instrument = ((single_tool & tools) == single_tool);
|
||||
}
|
||||
if (should_de_instrument) {
|
||||
MODIFY_BYTECODE(code, de_instrument_line, monitoring, offset);
|
||||
}
|
||||
}
|
||||
|
||||
@ -841,7 +863,7 @@ add_tools(PyCodeObject * code, int offset, int event, int tools)
|
||||
assert(_Py_popcount32(tools) == 1);
|
||||
assert(tools_is_subset_for_event(code, event, tools));
|
||||
}
|
||||
instrument(code, offset);
|
||||
MODIFY_BYTECODE(code, instrument, code->_co_monitoring, offset);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -858,7 +880,7 @@ add_line_tools(PyCodeObject * code, int offset, int tools)
|
||||
/* Single tool */
|
||||
assert(_Py_popcount32(tools) == 1);
|
||||
}
|
||||
instrument_line(code, offset);
|
||||
MODIFY_BYTECODE(code, instrument_line, code->_co_monitoring, offset);
|
||||
}
|
||||
|
||||
|
||||
@ -876,7 +898,7 @@ add_per_instruction_tools(PyCodeObject * code, int offset, int tools)
|
||||
/* Single tool */
|
||||
assert(_Py_popcount32(tools) == 1);
|
||||
}
|
||||
instrument_per_instruction(code, offset);
|
||||
MODIFY_BYTECODE(code, instrument_per_instruction, code->_co_monitoring, offset);
|
||||
}
|
||||
|
||||
|
||||
@ -885,21 +907,22 @@ remove_per_instruction_tools(PyCodeObject * code, int offset, int tools)
|
||||
{
|
||||
ASSERT_WORLD_STOPPED_OR_LOCKED(code);
|
||||
|
||||
_PyCoMonitoringData *monitoring = code->_co_monitoring;
|
||||
assert(code->_co_monitoring);
|
||||
bool should_de_instrument;
|
||||
if (code->_co_monitoring->per_instruction_tools) {
|
||||
uint8_t *toolsptr = &code->_co_monitoring->per_instruction_tools[offset];
|
||||
*toolsptr &= ~tools;
|
||||
if (*toolsptr == 0) {
|
||||
de_instrument_per_instruction(code, offset);
|
||||
}
|
||||
should_de_instrument = (*toolsptr == 0);
|
||||
}
|
||||
else {
|
||||
/* Single tool */
|
||||
uint8_t single_tool = code->_co_monitoring->active_monitors.tools[PY_MONITORING_EVENT_INSTRUCTION];
|
||||
assert(_Py_popcount32(single_tool) <= 1);
|
||||
if (((single_tool & tools) == single_tool)) {
|
||||
de_instrument_per_instruction(code, offset);
|
||||
should_de_instrument = ((single_tool & tools) == single_tool);
|
||||
}
|
||||
if (should_de_instrument) {
|
||||
MODIFY_BYTECODE(code, de_instrument_per_instruction, monitoring, offset);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1087,7 +1110,7 @@ call_instrumentation_vector(
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
assert(args[1] == NULL);
|
||||
args[1] = (PyObject *)code;
|
||||
int offset = (int)(instr - _PyCode_CODE(code));
|
||||
int offset = (int)(instr - _PyFrame_GetBytecode(frame));
|
||||
/* Offset visible to user should be the offset in bytes, as that is the
|
||||
* convention for APIs involving code offsets. */
|
||||
int bytes_offset = offset * (int)sizeof(_Py_CODEUNIT);
|
||||
@ -1173,8 +1196,7 @@ _Py_call_instrumentation_jump(
|
||||
assert(event == PY_MONITORING_EVENT_JUMP ||
|
||||
event == PY_MONITORING_EVENT_BRANCH);
|
||||
assert(frame->instr_ptr == instr);
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
int to = (int)(target - _PyCode_CODE(code));
|
||||
int to = (int)(target - _PyFrame_GetBytecode(frame));
|
||||
PyObject *to_obj = PyLong_FromLong(to * (int)sizeof(_Py_CODEUNIT));
|
||||
if (to_obj == NULL) {
|
||||
return NULL;
|
||||
@ -1240,7 +1262,8 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
assert(tstate->tracing == 0);
|
||||
assert(debug_check_sanity(tstate->interp, code));
|
||||
int i = (int)(instr - _PyCode_CODE(code));
|
||||
_Py_CODEUNIT *bytecode = _PyFrame_GetBytecode(frame);
|
||||
int i = (int)(instr - bytecode);
|
||||
|
||||
_PyCoMonitoringData *monitoring = code->_co_monitoring;
|
||||
_PyCoLineInstrumentationData *line_data = &monitoring->lines[i];
|
||||
@ -1256,10 +1279,10 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
|
||||
line = compute_line(code, i, line_delta);
|
||||
assert(line >= 0);
|
||||
assert(prev != NULL);
|
||||
int prev_index = (int)(prev - _PyCode_CODE(code));
|
||||
int prev_index = (int)(prev - bytecode);
|
||||
int prev_line = _Py_Instrumentation_GetLine(code, prev_index);
|
||||
if (prev_line == line) {
|
||||
int prev_opcode = _PyCode_CODE(code)[prev_index].op.code;
|
||||
int prev_opcode = bytecode[prev_index].op.code;
|
||||
/* RESUME and INSTRUMENTED_RESUME are needed for the operation of
|
||||
* instrumentation, so must never be hidden by an INSTRUMENTED_LINE.
|
||||
*/
|
||||
@ -1359,7 +1382,7 @@ int
|
||||
_Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* frame, _Py_CODEUNIT *instr)
|
||||
{
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
int offset = (int)(instr - _PyCode_CODE(code));
|
||||
int offset = (int)(instr - _PyFrame_GetBytecode(frame));
|
||||
_PyCoMonitoringData *instrumentation_data = code->_co_monitoring;
|
||||
assert(instrumentation_data->per_instruction_opcodes);
|
||||
int next_opcode = instrumentation_data->per_instruction_opcodes[offset];
|
||||
|
2
Python/optimizer_cases.c.h
generated
2
Python/optimizer_cases.c.h
generated
@ -17,6 +17,8 @@
|
||||
|
||||
/* _QUICKEN_RESUME is not a viable micro-op for tier 2 */
|
||||
|
||||
/* _LOAD_BYTECODE is not a viable micro-op for tier 2 */
|
||||
|
||||
case _RESUME_CHECK: {
|
||||
break;
|
||||
}
|
||||
|
@ -1513,6 +1513,11 @@ new_threadstate(PyInterpreterState *interp, int whence)
|
||||
PyMem_RawFree(new_tstate);
|
||||
return NULL;
|
||||
}
|
||||
int32_t tlbc_idx = _Py_ReserveTLBCIndex(interp);
|
||||
if (tlbc_idx < 0) {
|
||||
PyMem_RawFree(new_tstate);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* We serialize concurrent creation to protect global state. */
|
||||
@ -1555,6 +1560,7 @@ new_threadstate(PyInterpreterState *interp, int whence)
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// Must be called with lock unlocked to avoid lock ordering deadlocks.
|
||||
_Py_qsbr_register(tstate, interp, qsbr_idx);
|
||||
tstate->tlbc_index = tlbc_idx;
|
||||
#endif
|
||||
|
||||
return (PyThreadState *)tstate;
|
||||
@ -1706,6 +1712,10 @@ PyThreadState_Clear(PyThreadState *tstate)
|
||||
|
||||
// Remove ourself from the biased reference counting table of threads.
|
||||
_Py_brc_remove_thread(tstate);
|
||||
|
||||
// Release our thread-local copies of the bytecode for reuse by another
|
||||
// thread
|
||||
_Py_ClearTLBCIndex((_PyThreadStateImpl *)tstate);
|
||||
#endif
|
||||
|
||||
// Merge our queue of pointers to be freed into the interpreter queue.
|
||||
|
@ -24,6 +24,25 @@ extern const char *_PyUOpName(int index);
|
||||
* ./adaptive.md
|
||||
*/
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
#define SET_OPCODE_OR_RETURN(instr, opcode) \
|
||||
do { \
|
||||
uint8_t old_op = _Py_atomic_load_uint8_relaxed(&(instr)->op.code); \
|
||||
if (old_op >= MIN_INSTRUMENTED_OPCODE) { \
|
||||
/* Lost race with instrumentation */ \
|
||||
return; \
|
||||
} \
|
||||
if (!_Py_atomic_compare_exchange_uint8(&(instr)->op.code, &old_op, \
|
||||
(opcode))) { \
|
||||
/* Lost race with instrumentation */ \
|
||||
assert(old_op >= MIN_INSTRUMENTED_OPCODE); \
|
||||
return; \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define SET_OPCODE_OR_RETURN(instr, opcode) (instr)->op.code = (opcode)
|
||||
#endif
|
||||
|
||||
#ifdef Py_STATS
|
||||
GCStats _py_gc_stats[NUM_GENERATIONS] = { 0 };
|
||||
static PyStats _Py_stats_struct = { .gc_stats = _py_gc_stats };
|
||||
@ -436,16 +455,25 @@ do { \
|
||||
# define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
|
||||
#endif
|
||||
|
||||
// Initialize warmup counters and insert superinstructions. This cannot fail.
|
||||
// Initialize warmup counters and optimize instructions. This cannot fail.
|
||||
void
|
||||
_PyCode_Quicken(PyCodeObject *code)
|
||||
_PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, PyObject *consts,
|
||||
int enable_counters)
|
||||
{
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
_Py_BackoffCounter jump_counter, adaptive_counter;
|
||||
if (enable_counters) {
|
||||
jump_counter = initial_jump_backoff_counter();
|
||||
adaptive_counter = adaptive_counter_warmup();
|
||||
}
|
||||
else {
|
||||
jump_counter = initial_unreachable_backoff_counter();
|
||||
adaptive_counter = initial_unreachable_backoff_counter();
|
||||
}
|
||||
int opcode = 0;
|
||||
int oparg = 0;
|
||||
_Py_CODEUNIT *instructions = _PyCode_CODE(code);
|
||||
/* The last code unit cannot have a cache, so we don't need to check it */
|
||||
for (int i = 0; i < Py_SIZE(code)-1; i++) {
|
||||
for (Py_ssize_t i = 0; i < size-1; i++) {
|
||||
opcode = instructions[i].op.code;
|
||||
int caches = _PyOpcode_Caches[opcode];
|
||||
oparg = (oparg << 8) | instructions[i].op.arg;
|
||||
@ -453,7 +481,7 @@ _PyCode_Quicken(PyCodeObject *code)
|
||||
// The initial value depends on the opcode
|
||||
switch (opcode) {
|
||||
case JUMP_BACKWARD:
|
||||
instructions[i + 1].counter = initial_jump_backoff_counter();
|
||||
instructions[i + 1].counter = jump_counter;
|
||||
break;
|
||||
case POP_JUMP_IF_FALSE:
|
||||
case POP_JUMP_IF_TRUE:
|
||||
@ -462,7 +490,7 @@ _PyCode_Quicken(PyCodeObject *code)
|
||||
instructions[i + 1].cache = 0x5555; // Alternating 0, 1 bits
|
||||
break;
|
||||
default:
|
||||
instructions[i + 1].counter = adaptive_counter_warmup();
|
||||
instructions[i + 1].counter = adaptive_counter;
|
||||
break;
|
||||
}
|
||||
i += caches;
|
||||
@ -471,7 +499,7 @@ _PyCode_Quicken(PyCodeObject *code)
|
||||
/* We can't do this in the bytecode compiler as
|
||||
* marshalling can intern strings and make them immortal. */
|
||||
|
||||
PyObject *obj = PyTuple_GET_ITEM(code->co_consts, oparg);
|
||||
PyObject *obj = PyTuple_GET_ITEM(consts, oparg);
|
||||
if (_Py_IsImmortal(obj)) {
|
||||
instructions[i].op.code = LOAD_CONST_IMMORTAL;
|
||||
}
|
||||
@ -480,7 +508,7 @@ _PyCode_Quicken(PyCodeObject *code)
|
||||
oparg = 0;
|
||||
}
|
||||
}
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
}
|
||||
|
||||
#define SIMPLE_FUNCTION 0
|
||||
@ -2243,9 +2271,10 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
|
||||
{
|
||||
PyObject *lhs = PyStackRef_AsPyObjectBorrow(lhs_st);
|
||||
PyObject *rhs = PyStackRef_AsPyObjectBorrow(rhs_st);
|
||||
assert(ENABLE_SPECIALIZATION);
|
||||
assert(ENABLE_SPECIALIZATION_FT);
|
||||
assert(_PyOpcode_Caches[BINARY_OP] == INLINE_CACHE_ENTRIES_BINARY_OP);
|
||||
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)(instr + 1);
|
||||
uint8_t specialized_op;
|
||||
switch (oparg) {
|
||||
case NB_ADD:
|
||||
case NB_INPLACE_ADD:
|
||||
@ -2256,18 +2285,18 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
|
||||
_Py_CODEUNIT next = instr[INLINE_CACHE_ENTRIES_BINARY_OP + 1];
|
||||
bool to_store = (next.op.code == STORE_FAST);
|
||||
if (to_store && PyStackRef_AsPyObjectBorrow(locals[next.op.arg]) == lhs) {
|
||||
instr->op.code = BINARY_OP_INPLACE_ADD_UNICODE;
|
||||
specialized_op = BINARY_OP_INPLACE_ADD_UNICODE;
|
||||
goto success;
|
||||
}
|
||||
instr->op.code = BINARY_OP_ADD_UNICODE;
|
||||
specialized_op = BINARY_OP_ADD_UNICODE;
|
||||
goto success;
|
||||
}
|
||||
if (PyLong_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_ADD_INT;
|
||||
specialized_op = BINARY_OP_ADD_INT;
|
||||
goto success;
|
||||
}
|
||||
if (PyFloat_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_ADD_FLOAT;
|
||||
specialized_op = BINARY_OP_ADD_FLOAT;
|
||||
goto success;
|
||||
}
|
||||
break;
|
||||
@ -2277,11 +2306,11 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
|
||||
break;
|
||||
}
|
||||
if (PyLong_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_MULTIPLY_INT;
|
||||
specialized_op = BINARY_OP_MULTIPLY_INT;
|
||||
goto success;
|
||||
}
|
||||
if (PyFloat_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_MULTIPLY_FLOAT;
|
||||
specialized_op = BINARY_OP_MULTIPLY_FLOAT;
|
||||
goto success;
|
||||
}
|
||||
break;
|
||||
@ -2291,22 +2320,23 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
|
||||
break;
|
||||
}
|
||||
if (PyLong_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_SUBTRACT_INT;
|
||||
specialized_op = BINARY_OP_SUBTRACT_INT;
|
||||
goto success;
|
||||
}
|
||||
if (PyFloat_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_SUBTRACT_FLOAT;
|
||||
specialized_op = BINARY_OP_SUBTRACT_FLOAT;
|
||||
goto success;
|
||||
}
|
||||
break;
|
||||
}
|
||||
SPECIALIZATION_FAIL(BINARY_OP, binary_op_fail_kind(oparg, lhs, rhs));
|
||||
STAT_INC(BINARY_OP, failure);
|
||||
instr->op.code = BINARY_OP;
|
||||
SET_OPCODE_OR_RETURN(instr, BINARY_OP);
|
||||
cache->counter = adaptive_counter_backoff(cache->counter);
|
||||
return;
|
||||
success:
|
||||
STAT_INC(BINARY_OP, success);
|
||||
SET_OPCODE_OR_RETURN(instr, specialized_op);
|
||||
cache->counter = adaptive_counter_cooldown();
|
||||
}
|
||||
|
||||
|
@ -2174,6 +2174,11 @@ sys__clear_internal_caches_impl(PyObject *module)
|
||||
#ifdef _Py_TIER2
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
_Py_Executors_InvalidateAll(interp, 0);
|
||||
#endif
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (_Py_ClearUnusedTLBC(_PyInterpreterState_GET()) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
PyType_ClearCache();
|
||||
Py_RETURN_NONE;
|
||||
|
@ -77,6 +77,10 @@ def _managed_dict_offset():
|
||||
else:
|
||||
return -3 * _sizeof_void_p()
|
||||
|
||||
def _interp_frame_has_tlbc_index():
|
||||
interp_frame = gdb.lookup_type("_PyInterpreterFrame")
|
||||
return any(field.name == "tlbc_index" for field in interp_frame.fields())
|
||||
|
||||
|
||||
Py_TPFLAGS_INLINE_VALUES = (1 << 2)
|
||||
Py_TPFLAGS_MANAGED_DICT = (1 << 4)
|
||||
@ -105,6 +109,8 @@ FRAME_INFO_OPTIMIZED_OUT = '(frame information optimized out)'
|
||||
UNABLE_READ_INFO_PYTHON_FRAME = 'Unable to read information on python frame'
|
||||
EVALFRAME = '_PyEval_EvalFrameDefault'
|
||||
|
||||
INTERP_FRAME_HAS_TLBC_INDEX = _interp_frame_has_tlbc_index()
|
||||
|
||||
class NullPyObjectPtr(RuntimeError):
|
||||
pass
|
||||
|
||||
@ -693,6 +699,16 @@ def parse_location_table(firstlineno, linetable):
|
||||
yield addr, end_addr, line
|
||||
addr = end_addr
|
||||
|
||||
|
||||
class PyCodeArrayPtr:
|
||||
def __init__(self, gdbval):
|
||||
self._gdbval = gdbval
|
||||
|
||||
def get_entry(self, index):
|
||||
assert (index >= 0) and (index < self._gdbval["size"])
|
||||
return self._gdbval["entries"][index]
|
||||
|
||||
|
||||
class PyCodeObjectPtr(PyObjectPtr):
|
||||
"""
|
||||
Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
|
||||
@ -1085,6 +1101,11 @@ class PyFramePtr:
|
||||
def _f_lasti(self):
|
||||
codeunit_p = gdb.lookup_type("_Py_CODEUNIT").pointer()
|
||||
instr_ptr = self._gdbval["instr_ptr"]
|
||||
if INTERP_FRAME_HAS_TLBC_INDEX:
|
||||
tlbc_index = self._gdbval["tlbc_index"]
|
||||
code_arr = PyCodeArrayPtr(self._f_code().field("co_tlbc"))
|
||||
first_instr = code_arr.get_entry(tlbc_index).cast(codeunit_p)
|
||||
else:
|
||||
first_instr = self._f_code().field("co_code_adaptive").cast(codeunit_p)
|
||||
return int(instr_ptr - first_instr)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user