/* Thread-local bytecode (free-threaded builds):
 *
 * In free-threaded builds, each thread specializes a thread-local copy of the
 * bytecode, created on the first RESUME.  All copies of the bytecode for a
 * code object are stored in the co_tlbc array on the code object.  Threads
 * reserve a globally unique index identifying their copy of the bytecode in
 * all co_tlbc arrays at thread creation and release the index at thread
 * destruction.  The first entry in every co_tlbc array always points to the
 * "main" copy of the bytecode that is stored at the end of the code object,
 * so no bytecode is copied for programs that do not use threads.
 *
 * Thread-local bytecode can be disabled at runtime by providing either
 * -X tlbc=0 or PYTHON_TLBC=0.  Disabling thread-local bytecode also disables
 * specialization.
 *
 * Concurrent modifications to the bytecode made by the specializing
 * interpreter and instrumentation use atomics, with specialization taking
 * care not to overwrite an instruction that was instrumented concurrently.
 */
#ifndef Py_INTERNAL_CODE_H
#define Py_INTERNAL_CODE_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_stackref.h"    // _PyStackRef
#include "pycore_lock.h"        // PyMutex
#include "pycore_backoff.h"     // _Py_BackoffCounter
#include "pycore_tstate.h"      // _PyThreadStateImpl

/* Each instruction in a code object is a fixed-width value,
 * currently 2 bytes: 1-byte opcode + 1-byte oparg.  The EXTENDED_ARG
 * opcode allows for larger values but the current limit is 3 uses
 * of EXTENDED_ARG (see Python/compile.c), for a maximum
 * 32-bit value.  This aligns with the note in Python/compile.c
 * (compiler_addop_i_line) indicating that the max oparg value is
 * 2**32 - 1, rather than INT_MAX.
 */

typedef union {
    uint16_t cache;
    struct {
        uint8_t code;
        uint8_t arg;
    } op;
    _Py_BackoffCounter counter;  // First cache entry of specializable op
} _Py_CODEUNIT;

#define _PyCode_CODE(CO) _Py_RVALUE((_Py_CODEUNIT *)(CO)->co_code_adaptive)
#define _PyCode_NBYTES(CO) (Py_SIZE(CO) * (Py_ssize_t)sizeof(_Py_CODEUNIT))

/* These macros only remain defined for compatibility. */
#define _Py_OPCODE(word) ((word).op.code)
#define _Py_OPARG(word) ((word).op.arg)

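/* Illustrative sketch (not part of this header's API): how the up-to-three
 * EXTENDED_ARG prefixes described above combine with the final 1-byte arg
 * into a full 32-bit oparg.  Assumes the public opcode definitions
 * (EXTENDED_ARG) are in scope, as they are for core users of this header. */
static inline uint32_t
_example_full_oparg(const _Py_CODEUNIT *instr)
{
    uint32_t oparg = _Py_OPARG(*instr);
    // Each EXTENDED_ARG prefix contributes 8 more significant bits.
    while (_Py_OPCODE(*instr) == EXTENDED_ARG) {
        instr++;
        oparg = (oparg << 8) | _Py_OPARG(*instr);
    }
    return oparg;
}
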
static inline _Py_CODEUNIT
_py_make_codeunit(uint8_t opcode, uint8_t oparg)
{
    // No designated initialisers because of C++ compat
    _Py_CODEUNIT word;
    word.op.code = opcode;
    word.op.arg = oparg;
    return word;
}

static inline void
_py_set_opcode(_Py_CODEUNIT *word, uint8_t opcode)
{
    word->op.code = opcode;
}

#define _Py_MAKE_CODEUNIT(opcode, oparg) _py_make_codeunit((opcode), (oparg))
#define _Py_SET_OPCODE(word, opcode) _py_set_opcode(&(word), (opcode))

// We hide some of the newer PyCodeObject fields behind macros.
// This helps with backporting certain changes to 3.12.
#define _PyCode_HAS_EXECUTORS(CODE) \
    (CODE->co_executors != NULL)
#define _PyCode_HAS_INSTRUMENTATION(CODE) \
    (CODE->_co_instrumentation_version > 0)

struct _py_code_state {
    PyMutex mutex;
    // Interned constants from code objects. Used by the free-threaded build.
    struct _Py_hashtable_t *constants;
};

extern PyStatus _PyCode_Init(PyInterpreterState *interp);
extern void _PyCode_Fini(PyInterpreterState *interp);

#define CODE_MAX_WATCHERS 8

/* PEP 659
 * Specialization and quickening structs and helper functions
 */


// Inline caches. If you change the number of cache entries for an instruction,
// you must *also* update the number of cache entries in Lib/opcode.py and bump
// the magic number in Lib/importlib/_bootstrap_external.py!

#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))

typedef struct {
    _Py_BackoffCounter counter;
    uint16_t module_keys_version;
    uint16_t builtin_keys_version;
    uint16_t index;
} _PyLoadGlobalCache;

#define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache)

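/* Illustrative sketch: cache entries live directly after their instruction in
 * the code unit stream, so the instruction following a LOAD_GLOBAL is found by
 * skipping INLINE_CACHE_ENTRIES_LOAD_GLOBAL code units.  The counter above is
 * always the first of those entries. */
static inline _Py_CODEUNIT *
_example_next_instruction_after_load_global(_Py_CODEUNIT *instr)
{
    return instr + 1 + INLINE_CACHE_ENTRIES_LOAD_GLOBAL;
}
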
typedef struct {
    _Py_BackoffCounter counter;
} _PyBinaryOpCache;

#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)

typedef struct {
    _Py_BackoffCounter counter;
} _PyUnpackSequenceCache;

#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
    CACHE_ENTRIES(_PyUnpackSequenceCache)

typedef struct {
    _Py_BackoffCounter counter;
} _PyCompareOpCache;

#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)

typedef struct {
    _Py_BackoffCounter counter;
} _PyBinarySubscrCache;

#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)

typedef struct {
    _Py_BackoffCounter counter;
} _PySuperAttrCache;

#define INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR CACHE_ENTRIES(_PySuperAttrCache)

typedef struct {
    _Py_BackoffCounter counter;
    uint16_t version[2];
    uint16_t index;
} _PyAttrCache;

typedef struct {
    _Py_BackoffCounter counter;
    uint16_t type_version[2];
    union {
        uint16_t keys_version[2];
        uint16_t dict_offset;
    };
    uint16_t descr[4];
} _PyLoadMethodCache;


// MUST be the max(_PyAttrCache, _PyLoadMethodCache)
#define INLINE_CACHE_ENTRIES_LOAD_ATTR CACHE_ENTRIES(_PyLoadMethodCache)

#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)

typedef struct {
    _Py_BackoffCounter counter;
    uint16_t func_version[2];
} _PyCallCache;

#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
#define INLINE_CACHE_ENTRIES_CALL_KW CACHE_ENTRIES(_PyCallCache)

typedef struct {
    _Py_BackoffCounter counter;
} _PyStoreSubscrCache;

#define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache)

typedef struct {
    _Py_BackoffCounter counter;
} _PyForIterCache;

#define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache)

typedef struct {
    _Py_BackoffCounter counter;
} _PySendCache;

#define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache)

typedef struct {
    _Py_BackoffCounter counter;
    uint16_t version[2];
} _PyToBoolCache;

#define INLINE_CACHE_ENTRIES_TO_BOOL CACHE_ENTRIES(_PyToBoolCache)

typedef struct {
    _Py_BackoffCounter counter;
} _PyContainsOpCache;

#define INLINE_CACHE_ENTRIES_CONTAINS_OP CACHE_ENTRIES(_PyContainsOpCache)

// Borrowed references to common callables:
struct callable_cache {
    PyObject *isinstance;
    PyObject *len;
    PyObject *list_append;
    PyObject *object__getattribute__;
};

/* "Locals plus" for a code object is the set of locals + cell vars +
 * free vars.  This relates to variable names as well as offsets into
 * the "fast locals" storage array of execution frames.  The compiler
 * builds the list of names, their offsets, and the corresponding
 * kind of local.
 *
 * Those kinds represent the source of the initial value and the
 * variable's scope (as related to closures).  A "local" is an
 * argument or other variable defined in the current scope.  A "free"
 * variable is one that is defined in an outer scope and comes from
 * the function's closure.  A "cell" variable is a local that escapes
 * into an inner function as part of a closure, and thus must be
 * wrapped in a cell.  Any "local" can also be a "cell", but the
 * "free" kind is mutually exclusive with both.
 */

// Note that these all fit within a byte, as do combinations.
// Later, we will use the smaller numbers to differentiate the different
// kinds of locals (e.g. pos-only arg, varkwargs, local-only).
#define CO_FAST_HIDDEN  0x10
#define CO_FAST_LOCAL   0x20
#define CO_FAST_CELL    0x40
#define CO_FAST_FREE    0x80

typedef unsigned char _PyLocals_Kind;

static inline _PyLocals_Kind
_PyLocals_GetKind(PyObject *kinds, int i)
{
    assert(PyBytes_Check(kinds));
    assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
    char *ptr = PyBytes_AS_STRING(kinds);
    return (_PyLocals_Kind)(ptr[i]);
}

static inline void
_PyLocals_SetKind(PyObject *kinds, int i, _PyLocals_Kind kind)
{
    assert(PyBytes_Check(kinds));
    assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
    char *ptr = PyBytes_AS_STRING(kinds);
    ptr[i] = (char) kind;
}

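/* Illustrative sketch: kinds are bit flags, so a single entry can carry more
 * than one kind -- e.g. an argument captured by a closure is both
 * CO_FAST_LOCAL and CO_FAST_CELL, while CO_FAST_FREE never combines with
 * either (see the comment above). */
static inline int
_example_is_captured_local(PyObject *kinds, int i)
{
    _PyLocals_Kind kind = _PyLocals_GetKind(kinds, i);
    return (kind & CO_FAST_LOCAL) && (kind & CO_FAST_CELL);
}
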
struct _PyCodeConstructor {
    /* metadata */
    PyObject *filename;
    PyObject *name;
    PyObject *qualname;
    int flags;

    /* the code */
    PyObject *code;
    int firstlineno;
    PyObject *linetable;

    /* used by the code */
    PyObject *consts;
    PyObject *names;

    /* mapping frame offsets to information */
    PyObject *localsplusnames;  // Tuple of strings
    PyObject *localspluskinds;  // Bytes object, one byte per variable

    /* args (within varnames) */
    int argcount;
    int posonlyargcount;
    // XXX Replace argcount with posorkwargcount (argcount - posonlyargcount).
    int kwonlyargcount;

    /* needed to create the frame */
    int stacksize;

    /* used by the eval loop */
    PyObject *exceptiontable;
};

// Using an "arguments struct" like this is helpful for maintainability
// in a case such as this with many parameters.  It does bear a risk:
// if the struct changes and callers are not updated properly then the
// compiler will not catch problems (like a missing argument).  This can
// cause hard-to-debug problems.  The risk is mitigated by the use of
// check_code() in codeobject.c.  However, we may decide to switch
// back to a regular function signature.  Regardless, this approach
// wouldn't be appropriate if this weren't a strictly internal API.
// (See the comments in https://github.com/python/cpython/pull/26258.)
extern int _PyCode_Validate(struct _PyCodeConstructor *);
extern PyCodeObject* _PyCode_New(struct _PyCodeConstructor *);

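/* Illustrative sketch (not part of the API): the intended call sequence is to
 * fill in a _PyCodeConstructor, validate it, and only then construct the code
 * object; the assumption here is that both calls signal failure by returning
 * an error value with an exception set. */
static inline PyCodeObject *
_example_build_code(struct _PyCodeConstructor *con)
{
    if (_PyCode_Validate(con) < 0) {
        // Validation failed; an exception is expected to be set.
        return NULL;
    }
    return _PyCode_New(con);
}
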
/* Private API */

/* Getters for internal PyCodeObject data. */
extern PyObject* _PyCode_GetVarnames(PyCodeObject *);
extern PyObject* _PyCode_GetCellvars(PyCodeObject *);
extern PyObject* _PyCode_GetFreevars(PyCodeObject *);
extern PyObject* _PyCode_GetCode(PyCodeObject *);

/** API for initializing the line number tables. */
extern int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds);

/** Out of process API for initializing the location table. */
extern void _PyLineTable_InitAddressRange(
    const char *linetable,
    Py_ssize_t length,
    int firstlineno,
    PyCodeAddressRange *range);

/** API for traversing the line number table. */
extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);

/** API for executors */
extern void _PyCode_Clear_Executors(PyCodeObject *code);


#ifdef Py_GIL_DISABLED
// gh-115999 tracks progress on addressing this.
#define ENABLE_SPECIALIZATION 0
// Use this to enable specialization families once they are thread-safe.  All
// uses will be replaced with ENABLE_SPECIALIZATION once all families are
// thread-safe.
#define ENABLE_SPECIALIZATION_FT 1
#else
#define ENABLE_SPECIALIZATION 1
#define ENABLE_SPECIALIZATION_FT ENABLE_SPECIALIZATION
#endif

/* Specialization functions */

extern void _Py_Specialize_LoadSuperAttr(_PyStackRef global_super, _PyStackRef cls,
                                         _Py_CODEUNIT *instr, int load_method);
extern void _Py_Specialize_LoadAttr(_PyStackRef owner, _Py_CODEUNIT *instr,
                                    PyObject *name);
extern void _Py_Specialize_StoreAttr(_PyStackRef owner, _Py_CODEUNIT *instr,
                                     PyObject *name);
extern void _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins,
                                      _Py_CODEUNIT *instr, PyObject *name);
extern void _Py_Specialize_BinarySubscr(_PyStackRef sub, _PyStackRef container,
                                        _Py_CODEUNIT *instr);
extern void _Py_Specialize_StoreSubscr(_PyStackRef container, _PyStackRef sub,
                                       _Py_CODEUNIT *instr);
extern void _Py_Specialize_Call(_PyStackRef callable, _Py_CODEUNIT *instr,
                                int nargs);
extern void _Py_Specialize_CallKw(_PyStackRef callable, _Py_CODEUNIT *instr,
                                  int nargs);
extern void _Py_Specialize_BinaryOp(_PyStackRef lhs, _PyStackRef rhs, _Py_CODEUNIT *instr,
                                    int oparg, _PyStackRef *locals);
extern void _Py_Specialize_CompareOp(_PyStackRef lhs, _PyStackRef rhs,
                                     _Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_UnpackSequence(_PyStackRef seq, _Py_CODEUNIT *instr,
                                          int oparg);
extern void _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_Send(_PyStackRef receiver, _Py_CODEUNIT *instr);
extern void _Py_Specialize_ToBool(_PyStackRef value, _Py_CODEUNIT *instr);
extern void _Py_Specialize_ContainsOp(_PyStackRef value, _Py_CODEUNIT *instr);

#ifdef Py_STATS

#include "pycore_bitutils.h"      // _Py_bit_length

#define STAT_INC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name++; } while (0)
#define STAT_DEC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name--; } while (0)
#define OPCODE_EXE_INC(opname) do { if (_Py_stats) _Py_stats->opcode_stats[opname].execution_count++; } while (0)
#define CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.name++; } while (0)
#define OBJECT_STAT_INC(name) do { if (_Py_stats) _Py_stats->object_stats.name++; } while (0)
#define OBJECT_STAT_INC_COND(name, cond) \
    do { if (_Py_stats && cond) _Py_stats->object_stats.name++; } while (0)
#define EVAL_CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.eval_calls[name]++; } while (0)
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
    do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
#define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0)
#define UOP_PAIR_INC(uopcode, lastuop) \
    do { \
        if (lastuop && _Py_stats) { \
            _Py_stats->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \
        } \
        lastuop = uopcode; \
    } while (0)
#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
#define OPT_ERROR_IN_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.error_in_opcode[opname]++; } while (0)
#define OPT_HIST(length, name) \
    do { \
        if (_Py_stats) { \
            int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
            bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
            _Py_stats->optimization_stats.name[bucket]++; \
        } \
    } while (0)
#define RARE_EVENT_STAT_INC(name) do { if (_Py_stats) _Py_stats->rare_event_stats.name++; } while (0)
#define OPCODE_DEFERRED_INC(opname) do { if (_Py_stats && opcode == opname) _Py_stats->opcode_stats[opname].specialization.deferred++; } while (0)

// Export for '_opcode' shared extension
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);

#else
#define STAT_INC(opname, name) ((void)0)
#define STAT_DEC(opname, name) ((void)0)
#define OPCODE_EXE_INC(opname) ((void)0)
#define CALL_STAT_INC(name) ((void)0)
#define OBJECT_STAT_INC(name) ((void)0)
#define OBJECT_STAT_INC_COND(name, cond) ((void)0)
#define EVAL_CALL_STAT_INC(name) ((void)0)
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
#define GC_STAT_ADD(gen, name, n) ((void)0)
#define OPT_STAT_INC(name) ((void)0)
#define UOP_STAT_INC(opname, name) ((void)0)
#define UOP_PAIR_INC(uopcode, lastuop) ((void)0)
#define OPT_UNSUPPORTED_OPCODE(opname) ((void)0)
#define OPT_ERROR_IN_OPCODE(opname) ((void)0)
#define OPT_HIST(length, name) ((void)0)
#define RARE_EVENT_STAT_INC(name) ((void)0)
#define OPCODE_DEFERRED_INC(opname) ((void)0)
#endif  // !Py_STATS

// Utility functions for reading/writing 32/64-bit values in the inline caches.
// Great care should be taken to ensure that these functions remain correct and
// performant!  They should compile to just "move" instructions on all supported
// compilers and platforms.

// We use memcpy to let the C compiler handle unaligned accesses and endianness
// issues for us.  It also seems to produce better code than manual copying for
// most compilers (see https://blog.regehr.org/archives/959 for more info).

static inline void
write_u32(uint16_t *p, uint32_t val)
{
    memcpy(p, &val, sizeof(val));
}

static inline void
write_u64(uint16_t *p, uint64_t val)
{
    memcpy(p, &val, sizeof(val));
}

static inline void
write_obj(uint16_t *p, PyObject *val)
{
    memcpy(p, &val, sizeof(val));
}

static inline uint16_t
read_u16(uint16_t *p)
{
    return *p;
}

static inline uint32_t
read_u32(uint16_t *p)
{
    uint32_t val;
    memcpy(&val, p, sizeof(val));
    return val;
}

static inline uint64_t
read_u64(uint16_t *p)
{
    uint64_t val;
    memcpy(&val, p, sizeof(val));
    return val;
}

static inline PyObject *
read_obj(uint16_t *p)
{
    PyObject *val;
    memcpy(&val, p, sizeof(val));
    return val;
}

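/* Illustrative sketch: a 32-bit version tag is split across two 16-bit cache
 * entries, e.g. the version[2] field of _PyAttrCache above, and is accessed
 * through these helpers rather than by direct (possibly unaligned) loads. */
static inline void
_example_store_attr_version(_PyAttrCache *cache, uint32_t version)
{
    write_u32(cache->version, version);
    assert(read_u32(cache->version) == version);
}
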
/* See Objects/exception_handling_notes.txt for details.
 */
static inline unsigned char *
parse_varint(unsigned char *p, int *result) {
    int val = p[0] & 63;
    while (p[0] & 64) {
        p++;
        val = (val << 6) | (p[0] & 63);
    }
    *result = val;
    return p+1;
}

static inline int
write_varint(uint8_t *ptr, unsigned int val)
{
    int written = 1;
    while (val >= 64) {
        *ptr++ = 64 | (val & 63);
        val >>= 6;
        written++;
    }
    *ptr = (uint8_t)val;
    return written;
}

static inline int
write_signed_varint(uint8_t *ptr, int val)
{
    unsigned int uval;
    if (val < 0) {
        // (unsigned int)(-val) has an undefined behavior for INT_MIN
        uval = ((0 - (unsigned int)val) << 1) | 1;
    }
    else {
        uval = (unsigned int)val << 1;
    }
    return write_varint(ptr, uval);
}

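/* Illustrative sketch: write_signed_varint() folds the sign into the low bit
 * before the unsigned encoding is applied, so -3 becomes 7 and +3 becomes 6;
 * small magnitudes therefore still fit in a single byte. */
static inline int
_example_signed_varint_single_byte(void)
{
    uint8_t buf[8];
    int written = write_signed_varint(buf, -3);
    return written == 1 && buf[0] == 7;
}
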
static inline int
write_location_entry_start(uint8_t *ptr, int code, int length)
{
    assert((code & 15) == code);
    *ptr = 128 | (uint8_t)(code << 3) | (uint8_t)(length - 1);
    return 1;
}


/** Counters
 * The first 16-bit value in each inline cache is a counter.
 *
 * When counting executions until the next specialization attempt,
 * exponential backoff is used to reduce the number of specialization failures.
 * See pycore_backoff.h for more details.
 * On a specialization failure, the backoff counter is restarted.
 */

#include "pycore_backoff.h"

// A value of 1 means that we attempt to specialize the *second* time each
// instruction is executed.  Executing twice is a much better indicator of
// "hotness" than executing once, but additional warmup delays only prevent
// specialization.  Most types stabilize by the second execution, too:
#define ADAPTIVE_WARMUP_VALUE 1
#define ADAPTIVE_WARMUP_BACKOFF 1

// A value of 52 means that we attempt to re-specialize after 53 misses (a prime
// number, useful for avoiding artifacts if every nth value is a different type
// or something).  Setting the backoff to 0 means that the counter is reset to
// the same state as a warming-up instruction (value == 1, backoff == 1) after
// deoptimization.  This isn't strictly necessary, but it is a bit easier to
// reason about when thinking about the opcode transitions as a state machine:
#define ADAPTIVE_COOLDOWN_VALUE 52
#define ADAPTIVE_COOLDOWN_BACKOFF 0

// Can't assert this in pycore_backoff.h because of header order dependencies
#if SIDE_EXIT_INITIAL_VALUE <= ADAPTIVE_COOLDOWN_VALUE
# error "Cold exit value should be larger than adaptive cooldown value"
#endif

static inline _Py_BackoffCounter
adaptive_counter_bits(uint16_t value, uint16_t backoff) {
    return make_backoff_counter(value, backoff);
}

static inline _Py_BackoffCounter
adaptive_counter_warmup(void) {
    return adaptive_counter_bits(ADAPTIVE_WARMUP_VALUE,
                                 ADAPTIVE_WARMUP_BACKOFF);
}

static inline _Py_BackoffCounter
adaptive_counter_cooldown(void) {
    return adaptive_counter_bits(ADAPTIVE_COOLDOWN_VALUE,
                                 ADAPTIVE_COOLDOWN_BACKOFF);
}

static inline _Py_BackoffCounter
adaptive_counter_backoff(_Py_BackoffCounter counter) {
    return restart_backoff_counter(counter);
}

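/* Illustrative sketch (assuming backoff_counter_triggers() and
 * advance_backoff_counter() from pycore_backoff.h): an adaptive instruction
 * counts down on each execution and only attempts to specialize once the
 * counter triggers; a successful attempt would then install
 * adaptive_counter_cooldown(), while a failure restarts the counter via
 * adaptive_counter_backoff(). */
static inline _Py_BackoffCounter
_example_adaptive_tick(_Py_BackoffCounter counter, int *attempt_specialization)
{
    *attempt_specialization = backoff_counter_triggers(counter);
    return *attempt_specialization ? counter : advance_backoff_counter(counter);
}
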
/* Comparison bit masks. */

/* Note this evaluates its arguments twice each */
#define COMPARISON_BIT(x, y) (1 << (2 * ((x) >= (y)) + ((x) <= (y))))

/*
 * The following bits are chosen so that the value of
 * COMPARISON_BIT(left, right)
 * masked by the values below will be non-zero if the
 * comparison is true, and zero if it is false */

/* This is for values that are unordered, i.e. NaN, not types that are unordered, e.g. sets */
#define COMPARISON_UNORDERED 1

#define COMPARISON_LESS_THAN 2
#define COMPARISON_GREATER_THAN 4
#define COMPARISON_EQUALS 8

#define COMPARISON_NOT_EQUALS (COMPARISON_UNORDERED | COMPARISON_LESS_THAN | COMPARISON_GREATER_THAN)

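/* Illustrative sketch: a specialized comparison computes COMPARISON_BIT for
 * the two operands and tests it against a mask derived from the operator.
 * For example, "x <= y" uses (COMPARISON_LESS_THAN | COMPARISON_EQUALS):
 * 3 <= 5 yields COMPARISON_LESS_THAN, which survives the mask, so the result
 * is true; 5 <= 3 yields COMPARISON_GREATER_THAN, which does not. */
static inline int
_example_compare_less_equal(long x, long y)
{
    return (COMPARISON_BIT(x, y) & (COMPARISON_LESS_THAN | COMPARISON_EQUALS)) != 0;
}
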
extern int _Py_Instrument(PyCodeObject *co, PyInterpreterState *interp);

extern _Py_CODEUNIT _Py_GetBaseCodeUnit(PyCodeObject *code, int offset);

extern int _PyInstruction_GetLength(PyCodeObject *code, int offset);

struct _PyCode8 _PyCode_DEF(8);

PyAPI_DATA(const struct _PyCode8) _Py_InitCleanup;

#ifdef Py_GIL_DISABLED

// Return a pointer to the thread-local bytecode for the current thread, if it
// exists.
static inline _Py_CODEUNIT *
_PyCode_GetTLBCFast(PyThreadState *tstate, PyCodeObject *co)
{
    _PyCodeArray *code = _Py_atomic_load_ptr_acquire(&co->co_tlbc);
    int32_t idx = ((_PyThreadStateImpl*) tstate)->tlbc_index;
    if (idx < code->size && code->entries[idx] != NULL) {
        return (_Py_CODEUNIT *) code->entries[idx];
    }
    return NULL;
}

// Return a pointer to the thread-local bytecode for the current thread,
// creating it if necessary.
extern _Py_CODEUNIT *_PyCode_GetTLBC(PyCodeObject *co);

// Reserve an index for the current thread into thread-local bytecode
// arrays.
//
// Returns the reserved index or -1 on error.
extern int32_t _Py_ReserveTLBCIndex(PyInterpreterState *interp);

// Release the current thread's index into thread-local bytecode arrays.
extern void _Py_ClearTLBCIndex(_PyThreadStateImpl *tstate);

// Free all TLBC copies not associated with live threads.
//
// Returns 0 on success or -1 on error.
extern int _Py_ClearUnusedTLBC(PyInterpreterState *interp);

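/* Illustrative sketch: the typical lookup pattern is to try the fast,
 * index-based path first and fall back to _PyCode_GetTLBC(), which creates
 * the thread-local copy on demand. */
static inline _Py_CODEUNIT *
_example_bytecode_for_thread(PyThreadState *tstate, PyCodeObject *co)
{
    _Py_CODEUNIT *bc = _PyCode_GetTLBCFast(tstate, co);
    return bc != NULL ? bc : _PyCode_GetTLBC(co);
}
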
#endif

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_CODE_H */