mirror of
https://github.com/python/cpython.git
synced 2024-11-24 00:38:00 +01:00
PyMutex is a one byte lock with fast, inlineable lock and unlock functions for the common uncontended case. The design is based on WebKit's WTF::Lock. PyMutex is built using the _PyParkingLot APIs, which provides a cross-platform futex-like API (based on WebKit's WTF::ParkingLot). This internal API will be used for building other synchronization primitives used to implement PEP 703, such as one-time initialization and events. This also includes tests and a mini benchmark in Tools/lockbench/lockbench.py to compare with the existing PyThread_type_lock. Uncontended acquisition + release: * Linux (x86-64): PyMutex: 11 ns, PyThread_type_lock: 44 ns * macOS (arm64): PyMutex: 13 ns, PyThread_type_lock: 18 ns * Windows (x86-64): PyMutex: 13 ns, PyThread_type_lock: 38 ns PR Overview: The primary purpose of this PR is to implement PyMutex, but there are a number of support pieces (described below). * PyMutex: A 1-byte lock that doesn't require memory allocation to initialize and is generally faster than the existing PyThread_type_lock. The API is internal only for now. * _PyParking_Lot: A futex-like API based on the API of the same name in WebKit. Used to implement PyMutex. * _PyRawMutex: A word sized lock used to implement _PyParking_Lot. * PyEvent: A one time event. This was used a bunch in the "nogil" fork and is useful for testing the PyMutex implementation, so I've included it as part of the PR. * pycore_llist.h: Defines common operations on doubly-linked list. Not strictly necessary (could do the list operations manually), but they come up frequently in the "nogil" fork. ( Similar to https://man.freebsd.org/cgi/man.cgi?queue) --------- Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
This commit is contained in:
parent
0a31ff0050
commit
0c89056fe5
@ -48,6 +48,7 @@
|
||||
#include "pytypedefs.h"
|
||||
#include "pybuffer.h"
|
||||
#include "pystats.h"
|
||||
#include "pyatomic.h"
|
||||
#include "object.h"
|
||||
#include "objimpl.h"
|
||||
#include "typeslots.h"
|
||||
|
@ -83,9 +83,9 @@
|
||||
// # release
|
||||
// ...
|
||||
|
||||
#ifndef Py_ATOMIC_H
|
||||
#define Py_ATOMIC_H
|
||||
|
||||
#ifndef Py_CPYTHON_ATOMIC_H
|
||||
# error "this header file must not be included directly"
|
||||
#endif
|
||||
|
||||
// --- _Py_atomic_add --------------------------------------------------------
|
||||
// Atomically adds `value` to `obj` and returns the previous value
|
||||
@ -501,6 +501,3 @@ static inline void _Py_atomic_fence_release(void);
|
||||
#else
|
||||
# error "no available pyatomic implementation for this platform/compiler"
|
||||
#endif
|
||||
|
||||
#endif /* Py_ATOMIC_H */
|
||||
|
||||
|
@ -906,7 +906,7 @@ _Py_atomic_store_ptr_release(void *obj, void *value)
|
||||
#if defined(_M_X64) || defined(_M_IX86)
|
||||
*(void * volatile *)obj = value;
|
||||
#elif defined(_M_ARM64)
|
||||
__stlr64(obj, (uintptr_t)value);
|
||||
__stlr64((unsigned __int64 volatile *)obj, (uintptr_t)value);
|
||||
#else
|
||||
# error "no implementation of _Py_atomic_store_ptr_release"
|
||||
#endif
|
||||
|
107
Include/internal/pycore_llist.h
Normal file
107
Include/internal/pycore_llist.h
Normal file
@ -0,0 +1,107 @@
|
||||
// A doubly-linked list that can be embedded in a struct.
|
||||
//
|
||||
// Usage:
|
||||
// struct llist_node head = LLIST_INIT(head);
|
||||
// typedef struct {
|
||||
// ...
|
||||
// struct llist_node node;
|
||||
// ...
|
||||
// } MyObj;
|
||||
//
|
||||
// llist_insert_tail(&head, &obj->node);
|
||||
// llist_remove(&obj->node);
|
||||
//
|
||||
// struct llist_node *node;
|
||||
// llist_for_each(node, &head) {
|
||||
// MyObj *obj = llist_data(node, MyObj, node);
|
||||
// ...
|
||||
// }
|
||||
//
|
||||
|
||||
#ifndef Py_INTERNAL_LLIST_H
|
||||
#define Py_INTERNAL_LLIST_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef Py_BUILD_CORE
|
||||
# error "Py_BUILD_CORE must be defined to include this header"
|
||||
#endif
|
||||
|
||||
struct llist_node {
|
||||
struct llist_node *next;
|
||||
struct llist_node *prev;
|
||||
};
|
||||
|
||||
// Get the struct containing a node.
|
||||
#define llist_data(node, type, member) \
|
||||
(type*)((char*)node - offsetof(type, member))
|
||||
|
||||
// Iterate over a list.
|
||||
#define llist_for_each(node, head) \
|
||||
for (node = (head)->next; node != (head); node = node->next)
|
||||
|
||||
// Iterate over a list, but allow removal of the current node.
|
||||
#define llist_for_each_safe(node, head) \
|
||||
for (struct llist_node *_next = (node = (head)->next, node->next); \
|
||||
node != (head); node = _next, _next = node->next)
|
||||
|
||||
#define LLIST_INIT(head) { &head, &head }
|
||||
|
||||
static inline void
|
||||
llist_init(struct llist_node *head)
|
||||
{
|
||||
head->next = head;
|
||||
head->prev = head;
|
||||
}
|
||||
|
||||
// Returns 1 if the list is empty, 0 otherwise.
|
||||
static inline int
|
||||
llist_empty(struct llist_node *head)
|
||||
{
|
||||
return head->next == head;
|
||||
}
|
||||
|
||||
// Appends to the tail of the list.
|
||||
static inline void
|
||||
llist_insert_tail(struct llist_node *head, struct llist_node *node)
|
||||
{
|
||||
node->prev = head->prev;
|
||||
node->next = head;
|
||||
head->prev->next = node;
|
||||
head->prev = node;
|
||||
}
|
||||
|
||||
// Remove a node from the list.
|
||||
static inline void
|
||||
llist_remove(struct llist_node *node)
|
||||
{
|
||||
struct llist_node *prev = node->prev;
|
||||
struct llist_node *next = node->next;
|
||||
prev->next = next;
|
||||
next->prev = prev;
|
||||
node->prev = NULL;
|
||||
node->next = NULL;
|
||||
}
|
||||
|
||||
// Append all nodes from head2 onto head1. head2 is left empty.
|
||||
static inline void
|
||||
llist_concat(struct llist_node *head1, struct llist_node *head2)
|
||||
{
|
||||
if (!llist_empty(head2)) {
|
||||
head1->prev->next = head2->next;
|
||||
head2->next->prev = head1->prev;
|
||||
|
||||
head1->prev = head2->prev;
|
||||
head2->prev->next = head1;
|
||||
llist_init(head2);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* !Py_INTERNAL_LLIST_H */
|
158
Include/internal/pycore_lock.h
Normal file
158
Include/internal/pycore_lock.h
Normal file
@ -0,0 +1,158 @@
|
||||
// Lightweight locks and other synchronization mechanisms.
|
||||
//
|
||||
// These implementations are based on WebKit's WTF::Lock. See
|
||||
// https://webkit.org/blog/6161/locking-in-webkit/ for a description of the
|
||||
// design.
|
||||
#ifndef Py_INTERNAL_LOCK_H
|
||||
#define Py_INTERNAL_LOCK_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef Py_BUILD_CORE
|
||||
# error "this header requires Py_BUILD_CORE define"
|
||||
#endif
|
||||
|
||||
#include "pycore_time.h" // _PyTime_t
|
||||
|
||||
|
||||
// A mutex that occupies one byte. The lock can be zero initialized.
|
||||
//
|
||||
// Only the two least significant bits are used. The remaining bits should be
|
||||
// zero:
|
||||
// 0b00: unlocked
|
||||
// 0b01: locked
|
||||
// 0b10: unlocked and has parked threads
|
||||
// 0b11: locked and has parked threads
|
||||
//
|
||||
// Typical initialization:
|
||||
// PyMutex m = (PyMutex){0};
|
||||
//
|
||||
// Typical usage:
|
||||
// PyMutex_Lock(&m);
|
||||
// ...
|
||||
// PyMutex_Unlock(&m);
|
||||
typedef struct _PyMutex {
|
||||
uint8_t v;
|
||||
} PyMutex;
|
||||
|
||||
#define _Py_UNLOCKED 0
|
||||
#define _Py_LOCKED 1
|
||||
#define _Py_HAS_PARKED 2
|
||||
|
||||
// (private) slow path for locking the mutex
|
||||
PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m);
|
||||
|
||||
// (private) slow path for unlocking the mutex
|
||||
PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m);
|
||||
|
||||
// Locks the mutex.
|
||||
//
|
||||
// If the mutex is currently locked, the calling thread will be parked until
|
||||
// the mutex is unlocked. If the current thread holds the GIL, then the GIL
|
||||
// will be released while the thread is parked.
|
||||
static inline void
|
||||
PyMutex_Lock(PyMutex *m)
|
||||
{
|
||||
uint8_t expected = _Py_UNLOCKED;
|
||||
if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_LOCKED)) {
|
||||
_PyMutex_LockSlow(m);
|
||||
}
|
||||
}
|
||||
|
||||
// Unlocks the mutex.
|
||||
static inline void
|
||||
PyMutex_Unlock(PyMutex *m)
|
||||
{
|
||||
uint8_t expected = _Py_LOCKED;
|
||||
if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_UNLOCKED)) {
|
||||
_PyMutex_UnlockSlow(m);
|
||||
}
|
||||
}
|
||||
|
||||
// Checks if the mutex is currently locked.
|
||||
static inline int
|
||||
PyMutex_IsLocked(PyMutex *m)
|
||||
{
|
||||
return (_Py_atomic_load_uint8(&m->v) & _Py_LOCKED) != 0;
|
||||
}
|
||||
|
||||
typedef enum _PyLockFlags {
|
||||
// Do not detach/release the GIL when waiting on the lock.
|
||||
_Py_LOCK_DONT_DETACH = 0,
|
||||
|
||||
// Detach/release the GIL while waiting on the lock.
|
||||
_PY_LOCK_DETACH = 1,
|
||||
|
||||
// Handle signals if interrupted while waiting on the lock.
|
||||
_PY_LOCK_HANDLE_SIGNALS = 2,
|
||||
} _PyLockFlags;
|
||||
|
||||
// Lock a mutex with an optional timeout and additional options. See
|
||||
// _PyLockFlags for details.
|
||||
extern PyLockStatus
|
||||
_PyMutex_LockTimed(PyMutex *m, _PyTime_t timeout_ns, _PyLockFlags flags);
|
||||
|
||||
// Unlock a mutex, returns 0 if the mutex is not locked (used for improved
|
||||
// error messages).
|
||||
extern int _PyMutex_TryUnlock(PyMutex *m);
|
||||
|
||||
|
||||
// PyEvent is a one-time event notification
|
||||
typedef struct {
|
||||
uint8_t v;
|
||||
} PyEvent;
|
||||
|
||||
// Set the event and notify any waiting threads.
|
||||
// Export for '_testinternalcapi' shared extension
|
||||
PyAPI_FUNC(void) _PyEvent_Notify(PyEvent *evt);
|
||||
|
||||
// Wait for the event to be set. If the event is already set, then this returns
|
||||
// immediately.
|
||||
PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);
|
||||
|
||||
// Wait for the event to be set, or until the timeout expires. If the event is
|
||||
// already set, then this returns immediately. Returns 1 if the event was set,
|
||||
// and 0 if the timeout expired or thread was interrupted.
|
||||
PyAPI_FUNC(int) PyEvent_WaitTimed(PyEvent *evt, _PyTime_t timeout_ns);
|
||||
|
||||
|
||||
// _PyRawMutex implements a word-sized mutex that that does not depend on the
|
||||
// parking lot API, and therefore can be used in the parking lot
|
||||
// implementation.
|
||||
//
|
||||
// The mutex uses a packed representation: the least significant bit is used to
|
||||
// indicate whether the mutex is locked or not. The remaining bits are either
|
||||
// zero or a pointer to a `struct raw_mutex_entry` (see lock.c).
|
||||
typedef struct {
|
||||
uintptr_t v;
|
||||
} _PyRawMutex;
|
||||
|
||||
// Slow paths for lock/unlock
|
||||
extern void _PyRawMutex_LockSlow(_PyRawMutex *m);
|
||||
extern void _PyRawMutex_UnlockSlow(_PyRawMutex *m);
|
||||
|
||||
static inline void
|
||||
_PyRawMutex_Lock(_PyRawMutex *m)
|
||||
{
|
||||
uintptr_t unlocked = _Py_UNLOCKED;
|
||||
if (_Py_atomic_compare_exchange_uintptr(&m->v, &unlocked, _Py_LOCKED)) {
|
||||
return;
|
||||
}
|
||||
_PyRawMutex_LockSlow(m);
|
||||
}
|
||||
|
||||
static inline void
|
||||
_PyRawMutex_Unlock(_PyRawMutex *m)
|
||||
{
|
||||
uintptr_t locked = _Py_LOCKED;
|
||||
if (_Py_atomic_compare_exchange_uintptr(&m->v, &locked, _Py_UNLOCKED)) {
|
||||
return;
|
||||
}
|
||||
_PyRawMutex_UnlockSlow(m);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* !Py_INTERNAL_LOCK_H */
|
99
Include/internal/pycore_parking_lot.h
Normal file
99
Include/internal/pycore_parking_lot.h
Normal file
@ -0,0 +1,99 @@
|
||||
// ParkingLot is an internal API for building efficient synchronization
|
||||
// primitives like mutexes and events.
|
||||
//
|
||||
// The API and name is inspired by WebKit's WTF::ParkingLot, which in turn
|
||||
// is inspired Linux's futex API.
|
||||
// See https://webkit.org/blog/6161/locking-in-webkit/.
|
||||
//
|
||||
// The core functionality is an atomic "compare-and-sleep" operation along with
|
||||
// an atomic "wake-up" operation.
|
||||
|
||||
#ifndef Py_INTERNAL_PARKING_LOT_H
|
||||
#define Py_INTERNAL_PARKING_LOT_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef Py_BUILD_CORE
|
||||
# error "this header requires Py_BUILD_CORE define"
|
||||
#endif
|
||||
|
||||
#include "pycore_time.h" // _PyTime_t
|
||||
|
||||
|
||||
enum {
|
||||
// The thread was unparked by another thread.
|
||||
Py_PARK_OK = 0,
|
||||
|
||||
// The value of `address` did not match `expected`.
|
||||
Py_PARK_AGAIN = -1,
|
||||
|
||||
// The thread was unparked due to a timeout.
|
||||
Py_PARK_TIMEOUT = -2,
|
||||
|
||||
// The thread was interrupted by a signal.
|
||||
Py_PARK_INTR = -3,
|
||||
};
|
||||
|
||||
// Checks that `*address == *expected` and puts the thread to sleep until an
|
||||
// unpark operation is called on the same `address`. Otherwise, the function
|
||||
// returns `Py_PARK_AGAIN`. The comparison behaves like memcmp, but is
|
||||
// performed atomically with respect to unpark operations.
|
||||
//
|
||||
// The `address_size` argument is the size of the data pointed to by the
|
||||
// `address` and `expected` pointers (i.e., sizeof(*address)). It must be
|
||||
// 1, 2, 4, or 8.
|
||||
//
|
||||
// The `timeout_ns` argument specifies the maximum amount of time to wait, with
|
||||
// -1 indicating an infinite wait.
|
||||
//
|
||||
// `park_arg`, which can be NULL, is passed to the unpark operation.
|
||||
//
|
||||
// If `detach` is true, then the thread will detach/release the GIL while
|
||||
// waiting.
|
||||
//
|
||||
// Example usage:
|
||||
//
|
||||
// if (_Py_atomic_compare_exchange_uint8(address, &expected, new_value)) {
|
||||
// int res = _PyParkingLot_Park(address, &new_value, sizeof(*address),
|
||||
// timeout_ns, NULL, 1);
|
||||
// ...
|
||||
// }
|
||||
PyAPI_FUNC(int)
|
||||
_PyParkingLot_Park(const void *address, const void *expected,
|
||||
size_t address_size, _PyTime_t timeout_ns,
|
||||
void *park_arg, int detach);
|
||||
|
||||
// Callback for _PyParkingLot_Unpark:
|
||||
//
|
||||
// `arg` is the data of the same name provided to the _PyParkingLot_Unpark()
|
||||
// call.
|
||||
// `park_arg` is the data provided to _PyParkingLot_Park() call or NULL if
|
||||
// no waiting thread was found.
|
||||
// `has_more_waiters` is true if there are more threads waiting on the same
|
||||
// address. May be true in cases where threads are waiting on a different
|
||||
// address that map to the same internal bucket.
|
||||
typedef void _Py_unpark_fn_t(void *arg, void *park_arg, int has_more_waiters);
|
||||
|
||||
// Unparks a single thread waiting on `address`.
|
||||
//
|
||||
// Note that fn() is called regardless of whether a thread was unparked. If
|
||||
// no threads are waiting on `address` then the `park_arg` argument to fn()
|
||||
// will be NULL.
|
||||
//
|
||||
// Example usage:
|
||||
// void callback(void *arg, void *park_arg, int has_more_waiters);
|
||||
// _PyParkingLot_Unpark(address, &callback, arg);
|
||||
PyAPI_FUNC(void)
|
||||
_PyParkingLot_Unpark(const void *address, _Py_unpark_fn_t *fn, void *arg);
|
||||
|
||||
// Unparks all threads waiting on `address`.
|
||||
PyAPI_FUNC(void) _PyParkingLot_UnparkAll(const void *address);
|
||||
|
||||
// Resets the parking lot state after a fork. Forgets all parked threads.
|
||||
PyAPI_FUNC(void) _PyParkingLot_AfterFork(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* !Py_INTERNAL_PARKING_LOT_H */
|
63
Include/internal/pycore_semaphore.h
Normal file
63
Include/internal/pycore_semaphore.h
Normal file
@ -0,0 +1,63 @@
|
||||
// The _PySemaphore API a simplified cross-platform semaphore used to implement
|
||||
// wakeup/sleep.
|
||||
#ifndef Py_INTERNAL_SEMAPHORE_H
|
||||
#define Py_INTERNAL_SEMAPHORE_H
|
||||
|
||||
#ifndef Py_BUILD_CORE
|
||||
# error "this header requires Py_BUILD_CORE define"
|
||||
#endif
|
||||
|
||||
#include "pycore_time.h" // _PyTime_t
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
# include <windows.h>
|
||||
#elif defined(HAVE_PTHREAD_H)
|
||||
# include <pthread.h>
|
||||
#elif defined(HAVE_PTHREAD_STUBS)
|
||||
# include "cpython/pthread_stubs.h"
|
||||
#else
|
||||
# error "Require native threads. See https://bugs.python.org/issue31370"
|
||||
#endif
|
||||
|
||||
#if defined(_POSIX_SEMAPHORES) && (_POSIX_SEMAPHORES+0) != -1
|
||||
# define _Py_USE_SEMAPHORES
|
||||
# include <semaphore.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct _PySemaphore {
|
||||
#if defined(MS_WINDOWS)
|
||||
HANDLE platform_sem;
|
||||
#elif defined(_Py_USE_SEMAPHORES)
|
||||
sem_t platform_sem;
|
||||
#else
|
||||
pthread_mutex_t mutex;
|
||||
pthread_cond_t cond;
|
||||
int counter;
|
||||
#endif
|
||||
} _PySemaphore;
|
||||
|
||||
// Puts the current thread to sleep until _PySemaphore_Wakeup() is called.
|
||||
// If `detach` is true, then the thread will detach/release the GIL while
|
||||
// sleeping.
|
||||
PyAPI_FUNC(int)
|
||||
_PySemaphore_Wait(_PySemaphore *sema, _PyTime_t timeout_ns, int detach);
|
||||
|
||||
// Wakes up a single thread waiting on sema. Note that _PySemaphore_Wakeup()
|
||||
// can be called before _PySemaphore_Wait().
|
||||
PyAPI_FUNC(void)
|
||||
_PySemaphore_Wakeup(_PySemaphore *sema);
|
||||
|
||||
// Initializes/destroys a semaphore
|
||||
PyAPI_FUNC(void) _PySemaphore_Init(_PySemaphore *sema);
|
||||
PyAPI_FUNC(void) _PySemaphore_Destroy(_PySemaphore *sema);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* !Py_INTERNAL_SEMAPHORE_H */
|
16
Include/pyatomic.h
Normal file
16
Include/pyatomic.h
Normal file
@ -0,0 +1,16 @@
|
||||
#ifndef Py_ATOMIC_H
|
||||
#define Py_ATOMIC_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
# define Py_CPYTHON_ATOMIC_H
|
||||
# include "cpython/pyatomic.h"
|
||||
# undef Py_CPYTHON_ATOMIC_H
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* !Py_ATOMIC_H */
|
@ -2085,7 +2085,15 @@ class Test_testcapi(unittest.TestCase):
|
||||
class Test_testinternalcapi(unittest.TestCase):
|
||||
locals().update((name, getattr(_testinternalcapi, name))
|
||||
for name in dir(_testinternalcapi)
|
||||
if name.startswith('test_'))
|
||||
if name.startswith('test_')
|
||||
and not name.startswith('test_lock_'))
|
||||
|
||||
|
||||
@threading_helper.requires_working_threading()
|
||||
class Test_PyLock(unittest.TestCase):
|
||||
locals().update((name, getattr(_testinternalcapi, name))
|
||||
for name in dir(_testinternalcapi)
|
||||
if name.startswith('test_lock_'))
|
||||
|
||||
|
||||
@unittest.skipIf(_testmultiphase is None, "test requires _testmultiphase module")
|
||||
|
@ -400,12 +400,14 @@ PYTHON_OBJS= \
|
||||
Python/instrumentation.o \
|
||||
Python/intrinsics.o \
|
||||
Python/legacy_tracing.o \
|
||||
Python/lock.o \
|
||||
Python/marshal.o \
|
||||
Python/modsupport.o \
|
||||
Python/mysnprintf.o \
|
||||
Python/mystrtoul.o \
|
||||
Python/optimizer.o \
|
||||
Python/optimizer_analysis.o \
|
||||
Python/parking_lot.o \
|
||||
Python/pathconfig.o \
|
||||
Python/preconfig.o \
|
||||
Python/pyarena.o \
|
||||
@ -1779,6 +1781,8 @@ PYTHON_HEADERS= \
|
||||
$(srcdir)/Include/internal/pycore_interp.h \
|
||||
$(srcdir)/Include/internal/pycore_intrinsics.h \
|
||||
$(srcdir)/Include/internal/pycore_list.h \
|
||||
$(srcdir)/Include/internal/pycore_llist.h \
|
||||
$(srcdir)/Include/internal/pycore_lock.h \
|
||||
$(srcdir)/Include/internal/pycore_long.h \
|
||||
$(srcdir)/Include/internal/pycore_modsupport.h \
|
||||
$(srcdir)/Include/internal/pycore_moduleobject.h \
|
||||
@ -1790,6 +1794,7 @@ PYTHON_HEADERS= \
|
||||
$(srcdir)/Include/internal/pycore_opcode_metadata.h \
|
||||
$(srcdir)/Include/internal/pycore_opcode_utils.h \
|
||||
$(srcdir)/Include/internal/pycore_optimizer.h \
|
||||
$(srcdir)/Include/internal/pycore_parking_lot.h \
|
||||
$(srcdir)/Include/internal/pycore_pathconfig.h \
|
||||
$(srcdir)/Include/internal/pycore_pyarena.h \
|
||||
$(srcdir)/Include/internal/pycore_pyerrors.h \
|
||||
@ -1805,6 +1810,7 @@ PYTHON_HEADERS= \
|
||||
$(srcdir)/Include/internal/pycore_runtime.h \
|
||||
$(srcdir)/Include/internal/pycore_runtime_init_generated.h \
|
||||
$(srcdir)/Include/internal/pycore_runtime_init.h \
|
||||
$(srcdir)/Include/internal/pycore_semaphore.h \
|
||||
$(srcdir)/Include/internal/pycore_setobject.h \
|
||||
$(srcdir)/Include/internal/pycore_signal.h \
|
||||
$(srcdir)/Include/internal/pycore_sliceobject.h \
|
||||
|
@ -0,0 +1 @@
|
||||
Add :c:type:`PyMutex` internal-only lightweight locking API.
|
@ -158,7 +158,7 @@
|
||||
@MODULE_XXSUBTYPE_TRUE@xxsubtype xxsubtype.c
|
||||
@MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c
|
||||
@MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c
|
||||
@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/pytime.c
|
||||
@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c
|
||||
@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/pyos.c _testcapi/immortal.c _testcapi/heaptype_relative.c _testcapi/gc.c
|
||||
@MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c
|
||||
@MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c
|
||||
|
@ -8,7 +8,6 @@
|
||||
#undef NDEBUG
|
||||
|
||||
#include "Python.h"
|
||||
#include "cpython/pyatomic.h"
|
||||
#include "parts.h"
|
||||
|
||||
// We define atomic bitwise operations on these types
|
||||
|
@ -1543,6 +1543,9 @@ static PyMethodDef module_functions[] = {
|
||||
static int
|
||||
module_exec(PyObject *module)
|
||||
{
|
||||
if (_PyTestInternalCapi_Init_Lock(module) < 0) {
|
||||
return 1;
|
||||
}
|
||||
if (_PyTestInternalCapi_Init_PyTime(module) < 0) {
|
||||
return 1;
|
||||
}
|
||||
|
74
Modules/_testinternalcapi/clinic/test_lock.c.h
generated
Normal file
74
Modules/_testinternalcapi/clinic/test_lock.c.h
generated
Normal file
@ -0,0 +1,74 @@
|
||||
/*[clinic input]
|
||||
preserve
|
||||
[clinic start generated code]*/
|
||||
|
||||
#include "pycore_abstract.h" // _PyNumber_Index()
|
||||
|
||||
PyDoc_STRVAR(_testinternalcapi_benchmark_locks__doc__,
|
||||
"benchmark_locks($module, num_threads, use_pymutex=True,\n"
|
||||
" critical_section_length=1, time_ms=1000, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _TESTINTERNALCAPI_BENCHMARK_LOCKS_METHODDEF \
|
||||
{"benchmark_locks", _PyCFunction_CAST(_testinternalcapi_benchmark_locks), METH_FASTCALL, _testinternalcapi_benchmark_locks__doc__},
|
||||
|
||||
static PyObject *
|
||||
_testinternalcapi_benchmark_locks_impl(PyObject *module,
|
||||
Py_ssize_t num_threads,
|
||||
int use_pymutex,
|
||||
int critical_section_length,
|
||||
int time_ms);
|
||||
|
||||
static PyObject *
|
||||
_testinternalcapi_benchmark_locks(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
Py_ssize_t num_threads;
|
||||
int use_pymutex = 1;
|
||||
int critical_section_length = 1;
|
||||
int time_ms = 1000;
|
||||
|
||||
if (!_PyArg_CheckPositional("benchmark_locks", nargs, 1, 4)) {
|
||||
goto exit;
|
||||
}
|
||||
{
|
||||
Py_ssize_t ival = -1;
|
||||
PyObject *iobj = _PyNumber_Index(args[0]);
|
||||
if (iobj != NULL) {
|
||||
ival = PyLong_AsSsize_t(iobj);
|
||||
Py_DECREF(iobj);
|
||||
}
|
||||
if (ival == -1 && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
num_threads = ival;
|
||||
}
|
||||
if (nargs < 2) {
|
||||
goto skip_optional;
|
||||
}
|
||||
use_pymutex = PyObject_IsTrue(args[1]);
|
||||
if (use_pymutex < 0) {
|
||||
goto exit;
|
||||
}
|
||||
if (nargs < 3) {
|
||||
goto skip_optional;
|
||||
}
|
||||
critical_section_length = PyLong_AsInt(args[2]);
|
||||
if (critical_section_length == -1 && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
if (nargs < 4) {
|
||||
goto skip_optional;
|
||||
}
|
||||
time_ms = PyLong_AsInt(args[3]);
|
||||
if (time_ms == -1 && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
skip_optional:
|
||||
return_value = _testinternalcapi_benchmark_locks_impl(module, num_threads, use_pymutex, critical_section_length, time_ms);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
/*[clinic end generated code: output=97c85dff601fed4b input=a9049054013a1b77]*/
|
@ -10,6 +10,7 @@
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
int _PyTestInternalCapi_Init_Lock(PyObject *module);
|
||||
int _PyTestInternalCapi_Init_PyTime(PyObject *module);
|
||||
|
||||
#endif // Py_TESTINTERNALCAPI_PARTS_H
|
||||
|
353
Modules/_testinternalcapi/test_lock.c
Normal file
353
Modules/_testinternalcapi/test_lock.c
Normal file
@ -0,0 +1,353 @@
|
||||
// C Extension module to test pycore_lock.h API
|
||||
|
||||
#include "parts.h"
|
||||
|
||||
#include "pycore_lock.h"
|
||||
#include "clinic/test_lock.c.h"
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <unistd.h> // usleep()
|
||||
#endif
|
||||
|
||||
/*[clinic input]
|
||||
module _testinternalcapi
|
||||
[clinic start generated code]*/
|
||||
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7bb583d8c9eb9a78]*/
|
||||
|
||||
|
||||
static void
|
||||
pysleep(int ms)
|
||||
{
|
||||
#ifdef MS_WINDOWS
|
||||
Sleep(ms);
|
||||
#else
|
||||
usleep(ms * 1000);
|
||||
#endif
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
test_lock_basic(PyObject *self, PyObject *obj)
|
||||
{
|
||||
PyMutex m = (PyMutex){0};
|
||||
|
||||
// uncontended lock and unlock
|
||||
PyMutex_Lock(&m);
|
||||
assert(m.v == 1);
|
||||
PyMutex_Unlock(&m);
|
||||
assert(m.v == 0);
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
struct test_lock2_data {
|
||||
PyMutex m;
|
||||
PyEvent done;
|
||||
int started;
|
||||
};
|
||||
|
||||
static void
|
||||
lock_thread(void *arg)
|
||||
{
|
||||
struct test_lock2_data *test_data = arg;
|
||||
PyMutex *m = &test_data->m;
|
||||
_Py_atomic_store_int(&test_data->started, 1);
|
||||
|
||||
PyMutex_Lock(m);
|
||||
assert(m->v == 1);
|
||||
|
||||
PyMutex_Unlock(m);
|
||||
assert(m->v == 0);
|
||||
|
||||
_PyEvent_Notify(&test_data->done);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
test_lock_two_threads(PyObject *self, PyObject *obj)
|
||||
{
|
||||
// lock attempt by two threads
|
||||
struct test_lock2_data test_data;
|
||||
memset(&test_data, 0, sizeof(test_data));
|
||||
|
||||
PyMutex_Lock(&test_data.m);
|
||||
assert(test_data.m.v == 1);
|
||||
|
||||
PyThread_start_new_thread(lock_thread, &test_data);
|
||||
while (!_Py_atomic_load_int(&test_data.started)) {
|
||||
pysleep(10);
|
||||
}
|
||||
pysleep(10); // allow some time for the other thread to try to lock
|
||||
assert(test_data.m.v == 3);
|
||||
|
||||
PyMutex_Unlock(&test_data.m);
|
||||
PyEvent_Wait(&test_data.done);
|
||||
assert(test_data.m.v == 0);
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
#define COUNTER_THREADS 5
|
||||
#define COUNTER_ITERS 10000
|
||||
|
||||
struct test_data_counter {
|
||||
PyMutex m;
|
||||
Py_ssize_t counter;
|
||||
};
|
||||
|
||||
struct thread_data_counter {
|
||||
struct test_data_counter *test_data;
|
||||
PyEvent done_event;
|
||||
};
|
||||
|
||||
static void
|
||||
counter_thread(void *arg)
|
||||
{
|
||||
struct thread_data_counter *thread_data = arg;
|
||||
struct test_data_counter *test_data = thread_data->test_data;
|
||||
|
||||
for (Py_ssize_t i = 0; i < COUNTER_ITERS; i++) {
|
||||
PyMutex_Lock(&test_data->m);
|
||||
test_data->counter++;
|
||||
PyMutex_Unlock(&test_data->m);
|
||||
}
|
||||
_PyEvent_Notify(&thread_data->done_event);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
test_lock_counter(PyObject *self, PyObject *obj)
|
||||
{
|
||||
// Test with rapidly locking and unlocking mutex
|
||||
struct test_data_counter test_data;
|
||||
memset(&test_data, 0, sizeof(test_data));
|
||||
|
||||
struct thread_data_counter thread_data[COUNTER_THREADS];
|
||||
memset(&thread_data, 0, sizeof(thread_data));
|
||||
|
||||
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
|
||||
thread_data[i].test_data = &test_data;
|
||||
PyThread_start_new_thread(counter_thread, &thread_data[i]);
|
||||
}
|
||||
|
||||
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
|
||||
PyEvent_Wait(&thread_data[i].done_event);
|
||||
}
|
||||
|
||||
assert(test_data.counter == COUNTER_THREADS * COUNTER_ITERS);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
#define SLOW_COUNTER_ITERS 100
|
||||
|
||||
static void
|
||||
slow_counter_thread(void *arg)
|
||||
{
|
||||
struct thread_data_counter *thread_data = arg;
|
||||
struct test_data_counter *test_data = thread_data->test_data;
|
||||
|
||||
for (Py_ssize_t i = 0; i < SLOW_COUNTER_ITERS; i++) {
|
||||
PyMutex_Lock(&test_data->m);
|
||||
if (i % 7 == 0) {
|
||||
pysleep(2);
|
||||
}
|
||||
test_data->counter++;
|
||||
PyMutex_Unlock(&test_data->m);
|
||||
}
|
||||
_PyEvent_Notify(&thread_data->done_event);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
test_lock_counter_slow(PyObject *self, PyObject *obj)
|
||||
{
|
||||
// Test lock/unlock with occasional "long" critical section, which will
|
||||
// trigger handoff of the lock.
|
||||
struct test_data_counter test_data;
|
||||
memset(&test_data, 0, sizeof(test_data));
|
||||
|
||||
struct thread_data_counter thread_data[COUNTER_THREADS];
|
||||
memset(&thread_data, 0, sizeof(thread_data));
|
||||
|
||||
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
|
||||
thread_data[i].test_data = &test_data;
|
||||
PyThread_start_new_thread(slow_counter_thread, &thread_data[i]);
|
||||
}
|
||||
|
||||
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
|
||||
PyEvent_Wait(&thread_data[i].done_event);
|
||||
}
|
||||
|
||||
assert(test_data.counter == COUNTER_THREADS * SLOW_COUNTER_ITERS);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
struct bench_data_locks {
|
||||
int stop;
|
||||
int use_pymutex;
|
||||
int critical_section_length;
|
||||
char padding[200];
|
||||
PyThread_type_lock lock;
|
||||
PyMutex m;
|
||||
double value;
|
||||
Py_ssize_t total_iters;
|
||||
};
|
||||
|
||||
struct bench_thread_data {
|
||||
struct bench_data_locks *bench_data;
|
||||
Py_ssize_t iters;
|
||||
PyEvent done;
|
||||
};
|
||||
|
||||
static void
|
||||
thread_benchmark_locks(void *arg)
|
||||
{
|
||||
struct bench_thread_data *thread_data = arg;
|
||||
struct bench_data_locks *bench_data = thread_data->bench_data;
|
||||
int use_pymutex = bench_data->use_pymutex;
|
||||
int critical_section_length = bench_data->critical_section_length;
|
||||
|
||||
double my_value = 1.0;
|
||||
Py_ssize_t iters = 0;
|
||||
while (!_Py_atomic_load_int_relaxed(&bench_data->stop)) {
|
||||
if (use_pymutex) {
|
||||
PyMutex_Lock(&bench_data->m);
|
||||
for (int i = 0; i < critical_section_length; i++) {
|
||||
bench_data->value += my_value;
|
||||
my_value = bench_data->value;
|
||||
}
|
||||
PyMutex_Unlock(&bench_data->m);
|
||||
}
|
||||
else {
|
||||
PyThread_acquire_lock(bench_data->lock, 1);
|
||||
for (int i = 0; i < critical_section_length; i++) {
|
||||
bench_data->value += my_value;
|
||||
my_value = bench_data->value;
|
||||
}
|
||||
PyThread_release_lock(bench_data->lock);
|
||||
}
|
||||
iters++;
|
||||
}
|
||||
|
||||
thread_data->iters = iters;
|
||||
_Py_atomic_add_ssize(&bench_data->total_iters, iters);
|
||||
_PyEvent_Notify(&thread_data->done);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_testinternalcapi.benchmark_locks
|
||||
|
||||
num_threads: Py_ssize_t
|
||||
use_pymutex: bool = True
|
||||
critical_section_length: int = 1
|
||||
time_ms: int = 1000
|
||||
/
|
||||
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_testinternalcapi_benchmark_locks_impl(PyObject *module,
|
||||
Py_ssize_t num_threads,
|
||||
int use_pymutex,
|
||||
int critical_section_length,
|
||||
int time_ms)
|
||||
/*[clinic end generated code: output=381df8d7e9a74f18 input=f3aeaf688738c121]*/
|
||||
{
|
||||
// Run from Tools/lockbench/lockbench.py
|
||||
// Based on the WebKit lock benchmarks:
|
||||
// https://github.com/WebKit/WebKit/blob/main/Source/WTF/benchmarks/LockSpeedTest.cpp
|
||||
// See also https://webkit.org/blog/6161/locking-in-webkit/
|
||||
PyObject *thread_iters = NULL;
|
||||
PyObject *res = NULL;
|
||||
|
||||
struct bench_data_locks bench_data;
|
||||
memset(&bench_data, 0, sizeof(bench_data));
|
||||
bench_data.use_pymutex = use_pymutex;
|
||||
bench_data.critical_section_length = critical_section_length;
|
||||
|
||||
bench_data.lock = PyThread_allocate_lock();
|
||||
if (bench_data.lock == NULL) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
|
||||
struct bench_thread_data *thread_data = NULL;
|
||||
thread_data = PyMem_Calloc(num_threads, sizeof(*thread_data));
|
||||
if (thread_data == NULL) {
|
||||
PyErr_NoMemory();
|
||||
goto exit;
|
||||
}
|
||||
|
||||
thread_iters = PyList_New(num_threads);
|
||||
if (thread_iters == NULL) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
_PyTime_t start = _PyTime_GetMonotonicClock();
|
||||
|
||||
for (Py_ssize_t i = 0; i < num_threads; i++) {
|
||||
thread_data[i].bench_data = &bench_data;
|
||||
PyThread_start_new_thread(thread_benchmark_locks, &thread_data[i]);
|
||||
}
|
||||
|
||||
// Let the threads run for `time_ms` milliseconds
|
||||
pysleep(time_ms);
|
||||
_Py_atomic_store_int(&bench_data.stop, 1);
|
||||
|
||||
// Wait for the threads to finish
|
||||
for (Py_ssize_t i = 0; i < num_threads; i++) {
|
||||
PyEvent_Wait(&thread_data[i].done);
|
||||
}
|
||||
|
||||
Py_ssize_t total_iters = bench_data.total_iters;
|
||||
_PyTime_t end = _PyTime_GetMonotonicClock();
|
||||
|
||||
// Return the total number of acquisitions and the number of acquisitions
|
||||
// for each thread.
|
||||
for (Py_ssize_t i = 0; i < num_threads; i++) {
|
||||
PyObject *iter = PyLong_FromSsize_t(thread_data[i].iters);
|
||||
if (iter == NULL) {
|
||||
goto exit;
|
||||
}
|
||||
PyList_SET_ITEM(thread_iters, i, iter);
|
||||
}
|
||||
|
||||
double rate = total_iters * 1000000000.0 / (end - start);
|
||||
res = Py_BuildValue("(dO)", rate, thread_iters);
|
||||
|
||||
exit:
|
||||
PyThread_free_lock(bench_data.lock);
|
||||
PyMem_Free(thread_data);
|
||||
Py_XDECREF(thread_iters);
|
||||
return res;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
test_lock_benchmark(PyObject *module, PyObject *obj)
|
||||
{
|
||||
// Just make sure the benchmark runs without crashing
|
||||
PyObject *res = _testinternalcapi_benchmark_locks_impl(
|
||||
module, 1, 1, 1, 100);
|
||||
if (res == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
Py_DECREF(res);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
static PyMethodDef test_methods[] = {
|
||||
{"test_lock_basic", test_lock_basic, METH_NOARGS},
|
||||
{"test_lock_two_threads", test_lock_two_threads, METH_NOARGS},
|
||||
{"test_lock_counter", test_lock_counter, METH_NOARGS},
|
||||
{"test_lock_counter_slow", test_lock_counter_slow, METH_NOARGS},
|
||||
_TESTINTERNALCAPI_BENCHMARK_LOCKS_METHODDEF
|
||||
{"test_lock_benchmark", test_lock_benchmark, METH_NOARGS},
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
int
|
||||
_PyTestInternalCapi_Init_Lock(PyObject *mod)
|
||||
{
|
||||
if (PyModule_AddFunctions(mod, test_methods) < 0) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
@ -95,6 +95,7 @@
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\Modules\_testinternalcapi.c" />
|
||||
<ClCompile Include="..\Modules\_testinternalcapi\pytime.c" />
|
||||
<ClCompile Include="..\Modules\_testinternalcapi\test_lock.c" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ResourceCompile Include="..\PC\python_nt.rc" />
|
||||
|
@ -15,6 +15,9 @@
|
||||
<ClCompile Include="..\Modules\_testinternalcapi\pytime.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Modules\_testinternalcapi\test_lock.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ResourceCompile Include="..\PC\python_nt.rc">
|
||||
|
@ -245,6 +245,8 @@
|
||||
<ClInclude Include="..\Include\internal\pycore_interp.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_intrinsics.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_list.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_llist.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_lock.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_long.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_modsupport.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_moduleobject.h" />
|
||||
@ -254,6 +256,7 @@
|
||||
<ClInclude Include="..\Include\internal\pycore_obmalloc.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_obmalloc_init.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_optimizer.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_parking_lot.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_pathconfig.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_pyarena.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_pyerrors.h" />
|
||||
@ -269,6 +272,7 @@
|
||||
<ClInclude Include="..\Include\internal\pycore_runtime.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_runtime_init.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_runtime_init_generated.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_semaphore.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_setobject.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_signal.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_sliceobject.h" />
|
||||
@ -307,6 +311,7 @@
|
||||
<ClInclude Include="..\Include\osmodule.h" />
|
||||
<ClInclude Include="..\Include\patchlevel.h" />
|
||||
<ClInclude Include="..\Include\py_curses.h" />
|
||||
<ClInclude Include="..\Include\pyatomic.h" />
|
||||
<ClInclude Include="..\Include\pybuffer.h" />
|
||||
<ClInclude Include="..\Include\pycapsule.h" />
|
||||
<ClInclude Include="..\Include\pyerrors.h" />
|
||||
@ -552,12 +557,14 @@
|
||||
<ClCompile Include="..\Python\intrinsics.c" />
|
||||
<ClCompile Include="..\Python\instrumentation.c" />
|
||||
<ClCompile Include="..\Python\legacy_tracing.c" />
|
||||
<ClCompile Include="..\Python\lock.c" />
|
||||
<ClCompile Include="..\Python\marshal.c" />
|
||||
<ClCompile Include="..\Python\modsupport.c" />
|
||||
<ClCompile Include="..\Python\mysnprintf.c" />
|
||||
<ClCompile Include="..\Python\mystrtoul.c" />
|
||||
<ClCompile Include="..\Python\optimizer.c" />
|
||||
<ClCompile Include="..\Python\optimizer_analysis.c" />
|
||||
<ClCompile Include="..\Python\parking_lot.c" />
|
||||
<ClCompile Include="..\Python\pathconfig.c" />
|
||||
<ClCompile Include="..\Python\perf_trampoline.c" />
|
||||
<ClCompile Include="..\Python\preconfig.c" />
|
||||
|
@ -144,6 +144,9 @@
|
||||
<ClInclude Include="..\Include\py_curses.h">
|
||||
<Filter>Include</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\pyatomic.h">
|
||||
<Filter>Include</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\pybuffer.h">
|
||||
<Filter>Include</Filter>
|
||||
</ClInclude>
|
||||
@ -645,6 +648,12 @@
|
||||
<ClInclude Include="..\Include\internal\pycore_list.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_llist.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_lock.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_long.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
@ -672,6 +681,9 @@
|
||||
<ClInclude Include="..\Include\internal\pycore_optimizer.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_parking_lot.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_pathconfig.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
@ -717,6 +729,9 @@
|
||||
<ClInclude Include="..\Include\internal\pycore_runtime_init_generated.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_semaphore.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_setobject.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
@ -1241,6 +1256,9 @@
|
||||
<ClCompile Include="..\Python\legacy_tracing.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\lock.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\marshal.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
@ -1259,6 +1277,9 @@
|
||||
<ClCompile Include="..\Python\optimizer_analysis.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\parking_lot.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\pathconfig.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
|
297
Python/lock.c
Normal file
297
Python/lock.c
Normal file
@ -0,0 +1,297 @@
|
||||
// Lock implementation
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
#include "pycore_lock.h"
|
||||
#include "pycore_parking_lot.h"
|
||||
#include "pycore_semaphore.h"
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h> // SwitchToThread()
|
||||
#elif defined(HAVE_SCHED_H)
|
||||
#include <sched.h> // sched_yield()
|
||||
#endif
|
||||
|
||||
// If a thread waits on a lock for longer than TIME_TO_BE_FAIR_NS (1 ms), then
|
||||
// the unlocking thread directly hands off ownership of the lock. This avoids
|
||||
// starvation.
|
||||
static const _PyTime_t TIME_TO_BE_FAIR_NS = 1000*1000;
|
||||
|
||||
// Spin for a bit before parking the thread. This is only enabled for
|
||||
// `--disable-gil` builds because it is unlikely to be helpful if the GIL is
|
||||
// enabled.
|
||||
#if Py_NOGIL
|
||||
static const int MAX_SPIN_COUNT = 40;
|
||||
#else
|
||||
static const int MAX_SPIN_COUNT = 0;
|
||||
#endif
|
||||
|
||||
struct mutex_entry {
|
||||
// The time after which the unlocking thread should hand off lock ownership
|
||||
// directly to the waiting thread. Written by the waiting thread.
|
||||
_PyTime_t time_to_be_fair;
|
||||
|
||||
// Set to 1 if the lock was handed off. Written by the unlocking thread.
|
||||
int handed_off;
|
||||
};
|
||||
|
||||
static void
|
||||
_Py_yield(void)
|
||||
{
|
||||
#ifdef MS_WINDOWS
|
||||
SwitchToThread();
|
||||
#elif defined(HAVE_SCHED_H)
|
||||
sched_yield();
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
_PyMutex_LockSlow(PyMutex *m)
|
||||
{
|
||||
_PyMutex_LockTimed(m, -1, _PY_LOCK_DETACH);
|
||||
}
|
||||
|
||||
PyLockStatus
|
||||
_PyMutex_LockTimed(PyMutex *m, _PyTime_t timeout, _PyLockFlags flags)
|
||||
{
|
||||
uint8_t v = _Py_atomic_load_uint8_relaxed(&m->v);
|
||||
if ((v & _Py_LOCKED) == 0) {
|
||||
if (_Py_atomic_compare_exchange_uint8(&m->v, &v, v|_Py_LOCKED)) {
|
||||
return PY_LOCK_ACQUIRED;
|
||||
}
|
||||
}
|
||||
else if (timeout == 0) {
|
||||
return PY_LOCK_FAILURE;
|
||||
}
|
||||
|
||||
_PyTime_t now = _PyTime_GetMonotonicClock();
|
||||
_PyTime_t endtime = 0;
|
||||
if (timeout > 0) {
|
||||
endtime = _PyTime_Add(now, timeout);
|
||||
}
|
||||
|
||||
struct mutex_entry entry = {
|
||||
.time_to_be_fair = now + TIME_TO_BE_FAIR_NS,
|
||||
.handed_off = 0,
|
||||
};
|
||||
|
||||
Py_ssize_t spin_count = 0;
|
||||
for (;;) {
|
||||
if ((v & _Py_LOCKED) == 0) {
|
||||
// The lock is unlocked. Try to grab it.
|
||||
if (_Py_atomic_compare_exchange_uint8(&m->v, &v, v|_Py_LOCKED)) {
|
||||
return PY_LOCK_ACQUIRED;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!(v & _Py_HAS_PARKED) && spin_count < MAX_SPIN_COUNT) {
|
||||
// Spin for a bit.
|
||||
_Py_yield();
|
||||
spin_count++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (timeout == 0) {
|
||||
return PY_LOCK_FAILURE;
|
||||
}
|
||||
|
||||
uint8_t newv = v;
|
||||
if (!(v & _Py_HAS_PARKED)) {
|
||||
// We are the first waiter. Set the _Py_HAS_PARKED flag.
|
||||
newv = v | _Py_HAS_PARKED;
|
||||
if (!_Py_atomic_compare_exchange_uint8(&m->v, &v, newv)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
int ret = _PyParkingLot_Park(&m->v, &newv, sizeof(newv), timeout,
|
||||
&entry, (flags & _PY_LOCK_DETACH) != 0);
|
||||
if (ret == Py_PARK_OK) {
|
||||
if (entry.handed_off) {
|
||||
// We own the lock now.
|
||||
assert(_Py_atomic_load_uint8_relaxed(&m->v) & _Py_LOCKED);
|
||||
return PY_LOCK_ACQUIRED;
|
||||
}
|
||||
}
|
||||
else if (ret == Py_PARK_INTR && (flags & _PY_LOCK_HANDLE_SIGNALS)) {
|
||||
if (Py_MakePendingCalls() < 0) {
|
||||
return PY_LOCK_INTR;
|
||||
}
|
||||
}
|
||||
else if (ret == Py_PARK_TIMEOUT) {
|
||||
assert(timeout >= 0);
|
||||
return PY_LOCK_FAILURE;
|
||||
}
|
||||
|
||||
if (timeout > 0) {
|
||||
timeout = _PyDeadline_Get(endtime);
|
||||
if (timeout <= 0) {
|
||||
// Avoid negative values because those mean block forever.
|
||||
timeout = 0;
|
||||
}
|
||||
}
|
||||
|
||||
v = _Py_atomic_load_uint8_relaxed(&m->v);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
mutex_unpark(PyMutex *m, struct mutex_entry *entry, int has_more_waiters)
|
||||
{
|
||||
uint8_t v = 0;
|
||||
if (entry) {
|
||||
_PyTime_t now = _PyTime_GetMonotonicClock();
|
||||
int should_be_fair = now > entry->time_to_be_fair;
|
||||
|
||||
entry->handed_off = should_be_fair;
|
||||
if (should_be_fair) {
|
||||
v |= _Py_LOCKED;
|
||||
}
|
||||
if (has_more_waiters) {
|
||||
v |= _Py_HAS_PARKED;
|
||||
}
|
||||
}
|
||||
_Py_atomic_store_uint8(&m->v, v);
|
||||
}
|
||||
|
||||
int
|
||||
_PyMutex_TryUnlock(PyMutex *m)
|
||||
{
|
||||
uint8_t v = _Py_atomic_load_uint8(&m->v);
|
||||
for (;;) {
|
||||
if ((v & _Py_LOCKED) == 0) {
|
||||
// error: the mutex is not locked
|
||||
return -1;
|
||||
}
|
||||
else if ((v & _Py_HAS_PARKED)) {
|
||||
// wake up a single thread
|
||||
_PyParkingLot_Unpark(&m->v, (_Py_unpark_fn_t *)mutex_unpark, m);
|
||||
return 0;
|
||||
}
|
||||
else if (_Py_atomic_compare_exchange_uint8(&m->v, &v, _Py_UNLOCKED)) {
|
||||
// fast-path: no waiters
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
_PyMutex_UnlockSlow(PyMutex *m)
|
||||
{
|
||||
if (_PyMutex_TryUnlock(m) < 0) {
|
||||
Py_FatalError("unlocking mutex that is not locked");
|
||||
}
|
||||
}
|
||||
|
||||
// _PyRawMutex stores a linked list of `struct raw_mutex_entry`, one for each
|
||||
// thread waiting on the mutex, directly in the mutex itself.
|
||||
struct raw_mutex_entry {
|
||||
struct raw_mutex_entry *next;
|
||||
_PySemaphore sema;
|
||||
};
|
||||
|
||||
void
|
||||
_PyRawMutex_LockSlow(_PyRawMutex *m)
|
||||
{
|
||||
struct raw_mutex_entry waiter;
|
||||
_PySemaphore_Init(&waiter.sema);
|
||||
|
||||
uintptr_t v = _Py_atomic_load_uintptr(&m->v);
|
||||
for (;;) {
|
||||
if ((v & _Py_LOCKED) == 0) {
|
||||
// Unlocked: try to grab it (even if it has a waiter).
|
||||
if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, v|_Py_LOCKED)) {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Locked: try to add ourselves as a waiter.
|
||||
waiter.next = (struct raw_mutex_entry *)(v & ~1);
|
||||
uintptr_t desired = ((uintptr_t)&waiter)|_Py_LOCKED;
|
||||
if (!_Py_atomic_compare_exchange_uintptr(&m->v, &v, desired)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Wait for us to be woken up. Note that we still have to lock the
|
||||
// mutex ourselves: it is NOT handed off to us.
|
||||
_PySemaphore_Wait(&waiter.sema, -1, /*detach=*/0);
|
||||
}
|
||||
|
||||
_PySemaphore_Destroy(&waiter.sema);
|
||||
}
|
||||
|
||||
void
|
||||
_PyRawMutex_UnlockSlow(_PyRawMutex *m)
|
||||
{
|
||||
uintptr_t v = _Py_atomic_load_uintptr(&m->v);
|
||||
for (;;) {
|
||||
if ((v & _Py_LOCKED) == 0) {
|
||||
Py_FatalError("unlocking mutex that is not locked");
|
||||
}
|
||||
|
||||
struct raw_mutex_entry *waiter = (struct raw_mutex_entry *)(v & ~1);
|
||||
if (waiter) {
|
||||
uintptr_t next_waiter = (uintptr_t)waiter->next;
|
||||
if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, next_waiter)) {
|
||||
_PySemaphore_Wakeup(&waiter->sema);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, _Py_UNLOCKED)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
_PyEvent_Notify(PyEvent *evt)
|
||||
{
|
||||
uintptr_t v = _Py_atomic_exchange_uint8(&evt->v, _Py_LOCKED);
|
||||
if (v == _Py_UNLOCKED) {
|
||||
// no waiters
|
||||
return;
|
||||
}
|
||||
else if (v == _Py_LOCKED) {
|
||||
// event already set
|
||||
return;
|
||||
}
|
||||
else {
|
||||
assert(v == _Py_HAS_PARKED);
|
||||
_PyParkingLot_UnparkAll(&evt->v);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
PyEvent_Wait(PyEvent *evt)
|
||||
{
|
||||
while (!PyEvent_WaitTimed(evt, -1))
|
||||
;
|
||||
}
|
||||
|
||||
int
|
||||
PyEvent_WaitTimed(PyEvent *evt, _PyTime_t timeout_ns)
|
||||
{
|
||||
for (;;) {
|
||||
uint8_t v = _Py_atomic_load_uint8(&evt->v);
|
||||
if (v == _Py_LOCKED) {
|
||||
// event already set
|
||||
return 1;
|
||||
}
|
||||
if (v == _Py_UNLOCKED) {
|
||||
if (!_Py_atomic_compare_exchange_uint8(&evt->v, &v, _Py_HAS_PARKED)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t expected = _Py_HAS_PARKED;
|
||||
(void) _PyParkingLot_Park(&evt->v, &expected, sizeof(evt->v),
|
||||
timeout_ns, NULL, 1);
|
||||
|
||||
return _Py_atomic_load_uint8(&evt->v) == _Py_LOCKED;
|
||||
}
|
||||
}
|
370
Python/parking_lot.c
Normal file
370
Python/parking_lot.c
Normal file
@ -0,0 +1,370 @@
|
||||
#include "Python.h"
|
||||
|
||||
#include "pycore_llist.h"
|
||||
#include "pycore_lock.h" // _PyRawMutex
|
||||
#include "pycore_parking_lot.h"
|
||||
#include "pycore_pyerrors.h" // _Py_FatalErrorFormat
|
||||
#include "pycore_pystate.h" // _PyThreadState_GET
|
||||
#include "pycore_semaphore.h" // _PySemaphore
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
|
||||
typedef struct {
|
||||
// The mutex protects the waiter queue and the num_waiters counter.
|
||||
_PyRawMutex mutex;
|
||||
|
||||
// Linked list of `struct wait_entry` waiters in this bucket.
|
||||
struct llist_node root;
|
||||
size_t num_waiters;
|
||||
} Bucket;
|
||||
|
||||
struct wait_entry {
|
||||
void *park_arg;
|
||||
uintptr_t addr;
|
||||
_PySemaphore sema;
|
||||
struct llist_node node;
|
||||
bool is_unparking;
|
||||
};
|
||||
|
||||
// Prime number to avoid correlations with memory addresses.
|
||||
// We want this to be roughly proportional to the number of CPU cores
|
||||
// to minimize contention on the bucket locks, but not too big to avoid
|
||||
// wasting memory. The exact choice does not matter much.
|
||||
#define NUM_BUCKETS 257
|
||||
|
||||
#define BUCKET_INIT(b, i) [i] = { .root = LLIST_INIT(b[i].root) }
|
||||
#define BUCKET_INIT_2(b, i) BUCKET_INIT(b, i), BUCKET_INIT(b, i+1)
|
||||
#define BUCKET_INIT_4(b, i) BUCKET_INIT_2(b, i), BUCKET_INIT_2(b, i+2)
|
||||
#define BUCKET_INIT_8(b, i) BUCKET_INIT_4(b, i), BUCKET_INIT_4(b, i+4)
|
||||
#define BUCKET_INIT_16(b, i) BUCKET_INIT_8(b, i), BUCKET_INIT_8(b, i+8)
|
||||
#define BUCKET_INIT_32(b, i) BUCKET_INIT_16(b, i), BUCKET_INIT_16(b, i+16)
|
||||
#define BUCKET_INIT_64(b, i) BUCKET_INIT_32(b, i), BUCKET_INIT_32(b, i+32)
|
||||
#define BUCKET_INIT_128(b, i) BUCKET_INIT_64(b, i), BUCKET_INIT_64(b, i+64)
|
||||
#define BUCKET_INIT_256(b, i) BUCKET_INIT_128(b, i), BUCKET_INIT_128(b, i+128)
|
||||
|
||||
// Table of waiters (hashed by address)
|
||||
static Bucket buckets[NUM_BUCKETS] = {
|
||||
BUCKET_INIT_256(buckets, 0),
|
||||
BUCKET_INIT(buckets, 256),
|
||||
};
|
||||
|
||||
void
|
||||
_PySemaphore_Init(_PySemaphore *sema)
|
||||
{
|
||||
#if defined(MS_WINDOWS)
|
||||
sema->platform_sem = CreateSemaphore(
|
||||
NULL, // attributes
|
||||
0, // initial count
|
||||
10, // maximum count
|
||||
NULL // unnamed
|
||||
);
|
||||
if (!sema->platform_sem) {
|
||||
Py_FatalError("parking_lot: CreateSemaphore failed");
|
||||
}
|
||||
#elif defined(_Py_USE_SEMAPHORES)
|
||||
if (sem_init(&sema->platform_sem, /*pshared=*/0, /*value=*/0) < 0) {
|
||||
Py_FatalError("parking_lot: sem_init failed");
|
||||
}
|
||||
#else
|
||||
if (pthread_mutex_init(&sema->mutex, NULL) != 0) {
|
||||
Py_FatalError("parking_lot: pthread_mutex_init failed");
|
||||
}
|
||||
if (pthread_cond_init(&sema->cond, NULL)) {
|
||||
Py_FatalError("parking_lot: pthread_cond_init failed");
|
||||
}
|
||||
sema->counter = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
_PySemaphore_Destroy(_PySemaphore *sema)
|
||||
{
|
||||
#if defined(MS_WINDOWS)
|
||||
CloseHandle(sema->platform_sem);
|
||||
#elif defined(_Py_USE_SEMAPHORES)
|
||||
sem_destroy(&sema->platform_sem);
|
||||
#else
|
||||
pthread_mutex_destroy(&sema->mutex);
|
||||
pthread_cond_destroy(&sema->cond);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int
|
||||
_PySemaphore_PlatformWait(_PySemaphore *sema, _PyTime_t timeout)
|
||||
{
|
||||
int res;
|
||||
#if defined(MS_WINDOWS)
|
||||
DWORD wait;
|
||||
DWORD millis = 0;
|
||||
if (timeout < 0) {
|
||||
millis = INFINITE;
|
||||
}
|
||||
else {
|
||||
millis = (DWORD) (timeout / 1000000);
|
||||
}
|
||||
wait = WaitForSingleObjectEx(sema->platform_sem, millis, FALSE);
|
||||
if (wait == WAIT_OBJECT_0) {
|
||||
res = Py_PARK_OK;
|
||||
}
|
||||
else if (wait == WAIT_TIMEOUT) {
|
||||
res = Py_PARK_TIMEOUT;
|
||||
}
|
||||
else {
|
||||
res = Py_PARK_INTR;
|
||||
}
|
||||
#elif defined(_Py_USE_SEMAPHORES)
|
||||
int err;
|
||||
if (timeout >= 0) {
|
||||
struct timespec ts;
|
||||
|
||||
_PyTime_t deadline = _PyTime_Add(_PyTime_GetSystemClock(), timeout);
|
||||
_PyTime_AsTimespec(deadline, &ts);
|
||||
|
||||
err = sem_timedwait(&sema->platform_sem, &ts);
|
||||
}
|
||||
else {
|
||||
err = sem_wait(&sema->platform_sem);
|
||||
}
|
||||
if (err == -1) {
|
||||
err = errno;
|
||||
if (err == EINTR) {
|
||||
res = Py_PARK_INTR;
|
||||
}
|
||||
else if (err == ETIMEDOUT) {
|
||||
res = Py_PARK_TIMEOUT;
|
||||
}
|
||||
else {
|
||||
_Py_FatalErrorFormat(__func__,
|
||||
"unexpected error from semaphore: %d",
|
||||
err);
|
||||
}
|
||||
}
|
||||
else {
|
||||
res = Py_PARK_OK;
|
||||
}
|
||||
#else
|
||||
pthread_mutex_lock(&sema->mutex);
|
||||
int err = 0;
|
||||
if (sema->counter == 0) {
|
||||
if (timeout >= 0) {
|
||||
struct timespec ts;
|
||||
|
||||
_PyTime_t deadline = _PyTime_Add(_PyTime_GetSystemClock(), timeout);
|
||||
_PyTime_AsTimespec(deadline, &ts);
|
||||
|
||||
err = pthread_cond_timedwait(&sema->cond, &sema->mutex, &ts);
|
||||
}
|
||||
else {
|
||||
err = pthread_cond_wait(&sema->cond, &sema->mutex);
|
||||
}
|
||||
}
|
||||
if (sema->counter > 0) {
|
||||
sema->counter--;
|
||||
res = Py_PARK_OK;
|
||||
}
|
||||
else if (err) {
|
||||
res = Py_PARK_TIMEOUT;
|
||||
}
|
||||
else {
|
||||
res = Py_PARK_INTR;
|
||||
}
|
||||
pthread_mutex_unlock(&sema->mutex);
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
_PySemaphore_Wait(_PySemaphore *sema, _PyTime_t timeout, int detach)
|
||||
{
|
||||
PyThreadState *tstate = NULL;
|
||||
if (detach) {
|
||||
tstate = _PyThreadState_GET();
|
||||
if (tstate) {
|
||||
PyEval_ReleaseThread(tstate);
|
||||
}
|
||||
}
|
||||
|
||||
int res = _PySemaphore_PlatformWait(sema, timeout);
|
||||
|
||||
if (detach && tstate) {
|
||||
PyEval_AcquireThread(tstate);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void
|
||||
_PySemaphore_Wakeup(_PySemaphore *sema)
|
||||
{
|
||||
#if defined(MS_WINDOWS)
|
||||
if (!ReleaseSemaphore(sema->platform_sem, 1, NULL)) {
|
||||
Py_FatalError("parking_lot: ReleaseSemaphore failed");
|
||||
}
|
||||
#elif defined(_Py_USE_SEMAPHORES)
|
||||
int err = sem_post(&sema->platform_sem);
|
||||
if (err != 0) {
|
||||
Py_FatalError("parking_lot: sem_post failed");
|
||||
}
|
||||
#else
|
||||
pthread_mutex_lock(&sema->mutex);
|
||||
sema->counter++;
|
||||
pthread_cond_signal(&sema->cond);
|
||||
pthread_mutex_unlock(&sema->mutex);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
enqueue(Bucket *bucket, const void *address, struct wait_entry *wait)
|
||||
{
|
||||
llist_insert_tail(&bucket->root, &wait->node);
|
||||
++bucket->num_waiters;
|
||||
}
|
||||
|
||||
static struct wait_entry *
|
||||
dequeue(Bucket *bucket, const void *address)
|
||||
{
|
||||
// find the first waiter that is waiting on `address`
|
||||
struct llist_node *root = &bucket->root;
|
||||
struct llist_node *node;
|
||||
llist_for_each(node, root) {
|
||||
struct wait_entry *wait = llist_data(node, struct wait_entry, node);
|
||||
if (wait->addr == (uintptr_t)address) {
|
||||
llist_remove(node);
|
||||
--bucket->num_waiters;
|
||||
return wait;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
dequeue_all(Bucket *bucket, const void *address, struct llist_node *dst)
|
||||
{
|
||||
// remove and append all matching waiters to dst
|
||||
struct llist_node *root = &bucket->root;
|
||||
struct llist_node *node;
|
||||
llist_for_each_safe(node, root) {
|
||||
struct wait_entry *wait = llist_data(node, struct wait_entry, node);
|
||||
if (wait->addr == (uintptr_t)address) {
|
||||
llist_remove(node);
|
||||
llist_insert_tail(dst, node);
|
||||
--bucket->num_waiters;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that `*addr == *expected` (only works for 1, 2, 4, or 8 bytes)
|
||||
static int
|
||||
atomic_memcmp(const void *addr, const void *expected, size_t addr_size)
|
||||
{
|
||||
switch (addr_size) {
|
||||
case 1: return _Py_atomic_load_uint8(addr) == *(const uint8_t *)expected;
|
||||
case 2: return _Py_atomic_load_uint16(addr) == *(const uint16_t *)expected;
|
||||
case 4: return _Py_atomic_load_uint32(addr) == *(const uint32_t *)expected;
|
||||
case 8: return _Py_atomic_load_uint64(addr) == *(const uint64_t *)expected;
|
||||
default: Py_UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
_PyParkingLot_Park(const void *addr, const void *expected, size_t size,
|
||||
_PyTime_t timeout_ns, void *park_arg, int detach)
|
||||
{
|
||||
struct wait_entry wait = {
|
||||
.park_arg = park_arg,
|
||||
.addr = (uintptr_t)addr,
|
||||
.is_unparking = false,
|
||||
};
|
||||
|
||||
Bucket *bucket = &buckets[((uintptr_t)addr) % NUM_BUCKETS];
|
||||
|
||||
_PyRawMutex_Lock(&bucket->mutex);
|
||||
if (!atomic_memcmp(addr, expected, size)) {
|
||||
_PyRawMutex_Unlock(&bucket->mutex);
|
||||
return Py_PARK_AGAIN;
|
||||
}
|
||||
_PySemaphore_Init(&wait.sema);
|
||||
enqueue(bucket, addr, &wait);
|
||||
_PyRawMutex_Unlock(&bucket->mutex);
|
||||
|
||||
int res = _PySemaphore_Wait(&wait.sema, timeout_ns, detach);
|
||||
if (res == Py_PARK_OK) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
// timeout or interrupt
|
||||
_PyRawMutex_Lock(&bucket->mutex);
|
||||
if (wait.is_unparking) {
|
||||
_PyRawMutex_Unlock(&bucket->mutex);
|
||||
// Another thread has started to unpark us. Wait until we process the
|
||||
// wakeup signal.
|
||||
do {
|
||||
res = _PySemaphore_Wait(&wait.sema, -1, detach);
|
||||
} while (res != Py_PARK_OK);
|
||||
goto done;
|
||||
}
|
||||
else {
|
||||
llist_remove(&wait.node);
|
||||
--bucket->num_waiters;
|
||||
}
|
||||
_PyRawMutex_Unlock(&bucket->mutex);
|
||||
|
||||
done:
|
||||
_PySemaphore_Destroy(&wait.sema);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
_PyParkingLot_Unpark(const void *addr, _Py_unpark_fn_t *fn, void *arg)
|
||||
{
|
||||
Bucket *bucket = &buckets[((uintptr_t)addr) % NUM_BUCKETS];
|
||||
|
||||
// Find the first waiter that is waiting on `addr`
|
||||
_PyRawMutex_Lock(&bucket->mutex);
|
||||
struct wait_entry *waiter = dequeue(bucket, addr);
|
||||
if (waiter) {
|
||||
waiter->is_unparking = true;
|
||||
|
||||
int has_more_waiters = (bucket->num_waiters > 0);
|
||||
fn(arg, waiter->park_arg, has_more_waiters);
|
||||
}
|
||||
else {
|
||||
fn(arg, NULL, 0);
|
||||
}
|
||||
_PyRawMutex_Unlock(&bucket->mutex);
|
||||
|
||||
if (waiter) {
|
||||
// Wakeup the waiter outside of the bucket lock
|
||||
_PySemaphore_Wakeup(&waiter->sema);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
_PyParkingLot_UnparkAll(const void *addr)
|
||||
{
|
||||
struct llist_node head = LLIST_INIT(head);
|
||||
Bucket *bucket = &buckets[((uintptr_t)addr) % NUM_BUCKETS];
|
||||
|
||||
_PyRawMutex_Lock(&bucket->mutex);
|
||||
dequeue_all(bucket, addr, &head);
|
||||
_PyRawMutex_Unlock(&bucket->mutex);
|
||||
|
||||
struct llist_node *node;
|
||||
llist_for_each_safe(node, &head) {
|
||||
struct wait_entry *waiter = llist_data(node, struct wait_entry, node);
|
||||
llist_remove(node);
|
||||
_PySemaphore_Wakeup(&waiter->sema);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
_PyParkingLot_AfterFork(void)
|
||||
{
|
||||
// After a fork only one thread remains. That thread cannot be blocked
|
||||
// so all entries in the parking lot are for dead threads.
|
||||
memset(buckets, 0, sizeof(buckets));
|
||||
for (Py_ssize_t i = 0; i < NUM_BUCKETS; i++) {
|
||||
llist_init(&buckets[i].root);
|
||||
}
|
||||
}
|
@ -9,6 +9,7 @@
|
||||
#include "pycore_frame.h"
|
||||
#include "pycore_initconfig.h" // _PyStatus_OK()
|
||||
#include "pycore_object.h" // _PyType_InitCache()
|
||||
#include "pycore_parking_lot.h" // _PyParkingLot_AfterFork()
|
||||
#include "pycore_pyerrors.h" // _PyErr_Clear()
|
||||
#include "pycore_pylifecycle.h" // _PyAST_Fini()
|
||||
#include "pycore_pymem.h" // _PyMem_SetDefaultAllocator()
|
||||
@ -554,6 +555,10 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
|
||||
|
||||
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
|
||||
|
||||
// Clears the parking lot. Any waiting threads are dead. This must be
|
||||
// called before releasing any locks that use the parking lot.
|
||||
_PyParkingLot_AfterFork();
|
||||
|
||||
/* bpo-42540: id_mutex is freed by _PyInterpreterState_Delete, which does
|
||||
* not force the default allocator. */
|
||||
reinit_err += _PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex);
|
||||
|
@ -314,6 +314,7 @@ MAX_SIZES = {
|
||||
_abs('Objects/stringlib/unicode_format.h'): (10_000, 400),
|
||||
_abs('Objects/typeobject.c'): (35_000, 200),
|
||||
_abs('Python/compile.c'): (20_000, 500),
|
||||
_abs('Python/parking_lot.c'): (40_000, 1000),
|
||||
_abs('Python/pylifecycle.c'): (500_000, 5000),
|
||||
_abs('Python/pystate.c'): (500_000, 5000),
|
||||
|
||||
|
@ -50,6 +50,9 @@ Python/getversion.c - version -
|
||||
Python/bootstrap_hash.c - _Py_HashSecret_Initialized -
|
||||
Python/pyhash.c - _Py_HashSecret -
|
||||
|
||||
## thread-safe hashtable (internal locks)
|
||||
Python/parking_lot.c - buckets -
|
||||
|
||||
|
||||
##################################
|
||||
## state tied to Py_Main()
|
||||
|
Can't render this file because it has a wrong number of fields in line 4.
|
53
Tools/lockbench/lockbench.py
Normal file
53
Tools/lockbench/lockbench.py
Normal file
@ -0,0 +1,53 @@
|
||||
# Measure the performance of PyMutex and PyThread_type_lock locks
|
||||
# with short critical sections.
|
||||
#
|
||||
# Usage: python Tools/lockbench/lockbench.py [CRITICAL_SECTION_LENGTH]
|
||||
#
|
||||
# How to interpret the results:
|
||||
#
|
||||
# Acquisitions (kHz): Reports the total number of lock acquisitions in
|
||||
# thousands of acquisitions per second. This is the most important metric,
|
||||
# particularly for the 1 thread case because even in multithreaded programs,
|
||||
# most locks acquisitions are not contended. Values for 2+ threads are
|
||||
# only meaningful for `--disable-gil` builds, because the GIL prevents most
|
||||
# situations where there is lock contention with short critical sections.
|
||||
#
|
||||
# Fairness: A measure of how evenly the lock acquisitions are distributed.
|
||||
# A fairness of 1.0 means that all threads acquired the lock the same number
|
||||
# of times. A fairness of 1/N means that only one thread ever acquired the
|
||||
# lock.
|
||||
# See https://en.wikipedia.org/wiki/Fairness_measure#Jain's_fairness_index
|
||||
|
||||
from _testinternalcapi import benchmark_locks
|
||||
import sys
|
||||
|
||||
# Max number of threads to test
|
||||
MAX_THREADS = 10
|
||||
|
||||
# How much "work" to do while holding the lock
|
||||
CRITICAL_SECTION_LENGTH = 1
|
||||
|
||||
|
||||
def jains_fairness(values):
|
||||
# Jain's fairness index
|
||||
# See https://en.wikipedia.org/wiki/Fairness_measure
|
||||
return (sum(values) ** 2) / (len(values) * sum(x ** 2 for x in values))
|
||||
|
||||
def main():
|
||||
print("Lock Type Threads Acquisitions (kHz) Fairness")
|
||||
for lock_type in ["PyMutex", "PyThread_type_lock"]:
|
||||
use_pymutex = (lock_type == "PyMutex")
|
||||
for num_threads in range(1, MAX_THREADS + 1):
|
||||
acquisitions, thread_iters = benchmark_locks(
|
||||
num_threads, use_pymutex, CRITICAL_SECTION_LENGTH)
|
||||
|
||||
acquisitions /= 1000 # report in kHz for readability
|
||||
fairness = jains_fairness(thread_iters)
|
||||
|
||||
print(f"{lock_type: <20}{num_threads: <18}{acquisitions: >5.0f}{fairness: >20.2f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) > 1:
|
||||
CRITICAL_SECTION_LENGTH = int(sys.argv[1])
|
||||
main()
|
8
configure
generated
vendored
8
configure
generated
vendored
@ -27760,7 +27760,7 @@ fi
|
||||
#
|
||||
# Avoid #include <Python.h> or #include <pyport.h>. The <Python.h> header
|
||||
# requires <pyconfig.h> header which is only written below by AC_OUTPUT below.
|
||||
# If the check is done after AC_OUTPUT, modifying LIBATOMIC has no effect
|
||||
# If the check is done after AC_OUTPUT, modifying LIBS has no effect
|
||||
# anymore. <pyport.h> cannot be included alone, it's designed to be included
|
||||
# by <Python.h>: it expects other includes and macros to be defined.
|
||||
save_CPPFLAGS=$CPPFLAGS
|
||||
@ -27793,7 +27793,7 @@ typedef intptr_t Py_ssize_t;
|
||||
# error "unable to define Py_ssize_t"
|
||||
#endif
|
||||
|
||||
#include "cpython/pyatomic.h"
|
||||
#include "pyatomic.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
@ -27825,7 +27825,7 @@ printf "%s\n" "$ac_cv_libatomic_needed" >&6; }
|
||||
|
||||
if test "x$ac_cv_libatomic_needed" = xyes
|
||||
then :
|
||||
LIBATOMIC=${LIBATOMIC-"-latomic"}
|
||||
LIBS="${LIBS} -latomic"
|
||||
fi
|
||||
CPPFLAGS=$save_CPPFLAGS
|
||||
|
||||
@ -29979,7 +29979,7 @@ fi
|
||||
then :
|
||||
|
||||
|
||||
as_fn_append MODULE_BLOCK "MODULE__TESTCAPI_LDFLAGS=$LIBATOMIC$as_nl"
|
||||
|
||||
|
||||
fi
|
||||
if test "$py_cv_module__testcapi" = yes; then
|
||||
|
11
configure.ac
11
configure.ac
@ -6970,7 +6970,7 @@ fi
|
||||
#
|
||||
# Avoid #include <Python.h> or #include <pyport.h>. The <Python.h> header
|
||||
# requires <pyconfig.h> header which is only written below by AC_OUTPUT below.
|
||||
# If the check is done after AC_OUTPUT, modifying LIBATOMIC has no effect
|
||||
# If the check is done after AC_OUTPUT, modifying LIBS has no effect
|
||||
# anymore. <pyport.h> cannot be included alone, it's designed to be included
|
||||
# by <Python.h>: it expects other includes and macros to be defined.
|
||||
_SAVE_VAR([CPPFLAGS])
|
||||
@ -6993,7 +6993,7 @@ typedef intptr_t Py_ssize_t;
|
||||
# error "unable to define Py_ssize_t"
|
||||
#endif
|
||||
|
||||
#include "cpython/pyatomic.h"
|
||||
#include "pyatomic.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
@ -7014,7 +7014,7 @@ int main()
|
||||
])
|
||||
|
||||
AS_VAR_IF([ac_cv_libatomic_needed], [yes],
|
||||
[LIBATOMIC=${LIBATOMIC-"-latomic"}])
|
||||
[LIBS="${LIBS} -latomic"])
|
||||
_RESTORE_VAR([CPPFLAGS])
|
||||
|
||||
|
||||
@ -7286,10 +7286,7 @@ PY_STDLIB_MOD([_hashlib], [], [test "$ac_cv_working_openssl_hashlib" = yes],
|
||||
[$OPENSSL_INCLUDES], [$OPENSSL_LDFLAGS $OPENSSL_LDFLAGS_RPATH $LIBCRYPTO_LIBS])
|
||||
|
||||
dnl test modules
|
||||
PY_STDLIB_MOD([_testcapi],
|
||||
[test "$TEST_MODULES" = yes], []
|
||||
dnl Modules/_testcapi/pyatomic.c uses <cpython/pyatomic.h> header
|
||||
[], [], [$LIBATOMIC])
|
||||
PY_STDLIB_MOD([_testcapi], [test "$TEST_MODULES" = yes])
|
||||
PY_STDLIB_MOD([_testclinic], [test "$TEST_MODULES" = yes])
|
||||
PY_STDLIB_MOD([_testclinic_limited], [test "$TEST_MODULES" = yes])
|
||||
PY_STDLIB_MOD([_testinternalcapi], [test "$TEST_MODULES" = yes])
|
||||
|
Loading…
Reference in New Issue
Block a user