From 8717f792f7cc343599dc60daf80630cceb66145b Mon Sep 17 00:00:00 2001 From: George Alexopoulos Date: Fri, 15 Nov 2024 12:05:51 +0200 Subject: [PATCH] gh-126554: ctypes: Correctly handle NULL dlsym values (GH-126555) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For dlsym(), a return value of NULL does not necessarily indicate an error [1]. Therefore, to avoid using stale (or NULL) dlerror() values, we must: 1. clear the previous error state by calling dlerror() 2. call dlsym() 3. call dlerror() If the return value of dlerror() is not NULL, an error occured. In ctypes we choose to treat a NULL return value from dlsym() as a "not found" error. This is the same as the fallback message we use on Windows, Cygwin or when getting/formatting the error reason fails. [1]: https://man7.org/linux/man-pages/man3/dlsym.3.html Signed-off-by: Georgios Alexopoulos Signed-off-by: Georgios Alexopoulos Co-authored-by: Peter Bierma Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Petr Viktorin --- Lib/test/test_ctypes/test_dlerror.py | 123 ++++++++++++++++++ ...-11-07-20-24-58.gh-issue-126554.ri12eb.rst | 2 + Modules/_ctypes/_ctypes.c | 90 +++++++++---- Modules/_ctypes/callproc.c | 36 ++++- 4 files changed, 220 insertions(+), 31 deletions(-) create mode 100644 Lib/test/test_ctypes/test_dlerror.py create mode 100644 Misc/NEWS.d/next/C_API/2024-11-07-20-24-58.gh-issue-126554.ri12eb.rst diff --git a/Lib/test/test_ctypes/test_dlerror.py b/Lib/test/test_ctypes/test_dlerror.py new file mode 100644 index 00000000000..4441e30cd7a --- /dev/null +++ b/Lib/test/test_ctypes/test_dlerror.py @@ -0,0 +1,123 @@ +import os +import sys +import unittest +import platform + +FOO_C = r""" +#include + +/* This is a 'GNU indirect function' (IFUNC) that will be called by + dlsym() to resolve the symbol "foo" to an address. Typically, such + a function would return the address of an actual function, but it + can also just return NULL. For some background on IFUNCs, see + https://willnewton.name/uncategorized/using-gnu-indirect-functions. + + Adapted from Michael Kerrisk's answer: https://stackoverflow.com/a/53590014. +*/ + +asm (".type foo STT_GNU_IFUNC"); + +void *foo(void) +{ + write($DESCRIPTOR, "OK", 2); + return NULL; +} +""" + + +@unittest.skipUnless(sys.platform.startswith('linux'), + 'Test only valid for Linux') +class TestNullDlsym(unittest.TestCase): + """GH-126554: Ensure that we catch NULL dlsym return values + + In rare cases, such as when using GNU IFUNCs, dlsym(), + the C function that ctypes' CDLL uses to get the address + of symbols, can return NULL. + + The objective way of telling if an error during symbol + lookup happened is to call glibc's dlerror() and check + for a non-NULL return value. + + However, there can be cases where dlsym() returns NULL + and dlerror() is also NULL, meaning that glibc did not + encounter any error. + + In the case of ctypes, we subjectively treat that as + an error, and throw a relevant exception. + + This test case ensures that we correctly enforce + this 'dlsym returned NULL -> throw Error' rule. + """ + + def test_null_dlsym(self): + import subprocess + import tempfile + + # To avoid ImportErrors on Windows, where _ctypes does not have + # dlopen and dlsym, + # import here, i.e., inside the test function. + # The skipUnless('linux') decorator ensures that we're on linux + # if we're executing these statements. + from ctypes import CDLL, c_int + from _ctypes import dlopen, dlsym + + retcode = subprocess.call(["gcc", "--version"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + if retcode != 0: + self.skipTest("gcc is missing") + + pipe_r, pipe_w = os.pipe() + self.addCleanup(os.close, pipe_r) + self.addCleanup(os.close, pipe_w) + + with tempfile.TemporaryDirectory() as d: + # Create a C file with a GNU Indirect Function (FOO_C) + # and compile it into a shared library. + srcname = os.path.join(d, 'foo.c') + dstname = os.path.join(d, 'libfoo.so') + with open(srcname, 'w') as f: + f.write(FOO_C.replace('$DESCRIPTOR', str(pipe_w))) + args = ['gcc', '-fPIC', '-shared', '-o', dstname, srcname] + p = subprocess.run(args, capture_output=True) + + if p.returncode != 0: + # IFUNC is not supported on all architectures. + if platform.machine() == 'x86_64': + # It should be supported here. Something else went wrong. + p.check_returncode() + else: + # IFUNC might not be supported on this machine. + self.skipTest(f"could not compile indirect function: {p}") + + # Case #1: Test 'PyCFuncPtr_FromDll' from Modules/_ctypes/_ctypes.c + L = CDLL(dstname) + with self.assertRaisesRegex(AttributeError, "function 'foo' not found"): + # Try accessing the 'foo' symbol. + # It should resolve via dlsym() to NULL, + # and since we subjectively treat NULL + # addresses as errors, we should get + # an error. + L.foo + + # Assert that the IFUNC was called + self.assertEqual(os.read(pipe_r, 2), b'OK') + + # Case #2: Test 'CDataType_in_dll_impl' from Modules/_ctypes/_ctypes.c + with self.assertRaisesRegex(ValueError, "symbol 'foo' not found"): + c_int.in_dll(L, "foo") + + # Assert that the IFUNC was called + self.assertEqual(os.read(pipe_r, 2), b'OK') + + # Case #3: Test 'py_dl_sym' from Modules/_ctypes/callproc.c + L = dlopen(dstname) + with self.assertRaisesRegex(OSError, "symbol 'foo' not found"): + dlsym(L, "foo") + + # Assert that the IFUNC was called + self.assertEqual(os.read(pipe_r, 2), b'OK') + + +if __name__ == "__main__": + unittest.main() diff --git a/Misc/NEWS.d/next/C_API/2024-11-07-20-24-58.gh-issue-126554.ri12eb.rst b/Misc/NEWS.d/next/C_API/2024-11-07-20-24-58.gh-issue-126554.ri12eb.rst new file mode 100644 index 00000000000..6af89c7d470 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-11-07-20-24-58.gh-issue-126554.ri12eb.rst @@ -0,0 +1,2 @@ +Fix error handling in :class:`ctypes.CDLL` objects +which could result in a crash in rare situations. diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index eae69e484e1..34529bce496 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -956,32 +956,48 @@ CDataType_in_dll_impl(PyObject *type, PyTypeObject *cls, PyObject *dll, return NULL; } +#undef USE_DLERROR #ifdef MS_WIN32 Py_BEGIN_ALLOW_THREADS address = (void *)GetProcAddress(handle, name); Py_END_ALLOW_THREADS - if (!address) { - PyErr_Format(PyExc_ValueError, - "symbol '%s' not found", - name); - return NULL; - } #else + #ifdef __CYGWIN__ + // dlerror() isn't very helpful on cygwin + #else + #define USE_DLERROR + /* dlerror() always returns the latest error. + * + * Clear the previous value before calling dlsym(), + * to ensure we can tell if our call resulted in an error. + */ + (void)dlerror(); + #endif address = (void *)dlsym(handle, name); - if (!address) { -#ifdef __CYGWIN__ -/* dlerror() isn't very helpful on cygwin */ - PyErr_Format(PyExc_ValueError, - "symbol '%s' not found", - name); -#else - PyErr_SetString(PyExc_ValueError, dlerror()); #endif - return NULL; + + if (address) { + ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); + return PyCData_AtAddress(st, type, address); } -#endif - ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); - return PyCData_AtAddress(st, type, address); + + #ifdef USE_DLERROR + const char *dlerr = dlerror(); + if (dlerr) { + PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape"); + if (message) { + PyErr_SetObject(PyExc_ValueError, message); + Py_DECREF(message); + return NULL; + } + // Ignore errors from PyUnicode_DecodeLocale, + // fall back to the generic error below. + PyErr_Clear(); + } + #endif +#undef USE_DLERROR + PyErr_Format(PyExc_ValueError, "symbol '%s' not found", name); + return NULL; } /*[clinic input] @@ -3759,6 +3775,7 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } +#undef USE_DLERROR #ifdef MS_WIN32 address = FindAddress(handle, name, (PyObject *)type); if (!address) { @@ -3774,20 +3791,41 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } #else + #ifdef __CYGWIN__ + //dlerror() isn't very helpful on cygwin */ + #else + #define USE_DLERROR + /* dlerror() always returns the latest error. + * + * Clear the previous value before calling dlsym(), + * to ensure we can tell if our call resulted in an error. + */ + (void)dlerror(); + #endif address = (PPROC)dlsym(handle, name); + if (!address) { -#ifdef __CYGWIN__ -/* dlerror() isn't very helpful on cygwin */ - PyErr_Format(PyExc_AttributeError, - "function '%s' not found", - name); -#else - PyErr_SetString(PyExc_AttributeError, dlerror()); -#endif + #ifdef USE_DLERROR + const char *dlerr = dlerror(); + if (dlerr) { + PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape"); + if (message) { + PyErr_SetObject(PyExc_AttributeError, message); + Py_DECREF(ftuple); + Py_DECREF(message); + return NULL; + } + // Ignore errors from PyUnicode_DecodeLocale, + // fall back to the generic error below. + PyErr_Clear(); + } + #endif + PyErr_Format(PyExc_AttributeError, "function '%s' not found", name); Py_DECREF(ftuple); return NULL; } #endif +#undef USE_DLERROR ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); if (!_validate_paramflags(st, type, paramflags)) { Py_DECREF(ftuple); diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c index 5ac9cf16681..218c3a9c81e 100644 --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -1623,13 +1623,39 @@ static PyObject *py_dl_sym(PyObject *self, PyObject *args) if (PySys_Audit("ctypes.dlsym/handle", "O", args) < 0) { return NULL; } +#undef USE_DLERROR + #ifdef __CYGWIN__ + // dlerror() isn't very helpful on cygwin + #else + #define USE_DLERROR + /* dlerror() always returns the latest error. + * + * Clear the previous value before calling dlsym(), + * to ensure we can tell if our call resulted in an error. + */ + (void)dlerror(); + #endif ptr = dlsym((void*)handle, name); - if (!ptr) { - PyErr_SetString(PyExc_OSError, - dlerror()); - return NULL; + if (ptr) { + return PyLong_FromVoidPtr(ptr); } - return PyLong_FromVoidPtr(ptr); + #ifdef USE_DLERROR + const char *dlerr = dlerror(); + if (dlerr) { + PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape"); + if (message) { + PyErr_SetObject(PyExc_OSError, message); + Py_DECREF(message); + return NULL; + } + // Ignore errors from PyUnicode_DecodeLocale, + // fall back to the generic error below. + PyErr_Clear(); + } + #endif + #undef USE_DLERROR + PyErr_Format(PyExc_OSError, "symbol '%s' not found", name); + return NULL; } #endif