diff --git a/Lib/test/test_ctypes/test_dlerror.py b/Lib/test/test_ctypes/test_dlerror.py new file mode 100644 index 00000000000..4441e30cd7a --- /dev/null +++ b/Lib/test/test_ctypes/test_dlerror.py @@ -0,0 +1,123 @@ +import os +import sys +import unittest +import platform + +FOO_C = r""" +#include + +/* This is a 'GNU indirect function' (IFUNC) that will be called by + dlsym() to resolve the symbol "foo" to an address. Typically, such + a function would return the address of an actual function, but it + can also just return NULL. For some background on IFUNCs, see + https://willnewton.name/uncategorized/using-gnu-indirect-functions. + + Adapted from Michael Kerrisk's answer: https://stackoverflow.com/a/53590014. +*/ + +asm (".type foo STT_GNU_IFUNC"); + +void *foo(void) +{ + write($DESCRIPTOR, "OK", 2); + return NULL; +} +""" + + +@unittest.skipUnless(sys.platform.startswith('linux'), + 'Test only valid for Linux') +class TestNullDlsym(unittest.TestCase): + """GH-126554: Ensure that we catch NULL dlsym return values + + In rare cases, such as when using GNU IFUNCs, dlsym(), + the C function that ctypes' CDLL uses to get the address + of symbols, can return NULL. + + The objective way of telling if an error during symbol + lookup happened is to call glibc's dlerror() and check + for a non-NULL return value. + + However, there can be cases where dlsym() returns NULL + and dlerror() is also NULL, meaning that glibc did not + encounter any error. + + In the case of ctypes, we subjectively treat that as + an error, and throw a relevant exception. + + This test case ensures that we correctly enforce + this 'dlsym returned NULL -> throw Error' rule. + """ + + def test_null_dlsym(self): + import subprocess + import tempfile + + # To avoid ImportErrors on Windows, where _ctypes does not have + # dlopen and dlsym, + # import here, i.e., inside the test function. + # The skipUnless('linux') decorator ensures that we're on linux + # if we're executing these statements. + from ctypes import CDLL, c_int + from _ctypes import dlopen, dlsym + + retcode = subprocess.call(["gcc", "--version"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + if retcode != 0: + self.skipTest("gcc is missing") + + pipe_r, pipe_w = os.pipe() + self.addCleanup(os.close, pipe_r) + self.addCleanup(os.close, pipe_w) + + with tempfile.TemporaryDirectory() as d: + # Create a C file with a GNU Indirect Function (FOO_C) + # and compile it into a shared library. + srcname = os.path.join(d, 'foo.c') + dstname = os.path.join(d, 'libfoo.so') + with open(srcname, 'w') as f: + f.write(FOO_C.replace('$DESCRIPTOR', str(pipe_w))) + args = ['gcc', '-fPIC', '-shared', '-o', dstname, srcname] + p = subprocess.run(args, capture_output=True) + + if p.returncode != 0: + # IFUNC is not supported on all architectures. + if platform.machine() == 'x86_64': + # It should be supported here. Something else went wrong. + p.check_returncode() + else: + # IFUNC might not be supported on this machine. + self.skipTest(f"could not compile indirect function: {p}") + + # Case #1: Test 'PyCFuncPtr_FromDll' from Modules/_ctypes/_ctypes.c + L = CDLL(dstname) + with self.assertRaisesRegex(AttributeError, "function 'foo' not found"): + # Try accessing the 'foo' symbol. + # It should resolve via dlsym() to NULL, + # and since we subjectively treat NULL + # addresses as errors, we should get + # an error. + L.foo + + # Assert that the IFUNC was called + self.assertEqual(os.read(pipe_r, 2), b'OK') + + # Case #2: Test 'CDataType_in_dll_impl' from Modules/_ctypes/_ctypes.c + with self.assertRaisesRegex(ValueError, "symbol 'foo' not found"): + c_int.in_dll(L, "foo") + + # Assert that the IFUNC was called + self.assertEqual(os.read(pipe_r, 2), b'OK') + + # Case #3: Test 'py_dl_sym' from Modules/_ctypes/callproc.c + L = dlopen(dstname) + with self.assertRaisesRegex(OSError, "symbol 'foo' not found"): + dlsym(L, "foo") + + # Assert that the IFUNC was called + self.assertEqual(os.read(pipe_r, 2), b'OK') + + +if __name__ == "__main__": + unittest.main() diff --git a/Misc/NEWS.d/next/C_API/2024-11-07-20-24-58.gh-issue-126554.ri12eb.rst b/Misc/NEWS.d/next/C_API/2024-11-07-20-24-58.gh-issue-126554.ri12eb.rst new file mode 100644 index 00000000000..6af89c7d470 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-11-07-20-24-58.gh-issue-126554.ri12eb.rst @@ -0,0 +1,2 @@ +Fix error handling in :class:`ctypes.CDLL` objects +which could result in a crash in rare situations. diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index eae69e484e1..34529bce496 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -956,32 +956,48 @@ CDataType_in_dll_impl(PyObject *type, PyTypeObject *cls, PyObject *dll, return NULL; } +#undef USE_DLERROR #ifdef MS_WIN32 Py_BEGIN_ALLOW_THREADS address = (void *)GetProcAddress(handle, name); Py_END_ALLOW_THREADS - if (!address) { - PyErr_Format(PyExc_ValueError, - "symbol '%s' not found", - name); - return NULL; - } #else + #ifdef __CYGWIN__ + // dlerror() isn't very helpful on cygwin + #else + #define USE_DLERROR + /* dlerror() always returns the latest error. + * + * Clear the previous value before calling dlsym(), + * to ensure we can tell if our call resulted in an error. + */ + (void)dlerror(); + #endif address = (void *)dlsym(handle, name); - if (!address) { -#ifdef __CYGWIN__ -/* dlerror() isn't very helpful on cygwin */ - PyErr_Format(PyExc_ValueError, - "symbol '%s' not found", - name); -#else - PyErr_SetString(PyExc_ValueError, dlerror()); #endif - return NULL; + + if (address) { + ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); + return PyCData_AtAddress(st, type, address); } -#endif - ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); - return PyCData_AtAddress(st, type, address); + + #ifdef USE_DLERROR + const char *dlerr = dlerror(); + if (dlerr) { + PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape"); + if (message) { + PyErr_SetObject(PyExc_ValueError, message); + Py_DECREF(message); + return NULL; + } + // Ignore errors from PyUnicode_DecodeLocale, + // fall back to the generic error below. + PyErr_Clear(); + } + #endif +#undef USE_DLERROR + PyErr_Format(PyExc_ValueError, "symbol '%s' not found", name); + return NULL; } /*[clinic input] @@ -3759,6 +3775,7 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } +#undef USE_DLERROR #ifdef MS_WIN32 address = FindAddress(handle, name, (PyObject *)type); if (!address) { @@ -3774,20 +3791,41 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } #else + #ifdef __CYGWIN__ + //dlerror() isn't very helpful on cygwin */ + #else + #define USE_DLERROR + /* dlerror() always returns the latest error. + * + * Clear the previous value before calling dlsym(), + * to ensure we can tell if our call resulted in an error. + */ + (void)dlerror(); + #endif address = (PPROC)dlsym(handle, name); + if (!address) { -#ifdef __CYGWIN__ -/* dlerror() isn't very helpful on cygwin */ - PyErr_Format(PyExc_AttributeError, - "function '%s' not found", - name); -#else - PyErr_SetString(PyExc_AttributeError, dlerror()); -#endif + #ifdef USE_DLERROR + const char *dlerr = dlerror(); + if (dlerr) { + PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape"); + if (message) { + PyErr_SetObject(PyExc_AttributeError, message); + Py_DECREF(ftuple); + Py_DECREF(message); + return NULL; + } + // Ignore errors from PyUnicode_DecodeLocale, + // fall back to the generic error below. + PyErr_Clear(); + } + #endif + PyErr_Format(PyExc_AttributeError, "function '%s' not found", name); Py_DECREF(ftuple); return NULL; } #endif +#undef USE_DLERROR ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); if (!_validate_paramflags(st, type, paramflags)) { Py_DECREF(ftuple); diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c index 5ac9cf16681..218c3a9c81e 100644 --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -1623,13 +1623,39 @@ static PyObject *py_dl_sym(PyObject *self, PyObject *args) if (PySys_Audit("ctypes.dlsym/handle", "O", args) < 0) { return NULL; } +#undef USE_DLERROR + #ifdef __CYGWIN__ + // dlerror() isn't very helpful on cygwin + #else + #define USE_DLERROR + /* dlerror() always returns the latest error. + * + * Clear the previous value before calling dlsym(), + * to ensure we can tell if our call resulted in an error. + */ + (void)dlerror(); + #endif ptr = dlsym((void*)handle, name); - if (!ptr) { - PyErr_SetString(PyExc_OSError, - dlerror()); - return NULL; + if (ptr) { + return PyLong_FromVoidPtr(ptr); } - return PyLong_FromVoidPtr(ptr); + #ifdef USE_DLERROR + const char *dlerr = dlerror(); + if (dlerr) { + PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape"); + if (message) { + PyErr_SetObject(PyExc_OSError, message); + Py_DECREF(message); + return NULL; + } + // Ignore errors from PyUnicode_DecodeLocale, + // fall back to the generic error below. + PyErr_Clear(); + } + #endif + #undef USE_DLERROR + PyErr_Format(PyExc_OSError, "symbol '%s' not found", name); + return NULL; } #endif