From c00964ecd508ba6ae43498017d5a3873844a058a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 29 Sep 2024 02:25:23 +0200 Subject: [PATCH] gh-124665: Add `_PyCodec_UnregisterError` and `_codecs._unregister_error` (#124677) --- Include/internal/pycore_codecs.h | 11 ++++++ Lib/test/test_codeccallbacks.py | 27 ++++++++++++++- Modules/_codecsmodule.c | 25 +++++++++++++ Modules/clinic/_codecsmodule.c.h | 52 +++++++++++++++++++++++++++- Python/codecs.c | 22 ++++++++++++ Tools/c-analyzer/cpython/ignored.tsv | 1 + 6 files changed, 136 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h index 5e2d5c5ce9d..4400be8b33d 100644 --- a/Include/internal/pycore_codecs.h +++ b/Include/internal/pycore_codecs.h @@ -21,6 +21,17 @@ extern void _PyCodec_Fini(PyInterpreterState *interp); extern PyObject* _PyCodec_Lookup(const char *encoding); +/* + * Un-register the error handling callback function registered under + * the given 'name'. Only custom error handlers can be un-registered. + * + * - Return -1 and set an exception if 'name' refers to a built-in + * error handling name (e.g., 'strict'), or if an error occurred. + * - Return 0 if no custom error handler can be found for 'name'. + * - Return 1 if the custom error handler was successfully removed. + */ +extern int _PyCodec_UnregisterError(const char *name); + /* Text codec specific encoding and decoding API. Checks the encoding against a list of codecs which do not diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 4991330489d..86e5e5c1474 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -1,3 +1,4 @@ +from _codecs import _unregister_error as _codecs_unregister_error import codecs import html.entities import itertools @@ -1210,7 +1211,6 @@ class CodecCallbackTest(unittest.TestCase): '\ufffd\x00\x00' ) - def test_fake_error_class(self): handlers = [ codecs.strict_errors, @@ -1235,6 +1235,31 @@ class CodecCallbackTest(unittest.TestCase): with self.assertRaises((TypeError, FakeUnicodeError)): handler(FakeUnicodeError()) + def test_reject_unregister_builtin_error_handler(self): + for name in [ + 'strict', 'ignore', 'replace', 'backslashreplace', 'namereplace', + 'xmlcharrefreplace', 'surrogateescape', 'surrogatepass', + ]: + with self.subTest(name): + self.assertRaises(ValueError, _codecs_unregister_error, name) + + def test_unregister_custom_error_handler(self): + def custom_handler(exc): + raise exc + + custom_name = 'test.test_unregister_custom_error_handler' + self.assertRaises(LookupError, codecs.lookup_error, custom_name) + codecs.register_error(custom_name, custom_handler) + self.assertIs(codecs.lookup_error(custom_name), custom_handler) + self.assertTrue(_codecs_unregister_error(custom_name)) + self.assertRaises(LookupError, codecs.lookup_error, custom_name) + + def test_unregister_custom_unknown_error_handler(self): + unknown_name = 'test.test_unregister_custom_unknown_error_handler' + self.assertRaises(LookupError, codecs.lookup_error, unknown_name) + self.assertFalse(_codecs_unregister_error(unknown_name)) + self.assertRaises(LookupError, codecs.lookup_error, unknown_name) + if __name__ == "__main__": unittest.main() diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 32373f0799b..471b42badc8 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -979,6 +979,30 @@ _codecs_register_error_impl(PyObject *module, const char *errors, Py_RETURN_NONE; } +/*[clinic input] +_codecs._unregister_error -> bool + errors: str + / + +Un-register the specified error handler for the error handling `errors'. + +Only custom error handlers can be un-registered. An exception is raised +if the error handling is a built-in one (e.g., 'strict'), or if an error +occurs. + +Otherwise, this returns True if a custom handler has been successfully +un-registered, and False if no custom handler for the specified error +handling exists. + +[clinic start generated code]*/ + +static int +_codecs__unregister_error_impl(PyObject *module, const char *errors) +/*[clinic end generated code: output=28c22be667465503 input=a63ab9e9ce1686d4]*/ +{ + return _PyCodec_UnregisterError(errors); +} + /*[clinic input] _codecs.lookup_error name: str @@ -1044,6 +1068,7 @@ static PyMethodDef _codecs_functions[] = { _CODECS_CODE_PAGE_ENCODE_METHODDEF _CODECS_CODE_PAGE_DECODE_METHODDEF _CODECS_REGISTER_ERROR_METHODDEF + _CODECS__UNREGISTER_ERROR_METHODDEF _CODECS_LOOKUP_ERROR_METHODDEF {NULL, NULL} /* sentinel */ }; diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h index 1c0f37442ab..01855aec5e1 100644 --- a/Modules/clinic/_codecsmodule.c.h +++ b/Modules/clinic/_codecsmodule.c.h @@ -2683,6 +2683,56 @@ exit: return return_value; } +PyDoc_STRVAR(_codecs__unregister_error__doc__, +"_unregister_error($module, errors, /)\n" +"--\n" +"\n" +"Un-register the specified error handler for the error handling `errors\'.\n" +"\n" +"Only custom error handlers can be un-registered. An exception is raised\n" +"if the error handling is a built-in one (e.g., \'strict\'), or if an error\n" +"occurs.\n" +"\n" +"Otherwise, this returns True if a custom handler has been successfully\n" +"un-registered, and False if no custom handler for the specified error\n" +"handling exists."); + +#define _CODECS__UNREGISTER_ERROR_METHODDEF \ + {"_unregister_error", (PyCFunction)_codecs__unregister_error, METH_O, _codecs__unregister_error__doc__}, + +static int +_codecs__unregister_error_impl(PyObject *module, const char *errors); + +static PyObject * +_codecs__unregister_error(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + const char *errors; + int _return_value; + + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("_unregister_error", "argument", "str", arg); + goto exit; + } + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(arg, &errors_length); + if (errors == NULL) { + goto exit; + } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } + _return_value = _codecs__unregister_error_impl(module, errors); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyBool_FromLong((long)_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(_codecs_lookup_error__doc__, "lookup_error($module, name, /)\n" "--\n" @@ -2746,4 +2796,4 @@ exit: #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF #define _CODECS_CODE_PAGE_ENCODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */ -/*[clinic end generated code: output=e50d5fdf65bd45fa input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b3013d4709d96ffe input=a9049054013a1b77]*/ diff --git a/Python/codecs.c b/Python/codecs.c index 9c0a3fad314..68dc232bb86 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -16,6 +16,12 @@ Copyright (c) Corporation for National Research Initiatives. #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI +static const char *codecs_builtin_error_handlers[] = { + "strict", "ignore", "replace", + "xmlcharrefreplace", "backslashreplace", "namereplace", + "surrogatepass", "surrogateescape", +}; + const char *Py_hexdigits = "0123456789abcdef"; /* --- Codec Registry ----------------------------------------------------- */ @@ -618,6 +624,20 @@ int PyCodec_RegisterError(const char *name, PyObject *error) name, error); } +int _PyCodec_UnregisterError(const char *name) +{ + for (size_t i = 0; i < Py_ARRAY_LENGTH(codecs_builtin_error_handlers); ++i) { + if (strcmp(name, codecs_builtin_error_handlers[i]) == 0) { + PyErr_Format(PyExc_ValueError, + "cannot un-register built-in error handler '%s'", name); + return -1; + } + } + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(interp->codecs.initialized); + return PyDict_PopString(interp->codecs.error_registry, name, NULL); +} + /* Lookup the error handling callback function registered under the name error. As a special case NULL can be passed, in which case the error handling callback for strict encoding will be returned. */ @@ -1470,6 +1490,8 @@ _PyCodec_InitRegistry(PyInterpreterState *interp) } } }; + // ensure that the built-in error handlers' names are kept in sync + assert(Py_ARRAY_LENGTH(methods) == Py_ARRAY_LENGTH(codecs_builtin_error_handlers)); assert(interp->codecs.initialized == 0); interp->codecs.search_path = PyList_New(0); diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index f4dc807198a..e6c599a2ac4 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -345,6 +345,7 @@ Python/ast_opt.c fold_unaryop ops - Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - +Python/codecs.c - codecs_builtin_error_handlers - Python/codecs.c - ucnhash_capi - Python/codecs.c _PyCodec_InitRegistry methods - Python/compile.c - NO_LOCATION -