From 9089a265918754d95e105a7c4c409ac9352c87bb Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 22 Jan 2018 19:07:32 +0100 Subject: [PATCH] bpo-29240: PyUnicode_DecodeLocale() uses UTF-8 on Android (#5272) PyUnicode_DecodeLocaleAndSize(), PyUnicode_DecodeLocale() and PyUnicode_EncodeLocale() now always use the UTF-8 encoding on Android, instead of the current locale encoding. On Android API 19, mbstowcs() and wcstombs() are broken and cannot be used. --- Doc/c-api/unicode.rst | 11 +++++++---- Python/fileutils.c | 10 ++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 3f6c0559907..92e22b16a4e 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -760,7 +760,8 @@ system. Py_ssize_t len, \ const char *errors) - Decode a string from the current locale encoding. The supported + Decode a string from UTF-8 on Android, or from the current locale encoding + on other platforms. The supported error handlers are ``"strict"`` and ``"surrogateescape"`` (:pep:`383`). The decoder uses ``"strict"`` error handler if *errors* is ``NULL``. *str* must end with a null character but @@ -780,7 +781,7 @@ system. .. versionchanged:: 3.7 The function now also uses the current locale encoding for the - ``surrogateescape`` error handler. Previously, :c:func:`Py_DecodeLocale` + ``surrogateescape`` error handler, except on Android. Previously, :c:func:`Py_DecodeLocale` was used for the ``surrogateescape``, and the current locale encoding was used for ``strict``. @@ -795,7 +796,8 @@ system. .. c:function:: PyObject* PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) - Encode a Unicode object to the current locale encoding. The + Encode a Unicode object to UTF-8 on Android, or to the current locale + encoding on other platforms. The supported error handlers are ``"strict"`` and ``"surrogateescape"`` (:pep:`383`). The encoder uses ``"strict"`` error handler if *errors* is ``NULL``. 
Return a :class:`bytes` object. *unicode* cannot @@ -815,7 +817,8 @@ system. .. versionchanged:: 3.7 The function now also uses the current locale encoding for the - ``surrogateescape`` error handler. Previously, :c:func:`Py_EncodeLocale` + ``surrogateescape`` error handler, except on Android. Previously, + :c:func:`Py_EncodeLocale` was used for the ``surrogateescape``, and the current locale encoding was used for ``strict``. diff --git a/Python/fileutils.c b/Python/fileutils.c index 9a1435cfb32..d610639688e 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -449,7 +449,12 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen, int current_locale, int surrogateescape) { if (current_locale) { +#ifdef __ANDROID__ + return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason, + surrogateescape); +#else return decode_current_locale(arg, wstr, wlen, reason, surrogateescape); +#endif } #if defined(__APPLE__) || defined(__ANDROID__) @@ -605,8 +610,13 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos, int raw_malloc, int current_locale, int surrogateescape) { if (current_locale) { +#ifdef __ANDROID__ + return _Py_EncodeUTF8Ex(text, str, error_pos, reason, + raw_malloc, surrogateescape); +#else return encode_current_locale(text, str, error_pos, reason, raw_malloc, surrogateescape); +#endif } #if defined(__APPLE__) || defined(__ANDROID__)