[3.12] gh-126727: Fix locale.nl_langinfo(locale.ERA) (GH-126730) (GH-127098)

It now returns multiple era description segments separated by semicolons. Previously it only returned the first segment on platforms with Glibc. (cherry picked from commit 4803cd0244)
2024-11-21 21:09:37 +01:00 · 2024-11-21 13:44:37 +02:00 · 2024-11-21 13:44:37 +02:00 · 4b4e0dbdf4
commit 4b4e0dbdf4
parent d997be0510
4 changed files with 96 additions and 28 deletions
--- a/Doc/library/locale.rst
+++ b/Doc/library/locale.rst
@ -281,7 +281,8 @@ The :mod:`locale` module defines the following exception and functions:
   .. data:: ERA
-      Get a string that represents the era used in the current locale.
+      Get a string which describes how years are counted and displayed for
      each era in a locale.
      Most locales do not define this value.  An example of a locale which does
      define this value is the Japanese one.  In Japan, the traditional
@ -290,9 +291,10 @@ The :mod:`locale` module defines the following exception and functions:
      Normally it should not be necessary to use this value directly. Specifying
      the ``E`` modifier in their format strings causes the :func:`time.strftime`
-      function to use this information.  The format of the returned string is not
+      function to use this information.
-      specified, and therefore you should not assume knowledge of it on different
+      The format of the returned string is specified in *The Open Group Base
-      systems.
+      Specifications Issue 8*, paragraph `7.3.5.2 LC_TIME C-Language Access
      <https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap07.html#tag_07_03_05_02>`_.
   .. data:: ERA_D_T_FMT
--- a/Lib/test/test__locale.py
+++ b/Lib/test/test__locale.py
@ -90,6 +90,14 @@ known_alt_digits = {
    'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}),
 }
 # Expected nl_langinfo(ERA) data, keyed by locale name without the encoding
 # suffix.  Each value is (count, sample):
 #   count  - minimum number of semicolon-separated era segments expected;
 #            0 means the locale must report an empty ERA string.
 #   sample - one era segment that must occur in the returned value.
 known_era = {
    'C': (0, ''),
    'en_US': (0, ''),
    'ja_JP': (11, '+:1:2019/05/01:2019/12/31:令和:%EC元年'),
    'zh_TW': (3, '+:1:1912/01/01:1912/12/31:民國:%EC元年'),
    # Thai Buddhist era (offset -543); this is the Thailand locale, th_TH.
    'th_TH': (1, '+:1:-543/01/01:+*:พ.ศ.:%EC %Ey'),
 }
 if sys.platform == 'win32':
    # ps_AF doesn't work on Windows: see bpo-38324 (msg361830)
    del known_numerics['ps_AF']
@ -228,6 +236,44 @@ class _LocaleTests(unittest.TestCase):
        if not tested:
            self.skipTest('no suitable locales')
    @unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
    @unittest.skipUnless(hasattr(locale, 'ERA'), "requires locale.ERA")
    @unittest.skipIf(
        support.is_emscripten or support.is_wasi,
        "musl libc issue on Emscripten, bpo-46390"
    )
    def test_era_nl_langinfo(self):
        # Test nl_langinfo(ERA): for every candidate locale that can be set,
        # the value must be a str that is either empty or made of
        # semicolon-separated segments with five colons each; for locales
        # listed in known_era it must also match the expected data.
        tested = False
        for loc in candidate_locales:
            with self.subTest(locale=loc):
                try:
                    setlocale(LC_TIME, loc)
                    setlocale(LC_CTYPE, loc)
                except Error:
                    # skipTest() raises unittest.SkipTest, so control never
                    # falls through to the checks below for this locale.
                    self.skipTest(f'no locale {loc!r}')
                era = nl_langinfo(locale.ERA)
                self.assertIsInstance(era, str)
                if era:
                    # N segments are joined by N-1 semicolons and each
                    # segment must contain exactly five colons.
                    self.assertEqual(era.count(':'), (era.count(';') + 1) * 5, era)
                # Drop the encoding suffix ('ja_JP.UTF-8' -> 'ja_JP') before
                # looking up the expected data.
                loc1 = loc.split('.', 1)[0]
                if loc1 in known_era:
                    count, sample = known_era[loc1]
                    if count:
                        if not era:
                            self.skipTest(f'ERA is not set for locale {loc!r} on this platform')
                        self.assertGreaterEqual(era.count(';') + 1, count)
                        self.assertIn(sample, era)
                    else:
                        self.assertEqual(era, '')
                tested = True
        if not tested:
            self.skipTest('no suitable locales')
    def test_float_parsing(self):
        # Bug #1391872: Test whether float parsing is okay on European
        # locales.
--- a/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst
+++ b/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst
@ -0,0 +1,3 @@
 ``locale.nl_langinfo(locale.ERA)`` now returns multiple era description
 segments separated by semicolons. Previously it only returned the first
 segment on platforms with Glibc.
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@ -595,6 +595,37 @@ static struct langinfo_constant{
    {0, 0}
 };
 #ifdef __GLIBC__
 #if defined(ALT_DIGITS) || defined(ERA)
 static PyObject *
 decode_strings(const char *result, size_t max_count)
 {
    /* Convert a sequence of NUL-separated C strings to a Python string
     * containing semicolon separated items.
     *
     * result:    consecutive NUL-terminated strings; reading stops at the
     *            first empty string (i.e. a NUL where an item would start).
     * max_count: upper bound on the number of strings that are read.
     *
     * Returns the str produced by PyUnicode_DecodeLocale(), or NULL with an
     * exception set if allocation or decoding fails. */
    size_t i = 0;
    size_t count = 0;
    /* Measure the prefix to copy: every item contributes its length plus
     * its terminating NUL. */
    for (; count < max_count && result[i]; count++) {
        i += strlen(result + i) + 1;
    }
    if (count == 0) {
        /* Nothing was read (empty input or max_count == 0).  Without this
         * guard the unsigned `--count` below would wrap around and write
         * semicolons past a zero-sized buffer. */
        return PyUnicode_DecodeLocale("", NULL);
    }
    char *buf = PyMem_Malloc(i);
    if (buf == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memcpy(buf, result, i);
    /* Replace the NUL after every item except the last with a semicolon;
     * the last NUL stays in place and terminates the joined string. */
    i = 0;
    while (--count) {
        i += strlen(buf + i);
        buf[i++] = ';';
    }
    PyObject *pyresult = PyUnicode_DecodeLocale(buf, NULL);
    PyMem_Free(buf);
    return pyresult;
 }
 #endif
 #endif
 /*[clinic input]
 _locale.nl_langinfo
@ -620,32 +651,18 @@ _locale_nl_langinfo_impl(PyObject *module, int item)
            result = result != NULL ? result : "";
            PyObject *pyresult;
 #ifdef __GLIBC__
            /* According to the POSIX specification the result must be
             * a sequence of semicolon-separated strings.
             * But in Glibc they are NUL-separated. */
 #ifdef ALT_DIGITS
            if (item == ALT_DIGITS && *result) {
-                /* According to the POSIX specification the result must be
+                pyresult = decode_strings(result, 100);
-                 * a sequence of up to 100 semicolon-separated strings.
+            }
-                 * But in Glibc they are NUL-separated. */
+            else
-                Py_ssize_t i = 0;
+#endif
-                int count = 0;
+#ifdef ERA
-                for (; count < 100 && result[i]; count++) {
+            if (item == ERA && *result) {
-                    i += strlen(result + i) + 1;
+                pyresult = decode_strings(result, SIZE_MAX);
                }
                char *buf = PyMem_Malloc(i);
                if (buf == NULL) {
                    PyErr_NoMemory();
                    pyresult = NULL;
                }
                else {
                    memcpy(buf, result, i);
                    /* Replace all NULs with semicolons. */
                    i = 0;
                    while (--count) {
                        i += strlen(buf + i);
                        buf[i++] = ';';
                    }
                    pyresult = PyUnicode_DecodeLocale(buf, NULL);
                    PyMem_Free(buf);
                }
            }
            else
 #endif