From 4b4e0dbdf49adc91c35a357ad332ab3abd4c31b1 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 21 Nov 2024 13:44:37 +0200
Subject: [PATCH] [3.12] gh-126727: Fix locale.nl_langinfo(locale.ERA)
 (GH-126730) (GH-127098)

It now returns multiple era description segments separated by semicolons.
Previously it only returned the first segment on platforms with Glibc.
(cherry picked from commit 4803cd0244847f286641c85591fda08b513cea52)
---
 Doc/library/locale.rst                        | 10 +--
 Lib/test/test__locale.py                      | 46 +++++++++++++
 ...-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst |  3 +
 Modules/_localemodule.c                       | 65 ++++++++++++-------
 4 files changed, 96 insertions(+), 28 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst

diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst
index a81879a2fe4..fee5aba7ee3 100644
--- a/Doc/library/locale.rst
+++ b/Doc/library/locale.rst
@@ -281,7 +281,8 @@ The :mod:`locale` module defines the following exception and functions:
 
    .. data:: ERA
 
-      Get a string that represents the era used in the current locale.
+      Get a string which describes how years are counted and displayed for
+      each era in a locale.
 
       Most locales do not define this value.  An example of a locale which does
       define this value is the Japanese one.  In Japan, the traditional
@@ -290,9 +291,10 @@ The :mod:`locale` module defines the following exception and functions:
 
       Normally it should not be necessary to use this value directly. Specifying
       the ``E`` modifier in their format strings causes the :func:`time.strftime`
-      function to use this information.  The format of the returned string is not
-      specified, and therefore you should not assume knowledge of it on different
-      systems.
+      function to use this information.
+      The format of the returned string is specified in *The Open Group Base
+      Specifications Issue 8*, paragraph `7.3.5.2 LC_TIME C-Language Access
+      <https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap07.html#tag_07_03_05_02>`_.
 
    .. data:: ERA_D_T_FMT
 
diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py
index a680e6edb63..89c20325055 100644
--- a/Lib/test/test__locale.py
+++ b/Lib/test/test__locale.py
@@ -90,6 +90,14 @@ known_alt_digits = {
     'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}),
 }
 
+known_era = {  # locale -> (minimum segment count, expected ERA substring)
+    'C': (0, ''),
+    'en_US': (0, ''),
+    'ja_JP': (11, '+:1:2019/05/01:2019/12/31:令和:%EC元年'),
+    'zh_TW': (3, '+:1:1912/01/01:1912/12/31:民國:%EC元年'),
+    'th_TH': (1, '+:1:-543/01/01:+*:พ.ศ.:%EC %Ey'),
+}
+
 if sys.platform == 'win32':
     # ps_AF doesn't work on Windows: see bpo-38324 (msg361830)
     del known_numerics['ps_AF']
@@ -228,6 +236,44 @@ class _LocaleTests(unittest.TestCase):
         if not tested:
             self.skipTest('no suitable locales')
 
+    @unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
+    @unittest.skipUnless(hasattr(locale, 'ERA'), "requires locale.ERA")
+    @unittest.skipIf(
+        support.is_emscripten or support.is_wasi,
+        "musl libc issue on Emscripten, bpo-46390"
+    )
+    def test_era_nl_langinfo(self):
+        # Test nl_langinfo(ERA): expect five ':' per ';'-separated segment.
+        tested = False
+        for loc in candidate_locales:
+            with self.subTest(locale=loc):
+                try:
+                    setlocale(LC_TIME, loc)
+                    setlocale(LC_CTYPE, loc)
+                except Error:
+                    self.skipTest(f'no locale {loc!r}')
+                    continue  # NOTE(review): unreachable, skipTest() raises
+
+                with self.subTest(locale=loc):
+                    era = nl_langinfo(locale.ERA)
+                    self.assertIsInstance(era, str)
+                    if era:
+                        self.assertEqual(era.count(':'), (era.count(';') + 1) * 5, era)
+
+                    loc1 = loc.split('.', 1)[0]
+                    if loc1 in known_era:
+                        count, sample = known_era[loc1]
+                        if count:
+                            if not era:
+                                self.skipTest(f'ERA is not set for locale {loc!r} on this platform')
+                            self.assertGreaterEqual(era.count(';') + 1, count)
+                            self.assertIn(sample, era)
+                        else:
+                            self.assertEqual(era, '')
+                    tested = True
+        if not tested:
+            self.skipTest('no suitable locales')
+
     def test_float_parsing(self):
         # Bug #1391872: Test whether float parsing is okay on European
         # locales.
diff --git a/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst b/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst
new file mode 100644
index 00000000000..7bec8a6b7a8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst
@@ -0,0 +1,3 @@
+``locale.nl_langinfo(locale.ERA)`` now returns multiple era description
+segments separated by semicolons. Previously it only returned the first
+segment on platforms with Glibc.
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 53ebb57d23a..db8194372da 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -595,6 +595,37 @@ static struct langinfo_constant{
     {0, 0}
 };
 
+#ifdef __GLIBC__
+#if defined(ALT_DIGITS) || defined(ERA)
+static PyObject *
+decode_strings(const char *result, size_t max_count)
+{
+    /* Convert up to max_count NUL-separated C strings (ended by an empty
+     * string) to one Python string of semicolon-separated items. */
+    size_t i = 0;
+    size_t count = 0;
+    for (; count < max_count && result[i]; count++) {
+        i += strlen(result + i) + 1;
+    }
+    char *buf = PyMem_Calloc(i + 1, 1);  /* zeroed: "" when count == 0 */
+    if (buf == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    memcpy(buf, result, i);
+    /* Replace the NULs between items (not the last one) with semicolons. */
+    i = 0;
+    while (count-- > 1) {  /* no size_t wrap-around when count == 0 */
+        i += strlen(buf + i);
+        buf[i++] = ';';
+    }
+    PyObject *pyresult = PyUnicode_DecodeLocale(buf, NULL);
+    PyMem_Free(buf);
+    return pyresult;
+}
+#endif
+#endif
+
 /*[clinic input]
 _locale.nl_langinfo
 
@@ -620,32 +651,18 @@ _locale_nl_langinfo_impl(PyObject *module, int item)
             result = result != NULL ? result : "";
             PyObject *pyresult;
 #ifdef __GLIBC__
+            /* According to the POSIX specification the result must be
+             * a sequence of semicolon-separated strings.
+             * But in Glibc they are NUL-separated. */
 #ifdef ALT_DIGITS
             if (item == ALT_DIGITS && *result) {
-                /* According to the POSIX specification the result must be
-                 * a sequence of up to 100 semicolon-separated strings.
-                 * But in Glibc they are NUL-separated. */
-                Py_ssize_t i = 0;
-                int count = 0;
-                for (; count < 100 && result[i]; count++) {
-                    i += strlen(result + i) + 1;
-                }
-                char *buf = PyMem_Malloc(i);
-                if (buf == NULL) {
-                    PyErr_NoMemory();
-                    pyresult = NULL;
-                }
-                else {
-                    memcpy(buf, result, i);
-                    /* Replace all NULs with semicolons. */
-                    i = 0;
-                    while (--count) {
-                        i += strlen(buf + i);
-                        buf[i++] = ';';
-                    }
-                    pyresult = PyUnicode_DecodeLocale(buf, NULL);
-                    PyMem_Free(buf);
-                }
+                pyresult = decode_strings(result, 100);
+            }
+            else
+#endif
+#ifdef ERA
+            if (item == ERA && *result) {
+                pyresult = decode_strings(result, SIZE_MAX);
             }
             else
 #endif