From 4b4e0dbdf49adc91c35a357ad332ab3abd4c31b1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 21 Nov 2024 13:44:37 +0200 Subject: [PATCH] [3.12] gh-126727: Fix locale.nl_langinfo(locale.ERA) (GH-126730) (GH-127098) It now returns multiple era description segments separated by semicolons. Previously it only returned the first segment on platforms with Glibc. (cherry picked from commit 4803cd0244847f286641c85591fda08b513cea52) --- Doc/library/locale.rst | 10 +-- Lib/test/test__locale.py | 46 +++++++++++++ ...-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst | 3 + Modules/_localemodule.c | 65 ++++++++++++------- 4 files changed, 96 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index a81879a2fe4..fee5aba7ee3 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -281,7 +281,8 @@ The :mod:`locale` module defines the following exception and functions: .. data:: ERA - Get a string that represents the era used in the current locale. + Get a string which describes how years are counted and displayed for + each era in a locale. Most locales do not define this value. An example of a locale which does define this value is the Japanese one. In Japan, the traditional @@ -290,9 +291,10 @@ The :mod:`locale` module defines the following exception and functions: Normally it should not be necessary to use this value directly. Specifying the ``E`` modifier in their format strings causes the :func:`time.strftime` - function to use this information. The format of the returned string is not - specified, and therefore you should not assume knowledge of it on different - systems. + function to use this information. + The format of the returned string is specified in *The Open Group Base + Specifications Issue 8*, paragraph `7.3.5.2 LC_TIME C-Language Access + <https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap07.html#tag_07_03_05_02>`_. .. 
data:: ERA_D_T_FMT diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index a680e6edb63..89c20325055 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -90,6 +90,14 @@ known_alt_digits = { 'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}), } +known_era = { + 'C': (0, ''), + 'en_US': (0, ''), + 'ja_JP': (11, '+:1:2019/05/01:2019/12/31:令和:%EC元年'), + 'zh_TW': (3, '+:1:1912/01/01:1912/12/31:民國:%EC元年'), + 'th_TH': (1, '+:1:-543/01/01:+*:พ.ศ.:%EC %Ey'), +} + if sys.platform == 'win32': # ps_AF doesn't work on Windows: see bpo-38324 (msg361830) del known_numerics['ps_AF'] @@ -228,6 +236,44 @@ class _LocaleTests(unittest.TestCase): if not tested: self.skipTest('no suitable locales') + @unittest.skipUnless(nl_langinfo, "nl_langinfo is not available") + @unittest.skipUnless(hasattr(locale, 'ERA'), "requires locale.ERA") + @unittest.skipIf( + support.is_emscripten or support.is_wasi, + "musl libc issue on Emscripten, bpo-46390" + ) + def test_era_nl_langinfo(self): + # Test nl_langinfo(ERA) + tested = False + for loc in candidate_locales: + with self.subTest(locale=loc): + try: + setlocale(LC_TIME, loc) + setlocale(LC_CTYPE, loc) + except Error: + self.skipTest(f'no locale {loc!r}') + continue + + with self.subTest(locale=loc): + era = nl_langinfo(locale.ERA) + self.assertIsInstance(era, str) + if era: + self.assertEqual(era.count(':'), (era.count(';') + 1) * 5, era) + + loc1 = loc.split('.', 1)[0] + if loc1 in known_era: + count, sample = known_era[loc1] + if count: + if not era: + self.skipTest(f'ERA is not set for locale {loc!r} on this platform') + self.assertGreaterEqual(era.count(';') + 1, count) + self.assertIn(sample, era) + else: + self.assertEqual(era, '') + tested = True + if not tested: + self.skipTest('no suitable locales') + def test_float_parsing(self): # Bug #1391872: Test whether float parsing is okay on European # locales. 
diff --git a/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst b/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst new file mode 100644 index 00000000000..7bec8a6b7a8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst @@ -0,0 +1,3 @@ +``locale.nl_langinfo(locale.ERA)`` now returns multiple era description +segments separated by semicolons. Previously it only returned the first +segment on platforms with Glibc. diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 53ebb57d23a..db8194372da 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -595,6 +595,37 @@ static struct langinfo_constant{ {0, 0} }; +#ifdef __GLIBC__ +#if defined(ALT_DIGITS) || defined(ERA) +static PyObject * +decode_strings(const char *result, size_t max_count) +{ + /* Convert a sequence of NUL-separated C strings to a Python string + * containing semicolon separated items. */ + size_t i = 0; + size_t count = 0; + for (; count < max_count && result[i]; count++) { + i += strlen(result + i) + 1; + } + char *buf = PyMem_Malloc(i); + if (buf == NULL) { + PyErr_NoMemory(); + return NULL; + } + memcpy(buf, result, i); + /* Replace all NULs with semicolons. */ + i = 0; + while (--count) { + i += strlen(buf + i); + buf[i++] = ';'; + } + PyObject *pyresult = PyUnicode_DecodeLocale(buf, NULL); + PyMem_Free(buf); + return pyresult; +} +#endif +#endif + /*[clinic input] _locale.nl_langinfo @@ -620,32 +651,18 @@ _locale_nl_langinfo_impl(PyObject *module, int item) result = result != NULL ? result : ""; PyObject *pyresult; #ifdef __GLIBC__ + /* According to the POSIX specification the result must be + * a sequence of semicolon-separated strings. + * But in Glibc they are NUL-separated. */ #ifdef ALT_DIGITS if (item == ALT_DIGITS && *result) { - /* According to the POSIX specification the result must be - * a sequence of up to 100 semicolon-separated strings. 
- * But in Glibc they are NUL-separated. */ - Py_ssize_t i = 0; - int count = 0; - for (; count < 100 && result[i]; count++) { - i += strlen(result + i) + 1; - } - char *buf = PyMem_Malloc(i); - if (buf == NULL) { - PyErr_NoMemory(); - pyresult = NULL; - } - else { - memcpy(buf, result, i); - /* Replace all NULs with semicolons. */ - i = 0; - while (--count) { - i += strlen(buf + i); - buf[i++] = ';'; - } - pyresult = PyUnicode_DecodeLocale(buf, NULL); - PyMem_Free(buf); - } + pyresult = decode_strings(result, 100); + } + else +#endif +#ifdef ERA + if (item == ERA && *result) { + pyresult = decode_strings(result, SIZE_MAX); } else #endif