gh-126807: pygettext: Do not attempt to extract messages from function definitions. (GH-126808)

Fixes a bug where pygettext would attempt to extract a message from code like this: `def _(x): pass`. This happened because pygettext only looks at one token at a time, so '_(x)' looks like a function call. Since 'x' is not a string literal, pygettext would then erroneously issue a warning.
2024-11-21 12:59:38 +01:00 · 2024-11-14 23:17:42 +01:00 · 2024-11-14 23:17:42 +01:00 · 9a456383be
commit 9a456383be
parent cae9d9d20f
3 changed files with 36 additions and 5 deletions
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -87,17 +87,23 @@ class Test_pygettext(unittest.TestCase):
        self.maxDiff = None
        self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))

-    def extract_docstrings_from_str(self, module_content):
-        """ utility: return all msgids extracted from module_content """
-        filename = 'test_docstrings.py'
-        with temp_cwd(None) as cwd:
+    def extract_from_str(self, module_content, *, args=(), strict=True):
+        """Return all msgids extracted from module_content."""
+        filename = 'test.py'
+        with temp_cwd(None):
            with open(filename, 'w', encoding='utf-8') as fp:
                fp.write(module_content)
-            assert_python_ok('-Xutf8', self.script, '-D', filename)
+            res = assert_python_ok('-Xutf8', self.script, *args, filename)
+            if strict:
+                self.assertEqual(res.err, b'')
            with open('messages.pot', encoding='utf-8') as fp:
                data = fp.read()
        return self.get_msgids(data)

+    def extract_docstrings_from_str(self, module_content):
+        """Return all docstrings extracted from module_content."""
+        return self.extract_from_str(module_content, args=('--docstrings',), strict=False)
+
    def test_header(self):
        """Make sure the required fields are in the header, according to:
           http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
@@ -344,6 +350,23 @@ class Test_pygettext(unittest.TestCase):
        self.assertNotIn('foo', msgids)
        self.assertIn('bar', msgids)

+    def test_function_and_class_names(self):
+        """Test that function and class names are not mistakenly extracted."""
+        msgids = self.extract_from_str(dedent('''\
+        def _(x):
+            pass
+
+        def _(x="foo"):
+            pass
+
+        async def _(x):
+            pass
+
+        class _(object):
+            pass
+        '''))
+        self.assertEqual(msgids, [''])
+
    def test_pygettext_output(self):
        """Test that the pygettext output exactly matches snapshots."""
        for input_file in DATA_DIR.glob('*.py'):
--- a/Misc/NEWS.d/next/Tools-Demos/2024-11-13-22-23-36.gh-issue-126807.vpaWuN.rst
+++ b/Misc/NEWS.d/next/Tools-Demos/2024-11-13-22-23-36.gh-issue-126807.vpaWuN.rst
@@ -0,0 +1,2 @@
+Fix extraction warnings in :program:`pygettext.py` caused by mistaking
+function definitions for function calls.
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -341,6 +341,9 @@ class TokenEater:
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
+        if ttype == tokenize.NAME and tstring in ('class', 'def'):
+            self.__state = self.__ignorenext
+            return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen
            return
@@ -448,6 +451,9 @@ class TokenEater:
                }, file=sys.stderr)
            self.__state = self.__waiting

+    def __ignorenext(self, ttype, tstring, lineno):
+        self.__state = self.__waiting
+
    def __addentry(self, msg, lineno=None, isdocstring=0):
        if lineno is None:
            lineno = self.__lineno