From dce642f24418c58e67fa31a686575c980c31dd37 Mon Sep 17 00:00:00 2001 From: Ben Kehoe Date: Tue, 11 Jan 2022 12:15:42 -0700 Subject: [PATCH] bpo-46307: Add string.Template.get_identifiers() method (GH-30493) Add `string.Template.is_valid()` and `string.Template.get_identifiers()` methods. `is_valid()` returns false if the template has invalid placeholders that would cause `substitute()` to raise `ValueError`. `get_identifiers()` returns a list of the valid identifiers in the template, in the order they first appear, ignoring any invalid identifiers (like `safe_substitute()`). Both methods raise `ValueError` if the pattern matches the template without one of the expected named groups matching. Automerge-Triggered-By: GH:warsaw --- Doc/library/string.rst | 19 +++++++ Lib/string.py | 29 +++++++++++ Lib/test/test_string.py | 51 +++++++++++++++++++ .../2022-01-10-07-51-43.bpo-46307.SKvOIY.rst | 1 + 4 files changed, 100 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-01-10-07-51-43.bpo-46307.SKvOIY.rst diff --git a/Doc/library/string.rst b/Doc/library/string.rst index b27782f8d8e..9bc703e70cd 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -783,6 +783,22 @@ these rules. The methods of :class:`Template` are: templates containing dangling delimiters, unmatched braces, or placeholders that are not valid Python identifiers. + + .. method:: is_valid() + + Returns false if the template has invalid placeholders that will cause + :meth:`substitute` to raise :exc:`ValueError`. + + .. versionadded:: 3.11 + + + .. method:: get_identifiers() + + Returns a list of the valid identifiers in the template, in the order + they first appear, ignoring any invalid identifiers. + + .. versionadded:: 3.11 + :class:`Template` instances also provide one public data attribute: .. attribute:: template @@ -869,6 +885,9 @@ rule: * *invalid* -- This group matches any other delimiter pattern (usually a single delimiter), and it should appear last in the regular expression. +The methods on this class will raise :exc:`ValueError` if the pattern matches +the template without one of these named groups matching. 
+ Helper functions ---------------- diff --git a/Lib/string.py b/Lib/string.py index 261789cc10a..2eab6d4f595 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -141,6 +141,35 @@ class Template: self.pattern) return self.pattern.sub(convert, self.template) + def is_valid(self): + for mo in self.pattern.finditer(self.template): + if mo.group('invalid') is not None: + return False + if (mo.group('named') is None + and mo.group('braced') is None + and mo.group('escaped') is None): + # If all the groups are None, there must be + # another group we're not expecting + raise ValueError('Unrecognized named group in pattern', + self.pattern) + return True + + def get_identifiers(self): + ids = [] + for mo in self.pattern.finditer(self.template): + named = mo.group('named') or mo.group('braced') + if named is not None and named not in ids: + # add a named group only the first time it appears + ids.append(named) + elif (named is None + and mo.group('invalid') is None + and mo.group('escaped') is None): + # If all the groups are None, there must be + # another group we're not expecting + raise ValueError('Unrecognized named group in pattern', + self.pattern) + return ids + # Initialize Template.pattern. __init_subclass__() is automatically called # only for subclasses, not for the Template class itself. 
Template.__init_subclass__() diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py index 0be28fdb609..824b89ad517 100644 --- a/Lib/test/test_string.py +++ b/Lib/test/test_string.py @@ -475,6 +475,57 @@ class TestTemplate(unittest.TestCase): self.assertEqual(s.substitute(dict(who='tim', what='ham')), 'tim likes to eat a bag of ham worth $100') + def test_is_valid(self): + eq = self.assertEqual + s = Template('$who likes to eat a bag of ${what} worth $$100') + self.assertTrue(s.is_valid()) + + s = Template('$who likes to eat a bag of ${what} worth $100') + self.assertFalse(s.is_valid()) + + # if the pattern has an unrecognized capture group, + # it should raise ValueError like substitute and safe_substitute do + class BadPattern(Template): + pattern = r""" + (?P<badname>.*) | + (?P<escaped>@{2}) | + @(?P<named>[_a-z][._a-z0-9]*) | + @{(?P<braced>[_a-z][._a-z0-9]*)} | + (?P<invalid>@) | + """ + s = BadPattern('@bag.foo.who likes to eat a bag of @bag.what') + self.assertRaises(ValueError, s.is_valid) + + def test_get_identifiers(self): + eq = self.assertEqual + raises = self.assertRaises + s = Template('$who likes to eat a bag of ${what} worth $$100') + ids = s.get_identifiers() + eq(ids, ['who', 'what']) + + # repeated identifiers only included once + s = Template('$who likes to eat a bag of ${what} worth $$100; ${who} likes to eat a bag of $what worth $$100') + ids = s.get_identifiers() + eq(ids, ['who', 'what']) + + # invalid identifiers are ignored + s = Template('$who likes to eat a bag of ${what} worth $100') + ids = s.get_identifiers() + eq(ids, ['who', 'what']) + + # if the pattern has an unrecognized capture group, + # it should raise ValueError like substitute and safe_substitute do + class BadPattern(Template): + pattern = r""" + (?P<badname>.*) | + (?P<escaped>@{2}) | + @(?P<named>[_a-z][._a-z0-9]*) | + @{(?P<braced>[_a-z][._a-z0-9]*)} | + (?P<invalid>@) | + """ + s = BadPattern('@bag.foo.who likes to eat a bag of @bag.what') + self.assertRaises(ValueError, s.get_identifiers) + if __name__ == '__main__': unittest.main() diff --git 
a/Misc/NEWS.d/next/Library/2022-01-10-07-51-43.bpo-46307.SKvOIY.rst b/Misc/NEWS.d/next/Library/2022-01-10-07-51-43.bpo-46307.SKvOIY.rst new file mode 100644 index 00000000000..6207c424ce9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-01-10-07-51-43.bpo-46307.SKvOIY.rst @@ -0,0 +1 @@ +Add :meth:`string.Template.is_valid` and :meth:`string.Template.get_identifiers` methods.