From 8a5877165e993afb2633cd48da5222326d3f6e0e Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Fri, 22 Dec 2017 12:18:33 -0800 Subject: [PATCH] bpo-24960: use pkgutil.get_data in lib2to3 to read pickled grammar files (#4977) This is more complicated than it should be because we need to preserve the useful mtime-based regeneration feature that lib2to3.pgen2.driver.load_grammar has. We only look for the pickled grammar file with pkgutil.get_data and only if the source file does not exist. --- Lib/lib2to3/pgen2/driver.py | 21 +++++++++++++++++++ Lib/lib2to3/pgen2/grammar.py | 4 ++++ Lib/lib2to3/pygram.py | 4 ++-- Lib/lib2to3/tests/test_parser.py | 15 +++++++++++++ .../2017-12-22-09-25-51.bpo-24960.TGdAgO.rst | 3 +++ 5 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2017-12-22-09-25-51.bpo-24960.TGdAgO.rst diff --git a/Lib/lib2to3/pgen2/driver.py b/Lib/lib2to3/pgen2/driver.py index e5e4824c008..95c6692cf7c 100644 --- a/Lib/lib2to3/pgen2/driver.py +++ b/Lib/lib2to3/pgen2/driver.py @@ -20,6 +20,7 @@ import codecs import io import os import logging +import pkgutil import sys # Pgen imports @@ -140,6 +141,26 @@ def _newer(a, b): return os.path.getmtime(a) >= os.path.getmtime(b) +def load_packaged_grammar(package, grammar_source): + """Normally, loads a pickled grammar by doing + pkgutil.get_data(package, pickled_grammar) + where *pickled_grammar* is computed from *grammar_source* by adding the + Python version and using a ``.pickle`` extension. + + However, if *grammar_source* is an extant file, load_grammar(grammar_source) + is called instead. This facilitates using a packaged grammar file when needed + but preserves load_grammar's automatic regeneration behavior when possible. 
+ + """ + if os.path.isfile(grammar_source): + return load_grammar(grammar_source) + pickled_name = _generate_pickle_name(os.path.basename(grammar_source)) + data = pkgutil.get_data(package, pickled_name) + g = grammar.Grammar() + g.loads(data) + return g + + def main(*args): """Main program, when run as a script: produce grammar pickle files. diff --git a/Lib/lib2to3/pgen2/grammar.py b/Lib/lib2to3/pgen2/grammar.py index c10dcfa9ac2..088c58bfa99 100644 --- a/Lib/lib2to3/pgen2/grammar.py +++ b/Lib/lib2to3/pgen2/grammar.py @@ -108,6 +108,10 @@ class Grammar(object): d = pickle.load(f) self.__dict__.update(d) + def loads(self, pkl): + """Load the grammar tables from a pickle bytes object.""" + self.__dict__.update(pickle.loads(pkl)) + def copy(self): """ Copy the grammar. diff --git a/Lib/lib2to3/pygram.py b/Lib/lib2to3/pygram.py index 01fa1087115..919624eb399 100644 --- a/Lib/lib2to3/pygram.py +++ b/Lib/lib2to3/pygram.py @@ -29,12 +29,12 @@ class Symbols(object): setattr(self, name, symbol) -python_grammar = driver.load_grammar(_GRAMMAR_FILE) +python_grammar = driver.load_packaged_grammar("lib2to3", _GRAMMAR_FILE) python_symbols = Symbols(python_grammar) python_grammar_no_print_statement = python_grammar.copy() del python_grammar_no_print_statement.keywords["print"] -pattern_grammar = driver.load_grammar(_PATTERN_GRAMMAR_FILE) +pattern_grammar = driver.load_packaged_grammar("lib2to3", _PATTERN_GRAMMAR_FILE) pattern_symbols = Symbols(pattern_grammar) diff --git a/Lib/lib2to3/tests/test_parser.py b/Lib/lib2to3/tests/test_parser.py index dc94a69036a..ed1ef836463 100644 --- a/Lib/lib2to3/tests/test_parser.py +++ b/Lib/lib2to3/tests/test_parser.py @@ -12,7 +12,10 @@ from .support import driver from test.support import verbose # Python imports +import importlib +import operator import os +import pickle import shutil import subprocess import sys @@ -99,6 +102,18 @@ pgen2_driver.load_grammar(%r, save=True, force=True) finally: shutil.rmtree(tmpdir) + def 
test_load_packaged_grammar(self): + modname = __name__ + '.load_test' + class MyLoader: + def get_data(self, where): + return pickle.dumps({'elephant': 19}) + class MyModule: + __file__ = 'parsertestmodule' + __spec__ = importlib.util.spec_from_loader(modname, MyLoader()) + sys.modules[modname] = MyModule() + self.addCleanup(operator.delitem, sys.modules, modname) + g = pgen2_driver.load_packaged_grammar(modname, 'Grammar.txt') + self.assertEqual(g.elephant, 19) class GrammarTest(support.TestCase): diff --git a/Misc/NEWS.d/next/Tools-Demos/2017-12-22-09-25-51.bpo-24960.TGdAgO.rst b/Misc/NEWS.d/next/Tools-Demos/2017-12-22-09-25-51.bpo-24960.TGdAgO.rst new file mode 100644 index 00000000000..6f9d83c5772 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2017-12-22-09-25-51.bpo-24960.TGdAgO.rst @@ -0,0 +1,3 @@ +2to3 and lib2to3 can now read pickled grammar files using pkgutil.get_data() +rather than probing the filesystem. This lets 2to3 and lib2to3 work when run +from a zipfile.