From 37e0c7850de902179b28f1378fbbc38a5ed3628c Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 2 May 2021 17:03:40 -0400 Subject: [PATCH] bpo-43926: Cleaner metadata with PEP 566 JSON support. (GH-25565) * bpo-43926: Cleaner metadata with PEP 566 JSON support. * Add blurb * Add versionchanged and versionadded declarations for changes to metadata. * Use descriptor for PEP 566 --- Doc/library/importlib.metadata.rst | 13 +++ .../{metadata.py => metadata/__init__.py} | 28 +----- Lib/importlib/metadata/_adapters.py | 67 +++++++++++++ Lib/importlib/{ => metadata}/_collections.py | 0 Lib/importlib/{ => metadata}/_functools.py | 0 Lib/importlib/{ => metadata}/_itertools.py | 0 Lib/importlib/metadata/_meta.py | 29 ++++++ Lib/importlib/metadata/_text.py | 99 +++++++++++++++++++ Lib/test/test_importlib/fixtures.py | 11 +++ Lib/test/test_importlib/test_main.py | 6 +- Lib/test/test_importlib/test_metadata_api.py | 23 +++++ .../2021-04-23-17-48-55.bpo-43926.HMUlGU.rst | 4 + 12 files changed, 254 insertions(+), 26 deletions(-) rename Lib/importlib/{metadata.py => metadata/__init__.py} (97%) create mode 100644 Lib/importlib/metadata/_adapters.py rename Lib/importlib/{ => metadata}/_collections.py (100%) rename Lib/importlib/{ => metadata}/_functools.py (100%) rename Lib/importlib/{ => metadata}/_itertools.py (100%) create mode 100644 Lib/importlib/metadata/_meta.py create mode 100644 Lib/importlib/metadata/_text.py create mode 100644 Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst diff --git a/Doc/library/importlib.metadata.rst b/Doc/library/importlib.metadata.rst index 40e48d1beec..9bedee5af28 100644 --- a/Doc/library/importlib.metadata.rst +++ b/Doc/library/importlib.metadata.rst @@ -170,6 +170,19 @@ the values are returned unparsed from the distribution metadata:: >>> wheel_metadata['Requires-Python'] # doctest: +SKIP '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*' +``PackageMetadata`` also presents a ``json`` attribute that returns +all the metadata in a JSON-compatible form per :PEP:`566`:: + + >>> wheel_metadata.json['requires_python'] + '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*' + +.. versionchanged:: 3.10 + The ``Description`` is now included in the metadata when presented + through the payload. Line continuation characters have been removed. + +.. versionadded:: 3.10 + The ``json`` attribute was added. + .. _version: diff --git a/Lib/importlib/metadata.py b/Lib/importlib/metadata/__init__.py similarity index 97% rename from Lib/importlib/metadata.py rename to Lib/importlib/metadata/__init__.py index 7a427eb3b28..142162196fa 100644 --- a/Lib/importlib/metadata.py +++ b/Lib/importlib/metadata/__init__.py @@ -14,6 +14,7 @@ import itertools import posixpath import collections +from . import _adapters, _meta from ._collections import FreezableDefaultDict, Pair from ._functools import method_cache from ._itertools import unique_everseen @@ -22,7 +23,7 @@ from contextlib import suppress from importlib import import_module from importlib.abc import MetaPathFinder from itertools import starmap -from typing import Any, List, Mapping, Optional, Protocol, TypeVar, Union +from typing import List, Mapping, Optional, Union __all__ = [ @@ -385,25 +386,6 @@ class FileHash: return ''.format(self.mode, self.value) -_T = TypeVar("_T") - - -class PackageMetadata(Protocol): - def __len__(self) -> int: - ... # pragma: no cover - - def __contains__(self, item: str) -> bool: - ... # pragma: no cover - - def __getitem__(self, key: str) -> str: - ... # pragma: no cover - - def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]: - """ - Return all values associated with a possibly multi-valued key. - """ - - class Distribution: """A Python distribution package.""" @@ -488,7 +470,7 @@ class Distribution: return PathDistribution(zipfile.Path(meta.build_as_zip(builder))) @property - def metadata(self) -> PackageMetadata: + def metadata(self) -> _meta.PackageMetadata: """Return the parsed metadata for this Distribution. The returned object will have keys that name the various bits of @@ -502,7 +484,7 @@ class Distribution: # (which points to the egg-info file) attribute unchanged. or self.read_text('') ) - return email.message_from_string(text) + return _adapters.Message(email.message_from_string(text)) @property def name(self): @@ -829,7 +811,7 @@ def distributions(**kwargs): return Distribution.discover(**kwargs) -def metadata(distribution_name) -> PackageMetadata: +def metadata(distribution_name) -> _meta.PackageMetadata: """Get the metadata for the named package. :param distribution_name: The name of the distribution package to query. diff --git a/Lib/importlib/metadata/_adapters.py b/Lib/importlib/metadata/_adapters.py new file mode 100644 index 00000000000..ab086180fc3 --- /dev/null +++ b/Lib/importlib/metadata/_adapters.py @@ -0,0 +1,67 @@ +import re +import textwrap +import email.message + +from ._text import FoldedCase + + +class Message(email.message.Message): + multiple_use_keys = set( + map( + FoldedCase, + [ + 'Classifier', + 'Obsoletes-Dist', + 'Platform', + 'Project-URL', + 'Provides-Dist', + 'Provides-Extra', + 'Requires-Dist', + 'Requires-External', + 'Supported-Platform', + ], + ) + ) + """ + Keys that may be indicated multiple times per PEP 566. + """ + + def __new__(cls, orig: email.message.Message): + res = super().__new__(cls) + vars(res).update(vars(orig)) + return res + + def __init__(self, *args, **kwargs): + self._headers = self._repair_headers() + + # suppress spurious error from mypy + def __iter__(self): + return super().__iter__() + + def _repair_headers(self): + def redent(value): + "Correct for RFC822 indentation" + if not value or '\n' not in value: + return value + return textwrap.dedent(' ' * 8 + value) + + headers = [(key, redent(value)) for key, value in vars(self)['_headers']] + if self._payload: + headers.append(('Description', self.get_payload())) + return headers + + @property + def json(self): + """ + Convert PackageMetadata to a JSON-compatible format + per PEP 0566. + """ + + def transform(key): + value = self.get_all(key) if key in self.multiple_use_keys else self[key] + if key == 'Keywords': + value = re.split(r'\s+', value) + tk = key.lower().replace('-', '_') + return tk, value + + return dict(map(transform, map(FoldedCase, self))) diff --git a/Lib/importlib/_collections.py b/Lib/importlib/metadata/_collections.py similarity index 100% rename from Lib/importlib/_collections.py rename to Lib/importlib/metadata/_collections.py diff --git a/Lib/importlib/_functools.py b/Lib/importlib/metadata/_functools.py similarity index 100% rename from Lib/importlib/_functools.py rename to Lib/importlib/metadata/_functools.py diff --git a/Lib/importlib/_itertools.py b/Lib/importlib/metadata/_itertools.py similarity index 100% rename from Lib/importlib/_itertools.py rename to Lib/importlib/metadata/_itertools.py diff --git a/Lib/importlib/metadata/_meta.py b/Lib/importlib/metadata/_meta.py new file mode 100644 index 00000000000..04d9a023536 --- /dev/null +++ b/Lib/importlib/metadata/_meta.py @@ -0,0 +1,29 @@ +from typing import Any, Dict, Iterator, List, Protocol, TypeVar, Union + + +_T = TypeVar("_T") + + +class PackageMetadata(Protocol): + def __len__(self) -> int: + ... # pragma: no cover + + def __contains__(self, item: str) -> bool: + ... # pragma: no cover + + def __getitem__(self, key: str) -> str: + ... # pragma: no cover + + def __iter__(self) -> Iterator[str]: + ... # pragma: no cover + + def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]: + """ + Return all values associated with a possibly multi-valued key. + """ + + @property + def json(self) -> Dict[str, Union[str, List[str]]]: + """ + A JSON-compatible form of the metadata. + """ diff --git a/Lib/importlib/metadata/_text.py b/Lib/importlib/metadata/_text.py new file mode 100644 index 00000000000..766979d93c1 --- /dev/null +++ b/Lib/importlib/metadata/_text.py @@ -0,0 +1,99 @@ +import re + +from ._functools import method_cache + + +# from jaraco.text 3.5 +class FoldedCase(str): + """ + A case insensitive string class; behaves just like str + except compares equal when the only variation is case. + + >>> s = FoldedCase('hello world') + + >>> s == 'Hello World' + True + + >>> 'Hello World' == s + True + + >>> s != 'Hello World' + False + + >>> s.index('O') + 4 + + >>> s.split('O') + ['hell', ' w', 'rld'] + + >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta'])) + ['alpha', 'Beta', 'GAMMA'] + + Sequence membership is straightforward. + + >>> "Hello World" in [s] + True + >>> s in ["Hello World"] + True + + You may test for set inclusion, but candidate and elements + must both be folded. + + >>> FoldedCase("Hello World") in {s} + True + >>> s in {FoldedCase("Hello World")} + True + + String inclusion works as long as the FoldedCase object + is on the right. + + >>> "hello" in FoldedCase("Hello World") + True + + But not if the FoldedCase object is on the left: + + >>> FoldedCase('hello') in 'Hello World' + False + + In that case, use in_: + + >>> FoldedCase('hello').in_('Hello World') + True + + >>> FoldedCase('hello') > FoldedCase('Hello') + False + """ + + def __lt__(self, other): + return self.lower() < other.lower() + + def __gt__(self, other): + return self.lower() > other.lower() + + def __eq__(self, other): + return self.lower() == other.lower() + + def __ne__(self, other): + return self.lower() != other.lower() + + def __hash__(self): + return hash(self.lower()) + + def __contains__(self, other): + return super(FoldedCase, self).lower().__contains__(other.lower()) + + def in_(self, other): + "Does self appear in other?" + return self in FoldedCase(other) + + # cache lower since it's likely to be called frequently. + @method_cache + def lower(self): + return super(FoldedCase, self).lower() + + def index(self, sub): + return self.lower().index(sub.lower()) + + def split(self, splitter=' ', maxsplit=0): + pattern = re.compile(re.escape(splitter), re.I) + return pattern.split(self, maxsplit) diff --git a/Lib/test/test_importlib/fixtures.py b/Lib/test/test_importlib/fixtures.py index 1ae70c70f10..12ed07d3374 100644 --- a/Lib/test/test_importlib/fixtures.py +++ b/Lib/test/test_importlib/fixtures.py @@ -1,5 +1,6 @@ import os import sys +import copy import shutil import pathlib import tempfile @@ -108,6 +109,16 @@ class DistInfoPkg(OnSysPath, SiteDir): super(DistInfoPkg, self).setUp() build_files(DistInfoPkg.files, self.site_dir) + def make_uppercase(self): + """ + Rewrite metadata with everything uppercase. + """ + shutil.rmtree(self.site_dir / "distinfo_pkg-1.0.0.dist-info") + files = copy.deepcopy(DistInfoPkg.files) + info = files["distinfo_pkg-1.0.0.dist-info"] + info["METADATA"] = info["METADATA"].upper() + build_files(files, self.site_dir) + class DistInfoPkgWithDot(OnSysPath, SiteDir): files: FilesDef = { diff --git a/Lib/test/test_importlib/test_main.py b/Lib/test/test_importlib/test_main.py index 08069c9a5de..52cb63712a5 100644 --- a/Lib/test/test_importlib/test_main.py +++ b/Lib/test/test_importlib/test_main.py @@ -125,7 +125,7 @@ class NonASCIITests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): metadata_dir.mkdir() metadata = metadata_dir / 'METADATA' with metadata.open('w', encoding='utf-8') as fp: - fp.write('Description: pôrˈtend\n') + fp.write('Description: pôrˈtend') return 'portend' @staticmethod @@ -145,7 +145,7 @@ class NonASCIITests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): pôrˈtend """ - ).lstrip() + ).strip() ) return 'portend' @@ -157,7 +157,7 @@ class NonASCIITests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): def test_metadata_loads_egg_info(self): pkg_name = self.pkg_with_non_ascii_description_egg_info(self.site_dir) meta = metadata(pkg_name) - assert meta.get_payload() == 'pôrˈtend\n' + assert meta['Description'] == 'pôrˈtend' class DiscoveryTests(fixtures.EggInfoPkg, fixtures.DistInfoPkg, unittest.TestCase): diff --git a/Lib/test/test_importlib/test_metadata_api.py b/Lib/test/test_importlib/test_metadata_api.py index 657c16603f6..825edc10f12 100644 --- a/Lib/test/test_importlib/test_metadata_api.py +++ b/Lib/test/test_importlib/test_metadata_api.py @@ -231,6 +231,29 @@ class APITests( assert deps == expected + def test_as_json(self): + md = metadata('distinfo-pkg').json + assert 'name' in md + assert md['keywords'] == ['sample', 'package'] + desc = md['description'] + assert desc.startswith('Once upon a time\nThere was') + assert len(md['requires_dist']) == 2 + + def test_as_json_egg_info(self): + md = metadata('egginfo-pkg').json + assert 'name' in md + assert md['keywords'] == ['sample', 'package'] + desc = md['description'] + assert desc.startswith('Once upon a time\nThere was') + assert len(md['classifier']) == 2 + + def test_as_json_odd_case(self): + self.make_uppercase() + md = metadata('distinfo-pkg').json + assert 'name' in md + assert len(md['requires_dist']) == 2 + assert md['keywords'] == ['SAMPLE', 'PACKAGE'] + class LegacyDots(fixtures.DistInfoPkgWithDotLegacy, unittest.TestCase): def test_name_normalization(self): diff --git a/Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst b/Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst new file mode 100644 index 00000000000..45f29a84cd5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst @@ -0,0 +1,4 @@ +In ``importlib.metadata``, provide a uniform interface to ``Description``, +allow for any field to be encoded with multiline values, remove continuation +lines from multiline values, and add a ``.json`` property for easy access to +the PEP 566 JSON-compatible form. Sync with ``importlib_metadata 4.0``.