From dcd58c50844dae0d83517e88518a677914ea594b Mon Sep 17 00:00:00 2001 From: "Bernhard M. Wiedemann" Date: Sat, 12 Oct 2024 19:18:48 +0200 Subject: [PATCH] gh-125260: Change the default ``gzip.compress()`` mtime to 0 (#125261) This follows GNU gzip, which defaults to using 0 as the mtime for compressing stdin, where no file mtime is involved. This makes the output of gzip.compress() deterministic by default, greatly helping reproducible builds. Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/gzip.rst | 9 +++++++-- Lib/gzip.py | 6 +++--- Lib/test/test_gzip.py | 11 +++++++++++ .../2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst | 2 ++ 4 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index 6b6e158f6eb..f24e73517e5 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -184,11 +184,12 @@ The module defines the following items: attribute instead. -.. function:: compress(data, compresslevel=9, *, mtime=None) +.. function:: compress(data, compresslevel=9, *, mtime=0) Compress the *data*, returning a :class:`bytes` object containing the compressed data. *compresslevel* and *mtime* have the same meaning as in - the :class:`GzipFile` constructor above. + the :class:`GzipFile` constructor above, + but *mtime* defaults to 0 for reproducible output. .. versionadded:: 3.2 .. versionchanged:: 3.8 @@ -203,6 +204,10 @@ The module defines the following items: .. versionchanged:: 3.13 The gzip header OS byte is guaranteed to be set to 255 when this function is used as was the case in 3.10 and earlier. + .. versionchanged:: 3.14 + The *mtime* parameter now defaults to 0 for reproducible output. + For the previous behaviour of using the current time, + pass ``None`` to *mtime*. .. function:: decompress(data) diff --git a/Lib/gzip.py b/Lib/gzip.py index ba753ce3050..1a3c82ce7e0 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -580,12 +580,12 @@ class _GzipReader(_compression.DecompressReader): self._new_member = True -def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None): +def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0): """Compress data in one shot and return the compressed string. compresslevel sets the compression level in range of 0-9. - mtime can be used to set the modification time. The modification time is - set to the current time by default. + mtime can be used to set the modification time. + The modification time is set to 0 by default, for reproducibility. """ # Wbits=31 automatically includes a gzip header and trailer. gzip_data = zlib.compress(data, level=compresslevel, wbits=31) diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index ae384c3849d..bf6e1703db8 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -713,6 +713,17 @@ class TestGzip(BaseTest): f.read(1) # to set mtime attribute self.assertEqual(f.mtime, mtime) + def test_compress_mtime_default(self): + # test for gh-125260 + datac = gzip.compress(data1, mtime=0) + datac2 = gzip.compress(data1) + self.assertEqual(datac, datac2) + datac3 = gzip.compress(data1, mtime=None) + self.assertNotEqual(datac, datac3) + with gzip.GzipFile(fileobj=io.BytesIO(datac3), mode="rb") as f: + f.read(1) # to set mtime attribute + self.assertGreater(f.mtime, 1) + def test_compress_correct_level(self): for mtime in (0, 42): with self.subTest(mtime=mtime): diff --git a/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst b/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst new file mode 100644 index 00000000000..fab524ea018 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst @@ -0,0 +1,2 @@ +The :func:`gzip.compress` *mtime* parameter now defaults to 0 for reproducible output. +Patch by Bernhard M. Wiedemann and Adam Turner.