gh-125260: Change the default `gzip.compress()` mtime to 0 (#125261)

This follows GNU gzip, which defaults to using 0 as the mtime
for compressing stdin, where no file mtime is involved.

This makes the output of gzip.compress() deterministic by default,
greatly helping reproducible builds.

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
This commit is contained in:
Bernhard M. Wiedemann 2024-10-12 19:18:48 +02:00 committed by GitHub
parent 9944ad388c
commit dcd58c5084
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 23 additions and 5 deletions

View File

@ -184,11 +184,12 @@ The module defines the following items:
attribute instead.
.. function:: compress(data, compresslevel=9, *, mtime=None)
.. function:: compress(data, compresslevel=9, *, mtime=0)
Compress the *data*, returning a :class:`bytes` object containing
the compressed data. *compresslevel* and *mtime* have the same meaning as in
the :class:`GzipFile` constructor above.
the :class:`GzipFile` constructor above,
but *mtime* defaults to 0 for reproducible output.
.. versionadded:: 3.2
.. versionchanged:: 3.8
@ -203,6 +204,10 @@ The module defines the following items:
.. versionchanged:: 3.13
The gzip header OS byte is guaranteed to be set to 255 when this function
is used, as was the case in 3.10 and earlier.
.. versionchanged:: 3.14
The *mtime* parameter now defaults to 0 for reproducible output.
For the previous behaviour of using the current time,
pass ``None`` to *mtime*.
.. function:: decompress(data)

View File

@ -580,12 +580,12 @@ class _GzipReader(_compression.DecompressReader):
self._new_member = True
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0):
"""Compress data in one shot and return the compressed string.
compresslevel sets the compression level in range of 0-9.
mtime can be used to set the modification time. The modification time is
set to the current time by default.
mtime can be used to set the modification time.
The modification time is set to 0 by default, for reproducibility.
"""
# Wbits=31 automatically includes a gzip header and trailer.
gzip_data = zlib.compress(data, level=compresslevel, wbits=31)

View File

@ -713,6 +713,17 @@ class TestGzip(BaseTest):
f.read(1) # to set mtime attribute
self.assertEqual(f.mtime, mtime)
def test_compress_mtime_default(self):
# test for gh-125260
datac = gzip.compress(data1, mtime=0)
datac2 = gzip.compress(data1)
self.assertEqual(datac, datac2)
datac3 = gzip.compress(data1, mtime=None)
self.assertNotEqual(datac, datac3)
with gzip.GzipFile(fileobj=io.BytesIO(datac3), mode="rb") as f:
f.read(1) # to set mtime attribute
self.assertGreater(f.mtime, 1)
def test_compress_correct_level(self):
for mtime in (0, 42):
with self.subTest(mtime=mtime):

View File

@ -0,0 +1,2 @@
The :func:`gzip.compress` *mtime* parameter now defaults to 0 for reproducible output.
Patch by Bernhard M. Wiedemann and Adam Turner.