mirror of
https://github.com/python/cpython.git
synced 2024-11-24 02:15:30 +08:00
gh-102120: [TarFile] Add an iter function that doesn't cache (GH-102128)
This commit is contained in:
parent
097b7830cd
commit
50fce89d12
@ -318,7 +318,7 @@ be finalized; only the internally used file object will be closed. See the
|
||||
.. versionadded:: 3.2
|
||||
Added support for the context management protocol.
|
||||
|
||||
.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1)
|
||||
.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1, stream=False)
|
||||
|
||||
All following arguments are optional and can be accessed as instance attributes
|
||||
as well.
|
||||
@ -369,6 +369,9 @@ be finalized; only the internally used file object will be closed. See the
|
||||
The *pax_headers* argument is an optional dictionary of strings which
|
||||
will be added as a pax global header if *format* is :const:`PAX_FORMAT`.
|
||||
|
||||
If *stream* is set to :const:`True`, then while reading the archive, information about files
|
||||
in the archive is not cached, saving memory.
|
||||
|
||||
.. versionchanged:: 3.2
|
||||
Use ``'surrogateescape'`` as the default for the *errors* argument.
|
||||
|
||||
@ -378,6 +381,8 @@ be finalized; only the internally used file object will be closed. See the
|
||||
.. versionchanged:: 3.6
|
||||
The *name* parameter accepts a :term:`path-like object`.
|
||||
|
||||
.. versionchanged:: 3.13
|
||||
Add the *stream* parameter.
|
||||
|
||||
.. classmethod:: TarFile.open(...)
|
||||
|
||||
|
@ -1633,7 +1633,7 @@ class TarFile(object):
|
||||
def __init__(self, name=None, mode="r", fileobj=None, format=None,
|
||||
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
|
||||
errors="surrogateescape", pax_headers=None, debug=None,
|
||||
errorlevel=None, copybufsize=None):
|
||||
errorlevel=None, copybufsize=None, stream=False):
|
||||
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
|
||||
read from an existing archive, 'a' to append data to an existing
|
||||
file or 'w' to create a new file overwriting an existing one. `mode'
|
||||
@ -1665,6 +1665,8 @@ class TarFile(object):
|
||||
self.name = os.path.abspath(name) if name else None
|
||||
self.fileobj = fileobj
|
||||
|
||||
self.stream = stream
|
||||
|
||||
# Init attributes.
|
||||
if format is not None:
|
||||
self.format = format
|
||||
@ -2631,7 +2633,9 @@ class TarFile(object):
|
||||
break
|
||||
|
||||
if tarinfo is not None:
|
||||
self.members.append(tarinfo)
|
||||
# if streaming the file we do not want to cache the tarinfo
|
||||
if not self.stream:
|
||||
self.members.append(tarinfo)
|
||||
else:
|
||||
self._loaded = True
|
||||
|
||||
@ -2682,11 +2686,12 @@ class TarFile(object):
|
||||
|
||||
def _load(self):
|
||||
"""Read through the entire archive file and look for readable
|
||||
members.
|
||||
members. This should not run if the file is set to stream.
|
||||
"""
|
||||
while self.next() is not None:
|
||||
pass
|
||||
self._loaded = True
|
||||
if not self.stream:
|
||||
while self.next() is not None:
|
||||
pass
|
||||
self._loaded = True
|
||||
|
||||
def _check(self, mode=None):
|
||||
"""Check if TarFile is still open, and if the operation's mode
|
||||
|
@ -100,6 +100,14 @@ class ReadTest(TarTest):
|
||||
def tearDown(self):
|
||||
self.tar.close()
|
||||
|
||||
class StreamModeTest(ReadTest):
|
||||
|
||||
# Only needs to change how the tarfile is opened to set
|
||||
# stream mode
|
||||
def setUp(self):
|
||||
self.tar = tarfile.open(self.tarname, mode=self.mode,
|
||||
encoding="iso8859-1",
|
||||
stream=True)
|
||||
|
||||
class UstarReadTest(ReadTest, unittest.TestCase):
|
||||
|
||||
@ -852,6 +860,21 @@ class Bz2StreamReadTest(Bz2Test, StreamReadTest):
|
||||
class LzmaStreamReadTest(LzmaTest, StreamReadTest):
|
||||
pass
|
||||
|
||||
class TarStreamModeReadTest(StreamModeTest, unittest.TestCase):
|
||||
|
||||
def test_stream_mode_no_cache(self):
|
||||
for _ in self.tar:
|
||||
pass
|
||||
self.assertEqual(self.tar.members, [])
|
||||
|
||||
class GzipStreamModeReadTest(GzipTest, TarStreamModeReadTest):
|
||||
pass
|
||||
|
||||
class Bz2StreamModeReadTest(Bz2Test, TarStreamModeReadTest):
|
||||
pass
|
||||
|
||||
class LzmaStreamModeReadTest(LzmaTest, TarStreamModeReadTest):
|
||||
pass
|
||||
|
||||
class DetectReadTest(TarTest, unittest.TestCase):
|
||||
def _testfunc_file(self, name, mode):
|
||||
|
@ -0,0 +1,2 @@
|
||||
Added a stream mode to ``tarfile`` that allows reading
|
||||
archives without caching information about their members.
|
Loading…
Reference in New Issue
Block a user