mirror of
https://github.com/python/cpython.git
synced 2024-11-21 12:59:38 +01:00
gh-121267: Improve performance of tarfile (#121267) (#121269)
Tarfile in the default write mode spends much of its time resolving UIDs into usernames and GIDs into group names. By caching these mappings, a significant speedup can be achieved. In my simple benchmark[1], this extra caching speeds up tarfile by 8x.

[1] https://gist.github.com/jforberg/86af759c796199740c31547ae828aef2

---------

Co-authored-by: Tian Gao <gaogaotiantian@hotmail.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
This commit is contained in:
parent
616468b87b
commit
2b2d607095
@ -1760,6 +1760,8 @@ class TarFile(object):
|
||||
# current position in the archive file
|
||||
self.inodes = {} # dictionary caching the inodes of
|
||||
# archive members already added
|
||||
self._unames = {} # Cached mappings of uid -> uname
|
||||
self._gnames = {} # Cached mappings of gid -> gname
|
||||
|
||||
try:
|
||||
if self.mode == "r":
|
||||
@ -2138,16 +2140,23 @@ class TarFile(object):
|
||||
tarinfo.mtime = statres.st_mtime
|
||||
tarinfo.type = type
|
||||
tarinfo.linkname = linkname
|
||||
|
||||
# Calls to pwd.getpwuid() and grp.getgrgid() tend to be expensive. To
|
||||
# speed things up, cache the resolved usernames and group names.
|
||||
if pwd:
|
||||
try:
|
||||
tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
|
||||
except KeyError:
|
||||
pass
|
||||
if tarinfo.uid not in self._unames:
|
||||
try:
|
||||
self._unames[tarinfo.uid] = pwd.getpwuid(tarinfo.uid)[0]
|
||||
except KeyError:
|
||||
self._unames[tarinfo.uid] = ''
|
||||
tarinfo.uname = self._unames[tarinfo.uid]
|
||||
if grp:
|
||||
try:
|
||||
tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
|
||||
except KeyError:
|
||||
pass
|
||||
if tarinfo.gid not in self._gnames:
|
||||
try:
|
||||
self._gnames[tarinfo.gid] = grp.getgrgid(tarinfo.gid)[0]
|
||||
except KeyError:
|
||||
self._gnames[tarinfo.gid] = ''
|
||||
tarinfo.gname = self._gnames[tarinfo.gid]
|
||||
|
||||
if type in (CHRTYPE, BLKTYPE):
|
||||
if hasattr(os, "major") and hasattr(os, "minor"):
|
||||
|
@ -0,0 +1,2 @@
|
||||
Improve the performance of :mod:`tarfile` when writing files, by caching user names
|
||||
and group names.
|
Loading…
Reference in New Issue
Block a user