Patch #943206:
`glob.glob()` currently calls itself recursively to build a list of matches of the dirname part of the pattern and then filters by the basename part. This is effectively BFS. ``glob.glob('*/*/*/*/*/foo')`` will build a huge list of all directories 5 levels deep even if only a handful of them contain a ``foo`` entry. A generator-based recusion would never have to store these list at once by implementing DFS. This patch converts the `glob` function to an `iglob` recursive generator . `glob()` now just returns ``list(iglob(pattern))``. I also cleaned up the code a bit (reduced duplicate `has_magic()` checks and created a second `glob0` helper func so that the main loop need not be duplicated). Thanks to Cherniavsky Beni for the patch!
This commit is contained in:
parent
e4172eadf3
commit
836f5433f7
|
@ -16,7 +16,7 @@ and \function{os.path.expandvars()}.)
|
|||
\index{filenames!pathname expansion}
|
||||
|
||||
\begin{funcdesc}{glob}{pathname}
|
||||
Returns a possibly-empty list of path names that match \var{pathname},
|
||||
Return a possibly-empty list of path names that match \var{pathname},
|
||||
which must be a string containing a path specification.
|
||||
\var{pathname} can be either absolute (like
|
||||
\file{/usr/src/Python-1.5/Makefile}) or relative (like
|
||||
|
@ -24,6 +24,12 @@ which must be a string containing a path specification.
|
|||
Broken symlinks are included in the results (as in the shell).
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{iglob}{pathname}
|
||||
Return an iterator which yields the same values as \function{glob()}
|
||||
without actually storing them all simultaneously.
|
||||
\versionadded{2.5}
|
||||
\end{funcdesc}
|
||||
|
||||
For example, consider a directory containing only the following files:
|
||||
\file{1.gif}, \file{2.txt}, and \file{card.gif}. \function{glob()}
|
||||
will produce the following results. Notice how any leading components
|
||||
|
|
62
Lib/glob.py
62
Lib/glob.py
|
@ -4,43 +4,50 @@ import os
|
|||
import fnmatch
|
||||
import re
|
||||
|
||||
__all__ = ["glob"]
|
||||
__all__ = ["glob", "iglob"]
|
||||
|
||||
def glob(pathname):
|
||||
"""Return a list of paths matching a pathname pattern.
|
||||
|
||||
The pattern may contain simple shell-style wildcards a la fnmatch.
|
||||
|
||||
"""
|
||||
return list(iglob(pathname))
|
||||
|
||||
def iglob(pathname):
|
||||
"""Return a list of paths matching a pathname pattern.
|
||||
|
||||
The pattern may contain simple shell-style wildcards a la fnmatch.
|
||||
|
||||
"""
|
||||
if not has_magic(pathname):
|
||||
if os.path.lexists(pathname):
|
||||
return [pathname]
|
||||
else:
|
||||
return []
|
||||
yield pathname
|
||||
return
|
||||
dirname, basename = os.path.split(pathname)
|
||||
if not dirname:
|
||||
return glob1(os.curdir, basename)
|
||||
elif has_magic(dirname):
|
||||
list = glob(dirname)
|
||||
for name in glob1(os.curdir, basename):
|
||||
yield name
|
||||
return
|
||||
if has_magic(dirname):
|
||||
dirs = iglob(dirname)
|
||||
else:
|
||||
list = [dirname]
|
||||
if not has_magic(basename):
|
||||
result = []
|
||||
for dirname in list:
|
||||
if basename or os.path.isdir(dirname):
|
||||
name = os.path.join(dirname, basename)
|
||||
if os.path.lexists(name):
|
||||
result.append(name)
|
||||
dirs = [dirname]
|
||||
if has_magic(basename):
|
||||
glob_in_dir = glob1
|
||||
else:
|
||||
result = []
|
||||
for dirname in list:
|
||||
sublist = glob1(dirname, basename)
|
||||
for name in sublist:
|
||||
result.append(os.path.join(dirname, name))
|
||||
return result
|
||||
glob_in_dir = glob0
|
||||
for dirname in dirs:
|
||||
for name in glob_in_dir(dirname, basename):
|
||||
yield os.path.join(dirname, name)
|
||||
|
||||
# These 2 helper functions non-recursively glob inside a literal directory.
|
||||
# They return a list of basenames. `glob1` accepts a pattern while `glob0`
|
||||
# takes a literal basename (so it only has to check for its existence).
|
||||
|
||||
def glob1(dirname, pattern):
|
||||
if not dirname: dirname = os.curdir
|
||||
if not dirname:
|
||||
dirname = os.curdir
|
||||
try:
|
||||
names = os.listdir(dirname)
|
||||
except os.error:
|
||||
|
@ -49,6 +56,17 @@ def glob1(dirname, pattern):
|
|||
names=filter(lambda x: x[0]!='.',names)
|
||||
return fnmatch.filter(names,pattern)
|
||||
|
||||
def glob0(dirname, basename):
|
||||
if basename == '':
|
||||
# `os.path.split()` returns an empty basename for paths ending with a
|
||||
# directory separator. 'q*x/' should match only directories.
|
||||
if os.isdir(dirname):
|
||||
return [basename]
|
||||
else:
|
||||
if os.path.lexists(os.path.join(dirname, basename)):
|
||||
return [basename]
|
||||
return []
|
||||
|
||||
|
||||
magic_check = re.compile('[*?[]')
|
||||
|
||||
|
|
|
@ -61,7 +61,9 @@ class GlobTests(unittest.TestCase):
|
|||
else:
|
||||
pattern = os.path.join(*parts)
|
||||
p = os.path.join(self.tempdir, pattern)
|
||||
return glob.glob(p)
|
||||
res = glob.glob(p)
|
||||
self.assertEqual(list(glob.iglob(p)), res)
|
||||
return res
|
||||
|
||||
def assertSequencesEqual_noorder(self, l1, l2):
|
||||
self.assertEqual(set(l1), set(l2))
|
||||
|
|
Loading…
Reference in New Issue