diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index abcbf380d95..50f38a4d365 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -29,7 +29,7 @@ For example, ``'[?]'`` matches the character ``'?'``. The :mod:`pathlib` module offers high-level path objects. -.. function:: glob(pathname) +.. function:: glob(pathname, *, recursive=False) Return a possibly-empty list of path names that match *pathname*, which must be a string containing a path specification. *pathname* can be either absolute @@ -37,8 +37,19 @@ For example, ``'[?]'`` matches the character ``'?'``. :file:`../../Tools/\*/\*.gif`), and can contain shell-style wildcards. Broken symlinks are included in the results (as in the shell). + If *recursive* is true, the pattern "``**``" will match any files and zero or + more directories and subdirectories. If the pattern is followed by an + ``os.sep``, only directories and subdirectories match. -.. function:: iglob(pathname) + .. note:: + Using the "``**``" pattern in large directory trees may consume + an inordinate amount of time. + + .. versionchanged:: 3.5 + Support for recursive globs using "``**``". + + +.. function:: iglob(pathname, *, recursive=False) Return an :term:`iterator` which yields the same values as :func:`glob` without actually storing them all simultaneously. @@ -55,8 +66,9 @@ For example, ``'[?]'`` matches the character ``'?'``. .. versionadded:: 3.4 -For example, consider a directory containing only the following files: -:file:`1.gif`, :file:`2.txt`, and :file:`card.gif`. :func:`glob` will produce +For example, consider a directory containing the following files: +:file:`1.gif`, :file:`2.txt`, :file:`card.gif` and a subdirectory :file:`sub` +which contains only the file :file:`3.txt`. :func:`glob` will produce the following results. Notice how any leading components of the path are preserved. :: @@ -67,6 +79,10 @@ preserved. 
:: ['1.gif', 'card.gif'] >>> glob.glob('?.gif') ['1.gif'] + >>> glob.glob('**/*.txt', recursive=True) + ['2.txt', 'sub/3.txt'] + >>> glob.glob('./**/', recursive=True) + ['./', './sub/'] If the directory contains files starting with ``.`` they won't be matched by default. For example, consider a directory containing :file:`card.gif` and diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index 38ea2f203a6..d72e7f3f6bb 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -141,6 +141,13 @@ doctest *module* contains no docstrings instead of raising :exc:`ValueError` (contributed by Glenn Jones in :issue:`15916`). +glob +---- + +* :func:`~glob.iglob` and :func:`~glob.glob` now support recursive search in + subdirectories using the "``**``" pattern. + (Contributed by Serhiy Storchaka in :issue:`13968`.) + imaplib ------- diff --git a/Lib/glob.py b/Lib/glob.py index d6eca248eb7..56d670419a6 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -6,7 +6,7 @@ import fnmatch __all__ = ["glob", "iglob"] -def glob(pathname): +def glob(pathname, *, recursive=False): """Return a list of paths matching a pathname pattern. The pattern may contain simple shell-style wildcards a la @@ -14,10 +14,12 @@ def glob(pathname): dot are special cases that are not matched by '*' and '?' patterns. + If recursive is true, the pattern '**' will match any files and + zero or more directories and subdirectories. """ - return list(iglob(pathname)) + return list(iglob(pathname, recursive=recursive)) -def iglob(pathname): +def iglob(pathname, *, recursive=False): """Return an iterator which yields the paths matching a pathname pattern. The pattern may contain simple shell-style wildcards a la @@ -25,6 +27,8 @@ def iglob(pathname): dot are special cases that are not matched by '*' and '?' patterns. + If recursive is true, the pattern '**' will match any files and + zero or more directories and subdirectories. 
""" dirname, basename = os.path.split(pathname) if not has_magic(pathname): @@ -37,17 +41,23 @@ def iglob(pathname): yield pathname return if not dirname: - yield from glob1(None, basename) + if recursive and _isrecursive(basename): + yield from glob2(dirname, basename) + else: + yield from glob1(dirname, basename) return # `os.path.split()` returns the argument itself as a dirname if it is a # drive or UNC path. Prevent an infinite recursion if a drive or UNC path # contains magic characters (i.e. r'\\?\C:'). if dirname != pathname and has_magic(dirname): - dirs = iglob(dirname) + dirs = iglob(dirname, recursive=recursive) else: dirs = [dirname] if has_magic(basename): - glob_in_dir = glob1 + if recursive and _isrecursive(basename): + glob_in_dir = glob2 + else: + glob_in_dir = glob1 else: glob_in_dir = glob0 for dirname in dirs: @@ -83,6 +93,34 @@ def glob0(dirname, basename): return [basename] return [] +# This helper function recursively yields relative pathnames inside a literal +# directory. + +def glob2(dirname, pattern): + assert _isrecursive(pattern) + if dirname: + yield pattern[:0] + yield from _rlistdir(dirname) + +# Recursively yields relative pathnames inside a literal directory. + +def _rlistdir(dirname): + if not dirname: + if isinstance(dirname, bytes): + dirname = bytes(os.curdir, 'ASCII') + else: + dirname = os.curdir + try: + names = os.listdir(dirname) + except os.error: + return + for x in names: + if not _ishidden(x): + yield x + path = os.path.join(dirname, x) if dirname else x + for y in _rlistdir(path): + yield os.path.join(x, y) + magic_check = re.compile('([*?[])') magic_check_bytes = re.compile(b'([*?[])') @@ -97,6 +135,12 @@ def has_magic(s): def _ishidden(path): return path[0] in ('.', b'.'[0]) +def _isrecursive(pattern): + if isinstance(pattern, bytes): + return pattern == b'**' + else: + return pattern == '**' + def escape(pathname): """Escape all special characters. 
""" diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index a5ab8d6c3e9..72789d08c25 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -4,7 +4,7 @@ import shutil import sys import unittest -from test.support import (run_unittest, TESTFN, skip_unless_symlink, +from test.support import (TESTFN, skip_unless_symlink, can_symlink, create_empty_file) @@ -13,6 +13,9 @@ class GlobTests(unittest.TestCase): def norm(self, *parts): return os.path.normpath(os.path.join(self.tempdir, *parts)) + def joins(self, *tuples): + return [os.path.join(self.tempdir, *parts) for parts in tuples] + def mktemp(self, *parts): filename = self.norm(*parts) base, file = os.path.split(filename) @@ -38,17 +41,17 @@ class GlobTests(unittest.TestCase): def tearDown(self): shutil.rmtree(self.tempdir) - def glob(self, *parts): + def glob(self, *parts, **kwargs): if len(parts) == 1: pattern = parts[0] else: pattern = os.path.join(*parts) p = os.path.join(self.tempdir, pattern) - res = glob.glob(p) - self.assertEqual(list(glob.iglob(p)), res) + res = glob.glob(p, **kwargs) + self.assertEqual(list(glob.iglob(p, **kwargs)), res) bres = [os.fsencode(x) for x in res] - self.assertEqual(glob.glob(os.fsencode(p)), bres) - self.assertEqual(list(glob.iglob(os.fsencode(p))), bres) + self.assertEqual(glob.glob(os.fsencode(p), **kwargs), bres) + self.assertEqual(list(glob.iglob(os.fsencode(p), **kwargs)), bres) return res def assertSequencesEqual_noorder(self, l1, l2): @@ -192,9 +195,116 @@ class GlobTests(unittest.TestCase): check('//?/c:/?', '//?/c:/[?]') check('//*/*/*', '//*/*/[*]') -def test_main(): - run_unittest(GlobTests) + def rglob(self, *parts, **kwargs): + return self.glob(*parts, recursive=True, **kwargs) + + def test_recursive_glob(self): + eq = self.assertSequencesEqual_noorder + full = [('ZZZ',), + ('a',), ('a', 'D'), + ('a', 'bcd'), + ('a', 'bcd', 'EF'), + ('a', 'bcd', 'efg'), + ('a', 'bcd', 'efg', 'ha'), + ('aaa',), ('aaa', 'zzzF'), + ('aab',), ('aab', 'F'), + ] + if 
can_symlink(): + full += [('sym1',), ('sym2',), + ('sym3',), + ('sym3', 'EF'), + ('sym3', 'efg'), + ('sym3', 'efg', 'ha'), + ] + eq(self.rglob('**'), self.joins(('',), *full)) + eq(self.rglob('.', '**'), self.joins(('.',''), + *(('.',) + i for i in full))) + dirs = [('a', ''), ('a', 'bcd', ''), ('a', 'bcd', 'efg', ''), + ('aaa', ''), ('aab', '')] + if can_symlink(): + dirs += [('sym3', ''), ('sym3', 'efg', '')] + eq(self.rglob('**', ''), self.joins(('',), *dirs)) + + eq(self.rglob('a', '**'), self.joins( + ('a', ''), ('a', 'D'), ('a', 'bcd'), ('a', 'bcd', 'EF'), + ('a', 'bcd', 'efg'), ('a', 'bcd', 'efg', 'ha'))) + eq(self.rglob('a**'), self.joins(('a',), ('aaa',), ('aab',))) + expect = [('a', 'bcd', 'EF')] + if can_symlink(): + expect += [('sym3', 'EF')] + eq(self.rglob('**', 'EF'), self.joins(*expect)) + expect = [('a', 'bcd', 'EF'), ('aaa', 'zzzF'), ('aab', 'F')] + if can_symlink(): + expect += [('sym3', 'EF')] + eq(self.rglob('**', '*F'), self.joins(*expect)) + eq(self.rglob('**', '*F', ''), []) + eq(self.rglob('**', 'bcd', '*'), self.joins( + ('a', 'bcd', 'EF'), ('a', 'bcd', 'efg'))) + eq(self.rglob('a', '**', 'bcd'), self.joins(('a', 'bcd'))) + + predir = os.path.abspath(os.curdir) + try: + os.chdir(self.tempdir) + join = os.path.join + eq(glob.glob('**', recursive=True), [join(*i) for i in full]) + eq(glob.glob(join('**', ''), recursive=True), + [join(*i) for i in dirs]) + eq(glob.glob(join('**','zz*F'), recursive=True), + [join('aaa', 'zzzF')]) + eq(glob.glob('**zz*F', recursive=True), []) + expect = [join('a', 'bcd', 'EF')] + if can_symlink(): + expect += [join('sym3', 'EF')] + eq(glob.glob(join('**', 'EF'), recursive=True), expect) + finally: + os.chdir(predir) + + +@skip_unless_symlink +class SymlinkLoopGlobTests(unittest.TestCase): + + def test_selflink(self): + tempdir = TESTFN + "_dir" + os.makedirs(tempdir) + create_empty_file(os.path.join(tempdir, 'file')) + os.symlink(os.curdir, os.path.join(tempdir, 'link')) + self.addCleanup(shutil.rmtree, 
tempdir) + + results = glob.glob('**', recursive=True) + self.assertEqual(len(results), len(set(results))) + results = set(results) + depth = 0 + while results: + path = os.path.join(*([tempdir] + ['link'] * depth)) + self.assertIn(path, results) + results.remove(path) + if not results: + break + path = os.path.join(path, 'file') + self.assertIn(path, results) + results.remove(path) + depth += 1 + + results = glob.glob(os.path.join('**', 'file'), recursive=True) + self.assertEqual(len(results), len(set(results))) + results = set(results) + depth = 0 + while results: + path = os.path.join(*([tempdir] + ['link'] * depth + ['file'])) + self.assertIn(path, results) + results.remove(path) + depth += 1 + + results = glob.glob(os.path.join('**', ''), recursive=True) + self.assertEqual(len(results), len(set(results))) + results = set(results) + depth = 0 + while results: + path = os.path.join(*([tempdir] + ['link'] * depth + [''])) + self.assertIn(path, results) + results.remove(path) + depth += 1 if __name__ == "__main__": - test_main() + unittest.main() diff --git a/Misc/NEWS b/Misc/NEWS index b9a112de894..38235b823c7 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -132,6 +132,9 @@ Core and Builtins Library ------- +- Issue #13968: The glob module now supports recursive search in + subdirectories using the "**" pattern. + - Issue #21951: Fixed a crash in Tkinter on AIX when called Tcl command with empty string or tuple argument.