issue9584: Add {} list expansion to glob. Original patch by Mathieu Bridon

This commit is contained in:
Tim Golden 2012-11-06 13:50:42 +00:00
parent 10ffea887c
commit 8f323d9aca
4 changed files with 118 additions and 25 deletions

View File

@ -13,10 +13,10 @@
The :mod:`glob` module finds all the pathnames matching a specified pattern The :mod:`glob` module finds all the pathnames matching a specified pattern
according to the rules used by the Unix shell. No tilde expansion is done, but according to the rules used by the Unix shell. No tilde expansion is done, but
``*``, ``?``, and character ranges expressed with ``[]`` will be correctly ``*``, ``?``, character ranges expressed with ``[]`` and lists of options
matched. This is done by using the :func:`os.listdir` and expressed with ``{}`` will be correctly matched. This is done by using the
:func:`fnmatch.fnmatch` functions in concert, and not by actually invoking a :func:`os.listdir` and :func:`fnmatch.fnmatch` functions in concert, and not by
subshell. (For tilde and shell variable expansion, use actually invoking a subshell. (For tilde and shell variable expansion, use
:func:`os.path.expanduser` and :func:`os.path.expandvars`.) :func:`os.path.expanduser` and :func:`os.path.expandvars`.)
@ -47,7 +47,8 @@ preserved. ::
['1.gif', 'card.gif'] ['1.gif', 'card.gif']
>>> glob.glob('?.gif') >>> glob.glob('?.gif')
['1.gif'] ['1.gif']
>>> glob.glob('?.{gif,txt}')
['1.gif', '2.txt']
.. seealso:: .. seealso::

View File

@ -14,6 +14,7 @@ def glob(pathname):
""" """
return list(iglob(pathname)) return list(iglob(pathname))
def iglob(pathname): def iglob(pathname):
"""Return an iterator which yields the paths matching a pathname pattern. """Return an iterator which yields the paths matching a pathname pattern.
@ -24,21 +25,24 @@ def iglob(pathname):
if os.path.lexists(pathname): if os.path.lexists(pathname):
yield pathname yield pathname
return return
dirname, basename = os.path.split(pathname) pathnames = expand_braces(pathname)
if not dirname: for pathname in pathnames:
yield from glob1(None, basename) dirname, basename = os.path.split(pathname)
return if not dirname:
if has_magic(dirname): yield from glob1(None, basename)
dirs = iglob(dirname) return
else:
dirs = [dirname] if has_magic(dirname):
if has_magic(basename): dirs = iglob(dirname)
glob_in_dir = glob1 else:
else: dirs = [dirname]
glob_in_dir = glob0 if has_magic(basename):
for dirname in dirs: glob_in_dir = glob1
for name in glob_in_dir(dirname, basename): else:
yield os.path.join(dirname, name) glob_in_dir = glob0
for dirname in dirs:
for name in glob_in_dir(dirname, basename):
yield os.path.join(dirname, name)
# These 2 helper functions non-recursively glob inside a literal directory. # These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. `glob1` accepts a pattern while `glob0` # They return a list of basenames. `glob1` accepts a pattern while `glob0`
@ -70,12 +74,37 @@ def glob0(dirname, basename):
return [] return []
magic_check = re.compile('[*?[]') magic_check = re.compile('[*?[{]')
magic_check_bytes = re.compile(b'[*?[]') magic_check_bytes = re.compile(b'[*?[{]')
def has_magic(s): def has_magic(s):
if isinstance(s, bytes): if isinstance(s, bytes):
match = magic_check_bytes.search(s) match = magic_check_bytes.search(s)
else: else:
match = magic_check.search(s) match = magic_check.search(s)
return match is not None return match is not None
# Captures the last brace group in the text: the greedy ``.*`` prefix pushes
# the captured group as far right as it can go, and ``[^\\]\}`` refuses a
# closing brace that is immediately preceded by a backslash.
brace_matcher = re.compile(r'.*(\{.+?[^\\]\})')
# Same pattern for bytes pathnames; a str pattern cannot search bytes.
brace_matcher_bytes = re.compile(br'.*(\{.+?[^\\]\})')


def expand_braces(text):
    """Expand ``{a,b,...}`` option lists found in *text*.

    Find the rightmost, innermost set of braces and, if it contains a
    comma-separated list, expand its contents recursively (any of its items
    may itself be a list enclosed in braces).

    *text* may be ``str`` or ``bytes``; the expanded strings have the same
    type as *text*.

    Return the full set of expanded strings.  A brace group without a comma
    is kept literally, and text without any expandable group is returned as
    the single element of the set.
    """
    # Pick literals matching the input type, mirroring has_magic(), so that
    # bytes patterns are expanded instead of raising TypeError.
    if isinstance(text, bytes):
        matcher = brace_matcher_bytes
        comma, braces = b',', b'{}'
        closing, escaped_closing = b'}', b'\\}'
    else:
        matcher = brace_matcher
        comma, braces = ',', '{}'
        closing, escaped_closing = '}', '\\}'

    match = matcher.search(text)
    if match is None:
        # Nothing left to expand: undo the escaping applied below to brace
        # groups that turned out not to be option lists.
        return {text.replace(escaped_closing, closing)}

    sub = match.group(1)
    open_brace, close_brace = match.span(1)
    head, tail = text[:open_brace], text[close_brace:]

    if comma in sub:
        replacements = sub.strip(braces).split(comma)
    else:
        # Not a list: escape the closing brace so the matcher skips this
        # group on the next pass and moves on to any group further left.
        replacements = [sub.replace(closing, escaped_closing)]

    res = set()
    for replacement in replacements:
        res.update(expand_braces(head + replacement + tail))
    return res

View File

@ -5,7 +5,7 @@ import glob
import os import os
import shutil import shutil
class GlobTests(unittest.TestCase): class GlobTestsBase(unittest.TestCase):
def norm(self, *parts): def norm(self, *parts):
return os.path.normpath(os.path.join(self.tempdir, *parts)) return os.path.normpath(os.path.join(self.tempdir, *parts))
@ -45,6 +45,8 @@ class GlobTests(unittest.TestCase):
def assertSequencesEqual_noorder(self, l1, l2): def assertSequencesEqual_noorder(self, l1, l2):
self.assertEqual(set(l1), set(l2)) self.assertEqual(set(l1), set(l2))
class GlobTests(GlobTestsBase):
def test_glob_literal(self): def test_glob_literal(self):
eq = self.assertSequencesEqual_noorder eq = self.assertSequencesEqual_noorder
eq(self.glob('a'), [self.norm('a')]) eq(self.glob('a'), [self.norm('a')])
@ -105,9 +107,67 @@ class GlobTests(unittest.TestCase):
eq(self.glob('sym1'), [self.norm('sym1')]) eq(self.glob('sym1'), [self.norm('sym1')])
eq(self.glob('sym2'), [self.norm('sym2')]) eq(self.glob('sym2'), [self.norm('sym2')])
class GlobBracesTests(GlobTestsBase):
    """Tests for ``{}`` option-list expansion in glob patterns."""

    def setUp(self):
        super().setUp()
        # Extra fixture entries whose names contain literal braces; mktemp
        # comes from the base class (presumably it creates the entry under
        # the temporary directory -- confirm against GlobTestsBase).
        for literal_name in ('c{}d', 'c{deg', 'c{dfg',
                             'cd{f}g', 'ce{f}g',
                             'cdf}g', 'cef}g'):
            self.mktemp(literal_name)

    def match_pattern_with_results(self, patterns, paths):
        """Glob *patterns* and compare with *paths*, ignoring order.

        *patterns* holds the pattern components handed to ``self.glob``;
        *paths* is a sequence of path-component sequences, each of which is
        joined and passed through ``self.norm`` to build the expected value.
        """
        expected = [self.norm(os.path.join(*parts)) for parts in paths]
        actual = [os.path.normpath(found) for found in self.glob(*patterns)]
        self.assertSequencesEqual_noorder(actual, expected)

    def test_two_terms(self):
        self.match_pattern_with_results(
            ['a{aa,ab}'],
            [["aaa"], ["aab"]])

    def test_missing_first_plus_nested(self):
        self.match_pattern_with_results(
            ['a{,a{a,b}}'],
            [['a'], ['aaa'], ['aab']])

    def test_one_subpath_with_two_file_terms(self):
        self.match_pattern_with_results(
            ['a', '{D,bcd}'],
            [['a', 'D'], ['a', 'bcd']])

    def test_two_subpath_terms_with_two_file_terms(self):
        self.match_pattern_with_results(
            ['{aaa,aab}', '{F,zzzF}'],
            [('aaa', 'zzzF'), ('aab', 'F')])

    def test_two_subpath_terms_with_wildcard_file_term(self):
        self.match_pattern_with_results(
            ['aa{a,b}', '*F'],
            [('aaa', 'zzzF'), ('aab', 'F')])

    def test_wildcard_subpath_with_file_missing_first_term(self):
        self.match_pattern_with_results(
            ['aa?', '{,zzz}F'],
            [('aaa', 'zzzF'), ('aab', 'F')])

    #
    # Edge cases where braces should not be expanded
    #
    def test_empty_braces(self):
        found = self.glob('c{}d')
        self.assertSequencesEqual_noorder(found, [self.norm('c{}d')])

    def test_missing_end_brace(self):
        found = self.glob('c{d{e,f}g')
        wanted = [self.norm(name) for name in ['c{deg', 'c{dfg']]
        self.assertSequencesEqual_noorder(found, wanted)

    def test_second_brace_one_term(self):
        found = self.glob('c{d,e}{f}g')
        wanted = [self.norm(name) for name in ['cd{f}g', 'ce{f}g']]
        self.assertSequencesEqual_noorder(found, wanted)

    def test_outer_term_missing_first_brace(self):
        found = self.glob('c{d,e}f}g')
        wanted = [self.norm(name) for name in ['cdf}g', 'cef}g']]
        self.assertSequencesEqual_noorder(found, wanted)

    #
    # Braces containing folder separators
    #
    def test_embedded_separator1(self):
        self.match_pattern_with_results(
            ['a/{D,bcd/{EF,efg}}'],
            [('a', 'D'), ('a', 'bcd', 'EF'), ('a', 'bcd', 'efg')])

    def test_embedded_separator2(self):
        self.match_pattern_with_results(
            ['aa{a/zzz,b/}F'],
            [('aaa', 'zzzF'), ('aab', 'F')])
def test_main(): def test_main():
run_unittest(GlobTests) run_unittest(GlobTests, GlobBracesTests)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #9584: glob.glob now expands braces to a list of strings.
Original patch by Mathieu Bridon.
- Issue #8271: the utf-8 decoder now outputs the correct number of U+FFFD - Issue #8271: the utf-8 decoder now outputs the correct number of U+FFFD
characters when used with the 'replace' error handler on invalid utf-8 characters when used with the 'replace' error handler on invalid utf-8
sequences. Patch by Serhiy Storchaka, tests by Ezio Melotti. sequences. Patch by Serhiy Storchaka, tests by Ezio Melotti.