From 8f323d9aca1f9de2266684d5bc9ef0b15e786115 Mon Sep 17 00:00:00 2001
From: Tim Golden
Date: Tue, 6 Nov 2012 13:50:42 +0000
Subject: [PATCH] issue9584: Add {} list expansion to glob. Original patch by Mathieu Bridon

---
 Doc/library/glob.rst  | 11 ++++---
 Lib/glob.py           | 73 ++++++++++++++++++++++++++++++++-----------
 Lib/test/test_glob.py | 64 +++++++++++++++++++++++++++++++++++--
 Misc/NEWS             |  3 ++
 4 files changed, 126 insertions(+), 25 deletions(-)

diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst
index 3d31c116c83..7b00071d5f0 100644
--- a/Doc/library/glob.rst
+++ b/Doc/library/glob.rst
@@ -13,10 +13,10 @@
 
 The :mod:`glob` module finds all the pathnames matching a specified pattern
 according to the rules used by the Unix shell.  No tilde expansion is done, but
-``*``, ``?``, and character ranges expressed with ``[]`` will be correctly
-matched.  This is done by using the :func:`os.listdir` and
-:func:`fnmatch.fnmatch` functions in concert, and not by actually invoking a
-subshell.  (For tilde and shell variable expansion, use
+``*``, ``?``, character ranges expressed with ``[]`` and lists of options
+expressed with ``{}`` will be correctly matched.  This is done by using the
+:func:`os.listdir` and :func:`fnmatch.fnmatch` functions in concert, and not by
+actually invoking a subshell.  (For tilde and shell variable expansion, use
 :func:`os.path.expanduser` and :func:`os.path.expandvars`.)
 
 
@@ -47,7 +47,8 @@ preserved. ::
    ['1.gif', 'card.gif']
    >>> glob.glob('?.gif')
    ['1.gif']
-
+   >>> glob.glob('?.{gif,txt}')
+   ['1.gif', '2.txt']
 
 .. seealso::
 
diff --git a/Lib/glob.py b/Lib/glob.py
index 3431a695bbe..2d3132d1ea8 100644
--- a/Lib/glob.py
+++ b/Lib/glob.py
@@ -14,6 +14,7 @@ def glob(pathname):
     """
     return list(iglob(pathname))
 
+
 def iglob(pathname):
     """Return an iterator which yields the paths matching a pathname pattern.
 
@@ -24,21 +25,24 @@ def iglob(pathname):
         if os.path.lexists(pathname):
             yield pathname
         return
-    dirname, basename = os.path.split(pathname)
-    if not dirname:
-        yield from glob1(None, basename)
-        return
-    if has_magic(dirname):
-        dirs = iglob(dirname)
-    else:
-        dirs = [dirname]
-    if has_magic(basename):
-        glob_in_dir = glob1
-    else:
-        glob_in_dir = glob0
-    for dirname in dirs:
-        for name in glob_in_dir(dirname, basename):
-            yield os.path.join(dirname, name)
+    pathnames = expand_braces(pathname)
+    for pathname in pathnames:
+        dirname, basename = os.path.split(pathname)
+        if not dirname:
+            # Don't return: the other brace alternatives still need globbing.
+            yield from glob1(None, basename)
+            continue
+        if has_magic(dirname):
+            dirs = iglob(dirname)
+        else:
+            dirs = [dirname]
+        if has_magic(basename):
+            glob_in_dir = glob1
+        else:
+            glob_in_dir = glob0
+        for dirname in dirs:
+            for name in glob_in_dir(dirname, basename):
+                yield os.path.join(dirname, name)
 
 # These 2 helper functions non-recursively glob inside a literal directory.
 # They return a list of basenames. `glob1` accepts a pattern while `glob0`
@@ -70,12 +74,45 @@ def glob0(dirname, basename):
     return []
 
 
-magic_check = re.compile('[*?[]')
-magic_check_bytes = re.compile(b'[*?[]')
-
+magic_check = re.compile('[*?[{]')
+magic_check_bytes = re.compile(b'[*?[{]')
 def has_magic(s):
     if isinstance(s, bytes):
         match = magic_check_bytes.search(s)
     else:
         match = magic_check.search(s)
     return match is not None
+
+brace_matcher = re.compile(r'.*(\{.+?[^\\]\})')
+def expand_braces(text):
+    """Find the rightmost, innermost set of braces and, if it contains a
+    comma-separated list, expand its contents recursively (any of its items
+    may itself be a list enclosed in braces).
+
+    Return the full set of expanded strings.
+    """
+    if isinstance(text, bytes):
+        # ISO-8859-1 maps every byte value to exactly one character, so a
+        # bytes pattern survives the round trip through str unchanged.
+        return {s.encode('ISO-8859-1')
+                for s in expand_braces(text.decode('ISO-8859-1'))}
+
+    res = set()
+
+    match = brace_matcher.search(text)
+    if match is not None:
+        sub = match.group(1)
+        open_brace, close_brace = match.span(1)
+        if "," in sub:
+            # Drop exactly one brace per side; strip('{}') would also eat
+            # braces that belong to the first or last item.
+            for pat in sub[1:-1].split(','):
+                res.update(expand_braces(text[:open_brace] + pat + text[close_brace:]))
+
+        else:
+            res.update(expand_braces(text[:open_brace] + sub.replace('}', '\\}') + text[close_brace:]))
+
+    else:
+        res.add(text.replace('\\}', '}'))
+
+    return res
diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py
index 6ee08db0b6b..072247e8ef7 100644
--- a/Lib/test/test_glob.py
+++ b/Lib/test/test_glob.py
@@ -5,7 +5,7 @@ import glob
 import os
 import shutil
 
-class GlobTests(unittest.TestCase):
+class GlobTestsBase(unittest.TestCase):
 
     def norm(self, *parts):
         return os.path.normpath(os.path.join(self.tempdir, *parts))
@@ -45,6 +45,8 @@ class GlobTests(unittest.TestCase):
     def assertSequencesEqual_noorder(self, l1, l2):
         self.assertEqual(set(l1), set(l2))
 
+class GlobTests(GlobTestsBase):
+
     def test_glob_literal(self):
         eq = self.assertSequencesEqual_noorder
         eq(self.glob('a'), [self.norm('a')])
@@ -105,9 +107,67 @@ class GlobTests(unittest.TestCase):
             eq(self.glob('sym1'), [self.norm('sym1')])
             eq(self.glob('sym2'), [self.norm('sym2')])
 
+class GlobBracesTests(GlobTestsBase):
+
+    def setUp(self):
+        super(GlobBracesTests, self).setUp()
+        self.mktemp('c{}d')
+        self.mktemp('c{deg')
+        self.mktemp('c{dfg')
+        self.mktemp('cd{f}g')
+        self.mktemp('ce{f}g')
+        self.mktemp('cdf}g')
+        self.mktemp('cef}g')
+
+    def match_pattern_with_results(self, patterns, paths):
+        expected = [self.norm(path) for path in [os.path.join(*parts) for parts in paths]]
+        actual = [os.path.normpath(g) for g in self.glob(*patterns)]
+        self.assertSequencesEqual_noorder(actual, expected)
+
+    def test_two_terms(self):
+        self.match_pattern_with_results(['a{aa,ab}'], [["aaa"], ["aab"]])
+
+    def test_missing_first_plus_nested(self):
+        self.match_pattern_with_results(['a{,a{a,b}}'], [['a'], ['aaa'], ['aab']])
+
+    def test_one_subpath_with_two_file_terms(self):
+        self.match_pattern_with_results(['a', '{D,bcd}'], [['a', 'D'], ['a', 'bcd']])
+
+    def test_two_subpath_terms_with_two_file_terms(self):
+        self.match_pattern_with_results(['{aaa,aab}', '{F,zzzF}'], [('aaa', 'zzzF'), ('aab', 'F')])
+
+    def test_two_subpath_terms_with_wildcard_file_term(self):
+        self.match_pattern_with_results(['aa{a,b}', '*F'], [('aaa', 'zzzF'), ('aab', 'F')])
+
+    def test_wildcard_subpath_with_file_missing_first_term(self):
+        self.match_pattern_with_results(['aa?', '{,zzz}F'], [('aaa', 'zzzF'), ('aab', 'F')])
+
+    #
+    # Edge cases where braces should not be expanded
+    #
+    def test_empty_braces(self):
+        self.assertSequencesEqual_noorder(self.glob('c{}d'), [self.norm('c{}d')])
+
+    def test_missing_end_brace(self):
+        self.assertSequencesEqual_noorder(self.glob('c{d{e,f}g'), map(self.norm, ['c{deg', 'c{dfg']))
+
+    def test_second_brace_one_term(self):
+        self.assertSequencesEqual_noorder(self.glob('c{d,e}{f}g'), map(self.norm, ['cd{f}g', 'ce{f}g']))
+
+    def test_outer_term_missing_first_brace(self):
+        self.assertSequencesEqual_noorder(self.glob('c{d,e}f}g'), map(self.norm, ['cdf}g', 'cef}g']))
+
+    #
+    # Braces containing folder separators
+    #
+    def test_embedded_separator1(self):
+        self.match_pattern_with_results(['a/{D,bcd/{EF,efg}}'], [('a', 'D'), ('a', 'bcd', 'EF'), ('a', 'bcd', 'efg')])
+
+    def test_embedded_separator2(self):
+        self.match_pattern_with_results(['aa{a/zzz,b/}F'], [('aaa', 'zzzF'), ('aab', 'F')])
 
 def test_main():
-    run_unittest(GlobTests)
+    run_unittest(GlobTests, GlobBracesTests)
 
 
 if __name__ == "__main__":
diff --git a/Misc/NEWS b/Misc/NEWS
index 5ac9ec2c754..ee706700804 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #9584: glob.glob() and glob.iglob() now expand ``{a,b}`` brace lists
+  in patterns.  Original patch by Mathieu Bridon.
+
 - Issue #8271: the utf-8 decoder now outputs the correct number of U+FFFD
   characters when used with the 'replace' error handler on invalid utf-8
   sequences. Patch by Serhiy Storchaka, tests by Ezio Melotti.