issue9584: Add {} list expansion to glob. Original patch by Mathieu Bridon
This commit is contained in:
parent
10ffea887c
commit
8f323d9aca
|
@ -13,10 +13,10 @@
|
|||
|
||||
The :mod:`glob` module finds all the pathnames matching a specified pattern
|
||||
according to the rules used by the Unix shell. No tilde expansion is done, but
|
||||
``*``, ``?``, and character ranges expressed with ``[]`` will be correctly
|
||||
matched. This is done by using the :func:`os.listdir` and
|
||||
:func:`fnmatch.fnmatch` functions in concert, and not by actually invoking a
|
||||
subshell. (For tilde and shell variable expansion, use
|
||||
``*``, ``?``, character ranges expressed with ``[]``, and lists of options
|
||||
expressed with ``{}`` will be correctly matched. This is done by using the
|
||||
:func:`os.listdir` and :func:`fnmatch.fnmatch` functions in concert, and not by
|
||||
actually invoking a subshell. (For tilde and shell variable expansion, use
|
||||
:func:`os.path.expanduser` and :func:`os.path.expandvars`.)
|
||||
|
||||
|
||||
|
@ -47,7 +47,8 @@ preserved. ::
|
|||
['1.gif', 'card.gif']
|
||||
>>> glob.glob('?.gif')
|
||||
['1.gif']
|
||||
|
||||
>>> glob.glob('?.{gif,txt}')
|
||||
['1.gif', '2.txt']
|
||||
|
||||
.. seealso::
|
||||
|
||||
|
|
65
Lib/glob.py
65
Lib/glob.py
|
@ -14,6 +14,7 @@ def glob(pathname):
|
|||
"""
|
||||
return list(iglob(pathname))
|
||||
|
||||
|
||||
def iglob(pathname):
|
||||
"""Return an iterator which yields the paths matching a pathname pattern.
|
||||
|
||||
|
@ -24,21 +25,24 @@ def iglob(pathname):
|
|||
if os.path.lexists(pathname):
|
||||
yield pathname
|
||||
return
|
||||
dirname, basename = os.path.split(pathname)
|
||||
if not dirname:
|
||||
yield from glob1(None, basename)
|
||||
return
|
||||
if has_magic(dirname):
|
||||
dirs = iglob(dirname)
|
||||
else:
|
||||
dirs = [dirname]
|
||||
if has_magic(basename):
|
||||
glob_in_dir = glob1
|
||||
else:
|
||||
glob_in_dir = glob0
|
||||
for dirname in dirs:
|
||||
for name in glob_in_dir(dirname, basename):
|
||||
yield os.path.join(dirname, name)
|
||||
pathnames = expand_braces(pathname)
|
||||
for pathname in pathnames:
|
||||
dirname, basename = os.path.split(pathname)
|
||||
if not dirname:
|
||||
yield from glob1(None, basename)
|
||||
return
|
||||
|
||||
if has_magic(dirname):
|
||||
dirs = iglob(dirname)
|
||||
else:
|
||||
dirs = [dirname]
|
||||
if has_magic(basename):
|
||||
glob_in_dir = glob1
|
||||
else:
|
||||
glob_in_dir = glob0
|
||||
for dirname in dirs:
|
||||
for name in glob_in_dir(dirname, basename):
|
||||
yield os.path.join(dirname, name)
|
||||
|
||||
# These 2 helper functions non-recursively glob inside a literal directory.
|
||||
# They return a list of basenames. `glob1` accepts a pattern while `glob0`
|
||||
|
@ -70,12 +74,37 @@ def glob0(dirname, basename):
|
|||
return []
|
||||
|
||||
|
||||
magic_check = re.compile('[*?[]')
|
||||
magic_check_bytes = re.compile(b'[*?[]')
|
||||
|
||||
# Any of these characters makes a pattern "magic": ``*``, ``?``, ``[`` and ``{``.
magic_check = re.compile('[*?[{]')
magic_check_bytes = re.compile(b'[*?[{]')


def has_magic(s):
    """Return True if *s* contains at least one glob metacharacter.

    *s* may be ``str`` or ``bytes``; the compiled pattern of the matching
    type is chosen so that ``re`` never mixes the two.
    """
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
|
||||
|
||||
# Capture the rightmost ``{...}`` group that holds at least two characters
# and is closed by a ``}`` not preceded by a backslash.
brace_matcher = re.compile(r'.*(\{.+?[^\\]\})')
brace_matcher_bytes = re.compile(br'.*(\{.+?[^\\]\})')


def expand_braces(text):
    """Expand shell-style ``{a,b}`` lists in *text*.

    The rightmost matching brace group is located.  If it contains a comma,
    one candidate is built per comma-separated item and each candidate is
    expanded again, so nested lists are handled recursively.  A group
    without a comma is not a list and is kept literally.

    *text* may be ``str`` or ``bytes``.  Return a set of fully expanded
    strings of the same type as *text*; when nothing can be expanded the
    set contains the single input pattern.
    """
    # Pick literals of the same type as the input so bytes patterns work.
    if isinstance(text, bytes):
        matcher = brace_matcher_bytes
        comma, close, escaped_close = b',', b'}', b'\\}'
    else:
        matcher = brace_matcher
        comma, close, escaped_close = ',', '}', '\\}'

    match = matcher.search(text)
    if match is None:
        # Nothing left to expand: undo the temporary escaping applied below.
        return {text.replace(escaped_close, close)}

    sub = match.group(1)
    open_brace, close_brace = match.span(1)
    head, tail = text[:open_brace], text[close_brace:]

    if comma not in sub:
        # Not a list.  Escape the closing brace(s) so the matcher skips this
        # group on the next pass and can look at groups further to the left.
        return expand_braces(head + sub.replace(close, escaped_close) + tail)

    res = set()
    # Drop exactly the enclosing pair of braces.  Stripping every leading or
    # trailing brace character would also eat literal braces that belong to
    # the first or last item.
    for pat in sub[1:-1].split(comma):
        res.update(expand_braces(head + pat + tail))
    return res
|
||||
|
|
|
@ -5,7 +5,7 @@ import glob
|
|||
import os
|
||||
import shutil
|
||||
|
||||
class GlobTests(unittest.TestCase):
|
||||
class GlobTestsBase(unittest.TestCase):
|
||||
|
||||
def norm(self, *parts):
|
||||
return os.path.normpath(os.path.join(self.tempdir, *parts))
|
||||
|
@ -45,6 +45,8 @@ class GlobTests(unittest.TestCase):
|
|||
def assertSequencesEqual_noorder(self, l1, l2):
|
||||
self.assertEqual(set(l1), set(l2))
|
||||
|
||||
class GlobTests(GlobTestsBase):
|
||||
|
||||
def test_glob_literal(self):
|
||||
eq = self.assertSequencesEqual_noorder
|
||||
eq(self.glob('a'), [self.norm('a')])
|
||||
|
@ -105,9 +107,67 @@ class GlobTests(unittest.TestCase):
|
|||
eq(self.glob('sym1'), [self.norm('sym1')])
|
||||
eq(self.glob('sym2'), [self.norm('sym2')])
|
||||
|
||||
class GlobBracesTests(GlobTestsBase):
    """Tests for ``{}`` list expansion in glob patterns."""

    def setUp(self):
        super().setUp()
        # Files whose names contain literal braces, used by the edge-case
        # tests where a brace group must not be expanded.
        for literal_name in ('c{}d', 'c{deg', 'c{dfg', 'cd{f}g', 'ce{f}g',
                             'cdf}g', 'cef}g'):
            self.mktemp(literal_name)

    def match_pattern_with_results(self, patterns, paths):
        """Glob the joined *patterns* and compare with the expected *paths*.

        *paths* is a sequence of path-component sequences relative to the
        temporary directory; the comparison ignores ordering.
        """
        wanted = [self.norm(os.path.join(*parts)) for parts in paths]
        found = [os.path.normpath(hit) for hit in self.glob(*patterns)]
        self.assertSequencesEqual_noorder(found, wanted)

    def test_two_terms(self):
        self.match_pattern_with_results(
            ['a{aa,ab}'],
            [["aaa"], ["aab"]])

    def test_missing_first_plus_nested(self):
        self.match_pattern_with_results(
            ['a{,a{a,b}}'],
            [['a'], ['aaa'], ['aab']])

    def test_one_subpath_with_two_file_terms(self):
        self.match_pattern_with_results(
            ['a', '{D,bcd}'],
            [['a', 'D'], ['a', 'bcd']])

    def test_two_subpath_terms_with_two_file_terms(self):
        self.match_pattern_with_results(
            ['{aaa,aab}', '{F,zzzF}'],
            [('aaa', 'zzzF'), ('aab', 'F')])

    def test_two_subpath_terms_with_wildcard_file_term(self):
        self.match_pattern_with_results(
            ['aa{a,b}', '*F'],
            [('aaa', 'zzzF'), ('aab', 'F')])

    def test_wildcard_subpath_with_file_missing_first_term(self):
        self.match_pattern_with_results(
            ['aa?', '{,zzz}F'],
            [('aaa', 'zzzF'), ('aab', 'F')])

    #
    # Edge cases where braces should not be expanded
    #
    def test_empty_braces(self):
        eq = self.assertSequencesEqual_noorder
        eq(self.glob('c{}d'), [self.norm('c{}d')])

    def test_missing_end_brace(self):
        eq = self.assertSequencesEqual_noorder
        eq(self.glob('c{d{e,f}g'),
           [self.norm(name) for name in ['c{deg', 'c{dfg']])

    def test_second_brace_one_term(self):
        eq = self.assertSequencesEqual_noorder
        eq(self.glob('c{d,e}{f}g'),
           [self.norm(name) for name in ['cd{f}g', 'ce{f}g']])

    def test_outer_term_missing_first_brace(self):
        eq = self.assertSequencesEqual_noorder
        eq(self.glob('c{d,e}f}g'),
           [self.norm(name) for name in ['cdf}g', 'cef}g']])

    #
    # Braces containing folder separators
    #
    def test_embedded_separator1(self):
        self.match_pattern_with_results(
            ['a/{D,bcd/{EF,efg}}'],
            [('a', 'D'), ('a', 'bcd', 'EF'), ('a', 'bcd', 'efg')])

    def test_embedded_separator2(self):
        self.match_pattern_with_results(
            ['aa{a/zzz,b/}F'],
            [('aaa', 'zzzF'), ('aab', 'F')])
|
||||
|
||||
def test_main():
|
||||
run_unittest(GlobTests)
|
||||
run_unittest(GlobTests, GlobBracesTests)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #9584: glob.glob now expands brace lists such as ``{a,b}`` in patterns.
|
||||
Original patch by Mathieu Bridon.
|
||||
|
||||
- Issue #8271: the utf-8 decoder now outputs the correct number of U+FFFD
|
||||
characters when used with the 'replace' error handler on invalid utf-8
|
||||
sequences. Patch by Serhiy Storchaka, tests by Ezio Melotti.
|
||||
|
|
Loading…
Reference in New Issue