mirror of https://github.com/python/cpython
GH-72904: Add `glob.translate()` function (#106703)
Add `glob.translate()` function that converts a pathname with shell wildcards to a regular expression. The regular expression is used by pathlib to implement `match()` and `glob()`. This function differs from `fnmatch.translate()` in that wildcards do not match path separators by default, and that a `*` pattern segment matches precisely one path segment. When *recursive* is set to true, `**` pattern segments match any number of path segments, and `**` cannot appear outside its own segment. In pathlib, this change speeds up directory walking (because `_make_child_relpath()` does less work), makes path objects smaller (they don't need a `_lines` slot), and removes the need for some gnarly code. Co-authored-by: Jason R. Coombs <jaraco@jaraco.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
This commit is contained in:
parent
babb787047
commit
cf67ebfb31
|
@ -145,6 +145,45 @@ default. For example, consider a directory containing :file:`card.gif` and
|
||||||
>>> glob.glob('.c*')
|
>>> glob.glob('.c*')
|
||||||
['.card.gif']
|
['.card.gif']
|
||||||
|
|
||||||
|
|
||||||
|
.. function:: translate(pathname, *, recursive=False, include_hidden=False, seps=None)
|
||||||
|
|
||||||
|
Convert the given path specification to a regular expression for use with
|
||||||
|
:func:`re.match`. The path specification can contain shell-style wildcards.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
>>> import glob, re
|
||||||
|
>>>
|
||||||
|
>>> regex = glob.translate('**/*.txt', recursive=True, include_hidden=True)
|
||||||
|
>>> regex
|
||||||
|
'(?s:(?:.+/)?[^/]*\\.txt)\\Z'
|
||||||
|
>>> reobj = re.compile(regex)
|
||||||
|
>>> reobj.match('foo/bar/baz.txt')
|
||||||
|
<re.Match object; span=(0, 15), match='foo/bar/baz.txt'>
|
||||||
|
|
||||||
|
Path separators and segments are meaningful to this function, unlike
|
||||||
|
:func:`fnmatch.translate`. By default wildcards do not match path
|
||||||
|
separators, and ``*`` pattern segments match precisely one path segment.
|
||||||
|
|
||||||
|
If *recursive* is true, the pattern segment "``**``" will match any number
|
||||||
|
of path segments. If "``**``" occurs in any position other than a full
|
||||||
|
pattern segment, :exc:`ValueError` is raised.
|
||||||
|
|
||||||
|
If *include_hidden* is true, wildcards can match path segments that start
|
||||||
|
with a dot (``.``).
|
||||||
|
|
||||||
|
A sequence of path separators may be supplied to the *seps* argument. If
|
||||||
|
not given, :data:`os.sep` and :data:`~os.altsep` (if available) are used.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
|
||||||
|
:meth:`pathlib.PurePath.match` and :meth:`pathlib.Path.glob` methods,
|
||||||
|
which call this function to implement pattern matching and globbing.
|
||||||
|
|
||||||
|
.. versionadded:: 3.13
|
||||||
|
|
||||||
|
|
||||||
.. seealso::
|
.. seealso::
|
||||||
|
|
||||||
Module :mod:`fnmatch`
|
Module :mod:`fnmatch`
|
||||||
|
|
|
@ -183,6 +183,13 @@ doctest
|
||||||
:attr:`doctest.TestResults.skipped` attributes.
|
:attr:`doctest.TestResults.skipped` attributes.
|
||||||
(Contributed by Victor Stinner in :gh:`108794`.)
|
(Contributed by Victor Stinner in :gh:`108794`.)
|
||||||
|
|
||||||
|
glob
|
||||||
|
----
|
||||||
|
|
||||||
|
* Add :func:`glob.translate` function that converts a path specification with
|
||||||
|
shell-style wildcards to a regular expression.
|
||||||
|
(Contributed by Barney Gale in :gh:`72904`.)
|
||||||
|
|
||||||
io
|
io
|
||||||
--
|
--
|
||||||
|
|
||||||
|
|
|
@ -78,6 +78,11 @@ def translate(pat):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
STAR = object()
|
STAR = object()
|
||||||
|
parts = _translate(pat, STAR, '.')
|
||||||
|
return _join_translated_parts(parts, STAR)
|
||||||
|
|
||||||
|
|
||||||
|
def _translate(pat, STAR, QUESTION_MARK):
|
||||||
res = []
|
res = []
|
||||||
add = res.append
|
add = res.append
|
||||||
i, n = 0, len(pat)
|
i, n = 0, len(pat)
|
||||||
|
@ -89,7 +94,7 @@ def translate(pat):
|
||||||
if (not res) or res[-1] is not STAR:
|
if (not res) or res[-1] is not STAR:
|
||||||
add(STAR)
|
add(STAR)
|
||||||
elif c == '?':
|
elif c == '?':
|
||||||
add('.')
|
add(QUESTION_MARK)
|
||||||
elif c == '[':
|
elif c == '[':
|
||||||
j = i
|
j = i
|
||||||
if j < n and pat[j] == '!':
|
if j < n and pat[j] == '!':
|
||||||
|
@ -146,9 +151,11 @@ def translate(pat):
|
||||||
else:
|
else:
|
||||||
add(re.escape(c))
|
add(re.escape(c))
|
||||||
assert i == n
|
assert i == n
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def _join_translated_parts(inp, STAR):
|
||||||
# Deal with STARs.
|
# Deal with STARs.
|
||||||
inp = res
|
|
||||||
res = []
|
res = []
|
||||||
add = res.append
|
add = res.append
|
||||||
i, n = 0, len(inp)
|
i, n = 0, len(inp)
|
||||||
|
|
60
Lib/glob.py
60
Lib/glob.py
|
@ -249,3 +249,63 @@ def escape(pathname):
|
||||||
|
|
||||||
|
|
||||||
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
|
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
|
||||||
|
|
||||||
|
|
||||||
|
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
|
||||||
|
"""Translate a pathname with shell wildcards to a regular expression.
|
||||||
|
|
||||||
|
If `recursive` is true, the pattern segment '**' will match any number of
|
||||||
|
path segments; if '**' appears outside its own segment, ValueError will be
|
||||||
|
raised.
|
||||||
|
|
||||||
|
If `include_hidden` is true, wildcards can match path segments beginning
|
||||||
|
with a dot ('.').
|
||||||
|
|
||||||
|
If a sequence of separator characters is given to `seps`, they will be
|
||||||
|
used to split the pattern into segments and match path separators. If not
|
||||||
|
given, os.path.sep and os.path.altsep (where available) are used.
|
||||||
|
"""
|
||||||
|
if not seps:
|
||||||
|
if os.path.altsep:
|
||||||
|
seps = (os.path.sep, os.path.altsep)
|
||||||
|
else:
|
||||||
|
seps = os.path.sep
|
||||||
|
escaped_seps = ''.join(map(re.escape, seps))
|
||||||
|
any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
|
||||||
|
not_sep = f'[^{escaped_seps}]'
|
||||||
|
if include_hidden:
|
||||||
|
one_last_segment = f'{not_sep}+'
|
||||||
|
one_segment = f'{one_last_segment}{any_sep}'
|
||||||
|
any_segments = f'(?:.+{any_sep})?'
|
||||||
|
any_last_segments = '.*'
|
||||||
|
else:
|
||||||
|
one_last_segment = f'[^{escaped_seps}.]{not_sep}*'
|
||||||
|
one_segment = f'{one_last_segment}{any_sep}'
|
||||||
|
any_segments = f'(?:{one_segment})*'
|
||||||
|
any_last_segments = f'{any_segments}(?:{one_last_segment})?'
|
||||||
|
|
||||||
|
results = []
|
||||||
|
parts = re.split(any_sep, pat)
|
||||||
|
last_part_idx = len(parts) - 1
|
||||||
|
for idx, part in enumerate(parts):
|
||||||
|
if part == '*':
|
||||||
|
results.append(one_segment if idx < last_part_idx else one_last_segment)
|
||||||
|
continue
|
||||||
|
if recursive:
|
||||||
|
if part == '**':
|
||||||
|
if idx < last_part_idx:
|
||||||
|
if parts[idx + 1] != '**':
|
||||||
|
results.append(any_segments)
|
||||||
|
else:
|
||||||
|
results.append(any_last_segments)
|
||||||
|
continue
|
||||||
|
elif '**' in part:
|
||||||
|
raise ValueError("Invalid pattern: '**' can only be an entire path component")
|
||||||
|
if part:
|
||||||
|
if not include_hidden and part[0] in '*?':
|
||||||
|
results.append(r'(?!\.)')
|
||||||
|
results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep))
|
||||||
|
if idx < last_part_idx:
|
||||||
|
results.append(any_sep)
|
||||||
|
res = ''.join(results)
|
||||||
|
return fr'(?s:{res})\Z'
|
||||||
|
|
125
Lib/pathlib.py
125
Lib/pathlib.py
|
@ -6,8 +6,8 @@ operating systems.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import fnmatch
|
|
||||||
import functools
|
import functools
|
||||||
|
import glob
|
||||||
import io
|
import io
|
||||||
import ntpath
|
import ntpath
|
||||||
import os
|
import os
|
||||||
|
@ -76,78 +76,16 @@ def _is_case_sensitive(pathmod):
|
||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
# fnmatch.translate() returns a regular expression that includes a prefix and
|
|
||||||
# a suffix, which enable matching newlines and ensure the end of the string is
|
|
||||||
# matched, respectively. These features are undesirable for our implementation
|
|
||||||
# of PurePath.match(), which represents path separators as newlines and joins
|
|
||||||
# pattern segments together. As a workaround, we define a slice object that
|
|
||||||
# can remove the prefix and suffix from any translate() result. See the
|
|
||||||
# _compile_pattern_lines() function for more details.
|
|
||||||
_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate('_').split('_')
|
|
||||||
_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX))
|
|
||||||
_SWAP_SEP_AND_NEWLINE = {
|
|
||||||
'/': str.maketrans({'/': '\n', '\n': '/'}),
|
|
||||||
'\\': str.maketrans({'\\': '\n', '\n': '\\'}),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@functools.lru_cache(maxsize=256)
|
@functools.lru_cache(maxsize=256)
|
||||||
def _compile_pattern(pat, case_sensitive):
|
def _compile_pattern(pat, sep, case_sensitive):
|
||||||
"""Compile given glob pattern to a re.Pattern object (observing case
|
"""Compile given glob pattern to a re.Pattern object (observing case
|
||||||
sensitivity), or None if the pattern should match everything."""
|
sensitivity)."""
|
||||||
if pat == '*':
|
|
||||||
return None
|
|
||||||
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
|
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
|
||||||
return re.compile(fnmatch.translate(pat), flags).match
|
regex = glob.translate(pat, recursive=True, include_hidden=True, seps=sep)
|
||||||
|
# The string representation of an empty path is a single dot ('.'). Empty
|
||||||
|
# paths shouldn't match wildcards, so we consume it with a possessive quantifier.
|
||||||
@functools.lru_cache()
|
regex = r'(\.\Z)?+' + regex
|
||||||
def _compile_pattern_lines(pattern_lines, case_sensitive):
|
return re.compile(regex, flags).match
|
||||||
"""Compile the given pattern lines to an `re.Pattern` object.
|
|
||||||
|
|
||||||
The *pattern_lines* argument is a glob-style pattern (e.g. '**/*.py') with
|
|
||||||
its path separators and newlines swapped (e.g. '**\n*.py'). By using
|
|
||||||
newlines to separate path components, and not setting `re.DOTALL`, we
|
|
||||||
ensure that the `*` wildcard cannot match path separators.
|
|
||||||
|
|
||||||
The returned `re.Pattern` object may have its `match()` method called to
|
|
||||||
match a complete pattern, or `search()` to match from the right. The
|
|
||||||
argument supplied to these methods must also have its path separators and
|
|
||||||
newlines swapped.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Match the start of the path, or just after a path separator
|
|
||||||
parts = ['^']
|
|
||||||
for part in pattern_lines.splitlines(keepends=True):
|
|
||||||
if part == '*\n':
|
|
||||||
part = r'.+\n'
|
|
||||||
elif part == '*':
|
|
||||||
part = r'.+'
|
|
||||||
elif part == '**\n':
|
|
||||||
# '**/' component: we use '(?s:.)' rather than '.' so that path
|
|
||||||
# separators (i.e. newlines) are matched. The trailing '^' ensures
|
|
||||||
# we terminate after a path separator (i.e. on a new line).
|
|
||||||
part = r'(?s:.)*^'
|
|
||||||
elif part == '**':
|
|
||||||
# '**' component.
|
|
||||||
part = r'(?s:.)*'
|
|
||||||
elif '**' in part:
|
|
||||||
raise ValueError("Invalid pattern: '**' can only be an entire path component")
|
|
||||||
else:
|
|
||||||
# Any other component: pass to fnmatch.translate(). We slice off
|
|
||||||
# the common prefix and suffix added by translate() to ensure that
|
|
||||||
# re.DOTALL is not set, and the end of the string not matched,
|
|
||||||
# respectively. With DOTALL not set, '*' wildcards will not match
|
|
||||||
# path separators, because the '.' characters in the pattern will
|
|
||||||
# not match newlines.
|
|
||||||
part = fnmatch.translate(part)[_FNMATCH_SLICE]
|
|
||||||
parts.append(part)
|
|
||||||
# Match the end of the path, always.
|
|
||||||
parts.append(r'\Z')
|
|
||||||
flags = re.MULTILINE
|
|
||||||
if not case_sensitive:
|
|
||||||
flags |= re.IGNORECASE
|
|
||||||
return re.compile(''.join(parts), flags=flags)
|
|
||||||
|
|
||||||
|
|
||||||
def _select_children(parent_paths, dir_only, follow_symlinks, match):
|
def _select_children(parent_paths, dir_only, follow_symlinks, match):
|
||||||
|
@ -171,7 +109,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
|
||||||
except OSError:
|
except OSError:
|
||||||
continue
|
continue
|
||||||
name = entry.name
|
name = entry.name
|
||||||
if match is None or match(name):
|
if match(name):
|
||||||
yield parent_path._make_child_relpath(name)
|
yield parent_path._make_child_relpath(name)
|
||||||
|
|
||||||
|
|
||||||
|
@ -297,10 +235,6 @@ class PurePath:
|
||||||
# to implement comparison methods like `__lt__()`.
|
# to implement comparison methods like `__lt__()`.
|
||||||
'_parts_normcase_cached',
|
'_parts_normcase_cached',
|
||||||
|
|
||||||
# The `_lines_cached` slot stores the string path with path separators
|
|
||||||
# and newlines swapped. This is used to implement `match()`.
|
|
||||||
'_lines_cached',
|
|
||||||
|
|
||||||
# The `_hash` slot stores the hash of the case-normalized string
|
# The `_hash` slot stores the hash of the case-normalized string
|
||||||
# path. It's set when `__hash__()` is called for the first time.
|
# path. It's set when `__hash__()` is called for the first time.
|
||||||
'_hash',
|
'_hash',
|
||||||
|
@ -475,20 +409,6 @@ class PurePath:
|
||||||
self._parts_normcase_cached = self._str_normcase.split(self.pathmod.sep)
|
self._parts_normcase_cached = self._str_normcase.split(self.pathmod.sep)
|
||||||
return self._parts_normcase_cached
|
return self._parts_normcase_cached
|
||||||
|
|
||||||
@property
|
|
||||||
def _lines(self):
|
|
||||||
# Path with separators and newlines swapped, for pattern matching.
|
|
||||||
try:
|
|
||||||
return self._lines_cached
|
|
||||||
except AttributeError:
|
|
||||||
path_str = str(self)
|
|
||||||
if path_str == '.':
|
|
||||||
self._lines_cached = ''
|
|
||||||
else:
|
|
||||||
trans = _SWAP_SEP_AND_NEWLINE[self.pathmod.sep]
|
|
||||||
self._lines_cached = path_str.translate(trans)
|
|
||||||
return self._lines_cached
|
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if not isinstance(other, PurePath):
|
if not isinstance(other, PurePath):
|
||||||
return NotImplemented
|
return NotImplemented
|
||||||
|
@ -763,13 +683,16 @@ class PurePath:
|
||||||
path_pattern = self.with_segments(path_pattern)
|
path_pattern = self.with_segments(path_pattern)
|
||||||
if case_sensitive is None:
|
if case_sensitive is None:
|
||||||
case_sensitive = _is_case_sensitive(self.pathmod)
|
case_sensitive = _is_case_sensitive(self.pathmod)
|
||||||
pattern = _compile_pattern_lines(path_pattern._lines, case_sensitive)
|
sep = path_pattern.pathmod.sep
|
||||||
|
pattern_str = str(path_pattern)
|
||||||
if path_pattern.drive or path_pattern.root:
|
if path_pattern.drive or path_pattern.root:
|
||||||
return pattern.match(self._lines) is not None
|
pass
|
||||||
elif path_pattern._tail:
|
elif path_pattern._tail:
|
||||||
return pattern.search(self._lines) is not None
|
pattern_str = f'**{sep}{pattern_str}'
|
||||||
else:
|
else:
|
||||||
raise ValueError("empty pattern")
|
raise ValueError("empty pattern")
|
||||||
|
match = _compile_pattern(pattern_str, sep, case_sensitive)
|
||||||
|
return match(str(self)) is not None
|
||||||
|
|
||||||
|
|
||||||
# Subclassing os.PathLike makes isinstance() checks slower,
|
# Subclassing os.PathLike makes isinstance() checks slower,
|
||||||
|
@ -1069,26 +992,19 @@ class _PathBase(PurePath):
|
||||||
return contextlib.nullcontext(self.iterdir())
|
return contextlib.nullcontext(self.iterdir())
|
||||||
|
|
||||||
def _make_child_relpath(self, name):
|
def _make_child_relpath(self, name):
|
||||||
sep = self.pathmod.sep
|
|
||||||
lines_name = name.replace('\n', sep)
|
|
||||||
lines_str = self._lines
|
|
||||||
path_str = str(self)
|
path_str = str(self)
|
||||||
tail = self._tail
|
tail = self._tail
|
||||||
if tail:
|
if tail:
|
||||||
path_str = f'{path_str}{sep}{name}'
|
path_str = f'{path_str}{self.pathmod.sep}{name}'
|
||||||
lines_str = f'{lines_str}\n{lines_name}'
|
|
||||||
elif path_str != '.':
|
elif path_str != '.':
|
||||||
path_str = f'{path_str}{name}'
|
path_str = f'{path_str}{name}'
|
||||||
lines_str = f'{lines_str}{lines_name}'
|
|
||||||
else:
|
else:
|
||||||
path_str = name
|
path_str = name
|
||||||
lines_str = lines_name
|
|
||||||
path = self.with_segments(path_str)
|
path = self.with_segments(path_str)
|
||||||
path._str = path_str
|
path._str = path_str
|
||||||
path._drv = self.drive
|
path._drv = self.drive
|
||||||
path._root = self.root
|
path._root = self.root
|
||||||
path._tail_cached = tail + [name]
|
path._tail_cached = tail + [name]
|
||||||
path._lines_cached = lines_str
|
|
||||||
return path
|
return path
|
||||||
|
|
||||||
def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
|
def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
|
||||||
|
@ -1139,6 +1055,7 @@ class _PathBase(PurePath):
|
||||||
# do not perform any filesystem access, which can be much faster!
|
# do not perform any filesystem access, which can be much faster!
|
||||||
filter_paths = follow_symlinks is not None and '..' not in pattern_parts
|
filter_paths = follow_symlinks is not None and '..' not in pattern_parts
|
||||||
deduplicate_paths = False
|
deduplicate_paths = False
|
||||||
|
sep = self.pathmod.sep
|
||||||
paths = iter([self] if self.is_dir() else [])
|
paths = iter([self] if self.is_dir() else [])
|
||||||
part_idx = 0
|
part_idx = 0
|
||||||
while part_idx < len(pattern_parts):
|
while part_idx < len(pattern_parts):
|
||||||
|
@ -1159,9 +1076,9 @@ class _PathBase(PurePath):
|
||||||
paths = _select_recursive(paths, dir_only, follow_symlinks)
|
paths = _select_recursive(paths, dir_only, follow_symlinks)
|
||||||
|
|
||||||
# Filter out paths that don't match pattern.
|
# Filter out paths that don't match pattern.
|
||||||
prefix_len = len(self._make_child_relpath('_')._lines) - 1
|
prefix_len = len(str(self._make_child_relpath('_'))) - 1
|
||||||
match = _compile_pattern_lines(path_pattern._lines, case_sensitive).match
|
match = _compile_pattern(str(path_pattern), sep, case_sensitive)
|
||||||
paths = (path for path in paths if match(path._lines[prefix_len:]))
|
paths = (path for path in paths if match(str(path), prefix_len))
|
||||||
return paths
|
return paths
|
||||||
|
|
||||||
dir_only = part_idx < len(pattern_parts)
|
dir_only = part_idx < len(pattern_parts)
|
||||||
|
@ -1174,7 +1091,7 @@ class _PathBase(PurePath):
|
||||||
raise ValueError("Invalid pattern: '**' can only be an entire path component")
|
raise ValueError("Invalid pattern: '**' can only be an entire path component")
|
||||||
else:
|
else:
|
||||||
dir_only = part_idx < len(pattern_parts)
|
dir_only = part_idx < len(pattern_parts)
|
||||||
match = _compile_pattern(part, case_sensitive)
|
match = _compile_pattern(part, sep, case_sensitive)
|
||||||
paths = _select_children(paths, dir_only, follow_symlinks, match)
|
paths = _select_children(paths, dir_only, follow_symlinks, match)
|
||||||
return paths
|
return paths
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
@ -349,6 +350,96 @@ class GlobTests(unittest.TestCase):
|
||||||
for it in iters:
|
for it in iters:
|
||||||
self.assertEqual(next(it), p)
|
self.assertEqual(next(it), p)
|
||||||
|
|
||||||
|
def test_translate_matching(self):
|
||||||
|
match = re.compile(glob.translate('*')).match
|
||||||
|
self.assertIsNotNone(match('foo'))
|
||||||
|
self.assertIsNotNone(match('foo.bar'))
|
||||||
|
self.assertIsNone(match('.foo'))
|
||||||
|
match = re.compile(glob.translate('.*')).match
|
||||||
|
self.assertIsNotNone(match('.foo'))
|
||||||
|
match = re.compile(glob.translate('**', recursive=True)).match
|
||||||
|
self.assertIsNotNone(match('foo'))
|
||||||
|
self.assertIsNone(match('.foo'))
|
||||||
|
self.assertIsNotNone(match(os.path.join('foo', 'bar')))
|
||||||
|
self.assertIsNone(match(os.path.join('foo', '.bar')))
|
||||||
|
self.assertIsNone(match(os.path.join('.foo', 'bar')))
|
||||||
|
self.assertIsNone(match(os.path.join('.foo', '.bar')))
|
||||||
|
match = re.compile(glob.translate('**/*', recursive=True)).match
|
||||||
|
self.assertIsNotNone(match(os.path.join('foo', 'bar')))
|
||||||
|
self.assertIsNone(match(os.path.join('foo', '.bar')))
|
||||||
|
self.assertIsNone(match(os.path.join('.foo', 'bar')))
|
||||||
|
self.assertIsNone(match(os.path.join('.foo', '.bar')))
|
||||||
|
match = re.compile(glob.translate('*/**', recursive=True)).match
|
||||||
|
self.assertIsNotNone(match(os.path.join('foo', 'bar')))
|
||||||
|
self.assertIsNone(match(os.path.join('foo', '.bar')))
|
||||||
|
self.assertIsNone(match(os.path.join('.foo', 'bar')))
|
||||||
|
self.assertIsNone(match(os.path.join('.foo', '.bar')))
|
||||||
|
match = re.compile(glob.translate('**/.bar', recursive=True)).match
|
||||||
|
self.assertIsNotNone(match(os.path.join('foo', '.bar')))
|
||||||
|
self.assertIsNone(match(os.path.join('.foo', '.bar')))
|
||||||
|
match = re.compile(glob.translate('**/*.*', recursive=True)).match
|
||||||
|
self.assertIsNone(match(os.path.join('foo', 'bar')))
|
||||||
|
self.assertIsNone(match(os.path.join('foo', '.bar')))
|
||||||
|
self.assertIsNotNone(match(os.path.join('foo', 'bar.txt')))
|
||||||
|
self.assertIsNone(match(os.path.join('foo', '.bar.txt')))
|
||||||
|
|
||||||
|
def test_translate(self):
|
||||||
|
def fn(pat):
|
||||||
|
return glob.translate(pat, seps='/')
|
||||||
|
self.assertEqual(fn('foo'), r'(?s:foo)\Z')
|
||||||
|
self.assertEqual(fn('foo/bar'), r'(?s:foo/bar)\Z')
|
||||||
|
self.assertEqual(fn('*'), r'(?s:[^/.][^/]*)\Z')
|
||||||
|
self.assertEqual(fn('?'), r'(?s:(?!\.)[^/])\Z')
|
||||||
|
self.assertEqual(fn('a*'), r'(?s:a[^/]*)\Z')
|
||||||
|
self.assertEqual(fn('*a'), r'(?s:(?!\.)[^/]*a)\Z')
|
||||||
|
self.assertEqual(fn('.*'), r'(?s:\.[^/]*)\Z')
|
||||||
|
self.assertEqual(fn('?aa'), r'(?s:(?!\.)[^/]aa)\Z')
|
||||||
|
self.assertEqual(fn('aa?'), r'(?s:aa[^/])\Z')
|
||||||
|
self.assertEqual(fn('aa[ab]'), r'(?s:aa[ab])\Z')
|
||||||
|
self.assertEqual(fn('**'), r'(?s:(?!\.)[^/]*)\Z')
|
||||||
|
self.assertEqual(fn('***'), r'(?s:(?!\.)[^/]*)\Z')
|
||||||
|
self.assertEqual(fn('a**'), r'(?s:a[^/]*)\Z')
|
||||||
|
self.assertEqual(fn('**b'), r'(?s:(?!\.)[^/]*b)\Z')
|
||||||
|
self.assertEqual(fn('/**/*/*.*/**'),
|
||||||
|
r'(?s:/(?!\.)[^/]*/[^/.][^/]*/(?!\.)[^/]*\.[^/]*/(?!\.)[^/]*)\Z')
|
||||||
|
|
||||||
|
def test_translate_include_hidden(self):
|
||||||
|
def fn(pat):
|
||||||
|
return glob.translate(pat, include_hidden=True, seps='/')
|
||||||
|
self.assertEqual(fn('foo'), r'(?s:foo)\Z')
|
||||||
|
self.assertEqual(fn('foo/bar'), r'(?s:foo/bar)\Z')
|
||||||
|
self.assertEqual(fn('*'), r'(?s:[^/]+)\Z')
|
||||||
|
self.assertEqual(fn('?'), r'(?s:[^/])\Z')
|
||||||
|
self.assertEqual(fn('a*'), r'(?s:a[^/]*)\Z')
|
||||||
|
self.assertEqual(fn('*a'), r'(?s:[^/]*a)\Z')
|
||||||
|
self.assertEqual(fn('.*'), r'(?s:\.[^/]*)\Z')
|
||||||
|
self.assertEqual(fn('?aa'), r'(?s:[^/]aa)\Z')
|
||||||
|
self.assertEqual(fn('aa?'), r'(?s:aa[^/])\Z')
|
||||||
|
self.assertEqual(fn('aa[ab]'), r'(?s:aa[ab])\Z')
|
||||||
|
self.assertEqual(fn('**'), r'(?s:[^/]*)\Z')
|
||||||
|
self.assertEqual(fn('***'), r'(?s:[^/]*)\Z')
|
||||||
|
self.assertEqual(fn('a**'), r'(?s:a[^/]*)\Z')
|
||||||
|
self.assertEqual(fn('**b'), r'(?s:[^/]*b)\Z')
|
||||||
|
self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/[^/]*/[^/]+/[^/]*\.[^/]*/[^/]*)\Z')
|
||||||
|
|
||||||
|
def test_translate_recursive(self):
|
||||||
|
def fn(pat):
|
||||||
|
return glob.translate(pat, recursive=True, include_hidden=True, seps='/')
|
||||||
|
self.assertEqual(fn('*'), r'(?s:[^/]+)\Z')
|
||||||
|
self.assertEqual(fn('?'), r'(?s:[^/])\Z')
|
||||||
|
self.assertEqual(fn('**'), r'(?s:.*)\Z')
|
||||||
|
self.assertEqual(fn('**/**'), r'(?s:.*)\Z')
|
||||||
|
self.assertRaises(ValueError, fn, '***')
|
||||||
|
self.assertRaises(ValueError, fn, 'a**')
|
||||||
|
self.assertRaises(ValueError, fn, '**b')
|
||||||
|
self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/(?:.+/)?[^/]+/[^/]*\.[^/]*/.*)\Z')
|
||||||
|
|
||||||
|
def test_translate_seps(self):
|
||||||
|
def fn(pat):
|
||||||
|
return glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\'])
|
||||||
|
self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z')
|
||||||
|
self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z')
|
||||||
|
|
||||||
|
|
||||||
@skip_unless_symlink
|
@skip_unless_symlink
|
||||||
class SymlinkLoopGlobTests(unittest.TestCase):
|
class SymlinkLoopGlobTests(unittest.TestCase):
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Add :func:`glob.translate`. This function converts a pathname with shell-style
|
||||||
|
wildcards to a regular expression.
|
Loading…
Reference in New Issue