GH-104104: Optimize `pathlib.Path.glob()` by avoiding repeated calls to `os.path.normcase()` (GH-104105)

Use `re.IGNORECASE` to implement case-insensitive matching. This
restores behaviour from before GH-31691.
This commit is contained in:
Barney Gale 2023-05-02 22:51:18 +01:00 committed by GitHub
parent 1f5384434d
commit 47770a1e91
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 11 deletions

View File

@ -59,6 +59,9 @@ def _is_wildcard_pattern(pat):
# be looked up directly as a file.
return "*" in pat or "?" in pat or "[" in pat
def _is_case_sensitive(flavour):
return flavour.normcase('Aa') == 'Aa'
#
# Globbing helpers
#
@ -100,15 +103,14 @@ class _Selector:
is_dir = path_cls.is_dir
exists = path_cls.exists
scandir = path_cls._scandir
normcase = path_cls._flavour.normcase
if not is_dir(parent_path):
return iter([])
return self._select_from(parent_path, is_dir, exists, scandir, normcase)
return self._select_from(parent_path, is_dir, exists, scandir)
class _TerminatingSelector:
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, is_dir, exists, scandir):
yield parent_path
@ -118,11 +120,11 @@ class _PreciseSelector(_Selector):
self.name = name
_Selector.__init__(self, child_parts, flavour)
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, is_dir, exists, scandir):
try:
path = parent_path._make_child_relpath(self.name)
if (is_dir if self.dironly else exists)(path):
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
for p in self.successor._select_from(path, is_dir, exists, scandir):
yield p
except PermissionError:
return
@ -131,10 +133,11 @@ class _PreciseSelector(_Selector):
class _WildcardSelector(_Selector):
def __init__(self, pat, child_parts, flavour):
self.match = re.compile(fnmatch.translate(flavour.normcase(pat))).fullmatch
flags = re.NOFLAG if _is_case_sensitive(flavour) else re.IGNORECASE
self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
_Selector.__init__(self, child_parts, flavour)
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, is_dir, exists, scandir):
try:
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
@ -153,9 +156,9 @@ class _WildcardSelector(_Selector):
raise
continue
name = entry.name
if self.match(normcase(name)):
if self.match(name):
path = parent_path._make_child_relpath(name)
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
for p in self.successor._select_from(path, is_dir, exists, scandir):
yield p
except PermissionError:
return
@ -187,13 +190,13 @@ class _RecursiveWildcardSelector(_Selector):
except PermissionError:
return
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, is_dir, exists, scandir):
try:
yielded = set()
try:
successor_select = self.successor._select_from
for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
for p in successor_select(starting_point, is_dir, exists, scandir, normcase):
for p in successor_select(starting_point, is_dir, exists, scandir):
if p not in yielded:
yield p
yielded.add(p)

View File

@ -0,0 +1,2 @@
Improve performance of :meth:`pathlib.Path.glob` by using
:data:`re.IGNORECASE` to implement case-insensitive matching.