Issue #3756: make re.escape() handle bytes as well as str.

Patch by Andrew McNamara, reviewed and tweaked by myself.
This commit is contained in:
Guido van Rossum 2008-09-10 17:44:35 +00:00
parent 92f8f3e013
commit 698280df7c
3 changed files with 47 additions and 15 deletions

View File

@ -211,15 +211,16 @@ def template(pattern, flags=0):
"Compile a template pattern, returning a pattern object"
return _compile(pattern, flags|T)
# NOTE(review): this is a flattened diff hunk with the +/- markers lost.  The
# dict-based `_alphanum` table and the two frozensets below look like the
# removed and the added version of the same lookup table -- confirm against
# the actual commit before treating both as live code.
#
# Dict variant: every ASCII letter and digit becomes a key mapped to 1, so
# `c in _alphanum` is a constant-time membership test.
_alphanum = {}
for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890':
_alphanum[c] = 1
# Drop the loop variable so it does not linger as a module-level name.
del c
# Frozenset variants, one per input type.  The digit run ends in a second
# "0"; the duplicate is harmless because set construction deduplicates.
# Elements here are one-character str objects.
_alphanum_str = frozenset(
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
# Iterating a bytes literal yields ints, so this set holds the integer byte
# values of the same characters (membership must be tested with ints).
_alphanum_bytes = frozenset(
b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
def escape(pattern):
"Escape all non-alphanumeric characters in pattern."
if isinstance(pattern, str):
alphanum = _alphanum_str
s = list(pattern)
alphanum = _alphanum
for i in range(len(pattern)):
c = pattern[i]
if c not in alphanum:
@ -227,7 +228,21 @@ def escape(pattern):
s[i] = "\\000"
else:
s[i] = "\\" + c
return pattern[:0].join(s)
return "".join(s)
else:
alphanum = _alphanum_bytes
s = []
esc = ord(b"\\")
for c in pattern:
if c in alphanum:
s.append(c)
else:
if c == 0:
s.extend(b"\\000")
else:
s.append(esc)
s.append(c)
return bytes(s)
# --------------------------------------------------------------------
# internals
@ -248,7 +263,8 @@ def _compile(*key):
pattern, flags = key
if isinstance(pattern, _pattern_type):
if flags:
raise ValueError('Cannot process flags argument with a compiled pattern')
raise ValueError(
"Cannot process flags argument with a compiled pattern")
return pattern
if not sre_compile.isstring(pattern):
raise TypeError("first argument must be string or compiled pattern")
@ -325,7 +341,7 @@ class Scanner:
if i == j:
break
action = self.lexicon[m.lastindex-1][1]
if hasattr(action, '__call__'):
if hasattr(action, "__call__"):
self.match = m
action = action(self, m.group())
if action is not None:

View File

@ -416,6 +416,7 @@ class ReTests(unittest.TestCase):
def test_re_escape(self):
p=""
self.assertEqual(re.escape(p), p)
for i in range(0, 256):
p = p + chr(i)
self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
@ -426,6 +427,19 @@ class ReTests(unittest.TestCase):
self.assertEqual(pat.match(p) is not None, True)
self.assertEqual(pat.match(p).span(), (0,256))
def test_re_escape_byte(self):
# Bytes counterpart of test_re_escape: a pattern produced by re.escape()
# from a bytes object must match that same bytes object literally.
# NOTE(review): indentation was lost in this view, so the extent of the
# for-loop body below is inferred, not shown -- confirm against the file.
p=b""
# Escaping empty bytes must return it unchanged.
self.assertEqual(re.escape(p), p)
# Walk every possible byte value, including NUL and regex metacharacters.
for i in range(0, 256):
# bytes([i]) builds a one-byte object holding the value i.
b = bytes([i])
# Accumulate all bytes seen so far for the whole-range check further down.
p += b
# The escaped single byte must match itself ...
self.assertEqual(re.match(re.escape(b), b) is not None, True)
# ... and consume exactly that one byte.
self.assertEqual(re.match(re.escape(b), b).span(), (0,1))
# A span of (0,256) is only possible once p holds all 256 byte values, so
# the remaining checks presumably run after the loop has finished.
pat=re.compile(re.escape(p))
self.assertEqual(pat.match(p) is not None, True)
self.assertEqual(pat.match(p).span(), (0,256))
def pickle_test(self, pickle):
oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
s = pickle.dumps(oldpat)

View File

@ -96,6 +96,8 @@ C API
Library
-------
- Issue #3756: make re.escape() handle bytes as well as str.
- Issue #3800: fix filter() related bug in formatter.py.
- Issue #874900: fix behaviour of threading module after a fork.