Issue #3756: make re.escape() handle bytes as well as str.
Patch by Andrew McNamara, reviewed and tweaked by myself.
This commit is contained in:
parent
92f8f3e013
commit
698280df7c
46
Lib/re.py
46
Lib/re.py
|
@ -211,23 +211,38 @@ def template(pattern, flags=0):
|
|||
"Compile a template pattern, returning a pattern object"
|
||||
return _compile(pattern, flags|T)
|
||||
|
||||
_alphanum = {}
|
||||
for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890':
|
||||
_alphanum[c] = 1
|
||||
del c
|
||||
_alphanum_str = frozenset(
|
||||
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
|
||||
_alphanum_bytes = frozenset(
|
||||
b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
|
||||
|
||||
def escape(pattern):
|
||||
"Escape all non-alphanumeric characters in pattern."
|
||||
s = list(pattern)
|
||||
alphanum = _alphanum
|
||||
for i in range(len(pattern)):
|
||||
c = pattern[i]
|
||||
if c not in alphanum:
|
||||
if c == "\000":
|
||||
s[i] = "\\000"
|
||||
if isinstance(pattern, str):
|
||||
alphanum = _alphanum_str
|
||||
s = list(pattern)
|
||||
for i in range(len(pattern)):
|
||||
c = pattern[i]
|
||||
if c not in alphanum:
|
||||
if c == "\000":
|
||||
s[i] = "\\000"
|
||||
else:
|
||||
s[i] = "\\" + c
|
||||
return "".join(s)
|
||||
else:
|
||||
alphanum = _alphanum_bytes
|
||||
s = []
|
||||
esc = ord(b"\\")
|
||||
for c in pattern:
|
||||
if c in alphanum:
|
||||
s.append(c)
|
||||
else:
|
||||
s[i] = "\\" + c
|
||||
return pattern[:0].join(s)
|
||||
if c == 0:
|
||||
s.extend(b"\\000")
|
||||
else:
|
||||
s.append(esc)
|
||||
s.append(c)
|
||||
return bytes(s)
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# internals
|
||||
|
@ -248,7 +263,8 @@ def _compile(*key):
|
|||
pattern, flags = key
|
||||
if isinstance(pattern, _pattern_type):
|
||||
if flags:
|
||||
raise ValueError('Cannot process flags argument with a compiled pattern')
|
||||
raise ValueError(
|
||||
"Cannot process flags argument with a compiled pattern")
|
||||
return pattern
|
||||
if not sre_compile.isstring(pattern):
|
||||
raise TypeError("first argument must be string or compiled pattern")
|
||||
|
@ -325,7 +341,7 @@ class Scanner:
|
|||
if i == j:
|
||||
break
|
||||
action = self.lexicon[m.lastindex-1][1]
|
||||
if hasattr(action, '__call__'):
|
||||
if hasattr(action, "__call__"):
|
||||
self.match = m
|
||||
action = action(self, m.group())
|
||||
if action is not None:
|
||||
|
|
|
@ -416,6 +416,7 @@ class ReTests(unittest.TestCase):
|
|||
|
||||
def test_re_escape(self):
|
||||
p=""
|
||||
self.assertEqual(re.escape(p), p)
|
||||
for i in range(0, 256):
|
||||
p = p + chr(i)
|
||||
self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
|
||||
|
@ -426,6 +427,19 @@ class ReTests(unittest.TestCase):
|
|||
self.assertEqual(pat.match(p) is not None, True)
|
||||
self.assertEqual(pat.match(p).span(), (0,256))
|
||||
|
||||
def test_re_escape_byte(self):
|
||||
p=b""
|
||||
self.assertEqual(re.escape(p), p)
|
||||
for i in range(0, 256):
|
||||
b = bytes([i])
|
||||
p += b
|
||||
self.assertEqual(re.match(re.escape(b), b) is not None, True)
|
||||
self.assertEqual(re.match(re.escape(b), b).span(), (0,1))
|
||||
|
||||
pat=re.compile(re.escape(p))
|
||||
self.assertEqual(pat.match(p) is not None, True)
|
||||
self.assertEqual(pat.match(p).span(), (0,256))
|
||||
|
||||
def pickle_test(self, pickle):
|
||||
oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
|
||||
s = pickle.dumps(oldpat)
|
||||
|
|
Loading…
Reference in New Issue