#6509: fix re.sub to work properly when the pattern, the string, and the replacement are all bytes. Patch by Antoine Pitrou.
This commit is contained in:
parent
64fb18e192
commit
b92ed7cf36
|
@ -786,12 +786,18 @@ def parse_template(source, pattern):
|
||||||
groups = []
|
groups = []
|
||||||
groupsappend = groups.append
|
groupsappend = groups.append
|
||||||
literals = [None] * len(p)
|
literals = [None] * len(p)
|
||||||
|
if isinstance(source, str):
|
||||||
|
encode = lambda x: x
|
||||||
|
else:
|
||||||
|
# The tokenizer implicitly decodes bytes objects as latin-1; we must
|
||||||
|
# therefore re-encode the final representation.
|
||||||
|
encode = lambda x: x.encode('latin1')
|
||||||
for c, s in p:
|
for c, s in p:
|
||||||
if c is MARK:
|
if c is MARK:
|
||||||
groupsappend((i, s))
|
groupsappend((i, s))
|
||||||
# literals[i] is already None
|
# literals[i] is already None
|
||||||
else:
|
else:
|
||||||
literals[i] = s
|
literals[i] = encode(s)
|
||||||
i = i + 1
|
i = i + 1
|
||||||
return groups, literals
|
return groups, literals
|
||||||
|
|
||||||
|
|
|
@ -717,6 +717,24 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
|
self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
|
||||||
self.assertRaises(ValueError, re.compile, '(?au)\w')
|
self.assertRaises(ValueError, re.compile, '(?au)\w')
|
||||||
|
|
||||||
|
def test_bug_6509(self):
|
||||||
|
# Replacement strings of both types must parse properly.
|
||||||
|
# all strings
|
||||||
|
pat = re.compile('a(\w)')
|
||||||
|
self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
|
||||||
|
pat = re.compile('a(.)')
|
||||||
|
self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
|
||||||
|
pat = re.compile('..')
|
||||||
|
self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
|
||||||
|
|
||||||
|
# all bytes
|
||||||
|
pat = re.compile(b'a(\w)')
|
||||||
|
self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
|
||||||
|
pat = re.compile(b'a(.)')
|
||||||
|
self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
|
||||||
|
pat = re.compile(b'..')
|
||||||
|
self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
|
||||||
|
|
||||||
def test_dealloc(self):
|
def test_dealloc(self):
|
||||||
# issue 3299: check for segfault in debug build
|
# issue 3299: check for segfault in debug build
|
||||||
import _sre
|
import _sre
|
||||||
|
|
|
@ -268,6 +268,9 @@ C-API
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #6509: fix re.sub to work properly when the pattern, the string, and
|
||||||
|
the replacement are all bytes. Patch by Antoine Pitrou.
|
||||||
|
|
||||||
- The sqlite3 module was updated to pysqlite 2.6.0. This fixes several obscure
|
- The sqlite3 module was updated to pysqlite 2.6.0. This fixes several obscure
|
||||||
bugs and allows loading SQLite extensions from shared libraries.
|
bugs and allows loading SQLite extensions from shared libraries.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue