mirror of https://github.com/python/cpython
Fix 're' to work on bytes. It could do with a few more tests, though.
This commit is contained in:
parent
e8c3d266c8
commit
40a088dc27
|
@ -472,7 +472,7 @@ def _compile_info(code, pattern, flags):
|
||||||
code[skip] = len(code) - skip
|
code[skip] = len(code) - skip
|
||||||
|
|
||||||
def isstring(obj):
|
def isstring(obj):
|
||||||
return isinstance(obj, str)
|
return isinstance(obj, (str, bytes))
|
||||||
|
|
||||||
def _code(p, flags):
|
def _code(p, flags):
|
||||||
|
|
||||||
|
|
|
@ -192,8 +192,8 @@ class Tokenizer:
|
||||||
char = self.string[self.index:self.index+1]
|
char = self.string[self.index:self.index+1]
|
||||||
# Special case for the str8, since indexing returns an integer
|
# Special case for the str8, since indexing returns an integer
|
||||||
# XXX This is only needed for test_bug_926075 in test_re.py
|
# XXX This is only needed for test_bug_926075 in test_re.py
|
||||||
if isinstance(self.string, bytes):
|
if char and isinstance(char, bytes):
|
||||||
char = chr(char)
|
char = chr(char[0])
|
||||||
if char == "\\":
|
if char == "\\":
|
||||||
try:
|
try:
|
||||||
c = self.string[self.index + 1]
|
c = self.string[self.index + 1]
|
||||||
|
|
|
@ -83,33 +83,22 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
|
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
|
||||||
'abc\ndef\n')
|
'abc\ndef\n')
|
||||||
|
|
||||||
# This test makes no sense until re supports bytes, and should then probably
|
def test_bug_1140(self):
|
||||||
# test for the *in*ability to mix bytes and str this way :)
|
# re.sub(x, y, b'') should return b'', not '', and
|
||||||
#
|
# re.sub(x, y, '') should return '', not b''.
|
||||||
# def test_bug_1140(self):
|
# Also:
|
||||||
# # re.sub(x, y, b'') should return b'', not '', and
|
# re.sub(x, y, str(x)) should return str(y), and
|
||||||
# # re.sub(x, y, '') should return '', not b''.
|
# re.sub(x, y, bytes(x)) should return
|
||||||
# # Also:
|
# str(y) if isinstance(y, str) else unicode(y).
|
||||||
# # re.sub(x, y, str(x)) should return str(y), and
|
for x in 'x', b'x':
|
||||||
# # re.sub(x, y, bytes(x)) should return
|
for y in 'y', b'y':
|
||||||
# # str(y) if isinstance(y, str) else unicode(y).
|
z = re.sub(x, y, b'')
|
||||||
# for x in 'x', u'x':
|
self.assertEqual(z, b'')
|
||||||
# for y in 'y', u'y':
|
self.assertEqual(type(z), bytes)
|
||||||
# z = re.sub(x, y, u'')
|
#
|
||||||
# self.assertEqual(z, u'')
|
z = re.sub(x, y, '')
|
||||||
# self.assertEqual(type(z), unicode)
|
self.assertEqual(z, '')
|
||||||
# #
|
self.assertEqual(type(z), str)
|
||||||
# z = re.sub(x, y, '')
|
|
||||||
# self.assertEqual(z, '')
|
|
||||||
# self.assertEqual(type(z), str)
|
|
||||||
# #
|
|
||||||
# z = re.sub(x, y, unicode(x))
|
|
||||||
# self.assertEqual(z, y)
|
|
||||||
# self.assertEqual(type(z), unicode)
|
|
||||||
# #
|
|
||||||
# z = re.sub(x, y, str(x))
|
|
||||||
# self.assertEqual(z, y)
|
|
||||||
# self.assertEqual(type(z), type(y))
|
|
||||||
|
|
||||||
def test_bug_1661(self):
|
def test_bug_1661(self):
|
||||||
# Verify that flags do not get silently ignored with compiled patterns
|
# Verify that flags do not get silently ignored with compiled patterns
|
||||||
|
@ -599,10 +588,9 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual([item.group(0) for item in iter],
|
self.assertEqual([item.group(0) for item in iter],
|
||||||
[":", "::", ":::"])
|
[":", "::", ":::"])
|
||||||
|
|
||||||
# XXX This needs to be restored for str vs. bytes.
|
def test_bug_926075(self):
|
||||||
## def test_bug_926075(self):
|
self.assert_(re.compile('bug_926075') is not
|
||||||
## self.assert_(re.compile('bug_926075') is not
|
re.compile(b'bug_926075'))
|
||||||
## re.compile(str8('bug_926075')))
|
|
||||||
|
|
||||||
def test_bug_931848(self):
|
def test_bug_931848(self):
|
||||||
pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
|
pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
|
||||||
|
|
Loading…
Reference in New Issue