Patch # 1140 (my code, approved by Effbot).

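Make sure the type of the return value of re.sub(x, y, z) follows the
subject string z and the replacement y rather than the pattern x, even
if there are no substitutions or if x==z (which triggered various
special cases in join_list()): an empty z comes back as an empty string
of z's own type, and when x==z the result has the type of y+z (i.e.
unicode if either is unicode, str if they are both str).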

Could be backported to 2.5; no need to port to 3.0.
This commit is contained in:
Guido van Rossum 2007-09-10 22:02:25 +00:00
parent 98d19dafd9
commit 1ff91d95a2
2 changed files with 33 additions and 17 deletions

View File

@ -83,6 +83,31 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
'abc\ndef\n') 'abc\ndef\n')
def test_bug_1140(self):
# Regression test for patch #1140: checks the type (str vs. unicode)
# of the value returned by re.sub().
#
# re.sub(x, y, u'') should return u'', not '', and
# re.sub(x, y, '') should return '', not u''.
# Also:
# re.sub(x, y, unicode(x)) should return unicode(y), and
# re.sub(x, y, str(x)) should return
# str(y) if isinstance(y, str) else unicode(y).
#
# Run every str/unicode combination of pattern x and replacement y.
for x in 'x', u'x':
for y in 'y', u'y':
# Empty unicode subject: the result is an empty unicode string.
z = re.sub(x, y, u'')
self.assertEqual(z, u'')
self.assertEqual(type(z), unicode)
# Empty str subject: the result is an empty str.
z = re.sub(x, y, '')
self.assertEqual(z, '')
self.assertEqual(type(z), str)
# Unicode subject equal to the pattern text: the result compares
# equal to y and is always unicode.
z = re.sub(x, y, unicode(x))
self.assertEqual(z, y)
self.assertEqual(type(z), unicode)
# Str subject equal to the pattern text: the result compares
# equal to y and has the same type as y.
z = re.sub(x, y, str(x))
self.assertEqual(z, y)
self.assertEqual(type(z), type(y))
def test_sub_template_numeric_escape(self): def test_sub_template_numeric_escape(self):
# bug 776311 and friends # bug 776311 and friends
self.assertEqual(re.sub('x', r'\0', 'x'), '\0') self.assertEqual(re.sub('x', r'\0', 'x'), '\0')

View File

@ -1979,7 +1979,7 @@ deepcopy(PyObject** object, PyObject* memo)
#endif #endif
static PyObject* static PyObject*
join_list(PyObject* list, PyObject* pattern) join_list(PyObject* list, PyObject* string)
{ {
/* join list elements */ /* join list elements */
@ -1990,24 +1990,15 @@ join_list(PyObject* list, PyObject* pattern)
#endif #endif
PyObject* result; PyObject* result;
switch (PyList_GET_SIZE(list)) { joiner = PySequence_GetSlice(string, 0, 0);
case 0:
Py_DECREF(list);
return PySequence_GetSlice(pattern, 0, 0);
case 1:
result = PyList_GET_ITEM(list, 0);
Py_INCREF(result);
Py_DECREF(list);
return result;
}
/* two or more elements: slice out a suitable separator from the
first member, and use that to join the entire list */
joiner = PySequence_GetSlice(pattern, 0, 0);
if (!joiner) if (!joiner)
return NULL; return NULL;
if (PyList_GET_SIZE(list) == 0) {
Py_DECREF(list);
return joiner;
}
#if PY_VERSION_HEX >= 0x01060000 #if PY_VERSION_HEX >= 0x01060000
function = PyObject_GetAttrString(joiner, "join"); function = PyObject_GetAttrString(joiner, "join");
if (!function) { if (!function) {
@ -2443,7 +2434,7 @@ next:
Py_DECREF(filter); Py_DECREF(filter);
/* convert list to single string (also removes list) */ /* convert list to single string (also removes list) */
item = join_list(list, self->pattern); item = join_list(list, string);
if (!item) if (!item)
return NULL; return NULL;