Patch #1140 (my code, approved by Effbot).
Make sure the type of the return value of re.sub(x, y, z) is the type of y+x (i.e. unicode if either is unicode, str if they are both str), even if there are no substitutions or if x==z (both of which triggered various special cases in join_list()). This could be backported to 2.5; there is no need to port it to 3.0.
This commit is contained in:
parent
98d19dafd9
commit
1ff91d95a2
|
@ -83,6 +83,31 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
|
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
|
||||||
'abc\ndef\n')
|
'abc\ndef\n')
|
||||||
|
|
||||||
|
def test_bug_1140(self):
|
||||||
|
# re.sub(x, y, u'') should return u'', not '', and
|
||||||
|
# re.sub(x, y, '') should return '', not u''.
|
||||||
|
# Also:
|
||||||
|
# re.sub(x, y, unicode(x)) should return unicode(y), and
|
||||||
|
# re.sub(x, y, str(x)) should return
|
||||||
|
# str(y) if isinstance(y, str) else unicode(y).
|
||||||
|
for x in 'x', u'x':
|
||||||
|
for y in 'y', u'y':
|
||||||
|
z = re.sub(x, y, u'')
|
||||||
|
self.assertEqual(z, u'')
|
||||||
|
self.assertEqual(type(z), unicode)
|
||||||
|
#
|
||||||
|
z = re.sub(x, y, '')
|
||||||
|
self.assertEqual(z, '')
|
||||||
|
self.assertEqual(type(z), str)
|
||||||
|
#
|
||||||
|
z = re.sub(x, y, unicode(x))
|
||||||
|
self.assertEqual(z, y)
|
||||||
|
self.assertEqual(type(z), unicode)
|
||||||
|
#
|
||||||
|
z = re.sub(x, y, str(x))
|
||||||
|
self.assertEqual(z, y)
|
||||||
|
self.assertEqual(type(z), type(y))
|
||||||
|
|
||||||
def test_sub_template_numeric_escape(self):
|
def test_sub_template_numeric_escape(self):
|
||||||
# bug 776311 and friends
|
# bug 776311 and friends
|
||||||
self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
|
self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
|
||||||
|
|
|
@ -1979,7 +1979,7 @@ deepcopy(PyObject** object, PyObject* memo)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
join_list(PyObject* list, PyObject* pattern)
|
join_list(PyObject* list, PyObject* string)
|
||||||
{
|
{
|
||||||
/* join list elements */
|
/* join list elements */
|
||||||
|
|
||||||
|
@ -1990,24 +1990,15 @@ join_list(PyObject* list, PyObject* pattern)
|
||||||
#endif
|
#endif
|
||||||
PyObject* result;
|
PyObject* result;
|
||||||
|
|
||||||
switch (PyList_GET_SIZE(list)) {
|
joiner = PySequence_GetSlice(string, 0, 0);
|
||||||
case 0:
|
|
||||||
Py_DECREF(list);
|
|
||||||
return PySequence_GetSlice(pattern, 0, 0);
|
|
||||||
case 1:
|
|
||||||
result = PyList_GET_ITEM(list, 0);
|
|
||||||
Py_INCREF(result);
|
|
||||||
Py_DECREF(list);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* two or more elements: slice out a suitable separator from the
|
|
||||||
first member, and use that to join the entire list */
|
|
||||||
|
|
||||||
joiner = PySequence_GetSlice(pattern, 0, 0);
|
|
||||||
if (!joiner)
|
if (!joiner)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
if (PyList_GET_SIZE(list) == 0) {
|
||||||
|
Py_DECREF(list);
|
||||||
|
return joiner;
|
||||||
|
}
|
||||||
|
|
||||||
#if PY_VERSION_HEX >= 0x01060000
|
#if PY_VERSION_HEX >= 0x01060000
|
||||||
function = PyObject_GetAttrString(joiner, "join");
|
function = PyObject_GetAttrString(joiner, "join");
|
||||||
if (!function) {
|
if (!function) {
|
||||||
|
@ -2443,7 +2434,7 @@ next:
|
||||||
Py_DECREF(filter);
|
Py_DECREF(filter);
|
||||||
|
|
||||||
/* convert list to single string (also removes list) */
|
/* convert list to single string (also removes list) */
|
||||||
item = join_list(list, self->pattern);
|
item = join_list(list, string);
|
||||||
|
|
||||||
if (!item)
|
if (!item)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
Loading…
Reference in New Issue