#1496: revert str.translate() to the old version, and add
str.maketrans() to make a table in a more comfortable way.
This commit is contained in:
parent
45f9af34b3
commit
ceee0773d2
|
@ -800,6 +800,21 @@ functions based on regular expressions.
|
|||
'example.com'
|
||||
|
||||
|
||||
.. method:: str.maketrans(x[, y[, z]])
|
||||
|
||||
This static method returns a translation table usable for :meth:`str.translate`.
|
||||
|
||||
If there is only one argument, it must be a dictionary mapping Unicode
|
||||
ordinals (integers) or characters (strings of length 1) to Unicode ordinals,
|
||||
strings (of arbitrary lengths) or None. Character keys will then be
|
||||
converted to ordinals.
|
||||
|
||||
If there are two arguments, they must be strings of equal length, and in the
|
||||
resulting dictionary, each character in x will be mapped to the character at
|
||||
the same position in y. If there is a third argument, it must be a string,
|
||||
whose characters will be mapped to None in the result.
|
||||
|
||||
|
||||
.. method:: str.partition(sep)
|
||||
|
||||
Split the string at the first occurrence of *sep*, and return a 3-tuple
|
||||
|
@ -934,15 +949,17 @@ functions based on regular expressions.
|
|||
.. method:: str.translate(map)
|
||||
|
||||
Return a copy of the string *s* where all characters have been mapped through the
|
||||
*map* which must be a dictionary of characters (strings of length 1) or
|
||||
Unicode ordinals (integers) to Unicode ordinals, strings or ``None``.
|
||||
Unmapped characters are left untouched. Characters mapped to ``None`` are
|
||||
deleted.
|
||||
*map* which must be a dictionary of Unicode ordinals (integers) to Unicode
|
||||
ordinals, strings or ``None``. Unmapped characters are left untouched.
|
||||
Characters mapped to ``None`` are deleted.
|
||||
|
||||
A *map* for :meth:`translate` is usually best created by
|
||||
:meth:`str.maketrans`.
|
||||
|
||||
.. note::
|
||||
|
||||
A more flexible approach is to create a custom character mapping codec
|
||||
using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
|
||||
An even more flexible approach is to create a custom character mapping
|
||||
codec using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
|
||||
example).
|
||||
|
||||
|
||||
|
|
|
@ -166,18 +166,37 @@ class UnicodeTest(
|
|||
self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, 8)
|
||||
self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, -1)
|
||||
|
||||
def test_translate(self):
|
||||
self.checkequalnofix('bbbc', 'abababc', 'translate', {ord('a'):None})
|
||||
self.checkequalnofix('iiic', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
|
||||
self.checkequalnofix('iiix', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):'x'})
|
||||
self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', {'a':None, 'b':'<i>'})
|
||||
self.checkequalnofix('c', 'abababc', 'translate', {ord('a'):None, ord('b'):''})
|
||||
self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'):'yy'})
|
||||
def test_maketrans_translate(self):
|
||||
# these work with plain translate()
|
||||
self.checkequalnofix('bbbc', 'abababc', 'translate',
|
||||
{ord('a'): None})
|
||||
self.checkequalnofix('iiic', 'abababc', 'translate',
|
||||
{ord('a'): None, ord('b'): ord('i')})
|
||||
self.checkequalnofix('iiix', 'abababc', 'translate',
|
||||
{ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'})
|
||||
self.checkequalnofix('c', 'abababc', 'translate',
|
||||
{ord('a'): None, ord('b'): ''})
|
||||
self.checkequalnofix('xyyx', 'xzx', 'translate',
|
||||
{ord('z'): 'yy'})
|
||||
# this needs maketrans()
|
||||
self.checkequalnofix('abababc', 'abababc', 'translate',
|
||||
{'b': '<i>'})
|
||||
tbl = self.type2test.maketrans({'a': None, 'b': '<i>'})
|
||||
self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', tbl)
|
||||
# test alternative way of calling maketrans()
|
||||
tbl = self.type2test.maketrans('abc', 'xyz', 'd')
|
||||
self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)
|
||||
|
||||
self.assertRaises(TypeError, self.type2test.maketrans)
|
||||
self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
|
||||
self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
|
||||
self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2)
|
||||
self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2)
|
||||
self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2})
|
||||
self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2})
|
||||
|
||||
self.assertRaises(TypeError, 'hello'.translate)
|
||||
self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
|
||||
self.assertRaises(ValueError, 'abababc'.translate, {'xy':2})
|
||||
self.assertRaises(TypeError, 'abababc'.translate, {(1,):2})
|
||||
|
||||
def test_split(self):
|
||||
string_tests.CommonTest.test_split(self)
|
||||
|
|
|
@ -7793,6 +7793,109 @@ unicode_swapcase(PyUnicodeObject *self)
|
|||
return fixup(self, fixswapcase);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(maketrans__doc__,
|
||||
"str.maketrans(x[, y[, z]]) -> dict (static method)\n\
|
||||
\n\
|
||||
Return a translation table usable for str.translate().\n\
|
||||
If there is only one argument, it must be a dictionary mapping Unicode\n\
|
||||
ordinals (integers) or characters to Unicode ordinals, strings or None.\n\
|
||||
Character keys will then be converted to ordinals.\n\
|
||||
If there are two arguments, they must be strings of equal length, and\n\
|
||||
in the resulting dictionary, each character in x will be mapped to the\n\
|
||||
character at the same position in y. If there is a third argument, it\n\
|
||||
must be a string, whose characters will be mapped to None in the result.");
|
||||
|
||||
static PyObject*
|
||||
unicode_maketrans(PyUnicodeObject *null, PyObject *args)
|
||||
{
|
||||
PyObject *x, *y = NULL, *z = NULL;
|
||||
PyObject *new = NULL, *key, *value;
|
||||
Py_ssize_t i = 0;
|
||||
int res;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z))
|
||||
return NULL;
|
||||
new = PyDict_New();
|
||||
if (!new)
|
||||
return NULL;
|
||||
if (y != NULL) {
|
||||
/* x must be a string too, of equal length */
|
||||
Py_ssize_t ylen = PyUnicode_GET_SIZE(y);
|
||||
if (!PyUnicode_Check(x)) {
|
||||
PyErr_SetString(PyExc_TypeError, "first maketrans argument must "
|
||||
"be a string if there is a second argument");
|
||||
goto err;
|
||||
}
|
||||
if (PyUnicode_GET_SIZE(x) != ylen) {
|
||||
PyErr_SetString(PyExc_ValueError, "the first two maketrans "
|
||||
"arguments must have equal length");
|
||||
goto err;
|
||||
}
|
||||
/* create entries for translating chars in x to those in y */
|
||||
for (i = 0; i < PyUnicode_GET_SIZE(x); i++) {
|
||||
key = PyInt_FromLong(PyUnicode_AS_UNICODE(x)[i]);
|
||||
value = PyInt_FromLong(PyUnicode_AS_UNICODE(y)[i]);
|
||||
if (!key || !value)
|
||||
goto err;
|
||||
res = PyDict_SetItem(new, key, value);
|
||||
Py_DECREF(key);
|
||||
Py_DECREF(value);
|
||||
if (res < 0)
|
||||
goto err;
|
||||
}
|
||||
/* create entries for deleting chars in z */
|
||||
if (z != NULL) {
|
||||
for (i = 0; i < PyUnicode_GET_SIZE(z); i++) {
|
||||
key = PyInt_FromLong(PyUnicode_AS_UNICODE(z)[i]);
|
||||
if (!key)
|
||||
goto err;
|
||||
res = PyDict_SetItem(new, key, Py_None);
|
||||
Py_DECREF(key);
|
||||
if (res < 0)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* x must be a dict */
|
||||
if (!PyDict_Check(x)) {
|
||||
PyErr_SetString(PyExc_TypeError, "if you give only one argument "
|
||||
"to maketrans it must be a dict");
|
||||
goto err;
|
||||
}
|
||||
/* copy entries into the new dict, converting string keys to int keys */
|
||||
while (PyDict_Next(x, &i, &key, &value)) {
|
||||
if (PyUnicode_Check(key)) {
|
||||
/* convert string keys to integer keys */
|
||||
PyObject *newkey;
|
||||
if (PyUnicode_GET_SIZE(key) != 1) {
|
||||
PyErr_SetString(PyExc_ValueError, "string keys in translate "
|
||||
"table must be of length 1");
|
||||
goto err;
|
||||
}
|
||||
newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
|
||||
if (!newkey)
|
||||
goto err;
|
||||
res = PyDict_SetItem(new, newkey, value);
|
||||
Py_DECREF(newkey);
|
||||
if (res < 0)
|
||||
goto err;
|
||||
} else if (PyInt_Check(key)) {
|
||||
/* just keep integer keys */
|
||||
if (PyDict_SetItem(new, key, value) < 0)
|
||||
goto err;
|
||||
} else {
|
||||
PyErr_SetString(PyExc_TypeError, "keys in translate table must "
|
||||
"be strings or integers");
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
}
|
||||
return new;
|
||||
err:
|
||||
Py_DECREF(new);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(translate__doc__,
|
||||
"S.translate(table) -> unicode\n\
|
||||
\n\
|
||||
|
@ -7805,54 +7908,7 @@ are deleted.");
|
|||
static PyObject*
|
||||
unicode_translate(PyUnicodeObject *self, PyObject *table)
|
||||
{
|
||||
PyObject *newtable = NULL;
|
||||
Py_ssize_t i = 0;
|
||||
PyObject *key, *value, *result;
|
||||
|
||||
if (!PyDict_Check(table)) {
|
||||
PyErr_SetString(PyExc_TypeError, "translate argument must be a dict");
|
||||
return NULL;
|
||||
}
|
||||
/* fixup the table -- allow size-1 string keys instead of only int keys */
|
||||
newtable = PyDict_Copy(table);
|
||||
if (!newtable) return NULL;
|
||||
while (PyDict_Next(table, &i, &key, &value)) {
|
||||
if (PyUnicode_Check(key)) {
|
||||
/* convert string keys to integer keys */
|
||||
PyObject *newkey;
|
||||
int res;
|
||||
if (PyUnicode_GET_SIZE(key) != 1) {
|
||||
PyErr_SetString(PyExc_ValueError, "string items in translate "
|
||||
"table must be 1 element long");
|
||||
goto err;
|
||||
}
|
||||
newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
|
||||
if (!newkey)
|
||||
goto err;
|
||||
res = PyDict_SetItem(newtable, newkey, value);
|
||||
Py_DECREF(newkey);
|
||||
if (res < 0)
|
||||
goto err;
|
||||
} else if (PyInt_Check(key)) {
|
||||
/* just keep integer keys */
|
||||
if (PyDict_SetItem(newtable, key, value) < 0)
|
||||
goto err;
|
||||
} else {
|
||||
PyErr_SetString(PyExc_TypeError, "items in translate table must be "
|
||||
"strings or integers");
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
result = PyUnicode_TranslateCharmap(self->str,
|
||||
self->length,
|
||||
newtable,
|
||||
"ignore");
|
||||
Py_DECREF(newtable);
|
||||
return result;
|
||||
err:
|
||||
Py_DECREF(newtable);
|
||||
return NULL;
|
||||
return PyUnicode_TranslateCharmap(self->str, self->length, table, "ignore");
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(upper__doc__,
|
||||
|
@ -8076,6 +8132,8 @@ static PyMethodDef unicode_methods[] = {
|
|||
{"__format__", (PyCFunction) unicode_unicode__format__, METH_VARARGS, p_format__doc__},
|
||||
{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
|
||||
{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
|
||||
{"maketrans", (PyCFunction) unicode_maketrans,
|
||||
METH_VARARGS | METH_STATIC, maketrans__doc__},
|
||||
#if 0
|
||||
{"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue