#1496: revert str.translate() to the old version, and add

str.maketrans() to make a table in a more comfortable way.
This commit is contained in:
Georg Brandl 2007-11-27 23:48:05 +00:00
parent 45f9af34b3
commit ceee0773d2
3 changed files with 157 additions and 63 deletions

View File

@ -800,6 +800,21 @@ functions based on regular expressions.
'example.com'
.. method:: str.maketrans(x[, y[, z]])
This static method returns a translation table usable for :meth:`str.translate`.
If there is only one argument, it must be a dictionary mapping Unicode
ordinals (integers) or characters (strings of length 1) to Unicode ordinals,
strings (of arbitrary lengths) or None. Character keys will then be
converted to ordinals.
If there are two arguments, they must be strings of equal length, and in the
resulting dictionary, each character in x will be mapped to the character at
the same position in y. If there is a third argument, it must be a string,
whose characters will be mapped to None in the result.
.. method:: str.partition(sep)
Split the string at the first occurrence of *sep*, and return a 3-tuple
@ -934,15 +949,17 @@ functions based on regular expressions.
.. method:: str.translate(map)
Return a copy of *s* where all characters have been mapped through the
*map* which must be a dictionary of characters (strings of length 1) or
Unicode ordinals (integers) to Unicode ordinals, strings or ``None``.
Unmapped characters are left untouched. Characters mapped to ``None`` are
deleted.
*map* which must be a dictionary of Unicode ordinals (integers) to Unicode
ordinals, strings or ``None``. Unmapped characters are left untouched.
Characters mapped to ``None`` are deleted.
A *map* for :meth:`translate` is usually best created by
:meth:`str.maketrans`.
.. note::
A more flexible approach is to create a custom character mapping codec
using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
An even more flexible approach is to create a custom character mapping
codec using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
example).

View File

@ -166,18 +166,37 @@ class UnicodeTest(
self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, 8)
self.assertRaises(ValueError, 'abcdefghi'.rindex, 'ghi', 0, -1)
# Exercises str.translate() with dict tables passed directly.
# Each checkequalnofix call appears to take the expected result first, then
# the subject string, the method name and the method argument -- confirm
# against the helper's definition, which is not visible here.
# NOTE(review): this span looks like the pre-change side of a diff whose
# +/- markers and indentation were lost; confirm against the real file.
def test_translate(self):
# a character mapped to None is dropped from the result
self.checkequalnofix('bbbc', 'abababc', 'translate', {ord('a'):None})
# a character mapped to an integer ordinal is replaced by that character
self.checkequalnofix('iiic', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
# a character mapped to a one-character string
self.checkequalnofix('iiix', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):'x'})
# table keyed by one-character strings instead of ordinals
self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', {'a':None, 'b':'<i>'})
# mapping to the empty string removes the character as well
self.checkequalnofix('c', 'abababc', 'translate', {ord('a'):None, ord('b'):''})
# mapping to a longer string expands the character
self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'):'yy'})
# Exercises str.translate() with ordinal-keyed tables and with tables built by
# maketrans() (looked up on self.type2test), including maketrans() argument
# validation.
# Each checkequalnofix call appears to take the expected result first, then
# the subject string, the method name and the method argument -- confirm
# against the helper's definition, which is not visible here.
def test_maketrans_translate(self):
# these work with plain translate()
# a character mapped to None is dropped from the result
self.checkequalnofix('bbbc', 'abababc', 'translate',
{ord('a'): None})
# a character mapped to an integer ordinal is replaced by that character
self.checkequalnofix('iiic', 'abababc', 'translate',
{ord('a'): None, ord('b'): ord('i')})
# a character mapped to a one-character string
self.checkequalnofix('iiix', 'abababc', 'translate',
{ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'})
# mapping to the empty string removes the character as well
self.checkequalnofix('c', 'abababc', 'translate',
{ord('a'): None, ord('b'): ''})
# mapping to a longer string expands the character
self.checkequalnofix('xyyx', 'xzx', 'translate',
{ord('z'): 'yy'})
# this needs maketrans()
# a string-keyed table handed straight to translate() is expected to leave
# the input unchanged
self.checkequalnofix('abababc', 'abababc', 'translate',
{'b': '<i>'})
# the same kind of table, first passed through maketrans()
tbl = self.type2test.maketrans({'a': None, 'b': '<i>'})
self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', tbl)
# test alternative way of calling maketrans()
# three-string form: 'abc' -> 'xyz' position by position, 'd' deleted
tbl = self.type2test.maketrans('abc', 'xyz', 'd')
self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)
# argument validation: no arguments, unequal lengths, non-string arguments,
# multi-character string key, key that is neither string nor integer
self.assertRaises(TypeError, self.type2test.maketrans)
self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2)
self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2)
self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2})
self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2})
# translate() itself: missing argument and too many arguments
self.assertRaises(TypeError, 'hello'.translate)
self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
# NOTE(review): the next two checks expect translate() to validate table
# keys; they may be lines carried over from the earlier version of this
# test in the diff view -- confirm against the real file.
self.assertRaises(ValueError, 'abababc'.translate, {'xy':2})
self.assertRaises(TypeError, 'abababc'.translate, {(1,):2})
def test_split(self):
string_tests.CommonTest.test_split(self)

View File

@ -7793,6 +7793,109 @@ unicode_swapcase(PyUnicodeObject *self)
return fixup(self, fixswapcase);
}
PyDoc_STRVAR(maketrans__doc__,
"str.maketrans(x[, y[, z]]) -> dict (static method)\n\
\n\
Return a translation table usable for str.translate().\n\
If there is only one argument, it must be a dictionary mapping Unicode\n\
ordinals (integers) or characters to Unicode ordinals, strings or None.\n\
Character keys will then be converted to ordinals.\n\
If there are two arguments, they must be strings of equal length, and\n\
in the resulting dictionary, each character in x will be mapped to the\n\
character at the same position in y. If there is a third argument, it\n\
must be a string, whose characters will be mapped to None in the result.");
/* Implementation of the static method str.maketrans(x[, y[, z]]).
 *
 * Builds a fresh dict usable as a table for str.translate():
 *   - x only:      x must be a dict.  Keys that are length-1 strings are
 *                  replaced by their integer ordinal; integer keys are
 *                  copied unchanged.  Values are copied as they are.
 *   - x and y:     both must be strings of equal length; the ordinal of
 *                  each character of x maps to the ordinal of the character
 *                  at the same index in y.
 *   - x, y and z:  additionally, the ordinal of every character of z maps
 *                  to None.
 *
 * The first parameter is never read (the method is registered with
 * METH_STATIC).
 *
 * Returns a new reference to the dict, or NULL with an exception set:
 * TypeError for arguments/keys of the wrong type, ValueError for strings of
 * unequal length or string keys whose length is not 1.
 */
static PyObject*
unicode_maketrans(PyUnicodeObject *null, PyObject *args)
{
    PyObject *x, *y = NULL, *z = NULL;
    PyObject *new = NULL, *key, *value;
    Py_ssize_t i = 0;
    int res;

    if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z))
        return NULL;
    new = PyDict_New();
    if (!new)
        return NULL;
    if (y != NULL) {
        /* x must be a string too, of equal length */
        Py_ssize_t ylen = PyUnicode_GET_SIZE(y);
        if (!PyUnicode_Check(x)) {
            PyErr_SetString(PyExc_TypeError, "first maketrans argument must "
                            "be a string if there is a second argument");
            goto err;
        }
        if (PyUnicode_GET_SIZE(x) != ylen) {
            PyErr_SetString(PyExc_ValueError, "the first two maketrans "
                            "arguments must have equal length");
            goto err;
        }
        /* create entries for translating chars in x to those in y;
           x and y have the same length at this point, so ylen bounds both */
        for (i = 0; i < ylen; i++) {
            /* Allocate and check the two objects one at a time so that a
               failure of the second does not leak the first. */
            key = PyInt_FromLong(PyUnicode_AS_UNICODE(x)[i]);
            if (!key)
                goto err;
            value = PyInt_FromLong(PyUnicode_AS_UNICODE(y)[i]);
            if (!value) {
                Py_DECREF(key);
                goto err;
            }
            res = PyDict_SetItem(new, key, value);
            /* the dict holds its own references now (or the insert failed) */
            Py_DECREF(key);
            Py_DECREF(value);
            if (res < 0)
                goto err;
        }
        /* create entries for deleting chars in z */
        if (z != NULL) {
            Py_ssize_t zlen = PyUnicode_GET_SIZE(z);
            for (i = 0; i < zlen; i++) {
                key = PyInt_FromLong(PyUnicode_AS_UNICODE(z)[i]);
                if (!key)
                    goto err;
                res = PyDict_SetItem(new, key, Py_None);
                Py_DECREF(key);
                if (res < 0)
                    goto err;
            }
        }
    } else {
        /* x must be a dict */
        if (!PyDict_Check(x)) {
            PyErr_SetString(PyExc_TypeError, "if you give only one argument "
                            "to maketrans it must be a dict");
            goto err;
        }
        /* copy entries into the new dict, converting string keys to int keys;
           i is still 0 here, which is the start position PyDict_Next needs,
           and key/value are not owned by this function */
        while (PyDict_Next(x, &i, &key, &value)) {
            if (PyUnicode_Check(key)) {
                /* convert string keys to integer keys */
                PyObject *newkey;
                if (PyUnicode_GET_SIZE(key) != 1) {
                    PyErr_SetString(PyExc_ValueError, "string keys in translate "
                                    "table must be of length 1");
                    goto err;
                }
                newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
                if (!newkey)
                    goto err;
                res = PyDict_SetItem(new, newkey, value);
                Py_DECREF(newkey);
                if (res < 0)
                    goto err;
            } else if (PyInt_Check(key)) {
                /* just keep integer keys */
                if (PyDict_SetItem(new, key, value) < 0)
                    goto err;
            } else {
                PyErr_SetString(PyExc_TypeError, "keys in translate table must "
                                "be strings or integers");
                goto err;
            }
        }
    }
    return new;

  err:
    /* every failure path lands here with an exception already set */
    Py_DECREF(new);
    return NULL;
}
PyDoc_STRVAR(translate__doc__,
"S.translate(table) -> unicode\n\
\n\
@ -7805,54 +7908,7 @@ are deleted.");
/* Implementation of str.translate(table).
 *
 * As displayed, the body requires `table` to be a dict, builds a copy of it
 * in which every length-1 string key is additionally entered under its
 * integer ordinal, and passes that copy to PyUnicode_TranslateCharmap()
 * together with the string's buffer and length and the "ignore" error mode.
 * Returns whatever PyUnicode_TranslateCharmap() returns, or NULL with an
 * exception set when the table is rejected.
 *
 * NOTE(review): this span appears to show removed and added diff lines
 * together (the +/- markers are missing); confirm against the real file
 * which of the two bodies is current.
 */
static PyObject*
unicode_translate(PyUnicodeObject *self, PyObject *table)
{
PyObject *newtable = NULL;
Py_ssize_t i = 0;
PyObject *key, *value, *result;
if (!PyDict_Check(table)) {
PyErr_SetString(PyExc_TypeError, "translate argument must be a dict");
return NULL;
}
/* fixup the table -- allow size-1 string keys instead of only int keys */
newtable = PyDict_Copy(table);
if (!newtable) return NULL;
/* iterate over the caller's dict while writing into the copy */
while (PyDict_Next(table, &i, &key, &value)) {
if (PyUnicode_Check(key)) {
/* convert string keys to integer keys */
PyObject *newkey;
int res;
if (PyUnicode_GET_SIZE(key) != 1) {
PyErr_SetString(PyExc_ValueError, "string items in translate "
"table must be 1 element long");
goto err;
}
newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
if (!newkey)
goto err;
res = PyDict_SetItem(newtable, newkey, value);
/* release the local reference to newkey whether or not the insert worked */
Py_DECREF(newkey);
if (res < 0)
goto err;
} else if (PyInt_Check(key)) {
/* just keep integer keys */
if (PyDict_SetItem(newtable, key, value) < 0)
goto err;
} else {
PyErr_SetString(PyExc_TypeError, "items in translate table must be "
"strings or integers");
goto err;
}
}
result = PyUnicode_TranslateCharmap(self->str,
self->length,
newtable,
"ignore");
Py_DECREF(newtable);
return result;
err:
/* error exit: drop the copied table; an exception has already been set */
Py_DECREF(newtable);
return NULL;
/* NOTE(review): as displayed, the statement below can never execute because
   both paths above have already returned; it looks like the replacement body
   from the added side of the diff -- confirm. */
return PyUnicode_TranslateCharmap(self->str, self->length, table, "ignore");
}
PyDoc_STRVAR(upper__doc__,
@ -8076,6 +8132,8 @@ static PyMethodDef unicode_methods[] = {
{"__format__", (PyCFunction) unicode_unicode__format__, METH_VARARGS, p_format__doc__},
{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
{"maketrans", (PyCFunction) unicode_maketrans,
METH_VARARGS | METH_STATIC, maketrans__doc__},
#if 0
{"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
#endif