add keyword arguments support to str/unicode encode and decode #6300

This commit is contained in:
Benjamin Peterson 2009-09-18 21:14:55 +00:00
parent 2985dbb526
commit 332d721750
7 changed files with 49 additions and 14 deletions

View File

@ -815,8 +815,8 @@ String Methods
.. index:: pair: string; methods .. index:: pair: string; methods
Below are listed the string methods which both 8-bit strings and Unicode objects Below are listed the string methods which both 8-bit strings and
support. Note that none of these methods take keyword arguments. Unicode objects support.
In addition, Python's strings support the sequence type methods In addition, Python's strings support the sequence type methods
described in the :ref:`typesseq` section. To output formatted strings described in the :ref:`typesseq` section. To output formatted strings
@ -861,6 +861,8 @@ string functions based on regular expressions.
.. versionchanged:: 2.3 .. versionchanged:: 2.3
Support for other error handling schemes added. Support for other error handling schemes added.
.. versionchanged:: 2.7
Support for keyword arguments added.
.. method:: str.encode([encoding[,errors]]) .. method:: str.encode([encoding[,errors]])
@ -879,6 +881,8 @@ string functions based on regular expressions.
Support for ``'xmlcharrefreplace'`` and ``'backslashreplace'`` and other error Support for ``'xmlcharrefreplace'`` and ``'backslashreplace'`` and other error
handling schemes added. handling schemes added.
.. versionchanged:: 2.7
Support for keyword arguments added.
.. method:: str.endswith(suffix[, start[, end]]) .. method:: str.endswith(suffix[, start[, end]])

View File

@ -401,6 +401,17 @@ class StrTest(
def test_buffer_is_readonly(self): def test_buffer_is_readonly(self):
self.assertRaises(TypeError, sys.stdin.readinto, b"") self.assertRaises(TypeError, sys.stdin.readinto, b"")
def test_encode_and_decode_kwargs(self):
self.assertEqual('abcde'.encode('ascii', 'replace'),
'abcde'.encode('ascii', errors='replace'))
self.assertEqual('abcde'.encode('ascii', 'ignore'),
'abcde'.encode(encoding='ascii', errors='ignore'))
self.assertEqual('Andr\202 x'.decode('ascii', 'ignore'),
'Andr\202 x'.decode('ascii', errors='ignore'))
self.assertEqual('Andr\202 x'.decode('ascii', 'replace'),
'Andr\202 x'.decode(encoding='ascii', errors='replace'))
def test_main(): def test_main():
test_support.run_unittest(StrTest) test_support.run_unittest(StrTest)

View File

@ -593,12 +593,20 @@ class UnicodeTest(
self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict') self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict')
self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x") self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x")
self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x") self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x")
self.assertEqual(u'Andr\202 x'.encode('ascii', 'replace'),
u'Andr\202 x'.encode('ascii', errors='replace'))
self.assertEqual(u'Andr\202 x'.encode('ascii', 'ignore'),
u'Andr\202 x'.encode(encoding='ascii', errors='ignore'))
# Error handling (decoding) # Error handling (decoding)
self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii') self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii')
self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict') self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict')
self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x") self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x")
self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x') self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x')
self.assertEqual(u'abcde'.decode('ascii', 'ignore'),
u'abcde'.decode('ascii', errors='ignore'))
self.assertEqual(u'abcde'.decode('ascii', 'replace'),
u'abcde'.decode(encoding='ascii', errors='replace'))
# Error handling (unknown character names) # Error handling (unknown character names)
self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx") self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")

View File

@ -88,6 +88,7 @@ Peter Bosch
Eric Bouck Eric Bouck
Thierry Bousch Thierry Bousch
Sebastian Boving Sebastian Boving
Jeff Bradberry
Monty Brandenberg Monty Brandenberg
Georg Brandl Georg Brandl
Christopher Brannon Christopher Brannon

View File

@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #6300: unicode.encode, unicode.decode, str.decode, and str.encode now
take keyword arguments.
- Issue #6922: Fix an infinite loop when trying to decode an invalid - Issue #6922: Fix an infinite loop when trying to decode an invalid
UTF-32 stream with a non-raising error handler like "replace" or "ignore". UTF-32 stream with a non-raising error handler like "replace" or "ignore".

View File

@ -3332,13 +3332,15 @@ a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
codecs.register_error that is able to handle UnicodeEncodeErrors."); codecs.register_error that is able to handle UnicodeEncodeErrors.");
static PyObject * static PyObject *
string_encode(PyStringObject *self, PyObject *args) string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
{ {
static char *kwlist[] = {"encoding", "errors", 0};
char *encoding = NULL; char *encoding = NULL;
char *errors = NULL; char *errors = NULL;
PyObject *v; PyObject *v;
if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
kwlist, &encoding, &errors))
return NULL; return NULL;
v = PyString_AsEncodedObject((PyObject *)self, encoding, errors); v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
if (v == NULL) if (v == NULL)
@ -3369,13 +3371,15 @@ as well as any other name registered with codecs.register_error that is\n\
able to handle UnicodeDecodeErrors."); able to handle UnicodeDecodeErrors.");
static PyObject * static PyObject *
string_decode(PyStringObject *self, PyObject *args) string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
{ {
static char *kwlist[] = {"encoding", "errors", 0};
char *encoding = NULL; char *encoding = NULL;
char *errors = NULL; char *errors = NULL;
PyObject *v; PyObject *v;
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
kwlist, &encoding, &errors))
return NULL; return NULL;
v = PyString_AsDecodedObject((PyObject *)self, encoding, errors); v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
if (v == NULL) if (v == NULL)
@ -4053,8 +4057,8 @@ string_methods[] = {
{"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__}, {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS}, {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS}, {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__}, {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__}, {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
expandtabs__doc__}, expandtabs__doc__},
{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,

View File

@ -6610,13 +6610,15 @@ a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
codecs.register_error that can handle UnicodeEncodeErrors."); codecs.register_error that can handle UnicodeEncodeErrors.");
static PyObject * static PyObject *
unicode_encode(PyUnicodeObject *self, PyObject *args) unicode_encode(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
{ {
static char *kwlist[] = {"encoding", "errors", 0};
char *encoding = NULL; char *encoding = NULL;
char *errors = NULL; char *errors = NULL;
PyObject *v; PyObject *v;
if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
kwlist, &encoding, &errors))
return NULL; return NULL;
v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors); v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors);
if (v == NULL) if (v == NULL)
@ -6646,13 +6648,15 @@ as well as any other name registerd with codecs.register_error that is\n\
able to handle UnicodeDecodeErrors."); able to handle UnicodeDecodeErrors.");
static PyObject * static PyObject *
unicode_decode(PyUnicodeObject *self, PyObject *args) unicode_decode(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
{ {
static char *kwlist[] = {"encoding", "errors", 0};
char *encoding = NULL; char *encoding = NULL;
char *errors = NULL; char *errors = NULL;
PyObject *v; PyObject *v;
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
kwlist, &encoding, &errors))
return NULL; return NULL;
v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors); v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors);
if (v == NULL) if (v == NULL)
@ -8054,7 +8058,7 @@ static PyMethodDef unicode_methods[] = {
/* Order is according to common usage: often used methods should /* Order is according to common usage: often used methods should
appear first, since lookup is done sequentially. */ appear first, since lookup is done sequentially. */
{"encode", (PyCFunction) unicode_encode, METH_VARARGS, encode__doc__}, {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
{"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__}, {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
{"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__}, {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
{"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__}, {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
@ -8070,7 +8074,7 @@ static PyMethodDef unicode_methods[] = {
{"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__}, {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
{"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__}, {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
{"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__}, {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
{"decode", (PyCFunction) unicode_decode, METH_VARARGS, decode__doc__}, {"decode", (PyCFunction) unicode_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
/* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */ /* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
{"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__}, {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},