mirror of https://github.com/python/cpython
add keyword arguments support to str/unicode encode and decode #6300
This commit is contained in:
parent
2985dbb526
commit
332d721750
|
@ -815,8 +815,8 @@ String Methods
|
||||||
|
|
||||||
.. index:: pair: string; methods
|
.. index:: pair: string; methods
|
||||||
|
|
||||||
Below are listed the string methods which both 8-bit strings and Unicode objects
|
Below are listed the string methods which both 8-bit strings and
|
||||||
support. Note that none of these methods take keyword arguments.
|
Unicode objects support.
|
||||||
|
|
||||||
In addition, Python's strings support the sequence type methods
|
In addition, Python's strings support the sequence type methods
|
||||||
described in the :ref:`typesseq` section. To output formatted strings
|
described in the :ref:`typesseq` section. To output formatted strings
|
||||||
|
@ -861,6 +861,8 @@ string functions based on regular expressions.
|
||||||
.. versionchanged:: 2.3
|
.. versionchanged:: 2.3
|
||||||
Support for other error handling schemes added.
|
Support for other error handling schemes added.
|
||||||
|
|
||||||
|
.. versionchanged:: 2.7
|
||||||
|
Support for keyword arguments added.
|
||||||
|
|
||||||
.. method:: str.encode([encoding[,errors]])
|
.. method:: str.encode([encoding[,errors]])
|
||||||
|
|
||||||
|
@ -879,6 +881,8 @@ string functions based on regular expressions.
|
||||||
Support for ``'xmlcharrefreplace'`` and ``'backslashreplace'`` and other error
|
Support for ``'xmlcharrefreplace'`` and ``'backslashreplace'`` and other error
|
||||||
handling schemes added.
|
handling schemes added.
|
||||||
|
|
||||||
|
.. versionchanged:: 2.7
|
||||||
|
Support for keyword arguments added.
|
||||||
|
|
||||||
.. method:: str.endswith(suffix[, start[, end]])
|
.. method:: str.endswith(suffix[, start[, end]])
|
||||||
|
|
||||||
|
|
|
@ -401,6 +401,17 @@ class StrTest(
|
||||||
def test_buffer_is_readonly(self):
|
def test_buffer_is_readonly(self):
|
||||||
self.assertRaises(TypeError, sys.stdin.readinto, b"")
|
self.assertRaises(TypeError, sys.stdin.readinto, b"")
|
||||||
|
|
||||||
|
def test_encode_and_decode_kwargs(self):
|
||||||
|
self.assertEqual('abcde'.encode('ascii', 'replace'),
|
||||||
|
'abcde'.encode('ascii', errors='replace'))
|
||||||
|
self.assertEqual('abcde'.encode('ascii', 'ignore'),
|
||||||
|
'abcde'.encode(encoding='ascii', errors='ignore'))
|
||||||
|
self.assertEqual('Andr\202 x'.decode('ascii', 'ignore'),
|
||||||
|
'Andr\202 x'.decode('ascii', errors='ignore'))
|
||||||
|
self.assertEqual('Andr\202 x'.decode('ascii', 'replace'),
|
||||||
|
'Andr\202 x'.decode(encoding='ascii', errors='replace'))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test_support.run_unittest(StrTest)
|
test_support.run_unittest(StrTest)
|
||||||
|
|
|
@ -593,12 +593,20 @@ class UnicodeTest(
|
||||||
self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict')
|
self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict')
|
||||||
self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x")
|
self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x")
|
||||||
self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x")
|
self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x")
|
||||||
|
self.assertEqual(u'Andr\202 x'.encode('ascii', 'replace'),
|
||||||
|
u'Andr\202 x'.encode('ascii', errors='replace'))
|
||||||
|
self.assertEqual(u'Andr\202 x'.encode('ascii', 'ignore'),
|
||||||
|
u'Andr\202 x'.encode(encoding='ascii', errors='ignore'))
|
||||||
|
|
||||||
# Error handling (decoding)
|
# Error handling (decoding)
|
||||||
self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii')
|
self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii')
|
||||||
self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict')
|
self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict')
|
||||||
self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x")
|
self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x")
|
||||||
self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x')
|
self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x')
|
||||||
|
self.assertEqual(u'abcde'.decode('ascii', 'ignore'),
|
||||||
|
u'abcde'.decode('ascii', errors='ignore'))
|
||||||
|
self.assertEqual(u'abcde'.decode('ascii', 'replace'),
|
||||||
|
u'abcde'.decode(encoding='ascii', errors='replace'))
|
||||||
|
|
||||||
# Error handling (unknown character names)
|
# Error handling (unknown character names)
|
||||||
self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")
|
self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")
|
||||||
|
|
|
@ -88,6 +88,7 @@ Peter Bosch
|
||||||
Eric Bouck
|
Eric Bouck
|
||||||
Thierry Bousch
|
Thierry Bousch
|
||||||
Sebastian Boving
|
Sebastian Boving
|
||||||
|
Jeff Bradberry
|
||||||
Monty Brandenberg
|
Monty Brandenberg
|
||||||
Georg Brandl
|
Georg Brandl
|
||||||
Christopher Brannon
|
Christopher Brannon
|
||||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #6300: unicode.encode, unicode.decode, str.decode, and str.encode now
|
||||||
|
take keyword arguments.
|
||||||
|
|
||||||
- Issue #6922: Fix an infinite loop when trying to decode an invalid
|
- Issue #6922: Fix an infinite loop when trying to decode an invalid
|
||||||
UTF-32 stream with a non-raising error handler like "replace" or "ignore".
|
UTF-32 stream with a non-raising error handler like "replace" or "ignore".
|
||||||
|
|
||||||
|
|
|
@ -3332,13 +3332,15 @@ a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
|
||||||
codecs.register_error that is able to handle UnicodeEncodeErrors.");
|
codecs.register_error that is able to handle UnicodeEncodeErrors.");
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
string_encode(PyStringObject *self, PyObject *args)
|
string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
|
static char *kwlist[] = {"encoding", "errors", 0};
|
||||||
char *encoding = NULL;
|
char *encoding = NULL;
|
||||||
char *errors = NULL;
|
char *errors = NULL;
|
||||||
PyObject *v;
|
PyObject *v;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
|
||||||
|
kwlist, &encoding, &errors))
|
||||||
return NULL;
|
return NULL;
|
||||||
v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
|
v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
|
@ -3369,13 +3371,15 @@ as well as any other name registered with codecs.register_error that is\n\
|
||||||
able to handle UnicodeDecodeErrors.");
|
able to handle UnicodeDecodeErrors.");
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
string_decode(PyStringObject *self, PyObject *args)
|
string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
|
static char *kwlist[] = {"encoding", "errors", 0};
|
||||||
char *encoding = NULL;
|
char *encoding = NULL;
|
||||||
char *errors = NULL;
|
char *errors = NULL;
|
||||||
PyObject *v;
|
PyObject *v;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
|
||||||
|
kwlist, &encoding, &errors))
|
||||||
return NULL;
|
return NULL;
|
||||||
v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
|
v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
|
@ -4053,8 +4057,8 @@ string_methods[] = {
|
||||||
{"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
|
{"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
|
||||||
{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
|
{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
|
||||||
{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
|
{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
|
||||||
{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
|
{"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
|
||||||
{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
|
{"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
|
||||||
{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
|
{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
|
||||||
expandtabs__doc__},
|
expandtabs__doc__},
|
||||||
{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
|
{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
|
||||||
|
|
|
@ -6610,13 +6610,15 @@ a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
|
||||||
codecs.register_error that can handle UnicodeEncodeErrors.");
|
codecs.register_error that can handle UnicodeEncodeErrors.");
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
unicode_encode(PyUnicodeObject *self, PyObject *args)
|
unicode_encode(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
|
static char *kwlist[] = {"encoding", "errors", 0};
|
||||||
char *encoding = NULL;
|
char *encoding = NULL;
|
||||||
char *errors = NULL;
|
char *errors = NULL;
|
||||||
PyObject *v;
|
PyObject *v;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
|
||||||
|
kwlist, &encoding, &errors))
|
||||||
return NULL;
|
return NULL;
|
||||||
v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors);
|
v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
|
@ -6646,13 +6648,15 @@ as well as any other name registerd with codecs.register_error that is\n\
|
||||||
able to handle UnicodeDecodeErrors.");
|
able to handle UnicodeDecodeErrors.");
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
unicode_decode(PyUnicodeObject *self, PyObject *args)
|
unicode_decode(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
|
static char *kwlist[] = {"encoding", "errors", 0};
|
||||||
char *encoding = NULL;
|
char *encoding = NULL;
|
||||||
char *errors = NULL;
|
char *errors = NULL;
|
||||||
PyObject *v;
|
PyObject *v;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
|
||||||
|
kwlist, &encoding, &errors))
|
||||||
return NULL;
|
return NULL;
|
||||||
v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors);
|
v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
|
@ -8054,7 +8058,7 @@ static PyMethodDef unicode_methods[] = {
|
||||||
/* Order is according to common usage: often used methods should
|
/* Order is according to common usage: often used methods should
|
||||||
appear first, since lookup is done sequentially. */
|
appear first, since lookup is done sequentially. */
|
||||||
|
|
||||||
{"encode", (PyCFunction) unicode_encode, METH_VARARGS, encode__doc__},
|
{"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
|
||||||
{"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
|
{"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
|
||||||
{"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
|
{"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
|
||||||
{"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
|
{"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
|
||||||
|
@ -8070,7 +8074,7 @@ static PyMethodDef unicode_methods[] = {
|
||||||
{"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
|
{"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
|
||||||
{"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
|
{"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
|
||||||
{"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
|
{"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
|
||||||
{"decode", (PyCFunction) unicode_decode, METH_VARARGS, decode__doc__},
|
{"decode", (PyCFunction) unicode_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
|
||||||
/* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
|
/* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
|
||||||
{"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
|
{"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
|
||||||
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
|
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
|
||||||
|
|
Loading…
Reference in New Issue