From 81fabdb437eea29e2616de58e6952b7ef2e5542f Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 22 Jan 2009 10:11:36 +0000 Subject: [PATCH] Issue #4874: Most builtin decoders now reject unicode input. --- Lib/test/test_codecs.py | 31 +++++++++++++++++++++++++++++++ Misc/NEWS | 2 ++ Modules/_codecsmodule.c | 28 ++++++++++++++-------------- 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index e485fdd2fe8..1730dbe5939 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1475,6 +1475,36 @@ class WithStmtTest(unittest.TestCase): info.streamwriter, 'strict') as srw: self.assertEquals(srw.read(), "\xfc") +class TypesTest(unittest.TestCase): + def test_decode_unicode(self): + # Most decoders don't accept unicode input + decoders = [ + codecs.utf_7_decode, + codecs.utf_8_decode, + codecs.utf_16_le_decode, + codecs.utf_16_be_decode, + codecs.utf_16_ex_decode, + codecs.utf_32_decode, + codecs.utf_32_le_decode, + codecs.utf_32_be_decode, + codecs.utf_32_ex_decode, + codecs.latin_1_decode, + codecs.ascii_decode, + codecs.charmap_decode, + ] + if hasattr(codecs, "mbcs_decode"): + decoders.append(codecs.mbcs_decode) + for decoder in decoders: + self.assertRaises(TypeError, decoder, "xxx") + + def test_unicode_escape(self): + # Escape-decoding a unicode string is supported and gives the same + # result as decoding the equivalent ASCII bytes string. 
+ self.assertEquals(codecs.unicode_escape_decode(r"\u1234"), ("\u1234", 6)) + self.assertEquals(codecs.unicode_escape_decode(br"\u1234"), ("\u1234", 6)) + self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6)) + self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6)) + def test_main(): support.run_unittest( @@ -1501,6 +1531,7 @@ def test_main(): BasicUnicodeTest, CharmapTest, WithStmtTest, + TypesTest, ) diff --git a/Misc/NEWS b/Misc/NEWS index b92048c2c6d..2d18011d514 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,8 @@ What's New in Python 3.1 alpha 0 Core and Builtins ----------------- +- Issue #4874: Most builtin decoders now reject unicode input. + - Issue #4842: Don't allow trailing 'L' when constructing an integer from a string. diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index fc3e3f9bbd6..e7dbf89499e 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -258,7 +258,7 @@ utf_7_decode(PyObject *self, Py_ssize_t consumed; PyObject *decoded = NULL; - if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode", + if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode", &pbuf, &errors, &final)) return NULL; consumed = pbuf.len; @@ -281,7 +281,7 @@ utf_8_decode(PyObject *self, Py_ssize_t consumed; PyObject *decoded = NULL; - if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode", + if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode", &pbuf, &errors, &final)) return NULL; consumed = pbuf.len; @@ -305,7 +305,7 @@ utf_16_decode(PyObject *self, Py_ssize_t consumed; PyObject *decoded; - if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode", + if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode", &pbuf, &errors, &final)) return NULL; consumed = pbuf.len; /* This is overwritten unless final is true. 
*/ @@ -328,7 +328,7 @@ utf_16_le_decode(PyObject *self, Py_ssize_t consumed; PyObject *decoded = NULL; - if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode", + if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode", &pbuf, &errors, &final)) return NULL; @@ -352,7 +352,7 @@ utf_16_be_decode(PyObject *self, Py_ssize_t consumed; PyObject *decoded = NULL; - if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode", + if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode", &pbuf, &errors, &final)) return NULL; @@ -384,7 +384,7 @@ utf_16_ex_decode(PyObject *self, int final = 0; Py_ssize_t consumed; - if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode", + if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode", &pbuf, &errors, &byteorder, &final)) return NULL; consumed = pbuf.len; /* This is overwritten unless final is true. */ @@ -409,7 +409,7 @@ utf_32_decode(PyObject *self, Py_ssize_t consumed; PyObject *decoded; - if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode", + if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode", &pbuf, &errors, &final)) return NULL; consumed = pbuf.len; /* This is overwritten unless final is true. */ @@ -432,7 +432,7 @@ utf_32_le_decode(PyObject *self, Py_ssize_t consumed; PyObject *decoded; - if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode", + if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode", &pbuf, &errors, &final)) return NULL; consumed = pbuf.len; /* This is overwritten unless final is true. */ @@ -455,7 +455,7 @@ utf_32_be_decode(PyObject *self, Py_ssize_t consumed; PyObject *decoded; - if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode", + if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode", &pbuf, &errors, &final)) return NULL; consumed = pbuf.len; /* This is overwritten unless final is true. 
*/ @@ -486,7 +486,7 @@ utf_32_ex_decode(PyObject *self, int final = 0; Py_ssize_t consumed; - if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode", + if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode", &pbuf, &errors, &byteorder, &final)) return NULL; consumed = pbuf.len; /* This is overwritten unless final is true. */ @@ -542,7 +542,7 @@ latin_1_decode(PyObject *self, PyObject *unicode; const char *errors = NULL; - if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode", + if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode", &pbuf, &errors)) return NULL; @@ -559,7 +559,7 @@ ascii_decode(PyObject *self, PyObject *unicode; const char *errors = NULL; - if (!PyArg_ParseTuple(args, "s*|z:ascii_decode", + if (!PyArg_ParseTuple(args, "y*|z:ascii_decode", &pbuf, &errors)) return NULL; @@ -577,7 +577,7 @@ charmap_decode(PyObject *self, const char *errors = NULL; PyObject *mapping = NULL; - if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode", + if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode", &pbuf, &errors, &mapping)) return NULL; if (mapping == Py_None) @@ -600,7 +600,7 @@ mbcs_decode(PyObject *self, Py_ssize_t consumed; PyObject *decoded = NULL; - if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode", + if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode", &pbuf, &errors, &final)) return NULL; consumed = pbuf.len;