From 8dc33d56f55597096fa49afc3d36bed37147eb01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Walter=20D=C3=B6rwald?= Date: Wed, 6 May 2009 14:41:26 +0000 Subject: [PATCH] Merged revisions 72404-72406 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r72404 | walter.doerwald | 2009-05-06 16:28:24 +0200 (Mi, 06 Mai 2009) | 3 lines Issue 3739: The unicode-internal encoder now reports the number of *characters* consumed like any other encoder (instead of the number of bytes). ........ r72406 | walter.doerwald | 2009-05-06 16:32:35 +0200 (Mi, 06 Mai 2009) | 2 lines Add NEWS entry about issue #3739. ........ --- Lib/test/test_codecs.py | 9 +++++++-- Misc/NEWS | 3 +++ Modules/_codecsmodule.c | 3 ++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 5a3834d4959..79b1fab0c3f 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -872,6 +872,12 @@ class UnicodeInternalTest(unittest.TestCase): "UnicodeInternalTest") self.assertEquals(("ab", 12), ignored) + def test_encode_length(self): + # Issue 3739 + encoder = codecs.getencoder("unicode_internal") + self.assertEquals(encoder("a")[1], 1) + self.assertEquals(encoder("\xe9\u0142")[1], 2) + # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html nameprep_tests = [ # 3.1 Map to nothing. @@ -1317,8 +1323,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): name = "latin_1" self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-")) (b, size) = codecs.getencoder(encoding)(s) - if encoding != "unicode_internal": - self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding)) + self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding)) (chars, size) = codecs.getdecoder(encoding)(b) self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding)) diff --git a/Misc/NEWS b/Misc/NEWS index 45bd9e758c7..ade72f2c22f 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -127,6 +127,9 @@ Core and Builtins - Issue #1113244: Py_XINCREF, Py_DECREF, Py_XDECREF: Add `do { ... } while (0)' to avoid compiler warnings. +- Issue #3739: The unicode-internal encoder now reports the number of characters + consumed like any other encoder (instead of the number of bytes). + Installation ------------ diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index cbcf84dd07d..55b593b37d5 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -669,7 +669,8 @@ unicode_internal_encode(PyObject *self, if (PyUnicode_Check(obj)) { data = PyUnicode_AS_DATA(obj); size = PyUnicode_GET_DATA_SIZE(obj); - return codec_tuple(PyBytes_FromStringAndSize(data, size), size); + return codec_tuple(PyBytes_FromStringAndSize(data, size), + PyUnicode_GET_SIZE(obj)); } else { if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))