(Merge 3.2) Issue #12100: Don't reset incremental encoders of CJK codecs at each call to their encode() method anymore, but continue to call the reset() method if the final argument is True.
2011-05-24 22:24:11 +02:00 · 2011-05-24 22:24:11 +02:00 · eb734f77ad
parent 19fb53c119 d48ba0bde5
commit eb734f77ad
3 changed files with 38 additions and 4 deletions
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -256,6 +256,36 @@ class Test_ISO2022(unittest.TestCase):
            # Any ISO 2022 codec will cause the segfault
            myunichr(x).encode('iso_2022_jp', 'ignore')
 class TestStateful(unittest.TestCase):
    """Exercise a stateful codec through str.encode() and its incremental encoder.

    Subclasses override the four class attributes below to run the same
    three checks against another encoding.
    """
    # Sample input: the same character twice.
    text = '\u4E16\u4E16'
    # Codec name handed to str.encode() and codecs.getincrementalencoder().
    encoding = 'iso-2022-jp'
    # Bytes produced when the encoder is never told the input is final.
    expected = b'\x1b$B@$@$'
    # Same bytes as `expected`, followed by the trailing reset bytes.
    expected_reset = b'\x1b$B@$@$\x1b(B'
    def test_encode(self):
        """One-shot str.encode() must yield the output ending with the reset bytes."""
        encoded = self.text.encode(self.encoding)
        self.assertEqual(encoded, self.expected_reset)
    def test_incrementalencoder(self):
        """Feeding one character per call, never final, must not emit the reset bytes."""
        make_encoder = codecs.getincrementalencoder(self.encoding)
        enc = make_encoder()
        pieces = [enc.encode(ch) for ch in self.text]
        self.assertEqual(b''.join(pieces), self.expected)
    def test_incrementalencoder_final(self):
        """Flagging only the last character as final must append the reset bytes."""
        enc = codecs.getincrementalencoder(self.encoding)()
        pieces = []
        remaining = len(self.text)
        for ch in self.text:
            remaining -= 1
            # The second positional argument is the `final` flag; it is
            # True only for the last character of the text.
            pieces.append(enc.encode(ch, remaining == 0))
        self.assertEqual(b''.join(pieces), self.expected_reset)
 class TestHZStateful(TestStateful):
    """Re-run the inherited TestStateful checks against the 'hz' codec."""
    # Sample input: the same character twice.
    text = '\u804a\u804a'
    encoding = 'hz'
    # Output when the encoder is never told the input is final.
    expected = b'~{ADAD'
    # Same output followed by the trailing b'~}' bytes.
    expected_reset = expected + b'~}'
 def test_main():
    """Hand this module's name to support.run_unittest()."""
    module_name = __name__
    support.run_unittest(module_name)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -161,6 +161,10 @@ Core and Builtins
 Library
 -------
 - Issue #12100: Don't reset incremental encoders of CJK codecs at each call to
  their encode() method anymore, but continue to call the reset() method if the
  final argument is True.
 - Issue #12049: Add RAND_bytes() and RAND_pseudo_bytes() functions to the ssl
  module.
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -479,7 +479,7 @@ multibytecodec_encode(MultibyteCodec *codec,
    MultibyteEncodeBuffer buf;
    Py_ssize_t finalsize, r = 0;
-    if (datalen == 0)
+    if (datalen == 0 && !(flags & MBENC_RESET))
        return PyBytes_FromStringAndSize(NULL, 0);
    buf.excobj = NULL;
@@ -515,7 +515,7 @@ multibytecodec_encode(MultibyteCodec *codec,
            break;
    }
-    if (codec->encreset != NULL)
+    if (codec->encreset != NULL && (flags & MBENC_RESET))
        for (;;) {
            Py_ssize_t outleft;
@@ -785,8 +785,8 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
    inbuf_end = inbuf + datalen;
    r = multibytecodec_encode(ctx->codec, &ctx->state,
-                    (const Py_UNICODE **)&inbuf,
+                    (const Py_UNICODE **)&inbuf, datalen,
-                    datalen, ctx->errors, final ? MBENC_FLUSH : 0);
+                    ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
    if (r == NULL) {
        /* recover the original pending buffer */
        if (origpending > 0)