From d04d8474dfde5ed584a51a43d3d177376a98fb58 Mon Sep 17 00:00:00 2001
From: Xiang Zhang
Date: Wed, 23 Nov 2016 19:34:01 +0800
Subject: [PATCH] Issue #28774: Fix start/end pos in unicode_encode_ucs1().

Fix error position of the unicode error in ASCII and Latin1 encoders
when a string returned by the error handler contains multiple
non-encodable characters (non-ASCII for the ASCII codec, characters out
of the U+0000-U+00FF range for Latin1).
---
 Misc/NEWS               | 5 +++++
 Objects/unicodeobject.c | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/Misc/NEWS b/Misc/NEWS
index 92fca57a808..af42d901342 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,11 @@ What's New in Python 3.7.0 alpha 1
 Core and Builtins
 -----------------
 
+- Issue #28774: Fix error position of the unicode error in ASCII and Latin1
+  encoders when a string returned by the error handler contains multiple
+  non-encodable characters (non-ASCII for the ASCII codec, characters out
+  of the U+0000-U+00FF range for Latin1).
+
 - Issue #28731: Optimize _PyDict_NewPresized() to create correct size dict.
   Improve speed of dict literal with constant keys up to 30%.
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e88a126eba7..2bf48b756f1 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6798,7 +6798,7 @@ unicode_encode_ucs1(PyObject *unicode,
                 goto onError;
 
             /* subtract preallocated bytes */
-            writer.min_size -= 1;
+            writer.min_size -= newpos - collstart;
 
             if (PyBytes_Check(rep)) {
                 /* Directly copy bytes result to output. */
@@ -6835,7 +6835,7 @@ unicode_encode_ucs1(PyObject *unicode,
                         ch = PyUnicode_READ_CHAR(rep, i);
                         if (ch >= limit) {
                             raise_encode_exception(&exc, encoding, unicode,
-                                                   pos, pos+1, reason);
+                                                   collstart, collend, reason);
                             goto onError;
                         }
                         *str = (char)ch;