mirror of https://github.com/python/cpython
Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
in any mapping, not only in a unicode string.
This commit is contained in:
parent
90b5d9288d
commit
9599745e2c
|
@ -1551,6 +1551,14 @@ class CharmapTest(unittest.TestCase):
|
||||||
(u"abc", 3)
|
(u"abc", 3)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.assertRaises(UnicodeDecodeError,
|
||||||
|
codecs.charmap_decode, b"\x00\x01\x02", "strict", u"ab"
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertRaises(UnicodeDecodeError,
|
||||||
|
codecs.charmap_decode, "\x00\x01\x02", "strict", u"ab\ufffe"
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"),
|
codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"),
|
||||||
(u"ab\ufffd", 3)
|
(u"ab\ufffd", 3)
|
||||||
|
@ -1566,10 +1574,6 @@ class CharmapTest(unittest.TestCase):
|
||||||
(u"ab", 3)
|
(u"ab", 3)
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertRaises(UnicodeDecodeError,
|
|
||||||
codecs.charmap_decode, b"\x00\x01\x02", "strict", u"ab"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"),
|
codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"),
|
||||||
(u"ab", 3)
|
(u"ab", 3)
|
||||||
|
@ -1611,6 +1615,17 @@ class CharmapTest(unittest.TestCase):
|
||||||
{0: u'a', 1: u'b'}
|
{0: u'a', 1: u'b'}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.assertRaises(UnicodeDecodeError,
|
||||||
|
codecs.charmap_decode, "\x00\x01\x02", "strict",
|
||||||
|
{0: u'a', 1: u'b', 2: None}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Issue #14850
|
||||||
|
self.assertRaises(UnicodeDecodeError,
|
||||||
|
codecs.charmap_decode, "\x00\x01\x02", "strict",
|
||||||
|
{0: u'a', 1: u'b', 2: u'\ufffe'}
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
codecs.charmap_decode("\x00\x01\x02", "replace",
|
codecs.charmap_decode("\x00\x01\x02", "replace",
|
||||||
{0: u'a', 1: u'b'}),
|
{0: u'a', 1: u'b'}),
|
||||||
|
@ -1623,6 +1638,13 @@ class CharmapTest(unittest.TestCase):
|
||||||
(u"ab\ufffd", 3)
|
(u"ab\ufffd", 3)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Issue #14850
|
||||||
|
self.assertEqual(
|
||||||
|
codecs.charmap_decode("\x00\x01\x02", "replace",
|
||||||
|
{0: u'a', 1: u'b', 2: u'\ufffe'}),
|
||||||
|
(u"ab\ufffd", 3)
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
codecs.charmap_decode("\x00\x01\x02", "ignore",
|
codecs.charmap_decode("\x00\x01\x02", "ignore",
|
||||||
{0: u'a', 1: u'b'}),
|
{0: u'a', 1: u'b'}),
|
||||||
|
@ -1635,7 +1657,14 @@ class CharmapTest(unittest.TestCase):
|
||||||
(u"ab", 3)
|
(u"ab", 3)
|
||||||
)
|
)
|
||||||
|
|
||||||
allbytes = bytes(range(256))
|
# Issue #14850
|
||||||
|
self.assertEqual(
|
||||||
|
codecs.charmap_decode("\x00\x01\x02", "ignore",
|
||||||
|
{0: u'a', 1: u'b', 2: u'\ufffe'}),
|
||||||
|
(u"ab", 3)
|
||||||
|
)
|
||||||
|
|
||||||
|
allbytes = "".join(chr(i) for i in xrange(256))
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
codecs.charmap_decode(allbytes, "ignore", {}),
|
codecs.charmap_decode(allbytes, "ignore", {}),
|
||||||
(u"", len(allbytes))
|
(u"", len(allbytes))
|
||||||
|
@ -1669,18 +1698,35 @@ class CharmapTest(unittest.TestCase):
|
||||||
{0: a, 1: b},
|
{0: a, 1: b},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.assertRaises(UnicodeDecodeError,
|
||||||
|
codecs.charmap_decode, "\x00\x01\x02", "strict",
|
||||||
|
{0: a, 1: b, 2: 0xFFFE},
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
codecs.charmap_decode("\x00\x01\x02", "replace",
|
codecs.charmap_decode("\x00\x01\x02", "replace",
|
||||||
{0: a, 1: b}),
|
{0: a, 1: b}),
|
||||||
(u"ab\ufffd", 3)
|
(u"ab\ufffd", 3)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
codecs.charmap_decode("\x00\x01\x02", "replace",
|
||||||
|
{0: a, 1: b, 2: 0xFFFE}),
|
||||||
|
(u"ab\ufffd", 3)
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
codecs.charmap_decode("\x00\x01\x02", "ignore",
|
codecs.charmap_decode("\x00\x01\x02", "ignore",
|
||||||
{0: a, 1: b}),
|
{0: a, 1: b}),
|
||||||
(u"ab", 3)
|
(u"ab", 3)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
codecs.charmap_decode("\x00\x01\x02", "ignore",
|
||||||
|
{0: a, 1: b, 2: 0xFFFE}),
|
||||||
|
(u"ab", 3)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class WithStmtTest(unittest.TestCase):
|
class WithStmtTest(unittest.TestCase):
|
||||||
def test_encodedfile(self):
|
def test_encodedfile(self):
|
||||||
|
|
|
@ -9,6 +9,9 @@ What's New in Python 2.7.4
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
|
||||||
|
in any mapping, not only in a unicode string.
|
||||||
|
|
||||||
- Issue #11461: Fix the incremental UTF-16 decoder. Original patch by
|
- Issue #11461: Fix the incremental UTF-16 decoder. Original patch by
|
||||||
Amaury Forgeot d'Arc.
|
Amaury Forgeot d'Arc.
|
||||||
|
|
||||||
|
|
|
@ -4121,15 +4121,18 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
|
||||||
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
||||||
/* No mapping found means: mapping is undefined. */
|
/* No mapping found means: mapping is undefined. */
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
x = Py_None;
|
goto Undefined;
|
||||||
Py_INCREF(x);
|
|
||||||
} else
|
} else
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Apply mapping */
|
/* Apply mapping */
|
||||||
|
if (x == Py_None)
|
||||||
|
goto Undefined;
|
||||||
if (PyInt_Check(x)) {
|
if (PyInt_Check(x)) {
|
||||||
long value = PyInt_AS_LONG(x);
|
long value = PyInt_AS_LONG(x);
|
||||||
|
if (value == 0xFFFE)
|
||||||
|
goto Undefined;
|
||||||
if (value < 0 || value > 0x10FFFF) {
|
if (value < 0 || value > 0x10FFFF) {
|
||||||
PyErr_SetString(PyExc_TypeError,
|
PyErr_SetString(PyExc_TypeError,
|
||||||
"character mapping must be in range(0x110000)");
|
"character mapping must be in range(0x110000)");
|
||||||
|
@ -4162,29 +4165,16 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
|
||||||
#endif
|
#endif
|
||||||
*p++ = (Py_UNICODE)value;
|
*p++ = (Py_UNICODE)value;
|
||||||
}
|
}
|
||||||
else if (x == Py_None) {
|
|
||||||
/* undefined mapping */
|
|
||||||
outpos = p-PyUnicode_AS_UNICODE(v);
|
|
||||||
startinpos = s-starts;
|
|
||||||
endinpos = startinpos+1;
|
|
||||||
if (unicode_decode_call_errorhandler(
|
|
||||||
errors, &errorHandler,
|
|
||||||
"charmap", "character maps to <undefined>",
|
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
|
||||||
&v, &outpos, &p)) {
|
|
||||||
Py_DECREF(x);
|
|
||||||
goto onError;
|
|
||||||
}
|
|
||||||
Py_DECREF(x);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
else if (PyUnicode_Check(x)) {
|
else if (PyUnicode_Check(x)) {
|
||||||
Py_ssize_t targetsize = PyUnicode_GET_SIZE(x);
|
Py_ssize_t targetsize = PyUnicode_GET_SIZE(x);
|
||||||
|
|
||||||
if (targetsize == 1)
|
if (targetsize == 1) {
|
||||||
/* 1-1 mapping */
|
/* 1-1 mapping */
|
||||||
*p++ = *PyUnicode_AS_UNICODE(x);
|
Py_UNICODE value = *PyUnicode_AS_UNICODE(x);
|
||||||
|
if (value == 0xFFFE)
|
||||||
|
goto Undefined;
|
||||||
|
*p++ = value;
|
||||||
|
}
|
||||||
else if (targetsize > 1) {
|
else if (targetsize > 1) {
|
||||||
/* 1-n mapping */
|
/* 1-n mapping */
|
||||||
if (targetsize > extrachars) {
|
if (targetsize > extrachars) {
|
||||||
|
@ -4218,6 +4208,20 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
|
||||||
}
|
}
|
||||||
Py_DECREF(x);
|
Py_DECREF(x);
|
||||||
++s;
|
++s;
|
||||||
|
continue;
|
||||||
|
Undefined:
|
||||||
|
/* undefined mapping */
|
||||||
|
Py_XDECREF(x);
|
||||||
|
outpos = p-PyUnicode_AS_UNICODE(v);
|
||||||
|
startinpos = s-starts;
|
||||||
|
endinpos = startinpos+1;
|
||||||
|
if (unicode_decode_call_errorhandler(
|
||||||
|
errors, &errorHandler,
|
||||||
|
"charmap", "character maps to <undefined>",
|
||||||
|
starts, size, &startinpos, &endinpos, &exc, &s,
|
||||||
|
&v, &outpos, &p)) {
|
||||||
|
goto onError;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
|
if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
|
||||||
|
|
Loading…
Reference in New Issue