Issue #18509: handle _PyUnicodeWriter_WriteChar() errors

This commit is contained in:
Victor Stinner 2013-10-28 23:18:39 +01:00
parent 33aaa73cad
commit 11bdf91a5f
7 changed files with 102 additions and 84 deletions

View File

@ -27,8 +27,10 @@
if ((dc1) == 0xa1 && (dc2) == 0xaa) OUTCHAR(0x2014); \
else if ((dc1) == 0xa8 && (dc2) == 0x44) OUTCHAR(0x2015); \
else if ((dc1) == 0xa1 && (dc2) == 0xa4) OUTCHAR(0x00b7); \
else TRYMAP_DEC(gb2312, writer, dc1 ^ 0x80, dc2 ^ 0x80); \
else TRYMAP_DEC(gbkext, writer, dc1, dc2);
else if (TRYMAP_DEC(gb2312, decoded, dc1 ^ 0x80, dc2 ^ 0x80)) \
OUTCHAR(decoded); \
else if (TRYMAP_DEC(gbkext, decoded, dc1, dc2)) \
OUTCHAR(decoded);
#define GBK_ENCODE(code, assi) \
if ((code) == 0x2014) (assi) = 0xa1aa; \
@ -74,6 +76,7 @@ DECODER(gb2312)
{
while (inleft > 0) {
unsigned char c = **inbuf;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
@ -82,7 +85,8 @@ DECODER(gb2312)
}
REQUIRE_INBUF(2)
TRYMAP_DEC(gb2312, writer, c ^ 0x80, INBYTE2 ^ 0x80) {
if (TRYMAP_DEC(gb2312, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) {
OUTCHAR(decoded);
NEXT_IN(2);
}
else return 1;
@ -131,6 +135,7 @@ DECODER(gbk)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
@ -236,6 +241,7 @@ DECODER(gb18030)
{
while (inleft > 0) {
unsigned char c = INBYTE1, c2;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
@ -284,7 +290,8 @@ DECODER(gb18030)
}
GBK_DECODE(c, c2, writer)
else TRYMAP_DEC(gb18030ext, writer, c, c2);
else if (TRYMAP_DEC(gb18030ext, decoded, c, c2))
OUTCHAR(decoded);
else return 1;
NEXT_IN(2);
@ -372,6 +379,7 @@ DECODER(hz)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
if (c == '~') {
unsigned char c2 = INBYTE2;
@ -403,7 +411,8 @@ DECODER(hz)
}
else { /* GB mode */
REQUIRE_INBUF(2)
TRYMAP_DEC(gb2312, writer, c, INBYTE2) {
if (TRYMAP_DEC(gb2312, decoded, c, INBYTE2)) {
OUTCHAR(decoded);
NEXT_IN(2);
}
else

View File

@ -118,13 +118,14 @@ DECODER(big5hkscs)
REQUIRE_INBUF(2)
if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
TRYMAP_DEC(big5, writer, c, INBYTE2) {
if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
OUTCHAR(decoded);
NEXT_IN(2);
continue;
}
}
TRYMAP_DEC_CHAR(big5hkscs, decoded, c, INBYTE2)
if (TRYMAP_DEC(big5hkscs, decoded, c, INBYTE2))
{
int s = BH2S(c, INBYTE2);
const unsigned char *hintbase;

View File

@ -566,7 +566,7 @@ static Py_UCS4
ksx1001_decoder(const unsigned char *data)
{
Py_UCS4 u;
TRYMAP_DEC_CHAR(ksx1001, u, data[0], data[1])
if (TRYMAP_DEC(ksx1001, u, data[0], data[1]))
return u;
else
return MAP_UNMAPPABLE;
@ -604,7 +604,7 @@ jisx0208_decoder(const unsigned char *data)
Py_UCS4 u;
if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1])
else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]))
return u;
else
return MAP_UNMAPPABLE;
@ -643,7 +643,7 @@ static Py_UCS4
jisx0212_decoder(const unsigned char *data)
{
Py_UCS4 u;
TRYMAP_DEC_CHAR(jisx0212, u, data[0], data[1])
if (TRYMAP_DEC(jisx0212, u, data[0], data[1]))
return u;
else
return MAP_UNMAPPABLE;
@ -697,11 +697,11 @@ jisx0213_2000_1_decoder(const unsigned char *data)
EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1])
else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]))
u |= 0x20000;
else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]);
else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]));
else
return MAP_UNMAPPABLE;
return u;
@ -712,8 +712,8 @@ jisx0213_2000_2_decoder(const unsigned char *data)
{
Py_UCS4 u;
EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1])
TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1])
if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]))
u |= 0x20000;
else
return MAP_UNMAPPABLE;
@ -727,11 +727,11 @@ jisx0213_2004_1_decoder(const unsigned char *data)
Py_UCS4 u;
if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1])
else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]))
u |= 0x20000;
else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]);
else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]));
else
return MAP_UNMAPPABLE;
return u;
@ -741,8 +741,8 @@ static Py_UCS4
jisx0213_2004_2_decoder(const unsigned char *data)
{
Py_UCS4 u;
TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1])
if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]))
u |= 0x20000;
else
return MAP_UNMAPPABLE;
@ -950,7 +950,7 @@ static Py_UCS4
gb2312_decoder(const unsigned char *data)
{
Py_UCS4 u;
TRYMAP_DEC_CHAR(gb2312, u, data[0], data[1])
if (TRYMAP_DEC(gb2312, u, data[0], data[1]))
return u;
else
return MAP_UNMAPPABLE;

View File

@ -85,6 +85,7 @@ DECODER(cp932)
{
while (inleft > 0) {
unsigned char c = INBYTE1, c2;
Py_UCS4 decoded;
if (c <= 0x80) {
OUTCHAR(c);
@ -109,7 +110,8 @@ DECODER(cp932)
REQUIRE_INBUF(2)
c2 = INBYTE2;
TRYMAP_DEC(cp932ext, writer, c, c2);
if (TRYMAP_DEC(cp932ext, decoded, c, c2))
OUTCHAR(decoded);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1;
@ -119,8 +121,10 @@ DECODER(cp932)
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
TRYMAP_DEC(jisx0208, writer, c, c2);
else return 1;
if (TRYMAP_DEC(jisx0208, decoded, c, c2))
OUTCHAR(decoded);
else
return 1;
}
else if (c >= 0xf0 && c <= 0xf9) {
if ((c2 >= 0x40 && c2 <= 0x7e) ||
@ -235,7 +239,7 @@ DECODER(euc_jis_2004)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 code;
Py_UCS4 code, decoded;
if (c < 0x80) {
OUTCHAR(c);
@ -265,13 +269,15 @@ DECODER(euc_jis_2004)
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)
else TRYMAP_DEC(jisx0213_2_bmp, writer, c2, c3) ;
else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c2, c3) {
else if (TRYMAP_DEC(jisx0213_2_bmp, decoded, c2, c3))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_2_emp, code, c2, c3)) {
OUTCHAR(EMPBASE | code);
NEXT_IN(3);
continue;
}
else TRYMAP_DEC(jisx0212, writer, c2, c3) ;
else if (TRYMAP_DEC(jisx0212, decoded, c2, c3))
OUTCHAR(decoded);
else return 1;
NEXT_IN(3);
}
@ -286,14 +292,16 @@ DECODER(euc_jis_2004)
EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
else if (c == 0x21 && c2 == 0x40) OUTCHAR(0xff3c);
else if (c == 0x22 && c2 == 0x32) OUTCHAR(0xff5e);
else TRYMAP_DEC(jisx0208, writer, c, c2);
else TRYMAP_DEC(jisx0213_1_bmp, writer, c, c2);
else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c, c2) {
else if (TRYMAP_DEC(jisx0208, decoded, c, c2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_1_bmp, decoded, c, c2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_1_emp, code, c, c2)) {
OUTCHAR(EMPBASE | code);
NEXT_IN(2);
continue;
}
else TRYMAP_DEC_CHAR(jisx0213_pair, code, c, c2) {
else if (TRYMAP_DEC(jisx0213_pair, code, c, c2)) {
OUTCHAR2(code >> 16, code & 0xffff);
NEXT_IN(2);
continue;
@ -367,6 +375,7 @@ DECODER(euc_jp)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
@ -394,7 +403,8 @@ DECODER(euc_jp)
c2 = INBYTE2;
c3 = INBYTE3;
/* JIS X 0212 */
TRYMAP_DEC(jisx0212, writer, c2 ^ 0x80, c3 ^ 0x80) {
if (TRYMAP_DEC(jisx0212, decoded, c2 ^ 0x80, c3 ^ 0x80)) {
OUTCHAR(decoded);
NEXT_IN(3);
}
else
@ -412,9 +422,10 @@ DECODER(euc_jp)
OUTCHAR(0xff3c);
else
#endif
TRYMAP_DEC(jisx0208, writer,
c ^ 0x80, c2 ^ 0x80) ;
else return 1;
if (TRYMAP_DEC(jisx0208, decoded, c ^ 0x80, c2 ^ 0x80))
OUTCHAR(decoded);
else
return 1;
NEXT_IN(2);
}
}
@ -486,6 +497,7 @@ DECODER(shift_jis)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
#ifdef STRICT_BUILD
JISX0201_R_DECODE(c, writer)
@ -514,7 +526,8 @@ DECODER(shift_jis)
continue;
}
#endif
TRYMAP_DEC(jisx0208, writer, c1, c2) {
if (TRYMAP_DEC(jisx0208, decoded, c1, c2)) {
OUTCHAR(decoded);
NEXT_IN(2);
continue;
}
@ -636,7 +649,7 @@ DECODER(shift_jis_2004)
JISX0201_DECODE(c, writer)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
unsigned char c1, c2;
Py_UCS4 code;
Py_UCS4 code, decoded;
REQUIRE_INBUF(2)
c2 = INBYTE2;
@ -652,17 +665,14 @@ DECODER(shift_jis_2004)
c1 += 0x21;
EMULATE_JISX0213_2000_DECODE_PLANE1(writer,
c1, c2)
else TRYMAP_DEC(jisx0208, writer, c1, c2) {
}
else TRYMAP_DEC(jisx0213_1_bmp, writer,
c1, c2) {
}
else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c1, c2) {
else if (TRYMAP_DEC(jisx0208, decoded, c1, c2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_1_bmp, decoded, c1, c2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_1_emp, code, c1, c2))
OUTCHAR(EMPBASE | code);
}
else TRYMAP_DEC_CHAR(jisx0213_pair, code, c1, c2) {
else if (TRYMAP_DEC(jisx0213_pair, code, c1, c2))
OUTCHAR2(code >> 16, code & 0xffff);
}
else
return 1;
NEXT_IN(2);
@ -674,9 +684,9 @@ DECODER(shift_jis_2004)
EMULATE_JISX0213_2000_DECODE_PLANE2(writer,
c1, c2)
else TRYMAP_DEC(jisx0213_2_bmp, writer,
c1, c2) {
} else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c1, c2) {
else if (TRYMAP_DEC(jisx0213_2_bmp, decoded, c1, c2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_2_emp, code, c1, c2)) {
OUTCHAR(EMPBASE | code);
NEXT_IN(2);
continue;

View File

@ -105,6 +105,7 @@ DECODER(euc_kr)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
@ -148,7 +149,8 @@ DECODER(euc_kr)
OUTCHAR(0xac00 + cho*588 + jung*28 + jong);
NEXT_IN(8);
}
else TRYMAP_DEC(ksx1001, writer, c ^ 0x80, INBYTE2 ^ 0x80) {
else if (TRYMAP_DEC(ksx1001, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) {
OUTCHAR(decoded);
NEXT_IN(2);
}
else
@ -198,6 +200,7 @@ DECODER(cp949)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
@ -206,8 +209,10 @@ DECODER(cp949)
}
REQUIRE_INBUF(2)
TRYMAP_DEC(ksx1001, writer, c ^ 0x80, INBYTE2 ^ 0x80);
else TRYMAP_DEC(cp949ext, writer, c, INBYTE2);
if (TRYMAP_DEC(ksx1001, decoded, c ^ 0x80, INBYTE2 ^ 0x80))
OUTCHAR(decoded);
else if (TRYMAP_DEC(cp949ext, decoded, c, INBYTE2))
OUTCHAR(decoded);
else return 1;
NEXT_IN(2);
@ -350,7 +355,8 @@ static const unsigned char johabjamo_jongseong[32] = {
DECODER(johab)
{
while (inleft > 0) {
unsigned char c = INBYTE1, c2;
unsigned char c = INBYTE1, c2;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
@ -424,9 +430,13 @@ DECODER(johab)
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
TRYMAP_DEC(ksx1001, writer, t1, t2);
else return 1;
NEXT_IN(2);
if (TRYMAP_DEC(ksx1001, decoded, t1, t2)) {
OUTCHAR(decoded);
NEXT_IN(2);
}
else {
return 1;
}
}
}
}

View File

@ -44,6 +44,7 @@ DECODER(big5)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
@ -52,7 +53,8 @@ DECODER(big5)
}
REQUIRE_INBUF(2)
TRYMAP_DEC(big5, writer, c, INBYTE2) {
if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
OUTCHAR(decoded);
NEXT_IN(2);
}
else return 1;
@ -98,6 +100,7 @@ DECODER(cp950)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
@ -107,9 +110,12 @@ DECODER(cp950)
REQUIRE_INBUF(2)
TRYMAP_DEC(cp950ext, writer, c, INBYTE2);
else TRYMAP_DEC(big5, writer, c, INBYTE2);
else return 1;
if (TRYMAP_DEC(cp950ext, decoded, c, INBYTE2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(big5, decoded, c, INBYTE2))
OUTCHAR(decoded);
else
return 1;
NEXT_IN(2);
}

View File

@ -177,29 +177,13 @@ static const struct dbcs_map *mapping_list;
#define TRYMAP_ENC(charset, assi, uni) \
if TRYMAP_ENC_COND(charset, assi, uni)
Py_LOCAL_INLINE(int)
_TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c)
{
if (c == UNIINV || _PyUnicodeWriter_WriteChar(writer, c) < 0)
return UNIINV;
else
return c;
}
#define _TRYMAP_DEC(m, writer, val) \
((m)->map != NULL && \
(val) >= (m)->bottom && \
(val)<= (m)->top && \
_TRYMAP_DEC_WRITE(writer, (m)->map[(val) - (m)->bottom]) != UNIINV)
#define _TRYMAP_DEC_CHAR(m, assi, val) \
#define _TRYMAP_DEC(m, assi, val) \
((m)->map != NULL && \
(val) >= (m)->bottom && \
(val)<= (m)->top && \
((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)
#define TRYMAP_DEC(charset, writer, c1, c2) \
if _TRYMAP_DEC(&charset##_decmap[c1], writer, c2)
#define TRYMAP_DEC_CHAR(charset, assi, c1, c2) \
if _TRYMAP_DEC_CHAR(&charset##_decmap[c1], assi, c2)
#define TRYMAP_DEC(charset, assi, c1, c2) \
_TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
((m)->map != NULL && (val) >= (m)->bottom && \
@ -210,8 +194,6 @@ _TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c)
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
assplane, asshi, asslo, (uni) & 0xff)
#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) \
if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2)
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},