Add a fast path in PyUnicode_DecodeCharmap() for pure 8-bit encodings:
the cp037, cp500 and iso8859_1 codecs
This commit is contained in:
parent
0f344b6e05
commit
03c3e35d42
|
@ -301,7 +301,6 @@ decoding_table = (
|
||||||
'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
|
'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
|
||||||
'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
|
'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
|
||||||
'\x9f' # 0xFF -> CONTROL
|
'\x9f' # 0xFF -> CONTROL
|
||||||
'\ufffe' ## Widen to UCS2 for optimization
|
|
||||||
)
|
)
|
||||||
|
|
||||||
### Encoding table
|
### Encoding table
|
||||||
|
|
|
@ -301,7 +301,6 @@ decoding_table = (
|
||||||
'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
|
'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
|
||||||
'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
|
'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
|
||||||
'\x9f' # 0xFF -> CONTROL
|
'\x9f' # 0xFF -> CONTROL
|
||||||
'\ufffe' ## Widen to UCS2 for optimization
|
|
||||||
)
|
)
|
||||||
|
|
||||||
### Encoding table
|
### Encoding table
|
||||||
|
|
|
@ -301,7 +301,6 @@ decoding_table = (
|
||||||
'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
|
'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
|
||||||
'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic)
|
'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic)
|
||||||
'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
|
'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
|
||||||
'\ufffe' ## Widen to UCS2 for optimization
|
|
||||||
)
|
)
|
||||||
|
|
||||||
### Encoding table
|
### Encoding table
|
||||||
|
|
|
@ -7281,6 +7281,7 @@ PyUnicode_DecodeCharmap(const char *s,
|
||||||
enum PyUnicode_Kind mapkind;
|
enum PyUnicode_Kind mapkind;
|
||||||
void *mapdata;
|
void *mapdata;
|
||||||
Py_UCS4 x;
|
Py_UCS4 x;
|
||||||
|
unsigned char ch;
|
||||||
|
|
||||||
if (PyUnicode_READY(mapping) == -1)
|
if (PyUnicode_READY(mapping) == -1)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -7288,8 +7289,32 @@ PyUnicode_DecodeCharmap(const char *s,
|
||||||
maplen = PyUnicode_GET_LENGTH(mapping);
|
maplen = PyUnicode_GET_LENGTH(mapping);
|
||||||
mapdata = PyUnicode_DATA(mapping);
|
mapdata = PyUnicode_DATA(mapping);
|
||||||
mapkind = PyUnicode_KIND(mapping);
|
mapkind = PyUnicode_KIND(mapping);
|
||||||
|
|
||||||
|
if (mapkind == PyUnicode_1BYTE_KIND && maplen >= 256) {
|
||||||
|
/* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1
|
||||||
|
* is disabled in encoding aliases, latin1 is preferred because
|
||||||
|
* its implementation is faster. */
|
||||||
|
Py_UCS1 *mapdata_ucs1 = (Py_UCS1 *)mapdata;
|
||||||
|
Py_UCS1 *outdata = (Py_UCS1 *)writer.data;
|
||||||
|
Py_UCS4 maxchar = writer.maxchar;
|
||||||
|
|
||||||
|
assert (writer.kind == PyUnicode_1BYTE_KIND);
|
||||||
|
while (s < e) {
|
||||||
|
ch = *s;
|
||||||
|
x = mapdata_ucs1[ch];
|
||||||
|
if (x > maxchar) {
|
||||||
|
if (_PyUnicodeWriter_PrepareInternal(&writer, 1, 0xff) == -1)
|
||||||
|
goto onError;
|
||||||
|
maxchar = writer.maxchar;
|
||||||
|
outdata = (Py_UCS1 *)writer.data;
|
||||||
|
}
|
||||||
|
outdata[writer.pos] = x;
|
||||||
|
writer.pos++;
|
||||||
|
++s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while (s < e) {
|
while (s < e) {
|
||||||
unsigned char ch;
|
|
||||||
if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
|
if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
|
||||||
enum PyUnicode_Kind outkind = writer.kind;
|
enum PyUnicode_Kind outkind = writer.kind;
|
||||||
void *outdata = writer.data;
|
void *outdata = writer.data;
|
||||||
|
|
Loading…
Reference in New Issue