Port UCS1 and charmap codecs to new API.
This commit is contained in:
parent
9e8166843c
commit
23e275b3ad
|
@ -1425,6 +1425,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
|
||||||
(unicode ordinal -> char ordinal) */
|
(unicode ordinal -> char ordinal) */
|
||||||
const char *errors /* error handling */
|
const char *errors /* error handling */
|
||||||
);
|
);
|
||||||
|
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
|
||||||
|
PyObject *unicode, /* Unicode object */
|
||||||
|
PyObject *mapping, /* character mapping
|
||||||
|
(unicode ordinal -> char ordinal) */
|
||||||
|
const char *errors /* error handling */
|
||||||
|
);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Translate a Py_UNICODE buffer of the given length by applying a
|
/* Translate a Py_UNICODE buffer of the given length by applying a
|
||||||
|
|
|
@ -992,11 +992,7 @@ charmap_encode(PyObject *self,
|
||||||
str = PyUnicode_FromObject(str);
|
str = PyUnicode_FromObject(str);
|
||||||
if (str == NULL)
|
if (str == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
v = codec_tuple(PyUnicode_EncodeCharmap(
|
v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
|
||||||
PyUnicode_AS_UNICODE(str),
|
|
||||||
PyUnicode_GET_SIZE(str),
|
|
||||||
mapping,
|
|
||||||
errors),
|
|
||||||
PyUnicode_GET_SIZE(str));
|
PyUnicode_GET_SIZE(str));
|
||||||
Py_DECREF(str);
|
Py_DECREF(str);
|
||||||
return v;
|
return v;
|
||||||
|
|
|
@ -248,7 +248,7 @@ _PyUnicode_FromUCS4(const Py_UCS4 *s, Py_ssize_t size);
|
||||||
static PyObject *
|
static PyObject *
|
||||||
unicode_encode_call_errorhandler(const char *errors,
|
unicode_encode_call_errorhandler(const char *errors,
|
||||||
PyObject **errorHandler,const char *encoding, const char *reason,
|
PyObject **errorHandler,const char *encoding, const char *reason,
|
||||||
const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
|
PyObject *unicode, PyObject **exceptionObject,
|
||||||
Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos);
|
Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -4745,8 +4745,7 @@ _PyUnicode_AsUTF8String(PyObject *obj, const char *errors)
|
||||||
#endif
|
#endif
|
||||||
rep = unicode_encode_call_errorhandler(
|
rep = unicode_encode_call_errorhandler(
|
||||||
errors, &errorHandler, "utf-8", "surrogates not allowed",
|
errors, &errorHandler, "utf-8", "surrogates not allowed",
|
||||||
PyUnicode_AS_UNICODE(unicode), PyUnicode_GET_SIZE(unicode),
|
obj, &exc, startpos, startpos+1, &newpos);
|
||||||
&exc, startpos, startpos+1, &newpos);
|
|
||||||
if (!rep)
|
if (!rep)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
|
@ -6450,7 +6449,7 @@ make_encode_exception_obj(PyObject **exceptionObject,
|
||||||
{
|
{
|
||||||
if (*exceptionObject == NULL) {
|
if (*exceptionObject == NULL) {
|
||||||
*exceptionObject = PyObject_CallFunction(
|
*exceptionObject = PyObject_CallFunction(
|
||||||
PyExc_UnicodeEncodeError, "sUnns",
|
PyExc_UnicodeEncodeError, "sOnns",
|
||||||
encoding, unicode, startpos, endpos, reason);
|
encoding, unicode, startpos, endpos, reason);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -6502,12 +6501,12 @@ static PyObject *
|
||||||
unicode_encode_call_errorhandler(const char *errors,
|
unicode_encode_call_errorhandler(const char *errors,
|
||||||
PyObject **errorHandler,
|
PyObject **errorHandler,
|
||||||
const char *encoding, const char *reason,
|
const char *encoding, const char *reason,
|
||||||
const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
|
PyObject *unicode, PyObject **exceptionObject,
|
||||||
Py_ssize_t startpos, Py_ssize_t endpos,
|
Py_ssize_t startpos, Py_ssize_t endpos,
|
||||||
Py_ssize_t *newpos)
|
Py_ssize_t *newpos)
|
||||||
{
|
{
|
||||||
static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple";
|
static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple";
|
||||||
|
Py_ssize_t len;
|
||||||
PyObject *restuple;
|
PyObject *restuple;
|
||||||
PyObject *resunicode;
|
PyObject *resunicode;
|
||||||
|
|
||||||
|
@ -6517,8 +6516,12 @@ unicode_encode_call_errorhandler(const char *errors,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
make_encode_exception(exceptionObject,
|
if (PyUnicode_READY(unicode) < 0)
|
||||||
encoding, unicode, size, startpos, endpos, reason);
|
return NULL;
|
||||||
|
len = PyUnicode_GET_LENGTH(unicode);
|
||||||
|
|
||||||
|
make_encode_exception_obj(exceptionObject,
|
||||||
|
encoding, unicode, startpos, endpos, reason);
|
||||||
if (*exceptionObject == NULL)
|
if (*exceptionObject == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -6542,8 +6545,8 @@ unicode_encode_call_errorhandler(const char *errors,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (*newpos<0)
|
if (*newpos<0)
|
||||||
*newpos = size+*newpos;
|
*newpos = len + *newpos;
|
||||||
if (*newpos<0 || *newpos>size) {
|
if (*newpos<0 || *newpos>len) {
|
||||||
PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
|
PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
|
||||||
Py_DECREF(restuple);
|
Py_DECREF(restuple);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -6554,18 +6557,16 @@ unicode_encode_call_errorhandler(const char *errors,
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
unicode_encode_ucs1(const Py_UNICODE *p,
|
unicode_encode_ucs1(PyObject *unicode,
|
||||||
Py_ssize_t size,
|
|
||||||
const char *errors,
|
const char *errors,
|
||||||
int limit)
|
int limit)
|
||||||
{
|
{
|
||||||
|
/* input state */
|
||||||
|
Py_ssize_t pos=0, size;
|
||||||
|
int kind;
|
||||||
|
void *data;
|
||||||
/* output object */
|
/* output object */
|
||||||
PyObject *res;
|
PyObject *res;
|
||||||
/* pointers to the beginning and end+1 of input */
|
|
||||||
const Py_UNICODE *startp = p;
|
|
||||||
const Py_UNICODE *endp = p + size;
|
|
||||||
/* pointer to the beginning of the unencodable characters */
|
|
||||||
/* const Py_UNICODE *badp = NULL; */
|
|
||||||
/* pointer into the output */
|
/* pointer into the output */
|
||||||
char *str;
|
char *str;
|
||||||
/* current output position */
|
/* current output position */
|
||||||
|
@ -6578,6 +6579,11 @@ unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
|
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
|
||||||
int known_errorHandler = -1;
|
int known_errorHandler = -1;
|
||||||
|
|
||||||
|
if (PyUnicode_READY(unicode) < 0)
|
||||||
|
return NULL;
|
||||||
|
size = PyUnicode_GET_LENGTH(unicode);
|
||||||
|
kind = PyUnicode_KIND(unicode);
|
||||||
|
data = PyUnicode_DATA(unicode);
|
||||||
/* allocate enough for a simple encoding without
|
/* allocate enough for a simple encoding without
|
||||||
replacements, if we need more, we'll resize */
|
replacements, if we need more, we'll resize */
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
|
@ -6588,28 +6594,24 @@ unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
str = PyBytes_AS_STRING(res);
|
str = PyBytes_AS_STRING(res);
|
||||||
ressize = size;
|
ressize = size;
|
||||||
|
|
||||||
while (p<endp) {
|
while (pos < size) {
|
||||||
Py_UNICODE c = *p;
|
Py_UCS4 c = PyUnicode_READ(kind, data, pos);
|
||||||
|
|
||||||
/* can we encode this? */
|
/* can we encode this? */
|
||||||
if (c<limit) {
|
if (c<limit) {
|
||||||
/* no overflow check, because we know that the space is enough */
|
/* no overflow check, because we know that the space is enough */
|
||||||
*str++ = (char)c;
|
*str++ = (char)c;
|
||||||
++p;
|
++pos;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
Py_ssize_t unicodepos = p-startp;
|
|
||||||
Py_ssize_t requiredsize;
|
Py_ssize_t requiredsize;
|
||||||
PyObject *repunicode;
|
PyObject *repunicode;
|
||||||
Py_ssize_t repsize;
|
Py_ssize_t repsize, newpos, respos, i;
|
||||||
Py_ssize_t newpos;
|
|
||||||
Py_ssize_t respos;
|
|
||||||
Py_UNICODE *uni2;
|
|
||||||
/* startpos for collecting unencodable chars */
|
/* startpos for collecting unencodable chars */
|
||||||
const Py_UNICODE *collstart = p;
|
Py_ssize_t collstart = pos;
|
||||||
const Py_UNICODE *collend = p;
|
Py_ssize_t collend = pos;
|
||||||
/* find all unecodable characters */
|
/* find all unecodable characters */
|
||||||
while ((collend < endp) && ((*collend)>=limit))
|
while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit))
|
||||||
++collend;
|
++collend;
|
||||||
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
|
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
|
||||||
if (known_errorHandler==-1) {
|
if (known_errorHandler==-1) {
|
||||||
|
@ -6626,39 +6628,40 @@ unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
}
|
}
|
||||||
switch (known_errorHandler) {
|
switch (known_errorHandler) {
|
||||||
case 1: /* strict */
|
case 1: /* strict */
|
||||||
raise_encode_exception(&exc, encoding, startp, size, collstart-startp, collend-startp, reason);
|
raise_encode_exception_obj(&exc, encoding, unicode, collstart, collend, reason);
|
||||||
goto onError;
|
goto onError;
|
||||||
case 2: /* replace */
|
case 2: /* replace */
|
||||||
while (collstart++<collend)
|
while (collstart++<collend)
|
||||||
*str++ = '?'; /* fall through */
|
*str++ = '?'; /* fall through */
|
||||||
case 3: /* ignore */
|
case 3: /* ignore */
|
||||||
p = collend;
|
pos = collend;
|
||||||
break;
|
break;
|
||||||
case 4: /* xmlcharrefreplace */
|
case 4: /* xmlcharrefreplace */
|
||||||
respos = str - PyBytes_AS_STRING(res);
|
respos = str - PyBytes_AS_STRING(res);
|
||||||
/* determine replacement size (temporarily (mis)uses p) */
|
/* determine replacement size */
|
||||||
for (p = collstart, repsize = 0; p < collend; ++p) {
|
for (i = collstart, repsize = 0; i < collend; ++i) {
|
||||||
if (*p<10)
|
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
|
||||||
|
if (ch < 10)
|
||||||
repsize += 2+1+1;
|
repsize += 2+1+1;
|
||||||
else if (*p<100)
|
else if (ch < 100)
|
||||||
repsize += 2+2+1;
|
repsize += 2+2+1;
|
||||||
else if (*p<1000)
|
else if (ch < 1000)
|
||||||
repsize += 2+3+1;
|
repsize += 2+3+1;
|
||||||
else if (*p<10000)
|
else if (ch < 10000)
|
||||||
repsize += 2+4+1;
|
repsize += 2+4+1;
|
||||||
#ifndef Py_UNICODE_WIDE
|
#ifndef Py_UNICODE_WIDE
|
||||||
else
|
else
|
||||||
repsize += 2+5+1;
|
repsize += 2+5+1;
|
||||||
#else
|
#else
|
||||||
else if (*p<100000)
|
else if (ch < 100000)
|
||||||
repsize += 2+5+1;
|
repsize += 2+5+1;
|
||||||
else if (*p<1000000)
|
else if (ch < 1000000)
|
||||||
repsize += 2+6+1;
|
repsize += 2+6+1;
|
||||||
else
|
else
|
||||||
repsize += 2+7+1;
|
repsize += 2+7+1;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
requiredsize = respos+repsize+(endp-collend);
|
requiredsize = respos+repsize+(size-collend);
|
||||||
if (requiredsize > ressize) {
|
if (requiredsize > ressize) {
|
||||||
if (requiredsize<2*ressize)
|
if (requiredsize<2*ressize)
|
||||||
requiredsize = 2*ressize;
|
requiredsize = 2*ressize;
|
||||||
|
@ -6667,17 +6670,18 @@ unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
str = PyBytes_AS_STRING(res) + respos;
|
str = PyBytes_AS_STRING(res) + respos;
|
||||||
ressize = requiredsize;
|
ressize = requiredsize;
|
||||||
}
|
}
|
||||||
/* generate replacement (temporarily (mis)uses p) */
|
/* generate replacement */
|
||||||
for (p = collstart; p < collend; ++p) {
|
for (i = collstart; i < collend; ++i) {
|
||||||
str += sprintf(str, "&#%d;", (int)*p);
|
str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i));
|
||||||
}
|
}
|
||||||
p = collend;
|
pos = collend;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
|
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
|
||||||
encoding, reason, startp, size, &exc,
|
encoding, reason, unicode, &exc,
|
||||||
collstart-startp, collend-startp, &newpos);
|
collstart, collend, &newpos);
|
||||||
if (repunicode == NULL)
|
if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
|
||||||
|
PyUnicode_READY(repunicode) < 0))
|
||||||
goto onError;
|
goto onError;
|
||||||
if (PyBytes_Check(repunicode)) {
|
if (PyBytes_Check(repunicode)) {
|
||||||
/* Directly copy bytes result to output. */
|
/* Directly copy bytes result to output. */
|
||||||
|
@ -6694,7 +6698,7 @@ unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
}
|
}
|
||||||
memcpy(str, PyBytes_AsString(repunicode), repsize);
|
memcpy(str, PyBytes_AsString(repunicode), repsize);
|
||||||
str += repsize;
|
str += repsize;
|
||||||
p = startp + newpos;
|
pos = newpos;
|
||||||
Py_DECREF(repunicode);
|
Py_DECREF(repunicode);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -6702,8 +6706,8 @@ unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
have+the replacement+the rest of the string, so
|
have+the replacement+the rest of the string, so
|
||||||
we won't have to check space for encodable characters) */
|
we won't have to check space for encodable characters) */
|
||||||
respos = str - PyBytes_AS_STRING(res);
|
respos = str - PyBytes_AS_STRING(res);
|
||||||
repsize = PyUnicode_GET_SIZE(repunicode);
|
repsize = PyUnicode_GET_LENGTH(repunicode);
|
||||||
requiredsize = respos+repsize+(endp-collend);
|
requiredsize = respos+repsize+(size-collend);
|
||||||
if (requiredsize > ressize) {
|
if (requiredsize > ressize) {
|
||||||
if (requiredsize<2*ressize)
|
if (requiredsize<2*ressize)
|
||||||
requiredsize = 2*ressize;
|
requiredsize = 2*ressize;
|
||||||
|
@ -6716,17 +6720,17 @@ unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
}
|
}
|
||||||
/* check if there is anything unencodable in the replacement
|
/* check if there is anything unencodable in the replacement
|
||||||
and copy it to the output */
|
and copy it to the output */
|
||||||
for (uni2 = PyUnicode_AS_UNICODE(repunicode);repsize-->0; ++uni2, ++str) {
|
for (i = 0; repsize-->0; ++i, ++str) {
|
||||||
c = *uni2;
|
c = PyUnicode_READ_CHAR(repunicode, i);
|
||||||
if (c >= limit) {
|
if (c >= limit) {
|
||||||
raise_encode_exception(&exc, encoding, startp, size,
|
raise_encode_exception_obj(&exc, encoding, unicode,
|
||||||
unicodepos, unicodepos+1, reason);
|
pos, pos+1, reason);
|
||||||
Py_DECREF(repunicode);
|
Py_DECREF(repunicode);
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
*str = (char)c;
|
*str = (char)c;
|
||||||
}
|
}
|
||||||
p = startp + newpos;
|
pos = newpos;
|
||||||
Py_DECREF(repunicode);
|
Py_DECREF(repunicode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6750,12 +6754,19 @@ unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Deprecated */
|
||||||
PyObject *
|
PyObject *
|
||||||
PyUnicode_EncodeLatin1(const Py_UNICODE *p,
|
PyUnicode_EncodeLatin1(const Py_UNICODE *p,
|
||||||
Py_ssize_t size,
|
Py_ssize_t size,
|
||||||
const char *errors)
|
const char *errors)
|
||||||
{
|
{
|
||||||
return unicode_encode_ucs1(p, size, errors, 256);
|
PyObject *result;
|
||||||
|
PyObject *unicode = PyUnicode_FromUnicode(p, size);
|
||||||
|
if (unicode == NULL)
|
||||||
|
return NULL;
|
||||||
|
result = unicode_encode_ucs1(unicode, errors, 256);
|
||||||
|
Py_DECREF(unicode);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
|
@ -6774,9 +6785,7 @@ _PyUnicode_AsLatin1String(PyObject *unicode, const char *errors)
|
||||||
PyUnicode_GET_LENGTH(unicode));
|
PyUnicode_GET_LENGTH(unicode));
|
||||||
/* Non-Latin-1 characters present. Defer to above function to
|
/* Non-Latin-1 characters present. Defer to above function to
|
||||||
raise the exception. */
|
raise the exception. */
|
||||||
return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
|
return unicode_encode_ucs1(unicode, errors, 256);
|
||||||
PyUnicode_GET_SIZE(unicode),
|
|
||||||
errors);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject*
|
PyObject*
|
||||||
|
@ -6888,12 +6897,19 @@ PyUnicode_DecodeASCII(const char *s,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Deprecated */
|
||||||
PyObject *
|
PyObject *
|
||||||
PyUnicode_EncodeASCII(const Py_UNICODE *p,
|
PyUnicode_EncodeASCII(const Py_UNICODE *p,
|
||||||
Py_ssize_t size,
|
Py_ssize_t size,
|
||||||
const char *errors)
|
const char *errors)
|
||||||
{
|
{
|
||||||
return unicode_encode_ucs1(p, size, errors, 128);
|
PyObject *result;
|
||||||
|
PyObject *unicode = PyUnicode_FromUnicode(p, size);
|
||||||
|
if (unicode == NULL)
|
||||||
|
return NULL;
|
||||||
|
result = unicode_encode_ucs1(unicode, errors, 128);
|
||||||
|
Py_DECREF(unicode);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
|
@ -6910,9 +6926,7 @@ _PyUnicode_AsASCIIString(PyObject *unicode, const char *errors)
|
||||||
if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128)
|
if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128)
|
||||||
return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode),
|
return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode),
|
||||||
PyUnicode_GET_LENGTH(unicode));
|
PyUnicode_GET_LENGTH(unicode));
|
||||||
return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
|
return unicode_encode_ucs1(unicode, errors, 128);
|
||||||
PyUnicode_GET_SIZE(unicode),
|
|
||||||
errors);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
|
@ -8182,13 +8196,13 @@ charmapencode_output(Py_UNICODE c, PyObject *mapping,
|
||||||
Return 0 on success, -1 on error */
|
Return 0 on success, -1 on error */
|
||||||
static int
|
static int
|
||||||
charmap_encoding_error(
|
charmap_encoding_error(
|
||||||
const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping,
|
PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
|
||||||
PyObject **exceptionObject,
|
PyObject **exceptionObject,
|
||||||
int *known_errorHandler, PyObject **errorHandler, const char *errors,
|
int *known_errorHandler, PyObject **errorHandler, const char *errors,
|
||||||
PyObject **res, Py_ssize_t *respos)
|
PyObject **res, Py_ssize_t *respos)
|
||||||
{
|
{
|
||||||
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
|
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
|
||||||
Py_ssize_t repsize;
|
Py_ssize_t size, repsize;
|
||||||
Py_ssize_t newpos;
|
Py_ssize_t newpos;
|
||||||
Py_UNICODE *uni2;
|
Py_UNICODE *uni2;
|
||||||
/* startpos for collecting unencodable chars */
|
/* startpos for collecting unencodable chars */
|
||||||
|
@ -8198,19 +8212,25 @@ charmap_encoding_error(
|
||||||
char *encoding = "charmap";
|
char *encoding = "charmap";
|
||||||
char *reason = "character maps to <undefined>";
|
char *reason = "character maps to <undefined>";
|
||||||
charmapencode_result x;
|
charmapencode_result x;
|
||||||
|
Py_UCS4 ch;
|
||||||
|
|
||||||
|
if (PyUnicode_READY(unicode) < 0)
|
||||||
|
return -1;
|
||||||
|
size = PyUnicode_GET_LENGTH(unicode);
|
||||||
/* find all unencodable characters */
|
/* find all unencodable characters */
|
||||||
while (collendpos < size) {
|
while (collendpos < size) {
|
||||||
PyObject *rep;
|
PyObject *rep;
|
||||||
if (Py_TYPE(mapping) == &EncodingMapType) {
|
if (Py_TYPE(mapping) == &EncodingMapType) {
|
||||||
int res = encoding_map_lookup(p[collendpos], mapping);
|
ch = PyUnicode_READ_CHAR(unicode, collendpos);
|
||||||
|
int res = encoding_map_lookup(ch, mapping);
|
||||||
if (res != -1)
|
if (res != -1)
|
||||||
break;
|
break;
|
||||||
++collendpos;
|
++collendpos;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
rep = charmapencode_lookup(p[collendpos], mapping);
|
ch = PyUnicode_READ_CHAR(unicode, collendpos);
|
||||||
|
rep = charmapencode_lookup(ch, mapping);
|
||||||
if (rep==NULL)
|
if (rep==NULL)
|
||||||
return -1;
|
return -1;
|
||||||
else if (rep!=Py_None) {
|
else if (rep!=Py_None) {
|
||||||
|
@ -8236,7 +8256,7 @@ charmap_encoding_error(
|
||||||
}
|
}
|
||||||
switch (*known_errorHandler) {
|
switch (*known_errorHandler) {
|
||||||
case 1: /* strict */
|
case 1: /* strict */
|
||||||
raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
|
raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
|
||||||
return -1;
|
return -1;
|
||||||
case 2: /* replace */
|
case 2: /* replace */
|
||||||
for (collpos = collstartpos; collpos<collendpos; ++collpos) {
|
for (collpos = collstartpos; collpos<collendpos; ++collpos) {
|
||||||
|
@ -8245,7 +8265,7 @@ charmap_encoding_error(
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
else if (x==enc_FAILED) {
|
else if (x==enc_FAILED) {
|
||||||
raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
|
raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8258,13 +8278,13 @@ charmap_encoding_error(
|
||||||
for (collpos = collstartpos; collpos < collendpos; ++collpos) {
|
for (collpos = collstartpos; collpos < collendpos; ++collpos) {
|
||||||
char buffer[2+29+1+1];
|
char buffer[2+29+1+1];
|
||||||
char *cp;
|
char *cp;
|
||||||
sprintf(buffer, "&#%d;", (int)p[collpos]);
|
sprintf(buffer, "&#%d;", (int)PyUnicode_READ_CHAR(unicode, collpos));
|
||||||
for (cp = buffer; *cp; ++cp) {
|
for (cp = buffer; *cp; ++cp) {
|
||||||
x = charmapencode_output(*cp, mapping, res, respos);
|
x = charmapencode_output(*cp, mapping, res, respos);
|
||||||
if (x==enc_EXCEPTION)
|
if (x==enc_EXCEPTION)
|
||||||
return -1;
|
return -1;
|
||||||
else if (x==enc_FAILED) {
|
else if (x==enc_FAILED) {
|
||||||
raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
|
raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8273,7 +8293,7 @@ charmap_encoding_error(
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
|
repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
|
||||||
encoding, reason, p, size, exceptionObject,
|
encoding, reason, unicode, exceptionObject,
|
||||||
collstartpos, collendpos, &newpos);
|
collstartpos, collendpos, &newpos);
|
||||||
if (repunicode == NULL)
|
if (repunicode == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -8305,7 +8325,7 @@ charmap_encoding_error(
|
||||||
}
|
}
|
||||||
else if (x==enc_FAILED) {
|
else if (x==enc_FAILED) {
|
||||||
Py_DECREF(repunicode);
|
Py_DECREF(repunicode);
|
||||||
raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
|
raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8316,15 +8336,15 @@ charmap_encoding_error(
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
PyUnicode_EncodeCharmap(const Py_UNICODE *p,
|
_PyUnicode_EncodeCharmap(PyObject *unicode,
|
||||||
Py_ssize_t size,
|
PyObject *mapping,
|
||||||
PyObject *mapping,
|
const char *errors)
|
||||||
const char *errors)
|
|
||||||
{
|
{
|
||||||
/* output object */
|
/* output object */
|
||||||
PyObject *res = NULL;
|
PyObject *res = NULL;
|
||||||
/* current input position */
|
/* current input position */
|
||||||
Py_ssize_t inpos = 0;
|
Py_ssize_t inpos = 0;
|
||||||
|
Py_ssize_t size;
|
||||||
/* current output position */
|
/* current output position */
|
||||||
Py_ssize_t respos = 0;
|
Py_ssize_t respos = 0;
|
||||||
PyObject *errorHandler = NULL;
|
PyObject *errorHandler = NULL;
|
||||||
|
@ -8334,9 +8354,13 @@ PyUnicode_EncodeCharmap(const Py_UNICODE *p,
|
||||||
* 3=ignore, 4=xmlcharrefreplace */
|
* 3=ignore, 4=xmlcharrefreplace */
|
||||||
int known_errorHandler = -1;
|
int known_errorHandler = -1;
|
||||||
|
|
||||||
|
if (PyUnicode_READY(unicode) < 0)
|
||||||
|
return NULL;
|
||||||
|
size = PyUnicode_GET_LENGTH(unicode);
|
||||||
|
|
||||||
/* Default to Latin-1 */
|
/* Default to Latin-1 */
|
||||||
if (mapping == NULL)
|
if (mapping == NULL)
|
||||||
return PyUnicode_EncodeLatin1(p, size, errors);
|
return unicode_encode_ucs1(unicode, errors, 256);
|
||||||
|
|
||||||
/* allocate enough for a simple encoding without
|
/* allocate enough for a simple encoding without
|
||||||
replacements, if we need more, we'll resize */
|
replacements, if we need more, we'll resize */
|
||||||
|
@ -8347,12 +8371,13 @@ PyUnicode_EncodeCharmap(const Py_UNICODE *p,
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
while (inpos<size) {
|
while (inpos<size) {
|
||||||
|
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, inpos);
|
||||||
/* try to encode it */
|
/* try to encode it */
|
||||||
charmapencode_result x = charmapencode_output(p[inpos], mapping, &res, &respos);
|
charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos);
|
||||||
if (x==enc_EXCEPTION) /* error */
|
if (x==enc_EXCEPTION) /* error */
|
||||||
goto onError;
|
goto onError;
|
||||||
if (x==enc_FAILED) { /* unencodable character */
|
if (x==enc_FAILED) { /* unencodable character */
|
||||||
if (charmap_encoding_error(p, size, &inpos, mapping,
|
if (charmap_encoding_error(unicode, &inpos, mapping,
|
||||||
&exc,
|
&exc,
|
||||||
&known_errorHandler, &errorHandler, errors,
|
&known_errorHandler, &errorHandler, errors,
|
||||||
&res, &respos)) {
|
&res, &respos)) {
|
||||||
|
@ -8380,6 +8405,22 @@ PyUnicode_EncodeCharmap(const Py_UNICODE *p,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Deprecated */
|
||||||
|
PyObject *
|
||||||
|
PyUnicode_EncodeCharmap(const Py_UNICODE *p,
|
||||||
|
Py_ssize_t size,
|
||||||
|
PyObject *mapping,
|
||||||
|
const char *errors)
|
||||||
|
{
|
||||||
|
PyObject *result;
|
||||||
|
PyObject *unicode = PyUnicode_FromUnicode(p, size);
|
||||||
|
if (unicode == NULL)
|
||||||
|
return NULL;
|
||||||
|
result = _PyUnicode_EncodeCharmap(unicode, mapping, errors);
|
||||||
|
Py_DECREF(unicode);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
PyUnicode_AsCharmapString(PyObject *unicode,
|
PyUnicode_AsCharmapString(PyObject *unicode,
|
||||||
PyObject *mapping)
|
PyObject *mapping)
|
||||||
|
@ -8388,10 +8429,7 @@ PyUnicode_AsCharmapString(PyObject *unicode,
|
||||||
PyErr_BadArgument();
|
PyErr_BadArgument();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(unicode),
|
return _PyUnicode_EncodeCharmap(unicode, mapping, NULL);
|
||||||
PyUnicode_GET_SIZE(unicode),
|
|
||||||
mapping,
|
|
||||||
NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* create or adjust a UnicodeTranslateError */
|
/* create or adjust a UnicodeTranslateError */
|
||||||
|
@ -8893,6 +8931,7 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
|
||||||
Py_UNICODE *p, *end;
|
Py_UNICODE *p, *end;
|
||||||
PyObject *errorHandler = NULL;
|
PyObject *errorHandler = NULL;
|
||||||
PyObject *exc = NULL;
|
PyObject *exc = NULL;
|
||||||
|
PyObject *unicode;
|
||||||
const char *encoding = "decimal";
|
const char *encoding = "decimal";
|
||||||
const char *reason = "invalid decimal Unicode string";
|
const char *reason = "invalid decimal Unicode string";
|
||||||
/* the following variable is used for caching string comparisons
|
/* the following variable is used for caching string comparisons
|
||||||
|
@ -8973,9 +9012,13 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
|
||||||
p = collend;
|
p = collend;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
unicode = PyUnicode_FromUnicode(s, length);
|
||||||
|
if (unicode == NULL)
|
||||||
|
goto onError;
|
||||||
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
|
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
|
||||||
encoding, reason, s, length, &exc,
|
encoding, reason, unicode, &exc,
|
||||||
collstart-s, collend-s, &newpos);
|
collstart-s, collend-s, &newpos);
|
||||||
|
Py_DECREF(unicode);
|
||||||
if (repunicode == NULL)
|
if (repunicode == NULL)
|
||||||
goto onError;
|
goto onError;
|
||||||
if (!PyUnicode_Check(repunicode)) {
|
if (!PyUnicode_Check(repunicode)) {
|
||||||
|
|
Loading…
Reference in New Issue