Port UCS1 and charmap codecs to new API.

This commit is contained in:
Martin v. Löwis 2011-11-02 18:02:51 +01:00
parent 9e8166843c
commit 23e275b3ad
3 changed files with 134 additions and 89 deletions

View File

@ -1425,6 +1425,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
(unicode ordinal -> char ordinal) */ (unicode ordinal -> char ordinal) */
const char *errors /* error handling */ const char *errors /* error handling */
); );
/* Charmap encoder taking the text as a Unicode object instead of a
   Py_UNICODE buffer plus length (compare PyUnicode_EncodeCharmap above). */
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
PyObject *unicode, /* Unicode object */
PyObject *mapping, /* character mapping
(unicode ordinal -> char ordinal) */
const char *errors /* error handling */
);
#endif #endif
/* Translate a Py_UNICODE buffer of the given length by applying a /* Translate a Py_UNICODE buffer of the given length by applying a

View File

@ -992,11 +992,7 @@ charmap_encode(PyObject *self,
str = PyUnicode_FromObject(str); str = PyUnicode_FromObject(str);
if (str == NULL) if (str == NULL)
return NULL; return NULL;
v = codec_tuple(PyUnicode_EncodeCharmap( v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
PyUnicode_AS_UNICODE(str),
PyUnicode_GET_SIZE(str),
mapping,
errors),
PyUnicode_GET_SIZE(str)); PyUnicode_GET_SIZE(str));
Py_DECREF(str); Py_DECREF(str);
return v; return v;

View File

@ -248,7 +248,7 @@ _PyUnicode_FromUCS4(const Py_UCS4 *s, Py_ssize_t size);
static PyObject * static PyObject *
unicode_encode_call_errorhandler(const char *errors, unicode_encode_call_errorhandler(const char *errors,
PyObject **errorHandler,const char *encoding, const char *reason, PyObject **errorHandler,const char *encoding, const char *reason,
const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject, PyObject *unicode, PyObject **exceptionObject,
Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos); Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos);
static void static void
@ -4745,8 +4745,7 @@ _PyUnicode_AsUTF8String(PyObject *obj, const char *errors)
#endif #endif
rep = unicode_encode_call_errorhandler( rep = unicode_encode_call_errorhandler(
errors, &errorHandler, "utf-8", "surrogates not allowed", errors, &errorHandler, "utf-8", "surrogates not allowed",
PyUnicode_AS_UNICODE(unicode), PyUnicode_GET_SIZE(unicode), obj, &exc, startpos, startpos+1, &newpos);
&exc, startpos, startpos+1, &newpos);
if (!rep) if (!rep)
goto error; goto error;
@ -6450,7 +6449,7 @@ make_encode_exception_obj(PyObject **exceptionObject,
{ {
if (*exceptionObject == NULL) { if (*exceptionObject == NULL) {
*exceptionObject = PyObject_CallFunction( *exceptionObject = PyObject_CallFunction(
PyExc_UnicodeEncodeError, "sUnns", PyExc_UnicodeEncodeError, "sOnns",
encoding, unicode, startpos, endpos, reason); encoding, unicode, startpos, endpos, reason);
} }
else { else {
@ -6502,12 +6501,12 @@ static PyObject *
unicode_encode_call_errorhandler(const char *errors, unicode_encode_call_errorhandler(const char *errors,
PyObject **errorHandler, PyObject **errorHandler,
const char *encoding, const char *reason, const char *encoding, const char *reason,
const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject, PyObject *unicode, PyObject **exceptionObject,
Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t startpos, Py_ssize_t endpos,
Py_ssize_t *newpos) Py_ssize_t *newpos)
{ {
static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple"; static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple";
Py_ssize_t len;
PyObject *restuple; PyObject *restuple;
PyObject *resunicode; PyObject *resunicode;
@ -6517,8 +6516,12 @@ unicode_encode_call_errorhandler(const char *errors,
return NULL; return NULL;
} }
make_encode_exception(exceptionObject, if (PyUnicode_READY(unicode) < 0)
encoding, unicode, size, startpos, endpos, reason); return NULL;
len = PyUnicode_GET_LENGTH(unicode);
make_encode_exception_obj(exceptionObject,
encoding, unicode, startpos, endpos, reason);
if (*exceptionObject == NULL) if (*exceptionObject == NULL)
return NULL; return NULL;
@ -6542,8 +6545,8 @@ unicode_encode_call_errorhandler(const char *errors,
return NULL; return NULL;
} }
if (*newpos<0) if (*newpos<0)
*newpos = size+*newpos; *newpos = len + *newpos;
if (*newpos<0 || *newpos>size) { if (*newpos<0 || *newpos>len) {
PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos); PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
Py_DECREF(restuple); Py_DECREF(restuple);
return NULL; return NULL;
@ -6554,18 +6557,16 @@ unicode_encode_call_errorhandler(const char *errors,
} }
static PyObject * static PyObject *
unicode_encode_ucs1(const Py_UNICODE *p, unicode_encode_ucs1(PyObject *unicode,
Py_ssize_t size,
const char *errors, const char *errors,
int limit) int limit)
{ {
/* input state */
Py_ssize_t pos=0, size;
int kind;
void *data;
/* output object */ /* output object */
PyObject *res; PyObject *res;
/* pointers to the beginning and end+1 of input */
const Py_UNICODE *startp = p;
const Py_UNICODE *endp = p + size;
/* pointer to the beginning of the unencodable characters */
/* const Py_UNICODE *badp = NULL; */
/* pointer into the output */ /* pointer into the output */
char *str; char *str;
/* current output position */ /* current output position */
@ -6578,6 +6579,11 @@ unicode_encode_ucs1(const Py_UNICODE *p,
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */ * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
int known_errorHandler = -1; int known_errorHandler = -1;
if (PyUnicode_READY(unicode) < 0)
return NULL;
size = PyUnicode_GET_LENGTH(unicode);
kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode);
/* allocate enough for a simple encoding without /* allocate enough for a simple encoding without
replacements, if we need more, we'll resize */ replacements, if we need more, we'll resize */
if (size == 0) if (size == 0)
@ -6588,28 +6594,24 @@ unicode_encode_ucs1(const Py_UNICODE *p,
str = PyBytes_AS_STRING(res); str = PyBytes_AS_STRING(res);
ressize = size; ressize = size;
while (p<endp) { while (pos < size) {
Py_UNICODE c = *p; Py_UCS4 c = PyUnicode_READ(kind, data, pos);
/* can we encode this? */ /* can we encode this? */
if (c<limit) { if (c<limit) {
/* no overflow check, because we know that the space is enough */ /* no overflow check, because we know that the space is enough */
*str++ = (char)c; *str++ = (char)c;
++p; ++pos;
} }
else { else {
Py_ssize_t unicodepos = p-startp;
Py_ssize_t requiredsize; Py_ssize_t requiredsize;
PyObject *repunicode; PyObject *repunicode;
Py_ssize_t repsize; Py_ssize_t repsize, newpos, respos, i;
Py_ssize_t newpos;
Py_ssize_t respos;
Py_UNICODE *uni2;
/* startpos for collecting unencodable chars */ /* startpos for collecting unencodable chars */
const Py_UNICODE *collstart = p; Py_ssize_t collstart = pos;
const Py_UNICODE *collend = p; Py_ssize_t collend = pos;
/* find all unecodable characters */ /* find all unecodable characters */
while ((collend < endp) && ((*collend)>=limit)) while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit))
++collend; ++collend;
/* cache callback name lookup (if not done yet, i.e. it's the first error) */ /* cache callback name lookup (if not done yet, i.e. it's the first error) */
if (known_errorHandler==-1) { if (known_errorHandler==-1) {
@ -6626,39 +6628,40 @@ unicode_encode_ucs1(const Py_UNICODE *p,
} }
switch (known_errorHandler) { switch (known_errorHandler) {
case 1: /* strict */ case 1: /* strict */
raise_encode_exception(&exc, encoding, startp, size, collstart-startp, collend-startp, reason); raise_encode_exception_obj(&exc, encoding, unicode, collstart, collend, reason);
goto onError; goto onError;
case 2: /* replace */ case 2: /* replace */
while (collstart++<collend) while (collstart++<collend)
*str++ = '?'; /* fall through */ *str++ = '?'; /* fall through */
case 3: /* ignore */ case 3: /* ignore */
p = collend; pos = collend;
break; break;
case 4: /* xmlcharrefreplace */ case 4: /* xmlcharrefreplace */
respos = str - PyBytes_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
/* determine replacement size (temporarily (mis)uses p) */ /* determine replacement size */
for (p = collstart, repsize = 0; p < collend; ++p) { for (i = collstart, repsize = 0; i < collend; ++i) {
if (*p<10) Py_UCS4 ch = PyUnicode_READ(kind, data, i);
if (ch < 10)
repsize += 2+1+1; repsize += 2+1+1;
else if (*p<100) else if (ch < 100)
repsize += 2+2+1; repsize += 2+2+1;
else if (*p<1000) else if (ch < 1000)
repsize += 2+3+1; repsize += 2+3+1;
else if (*p<10000) else if (ch < 10000)
repsize += 2+4+1; repsize += 2+4+1;
#ifndef Py_UNICODE_WIDE #ifndef Py_UNICODE_WIDE
else else
repsize += 2+5+1; repsize += 2+5+1;
#else #else
else if (*p<100000) else if (ch < 100000)
repsize += 2+5+1; repsize += 2+5+1;
else if (*p<1000000) else if (ch < 1000000)
repsize += 2+6+1; repsize += 2+6+1;
else else
repsize += 2+7+1; repsize += 2+7+1;
#endif #endif
} }
requiredsize = respos+repsize+(endp-collend); requiredsize = respos+repsize+(size-collend);
if (requiredsize > ressize) { if (requiredsize > ressize) {
if (requiredsize<2*ressize) if (requiredsize<2*ressize)
requiredsize = 2*ressize; requiredsize = 2*ressize;
@ -6667,17 +6670,18 @@ unicode_encode_ucs1(const Py_UNICODE *p,
str = PyBytes_AS_STRING(res) + respos; str = PyBytes_AS_STRING(res) + respos;
ressize = requiredsize; ressize = requiredsize;
} }
/* generate replacement (temporarily (mis)uses p) */ /* generate replacement */
for (p = collstart; p < collend; ++p) { for (i = collstart; i < collend; ++i) {
str += sprintf(str, "&#%d;", (int)*p); str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i));
} }
p = collend; pos = collend;
break; break;
default: default:
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler, repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
encoding, reason, startp, size, &exc, encoding, reason, unicode, &exc,
collstart-startp, collend-startp, &newpos); collstart, collend, &newpos);
if (repunicode == NULL) if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
PyUnicode_READY(repunicode) < 0))
goto onError; goto onError;
if (PyBytes_Check(repunicode)) { if (PyBytes_Check(repunicode)) {
/* Directly copy bytes result to output. */ /* Directly copy bytes result to output. */
@ -6694,7 +6698,7 @@ unicode_encode_ucs1(const Py_UNICODE *p,
} }
memcpy(str, PyBytes_AsString(repunicode), repsize); memcpy(str, PyBytes_AsString(repunicode), repsize);
str += repsize; str += repsize;
p = startp + newpos; pos = newpos;
Py_DECREF(repunicode); Py_DECREF(repunicode);
break; break;
} }
@ -6702,8 +6706,8 @@ unicode_encode_ucs1(const Py_UNICODE *p,
have+the replacement+the rest of the string, so have+the replacement+the rest of the string, so
we won't have to check space for encodable characters) */ we won't have to check space for encodable characters) */
respos = str - PyBytes_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
repsize = PyUnicode_GET_SIZE(repunicode); repsize = PyUnicode_GET_LENGTH(repunicode);
requiredsize = respos+repsize+(endp-collend); requiredsize = respos+repsize+(size-collend);
if (requiredsize > ressize) { if (requiredsize > ressize) {
if (requiredsize<2*ressize) if (requiredsize<2*ressize)
requiredsize = 2*ressize; requiredsize = 2*ressize;
@ -6716,17 +6720,17 @@ unicode_encode_ucs1(const Py_UNICODE *p,
} }
/* check if there is anything unencodable in the replacement /* check if there is anything unencodable in the replacement
and copy it to the output */ and copy it to the output */
for (uni2 = PyUnicode_AS_UNICODE(repunicode);repsize-->0; ++uni2, ++str) { for (i = 0; repsize-->0; ++i, ++str) {
c = *uni2; c = PyUnicode_READ_CHAR(repunicode, i);
if (c >= limit) { if (c >= limit) {
raise_encode_exception(&exc, encoding, startp, size, raise_encode_exception_obj(&exc, encoding, unicode,
unicodepos, unicodepos+1, reason); pos, pos+1, reason);
Py_DECREF(repunicode); Py_DECREF(repunicode);
goto onError; goto onError;
} }
*str = (char)c; *str = (char)c;
} }
p = startp + newpos; pos = newpos;
Py_DECREF(repunicode); Py_DECREF(repunicode);
} }
} }
@ -6750,12 +6754,19 @@ unicode_encode_ucs1(const Py_UNICODE *p,
return NULL; return NULL;
} }
/* Deprecated: Py_UNICODE-buffer entry point for Latin-1 encoding.
   After this change it wraps the buffer in a temporary unicode object
   (PyUnicode_FromUnicode), encodes that object through
   unicode_encode_ucs1() with a limit of 256, and releases the temporary
   before returning the encoder's result.  Returns NULL if the temporary
   object cannot be created. */
PyObject * PyObject *
PyUnicode_EncodeLatin1(const Py_UNICODE *p, PyUnicode_EncodeLatin1(const Py_UNICODE *p,
Py_ssize_t size, Py_ssize_t size,
const char *errors) const char *errors)
{ {
return unicode_encode_ucs1(p, size, errors, 256); PyObject *result;
PyObject *unicode = PyUnicode_FromUnicode(p, size);
if (unicode == NULL)
return NULL;
result = unicode_encode_ucs1(unicode, errors, 256);
Py_DECREF(unicode);
return result;
} }
PyObject * PyObject *
@ -6774,9 +6785,7 @@ _PyUnicode_AsLatin1String(PyObject *unicode, const char *errors)
PyUnicode_GET_LENGTH(unicode)); PyUnicode_GET_LENGTH(unicode));
/* Non-Latin-1 characters present. Defer to above function to /* Non-Latin-1 characters present. Defer to above function to
raise the exception. */ raise the exception. */
return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode), return unicode_encode_ucs1(unicode, errors, 256);
PyUnicode_GET_SIZE(unicode),
errors);
} }
PyObject* PyObject*
@ -6888,12 +6897,19 @@ PyUnicode_DecodeASCII(const char *s,
return NULL; return NULL;
} }
/* Deprecated: Py_UNICODE-buffer entry point for ASCII encoding.
   After this change it wraps the buffer in a temporary unicode object
   (PyUnicode_FromUnicode), encodes that object through
   unicode_encode_ucs1() with a limit of 128, and releases the temporary
   before returning the encoder's result.  Returns NULL if the temporary
   object cannot be created. */
PyObject * PyObject *
PyUnicode_EncodeASCII(const Py_UNICODE *p, PyUnicode_EncodeASCII(const Py_UNICODE *p,
Py_ssize_t size, Py_ssize_t size,
const char *errors) const char *errors)
{ {
return unicode_encode_ucs1(p, size, errors, 128); PyObject *result;
PyObject *unicode = PyUnicode_FromUnicode(p, size);
if (unicode == NULL)
return NULL;
result = unicode_encode_ucs1(unicode, errors, 128);
Py_DECREF(unicode);
return result;
} }
PyObject * PyObject *
@ -6910,9 +6926,7 @@ _PyUnicode_AsASCIIString(PyObject *unicode, const char *errors)
if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128) if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128)
return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode), return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode),
PyUnicode_GET_LENGTH(unicode)); PyUnicode_GET_LENGTH(unicode));
return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode), return unicode_encode_ucs1(unicode, errors, 128);
PyUnicode_GET_SIZE(unicode),
errors);
} }
PyObject * PyObject *
@ -8182,13 +8196,13 @@ charmapencode_output(Py_UNICODE c, PyObject *mapping,
Return 0 on success, -1 on error */ Return 0 on success, -1 on error */
static int static int
charmap_encoding_error( charmap_encoding_error(
const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping, PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
PyObject **exceptionObject, PyObject **exceptionObject,
int *known_errorHandler, PyObject **errorHandler, const char *errors, int *known_errorHandler, PyObject **errorHandler, const char *errors,
PyObject **res, Py_ssize_t *respos) PyObject **res, Py_ssize_t *respos)
{ {
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
Py_ssize_t repsize; Py_ssize_t size, repsize;
Py_ssize_t newpos; Py_ssize_t newpos;
Py_UNICODE *uni2; Py_UNICODE *uni2;
/* startpos for collecting unencodable chars */ /* startpos for collecting unencodable chars */
@ -8198,19 +8212,25 @@ charmap_encoding_error(
char *encoding = "charmap"; char *encoding = "charmap";
char *reason = "character maps to <undefined>"; char *reason = "character maps to <undefined>";
charmapencode_result x; charmapencode_result x;
Py_UCS4 ch;
if (PyUnicode_READY(unicode) < 0)
return -1;
size = PyUnicode_GET_LENGTH(unicode);
/* find all unencodable characters */ /* find all unencodable characters */
while (collendpos < size) { while (collendpos < size) {
PyObject *rep; PyObject *rep;
if (Py_TYPE(mapping) == &EncodingMapType) { if (Py_TYPE(mapping) == &EncodingMapType) {
int res = encoding_map_lookup(p[collendpos], mapping); ch = PyUnicode_READ_CHAR(unicode, collendpos);
int res = encoding_map_lookup(ch, mapping);
if (res != -1) if (res != -1)
break; break;
++collendpos; ++collendpos;
continue; continue;
} }
rep = charmapencode_lookup(p[collendpos], mapping); ch = PyUnicode_READ_CHAR(unicode, collendpos);
rep = charmapencode_lookup(ch, mapping);
if (rep==NULL) if (rep==NULL)
return -1; return -1;
else if (rep!=Py_None) { else if (rep!=Py_None) {
@ -8236,7 +8256,7 @@ charmap_encoding_error(
} }
switch (*known_errorHandler) { switch (*known_errorHandler) {
case 1: /* strict */ case 1: /* strict */
raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason); raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
return -1; return -1;
case 2: /* replace */ case 2: /* replace */
for (collpos = collstartpos; collpos<collendpos; ++collpos) { for (collpos = collstartpos; collpos<collendpos; ++collpos) {
@ -8245,7 +8265,7 @@ charmap_encoding_error(
return -1; return -1;
} }
else if (x==enc_FAILED) { else if (x==enc_FAILED) {
raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason); raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
return -1; return -1;
} }
} }
@ -8258,13 +8278,13 @@ charmap_encoding_error(
for (collpos = collstartpos; collpos < collendpos; ++collpos) { for (collpos = collstartpos; collpos < collendpos; ++collpos) {
char buffer[2+29+1+1]; char buffer[2+29+1+1];
char *cp; char *cp;
sprintf(buffer, "&#%d;", (int)p[collpos]); sprintf(buffer, "&#%d;", (int)PyUnicode_READ_CHAR(unicode, collpos));
for (cp = buffer; *cp; ++cp) { for (cp = buffer; *cp; ++cp) {
x = charmapencode_output(*cp, mapping, res, respos); x = charmapencode_output(*cp, mapping, res, respos);
if (x==enc_EXCEPTION) if (x==enc_EXCEPTION)
return -1; return -1;
else if (x==enc_FAILED) { else if (x==enc_FAILED) {
raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason); raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
return -1; return -1;
} }
} }
@ -8273,7 +8293,7 @@ charmap_encoding_error(
break; break;
default: default:
repunicode = unicode_encode_call_errorhandler(errors, errorHandler, repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
encoding, reason, p, size, exceptionObject, encoding, reason, unicode, exceptionObject,
collstartpos, collendpos, &newpos); collstartpos, collendpos, &newpos);
if (repunicode == NULL) if (repunicode == NULL)
return -1; return -1;
@ -8305,7 +8325,7 @@ charmap_encoding_error(
} }
else if (x==enc_FAILED) { else if (x==enc_FAILED) {
Py_DECREF(repunicode); Py_DECREF(repunicode);
raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason); raise_encode_exception_obj(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
return -1; return -1;
} }
} }
@ -8316,15 +8336,15 @@ charmap_encoding_error(
} }
PyObject * PyObject *
PyUnicode_EncodeCharmap(const Py_UNICODE *p, _PyUnicode_EncodeCharmap(PyObject *unicode,
Py_ssize_t size, PyObject *mapping,
PyObject *mapping, const char *errors)
const char *errors)
{ {
/* output object */ /* output object */
PyObject *res = NULL; PyObject *res = NULL;
/* current input position */ /* current input position */
Py_ssize_t inpos = 0; Py_ssize_t inpos = 0;
Py_ssize_t size;
/* current output position */ /* current output position */
Py_ssize_t respos = 0; Py_ssize_t respos = 0;
PyObject *errorHandler = NULL; PyObject *errorHandler = NULL;
@ -8334,9 +8354,13 @@ PyUnicode_EncodeCharmap(const Py_UNICODE *p,
* 3=ignore, 4=xmlcharrefreplace */ * 3=ignore, 4=xmlcharrefreplace */
int known_errorHandler = -1; int known_errorHandler = -1;
if (PyUnicode_READY(unicode) < 0)
return NULL;
size = PyUnicode_GET_LENGTH(unicode);
/* Default to Latin-1 */ /* Default to Latin-1 */
if (mapping == NULL) if (mapping == NULL)
return PyUnicode_EncodeLatin1(p, size, errors); return unicode_encode_ucs1(unicode, errors, 256);
/* allocate enough for a simple encoding without /* allocate enough for a simple encoding without
replacements, if we need more, we'll resize */ replacements, if we need more, we'll resize */
@ -8347,12 +8371,13 @@ PyUnicode_EncodeCharmap(const Py_UNICODE *p,
return res; return res;
while (inpos<size) { while (inpos<size) {
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, inpos);
/* try to encode it */ /* try to encode it */
charmapencode_result x = charmapencode_output(p[inpos], mapping, &res, &respos); charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos);
if (x==enc_EXCEPTION) /* error */ if (x==enc_EXCEPTION) /* error */
goto onError; goto onError;
if (x==enc_FAILED) { /* unencodable character */ if (x==enc_FAILED) { /* unencodable character */
if (charmap_encoding_error(p, size, &inpos, mapping, if (charmap_encoding_error(unicode, &inpos, mapping,
&exc, &exc,
&known_errorHandler, &errorHandler, errors, &known_errorHandler, &errorHandler, errors,
&res, &respos)) { &res, &respos)) {
@ -8380,6 +8405,22 @@ PyUnicode_EncodeCharmap(const Py_UNICODE *p,
return NULL; return NULL;
} }
/* Deprecated: Py_UNICODE-buffer entry point for charmap encoding.

   Wraps the buffer `p` of `size` code units in a temporary unicode object,
   encodes it with _PyUnicode_EncodeCharmap() using `mapping` and `errors`,
   and releases the temporary again.

   Returns whatever _PyUnicode_EncodeCharmap() returned (NULL on encoding
   failure), or NULL if the temporary object could not be created. */
PyObject *
PyUnicode_EncodeCharmap(const Py_UNICODE *p,
                        Py_ssize_t size,
                        PyObject *mapping,
                        const char *errors)
{
    PyObject *result;
    PyObject *unicode = PyUnicode_FromUnicode(p, size);
    if (unicode == NULL)
        return NULL;
    result = _PyUnicode_EncodeCharmap(unicode, mapping, errors);
    Py_DECREF(unicode);
    /* Hand the encoded object to the caller.  Returning NULL here would
       leak `result` and report failure without an exception being set. */
    return result;
}
PyObject * PyObject *
PyUnicode_AsCharmapString(PyObject *unicode, PyUnicode_AsCharmapString(PyObject *unicode,
PyObject *mapping) PyObject *mapping)
@ -8388,10 +8429,7 @@ PyUnicode_AsCharmapString(PyObject *unicode,
PyErr_BadArgument(); PyErr_BadArgument();
return NULL; return NULL;
} }
return PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(unicode), return _PyUnicode_EncodeCharmap(unicode, mapping, NULL);
PyUnicode_GET_SIZE(unicode),
mapping,
NULL);
} }
/* create or adjust a UnicodeTranslateError */ /* create or adjust a UnicodeTranslateError */
@ -8893,6 +8931,7 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
Py_UNICODE *p, *end; Py_UNICODE *p, *end;
PyObject *errorHandler = NULL; PyObject *errorHandler = NULL;
PyObject *exc = NULL; PyObject *exc = NULL;
PyObject *unicode;
const char *encoding = "decimal"; const char *encoding = "decimal";
const char *reason = "invalid decimal Unicode string"; const char *reason = "invalid decimal Unicode string";
/* the following variable is used for caching string comparisons /* the following variable is used for caching string comparisons
@ -8973,9 +9012,13 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
p = collend; p = collend;
break; break;
default: default:
unicode = PyUnicode_FromUnicode(s, length);
if (unicode == NULL)
goto onError;
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler, repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
encoding, reason, s, length, &exc, encoding, reason, unicode, &exc,
collstart-s, collend-s, &newpos); collstart-s, collend-s, &newpos);
Py_DECREF(unicode);
if (repunicode == NULL) if (repunicode == NULL)
goto onError; goto onError;
if (!PyUnicode_Check(repunicode)) { if (!PyUnicode_Check(repunicode)) {