mirror of https://github.com/python/cpython
Issue #5108: Handle %s like %S and %R in PyUnicode_FromFormatV(): call
PyUnicode_DecodeUTF8() once, remember the result, and output it in a second step. This avoids miscounting the output length: the previous code counted UTF-8 bytes by hand and ignored the effect of the "replace" error handler used by PyUnicode_DecodeUTF8().
This commit is contained in:
parent
01fce5adc0
commit
ed960ac404
|
@ -258,6 +258,11 @@ Core and Builtins
|
||||||
- Issue #5705: os.setuid() would not accept values > 2**31-1 but pwd.getpwnam()
|
- Issue #5705: os.setuid() would not accept values > 2**31-1 but pwd.getpwnam()
|
||||||
returned them on 64bit platforms.
|
returned them on 64bit platforms.
|
||||||
|
|
||||||
|
- Issue #5108: Handle %s like %S and %R in PyUnicode_FromFormatV(): Call
|
||||||
|
PyUnicode_DecodeUTF8() once, remember the result and output it in a second
|
||||||
|
step. This avoids miscounting the output size, which previously ignored the effect
|
||||||
|
of using the replace error handler in PyUnicode_DecodeUTF8().
|
||||||
|
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
|
|
@ -674,15 +674,25 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
count = vargs;
|
count = vargs;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
/* step 1: count the number of %S/%R format specifications
|
/* step 1: count the number of %S/%R/%s format specifications
|
||||||
* (we call PyObject_Str()/PyObject_Repr() for these objects
|
* (we call PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() for these
|
||||||
* once during step 3 and put the result in an array) */
|
* objects once during step 3 and put the result in an array) */
|
||||||
for (f = format; *f; f++) {
|
for (f = format; *f; f++) {
|
||||||
if (*f == '%' && (*(f+1)=='S' || *(f+1)=='R'))
|
if (*f == '%') {
|
||||||
++callcount;
|
if (*(f+1)=='%')
|
||||||
|
continue;
|
||||||
|
if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A')
|
||||||
|
++callcount;
|
||||||
|
while (isdigit((unsigned)*f))
|
||||||
|
width = (width*10) + *f++ - '0';
|
||||||
|
while (*++f && *f != '%' && !isalpha((unsigned)*f))
|
||||||
|
;
|
||||||
|
if (*f == 's')
|
||||||
|
++callcount;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/* step 2: allocate memory for the results of
|
/* step 2: allocate memory for the results of
|
||||||
* PyObject_Str()/PyObject_Repr() calls */
|
* PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */
|
||||||
if (callcount) {
|
if (callcount) {
|
||||||
callresults = PyObject_Malloc(sizeof(PyObject *)*callcount);
|
callresults = PyObject_Malloc(sizeof(PyObject *)*callcount);
|
||||||
if (!callresults) {
|
if (!callresults) {
|
||||||
|
@ -731,35 +741,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
case 's':
|
case 's':
|
||||||
{
|
{
|
||||||
/* UTF-8 */
|
/* UTF-8 */
|
||||||
unsigned char*s;
|
unsigned char *s = va_arg(count, unsigned char*);
|
||||||
s = va_arg(count, unsigned char*);
|
PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace");
|
||||||
while (*s) {
|
if (!str)
|
||||||
if (*s < 128) {
|
goto fail;
|
||||||
n++; s++;
|
n += PyUnicode_GET_SIZE(str);
|
||||||
} else if (*s < 0xc0) {
|
/* Remember the str and switch to the next slot */
|
||||||
/* invalid UTF-8 */
|
*callresult++ = str;
|
||||||
n++; s++;
|
|
||||||
} else if (*s < 0xc0) {
|
|
||||||
n++;
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++;
|
|
||||||
} else if (*s < 0xe0) {
|
|
||||||
n++;
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++;
|
|
||||||
} else {
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
n++;
|
|
||||||
#else
|
|
||||||
n+=2;
|
|
||||||
#endif
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'U':
|
case 'U':
|
||||||
|
@ -915,19 +903,15 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
break;
|
break;
|
||||||
case 's':
|
case 's':
|
||||||
{
|
{
|
||||||
/* Parameter must be UTF-8 encoded.
|
/* unused, since we already have the result */
|
||||||
In case of encoding errors, use
|
(void) va_arg(vargs, char *);
|
||||||
the replacement character. */
|
Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult),
|
||||||
PyObject *u;
|
PyUnicode_GET_SIZE(*callresult));
|
||||||
p = va_arg(vargs, char*);
|
s += PyUnicode_GET_SIZE(*callresult);
|
||||||
u = PyUnicode_DecodeUTF8(p, strlen(p),
|
/* We're done with the decoded UTF-8 string => forget it */
|
||||||
"replace");
|
Py_DECREF(*callresult);
|
||||||
if (!u)
|
/* switch to the next stored call result */
|
||||||
goto fail;
|
++callresult;
|
||||||
Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u),
|
|
||||||
PyUnicode_GET_SIZE(u));
|
|
||||||
s += PyUnicode_GET_SIZE(u);
|
|
||||||
Py_DECREF(u);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'U':
|
case 'U':
|
||||||
|
|
Loading…
Reference in New Issue