Merged revisions 72260 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r72260 | walter.doerwald | 2009-05-04 00:36:33 +0200 (Mo, 04 Mai 2009) | 5 lines

  Issue #5108: Handle %s like %S and %R in PyUnicode_FromFormatV(): Call
  PyUnicode_DecodeUTF8() once, remember the result and output it in a second
  step. This avoids problems with counting UTF-8 bytes that ignores the effect
  of using the replace error handler in PyUnicode_DecodeUTF8().
........
This commit is contained in:
parent
129ab1d809
commit
c1651a0b96
|
@ -499,6 +499,11 @@ Core and Builtins
|
||||||
|
|
||||||
- The re.sub(), re.subn() and re.split() functions now accept a flags parameter.
|
- The re.sub(), re.subn() and re.split() functions now accept a flags parameter.
|
||||||
|
|
||||||
|
- Issue #5108: Handle %s like %S, %R and %A in PyUnicode_FromFormatV(): Call
|
||||||
|
PyUnicode_DecodeUTF8() once, remember the result and output it in a second
|
||||||
|
step. This avoids problems with counting UTF-8 bytes that ignores the effect
|
||||||
|
of using the replace error handler in PyUnicode_DecodeUTF8().
|
||||||
|
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
|
|
@ -723,16 +723,26 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
count = vargs;
|
count = vargs;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
/* step 1: count the number of %S/%R/%A format specifications
|
/* step 1: count the number of %S/%R/%A/%s format specifications
|
||||||
* (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII() for
|
* (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/
|
||||||
* these objects once during step 3 and put the result in
|
* PyUnicode_DecodeUTF8() for these objects once during step 3 and put the
|
||||||
an array) */
|
* result in an array) */
|
||||||
for (f = format; *f; f++) {
|
for (f = format; *f; f++) {
|
||||||
if (*f == '%' && (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A'))
|
if (*f == '%') {
|
||||||
++callcount;
|
if (*(f+1)=='%')
|
||||||
|
continue;
|
||||||
|
if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A')
|
||||||
|
++callcount;
|
||||||
|
while (ISDIGIT((unsigned)*f))
|
||||||
|
width = (width*10) + *f++ - '0';
|
||||||
|
while (*++f && *f != '%' && !ISALPHA((unsigned)*f))
|
||||||
|
;
|
||||||
|
if (*f == 's')
|
||||||
|
++callcount;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/* step 2: allocate memory for the results of
|
/* step 2: allocate memory for the results of
|
||||||
* PyObject_Str()/PyObject_Repr() calls */
|
* PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */
|
||||||
if (callcount) {
|
if (callcount) {
|
||||||
callresults = PyObject_Malloc(sizeof(PyObject *)*callcount);
|
callresults = PyObject_Malloc(sizeof(PyObject *)*callcount);
|
||||||
if (!callresults) {
|
if (!callresults) {
|
||||||
|
@ -781,35 +791,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
case 's':
|
case 's':
|
||||||
{
|
{
|
||||||
/* UTF-8 */
|
/* UTF-8 */
|
||||||
unsigned char*s;
|
unsigned char *s = va_arg(count, unsigned char*);
|
||||||
s = va_arg(count, unsigned char*);
|
PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace");
|
||||||
while (*s) {
|
if (!str)
|
||||||
if (*s < 128) {
|
goto fail;
|
||||||
n++; s++;
|
n += PyUnicode_GET_SIZE(str);
|
||||||
} else if (*s < 0xc0) {
|
/* Remember the str and switch to the next slot */
|
||||||
/* invalid UTF-8 */
|
*callresult++ = str;
|
||||||
n++; s++;
|
|
||||||
} else if (*s < 0xc0) {
|
|
||||||
n++;
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++;
|
|
||||||
} else if (*s < 0xe0) {
|
|
||||||
n++;
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++;
|
|
||||||
} else {
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
n++;
|
|
||||||
#else
|
|
||||||
n+=2;
|
|
||||||
#endif
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++; if(!*s)break;
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'U':
|
case 'U':
|
||||||
|
@ -978,19 +966,15 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
break;
|
break;
|
||||||
case 's':
|
case 's':
|
||||||
{
|
{
|
||||||
/* Parameter must be UTF-8 encoded.
|
/* unused, since we already have the result */
|
||||||
In case of encoding errors, use
|
(void) va_arg(vargs, char *);
|
||||||
the replacement character. */
|
Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult),
|
||||||
PyObject *u;
|
PyUnicode_GET_SIZE(*callresult));
|
||||||
p = va_arg(vargs, char*);
|
s += PyUnicode_GET_SIZE(*callresult);
|
||||||
u = PyUnicode_DecodeUTF8(p, strlen(p),
|
/* We're done with the unicode()/repr() => forget it */
|
||||||
"replace");
|
Py_DECREF(*callresult);
|
||||||
if (!u)
|
/* switch to next unicode()/repr() result */
|
||||||
goto fail;
|
++callresult;
|
||||||
Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u),
|
|
||||||
PyUnicode_GET_SIZE(u));
|
|
||||||
s += PyUnicode_GET_SIZE(u);
|
|
||||||
Py_DECREF(u);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'U':
|
case 'U':
|
||||||
|
|
Loading…
Reference in New Issue