Merged revisions 72260 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r72260 | walter.doerwald | 2009-05-04 00:36:33 +0200 (Mo, 04 Mai 2009) | 5 lines

  Issue #5108: Handle %s like %S and %R in PyUnicode_FromFormatV(): Call
  PyUnicode_DecodeUTF8() once, remember the result and output it in a second
  step. This avoids problems with counting UTF-8 bytes that ignores the effect
  of using the replace error handler in PyUnicode_DecodeUTF8().
........
This commit is contained in:
parent
129ab1d809
commit
c1651a0b96
|
@ -499,6 +499,11 @@ Core and Builtins
|
|||
|
||||
- The re.sub(), re.subn() and re.split() functions now accept a flags parameter.
|
||||
|
||||
- Issue #5108: Handle %s like %S, %R and %A in PyUnicode_FromFormatV(): Call
|
||||
PyUnicode_DecodeUTF8() once, remember the result and output it in a second
|
||||
step. This avoids problems with counting UTF-8 bytes that ignores the effect
|
||||
of using the replace error handler in PyUnicode_DecodeUTF8().
|
||||
|
||||
Library
|
||||
-------
|
||||
|
||||
|
|
|
@ -723,16 +723,26 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
|||
count = vargs;
|
||||
#endif
|
||||
#endif
|
||||
/* step 1: count the number of %S/%R/%A format specifications
|
||||
* (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII() for
|
||||
* these objects once during step 3 and put the result in
|
||||
an array) */
|
||||
/* step 1: count the number of %S/%R/%A/%s format specifications
|
||||
* (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/
|
||||
* PyUnicode_DecodeUTF8() for these objects once during step 3 and put the
|
||||
* result in an array) */
|
||||
for (f = format; *f; f++) {
|
||||
if (*f == '%' && (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A'))
|
||||
++callcount;
|
||||
if (*f == '%') {
|
||||
if (*(f+1)=='%')
|
||||
continue;
|
||||
if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A')
|
||||
++callcount;
|
||||
while (ISDIGIT((unsigned)*f))
|
||||
width = (width*10) + *f++ - '0';
|
||||
while (*++f && *f != '%' && !ISALPHA((unsigned)*f))
|
||||
;
|
||||
if (*f == 's')
|
||||
++callcount;
|
||||
}
|
||||
}
|
||||
/* step 2: allocate memory for the results of
|
||||
* PyObject_Str()/PyObject_Repr() calls */
|
||||
* PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */
|
||||
if (callcount) {
|
||||
callresults = PyObject_Malloc(sizeof(PyObject *)*callcount);
|
||||
if (!callresults) {
|
||||
|
@ -781,35 +791,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
|||
case 's':
|
||||
{
|
||||
/* UTF-8 */
|
||||
unsigned char*s;
|
||||
s = va_arg(count, unsigned char*);
|
||||
while (*s) {
|
||||
if (*s < 128) {
|
||||
n++; s++;
|
||||
} else if (*s < 0xc0) {
|
||||
/* invalid UTF-8 */
|
||||
n++; s++;
|
||||
} else if (*s < 0xc0) {
|
||||
n++;
|
||||
s++; if(!*s)break;
|
||||
s++;
|
||||
} else if (*s < 0xe0) {
|
||||
n++;
|
||||
s++; if(!*s)break;
|
||||
s++; if(!*s)break;
|
||||
s++;
|
||||
} else {
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
n++;
|
||||
#else
|
||||
n+=2;
|
||||
#endif
|
||||
s++; if(!*s)break;
|
||||
s++; if(!*s)break;
|
||||
s++; if(!*s)break;
|
||||
s++;
|
||||
}
|
||||
}
|
||||
unsigned char *s = va_arg(count, unsigned char*);
|
||||
PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace");
|
||||
if (!str)
|
||||
goto fail;
|
||||
n += PyUnicode_GET_SIZE(str);
|
||||
/* Remember the str and switch to the next slot */
|
||||
*callresult++ = str;
|
||||
break;
|
||||
}
|
||||
case 'U':
|
||||
|
@ -978,19 +966,15 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
|||
break;
|
||||
case 's':
|
||||
{
|
||||
/* Parameter must be UTF-8 encoded.
|
||||
In case of encoding errors, use
|
||||
the replacement character. */
|
||||
PyObject *u;
|
||||
p = va_arg(vargs, char*);
|
||||
u = PyUnicode_DecodeUTF8(p, strlen(p),
|
||||
"replace");
|
||||
if (!u)
|
||||
goto fail;
|
||||
Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u),
|
||||
PyUnicode_GET_SIZE(u));
|
||||
s += PyUnicode_GET_SIZE(u);
|
||||
Py_DECREF(u);
|
||||
/* unused, since we already have the result */
|
||||
(void) va_arg(vargs, char *);
|
||||
Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult),
|
||||
PyUnicode_GET_SIZE(*callresult));
|
||||
s += PyUnicode_GET_SIZE(*callresult);
|
||||
/* We're done with the unicode()/repr() => forget it */
|
||||
Py_DECREF(*callresult);
|
||||
/* switch to next unicode()/repr() result */
|
||||
++callresult;
|
||||
break;
|
||||
}
|
||||
case 'U':
|
||||
|
|
Loading…
Reference in New Issue