Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
of the command line.
This commit is contained in:
parent
e23c8683a5
commit
8ed91b2768
|
@ -12,6 +12,9 @@ What's New in Python 3.1 release candidate 1?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
|
||||
of the command line.
|
||||
|
||||
- Issue #6012: Add cleanup support to O& argument parsing.
|
||||
|
||||
- Issue #6089: Fixed str.format with certain invalid field specifiers
|
||||
|
|
|
@ -38,8 +38,16 @@ char2wchar(char* arg)
|
|||
if (!res)
|
||||
goto oom;
|
||||
count = mbstowcs(res, arg, argsize+1);
|
||||
if (count != (size_t)-1)
|
||||
if (count != (size_t)-1) {
|
||||
wchar_t *tmp;
|
||||
/* Only use the result if it contains no
|
||||
surrogate characters. */
|
||||
for (tmp = res; *tmp != 0 &&
|
||||
(*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
|
||||
;
|
||||
if (*tmp == 0)
|
||||
return res;
|
||||
}
|
||||
PyMem_Free(res);
|
||||
}
|
||||
/* Conversion failed. Fall back to escaping with surrogateescape. */
|
||||
|
@ -75,6 +83,14 @@ char2wchar(char* arg)
|
|||
memset(&mbs, 0, sizeof mbs);
|
||||
continue;
|
||||
}
|
||||
if (*out >= 0xd800 && *out <= 0xdfff) {
|
||||
/* Surrogate character. Escape the original
|
||||
byte sequence with surrogateescape. */
|
||||
argsize -= converted;
|
||||
while (converted--)
|
||||
*out++ = 0xdc00 + *in++;
|
||||
continue;
|
||||
}
|
||||
/* successfully converted some bytes */
|
||||
in += converted;
|
||||
argsize -= converted;
|
||||
|
|
Loading…
Reference in New Issue