Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
of the command line.
This commit is contained in:
parent
e23c8683a5
commit
8ed91b2768
|
@ -12,6 +12,9 @@ What's New in Python 3.1 release candidate 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
|
||||||
|
of the command line.
|
||||||
|
|
||||||
- Issue #6012: Add cleanup support to O& argument parsing.
|
- Issue #6012: Add cleanup support to O& argument parsing.
|
||||||
|
|
||||||
- Issue #6089: Fixed str.format with certain invalid field specifiers
|
- Issue #6089: Fixed str.format with certain invalid field specifiers
|
||||||
|
|
|
@ -38,8 +38,16 @@ char2wchar(char* arg)
|
||||||
if (!res)
|
if (!res)
|
||||||
goto oom;
|
goto oom;
|
||||||
count = mbstowcs(res, arg, argsize+1);
|
count = mbstowcs(res, arg, argsize+1);
|
||||||
if (count != (size_t)-1)
|
if (count != (size_t)-1) {
|
||||||
return res;
|
wchar_t *tmp;
|
||||||
|
/* Only use the result if it contains no
|
||||||
|
surrogate characters. */
|
||||||
|
for (tmp = res; *tmp != 0 &&
|
||||||
|
(*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
|
||||||
|
;
|
||||||
|
if (*tmp == 0)
|
||||||
|
return res;
|
||||||
|
}
|
||||||
PyMem_Free(res);
|
PyMem_Free(res);
|
||||||
}
|
}
|
||||||
/* Conversion failed. Fall back to escaping with surrogateescape. */
|
/* Conversion failed. Fall back to escaping with surrogateescape. */
|
||||||
|
@ -75,6 +83,14 @@ char2wchar(char* arg)
|
||||||
memset(&mbs, 0, sizeof mbs);
|
memset(&mbs, 0, sizeof mbs);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (*out >= 0xd800 && *out <= 0xdfff) {
|
||||||
|
/* Surrogate character. Escape the original
|
||||||
|
byte sequence with surrogateescape. */
|
||||||
|
argsize -= converted;
|
||||||
|
while (converted--)
|
||||||
|
*out++ = 0xdc00 + *in++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
/* successfully converted some bytes */
|
/* successfully converted some bytes */
|
||||||
in += converted;
|
in += converted;
|
||||||
argsize -= converted;
|
argsize -= converted;
|
||||||
|
|
Loading…
Reference in New Issue