mirror of https://github.com/python/cpython
Add a workaround for a problem where UTF-8 strings can be corrupted
or broken by basic ctype functions in 4.4BSD descendants. This will be fixed in their future development branches, but they will keep the POSIX incompatibility for backward compatibility in the near future.
This commit is contained in:
parent
6db15d7307
commit
b5047fd019
|
@ -411,6 +411,39 @@ extern int fdatasync(int);
|
|||
extern double hypot(double, double);
|
||||
#endif
|
||||
|
||||
|
||||
/*******************************************************************
|
||||
On 4.4BSD-descendants, ctype functions serve the whole range of
|
||||
wchar_t character set rather than single byte code points only.
|
||||
This characteristic can break some operations of string objects
|
||||
including str.upper() and str.split() on UTF-8 locales. This
|
||||
workaround was provided by Tim Robbins of the FreeBSD project. He said
|
||||
the incompatibility will be fixed in FreeBSD 6.
|
||||
********************************************************************/
|
||||
|
||||
#ifdef __FreeBSD__
/* <osreldate.h> is pulled in so that __FreeBSD_version can be tested. */
#include <osreldate.h>
#if __FreeBSD_version > 500039
/* Replace the narrow <ctype.h> classification and case-conversion
   entry points with their wide-character counterparts, applied to the
   result of btowc().  Each byte is first converted to a wide character
   and only then classified/converted, so the ctype calls used on
   byte strings no longer act on multibyte UTF-8 sequences as if every
   byte were a character of the locale's wide character set. */
#include <ctype.h>
#include <wchar.h>      /* declares btowc() */
#include <wctype.h>
#undef isalnum
#define isalnum(c) iswalnum(btowc(c))
#undef isalpha
#define isalpha(c) iswalpha(btowc(c))
#undef islower
#define islower(c) iswlower(btowc(c))
#undef isspace
#define isspace(c) iswspace(btowc(c))
#undef isupper
#define isupper(c) iswupper(btowc(c))
#undef tolower
#define tolower(c) towlower(btowc(c))
#undef toupper
#define toupper(c) towupper(btowc(c))
#endif /* __FreeBSD_version > 500039 */
#endif /* __FreeBSD__ */
|
||||
|
||||
|
||||
/* Declarations for symbol visibility.
|
||||
|
||||
PyAPI_FUNC(type): Declares a public Python API function and return type
|
||||
|
|
|
@ -47,3 +47,38 @@ try:
|
|||
locale.getpreferredencoding()
|
||||
finally:
|
||||
locale.setlocale(locale.LC_NUMERIC, oldlocale)
|
||||
|
||||
|
||||
# Test BSD Rune locale's bug for isctype functions.
|
||||
def teststrop(s, method, output):
|
||||
if verbose:
|
||||
print "%s.%s() =? %s ..." % (repr(s), method, repr(output)),
|
||||
result = getattr(s, method)()
|
||||
if result != output:
|
||||
if verbose:
|
||||
print "no"
|
||||
print "%s.%s() == %s != %s" % (repr(s), method, repr(result),
|
||||
repr(output))
|
||||
elif verbose:
|
||||
print "yes"
|
||||
|
||||
# Run the string-method checks under a UTF-8 LC_CTYPE locale.  When that
# locale cannot be set the checks are skipped silently; otherwise the
# previous LC_CTYPE setting is restored afterwards, even on error.
try:
    oldlocale = locale.setlocale(locale.LC_CTYPE)
    locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8')
except locale.Error:
    pass
else:
    # (string, method name, expected result), checked in this order.
    rune_cases = (
        ('\x20', 'isspace', True),
        ('\xa0', 'isspace', False),
        ('\xa1', 'isspace', False),
        ('\xc0', 'isalpha', False),
        ('\xc0', 'isalnum', False),
        ('\xc0', 'isupper', False),
        ('\xc0', 'islower', False),
        ('\xec\xa0\xbc', 'split', ['\xec\xa0\xbc']),
        ('\xed\x95\xa0', 'strip', '\xed\x95\xa0'),
        ('\xcc\x85', 'lower', '\xcc\x85'),
        ('\xed\x95\xa0', 'upper', '\xed\x95\xa0'),
    )
    try:
        for subject, name, expected in rune_cases:
            teststrop(subject, name, expected)
    finally:
        locale.setlocale(locale.LC_CTYPE, oldlocale)
|
||||
|
|
Loading…
Reference in New Issue