bpo-34527: POSIX locale enables the UTF-8 Mode (GH-8972) (GH-8974)
* The UTF-8 Mode is now also enabled by the "POSIX" locale, not only
by the "C" locale.
* On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also force
the ASCII encoding if the LC_CTYPE locale is "POSIX", not only if
the LC_CTYPE locale is "C".
* test_utf8_mode.test_cmd_line() also checks that the command line
arguments are decoded from UTF-8 when the UTF-8 Mode is enabled
with POSIX locale or C locale.
(cherry picked from commit 5cb258950c)
This commit is contained in:
parent
e3f20828f6
commit
65ef7425a3
|
@ -12,6 +12,7 @@ from test.support.script_helper import assert_python_ok, assert_python_failure
|
||||||
|
|
||||||
|
|
||||||
MS_WINDOWS = (sys.platform == 'win32')
|
MS_WINDOWS = (sys.platform == 'win32')
|
||||||
|
POSIX_LOCALES = ('C', 'POSIX')
|
||||||
|
|
||||||
|
|
||||||
class UTF8ModeTests(unittest.TestCase):
|
class UTF8ModeTests(unittest.TestCase):
|
||||||
|
@ -23,7 +24,7 @@ class UTF8ModeTests(unittest.TestCase):
|
||||||
|
|
||||||
def posix_locale(self):
|
def posix_locale(self):
|
||||||
loc = locale.setlocale(locale.LC_CTYPE, None)
|
loc = locale.setlocale(locale.LC_CTYPE, None)
|
||||||
return (loc == 'C')
|
return (loc in POSIX_LOCALES)
|
||||||
|
|
||||||
def get_output(self, *args, failure=False, **kw):
|
def get_output(self, *args, failure=False, **kw):
|
||||||
kw = dict(self.DEFAULT_ENV, **kw)
|
kw = dict(self.DEFAULT_ENV, **kw)
|
||||||
|
@ -39,7 +40,9 @@ class UTF8ModeTests(unittest.TestCase):
|
||||||
def test_posix_locale(self):
|
def test_posix_locale(self):
|
||||||
code = 'import sys; print(sys.flags.utf8_mode)'
|
code = 'import sys; print(sys.flags.utf8_mode)'
|
||||||
|
|
||||||
out = self.get_output('-c', code, LC_ALL='C')
|
for loc in POSIX_LOCALES:
|
||||||
|
with self.subTest(LC_ALL=loc):
|
||||||
|
out = self.get_output('-c', code, LC_ALL=loc)
|
||||||
self.assertEqual(out, '1')
|
self.assertEqual(out, '1')
|
||||||
|
|
||||||
def test_xoption(self):
|
def test_xoption(self):
|
||||||
|
@ -201,7 +204,9 @@ class UTF8ModeTests(unittest.TestCase):
|
||||||
out = self.get_output('-X', 'utf8', '-c', code)
|
out = self.get_output('-X', 'utf8', '-c', code)
|
||||||
self.assertEqual(out, 'UTF-8 UTF-8')
|
self.assertEqual(out, 'UTF-8 UTF-8')
|
||||||
|
|
||||||
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C')
|
for loc in POSIX_LOCALES:
|
||||||
|
with self.subTest(LC_ALL=loc):
|
||||||
|
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
|
||||||
self.assertEqual(out, 'UTF-8 UTF-8')
|
self.assertEqual(out, 'UTF-8 UTF-8')
|
||||||
|
|
||||||
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
|
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
|
||||||
|
@ -217,11 +222,17 @@ class UTF8ModeTests(unittest.TestCase):
|
||||||
self.assertEqual(args, ascii(expected), out)
|
self.assertEqual(args, ascii(expected), out)
|
||||||
|
|
||||||
check('utf8', [arg_utf8])
|
check('utf8', [arg_utf8])
|
||||||
|
for loc in POSIX_LOCALES:
|
||||||
|
with self.subTest(LC_ALL=loc):
|
||||||
|
check('utf8', [arg_utf8], LC_ALL=loc)
|
||||||
|
|
||||||
if sys.platform == 'darwin' or support.is_android:
|
if sys.platform == 'darwin' or support.is_android:
|
||||||
c_arg = arg_utf8
|
c_arg = arg_utf8
|
||||||
else:
|
else:
|
||||||
c_arg = arg_ascii
|
c_arg = arg_ascii
|
||||||
check('utf8=0', [c_arg], LC_ALL='C')
|
for loc in POSIX_LOCALES:
|
||||||
|
with self.subTest(LC_ALL=loc):
|
||||||
|
check('utf8=0', [c_arg], LC_ALL=loc)
|
||||||
|
|
||||||
def test_optim_level(self):
|
def test_optim_level(self):
|
||||||
# CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
|
# CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
The UTF-8 Mode is now also enabled by the "POSIX" locale, not only by the "C"
|
||||||
|
locale.
|
|
@ -0,0 +1,3 @@
|
||||||
|
On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also force the
|
||||||
|
ASCII encoding if the LC_CTYPE locale is "POSIX", not only if the LC_CTYPE
|
||||||
|
locale is "C".
|
|
@ -2102,15 +2102,25 @@ done:
|
||||||
static void
|
static void
|
||||||
config_init_locale(_PyCoreConfig *config)
|
config_init_locale(_PyCoreConfig *config)
|
||||||
{
|
{
|
||||||
if (_Py_LegacyLocaleDetected()) {
|
|
||||||
/* POSIX locale: enable C locale coercion and UTF-8 Mode */
|
|
||||||
if (config->utf8_mode < 0) {
|
|
||||||
config->utf8_mode = 1;
|
|
||||||
}
|
|
||||||
if (config->coerce_c_locale < 0) {
|
if (config->coerce_c_locale < 0) {
|
||||||
|
/* The C locale enables the C locale coercion (PEP 538) */
|
||||||
|
if (_Py_LegacyLocaleDetected()) {
|
||||||
config->coerce_c_locale = 1;
|
config->coerce_c_locale = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef MS_WINDOWS
|
||||||
|
if (config->utf8_mode < 0) {
|
||||||
|
/* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
|
||||||
|
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
|
||||||
|
if (ctype_loc != NULL
|
||||||
|
&& (strcmp(ctype_loc, "C") == 0
|
||||||
|
|| strcmp(ctype_loc, "POSIX") == 0))
|
||||||
|
{
|
||||||
|
config->utf8_mode = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -128,7 +128,7 @@ check_force_ascii(void)
|
||||||
loc = setlocale(LC_CTYPE, NULL);
|
loc = setlocale(LC_CTYPE, NULL);
|
||||||
if (loc == NULL)
|
if (loc == NULL)
|
||||||
goto error;
|
goto error;
|
||||||
if (strcmp(loc, "C") != 0) {
|
if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
|
||||||
/* the LC_CTYPE locale is different than C */
|
/* the LC_CTYPE locale is different than C and POSIX */
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue