bpo-34527: POSIX locale enables the UTF-8 Mode (GH-8972) (GH-8974)

* The UTF-8 Mode is now also enabled by the "POSIX" locale, not only
  by the "C" locale.
* On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also force
  the ASCII encoding if the LC_CTYPE locale is "POSIX", not only if
  the LC_CTYPE locale is "C".
* test_utf8_mode.test_cmd_line() also checks that the command line
  arguments are decoded from UTF-8 when the UTF-8 Mode is enabled
  with the POSIX locale or the C locale.

(cherry picked from commit 5cb258950c)
This commit is contained in:
Victor Stinner 2018-08-28 13:51:20 +02:00 committed by GitHub
parent e3f20828f6
commit 65ef7425a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 39 additions and 13 deletions

View File

@ -12,6 +12,7 @@ from test.support.script_helper import assert_python_ok, assert_python_failure
MS_WINDOWS = (sys.platform == 'win32') MS_WINDOWS = (sys.platform == 'win32')
POSIX_LOCALES = ('C', 'POSIX')
class UTF8ModeTests(unittest.TestCase): class UTF8ModeTests(unittest.TestCase):
@ -23,7 +24,7 @@ class UTF8ModeTests(unittest.TestCase):
def posix_locale(self): def posix_locale(self):
loc = locale.setlocale(locale.LC_CTYPE, None) loc = locale.setlocale(locale.LC_CTYPE, None)
return (loc == 'C') return (loc in POSIX_LOCALES)
def get_output(self, *args, failure=False, **kw): def get_output(self, *args, failure=False, **kw):
kw = dict(self.DEFAULT_ENV, **kw) kw = dict(self.DEFAULT_ENV, **kw)
@ -39,8 +40,10 @@ class UTF8ModeTests(unittest.TestCase):
def test_posix_locale(self): def test_posix_locale(self):
code = 'import sys; print(sys.flags.utf8_mode)' code = 'import sys; print(sys.flags.utf8_mode)'
out = self.get_output('-c', code, LC_ALL='C') for loc in POSIX_LOCALES:
self.assertEqual(out, '1') with self.subTest(LC_ALL=loc):
out = self.get_output('-c', code, LC_ALL=loc)
self.assertEqual(out, '1')
def test_xoption(self): def test_xoption(self):
code = 'import sys; print(sys.flags.utf8_mode)' code = 'import sys; print(sys.flags.utf8_mode)'
@ -201,8 +204,10 @@ class UTF8ModeTests(unittest.TestCase):
out = self.get_output('-X', 'utf8', '-c', code) out = self.get_output('-X', 'utf8', '-c', code)
self.assertEqual(out, 'UTF-8 UTF-8') self.assertEqual(out, 'UTF-8 UTF-8')
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C') for loc in POSIX_LOCALES:
self.assertEqual(out, 'UTF-8 UTF-8') with self.subTest(LC_ALL=loc):
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
self.assertEqual(out, 'UTF-8 UTF-8')
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix') @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
def test_cmd_line(self): def test_cmd_line(self):
@ -217,11 +222,17 @@ class UTF8ModeTests(unittest.TestCase):
self.assertEqual(args, ascii(expected), out) self.assertEqual(args, ascii(expected), out)
check('utf8', [arg_utf8]) check('utf8', [arg_utf8])
for loc in POSIX_LOCALES:
with self.subTest(LC_ALL=loc):
check('utf8', [arg_utf8], LC_ALL=loc)
if sys.platform == 'darwin' or support.is_android: if sys.platform == 'darwin' or support.is_android:
c_arg = arg_utf8 c_arg = arg_utf8
else: else:
c_arg = arg_ascii c_arg = arg_ascii
check('utf8=0', [c_arg], LC_ALL='C') for loc in POSIX_LOCALES:
with self.subTest(LC_ALL=loc):
check('utf8=0', [c_arg], LC_ALL=loc)
def test_optim_level(self): def test_optim_level(self):
# CPython: check that Py_Main() doesn't increment Py_OptimizeFlag # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag

View File

@ -0,0 +1,2 @@
The UTF-8 Mode is now also enabled by the "POSIX" locale, not only by the "C"
locale.

View File

@ -0,0 +1,3 @@
On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also force the
ASCII encoding if the LC_CTYPE locale is "POSIX", not only if the LC_CTYPE
locale is "C".

View File

@ -2102,15 +2102,25 @@ done:
static void static void
config_init_locale(_PyCoreConfig *config) config_init_locale(_PyCoreConfig *config)
{ {
if (_Py_LegacyLocaleDetected()) { if (config->coerce_c_locale < 0) {
/* POSIX locale: enable C locale coercion and UTF-8 Mode */ /* The C locale enables the C locale coercion (PEP 538) */
if (config->utf8_mode < 0) { if (_Py_LegacyLocaleDetected()) {
config->utf8_mode = 1;
}
if (config->coerce_c_locale < 0) {
config->coerce_c_locale = 1; config->coerce_c_locale = 1;
} }
} }
#ifndef MS_WINDOWS
if (config->utf8_mode < 0) {
/* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
if (ctype_loc != NULL
&& (strcmp(ctype_loc, "C") == 0
|| strcmp(ctype_loc, "POSIX") == 0))
{
config->utf8_mode = 1;
}
}
#endif
} }

View File

@ -128,7 +128,7 @@ check_force_ascii(void)
loc = setlocale(LC_CTYPE, NULL); loc = setlocale(LC_CTYPE, NULL);
if (loc == NULL) if (loc == NULL)
goto error; goto error;
if (strcmp(loc, "C") != 0) { if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
/* the LC_CTYPE locale is different than C */ /* the LC_CTYPE locale is different than C */
return 0; return 0;
} }