From 76febd079299d64abffee0bdd7c4c1785e5a0fa7 Mon Sep 17 00:00:00 2001 From: Xavier de Gaye Date: Thu, 15 Dec 2016 20:59:58 +0100 Subject: [PATCH] Issue #26919: On Android, operating system data is now always encoded/decoded to/from UTF-8, instead of the locale encoding to avoid inconsistencies with os.fsencode() and os.fsdecode() which are already using UTF-8. --- Lib/test/test_cmd_line.py | 9 +++++---- Misc/NEWS | 4 ++++ Objects/unicodeobject.c | 6 +++--- Python/fileutils.c | 10 +++++----- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index b71bb9f7ee5..ae2bcd43754 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -8,7 +8,7 @@ import shutil import sys import subprocess import tempfile -from test.support import script_helper +from test.support import script_helper, is_android from test.support.script_helper import (spawn_python, kill_python, assert_python_ok, assert_python_failure) @@ -178,15 +178,16 @@ class CmdLineTest(unittest.TestCase): if not stdout.startswith(pattern): raise AssertionError("%a doesn't start with %a" % (stdout, pattern)) - @unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X') - def test_osx_utf8(self): + @unittest.skipUnless((sys.platform == 'darwin' or + is_android), 'test specific to Mac OS X and Android') + def test_osx_android_utf8(self): def check_output(text): decoded = text.decode('utf-8', 'surrogateescape') expected = ascii(decoded).encode('ascii') + b'\n' env = os.environ.copy() # C locale gives ASCII locale encoding, but Python uses UTF-8 - # to parse the command line arguments on Mac OS X + # to parse the command line arguments on Mac OS X and Android. 
env['LC_ALL'] = 'C' p = subprocess.Popen( diff --git a/Misc/NEWS b/Misc/NEWS index 165f9a01980..477dc9ba146 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ What's New in Python 3.6.1 release candidate 1 Core and Builtins ----------------- +- Issue #26919: On Android, operating system data is now always encoded/decoded + to/from UTF-8, instead of the locale encoding to avoid inconsistencies with + os.fsencode() and os.fsdecode() which are already using UTF-8. + - Issue #28147: Fix a memory leak in split-table dictionaries: setattr() must not convert combined table into split table. Patch written by INADA Naoki. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9c998f7ab3e..44911671a0e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5083,10 +5083,10 @@ onError: return NULL; } -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__ANDROID__) /* Simplified UTF-8 decoder using surrogateescape error handler, - used to decode the command line arguments on Mac OS X. + used to decode the command line arguments on Mac OS X and Android. Return a pointer to a newly allocated wide character string (use PyMem_RawFree() to free the memory), or NULL on memory allocation error. */ @@ -5137,7 +5137,7 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size) return unicode; } -#endif /* __APPLE__ */ +#endif /* __APPLE__ or __ANDROID__ */ /* Primary internal function which creates utf8 encoded bytes objects. 
diff --git a/Python/fileutils.c b/Python/fileutils.c index 6a32c42c80a..e84d66e99a4 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -20,7 +20,7 @@ extern int winerror_to_errno(int); #include <fcntl.h> #endif /* HAVE_FCNTL_H */ -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__ANDROID__) extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size); #endif @@ -273,7 +273,7 @@ decode_ascii_surrogateescape(const char *arg, size_t *size) wchar_t* Py_DecodeLocale(const char* arg, size_t *size) { -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__ANDROID__) wchar_t *wstr; wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg)); if (size != NULL) { @@ -406,7 +406,7 @@ oom: if (size != NULL) *size = (size_t)-1; return NULL; -#endif /* __APPLE__ */ +#endif /* __APPLE__ or __ANDROID__ */ } /* Encode a wide character string to the locale encoding with the @@ -424,7 +424,7 @@ oom: char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos) { -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__ANDROID__) Py_ssize_t len; PyObject *unicode, *bytes = NULL; char *cpath; @@ -522,7 +522,7 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos) bytes = result; } return result; -#endif /* __APPLE__ */ +#endif /* __APPLE__ or __ANDROID__ */ }