Issue #27959: Add the 'oem' encoding, alias 'ansi' to 'mbcs', and move aliasmbcs to the codec lookup
This commit is contained in:
parent
22d0698d3b
commit
f5aba58480
|
@ -1663,7 +1663,7 @@ PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
|
|||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
|
||||
const char *string, /* MBCS encoded string */
|
||||
Py_ssize_t length, /* size of string */
|
||||
Py_ssize_t length, /* size of string */
|
||||
const char *errors /* error handling */
|
||||
);
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
|
|||
"""#"
|
||||
|
||||
import codecs
|
||||
import sys
|
||||
from . import aliases
|
||||
|
||||
_cache = {}
|
||||
|
@ -151,3 +152,12 @@ def search_function(encoding):
|
|||
|
||||
# Register the search_function in the Python codec registry
|
||||
codecs.register(search_function)
|
||||
|
||||
if sys.platform == 'win32':
|
||||
def _alias_mbcs(encoding):
|
||||
import _bootlocale
|
||||
if encoding == _bootlocale.getpreferredencoding(False):
|
||||
import encodings.mbcs
|
||||
return encodings.mbcs.getregentry()
|
||||
|
||||
codecs.register(_alias_mbcs)
|
||||
|
|
|
@ -458,6 +458,7 @@ aliases = {
|
|||
'macturkish' : 'mac_turkish',
|
||||
|
||||
# mbcs codec
|
||||
'ansi' : 'mbcs',
|
||||
'dbcs' : 'mbcs',
|
||||
|
||||
# ptcp154 codec
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
""" Python 'oem' Codec for Windows
|
||||
|
||||
"""
|
||||
# Import them explicitly to cause an ImportError
|
||||
# on non-Windows systems
|
||||
from codecs import oem_encode, oem_decode
|
||||
# for IncrementalDecoder, IncrementalEncoder, ...
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
encode = oem_encode
|
||||
|
||||
def decode(input, errors='strict'):
    """Decode *input* with the 'oem' codec in one stateless call.

    The result is whatever oem_decode() returns when it is told that
    *input* is the final chunk of data.
    """
    # Stateless decoding: the whole input is always the last chunk.
    final = True
    return oem_decode(input, errors, final)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder for the 'oem' codec."""

    def encode(self, input, final=False):
        """Encode *input* and return the first item of oem_encode()'s result.

        *final* is accepted for interface compatibility; it is not used.
        """
        result = oem_encode(input, self.errors)
        return result[0]
|
||||
|
||||
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental decoder for the 'oem' codec."""

    # The buffered base class drives decoding through _buffer_decode;
    # here that hook is oem_decode itself.
    _buffer_decode = oem_decode
|
||||
|
||||
class StreamWriter(codecs.StreamWriter):
    """Stream writer for the 'oem' codec."""

    # Encoding is delegated directly to oem_encode.
    encode = oem_encode
|
||||
|
||||
class StreamReader(codecs.StreamReader):
    """Stream reader for the 'oem' codec."""

    # Decoding is delegated directly to oem_decode.
    decode = oem_decode
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the codec registry entry for the 'oem' codec.

    The entry is a codecs.CodecInfo bundling this module's stateless
    encode/decode callables with its incremental and stream classes.
    """
    entry = codecs.CodecInfo(
        encode,
        decode,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        name='oem',
    )
    return entry
|
16
Lib/site.py
16
Lib/site.py
|
@ -423,21 +423,6 @@ def enablerlcompleter():
|
|||
|
||||
sys.__interactivehook__ = register_readline
|
||||
|
||||
def aliasmbcs():
    """Alias an unsupported Windows default encoding to "mbcs".

    On Windows the preferred encoding may be a "cp***" code page for
    which Python ships no codec, while "mbcs" is always available.
    When the lookup of such an encoding fails, register it as an alias
    of "mbcs".  On other platforms this function does nothing.
    """
    if sys.platform != 'win32':
        return

    import _bootlocale
    import codecs

    enc = _bootlocale.getpreferredencoding(False)
    if not enc.startswith('cp'):  # only "cp***" names are aliased
        return

    try:
        codecs.lookup(enc)
    except LookupError:
        import encodings
        # Reset the cached lookup result for this name before adding
        # the alias.
        encodings._cache[enc] = encodings._unknown
        encodings.aliases.aliases[enc] = 'mbcs'
|
||||
|
||||
CONFIG_LINE = r'^(?P<key>(\w|[-_])+)\s*=\s*(?P<value>.*)\s*$'
|
||||
|
||||
def venv(known_paths):
|
||||
|
@ -560,7 +545,6 @@ def main():
|
|||
setcopyright()
|
||||
sethelper()
|
||||
enablerlcompleter()
|
||||
aliasmbcs()
|
||||
execsitecustomize()
|
||||
if ENABLE_USER_SITE:
|
||||
execusercustomize()
|
||||
|
|
|
@ -8,11 +8,6 @@ import encodings
|
|||
|
||||
from test import support
|
||||
|
||||
if sys.platform == 'win32':
|
||||
VISTA_OR_LATER = (sys.getwindowsversion().major >= 6)
|
||||
else:
|
||||
VISTA_OR_LATER = False
|
||||
|
||||
try:
|
||||
import ctypes
|
||||
except ImportError:
|
||||
|
@ -841,18 +836,13 @@ class CP65001Test(ReadTest, unittest.TestCase):
|
|||
('abc', 'strict', b'abc'),
|
||||
('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'),
|
||||
('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),
|
||||
('\udc80', 'strict', None),
|
||||
('\udc80', 'ignore', b''),
|
||||
('\udc80', 'replace', b'?'),
|
||||
('\udc80', 'backslashreplace', b'\\udc80'),
|
||||
('\udc80', 'namereplace', b'\\udc80'),
|
||||
('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
|
||||
]
|
||||
if VISTA_OR_LATER:
|
||||
tests.extend((
|
||||
('\udc80', 'strict', None),
|
||||
('\udc80', 'ignore', b''),
|
||||
('\udc80', 'replace', b'?'),
|
||||
('\udc80', 'backslashreplace', b'\\udc80'),
|
||||
('\udc80', 'namereplace', b'\\udc80'),
|
||||
('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
|
||||
))
|
||||
else:
|
||||
tests.append(('\udc80', 'strict', b'\xed\xb2\x80'))
|
||||
for text, errors, expected in tests:
|
||||
if expected is not None:
|
||||
try:
|
||||
|
@ -879,17 +869,10 @@ class CP65001Test(ReadTest, unittest.TestCase):
|
|||
(b'[\xff]', 'ignore', '[]'),
|
||||
(b'[\xff]', 'replace', '[\ufffd]'),
|
||||
(b'[\xff]', 'surrogateescape', '[\udcff]'),
|
||||
(b'[\xed\xb2\x80]', 'strict', None),
|
||||
(b'[\xed\xb2\x80]', 'ignore', '[]'),
|
||||
(b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
|
||||
]
|
||||
if VISTA_OR_LATER:
|
||||
tests.extend((
|
||||
(b'[\xed\xb2\x80]', 'strict', None),
|
||||
(b'[\xed\xb2\x80]', 'ignore', '[]'),
|
||||
(b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
|
||||
))
|
||||
else:
|
||||
tests.extend((
|
||||
(b'[\xed\xb2\x80]', 'strict', '[\udc80]'),
|
||||
))
|
||||
for raw, errors, expected in tests:
|
||||
if expected is not None:
|
||||
try:
|
||||
|
@ -904,7 +887,6 @@ class CP65001Test(ReadTest, unittest.TestCase):
|
|||
self.assertRaises(UnicodeDecodeError,
|
||||
raw.decode, 'cp65001', errors)
|
||||
|
||||
@unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
|
||||
def test_lone_surrogates(self):
|
||||
self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001")
|
||||
self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001")
|
||||
|
@ -921,7 +903,6 @@ class CP65001Test(ReadTest, unittest.TestCase):
|
|||
self.assertEqual("[\uDC80]".encode("cp65001", "replace"),
|
||||
b'[?]')
|
||||
|
||||
@unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
|
||||
def test_surrogatepass_handler(self):
|
||||
self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"),
|
||||
b"abc\xed\xa0\x80def")
|
||||
|
@ -1951,6 +1932,8 @@ all_unicode_encodings = [
|
|||
|
||||
if hasattr(codecs, "mbcs_encode"):
|
||||
all_unicode_encodings.append("mbcs")
|
||||
if hasattr(codecs, "oem_encode"):
|
||||
all_unicode_encodings.append("oem")
|
||||
|
||||
# The following encoding is not tested, because it's not supposed
|
||||
# to work:
|
||||
|
@ -3119,11 +3102,10 @@ class CodePageTest(unittest.TestCase):
|
|||
(b'\xff\xf4\x8f\xbf\xbf', 'ignore', '\U0010ffff'),
|
||||
(b'\xff\xf4\x8f\xbf\xbf', 'replace', '\ufffd\U0010ffff'),
|
||||
))
|
||||
if VISTA_OR_LATER:
|
||||
self.check_encode(self.CP_UTF8, (
|
||||
('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'),
|
||||
('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
|
||||
))
|
||||
self.check_encode(self.CP_UTF8, (
|
||||
('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'),
|
||||
('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
|
||||
))
|
||||
|
||||
def test_incremental(self):
|
||||
decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
|
||||
|
@ -3144,6 +3126,20 @@ class CodePageTest(unittest.TestCase):
|
|||
False)
|
||||
self.assertEqual(decoded, ('abc', 3))
|
||||
|
||||
def test_mbcs_alias(self):
    # Looking up the 'default' code page must return the mbcs codec
    # when no more specific codec is available.  'cp123' is the fake
    # code page name reported as the preferred encoding here.
    import _bootlocale

    def fake_preferred_encoding(*args):
        return 'cp123'

    # swap_attr restores the real function even if the assertion fails.
    with support.swap_attr(_bootlocale, 'getpreferredencoding',
                           fake_preferred_encoding):
        info = codecs.lookup('cp123')
        self.assertEqual(info.name, 'mbcs')
|
||||
|
||||
|
||||
class ASCIITest(unittest.TestCase):
|
||||
def test_encode(self):
|
||||
|
|
|
@ -625,6 +625,25 @@ _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
|
|||
return codec_tuple(decoded, consumed);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_codecs.oem_decode
|
||||
data: Py_buffer
|
||||
errors: str(accept={str, NoneType}) = NULL
|
||||
final: int(c_default="0") = False
|
||||
/
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
|
||||
const char *errors, int final)
|
||||
/*[clinic end generated code: output=da1617612f3fcad8 input=95b8a92c446b03cd]*/
|
||||
{
|
||||
Py_ssize_t consumed = data->len;
|
||||
PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
|
||||
data->buf, data->len, errors, final ? NULL : &consumed);
|
||||
return codec_tuple(decoded, consumed);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_codecs.code_page_decode
|
||||
codepage: int
|
||||
|
@ -970,6 +989,21 @@ _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
|
|||
PyUnicode_GET_LENGTH(str));
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_codecs.oem_encode
|
||||
str: unicode
|
||||
errors: str(accept={str, NoneType}) = NULL
|
||||
/
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
/*[clinic end generated code: output=65d5982c737de649 input=3fc5f0028aad3cda]*/
{
    /* Encode `str` with the OEM code page (CP_OEMCP) and pair the
     * result with the length of the input string. */
    PyObject *encoded = PyUnicode_EncodeCodePage(CP_OEMCP, str, errors);
    Py_ssize_t length = PyUnicode_GET_LENGTH(str);

    return codec_tuple(encoded, length);
}
|
||||
|
||||
/*[clinic input]
|
||||
_codecs.code_page_encode
|
||||
code_page: int
|
||||
|
@ -1075,6 +1109,8 @@ static PyMethodDef _codecs_functions[] = {
|
|||
_CODECS_READBUFFER_ENCODE_METHODDEF
|
||||
_CODECS_MBCS_ENCODE_METHODDEF
|
||||
_CODECS_MBCS_DECODE_METHODDEF
|
||||
_CODECS_OEM_ENCODE_METHODDEF
|
||||
_CODECS_OEM_DECODE_METHODDEF
|
||||
_CODECS_CODE_PAGE_ENCODE_METHODDEF
|
||||
_CODECS_CODE_PAGE_DECODE_METHODDEF
|
||||
_CODECS_REGISTER_ERROR_METHODDEF
|
||||
|
|
|
@ -805,6 +805,45 @@ exit:
|
|||
|
||||
#if defined(HAVE_MBCS)
|
||||
|
||||
PyDoc_STRVAR(_codecs_oem_decode__doc__,
|
||||
"oem_decode($module, data, errors=None, final=False, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _CODECS_OEM_DECODE_METHODDEF \
|
||||
{"oem_decode", (PyCFunction)_codecs_oem_decode, METH_VARARGS, _codecs_oem_decode__doc__},
|
||||
|
||||
static PyObject *
|
||||
_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
|
||||
const char *errors, int final);
|
||||
|
||||
static PyObject *
|
||||
_codecs_oem_decode(PyObject *module, PyObject *args)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
Py_buffer data = {NULL, NULL};
|
||||
const char *errors = NULL;
|
||||
int final = 0;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "y*|zi:oem_decode",
|
||||
&data, &errors, &final)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _codecs_oem_decode_impl(module, &data, errors, final);
|
||||
|
||||
exit:
|
||||
/* Cleanup for data */
|
||||
if (data.obj) {
|
||||
PyBuffer_Release(&data);
|
||||
}
|
||||
|
||||
return return_value;
|
||||
}
|
||||
|
||||
#endif /* defined(HAVE_MBCS) */
|
||||
|
||||
#if defined(HAVE_MBCS)
|
||||
|
||||
PyDoc_STRVAR(_codecs_code_page_decode__doc__,
|
||||
"code_page_decode($module, codepage, data, errors=None, final=False, /)\n"
|
||||
"--\n"
|
||||
|
@ -1346,6 +1385,38 @@ exit:
|
|||
|
||||
#if defined(HAVE_MBCS)
|
||||
|
||||
PyDoc_STRVAR(_codecs_oem_encode__doc__,
|
||||
"oem_encode($module, str, errors=None, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _CODECS_OEM_ENCODE_METHODDEF \
|
||||
{"oem_encode", (PyCFunction)_codecs_oem_encode, METH_VARARGS, _codecs_oem_encode__doc__},
|
||||
|
||||
static PyObject *
|
||||
_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors);
|
||||
|
||||
static PyObject *
|
||||
_codecs_oem_encode(PyObject *module, PyObject *args)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
PyObject *str;
|
||||
const char *errors = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "U|z:oem_encode",
|
||||
&str, &errors)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _codecs_oem_encode_impl(module, str, errors);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
#endif /* defined(HAVE_MBCS) */
|
||||
|
||||
#if defined(HAVE_MBCS)
|
||||
|
||||
PyDoc_STRVAR(_codecs_code_page_encode__doc__,
|
||||
"code_page_encode($module, code_page, str, errors=None, /)\n"
|
||||
"--\n"
|
||||
|
@ -1446,6 +1517,10 @@ exit:
|
|||
#define _CODECS_MBCS_DECODE_METHODDEF
|
||||
#endif /* !defined(_CODECS_MBCS_DECODE_METHODDEF) */
|
||||
|
||||
#ifndef _CODECS_OEM_DECODE_METHODDEF
|
||||
#define _CODECS_OEM_DECODE_METHODDEF
|
||||
#endif /* !defined(_CODECS_OEM_DECODE_METHODDEF) */
|
||||
|
||||
#ifndef _CODECS_CODE_PAGE_DECODE_METHODDEF
|
||||
#define _CODECS_CODE_PAGE_DECODE_METHODDEF
|
||||
#endif /* !defined(_CODECS_CODE_PAGE_DECODE_METHODDEF) */
|
||||
|
@ -1454,7 +1529,11 @@ exit:
|
|||
#define _CODECS_MBCS_ENCODE_METHODDEF
|
||||
#endif /* !defined(_CODECS_MBCS_ENCODE_METHODDEF) */
|
||||
|
||||
#ifndef _CODECS_OEM_ENCODE_METHODDEF
|
||||
#define _CODECS_OEM_ENCODE_METHODDEF
|
||||
#endif /* !defined(_CODECS_OEM_ENCODE_METHODDEF) */
|
||||
|
||||
#ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
|
||||
#define _CODECS_CODE_PAGE_ENCODE_METHODDEF
|
||||
#endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
|
||||
/*[clinic end generated code: output=0221e4eece62c905 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=7874e2d559d49368 input=a9049054013a1b77]*/
|
||||
|
|
Loading…
Reference in New Issue