mirror of https://github.com/python/cpython
merge heads
This commit is contained in:
commit
26fd8feb5e
|
@ -32,7 +32,6 @@ Modules/Setup.local
|
|||
Modules/config.c
|
||||
Modules/ld_so_aix$
|
||||
Parser/pgen$
|
||||
PCbuild/amd64/
|
||||
^core
|
||||
^python-gdb.py
|
||||
^python.exe-gdb.py
|
||||
|
@ -56,6 +55,12 @@ PC/python_nt*.h
|
|||
PC/pythonnt_rc*.h
|
||||
PC/*.obj
|
||||
PC/*.exe
|
||||
PC/*/*.user
|
||||
PC/*/*.ncb
|
||||
PC/*/*.suo
|
||||
PC/*/Win32-temp-*
|
||||
PC/*/x64-temp-*
|
||||
PC/*/amd64
|
||||
PCbuild/*.exe
|
||||
PCbuild/*.dll
|
||||
PCbuild/*.pdb
|
||||
|
@ -69,6 +74,8 @@ PCbuild/*.suo
|
|||
PCbuild/*.*sdf
|
||||
PCbuild/Win32-temp-*
|
||||
PCbuild/x64-temp-*
|
||||
PCbuild/amd64
|
||||
BuildLog.htm
|
||||
__pycache__
|
||||
Modules/_testembed
|
||||
.coverage
|
||||
|
|
|
@ -17,10 +17,10 @@ yourself. However the bundled generator knows how to generate most email in a
|
|||
standards-compliant way, should handle MIME and non-MIME email messages just
|
||||
fine, and is designed so that the transformation from flat text, to a message
|
||||
structure via the :class:`~email.parser.Parser` class, and back to flat text,
|
||||
is idempotent (the input is identical to the output). On the other hand, using
|
||||
the Generator on a :class:`~email.message.Message` constructed by program may
|
||||
result in changes to the :class:`~email.message.Message` object as defaults are
|
||||
filled in.
|
||||
is idempotent (the input is identical to the output) [#]_. On the other hand,
|
||||
using the Generator on a :class:`~email.message.Message` constructed by program
|
||||
may result in changes to the :class:`~email.message.Message` object as defaults
|
||||
are filled in.
|
||||
|
||||
:class:`bytes` output can be generated using the :class:`BytesGenerator` class.
|
||||
If the message object structure contains non-ASCII bytes, this generator's
|
||||
|
@ -223,3 +223,12 @@ representing the part.
|
|||
The default value for *fmt* is ``None``, meaning ::
|
||||
|
||||
[Non-text (%(type)s) part of message omitted, filename %(filename)s]
|
||||
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#] This statement assumes that you use the appropriate setting for the
|
||||
``unixfrom`` argument, and that you set maxheaderlen=0 (which will
|
||||
preserve whatever the input line lengths were). It is also not strictly
|
||||
true, since in many cases runs of whitespace in headers are collapsed
|
||||
into single blanks. The latter is a bug that will eventually be fixed.
|
||||
|
|
|
@ -339,6 +339,15 @@ and also the following constants for integer status codes:
|
|||
| :const:`UPGRADE_REQUIRED` | ``426`` | HTTP Upgrade to TLS, |
|
||||
| | | :rfc:`2817`, Section 6 |
|
||||
+------------------------------------------+---------+-----------------------------------------------------------------------+
|
||||
| :const:`PRECONDITION_REQUIRED` | ``428`` | Additional HTTP Status Codes, |
|
||||
| | | :rfc:`6585`, Section 3 |
|
||||
+------------------------------------------+---------+-----------------------------------------------------------------------+
|
||||
| :const:`TOO_MANY_REQUESTS` | ``429`` | Additional HTTP Status Codes, |
|
||||
| | | :rfc:`6585`, Section 4 |
|
||||
+------------------------------------------+---------+-----------------------------------------------------------------------+
|
||||
| :const:`REQUEST_HEADER_FIELDS_TOO_LARGE` | ``431`` | Additional HTTP Status Codes, |
|
||||
| | | :rfc:`6585`, Section 5 |
|
||||
+------------------------------------------+---------+-----------------------------------------------------------------------+
|
||||
| :const:`INTERNAL_SERVER_ERROR` | ``500`` | HTTP/1.1, `RFC 2616, Section |
|
||||
| | | 10.5.1 |
|
||||
| | | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.1>`_ |
|
||||
|
@ -369,6 +378,12 @@ and also the following constants for integer status codes:
|
|||
| :const:`NOT_EXTENDED` | ``510`` | An HTTP Extension Framework, |
|
||||
| | | :rfc:`2774`, Section 7 |
|
||||
+------------------------------------------+---------+-----------------------------------------------------------------------+
|
||||
| :const:`NETWORK_AUTHENTICATION_REQUIRED` | ``511`` | Additional HTTP Status Codes, |
|
||||
| | | :rfc:`6585`, Section 6 |
|
||||
+------------------------------------------+---------+-----------------------------------------------------------------------+
|
||||
|
||||
.. versionchanged:: 3.3
|
||||
Added codes ``428``, ``429``, ``431`` and ``511`` from :rfc:`6585`.
|
||||
|
||||
|
||||
.. data:: responses
|
||||
|
|
|
@ -141,6 +141,9 @@ UNPROCESSABLE_ENTITY = 422
|
|||
LOCKED = 423
|
||||
FAILED_DEPENDENCY = 424
|
||||
UPGRADE_REQUIRED = 426
|
||||
PRECONDITION_REQUIRED = 428
|
||||
TOO_MANY_REQUESTS = 429
|
||||
REQUEST_HEADER_FIELDS_TOO_LARGE = 431
|
||||
|
||||
# server error
|
||||
INTERNAL_SERVER_ERROR = 500
|
||||
|
@ -151,6 +154,7 @@ GATEWAY_TIMEOUT = 504
|
|||
HTTP_VERSION_NOT_SUPPORTED = 505
|
||||
INSUFFICIENT_STORAGE = 507
|
||||
NOT_EXTENDED = 510
|
||||
NETWORK_AUTHENTICATION_REQUIRED = 511
|
||||
|
||||
# Mapping status codes to official W3C names
|
||||
responses = {
|
||||
|
@ -192,6 +196,9 @@ responses = {
|
|||
415: 'Unsupported Media Type',
|
||||
416: 'Requested Range Not Satisfiable',
|
||||
417: 'Expectation Failed',
|
||||
428: 'Precondition Required',
|
||||
429: 'Too Many Requests',
|
||||
431: 'Request Header Fields Too Large',
|
||||
|
||||
500: 'Internal Server Error',
|
||||
501: 'Not Implemented',
|
||||
|
@ -199,6 +206,7 @@ responses = {
|
|||
503: 'Service Unavailable',
|
||||
504: 'Gateway Timeout',
|
||||
505: 'HTTP Version Not Supported',
|
||||
511: 'Network Authentication Required',
|
||||
}
|
||||
|
||||
# maximal amount of data to read at one time in _safe_read
|
||||
|
|
|
@ -573,7 +573,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
|
|||
|
||||
# Table mapping response codes to messages; entries have the
|
||||
# form {code: (shortmessage, longmessage)}.
|
||||
# See RFC 2616.
|
||||
# See RFC 2616 and 6585.
|
||||
responses = {
|
||||
100: ('Continue', 'Request received, please continue'),
|
||||
101: ('Switching Protocols',
|
||||
|
@ -628,6 +628,12 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
|
|||
'Cannot satisfy request range.'),
|
||||
417: ('Expectation Failed',
|
||||
'Expect condition could not be satisfied.'),
|
||||
428: ('Precondition Required',
|
||||
'The origin server requires the request to be conditional.'),
|
||||
429: ('Too Many Requests', 'The user has sent too many requests '
|
||||
'in a given amount of time ("rate limiting").'),
|
||||
431: ('Request Header Fields Too Large', 'The server is unwilling to '
|
||||
'process the request because its header fields are too large.'),
|
||||
|
||||
500: ('Internal Server Error', 'Server got itself in trouble'),
|
||||
501: ('Not Implemented',
|
||||
|
@ -638,6 +644,8 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
|
|||
504: ('Gateway Timeout',
|
||||
'The gateway server did not receive a timely response'),
|
||||
505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
|
||||
511: ('Network Authentication Required',
|
||||
'The client needs to authenticate to gain network access.'),
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -23,6 +23,28 @@ del sys.modules['bisect']
|
|||
import bisect as c_bisect
|
||||
|
||||
|
||||
class Range(object):
    """A trivial range()-like object without any integer width limitations.

    Items are computed on demand as ``start + index``, so arbitrarily large
    bounds can be used.  ``insert()`` does not modify the sequence; it only
    records its arguments in ``last_insert`` so callers can inspect what an
    insort function asked for.
    """

    def __init__(self, start, stop):
        self.start = start
        self.stop = stop
        # (index, item) of the most recent insert() call, or None.
        self.last_insert = None

    def __len__(self):
        return self.stop - self.start

    def __getitem__(self, idx):
        """Return the item at *idx*; negative indices count from the end.

        Raises IndexError when *idx* falls outside ``[-len, len)``.
        """
        n = self.stop - self.start
        if idx < 0:
            idx += n
        # Check both bounds: an index below -n is still negative after the
        # adjustment above and must not silently yield a value before start.
        if not 0 <= idx < n:
            raise IndexError(idx)
        return self.start + idx

    def insert(self, idx, item):
        """Record the requested insertion instead of performing it."""
        self.last_insert = idx, item
|
||||
|
||||
|
||||
class TestBisect(unittest.TestCase):
|
||||
module = None
|
||||
|
||||
|
@ -125,9 +147,28 @@ class TestBisect(unittest.TestCase):
|
|||
def test_large_range(self):
|
||||
# Issue 13496
|
||||
mod = self.module
|
||||
data = range(sys.maxsize-1)
|
||||
self.assertEqual(mod.bisect_left(data, sys.maxsize-3), sys.maxsize-3)
|
||||
self.assertEqual(mod.bisect_right(data, sys.maxsize-3), sys.maxsize-2)
|
||||
n = sys.maxsize
|
||||
data = range(n-1)
|
||||
self.assertEqual(mod.bisect_left(data, n-3), n-3)
|
||||
self.assertEqual(mod.bisect_right(data, n-3), n-2)
|
||||
self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3)
|
||||
self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2)
|
||||
|
||||
def test_large_pyrange(self):
    # Same checks as test_large_range, but on the pure-Python Range so that
    # no C-imposed limits on range() parameters apply.
    bisect_mod = self.module
    limit = sys.maxsize
    seq = Range(0, limit - 1)
    target = limit - 3
    self.assertEqual(bisect_mod.bisect_left(seq, target), target)
    self.assertEqual(bisect_mod.bisect_right(seq, target), target + 1)
    self.assertEqual(
        bisect_mod.bisect_left(seq, target, limit - 10, limit), target)
    self.assertEqual(
        bisect_mod.bisect_right(seq, target, limit - 10, limit), target + 1)
    # The insort functions must pass the computed position to insert()
    # unchanged; Range records it in last_insert.
    item = limit - 100
    bisect_mod.insort_left(seq, item, item - 50, item + 50)
    self.assertEqual(seq.last_insert, (item, item))
    item = limit - 200
    bisect_mod.insort_right(seq, item, item - 50, item + 50)
    self.assertEqual(seq.last_insert, (item + 1, item))
|
||||
|
||||
def test_random(self, n=25):
|
||||
from random import randrange
|
||||
|
|
|
@ -137,8 +137,57 @@ class PkgutilPEP302Tests(unittest.TestCase):
|
|||
self.assertEqual(foo.loads, 1)
|
||||
del sys.modules['foo']
|
||||
|
||||
|
||||
class ExtendPathTests(unittest.TestCase):
    """Check that pkgutil.extend_path() merges same-named package dirs."""

    @staticmethod
    def _discard_path(dirname):
        """Remove *dirname* from sys.path if it is still there."""
        try:
            sys.path.remove(dirname)
        except ValueError:
            pass

    def create_init(self, pkgname):
        """Create a temporary sys.path entry containing package *pkgname*.

        The package's __init__.py extends its __path__ with
        pkgutil.extend_path().  Returns the temporary directory; removing it
        from sys.path and from disk is registered as cleanup.
        """
        dirname = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, dirname)
        sys.path.insert(0, dirname)
        # Cleanups run last-in first-out, so the sys.path entry is dropped
        # before the directory itself is deleted.  Removing by value (not by
        # position) cannot delete an unrelated entry.
        self.addCleanup(self._discard_path, dirname)

        pkgdir = os.path.join(dirname, pkgname)
        os.mkdir(pkgdir)
        with open(os.path.join(pkgdir, '__init__.py'), 'w') as fl:
            fl.write('from pkgutil import extend_path\n__path__ = extend_path(__path__, __name__)\n')

        return dirname

    def create_submodule(self, dirname, pkgname, submodule_name, value):
        """Write <dirname>/<pkgname>/<submodule_name>.py defining ``value``."""
        module_name = os.path.join(dirname, pkgname, submodule_name + '.py')
        with open(module_name, 'w') as fl:
            print('value={}'.format(value), file=fl)

    def setUp(self):
        # Create 2 directories on sys.path
        self.pkgname = 'foo'
        self.dirname_0 = self.create_init(self.pkgname)
        self.dirname_1 = self.create_init(self.pkgname)

    def tearDown(self):
        # Use pop() rather than del: if an import in the test failed, the
        # module is absent and a KeyError here would mask the real failure.
        for suffix in ('', '.bar', '.baz'):
            sys.modules.pop(self.pkgname + suffix, None)

    def test_simple(self):
        self.create_submodule(self.dirname_0, self.pkgname, 'bar', 0)
        self.create_submodule(self.dirname_1, self.pkgname, 'baz', 1)
        import foo.bar
        import foo.baz
        # Ensure we read the expected values
        self.assertEqual(foo.bar.value, 0)
        self.assertEqual(foo.baz.value, 1)

        # Ensure the path is set up correctly
        self.assertEqual(sorted(foo.__path__),
                         sorted([os.path.join(self.dirname_0, self.pkgname),
                                 os.path.join(self.dirname_1, self.pkgname)]))

    # XXX: test .pkg files
|
||||
|
||||
|
||||
def test_main():
|
||||
run_unittest(PkgutilTests, PkgutilPEP302Tests)
|
||||
run_unittest(PkgutilTests, PkgutilPEP302Tests, ExtendPathTests)
|
||||
# this is necessary if test is run repeated (like when finding leaks)
|
||||
import zipimport
|
||||
zipimport._zip_directory_cache.clear()
|
||||
|
|
|
@ -540,12 +540,19 @@ class Misc:
|
|||
|
||||
The type keyword specifies the form in which the data is
|
||||
to be returned and should be an atom name such as STRING
|
||||
or FILE_NAME. Type defaults to STRING.
|
||||
or FILE_NAME. Type defaults to STRING, except on X11, where the default
|
||||
is to try UTF8_STRING and fall back to STRING.
|
||||
|
||||
This command is equivalent to:
|
||||
|
||||
selection_get(CLIPBOARD)
|
||||
"""
|
||||
if 'type' not in kw and self._windowingsystem == 'x11':
|
||||
try:
|
||||
kw['type'] = 'UTF8_STRING'
|
||||
return self.tk.call(('clipboard', 'get') + self._options(kw))
|
||||
except TclError:
|
||||
del kw['type']
|
||||
return self.tk.call(('clipboard', 'get') + self._options(kw))
|
||||
|
||||
def clipboard_clear(self, **kw):
|
||||
|
@ -627,8 +634,16 @@ class Misc:
|
|||
A keyword parameter selection specifies the name of
|
||||
the selection and defaults to PRIMARY. A keyword
|
||||
parameter displayof specifies a widget on the display
|
||||
to use."""
|
||||
to use. A keyword parameter type specifies the form of data to be
|
||||
fetched, defaulting to STRING except on X11, where UTF8_STRING is tried
|
||||
before STRING."""
|
||||
if 'displayof' not in kw: kw['displayof'] = self._w
|
||||
if 'type' not in kw and self._windowingsystem == 'x11':
|
||||
try:
|
||||
kw['type'] = 'UTF8_STRING'
|
||||
return self.tk.call(('selection', 'get') + self._options(kw))
|
||||
except TclError:
|
||||
del kw['type']
|
||||
return self.tk.call(('selection', 'get') + self._options(kw))
|
||||
def selection_handle(self, command, **kw):
|
||||
"""Specify a function COMMAND to call if the X
|
||||
|
@ -1043,6 +1058,15 @@ class Misc:
|
|||
if displayof is None:
|
||||
return ('-displayof', self._w)
|
||||
return ()
|
||||
@property
def _windowingsystem(self):
    """Internal function.

    Return the result of Tk's ``tk windowingsystem`` command, caching it
    on the root widget as ``_windowingsystem_cached`` after the first call.
    """
    try:
        return self._root()._windowingsystem_cached
    except AttributeError:
        # Not cached yet: query Tk, then store the answer on the root.
        system = self.tk.call('tk', 'windowingsystem')
        self._root()._windowingsystem_cached = system
        return system
|
||||
def _options(self, cnf, kw = None):
|
||||
"""Internal function."""
|
||||
if kw:
|
||||
|
|
|
@ -919,6 +919,7 @@ Ralf Schmitt
|
|||
Michael Schneider
|
||||
Peter Schneider-Kamp
|
||||
Arvin Schnell
|
||||
Robin Schreiber
|
||||
Chad J. Schroeder
|
||||
Sam Schulenburg
|
||||
Stefan Schwarzer
|
||||
|
@ -1129,6 +1130,7 @@ Florent Xicluna
|
|||
Hirokazu Yamamoto
|
||||
Ka-Ping Yee
|
||||
Jason Yeo
|
||||
EungJun Yi
|
||||
Bob Yodlowski
|
||||
Danny Yoo
|
||||
George Yoshida
|
||||
|
|
18
Misc/NEWS
18
Misc/NEWS
|
@ -10,6 +10,9 @@ What's New in Python 3.3.0 Alpha 4?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs.
|
||||
Patch by Serhiy Storchaka.
|
||||
|
||||
- asdl_seq and asdl_int_seq are now Py_ssize_t sized.
|
||||
|
||||
- Issue #14133 (PEP 415): Implement suppression of __context__ display with an
|
||||
|
@ -31,6 +34,21 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #14829: Fix bisect and range() indexing with large indices
|
||||
(>= 2 ** 32) under 64-bit Windows.
|
||||
|
||||
- Issue #14732: The _csv module now uses PEP 3121 module initialization.
|
||||
Patch by Robin Schreiber.
|
||||
|
||||
- Issue #14809: Add HTTP status codes introduced by RFC 6585 to http.server
|
||||
and http.client. Patch by EungJun Yi.
|
||||
|
||||
- Issue #14777: tkinter may return undecoded UTF-8 bytes as a string when
|
||||
accessing the Tk clipboard.  Modify clipboard_get() to first request type
|
||||
UTF8_STRING when no specific type is requested in an X11 windowing
|
||||
environment, falling back to the current default type STRING if that fails.
|
||||
Original patch by Thomas Kluyver.
|
||||
|
||||
- Issue #14773: Fix os.fwalk() failing on dangling symlinks.
|
||||
|
||||
- Issue #12541: Be lenient with quotes around Realm field of HTTP Basic
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
Converted to C by Dmitry Vasiliev (dima at hlabs.spb.ru).
|
||||
*/
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include "Python.h"
|
||||
|
||||
static Py_ssize_t
|
||||
|
@ -195,8 +196,7 @@ insort_left(PyObject *self, PyObject *args, PyObject *kw)
|
|||
return NULL;
|
||||
} else {
|
||||
_Py_IDENTIFIER(insert);
|
||||
|
||||
result = _PyObject_CallMethodId(list, &PyId_insert, "iO", index, item);
|
||||
result = _PyObject_CallMethodId(list, &PyId_insert, "nO", index, item);
|
||||
if (result == NULL)
|
||||
return NULL;
|
||||
Py_DECREF(result);
|
||||
|
|
105
Modules/_csv.c
105
Modules/_csv.c
|
@ -16,9 +16,39 @@ module instead.
|
|||
#define IS_BASESTRING(o) \
|
||||
PyUnicode_Check(o)
|
||||
|
||||
static PyObject *error_obj; /* CSV exception */
|
||||
static PyObject *dialects; /* Dialect registry */
|
||||
static long field_limit = 128 * 1024; /* max parsed field size */
|
||||
typedef struct {
|
||||
PyObject *error_obj; /* CSV exception */
|
||||
PyObject *dialects; /* Dialect registry */
|
||||
long field_limit; /* max parsed field size */
|
||||
} _csvstate;
|
||||
|
||||
#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
|
||||
|
||||
static int
|
||||
_csv_clear(PyObject *m)
|
||||
{
|
||||
Py_CLEAR(_csvstate(m)->error_obj);
|
||||
Py_CLEAR(_csvstate(m)->dialects);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
_csv_traverse(PyObject *m, visitproc visit, void *arg)
|
||||
{
|
||||
Py_VISIT(_csvstate(m)->error_obj);
|
||||
Py_VISIT(_csvstate(m)->dialects);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
_csv_free(void *m)
|
||||
{
|
||||
_csv_clear((PyObject *)m);
|
||||
}
|
||||
|
||||
static struct PyModuleDef _csvmodule;
|
||||
|
||||
#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
|
||||
|
||||
typedef enum {
|
||||
START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
|
||||
|
@ -103,10 +133,10 @@ get_dialect_from_registry(PyObject * name_obj)
|
|||
{
|
||||
PyObject *dialect_obj;
|
||||
|
||||
dialect_obj = PyDict_GetItem(dialects, name_obj);
|
||||
dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
|
||||
if (dialect_obj == NULL) {
|
||||
if (!PyErr_Occurred())
|
||||
PyErr_Format(error_obj, "unknown dialect");
|
||||
PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
|
||||
}
|
||||
else
|
||||
Py_INCREF(dialect_obj);
|
||||
|
@ -544,9 +574,9 @@ parse_grow_buff(ReaderObj *self)
|
|||
static int
|
||||
parse_add_char(ReaderObj *self, Py_UCS4 c)
|
||||
{
|
||||
if (self->field_len >= field_limit) {
|
||||
PyErr_Format(error_obj, "field larger than field limit (%ld)",
|
||||
field_limit);
|
||||
if (self->field_len >= _csvstate_global->field_limit) {
|
||||
PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
|
||||
_csvstate_global->field_limit);
|
||||
return -1;
|
||||
}
|
||||
if (self->field_len == self->field_size && !parse_grow_buff(self))
|
||||
|
@ -703,7 +733,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
|
|||
}
|
||||
else {
|
||||
/* illegal */
|
||||
PyErr_Format(error_obj, "'%c' expected after '%c'",
|
||||
PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
|
||||
dialect->delimiter,
|
||||
dialect->quotechar);
|
||||
return -1;
|
||||
|
@ -716,7 +746,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
|
|||
else if (c == '\0')
|
||||
self->state = START_RECORD;
|
||||
else {
|
||||
PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
|
||||
PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
|
@ -755,12 +785,12 @@ Reader_iternext(ReaderObj *self)
|
|||
if (lineobj == NULL) {
|
||||
/* End of input OR exception */
|
||||
if (!PyErr_Occurred() && self->field_len != 0)
|
||||
PyErr_Format(error_obj,
|
||||
PyErr_Format(_csvstate_global->error_obj,
|
||||
"newline inside string");
|
||||
return NULL;
|
||||
}
|
||||
if (!PyUnicode_Check(lineobj)) {
|
||||
PyErr_Format(error_obj,
|
||||
PyErr_Format(_csvstate_global->error_obj,
|
||||
"iterator should return strings, "
|
||||
"not %.200s "
|
||||
"(did you open the file in text mode?)",
|
||||
|
@ -778,7 +808,7 @@ Reader_iternext(ReaderObj *self)
|
|||
c = PyUnicode_READ(kind, data, pos);
|
||||
if (c == '\0') {
|
||||
Py_DECREF(lineobj);
|
||||
PyErr_Format(error_obj,
|
||||
PyErr_Format(_csvstate_global->error_obj,
|
||||
"line contains NULL byte");
|
||||
goto err;
|
||||
}
|
||||
|
@ -994,7 +1024,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
|
|||
}
|
||||
if (want_escape) {
|
||||
if (!dialect->escapechar) {
|
||||
PyErr_Format(error_obj,
|
||||
PyErr_Format(_csvstate_global->error_obj,
|
||||
"need to escape, but no escapechar set");
|
||||
return -1;
|
||||
}
|
||||
|
@ -1010,7 +1040,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
|
|||
*/
|
||||
if (i == 0 && quote_empty) {
|
||||
if (dialect->quoting == QUOTE_NONE) {
|
||||
PyErr_Format(error_obj,
|
||||
PyErr_Format(_csvstate_global->error_obj,
|
||||
"single empty field record must be quoted");
|
||||
return -1;
|
||||
}
|
||||
|
@ -1127,7 +1157,7 @@ csv_writerow(WriterObj *self, PyObject *seq)
|
|||
PyObject *line, *result;
|
||||
|
||||
if (!PySequence_Check(seq))
|
||||
return PyErr_Format(error_obj, "sequence expected");
|
||||
return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
|
||||
|
||||
len = PySequence_Length(seq);
|
||||
if (len < 0)
|
||||
|
@ -1353,7 +1383,7 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
|
|||
static PyObject *
|
||||
csv_list_dialects(PyObject *module, PyObject *args)
|
||||
{
|
||||
return PyDict_Keys(dialects);
|
||||
return PyDict_Keys(_csvstate_global->dialects);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -1372,7 +1402,7 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
|
|||
dialect = _call_dialect(dialect_obj, kwargs);
|
||||
if (dialect == NULL)
|
||||
return NULL;
|
||||
if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
|
||||
if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
|
||||
Py_DECREF(dialect);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1384,8 +1414,8 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
|
|||
static PyObject *
|
||||
csv_unregister_dialect(PyObject *module, PyObject *name_obj)
|
||||
{
|
||||
if (PyDict_DelItem(dialects, name_obj) < 0)
|
||||
return PyErr_Format(error_obj, "unknown dialect");
|
||||
if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
|
||||
return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
|
||||
Py_INCREF(Py_None);
|
||||
return Py_None;
|
||||
}
|
||||
|
@ -1400,7 +1430,7 @@ static PyObject *
|
|||
csv_field_size_limit(PyObject *module, PyObject *args)
|
||||
{
|
||||
PyObject *new_limit = NULL;
|
||||
long old_limit = field_limit;
|
||||
long old_limit = _csvstate_global->field_limit;
|
||||
|
||||
if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
|
||||
return NULL;
|
||||
|
@ -1410,9 +1440,9 @@ csv_field_size_limit(PyObject *module, PyObject *args)
|
|||
"limit must be an integer");
|
||||
return NULL;
|
||||
}
|
||||
field_limit = PyLong_AsLong(new_limit);
|
||||
if (field_limit == -1 && PyErr_Occurred()) {
|
||||
field_limit = old_limit;
|
||||
_csvstate_global->field_limit = PyLong_AsLong(new_limit);
|
||||
if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
|
||||
_csvstate_global->field_limit = old_limit;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -1551,17 +1581,16 @@ static struct PyMethodDef csv_methods[] = {
|
|||
{ NULL, NULL }
|
||||
};
|
||||
|
||||
|
||||
static struct PyModuleDef _csvmodule = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
"_csv",
|
||||
csv_module_doc,
|
||||
-1,
|
||||
sizeof(_csvstate),
|
||||
csv_methods,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
_csv_traverse,
|
||||
_csv_clear,
|
||||
_csv_free
|
||||
};
|
||||
|
||||
PyMODINIT_FUNC
|
||||
|
@ -1589,11 +1618,16 @@ PyInit__csv(void)
|
|||
MODULE_VERSION) == -1)
|
||||
return NULL;
|
||||
|
||||
/* Set the field limit */
|
||||
_csvstate(module)->field_limit = 128 * 1024;
|
||||
/* Do I still need to add this var to the Module Dict? */
|
||||
|
||||
/* Add _dialects dictionary */
|
||||
dialects = PyDict_New();
|
||||
if (dialects == NULL)
|
||||
_csvstate(module)->dialects = PyDict_New();
|
||||
if (_csvstate(module)->dialects == NULL)
|
||||
return NULL;
|
||||
if (PyModule_AddObject(module, "_dialects", dialects))
|
||||
Py_INCREF(_csvstate(module)->dialects);
|
||||
if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
|
||||
return NULL;
|
||||
|
||||
/* Add quote styles into dictionary */
|
||||
|
@ -1609,9 +1643,10 @@ PyInit__csv(void)
|
|||
return NULL;
|
||||
|
||||
/* Add the CSV exception object to the module. */
|
||||
error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
|
||||
if (error_obj == NULL)
|
||||
_csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
|
||||
if (_csvstate(module)->error_obj == NULL)
|
||||
return NULL;
|
||||
PyModule_AddObject(module, "Error", error_obj);
|
||||
Py_INCREF(_csvstate(module)->error_obj);
|
||||
PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
|
||||
return module;
|
||||
}
|
||||
|
|
|
@ -349,7 +349,8 @@ PyException_SetContext(PyObject *self, PyObject *context) {
|
|||
|
||||
static struct PyMemberDef BaseException_members[] = {
|
||||
{"__suppress_context__", T_BOOL,
|
||||
offsetof(PyBaseExceptionObject, suppress_context)}
|
||||
offsetof(PyBaseExceptionObject, suppress_context)},
|
||||
{NULL}
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -308,7 +308,7 @@ compute_range_item(rangeobject *r, PyObject *arg)
|
|||
static PyObject *
|
||||
range_item(rangeobject *r, Py_ssize_t i)
|
||||
{
|
||||
PyObject *res, *arg = PyLong_FromLong(i);
|
||||
PyObject *res, *arg = PyLong_FromSsize_t(i);
|
||||
if (!arg) {
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -215,7 +215,6 @@ InvalidContinuation:
|
|||
goto Return;
|
||||
}
|
||||
|
||||
#undef LONG_PTR_MASK
|
||||
#undef ASCII_CHAR_MASK
|
||||
|
||||
|
||||
|
@ -415,4 +414,152 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
|
|||
#undef MAX_SHORT_UNICHARS
|
||||
}
|
||||
|
||||
/* The pattern for constructing UCS2-repeated masks. */
|
||||
#if SIZEOF_LONG == 8
|
||||
# define UCS2_REPEAT_MASK 0x0001000100010001ul
|
||||
#elif SIZEOF_LONG == 4
|
||||
# define UCS2_REPEAT_MASK 0x00010001ul
|
||||
#else
|
||||
# error C 'long' size should be either 4 or 8!
|
||||
#endif
|
||||
|
||||
/* The mask for fast checking. */
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
/* The mask for fast checking of whether a C 'long' contains a
|
||||
non-ASCII or non-Latin1 UTF16-encoded characters. */
|
||||
# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))
|
||||
#else
|
||||
/* The mask for fast checking of whether a C 'long' may contain
|
||||
UTF16-encoded surrogate characters. This is an efficient heuristic,
|
||||
assuming that non-surrogate characters with a code point >= 0x8000 are
|
||||
rare in most input.
|
||||
*/
|
||||
# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * 0x8000u)
|
||||
#endif
|
||||
/* The mask for fast byte-swapping. */
|
||||
#define STRIPPED_MASK (UCS2_REPEAT_MASK * 0x00FFu)
|
||||
/* Swap bytes. */
|
||||
#define SWAB(value) ((((value) >> 8) & STRIPPED_MASK) | \
|
||||
(((value) & STRIPPED_MASK) << 8))
|
||||
|
||||
Py_LOCAL_INLINE(Py_UCS4)
|
||||
STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
|
||||
STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
|
||||
int native_ordering)
|
||||
{
|
||||
Py_UCS4 ch;
|
||||
const unsigned char *aligned_end =
|
||||
(const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
|
||||
const unsigned char *q = *inptr;
|
||||
STRINGLIB_CHAR *p = dest + *outpos;
|
||||
/* Offsets from q for retrieving byte pairs in the right order. */
|
||||
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
|
||||
int ihi = !!native_ordering, ilo = !native_ordering;
|
||||
#else
|
||||
int ihi = !native_ordering, ilo = !!native_ordering;
|
||||
#endif
|
||||
--e;
|
||||
|
||||
while (q < e) {
|
||||
Py_UCS4 ch2;
|
||||
/* First check for possible aligned read of a C 'long'. Unaligned
|
||||
reads are more expensive, better to defer to another iteration. */
|
||||
if (!((size_t) q & LONG_PTR_MASK)) {
|
||||
/* Fast path for runs of in-range non-surrogate chars. */
|
||||
register const unsigned char *_q = q;
|
||||
while (_q < aligned_end) {
|
||||
unsigned long block = * (unsigned long *) _q;
|
||||
if (native_ordering) {
|
||||
/* Can use buffer directly */
|
||||
if (block & FAST_CHAR_MASK)
|
||||
break;
|
||||
}
|
||||
else {
|
||||
/* Need to byte-swap */
|
||||
if (block & SWAB(FAST_CHAR_MASK))
|
||||
break;
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
block >>= 8;
|
||||
#else
|
||||
block = SWAB(block);
|
||||
#endif
|
||||
}
|
||||
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
|
||||
# if SIZEOF_LONG == 4
|
||||
p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
|
||||
p[1] = (STRINGLIB_CHAR)(block >> 16);
|
||||
# elif SIZEOF_LONG == 8
|
||||
p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
|
||||
p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
|
||||
p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
|
||||
p[3] = (STRINGLIB_CHAR)(block >> 48);
|
||||
# endif
|
||||
#else
|
||||
# if SIZEOF_LONG == 4
|
||||
p[0] = (STRINGLIB_CHAR)(block >> 16);
|
||||
p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);
|
||||
# elif SIZEOF_LONG == 8
|
||||
p[0] = (STRINGLIB_CHAR)(block >> 48);
|
||||
p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
|
||||
p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
|
||||
p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);
|
||||
# endif
|
||||
#endif
|
||||
_q += SIZEOF_LONG;
|
||||
p += SIZEOF_LONG / 2;
|
||||
}
|
||||
q = _q;
|
||||
if (q >= e)
|
||||
break;
|
||||
}
|
||||
|
||||
ch = (q[ihi] << 8) | q[ilo];
|
||||
q += 2;
|
||||
if (!Py_UNICODE_IS_SURROGATE(ch)) {
|
||||
#if STRINGLIB_SIZEOF_CHAR < 2
|
||||
if (ch > STRINGLIB_MAX_CHAR)
|
||||
/* Out-of-range */
|
||||
goto Return;
|
||||
#endif
|
||||
*p++ = (STRINGLIB_CHAR)ch;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* UTF-16 code pair: */
|
||||
if (q >= e)
|
||||
goto UnexpectedEnd;
|
||||
if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))
|
||||
goto IllegalEncoding;
|
||||
ch2 = (q[ihi] << 8) | q[ilo];
|
||||
q += 2;
|
||||
if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))
|
||||
goto IllegalSurrogate;
|
||||
ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);
|
||||
#if STRINGLIB_SIZEOF_CHAR < 4
|
||||
/* Out-of-range */
|
||||
goto Return;
|
||||
#else
|
||||
*p++ = (STRINGLIB_CHAR)ch;
|
||||
#endif
|
||||
}
|
||||
ch = 0;
|
||||
Return:
|
||||
*inptr = q;
|
||||
*outpos = p - dest;
|
||||
return ch;
|
||||
UnexpectedEnd:
|
||||
ch = 1;
|
||||
goto Return;
|
||||
IllegalEncoding:
|
||||
ch = 2;
|
||||
goto Return;
|
||||
IllegalSurrogate:
|
||||
ch = 3;
|
||||
goto Return;
|
||||
}
|
||||
#undef UCS2_REPEAT_MASK
|
||||
#undef FAST_CHAR_MASK
|
||||
#undef STRIPPED_MASK
|
||||
#undef SWAB
|
||||
#undef LONG_PTR_MASK
|
||||
#endif /* STRINGLIB_IS_UNICODE */
|
||||
|
|
|
@ -5195,25 +5195,6 @@ PyUnicode_DecodeUTF16(const char *s,
|
|||
return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL);
|
||||
}
|
||||
|
||||
/* Two masks for fast checking of whether a C 'long' may contain
|
||||
UTF16-encoded surrogate characters. This is an efficient heuristic,
|
||||
assuming that non-surrogate characters with a code point >= 0x8000 are
|
||||
rare in most input.
|
||||
FAST_CHAR_MASK is used when the input is in native byte ordering,
|
||||
SWAPPED_FAST_CHAR_MASK when the input is in byteswapped ordering.
|
||||
*/
|
||||
#if (SIZEOF_LONG == 8)
|
||||
# define FAST_CHAR_MASK 0x8000800080008000L
|
||||
# define SWAPPED_FAST_CHAR_MASK 0x0080008000800080L
|
||||
# define STRIPPED_MASK 0x00FF00FF00FF00FFL
|
||||
#elif (SIZEOF_LONG == 4)
|
||||
# define FAST_CHAR_MASK 0x80008000L
|
||||
# define SWAPPED_FAST_CHAR_MASK 0x00800080L
|
||||
# define STRIPPED_MASK 0x00FF00FFL
|
||||
#else
|
||||
# error C 'long' size should be either 4 or 8!
|
||||
#endif
|
||||
|
||||
PyObject *
|
||||
PyUnicode_DecodeUTF16Stateful(const char *s,
|
||||
Py_ssize_t size,
|
||||
|
@ -5226,30 +5207,15 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
|
|||
Py_ssize_t endinpos;
|
||||
Py_ssize_t outpos;
|
||||
PyObject *unicode;
|
||||
const unsigned char *q, *e, *aligned_end;
|
||||
const unsigned char *q, *e;
|
||||
int bo = 0; /* assume native ordering by default */
|
||||
int native_ordering = 0;
|
||||
int native_ordering;
|
||||
const char *errmsg = "";
|
||||
/* Offsets from q for retrieving byte pairs in the right order. */
|
||||
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
|
||||
int ihi = 1, ilo = 0;
|
||||
#else
|
||||
int ihi = 0, ilo = 1;
|
||||
#endif
|
||||
PyObject *errorHandler = NULL;
|
||||
PyObject *exc = NULL;
|
||||
|
||||
/* Note: size will always be longer than the resulting Unicode
|
||||
character count */
|
||||
unicode = PyUnicode_New(size, 127);
|
||||
if (!unicode)
|
||||
return NULL;
|
||||
if (size == 0)
|
||||
return unicode;
|
||||
outpos = 0;
|
||||
|
||||
q = (unsigned char *)s;
|
||||
e = q + size - 1;
|
||||
e = q + size;
|
||||
|
||||
if (byteorder)
|
||||
bo = *byteorder;
|
||||
|
@ -5258,10 +5224,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
|
|||
byte order setting accordingly. In native mode, the leading BOM
|
||||
mark is skipped, in all other modes, it is copied to the output
|
||||
stream as-is (giving a ZWNBSP character). */
|
||||
if (bo == 0) {
|
||||
if (size >= 2) {
|
||||
const Py_UCS4 bom = (q[ihi] << 8) | q[ilo];
|
||||
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
|
||||
if (bo == 0 && size >= 2) {
|
||||
const Py_UCS4 bom = (q[1] << 8) | q[0];
|
||||
if (bom == 0xFEFF) {
|
||||
q += 2;
|
||||
bo = -1;
|
||||
|
@ -5270,143 +5234,88 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
|
|||
q += 2;
|
||||
bo = 1;
|
||||
}
|
||||
#else
|
||||
if (bom == 0xFEFF) {
|
||||
q += 2;
|
||||
bo = 1;
|
||||
}
|
||||
else if (bom == 0xFFFE) {
|
||||
q += 2;
|
||||
bo = -1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (byteorder)
|
||||
*byteorder = bo;
|
||||
}
|
||||
|
||||
if (bo == -1) {
|
||||
/* force LE */
|
||||
ihi = 1;
|
||||
ilo = 0;
|
||||
}
|
||||
else if (bo == 1) {
|
||||
/* force BE */
|
||||
ihi = 0;
|
||||
ilo = 1;
|
||||
if (q == e) {
|
||||
if (consumed)
|
||||
*consumed = size;
|
||||
Py_INCREF(unicode_empty);
|
||||
return unicode_empty;
|
||||
}
|
||||
|
||||
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
|
||||
native_ordering = ilo < ihi;
|
||||
native_ordering = bo <= 0;
|
||||
#else
|
||||
native_ordering = ilo > ihi;
|
||||
native_ordering = bo >= 0;
|
||||
#endif
|
||||
|
||||
aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
|
||||
while (q < e) {
|
||||
Py_UCS4 ch;
|
||||
/* First check for possible aligned read of a C 'long'. Unaligned
|
||||
reads are more expensive, better to defer to another iteration. */
|
||||
if (!((size_t) q & LONG_PTR_MASK)) {
|
||||
/* Fast path for runs of non-surrogate chars. */
|
||||
register const unsigned char *_q = q;
|
||||
/* Note: size will always be longer than the resulting Unicode
|
||||
character count */
|
||||
unicode = PyUnicode_New((e - q + 1) / 2, 127);
|
||||
if (!unicode)
|
||||
return NULL;
|
||||
|
||||
outpos = 0;
|
||||
while (1) {
|
||||
Py_UCS4 ch = 0;
|
||||
if (e - q >= 2) {
|
||||
int kind = PyUnicode_KIND(unicode);
|
||||
void *data = PyUnicode_DATA(unicode);
|
||||
while (_q < aligned_end) {
|
||||
unsigned long block = * (unsigned long *) _q;
|
||||
Py_UCS4 maxch;
|
||||
if (native_ordering) {
|
||||
/* Can use buffer directly */
|
||||
if (block & FAST_CHAR_MASK)
|
||||
break;
|
||||
if (kind == PyUnicode_1BYTE_KIND) {
|
||||
if (PyUnicode_IS_ASCII(unicode))
|
||||
ch = asciilib_utf16_decode(&q, e,
|
||||
PyUnicode_1BYTE_DATA(unicode), &outpos,
|
||||
native_ordering);
|
||||
else
|
||||
ch = ucs1lib_utf16_decode(&q, e,
|
||||
PyUnicode_1BYTE_DATA(unicode), &outpos,
|
||||
native_ordering);
|
||||
} else if (kind == PyUnicode_2BYTE_KIND) {
|
||||
ch = ucs2lib_utf16_decode(&q, e,
|
||||
PyUnicode_2BYTE_DATA(unicode), &outpos,
|
||||
native_ordering);
|
||||
} else {
|
||||
assert(kind == PyUnicode_4BYTE_KIND);
|
||||
ch = ucs4lib_utf16_decode(&q, e,
|
||||
PyUnicode_4BYTE_DATA(unicode), &outpos,
|
||||
native_ordering);
|
||||
}
|
||||
else {
|
||||
/* Need to byte-swap */
|
||||
if (block & SWAPPED_FAST_CHAR_MASK)
|
||||
break;
|
||||
block = ((block >> 8) & STRIPPED_MASK) |
|
||||
((block & STRIPPED_MASK) << 8);
|
||||
}
|
||||
maxch = (Py_UCS2)(block & 0xFFFF);
|
||||
#if SIZEOF_LONG == 8
|
||||
ch = (Py_UCS2)((block >> 16) & 0xFFFF);
|
||||
maxch = MAX_MAXCHAR(maxch, ch);
|
||||
ch = (Py_UCS2)((block >> 32) & 0xFFFF);
|
||||
maxch = MAX_MAXCHAR(maxch, ch);
|
||||
ch = (Py_UCS2)(block >> 48);
|
||||
maxch = MAX_MAXCHAR(maxch, ch);
|
||||
#else
|
||||
ch = (Py_UCS2)(block >> 16);
|
||||
maxch = MAX_MAXCHAR(maxch, ch);
|
||||
#endif
|
||||
if (maxch > PyUnicode_MAX_CHAR_VALUE(unicode)) {
|
||||
if (unicode_widen(&unicode, outpos, maxch) < 0)
|
||||
goto onError;
|
||||
kind = PyUnicode_KIND(unicode);
|
||||
data = PyUnicode_DATA(unicode);
|
||||
}
|
||||
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
|
||||
PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF));
|
||||
#if SIZEOF_LONG == 8
|
||||
PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF));
|
||||
PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF));
|
||||
PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48)));
|
||||
#else
|
||||
PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16));
|
||||
#endif
|
||||
#else
|
||||
#if SIZEOF_LONG == 8
|
||||
PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48)));
|
||||
PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF));
|
||||
PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF));
|
||||
#else
|
||||
PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16));
|
||||
#endif
|
||||
PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF));
|
||||
#endif
|
||||
_q += SIZEOF_LONG;
|
||||
}
|
||||
q = _q;
|
||||
if (q >= e)
|
||||
break;
|
||||
}
|
||||
ch = (q[ihi] << 8) | q[ilo];
|
||||
|
||||
q += 2;
|
||||
|
||||
if (!Py_UNICODE_IS_SURROGATE(ch)) {
|
||||
switch (ch)
|
||||
{
|
||||
case 0:
|
||||
/* remaining byte at the end? (size should be even) */
|
||||
if (q == e || consumed)
|
||||
goto End;
|
||||
errmsg = "truncated data";
|
||||
startinpos = ((const char *)q) - starts;
|
||||
endinpos = ((const char *)e) - starts;
|
||||
break;
|
||||
/* The remaining input chars are ignored if the callback
|
||||
chooses to skip the input */
|
||||
case 1:
|
||||
errmsg = "unexpected end of data";
|
||||
startinpos = ((const char *)q) - 2 - starts;
|
||||
endinpos = ((const char *)e) - starts;
|
||||
break;
|
||||
case 2:
|
||||
errmsg = "illegal encoding";
|
||||
startinpos = ((const char *)q) - 2 - starts;
|
||||
endinpos = startinpos + 2;
|
||||
break;
|
||||
case 3:
|
||||
errmsg = "illegal UTF-16 surrogate";
|
||||
startinpos = ((const char *)q) - 4 - starts;
|
||||
endinpos = startinpos + 2;
|
||||
break;
|
||||
default:
|
||||
if (unicode_putchar(&unicode, &outpos, ch) < 0)
|
||||
goto onError;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* UTF-16 code pair: */
|
||||
if (q > e) {
|
||||
errmsg = "unexpected end of data";
|
||||
startinpos = (((const char *)q) - 2) - starts;
|
||||
endinpos = ((const char *)e) + 1 - starts;
|
||||
goto utf16Error;
|
||||
}
|
||||
if (Py_UNICODE_IS_HIGH_SURROGATE(ch)) {
|
||||
Py_UCS4 ch2 = (q[ihi] << 8) | q[ilo];
|
||||
q += 2;
|
||||
if (Py_UNICODE_IS_LOW_SURROGATE(ch2)) {
|
||||
if (unicode_putchar(&unicode, &outpos,
|
||||
Py_UNICODE_JOIN_SURROGATES(ch, ch2)) < 0)
|
||||
goto onError;
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
errmsg = "illegal UTF-16 surrogate";
|
||||
startinpos = (((const char *)q)-4)-starts;
|
||||
endinpos = startinpos+2;
|
||||
goto utf16Error;
|
||||
}
|
||||
|
||||
}
|
||||
errmsg = "illegal encoding";
|
||||
startinpos = (((const char *)q)-2)-starts;
|
||||
endinpos = startinpos+2;
|
||||
/* Fall through to report the error */
|
||||
|
||||
utf16Error:
|
||||
if (unicode_decode_call_errorhandler(
|
||||
errors,
|
||||
&errorHandler,
|
||||
|
@ -5421,33 +5330,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
|
|||
&outpos))
|
||||
goto onError;
|
||||
}
|
||||
/* remaining byte at the end? (size should be even) */
|
||||
if (e == q) {
|
||||
if (!consumed) {
|
||||
errmsg = "truncated data";
|
||||
startinpos = ((const char *)q) - starts;
|
||||
endinpos = ((const char *)e) + 1 - starts;
|
||||
if (unicode_decode_call_errorhandler(
|
||||
errors,
|
||||
&errorHandler,
|
||||
"utf16", errmsg,
|
||||
&starts,
|
||||
(const char **)&e,
|
||||
&startinpos,
|
||||
&endinpos,
|
||||
&exc,
|
||||
(const char **)&q,
|
||||
&unicode,
|
||||
&outpos))
|
||||
goto onError;
|
||||
/* The remaining input chars are ignored if the callback
|
||||
chooses to skip the input */
|
||||
}
|
||||
}
|
||||
|
||||
if (byteorder)
|
||||
*byteorder = bo;
|
||||
|
||||
End:
|
||||
if (consumed)
|
||||
*consumed = (const char *)q-starts;
|
||||
|
||||
|
@ -5466,9 +5350,6 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
#undef FAST_CHAR_MASK
|
||||
#undef SWAPPED_FAST_CHAR_MASK
|
||||
|
||||
PyObject *
|
||||
_PyUnicode_EncodeUTF16(PyObject *str,
|
||||
const char *errors,
|
||||
|
|
|
@ -25,6 +25,8 @@ def main(input_path, output_path):
|
|||
with open(output_path, 'w', encoding='utf-8') as output_file:
|
||||
output_file.write('\n'.join(lines))
|
||||
output_file.write('/* Mercurial binary marker: \x00 */')
|
||||
# Avoid a compiler warning for lack of EOL
|
||||
output_file.write('\n')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Reference in New Issue