merge heads

2012-05-16 16:03:07 +02:00 · 2012-05-16 16:03:07 +02:00 · 26fd8feb5e
parent e126678216 7ca29507f6
commit 26fd8feb5e
18 changed files with 498 additions and 251 deletions
--- a/.hgignore
+++ b/.hgignore
@ -32,7 +32,6 @@ Modules/Setup.local
 Modules/config.c
 Modules/ld_so_aix$
 Parser/pgen$
-PCbuild/amd64/
 ^core
 ^python-gdb.py
 ^python.exe-gdb.py
@ -56,6 +55,12 @@ PC/python_nt*.h
 PC/pythonnt_rc*.h
 PC/*.obj
 PC/*.exe
+PC/*/*.user
+PC/*/*.ncb
+PC/*/*.suo
+PC/*/Win32-temp-*
+PC/*/x64-temp-*
+PC/*/amd64
 PCbuild/*.exe
 PCbuild/*.dll
 PCbuild/*.pdb
@ -69,6 +74,8 @@ PCbuild/*.suo
 PCbuild/*.*sdf
 PCbuild/Win32-temp-*
 PCbuild/x64-temp-*
+PCbuild/amd64
+BuildLog.htm
 __pycache__
 Modules/_testembed
 .coverage
--- a/Doc/library/email.generator.rst
+++ b/Doc/library/email.generator.rst
@ -17,10 +17,10 @@ yourself.  However the bundled generator knows how to generate most email in a
 standards-compliant way, should handle MIME and non-MIME email messages just
 fine, and is designed so that the transformation from flat text, to a message
 structure via the :class:`~email.parser.Parser` class, and back to flat text,
-is idempotent (the input is identical to the output).  On the other hand, using
-the Generator on a :class:`~email.message.Message` constructed by program may
-result in changes to the :class:`~email.message.Message` object as defaults are
-filled in.
+is idempotent (the input is identical to the output) [#]_.  On the other hand,
+using the Generator on a :class:`~email.message.Message` constructed by program
+may result in changes to the :class:`~email.message.Message` object as defaults
+are filled in.

 :class:`bytes` output can be generated using the :class:`BytesGenerator` class.
 If the message object structure contains non-ASCII bytes, this generator's
@ -223,3 +223,12 @@ representing the part.
   The default value for *fmt* is ``None``, meaning ::

      [Non-text (%(type)s) part of message omitted, filename %(filename)s]
+
+
+.. rubric:: Footnotes
+
+.. [#] This statement assumes that you use the appropriate setting for the
+       ``unixfrom`` argument, and that you set maxheaderlen=0 (which will
+       preserve whatever the input line lengths were).  It is also not strictly
+       true, since in many cases runs of whitespace in headers are collapsed
+       into single blanks.  The latter is a bug that will eventually be fixed.
--- a/Doc/library/http.client.rst
+++ b/Doc/library/http.client.rst
@ -339,6 +339,15 @@ and also the following constants for integer status codes:
 | :const:`UPGRADE_REQUIRED`                | ``426`` | HTTP Upgrade to TLS,                                                  |
 |                                          |         | :rfc:`2817`, Section 6                                                |
 +------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`PRECONDITION_REQUIRED`           | ``428`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 3                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`TOO_MANY_REQUESTS`               | ``429`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 4                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`REQUEST_HEADER_FIELDS_TOO_LARGE` | ``431`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 5                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
 | :const:`INTERNAL_SERVER_ERROR`           | ``500`` | HTTP/1.1, `RFC 2616, Section                                          |
 |                                          |         | 10.5.1                                                                |
 |                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.1>`_  |
@ -369,6 +378,12 @@ and also the following constants for integer status codes:
 | :const:`NOT_EXTENDED`                    | ``510`` | An HTTP Extension Framework,                                          |
 |                                          |         | :rfc:`2774`, Section 7                                                |
 +------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`NETWORK_AUTHENTICATION_REQUIRED` | ``511`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 6                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+
+  .. versionchanged:: 3.3
+     Added codes ``428``, ``429``, ``431`` and ``511`` from :rfc:`6585`.


 .. data:: responses
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@ -141,6 +141,9 @@ UNPROCESSABLE_ENTITY = 422
 LOCKED = 423
 FAILED_DEPENDENCY = 424
 UPGRADE_REQUIRED = 426
+PRECONDITION_REQUIRED = 428
+TOO_MANY_REQUESTS = 429
+REQUEST_HEADER_FIELDS_TOO_LARGE = 431

 # server error
 INTERNAL_SERVER_ERROR = 500
@ -151,6 +154,7 @@ GATEWAY_TIMEOUT = 504
 HTTP_VERSION_NOT_SUPPORTED = 505
 INSUFFICIENT_STORAGE = 507
 NOT_EXTENDED = 510
+NETWORK_AUTHENTICATION_REQUIRED = 511

 # Mapping status codes to official W3C names
 responses = {
@ -192,6 +196,9 @@ responses = {
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
+    428: 'Precondition Required',
+    429: 'Too Many Requests',
+    431: 'Request Header Fields Too Large',

    500: 'Internal Server Error',
    501: 'Not Implemented',
@ -199,6 +206,7 @@ responses = {
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
+    511: 'Network Authentication Required',
 }

 # maximal amount of data to read at one time in _safe_read
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@ -573,7 +573,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
-    # See RFC 2616.
+    # See RFC 2616 and 6585.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
@ -628,6 +628,12 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),
+        428: ('Precondition Required',
+              'The origin server requires the request to be conditional.'),
+        429: ('Too Many Requests', 'The user has sent too many requests '
+              'in a given amount of time ("rate limiting").'),
+        431: ('Request Header Fields Too Large', 'The server is unwilling to '
+              'process the request because its header fields are too large.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
@ -638,6 +644,8 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
+        511: ('Network Authentication Required',
+              'The client needs to authenticate to gain network access.'),
        }


--- a/Lib/test/test_bisect.py
+++ b/Lib/test/test_bisect.py
@ -23,6 +23,28 @@ del sys.modules['bisect']
 import bisect as c_bisect


+class Range(object):
+    """A trivial range()-like object without any integer width limitations."""
+    def __init__(self, start, stop):
+        self.start = start
+        self.stop = stop
+        self.last_insert = None
+
+    def __len__(self):
+        return self.stop - self.start
+
+    def __getitem__(self, idx):
+        n = self.stop - self.start
+        if idx < 0:
+            idx += n
+        if idx >= n:
+            raise IndexError(idx)
+        return self.start + idx
+
+    def insert(self, idx, item):
+        self.last_insert = idx, item
+
+
 class TestBisect(unittest.TestCase):
    module = None

@ -125,9 +147,28 @@ class TestBisect(unittest.TestCase):
    def test_large_range(self):
        # Issue 13496
        mod = self.module
-        data = range(sys.maxsize-1)
-        self.assertEqual(mod.bisect_left(data, sys.maxsize-3), sys.maxsize-3)
-        self.assertEqual(mod.bisect_right(data, sys.maxsize-3), sys.maxsize-2)
+        n = sys.maxsize
+        data = range(n-1)
+        self.assertEqual(mod.bisect_left(data, n-3), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3), n-2)
+        self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2)
+
+    def test_large_pyrange(self):
+        # Same as above, but without C-imposed limits on range() parameters
+        mod = self.module
+        n = sys.maxsize
+        data = Range(0, n-1)
+        self.assertEqual(mod.bisect_left(data, n-3), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3), n-2)
+        self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2)
+        x = n - 100
+        mod.insort_left(data, x, x - 50, x + 50)
+        self.assertEqual(data.last_insert, (x, x))
+        x = n - 200
+        mod.insort_right(data, x, x - 50, x + 50)
+        self.assertEqual(data.last_insert, (x + 1, x))

    def test_random(self, n=25):
        from random import randrange
--- a/Lib/test/test_pkgutil.py
+++ b/Lib/test/test_pkgutil.py
@ -137,8 +137,57 @@ class PkgutilPEP302Tests(unittest.TestCase):
        self.assertEqual(foo.loads, 1)
        del sys.modules['foo']

+
+class ExtendPathTests(unittest.TestCase):
+    def create_init(self, pkgname):
+        dirname = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, dirname)
+        sys.path.insert(0, dirname)
+
+        pkgdir = os.path.join(dirname, pkgname)
+        os.mkdir(pkgdir)
+        with open(os.path.join(pkgdir, '__init__.py'), 'w') as fl:
+            fl.write('from pkgutil import extend_path\n__path__ = extend_path(__path__, __name__)\n')
+
+        return dirname
+
+    def create_submodule(self, dirname, pkgname, submodule_name, value):
+        module_name = os.path.join(dirname, pkgname, submodule_name + '.py')
+        with open(module_name, 'w') as fl:
+            print('value={}'.format(value), file=fl)
+
+    def setUp(self):
+        # Create 2 directories on sys.path
+        self.pkgname = 'foo'
+        self.dirname_0 = self.create_init(self.pkgname)
+        self.dirname_1 = self.create_init(self.pkgname)
+
+    def tearDown(self):
+        del sys.path[0]
+        del sys.path[0]
+        del sys.modules['foo']
+        del sys.modules['foo.bar']
+        del sys.modules['foo.baz']
+
+    def test_simple(self):
+        self.create_submodule(self.dirname_0, self.pkgname, 'bar', 0)
+        self.create_submodule(self.dirname_1, self.pkgname, 'baz', 1)
+        import foo.bar
+        import foo.baz
+        # Ensure we read the expected values
+        self.assertEqual(foo.bar.value, 0)
+        self.assertEqual(foo.baz.value, 1)
+
+        # Ensure the path is set up correctly
+        self.assertEqual(sorted(foo.__path__),
+                         sorted([os.path.join(self.dirname_0, self.pkgname),
+                                 os.path.join(self.dirname_1, self.pkgname)]))
+
+    # XXX: test .pkg files
+
+
 def test_main():
-    run_unittest(PkgutilTests, PkgutilPEP302Tests)
+    run_unittest(PkgutilTests, PkgutilPEP302Tests, ExtendPathTests)
    # this is necessary if test is run repeated (like when finding leaks)
    import zipimport
    zipimport._zip_directory_cache.clear()
--- a/Lib/tkinter/__init__.py
+++ b/Lib/tkinter/__init__.py
@ -540,12 +540,19 @@ class Misc:

        The type keyword specifies the form in which the data is
        to be returned and should be an atom name such as STRING
-        or FILE_NAME.  Type defaults to STRING.
+        or FILE_NAME.  Type defaults to STRING, except on X11, where the default
+        is to try UTF8_STRING and fall back to STRING.

        This command is equivalent to:

        selection_get(CLIPBOARD)
        """
+        if 'type' not in kw and self._windowingsystem == 'x11':
+            try:
+                kw['type'] = 'UTF8_STRING'
+                return self.tk.call(('clipboard', 'get') + self._options(kw))
+            except TclError:
+                del kw['type']
        return self.tk.call(('clipboard', 'get') + self._options(kw))

    def clipboard_clear(self, **kw):
@ -627,8 +634,16 @@ class Misc:
        A keyword parameter selection specifies the name of
        the selection and defaults to PRIMARY.  A keyword
        parameter displayof specifies a widget on the display
-        to use."""
+        to use. A keyword parameter type specifies the form of data to be
+        fetched, defaulting to STRING except on X11, where UTF8_STRING is tried
+        before STRING."""
        if 'displayof' not in kw: kw['displayof'] = self._w
+        if 'type' not in kw and self._windowingsystem == 'x11':
+            try:
+                kw['type'] = 'UTF8_STRING'
+                return self.tk.call(('selection', 'get') + self._options(kw))
+            except TclError:
+                del kw['type']
        return self.tk.call(('selection', 'get') + self._options(kw))
    def selection_handle(self, command, **kw):
        """Specify a function COMMAND to call if the X
@ -1043,6 +1058,15 @@ class Misc:
        if displayof is None:
            return ('-displayof', self._w)
        return ()
+    @property
+    def _windowingsystem(self):
+        """Internal function."""
+        try:
+            return self._root()._windowingsystem_cached
+        except AttributeError:
+            ws = self._root()._windowingsystem_cached = \
+                        self.tk.call('tk', 'windowingsystem')
+            return ws
    def _options(self, cnf, kw = None):
        """Internal function."""
        if kw:
--- a/Misc/ACKS
+++ b/Misc/ACKS
@ -919,6 +919,7 @@ Ralf Schmitt
 Michael Schneider
 Peter Schneider-Kamp
 Arvin Schnell
+Robin Schreiber
 Chad J. Schroeder
 Sam Schulenburg
 Stefan Schwarzer
@ -1129,6 +1130,7 @@ Florent Xicluna
 Hirokazu Yamamoto
 Ka-Ping Yee
 Jason Yeo
+EungJun Yi
 Bob Yodlowski
 Danny Yoo
 George Yoshida
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -10,6 +10,9 @@ What's New in Python 3.3.0 Alpha 4?
 Core and Builtins
 -----------------

+- Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs.
+  Patch by Serhiy Storchaka.
+
 - asdl_seq and asdl_int_seq are now Py_ssize_t sized.

 - Issue #14133 (PEP 415): Implement suppression of __context__ display with an
@ -31,6 +34,21 @@ Core and Builtins
 Library
 -------

+- Issue #14829: Fix bisect and range() indexing with large indices
+  (>= 2 ** 32) under 64-bit Windows.
+
+- Issue #14732: The _csv module now uses PEP 3121 module initialization.
+  Patch by Robin Schreiber.
+
+- Issue #14809: Add HTTP status codes introduced by RFC 6585 to http.server
+  and http.client. Patch by EungJun Yi.
+
+- Issue #14777: tkinter may return undecoded UTF-8 bytes as a string when
+  accessing the Tk clipboard.  Modify clipboard_get() to first request type
+  UTF8_STRING when no specific type is requested in an X11 windowing
+  environment, falling back to the current default type STRING if that fails.
+  Original patch by Thomas Kluyver.
+
 - Issue #14773: Fix os.fwalk() failing on dangling symlinks.

 - Issue #12541: Be lenient with quotes around Realm field of HTTP Basic
--- a/Modules/_bisectmodule.c
+++ b/Modules/_bisectmodule.c
@ -3,6 +3,7 @@
 Converted to C by Dmitry Vasiliev (dima at hlabs.spb.ru).
 */

+#define PY_SSIZE_T_CLEAN
 #include "Python.h"

 static Py_ssize_t
@ -195,8 +196,7 @@ insort_left(PyObject *self, PyObject *args, PyObject *kw)
            return NULL;
    } else {
        _Py_IDENTIFIER(insert);
-
-        result = _PyObject_CallMethodId(list, &PyId_insert, "iO", index, item);
+        result = _PyObject_CallMethodId(list, &PyId_insert, "nO", index, item);
        if (result == NULL)
            return NULL;
        Py_DECREF(result);
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@ -16,9 +16,39 @@ module instead.
 #define IS_BASESTRING(o) \
    PyUnicode_Check(o)

-static PyObject *error_obj;     /* CSV exception */
-static PyObject *dialects;      /* Dialect registry */
-static long field_limit = 128 * 1024;   /* max parsed field size */
+typedef struct {
+    PyObject *error_obj;   /* CSV exception */
+    PyObject *dialects;   /* Dialect registry */
+    long field_limit;   /* max parsed field size */
+} _csvstate;
+
+#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
+
+static int
+_csv_clear(PyObject *m)
+{
+    Py_CLEAR(_csvstate(m)->error_obj);
+    Py_CLEAR(_csvstate(m)->dialects);
+    return 0;
+}
+
+static int
+_csv_traverse(PyObject *m, visitproc visit, void *arg)
+{
+    Py_VISIT(_csvstate(m)->error_obj);
+    Py_VISIT(_csvstate(m)->dialects);
+    return 0;
+}
+
+static void
+_csv_free(void *m)
+{
+   _csv_clear((PyObject *)m);
+}
+
+static struct PyModuleDef _csvmodule;
+
+#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))

 typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
@ -103,10 +133,10 @@ get_dialect_from_registry(PyObject * name_obj)
 {
    PyObject *dialect_obj;

-    dialect_obj = PyDict_GetItem(dialects, name_obj);
+    dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
    if (dialect_obj == NULL) {
        if (!PyErr_Occurred())
-            PyErr_Format(error_obj, "unknown dialect");
+            PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
    }
    else
        Py_INCREF(dialect_obj);
@ -544,9 +574,9 @@ parse_grow_buff(ReaderObj *self)
 static int
 parse_add_char(ReaderObj *self, Py_UCS4 c)
 {
-    if (self->field_len >= field_limit) {
-        PyErr_Format(error_obj, "field larger than field limit (%ld)",
-                     field_limit);
+    if (self->field_len >= _csvstate_global->field_limit) {
+        PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
+                     _csvstate_global->field_limit);
        return -1;
    }
    if (self->field_len == self->field_size && !parse_grow_buff(self))
@ -703,7 +733,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
        }
        else {
            /* illegal */
-            PyErr_Format(error_obj, "'%c' expected after '%c'",
+            PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
@ -716,7 +746,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
        else if (c == '\0')
            self->state = START_RECORD;
        else {
-            PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
+            PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;
@ -755,12 +785,12 @@ Reader_iternext(ReaderObj *self)
        if (lineobj == NULL) {
            /* End of input OR exception */
            if (!PyErr_Occurred() && self->field_len != 0)
-                PyErr_Format(error_obj,
+                PyErr_Format(_csvstate_global->error_obj,
                             "newline inside string");
            return NULL;
        }
        if (!PyUnicode_Check(lineobj)) {
-            PyErr_Format(error_obj,
+            PyErr_Format(_csvstate_global->error_obj,
                         "iterator should return strings, "
                         "not %.200s "
                         "(did you open the file in text mode?)",
@ -778,7 +808,7 @@ Reader_iternext(ReaderObj *self)
            c = PyUnicode_READ(kind, data, pos);
            if (c == '\0') {
                Py_DECREF(lineobj);
-                PyErr_Format(error_obj,
+                PyErr_Format(_csvstate_global->error_obj,
                             "line contains NULL byte");
                goto err;
            }
@ -994,7 +1024,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
            }
            if (want_escape) {
                if (!dialect->escapechar) {
-                    PyErr_Format(error_obj,
+                    PyErr_Format(_csvstate_global->error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
@ -1010,7 +1040,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
     */
    if (i == 0 && quote_empty) {
        if (dialect->quoting == QUOTE_NONE) {
-            PyErr_Format(error_obj,
+            PyErr_Format(_csvstate_global->error_obj,
                "single empty field record must be quoted");
            return -1;
        }
@ -1127,7 +1157,7 @@ csv_writerow(WriterObj *self, PyObject *seq)
    PyObject *line, *result;

    if (!PySequence_Check(seq))
-        return PyErr_Format(error_obj, "sequence expected");
+        return PyErr_Format(_csvstate_global->error_obj, "sequence expected");

    len = PySequence_Length(seq);
    if (len < 0)
@ -1353,7 +1383,7 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
 static PyObject *
 csv_list_dialects(PyObject *module, PyObject *args)
 {
-    return PyDict_Keys(dialects);
+    return PyDict_Keys(_csvstate_global->dialects);
 }

 static PyObject *
@ -1372,7 +1402,7 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
    dialect = _call_dialect(dialect_obj, kwargs);
    if (dialect == NULL)
        return NULL;
-    if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
+    if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
        Py_DECREF(dialect);
        return NULL;
    }
@ -1384,8 +1414,8 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
 static PyObject *
 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
 {
-    if (PyDict_DelItem(dialects, name_obj) < 0)
-        return PyErr_Format(error_obj, "unknown dialect");
+    if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
+        return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
    Py_INCREF(Py_None);
    return Py_None;
 }
@ -1400,7 +1430,7 @@ static PyObject *
 csv_field_size_limit(PyObject *module, PyObject *args)
 {
    PyObject *new_limit = NULL;
-    long old_limit = field_limit;
+    long old_limit = _csvstate_global->field_limit;

    if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
        return NULL;
@ -1410,9 +1440,9 @@ csv_field_size_limit(PyObject *module, PyObject *args)
                         "limit must be an integer");
            return NULL;
        }
-        field_limit = PyLong_AsLong(new_limit);
-        if (field_limit == -1 && PyErr_Occurred()) {
-            field_limit = old_limit;
+        _csvstate_global->field_limit = PyLong_AsLong(new_limit);
+        if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
+            _csvstate_global->field_limit = old_limit;
            return NULL;
        }
    }
@ -1551,17 +1581,16 @@ static struct PyMethodDef csv_methods[] = {
    { NULL, NULL }
 };

-
 static struct PyModuleDef _csvmodule = {
    PyModuleDef_HEAD_INIT,
    "_csv",
    csv_module_doc,
-    -1,
+    sizeof(_csvstate),
    csv_methods,
    NULL,
-    NULL,
-    NULL,
-    NULL
+    _csv_traverse,
+    _csv_clear,
+    _csv_free
 };

 PyMODINIT_FUNC
@ -1589,11 +1618,16 @@ PyInit__csv(void)
                                   MODULE_VERSION) == -1)
        return NULL;

+    /* Set the field limit */
+    _csvstate(module)->field_limit = 128 * 1024;
+    /* Do I still need to add this var to the Module Dict? */
+
    /* Add _dialects dictionary */
-    dialects = PyDict_New();
-    if (dialects == NULL)
+    _csvstate(module)->dialects = PyDict_New();
+    if (_csvstate(module)->dialects == NULL)
        return NULL;
-    if (PyModule_AddObject(module, "_dialects", dialects))
+    Py_INCREF(_csvstate(module)->dialects);
+    if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
        return NULL;

    /* Add quote styles into dictionary */
@ -1609,9 +1643,10 @@ PyInit__csv(void)
        return NULL;

    /* Add the CSV exception object to the module. */
-    error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
-    if (error_obj == NULL)
+    _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
+    if (_csvstate(module)->error_obj == NULL)
        return NULL;
-    PyModule_AddObject(module, "Error", error_obj);
+    Py_INCREF(_csvstate(module)->error_obj);
+    PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
    return module;
 }
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@ -349,7 +349,8 @@ PyException_SetContext(PyObject *self, PyObject *context) {

 static struct PyMemberDef BaseException_members[] = {
    {"__suppress_context__", T_BOOL,
-     offsetof(PyBaseExceptionObject, suppress_context)}
+     offsetof(PyBaseExceptionObject, suppress_context)},
+    {NULL}
 };


--- a/Objects/rangeobject.c
+++ b/Objects/rangeobject.c
@ -308,7 +308,7 @@ compute_range_item(rangeobject *r, PyObject *arg)
 static PyObject *
 range_item(rangeobject *r, Py_ssize_t i)
 {
-    PyObject *res, *arg = PyLong_FromLong(i);
+    PyObject *res, *arg = PyLong_FromSsize_t(i);
    if (!arg) {
        return NULL;
    }
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@ -215,7 +215,6 @@ InvalidContinuation:
    goto Return;
 }

-#undef LONG_PTR_MASK
 #undef ASCII_CHAR_MASK


@ -415,4 +414,152 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
 #undef MAX_SHORT_UNICHARS
 }

+/* The pattern for constructing UCS2-repeated masks. */
+#if SIZEOF_LONG == 8
+# define UCS2_REPEAT_MASK 0x0001000100010001ul
+#elif SIZEOF_LONG == 4
+# define UCS2_REPEAT_MASK 0x00010001ul
+#else
+# error C 'long' size should be either 4 or 8!
+#endif
+
+/* The mask for fast checking. */
+#if STRINGLIB_SIZEOF_CHAR == 1
+/* The mask for fast checking of whether a C 'long' contains any
+   non-ASCII or non-Latin1 UTF16-encoded characters. */
+# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))
+#else
+/* The mask for fast checking of whether a C 'long' may contain
+   UTF16-encoded surrogate characters. This is an efficient heuristic,
+   assuming that non-surrogate characters with a code point >= 0x8000 are
+   rare in most input.
+*/
+# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * 0x8000u)
+#endif
+/* The mask for fast byte-swapping. */
+#define STRIPPED_MASK           (UCS2_REPEAT_MASK * 0x00FFu)
+/* Swap bytes. */
+#define SWAB(value)             ((((value) >> 8) & STRIPPED_MASK) | \
+                                 (((value) & STRIPPED_MASK) << 8))
+
+Py_LOCAL_INLINE(Py_UCS4)
+STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
+                        STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
+                        int native_ordering)
+{
+    Py_UCS4 ch;
+    const unsigned char *aligned_end =
+            (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
+    const unsigned char *q = *inptr;
+    STRINGLIB_CHAR *p = dest + *outpos;
+    /* Offsets from q for retrieving byte pairs in the right order. */
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+    int ihi = !!native_ordering, ilo = !native_ordering;
+#else
+    int ihi = !native_ordering, ilo = !!native_ordering;
+#endif
+    --e;
+
+    while (q < e) {
+        Py_UCS4 ch2;
+        /* First check for possible aligned read of a C 'long'. Unaligned
+           reads are more expensive, better to defer to another iteration. */
+        if (!((size_t) q & LONG_PTR_MASK)) {
+            /* Fast path for runs of in-range non-surrogate chars. */
+            register const unsigned char *_q = q;
+            while (_q < aligned_end) {
+                unsigned long block = * (unsigned long *) _q;
+                if (native_ordering) {
+                    /* Can use buffer directly */
+                    if (block & FAST_CHAR_MASK)
+                        break;
+                }
+                else {
+                    /* Need to byte-swap */
+                    if (block & SWAB(FAST_CHAR_MASK))
+                        break;
+#if STRINGLIB_SIZEOF_CHAR == 1
+                    block >>= 8;
+#else
+                    block = SWAB(block);
+#endif
+                }
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+# if SIZEOF_LONG == 4
+                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+                p[1] = (STRINGLIB_CHAR)(block >> 16);
+# elif SIZEOF_LONG == 8
+                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+                p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+                p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+                p[3] = (STRINGLIB_CHAR)(block >> 48);
+# endif
+#else
+# if SIZEOF_LONG == 4
+                p[0] = (STRINGLIB_CHAR)(block >> 16);
+                p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# elif SIZEOF_LONG == 8
+                p[0] = (STRINGLIB_CHAR)(block >> 48);
+                p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+                p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+                p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# endif
+#endif
+                _q += SIZEOF_LONG;
+                p += SIZEOF_LONG / 2;
+            }
+            q = _q;
+            if (q >= e)
+                break;
+        }
+
+        ch = (q[ihi] << 8) | q[ilo];
+        q += 2;
+        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+#if STRINGLIB_SIZEOF_CHAR < 2
+            if (ch > STRINGLIB_MAX_CHAR)
+                /* Out-of-range */
+                goto Return;
+#endif
+            *p++ = (STRINGLIB_CHAR)ch;
+            continue;
+        }
+
+        /* UTF-16 code pair: */
+        if (q >= e)
+            goto UnexpectedEnd;
+        if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))
+            goto IllegalEncoding;
+        ch2 = (q[ihi] << 8) | q[ilo];
+        q += 2;
+        if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))
+            goto IllegalSurrogate;
+        ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);
+#if STRINGLIB_SIZEOF_CHAR < 4
+        /* Out-of-range */
+        goto Return;
+#else
+        *p++ = (STRINGLIB_CHAR)ch;
+#endif
+    }
+    ch = 0;
+Return:
+    *inptr = q;
+    *outpos = p - dest;
+    return ch;
+UnexpectedEnd:
+    ch = 1;
+    goto Return;
+IllegalEncoding:
+    ch = 2;
+    goto Return;
+IllegalSurrogate:
+    ch = 3;
+    goto Return;
+}
+#undef UCS2_REPEAT_MASK
+#undef FAST_CHAR_MASK
+#undef STRIPPED_MASK
+#undef SWAB
+#undef LONG_PTR_MASK
 #endif /* STRINGLIB_IS_UNICODE */
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -5195,25 +5195,6 @@ PyUnicode_DecodeUTF16(const char *s,
    return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL);
 }

-/* Two masks for fast checking of whether a C 'long' may contain
-   UTF16-encoded surrogate characters. This is an efficient heuristic,
-   assuming that non-surrogate characters with a code point >= 0x8000 are
-   rare in most input.
-   FAST_CHAR_MASK is used when the input is in native byte ordering,
-   SWAPPED_FAST_CHAR_MASK when the input is in byteswapped ordering.
-*/
-#if (SIZEOF_LONG == 8)
-# define FAST_CHAR_MASK         0x8000800080008000L
-# define SWAPPED_FAST_CHAR_MASK 0x0080008000800080L
-# define STRIPPED_MASK          0x00FF00FF00FF00FFL
-#elif (SIZEOF_LONG == 4)
-# define FAST_CHAR_MASK         0x80008000L
-# define SWAPPED_FAST_CHAR_MASK 0x00800080L
-# define STRIPPED_MASK          0x00FF00FFL
-#else
-# error C 'long' size should be either 4 or 8!
-#endif
-
 PyObject *
 PyUnicode_DecodeUTF16Stateful(const char *s,
                              Py_ssize_t size,
@ -5226,30 +5207,15 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    PyObject *unicode;
-    const unsigned char *q, *e, *aligned_end;
+    const unsigned char *q, *e;
    int bo = 0;       /* assume native ordering by default */
-    int native_ordering = 0;
+    int native_ordering;
    const char *errmsg = "";
-    /* Offsets from q for retrieving byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-    int ihi = 1, ilo = 0;
-#else
-    int ihi = 0, ilo = 1;
-#endif
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

-    /* Note: size will always be longer than the resulting Unicode
-       character count */
-    unicode = PyUnicode_New(size, 127);
-    if (!unicode)
-        return NULL;
-    if (size == 0)
-        return unicode;
-    outpos = 0;
-
    q = (unsigned char *)s;
-    e = q + size - 1;
+    e = q + size;

    if (byteorder)
        bo = *byteorder;
@ -5258,155 +5224,98 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
       byte order setting accordingly. In native mode, the leading BOM
       mark is skipped, in all other modes, it is copied to the output
       stream as-is (giving a ZWNBSP character). */
-    if (bo == 0) {
-        if (size >= 2) {
-            const Py_UCS4 bom = (q[ihi] << 8) | q[ilo];
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-            if (bom == 0xFEFF) {
-                q += 2;
-                bo = -1;
-            }
-            else if (bom == 0xFFFE) {
-                q += 2;
-                bo = 1;
-            }
-#else
-            if (bom == 0xFEFF) {
-                q += 2;
-                bo = 1;
-            }
-            else if (bom == 0xFFFE) {
-                q += 2;
-                bo = -1;
-            }
-#endif
+    if (bo == 0 && size >= 2) {
+        const Py_UCS4 bom = (q[1] << 8) | q[0];
+        if (bom == 0xFEFF) {
+            q += 2;
+            bo = -1;
        }
+        else if (bom == 0xFFFE) {
+            q += 2;
+            bo = 1;
+        }
+        if (byteorder)
+            *byteorder = bo;
    }

-    if (bo == -1) {
-        /* force LE */
-        ihi = 1;
-        ilo = 0;
-    }
-    else if (bo == 1) {
-        /* force BE */
-        ihi = 0;
-        ilo = 1;
+    if (q == e) {
+        if (consumed)
+            *consumed = size;
+        Py_INCREF(unicode_empty);
+        return unicode_empty;
    }
+
 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
-    native_ordering = ilo < ihi;
+    native_ordering = bo <= 0;
 #else
-    native_ordering = ilo > ihi;
+    native_ordering = bo >= 0;
 #endif

-    aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
-    while (q < e) {
-        Py_UCS4 ch;
-        /* First check for possible aligned read of a C 'long'. Unaligned
-           reads are more expensive, better to defer to another iteration. */
-        if (!((size_t) q & LONG_PTR_MASK)) {
-            /* Fast path for runs of non-surrogate chars. */
-            register const unsigned char *_q = q;
+    /* Note: size will always be longer than the resulting Unicode
+       character count */
+    unicode = PyUnicode_New((e - q + 1) / 2, 127);
+    if (!unicode)
+        return NULL;
+
+    outpos = 0;
+    while (1) {
+        Py_UCS4 ch = 0;
+        if (e - q >= 2) {
            int kind = PyUnicode_KIND(unicode);
-            void *data = PyUnicode_DATA(unicode);
-            while (_q < aligned_end) {
-                unsigned long block = * (unsigned long *) _q;
-                Py_UCS4 maxch;
-                if (native_ordering) {
-                    /* Can use buffer directly */
-                    if (block & FAST_CHAR_MASK)
-                        break;
-                }
-                else {
-                    /* Need to byte-swap */
-                    if (block & SWAPPED_FAST_CHAR_MASK)
-                        break;
-                    block = ((block >> 8) & STRIPPED_MASK) |
-                            ((block & STRIPPED_MASK) << 8);
-                }
-                maxch = (Py_UCS2)(block & 0xFFFF);
-#if SIZEOF_LONG == 8
-                ch = (Py_UCS2)((block >> 16) & 0xFFFF);
-                maxch = MAX_MAXCHAR(maxch, ch);
-                ch = (Py_UCS2)((block >> 32) & 0xFFFF);
-                maxch = MAX_MAXCHAR(maxch, ch);
-                ch = (Py_UCS2)(block >> 48);
-                maxch = MAX_MAXCHAR(maxch, ch);
-#else
-                ch = (Py_UCS2)(block >> 16);
-                maxch = MAX_MAXCHAR(maxch, ch);
-#endif
-                if (maxch > PyUnicode_MAX_CHAR_VALUE(unicode)) {
-                    if (unicode_widen(&unicode, outpos, maxch) < 0)
-                        goto onError;
-                    kind = PyUnicode_KIND(unicode);
-                    data = PyUnicode_DATA(unicode);
-                }
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF));
-#if SIZEOF_LONG == 8
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48)));
-#else
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16));
-#endif
-#else
-#if SIZEOF_LONG == 8
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48)));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF));
-#else
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16));
-#endif
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF));
-#endif
-                _q += SIZEOF_LONG;
+            if (kind == PyUnicode_1BYTE_KIND) {
+                if (PyUnicode_IS_ASCII(unicode))
+                    ch = asciilib_utf16_decode(&q, e,
+                            PyUnicode_1BYTE_DATA(unicode), &outpos,
+                            native_ordering);
+                else
+                    ch = ucs1lib_utf16_decode(&q, e,
+                            PyUnicode_1BYTE_DATA(unicode), &outpos,
+                            native_ordering);
+            } else if (kind == PyUnicode_2BYTE_KIND) {
+                ch = ucs2lib_utf16_decode(&q, e,
+                        PyUnicode_2BYTE_DATA(unicode), &outpos,
+                        native_ordering);
+            } else {
+                assert(kind == PyUnicode_4BYTE_KIND);
+                ch = ucs4lib_utf16_decode(&q, e,
+                        PyUnicode_4BYTE_DATA(unicode), &outpos,
+                        native_ordering);
            }
-            q = _q;
-            if (q >= e)
-                break;
        }
-        ch = (q[ihi] << 8) | q[ilo];

-        q += 2;
-
-        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+        switch (ch)
+        {
+        case 0:
+            /* remaining byte at the end? (size should be even) */
+            if (q == e || consumed)
+                goto End;
+            errmsg = "truncated data";
+            startinpos = ((const char *)q) - starts;
+            endinpos = ((const char *)e) - starts;
+            break;
+            /* The remaining input chars are ignored if the callback
+               chooses to skip the input */
+        case 1:
+            errmsg = "unexpected end of data";
+            startinpos = ((const char *)q) - 2 - starts;
+            endinpos = ((const char *)e) - starts;
+            break;
+        case 2:
+            errmsg = "illegal encoding";
+            startinpos = ((const char *)q) - 2 - starts;
+            endinpos = startinpos + 2;
+            break;
+        case 3:
+            errmsg = "illegal UTF-16 surrogate";
+            startinpos = ((const char *)q) - 4 - starts;
+            endinpos = startinpos + 2;
+            break;
+        default:
            if (unicode_putchar(&unicode, &outpos, ch) < 0)
                goto onError;
            continue;
        }

-        /* UTF-16 code pair: */
-        if (q > e) {
-            errmsg = "unexpected end of data";
-            startinpos = (((const char *)q) - 2) - starts;
-            endinpos = ((const char *)e) + 1 - starts;
-            goto utf16Error;
-        }
-        if (Py_UNICODE_IS_HIGH_SURROGATE(ch)) {
-            Py_UCS4 ch2 = (q[ihi] << 8) | q[ilo];
-            q += 2;
-            if (Py_UNICODE_IS_LOW_SURROGATE(ch2)) {
-                if (unicode_putchar(&unicode, &outpos,
-                                    Py_UNICODE_JOIN_SURROGATES(ch, ch2)) < 0)
-                    goto onError;
-                continue;
-            }
-            else {
-                errmsg = "illegal UTF-16 surrogate";
-                startinpos = (((const char *)q)-4)-starts;
-                endinpos = startinpos+2;
-                goto utf16Error;
-            }
-
-        }
-        errmsg = "illegal encoding";
-        startinpos = (((const char *)q)-2)-starts;
-        endinpos = startinpos+2;
-        /* Fall through to report the error */
-
-      utf16Error:
        if (unicode_decode_call_errorhandler(
                errors,
                &errorHandler,
@ -5421,33 +5330,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
                &outpos))
            goto onError;
    }
-    /* remaining byte at the end? (size should be even) */
-    if (e == q) {
-        if (!consumed) {
-            errmsg = "truncated data";
-            startinpos = ((const char *)q) - starts;
-            endinpos = ((const char *)e) + 1 - starts;
-            if (unicode_decode_call_errorhandler(
-                    errors,
-                    &errorHandler,
-                    "utf16", errmsg,
-                    &starts,
-                    (const char **)&e,
-                    &startinpos,
-                    &endinpos,
-                    &exc,
-                    (const char **)&q,
-                    &unicode,
-                    &outpos))
-                goto onError;
-            /* The remaining input chars are ignored if the callback
-               chooses to skip the input */
-        }
-    }
-
-    if (byteorder)
-        *byteorder = bo;

+End:
    if (consumed)
        *consumed = (const char *)q-starts;

@ -5466,9 +5350,6 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
    return NULL;
 }

-#undef FAST_CHAR_MASK
-#undef SWAPPED_FAST_CHAR_MASK
-
 PyObject *
 _PyUnicode_EncodeUTF16(PyObject *str,
                       const char *errors,
--- a/Python/freeze_importlib.py
+++ b/Python/freeze_importlib.py
@ -25,6 +25,8 @@ def main(input_path, output_path):
    with open(output_path, 'w', encoding='utf-8') as output_file:
        output_file.write('\n'.join(lines))
        output_file.write('/* Mercurial binary marker: \x00 */')
+        # Avoid a compiler warning for lack of EOL
+        output_file.write('\n')


 if __name__ == '__main__':
--- a/Python/importlib.h
+++ b/Python/importlib.h
@ -3029,4 +3029,4 @@ unsigned char _Py_M__importlib[] = {
    12,17,6,2,12,47,18,25,12,23,12,15,24,30,6,1,
    6,3,12,54,
 };
-/* Mercurial binary marker:  */
+/* Mercurial binary marker:  */