diff --git a/Doc/howto/cporting.rst b/Doc/howto/cporting.rst new file mode 100644 index 00000000000..3451f5c061f --- /dev/null +++ b/Doc/howto/cporting.rst @@ -0,0 +1,216 @@ +.. highlightlang:: c + +******************************** +Porting Extension Modules to 3.0 +******************************** + +:author: Benjamin Peterson + + +.. topic:: Abstract + + Although changing the C-API was not one of Python 3.0's objectives, the many + Python level changes made leaving 2.x's API intact impossible. In fact, some + changes such as :func:`int` and :func:`long` unification are more obvious on + the C level. This document endeavors to document incompatibilities and how + they can be worked around. + + +Conditional compilation +======================= + +The easiest way to compile only some code for 3.0 is to check if +:cmacro:`PY_MAJOR_VERSION` is greater than or equal to 3. :: + + #if PY_MAJOR_VERSION >= 3 + #define IS_PY3K + #endif + +API functions that are not present can be aliased to their equivalents within +conditional blocks. + + +Changes to Object APIs +====================== + +Python 3.0 merged together some types with similar functions while cleanly +separating others. + + +str/unicode Unification +----------------------- + + +Python 3.0's :func:`str` type is equivalent to 2.x's :func:`unicode`; in C, both use the +``PyUnicode_*`` functions. The old 8-bit string type (``PyString_*``) has become +:func:`bytes`. Python 2.6 and later provide a compatibility header, +:file:`bytesobject.h`, mapping ``PyBytes`` names to ``PyString`` ones. For best +compatibility with 3.0, :ctype:`PyUnicode` should be used for textual data and +:ctype:`PyBytes` for binary data. It's also important to remember that +:ctype:`PyBytes` and :ctype:`PyUnicode` in 3.0 are not interchangeable like +:ctype:`PyString` and :ctype:`PyUnicode` are in 2.x. The following example shows +best practices with regards to :ctype:`PyUnicode`, :ctype:`PyString`, and +:ctype:`PyBytes`. 
:: + + #include "stdlib.h" + #include "Python.h" + #include "bytesobject.h" + + /* text example */ + static PyObject * + say_hello(PyObject *self, PyObject *args) { + PyObject *name, *result; + + if (!PyArg_ParseTuple(args, "U:say_hello", &name)) + return NULL; + + result = PyUnicode_FromFormat("Hello, %S!", name); + return result; + } + + /* just a forward */ + static char * do_encode(PyObject *); + + /* bytes example */ + static PyObject * + encode_object(PyObject *self, PyObject *args) { + char *encoded; + PyObject *result, *myobj; + + if (!PyArg_ParseTuple(args, "O:encode_object", &myobj)) + return NULL; + + encoded = do_encode(myobj); + if (encoded == NULL) + return NULL; + result = PyBytes_FromString(encoded); + free(encoded); + return result; + } + + +long/int Unification +-------------------- + +In Python 3.0, there is only one integer type. It is called :func:`int` on the +Python level, but actually corresponds to 2.x's :func:`long` type. In the +C-API, ``PyInt_*`` functions are replaced by their ``PyLong_*`` neighbors. The +best course of action here is using the ``PyInt_*`` functions aliased to +``PyLong_*`` found in :file:`intobject.h`. The abstract ``PyNumber_*`` APIs +can also be used in some cases. :: + + #include "Python.h" + #include "intobject.h" + + static PyObject * + add_ints(PyObject *self, PyObject *args) { + int one, two; + PyObject *result; + + if (!PyArg_ParseTuple(args, "ii:add_ints", &one, &two)) + return NULL; + + return PyInt_FromLong(one + two); + } + + + +Module initialization and state +=============================== + +Python 3.0 has a revamped extension module initialization system. (See +:pep:`3121`.) Instead of storing module state in globals, it should be stored +in an interpreter specific structure. Creating modules that act correctly in +both 2.x and 3.0 is tricky. The following simple example demonstrates how. 
:: + + #include "Python.h" + + struct module_state { + PyObject *error; + }; + + #if PY_MAJOR_VERSION >= 3 + #define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) + #else + #define GETSTATE(m) (&_state) + static struct module_state _state; + #endif + + static PyObject * + error_out(PyObject *m) { + struct module_state *st = GETSTATE(m); + PyErr_SetString(st->error, "something bad happened"); + return NULL; + } + + static PyMethodDef myextension_methods[] = { + {"error_out", (PyCFunction)error_out, METH_NOARGS, NULL}, + {NULL, NULL} + }; + + #if PY_MAJOR_VERSION >= 3 + + static int myextension_traverse(PyObject *m, visitproc visit, void *arg) { + Py_VISIT(GETSTATE(m)->error); + return 0; + } + + static int myextension_clear(PyObject *m) { + Py_CLEAR(GETSTATE(m)->error); + return 0; + } + + + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "myextension", + NULL, + sizeof(struct module_state), + myextension_methods, + NULL, + myextension_traverse, + myextension_clear, + NULL + }; + + #define INITERROR return NULL + + PyObject * + PyInit_myextension(void) + + #else + #define INITERROR return + + void + initmyextension(void) + #endif + { + #if PY_MAJOR_VERSION >= 3 + PyObject *module = PyModule_Create(&moduledef); + #else + PyObject *module = Py_InitModule("myextension", myextension_methods); + #endif + + if (module == NULL) + INITERROR; + struct module_state *st = GETSTATE(module); + + st->error = PyErr_NewException("myextension.Error", NULL, NULL); + if (st->error == NULL) { + Py_DECREF(module); + INITERROR; + } + + #if PY_MAJOR_VERSION >= 3 + return module; + #endif + } + + +Other options +============= + +If you are writing a new extension module, you might consider `Cython +`_. It translates a Python-like language to C. The +extension modules it creates are compatible with Python 3.x and 2.x. 
+ diff --git a/Doc/howto/index.rst b/Doc/howto/index.rst index 5a1f397c584..7d6468860db 100644 --- a/Doc/howto/index.rst +++ b/Doc/howto/index.rst @@ -14,6 +14,7 @@ Currently, the HOWTOs are: :maxdepth: 1 advocacy.rst + cporting.rst curses.rst doanddont.rst functional.rst diff --git a/Doc/library/2to3.rst b/Doc/library/2to3.rst index 80401243b09..2e9547c6d46 100644 --- a/Doc/library/2to3.rst +++ b/Doc/library/2to3.rst @@ -74,7 +74,9 @@ warning beneath the diff for a file. You should address the warning in order to have compliant 3.x code. 2to3 can also refactor doctests. To enable this mode, use the :option:`-d` -flag. Note that *only* doctests will be refactored. +flag. Note that *only* doctests will be refactored. This also doesn't require +the module to be valid Python. For example, doctest like examples in a reST +document could also be refactored with this option. The :option:`-v` option enables the output of more information on the translation process. @@ -95,4 +97,10 @@ true function call. .. moduleauthor:: Guido van Rossum .. moduleauthor:: Collin Winter + +.. warning:: + + The :mod:`lib2to3` API should be considered unstable and may change + drastically in the future. + .. XXX What is the public interface anyway? diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 4eaa690de18..b67d724e13f 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -370,9 +370,9 @@ Encoders and decoders def default(self, o): try: - iterable = iter(o) + iterable = iter(o) except TypeError: - pass + pass else: return list(iterable) return JSONEncoder.default(self, o) diff --git a/Doc/library/optparse.rst b/Doc/library/optparse.rst index de1a116ce2b..4936e7d3603 100644 --- a/Doc/library/optparse.rst +++ b/Doc/library/optparse.rst @@ -1193,17 +1193,32 @@ traditional Unix exit status for command-line errors). 
Querying and manipulating your option parser ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Sometimes, it's useful to poke around your option parser and see what's there. -OptionParser provides a couple of methods to help you out: +The default behavior of the option parser can be customized slightly, +and you can also poke around your option parser and see what's there. +OptionParser provides several methods to help you out: -``has_option(opt_str)`` - Return true if the OptionParser has an option with option string ``opt_str`` - (e.g., ``"-q"`` or ``"--verbose"``). +``disable_interspersed_args()`` + Set parsing to stop on the first non-option. Use this if you have a + command processor which runs another command which has options of + its own and you want to make sure these options don't get + confused. For example, each command might have a different + set of options. + +``enable_interspersed_args()`` + Set parsing to not stop on the first non-option, allowing + interspersing switches with command arguments. For example, + ``"-s arg1 --long arg2"`` would return ``["arg1", "arg2"]`` + as the command arguments and ``-s, --long`` as options. + This is the default behavior. ``get_option(opt_str)`` Returns the Option instance with the option string ``opt_str``, or ``None`` if no options have that option string. +``has_option(opt_str)`` + Return true if the OptionParser has an option with option string ``opt_str`` + (e.g., ``"-q"`` or ``"--verbose"``). + ``remove_option(opt_str)`` If the OptionParser has an option corresponding to ``opt_str``, that option is removed. If that option provided any other option strings, all of those option diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 221596c3da4..9e14003ac86 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1215,7 +1215,13 @@ to be ignored. These functions all execute a new program, replacing the current process; they do not return. 
On Unix, the new executable is loaded into the current process, and will have the same process id as the caller. Errors will be reported as - :exc:`OSError` exceptions. + :exc:`OSError` exceptions. + + The current process is replaced immediately. Open file objects and + descriptors are not flushed, so if there may be data buffered + on these open files, you should flush them using + :func:`sys.stdout.flush` or :func:`os.fsync` before calling an + :func:`exec\*` function. The "l" and "v" variants of the :func:`exec\*` functions differ in how command-line arguments are passed. The "l" variants are perhaps the easiest @@ -1241,8 +1247,9 @@ to be ignored. used to define the environment variables for the new process (these are used instead of the current process' environment); the functions :func:`execl`, :func:`execlp`, :func:`execv`, and :func:`execvp` all cause the new process to - inherit the environment of the current process. Availability: Unix, - Windows. + inherit the environment of the current process. + + Availability: Unix, Windows. .. function:: _exit(n) diff --git a/Doc/library/platform.rst b/Doc/library/platform.rst index 22ac72d9f01..3d5228ae047 100644 --- a/Doc/library/platform.rst +++ b/Doc/library/platform.rst @@ -226,29 +226,23 @@ Unix Platforms .. function:: dist(distname='', version='', id='', supported_dists=('SuSE','debian','redhat','mandrake',...)) - Tries to determine the name of the OS distribution name Returns a tuple - ``(distname, version, id)`` which defaults to the args given as parameters. - - ``supported_dists`` may be given to define the set of Linux - distributions to look for. It defaults to a list of currently - supported Linux distributions identified by their release file - name. + This is another name for :func:`linux_distribution`. .. 
function:: linux_distribution(distname='', version='', id='', supported_dists=('SuSE','debian','redhat','mandrake',...), full_distribution_name=1) Tries to determine the name of the Linux OS distribution name. - ``supported_dists`` may be given to define the set of Linux - distributions to look for. It defaults to a list of currently - supported Linux distributions identified by their release file - name. + ``supported_dists`` may be given to define the set of Linux distributions to + look for. It defaults to a list of currently supported Linux distributions + identified by their release file name. - If ``full_distribution_name`` is true (default), the full - distribution read from the OS is returned. Otherwise the short name - taken from ``supported_dists`` is used. + If ``full_distribution_name`` is true (default), the full distribution read + from the OS is returned. Otherwise the short name taken from + ``supported_dists`` is used. - Returns a tuple ``(distname,version,id)`` which defaults to the - args given as parameters. + Returns a tuple ``(distname,version,id)`` which defaults to the args given as + parameters. ``id`` is the item in parentheses after the version number. It + is usually the version codename. .. 
function:: libc_ver(executable=sys.executable, lib='', version='', chunksize=2048) diff --git a/Doc/library/site.rst b/Doc/library/site.rst index 73ec7bfc8a1..0fe63a3a4d0 100644 --- a/Doc/library/site.rst +++ b/Doc/library/site.rst @@ -59,10 +59,11 @@ and :file:`bar.pth` contains:: bar -Then the following directories are added to ``sys.path``, in this order:: +Then the following version-specific directories are added to +``sys.path``, in this order:: - /usr/local/lib/python3.0/site-packages/bar - /usr/local/lib/python3.0/site-packages/foo + /usr/local/lib/pythonX.Y/site-packages/bar + /usr/local/lib/pythonX.Y/site-packages/foo Note that :file:`bletch` is omitted because it doesn't exist; the :file:`bar` directory precedes the :file:`foo` directory because :file:`bar.pth` comes diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 8f78498f624..620d08e3382 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -207,18 +207,18 @@ The module :mod:`socket` exports the following constants and functions: .. function:: getaddrinfo(host, port[, family[, socktype[, proto[, flags]]]]) Resolves the *host*/*port* argument, into a sequence of 5-tuples that contain - all the necessary argument for the sockets manipulation. *host* is a domain - name, a string representation of IPv4/v6 address or ``None``. *port* is a string - service name (like ``'http'``), a numeric port number or ``None``. + all the necessary arguments for creating the corresponding socket. *host* is a domain + name, a string representation of an IPv4/v6 address or ``None``. *port* is a string + service name such as ``'http'``, a numeric port number or ``None``. + The rest of the arguments are optional and must be numeric if specified. + By passing ``None`` as the value of *host* and *port*, you can pass ``NULL`` to the C API. - The rest of the arguments are optional and must be numeric if specified. For - *host* and *port*, by passing ``None``, you can pass ``NULL`` to the C API. 
The :func:`getaddrinfo` function returns a list of 5-tuples with the following structure: ``(family, socktype, proto, canonname, sockaddr)`` - *family*, *socktype*, *proto* are all integer and are meant to be passed to the + *family*, *socktype*, *proto* are all integers and are meant to be passed to the :func:`socket` function. *canonname* is a string representing the canonical name of the *host*. It can be a numeric IPv4/v6 address when :const:`AI_CANONNAME` is specified for a numeric *host*. *sockaddr* is a tuple describing a socket @@ -230,7 +230,7 @@ The module :mod:`socket` exports the following constants and functions: Return a fully qualified domain name for *name*. If *name* is omitted or empty, it is interpreted as the local host. To find the fully qualified name, the - hostname returned by :func:`gethostbyaddr` is checked, then aliases for the + hostname returned by :func:`gethostbyaddr` is checked, followed by aliases for the host, if available. The first name which includes a period is selected. In case no fully qualified domain name is available, the hostname as returned by :func:`gethostname` is returned. diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst index 9410e2219cb..7e4d68ab7cc 100644 --- a/Doc/whatsnew/2.6.rst +++ b/Doc/whatsnew/2.6.rst @@ -1806,8 +1806,11 @@ changes, or look through the Subversion logs for all the details. is now available as a standalone package. The web page for the package is `www.jcea.es/programacion/pybsddb.htm `__. + The plan is to remove the package from the standard library + in Python 3.0, because its pace of releases is much more frequent than + Python's. -* The :mod:`bsddb.dbshelve` module now uses the highest pickling protocol + The :mod:`bsddb.dbshelve` module now uses the highest pickling protocol available, instead of restricting itself to protocol 1. (Contributed by W. Barnes; :issue:`1551443`.) @@ -1817,6 +1820,12 @@ changes, or look through the Subversion logs for all the details. 
"/cgi-bin/add.py?category=1". (Contributed by Alexandre Fiori and Nubis; :issue:`1817`.) + The :func:`parse_qs` and :func:`parse_qsl` functions have been + relocated from the :mod:`cgi` module to the :mod:`urlparse` module. + The versions still available in the :mod:`cgi` module will + trigger :exc:`PendingDeprecationWarning` messages in 2.6 + (:issue:`600362`). + * The :mod:`cmath` module underwent extensive revision, contributed by Mark Dickinson and Christian Heimes. Five new functions were added: @@ -1900,6 +1909,11 @@ changes, or look through the Subversion logs for all the details. (Contributed by Raymond Hettinger.) +* The :mod:`Cookie` module's :class:`Morsel` objects now support an + :attr:`httponly` attribute. In some browsers, cookies with this attribute + set cannot be accessed or manipulated by JavaScript code. + (Contributed by Arvin Schnell; :issue:`1638033`.) + * A new window method in the :mod:`curses` module, :meth:`chgat`, changes the display attributes for a certain number of characters on a single line. (Contributed by Fabian Kreutz.) :: @@ -2498,8 +2512,9 @@ changes, or look through the Subversion logs for all the details. ``with tempfile.NamedTemporaryFile() as tmp: ...``. (Contributed by Alexander Belopolsky; :issue:`2021`.) -* The :mod:`test.test_support` module now contains an - :func:`EnvironmentVarGuard` +* The :mod:`test.test_support` module gained a number + of context managers useful for writing tests. + :func:`EnvironmentVarGuard` is a context manager that temporarily changes environment variables and automatically restores them to their old values. @@ -2514,6 +2529,16 @@ changes, or look through the Subversion logs for all the details. f = urllib.urlopen('https://sf.net') ... 
+ Finally, :func:`check_warnings` resets the :mod:`warnings` module's + warning filters and returns an object that will record all warning + messages triggered (:issue:`3781`):: + + with test_support.check_warnings() as wrec: + warnings.simplefilter("always") + ... code that triggers a warning ... + assert str(wrec.message) == "function is outdated" + assert len(wrec.warnings) == 1, "Multiple warnings raised" + (Contributed by Brett Cannon.) * The :mod:`textwrap` module can now preserve existing whitespace @@ -2600,11 +2625,19 @@ changes, or look through the Subversion logs for all the details. (Added by Facundo Batista.) +* The Unicode database provided by the :mod:`unicodedata` module + has been updated to version 5.1.0. (Updated by + Martin von Loewis; :issue:`3811`.) + * The :mod:`warnings` module's :func:`formatwarning` and :func:`showwarning` gained an optional *line* argument that can be used to supply the line of source code. (Added as part of :issue:`1631171`, which re-implemented part of the :mod:`warnings` module in C code.) + A new function, :func:`catch_warnings`, is a context manager + intended for testing purposes that lets you temporarily modify the + warning filters and then restore their original values (:issue:`3781`). + * The XML-RPC :class:`SimpleXMLRPCServer` and :class:`DocXMLRPCServer` classes can now be prevented from immediately opening and binding to their socket by passing True as the ``bind_and_activate`` diff --git a/Lib/collections.py b/Lib/collections.py index 6d439e52690..458cbefdf6b 100644 --- a/Lib/collections.py +++ b/Lib/collections.py @@ -43,7 +43,7 @@ def namedtuple(typename, field_names, verbose=False): # generating informative error messages and preventing template injection attacks. 
if isinstance(field_names, str): field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas - field_names = tuple(field_names) + field_names = tuple(map(str, field_names)) for name in (typename,) + field_names: if not all(c.isalnum() or c=='_' for c in name): raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name) diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index ec0aa809f3c..60072cc7706 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -37,6 +37,11 @@ class TestNamedTuple(unittest.TestCase): namedtuple('Point0', 'x1 y2') # Verify that numbers are allowed in names namedtuple('_', 'a b c') # Test leading underscores in a typename + nt = namedtuple('nt', 'the quick brown fox') # check unicode input + self.assert_("u'" not in repr(nt._fields)) + nt = namedtuple('nt', ('the', 'quick')) # check unicode input + self.assert_("u'" not in repr(nt._fields)) + self.assertRaises(TypeError, Point._make, [11]) # catch too few args self.assertRaises(TypeError, Point._make, [11, 22, 33]) # catch too many args