From 6245cb3c015a5b9febef4d6ef30d3acfc762a79d Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Fri, 15 Apr 2016 02:14:19 +0000 Subject: [PATCH 1/2] =?UTF-8?q?Correct=20=E2=80=9Can=E2=80=9D=20=E2=86=92?= =?UTF-8?q?=20=E2=80=9Ca=E2=80=9D=20with=20=E2=80=9CUnicode=E2=80=9D,=20?= =?UTF-8?q?=E2=80=9Cuser=E2=80=9D,=20=E2=80=9CUTF=E2=80=9D,=20etc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This affects documentation, code comments, and a debugging message. --- Doc/c-api/exceptions.rst | 4 ++-- Doc/c-api/import.rst | 2 +- Doc/c-api/method.rst | 2 +- Doc/c-api/module.rst | 2 +- Doc/c-api/unicode.rst | 6 +++--- Doc/howto/urllib2.rst | 2 +- Doc/library/heapq.rst | 2 +- Doc/library/logging.handlers.rst | 2 +- Doc/library/os.rst | 2 +- Doc/library/re.rst | 2 +- Doc/library/tkinter.ttk.rst | 2 +- Doc/library/webbrowser.rst | 2 +- Doc/library/xml.etree.elementtree.rst | 2 +- Doc/library/xml.sax.reader.rst | 2 +- Doc/library/xmlrpc.client.rst | 2 +- Include/bytesobject.h | 2 +- Include/unicodeobject.h | 8 ++++---- Lib/asyncio/windows_events.py | 2 +- Lib/email/utils.py | 2 +- Lib/heapq.py | 2 +- Lib/nntplib.py | 8 ++++---- Lib/ntpath.py | 2 +- Lib/test/test_codecs.py | 2 +- Lib/test/test_multibytecodec.py | 4 ++-- Lib/test/test_nntplib.py | 2 +- Lib/test/test_unicode.py | 2 +- Lib/test/test_zlib.py | 2 +- Misc/HISTORY | 10 +++++----- Misc/NEWS | 6 +++--- Modules/_heapqmodule.c | 2 +- Modules/_pickle.c | 2 +- Modules/_tracemalloc.c | 2 +- Objects/codeobject.c | 2 +- Objects/unicodeobject.c | 2 +- Python/codecs.c | 4 ++-- 35 files changed, 52 insertions(+), 52 deletions(-) diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index 1e708a81284..19cbb3bcb87 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -285,7 +285,7 @@ an error value). .. c:function:: int PyErr_WarnEx(PyObject *category, const char *message, Py_ssize_t stack_level) Issue a warning message. 
The *category* argument is a warning category (see - below) or *NULL*; the *message* argument is an UTF-8 encoded string. *stack_level* is a + below) or *NULL*; the *message* argument is a UTF-8 encoded string. *stack_level* is a positive number giving a number of stack frames; the warning will be issued from the currently executing line of code in that stack frame. A *stack_level* of 1 is the function calling :c:func:`PyErr_WarnEx`, 2 is the function above that, @@ -609,7 +609,7 @@ The following functions are used to create and modify Unicode exceptions from C. .. c:function:: PyObject* PyUnicodeTranslateError_Create(const Py_UNICODE *object, Py_ssize_t length, Py_ssize_t start, Py_ssize_t end, const char *reason) Create a :class:`UnicodeTranslateError` object with the attributes *object*, - *length*, *start*, *end* and *reason*. *reason* is an UTF-8 encoded string. + *length*, *start*, *end* and *reason*. *reason* is a UTF-8 encoded string. .. c:function:: PyObject* PyUnicodeDecodeError_GetEncoding(PyObject *exc) PyObject* PyUnicodeEncodeError_GetEncoding(PyObject *exc) diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst index 15a9e255018..86c1d7d6e34 100644 --- a/Doc/c-api/import.rst +++ b/Doc/c-api/import.rst @@ -72,7 +72,7 @@ Importing Modules .. c:function:: PyObject* PyImport_ImportModuleLevel(const char *name, PyObject *globals, PyObject *locals, PyObject *fromlist, int level) - Similar to :c:func:`PyImport_ImportModuleLevelObject`, but the name is an + Similar to :c:func:`PyImport_ImportModuleLevelObject`, but the name is a UTF-8 encoded string instead of a Unicode object. .. versionchanged:: 3.3 diff --git a/Doc/c-api/method.rst b/Doc/c-api/method.rst index acc81e48146..7a2a84fe110 100644 --- a/Doc/c-api/method.rst +++ b/Doc/c-api/method.rst @@ -49,7 +49,7 @@ Method Objects .. index:: object: method Methods are bound function objects. Methods are always bound to an instance of -an user-defined class. 
Unbound methods (methods bound to a class object) are +a user-defined class. Unbound methods (methods bound to a class object) are no longer available. diff --git a/Doc/c-api/module.rst b/Doc/c-api/module.rst index ef778ccaedb..97a63546a9d 100644 --- a/Doc/c-api/module.rst +++ b/Doc/c-api/module.rst @@ -50,7 +50,7 @@ Module Objects .. c:function:: PyObject* PyModule_New(const char *name) - Similar to :c:func:`PyImport_NewObject`, but the name is an UTF-8 encoded + Similar to :c:func:`PyImport_NewObject`, but the name is a UTF-8 encoded string instead of a Unicode object. diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 2eeadb52c90..0c9ea8fc7c7 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -423,7 +423,7 @@ APIs: .. c:function:: PyObject *PyUnicode_FromString(const char *u) - Create a Unicode object from an UTF-8 encoded null-terminated char buffer + Create a Unicode object from a UTF-8 encoded null-terminated char buffer *u*. @@ -556,7 +556,7 @@ APIs: .. c:function:: PyObject* PyUnicode_FromEncodedObject(PyObject *obj, \ const char *encoding, const char *errors) - Coerce an encoded object *obj* to an Unicode object and return a reference with + Coerce an encoded object *obj* to a Unicode object and return a reference with incremented refcount. :class:`bytes`, :class:`bytearray` and other @@ -1224,7 +1224,7 @@ These are the UTF-16 codec APIs: If *Py_UNICODE_WIDE* is defined, a single :c:type:`Py_UNICODE` value may get represented as a surrogate pair. If it is not defined, each :c:type:`Py_UNICODE` - values is interpreted as an UCS-2 character. + values is interpreted as a UCS-2 character. Return *NULL* if an exception was raised by the codec. 
diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst index 7d5044b3191..0d04c9f4e68 100644 --- a/Doc/howto/urllib2.rst +++ b/Doc/howto/urllib2.rst @@ -64,7 +64,7 @@ you can do so via the :func:`~urllib.request.urlretrieve` function:: html = open(local_filename) Many uses of urllib will be that simple (note that instead of an 'http:' URL we -could have used an URL starting with 'ftp:', 'file:', etc.). However, it's the +could have used a URL starting with 'ftp:', 'file:', etc.). However, it's the purpose of this tutorial to explain the more complicated cases, concentrating on HTTP. diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index e29a31b3b3a..45720bfaac2 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -242,7 +242,7 @@ for a tournament. The numbers below are *k*, not ``a[k]``:: 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 -In the tree above, each cell *k* is topping ``2*k+1`` and ``2*k+2``. In an usual +In the tree above, each cell *k* is topping ``2*k+1`` and ``2*k+2``. In a usual binary tournament we see in sports, each cell is the winner over the two cells it tops, and we can trace the winner down the tree to see all opponents s/he had. However, in many computer applications of such tournaments, we do not need diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst index 629cd34d49c..5936e12e077 100644 --- a/Doc/library/logging.handlers.rst +++ b/Doc/library/logging.handlers.rst @@ -866,7 +866,7 @@ supports sending logging messages to a Web server, using either ``GET`` or .. method:: emit(record) - Sends the record to the Web server as an URL-encoded dictionary. The + Sends the record to the Web server as a URL-encoded dictionary. The :meth:`mapLogRecord` method is used to convert the record to the dictionary to be sent. 
diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 5cd023f21c7..f2cfdb4cbe2 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1762,7 +1762,7 @@ features: ``os.path.join(os.path.dirname(path), result)``. If the *path* is a string object, the result will also be a string object, - and the call may raise an UnicodeDecodeError. If the *path* is a bytes + and the call may raise a UnicodeDecodeError. If the *path* is a bytes object, the result will be a bytes object. This function can also support :ref:`paths relative to directory descriptors diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 1df768c3287..fdd5083d744 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -12,7 +12,7 @@ those found in Perl. Both patterns and strings to be searched can be Unicode strings as well as 8-bit strings. However, Unicode strings and 8-bit strings cannot be mixed: -that is, you cannot match an Unicode string with a byte pattern or +that is, you cannot match a Unicode string with a byte pattern or vice-versa; similarly, when asking for a substitution, the replacement string must be of the same type as both the pattern and the search string. diff --git a/Doc/library/tkinter.ttk.rst b/Doc/library/tkinter.ttk.rst index 4efdfac8a98..7aebddcaf57 100644 --- a/Doc/library/tkinter.ttk.rst +++ b/Doc/library/tkinter.ttk.rst @@ -701,7 +701,7 @@ the widget option ``displaycolumns``. The tree widget can also display column headings. Columns may be accessed by number or symbolic names listed in the widget option columns. See `Column Identifiers`_. -Each item is identified by an unique name. The widget will generate item IDs +Each item is identified by a unique name. The widget will generate item IDs if they are not supplied by the caller. There is a distinguished root item, named ``{}``. The root item itself is not displayed; its children appear at the top level of the hierarchy. 
diff --git a/Doc/library/webbrowser.rst b/Doc/library/webbrowser.rst index aa5e4ad15d7..17c6dc365ae 100644 --- a/Doc/library/webbrowser.rst +++ b/Doc/library/webbrowser.rst @@ -33,7 +33,7 @@ browsers are not available on Unix, the controlling process will launch a new browser and wait. The script :program:`webbrowser` can be used as a command-line interface for the -module. It accepts an URL as the argument. It accepts the following optional +module. It accepts a URL as the argument. It accepts the following optional parameters: ``-n`` opens the URL in a new browser window, if possible; ``-t`` opens the URL in a new browser page ("tab"). The options are, naturally, mutually exclusive. Usage example:: diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index fe7ad9841ae..01fccb35593 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -978,7 +978,7 @@ QName Objects to get proper namespace handling on output. *text_or_uri* is a string containing the QName value, in the form {uri}local, or, if the tag argument is given, the URI part of a QName. If *tag* is given, the first argument is - interpreted as an URI, and this argument is interpreted as a local name. + interpreted as a URI, and this argument is interpreted as a local name. :class:`QName` instances are opaque. diff --git a/Doc/library/xml.sax.reader.rst b/Doc/library/xml.sax.reader.rst index 31ca260f7ce..b4acd4cdbb1 100644 --- a/Doc/library/xml.sax.reader.rst +++ b/Doc/library/xml.sax.reader.rst @@ -98,7 +98,7 @@ The :class:`XMLReader` interface supports the following methods: Process an input source, producing SAX events. The *source* object can be a system identifier (a string identifying the input source -- typically a file - name or an URL), a file-like object, or an :class:`InputSource` object. When + name or a URL), a file-like object, or an :class:`InputSource` object. 
When :meth:`parse` returns, the input is completely processed, and the parser object can be discarded or reset. diff --git a/Doc/library/xmlrpc.client.rst b/Doc/library/xmlrpc.client.rst index a12e6e18e04..980516873c6 100644 --- a/Doc/library/xmlrpc.client.rst +++ b/Doc/library/xmlrpc.client.rst @@ -420,7 +420,7 @@ by providing an invalid URI:: import xmlrpc.client - # create a ServerProxy with an URI that doesn't respond to XMLRPC requests + # create a ServerProxy with a URI that doesn't respond to XMLRPC requests proxy = xmlrpc.client.ServerProxy("http://google.com/") try: diff --git a/Include/bytesobject.h b/Include/bytesobject.h index e379bace37e..6c1e0c3aacc 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -82,7 +82,7 @@ PyAPI_FUNC(PyObject *) _PyBytes_Join(PyObject *sep, PyObject *x); #endif /* Provides access to the internal data buffer and size of a string - object or the default encoded version of an Unicode object. Passing + object or the default encoded version of a Unicode object. Passing NULL as *len parameter will force the string buffer to be 0-terminated (passing a string with embedded NULL characters will cause an exception). */ diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 143abd0c2fb..4e8e3ec1012 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -823,7 +823,7 @@ PyAPI_FUNC(int) PyUnicode_WriteChar( PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void); #endif -/* Resize an Unicode object. The length is the number of characters, except +/* Resize a Unicode object. The length is the number of characters, except if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length is the number of Py_UNICODE characters. @@ -844,7 +844,7 @@ PyAPI_FUNC(int) PyUnicode_Resize( Py_ssize_t length /* New length */ ); -/* Coerce obj to an Unicode object and return a reference with +/* Coerce obj to a Unicode object and return a reference with *incremented* refcount. 
Coercion is done in the following way: @@ -867,7 +867,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( const char *errors /* error handling */ ); -/* Coerce obj to an Unicode object and return a reference with +/* Coerce obj to a Unicode object and return a reference with *incremented* refcount. Unicode objects are passed back as-is (subclasses are converted to @@ -981,7 +981,7 @@ _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, Py_ssize_t len /* length in bytes */ ); -/* Get the value of the writer as an Unicode string. Clear the +/* Get the value of the writer as a Unicode string. Clear the buffer of the writer. Raise an exception and return NULL on error. */ PyAPI_FUNC(PyObject *) diff --git a/Lib/asyncio/windows_events.py b/Lib/asyncio/windows_events.py index 922594f1724..7be3e022327 100644 --- a/Lib/asyncio/windows_events.py +++ b/Lib/asyncio/windows_events.py @@ -197,7 +197,7 @@ class _WaitHandleFuture(_BaseWaitHandleFuture): # # If the IocpProactor already received the event, it's safe to call # _unregister() because we kept a reference to the Overlapped object - # which is used as an unique key. + # which is used as a unique key. self._proactor._unregister(self._ov) self._proactor = None diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 5080d81909b..a759d23308d 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -87,7 +87,7 @@ def formataddr(pair, charset='utf-8'): 'utf-8'. """ name, address = pair - # The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't. + # The address MUST (per RFC) be ascii, so raise a UnicodeError if it isn't. address.encode('ascii') if name: try: diff --git a/Lib/heapq.py b/Lib/heapq.py index 07af37e717e..0b3e89a3a97 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -54,7 +54,7 @@ representation for a tournament. The numbers below are `k', not a[k]: In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. 
In -an usual binary tournament we see in sports, each cell is the winner +a usual binary tournament we see in sports, each cell is the winner over the two cells it tops, and we can trace the winner down the tree to see all opponents s/he had. However, in many computer applications of such tournaments, we do not need to trace the history of a winner. diff --git a/Lib/nntplib.py b/Lib/nntplib.py index a75faade146..28cd0992dd4 100644 --- a/Lib/nntplib.py +++ b/Lib/nntplib.py @@ -165,7 +165,7 @@ ArticleInfo = collections.namedtuple('ArticleInfo', # Helper function(s) def decode_header(header_str): - """Takes an unicode string representing a munged header value + """Takes a unicode string representing a munged header value and decodes it as a (possibly non-ASCII) readable value.""" parts = [] for v, enc in _email_decode_header(header_str): @@ -420,7 +420,7 @@ class _NNTPBase: def _putcmd(self, line): """Internal: send one command to the server (through _putline()). - The `line` must be an unicode string.""" + The `line` must be a unicode string.""" if self.debugging: print('*cmd*', repr(line)) line = line.encode(self.encoding, self.errors) self._putline(line) @@ -445,7 +445,7 @@ class _NNTPBase: def _getresp(self): """Internal: get a response from the server. Raise various errors if the response indicates an error. - Returns an unicode string.""" + Returns a unicode string.""" resp = self._getline() if self.debugging: print('*resp*', repr(resp)) resp = resp.decode(self.encoding, self.errors) @@ -462,7 +462,7 @@ class _NNTPBase: """Internal: get a response plus following text from the server. Raise various errors if the response indicates an error. - Returns a (response, lines) tuple where `response` is an unicode + Returns a (response, lines) tuple where `response` is a unicode string and `lines` is a list of bytes objects. If `file` is a file-like object, it must be open in binary mode. 
""" diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 9cc5ca738d4..af6a7091f9a 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -177,7 +177,7 @@ def splitunc(p): Return a 2-tuple (unc, rest); either part may be empty. If unc is not empty, it has the form '//host/mount' (or similar using backslashes). unc+rest is always the input path. - Paths containing drive letters never have an UNC part. + Paths containing drive letters never have a UNC part. """ import warnings warnings.warn("ntpath.splitunc is deprecated, use ntpath.splitdrive instead", diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index b93e0ab0e2b..c98afea5322 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -2355,7 +2355,7 @@ class TypesTest(unittest.TestCase): self.assertRaises(TypeError, decoder, "xxx") def test_unicode_escape(self): - # Escape-decoding an unicode string is supported ang gives the same + # Escape-decoding a unicode string is supported ang gives the same # result as decoding the equivalent ASCII bytes string. 
self.assertEqual(codecs.unicode_escape_decode(r"\u1234"), ("\u1234", 6)) self.assertEqual(codecs.unicode_escape_decode(br"\u1234"), ("\u1234", 6)) diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py index 2929f988a8e..8d7a213c168 100644 --- a/Lib/test/test_multibytecodec.py +++ b/Lib/test/test_multibytecodec.py @@ -67,7 +67,7 @@ class Test_MultibyteCodec(unittest.TestCase): _multibytecodec.MultibyteStreamWriter, None) def test_decode_unicode(self): - # Trying to decode an unicode string should raise a TypeError + # Trying to decode a unicode string should raise a TypeError for enc in ALL_CJKENCODINGS: self.assertRaises(TypeError, codecs.getdecoder(enc), "") @@ -160,7 +160,7 @@ class Test_IncrementalDecoder(unittest.TestCase): self.assertEqual(decoder.decode(b'B@$'), '\u4e16') def test_decode_unicode(self): - # Trying to decode an unicode string should raise a TypeError + # Trying to decode a unicode string should raise a TypeError for enc in ALL_CJKENCODINGS: decoder = codecs.getincrementaldecoder(enc)() self.assertRaises(TypeError, decoder.decode, "") diff --git a/Lib/test/test_nntplib.py b/Lib/test/test_nntplib.py index 3c69c3e51eb..994532b61ee 100644 --- a/Lib/test/test_nntplib.py +++ b/Lib/test/test_nntplib.py @@ -609,7 +609,7 @@ class NNTPv1Handler: "\t\t6683\t16" "\t" "\n" - # An UTF-8 overview line from fr.comp.lang.python + # A UTF-8 overview line from fr.comp.lang.python "59\tRe: Message d'erreur incompréhensible (par moi)" "\tEric Brunel " "\tWed, 15 Sep 2010 18:09:15 +0200" diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index ab263ab1ece..a38e7b16104 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1774,7 +1774,7 @@ class UnicodeTest(string_tests.CommonTest, def assertCorrectUTF8Decoding(self, seq, res, err): """ - Check that an invalid UTF-8 sequence raises an UnicodeDecodeError when + Check that an invalid UTF-8 sequence raises a UnicodeDecodeError when 'strict' is used, returns res 
when 'replace' is used, and that doesn't return anything when 'ignore' is used. """ diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index ecdb5a7de86..1cafcb59875 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -171,7 +171,7 @@ class CompressTestCase(BaseCompressTestCase, unittest.TestCase): self.assertEqual(zlib.decompress(ob), data) def test_incomplete_stream(self): - # An useful error message is given + # A useful error message is given x = zlib.compress(HAMLET_SCENE) self.assertRaisesRegex(zlib.error, "Error -5 while decompressing data: incomplete or truncated stream", diff --git a/Misc/HISTORY b/Misc/HISTORY index f964f5fb6fb..a8de7f91711 100644 --- a/Misc/HISTORY +++ b/Misc/HISTORY @@ -2237,7 +2237,7 @@ Library attribute which allows setting custom per-pickler reduction functions. Patch by sbt. -- Issue #14177: marshal.loads() now raises TypeError when given an unicode +- Issue #14177: marshal.loads() now raises TypeError when given a unicode string. Patch by Guilherme Gonçalves. - Issue #13550: Remove the debug machinery from the threading module: remove @@ -8304,7 +8304,7 @@ Library - Issue #1664: Make nntplib IPv6-capable. Patch by Derek Morr. - Issue #5006: Better handling of unicode byte-order marks (BOM) in the io - library. This means, for example, that opening an UTF-16 text file in + library. This means, for example, that opening a UTF-16 text file in append mode doesn't add a BOM at the end of the file if the file isn't empty. @@ -9328,7 +9328,7 @@ Library - Issue #4756: zipfile.is_zipfile() now supports file-like objects. Patch by Gabriel Genellina. -- Issue #4574: reading an UTF16-encoded text file crashes if \r on 64-char +- Issue #4574: reading a UTF16-encoded text file crashes if \r on 64-char boundary. - Issue #4223: inspect.getsource() will now correctly display source code @@ -9929,7 +9929,7 @@ Extension Modules exploitation of poor argument checking. - bsddb code updated to version 4.7.3pre2. 
This code is the same than - Python 2.6 one, since the intention is to keep an unified 2.x/3.x codebase. + Python 2.6 one, since the intention is to keep a unified 2.x/3.x codebase. The Python code is automatically translated using "2to3". Please, do not update this code in Python 3.0 by hand. Update the 2.6 one and then do "2to3". @@ -18936,7 +18936,7 @@ Core language, builtins, and interpreter - There is a new Unicode companion to the PyObject_Str() API called PyObject_Unicode(). It behaves in the same way as the - former, but assures that the returned value is an Unicode object + former, but assures that the returned value is a Unicode object (applying the usual coercion if necessary). - The comparison operators support "rich comparison overloading" (PEP diff --git a/Misc/NEWS b/Misc/NEWS index 37dc45333d7..124866423d8 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -2227,7 +2227,7 @@ Core and Builtins static type in some cases. - Issue #15859: PyUnicode_EncodeFSDefault(), PyUnicode_EncodeMBCS() and - PyUnicode_EncodeCodePage() now raise an exception if the object is not an + PyUnicode_EncodeCodePage() now raise an exception if the object is not a Unicode object. For PyUnicode_EncodeFSDefault(), it was already the case on platforms other than Windows. Patch written by Campbell Barton. @@ -3318,7 +3318,7 @@ Library - Issue #21226: Set up modules properly in PyImport_ExecCodeModuleObject (and friends). -- Issue #21398: Fix an unicode error in the pydoc pager when the documentation +- Issue #21398: Fix a unicode error in the pydoc pager when the documentation contains characters not encodable to the stdout encoding. - Issue #16531: ipaddress.IPv4Network and ipaddress.IPv6Network now accept @@ -6322,7 +6322,7 @@ Core and Builtins - Issue #17173: Remove uses of locale-dependent C functions (isalpha() etc.) in the interpreter. 
-- Issue #17137: When an Unicode string is resized, the internal wide character +- Issue #17137: When a Unicode string is resized, the internal wide character string (wstr) format is now cleared. - Issue #17043: The unicode-internal decoder no longer read past the end of diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index c343862b8cc..136abf59859 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -550,7 +550,7 @@ representation for a tournament. The numbers below are `k', not a[k]:\n\ \n\ \n\ In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\ -an usual binary tournament we see in sports, each cell is the winner\n\ +a usual binary tournament we see in sports, each cell is the winner\n\ over the two cells it tops, and we can trace the winner down the tree\n\ to see all opponents s/he had. However, in many computer applications\n\ of such tournaments, we do not need to trace the history of a winner.\n\ diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 1acf14ade87..78cfe203840 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1096,7 +1096,7 @@ _Unpickler_SkipConsumed(UnpicklerObject *self) return 0; assert(self->peek); /* otherwise we did something wrong */ - /* This makes an useless copy... */ + /* This makes a useless copy... 
*/ r = PyObject_CallFunction(self->read, "n", consumed); if (r == NULL) return -1; diff --git a/Modules/_tracemalloc.c b/Modules/_tracemalloc.c index 6327c95d2a6..60c1b063ac8 100644 --- a/Modules/_tracemalloc.c +++ b/Modules/_tracemalloc.c @@ -296,7 +296,7 @@ tracemalloc_get_frame(PyFrameObject *pyframe, frame_t *frame) if (!PyUnicode_Check(filename)) { #ifdef TRACE_DEBUG - tracemalloc_error("filename is not an unicode string"); + tracemalloc_error("filename is not a unicode string"); #endif return; } diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 3ad0e9942b6..964ae62146b 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -520,7 +520,7 @@ _PyCode_ConstantKey(PyObject *op) return key; } else { - /* for other types, use the object identifier as an unique identifier + /* for other types, use the object identifier as a unique identifier * to ensure that they are seen as unequal. */ PyObject *obj_id = PyLong_FromVoidPtr(op); if (obj_id == NULL) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 230125b62ba..b146da952db 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -675,7 +675,7 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(const void *s, int kind, } #ifdef Py_DEBUG -/* Fill the data of an Unicode string with invalid characters to detect bugs +/* Fill the data of a Unicode string with invalid characters to detect bugs earlier. _PyUnicode_CheckConsistency(str, 1) detects invalid characters, at least for diff --git a/Python/codecs.c b/Python/codecs.c index d90bf7374d4..fe57d0dc42d 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -403,7 +403,7 @@ wrap_codec_error(const char *operation, operation, encoding); } -/* Encode an object (e.g. an Unicode object) using the given encoding +/* Encode an object (e.g. a Unicode object) using the given encoding and return the resulting encoded object (usually a Python string). errors is passed to the encoder factory as argument if non-NULL. 
*/ @@ -450,7 +450,7 @@ _PyCodec_EncodeInternal(PyObject *object, } /* Decode an object (usually a Python string) using the given encoding - and return an equivalent object (e.g. an Unicode object). + and return an equivalent object (e.g. a Unicode object). errors is passed to the decoder factory as argument if non-NULL. */ From 20d325574eaa4f2a88036eac81e8d3cf9135372f Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Fri, 15 Apr 2016 00:56:21 +0000 Subject: [PATCH 2/2] Issue #15984: Correct PyUnicode_FromObject() and _FromEncodedObject() docs --- Doc/c-api/unicode.rst | 12 +++++++----- Include/unicodeobject.h | 24 ++++++++---------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 0c9ea8fc7c7..1ed81403b4f 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -556,14 +556,13 @@ APIs: .. c:function:: PyObject* PyUnicode_FromEncodedObject(PyObject *obj, \ const char *encoding, const char *errors) - Coerce an encoded object *obj* to a Unicode object and return a reference with - incremented refcount. + Decode an encoded object *obj* to a Unicode object. :class:`bytes`, :class:`bytearray` and other :term:`bytes-like objects <bytes-like object>` are decoded according to the given *encoding* and using the error handling defined by *errors*. Both can be *NULL* to have the interface use the default - values (see the next section for details). + values (see :ref:`builtincodecs` for details). All other objects, including Unicode objects, cause a :exc:`TypeError` to be set. @@ -745,8 +744,11 @@ Extension modules can continue using them, as they will not be removed in Python .. c:function:: PyObject* PyUnicode_FromObject(PyObject *obj) - Shortcut for ``PyUnicode_FromEncodedObject(obj, NULL, "strict")`` which is used - throughout the interpreter whenever coercion to Unicode is needed. + Copy an instance of a Unicode subtype to a new true Unicode object if + necessary. 
If *obj* is already a true Unicode object (not a subtype), + return the reference with incremented refcount. + + Objects other than Unicode or its subtypes will cause a :exc:`TypeError`. Locale Encoding diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 4e8e3ec1012..ea4591d598f 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -844,17 +844,13 @@ PyAPI_FUNC(int) PyUnicode_Resize( Py_ssize_t length /* New length */ ); -/* Coerce obj to a Unicode object and return a reference with - *incremented* refcount. +/* Decode obj to a Unicode object. - Coercion is done in the following way: + bytes, bytearray and other bytes-like objects are decoded according to the + given encoding and error handler. The encoding and error handler can be + NULL to have the interface use UTF-8 and "strict". - 1. bytes, bytearray and other bytes-like objects are decoded - under the assumptions that they contain data using the UTF-8 - encoding. Decoding is done in "strict" mode. - - 2. All other objects (including Unicode objects) raise an - exception. + All other objects (including Unicode objects) raise an exception. The API returns NULL in case of an error. The caller is responsible for decref'ing the returned objects. @@ -867,13 +863,9 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( const char *errors /* error handling */ ); -/* Coerce obj to a Unicode object and return a reference with - *incremented* refcount. - - Unicode objects are passed back as-is (subclasses are converted to - true Unicode objects), all other objects are delegated to - PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in - using UTF-8 encoding as basis for decoding the object. +/* Copy an instance of a Unicode subtype to a new true Unicode object if + necessary. If obj is already a true Unicode object (not a subtype), return + the reference with *incremented* refcount. The API returns NULL in case of an error. 
The caller is responsible for decref'ing the returned objects.