bpo-12486: Document tokenize.generate_tokens() as public API (#6957)

* Document tokenize.generate_tokens()
* Add news file
* Add test for generate_tokens
* Document behaviour around ENCODING token
* Add generate_tokens to __all__
2018-06-05 19:26:39 +02:00 · 2018-06-05 19:26:39 +02:00 · c56b17bd8c
parent c2745d2d05
commit c56b17bd8c
4 changed files with 35 additions and 6 deletions
--- a/Doc/library/tokenize.rst
+++ b/Doc/library/tokenize.rst
@ -57,6 +57,16 @@ The primary entry point is a :term:`generator`:
   :func:`.tokenize` determines the source encoding of the file by looking for a
   UTF-8 BOM or encoding cookie, according to :pep:`263`.
 .. function:: generate_tokens(readline)
   Tokenize a source reading unicode strings instead of bytes.
   Like :func:`.tokenize`, the *readline* argument is a callable returning
   a single line of input. However, :func:`generate_tokens` expects *readline*
   to return a str object rather than bytes.
   The result is an iterator yielding named tuples, exactly like
   :func:`.tokenize`. It does not yield an :data:`~token.ENCODING` token.
 All constants from the :mod:`token` module are also exported from
 :mod:`tokenize`.
@ -79,7 +89,8 @@ write back the modified script.
    positions) may change.
    It returns bytes, encoded using the :data:`~token.ENCODING` token, which
-    is the first token sequence output by :func:`.tokenize`.
+    is the first token sequence output by :func:`.tokenize`. If there is no
    encoding token in the input, it returns a str instead.
 :func:`.tokenize` needs to detect the encoding of source files it tokenizes. The
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@ -1,8 +1,8 @@
 from test import support
 from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                     STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
-                     open as tokenize_open, Untokenizer)
+                     open as tokenize_open, Untokenizer, generate_tokens)
-from io import BytesIO
+from io import BytesIO, StringIO
 import unittest
 from unittest import TestCase, mock
 from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
@ -919,6 +919,19 @@ async def f():
    DEDENT     ''            (7, 0) (7, 0)
    """)
 class GenerateTokensTest(TokenizeTest):
    """Check tokenization of str input through generate_tokens().

    Only check_tokenize() is overridden here.
    NOTE(review): presumably the test methods inherited from TokenizeTest
    call check_tokenize(); the base class is not shown here -- confirm.
    """
    def check_tokenize(self, s, expected):
        """Tokenize *s* with generate_tokens() and compare with *expected*.

        *s* is wrapped in a StringIO, so the readline callable handed to
        generate_tokens() returns str lines.  *expected* is the expected
        token table, one formatted token per line; trailing whitespace is
        stripped from it before it is split into lines.
        """
        # Format the tokens in s in a table format.
        # The ENDMARKER is omitted.
        result = []
        f = StringIO(s)
        for type, token, start, end, line in generate_tokens(f.readline):
            if type == ENDMARKER:
                # Stop here so the end marker never appears in the table.
                break
            # Replace the numeric token type with its name from tok_name.
            type = tok_name[type]
            result.append(f"    {type:10} {token!r:13} {start} {end}")
        self.assertEqual(result, expected.rstrip().splitlines())
 def decistmt(s):
    result = []
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@ -37,7 +37,7 @@ cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
 import token
-__all__ = token.__all__ + ["tokenize", "detect_encoding",
+__all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
                           "untokenize", "TokenInfo"]
 del token
@ -653,9 +653,12 @@ def _tokenize(readline, encoding):
    yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
 # A backwards compatible API, exported in __all__, for all the places in the
 # standard library that expect to be able to use tokenize with strings
 def generate_tokens(readline):
    """Tokenize a source reading Python code as unicode strings.

    This has the same API as tokenize(), except that it expects the *readline*
    callable to return str objects instead of bytes.

    The work is delegated to _tokenize() with the encoding set to None.  Per
    the module documentation, the result yields the same named tuples as
    tokenize() but does not include an ENCODING token.
    """
    return _tokenize(readline, None)
 def main():
--- a/Misc/NEWS.d/next/Library/2018-05-17-22-14-58.bpo-12486.HBeh62.rst
+++ b/Misc/NEWS.d/next/Library/2018-05-17-22-14-58.bpo-12486.HBeh62.rst
@ -0,0 +1,2 @@
 :func:`tokenize.generate_tokens` is now documented as a public API to
 tokenize unicode strings. It was previously present but undocumented.
@ -0,0 +1,2 @@
 :func:`tokenize.generate_tokens` is now documented as a public API to
 tokenize unicode strings. It was previously present but undocumented.