bpo-12486: Document tokenize.generate_tokens() as public API (#6957)
* Document tokenize.generate_tokens()
* Add news file
* Add test for generate_tokens
* Document behaviour around ENCODING token
* Add generate_tokens to __all__
parent c2745d2d05
commit c56b17bd8c
@@ -57,6 +57,16 @@ The primary entry point is a :term:`generator`:
 :func:`.tokenize` determines the source encoding of the file by looking for a
 UTF-8 BOM or encoding cookie, according to :pep:`263`.
 
+.. function:: generate_tokens(readline)
+
+   Tokenize a source reading unicode strings instead of bytes.
+
+   Like :func:`.tokenize`, the *readline* argument is a callable returning
+   a single line of input. However, :func:`generate_tokens` expects *readline*
+   to return a str object rather than bytes.
+
+   The result is an iterator yielding named tuples, exactly like
+   :func:`.tokenize`. It does not yield an :data:`~token.ENCODING` token.
+
 All constants from the :mod:`token` module are also exported from
 :mod:`tokenize`.
 
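As a quick illustration of the documented behaviour above, here is a minimal usage sketch; the source string and printed fields are illustrative and not part of the patch:

import io
from tokenize import generate_tokens

source = "x = 1\n"
# readline must return str lines, e.g. from io.StringIO
for tok in generate_tokens(io.StringIO(source).readline):
    # tok is a TokenInfo named tuple: (type, string, start, end, line)
    print(tok.type, tok.string, tok.start, tok.end)

Note that, unlike tokenize(), the first token here is the NAME token for 'x' rather than an ENCODING token.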
@@ -79,7 +89,8 @@ write back the modified script.
     positions) may change.
 
     It returns bytes, encoded using the :data:`~token.ENCODING` token, which
-    is the first token sequence output by :func:`.tokenize`.
+    is the first token sequence output by :func:`.tokenize`. If there is no
+    encoding token in the input, it returns a str instead.
 
 
 :func:`.tokenize` needs to detect the encoding of source files it tokenizes. The
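To make the bytes-vs-str behaviour of untokenize() concrete, a small sketch (the example source is illustrative):

import io
import tokenize

source = "x = 1\n"

# tokenize() reads bytes and yields an ENCODING token first, so
# untokenize() round-trips to bytes.
byte_tokens = list(tokenize.tokenize(io.BytesIO(source.encode("utf-8")).readline))
assert isinstance(tokenize.untokenize(byte_tokens), bytes)

# generate_tokens() reads str and yields no ENCODING token, so
# untokenize() returns a str instead.
str_tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
assert isinstance(tokenize.untokenize(str_tokens), str)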
@@ -1,8 +1,8 @@
 from test import support
 from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
-                     open as tokenize_open, Untokenizer)
-from io import BytesIO
+                     open as tokenize_open, Untokenizer, generate_tokens)
+from io import BytesIO, StringIO
 import unittest
 from unittest import TestCase, mock
 from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
@@ -919,6 +919,19 @@ async def f():
     DEDENT     ''            (7, 0) (7, 0)
     """)
 
+class GenerateTokensTest(TokenizeTest):
+    def check_tokenize(self, s, expected):
+        # Format the tokens in s in a table format.
+        # The ENDMARKER is omitted.
+        result = []
+        f = StringIO(s)
+        for type, token, start, end, line in generate_tokens(f.readline):
+            if type == ENDMARKER:
+                break
+            type = tok_name[type]
+            result.append(f"    {type:10} {token!r:13} {start} {end}")
+        self.assertEqual(result, expected.rstrip().splitlines())
+
 
 def decistmt(s):
     result = []
@@ -37,7 +37,7 @@ cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
 
 import token
-__all__ = token.__all__ + ["tokenize", "detect_encoding",
+__all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
                            "untokenize", "TokenInfo"]
 del token
 
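With generate_tokens added to __all__, it becomes part of the module's star-import surface; a quick check of that effect:

import tokenize

assert "generate_tokens" in tokenize.__all__
# so `from tokenize import *` now brings generate_tokens into scope as well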
@@ -653,9 +653,12 @@ def _tokenize(readline, encoding):
     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
 
 
-# An undocumented, backwards compatible, API for all the places in the standard
-# library that expect to be able to use tokenize with strings
 def generate_tokens(readline):
+    """Tokenize a source reading Python code as unicode strings.
+
+    This has the same API as tokenize(), except that it expects the *readline*
+    callable to return str objects instead of bytes.
+    """
     return _tokenize(readline, None)
 
 def main():
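Because generate_tokens() is simply _tokenize() with no encoding, its output for a given source should match what tokenize() produces for the UTF-8 encoded equivalent, minus the leading ENCODING token. A sketch of that relationship (the sample source is illustrative):

import io
import tokenize

source = "a = 1\n"

from_bytes = list(tokenize.tokenize(io.BytesIO(source.encode("utf-8")).readline))
from_str = list(tokenize.generate_tokens(io.StringIO(source).readline))

assert from_bytes[0].type == tokenize.ENCODING  # only the bytes API emits ENCODING
assert from_bytes[1:] == from_str               # the remaining token streams agree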
@@ -0,0 +1,2 @@
+:func:`tokenize.generate_tokens` is now documented as a public API to
+tokenize unicode strings. It was previously present but undocumented.