import token
import tokenize
from typing import Dict, Iterator, List

Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES


def shorttok(tok: tokenize.TokenInfo) -> str:
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(
        self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
    ):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        self._lines: Dict[int, str] = {}
        self._path = path
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        cached = self._index != len(self._tokens)
        tok = self.peek()
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Skip tokens the grammar never needs to see.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            # Collapse runs of consecutive NEWLINE tokens into a single one.
            if (
                tok.type == token.NEWLINE
                and self._tokens
                and self._tokens[-1].type == token.NEWLINE
            ):
                continue
            self._tokens.append(tok)
            # Without a path we cannot re-read the file later, so cache
            # source lines now for diagnostics.
            if not self._path:
                self._lines[tok.start[0]] = tok.line
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
        for tok in reversed(self._tokens[: self._index]):
            if tok.type != tokenize.ENDMARKER and (
                tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
            ):
                break
        return tok

    def get_lines(self, line_numbers: List[int]) -> List[str]:
        """Retrieve source lines corresponding to line numbers."""
        if self._lines:
            lines = self._lines
        else:
            n = len(line_numbers)
            lines = {}
            count = 0
            seen = 0
            with open(self._path) as f:
                for line in f:
                    count += 1
                    if count in line_numbers:
                        seen += 1
                        lines[count] = line
                    if seen == n:
                        break

        return [lines[n] for n in line_numbers]

    def mark(self) -> Mark:
        return self._index

    def reset(self, index: Mark) -> None:
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
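

# --- Minimal usage sketch (not part of the original module) ---
# Relies only on the public surface defined above (Tokenizer, mark(),
# reset(), getnext()); the sample source string is purely illustrative.
if __name__ == "__main__":
    import io

    source = "x = 1\ny = 2\n"
    tokgen = tokenize.generate_tokens(io.StringIO(source).readline)
    t = Tokenizer(tokgen)
    start = t.mark()             # remember the current position
    first = t.getnext()          # consume one token (NAME 'x')
    t.reset(start)               # backtrack to the saved mark
    assert t.getnext() == first  # replayed from the cache, not re-tokenized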