import token
import tokenize
from typing import List, Iterator

Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES  # type: ignore


def shorttok(tok: tokenize.TokenInfo) -> str:
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        cached = True
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Skip insignificant tokens: non-logical newlines, comments,
            # and whitespace-only error tokens.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            self._tokens.append(tok)
            cached = False
        tok = self._tokens[self._index]
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            self._tokens.append(tok)
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        """Return the last token read, fetching one first if none has been."""
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def mark(self) -> Mark:
        """Return the current index, suitable for passing to reset()."""
        return self._index

    def reset(self, index: Mark) -> None:
        """Rewind (or fast-forward) to a previously marked index."""
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        """Print a one-line trace of the current position (verbose mode)."""
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
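
# Minimal usage sketch (illustrative only, not part of the module above):
# it demonstrates the mark()/reset() backtracking that a caching tokenizer
# exists to support, with verbose=True so report() prints a position trace.
# The sample source string "x = 1 + 2\n" is a hypothetical input chosen here.
if __name__ == "__main__":
    import io

    tokengen = tokenize.generate_tokens(io.StringIO("x = 1 + 2\n").readline)
    tokenizer = Tokenizer(tokengen, verbose=True)

    start = tokenizer.mark()          # remember the current position
    first = tokenizer.getnext()       # consume one token: NAME 'x'
    tokenizer.reset(start)            # backtrack to the mark
    assert tokenizer.peek() == first  # the cache replays the same token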