import token
import tokenize
from typing import Dict, Iterator, List

Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES


def shorttok(tok: tokenize.TokenInfo) -> str:
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(
        self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
    ):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        self._lines: Dict[int, str] = {}
        self._path = path
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        cached = self._index != len(self._tokens)
        tok = self.peek()
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Skip non-syntactic trivia: blank-line NLs and comments.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            # Skip error tokens that are pure whitespace.
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            # Collapse runs of consecutive NEWLINE tokens into one.
            if (
                tok.type == token.NEWLINE
                and self._tokens
                and self._tokens[-1].type == token.NEWLINE
            ):
                continue
            self._tokens.append(tok)
            # Without a path to reread from, cache source lines as we go.
            if not self._path:
                self._lines[tok.start[0]] = tok.line
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        """Return the last token seen, fetching one first if none has been."""
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
        # Walk backwards past ENDMARKER and NEWLINE/INDENT/DEDENT tokens.
        # Assumes at least one non-whitespace token has been consumed.
        for tok in reversed(self._tokens[: self._index]):
            if tok.type != tokenize.ENDMARKER and (
                tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
            ):
                break
        return tok

    def get_lines(self, line_numbers: List[int]) -> List[str]:
        """Retrieve source lines corresponding to line numbers."""
        if self._lines:
            lines = self._lines
        else:
            n = len(line_numbers)
            lines = {}
            count = 0
            seen = 0
            with open(self._path) as f:
                for line in f:
                    count += 1
                    if count in line_numbers:
                        seen += 1
                        lines[count] = line
                        if seen == n:
                            break

        return [lines[n] for n in line_numbers]

    def mark(self) -> Mark:
        """Return the current index, to be passed to reset() later."""
        return self._index

    def reset(self, index: Mark) -> None:
        """Rewind (or fast-forward) to a previously marked index."""
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        # Verbose trace: '-' marks a backtrack, '>' a cached replay,
        # '*' a freshly fetched token; "(Bof)" is beginning of file.
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
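

# ---------------------------------------------------------------------------
# Usage sketch (illustrative; not part of the original module). A minimal
# demonstration of the mark()/reset() backtracking protocol this wrapper
# exists to support, driving it from tokenize.generate_tokens on an
# in-memory string. The sample source text below is hypothetical.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import io

    source = "x = 1  # trailing comment\ny = 2\n"
    gen = tokenize.generate_tokens(io.StringIO(source).readline)
    t = Tokenizer(gen)

    start = t.mark()           # remember the position before speculating
    first = t.getnext()        # NAME 'x'; the COMMENT and NL are skipped
    t.reset(start)             # backtrack: the token stays cached
    assert t.peek() == first   # peek replays the cached token
    print(shorttok(first))
    print(t.get_lines([1]))    # lines were cached since no path was given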