From 9b8d24b17de49813eb53d6f9a4d615bfac574d11 Mon Sep 17 00:00:00 2001
From: Benjamin Peterson <benjamin@python.org>
Date: Tue, 24 Mar 2009 22:30:15 +0000
Subject: [PATCH] reuse tokenize.detect_encoding in linecache instead of a
 custom solution

patch by Victor Stinner #4016
---
 Lib/linecache.py | 24 ++++--------------------
 Lib/tokenize.py  |  7 ++++---
 2 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/Lib/linecache.py b/Lib/linecache.py
index 6a9535ed1f4..51404e2698d 100644
--- a/Lib/linecache.py
+++ b/Lib/linecache.py
@@ -7,7 +7,7 @@ that name.
 
 import sys
 import os
-import re
+import tokenize
 
 __all__ = ["getline", "clearcache", "checkcache"]
 
@@ -120,27 +120,11 @@ def updatecache(filename, module_globals=None):
                     pass
         else:
             # No luck
-##          print '*** Cannot stat', filename, ':', msg
             return []
-##  print("Refreshing cache for %s..." % fullname)
-    try:
-        fp = open(fullname, 'rU')
+    with open(fullname, 'rb') as fp:
+        coding, line = tokenize.detect_encoding(fp.readline)
+    with open(fullname, 'r', encoding=coding) as fp:
         lines = fp.readlines()
-        fp.close()
-    except Exception as msg:
-##      print '*** Cannot open', fullname, ':', msg
-        return []
-    coding = "utf-8"
-    for line in lines[:2]:
-        m = re.search(r"coding[:=]\s*([-\w.]+)", line)
-        if m:
-            coding = m.group(1)
-            break
-    try:
-        lines = [line if isinstance(line, str) else str(line, coding)
-                 for line in lines]
-    except:
-        pass  # Hope for the best
     size, mtime = stat.st_size, stat.st_mtime
     cache[filename] = size, mtime, lines, fullname
     return lines
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 16c4f3f0298..4ff859d9d0d 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -27,7 +27,6 @@ __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
 import re, string, sys
 from token import *
 from codecs import lookup, BOM_UTF8
-from itertools import chain, repeat
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
 import token
@@ -327,13 +326,15 @@ def tokenize(readline):
     which tells you which encoding was used to decode the bytes stream.
     """
     encoding, consumed = detect_encoding(readline)
-    def readline_generator():
+    def readline_generator(consumed):
+        for line in consumed:
+            yield line
         while True:
             try:
                 yield readline()
             except StopIteration:
                 return
-    chained = chain(consumed, readline_generator())
+    chained = readline_generator(consumed)
     return _tokenize(chained.__next__, encoding)
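
Note: the core of the linecache change is the two-step open in updatecache(): read the file in binary mode so tokenize.detect_encoding() can inspect the BOM and coding cookie, then reopen it in text mode with the detected encoding. A minimal standalone sketch of that pattern follows; the filename 'example.py' is a placeholder, not part of the patch.

    import tokenize

    # Open in binary mode first: detect_encoding() consumes at most two
    # raw lines (plus any BOM) and returns the encoding name together
    # with the lines it already read.
    with open('example.py', 'rb') as fp:
        encoding, consumed = tokenize.detect_encoding(fp.readline)

    # Reopen in text mode with the detected encoding, as the patched
    # updatecache() does.
    with open('example.py', 'r', encoding=encoding) as fp:
        lines = fp.readlines()

On the tokenize.py side, readline_generator() now replays the lines that detect_encoding() already consumed before yielding fresh lines from readline(). That is exactly what the old itertools.chain(consumed, ...) call did, so the module can drop its itertools import.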