reuse tokenize.detect_encoding in linecache instead of a custom solution

patch by Victor Stinner #4016
Benjamin Peterson 2009-03-24 22:30:15 +00:00
parent a8abe86331
commit 9b8d24b17d
2 changed files with 8 additions and 23 deletions
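For context, tokenize.detect_encoding() is the helper this commit reuses: given the readline method of a binary stream, it reads at most two lines, looks for a PEP 263 coding cookie (or a UTF-8 BOM), and returns the encoding name together with the raw lines it consumed. A minimal sketch of that behaviour (the file name here is made up):

import tokenize

with open('example.py', 'rb') as fp:
    # Returns the declared encoding (default 'utf-8') and the bytes
    # lines that were read while making the decision.
    encoding, consumed = tokenize.detect_encoding(fp.readline)

print(encoding)   # e.g. 'utf-8', or whatever the coding cookie declares
print(consumed)   # the raw lines consumed during detection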

Lib/linecache.py

@@ -7,7 +7,7 @@ that name.
 
 import sys
 import os
-import re
+import tokenize
 
 __all__ = ["getline", "clearcache", "checkcache"]
 
@@ -120,27 +120,11 @@ def updatecache(filename, module_globals=None):
             pass
         else:
             # No luck
 ##          print '*** Cannot stat', filename, ':', msg
             return []
 ##  print("Refreshing cache for %s..." % fullname)
     try:
-        fp = open(fullname, 'rU')
-        lines = fp.readlines()
-        fp.close()
+        with open(fullname, 'rb') as fp:
+            coding, line = tokenize.detect_encoding(fp.readline)
+        with open(fullname, 'r', encoding=coding) as fp:
+            lines = fp.readlines()
     except Exception as msg:
 ##      print '*** Cannot open', fullname, ':', msg
         return []
-    coding = "utf-8"
-    for line in lines[:2]:
-        m = re.search(r"coding[:=]\s*([-\w.]+)", line)
-        if m:
-            coding = m.group(1)
-            break
-    try:
-        lines = [line if isinstance(line, str) else str(line, coding)
-                 for line in lines]
-    except:
-        pass  # Hope for the best
     size, mtime = stat.st_size, stat.st_mtime
     cache[filename] = size, mtime, lines, fullname
     return lines
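With this change, linecache decodes a source file with the same rules the tokenizer uses, instead of grepping the first two lines itself with a hand-rolled regex. A quick sketch of the intended effect, with a hypothetical module name: a file declaring a non-UTF-8 coding cookie should now come back from the cache correctly decoded.

import linecache

# Suppose latin.py (hypothetical) begins with the cookie
#   # -*- coding: iso-8859-1 -*-
# and its second line contains iso-8859-1 bytes; getline() should now
# decode it using the declared encoding rather than hoping for the best.
print(linecache.getline('latin.py', 2))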

Lib/tokenize.py

@@ -27,7 +27,6 @@ __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
 import re, string, sys
 from token import *
 from codecs import lookup, BOM_UTF8
-from itertools import chain, repeat
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
 import token
@@ -327,13 +326,15 @@ def tokenize(readline):
     which tells you which encoding was used to decode the bytes stream.
     """
     encoding, consumed = detect_encoding(readline)
-    def readline_generator():
+    def readline_generator(consumed):
+        for line in consumed:
+            yield line
         while True:
             try:
                 yield readline()
             except StopIteration:
                 return
-    chained = chain(consumed, readline_generator())
+    chained = readline_generator(consumed)
     return _tokenize(chained.__next__, encoding)
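The tokenize change folds the replay of the already-consumed lines into readline_generator() itself, which lets the itertools.chain import go away. Callers are unaffected: tokenize.tokenize() still takes the readline of a binary stream, first yields whatever detect_encoding() consumed, and then keeps calling readline() until it is exhausted. A small usage sketch:

import io
import tokenize

source = b"# -*- coding: utf-8 -*-\nx = 1\n"
# tokenize() detects the encoding from the first lines, then replays
# those lines through the chained generator before reading the rest.
for tok in tokenize.tokenize(io.BytesIO(source).readline):
    print(tok)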