Merged revisions 72494 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

................
  r72494 | benjamin.peterson | 2009-05-08 20:01:14 -0500 (Fri, 08 May 2009) | 21 lines

  Merged revisions 72491-72493 via svnmerge from
  svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3

  ........
    r72491 | benjamin.peterson | 2009-05-08 19:33:27 -0500 (Fri, 08 May 2009) | 7 lines

    make 2to3 use unicode internally on 2.x

    This started out as a fix for #2660, but became this large refactoring
    when I realized the dire state this was in. 2to3 now uses
    tokenize.detect_encoding to decode the files correctly into unicode.
  ........
    r72492 | benjamin.peterson | 2009-05-08 19:35:38 -0500 (Fri, 08 May 2009) | 1 line

    remove compat code
  ........
    r72493 | benjamin.peterson | 2009-05-08 19:54:15 -0500 (Fri, 08 May 2009) | 1 line

    add a test for \r\n newlines
  ........
................
commit d481e3d791
parent b0ba27dff1
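In outline, the change replaces naive text-mode reads with a two-pass scheme: sniff the encoding from the raw bytes, then decode. A minimal sketch of that scheme, using the detect_encoding() helper this commit adds to lib2to3.pgen2.tokenize (the file name is a placeholder, not part of the commit):

    import codecs
    from lib2to3.pgen2 import tokenize

    filename = "example.py"  # placeholder; any Python source file

    # Pass 1: read raw bytes and sniff the encoding (UTF-8 BOM or PEP 263 cookie).
    f = open(filename, "rb")
    try:
        encoding = tokenize.detect_encoding(f.readline)[0]
    finally:
        f.close()

    # Pass 2: reopen with the detected encoding; on 2.x the commit aliases
    # codecs.open as _open_with_encoding so the result is unicode, not bytes.
    f = codecs.open(filename, "r", encoding)
    try:
        source = f.read()
    finally:
        f.close()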
lib2to3/fixes/fix_imports.py
@@ -123,7 +123,7 @@ class FixImports(fixer_base.BaseFix):
         import_mod = results.get("module_name")
         if import_mod:
             mod_name = import_mod.value
-            new_name = self.mapping[mod_name]
+            new_name = str(self.mapping[mod_name])
             import_mod.replace(Name(new_name, prefix=import_mod.get_prefix()))
             if "name_import" in results:
                 # If it's not a "from x import x, y" or "import x as y" import,
lib2to3/fixes/fix_methodattrs.py
@@ -19,5 +19,5 @@ class FixMethodattrs(fixer_base.BaseFix):
 
     def transform(self, node, results):
         attr = results["attr"][0]
-        new = MAP[attr.value]
+        new = str(MAP[attr.value])
         attr.replace(Name(new, prefix=attr.get_prefix()))
lib2to3/fixes/fix_renames.py
@@ -65,5 +65,5 @@ class FixRenames(fixer_base.BaseFix):
         #import_mod = results.get("module")
 
         if mod_name and attr_name:
-            new_attr = LOOKUP[(mod_name.value, attr_name.value)]
+            new_attr = str(LOOKUP[(mod_name.value, attr_name.value)])
             attr_name.replace(Name(new_attr, prefix=attr_name.get_prefix()))
lib2to3/fixes/fix_types.py
@@ -56,7 +56,7 @@ class FixTypes(fixer_base.BaseFix):
     PATTERN = '|'.join(_pats)
 
     def transform(self, node, results):
-        new_value = _TYPE_MAPPING.get(results["name"].value)
+        new_value = str(_TYPE_MAPPING.get(results["name"].value))
         if new_value:
             return Name(new_value, prefix=node.get_prefix())
         return None
lib2to3/main.py
@@ -23,7 +23,7 @@ class StdoutRefactoringTool(refactor.MultiprocessRefactoringTool):
         self.errors.append((msg, args, kwargs))
         self.logger.error(msg, *args, **kwargs)
 
-    def write_file(self, new_text, filename, old_text):
+    def write_file(self, new_text, filename, old_text, encoding):
         if not self.nobackups:
             # Make backup
             backup = filename + ".bak"
@@ -37,8 +37,8 @@ class StdoutRefactoringTool(refactor.MultiprocessRefactoringTool):
             except os.error as err:
                 self.log_message("Can't rename %s to %s", filename, backup)
         # Actually write the new file
-        super(StdoutRefactoringTool, self).write_file(new_text,
-                                                      filename, old_text)
+        write = super(StdoutRefactoringTool, self).write_file
+        write(new_text, filename, old_text, encoding)
         if not self.nobackups:
             shutil.copymode(backup, filename)
 
lib2to3/patcomp.py
@@ -133,7 +133,7 @@ class PatternCompiler(object):
         assert len(nodes) >= 1
         node = nodes[0]
         if node.type == token.STRING:
-            value = literals.evalString(node.value)
+            value = str(literals.evalString(node.value))
             return pytree.LeafPattern(content=value)
         elif node.type == token.NAME:
             value = node.value
lib2to3/pgen2/driver.py
@@ -16,6 +16,7 @@ __author__ = "Guido van Rossum <guido@python.org>"
 __all__ = ["Driver", "load_grammar"]
 
 # Python imports
+import codecs
 import os
 import logging
 import sys
@@ -90,9 +91,9 @@ class Driver(object):
         """Parse a stream and return the syntax tree."""
         return self.parse_stream_raw(stream, debug)
 
-    def parse_file(self, filename, debug=False):
+    def parse_file(self, filename, encoding=None, debug=False):
         """Parse a file and return the syntax tree."""
-        stream = open(filename)
+        stream = codecs.open(filename, "r", encoding)
         try:
             return self.parse_stream(stream, debug)
         finally:
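Since parse_file() now accepts the encoding, a caller that has already sniffed it can pass it straight through. A hedged usage sketch (the grammar setup mirrors what lib2to3's own tests do; the path is illustrative):

    from lib2to3 import pygram, pytree
    from lib2to3.pgen2 import driver, tokenize

    d = driver.Driver(pygram.python_grammar, convert=pytree.convert)

    fp = open("example.py", "rb")  # placeholder path
    try:
        encoding = tokenize.detect_encoding(fp.readline)[0]
    finally:
        fp.close()
    tree = d.parse_file("example.py", encoding=encoding)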
lib2to3/pgen2/tokenize.py
@@ -30,6 +30,7 @@ __credits__ = \
     'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
 
 import string, re
+from codecs import BOM_UTF8, lookup
 from lib2to3.pgen2.token import *
 
 from . import token
@@ -228,6 +229,75 @@ class Untokenizer:
             startline = False
             toks_append(tokval)
 
+cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
+
+def detect_encoding(readline):
+    """
+    The detect_encoding() function is used to detect the encoding that should
+    be used to decode a Python source file. It requires one argument, readline,
+    in the same way as the tokenize() generator.
+
+    It will call readline a maximum of twice, and return the encoding used
+    (as a string) and a list of any lines (left as bytes) it has read
+    in.
+
+    It detects the encoding from the presence of a utf-8 bom or an encoding
+    cookie as specified in pep-0263. If both a bom and a cookie are present,
+    but disagree, a SyntaxError will be raised. If the encoding cookie is an
+    invalid charset, raise a SyntaxError.
+
+    If no encoding is specified, then the default of 'utf-8' will be returned.
+    """
+    bom_found = False
+    encoding = None
+    def read_or_stop():
+        try:
+            return readline()
+        except StopIteration:
+            return b''
+
+    def find_cookie(line):
+        try:
+            line_string = line.decode('ascii')
+        except UnicodeDecodeError:
+            return None
+
+        matches = cookie_re.findall(line_string)
+        if not matches:
+            return None
+        encoding = matches[0]
+        try:
+            codec = lookup(encoding)
+        except LookupError:
+            # This behaviour mimics the Python interpreter
+            raise SyntaxError("unknown encoding: " + encoding)
+
+        if bom_found and codec.name != 'utf-8':
+            # This behaviour mimics the Python interpreter
+            raise SyntaxError('encoding problem: utf-8')
+        return encoding
+
+    first = read_or_stop()
+    if first.startswith(BOM_UTF8):
+        bom_found = True
+        first = first[3:]
+    if not first:
+        return 'utf-8', []
+
+    encoding = find_cookie(first)
+    if encoding:
+        return encoding, [first]
+
+    second = read_or_stop()
+    if not second:
+        return 'utf-8', [first]
+
+    encoding = find_cookie(second)
+    if encoding:
+        return encoding, [first, second]
+
+    return 'utf-8', [first, second]
+
 def untokenize(iterable):
     """Transform tokens back into Python source code.
 
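The three docstring cases can be exercised directly by handing detect_encoding() an in-memory readline; a small sketch (the sample byte strings are made up):

    import io
    from lib2to3.pgen2.tokenize import detect_encoding

    def sniff(data):
        # detect_encoding wants a readline callable, not a file name.
        return detect_encoding(io.BytesIO(data).readline)[0]

    sniff(b"# -*- coding: iso-8859-1 -*-\n")  # cookie -> 'iso-8859-1'
    sniff(b"\xef\xbb\xbfx = 1\n")             # UTF-8 BOM -> 'utf-8'
    sniff(b"x = 1\n")                         # neither -> default 'utf-8'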
lib2to3/pytree.py
@@ -216,6 +216,10 @@ class Base(object):
             return ""
         return next_sib.get_prefix()
 
+    if sys.version_info < (3, 0):
+        def __str__(self):
+            return str(self).encode("ascii")
+
 
 class Node(Base):
 
@@ -245,7 +249,7 @@ class Node(Base):
                                type_repr(self.type),
                                self.children)
 
-    def __str__(self):
+    def __unicode__(self):
         """
         Return a pretty string representation.
 
@@ -253,6 +257,9 @@ class Node(Base):
         """
         return "".join(map(str, self.children))
 
+    if sys.version_info > (3, 0):
+        __str__ = __unicode__
+
     def _eq(self, other):
         """Compare two nodes for equality."""
         return (self.type, self.children) == (other.type, other.children)
@@ -353,7 +360,7 @@ class Leaf(Base):
                                self.type,
                                self.value)
 
-    def __str__(self):
+    def __unicode__(self):
         """
         Return a pretty string representation.
 
@@ -361,6 +368,9 @@ class Leaf(Base):
         """
         return self.prefix + str(self.value)
 
+    if sys.version_info > (3, 0):
+        __str__ = __unicode__
+
     def _eq(self, other):
         """Compare two nodes for equality."""
         return (self.type, self.value) == (other.type, other.value)
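The pattern used in these pytree hunks — define __unicode__ once, then alias __str__ to it on 3.x — keeps a single textual representation working on both lines. A self-contained sketch of the same idiom (the class is invented for illustration):

    import sys

    class Example(object):
        """Invented class showing the __unicode__/__str__ aliasing idiom."""
        def __init__(self, value):
            self.value = value

        def __unicode__(self):
            # On 2.x this returns unicode; on 3.x, str is already unicode.
            return u"Example(%s)" % (self.value,)

        if sys.version_info >= (3, 0):
            # No unicode/str split on 3.x, so __str__ can be the same method.
            __str__ = __unicode__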
lib2to3/refactor.py
@@ -22,8 +22,7 @@ from collections import defaultdict
 from itertools import chain
 
 # Local imports
-from .pgen2 import driver
-from .pgen2 import tokenize
+from .pgen2 import driver, tokenize
 
 from . import pytree
 from . import patcomp
@@ -87,6 +86,25 @@ def get_fixers_from_package(pkg_name):
     return [pkg_name + "." + fix_name
             for fix_name in get_all_fix_names(pkg_name, False)]
 
+def _identity(obj):
+    return obj
+
+if sys.version_info < (3, 0):
+    import codecs
+    _open_with_encoding = codecs.open
+    # codecs.open doesn't translate newlines sadly.
+    def _from_system_newlines(input):
+        return input.replace("\r\n", "\n")
+    def _to_system_newlines(input):
+        if os.linesep != "\n":
+            return input.replace("\n", os.linesep)
+        else:
+            return input
+else:
+    _open_with_encoding = open
+    _from_system_newlines = _identity
+    _to_system_newlines = _identity
+
+
 class FixerError(Exception):
     """A fixer could not be loaded."""
 
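Because codecs.open does no newline translation on 2.x, the merge normalizes newlines by hand. A quick standalone illustration of the round trip these helpers are meant to guarantee (definitions copied from the hunk above):

    import os

    def _from_system_newlines(text):
        return text.replace("\r\n", "\n")

    def _to_system_newlines(text):
        if os.linesep != "\n":
            return text.replace("\n", os.linesep)
        return text

    # 2to3 works on "\n" internally; system newlines are restored on write.
    internal = _from_system_newlines("a\r\nb\r\n")   # -> "a\nb\n"
    on_disk = _to_system_newlines(internal)          # "\r\n" again on Windows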
@@ -213,29 +231,42 @@ class RefactoringTool(object):
             # Modify dirnames in-place to remove subdirs with leading dots
             dirnames[:] = [dn for dn in dirnames if not dn.startswith(".")]
 
-    def refactor_file(self, filename, write=False, doctests_only=False):
-        """Refactors a file."""
+    def _read_python_source(self, filename):
+        """
+        Do our best to decode a Python source file correctly.
+        """
         try:
-            f = open(filename)
+            f = open(filename, "rb")
         except IOError as err:
             self.log_error("Can't open %s: %s", filename, err)
-            return
+            return None, None
         try:
-            input = f.read() + "\n" # Silence certain parse errors
+            encoding = tokenize.detect_encoding(f.readline)[0]
         finally:
             f.close()
+        with _open_with_encoding(filename, "r", encoding=encoding) as f:
+            return _from_system_newlines(f.read()), encoding
+
+    def refactor_file(self, filename, write=False, doctests_only=False):
+        """Refactors a file."""
+        input, encoding = self._read_python_source(filename)
+        if input is None:
+            # Reading the file failed.
+            return
+        input += "\n" # Silence certain parse errors
         if doctests_only:
             self.log_debug("Refactoring doctests in %s", filename)
             output = self.refactor_docstring(input, filename)
             if output != input:
-                self.processed_file(output, filename, input, write=write)
+                self.processed_file(output, filename, input, write, encoding)
             else:
                 self.log_debug("No doctest changes in %s", filename)
         else:
             tree = self.refactor_string(input, filename)
             if tree and tree.was_changed:
                 # The [:-1] is to take off the \n we added earlier
-                self.processed_file(str(tree)[:-1], filename, write=write)
+                self.processed_file(str(tree)[:-1], filename,
+                                    write=write, encoding=encoding)
             else:
                 self.log_debug("No changes in %s", filename)
 
@@ -321,31 +352,26 @@ class RefactoringTool(object):
             node.replace(new)
             node = new
 
-    def processed_file(self, new_text, filename, old_text=None, write=False):
+    def processed_file(self, new_text, filename, old_text=None, write=False,
+                       encoding=None):
         """
         Called when a file has been refactored, and there are changes.
         """
         self.files.append(filename)
         if old_text is None:
-            try:
-                f = open(filename, "r")
-            except IOError as err:
-                self.log_error("Can't read %s: %s", filename, err)
+            old_text = self._read_python_source(filename)[0]
+            if old_text is None:
                 return
-            try:
-                old_text = f.read()
-            finally:
-                f.close()
         if old_text == new_text:
             self.log_debug("No changes to %s", filename)
             return
         self.print_output(diff_texts(old_text, new_text, filename))
         if write:
-            self.write_file(new_text, filename, old_text)
+            self.write_file(new_text, filename, old_text, encoding)
         else:
             self.log_debug("Not writing changes to %s", filename)
 
-    def write_file(self, new_text, filename, old_text):
+    def write_file(self, new_text, filename, old_text, encoding=None):
         """Writes a string to a file.
 
         It first shows a unified diff between the old text and the new text, and
@@ -353,12 +379,12 @@ class RefactoringTool(object):
         set.
         """
         try:
-            f = open(filename, "w")
+            f = _open_with_encoding(filename, "w", encoding=encoding)
         except os.error as err:
             self.log_error("Can't create %s: %s", filename, err)
             return
         try:
-            f.write(new_text)
+            f.write(_to_system_newlines(new_text))
         except os.error as err:
             self.log_error("Can't write %s: %s", filename, err)
         finally:
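A design choice worth noting in these refactor.py hunks: encoding defaults to None in both processed_file() and write_file(), so existing subclasses and callers that never pass an encoding keep working unchanged. Only StdoutRefactoringTool.write_file() in main.py makes the argument mandatory, which is safe because the base class always supplies it when it calls write_file.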
lib2to3/tests/data/crlf.py (new file)
@@ -0,0 +1,3 @@
+print "hi"
+
+print "Like bad Windows newlines?"
lib2to3/tests/data/different_encoding.py (new file)
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+print(u'ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
+
lib2to3/tests/support.py
@@ -9,12 +9,9 @@ import os.path
 import re
 from textwrap import dedent
 
-#sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
 # Local imports
-from .. import pytree
-from .. import refactor
-from ..pgen2 import driver
+from lib2to3 import pytree, refactor
+from lib2to3.pgen2 import driver
 
 test_dir = os.path.dirname(__file__)
 proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
@@ -25,12 +22,6 @@ driver = driver.Driver(grammar, convert=pytree.convert)
 def parse_string(string):
     return driver.parse_string(reformat(string), debug=True)
 
-# Python 2.3's TestSuite is not iter()-able
-if sys.version_info < (2, 4):
-    def TestSuite_iter(self):
-        return iter(self._tests)
-    unittest.TestSuite.__iter__ = TestSuite_iter
-
 def run_all_tests(test_mod=None, tests=None):
     if tests is None:
         tests = unittest.TestLoader().loadTestsFromModule(test_mod)
lib2to3/tests/test_all_fixers.py
@@ -28,7 +28,7 @@ class Test_all(support.TestCase):
     def test_all_project_files(self):
         for filepath in support.all_project_files():
             print("Fixing %s..." % filepath)
-            self.refactor.refactor_string(open(filepath).read(), filepath)
+            self.refactor.refactor_file(filepath)
 
 
 if __name__ == "__main__":
lib2to3/tests/test_parser.py
@@ -14,9 +14,9 @@ from .support import driver, test_dir
 
 # Python imports
 import os
 import os.path
 
 # Local imports
+from lib2to3.pgen2 import tokenize
 from ..pgen2.parse import ParseError
 
 
@@ -150,13 +150,25 @@ class TestParserIdempotency(support.TestCase):
     def test_all_project_files(self):
         for filepath in support.all_project_files():
             print("Parsing %s..." % filepath)
-            tree = driver.parse_file(filepath, debug=True)
-            if diff(filepath, tree):
+            with open(filepath, "rb") as fp:
+                encoding = tokenize.detect_encoding(fp.readline)[0]
+                fp.seek(0)
+                source = fp.read()
+            if encoding:
+                source = source.decode(encoding)
+            tree = driver.parse_string(source)
+            new = str(tree)
+            if encoding:
+                new = new.encode(encoding)
+            if diff(filepath, new):
                 self.fail("Idempotency failed: %s" % filepath)
 
 
 class TestLiterals(GrammarTest):
 
+    def validate(self, s):
+        driver.parse_string(support.dedent(s) + "\n\n")
+
     def test_multiline_bytes_literals(self):
         s = """
             md5test(b"\xaa" * 80,
|
@ -185,10 +197,10 @@ class TestLiterals(GrammarTest):
|
|||
self.validate(s)
|
||||
|
||||
|
||||
def diff(fn, tree):
|
||||
def diff(fn, result):
|
||||
f = open("@", "w")
|
||||
try:
|
||||
f.write(str(tree))
|
||||
f.write(result)
|
||||
finally:
|
||||
f.close()
|
||||
try:
|
||||
|
|
|
lib2to3/tests/test_refactor.py
@@ -14,7 +14,8 @@ from lib2to3 import refactor, pygram, fixer_base
 from . import support
 
 
-FIXER_DIR = os.path.join(os.path.dirname(__file__), "data/fixers")
+TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
+FIXER_DIR = os.path.join(TEST_DATA_DIR, "fixers")
 
 sys.path.append(FIXER_DIR)
 try:
@@ -22,6 +23,8 @@ try:
 finally:
     sys.path.pop()
 
+_2TO3_FIXERS = refactor.get_fixers_from_package("lib2to3.fixes")
+
 class TestRefactoringTool(unittest.TestCase):
 
     def setUp(self):
@@ -121,19 +124,40 @@ class TestRefactoringTool(unittest.TestCase):
 +def cheese(): pass""".splitlines()
         self.assertEqual(diff_lines[:-1], expected)
 
-    def test_refactor_file(self):
-        test_file = os.path.join(FIXER_DIR, "parrot_example.py")
-        old_contents = open(test_file, "r").read()
-        rt = self.rt()
+    def check_file_refactoring(self, test_file, fixers=_2TO3_FIXERS):
+        def read_file():
+            with open(test_file, "rb") as fp:
+                return fp.read()
+        old_contents = read_file()
+        rt = self.rt(fixers=fixers)
 
         rt.refactor_file(test_file)
-        self.assertEqual(old_contents, open(test_file, "r").read())
+        self.assertEqual(old_contents, read_file())
 
-        rt.refactor_file(test_file, True)
         try:
-            self.assertNotEqual(old_contents, open(test_file, "r").read())
+            rt.refactor_file(test_file, True)
+            self.assertNotEqual(old_contents, read_file())
         finally:
-            open(test_file, "w").write(old_contents)
+            with open(test_file, "wb") as fp:
+                fp.write(old_contents)
+
+    def test_refactor_file(self):
+        test_file = os.path.join(FIXER_DIR, "parrot_example.py")
+        self.check_file_refactoring(test_file, _DEFAULT_FIXERS)
+
+    def test_file_encoding(self):
+        fn = os.path.join(TEST_DATA_DIR, "different_encoding.py")
+        self.check_file_refactoring(fn)
+
+    def test_crlf_newlines(self):
+        old_sep = os.linesep
+        os.linesep = "\r\n"
+        try:
+            fn = os.path.join(TEST_DATA_DIR, "crlf.py")
+            fixes = refactor.get_fixers_from_package("lib2to3.fixes")
+            self.check_file_refactoring(fn, fixes)
+        finally:
+            os.linesep = old_sep
+
     def test_refactor_docstring(self):
         rt = self.rt()
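Note how the new test_crlf_newlines simulates a CRLF platform by swapping os.linesep instead of requiring Windows. The same trick works standalone when testing newline handling (a sketch, not part of the commit):

    import os

    old_sep = os.linesep
    os.linesep = "\r\n"  # pretend to be a CRLF platform
    try:
        # exercise code that consults os.linesep, e.g. _to_system_newlines()
        assert "a\nb".replace("\n", os.linesep) == "a\r\nb"
    finally:
        os.linesep = old_sep  # always restore the real value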