Merged revisions 72494 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

................
  r72494 | benjamin.peterson | 2009-05-08 20:01:14 -0500 (Fri, 08 May 2009) | 21 lines

  Merged revisions 72491-72493 via svnmerge from
  svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3

  ........
    r72491 | benjamin.peterson | 2009-05-08 19:33:27 -0500 (Fri, 08 May 2009) | 7 lines

    make 2to3 use unicode internally on 2.x

    This started out as a fix for #2660, but became this large refactoring
    when I realized the dire state this was in. 2to3 now uses
    tokenize.detect_encoding to decode the files correctly into unicode.
  ........
    r72492 | benjamin.peterson | 2009-05-08 19:35:38 -0500 (Fri, 08 May 2009) | 1 line

    remove compat code
  ........
    r72493 | benjamin.peterson | 2009-05-08 19:54:15 -0500 (Fri, 08 May 2009) | 1 line

    add a test for \r\n newlines
  ........
................
This commit is contained in:
Benjamin Peterson 2009-05-09 19:42:23 +00:00
parent b0ba27dff1
commit d481e3d791
16 changed files with 201 additions and 60 deletions

View File

@ -123,7 +123,7 @@ class FixImports(fixer_base.BaseFix):
import_mod = results.get("module_name")
if import_mod:
mod_name = import_mod.value
new_name = self.mapping[mod_name]
new_name = str(self.mapping[mod_name])
import_mod.replace(Name(new_name, prefix=import_mod.get_prefix()))
if "name_import" in results:
# If it's not a "from x import x, y" or "import x as y" import,

View File

@ -19,5 +19,5 @@ class FixMethodattrs(fixer_base.BaseFix):
def transform(self, node, results):
attr = results["attr"][0]
new = MAP[attr.value]
new = str(MAP[attr.value])
attr.replace(Name(new, prefix=attr.get_prefix()))

View File

@ -65,5 +65,5 @@ class FixRenames(fixer_base.BaseFix):
#import_mod = results.get("module")
if mod_name and attr_name:
new_attr = LOOKUP[(mod_name.value, attr_name.value)]
new_attr = str(LOOKUP[(mod_name.value, attr_name.value)])
attr_name.replace(Name(new_attr, prefix=attr_name.get_prefix()))

View File

@ -56,7 +56,7 @@ class FixTypes(fixer_base.BaseFix):
PATTERN = '|'.join(_pats)
def transform(self, node, results):
new_value = _TYPE_MAPPING.get(results["name"].value)
new_value = str(_TYPE_MAPPING.get(results["name"].value))
if new_value:
return Name(new_value, prefix=node.get_prefix())
return None

View File

@ -23,7 +23,7 @@ class StdoutRefactoringTool(refactor.MultiprocessRefactoringTool):
self.errors.append((msg, args, kwargs))
self.logger.error(msg, *args, **kwargs)
def write_file(self, new_text, filename, old_text):
def write_file(self, new_text, filename, old_text, encoding):
if not self.nobackups:
# Make backup
backup = filename + ".bak"
@ -37,8 +37,8 @@ class StdoutRefactoringTool(refactor.MultiprocessRefactoringTool):
except os.error as err:
self.log_message("Can't rename %s to %s", filename, backup)
# Actually write the new file
super(StdoutRefactoringTool, self).write_file(new_text,
filename, old_text)
write = super(StdoutRefactoringTool, self).write_file
write(new_text, filename, old_text, encoding)
if not self.nobackups:
shutil.copymode(backup, filename)

View File

@ -133,7 +133,7 @@ class PatternCompiler(object):
assert len(nodes) >= 1
node = nodes[0]
if node.type == token.STRING:
value = literals.evalString(node.value)
value = str(literals.evalString(node.value))
return pytree.LeafPattern(content=value)
elif node.type == token.NAME:
value = node.value

View File

@ -16,6 +16,7 @@ __author__ = "Guido van Rossum <guido@python.org>"
__all__ = ["Driver", "load_grammar"]
# Python imports
import codecs
import os
import logging
import sys
@ -90,9 +91,9 @@ class Driver(object):
"""Parse a stream and return the syntax tree."""
return self.parse_stream_raw(stream, debug)
def parse_file(self, filename, debug=False):
def parse_file(self, filename, encoding=None, debug=False):
"""Parse a file and return the syntax tree."""
stream = open(filename)
stream = codecs.open(filename, "r", encoding)
try:
return self.parse_stream(stream, debug)
finally:

View File

@ -30,6 +30,7 @@ __credits__ = \
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
import string, re
from codecs import BOM_UTF8, lookup
from lib2to3.pgen2.token import *
from . import token
@ -228,6 +229,75 @@ class Untokenizer:
startline = False
toks_append(tokval)
# PEP 263 coding cookie, e.g. "# -*- coding: latin-1 -*-".
# Raw string: "\s"/"\w" in a plain string are invalid escape sequences
# and raise a SyntaxWarning on modern Python.
cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")


def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263. If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised. If the encoding cookie is an
    invalid charset, raise a SyntaxError.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    bom_found = False
    encoding = None

    def read_or_stop():
        # Treat exhaustion of the readline callable as an empty line.
        try:
            return readline()
        except StopIteration:
            return b''

    def find_cookie(line):
        """Return the cookie's encoding name from *line*, or None."""
        try:
            # Per PEP 263 the cookie line must be pure ASCII.
            line_string = line.decode('ascii')
        except UnicodeDecodeError:
            return None

        matches = cookie_re.findall(line_string)
        if not matches:
            return None
        encoding = matches[0]
        try:
            codec = lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            raise SyntaxError("unknown encoding: " + encoding)

        if bom_found and codec.name != 'utf-8':
            # This behaviour mimics the Python interpreter
            raise SyntaxError('encoding problem: utf-8')
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[3:]
    if not first:
        return 'utf-8', []

    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]

    second = read_or_stop()
    if not second:
        return 'utf-8', [first]

    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]
    return 'utf-8', [first, second]
def untokenize(iterable):
"""Transform tokens back into Python source code.

View File

@ -216,6 +216,10 @@ class Base(object):
return ""
return next_sib.get_prefix()
if sys.version_info < (3, 0):
def __str__(self):
return str(self).encode("ascii")
class Node(Base):
@ -245,7 +249,7 @@ class Node(Base):
type_repr(self.type),
self.children)
def __str__(self):
def __unicode__(self):
"""
Return a pretty string representation.
@ -253,6 +257,9 @@ class Node(Base):
"""
return "".join(map(str, self.children))
if sys.version_info > (3, 0):
__str__ = __unicode__
def _eq(self, other):
"""Compare two nodes for equality."""
return (self.type, self.children) == (other.type, other.children)
@ -353,7 +360,7 @@ class Leaf(Base):
self.type,
self.value)
def __str__(self):
def __unicode__(self):
"""
Return a pretty string representation.
@ -361,6 +368,9 @@ class Leaf(Base):
"""
return self.prefix + str(self.value)
if sys.version_info > (3, 0):
__str__ = __unicode__
def _eq(self, other):
"""Compare two nodes for equality."""
return (self.type, self.value) == (other.type, other.value)

View File

@ -22,8 +22,7 @@ from collections import defaultdict
from itertools import chain
# Local imports
from .pgen2 import driver
from .pgen2 import tokenize
from .pgen2 import driver, tokenize
from . import pytree
from . import patcomp
@ -87,6 +86,25 @@ def get_fixers_from_package(pkg_name):
return [pkg_name + "." + fix_name
for fix_name in get_all_fix_names(pkg_name, False)]
def _identity(obj):
return obj
if sys.version_info < (3, 0):
import codecs
_open_with_encoding = codecs.open
# codecs.open doesn't translate newlines sadly.
def _from_system_newlines(input):
return input.replace("\r\n", "\n")
def _to_system_newlines(input):
if os.linesep != "\n":
return input.replace("\n", os.linesep)
else:
return input
else:
_open_with_encoding = open
_from_system_newlines = _identity
_to_system_newlines = _identity
class FixerError(Exception):
"""A fixer could not be loaded."""
@ -213,29 +231,42 @@ class RefactoringTool(object):
# Modify dirnames in-place to remove subdirs with leading dots
dirnames[:] = [dn for dn in dirnames if not dn.startswith(".")]
def refactor_file(self, filename, write=False, doctests_only=False):
"""Refactors a file."""
def _read_python_source(self, filename):
"""
Do our best to decode a Python source file correctly.
"""
try:
f = open(filename)
f = open(filename, "rb")
except IOError as err:
self.log_error("Can't open %s: %s", filename, err)
return
return None, None
try:
input = f.read() + "\n" # Silence certain parse errors
encoding = tokenize.detect_encoding(f.readline)[0]
finally:
f.close()
with _open_with_encoding(filename, "r", encoding=encoding) as f:
return _from_system_newlines(f.read()), encoding
def refactor_file(self, filename, write=False, doctests_only=False):
"""Refactors a file."""
input, encoding = self._read_python_source(filename)
if input is None:
# Reading the file failed.
return
input += "\n" # Silence certain parse errors
if doctests_only:
self.log_debug("Refactoring doctests in %s", filename)
output = self.refactor_docstring(input, filename)
if output != input:
self.processed_file(output, filename, input, write=write)
self.processed_file(output, filename, input, write, encoding)
else:
self.log_debug("No doctest changes in %s", filename)
else:
tree = self.refactor_string(input, filename)
if tree and tree.was_changed:
# The [:-1] is to take off the \n we added earlier
self.processed_file(str(tree)[:-1], filename, write=write)
self.processed_file(str(tree)[:-1], filename,
write=write, encoding=encoding)
else:
self.log_debug("No changes in %s", filename)
@ -321,31 +352,26 @@ class RefactoringTool(object):
node.replace(new)
node = new
def processed_file(self, new_text, filename, old_text=None, write=False):
def processed_file(self, new_text, filename, old_text=None, write=False,
encoding=None):
"""
Called when a file has been refactored, and there are changes.
"""
self.files.append(filename)
if old_text is None:
try:
f = open(filename, "r")
except IOError as err:
self.log_error("Can't read %s: %s", filename, err)
old_text = self._read_python_source(filename)[0]
if old_text is None:
return
try:
old_text = f.read()
finally:
f.close()
if old_text == new_text:
self.log_debug("No changes to %s", filename)
return
self.print_output(diff_texts(old_text, new_text, filename))
if write:
self.write_file(new_text, filename, old_text)
self.write_file(new_text, filename, old_text, encoding)
else:
self.log_debug("Not writing changes to %s", filename)
def write_file(self, new_text, filename, old_text):
def write_file(self, new_text, filename, old_text, encoding=None):
"""Writes a string to a file.
It first shows a unified diff between the old text and the new text, and
@ -353,12 +379,12 @@ class RefactoringTool(object):
set.
"""
try:
f = open(filename, "w")
f = _open_with_encoding(filename, "w", encoding=encoding)
except os.error as err:
self.log_error("Can't create %s: %s", filename, err)
return
try:
f.write(new_text)
f.write(_to_system_newlines(new_text))
except os.error as err:
self.log_error("Can't write %s: %s", filename, err)
finally:

View File

@ -0,0 +1,3 @@
print "hi"
print "Like bad Windows newlines?"

View File

@ -0,0 +1,4 @@
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
print(u'ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')

View File

@ -9,12 +9,9 @@ import os.path
import re
from textwrap import dedent
#sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
# Local imports
from .. import pytree
from .. import refactor
from ..pgen2 import driver
from lib2to3 import pytree, refactor
from lib2to3.pgen2 import driver
test_dir = os.path.dirname(__file__)
proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
@ -25,12 +22,6 @@ driver = driver.Driver(grammar, convert=pytree.convert)
def parse_string(string):
return driver.parse_string(reformat(string), debug=True)
# Python 2.3's TestSuite is not iter()-able
if sys.version_info < (2, 4):
def TestSuite_iter(self):
return iter(self._tests)
unittest.TestSuite.__iter__ = TestSuite_iter
def run_all_tests(test_mod=None, tests=None):
if tests is None:
tests = unittest.TestLoader().loadTestsFromModule(test_mod)

View File

@ -28,7 +28,7 @@ class Test_all(support.TestCase):
def test_all_project_files(self):
for filepath in support.all_project_files():
print("Fixing %s..." % filepath)
self.refactor.refactor_string(open(filepath).read(), filepath)
self.refactor.refactor_file(filepath)
if __name__ == "__main__":

View File

@ -14,9 +14,9 @@ from .support import driver, test_dir
# Python imports
import os
import os.path
# Local imports
from lib2to3.pgen2 import tokenize
from ..pgen2.parse import ParseError
@ -150,13 +150,25 @@ class TestParserIdempotency(support.TestCase):
def test_all_project_files(self):
for filepath in support.all_project_files():
print("Parsing %s..." % filepath)
tree = driver.parse_file(filepath, debug=True)
if diff(filepath, tree):
with open(filepath, "rb") as fp:
encoding = tokenize.detect_encoding(fp.readline)[0]
fp.seek(0)
source = fp.read()
if encoding:
source = source.decode(encoding)
tree = driver.parse_string(source)
new = str(tree)
if encoding:
new = new.encode(encoding)
if diff(filepath, new):
self.fail("Idempotency failed: %s" % filepath)
class TestLiterals(GrammarTest):
def validate(self, s):
driver.parse_string(support.dedent(s) + "\n\n")
def test_multiline_bytes_literals(self):
s = """
md5test(b"\xaa" * 80,
@ -185,10 +197,10 @@ class TestLiterals(GrammarTest):
self.validate(s)
def diff(fn, tree):
def diff(fn, result):
f = open("@", "w")
try:
f.write(str(tree))
f.write(result)
finally:
f.close()
try:

View File

@ -14,7 +14,8 @@ from lib2to3 import refactor, pygram, fixer_base
from . import support
FIXER_DIR = os.path.join(os.path.dirname(__file__), "data/fixers")
TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
FIXER_DIR = os.path.join(TEST_DATA_DIR, "fixers")
sys.path.append(FIXER_DIR)
try:
@ -22,6 +23,8 @@ try:
finally:
sys.path.pop()
_2TO3_FIXERS = refactor.get_fixers_from_package("lib2to3.fixes")
class TestRefactoringTool(unittest.TestCase):
def setUp(self):
@ -121,19 +124,40 @@ class TestRefactoringTool(unittest.TestCase):
+def cheese(): pass""".splitlines()
self.assertEqual(diff_lines[:-1], expected)
def test_refactor_file(self):
test_file = os.path.join(FIXER_DIR, "parrot_example.py")
old_contents = open(test_file, "r").read()
rt = self.rt()
def check_file_refactoring(self, test_file, fixers=_2TO3_FIXERS):
def read_file():
with open(test_file, "rb") as fp:
return fp.read()
old_contents = read_file()
rt = self.rt(fixers=fixers)
rt.refactor_file(test_file)
self.assertEqual(old_contents, open(test_file, "r").read())
self.assertEqual(old_contents, read_file())
rt.refactor_file(test_file, True)
try:
self.assertNotEqual(old_contents, open(test_file, "r").read())
rt.refactor_file(test_file, True)
self.assertNotEqual(old_contents, read_file())
finally:
open(test_file, "w").write(old_contents)
with open(test_file, "wb") as fp:
fp.write(old_contents)
def test_refactor_file(self):
test_file = os.path.join(FIXER_DIR, "parrot_example.py")
self.check_file_refactoring(test_file, _DEFAULT_FIXERS)
def test_file_encoding(self):
fn = os.path.join(TEST_DATA_DIR, "different_encoding.py")
self.check_file_refactoring(fn)
def test_crlf_newlines(self):
old_sep = os.linesep
os.linesep = "\r\n"
try:
fn = os.path.join(TEST_DATA_DIR, "crlf.py")
fixes = refactor.get_fixers_from_package("lib2to3.fixes")
self.check_file_refactoring(fn, fixes)
finally:
os.linesep = old_sep
def test_refactor_docstring(self):
rt = self.rt()