cpython/Lib/lib2to3/patcomp.py

202 lines
6.8 KiB
Python
Raw Normal View History

2010-02-05 22:40:03 -04:00
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Pattern compiler.
The grammer is taken from PatternGrammar.txt.
The compiler compiles a pattern to a pytree.*Pattern instance.
"""
__author__ = "Guido van Rossum <guido@python.org>"
# Python imports
import os
# Fairly local imports
from .pgen2 import driver, literals, token, tokenize, parse, grammar
# Really local imports
from . import pytree
from . import pygram
# The pattern grammar file
_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
"PatternGrammar.txt")
class PatternSyntaxError(Exception):
pass
def tokenize_wrapper(input):
"""Tokenizes a string suppressing significant whitespace."""
skip = set((token.NEWLINE, token.INDENT, token.DEDENT))
tokens = tokenize.generate_tokens(driver.generate_lines(input).next)
for quintuple in tokens:
type, value, start, end, line_text = quintuple
if type not in skip:
yield quintuple
class PatternCompiler(object):
def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
"""Initializer.
Takes an optional alternative filename for the pattern grammar.
"""
self.grammar = driver.load_grammar(grammar_file)
self.syms = pygram.Symbols(self.grammar)
self.pygrammar = pygram.python_grammar
self.pysyms = pygram.python_symbols
self.driver = driver.Driver(self.grammar, convert=pattern_convert)
def compile_pattern(self, input, debug=False):
"""Compiles a pattern string to a nested pytree.*Pattern object."""
tokens = tokenize_wrapper(input)
try:
root = self.driver.parse_tokens(tokens, debug=debug)
Merged revisions 80934 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ................ r80934 | benjamin.peterson | 2010-05-07 13:58:23 -0500 (Fri, 07 May 2010) | 69 lines Merged revisions 79911,79916-79917,80018,80418,80572-80573,80635-80639,80668,80922 via svnmerge from svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3 ........ r79911 | benjamin.peterson | 2010-04-09 15:38:53 -0500 (Fri, 09 Apr 2010) | 1 line use absolute import ........ r79916 | benjamin.peterson | 2010-04-09 16:05:21 -0500 (Fri, 09 Apr 2010) | 1 line generalize detection of __future__ imports and attach them to the tree ........ r79917 | benjamin.peterson | 2010-04-09 16:11:44 -0500 (Fri, 09 Apr 2010) | 1 line don't try to 'fix' relative imports when absolute_import is enabled #8858 ........ r80018 | benjamin.peterson | 2010-04-12 16:12:12 -0500 (Mon, 12 Apr 2010) | 4 lines prevent diffs from being mangled is multiprocess mode #6409 Patch by George Boutsioukis. ........ r80418 | benjamin.peterson | 2010-04-23 16:00:03 -0500 (Fri, 23 Apr 2010) | 1 line remove unhelpful description ........ r80572 | benjamin.peterson | 2010-04-27 20:33:54 -0500 (Tue, 27 Apr 2010) | 1 line use unicode literals ........ r80573 | jeffrey.yasskin | 2010-04-27 23:08:27 -0500 (Tue, 27 Apr 2010) | 6 lines Don't transform imports that are already relative. 2to3 turned from . import refactor into from .. import refactor which broke the transformation of 2to3 itself. ........ r80635 | benjamin.peterson | 2010-04-29 16:02:23 -0500 (Thu, 29 Apr 2010) | 1 line remove imports ........ r80636 | benjamin.peterson | 2010-04-29 16:02:41 -0500 (Thu, 29 Apr 2010) | 1 line unicode literal ........ r80637 | benjamin.peterson | 2010-04-29 16:03:42 -0500 (Thu, 29 Apr 2010) | 1 line must pass a string to Number ........ r80638 | benjamin.peterson | 2010-04-29 16:05:34 -0500 (Thu, 29 Apr 2010) | 1 line unicode literals ........ r80639 | benjamin.peterson | 2010-04-29 16:06:09 -0500 (Thu, 29 Apr 2010) | 1 line pass string to Number ........ r80668 | jeffrey.yasskin | 2010-04-30 18:02:47 -0500 (Fri, 30 Apr 2010) | 4 lines Make 2to3 run under Python 2.5 so that the benchmark suite at http://hg.python.org/benchmarks/ can use it and still run on implementations that haven't gotten to 2.6 yet. Fixes issue 8566. ........ r80922 | benjamin.peterson | 2010-05-07 11:06:25 -0500 (Fri, 07 May 2010) | 1 line prevent xrange transformation from wrapping range calls it produces in list ........ ................
2010-05-07 16:06:32 -03:00
except parse.ParseError, e:
2010-02-05 22:40:03 -04:00
raise PatternSyntaxError(str(e))
return self.compile_node(root)
def compile_node(self, node):
"""Compiles a node, recursively.
This is one big switch on the node type.
"""
# XXX Optimize certain Wildcard-containing-Wildcard patterns
# that can be merged
if node.type == self.syms.Matcher:
node = node.children[0] # Avoid unneeded recursion
if node.type == self.syms.Alternatives:
# Skip the odd children since they are just '|' tokens
alts = [self.compile_node(ch) for ch in node.children[::2]]
if len(alts) == 1:
return alts[0]
p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
return p.optimize()
if node.type == self.syms.Alternative:
units = [self.compile_node(ch) for ch in node.children]
if len(units) == 1:
return units[0]
p = pytree.WildcardPattern([units], min=1, max=1)
return p.optimize()
if node.type == self.syms.NegatedUnit:
pattern = self.compile_basic(node.children[1:])
p = pytree.NegatedPattern(pattern)
return p.optimize()
assert node.type == self.syms.Unit
name = None
nodes = node.children
if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
name = nodes[0].value
nodes = nodes[2:]
repeat = None
if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
repeat = nodes[-1]
nodes = nodes[:-1]
# Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
pattern = self.compile_basic(nodes, repeat)
if repeat is not None:
assert repeat.type == self.syms.Repeater
children = repeat.children
child = children[0]
if child.type == token.STAR:
min = 0
max = pytree.HUGE
elif child.type == token.PLUS:
min = 1
max = pytree.HUGE
elif child.type == token.LBRACE:
assert children[-1].type == token.RBRACE
assert len(children) in (3, 5)
min = max = self.get_int(children[1])
if len(children) == 5:
max = self.get_int(children[3])
else:
assert False
if min != 1 or max != 1:
pattern = pattern.optimize()
pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
if name is not None:
pattern.name = name
return pattern.optimize()
def compile_basic(self, nodes, repeat=None):
# Compile STRING | NAME [Details] | (...) | [...]
assert len(nodes) >= 1
node = nodes[0]
if node.type == token.STRING:
value = unicode(literals.evalString(node.value))
return pytree.LeafPattern(_type_of_literal(value), value)
elif node.type == token.NAME:
value = node.value
if value.isupper():
if value not in TOKEN_MAP:
raise PatternSyntaxError("Invalid token: %r" % value)
if nodes[1:]:
raise PatternSyntaxError("Can't have details for token")
return pytree.LeafPattern(TOKEN_MAP[value])
else:
if value == "any":
type = None
elif not value.startswith("_"):
type = getattr(self.pysyms, value, None)
if type is None:
raise PatternSyntaxError("Invalid symbol: %r" % value)
if nodes[1:]: # Details present
content = [self.compile_node(nodes[1].children[1])]
else:
content = None
return pytree.NodePattern(type, content)
elif node.value == "(":
return self.compile_node(nodes[1])
elif node.value == "[":
assert repeat is None
subpattern = self.compile_node(nodes[1])
return pytree.WildcardPattern([[subpattern]], min=0, max=1)
assert False, node
def get_int(self, node):
assert node.type == token.NUMBER
return int(node.value)
# Map named tokens to the type value for a LeafPattern
TOKEN_MAP = {"NAME": token.NAME,
"STRING": token.STRING,
"NUMBER": token.NUMBER,
"TOKEN": None}
def _type_of_literal(value):
if value[0].isalpha():
return token.NAME
elif value in grammar.opmap:
return grammar.opmap[value]
else:
return None
def pattern_convert(grammar, raw_node_info):
"""Converts raw node information to a Node or Leaf instance."""
type, value, context, children = raw_node_info
if children or type in grammar.number2symbol:
return pytree.Node(type, children, context=context)
else:
return pytree.Leaf(type, value, context=context)
def compile_pattern(pattern):
return PatternCompiler().compile_pattern(pattern)