cpython/Lib/lib2to3/pgen2/driver.py

179 lines
5.8 KiB
Python
Raw Normal View History

# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Parser driver.
This provides a high-level interface to parse a file into a syntax tree.
"""
__author__ = "Guido van Rossum <guido@python.org>"
__all__ = ["Driver", "load_grammar"]
# Python imports
import codecs
import io
import os
import logging
import pkgutil
import sys
# Pgen imports
Merged revisions 61724-61725 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ................ r61724 | martin.v.loewis | 2008-03-22 01:01:12 +0100 (Sa, 22 Mär 2008) | 49 lines Merged revisions 61602-61723 via svnmerge from svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3 ........ r61626 | david.wolever | 2008-03-19 17:19:16 +0100 (Mi, 19 Mär 2008) | 1 line Added fixer for implicit local imports. See #2414. ........ r61628 | david.wolever | 2008-03-19 17:57:43 +0100 (Mi, 19 Mär 2008) | 1 line Added a class for tests which should not run if a particular import is found. ........ r61629 | collin.winter | 2008-03-19 17:58:19 +0100 (Mi, 19 Mär 2008) | 1 line Two more relative import fixes in pgen2. ........ r61635 | david.wolever | 2008-03-19 20:16:03 +0100 (Mi, 19 Mär 2008) | 1 line Fixed print fixer so it will do the Right Thing when it encounters __future__.print_function. 2to3 gets upset, though, so the tests have been commented out. ........ r61637 | david.wolever | 2008-03-19 21:37:17 +0100 (Mi, 19 Mär 2008) | 3 lines Added a fixer for itertools imports (from itertools import imap, ifilterfalse --> from itertools import filterfalse) ........ r61645 | david.wolever | 2008-03-19 23:22:35 +0100 (Mi, 19 Mär 2008) | 1 line SVN is happier when you add the files you create... -_-' ........ r61654 | david.wolever | 2008-03-20 01:09:56 +0100 (Do, 20 Mär 2008) | 1 line Added an explicit sort order to fixers -- fixes problems like #2427 ........ r61664 | david.wolever | 2008-03-20 04:32:40 +0100 (Do, 20 Mär 2008) | 3 lines Fixes #2428 -- comments are no longer eatten by __future__ fixer. ........ r61673 | david.wolever | 2008-03-20 17:22:40 +0100 (Do, 20 Mär 2008) | 1 line Added 2to3 node pretty-printer ........ r61679 | david.wolever | 2008-03-20 20:50:42 +0100 (Do, 20 Mär 2008) | 1 line Made node printing a little bit prettier ........ r61723 | martin.v.loewis | 2008-03-22 00:59:27 +0100 (Sa, 22 Mär 2008) | 2 lines Fix whitespace. ........ ................ r61725 | martin.v.loewis | 2008-03-22 01:02:41 +0100 (Sa, 22 Mär 2008) | 2 lines Install lib2to3. ................
2008-03-21 21:07:09 -03:00
from . import grammar, parse, token, tokenize, pgen
class Driver(object):
def __init__(self, grammar, convert=None, logger=None):
self.grammar = grammar
if logger is None:
logger = logging.getLogger()
self.logger = logger
self.convert = convert
def parse_tokens(self, tokens, debug=False):
"""Parse a series of tokens and return the syntax tree."""
# XXX Move the prefix computation into a wrapper around tokenize.
p = parse.Parser(self.grammar, self.convert)
p.setup()
lineno = 1
column = 0
type = value = start = end = line_text = None
prefix = ""
for quintuple in tokens:
type, value, start, end, line_text = quintuple
if start != (lineno, column):
assert (lineno, column) <= start, ((lineno, column), start)
s_lineno, s_column = start
if lineno < s_lineno:
prefix += "\n" * (s_lineno - lineno)
lineno = s_lineno
column = 0
if column < s_column:
prefix += line_text[column:s_column]
column = s_column
if type in (tokenize.COMMENT, tokenize.NL):
prefix += value
lineno, column = end
if value.endswith("\n"):
lineno += 1
column = 0
continue
if type == token.OP:
type = grammar.opmap[value]
if debug:
self.logger.debug("%s %r (prefix=%r)",
token.tok_name[type], value, prefix)
if p.addtoken(type, value, (prefix, start)):
if debug:
self.logger.debug("Stop.")
break
prefix = ""
lineno, column = end
if value.endswith("\n"):
lineno += 1
column = 0
else:
# We never broke out -- EOF is too soon (how can this happen???)
Merged revisions 68288-68291,68325-68326,68338,68388,68393,68423 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ................ r68288 | benjamin.peterson | 2009-01-03 18:39:07 -0600 (Sat, 03 Jan 2009) | 1 line only check the actual compile() call for a SyntaxError ................ r68289 | georg.brandl | 2009-01-04 02:26:10 -0600 (Sun, 04 Jan 2009) | 2 lines Test commit. ................ r68290 | georg.brandl | 2009-01-04 04:23:49 -0600 (Sun, 04 Jan 2009) | 4 lines Add "suspicious" builder which finds leftover markup in the HTML files. Patch by Gabriel Genellina. ................ r68291 | georg.brandl | 2009-01-04 04:24:09 -0600 (Sun, 04 Jan 2009) | 2 lines Fix two issues found by the suspicious builder. ................ r68325 | benjamin.peterson | 2009-01-04 16:00:18 -0600 (Sun, 04 Jan 2009) | 1 line use Jinja 2.1.1 ................ r68326 | georg.brandl | 2009-01-04 16:03:10 -0600 (Sun, 04 Jan 2009) | 2 lines Update make.bat. ................ r68338 | neal.norwitz | 2009-01-04 21:57:25 -0600 (Sun, 04 Jan 2009) | 1 line Make sure to checkout any new packages ................ r68388 | benjamin.peterson | 2009-01-07 21:39:46 -0600 (Wed, 07 Jan 2009) | 1 line string exceptions are gone ................ r68393 | benjamin.peterson | 2009-01-07 22:01:00 -0600 (Wed, 07 Jan 2009) | 1 line use new sphinx modules ................ r68423 | benjamin.peterson | 2009-01-08 20:13:34 -0600 (Thu, 08 Jan 2009) | 29 lines Merged revisions 68306-68308,68340,68368,68422 via svnmerge from svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3 ........ r68306 | benjamin.peterson | 2009-01-04 12:27:19 -0600 (Sun, 04 Jan 2009) | 1 line fix_urllib: add mappings for the url parsing functions ........ r68307 | benjamin.peterson | 2009-01-04 12:30:01 -0600 (Sun, 04 Jan 2009) | 1 line remove duplicated function ........ r68308 | benjamin.peterson | 2009-01-04 12:50:34 -0600 (Sun, 04 Jan 2009) | 1 line turtle is no longer renamed ........ r68340 | georg.brandl | 2009-01-05 02:11:39 -0600 (Mon, 05 Jan 2009) | 2 lines Fix undefined locals in parse_tokens(). ........ r68368 | benjamin.peterson | 2009-01-06 17:56:10 -0600 (Tue, 06 Jan 2009) | 1 line fix typo (thanks to Robert Lehmann) ........ r68422 | benjamin.peterson | 2009-01-08 20:01:03 -0600 (Thu, 08 Jan 2009) | 1 line run the imports fixers after fix_import, so fix_import doesn't try to make stdlib renames into relative imports #4876 ........ ................
2009-01-08 23:03:23 -04:00
raise parse.ParseError("incomplete input",
type, value, (prefix, start))
return p.rootnode
def parse_stream_raw(self, stream, debug=False):
"""Parse a stream and return the syntax tree."""
tokens = tokenize.generate_tokens(stream.readline)
return self.parse_tokens(tokens, debug)
def parse_stream(self, stream, debug=False):
"""Parse a stream and return the syntax tree."""
return self.parse_stream_raw(stream, debug)
def parse_file(self, filename, encoding=None, debug=False):
"""Parse a file and return the syntax tree."""
with io.open(filename, "r", encoding=encoding) as stream:
return self.parse_stream(stream, debug)
def parse_string(self, text, debug=False):
"""Parse a string and return the syntax tree."""
tokens = tokenize.generate_tokens(io.StringIO(text).readline)
return self.parse_tokens(tokens, debug)
def _generate_pickle_name(gt):
head, tail = os.path.splitext(gt)
if tail == ".txt":
tail = ""
return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
def load_grammar(gt="Grammar.txt", gp=None,
save=True, force=False, logger=None):
"""Load the grammar (maybe from a pickle)."""
if logger is None:
logger = logging.getLogger()
gp = _generate_pickle_name(gt) if gp is None else gp
if force or not _newer(gp, gt):
logger.info("Generating grammar tables from %s", gt)
g = pgen.generate_grammar(gt)
if save:
logger.info("Writing grammar tables to %s", gp)
try:
g.dump(gp)
2012-12-25 10:47:37 -04:00
except OSError as e:
logger.info("Writing failed: %s", e)
else:
g = grammar.Grammar()
g.load(gp)
return g
def _newer(a, b):
"""Inquire whether file a was written since file b."""
if not os.path.exists(a):
return False
if not os.path.exists(b):
return True
return os.path.getmtime(a) >= os.path.getmtime(b)
def load_packaged_grammar(package, grammar_source):
"""Normally, loads a pickled grammar by doing
pkgutil.get_data(package, pickled_grammar)
where *pickled_grammar* is computed from *grammar_source* by adding the
Python version and using a ``.pickle`` extension.
However, if *grammar_source* is an extant file, load_grammar(grammar_source)
2017-12-23 00:48:13 -04:00
is called instead. This facilitates using a packaged grammar file when needed
but preserves load_grammar's automatic regeneration behavior when possible.
"""
if os.path.isfile(grammar_source):
return load_grammar(grammar_source)
pickled_name = _generate_pickle_name(os.path.basename(grammar_source))
data = pkgutil.get_data(package, pickled_name)
g = grammar.Grammar()
g.loads(data)
return g
def main(*args):
"""Main program, when run as a script: produce grammar pickle files.
Calls load_grammar for each argument, a path to a grammar text file.
"""
if not args:
args = sys.argv[1:]
logging.basicConfig(level=logging.INFO, stream=sys.stdout,
format='%(message)s')
for gt in args:
load_grammar(gt, save=True, force=True)
return True
if __name__ == "__main__":
sys.exit(int(not main()))