cpython/Parser/asdl.py

416 lines
11 KiB
Python
Raw Normal View History

"""An implementation of the Zephyr Abstract Syntax Definition Language.
See http://asdl.sourceforge.net/ and
http://www.cs.princeton.edu/~danwang/Papers/dsl97/dsl97-abstract.html.
Only supports top level module decl, not view. I'm guessing that view
is intended to support the browser and I'm not interested in the
browser.
Changes for Python: Add support for module versions
"""
#__metaclass__ = type
import os
import traceback
import spark
class Token:
# spark seems to dispatch in the parser based on a token's
# type attribute
def __init__(self, type, lineno):
self.type = type
self.lineno = lineno
def __str__(self):
return self.type
def __repr__(self):
return str(self)
class Id(Token):
def __init__(self, value, lineno):
self.type = 'Id'
self.value = value
self.lineno = lineno
def __str__(self):
return self.value
2006-02-28 14:30:36 -04:00
class String(Token):
def __init__(self, value, lineno):
self.type = 'String'
self.value = value
self.lineno = lineno
class ASDLSyntaxError:
def __init__(self, lineno, token=None, msg=None):
self.lineno = lineno
self.token = token
self.msg = msg
def __str__(self):
if self.msg is None:
return "Error at '%s', line %d" % (self.token, self.lineno)
else:
return "%s, line %d" % (self.msg, self.lineno)
class ASDLScanner(spark.GenericScanner, object):
def tokenize(self, input):
self.rv = []
self.lineno = 1
super(ASDLScanner, self).tokenize(input)
return self.rv
def t_id(self, s):
r"[\w\.]+"
# XXX doesn't distinguish upper vs. lower, which is
# significant for ASDL.
self.rv.append(Id(s, self.lineno))
2006-02-28 14:30:36 -04:00
def t_string(self, s):
r'"[^"]*"'
self.rv.append(String(s, self.lineno))
def t_xxx(self, s): # not sure what this production means
r"<="
self.rv.append(Token(s, self.lineno))
def t_punctuation(self, s):
r"[\{\}\*\=\|\(\)\,\?\:]"
self.rv.append(Token(s, self.lineno))
def t_comment(self, s):
r"\-\-[^\n]*"
pass
def t_newline(self, s):
r"\n"
self.lineno += 1
def t_whitespace(self, s):
r"[ \t]+"
pass
def t_default(self, s):
r" . +"
raise ValueError, "unmatched input: %s" % `s`
class ASDLParser(spark.GenericParser, object):
def __init__(self):
super(ASDLParser, self).__init__("module")
def typestring(self, tok):
return tok.type
def error(self, tok):
raise ASDLSyntaxError(tok.lineno, tok)
def p_module_0(self, (module, name, version, _0, _1)):
" module ::= Id Id version { } "
if module.value != "module":
raise ASDLSyntaxError(module.lineno,
msg="expected 'module', found %s" % module)
return Module(name, None, version)
def p_module(self, (module, name, version, _0, definitions, _1)):
" module ::= Id Id version { definitions } "
if module.value != "module":
raise ASDLSyntaxError(module.lineno,
msg="expected 'module', found %s" % module)
return Module(name, definitions, version)
2006-02-28 14:30:36 -04:00
def p_version(self, (version, V)):
"version ::= Id String"
if version.value != "version":
raise ASDLSyntaxError(version.lineno,
msg="expected 'version', found %" % version)
return V
def p_definition_0(self, (definition,)):
" definitions ::= definition "
return definition
def p_definition_1(self, (definitions, definition)):
" definitions ::= definition definitions "
return definitions + definition
def p_definition(self, (id, _, type)):
" definition ::= Id = type "
return [Type(id, type)]
def p_type_0(self, (product,)):
" type ::= product "
return product
def p_type_1(self, (sum,)):
" type ::= sum "
return Sum(sum)
def p_type_2(self, (sum, id, _0, attributes, _1)):
" type ::= sum Id ( fields ) "
if id.value != "attributes":
raise ASDLSyntaxError(id.lineno,
msg="expected attributes, found %s" % id)
if attributes:
attributes.reverse()
return Sum(sum, attributes)
def p_product(self, (_0, fields, _1)):
" product ::= ( fields ) "
# XXX can't I just construct things in the right order?
2005-12-25 19:18:31 -04:00
fields.reverse()
return Product(fields)
def p_sum_0(self, (constructor,)):
Merged revisions 68133-68134,68141-68142,68145-68146,68148-68149,68159-68162,68166,68171-68174,68179,68195-68196,68210,68214-68215,68217-68222 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r68133 | antoine.pitrou | 2009-01-01 16:38:03 +0100 (Thu, 01 Jan 2009) | 1 line fill in actual issue number in tests ........ r68134 | hirokazu.yamamoto | 2009-01-01 16:45:39 +0100 (Thu, 01 Jan 2009) | 2 lines Issue #4797: IOError.filename was not set when _fileio.FileIO failed to open file with `str' filename on Windows. ........ r68141 | benjamin.peterson | 2009-01-01 17:43:12 +0100 (Thu, 01 Jan 2009) | 1 line fix highlighting ........ r68142 | benjamin.peterson | 2009-01-01 18:29:49 +0100 (Thu, 01 Jan 2009) | 2 lines welcome to 2009, Python! ........ r68145 | amaury.forgeotdarc | 2009-01-02 01:03:54 +0100 (Fri, 02 Jan 2009) | 5 lines #4801 _collections module fails to build on cygwin. _PyObject_GC_TRACK is the macro version of PyObject_GC_Track, and according to documentation it should not be used for extension modules. ........ r68146 | ronald.oussoren | 2009-01-02 11:44:46 +0100 (Fri, 02 Jan 2009) | 2 lines Fix for issue4472: "configure --enable-shared doesn't work on OSX" ........ r68148 | ronald.oussoren | 2009-01-02 11:48:31 +0100 (Fri, 02 Jan 2009) | 2 lines Forgot to add a NEWS item in my previous checkin ........ r68149 | ronald.oussoren | 2009-01-02 11:50:48 +0100 (Fri, 02 Jan 2009) | 2 lines Fix for issue4780 ........ r68159 | ronald.oussoren | 2009-01-02 15:48:17 +0100 (Fri, 02 Jan 2009) | 2 lines Fix for issue 1627952 ........ r68160 | ronald.oussoren | 2009-01-02 15:52:09 +0100 (Fri, 02 Jan 2009) | 2 lines Fix for issue r1737832 ........ r68161 | ronald.oussoren | 2009-01-02 16:00:05 +0100 (Fri, 02 Jan 2009) | 3 lines Fix for issue 1149804 ........ r68162 | ronald.oussoren | 2009-01-02 16:06:00 +0100 (Fri, 02 Jan 2009) | 3 lines Fix for issue 4472 is incompatible with Cygwin, this patch should fix that. ........ r68166 | benjamin.peterson | 2009-01-02 19:26:23 +0100 (Fri, 02 Jan 2009) | 1 line document PyMemberDef ........ r68171 | georg.brandl | 2009-01-02 21:25:14 +0100 (Fri, 02 Jan 2009) | 3 lines #4811: fix markup glitches (mostly remains of the conversion), found by Gabriel Genellina. ........ r68172 | martin.v.loewis | 2009-01-02 21:32:55 +0100 (Fri, 02 Jan 2009) | 2 lines Issue #4075: Use OutputDebugStringW in Py_FatalError. ........ r68173 | martin.v.loewis | 2009-01-02 21:40:14 +0100 (Fri, 02 Jan 2009) | 2 lines Issue #4051: Prevent conflict of UNICODE macros in cPickle. ........ r68174 | benjamin.peterson | 2009-01-02 21:47:27 +0100 (Fri, 02 Jan 2009) | 1 line fix compilation on non-Windows platforms ........ r68179 | raymond.hettinger | 2009-01-02 22:26:45 +0100 (Fri, 02 Jan 2009) | 1 line Issue #4615. Document how to use itertools for de-duping. ........ r68195 | georg.brandl | 2009-01-03 14:45:15 +0100 (Sat, 03 Jan 2009) | 2 lines Remove useless string literal. ........ r68196 | georg.brandl | 2009-01-03 15:29:53 +0100 (Sat, 03 Jan 2009) | 2 lines Fix indentation. ........ r68210 | georg.brandl | 2009-01-03 20:10:12 +0100 (Sat, 03 Jan 2009) | 2 lines Set eol-style correctly for mp_distributing.py. ........ r68214 | georg.brandl | 2009-01-03 20:44:48 +0100 (Sat, 03 Jan 2009) | 2 lines Make indentation consistent. ........ r68215 | georg.brandl | 2009-01-03 21:15:14 +0100 (Sat, 03 Jan 2009) | 2 lines Fix role name. ........ r68217 | georg.brandl | 2009-01-03 21:30:15 +0100 (Sat, 03 Jan 2009) | 2 lines Add rstlint, a little tool to find subtle markup problems and inconsistencies in the Doc sources. ........ r68218 | georg.brandl | 2009-01-03 21:38:59 +0100 (Sat, 03 Jan 2009) | 2 lines Recognize usage of the default role. ........ r68219 | georg.brandl | 2009-01-03 21:47:01 +0100 (Sat, 03 Jan 2009) | 2 lines Fix uses of the default role. ........ r68220 | georg.brandl | 2009-01-03 21:55:06 +0100 (Sat, 03 Jan 2009) | 2 lines Remove trailing whitespace. ........ r68221 | georg.brandl | 2009-01-03 22:04:55 +0100 (Sat, 03 Jan 2009) | 2 lines Remove tabs from the documentation. ........ r68222 | georg.brandl | 2009-01-03 22:11:58 +0100 (Sat, 03 Jan 2009) | 2 lines Disable the line length checker by default. ........
2009-01-03 17:55:17 -04:00
" sum ::= constructor "
return [constructor]
def p_sum_1(self, (constructor, _, sum)):
" sum ::= constructor | sum "
return [constructor] + sum
def p_sum_2(self, (constructor, _, sum)):
" sum ::= constructor | sum "
return [constructor] + sum
def p_constructor_0(self, (id,)):
" constructor ::= Id "
return Constructor(id)
def p_constructor_1(self, (id, _0, fields, _1)):
" constructor ::= Id ( fields ) "
# XXX can't I just construct things in the right order?
2005-12-25 19:18:31 -04:00
fields.reverse()
return Constructor(id, fields)
def p_fields_0(self, (field,)):
" fields ::= field "
return [field]
def p_fields_1(self, (field, _, fields)):
" fields ::= field , fields "
return fields + [field]
def p_field_0(self, (type,)):
" field ::= Id "
return Field(type)
def p_field_1(self, (type, name)):
" field ::= Id Id "
return Field(type, name)
def p_field_2(self, (type, _, name)):
" field ::= Id * Id "
return Field(type, name, seq=1)
def p_field_3(self, (type, _, name)):
" field ::= Id ? Id "
return Field(type, name, opt=1)
def p_field_4(self, (type, _)):
" field ::= Id * "
return Field(type, seq=1)
def p_field_5(self, (type, _)):
" field ::= Id ? "
return Field(type, opt=1)
builtin_types = ("identifier", "string", "int", "bool", "object")
# below is a collection of classes to capture the AST of an AST :-)
# not sure if any of the methods are useful yet, but I'm adding them
# piecemeal as they seem helpful
class AST:
pass # a marker class
class Module(AST):
def __init__(self, name, dfns, version):
self.name = name
self.dfns = dfns
self.version = version
self.types = {} # maps type name to value (from dfns)
for type in dfns:
self.types[type.name.value] = type.value
def __repr__(self):
return "Module(%s, %s)" % (self.name, self.dfns)
class Type(AST):
def __init__(self, name, value):
self.name = name
self.value = value
def __repr__(self):
return "Type(%s, %s)" % (self.name, self.value)
class Constructor(AST):
def __init__(self, name, fields=None):
self.name = name
self.fields = fields or []
def __repr__(self):
return "Constructor(%s, %s)" % (self.name, self.fields)
class Field(AST):
def __init__(self, type, name=None, seq=0, opt=0):
self.type = type
self.name = name
self.seq = seq
self.opt = opt
def __repr__(self):
if self.seq:
extra = ", seq=1"
elif self.opt:
extra = ", opt=1"
else:
extra = ""
if self.name is None:
return "Field(%s%s)" % (self.type, extra)
else:
return "Field(%s, %s%s)" % (self.type, self.name, extra)
class Sum(AST):
def __init__(self, types, attributes=None):
self.types = types
self.attributes = attributes or []
def __repr__(self):
if self.attributes is None:
return "Sum(%s)" % self.types
else:
return "Sum(%s, %s)" % (self.types, self.attributes)
class Product(AST):
def __init__(self, fields):
self.fields = fields
def __repr__(self):
return "Product(%s)" % self.fields
class VisitorBase(object):
def __init__(self, skip=0):
self.cache = {}
self.skip = skip
def visit(self, object, *args):
meth = self._dispatch(object)
if meth is None:
return
try:
meth(object, *args)
except Exception, err:
print "Error visiting", repr(object)
print err
traceback.print_exc()
# XXX hack
if hasattr(self, 'file'):
self.file.flush()
os._exit(1)
def _dispatch(self, object):
assert isinstance(object, AST), repr(object)
klass = object.__class__
meth = self.cache.get(klass)
if meth is None:
methname = "visit" + klass.__name__
if self.skip:
meth = getattr(self, methname, None)
else:
meth = getattr(self, methname)
self.cache[klass] = meth
return meth
class Check(VisitorBase):
def __init__(self):
super(Check, self).__init__(skip=1)
self.cons = {}
self.errors = 0
self.types = {}
def visitModule(self, mod):
for dfn in mod.dfns:
self.visit(dfn)
def visitType(self, type):
self.visit(type.value, str(type.name))
def visitSum(self, sum, name):
for t in sum.types:
self.visit(t, name)
def visitConstructor(self, cons, name):
key = str(cons.name)
conflict = self.cons.get(key)
if conflict is None:
self.cons[key] = name
else:
print "Redefinition of constructor %s" % key
print "Defined in %s and %s" % (conflict, name)
self.errors += 1
for f in cons.fields:
self.visit(f, key)
def visitField(self, field, name):
key = str(field.type)
l = self.types.setdefault(key, [])
l.append(name)
def visitProduct(self, prod, name):
for f in prod.fields:
self.visit(f, name)
def check(mod):
v = Check()
v.visit(mod)
for t in v.types:
if not mod.types.has_key(t) and not t in builtin_types:
v.errors += 1
uses = ", ".join(v.types[t])
print "Undefined type %s, used in %s" % (t, uses)
2005-12-25 19:18:31 -04:00
return not v.errors
def parse(file):
scanner = ASDLScanner()
parser = ASDLParser()
buf = open(file).read()
tokens = scanner.tokenize(buf)
try:
return parser.parse(tokens)
except ASDLSyntaxError, err:
print err
lines = buf.split("\n")
print lines[err.lineno - 1] # lines starts at 0, files at 1
if __name__ == "__main__":
import glob
import sys
if len(sys.argv) > 1:
files = sys.argv[1:]
else:
testdir = "tests"
files = glob.glob(testdir + "/*.asdl")
2005-12-25 19:18:31 -04:00
for file in files:
print file
mod = parse(file)
print "module", mod.name
print len(mod.dfns), "definitions"
if not check(mod):
print "Check failed"
else:
for dfn in mod.dfns:
print dfn.type