SF patch [ 597919 ] compiler package and SET_LINENO

A variety of changes from Michael Hudson to get the compiler working
with 2.3.  The primary change is the handling of SET_LINENO:

# The set_lineno() function and the explicit emit() calls for
# SET_LINENO below are only used to generate the line number table.
# As of Python 2.3, the interpreter does not have a SET_LINENO
# instruction.  pyassem treats SET_LINENO opcodes as a special case.

A few other small changes:
 - Remove unused code from pycodegen and pyassem.
 - Fix error handling in parsermodule.  When PyParser_SimpleParseString()
   fails, it sets an exception with detailed info.  The parsermodule
   was clobbering that exception and replacing it with a generic
   "could not parse string" exception.  Keep the original exception.
This commit is contained in:
Jeremy Hylton 2002-12-31 18:17:44 +00:00
parent 436eadd455
commit accb62b28e
6 changed files with 79 additions and 50 deletions

View File

@ -6,15 +6,8 @@ import sys
import types import types
from compiler import misc from compiler import misc
from compiler.consts import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, \ from compiler.consts \
CO_VARKEYWORDS import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS
def xxx_sort(l):
l = l[:]
def sorter(a, b):
return cmp(a.bid, b.bid)
l.sort(sorter)
return l
class FlowGraph: class FlowGraph:
def __init__(self): def __init__(self):
@ -77,7 +70,7 @@ class FlowGraph:
def emit(self, *inst): def emit(self, *inst):
if self._debug: if self._debug:
print "\t", inst print "\t", inst
if inst[0] == 'RETURN_VALUE': if inst[0] in ['RETURN_VALUE', 'YIELD_VALUE']:
self.current.addOutEdge(self.exit) self.current.addOutEdge(self.exit)
if len(inst) == 2 and isinstance(inst[1], Block): if len(inst) == 2 and isinstance(inst[1], Block):
self.current.addOutEdge(inst[1]) self.current.addOutEdge(inst[1])
@ -266,7 +259,7 @@ class Block:
self.next.append(block) self.next.append(block)
assert len(self.next) == 1, map(str, self.next) assert len(self.next) == 1, map(str, self.next)
_uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', 'YIELD_VALUE',
'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP') 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP')
def pruneNext(self): def pruneNext(self):
@ -443,7 +436,7 @@ class PyFlowGraph(FlowGraph):
insts.append(inst) insts.append(inst)
if len(inst) == 1: if len(inst) == 1:
pc = pc + 1 pc = pc + 1
else: elif inst[0] != "SET_LINENO":
# arg takes 2 bytes # arg takes 2 bytes
pc = pc + 3 pc = pc + 3
end[b] = pc end[b] = pc
@ -452,7 +445,7 @@ class PyFlowGraph(FlowGraph):
inst = insts[i] inst = insts[i]
if len(inst) == 1: if len(inst) == 1:
pc = pc + 1 pc = pc + 1
else: elif inst[0] != "SET_LINENO":
pc = pc + 3 pc = pc + 3
opname = inst[0] opname = inst[0]
if self.hasjrel.has_elt(opname): if self.hasjrel.has_elt(opname):
@ -580,6 +573,7 @@ class PyFlowGraph(FlowGraph):
oparg = t[1] oparg = t[1]
if opname == "SET_LINENO": if opname == "SET_LINENO":
lnotab.nextLine(oparg) lnotab.nextLine(oparg)
continue
hi, lo = twobyte(oparg) hi, lo = twobyte(oparg)
try: try:
lnotab.addCode(self.opnum[opname], lo, hi) lnotab.addCode(self.opnum[opname], lo, hi)
@ -697,7 +691,7 @@ class LineAddrTable:
# after the loading of "b". This works with the C Python # after the loading of "b". This works with the C Python
# compiler because it only generates a SET_LINENO instruction # compiler because it only generates a SET_LINENO instruction
# for the assignment. # for the assignment.
if line > 0: if line >= 0:
push = self.lnotab.append push = self.lnotab.append
while addr > 255: while addr > 255:
push(255); push(0) push(255); push(0)
@ -768,6 +762,7 @@ class StackDepthTracker:
# PRINT_EXPR? # PRINT_EXPR?
'PRINT_ITEM': -1, 'PRINT_ITEM': -1,
'RETURN_VALUE': -1, 'RETURN_VALUE': -1,
'YIELD_VALUE': -1,
'EXEC_STMT': -3, 'EXEC_STMT': -3,
'BUILD_CLASS': -2, 'BUILD_CLASS': -2,
'STORE_NAME': -1, 'STORE_NAME': -1,

View File

@ -13,6 +13,7 @@ from compiler.consts import CO_VARARGS, CO_VARKEYWORDS, CO_NEWLOCALS,\
CO_NESTED, CO_GENERATOR, CO_GENERATOR_ALLOWED, CO_FUTURE_DIVISION CO_NESTED, CO_GENERATOR, CO_GENERATOR_ALLOWED, CO_FUTURE_DIVISION
from compiler.pyassem import TupleArg from compiler.pyassem import TupleArg
# XXX The version-specific code can go, since this code only works with 2.x.
# Do we have Python 1.x or Python 2.x? # Do we have Python 1.x or Python 2.x?
try: try:
VERSION = sys.version_info[0] VERSION = sys.version_info[0]
@ -32,22 +33,14 @@ EXCEPT = 2
TRY_FINALLY = 3 TRY_FINALLY = 3
END_FINALLY = 4 END_FINALLY = 4
# XXX this doesn't seem to be used
class BlockStack(misc.Stack):
__super_init = misc.Stack.__init__
def __init__(self):
self.__super_init(self)
self.loop = None
def compileFile(filename, display=0): def compileFile(filename, display=0):
f = open(filename) f = open(filename, 'U')
buf = f.read() buf = f.read()
f.close() f.close()
mod = Module(buf, filename) mod = Module(buf, filename)
try: try:
mod.compile(display) mod.compile(display)
except SyntaxError, err: except SyntaxError:
raise raise
else: else:
f = open(filename + "c", "wb") f = open(filename + "c", "wb")
@ -134,7 +127,7 @@ class Module(AbstractCompileMode):
# to indicate the type of the value. simplest way to get the # to indicate the type of the value. simplest way to get the
# same effect is to call marshal and then skip the code. # same effect is to call marshal and then skip the code.
mtime = os.path.getmtime(self.filename) mtime = os.path.getmtime(self.filename)
mtime = struct.pack('i', mtime) mtime = struct.pack('<i', mtime)
return self.MAGIC + mtime return self.MAGIC + mtime
class LocalNameFinder: class LocalNameFinder:
@ -310,9 +303,17 @@ class CodeGenerator:
else: else:
self.emit(prefix + '_NAME', name) self.emit(prefix + '_NAME', name)
def set_lineno(self, node, force=0): # The set_lineno() function and the explicit emit() calls for
"""Emit SET_LINENO if node has lineno attribute and it is # SET_LINENO below are only used to generate the line number table.
different than the last lineno emitted. # As of Python 2.3, the interpreter does not have a SET_LINENO
# instruction. pyassem treats SET_LINENO opcodes as a special case.
def set_lineno(self, node, force=False):
"""Emit SET_LINENO if necessary.
The instruction is considered necessary if the node has a
lineno attribute and it is different than the last lineno
emitted.
Returns true if SET_LINENO was emitted. Returns true if SET_LINENO was emitted.
@ -326,8 +327,8 @@ class CodeGenerator:
or force): or force):
self.emit('SET_LINENO', lineno) self.emit('SET_LINENO', lineno)
self.last_lineno = lineno self.last_lineno = lineno
return 1 return True
return 0 return False
# The first few visitor methods handle nodes that generate new # The first few visitor methods handle nodes that generate new
# code objects. They use class attributes to determine what # code objects. They use class attributes to determine what
@ -387,9 +388,6 @@ class CodeGenerator:
def visitClass(self, node): def visitClass(self, node):
gen = self.ClassGen(node, self.scopes, gen = self.ClassGen(node, self.scopes,
self.get_module()) self.get_module())
if node.doc:
self.emit('LOAD_CONST', node.doc)
self.storeName('__doc__')
walk(node.code, gen) walk(node.code, gen)
gen.finish() gen.finish()
self.set_lineno(node) self.set_lineno(node)
@ -447,7 +445,7 @@ class CodeGenerator:
self.nextBlock(loop) self.nextBlock(loop)
self.setups.push((LOOP, loop)) self.setups.push((LOOP, loop))
self.set_lineno(node, force=1) self.set_lineno(node, force=True)
self.visit(node.test) self.visit(node.test)
self.emit('JUMP_IF_FALSE', else_ or after) self.emit('JUMP_IF_FALSE', else_ or after)
@ -617,7 +615,7 @@ class CodeGenerator:
return start, anchor return start, anchor
def visitListCompIf(self, node, branch): def visitListCompIf(self, node, branch):
self.set_lineno(node, force=1) self.set_lineno(node, force=True)
self.visit(node.test) self.visit(node.test)
self.emit('JUMP_IF_FALSE', branch) self.emit('JUMP_IF_FALSE', branch)
self.newBlock() self.newBlock()
@ -975,7 +973,7 @@ class CodeGenerator:
def visitYield(self, node): def visitYield(self, node):
self.set_lineno(node) self.set_lineno(node)
self.visit(node.value) self.visit(node.value)
self.emit('YIELD_STMT') self.emit('YIELD_VALUE')
# slice and subscript stuff # slice and subscript stuff
@ -1266,7 +1264,6 @@ class FunctionCodeGenerator(NestedScopeMixin, AbstractFunctionCode,
self.__super_init(func, scopes, isLambda, class_name, mod) self.__super_init(func, scopes, isLambda, class_name, mod)
self.graph.setFreeVars(self.scope.get_free_vars()) self.graph.setFreeVars(self.scope.get_free_vars())
self.graph.setCellVars(self.scope.get_cell_vars()) self.graph.setCellVars(self.scope.get_cell_vars())
if self.graph.checkFlag(CO_GENERATOR_ALLOWED):
if self.scope.generator is not None: if self.scope.generator is not None:
self.graph.setFlag(CO_GENERATOR) self.graph.setFlag(CO_GENERATOR)
@ -1304,6 +1301,12 @@ class ClassCodeGenerator(NestedScopeMixin, AbstractClassCode, CodeGenerator):
self.__super_init(klass, scopes, module) self.__super_init(klass, scopes, module)
self.graph.setFreeVars(self.scope.get_free_vars()) self.graph.setFreeVars(self.scope.get_free_vars())
self.graph.setCellVars(self.scope.get_cell_vars()) self.graph.setCellVars(self.scope.get_cell_vars())
self.set_lineno(klass)
self.emit("LOAD_GLOBAL", "__name__")
self.storeName("__module__")
if klass.doc:
self.emit("LOAD_CONST", klass.doc)
self.storeName('__doc__')
def generateArgList(arglist): def generateArgList(arglist):
"""Generate an arg list marking TupleArgs""" """Generate an arg list marking TupleArgs"""
@ -1379,7 +1382,5 @@ def wrap_aug(node):
return wrapper[node.__class__](node) return wrapper[node.__class__](node)
if __name__ == "__main__": if __name__ == "__main__":
import sys
for file in sys.argv[1:]: for file in sys.argv[1:]:
compileFile(file) compileFile(file)

View File

@ -249,6 +249,9 @@ class SymbolVisitor:
scope = ClassScope(node.name, self.module) scope = ClassScope(node.name, self.module)
if parent.nested or isinstance(parent, FunctionScope): if parent.nested or isinstance(parent, FunctionScope):
scope.nested = 1 scope.nested = 1
if node.doc is not None:
scope.add_def('__doc__')
scope.add_def('__module__')
self.scopes[node] = scope self.scopes[node] = scope
prev = self.klass prev = self.klass
self.klass = node.name self.klass = node.name

View File

@ -37,7 +37,11 @@ from consts import OP_ASSIGN, OP_DELETE, OP_APPLY
def parseFile(path): def parseFile(path):
f = open(path) f = open(path)
src = f.read() # XXX The parser API tolerates files without a trailing newline,
# but not strings without a trailing newline. Always add an extra
# newline to the file contents, since we're going through the string
# version of the API.
src = f.read() + "\n"
f.close() f.close()
return parse(src) return parse(src)
@ -100,6 +104,7 @@ class Transformer:
token.STRING: self.atom_string, token.STRING: self.atom_string,
token.NAME: self.atom_name, token.NAME: self.atom_name,
} }
self.encoding = None
def transform(self, tree): def transform(self, tree):
"""Transform an AST into a modified parse tree.""" """Transform an AST into a modified parse tree."""
@ -110,6 +115,7 @@ class Transformer:
def parsesuite(self, text): def parsesuite(self, text):
"""Return a modified parse tree for the given suite text.""" """Return a modified parse tree for the given suite text."""
# Hack for handling non-native line endings on non-DOS like OSs. # Hack for handling non-native line endings on non-DOS like OSs.
# this can go now we have universal newlines?
text = text.replace('\x0d', '') text = text.replace('\x0d', '')
return self.transform(parser.suite(text)) return self.transform(parser.suite(text))
@ -131,6 +137,12 @@ class Transformer:
def compile_node(self, node): def compile_node(self, node):
### emit a line-number node? ### emit a line-number node?
n = node[0] n = node[0]
if n == symbol.encoding_decl:
self.encoding = node[2]
node = node[1]
n = node[0]
if n == symbol.single_input: if n == symbol.single_input:
return self.single_input(node[1:]) return self.single_input(node[1:])
if n == symbol.file_input: if n == symbol.file_input:
@ -519,6 +531,7 @@ class Transformer:
return self.com_binary(Tuple, nodelist) return self.com_binary(Tuple, nodelist)
testlist_safe = testlist # XXX testlist_safe = testlist # XXX
testlist1 = testlist
exprlist = testlist exprlist = testlist
def test(self, nodelist): def test(self, nodelist):
@ -637,11 +650,14 @@ class Transformer:
def factor(self, nodelist): def factor(self, nodelist):
elt = nodelist[0] elt = nodelist[0]
t = elt[0] t = elt[0]
print "source", nodelist[-1]
node = self.com_node(nodelist[-1]) node = self.com_node(nodelist[-1])
# need to handle (unary op)constant here...
if t == token.PLUS: if t == token.PLUS:
node = UnaryAdd(node) node = UnaryAdd(node)
node.lineno = elt[2] node.lineno = elt[2]
elif t == token.MINUS: elif t == token.MINUS:
print node
node = UnarySub(node) node = UnarySub(node)
node.lineno = elt[2] node.lineno = elt[2]
elif t == token.TILDE: elif t == token.TILDE:
@ -699,11 +715,21 @@ class Transformer:
n.lineno = nodelist[0][2] n.lineno = nodelist[0][2]
return n return n
# Evaluate the source text of a single string token and return its value.
# atom_string calls this once per token and concatenates the results.
# When self.encoding is set (compile_node stores it on seeing an
# encoding_decl node), the literal is evaluated with a "# coding: <encoding>"
# line prepended; otherwise it is evaluated unchanged.
# NOTE(review): eval() is applied to lit directly -- this presumably relies
# on lit always being a string token produced by the parser; confirm.
def decode_literal(self, lit):
if self.encoding:
# this is particularly fragile & a bit of a
# hack... changes in compile.c:parsestr and
# tokenizer.c must be reflected here.
# For any declared encoding other than utf-8 / iso-8859-1 the literal is
# decoded as UTF-8 and re-encoded into the declared encoding before it is
# evaluated (presumably lit arrives UTF-8 encoded in that case -- confirm).
if self.encoding not in ['utf-8', 'iso-8859-1']:
lit = unicode(lit, 'utf-8').encode(self.encoding)
return eval("# coding: %s\n%s" % (self.encoding, lit))
else:
# No encoding declaration recorded: evaluate the literal text as-is.
return eval(lit)
def atom_string(self, nodelist): def atom_string(self, nodelist):
### need to verify this matches compile.c
k = '' k = ''
for node in nodelist: for node in nodelist:
k = k + eval(node[1]) k += self.decode_literal(node[1])
n = Const(k) n = Const(k)
n.lineno = nodelist[0][2] n.lineno = nodelist[0][2]
return n return n

View File

@ -89,7 +89,7 @@ node2tuple(node *n, /* node to convert */
PyObject *v; PyObject *v;
PyObject *w; PyObject *w;
v = mkseq(1 + NCH(n)); v = mkseq(1 + NCH(n) + (TYPE(n) == encoding_decl));
if (v == NULL) if (v == NULL)
return (v); return (v);
w = PyInt_FromLong(TYPE(n)); w = PyInt_FromLong(TYPE(n));
@ -106,6 +106,9 @@ node2tuple(node *n, /* node to convert */
} }
(void) addelem(v, i+1, w); (void) addelem(v, i+1, w);
} }
if (TYPE(n) == encoding_decl)
(void) addelem(v, i+1, PyString_FromString(STR(n)));
return (v); return (v);
} }
else if (ISTERMINAL(TYPE(n))) { else if (ISTERMINAL(TYPE(n))) {
@ -478,7 +481,7 @@ err_string(char *message)
/* PyObject* parser_do_parse(PyObject* args, int type) /* PyObject* parser_do_parse(PyObject* args, int type)
* *
* Internal function to actually execute the parse and return the result if * Internal function to actually execute the parse and return the result if
* successful, or set an exception if not. * successful or set an exception if not.
* *
*/ */
static PyObject* static PyObject*
@ -494,10 +497,8 @@ parser_do_parse(PyObject *args, PyObject *kw, char *argspec, int type)
(type == PyST_EXPR) (type == PyST_EXPR)
? eval_input : file_input); ? eval_input : file_input);
if (n != 0) if (n)
res = parser_newstobject(n, type); res = parser_newstobject(n, type);
else
err_string("could not parse string");
} }
return (res); return (res);
} }

View File

@ -1241,6 +1241,9 @@ decode_utf8(char **sPtr, char *end, char* encoding)
#endif #endif
} }
/* compiler.transformer.Transformer.decode_literal depends on what
might seem like minor details of this function -- changes here
must be reflected there. */
static PyObject * static PyObject *
parsestr(struct compiling *c, char *s) parsestr(struct compiling *c, char *s)
{ {