SF patch #872326: Generator expression implementation

(Code contributed by Jiwon Seo.)

The documentation portion of the patch is being re-worked and will be
checked-in soon.  Likewise, PEP 289 will be updated to reflect Guido's
rationale for the design decisions on binding behavior (as described
in his patch comments and in discussions on python-dev).

The test file, test_genexps.py, is written in doctest format and is
meant to exercise all aspects of the patch.  Further additions are
welcome from everyone.  Please stress test this new feature as much as
possible before the alpha release.
This commit is contained in:
Raymond Hettinger 2004-05-19 08:20:33 +00:00
parent 285cfccecb
commit 354433a59d
20 changed files with 1590 additions and 439 deletions

View File

@ -88,6 +88,84 @@ Greg Wilson and ultimately implemented by Raymond Hettinger.}
XXX write this.
%======================================================================
\section{PEP 289: Generator Expressions}
Generator expressions create in-line generators using a syntax similar
to list comprehensions but with parentheses instead of the surrounding
brackets.
Genexps allow simple generators to be constructed without a separate function
definition. Writing:
\begin{verbatim}
g = (tgtexp for var1 in exp1 for var2 in exp2 if exp3)
\end{verbatim}
is equivalent to:
\begin{verbatim}
def _generator(exp):
for var1 in exp:
for var2 in exp2:
if exp3:
yield tgtexp
g = _generator(exp1)
del _generator
\end{verbatim}
The advantage over full generator definitions is in economy of
expression. Their advantage over list comprehensions is in saving
memory by creating data only when it is needed rather than forming
a whole list in memory all at once. Applications using memory-friendly
generator expressions may scale up to high volumes of data
more readily than with list comprehensions.
Generator expressions are intended to be used inside functions
such as \function{sum()}, \function{min()}, \function{set()}, and
\function{dict()}. These functions consume their data all at once
and would not benefit from having a full list instead of a generator
as an input:
\begin{verbatim}
>>> sum(i*i for i in range(10))
285
>>> sorted(set(i*i for i in xrange(-10, 11)))
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
>>> words = "Adam apple baker Bill Nancy NASA nut".split()
>>> dict((word.lower(), word) for word in words)
{'apple': 'apple', 'baker': 'baker', 'bill': 'Bill', 'nasa': 'NASA',
'adam': 'Adam', 'nancy': 'Nancy', 'nut': 'nut'}
>>> xvec = [10, 20, 30]
>>> yvec = [7, 5, 3]
>>> sum(x*y for x,y in itertools.izip(xvec, yvec)) # dot product
260
\end{verbatim}
These examples show the intended use for generator expressions
in situations where the values get consumed immediately after the
generator is created. In these situations, they operate like
memory-efficient versions of list comprehensions.
For more complex uses of generators, it is strongly recommended that
the traditional full generator definitions be used instead. In a
generator expression, the first for-loop expression is evaluated
as soon as the expression is defined while the other expressions do
not get evaluated until the generator is run. This nuance is never
an issue when the generator is used immediately. If it is not used
right away, then it is better to write a full generator definition
which more clearly reveals when the expressions are evaluated and is
more obvious about the visibility and lifetime of its looping variables.
\begin{seealso}
\seepep{289}{Generator Expressions}{Proposed by Raymond Hettinger and
implemented by Jiwon Seo with early efforts steered by Hye-Shik Chang.}
\end{seealso}
%======================================================================
\section{PEP 322: Reverse Iteration}

View File

@ -80,8 +80,9 @@ arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: '(' [testlist] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}' | '`' testlist1 '`' | NAME | NUMBER | STRING+
atom: '(' [testlist_gexp] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}' | '`' testlist1 '`' | NAME | NUMBER | STRING+
listmaker: test ( list_for | (',' test)* [','] )
testlist_gexp: test ( gen_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
@ -95,12 +96,16 @@ dictmaker: test ':' test (',' test ':' test)* [',']
classdef: 'class' NAME ['(' testlist ')'] ':' suite
arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
argument: [test '='] test # Really [keyword '='] test
argument: [test '='] test [gen_for] # Really [keyword '='] test
list_iter: list_for | list_if
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
list_if: 'if' test [list_iter]
gen_iter: gen_for | gen_if
gen_for: 'for' exprlist 'in' test [gen_iter]
gen_if: 'if' test [gen_iter]
testlist1: test (',' test)*
# not used in grammar, but may appear in "node" passed from Parser to Compiler

View File

@ -49,20 +49,24 @@
#define power 304
#define atom 305
#define listmaker 306
#define lambdef 307
#define trailer 308
#define subscriptlist 309
#define subscript 310
#define sliceop 311
#define exprlist 312
#define testlist 313
#define testlist_safe 314
#define dictmaker 315
#define classdef 316
#define arglist 317
#define argument 318
#define list_iter 319
#define list_for 320
#define list_if 321
#define testlist1 322
#define encoding_decl 323
#define testlist_gexp 307
#define lambdef 308
#define trailer 309
#define subscriptlist 310
#define subscript 311
#define sliceop 312
#define exprlist 313
#define testlist 314
#define testlist_safe 315
#define dictmaker 316
#define classdef 317
#define arglist 318
#define argument 319
#define list_iter 320
#define list_for 321
#define list_if 322
#define gen_iter 323
#define gen_for 324
#define gen_if 325
#define testlist1 326
#define encoding_decl 327

View File

@ -46,7 +46,7 @@ typedef struct _symtable_entry {
including free refs to globals */
int ste_generator; /* true if namespace is a generator */
int ste_opt_lineno; /* lineno of last exec or import * */
int ste_tmpname; /* temporary name counter */
int ste_tmpname; /* temporary name counter */
struct symtable *ste_table;
} PySymtableEntryObject;
@ -93,6 +93,9 @@ PyAPI_FUNC(void) PySymtable_Free(struct symtable *);
#define OPT_EXEC 2
#define OPT_BARE_EXEC 4
#define GENERATOR 1
#define GENERATOR_EXPRESSION 2
#ifdef __cplusplus
}
#endif

View File

@ -1236,6 +1236,82 @@ class ListCompFor(Node):
def __repr__(self):
return "ListCompFor(%s, %s, %s)" % (repr(self.assign), repr(self.list), repr(self.ifs))
class GenExpr(Node):
    """AST node for a complete generator expression.

    ``code`` holds the inner node describing the yielded expression and its
    qualifiers.  The node also carries function-like attributes
    (``argnames``, ``varargs``, ``kwargs``); the single argument name is
    the placeholder '[outmost-iterable]'.
    """
    nodes["genexpr"] = "GenExpr"

    def __init__(self, code):
        self.code = code
        self.argnames = ['[outmost-iterable]']
        self.varargs = None
        self.kwargs = None

    def getChildren(self):
        # One-element tuple: the wrapped inner node.
        return (self.code,)

    def getChildNodes(self):
        # Same single child as getChildren().
        return (self.code,)

    def __repr__(self):
        inner = repr(self.code)
        return "GenExpr(%s)" % (inner,)
class GenExprInner(Node):
    """AST node pairing the yielded expression with its qualifier list.

    ``expr`` is the expression produced for each iteration and ``quals`` is
    the sequence of qualifier nodes that drive the iteration.
    """
    nodes["genexprinner"] = "GenExprInner"

    def __init__(self, expr, quals):
        self.expr = expr
        self.quals = quals

    def getChildren(self):
        # The expression first, followed by the flattened qualifiers.
        children = [self.expr]
        children.extend(flatten(self.quals))
        return tuple(children)

    def getChildNodes(self):
        # As getChildren(), but flattened with flatten_nodes().
        nodelist = [self.expr]
        nodelist.extend(flatten_nodes(self.quals))
        return tuple(nodelist)

    def __repr__(self):
        parts = (repr(self.expr), repr(self.quals))
        return "GenExprInner(%s, %s)" % parts
class GenExprFor(Node):
    """AST node for one ``for ... in ...`` clause of a generator expression.

    ``assign`` is the loop target, ``iter`` the iterable expression and
    ``ifs`` the list of condition nodes attached to this clause.
    ``is_outmost`` starts out False.
    """
    nodes["genexprfor"] = "GenExprFor"

    def __init__(self, assign, iter, ifs):
        self.assign = assign
        self.iter = iter
        self.ifs = ifs
        self.is_outmost = False

    def getChildren(self):
        # Target, iterable, then the flattened conditions.
        children = [self.assign, self.iter]
        children.extend(flatten(self.ifs))
        return tuple(children)

    def getChildNodes(self):
        # As getChildren(), but flattened with flatten_nodes().
        nodelist = [self.assign, self.iter]
        nodelist.extend(flatten_nodes(self.ifs))
        return tuple(nodelist)

    def __repr__(self):
        parts = (repr(self.assign), repr(self.iter), repr(self.ifs))
        return "GenExprFor(%s, %s, %s)" % parts
class GenExprIf(Node):
    """AST node for one ``if`` condition of a generator expression.

    ``test`` is the condition expression.
    """
    nodes["genexprif"] = "GenExprIf"

    def __init__(self, test):
        self.test = test

    def getChildren(self):
        # One-element tuple: the condition.
        return (self.test,)

    def getChildNodes(self):
        # Same single child as getChildren().
        return (self.test,)

    def __repr__(self):
        condition = repr(self.test)
        return "GenExprIf(%s)" % (condition,)
klasses = globals()
for k in nodes.keys():
nodes[k] = klasses[nodes[k]]

View File

@ -619,6 +619,79 @@ class CodeGenerator:
self.newBlock()
self.emit('POP_TOP')
def visitGenExpr(self, node):
    """Emit code that builds a generator expression's function and calls it.

    The body of the expression is compiled by a GenExprCodeGenerator.  The
    resulting code object is turned into a function (or a closure when it
    has free variables) and called with one argument: the iterator over
    the first qualifier's iterable, which is evaluated here in the
    enclosing code.
    """
    gen = GenExprCodeGenerator(node, self.scopes, self.class_name,
                               self.get_module())
    walk(node.code, gen)
    gen.finish()
    self.set_lineno(node)

    free_names = gen.scope.get_free_vars()
    if free_names:
        # Closure cells are loaded before the code constant.
        for free_name in free_names:
            self.emit('LOAD_CLOSURE', free_name)
        make_op = 'MAKE_CLOSURE'
    else:
        make_op = 'MAKE_FUNCTION'
    self.emit('LOAD_CONST', gen)
    self.emit(make_op, 0)

    # precomputation of outmost iterable
    outermost = node.code.quals[0]
    self.visit(outermost.iter)
    self.emit('GET_ITER')
    self.emit('CALL_FUNCTION', 1)
def visitGenExprInner(self, node):
    """Emit the nested loops, conditions and YIELD_VALUE of a genexp body.

    Each qualifier is visited in source order (its visit returns the loop's
    start and anchor blocks); the loops are then closed innermost-first.
    """
    self.set_lineno(node)

    # (start, cont, anchor) for every loop, in source order.
    loops = []
    for qual in node.quals:
        start, anchor = self.visit(qual)
        cont = None
        for cond in qual.ifs:
            # All conditions of one loop share a single "continue" block,
            # created lazily on the first condition.
            if cont is None:
                cont = self.newBlock()
            self.visit(cond, cont)
        loops.append((start, cont, anchor))

    self.visit(node.expr)
    self.emit('YIELD_VALUE')

    # Close the loops from the innermost outwards.
    loops.reverse()
    for start, cont, anchor in loops:
        if cont:
            skip_one = self.newBlock()
            self.emit('JUMP_FORWARD', skip_one)
            self.startBlock(cont)
            self.emit('POP_TOP')
            self.nextBlock(skip_one)
        self.emit('JUMP_ABSOLUTE', start)
        self.startBlock(anchor)
    self.emit('LOAD_CONST', None)
def visitGenExprFor(self, node):
    """Emit the head of one ``for`` clause; return (start, anchor) blocks.

    For the outermost clause the iterator is loaded from the
    '[outmost-iterable]' name; for any other clause the iterable
    expression is compiled here and GET_ITER is applied to it.
    """
    start = self.newBlock()
    anchor = self.newBlock()

    if not node.is_outmost:
        self.visit(node.iter)
        self.emit('GET_ITER')
    else:
        self.loadName('[outmost-iterable]')

    self.nextBlock(start)
    self.set_lineno(node, force=True)
    self.emit('FOR_ITER', anchor)
    self.nextBlock()
    self.visit(node.assign)
    return start, anchor
def visitGenExprIf(self, node, branch):
    """Emit one ``if`` condition; jump to ``branch`` when it is false."""
    condition = node.test
    self.set_lineno(node, force=True)
    self.visit(condition)
    self.emit('JUMP_IF_FALSE', branch)
    # Fall-through path: a new block that pops the tested value.
    self.newBlock()
    self.emit('POP_TOP')
# exception related
def visitAssert(self, node):
@ -1199,6 +1272,7 @@ class AbstractFunctionCode:
klass.lambdaCount = klass.lambdaCount + 1
else:
name = func.name
args, hasTupleArg = generateArgList(func.argnames)
self.graph = pyassem.PyFlowGraph(name, func.filename, args,
optimized=1)
@ -1263,6 +1337,21 @@ class FunctionCodeGenerator(NestedScopeMixin, AbstractFunctionCode,
if self.scope.generator is not None:
self.graph.setFlag(CO_GENERATOR)
class GenExprCodeGenerator(NestedScopeMixin, AbstractFunctionCode,
                           CodeGenerator):
    """Code generator for the body of a generator expression.

    The scope for the expression is looked up in ``scopes``; the resulting
    flow graph gets that scope's free and cell variables and is flagged
    CO_GENERATOR.
    """
    super_init = CodeGenerator.__init__ # called by other init
    scopes = None

    __super_init = AbstractFunctionCode.__init__

    def __init__(self, gexp, scopes, class_name, mod):
        self.scopes = scopes
        self.scope = scopes[gexp]
        self.__super_init(gexp, scopes, 1, class_name, mod)

        graph = self.graph
        graph.setFreeVars(self.scope.get_free_vars())
        graph.setCellVars(self.scope.get_cell_vars())
        graph.setFlag(CO_GENERATOR)
class AbstractClassCode:
def __init__(self, klass, scopes, module):

View File

@ -179,6 +179,21 @@ class ModuleScope(Scope):
class FunctionScope(Scope):
pass
class GenExprScope(Scope):
    """Scope of a generator expression.

    Each scope is named "generator expression<N>" with N taken from a
    class-wide counter, and has the single parameter '[outmost-iterable]'.
    """
    __super_init = Scope.__init__

    # Shared by all instances; bumped once per scope created.
    __counter = 1

    def __init__(self, module, klass=None):
        # Read and increment the counter on the class.  Incrementing through
        # ``self`` would only create an instance attribute, leaving the
        # class-level counter stuck at 1 for every scope.
        i = GenExprScope.__counter
        GenExprScope.__counter += 1
        self.__super_init("generator expression<%d>"%i, module, klass)
        self.add_param('[outmost-iterable]')

    def get_names(self):
        # Delegate to the base class; ``self`` must be passed explicitly
        # because the method is reached through the class.
        keys = Scope.get_names(self)
        return keys
class LambdaScope(FunctionScope):
__super_init = Scope.__init__
@ -220,6 +235,32 @@ class SymbolVisitor:
self.visit(node.code, scope)
self.handle_free_vars(scope, parent)
def visitGenExpr(self, node, parent):
    """Create and register the scope for a generator expression.

    The new scope is marked nested when the parent is itself nested or is
    a function or generator-expression scope.
    """
    scope = GenExprScope(self.module, self.klass)
    inside_function = isinstance(parent, (FunctionScope, GenExprScope))
    if parent.nested or inside_function:
        scope.nested = 1

    self.scopes[node] = scope
    self.visit(node.code, scope)
    self.handle_free_vars(scope, parent)
def visitGenExprInner(self, node, scope):
    """Visit every qualifier, then the yielded expression, in ``scope``."""
    for qualifier in node.quals:
        self.visit(qualifier, scope)
    self.visit(node.expr, scope)
def visitGenExprFor(self, node, scope):
    """Visit the target (as an assignment), the iterable, then each condition."""
    self.visit(node.assign, scope, 1)
    self.visit(node.iter, scope)
    for condition in node.ifs:
        self.visit(condition, scope)
def visitGenExprIf(self, node, scope):
    """Visit the condition expression of an ``if`` clause in ``scope``."""
    condition = node.test
    self.visit(condition, scope)
def visitLambda(self, node, parent, assign=0):
# Lambda is an expression, so it could appear in an expression
# context where assign is passed. The transformer should catch

View File

@ -534,6 +534,12 @@ class Transformer:
testlist1 = testlist
exprlist = testlist
def testlist_gexp(self, nodelist):
    """Transform a testlist_gexp node.

    A two-element list whose second element is a gen_for is a generator
    expression; anything else is handled as an ordinary testlist.
    """
    is_genexp = len(nodelist) == 2 and nodelist[1][0] == symbol.gen_for
    if not is_genexp:
        return self.testlist(nodelist)
    element = self.com_node(nodelist[0])
    return self.com_generator_expression(element, nodelist[1])
def test(self, nodelist):
# and_test ('or' and_test)* | lambdef
if len(nodelist) == 1 and nodelist[0][0] == symbol.lambdef:
@ -1085,6 +1091,48 @@ class Transformer:
values.append(self.com_node(nodelist[i]))
return List(values)
if hasattr(symbol, 'gen_for'):
    def com_generator_expression(self, expr, node):
        """Build a GenExpr from the yielded ``expr`` and a gen_for parse node.

        Walks the chain of clauses starting at ``node``; each 'for' clause
        becomes a GenExprFor and each 'if' clause is attached to the most
        recent GenExprFor.  The first GenExprFor is flagged as outmost.
        """
        # gen_iter: gen_for | gen_if
        # gen_for: 'for' exprlist 'in' test [gen_iter]
        # gen_if: 'if' test [gen_iter]
        lineno = node[1][2]
        loops = []
        while node:
            keyword = node[1][1]
            if keyword == 'for':
                target = self.com_assign(node[2], OP_ASSIGN)
                iterable = self.com_node(node[4])
                current = GenExprFor(target, iterable, [])
                current.lineno = node[1][2]
                loops.append(current)
                # Five entries means there is no trailing gen_iter.
                if len(node) == 5:
                    node = None
                else:
                    node = self.com_gen_iter(node[5])
            elif keyword == 'if':
                condition = self.com_node(node[2])
                guard = GenExprIf(condition)
                guard.lineno = node[1][2]
                current.ifs.append(guard)
                # Three entries means there is no trailing gen_iter.
                if len(node) == 3:
                    node = None
                else:
                    node = self.com_gen_iter(node[3])
            else:
                raise SyntaxError(
                    "unexpected generator expression element: %s %d"
                    % (node, lineno))
        loops[0].is_outmost = True
        result = GenExpr(GenExprInner(expr, loops))
        result.lineno = lineno
        return result

    def com_gen_iter(self, node):
        """Return the clause wrapped by a gen_iter parse node."""
        assert node[0] == symbol.gen_iter
        return node[1]
def com_dictmaker(self, nodelist):
# dictmaker: test ':' test (',' test ':' value)* [',']
items = []
@ -1122,6 +1170,8 @@ class Transformer:
if node[0] == token.STAR or node[0] == token.DOUBLESTAR:
break
kw, result = self.com_argument(node, kw)
if len_nodelist != 2 and isinstance(result, GenExpr):
raise SyntaxError, 'generator expression needs parenthesis'
args.append(result)
else:
# No broken by star arg, so skip the last one we processed.
@ -1148,6 +1198,9 @@ class Transformer:
return CallFunc(primaryNode, args, star_node, dstar_node)
def com_argument(self, nodelist, kw):
if len(nodelist) == 3 and nodelist[2][0] == symbol.gen_for:
test = self.com_node(nodelist[1])
return 0, self.com_generator_expression(test, nodelist[2])
if len(nodelist) == 2:
if kw:
raise SyntaxError, "non-keyword arg after keyword arg"

View File

@ -61,23 +61,27 @@ factor = 303
power = 304
atom = 305
listmaker = 306
lambdef = 307
trailer = 308
subscriptlist = 309
subscript = 310
sliceop = 311
exprlist = 312
testlist = 313
testlist_safe = 314
dictmaker = 315
classdef = 316
arglist = 317
argument = 318
list_iter = 319
list_for = 320
list_if = 321
testlist1 = 322
encoding_decl = 323
testlist_gexp = 307
lambdef = 308
trailer = 309
subscriptlist = 310
subscript = 311
sliceop = 312
exprlist = 313
testlist = 314
testlist_safe = 315
dictmaker = 316
classdef = 317
arglist = 318
argument = 319
list_iter = 320
list_for = 321
list_if = 322
gen_iter = 323
gen_for = 324
gen_if = 325
testlist1 = 326
encoding_decl = 327
#--end constants--
sym_name = {}

View File

@ -566,7 +566,7 @@ def test_main(verbose=None):
# doctests
from test import test_deque
# test_support.run_doctest(test_deque, verbose)
test_support.run_doctest(test_deque, verbose)
if __name__ == "__main__":
test_main(verbose=True)

258
Lib/test/test_genexps.py Normal file
View File

@ -0,0 +1,258 @@
doctests = """
Test simple loop with conditional
>>> sum(i*i for i in range(100) if i&1 == 1)
166650
Test simple nesting
>>> list((i,j) for i in range(3) for j in range(4) )
[(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1), (1, 2), (1, 3), (2, 0), (2, 1), (2, 2), (2, 3)]
Test nesting with the inner expression dependent on the outer
>>> list((i,j) for i in range(4) for j in range(i) )
[(1, 0), (2, 0), (2, 1), (3, 0), (3, 1), (3, 2)]
Make sure the induction variable is not exposed
>>> i = 20
>>> sum(i*i for i in range(100))
328350
>>> i
20
Test first class
>>> g = (i*i for i in range(4))
>>> type(g)
<type 'generator'>
>>> list(g)
[0, 1, 4, 9]
Test direct calls to next()
>>> g = (i*i for i in range(3))
>>> g.next()
0
>>> g.next()
1
>>> g.next()
4
>>> g.next()
Traceback (most recent call last):
File "<pyshell#21>", line 1, in -toplevel-
g.next()
StopIteration
Does it stay stopped?
>>> g.next()
Traceback (most recent call last):
File "<pyshell#21>", line 1, in -toplevel-
g.next()
StopIteration
>>> list(g)
[]
Test running gen when defining function is out of scope
>>> def f(n):
... return (i*i for i in xrange(n))
...
>>> list(f(10))
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
>>> def f(n):
... return ((i,j) for i in xrange(3) for j in xrange(n))
...
>>> list(f(4))
[(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1), (1, 2), (1, 3), (2, 0), (2, 1), (2, 2), (2, 3)]
>>> def f(n):
... return ((i,j) for i in xrange(3) for j in xrange(4) if j in xrange(n))
...
>>> list(f(4))
[(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1), (1, 2), (1, 3), (2, 0), (2, 1), (2, 2), (2, 3)]
>>> list(f(2))
[(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)]
#Verify that parenthesis are required in a statement
#>>> def f(n):
#... return i*i for i in xrange(n)
#...
#SyntaxError: invalid syntax
Verify early binding for the outermost for-expression
>>> x=10
>>> g = (i*i for i in range(x))
>>> x = 5
>>> list(g)
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
Verify late binding for the outermost if-expression
>>> include = (2,4,6,8)
>>> g = (i*i for i in range(10) if i in include)
>>> include = (1,3,5,7,9)
>>> list(g)
[1, 9, 25, 49, 81]
Verify late binding for the innermost for-expression
>>> g = ((i,j) for i in range(3) for j in range(x))
>>> x = 4
>>> list(g)
[(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1), (1, 2), (1, 3), (2, 0), (2, 1), (2, 2), (2, 3)]
Verify re-use of tuples (a side benefit of using genexps over listcomps)
>>> tupleids = map(id, ((i,i) for i in xrange(10)))
>>> max(tupleids) - min(tupleids)
0
########### Tests borrowed from or inspired by test_generators.py ############
Make a generator that acts like range()
>>> yrange = lambda n: (i for i in xrange(n))
>>> list(yrange(10))
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Generators always return to the most recent caller:
>>> def creator():
... r = yrange(5)
... print "creator", r.next()
... return r
...
>>> def caller():
... r = creator()
... for i in r:
... print "caller", i
...
>>> caller()
creator 0
caller 1
caller 2
caller 3
caller 4
Generators can call other generators:
>>> def zrange(n):
... for i in yrange(n):
... yield i
...
>>> list(zrange(5))
[0, 1, 2, 3, 4]
Verify that a gen exp cannot be resumed while it is actively running:
>>> g = (me.next() for i in xrange(10))
>>> me = g
>>> me.next()
Traceback (most recent call last):
File "<pyshell#30>", line 1, in -toplevel-
me.next()
File "<pyshell#28>", line 1, in <generator expression>
g = (me.next() for i in xrange(10))
ValueError: generator already executing
Verify exception propagation
>>> g = (10 // i for i in (5, 0, 2))
>>> g.next()
2
>>> g.next()
Traceback (most recent call last):
File "<pyshell#37>", line 1, in -toplevel-
g.next()
File "<pyshell#35>", line 1, in <generator expression>
g = (10 // i for i in (5, 0, 2))
ZeroDivisionError: integer division or modulo by zero
>>> g.next()
Traceback (most recent call last):
File "<pyshell#38>", line 1, in -toplevel-
g.next()
StopIteration
Make sure that None is a valid return value
>>> list(None for i in xrange(10))
[None, None, None, None, None, None, None, None, None, None]
Check that generator attributes are present
>>> g = (i*i for i in range(3))
>>> expected = set(['gi_frame', 'gi_running', 'next'])
>>> set(attr for attr in dir(g) if not attr.startswith('__')) >= expected
True
>>> print g.next.__doc__
x.next() -> the next value, or raise StopIteration
>>> import types
>>> isinstance(g, types.GeneratorType)
True
Check the __iter__ slot is defined to return self
>>> iter(g) is g
True
Verify that the running flag is set properly
>>> g = (me.gi_running for i in (0,1))
>>> me = g
>>> me.gi_running
0
>>> me.next()
1
>>> me.gi_running
0
Verify that genexps are weakly referencable
>>> import weakref
>>> g = (i*i for i in range(4))
>>> wr = weakref.ref(g)
>>> wr() is g
True
>>> p = weakref.proxy(g)
>>> list(p)
[0, 1, 4, 9]
"""
__test__ = {'doctests' : doctests}
# Run the module's doctests; when verbose and sys.gettotalrefcount is
# available, re-run them five times and print the total reference count
# recorded after each run.
def test_main(verbose=None):
import sys
from test import test_support
from test import test_genexps
test_support.run_doctest(test_genexps, verbose)
# verify reference counting
if verbose and hasattr(sys, "gettotalrefcount"):
import gc
# The result list is created at full size before the loop and filled in
# by index.
counts = [None] * 5
for i in xrange(len(counts)):
test_support.run_doctest(test_genexps, verbose)
# Collect garbage before sampling the total reference count.
gc.collect()
counts[i] = sys.gettotalrefcount()
print counts
if __name__ == "__main__":
test_main(verbose=True)

View File

@ -739,3 +739,46 @@ print [
for (sp_sno, sp_pno) in suppart
if sno == sp_sno and pno == sp_pno
]
# generator expression tests
g = ([x for x in range(10)] for x in range(1))
verify(g.next() == [x for x in range(10)])
try:
g.next()
raise TestFailed, 'should produce StopIteration exception'
except StopIteration:
pass
a = 1
try:
g = (a for d in a)
g.next()
raise TestFailed, 'should produce TypeError'
except TypeError:
pass
verify(list((x, y) for x in 'abcd' for y in 'abcd') == [(x, y) for x in 'abcd' for y in 'abcd'])
verify(list((x, y) for x in 'ab' for y in 'xy') == [(x, y) for x in 'ab' for y in 'xy'])
a = [x for x in range(10)]
b = (x for x in (y for y in a))
verify(sum(b) == sum([x for x in range(10)]))
verify(sum(x**2 for x in range(10)) == sum([x**2 for x in range(10)]))
verify(sum(x*x for x in range(10) if x%2) == sum([x*x for x in range(10) if x%2]))
verify(sum(x for x in (y for y in range(10))) == sum([x for x in range(10)]))
verify(sum(x for x in (y for y in (z for z in range(10)))) == sum([x for x in range(10)]))
verify(sum(x for x in [y for y in (z for z in range(10))]) == sum([x for x in range(10)]))
verify(sum(x for x in (y for y in (z for z in range(10) if True)) if True) == sum([x for x in range(10)]))
verify(sum(x for x in (y for y in (z for z in range(10) if True) if False) if True) == 0)
check_syntax("foo(x for x in range(10), 100)")
check_syntax("foo(100, x for x in range(10))")
# test for outmost iterable precomputation
x = 10; g = (i for i in range(x)); x = 5
verify(len(list(g)) == 10)
# This should hold, since we're only precomputing outmost iterable.
x = 10; t = False; g = ((i,j) for i in range(x) if t for j in range(x))
x = 5; t = True;
verify([(i,j) for i in range(10) for j in range(5)] == list(g))

View File

@ -67,6 +67,8 @@ class RoundtripLegalSyntaxTestCase(unittest.TestCase):
self.check_expr("lambda foo=bar, blaz=blat+2, **z: 0")
self.check_expr("lambda foo=bar, blaz=blat+2, *y, **z: 0")
self.check_expr("lambda x, *y, **z: 0")
self.check_expr("(x for x in range(10))")
self.check_expr("foo(x for x in range(10))")
def test_print(self):
self.check_suite("print")

View File

@ -500,6 +500,7 @@ Barry Scott
Steven Scott
Nick Seidenman
Fred Sells
Jiwon Seo
Denis Severson
Ha Shao
Bruce Sherwood

View File

@ -12,6 +12,8 @@ What's New in Python 2.4 alpha 1?
Core and builtins
-----------------
- Implemented generator expressions (PEP 289). Coded by Jiwon Seo.
- Enabled the profiling of C extension functions (and builtins) - check
new documentation and modified profiler and bdb modules for more details

View File

@ -855,7 +855,9 @@ VALIDATER(subscriptlist); VALIDATER(sliceop);
VALIDATER(exprlist); VALIDATER(dictmaker);
VALIDATER(arglist); VALIDATER(argument);
VALIDATER(listmaker); VALIDATER(yield_stmt);
VALIDATER(testlist1);
VALIDATER(testlist1); VALIDATER(gen_for);
VALIDATER(gen_iter); VALIDATER(gen_if);
VALIDATER(testlist_gexp);
#undef VALIDATER
@ -1246,6 +1248,21 @@ validate_list_iter(node *tree)
return res;
}
/*  gen_iter:  gen_for | gen_if
 *
 *  Validate a gen_iter node: it must have exactly one child, which is
 *  validated as a gen_for when it has that type and as a gen_if otherwise.
 *  Returns non-zero on success.
 */
static int
validate_gen_iter(node *tree)
{
    int res = (validate_ntype(tree, gen_iter)
               && validate_numnodes(tree, 1, "gen_iter"));

    /* Only look at the child once the node type and child count have been
       confirmed; otherwise CHILD(tree, 0) may not exist. */
    if (res) {
        node *child = CHILD(tree, 0);

        if (TYPE(child) == gen_for)
            res = validate_gen_for(child);
        else
            res = validate_gen_if(child);
    }
    return res;
}
/* list_for: 'for' exprlist 'in' testlist [list_iter]
*/
static int
@ -1268,6 +1285,28 @@ validate_list_for(node *tree)
return res;
}
/*  gen_for:  'for' exprlist 'in' test [gen_iter]
 *
 *  Validate a gen_for node.  The optional trailing gen_iter (or, failing
 *  that, the child count) is checked first, then the four leading children.
 *  Returns non-zero on success.
 */
static int
validate_gen_for(node *tree)
{
    int ok;

    /* Five children: the last one is the gen_iter.  Any other count is
       only acceptable if it is exactly four. */
    if (NCH(tree) == 5)
        ok = validate_gen_iter(CHILD(tree, 4));
    else
        ok = validate_numnodes(tree, 4, "gen_for");

    if (!ok)
        return ok;

    return (validate_name(CHILD(tree, 0), "for")
            && validate_exprlist(CHILD(tree, 1))
            && validate_name(CHILD(tree, 2), "in")
            && validate_test(CHILD(tree, 3)));
}
/* list_if: 'if' test [list_iter]
*/
static int
@ -1288,6 +1327,25 @@ validate_list_if(node *tree)
return res;
}
/*  gen_if:  'if' test [gen_iter]
 *
 *  Validate a gen_if node.  The optional trailing gen_iter (or, failing
 *  that, the child count) is checked first, then the 'if' keyword and the
 *  test.  Returns non-zero on success.
 */
static int
validate_gen_if(node *tree)
{
    int ok;

    /* Three children: the last one is the gen_iter.  Any other count is
       only acceptable if it is exactly two. */
    if (NCH(tree) == 3)
        ok = validate_gen_iter(CHILD(tree, 2));
    else
        ok = validate_numnodes(tree, 2, "gen_if");

    if (!ok)
        return ok;

    return (validate_name(CHILD(tree, 0), "if")
            && validate_test(CHILD(tree, 1)));
}
/* validate_fpdef()
*
@ -2187,7 +2245,7 @@ validate_atom(node *tree)
&& (validate_rparen(CHILD(tree, nch - 1))));
if (res && (nch == 3))
res = validate_testlist(CHILD(tree, 1));
res = validate_testlist_gexp(CHILD(tree, 1));
break;
case LSQB:
if (nch == 2)
@ -2244,7 +2302,7 @@ validate_listmaker(node *tree)
ok = validate_test(CHILD(tree, 0));
/*
* list_iter | (',' test)* [',']
* list_for | (',' test)* [',']
*/
if (nch == 2 && TYPE(CHILD(tree, 1)) == list_for)
ok = validate_list_for(CHILD(tree, 1));
@ -2266,6 +2324,43 @@ validate_listmaker(node *tree)
return ok;
}
/*  testlist_gexp:
 *    test ( gen_for | (',' test)* [','] )
 *
 *  Validate a testlist_gexp node: a leading test followed either by a
 *  single gen_for or by a comma-separated list of further tests with an
 *  optional trailing comma.  Returns non-zero on success.
 */
static int
validate_testlist_gexp(node *tree)
{
    int nch = NCH(tree);
    int ok;

    if (nch == 0) {
        /* Report the specific problem and stop; continuing would only
           replace this message with a less accurate one. */
        err_string("missing child nodes of testlist_gexp");
        return 0;
    }
    ok = validate_test(CHILD(tree, 0));

    /*
     *  gen_for | (',' test)* [',']
     */
    if (nch == 2 && TYPE(CHILD(tree, 1)) == gen_for) {
        /* Keep a failure of the leading test: the gen_for result must not
           overwrite it. */
        if (ok)
            ok = validate_gen_for(CHILD(tree, 1));
    }
    else {
        /*  (',' test)* [',']  */
        int i = 1;
        while (ok && nch - i >= 2) {
            ok = (validate_comma(CHILD(tree, i))
                  && validate_test(CHILD(tree, i+1)));
            i += 2;
        }
        if (ok && i == nch-1)
            ok = validate_comma(CHILD(tree, i));
        else if (i != nch) {
            ok = 0;
            err_string("illegal trailing nodes for testlist_gexp");
        }
    }
    return ok;
}
/* funcdef:
* 'def' NAME parameters ':' suite
@ -2318,6 +2413,18 @@ validate_arglist(node *tree)
/* raise the right error from having an invalid number of children */
return validate_numnodes(tree, nch + 1, "arglist");
if (nch > 1) {
for (i=0; i<nch; i++) {
if (TYPE(CHILD(tree, i)) == argument) {
node *ch = CHILD(tree, i);
if (NCH(ch) == 2 && TYPE(CHILD(ch, 1)) == gen_for) {
err_string("need '(', ')' for generator expression");
return 0;
}
}
}
}
while (ok && nch-i >= 2) {
/* skip leading (argument ',') */
ok = (validate_argument(CHILD(tree, i))
@ -2377,17 +2484,19 @@ validate_arglist(node *tree)
/* argument:
*
* [test '='] test
* [test '='] test [gen_for]
*/
static int
validate_argument(node *tree)
{
int nch = NCH(tree);
int res = (validate_ntype(tree, argument)
&& ((nch == 1) || (nch == 3))
&& ((nch == 1) || (nch == 2) || (nch == 3))
&& validate_test(CHILD(tree, 0)));
if (res && (nch == 3))
if (res && (nch == 2))
res = validate_gen_for(CHILD(tree, 1));
else if (res && (nch == 3))
res = (validate_equal(CHILD(tree, 1))
&& validate_test(CHILD(tree, 2)));

View File

@ -744,11 +744,15 @@ static int com_add(struct compiling *, PyObject *, PyObject *, PyObject *);
static int com_addconst(struct compiling *, PyObject *);
static int com_addname(struct compiling *, PyObject *);
static void com_addopname(struct compiling *, int, node *);
static void com_test(struct compiling *c, node *n);
static void com_list(struct compiling *, node *, int);
static void com_list_iter(struct compiling *, node *, node *, char *);
static void com_gen_iter(struct compiling *, node *, node *);
static int com_argdefs(struct compiling *, node *);
static void com_assign(struct compiling *, node *, int, node *);
static void com_assign_name(struct compiling *, node *, int);
static int com_make_closure(struct compiling *c, PyCodeObject *co);
static PyCodeObject *icompile(node *, struct compiling *);
static PyCodeObject *jcompile(node *, const char *, struct compiling *,
PyCompilerFlags *);
@ -759,6 +763,7 @@ static node *get_rawdocstring(node *);
static int get_ref_type(struct compiling *, char *);
/* symtable operations */
static int symtable_lookup(struct symtable *st, char *name);
static struct symtable *symtable_build(node *, PyFutureFeatures *,
const char *filename);
static int symtable_load_symbols(struct compiling *);
@ -777,7 +782,10 @@ static void symtable_global(struct symtable *, node *);
static void symtable_import(struct symtable *, node *);
static void symtable_assign(struct symtable *, node *, int);
static void symtable_list_comprehension(struct symtable *, node *);
static void symtable_generator_expression(struct symtable *, node *);
static void symtable_list_for(struct symtable *, node *);
static void symtable_gen_for(struct symtable *, node *, int);
static void symtable_gen_iter(struct symtable *, node *);
static int symtable_update_free_vars(struct symtable *);
static int symtable_undo_free(struct symtable *, PyObject *, PyObject *);
@ -1589,7 +1597,7 @@ com_list_for(struct compiling *c, node *n, node *e, char *t)
int anchor = 0;
int save_begin = c->c_begin;
/* list_iter: for v in expr [list_iter] */
/* list_for: for v in expr [list_iter] */
com_node(c, CHILD(n, 3)); /* expr */
com_addbyte(c, GET_ITER);
c->c_begin = c->c_nexti;
@ -1605,6 +1613,52 @@ com_list_for(struct compiling *c, node *n, node *e, char *t)
com_pop(c, 1); /* FOR_ITER has popped this */
}
/* Emit the loop for one gen_for clause of a generator expression.
 *
 * n is the gen_for node and t is the node of the expression to yield.
 * When is_outmost is true the value to iterate over is loaded from the
 * variable "[outmost-iterable]" instead of being compiled from the
 * clause's own iterable expression (and no GET_ITER is emitted for it).
 * The loop body is either the nested gen_iter, if the clause has one, or
 * the evaluation of t followed by YIELD_VALUE.
 */
static void
com_gen_for(struct compiling *c, node *n, node *t, int is_outmost)
{
int break_anchor = 0;
int anchor = 0;
/* c->c_begin is overwritten below and restored once the loop is closed. */
int save_begin = c->c_begin;
REQ(n, gen_for);
/* gen_for: for v in test [gen_iter] */
com_addfwref(c, SETUP_LOOP, &break_anchor);
block_push(c, SETUP_LOOP);
if (is_outmost) {
com_addop_varname(c, VAR_LOAD, "[outmost-iterable]");
com_push(c, 1);
}
else {
com_node(c, CHILD(n, 3));
com_addbyte(c, GET_ITER);
}
/* Record where the loop starts; JUMP_ABSOLUTE below jumps back here. */
c->c_begin = c->c_nexti;
com_set_lineno(c, c->c_last_line);
com_addfwref(c, FOR_ITER, &anchor);
com_push(c, 1);
/* Store the value produced by FOR_ITER into the loop target(s). */
com_assign(c, CHILD(n, 1), OP_ASSIGN, NULL);
if (NCH(n) == 5)
com_gen_iter(c, CHILD(n, 4), t);
else {
/* Innermost clause: evaluate the target expression and yield it. */
com_test(c, t);
com_addbyte(c, YIELD_VALUE);
com_pop(c, 1);
}
com_addoparg(c, JUMP_ABSOLUTE, c->c_begin);
c->c_begin = save_begin;
com_backpatch(c, anchor);
com_pop(c, 1); /* FOR_ITER has popped this */
com_addbyte(c, POP_BLOCK);
block_pop(c, SETUP_LOOP);
com_backpatch(c, break_anchor);
}
static void
com_list_if(struct compiling *c, node *n, node *e, char *t)
{
@ -1623,6 +1677,32 @@ com_list_if(struct compiling *c, node *n, node *e, char *t)
com_backpatch(c, anchor);
}
/* Emit code for one gen_if clause of a generator expression.
 *
 * n is the gen_if node and t is the node of the expression to yield.
 * The condition is evaluated; when it is true the nested gen_iter (if the
 * clause has one) is compiled, otherwise t is evaluated and yielded.
 * When the condition is false the code jumps past that part.
 */
static void
com_gen_if(struct compiling *c, node *n, node *t)
{
/* gen_if: 'if' test [gen_iter] */
int anchor = 0;
int a=0;
com_node(c, CHILD(n, 1));
com_addfwref(c, JUMP_IF_FALSE, &a);
/* True path: discard the tested value before continuing. */
com_addbyte(c, POP_TOP);
com_pop(c, 1);
if (NCH(n) == 3)
com_gen_iter(c, CHILD(n, 2), t);
else {
/* Innermost clause: evaluate the target expression and yield it. */
com_test(c, t);
com_addbyte(c, YIELD_VALUE);
com_pop(c, 1);
}
/* The true path skips over the false path's POP_TOP below. */
com_addfwref(c, JUMP_FORWARD, &anchor);
com_backpatch(c, a);
/* We jump here with an extra entry which we now pop */
com_addbyte(c, POP_TOP);
com_backpatch(c, anchor);
}
static void
com_list_iter(struct compiling *c,
node *p, /* parent of list_iter node */
@ -1654,6 +1734,28 @@ com_list_iter(struct compiling *c,
}
}
/* Dispatch a gen_iter node to the emitter for the clause it wraps.

   c -- compilation state that receives the emitted code
   n -- the gen_iter node: gen_for | gen_if
   t -- the expression node to be yielded, handed on unchanged

   A wrapped gen_for is always compiled as a non-outermost clause.  Any
   other child type is reported through com_error() as a SystemError.
*/
static void
com_gen_iter(struct compiling *c, node *n, node *t)
{
	node *clause;
	int kind;

	REQ(n, gen_iter);
	clause = CHILD(n, 0);
	kind = TYPE(clause);

	if (kind == gen_for) {
		com_gen_for(c, clause, t, 0);
	}
	else if (kind == gen_if) {
		com_gen_if(c, clause, t);
	}
	else {
		com_error(c, PyExc_SystemError,
			  "invalid gen_iter node type");
	}
}
static void
com_list_comprehension(struct compiling *c, node *n)
{
@ -1688,6 +1790,52 @@ com_listmaker(struct compiling *c, node *n)
}
}
/* Compile a generator expression at the point where it appears.

   c -- compilation state of the enclosing code
   n -- a testlist_gexp or argument node of the form: test gen_for

   The expression is compiled into its own code object inside a
   "<genexpr>" symbol table scope.  The enclosing code then loads that
   code object, turns it into a function (or a closure when
   com_make_closure() reports one is needed), evaluates the iterable of the
   first 'for' clause, applies GET_ITER to it and calls the function with
   that iterator as its single argument.

   If icompile() fails, c->c_errors is incremented and nothing is emitted.
*/
static void
com_generator_expression(struct compiling *c, node *n)
{
/* testlist_gexp: test gen_for */
/* argument: test gen_for */
PyCodeObject *co;
REQ(CHILD(n, 0), test);
REQ(CHILD(n, 1), gen_for);
/* Compile the generator body as a separate code object in its own scope. */
symtable_enter_scope(c->c_symtable, "<genexpr>", TYPE(n),
n->n_lineno);
co = icompile(n, c);
symtable_exit_scope(c->c_symtable);
if (co == NULL)
c->c_errors++;
else {
int closure = com_make_closure(c, co);
int i = com_addconst(c, (PyObject *)co);
com_addoparg(c, LOAD_CONST, i);
com_push(c, 1);
if (closure)
com_addoparg(c, MAKE_CLOSURE, 0);
else
com_addoparg(c, MAKE_FUNCTION, 0);
/* CHILD(CHILD(n, 1), 3) is the iterable of the first 'for' clause;
   it is compiled here, in the enclosing code, and passed in as the
   one argument of the call below. */
com_test(c, CHILD(CHILD(n, 1), 3));
com_addbyte(c, GET_ITER);
com_addoparg(c, CALL_FUNCTION, 1);
com_pop(c, 1);
/* Release this function's reference to the code object. */
Py_DECREF(co);
}
}
/* Compile the contents of a parenthesized testlist_gexp node.

   testlist_gexp: test ( gen_for | (',' test)* [','] )

   When the second child is a gen_for the node is a generator expression;
   otherwise it is handed to com_list() like an ordinary expression list.
*/
static void
com_testlist_gexp(struct compiling *c, node *n)
{
	int is_genexp = NCH(n) > 1 && TYPE(CHILD(n, 1)) == gen_for;

	if (!is_genexp) {
		com_list(c, n, 0);
		return;
	}
	com_generator_expression(c, n);
}
static void
com_dictmaker(struct compiling *c, node *n)
{
@ -1721,7 +1869,7 @@ com_atom(struct compiling *c, node *n)
com_push(c, 1);
}
else
com_node(c, CHILD(n, 1));
com_testlist_gexp(c, CHILD(n, 1));
break;
case LSQB: /* '[' [listmaker] ']' */
if (TYPE(CHILD(n, 1)) == RSQB) {
@ -1857,7 +2005,7 @@ static void
com_argument(struct compiling *c, node *n, PyObject **pkeywords)
{
node *m;
REQ(n, argument); /* [test '='] test; really [keyword '='] test */
REQ(n, argument); /* [test '='] test [gen_for]; really [keyword '='] test */
if (NCH(n) == 1) {
if (*pkeywords != NULL) {
com_error(c, PyExc_SyntaxError,
@ -1868,6 +2016,11 @@ com_argument(struct compiling *c, node *n, PyObject **pkeywords)
}
return;
}
if (NCH(n) == 2) {
com_generator_expression(c, n);
return;
}
m = n;
do {
m = CHILD(m, 0);
@ -2723,7 +2876,8 @@ static void
com_assign_sequence(struct compiling *c, node *n, int assigning)
{
int i;
if (TYPE(n) != testlist && TYPE(n) != listmaker)
if (TYPE(n) != testlist && TYPE(n) != testlist_gexp &&
TYPE(n) != listmaker)
REQ(n, exprlist);
if (assigning) {
i = (NCH(n)+1)/2;
@ -2765,7 +2919,13 @@ com_assign(struct compiling *c, node *n, int assigning, node *augn)
case exprlist:
case testlist:
case testlist1:
case testlist_gexp:
if (NCH(n) > 1) {
if (TYPE(CHILD(n, 1)) == gen_for) {
com_error(c, PyExc_SystemError,
"assign to generator expression not possible");
return;
}
if (assigning > OP_APPLY) {
com_error(c, PyExc_SyntaxError,
"augmented assign to tuple not possible");
@ -4252,6 +4412,23 @@ compile_classdef(struct compiling *c, node *n)
com_pop(c, 1);
}
/* Emit the body of the code object that implements a generator expression.

   c -- compilation state of the generator's own code object
   n -- a testlist_gexp or argument node of the form: test gen_for

   Sets c->c_name to "<generator expression>", compiles the first 'for'
   clause as the outermost one (which in turn emits any nested clauses and
   the yield), then emits a trailing "return None".
*/
static void
compile_generator_expression(struct compiling *c, node *n)
{
	node *elt = CHILD(n, 0);	/* expression to be yielded */
	node *outer_for = CHILD(n, 1);	/* first 'for' clause */
	int none_index;

	REQ(elt, test);
	REQ(outer_for, gen_for);

	c->c_name = "<generator expression>";
	com_gen_for(c, outer_for, elt, 1);

	/* The constant is added only after the loop has been emitted,
	   then loaded and returned. */
	none_index = com_addconst(c, Py_None);
	com_addoparg(c, LOAD_CONST, none_index);
	com_push(c, 1);
	com_addbyte(c, RETURN_VALUE);
	com_pop(c, 1);
}
static void
compile_node(struct compiling *c, node *n)
{
@ -4300,6 +4477,11 @@ compile_node(struct compiling *c, node *n)
compile_classdef(c, n);
break;
case testlist_gexp: /* A generator expression */
case argument: /* A generator expression */
compile_generator_expression(c, n);
break;
default:
com_error(c, PyExc_SystemError,
"compile_node: unexpected node type");
@ -4976,7 +5158,6 @@ symtable_load_symbols(struct compiling *c)
}
}
}
assert(PyDict_Size(c->c_freevars) == si.si_nfrees);
if (si.si_ncells > 1) { /* one cell is always in order */
@ -5346,11 +5527,11 @@ look_for_yield(node *n)
return 0;
case yield_stmt:
return 1;
return GENERATOR;
default:
if (look_for_yield(kid))
return 1;
return GENERATOR;
}
}
return 0;
@ -5494,6 +5675,18 @@ symtable_node(struct symtable *st, node *n)
if (TYPE(CHILD(n, i)) >= single_input)
symtable_node(st, CHILD(n, i));
break;
case arglist:
if (NCH(n) > 1)
for (i = 0; i < NCH(n); ++i) {
node *ch = CHILD(n, i);
if (TYPE(ch) == argument && NCH(ch) == 2 &&
TYPE(CHILD(ch, 1)) == gen_for) {
PyErr_SetString(PyExc_SyntaxError,
"invalid syntax");
symtable_error(st, n->n_lineno);
return;
}
}
/* The remaining cases fall through to default except in
special circumstances. This requires the individual cases
to be coded with great care, even though they look like
@ -5504,6 +5697,11 @@ symtable_node(struct symtable *st, node *n)
n = CHILD(n, 2);
goto loop;
}
else if (TYPE(n) == argument && NCH(n) == 2 &&
TYPE(CHILD(n, 1)) == gen_for) {
symtable_generator_expression(st, n);
break;
}
/* fall through */
case listmaker:
if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == list_for) {
@ -5511,6 +5709,13 @@ symtable_node(struct symtable *st, node *n)
break;
}
/* fall through */
case testlist_gexp:
if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == gen_for) {
symtable_generator_expression(st, n);
break;
}
/* fall through */
case atom:
if (TYPE(n) == atom && TYPE(CHILD(n, 0)) == NAME) {
symtable_add_use(st, STR(CHILD(n, 0)));
@ -5714,6 +5919,26 @@ symtable_list_comprehension(struct symtable *st, node *n)
--st->st_cur->ste_tmpname;
}
/* Build the symbol table entries for a generator expression.

   st -- the symbol table being built
   n  -- a node of the form: test gen_for

   A "<genexpr>" scope is entered and flagged as GENERATOR_EXPRESSION, and
   "[outmost-iterable]" is defined in it as a parameter.  The 'for'
   clauses are visited before the yielded expression.  The iterable of the
   first 'for' clause is visited only after the scope has been exited.
*/
static void
symtable_generator_expression(struct symtable *st, node *n)
{
	node *elt = CHILD(n, 0);	/* expression to be yielded */
	node *outer_for = CHILD(n, 1);	/* first 'for' clause */

	REQ(elt, test);
	REQ(outer_for, gen_for);

	symtable_enter_scope(st, "<genexpr>", TYPE(n), n->n_lineno);
	st->st_cur->ste_generator = GENERATOR_EXPRESSION;
	symtable_add_def(st, "[outmost-iterable]", DEF_PARAM);
	symtable_gen_for(st, outer_for, 1);
	symtable_node(st, elt);
	symtable_exit_scope(st);

	/* for outmost iterable precomputation */
	symtable_node(st, CHILD(outer_for, 3));
}
static void
symtable_list_for(struct symtable *st, node *n)
{
@ -5725,6 +5950,39 @@ symtable_list_for(struct symtable *st, node *n)
symtable_node(st, CHILD(n, 4));
}
/* Record the symbols used by one 'for' clause of a generator expression.

   st         -- the symbol table being built
   n          -- the gen_for node: for v in test [gen_iter]
   is_outmost -- nonzero for the first 'for' clause; instead of visiting
                 its iterable, a use of "[outmost-iterable]" is recorded

   The loop target is processed first, then the iterable (or its stand-in),
   then any trailing gen_iter.
*/
static void
symtable_gen_for(struct symtable *st, node *n, int is_outmost)
{
	REQ(n, gen_for);

	symtable_assign(st, CHILD(n, 1), 0);

	if (!is_outmost)
		symtable_node(st, CHILD(n, 3));
	else
		symtable_add_use(st, "[outmost-iterable]");

	if (NCH(n) != 5)
		return;
	symtable_gen_iter(st, CHILD(n, 4));
}
/* Record the symbols used by a gen_iter node (gen_for | gen_if).

   st -- the symbol table being built
   n  -- the gen_iter node

   A gen_for child is handed to symtable_gen_for() as a non-outermost
   clause, which also deals with whatever follows it.  For a gen_if child
   the condition is visited and, when a further gen_iter follows, the walk
   continues with that node.
*/
static void
symtable_gen_iter(struct symtable *st, node *n)
{
	node *clause;

	for (;;) {
		REQ(n, gen_iter);
		clause = CHILD(n, 0);

		if (TYPE(clause) == gen_for) {
			symtable_gen_for(st, clause, 0);
			return;
		}

		REQ(clause, gen_if);
		symtable_node(st, CHILD(clause, 1));
		if (NCH(clause) != 3)
			return;
		/* gen_if: 'if' test gen_iter -- continue with the tail */
		n = CHILD(clause, 2);
	}
}
static void
symtable_import(struct symtable *st, node *n)
{
@ -5813,6 +6071,17 @@ symtable_assign(struct symtable *st, node *n, int def_flag)
symtable_assign(st, CHILD(n, i), def_flag);
}
return;
case testlist_gexp:
if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == gen_for) {
/* XXX This is an error, but the next pass
will catch it. */
return;
} else {
for (i = 0; i < NCH(n); i += 2)
symtable_assign(st, CHILD(n, i), def_flag);
}
return;
case exprlist:
case testlist:
case testlist1:

File diff suppressed because it is too large Load Diff

View File

@ -66,6 +66,8 @@ PySymtableEntry_New(struct symtable *st, char *name, int type, int lineno)
switch (type) {
case funcdef:
case lambdef:
case testlist_gexp: /* generator expression */
case argument: /* generator expression */
ste->ste_type = TYPE_FUNCTION;
break;
case classdef:

View File

@ -38,6 +38,10 @@ AssAttr: expr, attrname*, flags*
ListComp: expr, quals!
ListCompFor: assign, list, ifs!
ListCompIf: test
GenExpr: code
GenExprInner: expr, quals!
GenExprFor: assign, iter, ifs!
GenExprIf: test
List: nodes!
Dict: items!
Not: expr
@ -85,3 +89,10 @@ init(Lambda):
self.varargs = 1
if flags & CO_VARKEYWORDS:
self.kwargs = 1
init(GenExpr):
self.argnames = ['[outmost-iterable]']
self.varargs = self.kwargs = None
init(GenExprFor):
self.is_outmost = False