Issue #24619: New approach for tokenizing async/await.

This commit fixes how one-line async-defs and defs are tracked
by the tokenizer.  It makes it possible to correctly handle invalid
code such as:

>>> async def f():
...     def g(): pass
...     async = 10

and valid code such as:

>>> async def f():
...     async def g(): pass
...     await z

As a consequence, it is now possible to have one-line
'async def foo(): await ..' functions:

>>> async def foo(): return await bar()
This commit is contained in:
Yury Selivanov 2015-07-22 13:33:45 +03:00
parent 80acc3ebbc
commit 8fb307cd65
13 changed files with 343 additions and 69 deletions

View File

@ -685,9 +685,7 @@ Execution of Python coroutines can be suspended and resumed at many points
(see :term:`coroutine`). In the body of a coroutine, any ``await`` and
``async`` identifiers become reserved keywords; :keyword:`await` expressions,
:keyword:`async for` and :keyword:`async with` can only be used in
coroutine bodies. However, to simplify the parser, these keywords cannot
be used on the same line as a function or coroutine (:keyword:`def`
statement) header.
coroutine bodies.
Functions defined with ``async def`` syntax are always coroutine functions,
even if they do not contain ``await`` or ``async`` keywords.

View File

@ -369,6 +369,7 @@ def generate_tokens(readline):
# 'stashed' and 'ctx' are used for async/await parsing
stashed = None
ctx = [('sync', 0)]
in_async = 0
while 1: # loop over lines in stream
try:
@ -436,6 +437,14 @@ def generate_tokens(readline):
"unindent does not match any outer indentation level",
("<tokenize>", lnum, pos, line))
indents = indents[:-1]
cur_indent = indents[-1]
while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
if ctx[-1][0] == 'async':
in_async -= 1
assert in_async >= 0
ctx.pop()
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
else: # continued statement
@ -499,7 +508,7 @@ def generate_tokens(readline):
yield (STRING, token, spos, epos, line)
elif initial in namechars: # ordinary name
if token in ('async', 'await'):
if ctx[-1][0] == 'async' and ctx[-1][1] < indents[-1]:
if in_async:
yield (ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
continue
@ -515,6 +524,7 @@ def generate_tokens(readline):
and stashed[1] == 'async'):
ctx.append(('async', indents[-1]))
in_async += 1
yield (ASYNC, stashed[1],
stashed[2], stashed[3],

View File

@ -1,3 +1,2 @@
async def foo():
def foo(a=await something()):
pass
async def foo(a=await something()):
pass

View File

@ -1,3 +1,2 @@
async def foo():
def foo(a:await something()):
pass
async def foo(a:await something()):
pass

View File

@ -1,2 +1,2 @@
async def foo():
async def foo(): await something()
await

View File

@ -1,2 +0,0 @@
async def foo():
await

View File

@ -67,11 +67,11 @@ def silence_coro_gc():
class AsyncBadSyntaxTest(unittest.TestCase):
def test_badsyntax_1(self):
with self.assertRaisesRegex(SyntaxError, 'invalid syntax'):
with self.assertRaisesRegex(SyntaxError, "'await' outside"):
import test.badsyntax_async1
def test_badsyntax_2(self):
with self.assertRaisesRegex(SyntaxError, 'invalid syntax'):
with self.assertRaisesRegex(SyntaxError, "'await' outside"):
import test.badsyntax_async2
def test_badsyntax_3(self):
@ -103,10 +103,6 @@ class AsyncBadSyntaxTest(unittest.TestCase):
import test.badsyntax_async8
def test_badsyntax_9(self):
with self.assertRaisesRegex(SyntaxError, 'invalid syntax'):
import test.badsyntax_async9
def test_badsyntax_10(self):
ns = {}
for comp in {'(await a for a in b)',
'[await a for a in b]',
@ -116,6 +112,221 @@ class AsyncBadSyntaxTest(unittest.TestCase):
with self.assertRaisesRegex(SyntaxError, 'await.*in comprehen'):
exec('async def f():\n\t{}'.format(comp), ns, ns)
def test_badsyntax_10(self):
# Tests for issue 24619
samples = [
"""async def foo():
def bar(): pass
await = 1
""",
"""async def foo():
def bar(): pass
await = 1
""",
"""async def foo():
def bar(): pass
if 1:
await = 1
""",
"""def foo():
async def bar(): pass
if 1:
await a
""",
"""def foo():
async def bar(): pass
await a
""",
"""def foo():
def baz(): pass
async def bar(): pass
await a
""",
"""def foo():
def baz(): pass
# 456
async def bar(): pass
# 123
await a
""",
"""async def foo():
def baz(): pass
# 456
async def bar(): pass
# 123
await = 2
""",
"""def foo():
def baz(): pass
async def bar(): pass
await a
""",
"""async def foo():
def baz(): pass
async def bar(): pass
await = 2
""",
"""async def foo():
def async(): pass
""",
"""async def foo():
def await(): pass
""",
"""async def foo():
def bar():
await
""",
"""async def foo():
return lambda async: await
""",
"""async def foo():
return lambda a: await
""",
"""async def foo(a: await b):
pass
""",
"""def baz():
async def foo(a: await b):
pass
""",
"""async def foo(async):
pass
""",
"""async def foo():
def bar():
def baz():
async = 1
""",
"""async def foo():
def bar():
def baz():
pass
async = 1
""",
"""def foo():
async def bar():
async def baz():
pass
def baz():
42
async = 1
""",
"""async def foo():
def bar():
def baz():
pass\nawait foo()
""",
"""def foo():
def bar():
async def baz():
pass\nawait foo()
""",
"""async def foo(await):
pass
""",
"""def foo():
async def bar(): pass
await a
""",
"""def foo():
async def bar():
pass\nawait a
"""]
ns = {}
for code in samples:
with self.subTest(code=code), self.assertRaises(SyntaxError):
exec(code, ns, ns)
def test_goodsyntax_1(self):
# Tests for issue 24619
def foo(await):
async def foo(): pass
async def foo():
pass
return await + 1
self.assertEqual(foo(10), 11)
def foo(await):
async def foo(): pass
async def foo(): pass
return await + 2
self.assertEqual(foo(20), 22)
def foo(await):
async def foo(): pass
async def foo(): pass
return await + 2
self.assertEqual(foo(20), 22)
def foo(await):
"""spam"""
async def foo(): \
pass
# 123
async def foo(): pass
# 456
return await + 2
self.assertEqual(foo(20), 22)
def foo(await):
def foo(): pass
def foo(): pass
async def bar(): return await_
await_ = await
try:
bar().send(None)
except StopIteration as ex:
return ex.args[0]
self.assertEqual(foo(42), 42)
async def f():
async def g(): pass
await z
self.assertTrue(inspect.iscoroutinefunction(f))
class TokenizerRegrTest(unittest.TestCase):
@ -461,8 +672,7 @@ class CoroutineTest(unittest.TestCase):
class Awaitable:
pass
async def foo():
return (await Awaitable())
async def foo(): return await Awaitable()
with self.assertRaisesRegex(
TypeError, "object Awaitable can't be used in 'await' expression"):

View File

@ -1051,10 +1051,7 @@ class GrammarTests(unittest.TestCase):
async def test():
def sum():
async = 1
await = 41
return async + await
pass
if 1:
await someobj()

View File

@ -786,12 +786,12 @@ Async/await extension:
NAME 'def' (2, 2) (2, 5)
NAME 'foo' (2, 6) (2, 9)
OP '(' (2, 9) (2, 10)
NAME 'await' (2, 10) (2, 15)
AWAIT 'await' (2, 10) (2, 15)
OP ')' (2, 15) (2, 16)
OP ':' (2, 16) (2, 17)
NEWLINE '\\n' (2, 17) (2, 18)
INDENT ' ' (3, 0) (3, 4)
NAME 'await' (3, 4) (3, 9)
AWAIT 'await' (3, 4) (3, 9)
OP '=' (3, 10) (3, 11)
NUMBER '1' (3, 12) (3, 13)
NEWLINE '\\n' (3, 13) (3, 14)
@ -829,6 +829,17 @@ Async/await extension:
OP ':' (2, 18) (2, 19)
NAME 'pass' (2, 20) (2, 24)
DEDENT '' (3, 0) (3, 0)
>>> dump_tokens('''async def foo(async): await''')
ENCODING 'utf-8' (0, 0) (0, 0)
ASYNC 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
OP '(' (1, 13) (1, 14)
ASYNC 'async' (1, 14) (1, 19)
OP ')' (1, 19) (1, 20)
OP ':' (1, 20) (1, 21)
AWAIT 'await' (1, 22) (1, 27)
"""
from test import support

View File

@ -501,6 +501,7 @@ def _tokenize(readline, encoding):
# 'stashed' and 'ctx' are used for async/await parsing
stashed = None
ctx = [('sync', 0)]
in_async = 0
if encoding is not None:
if encoding == "utf-8-sig":
@ -580,6 +581,9 @@ def _tokenize(readline, encoding):
cur_indent = indents[-1]
while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
if ctx[-1][0] == 'async':
in_async -= 1
assert in_async >= 0
ctx.pop()
yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
@ -640,7 +644,7 @@ def _tokenize(readline, encoding):
yield TokenInfo(STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
if ctx[-1][0] == 'async' and ctx[-1][1] < indents[-1]:
if in_async:
yield TokenInfo(
ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
@ -657,6 +661,7 @@ def _tokenize(readline, encoding):
and stashed.string == 'async'):
ctx.append(('async', indents[-1]))
in_async += 1
yield TokenInfo(ASYNC, stashed.string,
stashed.start, stashed.end,

View File

@ -19,6 +19,9 @@ Core and Builtins
- Issue #24407: Fix crash when dict is mutated while being updated.
- Issue #24619: New approach for tokenizing async/await. As a consequence,
it is now possible to have one-line 'async def foo(): await ..' functions.
Library
-------

View File

@ -31,6 +31,12 @@
|| c == '_'\
|| (c >= 128))
/* The following DEFTYPE* flags are used in 'tok_state->deftypestack',
and should be removed in 3.7, when async/await are regular
keywords. */
#define DEFTYPE_ASYNC 1
#define DEFTYPE_HAS_NL 2
extern char *PyOS_Readline(FILE *, FILE *, const char *);
/* Return malloc'ed string including trailing \n;
empty malloc'ed string for EOF;
@ -130,6 +136,8 @@ tok_new(void)
tok->def = 0;
tok->defstack[0] = 0;
tok->deftypestack[0] = 0;
tok->def_async_behind = 0;
tok->def_in_async = 0;
tok->atbol = 1;
tok->pendin = 0;
@ -1436,7 +1444,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
tok->pendin++;
while (tok->def && tok->defstack[tok->def] >= tok->indent) {
if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) {
tok->def_in_async--;
assert(tok->def_in_async >= 0);
}
tok->def--;
assert(tok->def >= 0);
}
return DEDENT;
@ -1447,6 +1460,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
}
}
if (!blankline && tok->level == 0
&& tok->def && tok->deftypestack[tok->def] & DEFTYPE_HAS_NL
&& tok->defstack[tok->def] >= tok->indent)
{
/* The top function on the stack did have a NEWLINE
token, but didn't have an INDENT. That means that
it's a one-line function and it should now be removed
from the stack. */
if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) {
tok->def_in_async--;
assert(tok->def_in_async >= 0);
}
tok->def--;
assert(tok->def >= 0);
}
again:
tok->start = NULL;
/* Skip spaces */
@ -1501,59 +1530,58 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
tok_len = tok->cur - tok->start;
if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) {
if (tok->def && tok->deftypestack[tok->def] == 3) {
tok->deftypestack[tok->def] = 2;
/* The current token is 'def'. */
if (tok->def + 1 >= MAXINDENT) {
tok->done = E_TOODEEP;
tok->cur = tok->inp;
return ERRORTOKEN;
}
else if (tok->defstack[tok->def] < tok->indent) {
/* We advance defs stack only when we see "def" *and*
the indentation level was increased relative to the
previous "def". */
if (tok->def + 1 >= MAXINDENT) {
tok->done = E_TOODEEP;
tok->cur = tok->inp;
return ERRORTOKEN;
}
/* Advance defs stack. */
tok->def++;
tok->defstack[tok->def] = tok->indent;
tok->def++;
tok->defstack[tok->def] = tok->indent;
tok->deftypestack[tok->def] = 1;
if (tok->def_async_behind) {
/* The previous token was 'async'. */
tok->def_async_behind = 0;
tok->deftypestack[tok->def] = DEFTYPE_ASYNC;
tok->def_in_async++;
}
else {
/* This is a regular function (not async def). */
tok->deftypestack[tok->def] = 0;
}
}
else if (tok_len == 5) {
if (memcmp(tok->start, "async", 5) == 0) {
/* The current token is 'async'. */
memcpy(&ahead_tok, tok, sizeof(ahead_tok));
/* Try to look ahead one token. */
ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
&ahead_top_end);
if (ahead_tok_kind == NAME &&
ahead_tok.cur - ahead_tok.start == 3 &&
memcmp(ahead_tok.start, "def", 3) == 0) {
if (tok->def + 1 >= MAXINDENT) {
tok->done = E_TOODEEP;
tok->cur = tok->inp;
return ERRORTOKEN;
}
tok->def++;
tok->defstack[tok->def] = tok->indent;
tok->deftypestack[tok->def] = 3;
if (ahead_tok_kind == NAME
&& ahead_tok.cur - ahead_tok.start == 3
&& memcmp(ahead_tok.start, "def", 3) == 0)
{
/* The next token is going to be 'def', so instead of
returning 'async' NAME token, we return ASYNC. */
tok->def_async_behind = 1;
return ASYNC;
}
else if (tok->def && tok->deftypestack[tok->def] == 2
&& tok->defstack[tok->def] < tok->indent) {
else if (tok->def_in_async)
{
/* We're inside an 'async def' function, so we treat
'async' token as ASYNC, instead of NAME. */
return ASYNC;
}
}
else if (memcmp(tok->start, "await", 5) == 0
&& tok->def && tok->deftypestack[tok->def] == 2
&& tok->defstack[tok->def] < tok->indent) {
else if (memcmp(tok->start, "await", 5) == 0 && tok->def_in_async)
{
/* We're inside an 'async def' function, so we treat
'await' token as AWAIT, instead of NAME. */
return AWAIT;
}
}
@ -1569,6 +1597,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
*p_start = tok->start;
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
tok->cont_line = 0;
if (tok->def) {
/* Mark the top function on the stack that it had
at least one NEWLINE. That will help us to
distinguish one-line functions from functions
with multiple statements. */
tok->deftypestack[tok->def] |= DEFTYPE_HAS_NL;
}
return NEWLINE;
}

View File

@ -66,12 +66,21 @@ struct tok_state {
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
int defstack[MAXINDENT]; /* stack if funcs & indents where they
were defined */
int deftypestack[MAXINDENT]; /* stack of func types
(0 not func; 1: "def name";
2: "async def name") */
int def; /* Length of stack of func types */
/* `def*` fields are for parsing async/await in a backwards compatible
way. They should be removed in 3.7, when async/await become
regular keywords. See PEP 492 for more details. */
int defstack[MAXINDENT]; /* Stack of funcs & indents where they
were defined. */
int deftypestack[MAXINDENT]; /* Stack of func flags, see DEFTYPE_*
constants. */
int def; /* Length of stack of func types/flags. */
int def_async_behind; /* 1 if there was an 'async' token before
a 'def' token. */
int def_in_async; /* Counter of how deep 'async def's
are nested. If greater than 0,
we are somewhere in an 'async def'
body, so 'async' and 'await' should
be parsed as keywords.*/
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);