bpo-40334: Support CO_FUTURE_BARRY_AS_BDFL in the new parser (GH-19721)

This commit also allows passing flags to the new parser in all interfaces and fixes a bug in the parser generator that caused rules with actions to be inlined, which made the actions disappear.
This commit is contained in:
Pablo Galindo 2020-04-27 18:02:07 +01:00 committed by GitHub
parent 9adccc1384
commit 2b74c835a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 578 additions and 468 deletions

View File

@ -323,7 +323,8 @@ compare_op_bitwise_or_pair[CmpopExprPair*]:
| isnot_bitwise_or
| is_bitwise_or
eq_bitwise_or[CmpopExprPair*]: '==' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Eq, a) }
noteq_bitwise_or[CmpopExprPair*]: '!=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, NotEq, a) }
noteq_bitwise_or[CmpopExprPair*]:
| (tok='!=' {_PyPegen_check_barry_as_flufl(p) ? NULL : tok}) a=bitwise_or {_PyPegen_cmpop_expr_pair(p, NotEq, a) }
lte_bitwise_or[CmpopExprPair*]: '<=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, LtE, a) }
lt_bitwise_or[CmpopExprPair*]: '<' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Lt, a) }
gte_bitwise_or[CmpopExprPair*]: '>=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, GtE, a) }

View File

@ -11,21 +11,24 @@ extern "C" {
#include "Python.h"
#include "Python-ast.h"
PyAPI_FUNC(mod_ty) PyPegen_ASTFromFile(const char *filename, int mode, PyArena *arena);
PyAPI_FUNC(mod_ty) PyPegen_ASTFromFile(const char *filename, int mode, PyCompilerFlags*, PyArena *arena);
PyAPI_FUNC(mod_ty) PyPegen_ASTFromString(const char *str, int mode, PyCompilerFlags *flags,
PyArena *arena);
PyAPI_FUNC(mod_ty) PyPegen_ASTFromStringObject(const char *str, PyObject* filename, int mode,
PyCompilerFlags *flags, PyArena *arena);
PyAPI_FUNC(mod_ty) PyPegen_ASTFromFileObject(FILE *fp, PyObject *filename_ob,
int mode, const char *enc, const char *ps1,
const char *ps2, int *errcode, PyArena *arena);
PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromFile(const char *filename, int mode);
const char *ps2, PyCompilerFlags *flags,
int *errcode, PyArena *arena);
PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromFile(const char *filename, int mode, PyCompilerFlags *flags);
PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromString(const char *str, int mode,
PyCompilerFlags *flags);
PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromFileObject(FILE *, PyObject *filename_ob,
int mode, const char *enc,
int mode,
const char *ps1,
const char *ps2,
PyCompilerFlags *flags,
const char *enc,
int *errcode);
#ifdef __cplusplus

View File

@ -4,7 +4,6 @@ import sys
from test import support
@support.skip_if_new_parser("Not supported by pegen yet")
class FLUFLTests(unittest.TestCase):
def test_barry_as_bdfl(self):
@ -16,10 +15,13 @@ class FLUFLTests(unittest.TestCase):
__future__.CO_FUTURE_BARRY_AS_BDFL)
self.assertRegex(str(cm.exception),
"with Barry as BDFL, use '<>' instead of '!='")
self.assertEqual(cm.exception.text, '2 != 3\n')
self.assertIn('2 != 3', cm.exception.text)
self.assertEqual(cm.exception.filename, '<FLUFL test>')
self.assertEqual(cm.exception.lineno, 2)
self.assertEqual(cm.exception.offset, 4)
self.assertTrue(cm.exception.lineno, 2)
# The old parser reports the end of the token and the new
# parser reports the start of the token
self.assertEqual(cm.exception.offset, 4 if support.use_old_parser() else 3)
def test_guido_as_bdfl(self):
code = '2 {0} 3'
@ -27,10 +29,12 @@ class FLUFLTests(unittest.TestCase):
with self.assertRaises(SyntaxError) as cm:
compile(code.format('<>'), '<FLUFL test>', 'exec')
self.assertRegex(str(cm.exception), "invalid syntax")
self.assertEqual(cm.exception.text, '2 <> 3\n')
self.assertIn('2 <> 3', cm.exception.text)
self.assertEqual(cm.exception.filename, '<FLUFL test>')
self.assertEqual(cm.exception.lineno, 1)
self.assertEqual(cm.exception.offset, 4)
# The old parser reports the end of the token and the new
# parser reports the start of the token
self.assertEqual(cm.exception.offset, 4 if support.use_old_parser() else 3)
if __name__ == '__main__':

View File

@ -28,9 +28,10 @@ _Py_parse_file(PyObject *self, PyObject *args, PyObject *kwds)
return NULL;
}
PyCompilerFlags flags = _PyCompilerFlags_INIT;
PyObject *result = NULL;
mod_ty res = PyPegen_ASTFromFile(filename, mode, arena);
mod_ty res = PyPegen_ASTFromFile(filename, mode, &flags, arena);
if (res == NULL) {
goto error;
}

File diff suppressed because it is too large Load Diff

View File

@ -586,7 +586,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
return NULL;
}
Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, NULL, p->arena);
Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, NULL, p->arena);
p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
? p->starting_col_offset + t->col_offset : 0;

View File

@ -22,20 +22,19 @@ PyPegen_ASTFromStringObject(const char *str, PyObject* filename, int mode, PyCom
return NULL;
}
int iflags = flags != NULL ? flags->cf_flags : PyCF_IGNORE_COOKIE;
mod_ty result = _PyPegen_run_parser_from_string(str, mode, filename, iflags, arena);
mod_ty result = _PyPegen_run_parser_from_string(str, mode, filename, flags, arena);
return result;
}
mod_ty
PyPegen_ASTFromFile(const char *filename, int mode, PyArena *arena)
PyPegen_ASTFromFile(const char *filename, int mode, PyCompilerFlags *flags, PyArena *arena)
{
PyObject *filename_ob = PyUnicode_FromString(filename);
if (filename_ob == NULL) {
return NULL;
}
mod_ty result = _PyPegen_run_parser_from_file(filename, mode, filename_ob, arena);
mod_ty result = _PyPegen_run_parser_from_file(filename, mode, filename_ob, flags, arena);
Py_XDECREF(filename_ob);
return result;
}
@ -43,13 +42,13 @@ PyPegen_ASTFromFile(const char *filename, int mode, PyArena *arena)
mod_ty
PyPegen_ASTFromFileObject(FILE *fp, PyObject *filename_ob, int mode,
const char *enc, const char *ps1, const char* ps2,
int *errcode, PyArena *arena)
PyCompilerFlags *flags, int *errcode, PyArena *arena)
{
if (PySys_Audit("compile", "OO", Py_None, filename_ob) < 0) {
return NULL;
}
return _PyPegen_run_parser_from_file_pointer(fp, mode, filename_ob, enc, ps1, ps2,
errcode, arena);
flags, errcode, arena);
}
PyCodeObject *
@ -81,7 +80,7 @@ error:
}
PyCodeObject *
PyPegen_CodeObjectFromFile(const char *filename, int mode)
PyPegen_CodeObjectFromFile(const char *filename, int mode, PyCompilerFlags* flags)
{
PyArena *arena = PyArena_New();
if (arena == NULL) {
@ -95,7 +94,7 @@ PyPegen_CodeObjectFromFile(const char *filename, int mode)
goto error;
}
mod_ty res = PyPegen_ASTFromFile(filename, mode, arena);
mod_ty res = PyPegen_ASTFromFile(filename, mode, flags, arena);
if (res == NULL) {
goto error;
}
@ -110,8 +109,8 @@ error:
PyCodeObject *
PyPegen_CodeObjectFromFileObject(FILE *fp, PyObject *filename_ob, int mode,
const char *ps1, const char *ps2, const char *enc,
int *errcode)
const char *ps1, const char *ps2,
PyCompilerFlags *flags, const char *enc, int *errcode)
{
PyArena *arena = PyArena_New();
if (arena == NULL) {
@ -121,7 +120,7 @@ PyPegen_CodeObjectFromFileObject(FILE *fp, PyObject *filename_ob, int mode,
PyCodeObject *result = NULL;
mod_ty res = PyPegen_ASTFromFileObject(fp, filename_ob, mode, enc, ps1, ps2,
errcode, arena);
flags, errcode, arena);
if (res == NULL) {
goto error;
}

View File

@ -25,6 +25,24 @@ init_normalization(Parser *p)
return 1;
}
/* Validate the spelling of the NOTEQUAL token stored last in p->tokens
   (index p->fill - 1) against the parser flags.

   - PyPARSE_BARRY_AS_BDFL set: only "<>" is accepted; any other spelling
     raises a SyntaxError and -1 is returned.
   - PyPARSE_BARRY_AS_BDFL clear: only "!=" is accepted; the result of the
     strcmp() against "!=" is returned, so no exception is set on mismatch.

   Returns 0 on success and nonzero on failure (an exception may be set). */
int
_PyPegen_check_barry_as_flufl(Parser *p)
{
    Token *last = p->tokens[p->fill - 1];
    assert(last->bytes != NULL);
    assert(last->type == NOTEQUAL);

    const char *spelling = PyBytes_AS_STRING(last->bytes);
    int barry_mode = (p->flags & PyPARSE_BARRY_AS_BDFL) != 0;

    if (!barry_mode) {
        /* Regular mode: zero only when the token is spelled "!=". */
        return strcmp(spelling, "!=");
    }
    if (strcmp(spelling, "<>") == 0) {
        return 0;
    }
    RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
    return -1;
}
PyObject *
_PyPegen_new_identifier(Parser *p, char *n)
{
@ -401,7 +419,6 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
loc = Py_None;
}
tmp = Py_BuildValue("(OiiN)", p->tok->filename, t->lineno, col_number, loc);
if (!tmp) {
goto error;
@ -902,8 +919,31 @@ _PyPegen_Parser_Free(Parser *p)
PyMem_Free(p);
}
/* Translate the compiler flags in flags->cf_flags into the PyPARSE_* bit
   mask stored on the Parser. A NULL `flags` pointer yields 0 (no parser
   flags). Only the four flags tested below are translated. */
static int
compute_parser_flags(PyCompilerFlags *flags)
{
    if (flags == NULL) {
        return 0;
    }

    const int cf = flags->cf_flags;
    int result = 0;

    result |= (cf & PyCF_DONT_IMPLY_DEDENT) ? PyPARSE_DONT_IMPLY_DEDENT : 0;
    result |= (cf & PyCF_IGNORE_COOKIE) ? PyPARSE_IGNORE_COOKIE : 0;
    result |= (cf & CO_FUTURE_BARRY_AS_BDFL) ? PyPARSE_BARRY_AS_BDFL : 0;
    result |= (cf & PyCF_TYPE_COMMENTS) ? PyPARSE_TYPE_COMMENTS : 0;

    return result;
}
Parser *
_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int *errcode, PyArena *arena)
_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
int *errcode, PyArena *arena)
{
Parser *p = PyMem_Malloc(sizeof(Parser));
if (p == NULL) {
@ -938,6 +978,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int *errcode, PyArena
p->starting_lineno = 0;
p->starting_col_offset = 0;
p->flags = flags;
return p;
}
@ -976,7 +1017,7 @@ _PyPegen_run_parser(Parser *p)
mod_ty
_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
const char *enc, const char *ps1, const char *ps2,
int *errcode, PyArena *arena)
PyCompilerFlags *flags, int *errcode, PyArena *arena)
{
struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
if (tok == NULL) {
@ -993,7 +1034,8 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
// From here on we need to clean up even if there's an error
mod_ty result = NULL;
Parser *p = _PyPegen_Parser_New(tok, start_rule, errcode, arena);
int parser_flags = compute_parser_flags(flags);
Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, errcode, arena);
if (p == NULL) {
goto error;
}
@ -1008,7 +1050,7 @@ error:
mod_ty
_PyPegen_run_parser_from_file(const char *filename, int start_rule,
PyObject *filename_ob, PyArena *arena)
PyObject *filename_ob, PyCompilerFlags *flags, PyArena *arena)
{
FILE *fp = fopen(filename, "rb");
if (fp == NULL) {
@ -1017,7 +1059,7 @@ _PyPegen_run_parser_from_file(const char *filename, int start_rule,
}
mod_ty result = _PyPegen_run_parser_from_file_pointer(fp, start_rule, filename_ob,
NULL, NULL, NULL, NULL, arena);
NULL, NULL, NULL, flags, NULL, arena);
fclose(fp);
return result;
@ -1025,12 +1067,12 @@ _PyPegen_run_parser_from_file(const char *filename, int start_rule,
mod_ty
_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
int iflags, PyArena *arena)
PyCompilerFlags *flags, PyArena *arena)
{
int exec_input = start_rule == Py_file_input;
struct tok_state *tok;
if (iflags & PyCF_IGNORE_COOKIE) {
if (flags == NULL || flags->cf_flags & PyCF_IGNORE_COOKIE) {
tok = PyTokenizer_FromUTF8(str, exec_input);
} else {
tok = PyTokenizer_FromString(str, exec_input);
@ -1048,7 +1090,8 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen
// We need to clear up from here on
mod_ty result = NULL;
Parser *p = _PyPegen_Parser_New(tok, start_rule, NULL, arena);
int parser_flags = compute_parser_flags(flags);
Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, NULL, arena);
if (p == NULL) {
goto error;
}

View File

@ -7,6 +7,23 @@
#include <Python-ast.h>
#include <pyarena.h>
#if 0
#define PyPARSE_YIELD_IS_KEYWORD 0x0001
#endif
#define PyPARSE_DONT_IMPLY_DEDENT 0x0002
#if 0
#define PyPARSE_WITH_IS_KEYWORD 0x0003
#define PyPARSE_PRINT_IS_FUNCTION 0x0004
#define PyPARSE_UNICODE_LITERALS 0x0008
#endif
#define PyPARSE_IGNORE_COOKIE 0x0010
#define PyPARSE_BARRY_AS_BDFL 0x0020
#define PyPARSE_TYPE_COMMENTS 0x0040
#define PyPARSE_ASYNC_HACKS 0x0080
typedef struct _memo {
int type;
void *node;
@ -41,6 +58,7 @@ typedef struct {
int starting_lineno;
int starting_col_offset;
int error_indicator;
int flags;
} Parser;
typedef struct {
@ -137,13 +155,13 @@ CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
#define CHECK_NULL_ALLOWED(result) CHECK_CALL_NULL_ALLOWED(p, result)
PyObject *_PyPegen_new_identifier(Parser *, char *);
Parser *_PyPegen_Parser_New(struct tok_state *, int, int *, PyArena *);
Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int *, PyArena *);
void _PyPegen_Parser_Free(Parser *);
mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
const char *, const char *, int *, PyArena *);
const char *, const char *, PyCompilerFlags *, int *, PyArena *);
void *_PyPegen_run_parser(Parser *);
mod_ty _PyPegen_run_parser_from_file(const char *, int, PyObject *, PyArena *);
mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, int, PyArena *);
mod_ty _PyPegen_run_parser_from_file(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
void *_PyPegen_interactive_exit(Parser *);
asdl_seq *_PyPegen_singleton_seq(Parser *, void *);
asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *);
@ -174,6 +192,7 @@ asdl_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *);
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
int _PyPegen_check_barry_as_flufl(Parser *);
void *_PyPegen_parse(Parser *);

View File

@ -241,7 +241,7 @@ PyRun_InteractiveOneObjectEx(FILE *fp, PyObject *filename,
if (use_peg) {
mod = PyPegen_ASTFromFileObject(fp, filename, Py_single_input,
enc, ps1, ps2, &errcode, arena);
enc, ps1, ps2, flags, &errcode, arena);
}
else {
mod = PyParser_ASTFromFileObject(fp, filename, enc,
@ -1073,7 +1073,7 @@ PyRun_FileExFlags(FILE *fp, const char *filename_str, int start, PyObject *globa
if (use_peg) {
mod = PyPegen_ASTFromFileObject(fp, filename, start, NULL, NULL, NULL,
NULL, arena);
flags, NULL, arena);
}
else {
mod = PyParser_ASTFromFileObject(fp, filename, NULL, start, 0, 0,

View File

@ -12,7 +12,6 @@ _build_return_object(mod_ty module, int mode, PyObject *filename_ob, PyArena *ar
} else {
result = Py_None;
Py_INCREF(result);
}
return result;
@ -43,7 +42,8 @@ parse_file(PyObject *self, PyObject *args, PyObject *kwds)
goto error;
}
mod_ty res = _PyPegen_run_parser_from_file(filename, Py_file_input, filename_ob, arena);
PyCompilerFlags flags = _PyCompilerFlags_INIT;
mod_ty res = _PyPegen_run_parser_from_file(filename, Py_file_input, filename_ob, &flags, arena);
if (res == NULL) {
goto error;
}
@ -81,8 +81,9 @@ parse_string(PyObject *self, PyObject *args, PyObject *kwds)
goto error;
}
PyCompilerFlags flags = _PyCompilerFlags_INIT;
mod_ty res = _PyPegen_run_parser_from_string(the_string, Py_file_input, filename_ob,
PyCF_IGNORE_COOKIE, arena);
&flags, arena);
if (res == NULL) {
goto error;
}

View File

@ -73,9 +73,17 @@ class CCallMakerVisitor(GrammarVisitor):
return "literal", f"_PyPegen_expect_token(p, {type})"
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
def can_we_inline(node):
    """Return True if `node` has exactly one alternative holding exactly one
    item and that alternative carries no action; otherwise return False."""
    alts = node.alts
    if len(alts) != 1:
        return False
    only_alt = alts[0]
    if len(only_alt.items) != 1:
        return False
    # Inlining an alternative that has an action would drop the action,
    # so such alternatives are never inlined.
    return getattr(only_alt, "action", None) is None
if node in self.cache:
return self.cache[node]
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
if can_we_inline(node):
self.cache[node] = self.visit(node.alts[0].items[0])
else:
name = self.gen.name_node(node)