From 15acc4eaba8519d7d5f2acaffde65446b44dcf79 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 27 Oct 2020 20:54:20 +0200 Subject: [PATCH] bpo-41659: Disallow curly brace directly after primary (GH-22996) --- Grammar/python.gram | 3 + Lib/test/test_exceptions.py | 1 + Lib/test/test_syntax.py | 3 + .../2020-10-27-18-32-49.bpo-41659.d4a-8o.rst | 3 + Parser/parser.c | 401 ++++++++++-------- 5 files changed, 244 insertions(+), 167 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-10-27-18-32-49.bpo-41659.d4a-8o.rst diff --git a/Grammar/python.gram b/Grammar/python.gram index 19c85accf8d..b8da554b8ec 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -475,6 +475,7 @@ await_primary[expr_ty] (memo): | AWAIT a=primary { CHECK_VERSION(expr_ty, 5, "Await expressions are", _Py_Await(a, EXTRA)) } | primary primary[expr_ty]: + | invalid_primary # must be before 'primary genexp' because of invalid_genexp | a=primary '.' b=NAME { _Py_Attribute(a, b->v.Name.id, Load, EXTRA) } | a=primary b=genexp { _Py_Call(a, CHECK(asdl_expr_seq*, (asdl_expr_seq*)_PyPegen_singleton_seq(p, b)), NULL, EXTRA) } | a=primary '(' b=[arguments] ')' { @@ -682,6 +683,8 @@ invalid_del_stmt: RAISE_SYNTAX_ERROR_INVALID_TARGET(DEL_TARGETS, a) } invalid_block: | NEWLINE !INDENT { RAISE_INDENTATION_ERROR("expected an indented block") } +invalid_primary: + | primary a='{' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "invalid syntax") } invalid_comprehension: | ('[' | '(' | '{') a=starred_expression for_if_clauses { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "iterable unpacking cannot be used in comprehension") } diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 1ec44688777..4dbf5fe5d5b 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -208,6 +208,7 @@ class ExceptionTests(unittest.TestCase): check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18) check('x = "a', 1, 7) check('lambda x: x = 2', 1, 1) + check('f{a + b + c}', 1, 2) # 
Errors thrown by compile.c check('class foo:return 1', 1, 11) diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 7c3302c1d46..c25b85246b9 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -802,6 +802,9 @@ class SyntaxTestCase(unittest.TestCase): else: self.fail("compile() did not raise SyntaxError") + def test_curly_brace_after_primary_raises_immediately(self): + self._check_error("f{", "invalid syntax", mode="single") + def test_assign_call(self): self._check_error("f() = 1", "assign") diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-27-18-32-49.bpo-41659.d4a-8o.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-27-18-32-49.bpo-41659.d4a-8o.rst new file mode 100644 index 00000000000..038749a7b16 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-27-18-32-49.bpo-41659.d4a-8o.rst @@ -0,0 +1,3 @@ +Fix a bug in the parser, where a curly brace following a `primary` didn't fail immediately. +This caused invalid expressions like `a {b}` to raise a :exc:`SyntaxError` with a wrong offset, +and invalid expressions ending with a curly brace like `a {` to not fail immediately in the REPL. 
\ No newline at end of file diff --git a/Parser/parser.c b/Parser/parser.c index e438f06c9be..a22cf2752d1 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -216,173 +216,174 @@ static KeywordToken *reserved_keywords[] = { #define invalid_ann_assign_target_type 1147 #define invalid_del_stmt_type 1148 #define invalid_block_type 1149 -#define invalid_comprehension_type 1150 -#define invalid_dict_comprehension_type 1151 -#define invalid_parameters_type 1152 -#define invalid_lambda_parameters_type 1153 -#define invalid_star_etc_type 1154 -#define invalid_lambda_star_etc_type 1155 -#define invalid_double_type_comments_type 1156 -#define invalid_with_item_type 1157 -#define invalid_for_target_type 1158 -#define invalid_group_type 1159 -#define invalid_import_from_targets_type 1160 -#define _loop0_1_type 1161 -#define _loop0_2_type 1162 -#define _loop0_4_type 1163 -#define _gather_3_type 1164 -#define _loop0_6_type 1165 -#define _gather_5_type 1166 -#define _loop0_8_type 1167 -#define _gather_7_type 1168 -#define _loop0_10_type 1169 -#define _gather_9_type 1170 -#define _loop1_11_type 1171 -#define _loop0_13_type 1172 -#define _gather_12_type 1173 -#define _tmp_14_type 1174 -#define _tmp_15_type 1175 -#define _tmp_16_type 1176 -#define _tmp_17_type 1177 -#define _tmp_18_type 1178 -#define _tmp_19_type 1179 -#define _tmp_20_type 1180 -#define _tmp_21_type 1181 -#define _loop1_22_type 1182 -#define _tmp_23_type 1183 -#define _tmp_24_type 1184 -#define _loop0_26_type 1185 -#define _gather_25_type 1186 -#define _loop0_28_type 1187 -#define _gather_27_type 1188 -#define _tmp_29_type 1189 -#define _tmp_30_type 1190 -#define _loop0_31_type 1191 -#define _loop1_32_type 1192 -#define _loop0_34_type 1193 -#define _gather_33_type 1194 -#define _tmp_35_type 1195 -#define _loop0_37_type 1196 -#define _gather_36_type 1197 -#define _tmp_38_type 1198 -#define _loop0_40_type 1199 -#define _gather_39_type 1200 -#define _loop0_42_type 1201 -#define _gather_41_type 1202 -#define 
_loop0_44_type 1203 -#define _gather_43_type 1204 -#define _loop0_46_type 1205 -#define _gather_45_type 1206 -#define _tmp_47_type 1207 -#define _loop1_48_type 1208 -#define _tmp_49_type 1209 -#define _tmp_50_type 1210 -#define _tmp_51_type 1211 -#define _tmp_52_type 1212 -#define _tmp_53_type 1213 -#define _loop0_54_type 1214 -#define _loop0_55_type 1215 -#define _loop0_56_type 1216 -#define _loop1_57_type 1217 -#define _loop0_58_type 1218 -#define _loop1_59_type 1219 -#define _loop1_60_type 1220 -#define _loop1_61_type 1221 -#define _loop0_62_type 1222 -#define _loop1_63_type 1223 -#define _loop0_64_type 1224 -#define _loop1_65_type 1225 -#define _loop0_66_type 1226 -#define _loop1_67_type 1227 -#define _loop1_68_type 1228 -#define _tmp_69_type 1229 -#define _loop0_71_type 1230 -#define _gather_70_type 1231 -#define _loop1_72_type 1232 -#define _loop0_74_type 1233 -#define _gather_73_type 1234 -#define _loop1_75_type 1235 -#define _loop0_76_type 1236 -#define _loop0_77_type 1237 -#define _loop0_78_type 1238 -#define _loop1_79_type 1239 -#define _loop0_80_type 1240 -#define _loop1_81_type 1241 -#define _loop1_82_type 1242 -#define _loop1_83_type 1243 -#define _loop0_84_type 1244 -#define _loop1_85_type 1245 -#define _loop0_86_type 1246 -#define _loop1_87_type 1247 -#define _loop0_88_type 1248 -#define _loop1_89_type 1249 -#define _loop1_90_type 1250 -#define _loop1_91_type 1251 -#define _loop1_92_type 1252 -#define _tmp_93_type 1253 -#define _loop0_95_type 1254 -#define _gather_94_type 1255 -#define _tmp_96_type 1256 -#define _tmp_97_type 1257 -#define _tmp_98_type 1258 -#define _tmp_99_type 1259 -#define _loop1_100_type 1260 -#define _tmp_101_type 1261 -#define _tmp_102_type 1262 -#define _loop0_104_type 1263 -#define _gather_103_type 1264 -#define _loop1_105_type 1265 -#define _loop0_106_type 1266 -#define _loop0_107_type 1267 -#define _loop0_109_type 1268 -#define _gather_108_type 1269 -#define _tmp_110_type 1270 -#define _loop0_112_type 1271 -#define 
_gather_111_type 1272 -#define _loop0_114_type 1273 -#define _gather_113_type 1274 -#define _loop0_116_type 1275 -#define _gather_115_type 1276 -#define _loop0_118_type 1277 -#define _gather_117_type 1278 -#define _loop0_119_type 1279 -#define _loop0_121_type 1280 -#define _gather_120_type 1281 -#define _tmp_122_type 1282 -#define _loop0_124_type 1283 -#define _gather_123_type 1284 -#define _loop0_126_type 1285 -#define _gather_125_type 1286 -#define _tmp_127_type 1287 -#define _loop0_128_type 1288 -#define _loop0_129_type 1289 -#define _loop0_130_type 1290 -#define _tmp_131_type 1291 -#define _tmp_132_type 1292 -#define _loop0_133_type 1293 -#define _tmp_134_type 1294 -#define _loop0_135_type 1295 -#define _tmp_136_type 1296 -#define _tmp_137_type 1297 -#define _tmp_138_type 1298 -#define _tmp_139_type 1299 -#define _tmp_140_type 1300 -#define _tmp_141_type 1301 -#define _tmp_142_type 1302 -#define _tmp_143_type 1303 -#define _tmp_144_type 1304 -#define _tmp_145_type 1305 -#define _tmp_146_type 1306 -#define _tmp_147_type 1307 -#define _tmp_148_type 1308 -#define _tmp_149_type 1309 -#define _tmp_150_type 1310 -#define _tmp_151_type 1311 -#define _tmp_152_type 1312 -#define _loop1_153_type 1313 -#define _loop1_154_type 1314 -#define _tmp_155_type 1315 -#define _tmp_156_type 1316 +#define invalid_primary_type 1150 // Left-recursive +#define invalid_comprehension_type 1151 +#define invalid_dict_comprehension_type 1152 +#define invalid_parameters_type 1153 +#define invalid_lambda_parameters_type 1154 +#define invalid_star_etc_type 1155 +#define invalid_lambda_star_etc_type 1156 +#define invalid_double_type_comments_type 1157 +#define invalid_with_item_type 1158 +#define invalid_for_target_type 1159 +#define invalid_group_type 1160 +#define invalid_import_from_targets_type 1161 +#define _loop0_1_type 1162 +#define _loop0_2_type 1163 +#define _loop0_4_type 1164 +#define _gather_3_type 1165 +#define _loop0_6_type 1166 +#define _gather_5_type 1167 +#define _loop0_8_type 
1168 +#define _gather_7_type 1169 +#define _loop0_10_type 1170 +#define _gather_9_type 1171 +#define _loop1_11_type 1172 +#define _loop0_13_type 1173 +#define _gather_12_type 1174 +#define _tmp_14_type 1175 +#define _tmp_15_type 1176 +#define _tmp_16_type 1177 +#define _tmp_17_type 1178 +#define _tmp_18_type 1179 +#define _tmp_19_type 1180 +#define _tmp_20_type 1181 +#define _tmp_21_type 1182 +#define _loop1_22_type 1183 +#define _tmp_23_type 1184 +#define _tmp_24_type 1185 +#define _loop0_26_type 1186 +#define _gather_25_type 1187 +#define _loop0_28_type 1188 +#define _gather_27_type 1189 +#define _tmp_29_type 1190 +#define _tmp_30_type 1191 +#define _loop0_31_type 1192 +#define _loop1_32_type 1193 +#define _loop0_34_type 1194 +#define _gather_33_type 1195 +#define _tmp_35_type 1196 +#define _loop0_37_type 1197 +#define _gather_36_type 1198 +#define _tmp_38_type 1199 +#define _loop0_40_type 1200 +#define _gather_39_type 1201 +#define _loop0_42_type 1202 +#define _gather_41_type 1203 +#define _loop0_44_type 1204 +#define _gather_43_type 1205 +#define _loop0_46_type 1206 +#define _gather_45_type 1207 +#define _tmp_47_type 1208 +#define _loop1_48_type 1209 +#define _tmp_49_type 1210 +#define _tmp_50_type 1211 +#define _tmp_51_type 1212 +#define _tmp_52_type 1213 +#define _tmp_53_type 1214 +#define _loop0_54_type 1215 +#define _loop0_55_type 1216 +#define _loop0_56_type 1217 +#define _loop1_57_type 1218 +#define _loop0_58_type 1219 +#define _loop1_59_type 1220 +#define _loop1_60_type 1221 +#define _loop1_61_type 1222 +#define _loop0_62_type 1223 +#define _loop1_63_type 1224 +#define _loop0_64_type 1225 +#define _loop1_65_type 1226 +#define _loop0_66_type 1227 +#define _loop1_67_type 1228 +#define _loop1_68_type 1229 +#define _tmp_69_type 1230 +#define _loop0_71_type 1231 +#define _gather_70_type 1232 +#define _loop1_72_type 1233 +#define _loop0_74_type 1234 +#define _gather_73_type 1235 +#define _loop1_75_type 1236 +#define _loop0_76_type 1237 +#define _loop0_77_type 
1238 +#define _loop0_78_type 1239 +#define _loop1_79_type 1240 +#define _loop0_80_type 1241 +#define _loop1_81_type 1242 +#define _loop1_82_type 1243 +#define _loop1_83_type 1244 +#define _loop0_84_type 1245 +#define _loop1_85_type 1246 +#define _loop0_86_type 1247 +#define _loop1_87_type 1248 +#define _loop0_88_type 1249 +#define _loop1_89_type 1250 +#define _loop1_90_type 1251 +#define _loop1_91_type 1252 +#define _loop1_92_type 1253 +#define _tmp_93_type 1254 +#define _loop0_95_type 1255 +#define _gather_94_type 1256 +#define _tmp_96_type 1257 +#define _tmp_97_type 1258 +#define _tmp_98_type 1259 +#define _tmp_99_type 1260 +#define _loop1_100_type 1261 +#define _tmp_101_type 1262 +#define _tmp_102_type 1263 +#define _loop0_104_type 1264 +#define _gather_103_type 1265 +#define _loop1_105_type 1266 +#define _loop0_106_type 1267 +#define _loop0_107_type 1268 +#define _loop0_109_type 1269 +#define _gather_108_type 1270 +#define _tmp_110_type 1271 +#define _loop0_112_type 1272 +#define _gather_111_type 1273 +#define _loop0_114_type 1274 +#define _gather_113_type 1275 +#define _loop0_116_type 1276 +#define _gather_115_type 1277 +#define _loop0_118_type 1278 +#define _gather_117_type 1279 +#define _loop0_119_type 1280 +#define _loop0_121_type 1281 +#define _gather_120_type 1282 +#define _tmp_122_type 1283 +#define _loop0_124_type 1284 +#define _gather_123_type 1285 +#define _loop0_126_type 1286 +#define _gather_125_type 1287 +#define _tmp_127_type 1288 +#define _loop0_128_type 1289 +#define _loop0_129_type 1290 +#define _loop0_130_type 1291 +#define _tmp_131_type 1292 +#define _tmp_132_type 1293 +#define _loop0_133_type 1294 +#define _tmp_134_type 1295 +#define _loop0_135_type 1296 +#define _tmp_136_type 1297 +#define _tmp_137_type 1298 +#define _tmp_138_type 1299 +#define _tmp_139_type 1300 +#define _tmp_140_type 1301 +#define _tmp_141_type 1302 +#define _tmp_142_type 1303 +#define _tmp_143_type 1304 +#define _tmp_144_type 1305 +#define _tmp_145_type 1306 +#define 
_tmp_146_type 1307 +#define _tmp_147_type 1308 +#define _tmp_148_type 1309 +#define _tmp_149_type 1310 +#define _tmp_150_type 1311 +#define _tmp_151_type 1312 +#define _tmp_152_type 1313 +#define _loop1_153_type 1314 +#define _loop1_154_type 1315 +#define _tmp_155_type 1316 +#define _tmp_156_type 1317 static mod_ty file_rule(Parser *p); static mod_ty interactive_rule(Parser *p); @@ -534,6 +535,7 @@ static void *invalid_assignment_rule(Parser *p); static expr_ty invalid_ann_assign_target_rule(Parser *p); static void *invalid_del_stmt_rule(Parser *p); static void *invalid_block_rule(Parser *p); +static void *invalid_primary_rule(Parser *p); static void *invalid_comprehension_rule(Parser *p); static void *invalid_dict_comprehension_rule(Parser *p); static void *invalid_parameters_rule(Parser *p); @@ -10275,6 +10277,7 @@ await_primary_rule(Parser *p) // Left-recursive // primary: +// | invalid_primary // | primary '.' NAME // | primary genexp // | primary '(' arguments? ')' @@ -10328,6 +10331,25 @@ primary_raw(Parser *p) UNUSED(_start_lineno); // Only used by EXTRA macro int _start_col_offset = p->tokens[_mark]->col_offset; UNUSED(_start_col_offset); // Only used by EXTRA macro + if (p->call_invalid_rules) { // invalid_primary + if (p->error_indicator) { + D(p->level--); + return NULL; + } + D(fprintf(stderr, "%*c> primary[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "invalid_primary")); + void *invalid_primary_var; + if ( + (invalid_primary_var = invalid_primary_rule(p)) // invalid_primary + ) + { + D(fprintf(stderr, "%*c+ primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "invalid_primary")); + _res = invalid_primary_var; + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s primary[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "invalid_primary")); + } { // primary '.' 
NAME if (p->error_indicator) { D(p->level--); @@ -15028,6 +15050,51 @@ invalid_block_rule(Parser *p) return _res; } +// Left-recursive +// invalid_primary: primary '{' +static void * +invalid_primary_rule(Parser *p) +{ + D(p->level++); + if (p->error_indicator) { + D(p->level--); + return NULL; + } + void * _res = NULL; + int _mark = p->mark; + { // primary '{' + if (p->error_indicator) { + D(p->level--); + return NULL; + } + D(fprintf(stderr, "%*c> invalid_primary[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "primary '{'")); + Token * a; + expr_ty primary_var; + if ( + (primary_var = primary_rule(p)) // primary + && + (a = _PyPegen_expect_token(p, 25)) // token='{' + ) + { + D(fprintf(stderr, "%*c+ invalid_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "primary '{'")); + _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "invalid syntax" ); + if (_res == NULL && PyErr_Occurred()) { + p->error_indicator = 1; + D(p->level--); + return NULL; + } + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s invalid_primary[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "primary '{'")); + } + _res = NULL; + done: + D(p->level--); + return _res; +} + // invalid_comprehension: ('[' | '(' | '{') starred_expression for_if_clauses static void * invalid_comprehension_rule(Parser *p)