From d8a82e2897b735e2b7e9e086f1d709365a2ad72c Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Wed, 22 May 2019 13:43:37 -0700 Subject: [PATCH] bpo-36878: Only allow text after `# type: ignore` if first character ASCII (GH-13504) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This disallows things like `# type: ignoreé`; accepting such a comment as a type-ignore seems wrong. Also switch to using Py_ISALNUM for the alnum check, for consistency with other code (and maybe correctness re: locale issues?). https://bugs.python.org/issue36878 --- Lib/test/test_type_comments.py | 1 + .../2019-05-22-11-16-16.bpo-36878.QwLa3P.rst | 2 ++ Parser/tokenizer.c | 5 +++-- 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-05-22-11-16-16.bpo-36878.QwLa3P.rst diff --git a/Lib/test/test_type_comments.py b/Lib/test/test_type_comments.py index c62894fa425..83d8717247a 100644 --- a/Lib/test/test_type_comments.py +++ b/Lib/test/test_type_comments.py @@ -334,6 +334,7 @@ class TypeCommentTests(unittest.TestCase): check_both_ways("try: # type: int\n pass\nfinally:\n pass\n") check_both_ways("try:\n pass\nfinally: # type: int\n pass\n") check_both_ways("pass # type: ignorewhatever\n") + check_both_ways("pass # type: ignoreé\n") def test_func_type_input(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-05-22-11-16-16.bpo-36878.QwLa3P.rst b/Misc/NEWS.d/next/Core and Builtins/2019-05-22-11-16-16.bpo-36878.QwLa3P.rst new file mode 100644 index 00000000000..2d9f014119d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-05-22-11-16-16.bpo-36878.QwLa3P.rst @@ -0,0 +1,2 @@ +Only accept text after `# type: ignore` if the first character is ASCII. +This is to disallow things like `# type: ignoreé`. 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 9b269afc429..c2ec659fed8 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1275,10 +1275,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) type_start = p; /* A TYPE_IGNORE is "type: ignore" followed by the end of the token - * or anything non-alphanumeric. */ + * or anything ASCII and non-alphanumeric. */ is_type_ignore = ( tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0 - && !(tok->cur > ignore_end && isalnum(p[6]))); + && !(tok->cur > ignore_end + && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0])))); if (is_type_ignore) { *p_start = (char *) ignore_end;