From d8a82e2897b735e2b7e9e086f1d709365a2ad72c Mon Sep 17 00:00:00 2001
From: "Michael J. Sullivan" <sully@msully.net>
Date: Wed, 22 May 2019 13:43:37 -0700
Subject: [PATCH] bpo-36878: Only allow text after `# type: ignore` if first
 character ASCII (GH-13504)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This disallows things like `# type: ignoreé`, which should not be
recognized as a `# type: ignore` comment.

Also switch to using Py_ISALNUM for the alphanumeric check, for
consistency with other code and because, unlike isalnum(), it is
locale-independent and safe to call on char values with the high bit set.


https://bugs.python.org/issue36878
---
 Lib/test/test_type_comments.py                               | 1 +
 .../2019-05-22-11-16-16.bpo-36878.QwLa3P.rst                 | 2 ++
 Parser/tokenizer.c                                           | 5 +++--
 3 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-05-22-11-16-16.bpo-36878.QwLa3P.rst

diff --git a/Lib/test/test_type_comments.py b/Lib/test/test_type_comments.py
index c62894fa425..83d8717247a 100644
--- a/Lib/test/test_type_comments.py
+++ b/Lib/test/test_type_comments.py
@@ -334,6 +334,7 @@ class TypeCommentTests(unittest.TestCase):
         check_both_ways("try:  # type: int\n  pass\nfinally:\n  pass\n")
         check_both_ways("try:\n  pass\nfinally:  # type: int\n  pass\n")
         check_both_ways("pass  # type: ignorewhatever\n")
+        check_both_ways("pass  # type: ignoreé\n")
 
     def test_func_type_input(self):
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-05-22-11-16-16.bpo-36878.QwLa3P.rst b/Misc/NEWS.d/next/Core and Builtins/2019-05-22-11-16-16.bpo-36878.QwLa3P.rst
new file mode 100644
index 00000000000..2d9f014119d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-05-22-11-16-16.bpo-36878.QwLa3P.rst	
@@ -0,0 +1,2 @@
+Only accept text after ``# type: ignore`` if the first character is ASCII.
+This is to disallow things like ``# type: ignoreé``.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 9b269afc429..c2ec659fed8 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1275,10 +1275,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                 type_start = p;
 
                 /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
-                 * or anything non-alphanumeric. */
+                 * or anything ASCII and non-alphanumeric. */
                 is_type_ignore = (
                     tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
-                    && !(tok->cur > ignore_end && isalnum(p[6])));
+                    && !(tok->cur > ignore_end
+                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
 
                 if (is_type_ignore) {
                     *p_start = (char *) ignore_end;