From 4ceeeb09d8ff445888b24aa324bc06175d141cb9 Mon Sep 17 00:00:00 2001
From: Benjamin Peterson
Date: Sat, 3 Apr 2010 22:48:51 +0000
Subject: [PATCH] ensure that the locale does not affect the tokenization of identifiers

---
 Misc/NEWS          |  2 ++
 Parser/tokenizer.c | 22 ++++++++++++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/Misc/NEWS b/Misc/NEWS
index 50011456d5f..af1dd20d249 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 2.7 beta 1?
 Core and Builtins
 -----------------
 
+- Ensure that tokenization of identifiers is not affected by locale.
+
 - Issue #1222585: Added LDCXXSHARED for C++ support. Patch by Arfrever.
 
 - Raise a TypeError when trying to delete a T_STRING_INPLACE struct member.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d60b25694aa..fbbd0bc7fb3 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -93,6 +93,21 @@ char *_PyParser_TokenNames[] = {
 };
 
 
+/* Ensure that the locale does not interfere with tokenization. */
+
+static int
+ascii_isalpha(int c)
+{
+	return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+}
+
+static int
+ascii_isalnum(int c)
+{
+	return ascii_isalpha(c) || ('0' <= c && c <= '9');
+}
+
+
 /* Create and initialize a new tok_state structure */
 
 static struct tok_state *
@@ -230,7 +245,7 @@ get_coding_spec(const char *s, Py_ssize_t size)
 			} while (t[0] == '\x20' || t[0] == '\t');
 
 			begin = t;
-			while (isalnum(Py_CHARMASK(t[0])) ||
+			while (ascii_isalnum(Py_CHARMASK(t[0])) ||
 			       t[0] == '-' || t[0] == '_' || t[0] == '.')
 				t++;
 
@@ -1185,7 +1200,6 @@ indenterror(struct tok_state *tok)
 	return 0;
 }
 
-
 /* Get next token, after space stripping etc. */
 
 static int
@@ -1341,7 +1355,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
 	}
 
 	/* Identifier (most frequent token!) */
-	if (isalpha(c) || c == '_') {
+	if (ascii_isalpha(c) || c == '_') {
 		/* Process r"", u"" and ur"" */
 		switch (c) {
 		case 'b':
@@ -1367,7 +1381,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
 				goto letter_quote;
 			break;
 		}
-		while (isalnum(c) || c == '_') {
+		while (ascii_isalnum(c) || c == '_') {
 			c = tok_nextc(tok);
 		}
 		tok_backup(tok, c);