From 9d6897accc49f40414fbecafeb1c65562c6e4647 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 24 Aug 2002 06:54:19 +0000 Subject: [PATCH] Speed up the most egregious "if token in (long tuple)" cases by using a dict instead. (Alas, using a Set would be slower instead of faster.) --- Lib/tokenize.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 22f28c44269..76ea7a2ef99 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -110,6 +110,21 @@ endprogs = {"'": re.compile(Single), '"': re.compile(Double), "UR'''": single3prog, 'UR"""': double3prog, 'r': None, 'R': None, 'u': None, 'U': None} +triple_quoted = {} +for t in ("'''", '"""', + "r'''", 'r"""', "R'''", 'R"""', + "u'''", 'u"""', "U'''", 'U"""', + "ur'''", 'ur"""', "Ur'''", 'Ur"""', + "uR'''", 'uR"""', "UR'''", 'UR"""'): + triple_quoted[t] = t +single_quoted = {} +for t in ("'", '"', + "r'", 'r"', "R'", 'R"', + "u'", 'u"', "U'", 'U"', + "ur'", 'ur"', "Ur'", 'Ur"', + "uR'", 'uR"', "UR'", 'UR"' ): + single_quoted[t] = t + tabsize = 8 class TokenError(Exception): pass @@ -232,11 +247,7 @@ def generate_tokens(readline): token, spos, epos, line) elif initial == '#': yield (COMMENT, token, spos, epos, line) - elif token in ("'''", '"""', # triple-quoted - "r'''", 'r"""', "R'''", 'R"""', - "u'''", 'u"""', "U'''", 'U"""', - "ur'''", 'ur"""', "Ur'''", 'Ur"""', - "uR'''", 'uR"""', "UR'''", 'UR"""'): + elif token in triple_quoted: endprog = endprogs[token] endmatch = endprog.match(line, pos) if endmatch: # all on one line @@ -248,11 +259,9 @@ def generate_tokens(readline): contstr = line[start:] contline = line break - elif initial in ("'", '"') or \ - token[:2] in ("r'", 'r"', "R'", 'R"', - "u'", 'u"', "U'", 'U"') or \ - token[:3] in ("ur'", 'ur"', "Ur'", 'Ur"', - "uR'", 'uR"', "UR'", 'UR"' ): + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: if token[-1] == '\n': # continued string strstart = (lnum, start) endprog = (endprogs[initial] or endprogs[token[1]] or