Cleanup of tokenizer.c.

This commit is contained in:
Guido van Rossum 2007-11-16 00:51:45 +00:00
parent 053b4f3a0e
commit cf171a7fbc
1 changed file with 49 additions and 63 deletions

View File

@@ -1269,30 +1269,24 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
/* Identifier (most frequent token!) */ /* Identifier (most frequent token!) */
nonascii = 0; nonascii = 0;
if (is_potential_identifier_start(c)) { if (is_potential_identifier_start(c)) {
/* Process r"", u"" and ur"" */ /* Process b"", r"" and br"" */
switch (c) { if (c == 'b' || c == 'B') {
case 'r':
case 'R':
c = tok_nextc(tok); c = tok_nextc(tok);
if (c == '"' || c == '\'') if (c == '"' || c == '\'')
goto letter_quote; goto letter_quote;
break;
case 'b':
case 'B':
c = tok_nextc(tok);
if (c == 'r' || c == 'R')
c = tok_nextc(tok);
if (c == '"' || c == '\'')
goto letter_quote;
break;
} }
if (c == 'r' || c == 'R') {
c = tok_nextc(tok);
if (c == '"' || c == '\'')
goto letter_quote;
}
while (is_potential_identifier_char(c)) { while (is_potential_identifier_char(c)) {
if (c >= 128) if (c >= 128)
nonascii = 1; nonascii = 1;
c = tok_nextc(tok); c = tok_nextc(tok);
} }
tok_backup(tok, c); tok_backup(tok, c);
if (nonascii && if (nonascii &&
!verify_identifier(tok->start, tok->cur)) { !verify_identifier(tok->start, tok->cur)) {
tok->done = E_IDENTIFIER; tok->done = E_IDENTIFIER;
return ERRORTOKEN; return ERRORTOKEN;
@@ -1322,7 +1316,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
c = tok_nextc(tok); c = tok_nextc(tok);
if (c == '.') { if (c == '.') {
*p_start = tok->start; *p_start = tok->start;
*p_end = tok->cur; *p_end = tok->cur;
return ELLIPSIS; return ELLIPSIS;
} else { } else {
tok_backup(tok, c); tok_backup(tok, c);
@@ -1436,55 +1430,47 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
letter_quote: letter_quote:
/* String */ /* String */
if (c == '\'' || c == '"') { if (c == '\'' || c == '"') {
Py_ssize_t quote2 = tok->cur - tok->start + 1; int quote = c;
int quote = c; int quote_size = 1; /* 1 or 3 */
int triple = 0; int end_quote_size = 0;
int tripcount = 0;
for (;;) { /* Find the quote size and start of string */
c = tok_nextc(tok); c = tok_nextc(tok);
if (c == '\n') { if (c == quote) {
if (!triple) { c = tok_nextc(tok);
tok->done = E_EOLS; if (c == quote)
tok_backup(tok, c); quote_size = 3;
return ERRORTOKEN;
}
tripcount = 0;
tok->cont_line = 1; /* multiline string. */
}
else if (c == EOF) {
if (triple)
tok->done = E_EOFS;
else
tok->done = E_EOLS;
tok->cur = tok->inp;
return ERRORTOKEN;
}
else if (c == quote) {
tripcount++;
if (tok->cur - tok->start == quote2) {
c = tok_nextc(tok);
if (c == quote) {
triple = 1;
tripcount = 0;
continue;
}
tok_backup(tok, c);
}
if (!triple || tripcount == 3)
break;
}
else if (c == '\\') {
tripcount = 0;
c = tok_nextc(tok);
if (c == EOF) {
tok->done = E_EOLS;
tok->cur = tok->inp;
return ERRORTOKEN;
}
}
else else
tripcount = 0; end_quote_size = 1; /* empty string found */
} }
if (c != quote)
tok_backup(tok, c);
/* Get rest of string */
while (end_quote_size != quote_size) {
c = tok_nextc(tok);
if (c == EOF) {
if (quote_size == 3)
tok->done = E_EOFS;
else
tok->done = E_EOLS;
tok->cur = tok->inp;
return ERRORTOKEN;
}
if (quote_size == 1 && c == '\n') {
tok->done = E_EOLS;
tok->cur = tok->inp;
return ERRORTOKEN;
}
if (c == quote)
end_quote_size += 1;
else {
end_quote_size = 0;
if (c == '\\')
c = tok_nextc(tok); /* skip escaped char */
}
}
*p_start = tok->start; *p_start = tok->start;
*p_end = tok->cur; *p_end = tok->cur;
return STRING; return STRING;
@@ -1619,7 +1605,7 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
/* Get -*- encoding -*- from a Python file. /* Get -*- encoding -*- from a Python file.
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
the first or second line of the file (in which case the encoding the first or second line of the file (in which case the encoding
should be assumed to be PyUnicode_GetDefaultEncoding()). should be assumed to be PyUnicode_GetDefaultEncoding()).
The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed