mirror of https://github.com/python/cpython
Cleanup of tokenizer.c.
This commit is contained in:
parent
053b4f3a0e
commit
cf171a7fbc
|
@ -1269,30 +1269,24 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
|
||||||
/* Identifier (most frequent token!) */
|
/* Identifier (most frequent token!) */
|
||||||
nonascii = 0;
|
nonascii = 0;
|
||||||
if (is_potential_identifier_start(c)) {
|
if (is_potential_identifier_start(c)) {
|
||||||
/* Process r"", u"" and ur"" */
|
/* Process b"", r"" and br"" */
|
||||||
switch (c) {
|
if (c == 'b' || c == 'B') {
|
||||||
case 'r':
|
|
||||||
case 'R':
|
|
||||||
c = tok_nextc(tok);
|
c = tok_nextc(tok);
|
||||||
if (c == '"' || c == '\'')
|
if (c == '"' || c == '\'')
|
||||||
goto letter_quote;
|
goto letter_quote;
|
||||||
break;
|
|
||||||
case 'b':
|
|
||||||
case 'B':
|
|
||||||
c = tok_nextc(tok);
|
|
||||||
if (c == 'r' || c == 'R')
|
|
||||||
c = tok_nextc(tok);
|
|
||||||
if (c == '"' || c == '\'')
|
|
||||||
goto letter_quote;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
if (c == 'r' || c == 'R') {
|
||||||
|
c = tok_nextc(tok);
|
||||||
|
if (c == '"' || c == '\'')
|
||||||
|
goto letter_quote;
|
||||||
|
}
|
||||||
while (is_potential_identifier_char(c)) {
|
while (is_potential_identifier_char(c)) {
|
||||||
if (c >= 128)
|
if (c >= 128)
|
||||||
nonascii = 1;
|
nonascii = 1;
|
||||||
c = tok_nextc(tok);
|
c = tok_nextc(tok);
|
||||||
}
|
}
|
||||||
tok_backup(tok, c);
|
tok_backup(tok, c);
|
||||||
if (nonascii &&
|
if (nonascii &&
|
||||||
!verify_identifier(tok->start, tok->cur)) {
|
!verify_identifier(tok->start, tok->cur)) {
|
||||||
tok->done = E_IDENTIFIER;
|
tok->done = E_IDENTIFIER;
|
||||||
return ERRORTOKEN;
|
return ERRORTOKEN;
|
||||||
|
@ -1322,7 +1316,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
|
||||||
c = tok_nextc(tok);
|
c = tok_nextc(tok);
|
||||||
if (c == '.') {
|
if (c == '.') {
|
||||||
*p_start = tok->start;
|
*p_start = tok->start;
|
||||||
*p_end = tok->cur;
|
*p_end = tok->cur;
|
||||||
return ELLIPSIS;
|
return ELLIPSIS;
|
||||||
} else {
|
} else {
|
||||||
tok_backup(tok, c);
|
tok_backup(tok, c);
|
||||||
|
@ -1436,55 +1430,47 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
|
||||||
letter_quote:
|
letter_quote:
|
||||||
/* String */
|
/* String */
|
||||||
if (c == '\'' || c == '"') {
|
if (c == '\'' || c == '"') {
|
||||||
Py_ssize_t quote2 = tok->cur - tok->start + 1;
|
int quote = c;
|
||||||
int quote = c;
|
int quote_size = 1; /* 1 or 3 */
|
||||||
int triple = 0;
|
int end_quote_size = 0;
|
||||||
int tripcount = 0;
|
|
||||||
for (;;) {
|
/* Find the quote size and start of string */
|
||||||
c = tok_nextc(tok);
|
c = tok_nextc(tok);
|
||||||
if (c == '\n') {
|
if (c == quote) {
|
||||||
if (!triple) {
|
c = tok_nextc(tok);
|
||||||
tok->done = E_EOLS;
|
if (c == quote)
|
||||||
tok_backup(tok, c);
|
quote_size = 3;
|
||||||
return ERRORTOKEN;
|
|
||||||
}
|
|
||||||
tripcount = 0;
|
|
||||||
tok->cont_line = 1; /* multiline string. */
|
|
||||||
}
|
|
||||||
else if (c == EOF) {
|
|
||||||
if (triple)
|
|
||||||
tok->done = E_EOFS;
|
|
||||||
else
|
|
||||||
tok->done = E_EOLS;
|
|
||||||
tok->cur = tok->inp;
|
|
||||||
return ERRORTOKEN;
|
|
||||||
}
|
|
||||||
else if (c == quote) {
|
|
||||||
tripcount++;
|
|
||||||
if (tok->cur - tok->start == quote2) {
|
|
||||||
c = tok_nextc(tok);
|
|
||||||
if (c == quote) {
|
|
||||||
triple = 1;
|
|
||||||
tripcount = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
tok_backup(tok, c);
|
|
||||||
}
|
|
||||||
if (!triple || tripcount == 3)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if (c == '\\') {
|
|
||||||
tripcount = 0;
|
|
||||||
c = tok_nextc(tok);
|
|
||||||
if (c == EOF) {
|
|
||||||
tok->done = E_EOLS;
|
|
||||||
tok->cur = tok->inp;
|
|
||||||
return ERRORTOKEN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
tripcount = 0;
|
end_quote_size = 1; /* empty string found */
|
||||||
}
|
}
|
||||||
|
if (c != quote)
|
||||||
|
tok_backup(tok, c);
|
||||||
|
|
||||||
|
/* Get rest of string */
|
||||||
|
while (end_quote_size != quote_size) {
|
||||||
|
c = tok_nextc(tok);
|
||||||
|
if (c == EOF) {
|
||||||
|
if (quote_size == 3)
|
||||||
|
tok->done = E_EOFS;
|
||||||
|
else
|
||||||
|
tok->done = E_EOLS;
|
||||||
|
tok->cur = tok->inp;
|
||||||
|
return ERRORTOKEN;
|
||||||
|
}
|
||||||
|
if (quote_size == 1 && c == '\n') {
|
||||||
|
tok->done = E_EOLS;
|
||||||
|
tok->cur = tok->inp;
|
||||||
|
return ERRORTOKEN;
|
||||||
|
}
|
||||||
|
if (c == quote)
|
||||||
|
end_quote_size += 1;
|
||||||
|
else {
|
||||||
|
end_quote_size = 0;
|
||||||
|
if (c == '\\')
|
||||||
|
c = tok_nextc(tok); /* skip escaped char */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
*p_start = tok->start;
|
*p_start = tok->start;
|
||||||
*p_end = tok->cur;
|
*p_end = tok->cur;
|
||||||
return STRING;
|
return STRING;
|
||||||
|
@ -1619,7 +1605,7 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
|
||||||
/* Get -*- encoding -*- from a Python file.
|
/* Get -*- encoding -*- from a Python file.
|
||||||
|
|
||||||
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
|
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
|
||||||
the first or second line of the file (in which case the encoding
|
the first or second line of the file (in which case the encoding
|
||||||
should be assumed to be PyUnicode_GetDefaultEncoding()).
|
should be assumed to be PyUnicode_GetDefaultEncoding()).
|
||||||
|
|
||||||
The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
|
The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
|
||||||
|
|
Loading…
Reference in New Issue