rewrite parsestr() so it's comprehensible; remove dead code

This commit is contained in:
Benjamin Peterson 2016-02-25 23:13:53 -08:00
parent 202803a0c0
commit 768921cf33
1 changed file with 12 additions and 24 deletions

View File

@@ -3995,7 +3995,7 @@ decode_utf8(struct compiling *c, const char **sPtr, const char *end)
} }
static PyObject * static PyObject *
decode_unicode(struct compiling *c, const char *s, size_t len, const char *encoding) decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len)
{ {
PyObject *v, *u; PyObject *v, *u;
char *buf; char *buf;
@@ -4921,7 +4921,6 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
const char *s = STR(n); const char *s = STR(n);
int quote = Py_CHARMASK(*s); int quote = Py_CHARMASK(*s);
int rawmode = 0; int rawmode = 0;
int need_encoding;
if (Py_ISALPHA(quote)) { if (Py_ISALPHA(quote)) {
while (!*bytesmode || !rawmode) { while (!*bytesmode || !rawmode) {
if (quote == 'b' || quote == 'B') { if (quote == 'b' || quote == 'B') {
@@ -4977,11 +4976,10 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
return NULL; return NULL;
} }
} }
if (!*bytesmode && !rawmode) { /* Avoid invoking escape decoding routines if possible. */
return decode_unicode(c, s, len, c->c_encoding); rawmode = rawmode || strchr(s, '\\') == NULL;
}
if (*bytesmode) { if (*bytesmode) {
/* Disallow non-ascii characters (but not escapes) */ /* Disallow non-ASCII characters. */
const char *ch; const char *ch;
for (ch = s; *ch; ch++) { for (ch = s; *ch; ch++) {
if (Py_CHARMASK(*ch) >= 0x80) { if (Py_CHARMASK(*ch) >= 0x80) {
@@ -4990,26 +4988,16 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
return NULL; return NULL;
} }
} }
} if (rawmode)
need_encoding = !*bytesmode && strcmp(c->c_encoding, "utf-8") != 0;
if (rawmode || strchr(s, '\\') == NULL) {
if (need_encoding) {
PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
if (u == NULL || !*bytesmode)
return u;
v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL);
Py_DECREF(u);
return v;
} else if (*bytesmode) {
return PyBytes_FromStringAndSize(s, len); return PyBytes_FromStringAndSize(s, len);
} else if (strcmp(c->c_encoding, "utf-8") == 0) { else
return PyUnicode_FromStringAndSize(s, len); return PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL);
} else { } else {
return PyUnicode_DecodeLatin1(s, len, NULL); if (rawmode)
} return PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
else
return decode_unicode_with_escapes(c, s, len);
} }
return PyBytes_DecodeEscape(s, len, NULL, 1,
need_encoding ? c->c_encoding : NULL);
} }
/* Accepts a STRING+ atom, and produces an expr_ty node. Run through /* Accepts a STRING+ atom, and produces an expr_ty node. Run through