diff --git a/Include/pythonrun.h b/Include/pythonrun.h
index 5f6a67f538c..87d78741669 100644
--- a/Include/pythonrun.h
+++ b/Include/pythonrun.h
@@ -9,6 +9,7 @@ extern "C" {
 
 #define PyCF_MASK (CO_FUTURE_DIVISION)
 #define PyCF_MASK_OBSOLETE (CO_GENERATOR_ALLOWED | CO_NESTED)
+#define PyCF_SOURCE_IS_UTF8 0x0100
 
 typedef struct {
 	int cf_flags; /* bitmask of CO_xxx flags relevant to future */
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 2e00632de20..047e93eee6c 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -195,6 +195,8 @@ class BuiltinTest(unittest.TestCase):
         self.assertRaises(TypeError, compile)
         self.assertRaises(ValueError, compile, 'print 42\n', '', 'badmode')
         self.assertRaises(ValueError, compile, 'print 42\n', '', 'single', 0xff)
+        if have_unicode:
+            compile(unicode('print u"\xc3\xa5"\n', 'utf8'), '', 'exec')
 
     def test_complex(self):
         class OS:
@@ -309,6 +311,8 @@ class BuiltinTest(unittest.TestCase):
         self.assertEqual(eval(unicode('c'), globals, locals), 300)
         bom = '\xef\xbb\xbf'
         self.assertEqual(eval(bom + 'a', globals, locals), 1)
+        self.assertEqual(eval(unicode('u"\xc3\xa5"', 'utf8'), globals),
+                         unicode('\xc3\xa5', 'utf8'))
         self.assertRaises(TypeError, eval)
         self.assertRaises(TypeError, eval, ())
 
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index b74e09c26ac..9b1bf55495d 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -340,11 +340,39 @@ builtin_compile(PyObject *self, PyObject *args)
 	int dont_inherit = 0;
 	int supplied_flags = 0;
 	PyCompilerFlags cf;
+	PyObject *result, *cmd, *tmp = NULL;
 
-	if (!PyArg_ParseTuple(args, "sss|ii:compile", &str, &filename,
+	if (!PyArg_ParseTuple(args, "Oss|ii:compile", &cmd, &filename,
 			      &startstr, &supplied_flags, &dont_inherit))
 		return NULL;
 
+	cf.cf_flags = supplied_flags;
+
+#ifdef Py_USING_UNICODE
+	/* Unicode source is compiled from a temporary UTF-8 encoded string;
+	   PyCF_SOURCE_IS_UTF8 makes the compiler use "utf-8" as encoding. */
+	if (PyUnicode_Check(cmd)) {
+		tmp = PyUnicode_AsUTF8String(cmd);
+		if (tmp == NULL)
+			return NULL;
+		cmd = tmp;
+		cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
+	}
+#endif
+	if (!PyString_Check(cmd)) {
+		PyErr_SetString(PyExc_TypeError,
+				"compile() arg 1 must be a string");
+		return NULL;
+	}
+
+	if (PyString_AsStringAndSize(cmd, &str, NULL)) {
+		/* release the UTF-8 temporary on failure */
+		Py_XDECREF(tmp);
+		return NULL;
+	}
+	/* NOTE(review): the error returns between here and the final
+	   Py_XDECREF(tmp) lie outside this hunk; confirm they release tmp. */
+
 	if (strcmp(startstr, "exec") == 0)
 		start = Py_file_input;
 	else if (strcmp(startstr, "eval") == 0)
@@ -364,11 +392,12 @@ builtin_compile(PyObject *self, PyObject *args)
 	}
 	/* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */
 
-	cf.cf_flags = supplied_flags;
 	if (!dont_inherit) {
 		PyEval_MergeCompilerFlags(&cf);
 	}
-	return Py_CompileStringFlags(str, filename, start, &cf);
+	result = Py_CompileStringFlags(str, filename, start, &cf);
+	Py_XDECREF(tmp);
+	return result;
 }
 
 PyDoc_STRVAR(compile_doc,
@@ -428,7 +457,7 @@ Return the tuple ((x-x%y)/y, x%y). Invariant: div*y + mod == x.");
 static PyObject *
 builtin_eval(PyObject *self, PyObject *args)
 {
-	PyObject *cmd;
+	PyObject *cmd, *result, *tmp = NULL;
 	PyObject *globals = Py_None, *locals = Py_None;
 	char *str;
 	PyCompilerFlags cf;
@@ -467,14 +496,30 @@ builtin_eval(PyObject *self, PyObject *args)
 			   "eval() arg 1 must be a string or code object");
 		return NULL;
 	}
+	cf.cf_flags = 0;
+
+#ifdef Py_USING_UNICODE
+	/* Evaluate unicode source via a temporary UTF-8 encoded string. */
+	if (PyUnicode_Check(cmd)) {
+		tmp = PyUnicode_AsUTF8String(cmd);
+		if (tmp == NULL)
+			return NULL;
+		cmd = tmp;
+		cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
+	}
+#endif
-	if (PyString_AsStringAndSize(cmd, &str, NULL))
+	if (PyString_AsStringAndSize(cmd, &str, NULL)) {
+		/* release the UTF-8 temporary on failure */
+		Py_XDECREF(tmp);
 		return NULL;
+	}
 	while (*str == ' ' || *str == '\t')
 		str++;
 
-	cf.cf_flags = 0;
 	(void)PyEval_MergeCompilerFlags(&cf);
-	return PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
+	result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
+	Py_XDECREF(tmp);
+	return result;
 }
 
 PyDoc_STRVAR(eval_doc,
diff --git a/Python/ceval.c b/Python/ceval.c
index 0f52a0bb814..801f08d15eb 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -3122,7 +3122,7 @@ int
 PyEval_MergeCompilerFlags(PyCompilerFlags *cf)
 {
 	PyFrameObject *current_frame = (PyFrameObject *)PyEval_GetFrame();
-	int result = 0;
+	int result = cf->cf_flags != 0;
 
 	if (current_frame != NULL) {
 		const int codeflags = current_frame->f_code->co_flags;
@@ -3898,16 +3898,31 @@ exec_statement(PyFrameObject *f, PyObject *prog, PyObject *globals,
 						 locals);
 	}
 	else {
+		PyObject *tmp = NULL;
 		char *str;
 		PyCompilerFlags cf;
+		cf.cf_flags = 0;
+#ifdef Py_USING_UNICODE
+		/* Run unicode source via a temporary UTF-8 encoded string. */
+		if (PyUnicode_Check(prog)) {
+			tmp = PyUnicode_AsUTF8String(prog);
+			if (tmp == NULL)
+				return -1;
+			prog = tmp;
+			cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
+		}
+#endif
-		if (PyString_AsStringAndSize(prog, &str, NULL))
+		if (PyString_AsStringAndSize(prog, &str, NULL)) {
+			/* release the UTF-8 temporary on failure */
+			Py_XDECREF(tmp);
 			return -1;
+		}
-		cf.cf_flags = 0;
 		if (PyEval_MergeCompilerFlags(&cf))
 			v = PyRun_StringFlags(str, Py_file_input, globals,
 					      locals, &cf);
 		else
 			v = PyRun_String(str, Py_file_input, globals, locals);
+		Py_XDECREF(tmp);
 	}
 	if (plain)
 		PyFrame_LocalsToFast(f, 0);
diff --git a/Python/compile.c b/Python/compile.c
index 01e961b6910..2b2a9d5389a 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -4206,7 +4206,9 @@ jcompile(node *n, const char *filename, struct compiling *base,
 	PyCodeObject *co;
 	if (!com_init(&sc, filename))
 		return NULL;
-	if (TYPE(n) == encoding_decl) {
+	if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
+		sc.c_encoding = "utf-8";
+	} else if (TYPE(n) == encoding_decl) {
 		sc.c_encoding = STR(n);
 		n = CHILD(n, 0);
 	} else {