#2798: PyArg_ParseTuple did not correctly handle the "s" code in case of unicode strings

with characters outside 7-bit ASCII (s# was already correct). This is necessary to allow Python to run from a non-ASCII directory, and seems to be enough on some platforms, probably those where the default PyUnicode encoding (UTF-8) is also the default filesystem encoding.
2008-05-12 13:19:07 +00:00 · 2008-05-12 13:19:07 +00:00 · 0740459248
parent e6161492fe
commit 0740459248
3 changed files with 45 additions and 8 deletions
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -12,6 +12,10 @@ What's new in Python 3.0b1?
 Core and Builtins
 -----------------

+- Issue 2798: When parsing arguments with PyArg_ParseTuple, the "s" code now
+  allows any unicode string and returns a utf-8 encoded buffer, just like the
+  "s#" code already does.  The "z" code was corrected as well.
+
 - Issue 2801: fix bug in the float.is_integer method where a ValueError
  was sometimes incorrectly raised.

--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@ -475,6 +475,38 @@ test_k_code(PyObject *self)
 }


+/* Test the s and z codes for PyArg_ParseTuple: both must accept a str
+   containing non-ASCII characters.  Returns None, or NULL with an error set. */
+static PyObject *
+test_s_code(PyObject *self)
+{
+    PyObject *tuple, *obj;
+    char *value;
+    int ok;
+
+    tuple = PyTuple_New(1);
+    if (tuple == NULL)
+        return NULL;
+
+    obj = PyUnicode_Decode("t\xeate", strlen("t\xeate"),
+                           "latin-1", NULL);
+    if (obj == NULL) {
+        Py_DECREF(tuple);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(tuple, 0, obj);  /* steals the reference to obj */
+
+    /* Both calls used to raise a TypeError:
+     * "argument must be string without null bytes, not str".
+     * PyArg_ParseTuple returns 0 (not a negative value) on failure. */
+    ok = PyArg_ParseTuple(tuple, "s:test_s_code1", &value) &&
+         PyArg_ParseTuple(tuple, "z:test_s_code2", &value);
+    Py_DECREF(tuple);
+    if (!ok)
+        return NULL;
+    Py_RETURN_NONE;
+}
+
 /* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case
   of an error.
 */
@ -952,6 +984,7 @@ static PyMethodDef TestMethods[] = {
 	{"codec_incrementaldecoder",
 	 (PyCFunction)codec_incrementaldecoder,	 METH_VARARGS},
 #endif
+	{"test_s_code",		(PyCFunction)test_s_code,	 METH_NOARGS},
 	{"test_u_code",		(PyCFunction)test_u_code,	 METH_NOARGS},
 	{"test_Z_code",		(PyCFunction)test_Z_code,	 METH_NOARGS},
 #ifdef WITH_THREAD
--- a/Python/getargs.c
+++ b/Python/getargs.c
@ -822,10 +822,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 			}
 			else
 				return converterr("string", arg, msgbuf, bufsize);
-			/* XXX(gb): this test is completely wrong -- p is a
-			 * byte string while arg is a Unicode. I *think* it should
-			 * check against the size of uarg... */
-			if ((Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+			if ((Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
 				return converterr("string without null bytes",
 						  arg, msgbuf, bufsize);
 		}
@ -874,11 +871,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 			format++;
 		} else {
 			char **p = va_arg(*p_va, char **);
+			uarg = NULL;

 			if (arg == Py_None)
 				*p = 0;
-			else if (PyString_Check(arg))
+			else if (PyString_Check(arg)) {
+				/* Enable null byte check below */
+				uarg = arg;
 				*p = PyString_AS_STRING(arg);
+			}
 			else if (PyUnicode_Check(arg)) {
 				uarg = UNICODE_DEFAULT_ENCODING(arg);
 				if (uarg == NULL)
@ -900,9 +901,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 				}
 				format++;
 			}
-			/* XXX(gb): same comment as for 's' applies here... */
-			else if (*p != NULL &&
-				 (Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+			else if (*p != NULL && uarg != NULL &&
+				(Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
 				return converterr(
 					"string without null bytes or None",
 					arg, msgbuf, bufsize);