From 8334a4fc313be34d3b76f8635b160159c10f8b0e Mon Sep 17 00:00:00 2001 From: Amaury Forgeot d'Arc Date: Fri, 30 Nov 2007 21:53:17 +0000 Subject: [PATCH] Backport of r59241: str.decode fails on very long strings on 64bit platforms. PyArg_ParseTuple t# and w# formats truncated the lengths to 32bit. --- Lib/test/test_bigmem.py | 10 ++++++---- Misc/NEWS | 5 +++++ Python/getargs.c | 7 ++++--- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_bigmem.py b/Lib/test/test_bigmem.py index 6d6c37ceea6..d4fc6eb4785 100644 --- a/Lib/test/test_bigmem.py +++ b/Lib/test/test_bigmem.py @@ -65,13 +65,15 @@ class StrTest(unittest.TestCase): self.assertEquals(s.count('i'), 1) self.assertEquals(s.count('j'), 0) - @bigmemtest(minsize=0, memuse=1) + @bigmemtest(minsize=_2G + 2, memuse=3) def test_decode(self, size): - pass + s = '.' * size + self.assertEquals(len(s.decode('utf-8')), size) - @bigmemtest(minsize=0, memuse=1) + @bigmemtest(minsize=_2G + 2, memuse=3) def test_encode(self, size): - pass + s = u'.' * size + self.assertEquals(len(s.encode('utf-8')), size) @bigmemtest(minsize=_2G, memuse=2) def test_endswith(self, size): diff --git a/Misc/NEWS b/Misc/NEWS index 30c45d142f0..2c9f5fd1a45 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,11 @@ What's New in Python 2.5.2c1? Core and builtins ----------------- +- Issue #1521: On 64bit platforms, using PyArg_ParseTuple with the t# or w# + format code incorrectly truncated the length to an int, even when + PY_SSIZE_T_CLEAN is set. The str.decode method used to return incorrect + results with huge strings. + - Issue #1445: Fix a SystemError when accessing the ``cell_contents`` attribute of an empty cell object. 
diff --git a/Python/getargs.c b/Python/getargs.c index d6255986327..d94edce50b0 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -894,7 +894,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, char **buffer; const char *encoding; PyObject *s; - int size, recode_strings; + Py_ssize_t size; + int recode_strings; /* Get 'e' parameter: the encoding name */ encoding = (const char *)va_arg(*p_va, const char *); @@ -1144,7 +1145,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'w': { /* memory buffer, read-write access */ void **p = va_arg(*p_va, void **); PyBufferProcs *pb = arg->ob_type->tp_as_buffer; - int count; + Py_ssize_t count; if (pb == NULL || pb->bf_getwritebuffer == NULL || @@ -1166,7 +1167,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 't': { /* 8-bit character buffer, read-only access */ char **p = va_arg(*p_va, char **); PyBufferProcs *pb = arg->ob_type->tp_as_buffer; - int count; + Py_ssize_t count; if (*format++ != '#') return converterr(