From 901c997de0ea941fe60e8b0671b126fec0c58cb9 Mon Sep 17 00:00:00 2001
From: Robert Schuppenies
Date: Tue, 10 Jun 2008 10:10:31 +0000
Subject: [PATCH] Issue 3048: Fixed sys.getsizeof for unicode objects.

---
 Lib/test/test_sys.py    | 41 ++++++++++++++++++++++++++++++++++-------
 Objects/unicodeobject.c | 31 +++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 0e17f023ecb..45633742fda 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -421,11 +421,18 @@ class SizeofTest(unittest.TestCase):
         self.file.close()
         test.test_support.unlink(test.test_support.TESTFN)
 
-    def check_sizeof(self, o, size):
+    def check_sizeof(self, o, size, size2=None):
+        """Assert that sys.getsizeof(o) equals size or, if given, size2.
+
+        size2 is an optional alternative for objects whose size depends
+        on state the test cannot observe (e.g. a cached encoded string).
+        """
         result = sys.getsizeof(o)
-        msg = 'wrong size for %s: got %d, expected %d' \
-            % (type(o), result, size)
-        self.assertEqual(result, size, msg)
+        msg = 'wrong size for %s: got %d, expected ' % (type(o), result)
+        if size2 is not None and result != size:
+            self.assertEqual(result, size2, msg + str(size2))
+        else:
+            self.assertEqual(result, size, msg + str(size))
 
     def align(self, value):
         mod = value % self.p
@@ -517,10 +524,10 @@ class SizeofTest(unittest.TestCase):
                 pass
         # type (PyTypeObject + PyNumberMethods + PyMappingMethods +
         #       PySequenceMethods + PyBufferProcs)
-        len_typeobject = p + 2*l + 15*p + l + 4*p + l + 9*p + l + 11*p
+        len_typeobject = p + 2*l + 15*p + l + 4*p + l + 9*p +\
+                         l + 11*p + self.align(4)
         self.check_sizeof(class_newstyle,
-                          h + len_typeobject + 42*p + 10*p + 3*p + 6*p)
-
+                          h + len_typeobject + 41*p + 10*p + 3*p + 6*p)
 
     def test_specialtypes(self):
         i = self.i
@@ -534,6 +541,26 @@ class SizeofTest(unittest.TestCase):
         # list
         self.check_sizeof([], h + l + p + l)
         self.check_sizeof([1, 2, 3], h + l + p + l + 3*l)
+        # unicode
+        # Size of one Py_UNICODE code unit in bytes (2 on narrow builds,
+        # 4 on wide builds), taken from the interpreter's internal encoding.
+        usize = len(u'\0'.encode('unicode-internal'))
+        samples = [u'', u'1'*100]
+        # we need to test for both sizes, because we don't know if the string
+        # has been cached
+        for s in samples:
+            basicsize = h + l + p + l + p + usize * (len(s) + 1)
+            self.check_sizeof(s, basicsize,
+                              size2=basicsize + sys.getsizeof(str(s)))
+        # XXX trigger caching encoded version as Python string
+        s = samples[1]
+        basicsize = h + l + p + l + p + usize * (len(s) + 1)
+        try:
+            getattr(sys, s)
+        except AttributeError:
+            pass
+        finally:
+            self.check_sizeof(s, basicsize + sys.getsizeof(str(s)))
 
         h += l
         # long
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 873f1c43957..840efb9de39 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7895,6 +7895,36 @@ PyDoc_STRVAR(p_format__doc__,
 \n\
 ");
 
+static PyObject *
+unicode__sizeof__(PyUnicodeObject *v)
+{
+    PyObject *res, *defsize, *total;
+
+    /* Object header plus the Py_UNICODE buffer of length + 1 units. */
+    res = PyInt_FromSsize_t(sizeof(PyUnicodeObject) +
+                            sizeof(Py_UNICODE) * (v->length + 1));
+    if (res == NULL || v->defenc == NULL)
+        return res;
+
+    /* An encoded version of the string has been cached in defenc;
+       add the size it reports for itself. */
+    defsize = PyObject_CallMethod(v->defenc, "__sizeof__", NULL);
+    if (defsize == NULL) {
+        Py_DECREF(res);
+        return NULL;
+    }
+    /* PyNumber_Add returns a new reference; release both operands so
+       the intermediate result is not leaked. */
+    total = PyNumber_Add(res, defsize);
+    Py_DECREF(res);
+    Py_DECREF(defsize);
+    return total;
+}
+
+PyDoc_STRVAR(sizeof__doc__,
+"S.__sizeof__() -> size of S in memory, in bytes\n\
+\n\
+");
 
 static PyObject *
 unicode_getnewargs(PyUnicodeObject *v)
@@ -7952,6 +7982,7 @@ static PyMethodDef unicode_methods[] = {
     {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
     {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
     {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
+    {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__},
 #if 0
     {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
 #endif