From 66a796e5ab8dd7bfc1fe05a830feb05acdab6f53 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Tue, 19 Dec 2006 20:50:34 +0000 Subject: [PATCH] Patch #1601678: move intern() to sys.intern(). --- Doc/lib/libfuncs.tex | 14 -------------- Doc/lib/libsys.tex | 15 +++++++++++++++ Doc/tut/tut.tex | 2 +- Include/stringobject.h | 2 +- Lib/test/test_builtin.py | 24 ------------------------ Lib/test/test_sys.py | 25 +++++++++++++++++++++++++ Misc/NEWS | 10 +++++++--- Misc/python-mode.el | 2 +- Python/bltinmodule.c | 26 -------------------------- Python/sysmodule.c | 27 +++++++++++++++++++++++++++ Tools/pybench/Strings.py | 5 +++-- Tools/scripts/fixdiv.py | 2 +- 12 files changed, 81 insertions(+), 73 deletions(-) diff --git a/Doc/lib/libfuncs.tex b/Doc/lib/libfuncs.tex index a8c06bb18c1..7e0b88d0959 100644 --- a/Doc/lib/libfuncs.tex +++ b/Doc/lib/libfuncs.tex @@ -1262,17 +1262,3 @@ bypass these functions without concerns about missing something important. argument). \end{funcdesc} -\begin{funcdesc}{intern}{string} - Enter \var{string} in the table of ``interned'' strings and return - the interned string -- which is \var{string} itself or a copy. - Interning strings is useful to gain a little performance on - dictionary lookup -- if the keys in a dictionary are interned, and - the lookup key is interned, the key comparisons (after hashing) can - be done by a pointer compare instead of a string compare. Normally, - the names used in Python programs are automatically interned, and - the dictionaries used to hold module, class or instance attributes - have interned keys. 
\versionchanged[Interned strings are not - immortal (like they used to be in Python 2.2 and before); - you must keep a reference to the return value of \function{intern()} - around to benefit from it]{2.3} -\end{funcdesc} diff --git a/Doc/lib/libsys.tex b/Doc/lib/libsys.tex index 702427a257c..7a8859bfb46 100644 --- a/Doc/lib/libsys.tex +++ b/Doc/lib/libsys.tex @@ -340,6 +340,21 @@ else: \versionadded{1.5.2} \end{datadesc} +\begin{funcdesc}{intern}{string} + Enter \var{string} in the table of ``interned'' strings and return + the interned string -- which is \var{string} itself or a copy. + Interning strings is useful to gain a little performance on + dictionary lookup -- if the keys in a dictionary are interned, and + the lookup key is interned, the key comparisons (after hashing) can + be done by a pointer compare instead of a string compare. Normally, + the names used in Python programs are automatically interned, and + the dictionaries used to hold module, class or instance attributes + have interned keys. 
\versionchanged[Interned strings are not + immortal (like they used to be in Python 2.2 and before); + you must keep a reference to the return value of \function{intern()} + around to benefit from it]{2.3} +\end{funcdesc} + \begin{datadesc}{last_type} \dataline{last_value} \dataline{last_traceback} diff --git a/Doc/tut/tut.tex b/Doc/tut/tut.tex index c76c51853bd..4b6b93fb2f5 100644 --- a/Doc/tut/tut.tex +++ b/Doc/tut/tut.tex @@ -2700,7 +2700,7 @@ standard module \module{__builtin__}\refbimodindex{__builtin__}: 'complex', 'copyright', 'credits', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'exec', 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'help', 'hex', - 'id', 'int', 'intern', 'isinstance', 'issubclass', 'iter', + 'id', 'int', 'isinstance', 'issubclass', 'iter', 'len', 'license', 'list', 'locals', 'long', 'map', 'max', 'min', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'quit', 'range', 'reload', 'repr', 'reversed', 'round', 'set', diff --git a/Include/stringobject.h b/Include/stringobject.h index 5f8a6f02a04..ef8f54510cb 100644 --- a/Include/stringobject.h +++ b/Include/stringobject.h @@ -28,7 +28,7 @@ functions should be applied to nil objects. Interning strings (ob_sstate) tries to ensure that only one string object with a given value exists, so equality tests can be one pointer comparison. This is generally restricted to strings that "look like" - Python identifiers, although the intern() builtin can be used to force + Python identifiers, although the sys.intern() function can be used to force interning of any string. Together, these sped the interpreter by up to 20%. 
*/ diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 385031f3b78..b0014789f92 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -842,30 +842,6 @@ class BuiltinTest(unittest.TestCase): self.assertEqual(int(Foo4()), 42L) self.assertRaises(TypeError, int, Foo5()) - def test_intern(self): - self.assertRaises(TypeError, intern) - s = "never interned before" - self.assert_(intern(s) is s) - s2 = s.swapcase().swapcase() - self.assert_(intern(s2) is s) - - # Subclasses of string can't be interned, because they - # provide too much opportunity for insane things to happen. - # We don't want them in the interned dict and if they aren't - # actually interned, we don't want to create the appearance - # that they are by allowing intern() to succeeed. - class S(str): - def __hash__(self): - return 123 - - self.assertRaises(TypeError, intern, S("abc")) - - # It's still safe to pass these strings to routines that - # call intern internally, e.g. PyObject_SetAttr(). - s = S("abc") - setattr(s, s, s) - self.assertEqual(getattr(s, s), s) - def test_iter(self): self.assertRaises(TypeError, iter) self.assertRaises(TypeError, iter, 42, 42) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index f1f1524c06d..fdeb5001a1a 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -350,6 +350,31 @@ class SysModuleTest(unittest.TestCase): # the test runs under regrtest. self.assert_(sys.__stdout__.encoding == sys.__stderr__.encoding) + def test_intern(self): + self.assertRaises(TypeError, sys.intern) + s = "never interned before" + self.assert_(sys.intern(s) is s) + s2 = s.swapcase().swapcase() + self.assert_(sys.intern(s2) is s) + + # Subclasses of string can't be interned, because they + # provide too much opportunity for insane things to happen. + # We don't want them in the interned dict and if they aren't + # actually interned, we don't want to create the appearance + # that they are by allowing intern() to succeed.
+ class S(str): + def __hash__(self): + return 123 + + self.assertRaises(TypeError, sys.intern, S("abc")) + + # It's still safe to pass these strings to routines that + # call intern internally, e.g. PyObject_SetAttr(). + s = S("abc") + setattr(s, s, s) + self.assertEqual(getattr(s, s), s) + + def test_main(): test.test_support.run_unittest(SysModuleTest) diff --git a/Misc/NEWS b/Misc/NEWS index 2c37fc20dac..4963dcdac71 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,7 +12,7 @@ What's New in Python 3000? TO DO ----- -- See PEP 3000. +- See PEP 3000, 3100. - Test merging certain changes from the 2.5 HEAD code. @@ -36,7 +36,11 @@ TO DO Core and Builtins ----------------- -- Renamed nb_nonzero to nb_bool and __nonzero__ to __bool__ +- Moved intern() to sys.intern(). + +- exec is now a function. + +- Renamed nb_nonzero to nb_bool and __nonzero__ to __bool__. - Classic classes are a thing of the past. All classes are new style. @@ -90,7 +94,7 @@ Core and Builtins - zip returns an iterator - Additions: - set literals + set literals, ellipsis literal Extension Modules diff --git a/Misc/python-mode.el b/Misc/python-mode.el index e70a613da84..6e2741d1941 100644 --- a/Misc/python-mode.el +++ b/Misc/python-mode.el @@ -382,7 +382,7 @@ support for features needed by `python-mode'.") "delattr" "dict" "dir" "divmod" "enumerate" "eval" "execfile" "exit" "file" "filter" "float" "getattr" "globals" "hasattr" - "hash" "hex" "id" "int" "intern" + "hash" "hex" "id" "int" "isinstance" "issubclass" "iter" "len" "license" "list" "locals" "long" "map" "max" "min" "object" "oct" "open" "ord" "pow" "property" "range" diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 73b02205165..3e6d237f5b9 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -1118,31 +1118,6 @@ PyDoc_STRVAR(hex_doc, Return the hexadecimal representation of an integer or long integer."); -static PyObject * -builtin_intern(PyObject *self, PyObject *args) -{ - PyObject *s; - if (!PyArg_ParseTuple(args, 
"S:intern", &s)) - return NULL; - if (!PyString_CheckExact(s)) { - PyErr_SetString(PyExc_TypeError, - "can't intern subclass of string"); - return NULL; - } - Py_INCREF(s); - PyString_InternInPlace(&s); - return s; -} - -PyDoc_STRVAR(intern_doc, -"intern(string) -> string\n\ -\n\ -``Intern'' the given string. This enters the string in the (global)\n\ -table of interned strings whose purpose is to speed up dictionary lookups.\n\ -Return the string itself or the previously interned string object with the\n\ -same value."); - - static PyObject * builtin_iter(PyObject *self, PyObject *args) { @@ -2069,7 +2044,6 @@ static PyMethodDef builtin_methods[] = { {"hash", builtin_hash, METH_O, hash_doc}, {"hex", builtin_hex, METH_O, hex_doc}, {"id", builtin_id, METH_O, id_doc}, - {"intern", builtin_intern, METH_VARARGS, intern_doc}, {"isinstance", builtin_isinstance, METH_VARARGS, isinstance_doc}, {"issubclass", builtin_issubclass, METH_VARARGS, issubclass_doc}, {"iter", builtin_iter, METH_VARARGS, iter_doc}, diff --git a/Python/sysmodule.c b/Python/sysmodule.c index b74a440d4f5..101a6e46e89 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -264,6 +264,32 @@ operating system filenames." #endif + +static PyObject * +sys_intern(PyObject *self, PyObject *args) +{ + PyObject *s; + if (!PyArg_ParseTuple(args, "S:intern", &s)) + return NULL; + if (!PyString_CheckExact(s)) { + PyErr_SetString(PyExc_TypeError, + "can't intern subclass of string"); + return NULL; + } + Py_INCREF(s); + PyString_InternInPlace(&s); + return s; +} + +PyDoc_STRVAR(intern_doc, +"intern(string) -> string\n\ +\n\ +``Intern'' the given string. This enters the string in the (global)\n\ +table of interned strings whose purpose is to speed up dictionary lookups.\n\ +Return the string itself or the previously interned string object with the\n\ +same value."); + + /* * Cached interned string objects used for calling the profile and * trace functions. Initialized by trace_init(). 
@@ -772,6 +798,7 @@ static PyMethodDef sys_methods[] = { {"getwindowsversion", (PyCFunction)sys_getwindowsversion, METH_NOARGS, getwindowsversion_doc}, #endif /* MS_WINDOWS */ + {"intern", sys_intern, METH_VARARGS, intern_doc}, #ifdef USE_MALLOPT {"mdebug", sys_mdebug, METH_VARARGS}, #endif diff --git a/Tools/pybench/Strings.py b/Tools/pybench/Strings.py index 3be8b35e9e3..dc49df1dae1 100644 --- a/Tools/pybench/Strings.py +++ b/Tools/pybench/Strings.py @@ -1,5 +1,6 @@ from pybench import Test from string import join +import sys class ConcatStrings(Test): @@ -174,7 +175,7 @@ class CompareInternedStrings(Test): def test(self): # Make sure the strings *are* interned - s = intern(join(map(str,range(10)))) + s = sys.intern(join(map(str,range(10)))) t = s for i in xrange(self.rounds): @@ -240,7 +241,7 @@ class CompareInternedStrings(Test): def calibrate(self): - s = intern(join(map(str,range(10)))) + s = sys.intern(join(map(str,range(10)))) t = s for i in xrange(self.rounds): diff --git a/Tools/scripts/fixdiv.py b/Tools/scripts/fixdiv.py index 7e1ed0b4776..b2cab888a66 100755 --- a/Tools/scripts/fixdiv.py +++ b/Tools/scripts/fixdiv.py @@ -198,7 +198,7 @@ def readwarnings(warningsfile): list = warnings.get(filename) if list is None: warnings[filename] = list = [] - list.append((int(lineno), intern(what))) + list.append((int(lineno), sys.intern(what))) f.close() return warnings