Patch #1601678: move intern() to sys.intern().

This commit is contained in:
Georg Brandl 2006-12-19 20:50:34 +00:00
parent 376446dd4e
commit 66a796e5ab
12 changed files with 81 additions and 73 deletions

View File

@ -1262,17 +1262,3 @@ bypass these functions without concerns about missing something important.
argument).
\end{funcdesc}
\begin{funcdesc}{intern}{string}
Enter \var{string} in the table of ``interned'' strings and return
the interned string -- which is \var{string} itself or a copy.
Interning strings is useful to gain a little performance on
dictionary lookup -- if the keys in a dictionary are interned, and
the lookup key is interned, the key comparisons (after hashing) can
be done by a pointer compare instead of a string compare. Normally,
the names used in Python programs are automatically interned, and
the dictionaries used to hold module, class or instance attributes
have interned keys. \versionchanged[Interned strings are not
immortal (like they used to be in Python 2.2 and before);
you must keep a reference to the return value of \function{intern()}
around to benefit from it]{2.3}
\end{funcdesc}

View File

@ -340,6 +340,21 @@ else:
\versionadded{1.5.2}
\end{datadesc}
\begin{funcdesc}{intern}{string}
Enter \var{string} in the table of ``interned'' strings and return
the interned string -- which is \var{string} itself or a copy.
Interning strings is useful to gain a little performance on
dictionary lookup -- if the keys in a dictionary are interned, and
the lookup key is interned, the key comparisons (after hashing) can
be done by a pointer compare instead of a string compare. Normally,
the names used in Python programs are automatically interned, and
the dictionaries used to hold module, class or instance attributes
have interned keys. \versionchanged[Interned strings are not
immortal (like they used to be in Python 2.2 and before);
you must keep a reference to the return value of \function{intern()}
around to benefit from it]{2.3}
\end{funcdesc}
\begin{datadesc}{last_type}
\dataline{last_value}
\dataline{last_traceback}

View File

@ -2700,7 +2700,7 @@ standard module \module{__builtin__}\refbimodindex{__builtin__}:
'complex', 'copyright', 'credits', 'delattr', 'dict', 'dir', 'divmod',
'enumerate', 'eval', 'exec', 'execfile', 'exit', 'file', 'filter', 'float',
'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'help', 'hex',
'id', 'int', 'intern', 'isinstance', 'issubclass', 'iter',
'id', 'int', 'isinstance', 'issubclass', 'iter',
'len', 'license', 'list', 'locals', 'long', 'map', 'max', 'min',
'object', 'oct', 'open', 'ord', 'pow', 'property', 'quit', 'range',
'reload', 'repr', 'reversed', 'round', 'set',

View File

@ -28,7 +28,7 @@ functions should be applied to nil objects.
Interning strings (ob_sstate) tries to ensure that only one string
object with a given value exists, so equality tests can be one pointer
comparison. This is generally restricted to strings that "look like"
Python identifiers, although the intern() builtin can be used to force
Python identifiers, although the sys.intern() function can be used to force
interning of any string.
Together, these sped the interpreter by up to 20%. */

View File

@ -842,30 +842,6 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(int(Foo4()), 42L)
self.assertRaises(TypeError, int, Foo5())
def test_intern(self):
self.assertRaises(TypeError, intern)
s = "never interned before"
self.assert_(intern(s) is s)
s2 = s.swapcase().swapcase()
self.assert_(intern(s2) is s)
# Subclasses of string can't be interned, because they
# provide too much opportunity for insane things to happen.
# We don't want them in the interned dict and if they aren't
# actually interned, we don't want to create the appearance
# that they are by allowing intern() to succeeed.
class S(str):
def __hash__(self):
return 123
self.assertRaises(TypeError, intern, S("abc"))
# It's still safe to pass these strings to routines that
# call intern internally, e.g. PyObject_SetAttr().
s = S("abc")
setattr(s, s, s)
self.assertEqual(getattr(s, s), s)
def test_iter(self):
self.assertRaises(TypeError, iter)
self.assertRaises(TypeError, iter, 42, 42)

View File

@ -350,6 +350,31 @@ class SysModuleTest(unittest.TestCase):
# the test runs under regrtest.
self.assert_(sys.__stdout__.encoding == sys.__stderr__.encoding)
def test_intern(self):
self.assertRaises(TypeError, sys.intern)
s = "never interned before"
self.assert_(sys.intern(s) is s)
s2 = s.swapcase().swapcase()
self.assert_(sys.intern(s2) is s)
# Subclasses of string can't be interned, because they
# provide too much opportunity for insane things to happen.
# We don't want them in the interned dict and if they aren't
# actually interned, we don't want to create the appearance
# that they are by allowing intern() to succeed.
class S(str):
def __hash__(self):
return 123
self.assertRaises(TypeError, sys.intern, S("abc"))
# It's still safe to pass these strings to routines that
# call intern internally, e.g. PyObject_SetAttr().
s = S("abc")
setattr(s, s, s)
self.assertEqual(getattr(s, s), s)
def test_main():
test.test_support.run_unittest(SysModuleTest)

View File

@ -12,7 +12,7 @@ What's New in Python 3000?
TO DO
-----
- See PEP 3000.
- See PEP 3000, 3100.
- Test merging certain changes from the 2.5 HEAD code.
@ -36,7 +36,11 @@ TO DO
Core and Builtins
-----------------
- Renamed nb_nonzero to nb_bool and __nonzero__ to __bool__
- Moved intern() to sys.intern().
- exec is now a function.
- Renamed nb_nonzero to nb_bool and __nonzero__ to __bool__.
- Classic classes are a thing of the past. All classes are new style.
@ -90,7 +94,7 @@ Core and Builtins
- zip returns an iterator
- Additions:
set literals
set literals, ellipsis literal
Extension Modules

View File

@ -382,7 +382,7 @@ support for features needed by `python-mode'.")
"delattr" "dict" "dir" "divmod"
"enumerate" "eval" "execfile" "exit" "file"
"filter" "float" "getattr" "globals" "hasattr"
"hash" "hex" "id" "int" "intern"
"hash" "hex" "id" "int"
"isinstance" "issubclass" "iter" "len" "license"
"list" "locals" "long" "map" "max" "min" "object"
"oct" "open" "ord" "pow" "property" "range"

View File

@ -1118,31 +1118,6 @@ PyDoc_STRVAR(hex_doc,
Return the hexadecimal representation of an integer or long integer.");
static PyObject *
builtin_intern(PyObject *self, PyObject *args)
{
PyObject *s;
if (!PyArg_ParseTuple(args, "S:intern", &s))
return NULL;
if (!PyString_CheckExact(s)) {
PyErr_SetString(PyExc_TypeError,
"can't intern subclass of string");
return NULL;
}
Py_INCREF(s);
PyString_InternInPlace(&s);
return s;
}
PyDoc_STRVAR(intern_doc,
"intern(string) -> string\n\
\n\
``Intern'' the given string. This enters the string in the (global)\n\
table of interned strings whose purpose is to speed up dictionary lookups.\n\
Return the string itself or the previously interned string object with the\n\
same value.");
static PyObject *
builtin_iter(PyObject *self, PyObject *args)
{
@ -2069,7 +2044,6 @@ static PyMethodDef builtin_methods[] = {
{"hash", builtin_hash, METH_O, hash_doc},
{"hex", builtin_hex, METH_O, hex_doc},
{"id", builtin_id, METH_O, id_doc},
{"intern", builtin_intern, METH_VARARGS, intern_doc},
{"isinstance", builtin_isinstance, METH_VARARGS, isinstance_doc},
{"issubclass", builtin_issubclass, METH_VARARGS, issubclass_doc},
{"iter", builtin_iter, METH_VARARGS, iter_doc},

View File

@ -264,6 +264,32 @@ operating system filenames."
#endif
/* Implementation of sys.intern(string).
 *
 * Accepts exactly one argument (parsed with the "S" format unit) and
 * returns the interned string object for it.  Returns NULL with an
 * exception set if argument parsing fails, or with TypeError if the
 * argument is an instance of a string subclass rather than an exact
 * string.
 */
static PyObject *
sys_intern(PyObject *self, PyObject *args)
{
PyObject *s;
if (!PyArg_ParseTuple(args, "S:intern", &s))
return NULL;
/* Only exact string objects may be interned; subclasses are rejected. */
if (!PyString_CheckExact(s)) {
PyErr_SetString(PyExc_TypeError,
"can't intern subclass of string");
return NULL;
}
/* Take our own reference before interning: s is passed by address, so
   on return it may refer to a previously interned string with the same
   value instead of the object the caller passed in. */
Py_INCREF(s);
PyString_InternInPlace(&s);
return s;
}
PyDoc_STRVAR(intern_doc,
"intern(string) -> string\n\
\n\
``Intern'' the given string. This enters the string in the (global)\n\
table of interned strings whose purpose is to speed up dictionary lookups.\n\
Return the string itself or the previously interned string object with the\n\
same value.");
/*
* Cached interned string objects used for calling the profile and
* trace functions. Initialized by trace_init().
@ -772,6 +798,7 @@ static PyMethodDef sys_methods[] = {
{"getwindowsversion", (PyCFunction)sys_getwindowsversion, METH_NOARGS,
getwindowsversion_doc},
#endif /* MS_WINDOWS */
{"intern", sys_intern, METH_VARARGS, intern_doc},
#ifdef USE_MALLOPT
{"mdebug", sys_mdebug, METH_VARARGS},
#endif

View File

@ -1,5 +1,6 @@
from pybench import Test
from string import join
import sys
class ConcatStrings(Test):
@ -174,7 +175,7 @@ class CompareInternedStrings(Test):
def test(self):
# Make sure the strings *are* interned
s = intern(join(map(str,range(10))))
s = sys.intern(join(map(str,range(10))))
t = s
for i in xrange(self.rounds):
@ -240,7 +241,7 @@ class CompareInternedStrings(Test):
def calibrate(self):
s = intern(join(map(str,range(10))))
s = sys.intern(join(map(str,range(10))))
t = s
for i in xrange(self.rounds):

View File

@ -198,7 +198,7 @@ def readwarnings(warningsfile):
list = warnings.get(filename)
if list is None:
warnings[filename] = list = []
list.append((int(lineno), intern(what)))
list.append((int(lineno), sys.intern(what)))
f.close()
return warnings