Patch #1601678: move intern() to sys.intern().
This commit is contained in:
parent
376446dd4e
commit
66a796e5ab
|
@ -1262,17 +1262,3 @@ bypass these functions without concerns about missing something important.
|
|||
argument).
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{intern}{string}
|
||||
Enter \var{string} in the table of ``interned'' strings and return
|
||||
the interned string -- which is \var{string} itself or a copy.
|
||||
Interning strings is useful to gain a little performance on
|
||||
dictionary lookup -- if the keys in a dictionary are interned, and
|
||||
the lookup key is interned, the key comparisons (after hashing) can
|
||||
be done by a pointer compare instead of a string compare. Normally,
|
||||
the names used in Python programs are automatically interned, and
|
||||
the dictionaries used to hold module, class or instance attributes
|
||||
have interned keys. \versionchanged[Interned strings are not
|
||||
immortal (like they used to be in Python 2.2 and before);
|
||||
you must keep a reference to the return value of \function{intern()}
|
||||
around to benefit from it]{2.3}
|
||||
\end{funcdesc}
|
||||
|
|
|
@ -340,6 +340,21 @@ else:
|
|||
\versionadded{1.5.2}
|
||||
\end{datadesc}
|
||||
|
||||
\begin{funcdesc}{intern}{string}
|
||||
Enter \var{string} in the table of ``interned'' strings and return
|
||||
the interned string -- which is \var{string} itself or a copy.
|
||||
Interning strings is useful to gain a little performance on
|
||||
dictionary lookup -- if the keys in a dictionary are interned, and
|
||||
the lookup key is interned, the key comparisons (after hashing) can
|
||||
be done by a pointer compare instead of a string compare. Normally,
|
||||
the names used in Python programs are automatically interned, and
|
||||
the dictionaries used to hold module, class or instance attributes
|
||||
have interned keys. \versionchanged[Interned strings are not
|
||||
immortal (like they used to be in Python 2.2 and before);
|
||||
you must keep a reference to the return value of \function{intern()}
|
||||
around to benefit from it]{2.3}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{datadesc}{last_type}
|
||||
\dataline{last_value}
|
||||
\dataline{last_traceback}
|
||||
|
|
|
@ -2700,7 +2700,7 @@ standard module \module{__builtin__}\refbimodindex{__builtin__}:
|
|||
'complex', 'copyright', 'credits', 'delattr', 'dict', 'dir', 'divmod',
|
||||
'enumerate', 'eval', 'exec', 'execfile', 'exit', 'file', 'filter', 'float',
|
||||
'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'help', 'hex',
|
||||
'id', 'int', 'intern', 'isinstance', 'issubclass', 'iter',
|
||||
'id', 'int', 'isinstance', 'issubclass', 'iter',
|
||||
'len', 'license', 'list', 'locals', 'long', 'map', 'max', 'min',
|
||||
'object', 'oct', 'open', 'ord', 'pow', 'property', 'quit', 'range',
|
||||
'reload', 'repr', 'reversed', 'round', 'set',
|
||||
|
|
|
@ -28,7 +28,7 @@ functions should be applied to nil objects.
|
|||
Interning strings (ob_sstate) tries to ensure that only one string
|
||||
object with a given value exists, so equality tests can be one pointer
|
||||
comparison. This is generally restricted to strings that "look like"
|
||||
Python identifiers, although the intern() builtin can be used to force
|
||||
Python identifiers, although the sys.intern() function can be used to force
|
||||
interning of any string.
|
||||
Together, these sped the interpreter by up to 20%. */
|
||||
|
||||
|
|
|
@ -842,30 +842,6 @@ class BuiltinTest(unittest.TestCase):
|
|||
self.assertEqual(int(Foo4()), 42L)
|
||||
self.assertRaises(TypeError, int, Foo5())
|
||||
|
||||
def test_intern(self):
|
||||
self.assertRaises(TypeError, intern)
|
||||
s = "never interned before"
|
||||
self.assert_(intern(s) is s)
|
||||
s2 = s.swapcase().swapcase()
|
||||
self.assert_(intern(s2) is s)
|
||||
|
||||
# Subclasses of string can't be interned, because they
|
||||
# provide too much opportunity for insane things to happen.
|
||||
# We don't want them in the interned dict and if they aren't
|
||||
# actually interned, we don't want to create the appearance
|
||||
# that they are by allowing intern() to succeed.
|
||||
class S(str):
|
||||
def __hash__(self):
|
||||
return 123
|
||||
|
||||
self.assertRaises(TypeError, intern, S("abc"))
|
||||
|
||||
# It's still safe to pass these strings to routines that
|
||||
# call intern internally, e.g. PyObject_SetAttr().
|
||||
s = S("abc")
|
||||
setattr(s, s, s)
|
||||
self.assertEqual(getattr(s, s), s)
|
||||
|
||||
def test_iter(self):
|
||||
self.assertRaises(TypeError, iter)
|
||||
self.assertRaises(TypeError, iter, 42, 42)
|
||||
|
|
|
@ -350,6 +350,31 @@ class SysModuleTest(unittest.TestCase):
|
|||
# the test runs under regrtest.
|
||||
self.assert_(sys.__stdout__.encoding == sys.__stderr__.encoding)
|
||||
|
||||
def test_intern(self):
|
||||
self.assertRaises(TypeError, sys.intern)
|
||||
s = "never interned before"
|
||||
self.assert_(sys.intern(s) is s)
|
||||
s2 = s.swapcase().swapcase()
|
||||
self.assert_(sys.intern(s2) is s)
|
||||
|
||||
# Subclasses of string can't be interned, because they
|
||||
# provide too much opportunity for insane things to happen.
|
||||
# We don't want them in the interned dict and if they aren't
|
||||
# actually interned, we don't want to create the appearance
|
||||
# that they are by allowing sys.intern() to succeed.
|
||||
class S(str):
|
||||
def __hash__(self):
|
||||
return 123
|
||||
|
||||
self.assertRaises(TypeError, sys.intern, S("abc"))
|
||||
|
||||
# It's still safe to pass these strings to routines that
|
||||
# call intern internally, e.g. PyObject_SetAttr().
|
||||
s = S("abc")
|
||||
setattr(s, s, s)
|
||||
self.assertEqual(getattr(s, s), s)
|
||||
|
||||
|
||||
def test_main():
|
||||
test.test_support.run_unittest(SysModuleTest)
|
||||
|
||||
|
|
10
Misc/NEWS
10
Misc/NEWS
|
@ -12,7 +12,7 @@ What's New in Python 3000?
|
|||
TO DO
|
||||
-----
|
||||
|
||||
- See PEP 3000.
|
||||
- See PEP 3000, 3100.
|
||||
|
||||
- Test merging certain changes from the 2.5 HEAD code.
|
||||
|
||||
|
@ -36,7 +36,11 @@ TO DO
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Renamed nb_nonzero to nb_bool and __nonzero__ to __bool__
|
||||
- Moved intern() to sys.intern().
|
||||
|
||||
- exec is now a function.
|
||||
|
||||
- Renamed nb_nonzero to nb_bool and __nonzero__ to __bool__.
|
||||
|
||||
- Classic classes are a thing of the past. All classes are new style.
|
||||
|
||||
|
@ -90,7 +94,7 @@ Core and Builtins
|
|||
- zip returns an iterator
|
||||
|
||||
- Additions:
|
||||
set literals
|
||||
set literals, ellipsis literal
|
||||
|
||||
|
||||
Extension Modules
|
||||
|
|
|
@ -382,7 +382,7 @@ support for features needed by `python-mode'.")
|
|||
"delattr" "dict" "dir" "divmod"
|
||||
"enumerate" "eval" "execfile" "exit" "file"
|
||||
"filter" "float" "getattr" "globals" "hasattr"
|
||||
"hash" "hex" "id" "int" "intern"
|
||||
"hash" "hex" "id" "int"
|
||||
"isinstance" "issubclass" "iter" "len" "license"
|
||||
"list" "locals" "long" "map" "max" "min" "object"
|
||||
"oct" "open" "ord" "pow" "property" "range"
|
||||
|
|
|
@ -1118,31 +1118,6 @@ PyDoc_STRVAR(hex_doc,
|
|||
Return the hexadecimal representation of an integer or long integer.");
|
||||
|
||||
|
||||
static PyObject *
|
||||
builtin_intern(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *s;
|
||||
if (!PyArg_ParseTuple(args, "S:intern", &s))
|
||||
return NULL;
|
||||
if (!PyString_CheckExact(s)) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"can't intern subclass of string");
|
||||
return NULL;
|
||||
}
|
||||
Py_INCREF(s);
|
||||
PyString_InternInPlace(&s);
|
||||
return s;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(intern_doc,
|
||||
"intern(string) -> string\n\
|
||||
\n\
|
||||
``Intern'' the given string. This enters the string in the (global)\n\
|
||||
table of interned strings whose purpose is to speed up dictionary lookups.\n\
|
||||
Return the string itself or the previously interned string object with the\n\
|
||||
same value.");
|
||||
|
||||
|
||||
static PyObject *
|
||||
builtin_iter(PyObject *self, PyObject *args)
|
||||
{
|
||||
|
@ -2069,7 +2044,6 @@ static PyMethodDef builtin_methods[] = {
|
|||
{"hash", builtin_hash, METH_O, hash_doc},
|
||||
{"hex", builtin_hex, METH_O, hex_doc},
|
||||
{"id", builtin_id, METH_O, id_doc},
|
||||
{"intern", builtin_intern, METH_VARARGS, intern_doc},
|
||||
{"isinstance", builtin_isinstance, METH_VARARGS, isinstance_doc},
|
||||
{"issubclass", builtin_issubclass, METH_VARARGS, issubclass_doc},
|
||||
{"iter", builtin_iter, METH_VARARGS, iter_doc},
|
||||
|
|
|
@ -264,6 +264,32 @@ operating system filenames."
|
|||
|
||||
#endif
|
||||
|
||||
|
||||
static PyObject *
|
||||
sys_intern(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *s;
|
||||
if (!PyArg_ParseTuple(args, "S:intern", &s))
|
||||
return NULL;
|
||||
if (!PyString_CheckExact(s)) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"can't intern subclass of string");
|
||||
return NULL;
|
||||
}
|
||||
Py_INCREF(s);
|
||||
PyString_InternInPlace(&s);
|
||||
return s;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(intern_doc,
|
||||
"intern(string) -> string\n\
|
||||
\n\
|
||||
``Intern'' the given string. This enters the string in the (global)\n\
|
||||
table of interned strings whose purpose is to speed up dictionary lookups.\n\
|
||||
Return the string itself or the previously interned string object with the\n\
|
||||
same value.");
|
||||
|
||||
|
||||
/*
|
||||
* Cached interned string objects used for calling the profile and
|
||||
* trace functions. Initialized by trace_init().
|
||||
|
@ -772,6 +798,7 @@ static PyMethodDef sys_methods[] = {
|
|||
{"getwindowsversion", (PyCFunction)sys_getwindowsversion, METH_NOARGS,
|
||||
getwindowsversion_doc},
|
||||
#endif /* MS_WINDOWS */
|
||||
{"intern", sys_intern, METH_VARARGS, intern_doc},
|
||||
#ifdef USE_MALLOPT
|
||||
{"mdebug", sys_mdebug, METH_VARARGS},
|
||||
#endif
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from pybench import Test
|
||||
from string import join
|
||||
import sys
|
||||
|
||||
class ConcatStrings(Test):
|
||||
|
||||
|
@ -174,7 +175,7 @@ class CompareInternedStrings(Test):
|
|||
def test(self):
|
||||
|
||||
# Make sure the strings *are* interned
|
||||
s = intern(join(map(str,range(10))))
|
||||
s = sys.intern(join(map(str,range(10))))
|
||||
t = s
|
||||
|
||||
for i in xrange(self.rounds):
|
||||
|
@ -240,7 +241,7 @@ class CompareInternedStrings(Test):
|
|||
|
||||
def calibrate(self):
|
||||
|
||||
s = intern(join(map(str,range(10))))
|
||||
s = sys.intern(join(map(str,range(10))))
|
||||
t = s
|
||||
|
||||
for i in xrange(self.rounds):
|
||||
|
|
|
@ -198,7 +198,7 @@ def readwarnings(warningsfile):
|
|||
list = warnings.get(filename)
|
||||
if list is None:
|
||||
warnings[filename] = list = []
|
||||
list.append((int(lineno), intern(what)))
|
||||
list.append((int(lineno), sys.intern(what)))
|
||||
f.close()
|
||||
return warnings
|
||||
|
||||
|
|
Loading…
Reference in New Issue