revert the addition of _pickle because it was causing havoc with 64-bit

This commit is contained in:
Benjamin Peterson 2008-06-12 03:10:02 +00:00
parent f501942c76
commit 75f25f2c9a
12 changed files with 126 additions and 4695 deletions

View File

@ -174,7 +174,7 @@ __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
# Pickling machinery
class _Pickler:
class Pickler:
def __init__(self, file, protocol=None):
"""This takes a binary file for writing a pickle data stream.
@ -182,19 +182,21 @@ class _Pickler:
All protocols now read and write bytes.
The optional protocol argument tells the pickler to use the
given protocol; supported protocols are 0, 1, 2, 3. The default
protocol is 3; a backward-incompatible protocol designed for
Python 3.0.
given protocol; supported protocols are 0, 1, 2. The default
protocol is 2; it's been supported for many years now.
Protocol 1 is more efficient than protocol 0; protocol 2 is
more efficient than protocol 1.
Specifying a negative protocol version selects the highest
protocol version supported. The higher the protocol used, the
more recent the version of Python needed to read the pickle
produced.
The file argument must have a write() method that accepts a single
bytes argument. It can thus be a file object opened for binary
writing, a io.BytesIO instance, or any other custom object that
meets this interface.
The file parameter must have a write() method that accepts a single
string argument. It can thus be an open file object, a StringIO
object, or any other custom object that meets this interface.
"""
if protocol is None:
protocol = DEFAULT_PROTOCOL
@ -202,10 +204,7 @@ class _Pickler:
protocol = HIGHEST_PROTOCOL
elif not 0 <= protocol <= HIGHEST_PROTOCOL:
raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
try:
self.write = file.write
except AttributeError:
raise TypeError("file must have a 'write' attribute")
self.write = file.write
self.memo = {}
self.proto = int(protocol)
self.bin = protocol >= 1
@ -271,10 +270,10 @@ class _Pickler:
return GET + repr(i).encode("ascii") + b'\n'
def save(self, obj, save_persistent_id=True):
def save(self, obj):
# Check for persistent id (defined by a subclass)
pid = self.persistent_id(obj)
if pid is not None and save_persistent_id:
if pid:
self.save_pers(pid)
return
@ -342,7 +341,7 @@ class _Pickler:
def save_pers(self, pid):
# Save a persistent id reference
if self.bin:
self.save(pid, save_persistent_id=False)
self.save(pid)
self.write(BINPERSID)
else:
self.write(PERSID + str(pid).encode("ascii") + b'\n')
@ -351,13 +350,13 @@ class _Pickler:
listitems=None, dictitems=None, obj=None):
# This API is called by some subclasses
# Assert that args is a tuple
# Assert that args is a tuple or None
if not isinstance(args, tuple):
raise PicklingError("args from save_reduce() should be a tuple")
raise PicklingError("args from reduce() should be a tuple")
# Assert that func is callable
if not hasattr(func, '__call__'):
raise PicklingError("func from save_reduce() should be callable")
raise PicklingError("func from reduce should be callable")
save = self.save
write = self.write
@ -439,6 +438,31 @@ class _Pickler:
self.write(obj and TRUE or FALSE)
dispatch[bool] = save_bool
def save_int(self, obj, pack=struct.pack):
    """Write the integer *obj* to the pickle stream.

    When ``self.bin`` is true, the most compact binary opcode that can
    hold the value is used: BININT1 for 0..0xff, BININT2 for 0..0xffff,
    and BININT for anything that fits a signed 4-byte two's-complement
    int.  In every other case (text pickle, or a value too large for
    BININT) the INT opcode is written, followed by the ASCII-encoded
    ``repr()`` of the value and a newline.
    """
    encoded = None
    if self.bin:
        if 0 <= obj <= 0xff:
            # One unsigned byte.
            encoded = BININT1 + bytes([obj])
        elif 0 <= obj <= 0xffff:
            # Two unsigned bytes, low byte first.
            encoded = BININT2 + bytes([obj & 0xff, obj >> 8])
        elif (obj >> 31) in (0, -1):
            # Python's right shift sign-extends, so the bits above
            # bit 31 are all copies of the sign bit exactly when the
            # value fits in a signed 4-byte int.
            encoded = BININT + pack("<i", obj)
    if encoded is None:
        # Text pickle, or int too big for the signed 4-byte format.
        encoded = INT + repr(obj).encode("ascii") + b'\n'
    self.write(encoded)
# XXX save_int is merged into save_long
# dispatch[int] = save_int
def save_long(self, obj, pack=struct.pack):
if self.bin:
# If the int is small enough to fit in a signed 4-byte 2's-comp
@ -479,7 +503,7 @@ class _Pickler:
def save_bytes(self, obj, pack=struct.pack):
if self.proto < 3:
self.save_reduce(bytes, (list(obj),), obj=obj)
self.save_reduce(bytes, (list(obj),))
return
n = len(obj)
if n < 256:
@ -555,6 +579,12 @@ class _Pickler:
dispatch[tuple] = save_tuple
# save_empty_tuple() isn't used by anything in Python 2.3. However, I
# found a Pickler subclass in Zope3 that calls it, so it's not harmless
# to remove it.
def save_empty_tuple(self, obj):
    """Write the EMPTY_TUPLE opcode to the stream; *obj* is not inspected."""
    self.write(EMPTY_TUPLE)
def save_list(self, obj):
write = self.write
@ -666,7 +696,7 @@ class _Pickler:
module = whichmodule(obj, name)
try:
__import__(module, level=0)
__import__(module)
mod = sys.modules[module]
klass = getattr(mod, name)
except (ImportError, KeyError, AttributeError):
@ -690,19 +720,9 @@ class _Pickler:
else:
write(EXT4 + pack("<i", code))
return
# Non-ASCII identifiers are supported only with protocols >= 3.
if self.proto >= 3:
write(GLOBAL + bytes(module, "utf-8") + b'\n' +
bytes(name, "utf-8") + b'\n')
else:
try:
write(GLOBAL + bytes(module, "ascii") + b'\n' +
bytes(name, "ascii") + b'\n')
except UnicodeEncodeError:
raise PicklingError(
"can't pickle global identifier '%s.%s' using "
"pickle protocol %i" % (module, name, self.proto))
write(GLOBAL + bytes(module, "utf-8") + b'\n' +
bytes(name, "utf-8") + b'\n')
self.memoize(obj)
dispatch[FunctionType] = save_global
@ -761,7 +781,7 @@ def whichmodule(func, funcname):
# Unpickling machinery
class _Unpickler:
class Unpickler:
def __init__(self, file, *, encoding="ASCII", errors="strict"):
"""This takes a binary file for reading a pickle data stream.
@ -821,9 +841,6 @@ class _Unpickler:
while stack[k] is not mark: k = k-1
return k
def persistent_load(self, pid):
    """Resolve the persistent id *pid* read from the pickle stream.

    This base implementation supports no persistent ids and always
    raises.  The value returned by this method is what the unpickler
    pushes for a persistent-id opcode, so a subclass that needs
    persistent references overrides it and returns the object that
    *pid* denotes.

    Raises:
        UnpicklingError: always.
    """
    # The exception name was previously misspelled "UnpickingError",
    # which does not appear to be defined anywhere, so this call would
    # fail with NameError instead of the intended pickle error.
    raise UnpicklingError("unsupported persistent id encountered")
dispatch = {}
def load_proto(self):
@ -833,7 +850,7 @@ class _Unpickler:
dispatch[PROTO[0]] = load_proto
def load_persid(self):
pid = self.readline()[:-1].decode("ascii")
pid = self.readline()[:-1]
self.append(self.persistent_load(pid))
dispatch[PERSID[0]] = load_persid
@ -862,9 +879,9 @@ class _Unpickler:
val = True
else:
try:
val = int(data, 0)
val = int(data)
except ValueError:
val = int(data, 0)
val = int(data)
self.append(val)
dispatch[INT[0]] = load_int
@ -916,8 +933,7 @@ class _Unpickler:
break
else:
raise ValueError("insecure string pickle: %r" % orig)
self.append(codecs.escape_decode(rep)[0]
.decode(self.encoding, self.errors))
self.append(codecs.escape_decode(rep)[0])
dispatch[STRING[0]] = load_string
def load_binstring(self):
@ -959,7 +975,7 @@ class _Unpickler:
dispatch[TUPLE[0]] = load_tuple
def load_empty_tuple(self):
self.append(())
self.stack.append(())
dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
def load_tuple1(self):
@ -975,11 +991,11 @@ class _Unpickler:
dispatch[TUPLE3[0]] = load_tuple3
def load_empty_list(self):
self.append([])
self.stack.append([])
dispatch[EMPTY_LIST[0]] = load_empty_list
def load_empty_dictionary(self):
self.append({})
self.stack.append({})
dispatch[EMPTY_DICT[0]] = load_empty_dictionary
def load_list(self):
@ -1006,13 +1022,13 @@ class _Unpickler:
def _instantiate(self, klass, k):
args = tuple(self.stack[k+1:])
del self.stack[k:]
instantiated = False
instantiated = 0
if (not args and
isinstance(klass, type) and
not hasattr(klass, "__getinitargs__")):
value = _EmptyClass()
value.__class__ = klass
instantiated = True
instantiated = 1
if not instantiated:
try:
value = klass(*args)
@ -1022,8 +1038,8 @@ class _Unpickler:
self.append(value)
def load_inst(self):
module = self.readline()[:-1].decode("ascii")
name = self.readline()[:-1].decode("ascii")
module = self.readline()[:-1]
name = self.readline()[:-1]
klass = self.find_class(module, name)
self._instantiate(klass, self.marker())
dispatch[INST[0]] = load_inst
@ -1043,8 +1059,8 @@ class _Unpickler:
dispatch[NEWOBJ[0]] = load_newobj
def load_global(self):
module = self.readline()[:-1].decode("utf-8")
name = self.readline()[:-1].decode("utf-8")
module = self.readline()[:-1]
name = self.readline()[:-1]
klass = self.find_class(module, name)
self.append(klass)
dispatch[GLOBAL[0]] = load_global
@ -1079,7 +1095,11 @@ class _Unpickler:
def find_class(self, module, name):
# Subclasses may override this
__import__(module, level=0)
if isinstance(module, bytes_types):
module = module.decode("utf-8")
if isinstance(name, bytes_types):
name = name.decode("utf-8")
__import__(module)
mod = sys.modules[module]
klass = getattr(mod, name)
return klass
@ -1111,33 +1131,31 @@ class _Unpickler:
dispatch[DUP[0]] = load_dup
def load_get(self):
i = int(self.readline()[:-1])
self.append(self.memo[i])
self.append(self.memo[self.readline()[:-1].decode("ascii")])
dispatch[GET[0]] = load_get
def load_binget(self):
i = self.read(1)[0]
self.append(self.memo[i])
i = ord(self.read(1))
self.append(self.memo[repr(i)])
dispatch[BINGET[0]] = load_binget
def load_long_binget(self):
i = mloads(b'i' + self.read(4))
self.append(self.memo[i])
self.append(self.memo[repr(i)])
dispatch[LONG_BINGET[0]] = load_long_binget
def load_put(self):
i = int(self.readline()[:-1])
self.memo[i] = self.stack[-1]
self.memo[self.readline()[:-1].decode("ascii")] = self.stack[-1]
dispatch[PUT[0]] = load_put
def load_binput(self):
i = self.read(1)[0]
self.memo[i] = self.stack[-1]
i = ord(self.read(1))
self.memo[repr(i)] = self.stack[-1]
dispatch[BINPUT[0]] = load_binput
def load_long_binput(self):
i = mloads(b'i' + self.read(4))
self.memo[i] = self.stack[-1]
self.memo[repr(i)] = self.stack[-1]
dispatch[LONG_BINPUT[0]] = load_long_binput
def load_append(self):
@ -1303,12 +1321,6 @@ def decode_long(data):
n -= 1 << (nbytes * 8)
return n
# Use the faster _pickle if possible
try:
from _pickle import *
except ImportError:
Pickler, Unpickler = _Pickler, _Unpickler
# Shorthands
def dump(obj, file, protocol=None):
@ -1321,14 +1333,14 @@ def dumps(obj, protocol=None):
assert isinstance(res, bytes_types)
return res
def load(file, *, encoding="ASCII", errors="strict"):
return Unpickler(file, encoding=encoding, errors=errors).load()
def load(file):
return Unpickler(file).load()
def loads(s, *, encoding="ASCII", errors="strict"):
def loads(s):
if isinstance(s, str):
raise TypeError("Can't load pickle from unicode string")
file = io.BytesIO(s)
return Unpickler(file, encoding=encoding, errors=errors).load()
return Unpickler(file).load()
# Doctest

View File

@ -2079,12 +2079,11 @@ _dis_test = r"""
70: t TUPLE (MARK at 49)
71: p PUT 5
74: R REDUCE
75: p PUT 6
78: V UNICODE 'def'
83: p PUT 7
86: s SETITEM
87: a APPEND
88: . STOP
75: V UNICODE 'def'
80: p PUT 6
83: s SETITEM
84: a APPEND
85: . STOP
highest protocol among opcodes = 0
Try again with a "binary" pickle.
@ -2116,12 +2115,11 @@ Try again with a "binary" pickle.
49: t TUPLE (MARK at 37)
50: q BINPUT 5
52: R REDUCE
53: q BINPUT 6
55: X BINUNICODE 'def'
63: q BINPUT 7
65: s SETITEM
66: e APPENDS (MARK at 3)
67: . STOP
53: X BINUNICODE 'def'
61: q BINPUT 6
63: s SETITEM
64: e APPENDS (MARK at 3)
65: . STOP
highest protocol among opcodes = 1
Exercise the INST/OBJ/BUILD family.

View File

@ -362,7 +362,7 @@ def create_data():
return x
class AbstractPickleTests(unittest.TestCase):
# Subclass must define self.dumps, self.loads.
# Subclass must define self.dumps, self.loads, self.error.
_testdata = create_data()
@ -463,9 +463,8 @@ class AbstractPickleTests(unittest.TestCase):
self.assertEqual(list(x[0].attr.keys()), [1])
self.assert_(x[0].attr[1] is x)
def test_get(self):
self.assertRaises(KeyError, self.loads, b'g0\np0')
self.assertEquals(self.loads(b'((Kdtp0\nh\x00l.))'), [(100,), (100,)])
def test_garyp(self):
self.assertRaises(self.error, self.loads, b'garyp')
def test_insecure_strings(self):
# XXX Some of these tests are temporarily disabled
@ -956,7 +955,7 @@ class AbstractPickleModuleTests(unittest.TestCase):
f = open(TESTFN, "wb")
try:
f.close()
self.assertRaises(ValueError, pickle.dump, 123, f)
self.assertRaises(ValueError, self.module.dump, 123, f)
finally:
os.remove(TESTFN)
@ -965,24 +964,24 @@ class AbstractPickleModuleTests(unittest.TestCase):
f = open(TESTFN, "wb")
try:
f.close()
self.assertRaises(ValueError, pickle.dump, 123, f)
self.assertRaises(ValueError, self.module.dump, 123, f)
finally:
os.remove(TESTFN)
def test_highest_protocol(self):
# Of course this needs to be changed when HIGHEST_PROTOCOL changes.
self.assertEqual(pickle.HIGHEST_PROTOCOL, 3)
self.assertEqual(self.module.HIGHEST_PROTOCOL, 3)
def test_callapi(self):
from io import BytesIO
f = BytesIO()
# With and without keyword arguments
pickle.dump(123, f, -1)
pickle.dump(123, file=f, protocol=-1)
pickle.dumps(123, -1)
pickle.dumps(123, protocol=-1)
pickle.Pickler(f, -1)
pickle.Pickler(f, protocol=-1)
self.module.dump(123, f, -1)
self.module.dump(123, file=f, protocol=-1)
self.module.dumps(123, -1)
self.module.dumps(123, protocol=-1)
self.module.Pickler(f, -1)
self.module.Pickler(f, protocol=-1)
class AbstractPersistentPicklerTests(unittest.TestCase):

View File

@ -7,42 +7,37 @@ from test.pickletester import AbstractPickleTests
from test.pickletester import AbstractPickleModuleTests
from test.pickletester import AbstractPersistentPicklerTests
try:
import _pickle
has_c_implementation = True
except ImportError:
has_c_implementation = False
class PickleTests(AbstractPickleTests, AbstractPickleModuleTests):
module = pickle
error = KeyError
class PickleTests(AbstractPickleModuleTests):
pass
def dumps(self, arg, proto=None):
return pickle.dumps(arg, proto)
def loads(self, buf):
return pickle.loads(buf)
class PyPicklerTests(AbstractPickleTests):
class PicklerTests(AbstractPickleTests):
pickler = pickle._Pickler
unpickler = pickle._Unpickler
error = KeyError
def dumps(self, arg, proto=None):
f = io.BytesIO()
p = self.pickler(f, proto)
p = pickle.Pickler(f, proto)
p.dump(arg)
f.seek(0)
return bytes(f.read())
def loads(self, buf):
f = io.BytesIO(buf)
u = self.unpickler(f)
u = pickle.Unpickler(f)
return u.load()
class PyPersPicklerTests(AbstractPersistentPicklerTests):
pickler = pickle._Pickler
unpickler = pickle._Unpickler
class PersPicklerTests(AbstractPersistentPicklerTests):
def dumps(self, arg, proto=None):
class PersPickler(self.pickler):
class PersPickler(pickle.Pickler):
def persistent_id(subself, obj):
return self.persistent_id(obj)
f = io.BytesIO()
@ -52,29 +47,19 @@ class PyPersPicklerTests(AbstractPersistentPicklerTests):
return f.read()
def loads(self, buf):
class PersUnpickler(self.unpickler):
class PersUnpickler(pickle.Unpickler):
def persistent_load(subself, obj):
return self.persistent_load(obj)
f = io.BytesIO(buf)
u = PersUnpickler(f)
return u.load()
if has_c_implementation:
class CPicklerTests(PyPicklerTests):
pickler = _pickle.Pickler
unpickler = _pickle.Unpickler
class CPersPicklerTests(PyPersPicklerTests):
pickler = _pickle.Pickler
unpickler = _pickle.Unpickler
def test_main():
tests = [PickleTests, PyPicklerTests, PyPersPicklerTests]
if has_c_implementation:
tests.extend([CPicklerTests, CPersPicklerTests])
support.run_unittest(*tests)
support.run_unittest(
PickleTests,
PicklerTests,
PersPicklerTests
)
support.run_doctest(pickle)
if __name__ == "__main__":

View File

@ -12,6 +12,8 @@ class OptimizedPickleTests(AbstractPickleTests, AbstractPickleModuleTests):
def loads(self, buf):
return pickle.loads(buf)
module = pickle
error = KeyError
def test_main():
support.run_unittest(OptimizedPickleTests)

View File

@ -82,10 +82,6 @@ Library
- Added C optimized implementation of io.StringIO.
- The ``pickle`` module now automatically uses an optimized C
implementation of Pickler and Unpickler when available. The
``cPickle`` module is no longer needed.
- Removed the ``htmllib`` and ``sgmllib`` modules.
- The deprecated ``SmartCookie`` and ``SimpleCookie`` classes have

File diff suppressed because it is too large Load Diff

View File

@ -145,10 +145,6 @@ SOURCE=..\..\Modules\_stringio.c
# End Source File
# Begin Source File
SOURCE=..\..\Modules\_pickle.c
# End Source File
# Begin Source File
SOURCE=..\..\Modules\_functoolsmodule.c
# End Source File
# Begin Source File

View File

@ -376,9 +376,6 @@
<File
RelativePath="..\..\Modules\_stringio.c">
</File>
<File
RelativePath="..\..\Modules\_pickle.c">
</File>
<File
RelativePath="..\..\Modules\_functoolsmodule.c">
</File>

View File

@ -994,10 +994,6 @@
RelativePath="..\..\Modules\_stringio.c"
>
</File>
<File
RelativePath="..\..\Modules\_pickle.c"
>
</File>
<File
RelativePath="..\..\Modules\_functoolsmodule.c"
>

View File

@ -153,7 +153,6 @@ struct _inittab _PyImport_Inittab[] = {
{"_fileio", PyInit__fileio},
{"_bytesio", PyInit__bytesio},
{"_stringio", PyInit__stringio},
{"_pickle", PyInit__pickle},
{"atexit", PyInit_atexit},
/* Sentinel */

View File

@ -422,9 +422,6 @@ class PyBuildExt(build_ext):
exts.append( Extension("_functools", ["_functoolsmodule.c"]) )
# Memory-based IO accelerator modules
exts.append( Extension("_bytesio", ["_bytesio.c"]) )
exts.append( Extension("_stringio", ["_stringio.c"]) )
# C-optimized pickle replacement
exts.append( Extension("_pickle", ["_pickle.c"]) )
# atexit
exts.append( Extension("atexit", ["atexitmodule.c"]) )
# _json speedups