Checkpoint. Make pickle.py read/write bytes.

Fix strict.py so it reads/writes bytes and accepts both string types as format.
This commit is contained in:
Guido van Rossum 2007-05-04 19:56:22 +00:00
parent 612344f127
commit 2e6a4b37ba
2 changed files with 192 additions and 212 deletions

View File

@ -33,6 +33,7 @@ import marshal
import sys import sys
import struct import struct
import re import re
import io
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
"Unpickler", "dump", "dumps", "load", "loads"] "Unpickler", "dump", "dumps", "load", "loads"]
@ -50,6 +51,9 @@ compatible_formats = ["1.0", # Original protocol 0
# know how to read. # know how to read.
HIGHEST_PROTOCOL = 2 HIGHEST_PROTOCOL = 2
# The protocol we write by default. May be less than HIGHEST_PROTOCOL.
DEFAULT_PROTOCOL = 2
# Why use struct.pack() for pickling but marshal.loads() for # Why use struct.pack() for pickling but marshal.loads() for
# unpickling? struct.pack() is 40% faster than marshal.dumps(), but # unpickling? struct.pack() is 40% faster than marshal.dumps(), but
# marshal.loads() is twice as fast as struct.unpack()! # marshal.loads() is twice as fast as struct.unpack()!
@ -89,75 +93,69 @@ try:
except ImportError: except ImportError:
PyStringMap = None PyStringMap = None
# UnicodeType may or may not be exported (normally imported from types)
try:
UnicodeType
except NameError:
UnicodeType = None
# Pickle opcodes. See pickletools.py for extensive docs. The listing # Pickle opcodes. See pickletools.py for extensive docs. The listing
# here is in kind-of alphabetical order of 1-character pickle code. # here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose. # pickletools groups them by purpose.
MARK = '(' # push special markobject on stack MARK = b'(' # push special markobject on stack
STOP = '.' # every pickle ends with STOP STOP = b'.' # every pickle ends with STOP
POP = '0' # discard topmost stack item POP = b'0' # discard topmost stack item
POP_MARK = '1' # discard stack top through topmost markobject POP_MARK = b'1' # discard stack top through topmost markobject
DUP = '2' # duplicate top stack item DUP = b'2' # duplicate top stack item
FLOAT = 'F' # push float object; decimal string argument FLOAT = b'F' # push float object; decimal string argument
INT = 'I' # push integer or bool; decimal string argument INT = b'I' # push integer or bool; decimal string argument
BININT = 'J' # push four-byte signed int BININT = b'J' # push four-byte signed int
BININT1 = 'K' # push 1-byte unsigned int BININT1 = b'K' # push 1-byte unsigned int
LONG = 'L' # push long; decimal string argument LONG = b'L' # push long; decimal string argument
BININT2 = 'M' # push 2-byte unsigned int BININT2 = b'M' # push 2-byte unsigned int
NONE = 'N' # push None NONE = b'N' # push None
PERSID = 'P' # push persistent object; id is taken from string arg PERSID = b'P' # push persistent object; id is taken from string arg
BINPERSID = 'Q' # " " " ; " " " " stack BINPERSID = b'Q' # " " " ; " " " " stack
REDUCE = 'R' # apply callable to argtuple, both on stack REDUCE = b'R' # apply callable to argtuple, both on stack
STRING = 'S' # push string; NL-terminated string argument STRING = b'S' # push string; NL-terminated string argument
BINSTRING = 'T' # push string; counted binary string argument BINSTRING = b'T' # push string; counted binary string argument
SHORT_BINSTRING = 'U' # " " ; " " " " < 256 bytes SHORT_BINSTRING= b'U' # " " ; " " " " < 256 bytes
UNICODE = 'V' # push Unicode string; raw-unicode-escaped'd argument UNICODE = b'V' # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE = 'X' # " " " ; counted UTF-8 string argument BINUNICODE = b'X' # " " " ; counted UTF-8 string argument
APPEND = 'a' # append stack top to list below it APPEND = b'a' # append stack top to list below it
BUILD = 'b' # call __setstate__ or __dict__.update() BUILD = b'b' # call __setstate__ or __dict__.update()
GLOBAL = 'c' # push self.find_class(modname, name); 2 string args GLOBAL = b'c' # push self.find_class(modname, name); 2 string args
DICT = 'd' # build a dict from stack items DICT = b'd' # build a dict from stack items
EMPTY_DICT = '}' # push empty dict EMPTY_DICT = b'}' # push empty dict
APPENDS = 'e' # extend list on stack by topmost stack slice APPENDS = b'e' # extend list on stack by topmost stack slice
GET = 'g' # push item from memo on stack; index is string arg GET = b'g' # push item from memo on stack; index is string arg
BINGET = 'h' # " " " " " " ; " " 1-byte arg BINGET = b'h' # " " " " " " ; " " 1-byte arg
INST = 'i' # build & push class instance INST = b'i' # build & push class instance
LONG_BINGET = 'j' # push item from memo on stack; index is 4-byte arg LONG_BINGET = b'j' # push item from memo on stack; index is 4-byte arg
LIST = 'l' # build list from topmost stack items LIST = b'l' # build list from topmost stack items
EMPTY_LIST = ']' # push empty list EMPTY_LIST = b']' # push empty list
OBJ = 'o' # build & push class instance OBJ = b'o' # build & push class instance
PUT = 'p' # store stack top in memo; index is string arg PUT = b'p' # store stack top in memo; index is string arg
BINPUT = 'q' # " " " " " ; " " 1-byte arg BINPUT = b'q' # " " " " " ; " " 1-byte arg
LONG_BINPUT = 'r' # " " " " " ; " " 4-byte arg LONG_BINPUT = b'r' # " " " " " ; " " 4-byte arg
SETITEM = 's' # add key+value pair to dict SETITEM = b's' # add key+value pair to dict
TUPLE = 't' # build tuple from topmost stack items TUPLE = b't' # build tuple from topmost stack items
EMPTY_TUPLE = ')' # push empty tuple EMPTY_TUPLE = b')' # push empty tuple
SETITEMS = 'u' # modify dict by adding topmost key+value pairs SETITEMS = b'u' # modify dict by adding topmost key+value pairs
BINFLOAT = 'G' # push float; arg is 8-byte float encoding BINFLOAT = b'G' # push float; arg is 8-byte float encoding
TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py TRUE = b'I01\n' # not an opcode; see INT docs in pickletools.py
FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py FALSE = b'I00\n' # not an opcode; see INT docs in pickletools.py
# Protocol 2 # Protocol 2
PROTO = '\x80' # identify pickle protocol PROTO = b'\x80' # identify pickle protocol
NEWOBJ = '\x81' # build object by applying cls.__new__ to argtuple NEWOBJ = b'\x81' # build object by applying cls.__new__ to argtuple
EXT1 = '\x82' # push object from extension registry; 1-byte index EXT1 = b'\x82' # push object from extension registry; 1-byte index
EXT2 = '\x83' # ditto, but 2-byte index EXT2 = b'\x83' # ditto, but 2-byte index
EXT4 = '\x84' # ditto, but 4-byte index EXT4 = b'\x84' # ditto, but 4-byte index
TUPLE1 = '\x85' # build 1-tuple from stack top TUPLE1 = b'\x85' # build 1-tuple from stack top
TUPLE2 = '\x86' # build 2-tuple from two topmost stack items TUPLE2 = b'\x86' # build 2-tuple from two topmost stack items
TUPLE3 = '\x87' # build 3-tuple from three topmost stack items TUPLE3 = b'\x87' # build 3-tuple from three topmost stack items
NEWTRUE = '\x88' # push True NEWTRUE = b'\x88' # push True
NEWFALSE = '\x89' # push False NEWFALSE = b'\x89' # push False
LONG1 = '\x8a' # push long from < 256 bytes LONG1 = b'\x8a' # push long from < 256 bytes
LONG4 = '\x8b' # push really big long LONG4 = b'\x8b' # push really big long
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3] _tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
@ -170,15 +168,13 @@ __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
class Pickler: class Pickler:
def __init__(self, file, protocol=None): def __init__(self, file, protocol=None):
"""This takes a file-like object for writing a pickle data stream. """This takes a binary file for writing a pickle data stream.
All protocols now read and write bytes.
The optional protocol argument tells the pickler to use the The optional protocol argument tells the pickler to use the
given protocol; supported protocols are 0, 1, 2. The default given protocol; supported protocols are 0, 1, 2. The default
protocol is 0, to be backwards compatible. (Protocol 0 is the protocol is 2; it's been supported for many years now.
only protocol that can be written to a file opened in text
mode and read back successfully. When using a protocol higher
than 0, make sure the file is opened in binary mode, both when
pickling and unpickling.)
Protocol 1 is more efficient than protocol 0; protocol 2 is Protocol 1 is more efficient than protocol 0; protocol 2 is
more efficient than protocol 1. more efficient than protocol 1.
@ -194,7 +190,7 @@ class Pickler:
""" """
if protocol is None: if protocol is None:
protocol = 0 protocol = DEFAULT_PROTOCOL
if protocol < 0: if protocol < 0:
protocol = HIGHEST_PROTOCOL protocol = HIGHEST_PROTOCOL
elif not 0 <= protocol <= HIGHEST_PROTOCOL: elif not 0 <= protocol <= HIGHEST_PROTOCOL:
@ -219,7 +215,7 @@ class Pickler:
def dump(self, obj): def dump(self, obj):
"""Write a pickled representation of obj to the open file.""" """Write a pickled representation of obj to the open file."""
if self.proto >= 2: if self.proto >= 2:
self.write(PROTO + chr(self.proto)) self.write(PROTO + bytes([self.proto]))
self.save(obj) self.save(obj)
self.write(STOP) self.write(STOP)
@ -249,21 +245,21 @@ class Pickler:
def put(self, i, pack=struct.pack): def put(self, i, pack=struct.pack):
if self.bin: if self.bin:
if i < 256: if i < 256:
return BINPUT + chr(i) return BINPUT + bytes([i])
else: else:
return LONG_BINPUT + pack("<i", i) return LONG_BINPUT + pack("<i", i)
return PUT + repr(i) + '\n' return PUT + bytes(repr(i)) + b'\n'
# Return a GET (BINGET, LONG_BINGET) opcode string, with argument i. # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
def get(self, i, pack=struct.pack): def get(self, i, pack=struct.pack):
if self.bin: if self.bin:
if i < 256: if i < 256:
return BINGET + chr(i) return BINGET + bytes([i])
else: else:
return LONG_BINGET + pack("<i", i) return LONG_BINGET + pack("<i", i)
return GET + repr(i) + '\n' return GET + bytes(repr(i)) + b'\n'
def save(self, obj): def save(self, obj):
# Check for persistent id (defined by a subclass) # Check for persistent id (defined by a subclass)
@ -312,7 +308,7 @@ class Pickler:
(t.__name__, obj)) (t.__name__, obj))
# Check for string returned by reduce(), meaning "save as global" # Check for string returned by reduce(), meaning "save as global"
if type(rv) is StringType: if inistance(rv, basestring):
self.save_global(obj, rv) self.save_global(obj, rv)
return return
@ -339,7 +335,7 @@ class Pickler:
self.save(pid) self.save(pid)
self.write(BINPERSID) self.write(BINPERSID)
else: else:
self.write(PERSID + str(pid) + '\n') self.write(PERSID + bytes(str(pid)) + b'\n')
def save_reduce(self, func, args, state=None, def save_reduce(self, func, args, state=None,
listitems=None, dictitems=None, obj=None): listitems=None, dictitems=None, obj=None):
@ -441,10 +437,10 @@ class Pickler:
# First one- and two-byte unsigned ints: # First one- and two-byte unsigned ints:
if obj >= 0: if obj >= 0:
if obj <= 0xff: if obj <= 0xff:
self.write(BININT1 + chr(obj)) self.write(BININT1 + bytes([obj]))
return return
if obj <= 0xffff: if obj <= 0xffff:
self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8)) self.write(BININT2 + bytes([obj&0xff, obj>>8]))
return return
# Next check for 4-byte signed ints: # Next check for 4-byte signed ints:
high_bits = obj >> 31 # note that Python shift sign-extends high_bits = obj >> 31 # note that Python shift sign-extends
@ -454,7 +450,7 @@ class Pickler:
self.write(BININT + pack("<i", obj)) self.write(BININT + pack("<i", obj))
return return
# Text pickle, or int too big to fit in signed 4-byte format. # Text pickle, or int too big to fit in signed 4-byte format.
self.write(INT + repr(obj) + '\n') self.write(INT + bytes(repr(obj)) + b'\n')
# XXX save_int is merged into save_long # XXX save_int is merged into save_long
# dispatch[IntType] = save_int # dispatch[IntType] = save_int
@ -466,10 +462,10 @@ class Pickler:
# First one- and two-byte unsigned ints: # First one- and two-byte unsigned ints:
if obj >= 0: if obj >= 0:
if obj <= 0xff: if obj <= 0xff:
self.write(BININT1 + chr(obj)) self.write(BININT1 + bytes([obj]))
return return
if obj <= 0xffff: if obj <= 0xffff:
self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8)) self.write(BININT2, bytes([obj&0xff, obj>>8]))
return return
# Next check for 4-byte signed ints: # Next check for 4-byte signed ints:
high_bits = obj >> 31 # note that Python shift sign-extends high_bits = obj >> 31 # note that Python shift sign-extends
@ -479,74 +475,46 @@ class Pickler:
self.write(BININT + pack("<i", obj)) self.write(BININT + pack("<i", obj))
return return
if self.proto >= 2: if self.proto >= 2:
bytes = encode_long(obj) encoded = encode_long(obj)
n = len(bytes) n = len(encoded)
if n < 256: if n < 256:
self.write(LONG1 + chr(n) + bytes) self.write(LONG1 + bytes([n]) + encoded)
else: else:
self.write(LONG4 + pack("<i", n) + bytes) self.write(LONG4 + pack("<i", n) + encoded)
return return
self.write(LONG + repr(obj) + '\n') self.write(LONG + bytes(repr(obj)) + b'\n')
dispatch[LongType] = save_long dispatch[LongType] = save_long
def save_float(self, obj, pack=struct.pack): def save_float(self, obj, pack=struct.pack):
if self.bin: if self.bin:
self.write(BINFLOAT + pack('>d', obj)) self.write(BINFLOAT + pack('>d', obj))
else: else:
self.write(FLOAT + repr(obj) + '\n') self.write(FLOAT + bytes(repr(obj)) + b'\n')
dispatch[FloatType] = save_float dispatch[FloatType] = save_float
def save_string(self, obj, pack=struct.pack): def save_string(self, obj, pack=struct.pack):
if self.bin: if self.bin:
n = len(obj) n = len(obj)
if n < 256: if n < 256:
self.write(SHORT_BINSTRING + chr(n) + obj) self.write(SHORT_BINSTRING + bytes([n]) + bytes(obj))
else: else:
self.write(BINSTRING + pack("<i", n) + obj) self.write(BINSTRING + pack("<i", n) + bytes(obj))
else: else:
self.write(STRING + repr(obj) + '\n') self.write(STRING + bytes(repr(obj)) + b'\n')
self.memoize(obj) self.memoize(obj)
dispatch[StringType] = save_string dispatch[str8] = save_string
def save_unicode(self, obj, pack=struct.pack): def save_unicode(self, obj, pack=struct.pack):
if self.bin: if self.bin:
encoding = obj.encode('utf-8') encoded = obj.encode('utf-8')
n = len(encoding) n = len(encoded)
self.write(BINUNICODE + pack("<i", n) + encoding) self.write(BINUNICODE + pack("<i", n) + encoded)
else: else:
obj = obj.replace("\\", "\\u005c") obj = obj.replace("\\", "\\u005c")
obj = obj.replace("\n", "\\u000a") obj = obj.replace("\n", "\\u000a")
self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n') self.write(UNICODE + obj.encode('raw-unicode-escape') + 'b\n')
self.memoize(obj) self.memoize(obj)
dispatch[UnicodeType] = save_unicode dispatch[str] = save_unicode
if StringType == UnicodeType:
# This is true for Jython
def save_string(self, obj, pack=struct.pack):
str = obj.isunicode()
if self.bin:
if str:
obj = obj.encode("utf-8")
l = len(obj)
if l < 256 and not str:
self.write(SHORT_BINSTRING + chr(l) + obj)
else:
s = pack("<i", l)
if str:
self.write(BINUNICODE + s + obj)
else:
self.write(BINSTRING + s + obj)
else:
if str:
obj = obj.replace("\\", "\\u005c")
obj = obj.replace("\n", "\\u000a")
obj = obj.encode('raw-unicode-escape')
self.write(UNICODE + obj + '\n')
else:
self.write(STRING + repr(obj) + '\n')
self.memoize(obj)
dispatch[StringType] = save_string
def save_tuple(self, obj): def save_tuple(self, obj):
write = self.write write = self.write
@ -736,14 +704,14 @@ class Pickler:
if code: if code:
assert code > 0 assert code > 0
if code <= 0xff: if code <= 0xff:
write(EXT1 + chr(code)) write(EXT1 + bytes([code]))
elif code <= 0xffff: elif code <= 0xffff:
write("%c%c%c" % (EXT2, code&0xff, code>>8)) write(EXT2 + bytes([code&0xff, code>>8]))
else: else:
write(EXT4 + pack("<i", code)) write(EXT4 + pack("<i", code))
return return
write(GLOBAL + module + '\n' + name + '\n') write(GLOBAL + bytes(module) + b'\n' + bytes(name) + b'\n')
self.memoize(obj) self.memoize(obj)
dispatch[ClassType] = save_global dispatch[ClassType] = save_global
@ -806,7 +774,7 @@ def whichmodule(func, funcname):
class Unpickler: class Unpickler:
def __init__(self, file): def __init__(self, file):
"""This takes a file-like object for reading a pickle data stream. """This takes a binary file for reading a pickle data stream.
The protocol version of the pickle is detected automatically, so no The protocol version of the pickle is detected automatically, so no
proto argument is needed. proto argument is needed.
@ -817,10 +785,25 @@ class Unpickler:
object can be a file object opened for reading, a StringIO object, object can be a file object opened for reading, a StringIO object,
or any other custom object that meets this interface. or any other custom object that meets this interface.
""" """
self.readline = file.readline try:
self.readline = file.readline
except AttributeError:
self.file = file
self.read = file.read self.read = file.read
self.memo = {} self.memo = {}
def readline(self):
# XXX Slow but at least correct
b = bytes()
while True:
c = self.file.read(1)
if not c:
break
b += c
if c == b'\n':
break
return b
def load(self): def load(self):
"""Read a pickled object representation from the open file. """Read a pickled object representation from the open file.
@ -834,7 +817,9 @@ class Unpickler:
try: try:
while 1: while 1:
key = read(1) key = read(1)
dispatch[key](self) if not key:
raise EOFError
dispatch[key[0]](self)
except _Stop as stopinst: except _Stop as stopinst:
return stopinst.value return stopinst.value
@ -855,37 +840,33 @@ class Unpickler:
dispatch = {} dispatch = {}
def load_eof(self):
raise EOFError
dispatch[''] = load_eof
def load_proto(self): def load_proto(self):
proto = ord(self.read(1)) proto = ord(self.read(1))
if not 0 <= proto <= 2: if not 0 <= proto <= 2:
raise ValueError, "unsupported pickle protocol: %d" % proto raise ValueError, "unsupported pickle protocol: %d" % proto
dispatch[PROTO] = load_proto dispatch[PROTO[0]] = load_proto
def load_persid(self): def load_persid(self):
pid = self.readline()[:-1] pid = self.readline()[:-1]
self.append(self.persistent_load(pid)) self.append(self.persistent_load(pid))
dispatch[PERSID] = load_persid dispatch[PERSID[0]] = load_persid
def load_binpersid(self): def load_binpersid(self):
pid = self.stack.pop() pid = self.stack.pop()
self.append(self.persistent_load(pid)) self.append(self.persistent_load(pid))
dispatch[BINPERSID] = load_binpersid dispatch[BINPERSID[0]] = load_binpersid
def load_none(self): def load_none(self):
self.append(None) self.append(None)
dispatch[NONE] = load_none dispatch[NONE[0]] = load_none
def load_false(self): def load_false(self):
self.append(False) self.append(False)
dispatch[NEWFALSE] = load_false dispatch[NEWFALSE[0]] = load_false
def load_true(self): def load_true(self):
self.append(True) self.append(True)
dispatch[NEWTRUE] = load_true dispatch[NEWTRUE[0]] = load_true
def load_int(self): def load_int(self):
data = self.readline() data = self.readline()
@ -899,43 +880,43 @@ class Unpickler:
except ValueError: except ValueError:
val = int(data) val = int(data)
self.append(val) self.append(val)
dispatch[INT] = load_int dispatch[INT[0]] = load_int
def load_binint(self): def load_binint(self):
self.append(mloads('i' + self.read(4))) self.append(mloads(b'i' + self.read(4)))
dispatch[BININT] = load_binint dispatch[BININT[0]] = load_binint
def load_binint1(self): def load_binint1(self):
self.append(ord(self.read(1))) self.append(ord(self.read(1)))
dispatch[BININT1] = load_binint1 dispatch[BININT1[0]] = load_binint1
def load_binint2(self): def load_binint2(self):
self.append(mloads('i' + self.read(2) + '\000\000')) self.append(mloads(b'i' + self.read(2) + '\000\000'))
dispatch[BININT2] = load_binint2 dispatch[BININT2[0]] = load_binint2
def load_long(self): def load_long(self):
self.append(int(self.readline()[:-1], 0)) self.append(int(self.readline()[:-1], 0))
dispatch[LONG] = load_long dispatch[LONG[0]] = load_long
def load_long1(self): def load_long1(self):
n = ord(self.read(1)) n = ord(self.read(1))
bytes = self.read(n) data = self.read(n)
self.append(decode_long(bytes)) self.append(decode_long(data))
dispatch[LONG1] = load_long1 dispatch[LONG1[0]] = load_long1
def load_long4(self): def load_long4(self):
n = mloads('i' + self.read(4)) n = mloads(b'i' + self.read(4))
bytes = self.read(n) data = self.read(n)
self.append(decode_long(bytes)) self.append(decode_long(data))
dispatch[LONG4] = load_long4 dispatch[LONG4[0]] = load_long4
def load_float(self): def load_float(self):
self.append(float(self.readline()[:-1])) self.append(float(self.readline()[:-1]))
dispatch[FLOAT] = load_float dispatch[FLOAT[0]] = load_float
def load_binfloat(self, unpack=struct.unpack): def load_binfloat(self, unpack=struct.unpack):
self.append(unpack('>d', self.read(8))[0]) self.append(unpack('>d', self.read(8))[0])
dispatch[BINFLOAT] = load_binfloat dispatch[BINFLOAT[0]] = load_binfloat
def load_string(self): def load_string(self):
rep = self.readline()[:-1] rep = self.readline()[:-1]
@ -948,60 +929,60 @@ class Unpickler:
else: else:
raise ValueError, "insecure string pickle" raise ValueError, "insecure string pickle"
self.append(rep.decode("string-escape")) self.append(rep.decode("string-escape"))
dispatch[STRING] = load_string dispatch[STRING[0]] = load_string
def load_binstring(self): def load_binstring(self):
len = mloads('i' + self.read(4)) len = mloads(b'i' + self.read(4))
self.append(self.read(len)) self.append(self.read(len))
dispatch[BINSTRING] = load_binstring dispatch[BINSTRING[0]] = load_binstring
def load_unicode(self): def load_unicode(self):
self.append(str(self.readline()[:-1],'raw-unicode-escape')) self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
dispatch[UNICODE] = load_unicode dispatch[UNICODE[0]] = load_unicode
def load_binunicode(self): def load_binunicode(self):
len = mloads('i' + self.read(4)) len = mloads(b'i' + self.read(4))
self.append(str(self.read(len),'utf-8')) self.append(str(self.read(len), 'utf-8'))
dispatch[BINUNICODE] = load_binunicode dispatch[BINUNICODE[0]] = load_binunicode
def load_short_binstring(self): def load_short_binstring(self):
len = ord(self.read(1)) len = ord(self.read(1))
self.append(self.read(len)) self.append(self.read(len))
dispatch[SHORT_BINSTRING] = load_short_binstring dispatch[SHORT_BINSTRING[0]] = load_short_binstring
def load_tuple(self): def load_tuple(self):
k = self.marker() k = self.marker()
self.stack[k:] = [tuple(self.stack[k+1:])] self.stack[k:] = [tuple(self.stack[k+1:])]
dispatch[TUPLE] = load_tuple dispatch[TUPLE[0]] = load_tuple
def load_empty_tuple(self): def load_empty_tuple(self):
self.stack.append(()) self.stack.append(())
dispatch[EMPTY_TUPLE] = load_empty_tuple dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
def load_tuple1(self): def load_tuple1(self):
self.stack[-1] = (self.stack[-1],) self.stack[-1] = (self.stack[-1],)
dispatch[TUPLE1] = load_tuple1 dispatch[TUPLE1[0]] = load_tuple1
def load_tuple2(self): def load_tuple2(self):
self.stack[-2:] = [(self.stack[-2], self.stack[-1])] self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
dispatch[TUPLE2] = load_tuple2 dispatch[TUPLE2[0]] = load_tuple2
def load_tuple3(self): def load_tuple3(self):
self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])] self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
dispatch[TUPLE3] = load_tuple3 dispatch[TUPLE3[0]] = load_tuple3
def load_empty_list(self): def load_empty_list(self):
self.stack.append([]) self.stack.append([])
dispatch[EMPTY_LIST] = load_empty_list dispatch[EMPTY_LIST[0]] = load_empty_list
def load_empty_dictionary(self): def load_empty_dictionary(self):
self.stack.append({}) self.stack.append({})
dispatch[EMPTY_DICT] = load_empty_dictionary dispatch[EMPTY_DICT[0]] = load_empty_dictionary
def load_list(self): def load_list(self):
k = self.marker() k = self.marker()
self.stack[k:] = [self.stack[k+1:]] self.stack[k:] = [self.stack[k+1:]]
dispatch[LIST] = load_list dispatch[LIST[0]] = load_list
def load_dict(self): def load_dict(self):
k = self.marker() k = self.marker()
@ -1012,7 +993,7 @@ class Unpickler:
value = items[i+1] value = items[i+1]
d[key] = value d[key] = value
self.stack[k:] = [d] self.stack[k:] = [d]
dispatch[DICT] = load_dict dispatch[DICT[0]] = load_dict
# INST and OBJ differ only in how they get a class object. It's not # INST and OBJ differ only in how they get a class object. It's not
# only sensible to do the rest in a common routine, the two routines # only sensible to do the rest in a common routine, the two routines
@ -1047,43 +1028,43 @@ class Unpickler:
name = self.readline()[:-1] name = self.readline()[:-1]
klass = self.find_class(module, name) klass = self.find_class(module, name)
self._instantiate(klass, self.marker()) self._instantiate(klass, self.marker())
dispatch[INST] = load_inst dispatch[INST[0]] = load_inst
def load_obj(self): def load_obj(self):
# Stack is ... markobject classobject arg1 arg2 ... # Stack is ... markobject classobject arg1 arg2 ...
k = self.marker() k = self.marker()
klass = self.stack.pop(k+1) klass = self.stack.pop(k+1)
self._instantiate(klass, k) self._instantiate(klass, k)
dispatch[OBJ] = load_obj dispatch[OBJ[0]] = load_obj
def load_newobj(self): def load_newobj(self):
args = self.stack.pop() args = self.stack.pop()
cls = self.stack[-1] cls = self.stack[-1]
obj = cls.__new__(cls, *args) obj = cls.__new__(cls, *args)
self.stack[-1] = obj self.stack[-1] = obj
dispatch[NEWOBJ] = load_newobj dispatch[NEWOBJ[0]] = load_newobj
def load_global(self): def load_global(self):
module = self.readline()[:-1] module = self.readline()[:-1]
name = self.readline()[:-1] name = self.readline()[:-1]
klass = self.find_class(module, name) klass = self.find_class(module, name)
self.append(klass) self.append(klass)
dispatch[GLOBAL] = load_global dispatch[GLOBAL[0]] = load_global
def load_ext1(self): def load_ext1(self):
code = ord(self.read(1)) code = ord(self.read(1))
self.get_extension(code) self.get_extension(code)
dispatch[EXT1] = load_ext1 dispatch[EXT1[0]] = load_ext1
def load_ext2(self): def load_ext2(self):
code = mloads('i' + self.read(2) + '\000\000') code = mloads(b'i' + self.read(2) + b'\000\000')
self.get_extension(code) self.get_extension(code)
dispatch[EXT2] = load_ext2 dispatch[EXT2[0]] = load_ext2
def load_ext4(self): def load_ext4(self):
code = mloads('i' + self.read(4)) code = mloads(b'i' + self.read(4))
self.get_extension(code) self.get_extension(code)
dispatch[EXT4] = load_ext4 dispatch[EXT4[0]] = load_ext4
def get_extension(self, code): def get_extension(self, code):
nil = [] nil = []
@ -1111,55 +1092,55 @@ class Unpickler:
func = stack[-1] func = stack[-1]
value = func(*args) value = func(*args)
stack[-1] = value stack[-1] = value
dispatch[REDUCE] = load_reduce dispatch[REDUCE[0]] = load_reduce
def load_pop(self): def load_pop(self):
del self.stack[-1] del self.stack[-1]
dispatch[POP] = load_pop dispatch[POP[0]] = load_pop
def load_pop_mark(self): def load_pop_mark(self):
k = self.marker() k = self.marker()
del self.stack[k:] del self.stack[k:]
dispatch[POP_MARK] = load_pop_mark dispatch[POP_MARK[0]] = load_pop_mark
def load_dup(self): def load_dup(self):
self.append(self.stack[-1]) self.append(self.stack[-1])
dispatch[DUP] = load_dup dispatch[DUP[0]] = load_dup
def load_get(self): def load_get(self):
self.append(self.memo[self.readline()[:-1]]) self.append(self.memo[self.readline()[:-1]])
dispatch[GET] = load_get dispatch[GET[0]] = load_get
def load_binget(self): def load_binget(self):
i = ord(self.read(1)) i = ord(self.read(1))
self.append(self.memo[repr(i)]) self.append(self.memo[repr(i)])
dispatch[BINGET] = load_binget dispatch[BINGET[0]] = load_binget
def load_long_binget(self): def load_long_binget(self):
i = mloads('i' + self.read(4)) i = mloads(b'i' + self.read(4))
self.append(self.memo[repr(i)]) self.append(self.memo[repr(i)])
dispatch[LONG_BINGET] = load_long_binget dispatch[LONG_BINGET[0]] = load_long_binget
def load_put(self): def load_put(self):
self.memo[self.readline()[:-1]] = self.stack[-1] self.memo[self.readline()[:-1]] = self.stack[-1]
dispatch[PUT] = load_put dispatch[PUT[0]] = load_put
def load_binput(self): def load_binput(self):
i = ord(self.read(1)) i = ord(self.read(1))
self.memo[repr(i)] = self.stack[-1] self.memo[repr(i)] = self.stack[-1]
dispatch[BINPUT] = load_binput dispatch[BINPUT[0]] = load_binput
def load_long_binput(self): def load_long_binput(self):
i = mloads('i' + self.read(4)) i = mloads(b'i' + self.read(4))
self.memo[repr(i)] = self.stack[-1] self.memo[repr(i)] = self.stack[-1]
dispatch[LONG_BINPUT] = load_long_binput dispatch[LONG_BINPUT[0]] = load_long_binput
def load_append(self): def load_append(self):
stack = self.stack stack = self.stack
value = stack.pop() value = stack.pop()
list = stack[-1] list = stack[-1]
list.append(value) list.append(value)
dispatch[APPEND] = load_append dispatch[APPEND[0]] = load_append
def load_appends(self): def load_appends(self):
stack = self.stack stack = self.stack
@ -1167,7 +1148,7 @@ class Unpickler:
list = stack[mark - 1] list = stack[mark - 1]
list.extend(stack[mark + 1:]) list.extend(stack[mark + 1:])
del stack[mark:] del stack[mark:]
dispatch[APPENDS] = load_appends dispatch[APPENDS[0]] = load_appends
def load_setitem(self): def load_setitem(self):
stack = self.stack stack = self.stack
@ -1175,7 +1156,7 @@ class Unpickler:
key = stack.pop() key = stack.pop()
dict = stack[-1] dict = stack[-1]
dict[key] = value dict[key] = value
dispatch[SETITEM] = load_setitem dispatch[SETITEM[0]] = load_setitem
def load_setitems(self): def load_setitems(self):
stack = self.stack stack = self.stack
@ -1185,7 +1166,7 @@ class Unpickler:
dict[stack[i]] = stack[i + 1] dict[stack[i]] = stack[i + 1]
del stack[mark:] del stack[mark:]
dispatch[SETITEMS] = load_setitems dispatch[SETITEMS[0]] = load_setitems
def load_build(self): def load_build(self):
stack = self.stack stack = self.stack
@ -1216,16 +1197,16 @@ class Unpickler:
if slotstate: if slotstate:
for k, v in slotstate.items(): for k, v in slotstate.items():
setattr(inst, k, v) setattr(inst, k, v)
dispatch[BUILD] = load_build dispatch[BUILD[0]] = load_build
def load_mark(self): def load_mark(self):
self.append(self.mark) self.append(self.mark)
dispatch[MARK] = load_mark dispatch[MARK[0]] = load_mark
def load_stop(self): def load_stop(self):
value = self.stack.pop() value = self.stack.pop()
raise _Stop(value) raise _Stop(value)
dispatch[STOP] = load_stop dispatch[STOP[0]] = load_stop
# Helper class for load_inst/load_obj # Helper class for load_inst/load_obj
@ -1332,16 +1313,11 @@ def decode_long(data):
# Shorthands # Shorthands
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
def dump(obj, file, protocol=None): def dump(obj, file, protocol=None):
Pickler(file, protocol).dump(obj) Pickler(file, protocol).dump(obj)
def dumps(obj, protocol=None): def dumps(obj, protocol=None):
file = StringIO() file = io.BytesIO()
Pickler(file, protocol).dump(obj) Pickler(file, protocol).dump(obj)
return file.getvalue() return file.getvalue()
@ -1349,7 +1325,7 @@ def load(file):
return Unpickler(file).load() return Unpickler(file).load()
def loads(str): def loads(str):
file = StringIO(str) file = io.BytesIO(str)
return Unpickler(file).load() return Unpickler(file).load()
# Doctest # Doctest

View File

@ -25,7 +25,11 @@ Whitespace between formats is ignored.
The variable struct.error is an exception raised on errors. The variable struct.error is an exception raised on errors.
""" """
__version__ = '0.1'
# XXX Move the bytes and str8 casts into the _struct module
__version__ = '3.0'
from _struct import Struct, error from _struct import Struct, error
@ -36,7 +40,7 @@ def _compile(fmt):
# Internal: compile struct pattern # Internal: compile struct pattern
if len(_cache) >= _MAXCACHE: if len(_cache) >= _MAXCACHE:
_cache.clear() _cache.clear()
s = Struct(fmt) s = Struct(str8(fmt))
_cache[fmt] = s _cache[fmt] = s
return s return s
@ -60,7 +64,7 @@ def pack(fmt, *args):
o = _cache[fmt] o = _cache[fmt]
except KeyError: except KeyError:
o = _compile(fmt) o = _compile(fmt)
return o.pack(*args) return bytes(o.pack(*args))
def pack_into(fmt, buf, offset, *args): def pack_into(fmt, buf, offset, *args):
""" """
@ -72,7 +76,7 @@ def pack_into(fmt, buf, offset, *args):
o = _cache[fmt] o = _cache[fmt]
except KeyError: except KeyError:
o = _compile(fmt) o = _compile(fmt)
return o.pack_into(buf, offset, *args) return bytes(o.pack_into(buf, offset, *args))
def unpack(fmt, s): def unpack(fmt, s):
""" """