bpo-32503: Avoid creating too small frames in pickles. (#5127)
This commit is contained in:
parent
bd5c7d238c
commit
1211c9a989
|
@ -183,6 +183,7 @@ __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
|
||||||
|
|
||||||
class _Framer:
|
class _Framer:
|
||||||
|
|
||||||
|
_FRAME_SIZE_MIN = 4
|
||||||
_FRAME_SIZE_TARGET = 64 * 1024
|
_FRAME_SIZE_TARGET = 64 * 1024
|
||||||
|
|
||||||
def __init__(self, file_write):
|
def __init__(self, file_write):
|
||||||
|
@ -203,11 +204,12 @@ class _Framer:
|
||||||
if f.tell() >= self._FRAME_SIZE_TARGET or force:
|
if f.tell() >= self._FRAME_SIZE_TARGET or force:
|
||||||
data = f.getbuffer()
|
data = f.getbuffer()
|
||||||
write = self.file_write
|
write = self.file_write
|
||||||
# Issue a single call to the write method of the underlying
|
if len(data) >= self._FRAME_SIZE_MIN:
|
||||||
# file object for the frame opcode with the size of the
|
# Issue a single call to the write method of the underlying
|
||||||
# frame. The concatenation is expected to be less expensive
|
# file object for the frame opcode with the size of the
|
||||||
# than issuing an additional call to write.
|
# frame. The concatenation is expected to be less expensive
|
||||||
write(FRAME + pack("<Q", len(data)))
|
# than issuing an additional call to write.
|
||||||
|
write(FRAME + pack("<Q", len(data)))
|
||||||
|
|
||||||
# Issue a separate call to write to append the frame
|
# Issue a separate call to write to append the frame
|
||||||
# contents without concatenation to the above to avoid a
|
# contents without concatenation to the above to avoid a
|
||||||
|
|
|
@ -2037,6 +2037,7 @@ class AbstractPickleTests(unittest.TestCase):
|
||||||
|
|
||||||
# Exercise framing (proto >= 4) for significant workloads
|
# Exercise framing (proto >= 4) for significant workloads
|
||||||
|
|
||||||
|
FRAME_SIZE_MIN = 4
|
||||||
FRAME_SIZE_TARGET = 64 * 1024
|
FRAME_SIZE_TARGET = 64 * 1024
|
||||||
|
|
||||||
def check_frame_opcodes(self, pickled):
|
def check_frame_opcodes(self, pickled):
|
||||||
|
@ -2047,36 +2048,43 @@ class AbstractPickleTests(unittest.TestCase):
|
||||||
framed by default and are therefore considered a frame by themselves in
|
framed by default and are therefore considered a frame by themselves in
|
||||||
the following consistency check.
|
the following consistency check.
|
||||||
"""
|
"""
|
||||||
last_arg = last_pos = last_frame_opcode_size = None
|
frame_end = frameless_start = None
|
||||||
frameless_opcode_sizes = {
|
frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8', 'BINUNICODE8'}
|
||||||
'BINBYTES': 5,
|
|
||||||
'BINUNICODE': 5,
|
|
||||||
'BINBYTES8': 9,
|
|
||||||
'BINUNICODE8': 9,
|
|
||||||
}
|
|
||||||
for op, arg, pos in pickletools.genops(pickled):
|
for op, arg, pos in pickletools.genops(pickled):
|
||||||
if op.name in frameless_opcode_sizes:
|
if frame_end is not None:
|
||||||
if len(arg) > self.FRAME_SIZE_TARGET:
|
self.assertLessEqual(pos, frame_end)
|
||||||
frame_opcode_size = frameless_opcode_sizes[op.name]
|
if pos == frame_end:
|
||||||
arg = len(arg)
|
frame_end = None
|
||||||
else:
|
|
||||||
continue
|
|
||||||
elif op.name == 'FRAME':
|
|
||||||
frame_opcode_size = 9
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if last_pos is not None:
|
if frame_end is not None: # framed
|
||||||
# The previous frame's size should be equal to the number
|
self.assertNotEqual(op.name, 'FRAME')
|
||||||
# of bytes up to the current frame.
|
if op.name in frameless_opcodes:
|
||||||
frame_size = pos - last_pos - last_frame_opcode_size
|
# Only short bytes and str objects should be written
|
||||||
self.assertEqual(frame_size, last_arg)
|
# in a frame
|
||||||
last_arg, last_pos = arg, pos
|
self.assertLessEqual(len(arg), self.FRAME_SIZE_TARGET)
|
||||||
last_frame_opcode_size = frame_opcode_size
|
|
||||||
# The last frame's size should be equal to the number of bytes up
|
else: # not framed
|
||||||
# to the pickle's end.
|
if (op.name == 'FRAME' or
|
||||||
frame_size = len(pickled) - last_pos - last_frame_opcode_size
|
(op.name in frameless_opcodes and
|
||||||
self.assertEqual(frame_size, last_arg)
|
len(arg) > self.FRAME_SIZE_TARGET)):
|
||||||
|
# Frame or large bytes or str object
|
||||||
|
if frameless_start is not None:
|
||||||
|
# Only short data should be written outside of a frame
|
||||||
|
self.assertLess(pos - frameless_start,
|
||||||
|
self.FRAME_SIZE_MIN)
|
||||||
|
frameless_start = None
|
||||||
|
elif frameless_start is None and op.name != 'PROTO':
|
||||||
|
frameless_start = pos
|
||||||
|
|
||||||
|
if op.name == 'FRAME':
|
||||||
|
self.assertGreaterEqual(arg, self.FRAME_SIZE_MIN)
|
||||||
|
frame_end = pos + 9 + arg
|
||||||
|
|
||||||
|
pos = len(pickled)
|
||||||
|
if frame_end is not None:
|
||||||
|
self.assertEqual(frame_end, pos)
|
||||||
|
elif frameless_start is not None:
|
||||||
|
self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)
|
||||||
|
|
||||||
def test_framing_many_objects(self):
|
def test_framing_many_objects(self):
|
||||||
obj = list(range(10**5))
|
obj = list(range(10**5))
|
||||||
|
@ -2095,7 +2103,8 @@ class AbstractPickleTests(unittest.TestCase):
|
||||||
|
|
||||||
def test_framing_large_objects(self):
|
def test_framing_large_objects(self):
|
||||||
N = 1024 * 1024
|
N = 1024 * 1024
|
||||||
obj = [b'x' * N, b'y' * N, 'z' * N]
|
small_items = [[i] for i in range(10)]
|
||||||
|
obj = [b'x' * N, *small_items, b'y' * N, 'z' * N]
|
||||||
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
|
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
|
||||||
for fast in [False, True]:
|
for fast in [False, True]:
|
||||||
with self.subTest(proto=proto, fast=fast):
|
with self.subTest(proto=proto, fast=fast):
|
||||||
|
@ -2119,12 +2128,9 @@ class AbstractPickleTests(unittest.TestCase):
|
||||||
# Perform full equality check if the lengths match.
|
# Perform full equality check if the lengths match.
|
||||||
self.assertEqual(obj, unpickled)
|
self.assertEqual(obj, unpickled)
|
||||||
n_frames = count_opcode(pickle.FRAME, pickled)
|
n_frames = count_opcode(pickle.FRAME, pickled)
|
||||||
if not fast:
|
# A single frame for small objects between
|
||||||
# One frame per memoize for each large object.
|
# the first two large objects.
|
||||||
self.assertGreaterEqual(n_frames, len(obj))
|
self.assertEqual(n_frames, 1)
|
||||||
else:
|
|
||||||
# One frame at the beginning and one at the end.
|
|
||||||
self.assertGreaterEqual(n_frames, 2)
|
|
||||||
self.check_frame_opcodes(pickled)
|
self.check_frame_opcodes(pickled)
|
||||||
|
|
||||||
def test_optional_frames(self):
|
def test_optional_frames(self):
|
||||||
|
@ -2152,7 +2158,9 @@ class AbstractPickleTests(unittest.TestCase):
|
||||||
|
|
||||||
frame_size = self.FRAME_SIZE_TARGET
|
frame_size = self.FRAME_SIZE_TARGET
|
||||||
num_frames = 20
|
num_frames = 20
|
||||||
obj = [bytes([i]) * frame_size for i in range(num_frames)]
|
# Large byte objects (dict values) interleaved with small objects
|
||||||
|
# (dict keys)
|
||||||
|
obj = {i: bytes([i]) * frame_size for i in range(num_frames)}
|
||||||
|
|
||||||
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
|
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
|
||||||
pickled = self.dumps(obj, proto)
|
pickled = self.dumps(obj, proto)
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Pickling with protocol 4 no longer creates frames that are too small.
|
|
@ -119,8 +119,8 @@ enum {
|
||||||
/* Prefetch size when unpickling (disabled on unpeekable streams) */
|
/* Prefetch size when unpickling (disabled on unpeekable streams) */
|
||||||
PREFETCH = 8192 * 16,
|
PREFETCH = 8192 * 16,
|
||||||
|
|
||||||
|
FRAME_SIZE_MIN = 4,
|
||||||
FRAME_SIZE_TARGET = 64 * 1024,
|
FRAME_SIZE_TARGET = 64 * 1024,
|
||||||
|
|
||||||
FRAME_HEADER_SIZE = 9
|
FRAME_HEADER_SIZE = 9
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -949,13 +949,6 @@ _write_size64(char *out, size_t value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
_Pickler_WriteFrameHeader(PicklerObject *self, char *qdata, size_t frame_len)
|
|
||||||
{
|
|
||||||
qdata[0] = FRAME;
|
|
||||||
_write_size64(qdata + 1, frame_len);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
_Pickler_CommitFrame(PicklerObject *self)
|
_Pickler_CommitFrame(PicklerObject *self)
|
||||||
{
|
{
|
||||||
|
@ -966,7 +959,14 @@ _Pickler_CommitFrame(PicklerObject *self)
|
||||||
return 0;
|
return 0;
|
||||||
frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
|
frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
|
||||||
qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
|
qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
|
||||||
_Pickler_WriteFrameHeader(self, qdata, frame_len);
|
if (frame_len >= FRAME_SIZE_MIN) {
|
||||||
|
qdata[0] = FRAME;
|
||||||
|
_write_size64(qdata + 1, frame_len);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
|
||||||
|
self->output_len -= FRAME_HEADER_SIZE;
|
||||||
|
}
|
||||||
self->frame_start = -1;
|
self->frame_start = -1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue