bpo-32759: Free unused arenas in multiprocessing.heap (GH-5827)

Large shared arrays allocated using multiprocessing would remain allocated
until the process ends.
This commit is contained in:
Antoine Pitrou 2018-04-09 17:37:55 +02:00 committed by GitHub
parent 2ef65f346a
commit e4679cd644
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 148 additions and 44 deletions

View File

@ -8,6 +8,7 @@
# #
import bisect import bisect
from collections import defaultdict
import mmap import mmap
import os import os
import sys import sys
@ -28,6 +29,9 @@ if sys.platform == 'win32':
import _winapi import _winapi
class Arena(object): class Arena(object):
"""
A shared memory area backed by anonymous memory (Windows).
"""
_rand = tempfile._RandomNameSequence() _rand = tempfile._RandomNameSequence()
@ -52,6 +56,7 @@ if sys.platform == 'win32':
def __setstate__(self, state): def __setstate__(self, state):
self.size, self.name = self._state = state self.size, self.name = self._state = state
# Reopen existing mmap
self.buffer = mmap.mmap(-1, self.size, tagname=self.name) self.buffer = mmap.mmap(-1, self.size, tagname=self.name)
# XXX Temporarily preventing buildbot failures while determining # XXX Temporarily preventing buildbot failures while determining
# XXX the correct long-term fix. See issue 23060 # XXX the correct long-term fix. See issue 23060
@ -60,6 +65,10 @@ if sys.platform == 'win32':
else: else:
class Arena(object): class Arena(object):
"""
A shared memory area backed by a temporary file (POSIX).
"""
if sys.platform == 'linux': if sys.platform == 'linux':
_dir_candidates = ['/dev/shm'] _dir_candidates = ['/dev/shm']
else: else:
@ -69,6 +78,8 @@ else:
self.size = size self.size = size
self.fd = fd self.fd = fd
if fd == -1: if fd == -1:
# Arena is created anew (if fd != -1, it means we're coming
# from rebuild_arena() below)
self.fd, name = tempfile.mkstemp( self.fd, name = tempfile.mkstemp(
prefix='pym-%d-'%os.getpid(), prefix='pym-%d-'%os.getpid(),
dir=self._choose_dir(size)) dir=self._choose_dir(size))
@ -103,37 +114,82 @@ else:
class Heap(object): class Heap(object):
# Minimum malloc() alignment
_alignment = 8 _alignment = 8
_DISCARD_FREE_SPACE_LARGER_THAN = 4 * 1024 ** 2 # 4 MB
_DOUBLE_ARENA_SIZE_UNTIL = 4 * 1024 ** 2
def __init__(self, size=mmap.PAGESIZE): def __init__(self, size=mmap.PAGESIZE):
self._lastpid = os.getpid() self._lastpid = os.getpid()
self._lock = threading.Lock() self._lock = threading.Lock()
# Current arena allocation size
self._size = size self._size = size
# A sorted list of available block sizes in arenas
self._lengths = [] self._lengths = []
# Free block management:
# - map each block size to a list of `(Arena, start, stop)` blocks
self._len_to_seq = {} self._len_to_seq = {}
# - map `(Arena, start)` tuple to the `(Arena, start, stop)` block
# starting at that offset
self._start_to_block = {} self._start_to_block = {}
# - map `(Arena, stop)` tuple to the `(Arena, start, stop)` block
# ending at that offset
self._stop_to_block = {} self._stop_to_block = {}
self._allocated_blocks = set()
# Map arenas to their `(Arena, start, stop)` blocks in use
self._allocated_blocks = defaultdict(set)
self._arenas = [] self._arenas = []
# list of pending blocks to free - see free() comment below
# List of pending blocks to free - see comment in free() below
self._pending_free_blocks = [] self._pending_free_blocks = []
# Statistics
self._n_mallocs = 0
self._n_frees = 0
@staticmethod @staticmethod
def _roundup(n, alignment): def _roundup(n, alignment):
# alignment must be a power of 2 # alignment must be a power of 2
mask = alignment - 1 mask = alignment - 1
return (n + mask) & ~mask return (n + mask) & ~mask
def _new_arena(self, size):
# Create a new arena with at least the given *size*
length = self._roundup(max(self._size, size), mmap.PAGESIZE)
# We carve larger and larger arenas, for efficiency, until we
# reach a large-ish size (roughly L3 cache-sized)
if self._size < self._DOUBLE_ARENA_SIZE_UNTIL:
self._size *= 2
util.info('allocating a new mmap of length %d', length)
arena = Arena(length)
self._arenas.append(arena)
return (arena, 0, length)
def _discard_arena(self, arena):
# Possibly delete the given (unused) arena
length = arena.size
# Reusing an existing arena is faster than creating a new one, so
# we only reclaim space if it's large enough.
if length < self._DISCARD_FREE_SPACE_LARGER_THAN:
return
blocks = self._allocated_blocks.pop(arena)
assert not blocks
del self._start_to_block[(arena, 0)]
del self._stop_to_block[(arena, length)]
self._arenas.remove(arena)
seq = self._len_to_seq[length]
seq.remove((arena, 0, length))
if not seq:
del self._len_to_seq[length]
self._lengths.remove(length)
def _malloc(self, size): def _malloc(self, size):
# returns a large enough block -- it might be much larger # returns a large enough block -- it might be much larger
i = bisect.bisect_left(self._lengths, size) i = bisect.bisect_left(self._lengths, size)
if i == len(self._lengths): if i == len(self._lengths):
length = self._roundup(max(self._size, size), mmap.PAGESIZE) return self._new_arena(size)
self._size *= 2
util.info('allocating a new mmap of length %d', length)
arena = Arena(length)
self._arenas.append(arena)
return (arena, 0, length)
else: else:
length = self._lengths[i] length = self._lengths[i]
seq = self._len_to_seq[length] seq = self._len_to_seq[length]
@ -146,8 +202,8 @@ class Heap(object):
del self._stop_to_block[(arena, stop)] del self._stop_to_block[(arena, stop)]
return block return block
def _free(self, block): def _add_free_block(self, block):
# free location and try to merge with neighbours # make block available and try to merge with its neighbours in the arena
(arena, start, stop) = block (arena, start, stop) = block
try: try:
@ -191,6 +247,14 @@ class Heap(object):
return start, stop return start, stop
def _remove_allocated_block(self, block):
arena, start, stop = block
blocks = self._allocated_blocks[arena]
blocks.remove((start, stop))
if not blocks:
# Arena is entirely free, discard it from this process
self._discard_arena(arena)
def _free_pending_blocks(self): def _free_pending_blocks(self):
# Free all the blocks in the pending list - called with the lock held. # Free all the blocks in the pending list - called with the lock held.
while True: while True:
@ -198,8 +262,8 @@ class Heap(object):
block = self._pending_free_blocks.pop() block = self._pending_free_blocks.pop()
except IndexError: except IndexError:
break break
self._allocated_blocks.remove(block) self._add_free_block(block)
self._free(block) self._remove_allocated_block(block)
def free(self, block): def free(self, block):
# free a block returned by malloc() # free a block returned by malloc()
@ -210,7 +274,7 @@ class Heap(object):
# immediately, the block is added to a list of blocks to be freed # immediately, the block is added to a list of blocks to be freed
# synchronously sometimes later from malloc() or free(), by calling # synchronously sometimes later from malloc() or free(), by calling
# _free_pending_blocks() (appending and retrieving from a list is not # _free_pending_blocks() (appending and retrieving from a list is not
# strictly thread-safe but under cPython it's atomic thanks to the GIL). # strictly thread-safe but under CPython it's atomic thanks to the GIL).
if os.getpid() != self._lastpid: if os.getpid() != self._lastpid:
raise ValueError( raise ValueError(
"My pid ({0:n}) is not last pid {1:n}".format( "My pid ({0:n}) is not last pid {1:n}".format(
@ -222,9 +286,10 @@ class Heap(object):
else: else:
# we hold the lock # we hold the lock
try: try:
self._n_frees += 1
self._free_pending_blocks() self._free_pending_blocks()
self._allocated_blocks.remove(block) self._add_free_block(block)
self._free(block) self._remove_allocated_block(block)
finally: finally:
self._lock.release() self._lock.release()
@ -237,18 +302,21 @@ class Heap(object):
if os.getpid() != self._lastpid: if os.getpid() != self._lastpid:
self.__init__() # reinitialize after fork self.__init__() # reinitialize after fork
with self._lock: with self._lock:
self._n_mallocs += 1
# allow pending blocks to be marked available
self._free_pending_blocks() self._free_pending_blocks()
size = self._roundup(max(size,1), self._alignment) size = self._roundup(max(size, 1), self._alignment)
(arena, start, stop) = self._malloc(size) (arena, start, stop) = self._malloc(size)
new_stop = start + size real_stop = start + size
if new_stop < stop: if real_stop < stop:
self._free((arena, new_stop, stop)) # if the returned block is larger than necessary, mark
block = (arena, start, new_stop) # the remainder available
self._allocated_blocks.add(block) self._add_free_block((arena, real_stop, stop))
return block self._allocated_blocks[arena].add((start, real_stop))
return (arena, start, real_stop)
# #
# Class representing a chunk of an mmap -- can be inherited by child process # Class wrapping a block allocated out of a Heap -- can be inherited by child process
# #
class BufferWrapper(object): class BufferWrapper(object):

View File

@ -3372,11 +3372,25 @@ class _TestHeap(BaseTestCase):
ALLOWED_TYPES = ('processes',) ALLOWED_TYPES = ('processes',)
def setUp(self):
super().setUp()
# Make pristine heap for these tests
self.old_heap = multiprocessing.heap.BufferWrapper._heap
multiprocessing.heap.BufferWrapper._heap = multiprocessing.heap.Heap()
def tearDown(self):
multiprocessing.heap.BufferWrapper._heap = self.old_heap
super().tearDown()
def test_heap(self): def test_heap(self):
iterations = 5000 iterations = 5000
maxblocks = 50 maxblocks = 50
blocks = [] blocks = []
# get the heap object
heap = multiprocessing.heap.BufferWrapper._heap
heap._DISCARD_FREE_SPACE_LARGER_THAN = 0
# create and destroy lots of blocks of different sizes # create and destroy lots of blocks of different sizes
for i in range(iterations): for i in range(iterations):
size = int(random.lognormvariate(0, 1) * 1000) size = int(random.lognormvariate(0, 1) * 1000)
@ -3385,31 +3399,52 @@ class _TestHeap(BaseTestCase):
if len(blocks) > maxblocks: if len(blocks) > maxblocks:
i = random.randrange(maxblocks) i = random.randrange(maxblocks)
del blocks[i] del blocks[i]
del b
# get the heap object
heap = multiprocessing.heap.BufferWrapper._heap
# verify the state of the heap # verify the state of the heap
all = [] with heap._lock:
occupied = 0 all = []
heap._lock.acquire() free = 0
self.addCleanup(heap._lock.release) occupied = 0
for L in list(heap._len_to_seq.values()): for L in list(heap._len_to_seq.values()):
for arena, start, stop in L: # count all free blocks in arenas
all.append((heap._arenas.index(arena), start, stop, for arena, start, stop in L:
stop-start, 'free')) all.append((heap._arenas.index(arena), start, stop,
for arena, start, stop in heap._allocated_blocks: stop-start, 'free'))
all.append((heap._arenas.index(arena), start, stop, free += (stop-start)
stop-start, 'occupied')) for arena, arena_blocks in heap._allocated_blocks.items():
occupied += (stop-start) # count all allocated blocks in arenas
for start, stop in arena_blocks:
all.append((heap._arenas.index(arena), start, stop,
stop-start, 'occupied'))
occupied += (stop-start)
all.sort() self.assertEqual(free + occupied,
sum(arena.size for arena in heap._arenas))
for i in range(len(all)-1): all.sort()
(arena, start, stop) = all[i][:3]
(narena, nstart, nstop) = all[i+1][:3] for i in range(len(all)-1):
self.assertTrue((arena != narena and nstart == 0) or (arena, start, stop) = all[i][:3]
(stop == nstart)) (narena, nstart, nstop) = all[i+1][:3]
if arena != narena:
# Two different arenas
self.assertEqual(stop, heap._arenas[arena].size) # last block
self.assertEqual(nstart, 0) # first block
else:
# Same arena: two adjacent blocks
self.assertEqual(stop, nstart)
# test free'ing all blocks
random.shuffle(blocks)
while blocks:
blocks.pop()
self.assertEqual(heap._n_frees, heap._n_mallocs)
self.assertEqual(len(heap._pending_free_blocks), 0)
self.assertEqual(len(heap._arenas), 0)
self.assertEqual(len(heap._allocated_blocks), 0, heap._allocated_blocks)
self.assertEqual(len(heap._len_to_seq), 0)
def test_free_from_gc(self): def test_free_from_gc(self):
# Check that freeing of blocks by the garbage collector doesn't deadlock # Check that freeing of blocks by the garbage collector doesn't deadlock

View File

@ -0,0 +1 @@
Free unused arenas in multiprocessing.heap.