From e4679cd644aa19f9d9df9beb1326625cf2b02c15 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 9 Apr 2018 17:37:55 +0200 Subject: [PATCH] bpo-32759: Free unused arenas in multiprocessing.heap (GH-5827) Large shared arrays allocated using multiprocessing would remain allocated until the process ends. --- Lib/multiprocessing/heap.py | 114 ++++++++++++++---- Lib/test/_test_multiprocessing.py | 77 ++++++++---- .../2018-02-23-12-21-41.bpo-32759.M-y9GA.rst | 1 + 3 files changed, 148 insertions(+), 44 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-02-23-12-21-41.bpo-32759.M-y9GA.rst diff --git a/Lib/multiprocessing/heap.py b/Lib/multiprocessing/heap.py index 566173a1b0a..6217dfe1268 100644 --- a/Lib/multiprocessing/heap.py +++ b/Lib/multiprocessing/heap.py @@ -8,6 +8,7 @@ # import bisect +from collections import defaultdict import mmap import os import sys @@ -28,6 +29,9 @@ if sys.platform == 'win32': import _winapi class Arena(object): + """ + A shared memory area backed by anonymous memory (Windows). + """ _rand = tempfile._RandomNameSequence() @@ -52,6 +56,7 @@ if sys.platform == 'win32': def __setstate__(self, state): self.size, self.name = self._state = state + # Reopen existing mmap self.buffer = mmap.mmap(-1, self.size, tagname=self.name) # XXX Temporarily preventing buildbot failures while determining # XXX the correct long-term fix. See issue 23060 @@ -60,6 +65,10 @@ if sys.platform == 'win32': else: class Arena(object): + """ + A shared memory area backed by a temporary file (POSIX). + """ + if sys.platform == 'linux': _dir_candidates = ['/dev/shm'] else: @@ -69,6 +78,8 @@ else: self.size = size self.fd = fd if fd == -1: + # Arena is created anew (if fd != -1, it means we're coming + # from rebuild_arena() below) self.fd, name = tempfile.mkstemp( prefix='pym-%d-'%os.getpid(), dir=self._choose_dir(size)) @@ -103,37 +114,82 @@ else: class Heap(object): + # Minimum malloc() alignment _alignment = 8 + _DISCARD_FREE_SPACE_LARGER_THAN = 4 * 1024 ** 2 # 4 MB + _DOUBLE_ARENA_SIZE_UNTIL = 4 * 1024 ** 2 + def __init__(self, size=mmap.PAGESIZE): self._lastpid = os.getpid() self._lock = threading.Lock() + # Current arena allocation size self._size = size + # A sorted list of available block sizes in arenas self._lengths = [] + + # Free block management: + # - map each block size to a list of `(Arena, start, stop)` blocks self._len_to_seq = {} + # - map `(Arena, start)` tuple to the `(Arena, start, stop)` block + # starting at that offset self._start_to_block = {} + # - map `(Arena, stop)` tuple to the `(Arena, start, stop)` block + # ending at that offset self._stop_to_block = {} - self._allocated_blocks = set() + + # Map arenas to their `(Arena, start, stop)` blocks in use + self._allocated_blocks = defaultdict(set) self._arenas = [] - # list of pending blocks to free - see free() comment below + + # List of pending blocks to free - see comment in free() below self._pending_free_blocks = [] + # Statistics + self._n_mallocs = 0 + self._n_frees = 0 + @staticmethod def _roundup(n, alignment): # alignment must be a power of 2 mask = alignment - 1 return (n + mask) & ~mask + def _new_arena(self, size): + # Create a new arena with at least the given *size* + length = self._roundup(max(self._size, size), mmap.PAGESIZE) + # We carve larger and larger arenas, for efficiency, until we + # reach a large-ish size (roughly L3 cache-sized) + if self._size < self._DOUBLE_ARENA_SIZE_UNTIL: + self._size *= 2 + util.info('allocating a new mmap of length %d', length) + arena = Arena(length) + self._arenas.append(arena) + return (arena, 0, length) + + def _discard_arena(self, arena): + # Possibly delete the given (unused) arena + length = arena.size + # Reusing an existing arena is faster than creating a new one, so + # we only reclaim space if it's large enough. + if length < self._DISCARD_FREE_SPACE_LARGER_THAN: + return + blocks = self._allocated_blocks.pop(arena) + assert not blocks + del self._start_to_block[(arena, 0)] + del self._stop_to_block[(arena, length)] + self._arenas.remove(arena) + seq = self._len_to_seq[length] + seq.remove((arena, 0, length)) + if not seq: + del self._len_to_seq[length] + self._lengths.remove(length) + def _malloc(self, size): # returns a large enough block -- it might be much larger i = bisect.bisect_left(self._lengths, size) if i == len(self._lengths): - length = self._roundup(max(self._size, size), mmap.PAGESIZE) - self._size *= 2 - util.info('allocating a new mmap of length %d', length) - arena = Arena(length) - self._arenas.append(arena) - return (arena, 0, length) + return self._new_arena(size) else: length = self._lengths[i] seq = self._len_to_seq[length] @@ -146,8 +202,8 @@ class Heap(object): del self._stop_to_block[(arena, stop)] return block - def _free(self, block): - # free location and try to merge with neighbours + def _add_free_block(self, block): + # make block available and try to merge with its neighbours in the arena (arena, start, stop) = block try: @@ -191,6 +247,14 @@ class Heap(object): return start, stop + def _remove_allocated_block(self, block): + arena, start, stop = block + blocks = self._allocated_blocks[arena] + blocks.remove((start, stop)) + if not blocks: + # Arena is entirely free, discard it from this process + self._discard_arena(arena) + def _free_pending_blocks(self): # Free all the blocks in the pending list - called with the lock held. while True: @@ -198,8 +262,8 @@ class Heap(object): block = self._pending_free_blocks.pop() except IndexError: break - self._allocated_blocks.remove(block) - self._free(block) + self._add_free_block(block) + self._remove_allocated_block(block) def free(self, block): # free a block returned by malloc() @@ -210,7 +274,7 @@ class Heap(object): # immediately, the block is added to a list of blocks to be freed # synchronously sometimes later from malloc() or free(), by calling # _free_pending_blocks() (appending and retrieving from a list is not - # strictly thread-safe but under cPython it's atomic thanks to the GIL). + # strictly thread-safe but under CPython it's atomic thanks to the GIL). if os.getpid() != self._lastpid: raise ValueError( "My pid ({0:n}) is not last pid {1:n}".format( @@ -222,9 +286,10 @@ class Heap(object): else: # we hold the lock try: + self._n_frees += 1 self._free_pending_blocks() - self._allocated_blocks.remove(block) - self._free(block) + self._add_free_block(block) + self._remove_allocated_block(block) finally: self._lock.release() @@ -237,18 +302,21 @@ class Heap(object): if os.getpid() != self._lastpid: self.__init__() # reinitialize after fork with self._lock: + self._n_mallocs += 1 + # allow pending blocks to be marked available self._free_pending_blocks() - size = self._roundup(max(size,1), self._alignment) + size = self._roundup(max(size, 1), self._alignment) (arena, start, stop) = self._malloc(size) - new_stop = start + size - if new_stop < stop: - self._free((arena, new_stop, stop)) - block = (arena, start, new_stop) - self._allocated_blocks.add(block) - return block + real_stop = start + size + if real_stop < stop: + # if the returned block is larger than necessary, mark + # the remainder available + self._add_free_block((arena, real_stop, stop)) + self._allocated_blocks[arena].add((start, real_stop)) + return (arena, start, real_stop) # -# Class representing a chunk of an mmap -- can be inherited by child process +# Class wrapping a block allocated out of a Heap -- can be inherited by child process # class BufferWrapper(object): diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index c6a1f5ca905..20185a9a595 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -3372,11 +3372,25 @@ class _TestHeap(BaseTestCase): ALLOWED_TYPES = ('processes',) + def setUp(self): + super().setUp() + # Make pristine heap for these tests + self.old_heap = multiprocessing.heap.BufferWrapper._heap + multiprocessing.heap.BufferWrapper._heap = multiprocessing.heap.Heap() + + def tearDown(self): + multiprocessing.heap.BufferWrapper._heap = self.old_heap + super().tearDown() + def test_heap(self): iterations = 5000 maxblocks = 50 blocks = [] + # get the heap object + heap = multiprocessing.heap.BufferWrapper._heap + heap._DISCARD_FREE_SPACE_LARGER_THAN = 0 + # create and destroy lots of blocks of different sizes for i in range(iterations): size = int(random.lognormvariate(0, 1) * 1000) @@ -3385,31 +3399,52 @@ class _TestHeap(BaseTestCase): if len(blocks) > maxblocks: i = random.randrange(maxblocks) del blocks[i] - - # get the heap object - heap = multiprocessing.heap.BufferWrapper._heap + del b # verify the state of the heap - all = [] - occupied = 0 - heap._lock.acquire() - self.addCleanup(heap._lock.release) - for L in list(heap._len_to_seq.values()): - for arena, start, stop in L: - all.append((heap._arenas.index(arena), start, stop, - stop-start, 'free')) - for arena, start, stop in heap._allocated_blocks: - all.append((heap._arenas.index(arena), start, stop, - stop-start, 'occupied')) - occupied += (stop-start) + with heap._lock: + all = [] + free = 0 + occupied = 0 + for L in list(heap._len_to_seq.values()): + # count all free blocks in arenas + for arena, start, stop in L: + all.append((heap._arenas.index(arena), start, stop, + stop-start, 'free')) + free += (stop-start) + for arena, arena_blocks in heap._allocated_blocks.items(): + # count all allocated blocks in arenas + for start, stop in arena_blocks: + all.append((heap._arenas.index(arena), start, stop, + stop-start, 'occupied')) + occupied += (stop-start) - all.sort() + self.assertEqual(free + occupied, + sum(arena.size for arena in heap._arenas)) - for i in range(len(all)-1): - (arena, start, stop) = all[i][:3] - (narena, nstart, nstop) = all[i+1][:3] - self.assertTrue((arena != narena and nstart == 0) or - (stop == nstart)) + all.sort() + + for i in range(len(all)-1): + (arena, start, stop) = all[i][:3] + (narena, nstart, nstop) = all[i+1][:3] + if arena != narena: + # Two different arenas + self.assertEqual(stop, heap._arenas[arena].size) # last block + self.assertEqual(nstart, 0) # first block + else: + # Same arena: two adjacent blocks + self.assertEqual(stop, nstart) + + # test free'ing all blocks + random.shuffle(blocks) + while blocks: + blocks.pop() + + self.assertEqual(heap._n_frees, heap._n_mallocs) + self.assertEqual(len(heap._pending_free_blocks), 0) + self.assertEqual(len(heap._arenas), 0) + self.assertEqual(len(heap._allocated_blocks), 0, heap._allocated_blocks) + self.assertEqual(len(heap._len_to_seq), 0) def test_free_from_gc(self): # Check that freeing of blocks by the garbage collector doesn't deadlock diff --git a/Misc/NEWS.d/next/Library/2018-02-23-12-21-41.bpo-32759.M-y9GA.rst b/Misc/NEWS.d/next/Library/2018-02-23-12-21-41.bpo-32759.M-y9GA.rst new file mode 100644 index 00000000000..e9bd326b4af --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-02-23-12-21-41.bpo-32759.M-y9GA.rst @@ -0,0 +1 @@ +Free unused arenas in multiprocessing.heap.