bpo-32759: Free unused arenas in multiprocessing.heap (GH-5827)

Large shared arrays allocated using multiprocessing would remain allocated until the process ends.
2018-04-09 17:37:55 +02:00 · 2018-04-09 17:37:55 +02:00 · e4679cd644
parent 2ef65f346a
commit e4679cd644
3 changed files with 148 additions and 44 deletions
--- a/Lib/multiprocessing/heap.py
+++ b/Lib/multiprocessing/heap.py
@ -8,6 +8,7 @@
 #

 import bisect
+from collections import defaultdict
 import mmap
 import os
 import sys
@ -28,6 +29,9 @@ if sys.platform == 'win32':
    import _winapi

    class Arena(object):
+        """
+        A shared memory area backed by anonymous memory (Windows).
+        """

        _rand = tempfile._RandomNameSequence()

@ -52,6 +56,7 @@ if sys.platform == 'win32':

        def __setstate__(self, state):
            self.size, self.name = self._state = state
+            # Reopen existing mmap
            self.buffer = mmap.mmap(-1, self.size, tagname=self.name)
            # XXX Temporarily preventing buildbot failures while determining
            # XXX the correct long-term fix. See issue 23060
@ -60,6 +65,10 @@ if sys.platform == 'win32':
 else:

    class Arena(object):
+        """
+        A shared memory area backed by a temporary file (POSIX).
+        """
+
        if sys.platform == 'linux':
            _dir_candidates = ['/dev/shm']
        else:
@ -69,6 +78,8 @@ else:
            self.size = size
            self.fd = fd
            if fd == -1:
+                # Arena is created anew (if fd != -1, it means we're coming
+                # from rebuild_arena() below)
                self.fd, name = tempfile.mkstemp(
                     prefix='pym-%d-'%os.getpid(),
                     dir=self._choose_dir(size))
@ -103,37 +114,82 @@ else:

 class Heap(object):

+    # Minimum malloc() alignment
    _alignment = 8

+    _DISCARD_FREE_SPACE_LARGER_THAN = 4 * 1024 ** 2  # 4 MB
+    _DOUBLE_ARENA_SIZE_UNTIL = 4 * 1024 ** 2
+
    def __init__(self, size=mmap.PAGESIZE):
        self._lastpid = os.getpid()
        self._lock = threading.Lock()
+        # Current arena allocation size
        self._size = size
+        # A sorted list of available block sizes in arenas
        self._lengths = []
+
+        # Free block management:
+        # - map each block size to a list of `(Arena, start, stop)` blocks
        self._len_to_seq = {}
+        # - map `(Arena, start)` tuple to the `(Arena, start, stop)` block
+        #   starting at that offset
        self._start_to_block = {}
+        # - map `(Arena, stop)` tuple to the `(Arena, start, stop)` block
+        #   ending at that offset
        self._stop_to_block = {}
-        self._allocated_blocks = set()
+
+        # Map arenas to their `(Arena, start, stop)` blocks in use
+        self._allocated_blocks = defaultdict(set)
        self._arenas = []
-        # list of pending blocks to free - see free() comment below
+
+        # List of pending blocks to free - see comment in free() below
        self._pending_free_blocks = []

+        # Statistics
+        self._n_mallocs = 0
+        self._n_frees = 0
+
    @staticmethod
    def _roundup(n, alignment):
        # alignment must be a power of 2
        mask = alignment - 1
        return (n + mask) & ~mask

+    def _new_arena(self, size):
+        # Create a new arena with at least the given *size*
+        length = self._roundup(max(self._size, size), mmap.PAGESIZE)
+        # We carve larger and larger arenas, for efficiency, until we
+        # reach a large-ish size (roughly L3 cache-sized)
+        if self._size < self._DOUBLE_ARENA_SIZE_UNTIL:
+            self._size *= 2
+        util.info('allocating a new mmap of length %d', length)
+        arena = Arena(length)
+        self._arenas.append(arena)
+        return (arena, 0, length)
+
+    def _discard_arena(self, arena):
+        # Possibly delete the given (unused) arena
+        length = arena.size
+        # Reusing an existing arena is faster than creating a new one, so
+        # we only reclaim space if it's large enough.
+        if length < self._DISCARD_FREE_SPACE_LARGER_THAN:
+            return
+        blocks = self._allocated_blocks.pop(arena)
+        assert not blocks
+        del self._start_to_block[(arena, 0)]
+        del self._stop_to_block[(arena, length)]
+        self._arenas.remove(arena)
+        seq = self._len_to_seq[length]
+        seq.remove((arena, 0, length))
+        if not seq:
+            del self._len_to_seq[length]
+            self._lengths.remove(length)
+
    def _malloc(self, size):
        # returns a large enough block -- it might be much larger
        i = bisect.bisect_left(self._lengths, size)
        if i == len(self._lengths):
-            length = self._roundup(max(self._size, size), mmap.PAGESIZE)
-            self._size *= 2
-            util.info('allocating a new mmap of length %d', length)
-            arena = Arena(length)
-            self._arenas.append(arena)
-            return (arena, 0, length)
+            return self._new_arena(size)
        else:
            length = self._lengths[i]
            seq = self._len_to_seq[length]
@ -146,8 +202,8 @@ class Heap(object):
        del self._stop_to_block[(arena, stop)]
        return block

-    def _free(self, block):
-        # free location and try to merge with neighbours
+    def _add_free_block(self, block):
+        # make block available and try to merge with its neighbours in the arena
        (arena, start, stop) = block

        try:
@ -191,6 +247,14 @@ class Heap(object):

        return start, stop

+    def _remove_allocated_block(self, block):
+        arena, start, stop = block
+        blocks = self._allocated_blocks[arena]
+        blocks.remove((start, stop))
+        if not blocks:
+            # Arena is entirely free, discard it from this process
+            self._discard_arena(arena)
+
    def _free_pending_blocks(self):
        # Free all the blocks in the pending list - called with the lock held.
        while True:
@ -198,8 +262,8 @@ class Heap(object):
                block = self._pending_free_blocks.pop()
            except IndexError:
                break
-            self._allocated_blocks.remove(block)
-            self._free(block)
+            self._add_free_block(block)
+            self._remove_allocated_block(block)

    def free(self, block):
        # free a block returned by malloc()
@ -210,7 +274,7 @@ class Heap(object):
        # immediately, the block is added to a list of blocks to be freed
        # synchronously sometimes later from malloc() or free(), by calling
        # _free_pending_blocks() (appending and retrieving from a list is not
-        # strictly thread-safe but under cPython it's atomic thanks to the GIL).
+        # strictly thread-safe but under CPython it's atomic thanks to the GIL).
        if os.getpid() != self._lastpid:
            raise ValueError(
                "My pid ({0:n}) is not last pid {1:n}".format(
@ -222,9 +286,10 @@ class Heap(object):
        else:
            # we hold the lock
            try:
+                self._n_frees += 1
                self._free_pending_blocks()
-                self._allocated_blocks.remove(block)
-                self._free(block)
+                self._add_free_block(block)
+                self._remove_allocated_block(block)
            finally:
                self._lock.release()

@ -237,18 +302,21 @@ class Heap(object):
        if os.getpid() != self._lastpid:
            self.__init__()                     # reinitialize after fork
        with self._lock:
+            self._n_mallocs += 1
+            # allow pending blocks to be marked available
            self._free_pending_blocks()
-            size = self._roundup(max(size,1), self._alignment)
+            size = self._roundup(max(size, 1), self._alignment)
            (arena, start, stop) = self._malloc(size)
-            new_stop = start + size
-            if new_stop < stop:
-                self._free((arena, new_stop, stop))
-            block = (arena, start, new_stop)
-            self._allocated_blocks.add(block)
-            return block
+            real_stop = start + size
+            if real_stop < stop:
+                # if the returned block is larger than necessary, mark
+                # the remainder available
+                self._add_free_block((arena, real_stop, stop))
+            self._allocated_blocks[arena].add((start, real_stop))
+            return (arena, start, real_stop)

 #
-# Class representing a chunk of an mmap -- can be inherited by child process
+# Class wrapping a block allocated out of a Heap -- can be inherited by child process
 #

 class BufferWrapper(object):
--- a/Lib/test/_test_multiprocessing.py
+++ b/Lib/test/_test_multiprocessing.py
@ -3372,11 +3372,25 @@ class _TestHeap(BaseTestCase):

    ALLOWED_TYPES = ('processes',)

+    def setUp(self):
+        super().setUp()
+        # Make pristine heap for these tests
+        self.old_heap = multiprocessing.heap.BufferWrapper._heap
+        multiprocessing.heap.BufferWrapper._heap = multiprocessing.heap.Heap()
+
+    def tearDown(self):
+        multiprocessing.heap.BufferWrapper._heap = self.old_heap
+        super().tearDown()
+
    def test_heap(self):
        iterations = 5000
        maxblocks = 50
        blocks = []

+        # get the heap object
+        heap = multiprocessing.heap.BufferWrapper._heap
+        heap._DISCARD_FREE_SPACE_LARGER_THAN = 0
+
        # create and destroy lots of blocks of different sizes
        for i in range(iterations):
            size = int(random.lognormvariate(0, 1) * 1000)
@ -3385,31 +3399,52 @@ class _TestHeap(BaseTestCase):
            if len(blocks) > maxblocks:
                i = random.randrange(maxblocks)
                del blocks[i]
-
-        # get the heap object
-        heap = multiprocessing.heap.BufferWrapper._heap
+            del b

        # verify the state of the heap
-        all = []
-        occupied = 0
-        heap._lock.acquire()
-        self.addCleanup(heap._lock.release)
-        for L in list(heap._len_to_seq.values()):
-            for arena, start, stop in L:
-                all.append((heap._arenas.index(arena), start, stop,
-                            stop-start, 'free'))
-        for arena, start, stop in heap._allocated_blocks:
-            all.append((heap._arenas.index(arena), start, stop,
-                        stop-start, 'occupied'))
-            occupied += (stop-start)
+        with heap._lock:
+            all = []
+            free = 0
+            occupied = 0
+            for L in list(heap._len_to_seq.values()):
+                # count all free blocks in arenas
+                for arena, start, stop in L:
+                    all.append((heap._arenas.index(arena), start, stop,
+                                stop-start, 'free'))
+                    free += (stop-start)
+            for arena, arena_blocks in heap._allocated_blocks.items():
+                # count all allocated blocks in arenas
+                for start, stop in arena_blocks:
+                    all.append((heap._arenas.index(arena), start, stop,
+                                stop-start, 'occupied'))
+                    occupied += (stop-start)

-        all.sort()
+            self.assertEqual(free + occupied,
+                             sum(arena.size for arena in heap._arenas))

-        for i in range(len(all)-1):
-            (arena, start, stop) = all[i][:3]
-            (narena, nstart, nstop) = all[i+1][:3]
-            self.assertTrue((arena != narena and nstart == 0) or
-                            (stop == nstart))
+            all.sort()
+
+            for i in range(len(all)-1):
+                (arena, start, stop) = all[i][:3]
+                (narena, nstart, nstop) = all[i+1][:3]
+                if arena != narena:
+                    # Two different arenas
+                    self.assertEqual(stop, heap._arenas[arena].size)  # last block
+                    self.assertEqual(nstart, 0)         # first block
+                else:
+                    # Same arena: two adjacent blocks
+                    self.assertEqual(stop, nstart)
+
+        # test free'ing all blocks
+        random.shuffle(blocks)
+        while blocks:
+            blocks.pop()
+
+        self.assertEqual(heap._n_frees, heap._n_mallocs)
+        self.assertEqual(len(heap._pending_free_blocks), 0)
+        self.assertEqual(len(heap._arenas), 0)
+        self.assertEqual(len(heap._allocated_blocks), 0, heap._allocated_blocks)
+        self.assertEqual(len(heap._len_to_seq), 0)

    def test_free_from_gc(self):
        # Check that freeing of blocks by the garbage collector doesn't deadlock
--- a/Misc/NEWS.d/next/Library/2018-02-23-12-21-41.bpo-32759.M-y9GA.rst
+++ b/Misc/NEWS.d/next/Library/2018-02-23-12-21-41.bpo-32759.M-y9GA.rst
@ -0,0 +1 @@
+Free unused arenas in multiprocessing.heap.