diff --git a/Doc/library/debug.rst b/Doc/library/debug.rst index c69fb1c8465..88a2fa62a56 100644 --- a/Doc/library/debug.rst +++ b/Doc/library/debug.rst @@ -15,3 +15,4 @@ allowing you to identify bottlenecks in your programs. profile.rst timeit.rst trace.rst + tracemalloc.rst diff --git a/Doc/library/tracemalloc.rst b/Doc/library/tracemalloc.rst new file mode 100644 index 00000000000..2edf7bb9d59 --- /dev/null +++ b/Doc/library/tracemalloc.rst @@ -0,0 +1,608 @@ +:mod:`tracemalloc` --- Trace memory allocations +=============================================== + +.. module:: tracemalloc + :synopsis: Trace memory allocations. + +The tracemalloc module is a debug tool to trace memory blocks allocated by +Python. It provides the following information: + +* Traceback where an object was allocated +* Statistics on allocated memory blocks per filename and per line number: + total size, number and average size of allocated memory blocks +* Compute the differences between two snapshots to detect memory leaks + +To trace most memory blocks allocated by Python, the module should be started +as early as possible by setting the :envvar:`PYTHONTRACEMALLOC` environment +variable to ``1``, or by using :option:`-X` ``tracemalloc`` command line +option. The :func:`tracemalloc.start` function can be called at runtime to +start tracing Python memory allocations. + +By default, a trace of an allocated memory block only stores the most recent +frame (1 frame). To store 25 frames at startup: set the +:envvar:`PYTHONTRACEMALLOC` environment variable to ``25``, or use the +:option:`-X` ``tracemalloc=25`` command line option. The +:func:`set_traceback_limit` function can be used at runtime to set the limit. + +.. versionadded:: 3.4 + + +Examples +======== + +Display the top 10 +------------------ + +Display the 10 files allocating the most memory:: + + import tracemalloc + + tracemalloc.start() + + # ... run your application ... 
+ + snapshot = tracemalloc.take_snapshot() + top_stats = snapshot.statistics('lineno') + + print("[ Top 10 ]") + for stat in top_stats[:10]: + print(stat) + + +Example of output of the Python test suite:: + + [ Top 10 ] + :716: size=4855 KiB, count=39328, average=126 B + :284: size=521 KiB, count=3199, average=167 B + /usr/lib/python3.4/collections/__init__.py:368: size=244 KiB, count=2315, average=108 B + /usr/lib/python3.4/unittest/case.py:381: size=185 KiB, count=779, average=243 B + /usr/lib/python3.4/unittest/case.py:402: size=154 KiB, count=378, average=416 B + /usr/lib/python3.4/abc.py:133: size=88.7 KiB, count=347, average=262 B + :1446: size=70.4 KiB, count=911, average=79 B + :1454: size=52.0 KiB, count=25, average=2131 B + :5: size=49.7 KiB, count=148, average=344 B + /usr/lib/python3.4/sysconfig.py:411: size=48.0 KiB, count=1, average=48.0 KiB + +We can see that Python loaded ``4.8 MiB`` data (bytecode and constants) from +modules and that the :mod:`collections` module allocated ``244 KiB`` to build +:class:`~collections.namedtuple` types. + +See :meth:`Snapshot.statistics` for more options. + + +Compute differences +------------------- + +Take two snapshots and display the differences:: + + import tracemalloc + tracemalloc.start() + # ... start your application ... + + snapshot1 = tracemalloc.take_snapshot() + # ... call the function leaking memory ... 
+ + snapshot2 = tracemalloc.take_snapshot() + + top_stats = snapshot2.compare_to(snapshot1, 'lineno') + + print("[ Top 10 differences ]") + for stat in top_stats[:10]: + print(stat) + +Example of output before/after running some tests of the Python test suite:: + + [ Top 10 differences ] + :716: size=8173 KiB (+4428 KiB), count=71332 (+39369), average=117 B + /usr/lib/python3.4/linecache.py:127: size=940 KiB (+940 KiB), count=8106 (+8106), average=119 B + /usr/lib/python3.4/unittest/case.py:571: size=298 KiB (+298 KiB), count=589 (+589), average=519 B + :284: size=1005 KiB (+166 KiB), count=7423 (+1526), average=139 B + /usr/lib/python3.4/mimetypes.py:217: size=112 KiB (+112 KiB), count=1334 (+1334), average=86 B + /usr/lib/python3.4/http/server.py:848: size=96.0 KiB (+96.0 KiB), count=1 (+1), average=96.0 KiB + /usr/lib/python3.4/inspect.py:1465: size=83.5 KiB (+83.5 KiB), count=109 (+109), average=784 B + /usr/lib/python3.4/unittest/mock.py:491: size=77.7 KiB (+77.7 KiB), count=143 (+143), average=557 B + /usr/lib/python3.4/urllib/parse.py:476: size=71.8 KiB (+71.8 KiB), count=969 (+969), average=76 B + /usr/lib/python3.4/contextlib.py:38: size=67.2 KiB (+67.2 KiB), count=126 (+126), average=546 B + +We can see that Python loaded ``4.4 MiB`` of new data (bytecode and constants) +from modules (out of a total of ``8.2 MiB``) and that the :mod:`linecache` module +cached ``940 KiB`` of Python source code to format tracebacks. + +If the system has little free memory, snapshots can be written on disk using +the :meth:`Snapshot.dump` method to analyze the snapshot offline. Then use the +:meth:`Snapshot.load` method to reload the snapshot. + + +Get the traceback of a memory block +----------------------------------- + +Code to display the traceback of the biggest memory block:: + + import linecache + import tracemalloc + + tracemalloc.set_traceback_limit(25) + tracemalloc.start() + + # ... run your application ... 
+ + snapshot = tracemalloc.take_snapshot() + top_stats = snapshot.statistics('traceback') + + # pick the biggest memory block + stat = top_stats[0] + print("%s memory blocks: %.1f KiB" % (stat.count, stat.size / 1024)) + for frame in stat.traceback: + print(' File "%s", line %s' % (frame.filename, frame.lineno)) + line = linecache.getline(frame.filename, frame.lineno) + line = line.strip() + if line: + print(' ' + line) + +Example of output of the Python test suite (traceback limited to 25 frames):: + + 903 memory blocks: 870.1 KiB + File "", line 716 + File "", line 1036 + File "", line 934 + File "", line 1068 + File "", line 619 + File "", line 1581 + File "", line 1614 + File "/usr/lib/python3.4/doctest.py", line 101 + import pdb + File "", line 284 + File "", line 938 + File "", line 1068 + File "", line 619 + File "", line 1581 + File "", line 1614 + File "/usr/lib/python3.4/test/support/__init__.py", line 1728 + import doctest + File "/usr/lib/python3.4/test/test_pickletools.py", line 21 + support.run_doctest(pickletools) + File "/usr/lib/python3.4/test/regrtest.py", line 1276 + test_runner() + File "/usr/lib/python3.4/test/regrtest.py", line 976 + display_failure=not verbose) + File "/usr/lib/python3.4/test/regrtest.py", line 761 + match_tests=ns.match_tests) + File "/usr/lib/python3.4/test/regrtest.py", line 1563 + main() + File "/usr/lib/python3.4/test/__main__.py", line 3 + regrtest.main_in_temp_cwd() + File "/usr/lib/python3.4/runpy.py", line 73 + exec(code, run_globals) + File "/usr/lib/python3.4/runpy.py", line 160 + "__main__", fname, loader, pkg_name) + +We can see that most memory was allocated in the :mod:`importlib` module to +load data (bytecode and constants) from modules: ``870 KiB``. The traceback is +where the :mod:`importlib` loaded data for the last time: on the ``import +pdb`` line of the :mod:`doctest` module. The traceback may change if a new +module is loaded. 
+ + +Pretty top +---------- + +Code to display the 10 lines allocating the most memory with a pretty output, +ignoring ``<frozen importlib._bootstrap>`` and ``<unknown>`` files:: + + import os + import tracemalloc + + def display_top(snapshot, group_by='lineno', limit=10): + snapshot = snapshot.filter_traces(( + tracemalloc.Filter(False, "<frozen importlib._bootstrap>"), + tracemalloc.Filter(False, "<unknown>"), + )) + top_stats = snapshot.statistics(group_by) + + print("Top %s lines" % limit) + for index, stat in enumerate(top_stats[:limit], 1): + frame = stat.traceback[0] + # replace "/path/to/module/file.py" with "module/file.py" + filename = os.sep.join(frame.filename.split(os.sep)[-2:]) + print("#%s: %s:%s: %.1f KiB" + % (index, filename, frame.lineno, + stat.size / 1024)) + + other = top_stats[limit:] + if other: + size = sum(stat.size for stat in other) + print("%s other: %.1f KiB" % (len(other), size / 1024)) + total = sum(stat.size for stat in top_stats) + print("Total allocated size: %.1f KiB" % (total / 1024)) + + tracemalloc.start() + + # ... run your application ... + + snapshot = tracemalloc.take_snapshot() + display_top(snapshot, 10) + +Example of output of the Python test suite:: + + 2013-11-08 14:16:58.149320: Top 10 lines + #1: collections/__init__.py:368: 291.9 KiB + #2: Lib/doctest.py:1291: 200.2 KiB + #3: unittest/case.py:571: 160.3 KiB + #4: Lib/abc.py:133: 99.8 KiB + #5: urllib/parse.py:476: 71.8 KiB + #6: :5: 62.7 KiB + #7: Lib/base64.py:140: 59.8 KiB + #8: Lib/_weakrefset.py:37: 51.8 KiB + #9: collections/__init__.py:362: 50.6 KiB + #10: test/test_site.py:56: 48.0 KiB + 7496 other: 4161.9 KiB + Total allocated size: 5258.8 KiB + +See :meth:`Snapshot.statistics` for more options. + + +API +=== + +Functions +--------- + +.. function:: clear_traces() + + Clear traces of memory blocks allocated by Python. + + See also :func:`stop`. + + +.. function:: get_object_traceback(obj) + + Get the traceback where the Python object *obj* was allocated. 
+ Return a :class:`Traceback` instance, or ``None`` if the :mod:`tracemalloc` + module is not tracing memory allocations or did not trace the allocation of + the object. + + See also :func:`gc.get_referrers` and :func:`sys.getsizeof` functions. + + +.. function:: get_traceback_limit() + + Get the maximum number of frames stored in the traceback of a trace. + + By default, a trace of a memory block only stores the most recent + frame: the limit is ``1``. + + Use the :func:`set_traceback_limit` function to change the limit. + + +.. function:: get_traced_memory() + + Get the current size and maximum size of memory blocks traced by the + :mod:`tracemalloc` module as a tuple: ``(size: int, max_size: int)``. + + +.. function:: get_tracemalloc_memory() + + Get the memory usage in bytes of the :mod:`tracemalloc` module used to store + traces of memory blocks. + Return an :class:`int`. + + +.. function:: is_tracing() + + ``True`` if the :mod:`tracemalloc` module is tracing Python memory + allocations, ``False`` otherwise. + + See also :func:`start` and :func:`stop` functions. + + +.. function:: set_traceback_limit(nframe: int) + + Set the maximum number of frames stored in the traceback of a trace. + *nframe* must be greater or equal to ``1``. + + Storing more than ``1`` frame is only useful to compute statistics grouped + by ``'traceback'`` or to compute cumulative statistics: see the + :meth:`Snapshot.compare_to` and :meth:`Snapshot.statistics` methods. + + Storing more frames increases the memory and CPU overhead of the + :mod:`tracemalloc` module. Use the :func:`get_tracemalloc_memory` function + to measure how much memory is used by the :mod:`tracemalloc` module. + + The :envvar:`PYTHONTRACEMALLOC` environment variable + (``PYTHONTRACEMALLOC=NFRAME``) and the :option:`-X` ``tracemalloc=NFRAME`` + command line option can be used to set the limit at startup. + + Use the :func:`get_traceback_limit` function to get the current limit. + + +.. 
function:: start() + + Start tracing Python memory allocations: install hooks on Python memory + allocators. + + See also :func:`stop` and :func:`is_tracing` functions. + + +.. function:: stop() + + Stop tracing Python memory allocations: uninstall hooks on Python memory + allocators. Also clear traces of memory blocks allocated by Python. + + Call the :func:`take_snapshot` function to take a snapshot of traces before + clearing them. + + See also :func:`start` and :func:`is_tracing` functions. + + +.. function:: take_snapshot() + + Take a snapshot of traces of memory blocks allocated by Python. Return a new + :class:`Snapshot` instance. + + The snapshot does not include memory blocks allocated before the + :mod:`tracemalloc` module started to trace memory allocations. + + Tracebacks of traces are limited to :func:`get_traceback_limit` frames. Use + :func:`set_traceback_limit` to store more frames. + + The :mod:`tracemalloc` module must be tracing memory allocations to take a + snapshot, see the :func:`start` function. + + See also the :func:`get_object_traceback` function. + + +Filter +------ + +.. class:: Filter(inclusive: bool, filename_pattern: str, lineno: int=None, all_frames: bool=False) + + Filter on traces of memory blocks. + + See the :func:`fnmatch.fnmatch` function for the syntax of + *filename_pattern*. The ``'.pyc'`` and ``'.pyo'`` file extensions are + replaced with ``'.py'``. + + Examples: + + * ``Filter(True, subprocess.__file__)`` only includes traces of the + :mod:`subprocess` module + * ``Filter(False, tracemalloc.__file__)`` excludes traces of the + :mod:`tracemalloc` module + * ``Filter(False, "")`` excludes empty tracebacks + + .. attribute:: inclusive + + If *inclusive* is ``True`` (include), only trace memory blocks allocated + in a file with a name matching :attr:`filename_pattern` at line number + :attr:`lineno`. 
+ + If *inclusive* is ``False`` (exclude), ignore memory blocks allocated in + a file with a name matching :attr:`filename_pattern` at line number + :attr:`lineno`. + + .. attribute:: lineno + + Line number (``int``) of the filter. If *lineno* is ``None``, the filter + matches any line number. + + .. attribute:: filename_pattern + + Filename pattern of the filter (``str``). + + .. attribute:: all_frames + + If *all_frames* is ``True``, all frames of the traceback are checked. If + *all_frames* is ``False``, only the most recent frame is checked. + + This attribute is ignored if the traceback limit is less than ``2``. See + the :func:`get_traceback_limit` function and + :attr:`Snapshot.traceback_limit` attribute. + + +Frame +----- + +.. class:: Frame + + Frame of a traceback. + + The :class:`Traceback` class is a sequence of :class:`Frame` instances. + + .. attribute:: filename + + Filename (``str``). + + .. attribute:: lineno + + Line number (``int``). + + +Snapshot +-------- + +.. class:: Snapshot + + Snapshot of traces of memory blocks allocated by Python. + + The :func:`take_snapshot` function creates a snapshot instance. + + .. method:: compare_to(old_snapshot: Snapshot, group_by: str, cumulative: bool=False) + + Compute the differences with an old snapshot. Get statistics as a sorted + list of :class:`StatisticDiff` instances grouped by *group_by*. + + See the :meth:`statistics` method for *group_by* and *cumulative* + parameters. + + The result is sorted from the biggest to the smallest by: absolute value + of :attr:`StatisticDiff.size_diff`, :attr:`StatisticDiff.size`, absolute + value of :attr:`StatisticDiff.count_diff`, :attr:`Statistic.count` and + then by :attr:`StatisticDiff.traceback`. + + + .. method:: dump(filename) + + Write the snapshot into a file. + + Use :meth:`load` to reload the snapshot. + + + .. 
method:: filter_traces(filters) + + Create a new :class:`Snapshot` instance with a filtered :attr:`traces` + sequence, *filters* is a list of :class:`Filter` instances. If *filters* + is an empty list, return a new :class:`Snapshot` instance with a copy of + the traces. + + All inclusive filters are applied at once, a trace is ignored if no + inclusive filters match it. A trace is ignored if at least one exclusive + filter matches it. + + + .. classmethod:: load(filename) + + Load a snapshot from a file. + + See also :meth:`dump`. + + + .. method:: statistics(group_by: str, cumulative: bool=False) + + Get statistics as a sorted list of :class:`Statistic` instances grouped + by *group_by*: + + ===================== ======================== + group_by description + ===================== ======================== + ``'filename'`` filename + ``'lineno'`` filename and line number + ``'traceback'`` traceback + ===================== ======================== + + If *cumulative* is ``True``, cumulate size and count of memory blocks of + all frames of the traceback of a trace, not only the most recent frame. + The cumulative mode can only be used with *group_by* equal to + ``'filename'`` and ``'lineno'`` and :attr:`traceback_limit` greater than + ``1``. + + The result is sorted from the biggest to the smallest by: + :attr:`Statistic.size`, :attr:`Statistic.count` and then by + :attr:`Statistic.traceback`. + + + .. attribute:: traceback_limit + + Maximum number of frames stored in the traceback of :attr:`traces`: + result of the :func:`get_traceback_limit` when the snapshot was taken. + + .. attribute:: traces + + Traces of all memory blocks allocated by Python: sequence of + :class:`Trace` instances. + + The sequence has an undefined order. Use the :meth:`Snapshot.statistics` + method to get a sorted list of statistics. + + +Statistic +--------- + +.. class:: Statistic + + Statistic on memory allocations. 
+ + :func:`Snapshot.statistics` returns a list of :class:`Statistic` instances. + + See also the :class:`StatisticDiff` class. + + .. attribute:: count + + Number of memory blocks (``int``). + + .. attribute:: size + + Total size of memory blocks in bytes (``int``). + + .. attribute:: traceback + + Traceback where the memory block was allocated, :class:`Traceback` + instance. + + +StatisticDiff +------------- + +.. class:: StatisticDiff + + Statistic difference on memory allocations between an old and a new + :class:`Snapshot` instance. + + :func:`Snapshot.compare_to` returns a list of :class:`StatisticDiff` + instances. See also the :class:`Statistic` class. + + .. attribute:: count + + Number of memory blocks in the new snapshot (``int``): ``0`` if + the memory blocks have been released in the new snapshot. + + .. attribute:: count_diff + + Difference of number of memory blocks between the old and the new + snapshots (``int``): ``0`` if the memory blocks have been allocated in + the new snapshot. + + .. attribute:: size + + Total size of memory blocks in bytes in the new snapshot (``int``): + ``0`` if the memory blocks have been released in the new snapshot. + + .. attribute:: size_diff + + Difference of total size of memory blocks in bytes between the old and + the new snapshots (``int``): ``0`` if the memory blocks have been + allocated in the new snapshot. + + .. attribute:: traceback + + Traceback where the memory blocks were allocated, :class:`Traceback` + instance. + + +Trace +----- + +.. class:: Trace + + Trace of a memory block. + + The :attr:`Snapshot.traces` attribute is a sequence of :class:`Trace` + instances. + + .. attribute:: size + + Size of the memory block in bytes (``int``). + + .. attribute:: traceback + + Traceback where the memory block was allocated, :class:`Traceback` + instance. + + +Traceback +--------- + +.. class:: Traceback + + Sequence of :class:`Frame` instances sorted from the most recent frame to + the oldest frame. 
+ + A traceback contains at least ``1`` frame. If the ``tracemalloc`` module + failed to get a frame, the filename ``""`` at line number ``0`` is + used. + + When a snapshot is taken, tracebacks of traces are limited to + :func:`get_traceback_limit` frames. See the :func:`take_snapshot` function. + + The :attr:`Trace.traceback` attribute is a :class:`Traceback` + instance. + + diff --git a/Doc/license.rst b/Doc/license.rst index 5e6ed26ece5..ddc69b839d7 100644 --- a/Doc/license.rst +++ b/Doc/license.rst @@ -893,3 +893,44 @@ used for the build:: Jean-loup Gailly Mark Adler jloup@gzip.org madler@alumni.caltech.edu + +cfuhash +------- + +The implementation of the hash table used by the :mod:`tracemalloc` module is based +on the cfuhash project:: + + Copyright (c) 2005 Don Owens + All rights reserved. + + This code is released under the BSD license: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index e8a329e3b67..cfd604565f8 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -376,11 +376,15 @@ Miscellaneous options .. cmdoption:: -X Reserved for various implementation-specific options. CPython currently - defines two possible values: + defines the following possible values: * ``-X faulthandler`` to enable :mod:`faulthandler`; * ``-X showrefcount`` to enable the output of the total reference count and memory blocks (only works on debug builds); + * ``-X tracemalloc`` to enable :mod:`tracemalloc`. + * ``-X tracemalloc=NFRAME`` to enable :mod:`tracemalloc`, *NFRAME* is the + maximum number of frames stored in a trace: see the + :func:`tracemalloc.set_traceback_limit` function. It also allows to pass arbitrary values and retrieve them through the :data:`sys._xoptions` dictionary. @@ -392,7 +396,7 @@ Miscellaneous options The ``-X faulthandler`` option. .. versionadded:: 3.4 - The ``-X showrefcount`` option. + The ``-X showrefcount`` and ``-X tracemalloc`` options. Options you shouldn't use @@ -594,6 +598,16 @@ conflict. .. versionadded:: 3.3 +.. envvar:: PYTHONTRACEMALLOC + + If this environment variable is set to a non-empty string, all memory + allocations made by Python are traced by the :mod:`tracemalloc` module. + The value of the variable is the maximum number of frames stored in a trace: + see the :func:`tracemalloc.set_traceback_limit` function. + + .. 
versionadded:: 3.4 + + Debug-mode variables ~~~~~~~~~~~~~~~~~~~~ diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 87c039aa023..a242a4727d1 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2154,3 +2154,22 @@ def patch(test_instance, object_to_patch, attr_name, new_value): # actually override the attribute setattr(object_to_patch, attr_name, new_value) + + +def run_in_subinterp(code): + """ + Run code in a subinterpreter. Raise unittest.SkipTest if the tracemalloc + module is enabled. + """ + # Issue #10915, #15751: PyGILState_*() functions don't work with + # sub-interpreters, the tracemalloc module uses these functions internally + try: + import tracemalloc + except ImportError: + pass + else: + if tracemalloc.is_tracing(): + raise unittest.SkipTest("run_in_subinterp() cannot be used " + "if tracemalloc module is tracing " + "memory allocations") + return _testcapi.run_in_subinterp(code) diff --git a/Lib/test/test_atexit.py b/Lib/test/test_atexit.py index b641015b706..84644f17a88 100644 --- a/Lib/test/test_atexit.py +++ b/Lib/test/test_atexit.py @@ -158,7 +158,7 @@ class SubinterpreterTest(unittest.TestCase): atexit.register(f) del atexit """ - ret = _testcapi.run_in_subinterp(code) + ret = support.run_in_subinterp(code) self.assertEqual(ret, 0) self.assertEqual(atexit._ncallbacks(), n) @@ -173,7 +173,7 @@ class SubinterpreterTest(unittest.TestCase): atexit.register(f) atexit.__atexit = atexit """ - ret = _testcapi.run_in_subinterp(code) + ret = support.run_in_subinterp(code) self.assertEqual(ret, 0) self.assertEqual(atexit._ncallbacks(), n) diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py index d37c0578a3e..000079e4a6b 100644 --- a/Lib/test/test_capi.py +++ b/Lib/test/test_capi.py @@ -205,7 +205,7 @@ class SubinterpreterTest(unittest.TestCase): pickle.dump(id(builtins), f) """.format(w) with open(r, "rb") as f: - ret = _testcapi.run_in_subinterp(code) + ret = support.run_in_subinterp(code) 
self.assertEqual(ret, 0) self.assertNotEqual(pickle.load(f), id(sys.modules)) self.assertNotEqual(pickle.load(f), id(builtins)) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 66eace021e6..a84577cc0f0 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -853,7 +853,7 @@ class SubinterpThreadingTests(BaseTestCase): os.write(%d, b"x") threading.Thread(target=f).start() """ % (w,) - ret = _testcapi.run_in_subinterp(code) + ret = test.support.run_in_subinterp(code) self.assertEqual(ret, 0) # The thread was joined properly. self.assertEqual(os.read(r, 1), b"x") @@ -885,7 +885,7 @@ class SubinterpThreadingTests(BaseTestCase): os.write(%d, b"x") threading.Thread(target=f).start() """ % (w,) - ret = _testcapi.run_in_subinterp(code) + ret = test.support.run_in_subinterp(code) self.assertEqual(ret, 0) # The thread was joined properly. self.assertEqual(os.read(r, 1), b"x") diff --git a/Lib/test/test_tracemalloc.py b/Lib/test/test_tracemalloc.py new file mode 100644 index 00000000000..ecb5aee184f --- /dev/null +++ b/Lib/test/test_tracemalloc.py @@ -0,0 +1,797 @@ +import _tracemalloc +import contextlib +import datetime +import os +import sys +import tracemalloc +import unittest +from unittest.mock import patch +from test.script_helper import assert_python_ok, assert_python_failure +from test import support +try: + import threading +except ImportError: + threading = None + +EMPTY_STRING_SIZE = sys.getsizeof(b'') + +def get_frames(nframe, lineno_delta): + frames = [] + frame = sys._getframe(1) + for index in range(nframe): + code = frame.f_code + lineno = frame.f_lineno + lineno_delta + frames.append((code.co_filename, lineno)) + lineno_delta = 0 + frame = frame.f_back + if frame is None: + break + return tuple(frames) + +def allocate_bytes(size): + nframe = tracemalloc.get_traceback_limit() + bytes_len = (size - EMPTY_STRING_SIZE) + frames = get_frames(nframe, 1) + data = b'x' * bytes_len + return data, 
tracemalloc.Traceback(frames) + +def create_snapshots(): + traceback_limit = 2 + + raw_traces = [ + (10, (('a.py', 2), ('b.py', 4))), + (10, (('a.py', 2), ('b.py', 4))), + (10, (('a.py', 2), ('b.py', 4))), + + (2, (('a.py', 5), ('b.py', 4))), + + (66, (('b.py', 1),)), + + (7, (('', 0),)), + ] + snapshot = tracemalloc.Snapshot(raw_traces, traceback_limit) + + raw_traces2 = [ + (10, (('a.py', 2), ('b.py', 4))), + (10, (('a.py', 2), ('b.py', 4))), + (10, (('a.py', 2), ('b.py', 4))), + + (2, (('a.py', 5), ('b.py', 4))), + (5000, (('a.py', 5), ('b.py', 4))), + + (400, (('c.py', 578),)), + ] + snapshot2 = tracemalloc.Snapshot(raw_traces2, traceback_limit) + + return (snapshot, snapshot2) + +def frame(filename, lineno): + return tracemalloc._Frame((filename, lineno)) + +def traceback(*frames): + return tracemalloc.Traceback(frames) + +def traceback_lineno(filename, lineno): + return traceback((filename, lineno)) + +def traceback_filename(filename): + return traceback_lineno(filename, 0) + + +class TestTracemallocEnabled(unittest.TestCase): + def setUp(self): + if tracemalloc.is_tracing(): + self.skipTest("tracemalloc must be stopped before the test") + + tracemalloc.set_traceback_limit(1) + tracemalloc.start() + + def tearDown(self): + tracemalloc.stop() + + def test_get_tracemalloc_memory(self): + data = [allocate_bytes(123) for count in range(1000)] + size = tracemalloc.get_tracemalloc_memory() + self.assertGreaterEqual(size, 0) + + tracemalloc.clear_traces() + size2 = tracemalloc.get_tracemalloc_memory() + self.assertGreaterEqual(size2, 0) + self.assertLessEqual(size2, size) + + def test_get_object_traceback(self): + tracemalloc.clear_traces() + obj_size = 12345 + obj, obj_traceback = allocate_bytes(obj_size) + traceback = tracemalloc.get_object_traceback(obj) + self.assertEqual(traceback, obj_traceback) + + def test_set_traceback_limit(self): + obj_size = 10 + + nframe = tracemalloc.get_traceback_limit() + self.addCleanup(tracemalloc.set_traceback_limit, nframe) + + 
self.assertRaises(ValueError, tracemalloc.set_traceback_limit, -1) + + tracemalloc.clear_traces() + tracemalloc.set_traceback_limit(10) + obj2, obj2_traceback = allocate_bytes(obj_size) + traceback = tracemalloc.get_object_traceback(obj2) + self.assertEqual(len(traceback), 10) + self.assertEqual(traceback, obj2_traceback) + + tracemalloc.clear_traces() + tracemalloc.set_traceback_limit(1) + obj, obj_traceback = allocate_bytes(obj_size) + traceback = tracemalloc.get_object_traceback(obj) + self.assertEqual(len(traceback), 1) + self.assertEqual(traceback, obj_traceback) + + + def find_trace(self, traces, traceback): + for trace in traces: + if trace[1] == traceback._frames: + return trace + + self.fail("trace not found") + + def test_get_traces(self): + tracemalloc.clear_traces() + obj_size = 12345 + obj, obj_traceback = allocate_bytes(obj_size) + + traces = tracemalloc._get_traces() + trace = self.find_trace(traces, obj_traceback) + + self.assertIsInstance(trace, tuple) + size, traceback = trace + self.assertEqual(size, obj_size) + self.assertEqual(traceback, obj_traceback._frames) + + tracemalloc.stop() + self.assertEqual(tracemalloc._get_traces(), []) + + + def test_get_traces_intern_traceback(self): + # dummy wrappers to get more useful and identical frames in the traceback + def allocate_bytes2(size): + return allocate_bytes(size) + def allocate_bytes3(size): + return allocate_bytes2(size) + def allocate_bytes4(size): + return allocate_bytes3(size) + + # Ensure that two identical tracebacks are not duplicated + tracemalloc.clear_traces() + tracemalloc.set_traceback_limit(4) + obj_size = 123 + obj1, obj1_traceback = allocate_bytes4(obj_size) + obj2, obj2_traceback = allocate_bytes4(obj_size) + + traces = tracemalloc._get_traces() + + trace1 = self.find_trace(traces, obj1_traceback) + trace2 = self.find_trace(traces, obj2_traceback) + size1, traceback1 = trace1 + size2, traceback2 = trace2 + self.assertEqual(traceback2, traceback1) + self.assertIs(traceback2, 
traceback1) + + def test_get_traced_memory(self): + # Python allocates some internals objects, so the test must tolerate + # a small difference between the expected size and the real usage + max_error = 2048 + + # allocate one object + obj_size = 1024 * 1024 + tracemalloc.clear_traces() + obj, obj_traceback = allocate_bytes(obj_size) + size, max_size = tracemalloc.get_traced_memory() + self.assertGreaterEqual(size, obj_size) + self.assertGreaterEqual(max_size, size) + + self.assertLessEqual(size - obj_size, max_error) + self.assertLessEqual(max_size - size, max_error) + + # destroy the object + obj = None + size2, max_size2 = tracemalloc.get_traced_memory() + self.assertLess(size2, size) + self.assertGreaterEqual(size - size2, obj_size - max_error) + self.assertGreaterEqual(max_size2, max_size) + + # clear_traces() must reset traced memory counters + tracemalloc.clear_traces() + self.assertEqual(tracemalloc.get_traced_memory(), (0, 0)) + + # allocate another object + obj, obj_traceback = allocate_bytes(obj_size) + size, max_size = tracemalloc.get_traced_memory() + self.assertGreater(size, 0) + + # stop() rests also traced memory counters + tracemalloc.stop() + self.assertEqual(tracemalloc.get_traced_memory(), (0, 0)) + + def test_clear_traces(self): + obj, obj_traceback = allocate_bytes(123) + traceback = tracemalloc.get_object_traceback(obj) + self.assertIsNotNone(traceback) + + tracemalloc.clear_traces() + traceback2 = tracemalloc.get_object_traceback(obj) + self.assertIsNone(traceback2) + + def test_is_tracing(self): + tracemalloc.stop() + self.assertFalse(tracemalloc.is_tracing()) + + tracemalloc.start() + self.assertTrue(tracemalloc.is_tracing()) + + def test_snapshot(self): + obj, source = allocate_bytes(123) + + # take a snapshot + snapshot = tracemalloc.take_snapshot() + + # write on disk + snapshot.dump(support.TESTFN) + self.addCleanup(support.unlink, support.TESTFN) + + # load from disk + snapshot2 = tracemalloc.Snapshot.load(support.TESTFN) + 
self.assertEqual(snapshot2.traces, snapshot.traces) + + # tracemalloc must be tracing memory allocations to take a snapshot + tracemalloc.stop() + with self.assertRaises(RuntimeError) as cm: + tracemalloc.take_snapshot() + self.assertEqual(str(cm.exception), + "the tracemalloc module must be tracing memory " + "allocations to take a snapshot") + + def test_snapshot_save_attr(self): + # take a snapshot with a new attribute + snapshot = tracemalloc.take_snapshot() + snapshot.test_attr = "new" + snapshot.dump(support.TESTFN) + self.addCleanup(support.unlink, support.TESTFN) + + # load() should recreates the attribute + snapshot2 = tracemalloc.Snapshot.load(support.TESTFN) + self.assertEqual(snapshot2.test_attr, "new") + + def fork_child(self): + if not tracemalloc.is_tracing(): + return 2 + + obj_size = 12345 + obj, obj_traceback = allocate_bytes(obj_size) + traceback = tracemalloc.get_object_traceback(obj) + if traceback is None: + return 3 + + # everything is fine + return 0 + + @unittest.skipUnless(hasattr(os, 'fork'), 'need os.fork()') + def test_fork(self): + # check that tracemalloc is still working after fork + pid = os.fork() + if not pid: + # child + exitcode = 1 + try: + exitcode = self.fork_child() + finally: + os._exit(exitcode) + else: + pid2, status = os.waitpid(pid, 0) + self.assertTrue(os.WIFEXITED(status)) + exitcode = os.WEXITSTATUS(status) + self.assertEqual(exitcode, 0) + + +class TestSnapshot(unittest.TestCase): + maxDiff = 4000 + + def test_create_snapshot(self): + raw_traces = [(5, (('a.py', 2),))] + + with contextlib.ExitStack() as stack: + stack.enter_context(patch.object(tracemalloc, 'is_tracing', + return_value=True)) + stack.enter_context(patch.object(tracemalloc, 'get_traceback_limit', + return_value=5)) + stack.enter_context(patch.object(tracemalloc, '_get_traces', + return_value=raw_traces)) + + snapshot = tracemalloc.take_snapshot() + self.assertEqual(snapshot.traceback_limit, 5) + self.assertEqual(len(snapshot.traces), 1) + trace = 
snapshot.traces[0] + self.assertEqual(trace.size, 5) + self.assertEqual(len(trace.traceback), 1) + self.assertEqual(trace.traceback[0].filename, 'a.py') + self.assertEqual(trace.traceback[0].lineno, 2) + + def test_filter_traces(self): + snapshot, snapshot2 = create_snapshots() + filter1 = tracemalloc.Filter(False, "b.py") + filter2 = tracemalloc.Filter(True, "a.py", 2) + filter3 = tracemalloc.Filter(True, "a.py", 5) + + original_traces = list(snapshot.traces._traces) + + # exclude b.py + snapshot3 = snapshot.filter_traces((filter1,)) + self.assertEqual(snapshot3.traces._traces, [ + (10, (('a.py', 2), ('b.py', 4))), + (10, (('a.py', 2), ('b.py', 4))), + (10, (('a.py', 2), ('b.py', 4))), + (2, (('a.py', 5), ('b.py', 4))), + (7, (('', 0),)), + ]) + + # filter_traces() must not touch the original snapshot + self.assertEqual(snapshot.traces._traces, original_traces) + + # only include two lines of a.py + snapshot4 = snapshot3.filter_traces((filter2, filter3)) + self.assertEqual(snapshot4.traces._traces, [ + (10, (('a.py', 2), ('b.py', 4))), + (10, (('a.py', 2), ('b.py', 4))), + (10, (('a.py', 2), ('b.py', 4))), + (2, (('a.py', 5), ('b.py', 4))), + ]) + + # No filter: just duplicate the snapshot + snapshot5 = snapshot.filter_traces(()) + self.assertIsNot(snapshot5, snapshot) + self.assertIsNot(snapshot5.traces, snapshot.traces) + self.assertEqual(snapshot5.traces, snapshot.traces) + + def test_snapshot_group_by_line(self): + snapshot, snapshot2 = create_snapshots() + tb_0 = traceback_lineno('', 0) + tb_a_2 = traceback_lineno('a.py', 2) + tb_a_5 = traceback_lineno('a.py', 5) + tb_b_1 = traceback_lineno('b.py', 1) + tb_c_578 = traceback_lineno('c.py', 578) + + # stats per file and line + stats1 = snapshot.statistics('lineno') + self.assertEqual(stats1, [ + tracemalloc.Statistic(tb_b_1, 66, 1), + tracemalloc.Statistic(tb_a_2, 30, 3), + tracemalloc.Statistic(tb_0, 7, 1), + tracemalloc.Statistic(tb_a_5, 2, 1), + ]) + + # stats per file and line (2) + stats2 = 
snapshot2.statistics('lineno') + self.assertEqual(stats2, [ + tracemalloc.Statistic(tb_a_5, 5002, 2), + tracemalloc.Statistic(tb_c_578, 400, 1), + tracemalloc.Statistic(tb_a_2, 30, 3), + ]) + + # stats diff per file and line + statistics = snapshot2.compare_to(snapshot, 'lineno') + self.assertEqual(statistics, [ + tracemalloc.StatisticDiff(tb_a_5, 5002, 5000, 2, 1), + tracemalloc.StatisticDiff(tb_c_578, 400, 400, 1, 1), + tracemalloc.StatisticDiff(tb_b_1, 0, -66, 0, -1), + tracemalloc.StatisticDiff(tb_0, 0, -7, 0, -1), + tracemalloc.StatisticDiff(tb_a_2, 30, 0, 3, 0), + ]) + + def test_snapshot_group_by_file(self): + snapshot, snapshot2 = create_snapshots() + tb_0 = traceback_filename('') + tb_a = traceback_filename('a.py') + tb_b = traceback_filename('b.py') + tb_c = traceback_filename('c.py') + + # stats per file + stats1 = snapshot.statistics('filename') + self.assertEqual(stats1, [ + tracemalloc.Statistic(tb_b, 66, 1), + tracemalloc.Statistic(tb_a, 32, 4), + tracemalloc.Statistic(tb_0, 7, 1), + ]) + + # stats per file (2) + stats2 = snapshot2.statistics('filename') + self.assertEqual(stats2, [ + tracemalloc.Statistic(tb_a, 5032, 5), + tracemalloc.Statistic(tb_c, 400, 1), + ]) + + # stats diff per file + diff = snapshot2.compare_to(snapshot, 'filename') + self.assertEqual(diff, [ + tracemalloc.StatisticDiff(tb_a, 5032, 5000, 5, 1), + tracemalloc.StatisticDiff(tb_c, 400, 400, 1, 1), + tracemalloc.StatisticDiff(tb_b, 0, -66, 0, -1), + tracemalloc.StatisticDiff(tb_0, 0, -7, 0, -1), + ]) + + def test_snapshot_group_by_traceback(self): + snapshot, snapshot2 = create_snapshots() + + # stats per file + tb1 = traceback(('a.py', 2), ('b.py', 4)) + tb2 = traceback(('a.py', 5), ('b.py', 4)) + tb3 = traceback(('b.py', 1)) + tb4 = traceback(('', 0)) + stats1 = snapshot.statistics('traceback') + self.assertEqual(stats1, [ + tracemalloc.Statistic(tb3, 66, 1), + tracemalloc.Statistic(tb1, 30, 3), + tracemalloc.Statistic(tb4, 7, 1), + tracemalloc.Statistic(tb2, 2, 1), + ]) + + # 
stats per file (2) + tb5 = traceback(('c.py', 578)) + stats2 = snapshot2.statistics('traceback') + self.assertEqual(stats2, [ + tracemalloc.Statistic(tb2, 5002, 2), + tracemalloc.Statistic(tb5, 400, 1), + tracemalloc.Statistic(tb1, 30, 3), + ]) + + # stats diff per file + diff = snapshot2.compare_to(snapshot, 'traceback') + self.assertEqual(diff, [ + tracemalloc.StatisticDiff(tb2, 5002, 5000, 2, 1), + tracemalloc.StatisticDiff(tb5, 400, 400, 1, 1), + tracemalloc.StatisticDiff(tb3, 0, -66, 0, -1), + tracemalloc.StatisticDiff(tb4, 0, -7, 0, -1), + tracemalloc.StatisticDiff(tb1, 30, 0, 3, 0), + ]) + + self.assertRaises(ValueError, + snapshot.statistics, 'traceback', cumulative=True) + + def test_snapshot_group_by_cumulative(self): + snapshot, snapshot2 = create_snapshots() + tb_0 = traceback_filename('') + tb_a = traceback_filename('a.py') + tb_b = traceback_filename('b.py') + tb_a_2 = traceback_lineno('a.py', 2) + tb_a_5 = traceback_lineno('a.py', 5) + tb_b_1 = traceback_lineno('b.py', 1) + tb_b_4 = traceback_lineno('b.py', 4) + + # per file + stats = snapshot.statistics('filename', True) + self.assertEqual(stats, [ + tracemalloc.Statistic(tb_b, 98, 5), + tracemalloc.Statistic(tb_a, 32, 4), + tracemalloc.Statistic(tb_0, 7, 1), + ]) + + # per line + stats = snapshot.statistics('lineno', True) + self.assertEqual(stats, [ + tracemalloc.Statistic(tb_b_1, 66, 1), + tracemalloc.Statistic(tb_b_4, 32, 4), + tracemalloc.Statistic(tb_a_2, 30, 3), + tracemalloc.Statistic(tb_0, 7, 1), + tracemalloc.Statistic(tb_a_5, 2, 1), + ]) + + def test_trace_format(self): + snapshot, snapshot2 = create_snapshots() + trace = snapshot.traces[0] + self.assertEqual(str(trace), 'a.py:2: 10 B') + traceback = trace.traceback + self.assertEqual(str(traceback), 'a.py:2') + frame = traceback[0] + self.assertEqual(str(frame), 'a.py:2') + + def test_statistic_format(self): + snapshot, snapshot2 = create_snapshots() + stats = snapshot.statistics('lineno') + stat = stats[0] + self.assertEqual(str(stat), 
+ 'b.py:1: size=66 B, count=1, average=66 B') + + def test_statistic_diff_format(self): + snapshot, snapshot2 = create_snapshots() + stats = snapshot2.compare_to(snapshot, 'lineno') + stat = stats[0] + self.assertEqual(str(stat), + 'a.py:5: size=5002 B (+5000 B), count=2 (+1), average=2501 B') + + + +class TestFilters(unittest.TestCase): + maxDiff = 2048 + + def test_filter_attributes(self): + # test default values + f = tracemalloc.Filter(True, "abc") + self.assertEqual(f.inclusive, True) + self.assertEqual(f.filename_pattern, "abc") + self.assertIsNone(f.lineno) + self.assertEqual(f.all_frames, False) + + # test custom values + f = tracemalloc.Filter(False, "test.py", 123, True) + self.assertEqual(f.inclusive, False) + self.assertEqual(f.filename_pattern, "test.py") + self.assertEqual(f.lineno, 123) + self.assertEqual(f.all_frames, True) + + # parameters passed by keyword + f = tracemalloc.Filter(inclusive=False, filename_pattern="test.py", lineno=123, all_frames=True) + self.assertEqual(f.inclusive, False) + self.assertEqual(f.filename_pattern, "test.py") + self.assertEqual(f.lineno, 123) + self.assertEqual(f.all_frames, True) + + # read-only attribute + self.assertRaises(AttributeError, setattr, f, "filename_pattern", "abc") + + def test_filter_match(self): + # filter without line number + f = tracemalloc.Filter(True, "abc") + self.assertTrue(f._match_frame("abc", 0)) + self.assertTrue(f._match_frame("abc", 5)) + self.assertTrue(f._match_frame("abc", 10)) + self.assertFalse(f._match_frame("12356", 0)) + self.assertFalse(f._match_frame("12356", 5)) + self.assertFalse(f._match_frame("12356", 10)) + + f = tracemalloc.Filter(False, "abc") + self.assertFalse(f._match_frame("abc", 0)) + self.assertFalse(f._match_frame("abc", 5)) + self.assertFalse(f._match_frame("abc", 10)) + self.assertTrue(f._match_frame("12356", 0)) + self.assertTrue(f._match_frame("12356", 5)) + self.assertTrue(f._match_frame("12356", 10)) + + # filter with line number > 0 + f = 
tracemalloc.Filter(True, "abc", 5) + self.assertFalse(f._match_frame("abc", 0)) + self.assertTrue(f._match_frame("abc", 5)) + self.assertFalse(f._match_frame("abc", 10)) + self.assertFalse(f._match_frame("12356", 0)) + self.assertFalse(f._match_frame("12356", 5)) + self.assertFalse(f._match_frame("12356", 10)) + + f = tracemalloc.Filter(False, "abc", 5) + self.assertTrue(f._match_frame("abc", 0)) + self.assertFalse(f._match_frame("abc", 5)) + self.assertTrue(f._match_frame("abc", 10)) + self.assertTrue(f._match_frame("12356", 0)) + self.assertTrue(f._match_frame("12356", 5)) + self.assertTrue(f._match_frame("12356", 10)) + + # filter with line number 0 + f = tracemalloc.Filter(True, "abc", 0) + self.assertTrue(f._match_frame("abc", 0)) + self.assertFalse(f._match_frame("abc", 5)) + self.assertFalse(f._match_frame("abc", 10)) + self.assertFalse(f._match_frame("12356", 0)) + self.assertFalse(f._match_frame("12356", 5)) + self.assertFalse(f._match_frame("12356", 10)) + + f = tracemalloc.Filter(False, "abc", 0) + self.assertFalse(f._match_frame("abc", 0)) + self.assertTrue(f._match_frame("abc", 5)) + self.assertTrue(f._match_frame("abc", 10)) + self.assertTrue(f._match_frame("12356", 0)) + self.assertTrue(f._match_frame("12356", 5)) + self.assertTrue(f._match_frame("12356", 10)) + + def test_filter_match_filename(self): + def fnmatch(inclusive, filename, pattern): + f = tracemalloc.Filter(inclusive, pattern) + return f._match_frame(filename, 0) + + self.assertTrue(fnmatch(True, "abc", "abc")) + self.assertFalse(fnmatch(True, "12356", "abc")) + self.assertFalse(fnmatch(True, "", "abc")) + + self.assertFalse(fnmatch(False, "abc", "abc")) + self.assertTrue(fnmatch(False, "12356", "abc")) + self.assertTrue(fnmatch(False, "", "abc")) + + def test_filter_match_filename_joker(self): + def fnmatch(filename, pattern): + filter = tracemalloc.Filter(True, pattern) + return filter._match_frame(filename, 0) + + # empty string + self.assertFalse(fnmatch('abc', '')) + 
self.assertFalse(fnmatch('', 'abc')) + self.assertTrue(fnmatch('', '')) + self.assertTrue(fnmatch('', '*')) + + # no * + self.assertTrue(fnmatch('abc', 'abc')) + self.assertFalse(fnmatch('abc', 'abcd')) + self.assertFalse(fnmatch('abc', 'def')) + + # a* + self.assertTrue(fnmatch('abc', 'a*')) + self.assertTrue(fnmatch('abc', 'abc*')) + self.assertFalse(fnmatch('abc', 'b*')) + self.assertFalse(fnmatch('abc', 'abcd*')) + + # a*b + self.assertTrue(fnmatch('abc', 'a*c')) + self.assertTrue(fnmatch('abcdcx', 'a*cx')) + self.assertFalse(fnmatch('abb', 'a*c')) + self.assertFalse(fnmatch('abcdce', 'a*cx')) + + # a*b*c + self.assertTrue(fnmatch('abcde', 'a*c*e')) + self.assertTrue(fnmatch('abcbdefeg', 'a*bd*eg')) + self.assertFalse(fnmatch('abcdd', 'a*c*e')) + self.assertFalse(fnmatch('abcbdefef', 'a*bd*eg')) + + # replace .pyc and .pyo suffix with .py + self.assertTrue(fnmatch('a.pyc', 'a.py')) + self.assertTrue(fnmatch('a.pyo', 'a.py')) + self.assertTrue(fnmatch('a.py', 'a.pyc')) + self.assertTrue(fnmatch('a.py', 'a.pyo')) + + if os.name == 'nt': + # case insensitive + self.assertTrue(fnmatch('aBC', 'ABc')) + self.assertTrue(fnmatch('aBcDe', 'Ab*dE')) + + self.assertTrue(fnmatch('a.pyc', 'a.PY')) + self.assertTrue(fnmatch('a.PYO', 'a.py')) + self.assertTrue(fnmatch('a.py', 'a.PYC')) + self.assertTrue(fnmatch('a.PY', 'a.pyo')) + else: + # case sensitive + self.assertFalse(fnmatch('aBC', 'ABc')) + self.assertFalse(fnmatch('aBcDe', 'Ab*dE')) + + self.assertFalse(fnmatch('a.pyc', 'a.PY')) + self.assertFalse(fnmatch('a.PYO', 'a.py')) + self.assertFalse(fnmatch('a.py', 'a.PYC')) + self.assertFalse(fnmatch('a.PY', 'a.pyo')) + + if os.name == 'nt': + # normalize alternate separator "/" to the standard separator "\" + self.assertTrue(fnmatch(r'a/b', r'a\b')) + self.assertTrue(fnmatch(r'a\b', r'a/b')) + self.assertTrue(fnmatch(r'a/b\c', r'a\b/c')) + self.assertTrue(fnmatch(r'a/b/c', r'a\b\c')) + else: + # there is no alternate separator + self.assertFalse(fnmatch(r'a/b', r'a\b')) + 
self.assertFalse(fnmatch(r'a\b', r'a/b')) + self.assertFalse(fnmatch(r'a/b\c', r'a\b/c')) + self.assertFalse(fnmatch(r'a/b/c', r'a\b\c')) + + def test_filter_match_trace(self): + t1 = (("a.py", 2), ("b.py", 3)) + t2 = (("b.py", 4), ("b.py", 5)) + t3 = (("c.py", 5), ('', 0)) + unknown = (('', 0),) + + f = tracemalloc.Filter(True, "b.py", all_frames=True) + self.assertTrue(f._match_traceback(t1)) + self.assertTrue(f._match_traceback(t2)) + self.assertFalse(f._match_traceback(t3)) + self.assertFalse(f._match_traceback(unknown)) + + f = tracemalloc.Filter(True, "b.py", all_frames=False) + self.assertFalse(f._match_traceback(t1)) + self.assertTrue(f._match_traceback(t2)) + self.assertFalse(f._match_traceback(t3)) + self.assertFalse(f._match_traceback(unknown)) + + f = tracemalloc.Filter(False, "b.py", all_frames=True) + self.assertFalse(f._match_traceback(t1)) + self.assertFalse(f._match_traceback(t2)) + self.assertTrue(f._match_traceback(t3)) + self.assertTrue(f._match_traceback(unknown)) + + f = tracemalloc.Filter(False, "b.py", all_frames=False) + self.assertTrue(f._match_traceback(t1)) + self.assertFalse(f._match_traceback(t2)) + self.assertTrue(f._match_traceback(t3)) + self.assertTrue(f._match_traceback(unknown)) + + f = tracemalloc.Filter(False, "", all_frames=False) + self.assertTrue(f._match_traceback(t1)) + self.assertTrue(f._match_traceback(t2)) + self.assertTrue(f._match_traceback(t3)) + self.assertFalse(f._match_traceback(unknown)) + + f = tracemalloc.Filter(True, "", all_frames=True) + self.assertFalse(f._match_traceback(t1)) + self.assertFalse(f._match_traceback(t2)) + self.assertTrue(f._match_traceback(t3)) + self.assertTrue(f._match_traceback(unknown)) + + f = tracemalloc.Filter(False, "", all_frames=True) + self.assertTrue(f._match_traceback(t1)) + self.assertTrue(f._match_traceback(t2)) + self.assertFalse(f._match_traceback(t3)) + self.assertFalse(f._match_traceback(unknown)) + + +class TestCommandLine(unittest.TestCase): + def test_env_var(self): + # 
not tracing by default + code = 'import tracemalloc; print(tracemalloc.is_tracing())' + ok, stdout, stderr = assert_python_ok('-c', code) + stdout = stdout.rstrip() + self.assertEqual(stdout, b'False') + + # PYTHON* environment varibles must be ignored when -E option is + # present + code = 'import tracemalloc; print(tracemalloc.is_tracing())' + ok, stdout, stderr = assert_python_ok('-E', '-c', code, PYTHONTRACEMALLOC='1') + stdout = stdout.rstrip() + self.assertEqual(stdout, b'False') + + # tracing at startup + code = 'import tracemalloc; print(tracemalloc.is_tracing())' + ok, stdout, stderr = assert_python_ok('-c', code, PYTHONTRACEMALLOC='1') + stdout = stdout.rstrip() + self.assertEqual(stdout, b'True') + + # start and set the number of frames + code = 'import tracemalloc; print(tracemalloc.get_traceback_limit())' + ok, stdout, stderr = assert_python_ok('-c', code, PYTHONTRACEMALLOC='10') + stdout = stdout.rstrip() + self.assertEqual(stdout, b'10') + + def test_env_var_invalid(self): + for nframe in (-1, 0, 5000): + with self.subTest(nframe=nframe): + with support.SuppressCrashReport(): + ok, stdout, stderr = assert_python_failure( + '-c', 'pass', + PYTHONTRACEMALLOC=str(nframe)) + self.assertIn(b'PYTHONTRACEMALLOC must be an integer ' + b'in range [1; 100]', + stderr) + + def test_sys_xoptions(self): + for xoptions, nframe in ( + ('tracemalloc', 1), + ('tracemalloc=1', 1), + ('tracemalloc=15', 15), + ): + with self.subTest(xoptions=xoptions, nframe=nframe): + code = 'import tracemalloc; print(tracemalloc.get_traceback_limit())' + ok, stdout, stderr = assert_python_ok('-X', xoptions, '-c', code) + stdout = stdout.rstrip() + self.assertEqual(stdout, str(nframe).encode('ascii')) + + def test_sys_xoptions_invalid(self): + for nframe in (-1, 0, 5000): + with self.subTest(nframe=nframe): + with support.SuppressCrashReport(): + args = ('-X', 'tracemalloc=%s' % nframe, '-c', 'pass') + ok, stdout, stderr = assert_python_failure(*args) + self.assertIn(b'-X 
tracemalloc=NFRAME: number of frame must ' + b'be an integer in range [1; 100]', + stderr) + + +def test_main(): + support.run_unittest( + TestTracemallocEnabled, + TestSnapshot, + TestFilters, + TestCommandLine, + ) + +if __name__ == "__main__": + test_main() diff --git a/Lib/tracemalloc.py b/Lib/tracemalloc.py new file mode 100644 index 00000000000..7780eca77d7 --- /dev/null +++ b/Lib/tracemalloc.py @@ -0,0 +1,464 @@ +from collections import Sequence +from functools import total_ordering +import fnmatch +import os.path +import pickle + +# Import types and functions implemented in C +from _tracemalloc import * +from _tracemalloc import _get_object_traceback, _get_traces + + +def _format_size(size, sign): + for unit in ('B', 'KiB', 'MiB', 'GiB', 'TiB'): + if abs(size) < 100 and unit != 'B': + # 3 digits (xx.x UNIT) + if sign: + return "%+.1f %s" % (size, unit) + else: + return "%.1f %s" % (size, unit) + if abs(size) < 10 * 1024 or unit == 'TiB': + # 4 or 5 digits (xxxx UNIT) + if sign: + return "%+.0f %s" % (size, unit) + else: + return "%.0f %s" % (size, unit) + size /= 1024 + + +class Statistic: + """ + Statistic difference on memory allocations between two Snapshot instance. 
+ """ + + __slots__ = ('traceback', 'size', 'count') + + def __init__(self, traceback, size, count): + self.traceback = traceback + self.size = size + self.count = count + + def __hash__(self): + return (self.traceback, self.size, self.count) + + def __eq__(self, other): + return (self.traceback == other.traceback + and self.size == other.size + and self.count == other.count) + + def __str__(self): + text = ("%s: size=%s, count=%i" + % (self.traceback, + _format_size(self.size, False), + self.count)) + if self.count: + average = self.size / self.count + text += ", average=%s" % _format_size(average, False) + return text + + def __repr__(self): + return ('' + % (self.traceback, self.size, self.count)) + + def _sort_key(self): + return (self.size, self.count, self.traceback) + + +class StatisticDiff: + """ + Statistic difference on memory allocations between an old and a new + Snapshot instance. + """ + __slots__ = ('traceback', 'size', 'size_diff', 'count', 'count_diff') + + def __init__(self, traceback, size, size_diff, count, count_diff): + self.traceback = traceback + self.size = size + self.size_diff = size_diff + self.count = count + self.count_diff = count_diff + + def __hash__(self): + return (self.traceback, self.size, self.size_diff, + self.count, self.count_diff) + + def __eq__(self, other): + return (self.traceback == other.traceback + and self.size == other.size + and self.size_diff == other.size_diff + and self.count == other.count + and self.count_diff == other.count_diff) + + def __str__(self): + text = ("%s: size=%s (%s), count=%i (%+i)" + % (self.traceback, + _format_size(self.size, False), + _format_size(self.size_diff, True), + self.count, + self.count_diff)) + if self.count: + average = self.size / self.count + text += ", average=%s" % _format_size(average, False) + return text + + def __repr__(self): + return ('' + % (self.traceback, self.size, self.size_diff, + + self.count, self.count_diff)) + + def _sort_key(self): + return 
(abs(self.size_diff), self.size, + abs(self.count_diff), self.count, + self.traceback) + + +def _compare_grouped_stats(old_group, new_group): + statistics = [] + for traceback, stat in new_group.items(): + previous = old_group.pop(traceback, None) + if previous is not None: + stat = StatisticDiff(traceback, + stat.size, stat.size - previous.size, + stat.count, stat.count - previous.count) + else: + stat = StatisticDiff(traceback, + stat.size, stat.size, + stat.count, stat.count) + statistics.append(stat) + + for traceback, stat in old_group.items(): + stat = StatisticDiff(traceback, 0, -stat.size, 0, -stat.count) + statistics.append(stat) + return statistics + + +@total_ordering +class Frame: + """ + Frame of a traceback. + """ + __slots__ = ("_frame",) + + def __init__(self, frame): + self._frame = frame + + @property + def filename(self): + return self._frame[0] + + @property + def lineno(self): + return self._frame[1] + + def __eq__(self, other): + return (self._frame == other._frame) + + def __lt__(self, other): + return (self._frame < other._frame) + + def __hash__(self): + return hash(self._frame) + + def __str__(self): + return "%s:%s" % (self.filename, self.lineno) + + def __repr__(self): + return "" % (self.filename, self.lineno) + + +@total_ordering +class Traceback(Sequence): + """ + Sequence of Frame instances sorted from the most recent frame + to the oldest frame. 
+ """ + __slots__ = ("_frames",) + + def __init__(self, frames): + Sequence.__init__(self) + self._frames = frames + + def __len__(self): + return len(self._frames) + + def __getitem__(self, index): + trace = self._frames[index] + return Frame(trace) + + def __contains__(self, frame): + return frame._frame in self._frames + + def __hash__(self): + return hash(self._frames) + + def __eq__(self, other): + return (self._frames == other._frames) + + def __lt__(self, other): + return (self._frames < other._frames) + + def __str__(self): + return str(self[0]) + + def __repr__(self): + return "" % (tuple(self),) + + +def get_object_traceback(obj): + """ + Get the traceback where the Python object *obj* was allocated. + Return a Traceback instance. + + Return None if the tracemalloc module is not tracing memory allocations or + did not trace the allocation of the object. + """ + frames = _get_object_traceback(obj) + if frames is not None: + return Traceback(frames) + else: + return None + + +class Trace: + """ + Trace of a memory block. 
+ """ + __slots__ = ("_trace",) + + def __init__(self, trace): + self._trace = trace + + @property + def size(self): + return self._trace[0] + + @property + def traceback(self): + return Traceback(self._trace[1]) + + def __eq__(self, other): + return (self._trace == other._trace) + + def __hash__(self): + return hash(self._trace) + + def __str__(self): + return "%s: %s" % (self.traceback, _format_size(self.size, False)) + + def __repr__(self): + return ("" + % (_format_size(self.size, False), self.traceback)) + + +class _Traces(Sequence): + def __init__(self, traces): + Sequence.__init__(self) + self._traces = traces + + def __len__(self): + return len(self._traces) + + def __getitem__(self, index): + trace = self._traces[index] + return Trace(trace) + + def __contains__(self, trace): + return trace._trace in self._traces + + def __eq__(self, other): + return (self._traces == other._traces) + + def __repr__(self): + return "" % len(self) + + +def _normalize_filename(filename): + filename = os.path.normcase(filename) + if filename.endswith(('.pyc', '.pyo')): + filename = filename[:-1] + return filename + + +class Filter: + def __init__(self, inclusive, filename_pattern, + lineno=None, all_frames=False): + self.inclusive = inclusive + self._filename_pattern = _normalize_filename(filename_pattern) + self.lineno = lineno + self.all_frames = all_frames + + @property + def filename_pattern(self): + return self._filename_pattern + + def __match_frame(self, filename, lineno): + filename = _normalize_filename(filename) + if not fnmatch.fnmatch(filename, self._filename_pattern): + return False + if self.lineno is None: + return True + else: + return (lineno == self.lineno) + + def _match_frame(self, filename, lineno): + return self.__match_frame(filename, lineno) ^ (not self.inclusive) + + def _match_traceback(self, traceback): + if self.all_frames: + if any(self.__match_frame(filename, lineno) + for filename, lineno in traceback): + return self.inclusive + else: + return 
(not self.inclusive) + else: + filename, lineno = traceback[0] + return self._match_frame(filename, lineno) + + +class Snapshot: + """ + Snapshot of traces of memory blocks allocated by Python. + """ + + def __init__(self, traces, traceback_limit): + self.traces = _Traces(traces) + self.traceback_limit = traceback_limit + + def dump(self, filename): + """ + Write the snapshot into a file. + """ + with open(filename, "wb") as fp: + pickle.dump(self, fp, pickle.HIGHEST_PROTOCOL) + + @staticmethod + def load(filename): + """ + Load a snapshot from a file. + """ + with open(filename, "rb") as fp: + return pickle.load(fp) + + def _filter_trace(self, include_filters, exclude_filters, trace): + traceback = trace[1] + if include_filters: + if not any(trace_filter._match_traceback(traceback) + for trace_filter in include_filters): + return False + if exclude_filters: + if any(not trace_filter._match_traceback(traceback) + for trace_filter in exclude_filters): + return False + return True + + def filter_traces(self, filters): + """ + Create a new Snapshot instance with a filtered traces sequence, filters + is a list of Filter instances. If filters is an empty list, return a + new Snapshot instance with a copy of the traces. 
+ """ + if filters: + include_filters = [] + exclude_filters = [] + for trace_filter in filters: + if trace_filter.inclusive: + include_filters.append(trace_filter) + else: + exclude_filters.append(trace_filter) + new_traces = [trace for trace in self.traces._traces + if self._filter_trace(include_filters, + exclude_filters, + trace)] + else: + new_traces = self.traces._traces.copy() + return Snapshot(new_traces, self.traceback_limit) + + def _group_by(self, key_type, cumulative): + if key_type not in ('traceback', 'filename', 'lineno'): + raise ValueError("unknown key_type: %r" % (key_type,)) + if cumulative and key_type not in ('lineno', 'filename'): + raise ValueError("cumulative mode cannot by used " + "with key type %r" % key_type) + if cumulative and self.traceback_limit < 2: + raise ValueError("cumulative mode needs tracebacks with at least " + "2 frames, traceback limit is %s" + % self.traceback_limit) + + stats = {} + tracebacks = {} + if not cumulative: + for trace in self.traces._traces: + size, trace_traceback = trace + try: + traceback = tracebacks[trace_traceback] + except KeyError: + if key_type == 'traceback': + frames = trace_traceback + elif key_type == 'lineno': + frames = trace_traceback[:1] + else: # key_type == 'filename': + frames = ((trace_traceback[0][0], 0),) + traceback = Traceback(frames) + tracebacks[trace_traceback] = traceback + try: + stat = stats[traceback] + stat.size += size + stat.count += 1 + except KeyError: + stats[traceback] = Statistic(traceback, size, 1) + else: + # cumulative statistics + for trace in self.traces._traces: + size, trace_traceback = trace + for frame in trace_traceback: + try: + traceback = tracebacks[frame] + except KeyError: + if key_type == 'lineno': + frames = (frame,) + else: # key_type == 'filename': + frames = ((frame[0], 0),) + traceback = Traceback(frames) + tracebacks[frame] = traceback + try: + stat = stats[traceback] + stat.size += size + stat.count += 1 + except KeyError: + stats[traceback] = 
Statistic(traceback, size, 1) + return stats + + def statistics(self, key_type, cumulative=False): + """ + Group statistics by key_type. Return a sorted list of Statistic + instances. + """ + grouped = self._group_by(key_type, cumulative) + statistics = list(grouped.values()) + statistics.sort(reverse=True, key=Statistic._sort_key) + return statistics + + def compare_to(self, old_snapshot, key_type, cumulative=False): + """ + Compute the differences with an old snapshot old_snapshot. Get + statistics as a sorted list of StatisticDiff instances, grouped by + group_by. + """ + new_group = self._group_by(key_type, cumulative) + old_group = old_snapshot._group_by(key_type, cumulative) + statistics = _compare_grouped_stats(old_group, new_group) + statistics.sort(reverse=True, key=StatisticDiff._sort_key) + return statistics + + +def take_snapshot(): + """ + Take a snapshot of traces of memory blocks allocated by Python. + """ + if not is_tracing(): + raise RuntimeError("the tracemalloc module must be tracing memory " + "allocations to take a snapshot") + traces = _get_traces() + traceback_limit = get_traceback_limit() + return Snapshot(traces, traceback_limit) diff --git a/Modules/Setup.dist b/Modules/Setup.dist index ebf8172df30..01fb85ffc36 100644 --- a/Modules/Setup.dist +++ b/Modules/Setup.dist @@ -102,7 +102,7 @@ PYTHONPATH=$(COREPYTHONPATH) # various reasons; therefore they are listed here instead of in the # normal order. -# This only contains the minimal set of modules required to run the +# This only contains the minimal set of modules required to run the # setup.py script in the root of the Python source tree. 
posix posixmodule.c # posix (UNIX) system calls @@ -115,7 +115,7 @@ _weakref _weakref.c # weak references _functools _functoolsmodule.c # Tools for working with functions and callable objects _operator _operator.c # operator.add() and similar goodies _collections _collectionsmodule.c # Container types -itertools itertoolsmodule.c # Functions creating iterators for efficient looping +itertools itertoolsmodule.c # Functions creating iterators for efficient looping atexit atexitmodule.c # Register functions to be run at interpreter-shutdown _stat _stat.c # stat.h interface @@ -132,12 +132,15 @@ zipimport zipimport.c # faulthandler module faulthandler faulthandler.c +# debug tool to trace memory blocks allocated by Python +_tracemalloc _tracemalloc.c hashtable.c + # The rest of the modules listed in this file are all commented out by # default. Usually they can be detected and built as dynamically # loaded modules by the new setup.py script added in Python 2.1. If -# you're on a platform that doesn't support dynamic loading, want to -# compile modules statically into the Python binary, or need to -# specify some odd set of compiler switches, you can uncomment the +# you're on a platform that doesn't support dynamic loading, want to +# compile modules statically into the Python binary, or need to +# specify some odd set of compiler switches, you can uncomment the # appropriate lines below. # ====================================================================== @@ -186,7 +189,7 @@ _symtable symtablemodule.c # supported...) #fcntl fcntlmodule.c # fcntl(2) and ioctl(2) -#spwd spwdmodule.c # spwd(3) +#spwd spwdmodule.c # spwd(3) #grp grpmodule.c # grp(3) #select selectmodule.c # select(2); not on ancient System V @@ -302,7 +305,7 @@ _symtable symtablemodule.c #_curses _cursesmodule.c -lcurses -ltermcap # Wrapper for the panel library that's part of ncurses and SYSV curses. 
-#_curses_panel _curses_panel.c -lpanel -lncurses +#_curses_panel _curses_panel.c -lpanel -lncurses # Modules that provide persistent dictionary-like semantics. You will diff --git a/Modules/_tracemalloc.c b/Modules/_tracemalloc.c new file mode 100644 index 00000000000..15ed7342b92 --- /dev/null +++ b/Modules/_tracemalloc.c @@ -0,0 +1,1407 @@ +#include "Python.h" +#include "hashtable.h" +#include "frameobject.h" +#include "pythread.h" +#include "osdefs.h" + +/* Trace memory blocks allocated by PyMem_RawMalloc() */ +#define TRACE_RAW_MALLOC + +/* Forward declaration */ +static void tracemalloc_stop(void); +static int tracemalloc_atexit_register(void); +static void* raw_malloc(size_t size); +static void raw_free(void *ptr); + +#ifdef Py_DEBUG +# define TRACE_DEBUG +#endif + +#define _STR(VAL) #VAL +#define STR(VAL) _STR(VAL) + +/* Protected by the GIL */ +static struct { + PyMemAllocator mem; + PyMemAllocator raw; + PyMemAllocator obj; +} allocators; + +/* Arbitrary limit of the number of frames in a traceback. The value was chosen + to not allocate too much memory on the stack (see TRACEBACK_STACK_SIZE + below). */ +#define MAX_NFRAME 100 + +static struct { + /* Module initialized? + Variable protected by the GIL */ + enum { + TRACEMALLOC_NOT_INITIALIZED, + TRACEMALLOC_INITIALIZED, + TRACEMALLOC_FINALIZED + } initialized; + + /* atexit handler registered? */ + int atexit_registered; + + /* Is tracemalloc tracing memory allocations? + Variable protected by the GIL */ + int tracing; + + /* limit of the number of frames in a traceback, 1 by default. + Variable protected by the GIL. */ + int max_nframe; +} tracemalloc_config = {TRACEMALLOC_NOT_INITIALIZED, 0, 0, 1}; + +#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD) +/* This lock is needed because tracemalloc_free() is called without + the GIL held from PyMem_RawFree(). It cannot acquire the lock because it + would introduce a deadlock in PyThreadState_DeleteCurrent(). 
*/
+static PyThread_type_lock tables_lock;
+#  define TABLES_LOCK() PyThread_acquire_lock(tables_lock, 1)
+#  define TABLES_UNLOCK() PyThread_release_lock(tables_lock)
+#else
+   /* variables are protected by the GIL */
+#  define TABLES_LOCK()
+#  define TABLES_UNLOCK()
+#endif
+
+/* Pack the frame_t structure to reduce the memory footprint on 64-bit
+   architectures: 12 bytes instead of 16. This optimization might produce
+   SIGBUS on architectures not supporting unaligned memory accesses (64-bit
+   MIPS CPU?): on such architecture, the structure must not be packed.
+
+   NOTE(review): the packing set by "#pragma pack(4)" is never reset after
+   frame_t, so it presumably also applies to the structures declared below
+   in this file -- confirm that this is intended. */
+#pragma pack(4)
+typedef struct
+#ifdef __GNUC__
+__attribute__((packed))
+#endif
+{
+    /* filename of the frame (see unknown_filename for the fallback) */
+    PyObject *filename;
+    /* line number of the frame */
+    int lineno;
+} frame_t;
+
+/* Traceback: a hash followed by nframe frames. The structure is declared
+   with a single frame; TRACEBACK_SIZE() gives the real size for more. */
+typedef struct {
+    Py_uhash_t hash;
+    int nframe;
+    frame_t frames[1];
+} traceback_t;
+
+/* Size in bytes of a traceback_t holding NFRAME frames. traceback_t already
+   contains one frame_t, hence "NFRAME - 1". The argument is parenthesized
+   so that an expression such as TRACEBACK_SIZE(a + b) expands correctly. */
+#define TRACEBACK_SIZE(NFRAME) \
+        (sizeof(traceback_t) + sizeof(frame_t) * ((NFRAME) - 1))
+#define TRACEBACK_STACK_SIZE TRACEBACK_SIZE(MAX_NFRAME)
+
+static PyObject *unknown_filename = NULL;
+static traceback_t tracemalloc_empty_traceback;
+
+/* Trace of a memory block: its size in bytes and the traceback where it
+   was allocated. */
+typedef struct {
+    size_t size;
+    traceback_t *traceback;
+} trace_t;
+
+/* Size in bytes of currently traced memory.
+   Protected by TABLES_LOCK(). */
+static size_t tracemalloc_traced_memory = 0;
+
+/* Maximum size in bytes of traced memory.
+   Protected by TABLES_LOCK(). */
+static size_t tracemalloc_max_traced_memory = 0;
+
+/* Hash table used as a set to intern filenames:
+   PyObject* => PyObject*.
+   Protected by the GIL */
+static _Py_hashtable_t *tracemalloc_filenames = NULL;
+
+/* Hash table used as a set to intern tracebacks:
+   traceback_t* => traceback_t*
+   Protected by the GIL */
+static _Py_hashtable_t *tracemalloc_tracebacks = NULL;
+
+/* pointer (void*) => trace (trace_t).
+   Protected by TABLES_LOCK(). */
+static _Py_hashtable_t *tracemalloc_traces = NULL;
+
+#ifdef TRACE_DEBUG
+static void
+tracemalloc_error(const char *format, ...)
+{ + va_list ap; + fprintf(stderr, "tracemalloc: "); + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + fprintf(stderr, "\n"); + fflush(stderr); +} +#endif + +#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC) +#define REENTRANT_THREADLOCAL + +/* If your OS does not provide native thread local storage, you can implement + it manually using a lock. Functions of thread.c cannot be used because + they use PyMem_RawMalloc() which leads to a reentrant call. */ +#if !(defined(_POSIX_THREADS) || defined(NT_THREADS)) +# error "need native thread local storage (TLS)" +#endif + +static int tracemalloc_reentrant_key; + +/* Any non-NULL pointer can be used */ +#define REENTRANT Py_True + +static int +get_reentrant(void) +{ + void *ptr = PyThread_get_key_value(tracemalloc_reentrant_key); + if (ptr != NULL) { + assert(ptr == REENTRANT); + return 1; + } + else + return 0; +} + +static void +set_reentrant(int reentrant) +{ + if (reentrant) { + assert(PyThread_get_key_value(tracemalloc_reentrant_key) == NULL); + PyThread_set_key_value(tracemalloc_reentrant_key, + REENTRANT); + } + else { + /* FIXME: PyThread_set_key_value() cannot be used to set the flag + to zero, because it does nothing if the variable has already + a value set. 
*/ + PyThread_delete_key_value(tracemalloc_reentrant_key); + } +} + +#else + +/* WITH_THREAD not defined: Python compiled without threads, + or TRACE_RAW_MALLOC not defined: variable protected by the GIL */ +static int tracemalloc_reentrant = 0; + +static int +get_reentrant(void) +{ + return tracemalloc_reentrant; +} + +static void +set_reentrant(int reentrant) +{ + assert(!reentrant || !get_reentrant()); + tracemalloc_reentrant = reentrant; +} +#endif + +static int +hashtable_compare_unicode(const void *key, const _Py_hashtable_entry_t *entry) +{ + if (key != NULL && entry->key != NULL) + return (PyUnicode_Compare((PyObject *)key, (PyObject *)entry->key) == 0); + else + return key == entry->key; +} + +static _Py_hashtable_allocator_t hashtable_alloc = {malloc, free}; + +static _Py_hashtable_t * +hashtable_new(size_t data_size, + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func) +{ + return _Py_hashtable_new_full(data_size, 0, + hash_func, compare_func, + NULL, NULL, NULL, &hashtable_alloc); +} + +static void* +raw_malloc(size_t size) +{ + return allocators.raw.malloc(allocators.raw.ctx, size); +} + +static void +raw_free(void *ptr) +{ + allocators.raw.free(allocators.raw.ctx, ptr); +} + +static Py_uhash_t +hashtable_hash_traceback(const void *key) +{ + const traceback_t *traceback = key; + return traceback->hash; +} + +static int +hashtable_compare_traceback(const traceback_t *traceback1, + const _Py_hashtable_entry_t *he) +{ + const traceback_t *traceback2 = he->key; + const frame_t *frame1, *frame2; + int i; + + if (traceback1->nframe != traceback2->nframe) + return 0; + + for (i=0; i < traceback1->nframe; i++) { + frame1 = &traceback1->frames[i]; + frame2 = &traceback2->frames[i]; + + if (frame1->lineno != frame2->lineno) + return 0; + + if (frame1->filename != frame2->filename) { + assert(PyUnicode_Compare(frame1->filename, frame2->filename) != 0); + return 0; + } + } + return 1; +} + +static void 
+tracemalloc_get_frame(PyFrameObject *pyframe, frame_t *frame) +{ + PyCodeObject *code; + PyObject *filename; + _Py_hashtable_entry_t *entry; + + frame->filename = unknown_filename; + frame->lineno = PyFrame_GetLineNumber(pyframe); + assert(frame->lineno >= 0); + if (frame->lineno < 0) + frame->lineno = 0; + + code = pyframe->f_code; + if (code == NULL) { +#ifdef TRACE_DEBUG + tracemalloc_error("failed to get the code object of the a frame"); +#endif + return; + } + + if (code->co_filename == NULL) { +#ifdef TRACE_DEBUG + tracemalloc_error("failed to get the filename of the code object"); +#endif + return; + } + + filename = code->co_filename; + assert(filename != NULL); + if (filename == NULL) + return; + + if (!PyUnicode_Check(filename)) { +#ifdef TRACE_DEBUG + tracemalloc_error("filename is not an unicode string"); +#endif + return; + } + if (!PyUnicode_IS_READY(filename)) { + /* Don't make a Unicode string ready to avoid reentrant calls + to tracemalloc_malloc() or tracemalloc_realloc() */ +#ifdef TRACE_DEBUG + tracemalloc_error("filename is not a ready unicode string"); +#endif + return; + } + + /* intern the filename */ + entry = _Py_hashtable_get_entry(tracemalloc_filenames, filename); + if (entry != NULL) { + filename = (PyObject *)entry->key; + } + else { + /* tracemalloc_filenames is responsible to keep a reference + to the filename */ + Py_INCREF(filename); + if (_Py_hashtable_set(tracemalloc_filenames, filename, NULL, 0) < 0) { + Py_DECREF(filename); +#ifdef TRACE_DEBUG + tracemalloc_error("failed to intern the filename"); +#endif + return; + } + } + + /* the tracemalloc_filenames table keeps a reference to the filename */ + frame->filename = filename; +} + +static Py_uhash_t +traceback_hash(traceback_t *traceback) +{ + /* code based on tuplehash() of Objects/tupleobject.c */ + Py_uhash_t x; /* Unsigned for defined overflow behavior. 
*/ + Py_hash_t y; + int len = traceback->nframe; + Py_uhash_t mult = _PyHASH_MULTIPLIER; + frame_t *frame; + + x = 0x345678UL; + frame = traceback->frames; + while (--len >= 0) { + y = PyObject_Hash(frame->filename); + y ^= frame->lineno; + frame++; + + x = (x ^ y) * mult; + /* the cast might truncate len; that doesn't change hash stability */ + mult += (Py_hash_t)(82520UL + len + len); + } + x += 97531UL; + return x; +} + +static void +traceback_get_frames(traceback_t *traceback) +{ + PyThreadState *tstate; + PyFrameObject *pyframe; + +#ifdef WITH_THREAD + tstate = PyGILState_GetThisThreadState(); +#else + tstate = PyThreadState_Get(); +#endif + if (tstate == NULL) { +#ifdef TRACE_DEBUG + tracemalloc_error("failed to get the current thread state"); +#endif + return; + } + + for (pyframe = tstate->frame; pyframe != NULL; pyframe = pyframe->f_back) { + tracemalloc_get_frame(pyframe, &traceback->frames[traceback->nframe]); + assert(traceback->frames[traceback->nframe].filename != NULL); + assert(traceback->frames[traceback->nframe].lineno >= 0); + traceback->nframe++; + if (traceback->nframe == tracemalloc_config.max_nframe) + break; + } +} + +static traceback_t * +traceback_new(void) +{ + char stack_buffer[TRACEBACK_STACK_SIZE]; + traceback_t *traceback = (traceback_t *)stack_buffer; + _Py_hashtable_entry_t *entry; + +#ifdef WITH_THREAD + assert(PyGILState_Check()); +#endif + + /* get frames */ + traceback->nframe = 0; + traceback_get_frames(traceback); + if (traceback->nframe == 0) + return &tracemalloc_empty_traceback; + traceback->hash = traceback_hash(traceback); + + /* intern the traceback */ + entry = _Py_hashtable_get_entry(tracemalloc_tracebacks, traceback); + if (entry != NULL) { + traceback = (traceback_t *)entry->key; + } + else { + traceback_t *copy; + size_t traceback_size; + + traceback_size = TRACEBACK_SIZE(traceback->nframe); + + copy = raw_malloc(traceback_size); + if (copy == NULL) { +#ifdef TRACE_DEBUG + tracemalloc_error("failed to intern the 
traceback: malloc failed"); +#endif + return NULL; + } + memcpy(copy, traceback, traceback_size); + + if (_Py_hashtable_set(tracemalloc_tracebacks, copy, NULL, 0) < 0) { + raw_free(copy); +#ifdef TRACE_DEBUG + tracemalloc_error("failed to intern the traceback: putdata failed"); +#endif + return NULL; + } + traceback = copy; + } + return traceback; +} + +static void +tracemalloc_log_alloc(void *ptr, size_t size) +{ + traceback_t *traceback; + trace_t trace; + +#ifdef WITH_THREAD + assert(PyGILState_Check()); +#endif + + traceback = traceback_new(); + if (traceback == NULL) + return; + + trace.size = size; + trace.traceback = traceback; + + TABLES_LOCK(); + assert(tracemalloc_traced_memory <= PY_SIZE_MAX - size); + tracemalloc_traced_memory += size; + if (tracemalloc_traced_memory > tracemalloc_max_traced_memory) + tracemalloc_max_traced_memory = tracemalloc_traced_memory; + + _Py_HASHTABLE_SET(tracemalloc_traces, ptr, trace); + TABLES_UNLOCK(); +} + +static void +tracemalloc_log_free(void *ptr) +{ + trace_t trace; + + TABLES_LOCK(); + if (_Py_hashtable_pop(tracemalloc_traces, ptr, &trace, sizeof(trace))) { + assert(tracemalloc_traced_memory >= trace.size); + tracemalloc_traced_memory -= trace.size; + } + TABLES_UNLOCK(); +} + +static void* +tracemalloc_malloc(void *ctx, size_t size, int gil_held) +{ + PyMemAllocator *alloc = (PyMemAllocator *)ctx; +#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD) + PyGILState_STATE gil_state; +#endif + void *ptr; + + if (get_reentrant()) { + return alloc->malloc(alloc->ctx, size); + } + + /* Ignore reentrant call. PyObjet_Malloc() calls PyMem_Malloc() + for allocations larger than 512 bytes. PyGILState_Ensure() may call + PyMem_RawMalloc() indirectly which would call PyGILState_Ensure() if + reentrant are not disabled. 
*/ + set_reentrant(1); +#ifdef WITH_THREAD +#ifdef TRACE_RAW_MALLOC + if (!gil_held) + gil_state = PyGILState_Ensure(); +#else + assert(gil_held); +#endif +#endif + ptr = alloc->malloc(alloc->ctx, size); + set_reentrant(0); + + if (ptr != NULL) + tracemalloc_log_alloc(ptr, size); + +#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD) + if (!gil_held) + PyGILState_Release(gil_state); +#endif + + return ptr; +} + +static void* +tracemalloc_realloc(void *ctx, void *ptr, size_t new_size, int gil_held) +{ + PyMemAllocator *alloc = (PyMemAllocator *)ctx; +#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD) + PyGILState_STATE gil_state; +#endif + void *ptr2; + + if (get_reentrant()) { + /* Reentrant call to PyMem_Realloc() and PyMem_RawRealloc(). + Example: PyMem_RawRealloc() is called internally by pymalloc + (_PyObject_Malloc() and _PyObject_Realloc()) to allocate a new + arena (new_arena()). */ + ptr2 = alloc->realloc(alloc->ctx, ptr, new_size); + + if (ptr2 != NULL && ptr != NULL) + tracemalloc_log_free(ptr); + + return ptr2; + } + + /* Ignore reentrant call. PyObjet_Realloc() calls PyMem_Realloc() for + allocations larger than 512 bytes. PyGILState_Ensure() may call + PyMem_RawMalloc() indirectly which would call PyGILState_Ensure() if + reentrant are not disabled. 
*/ + set_reentrant(1); +#ifdef WITH_THREAD +#ifdef TRACE_RAW_MALLOC + if (!gil_held) + gil_state = PyGILState_Ensure(); +#else + assert(gil_held); +#endif +#endif + ptr2 = alloc->realloc(alloc->ctx, ptr, new_size); + set_reentrant(0); + + if (ptr2 != NULL) { + if (ptr != NULL) + tracemalloc_log_free(ptr); + + tracemalloc_log_alloc(ptr2, new_size); + } + +#if defined(TRACE_RAW_MALLOC) && defined(WITH_THREAD) + if (!gil_held) + PyGILState_Release(gil_state); +#endif + + return ptr2; +} + +static void +tracemalloc_free(void *ctx, void *ptr) +{ + PyMemAllocator *alloc = (PyMemAllocator *)ctx; + + if (ptr == NULL) + return; + + /* GIL cannot be locked in PyMem_RawFree() because it would introduce + a deadlock in PyThreadState_DeleteCurrent(). */ + + alloc->free(alloc->ctx, ptr); + tracemalloc_log_free(ptr); +} + +static void* +tracemalloc_malloc_gil(void *ctx, size_t size) +{ + return tracemalloc_malloc(ctx, size, 1); +} + +static void* +tracemalloc_realloc_gil(void *ctx, void *ptr, size_t new_size) +{ + return tracemalloc_realloc(ctx, ptr, new_size, 1); +} + +#ifdef TRACE_RAW_MALLOC +static void* +tracemalloc_raw_malloc(void *ctx, size_t size) +{ + return tracemalloc_malloc(ctx, size, 0); +} + +static void* +tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size) +{ + return tracemalloc_realloc(ctx, ptr, new_size, 0); +} +#endif + +static int +tracemalloc_clear_filename(_Py_hashtable_entry_t *entry, void *user_data) +{ + PyObject *filename = (PyObject *)entry->key; + Py_DECREF(filename); + return 0; +} + +static int +traceback_free_traceback(_Py_hashtable_entry_t *entry, void *user_data) +{ + traceback_t *traceback = (traceback_t *)entry->key; + raw_free(traceback); + return 0; +} + +/* reentrant flag must be set to call this function and GIL must be held */ +static void +tracemalloc_clear_traces(void) +{ +#ifdef WITH_THREAD + /* The GIL protects variables againt concurrent access */ + assert(PyGILState_Check()); +#endif + + /* Disable also reentrant calls to 
tracemalloc_malloc() to not add a new + trace while we are clearing traces */ + assert(get_reentrant()); + + TABLES_LOCK(); + _Py_hashtable_clear(tracemalloc_traces); + tracemalloc_traced_memory = 0; + tracemalloc_max_traced_memory = 0; + TABLES_UNLOCK(); + + _Py_hashtable_foreach(tracemalloc_tracebacks, traceback_free_traceback, NULL); + _Py_hashtable_clear(tracemalloc_tracebacks); + + _Py_hashtable_foreach(tracemalloc_filenames, tracemalloc_clear_filename, NULL); + _Py_hashtable_clear(tracemalloc_filenames); +} + +static int +tracemalloc_init(void) +{ + if (tracemalloc_config.initialized == TRACEMALLOC_FINALIZED) { + PyErr_SetString(PyExc_RuntimeError, + "the tracemalloc module has been unloaded"); + return -1; + } + + if (tracemalloc_config.initialized == TRACEMALLOC_INITIALIZED) + return 0; + + PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw); + +#ifdef REENTRANT_THREADLOCAL + tracemalloc_reentrant_key = PyThread_create_key(); + if (tracemalloc_reentrant_key == -1) { +#ifdef MS_WINDOWS + PyErr_SetFromWindowsErr(0); +#else + PyErr_SetFromErrno(PyExc_OSError); +#endif + return -1; + } +#endif + +#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC) + if (tables_lock == NULL) { + tables_lock = PyThread_allocate_lock(); + if (tables_lock == NULL) { + PyErr_SetString(PyExc_RuntimeError, "cannot allocate lock"); + return -1; + } + } +#endif + + tracemalloc_filenames = hashtable_new(0, + (_Py_hashtable_hash_func)PyObject_Hash, + hashtable_compare_unicode); + + tracemalloc_tracebacks = hashtable_new(0, + (_Py_hashtable_hash_func)hashtable_hash_traceback, + (_Py_hashtable_compare_func)hashtable_compare_traceback); + + tracemalloc_traces = hashtable_new(sizeof(trace_t), + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct); + + if (tracemalloc_filenames == NULL || tracemalloc_tracebacks == NULL + || tracemalloc_traces == NULL) + { + PyErr_NoMemory(); + return -1; + } + + unknown_filename = PyUnicode_FromString(""); + if (unknown_filename == NULL) + return -1; + 
PyUnicode_InternInPlace(&unknown_filename); + + tracemalloc_empty_traceback.nframe = 1; + /* borrowed reference */ + tracemalloc_empty_traceback.frames[0].filename = unknown_filename; + tracemalloc_empty_traceback.frames[0].lineno = 0; + tracemalloc_empty_traceback.hash = traceback_hash(&tracemalloc_empty_traceback); + + /* Disable tracing allocations until hooks are installed. Set + also the reentrant flag to detect bugs: fail with an assertion error + if set_reentrant(1) is called while tracing is disabled. */ + set_reentrant(1); + + tracemalloc_config.initialized = TRACEMALLOC_INITIALIZED; + return 0; +} + +static void +tracemalloc_deinit(void) +{ + if (tracemalloc_config.initialized != TRACEMALLOC_INITIALIZED) + return; + tracemalloc_config.initialized = TRACEMALLOC_FINALIZED; + + tracemalloc_stop(); + + /* destroy hash tables */ + _Py_hashtable_destroy(tracemalloc_traces); + _Py_hashtable_destroy(tracemalloc_tracebacks); + _Py_hashtable_destroy(tracemalloc_filenames); + +#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC) + if (tables_lock != NULL) { + PyThread_free_lock(tables_lock); + tables_lock = NULL; + } +#endif + +#ifdef REENTRANT_THREADLOCAL + PyThread_delete_key(tracemalloc_reentrant_key); +#endif + + Py_XDECREF(unknown_filename); +} + +static int +tracemalloc_start(void) +{ + PyMemAllocator alloc; + + if (tracemalloc_init() < 0) + return -1; + + if (tracemalloc_config.tracing) { + /* hook already installed: do nothing */ + return 0; + } + + if (tracemalloc_atexit_register() < 0) + return -1; + +#ifdef TRACE_RAW_MALLOC + alloc.malloc = tracemalloc_raw_malloc; + alloc.realloc = tracemalloc_raw_realloc; + alloc.free = tracemalloc_free; + + alloc.ctx = &allocators.raw; + PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw); + PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc); +#endif + + alloc.malloc = tracemalloc_malloc_gil; + alloc.realloc = tracemalloc_realloc_gil; + alloc.free = tracemalloc_free; + + alloc.ctx = &allocators.mem; + 
PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem); + PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc); + + alloc.ctx = &allocators.obj; + PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj); + PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc); + + /* everything is ready: start tracing Python memory allocations */ + tracemalloc_config.tracing = 1; + set_reentrant(0); + + return 0; +} + +static void +tracemalloc_stop(void) +{ + if (!tracemalloc_config.tracing) + return; + + /* stop tracing Python memory allocations */ + tracemalloc_config.tracing = 0; + + /* set the reentrant flag to detect bugs: fail with an assertion error if + set_reentrant(1) is called while tracing is disabled. */ + set_reentrant(1); + + /* unregister the hook on memory allocators */ +#ifdef TRACE_RAW_MALLOC + PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw); +#endif + PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem); + PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj); + + /* release memory */ + tracemalloc_clear_traces(); +} + + +static PyObject* +lineno_as_obj(int lineno) +{ + if (lineno >= 0) + return PyLong_FromLong(lineno); + else + Py_RETURN_NONE; +} + +PyDoc_STRVAR(tracemalloc_is_tracing_doc, + "is_tracing()->bool\n" + "\n" + "True if the tracemalloc module is tracing Python memory allocations,\n" + "False otherwise."); + +static PyObject* +py_tracemalloc_is_tracing(PyObject *self) +{ + return PyBool_FromLong(tracemalloc_config.tracing); +} + +PyDoc_STRVAR(tracemalloc_clear_traces_doc, + "clear_traces()\n" + "\n" + "Clear traces of memory blocks allocated by Python."); + +static PyObject* +py_tracemalloc_clear_traces(PyObject *self) +{ + if (!tracemalloc_config.tracing) + Py_RETURN_NONE; + + set_reentrant(1); + tracemalloc_clear_traces(); + set_reentrant(0); + + Py_RETURN_NONE; +} + +static PyObject* +frame_to_pyobject(frame_t *frame) +{ + PyObject *frame_obj, *lineno_obj; + + frame_obj = PyTuple_New(2); + if (frame_obj == NULL) + return NULL; + + if (frame->filename == NULL) 
+ frame->filename = Py_None; + Py_INCREF(frame->filename); + PyTuple_SET_ITEM(frame_obj, 0, frame->filename); + + assert(frame->lineno >= 0); + lineno_obj = lineno_as_obj(frame->lineno); + if (lineno_obj == NULL) { + Py_DECREF(frame_obj); + return NULL; + } + PyTuple_SET_ITEM(frame_obj, 1, lineno_obj); + + return frame_obj; +} + +static PyObject* +traceback_to_pyobject(traceback_t *traceback, _Py_hashtable_t *intern_table) +{ + int i; + PyObject *frames, *frame; + + if (intern_table != NULL) { + if (_Py_HASHTABLE_GET(intern_table, traceback, frames)) { + Py_INCREF(frames); + return frames; + } + } + + frames = PyTuple_New(traceback->nframe); + if (frames == NULL) + return NULL; + + for (i=0; i < traceback->nframe; i++) { + frame = frame_to_pyobject(&traceback->frames[i]); + if (frame == NULL) { + Py_DECREF(frames); + return NULL; + } + PyTuple_SET_ITEM(frames, i, frame); + } + + if (intern_table != NULL) { + if (_Py_HASHTABLE_SET(intern_table, traceback, frames) < 0) { + Py_DECREF(frames); + PyErr_NoMemory(); + return NULL; + } + /* intern_table keeps a new reference to frames */ + Py_INCREF(frames); + } + return frames; +} + +static PyObject* +trace_to_pyobject(trace_t *trace, _Py_hashtable_t *intern_tracebacks) +{ + PyObject *trace_obj = NULL; + PyObject *size, *traceback; + + trace_obj = PyTuple_New(2); + if (trace_obj == NULL) + return NULL; + + size = PyLong_FromSize_t(trace->size); + if (size == NULL) { + Py_DECREF(trace_obj); + return NULL; + } + PyTuple_SET_ITEM(trace_obj, 0, size); + + traceback = traceback_to_pyobject(trace->traceback, intern_tracebacks); + if (traceback == NULL) { + Py_DECREF(trace_obj); + return NULL; + } + PyTuple_SET_ITEM(trace_obj, 1, traceback); + + return trace_obj; +} + +typedef struct { + _Py_hashtable_t *traces; + _Py_hashtable_t *tracebacks; + PyObject *list; +} get_traces_t; + +static int +tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data) +{ + get_traces_t *get_traces = user_data; + trace_t *trace; + 
PyObject *tracemalloc_obj; + int res; + + trace = (trace_t *)_PY_HASHTABLE_ENTRY_DATA(entry); + + tracemalloc_obj = trace_to_pyobject(trace, get_traces->tracebacks); + if (tracemalloc_obj == NULL) + return 1; + + res = PyList_Append(get_traces->list, tracemalloc_obj); + Py_DECREF(tracemalloc_obj); + if (res < 0) + return 1; + + return 0; +} + +static int +tracemalloc_pyobject_decref_cb(_Py_hashtable_entry_t *entry, void *user_data) +{ + PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + Py_DECREF(obj); + return 0; +} + +PyDoc_STRVAR(tracemalloc_get_traces_doc, + "get_traces() -> list\n" + "\n" + "Get traces of all memory blocks allocated by Python.\n" + "Return a list of (size: int, traceback: tuple) tuples.\n" + "traceback is a tuple of (filename: str, lineno: int) tuples.\n" + "\n" + "Return an empty list if the tracemalloc module is disabled."); + +static PyObject* +py_tracemalloc_get_traces(PyObject *self, PyObject *obj) +{ + get_traces_t get_traces; + int err; + + get_traces.traces = NULL; + get_traces.tracebacks = NULL; + get_traces.list = PyList_New(0); + if (get_traces.list == NULL) + goto error; + + if (!tracemalloc_config.tracing) + return get_traces.list; + + get_traces.tracebacks = hashtable_new(sizeof(PyObject *), + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct); + if (get_traces.tracebacks == NULL) { + PyErr_NoMemory(); + goto error; + } + + TABLES_LOCK(); + get_traces.traces = _Py_hashtable_copy(tracemalloc_traces); + TABLES_UNLOCK(); + + if (get_traces.traces == NULL) { + PyErr_NoMemory(); + goto error; + } + + set_reentrant(1); + err = _Py_hashtable_foreach(get_traces.traces, + tracemalloc_get_traces_fill, &get_traces); + set_reentrant(0); + if (err) + goto error; + + goto finally; + +error: + Py_CLEAR(get_traces.list); + +finally: + if (get_traces.tracebacks != NULL) { + _Py_hashtable_foreach(get_traces.tracebacks, + tracemalloc_pyobject_decref_cb, NULL); + _Py_hashtable_destroy(get_traces.tracebacks); + } + if 
(get_traces.traces != NULL) + _Py_hashtable_destroy(get_traces.traces); + + return get_traces.list; +} + +PyDoc_STRVAR(tracemalloc_get_object_traceback_doc, + "get_object_traceback(obj)\n" + "\n" + "Get the traceback where the Python object obj was allocated.\n" + "Return a tuple of (filename: str, lineno: int) tuples.\n" + "\n" + "Return None if the tracemalloc module is disabled or did not\n" + "trace the allocation of the object."); + +static PyObject* +py_tracemalloc_get_object_traceback(PyObject *self, PyObject *obj) +{ + PyTypeObject *type; + void *ptr; + trace_t trace; + int found; + + if (!tracemalloc_config.tracing) + Py_RETURN_NONE; + + type = Py_TYPE(obj); + if (PyType_IS_GC(type)) + ptr = (void *)((char *)obj - sizeof(PyGC_Head)); + else + ptr = (void *)obj; + + TABLES_LOCK(); + found = _Py_HASHTABLE_GET(tracemalloc_traces, ptr, trace); + TABLES_UNLOCK(); + + if (!found) + Py_RETURN_NONE; + + return traceback_to_pyobject(trace.traceback, NULL); +} + +static PyObject* +tracemalloc_atexit(PyObject *self) +{ +#ifdef WITH_THREAD + assert(PyGILState_Check()); +#endif + tracemalloc_deinit(); + Py_RETURN_NONE; +} + +static PyMethodDef atexit_method = { + "_atexit", (PyCFunction)tracemalloc_atexit, METH_NOARGS, NULL}; + +static int +tracemalloc_atexit_register(void) +{ + PyObject *method = NULL, *atexit = NULL, *func = NULL; + PyObject *result; + int ret = -1; + + if (tracemalloc_config.atexit_registered) + return 0; + tracemalloc_config.atexit_registered = 1; + + /* private functions */ + method = PyCFunction_New(&atexit_method, NULL); + if (method == NULL) + goto done; + + atexit = PyImport_ImportModule("atexit"); + if (atexit == NULL) { + if (!PyErr_Warn(PyExc_ImportWarning, + "atexit module is missing: " + "cannot automatically disable tracemalloc at exit")) + { + PyErr_Clear(); + return 0; + } + goto done; + } + + func = PyObject_GetAttrString(atexit, "register"); + if (func == NULL) + goto done; + + result = PyObject_CallFunction(func, "O", method); + if 
(result == NULL) + goto done; + Py_DECREF(result); + + ret = 0; + +done: + Py_XDECREF(method); + Py_XDECREF(func); + Py_XDECREF(atexit); + return ret; +} + +PyDoc_STRVAR(tracemalloc_start_doc, + "start()\n" + "\n" + "Start tracing Python memory allocations."); + +static PyObject* +py_tracemalloc_start(PyObject *self) +{ + if (tracemalloc_start() < 0) + return NULL; + + Py_RETURN_NONE; +} + +PyDoc_STRVAR(tracemalloc_stop_doc, + "stop()\n" + "\n" + "Stop tracing Python memory allocations and clear traces\n" + "of memory blocks allocated by Python."); + +static PyObject* +py_tracemalloc_stop(PyObject *self) +{ + tracemalloc_stop(); + Py_RETURN_NONE; +} + +PyDoc_STRVAR(tracemalloc_get_traceback_limit_doc, + "get_traceback_limit() -> int\n" + "\n" + "Get the maximum number of frames stored in the traceback\n" + "of a trace.\n" + "\n" + "By default, a trace of an allocated memory block only stores\n" + "the most recent frame: the limit is 1."); + +static PyObject* +py_tracemalloc_get_traceback_limit(PyObject *self) +{ + return PyLong_FromLong(tracemalloc_config.max_nframe); +} + +PyDoc_STRVAR(tracemalloc_set_traceback_limit_doc, + "set_traceback_limit(nframe: int)\n" + "\n" + "Set the maximum number of frames stored in the traceback of a trace."); + +static PyObject* +tracemalloc_set_traceback_limit(PyObject *self, PyObject *args) +{ + Py_ssize_t nframe; + + if (!PyArg_ParseTuple(args, "n:set_traceback_limit", + &nframe)) + return NULL; + + if (nframe < 1 || nframe > MAX_NFRAME) { + PyErr_Format(PyExc_ValueError, + "the number of frames must be in range [1; %i]", + MAX_NFRAME); + return NULL; + } + tracemalloc_config.max_nframe = Py_SAFE_DOWNCAST(nframe, Py_ssize_t, int); + + Py_RETURN_NONE; +} + +PyDoc_STRVAR(tracemalloc_get_tracemalloc_memory_doc, + "get_tracemalloc_memory() -> int\n" + "\n" + "Get the memory usage in bytes of the tracemalloc module\n" + "used internally to trace memory allocations."); + +static PyObject* +tracemalloc_get_tracemalloc_memory(PyObject 
*self)
+{
+    size_t size;
+    PyObject *size_obj;
+
+    /* Sum of the sizes reported by the three internal hash tables */
+    size = _Py_hashtable_size(tracemalloc_tracebacks);
+    size += _Py_hashtable_size(tracemalloc_filenames);
+
+    TABLES_LOCK();
+    size += _Py_hashtable_size(tracemalloc_traces);
+    TABLES_UNLOCK();
+
+    size_obj = PyLong_FromSize_t(size);
+    return Py_BuildValue("N", size_obj);
+}
+
+PyDoc_STRVAR(tracemalloc_get_traced_memory_doc,
+    "get_traced_memory() -> (int, int)\n"
+    "\n"
+    "Get the current size and maximum size of memory blocks traced\n"
+    "by the tracemalloc module as a tuple: (size: int, max_size: int).");
+
+static PyObject*
+tracemalloc_get_traced_memory(PyObject *self)
+{
+    /* The counters are size_t and are converted with PyLong_FromSize_t():
+       keep them unsigned instead of going through Py_ssize_t. */
+    size_t size, max_size;
+    PyObject *size_obj, *max_size_obj;
+
+    if (!tracemalloc_config.tracing)
+        return Py_BuildValue("ii", 0, 0);
+
+    /* Read both counters under the lock to get a consistent pair */
+    TABLES_LOCK();
+    size = tracemalloc_traced_memory;
+    max_size = tracemalloc_max_traced_memory;
+    TABLES_UNLOCK();
+
+    size_obj = PyLong_FromSize_t(size);
+    max_size_obj = PyLong_FromSize_t(max_size);
+    return Py_BuildValue("NN", size_obj, max_size_obj);
+}
+
+static PyMethodDef module_methods[] = {
+    {"is_tracing", (PyCFunction)py_tracemalloc_is_tracing,
+     METH_NOARGS, tracemalloc_is_tracing_doc},
+    {"clear_traces", (PyCFunction)py_tracemalloc_clear_traces,
+     METH_NOARGS, tracemalloc_clear_traces_doc},
+    {"_get_traces", (PyCFunction)py_tracemalloc_get_traces,
+     METH_NOARGS, tracemalloc_get_traces_doc},
+    {"_get_object_traceback", (PyCFunction)py_tracemalloc_get_object_traceback,
+     METH_O, tracemalloc_get_object_traceback_doc},
+    {"start", (PyCFunction)py_tracemalloc_start,
+      METH_NOARGS, tracemalloc_start_doc},
+    {"stop", (PyCFunction)py_tracemalloc_stop,
+      METH_NOARGS, tracemalloc_stop_doc},
+    {"get_traceback_limit", (PyCFunction)py_tracemalloc_get_traceback_limit,
+     METH_NOARGS, tracemalloc_get_traceback_limit_doc},
+    {"set_traceback_limit", (PyCFunction)tracemalloc_set_traceback_limit,
+     METH_VARARGS, tracemalloc_set_traceback_limit_doc},
+    {"get_tracemalloc_memory",
(PyCFunction)tracemalloc_get_tracemalloc_memory, + METH_NOARGS, tracemalloc_get_tracemalloc_memory_doc}, + {"get_traced_memory", (PyCFunction)tracemalloc_get_traced_memory, + METH_NOARGS, tracemalloc_get_traced_memory_doc}, + + /* sentinel */ + {NULL, NULL} +}; + +PyDoc_STRVAR(module_doc, +"Debug module to trace memory blocks allocated by Python."); + +static struct PyModuleDef module_def = { + PyModuleDef_HEAD_INIT, + "_tracemalloc", + module_doc, + 0, /* non-negative size to be able to unload the module */ + module_methods, + NULL, +}; + +PyMODINIT_FUNC +PyInit__tracemalloc(void) +{ + PyObject *m; + m = PyModule_Create(&module_def); + if (m == NULL) + return NULL; + + if (tracemalloc_init() < 0) + return NULL; + + return m; +} + +static int +parse_sys_xoptions(PyObject *value) +{ + PyObject *valuelong; + long nframe; + + if (value == Py_True) + return 1; + + assert(PyUnicode_Check(value)); + if (PyUnicode_GetLength(value) == 0) + return -1; + + valuelong = PyLong_FromUnicodeObject(value, 10); + if (valuelong == NULL) + return -1; + + nframe = PyLong_AsLong(valuelong); + Py_DECREF(valuelong); + if (nframe == -1 && PyErr_Occurred()) + return -1; + + if (nframe < 1 || nframe > MAX_NFRAME) + return -1; + + return Py_SAFE_DOWNCAST(nframe, long, int); +} + +int +_PyTraceMalloc_Init(void) +{ + char *p; + int nframe; + +#ifdef WITH_THREAD + assert(PyGILState_Check()); +#endif + + if ((p = Py_GETENV("PYTHONTRACEMALLOC")) && *p != '\0') { + char *endptr = p; + unsigned long value; + + value = strtoul(p, &endptr, 10); + if (*endptr != '\0' + || value < 1 + || value > MAX_NFRAME + || (errno == ERANGE && value == ULONG_MAX)) + { + Py_FatalError("PYTHONTRACEMALLOC must be an integer " + "in range [1; " STR(MAX_NFRAME) "]"); + return -1; + } + + nframe = (int)value; + } + else { + PyObject *xoptions, *key, *value; + + xoptions = PySys_GetXOptions(); + if (xoptions == NULL) + return -1; + + key = PyUnicode_FromString("tracemalloc"); + if (key == NULL) + return -1; + + value = 
PyDict_GetItemWithError(xoptions, key); + Py_DECREF(key); + if (value == NULL) { + if (PyErr_Occurred()) + return -1; + + /* -X tracemalloc is not used */ + return 0; + } + + nframe = parse_sys_xoptions(value); + Py_DECREF(value); + if (nframe < 0) { + Py_FatalError("-X tracemalloc=NFRAME: number of frame must be " + "an integer in range [1; " STR(MAX_NFRAME) "]"); + } + } + + tracemalloc_config.max_nframe = nframe; + return tracemalloc_start(); +} + diff --git a/Modules/hashtable.c b/Modules/hashtable.c new file mode 100644 index 00000000000..221ed53b9f6 --- /dev/null +++ b/Modules/hashtable.c @@ -0,0 +1,518 @@ +/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash + project: + http://sourceforge.net/projects/libcfu/ + + Copyright of cfuhash: + ---------------------------------- + Creation date: 2005-06-24 21:22:40 + Authors: Don + Change log: + + Copyright (c) 2005 Don Owens + All rights reserved. + + This code is released under the BSD license: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
+   COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+   OF THE POSSIBILITY OF SUCH DAMAGE.
+   ----------------------------------
+*/
+
+#include "Python.h"
+#include "hashtable.h"
+
+#define HASHTABLE_MIN_SIZE 16
+#define HASHTABLE_HIGH 0.50
+#define HASHTABLE_LOW 0.10
+/* parenthesized so that the macro expands as a single operand whatever
+   the surrounding expression is */
+#define HASHTABLE_REHASH_FACTOR (2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH))
+
+#define BUCKETS_HEAD(SLIST) \
+        ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
+#define TABLE_HEAD(HT, BUCKET) \
+        ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
+#define ENTRY_NEXT(ENTRY) \
+        ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
+#define HASHTABLE_ITEM_SIZE(HT) \
+        (sizeof(_Py_hashtable_entry_t) + (HT)->data_size)
+
+/* Forward declaration */
+static void hashtable_rehash(_Py_hashtable_t *ht);
+
+/* Make the singly linked list empty (items are not released) */
+static void
+_Py_slist_init(_Py_slist_t *list)
+{
+    list->head = NULL;
+}
+
+/* Insert item at the head of the list */
+static void
+_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
+{
+    item->next = list->head;
+    list->head = item;
+}
+
+/* Unlink item from the list. previous is the item preceding it,
+   or NULL if item is the head of the list. */
+static void
+_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
+                 _Py_slist_item_t *item)
+{
+    if (previous != NULL)
+        previous->next = item->next;
+    else
+        list->head = item->next;
+}
+
+/* Hash function for integer keys: the key itself, cast to Py_uhash_t */
+Py_uhash_t
+_Py_hashtable_hash_int(const void *key)
+{
+    return (Py_uhash_t)key;
+}
+
+/* Hash function for pointer keys, based on _Py_HashPointer() */
+Py_uhash_t
+_Py_hashtable_hash_ptr(const void *key)
+{
+    return (Py_uhash_t)_Py_HashPointer((void *)key);
+}
+
+/* Compare function comparing the key pointers directly:
+   return non-zero if the key of the entry is key */
+int
+_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry)
+{
+    return entry->key == key;
+}
+
+/* makes sure the real size of the buckets
array is a power of 2 */
+static size_t
+round_size(size_t s)
+{
+    size_t i;
+    if (s < HASHTABLE_MIN_SIZE)
+        return HASHTABLE_MIN_SIZE;
+    i = 1;
+    /* Stop at the highest power of 2 which fits into a size_t: one more
+       shift would wrap i to 0 and the loop would never end. */
+    while (i < s && i <= ((size_t)-1) / 2)
+        i <<= 1;
+    return i;
+}
+
+/* Create a new hash table storing data_size bytes of data per entry, with
+   at least init_size buckets (rounded by round_size()). If allocator is
+   NULL, PyMem_RawMalloc() and PyMem_RawFree() are used.
+   Return NULL on memory allocation failure. */
+_Py_hashtable_t *
+_Py_hashtable_new_full(size_t data_size, size_t init_size,
+                       _Py_hashtable_hash_func hash_func,
+                       _Py_hashtable_compare_func compare_func,
+                       _Py_hashtable_copy_data_func copy_data_func,
+                       _Py_hashtable_free_data_func free_data_func,
+                       _Py_hashtable_get_data_size_func get_data_size_func,
+                       _Py_hashtable_allocator_t *allocator)
+{
+    _Py_hashtable_t *ht;
+    size_t buckets_size;
+    _Py_hashtable_allocator_t alloc;
+
+    if (allocator == NULL) {
+        alloc.malloc = PyMem_RawMalloc;
+        alloc.free = PyMem_RawFree;
+    }
+    else
+        alloc = *allocator;
+
+    ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
+    if (ht == NULL)
+        return ht;
+
+    ht->num_buckets = round_size(init_size);
+    ht->entries = 0;
+    ht->data_size = data_size;
+
+    buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
+    ht->buckets = alloc.malloc(buckets_size);
+    if (ht->buckets == NULL) {
+        alloc.free(ht);
+        return NULL;
+    }
+    /* all buckets start as empty lists */
+    memset(ht->buckets, 0, buckets_size);
+
+    ht->hash_func = hash_func;
+    ht->compare_func = compare_func;
+    ht->copy_data_func = copy_data_func;
+    ht->free_data_func = free_data_func;
+    ht->get_data_size_func = get_data_size_func;
+    ht->alloc = alloc;
+    return ht;
+}
+
+/* Create a new hash table of HASHTABLE_MIN_SIZE buckets using the default
+   allocator and no copy, free or get-size callback */
+_Py_hashtable_t *
+_Py_hashtable_new(size_t data_size,
+                  _Py_hashtable_hash_func hash_func,
+                  _Py_hashtable_compare_func compare_func)
+{
+    return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
+                                  hash_func, compare_func,
+                                  NULL, NULL, NULL, NULL);
+}
+
+/* Get the memory usage of the hash table in bytes: the table structure,
+   the buckets, the entries and, if get_data_size_func is set, the data
+   linked from entries */
+size_t
+_Py_hashtable_size(_Py_hashtable_t *ht)
+{
+    size_t size;
+    size_t hv;
+
+    size = sizeof(_Py_hashtable_t);
+
+    /* buckets */
+    size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
+
+    /* entries */
+    size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
+
+    /* data linked from entries */
+    if (ht->get_data_size_func) {
+        for (hv = 0; hv < ht->num_buckets; hv++) {
+            _Py_hashtable_entry_t *entry;
+
+            for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
+                void *data;
+
+                data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+                size += ht->get_data_size_func(data);
+            }
+        }
+    }
+    return size;
+}
+
+#ifdef Py_DEBUG
+/* Write statistics on the hash table to stdout: load, average and maximum
+   length of the chains, memory usage */
+void
+_Py_hashtable_print_stats(_Py_hashtable_t *ht)
+{
+    size_t size;
+    size_t chain_len, max_chain_len, total_chain_len, nchains;
+    _Py_hashtable_entry_t *entry;
+    size_t hv;
+    double load;
+
+    size = _Py_hashtable_size(ht);
+
+    load = (double)ht->entries / ht->num_buckets;
+
+    max_chain_len = 0;
+    total_chain_len = 0;
+    nchains = 0;
+    for (hv = 0; hv < ht->num_buckets; hv++) {
+        entry = TABLE_HEAD(ht, hv);
+        if (entry != NULL) {
+            chain_len = 0;
+            for (; entry; entry = ENTRY_NEXT(entry)) {
+                chain_len++;
+            }
+            if (chain_len > max_chain_len)
+                max_chain_len = chain_len;
+            total_chain_len += chain_len;
+            nchains++;
+        }
+    }
+    /* %p requires a void* argument */
+    printf("hash table %p: entries=%zu/%zu (%.0f%%), ",
+           (void *)ht, ht->entries, ht->num_buckets, load * 100.0);
+    if (nchains)
+        printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
+    printf("max_chain_len=%zu, %zu kB\n",
+           max_chain_len, size / 1024);
+}
+#endif
+
+/* Get an entry. Return NULL if the key does not exist.
*/
+_Py_hashtable_entry_t *
+_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
+{
+    Py_uhash_t key_hash;
+    size_t index;
+    _Py_hashtable_entry_t *entry;
+
+    key_hash = ht->hash_func(key);
+    /* num_buckets is a power of 2: the mask selects the bucket */
+    index = key_hash & (ht->num_buckets - 1);
+
+    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
+        if (entry->key_hash == key_hash && ht->compare_func(key, entry))
+            break;
+    }
+
+    return entry;
+}
+
+/* Remove the entry of the key and release it. If data is not NULL, the
+   entry data is first copied into data. The table is rehashed if its load
+   becomes lower than HASHTABLE_LOW.
+   Return 1 if the entry was removed, 0 if the key does not exist. */
+static int
+_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+{
+    Py_uhash_t key_hash;
+    size_t index;
+    _Py_hashtable_entry_t *entry, *previous;
+
+    key_hash = ht->hash_func(key);
+    index = key_hash & (ht->num_buckets - 1);
+
+    /* previous is needed to unlink the entry from the bucket list */
+    previous = NULL;
+    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
+        if (entry->key_hash == key_hash && ht->compare_func(key, entry))
+            break;
+        previous = entry;
+    }
+
+    if (entry == NULL)
+        return 0;
+
+    _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
+                     (_Py_slist_item_t *)entry);
+    ht->entries--;
+
+    if (data != NULL)
+        _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
+    ht->alloc.free(entry);
+
+    if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
+        hashtable_rehash(ht);
+    return 1;
+}
+
+/* Add a new entry to the hash. The key must not be present in the hash table.
+   Return 0 on success, -1 on memory error. */
+int
+_Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
+                  void *data, size_t data_size)
+{
+    Py_uhash_t key_hash;
+    size_t index;
+    _Py_hashtable_entry_t *entry;
+
+    assert(data != NULL || data_size == 0);
+#ifndef NDEBUG
+    /* Don't write the assertion on a single line because it is interesting
+       to know the duplicated entry if the assertion failed. The entry can
+       be read using a debugger. */
+    entry = _Py_hashtable_get_entry(ht, key);
+    assert(entry == NULL);
+#endif
+
+    key_hash = ht->hash_func(key);
+    index = key_hash & (ht->num_buckets - 1);
+
+    entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
+    if (entry == NULL) {
+        /* memory allocation failed */
+        return -1;
+    }
+
+    entry->key = (void *)key;
+    entry->key_hash = key_hash;
+
+    assert(data_size == ht->data_size);
+    /* data can be NULL if data_size is 0 (see the first assertion):
+       memcpy() must not be called with a NULL pointer, even for 0 bytes */
+    if (data_size != 0)
+        memcpy(_PY_HASHTABLE_ENTRY_DATA(entry), data, data_size);
+
+    _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
+    ht->entries++;
+
+    if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
+        hashtable_rehash(ht);
+    return 0;
+}
+
+/* Get data from an entry. Copy entry data into data and return 1 if the entry
+   exists, return 0 if the entry does not exist. */
+int
+_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+{
+    _Py_hashtable_entry_t *entry;
+
+    assert(data != NULL);
+
+    entry = _Py_hashtable_get_entry(ht, key);
+    if (entry == NULL)
+        return 0;
+    _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
+    return 1;
+}
+
+/* Remove an entry and copy its data into data. Return 1 if the entry was
+   removed, 0 if the key does not exist. The table must not have a
+   free_data_func callback. */
+int
+_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+{
+    assert(data != NULL);
+    assert(ht->free_data_func == NULL);
+    return _hashtable_pop_entry(ht, key, data, data_size);
+}
+
+/* Delete an entry. The entry must exist. */
+void
+_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key)
+{
+#ifndef NDEBUG
+    int found = _hashtable_pop_entry(ht, key, NULL, 0);
+    assert(found);
+#else
+    (void)_hashtable_pop_entry(ht, key, NULL, 0);
+#endif
+}
+
+/* Prototype for a pointer to a function to be called foreach
+   key/value pair in the hash by hashtable_foreach(). Iteration
+   stops if a non-zero value is returned.
*/
+int
+_Py_hashtable_foreach(_Py_hashtable_t *ht,
+                      int (*func) (_Py_hashtable_entry_t *entry, void *arg),
+                      void *arg)
+{
+    _Py_hashtable_entry_t *entry;
+    size_t hv;
+
+    for (hv = 0; hv < ht->num_buckets; hv++) {
+        for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
+            int res = func(entry, arg);
+            if (res)
+                return res;
+        }
+    }
+    return 0;
+}
+
+/* Resize the array of buckets according to the number of entries and move
+   all entries to their new bucket. The table is left unchanged if the
+   size does not change or if the memory allocation failed. */
+static void
+hashtable_rehash(_Py_hashtable_t *ht)
+{
+    size_t buckets_size, new_size, bucket;
+    _Py_slist_t *old_buckets = NULL;
+    size_t old_num_buckets;
+
+    new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
+    if (new_size == ht->num_buckets)
+        return;
+
+    old_num_buckets = ht->num_buckets;
+
+    buckets_size = new_size * sizeof(ht->buckets[0]);
+    old_buckets = ht->buckets;
+    ht->buckets = ht->alloc.malloc(buckets_size);
+    if (ht->buckets == NULL) {
+        /* cancel rehash on memory allocation failure */
+        ht->buckets = old_buckets;
+        /* memory allocation failed */
+        return;
+    }
+    memset(ht->buckets, 0, buckets_size);
+
+    ht->num_buckets = new_size;
+
+    for (bucket = 0; bucket < old_num_buckets; bucket++) {
+        _Py_hashtable_entry_t *entry, *next;
+        for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
+            size_t entry_index;
+
+            assert(ht->hash_func(entry->key) == entry->key_hash);
+            /* read the next entry first: prepending the entry to its
+               new bucket overwrites its next pointer */
+            next = ENTRY_NEXT(entry);
+            entry_index = entry->key_hash & (new_size - 1);
+
+            _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
+        }
+    }
+
+    ht->alloc.free(old_buckets);
+}
+
+/* Remove all entries, calling free_data_func on the data of each entry
+   if the callback is set, and then shrink the table */
+void
+_Py_hashtable_clear(_Py_hashtable_t *ht)
+{
+    _Py_hashtable_entry_t *entry, *next;
+    size_t i;
+
+    for (i=0; i < ht->num_buckets; i++) {
+        for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
+            next = ENTRY_NEXT(entry);
+            if (ht->free_data_func)
+                ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
+            ht->alloc.free(entry);
+        }
+        _Py_slist_init(&ht->buckets[i]);
+    }
+    ht->entries = 0;
+    hashtable_rehash(ht);
+}
+
+/* Release all entries (and their data if free_data_func is set), the
+   buckets and the table itself */
+void
+_Py_hashtable_destroy(_Py_hashtable_t *ht)
+{
+    size_t i;
+
+    for (i = 0; i < ht->num_buckets; i++) {
+        _Py_slist_item_t *entry = ht->buckets[i].head;
+        while (entry) {
+            _Py_slist_item_t *entry_next = entry->next;
+            if (ht->free_data_func)
+                ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
+            ht->alloc.free(entry);
+            entry = entry_next;
+        }
+    }
+
+    ht->alloc.free(ht->buckets);
+    ht->alloc.free(ht);
+}
+
+/* Return a copy of the hash table */
+_Py_hashtable_t *
+_Py_hashtable_copy(_Py_hashtable_t *src)
+{
+    _Py_hashtable_t *dst;
+    _Py_hashtable_entry_t *entry;
+    size_t bucket;
+    int err;
+    void *data, *new_data;
+
+    dst = _Py_hashtable_new_full(src->data_size, src->num_buckets,
+                                 src->hash_func, src->compare_func,
+                                 src->copy_data_func, src->free_data_func,
+                                 src->get_data_size_func, &src->alloc);
+    if (dst == NULL)
+        return NULL;
+
+    for (bucket=0; bucket < src->num_buckets; bucket++) {
+        entry = TABLE_HEAD(src, bucket);
+        for (; entry; entry = ENTRY_NEXT(entry)) {
+            if (src->copy_data_func) {
+                data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+                new_data = src->copy_data_func(data);
+                if (new_data != NULL) {
+                    err = _Py_hashtable_set(dst, entry->key,
+                                            &new_data, src->data_size);
+                    /* the copied data was not stored into dst, so
+                       _Py_hashtable_destroy(dst) will not release it */
+                    if (err && src->free_data_func)
+                        src->free_data_func(new_data);
+                }
+                else
+                    err = 1;
+            }
+            else {
+                data = _PY_HASHTABLE_ENTRY_DATA(entry);
+                err = _Py_hashtable_set(dst, entry->key, data, src->data_size);
+            }
+            if (err) {
+                _Py_hashtable_destroy(dst);
+                return NULL;
+            }
+        }
+    }
+    return dst;
+}
+
diff --git a/Modules/hashtable.h b/Modules/hashtable.h
new file mode 100644
index 00000000000..539e490c319
--- /dev/null
+++ b/Modules/hashtable.h
@@ -0,0 +1,128 @@
+#ifndef Py_HASHTABLE_H
+#define Py_HASHTABLE_H
+
+/* The whole API is private */
+#ifndef Py_LIMITED_API
+
+typedef struct _Py_slist_item_s {
+    struct _Py_slist_item_s *next;
+} _Py_slist_item_t;
+
+typedef struct {
+    _Py_slist_item_t *head;
+} _Py_slist_t;
+
+#define _Py_SLIST_ITEM_NEXT(ITEM) (((_Py_slist_item_t *)(ITEM))->next)
+
+#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)(SLIST))->head)
+
+typedef struct {
+    /* used by _Py_hashtable_t.buckets to link entries */
+    _Py_slist_item_t _Py_slist_item;
+
+    const void *key;
+    Py_uhash_t key_hash;
+
+    /* data follows */
+} _Py_hashtable_entry_t;
+
+#define _PY_HASHTABLE_ENTRY_DATA(ENTRY) \
+        ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t))
+
+#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \
+        (*(void **)_PY_HASHTABLE_ENTRY_DATA(ENTRY))
+
+#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \
+    do { \
+        assert((DATA_SIZE) == (TABLE)->data_size); \
+        memcpy((DATA), _PY_HASHTABLE_ENTRY_DATA(ENTRY), (DATA_SIZE)); \
+    } while (0)
+
+typedef Py_uhash_t (*_Py_hashtable_hash_func) (const void *key);
+typedef int (*_Py_hashtable_compare_func) (const void *key, const _Py_hashtable_entry_t *he);
+typedef void* (*_Py_hashtable_copy_data_func)(void *data);
+typedef void (*_Py_hashtable_free_data_func)(void *data);
+typedef size_t (*_Py_hashtable_get_data_size_func)(void *data);
+
+typedef struct {
+    /* allocate a memory block */
+    void* (*malloc) (size_t size);
+
+    /* release a memory block */
+    void (*free) (void *ptr);
+} _Py_hashtable_allocator_t;
+
+typedef struct {
+    size_t num_buckets;
+    size_t entries; /* Total number of entries in the table. */
+    _Py_slist_t *buckets;
+    size_t data_size;
+
+    _Py_hashtable_hash_func hash_func;
+    _Py_hashtable_compare_func compare_func;
+    _Py_hashtable_copy_data_func copy_data_func;
+    _Py_hashtable_free_data_func free_data_func;
+    _Py_hashtable_get_data_size_func get_data_size_func;
+    _Py_hashtable_allocator_t alloc;
+} _Py_hashtable_t;
+
+/* hash and compare functions for integers and pointers */
+PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key);
+PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_int(const void *key);
+PyAPI_FUNC(int) _Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry);
+
+PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new(
+    size_t data_size,
+    _Py_hashtable_hash_func hash_func,
+    _Py_hashtable_compare_func compare_func);
+PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full(
+    size_t data_size,
+    size_t init_size,
+    _Py_hashtable_hash_func hash_func,
+    _Py_hashtable_compare_func compare_func,
+    _Py_hashtable_copy_data_func copy_data_func,
+    _Py_hashtable_free_data_func free_data_func,
+    _Py_hashtable_get_data_size_func get_data_size_func,
+    _Py_hashtable_allocator_t *allocator);
+PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src);
+PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht);
+PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht);
+
+typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_entry_t *entry, void *arg);
+
+PyAPI_FUNC(int) _Py_hashtable_foreach(
+    _Py_hashtable_t *ht,
+    _Py_hashtable_foreach_func func, void *arg);
+PyAPI_FUNC(size_t) _Py_hashtable_size(_Py_hashtable_t *ht);
+
+PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
+    _Py_hashtable_t *ht,
+    const void *key);
+PyAPI_FUNC(int) _Py_hashtable_set(
+    _Py_hashtable_t *ht,
+    const void *key,
+    void *data,
+    size_t data_size);
+PyAPI_FUNC(int) _Py_hashtable_get(
+    _Py_hashtable_t *ht,
+    const void *key,
+    void *data,
+    size_t data_size);
+PyAPI_FUNC(int) _Py_hashtable_pop(
+    _Py_hashtable_t *ht,
+    const void *key,
+    void *data,
+    size_t data_size);
+PyAPI_FUNC(void) _Py_hashtable_delete(
+    _Py_hashtable_t *ht,
+    const void *key);
+
+#define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \
+    _Py_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA))
+
+#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
+    _Py_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA))
+
+#endif /* Py_LIMITED_API */
+
+#endif
diff --git a/PC/config.c b/PC/config.c
index 42a07c96945..d8db109f115 100644
--- a/PC/config.c
+++ b/PC/config.c
@@ -13,6 +13,7 @@ extern PyObject* PyInit_binascii(void);
 extern PyObject* PyInit_cmath(void);
 extern PyObject* PyInit_errno(void);
 extern PyObject* PyInit_faulthandler(void);
+extern PyObject* PyInit__tracemalloc(void);
 extern PyObject* PyInit_gc(void);
 extern PyObject* PyInit_math(void);
 extern PyObject* PyInit__md5(void);
@@ -102,6 +103,7 @@ struct _inittab _PyImport_Inittab[] = {
     {"msvcrt", PyInit_msvcrt},
     {"_locale", PyInit__locale},
 #endif
+    {"_tracemalloc", PyInit__tracemalloc},
     /* XXX Should _winapi go in a WIN32 block? not WIN64?
*/ {"_winapi", PyInit__winapi}, diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index c2e1eb3cfb3..27c2807a955 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -449,6 +449,7 @@ + @@ -517,6 +518,7 @@ + @@ -532,6 +534,7 @@ + @@ -684,4 +687,4 @@ - \ No newline at end of file + diff --git a/Python/pythonrun.c b/Python/pythonrun.c index b5d57dfcbf9..3adbbd73736 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -105,6 +105,7 @@ extern void PyLong_Fini(void); extern int _PyFaulthandler_Init(void); extern void _PyFaulthandler_Fini(void); extern void _PyHash_Fini(void); +extern int _PyTraceMalloc_Init(void); #ifdef WITH_THREAD extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *); @@ -454,6 +455,9 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib) if (install_sigs) initsigs(); /* Signal handling stuff, including initintr() */ + if (_PyTraceMalloc_Init() < 0) + Py_FatalError("Py_Initialize: can't initialize tracemalloc"); + initmain(interp); /* Module __main__ */ if (initstdio() < 0) Py_FatalError(