bpo-37958: Adding get_profile_dict to pstats (GH-15495)

pstats is really useful or profiling and printing the output of the execution of some block of code, but I've found on multiple occasions when I'd like to access this output directly in an easily usable dictionary on which I can further analyze or manipulate.

The proposal is to add a function called get_profile_dict inside of pstats that'll automatically return this data the data in an easily accessible dict.

The output of the following script:

```
import cProfile, pstats
import pprint
from pstats import func_std_string, f8

def fib(n):
    if n == 0:
        return 0
    if n == 1:
        return 1
    return fib(n-1) + fib(n-2)

pr = cProfile.Profile()
pr.enable()
fib(5)
pr.create_stats()

ps = pstats.Stats(pr).sort_stats('tottime', 'cumtime')

def get_profile_dict(self, keys_filter=None):
    """
        Returns a dict where the key is a function name and the value is a dict
        with the following keys:
            - ncalls
            - tottime
            - percall_tottime
            - cumtime
            - percall_cumtime
            - file_name
            - line_number

        keys_filter can be optionally set to limit the key-value pairs in the
        retrieved dict.
    """
    pstats_dict = {}
    func_list = self.fcn_list[:] if self.fcn_list else list(self.stats.keys())

    if not func_list:
        return pstats_dict

    pstats_dict["total_tt"] = float(f8(self.total_tt))
    for func in func_list:
        cc, nc, tt, ct, callers = self.stats[func]
        file, line, func_name = func
        ncalls = str(nc) if nc == cc else (str(nc) + '/' + str(cc))
        tottime = float(f8(tt))
        percall_tottime = -1 if nc == 0 else float(f8(tt/nc))
        cumtime = float(f8(ct))
        percall_cumtime = -1 if cc == 0 else float(f8(ct/cc))
        func_dict = {
            "ncalls": ncalls,
            "tottime": tottime, # time spent in this function alone
            "percall_tottime": percall_tottime,
            "cumtime": cumtime, # time spent in the function plus all functions that this function called,
            "percall_cumtime": percall_cumtime,
            "file_name": file,
            "line_number": line
        }
        func_dict_filtered = func_dict if not keys_filter else { key: func_dict[key] for key in keys_filter }
        pstats_dict[func_name] = func_dict_filtered

    return pstats_dict

pp = pprint.PrettyPrinter(depth=6)
pp.pprint(get_profile_dict(ps))
```

will produce:

```
{"<method 'disable' of '_lsprof.Profiler' objects>": {'cumtime': 0.0,
                                                      'file_name': '~',
                                                      'line_number': 0,
                                                      'ncalls': '1',
                                                      'percall_cumtime': 0.0,
                                                      'percall_tottime': 0.0,
                                                      'tottime': 0.0},
 'create_stats': {'cumtime': 0.0,
                  'file_name': '/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/cProfile.py',
                  'line_number': 50,
                  'ncalls': '1',
                  'percall_cumtime': 0.0,
                  'percall_tottime': 0.0,
                  'tottime': 0.0},
 'fib': {'cumtime': 0.0,
         'file_name': 'get_profile_dict.py',
         'line_number': 5,
         'ncalls': '15/1',
         'percall_cumtime': 0.0,
         'percall_tottime': 0.0,
         'tottime': 0.0},
 'total_tt': 0.0}
 ```

 As an example, this can be used to generate a stacked column chart using various visualization tools which will assist in easily identifying program bottlenecks.



https://bugs.python.org/issue37958



Automerge-Triggered-By: @gpshead
This commit is contained in:
Daniel Olshansky 2020-01-15 17:51:54 -05:00 committed by Miss Islington (bot)
parent dc0284ee8f
commit 01602ae403
4 changed files with 90 additions and 4 deletions

View File

@ -525,6 +525,17 @@ Analysis of the profiler data is done using the :class:`~pstats.Stats` class.
ordering are identical to the :meth:`~pstats.Stats.print_callers` method.
.. method:: get_stats_profile()
This method returns an instance of StatsProfile, which contains a mapping
of function names to instances of FunctionProfile. Each FunctionProfile
instance holds information related to the function's profile such as how
long the function took to run, how many times it was called, etc...
.. versionadded:: 3.9
Added the following dataclasses: StatsProfile, FunctionProfile.
Added the following function: get_stats_profile.
.. _deterministic-profiling:
What Is Deterministic Profiling?

View File

@ -25,11 +25,13 @@ import os
import time
import marshal
import re
from enum import Enum
from functools import cmp_to_key
from dataclasses import dataclass
from typing import Dict
__all__ = ["Stats", "SortKey"]
__all__ = ["Stats", "SortKey", "FunctionProfile", "StatsProfile"]
class SortKey(str, Enum):
CALLS = 'calls', 'ncalls'
@ -52,6 +54,22 @@ class SortKey(str, Enum):
return obj
@dataclass(unsafe_hash=True)
class FunctionProfile:
ncalls: int
tottime: float
percall_tottime: float
cumtime: float
percall_cumtime: float
file_name: str
line_number: int
@dataclass(unsafe_hash=True)
class StatsProfile:
'''Class for keeping track of an item in inventory.'''
total_tt: float
func_profiles: Dict[str, FunctionProfile]
class Stats:
"""This class is used for creating reports from data generated by the
Profile class. It is a "friend" of that class, and imports data either
@ -333,6 +351,41 @@ class Stats:
return new_list, msg
def get_stats_profile(self):
"""This method returns an instance of StatsProfile, which contains a mapping
of function names to instances of FunctionProfile. Each FunctionProfile
instance holds information related to the function's profile such as how
long the function took to run, how many times it was called, etc...
"""
func_list = self.fcn_list[:] if self.fcn_list else list(self.stats.keys())
if not func_list:
return StatsProfile(0, {})
total_tt = float(f8(self.total_tt))
func_profiles = {}
stats_profile = StatsProfile(total_tt, func_profiles)
for func in func_list:
cc, nc, tt, ct, callers = self.stats[func]
file_name, line_number, func_name = func
ncalls = str(nc) if nc == cc else (str(nc) + '/' + str(cc))
tottime = float(f8(tt))
percall_tottime = -1 if nc == 0 else float(f8(tt/nc))
cumtime = float(f8(ct))
percall_cumtime = -1 if cc == 0 else float(f8(ct/cc))
func_profile = FunctionProfile(
ncalls,
tottime, # time spent in this function alone
percall_tottime,
cumtime, # time spent in the function plus all functions that this function called,
percall_cumtime,
file_name,
line_number
)
func_profiles[func_name] = func_profile
return stats_profile
def get_print_list(self, sel_list):
width = self.max_name_len
if self.fcn_list:

View File

@ -1,10 +1,12 @@
import unittest
from test import support
from io import StringIO
import pstats
from pstats import SortKey
import pstats
import time
import cProfile
class AddCallersTestCase(unittest.TestCase):
"""Tests for pstats.add_callers helper."""
@ -75,6 +77,24 @@ class StatsTestCase(unittest.TestCase):
SortKey.TIME,
'calls')
def test_get_stats_profile(self):
def pass1(): pass
def pass2(): pass
def pass3(): pass
pr = cProfile.Profile()
pr.enable()
pass1()
pass2()
pass3()
pr.create_stats()
ps = pstats.Stats(pr)
stats_profile = ps.get_stats_profile()
funcs_called = set(stats_profile.func_profiles.keys())
self.assertIn('pass1', funcs_called)
self.assertIn('pass2', funcs_called)
self.assertIn('pass3', funcs_called)
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,2 @@
Added the pstats.Stats.get_profile_dict() method to return the profile
data as a StatsProfile instance.