From f1a8df0ac984162795815aae6696155fcd22fdfb Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Fri, 12 Sep 2014 10:39:48 -0400 Subject: [PATCH] Issue #16104: Allow compileall to do parallel bytecode compilation. Both compileall.compile_dir() and the CLI for compileall now allow for specifying how many workers to use (or 0 to use all CPUs). Thanks to Claudiu Popa for the patch. --- Doc/library/compileall.rst | 19 ++++++++- Doc/whatsnew/3.5.rst | 7 +++ Lib/compileall.py | 85 +++++++++++++++++++++++++------------ Lib/test/test_compileall.py | 57 +++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 28 deletions(-) diff --git a/Doc/library/compileall.rst b/Doc/library/compileall.rst index 104f33a9736..b11d175459e 100644 --- a/Doc/library/compileall.rst +++ b/Doc/library/compileall.rst @@ -73,12 +73,18 @@ compile Python sources. :program:`python -m compileall -r 0` is equivalent to :program:`python -m compileall -l`. +.. cmdoption:: -j N + + Use *N* workers to compile the files within the given directory. + If ``0`` is used, then the result of :func:`os.cpu_count()` + will be used. .. versionchanged:: 3.2 Added the ``-i``, ``-b`` and ``-h`` options. .. versionchanged:: 3.5 - Added the ``-r`` option. + Added the ``-j`` and ``-r`` options. + There is no command-line option to control the optimization level used by the :func:`compile` function, because the Python interpreter itself already @@ -87,7 +93,7 @@ provides the option: :program:`python -O -m compileall`. Public functions ---------------- -.. function:: compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, quiet=False, legacy=False, optimize=-1) +.. function:: compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, quiet=False, legacy=False, optimize=-1, workers=1) Recursively descend the directory tree named by *dir*, compiling all :file:`.py` files along the way. @@ -120,9 +126,18 @@ Public functions *optimize* specifies the optimization level for the compiler. 
It is passed to the built-in :func:`compile` function. + The argument *workers* specifies how many workers are used to + compile files in parallel. The default is to not use multiple workers. + If the platform can't use multiple workers and the *workers* argument is given, + then a :exc:`NotImplementedError` will be raised. + If *workers* is ``0``, the number of processors on the machine is used; if it is lower than ``0``, a :exc:`ValueError` will be raised. + .. versionchanged:: 3.2 Added the *legacy* and *optimize* parameter. + .. versionchanged:: 3.5 + Added the *workers* parameter. + .. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=False, legacy=False, optimize=-1) diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index d72e7f3f6bb..2e183a9a8a8 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -134,6 +134,13 @@ New Modules Improved Modules ================ +compileall +---------- + +* :func:`compileall.compile_dir` and :mod:`compileall`'s command-line interface + can now do parallel bytecode compilation. + (Contributed by Claudiu Popa in :issue:`16104`). 
+ doctest ------- diff --git a/Lib/compileall.py b/Lib/compileall.py index 513d899e342..f1c9d27ad6f 100644 --- a/Lib/compileall.py +++ b/Lib/compileall.py @@ -16,10 +16,40 @@ import importlib.util import py_compile import struct +try: + from concurrent.futures import ProcessPoolExecutor +except ImportError: + ProcessPoolExecutor = None +from functools import partial + __all__ = ["compile_dir","compile_file","compile_path"] +def _walk_dir(dir, ddir=None, maxlevels=10, quiet=False): + if not quiet: + print('Listing {!r}...'.format(dir)) + try: + names = os.listdir(dir) + except OSError: + print("Can't list {!r}".format(dir)) + names = [] + names.sort() + for name in names: + if name == '__pycache__': + continue + fullname = os.path.join(dir, name) + if ddir is not None: + dfile = os.path.join(ddir, name) + else: + dfile = None + if not os.path.isdir(fullname): + yield fullname + elif (maxlevels > 0 and name != os.curdir and name != os.pardir and + os.path.isdir(fullname) and not os.path.islink(fullname)): + yield from _walk_dir(fullname, ddir=dfile, + maxlevels=maxlevels - 1, quiet=quiet) + def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, - quiet=False, legacy=False, optimize=-1): + quiet=False, legacy=False, optimize=-1, workers=1): """Byte-compile all modules in the given directory tree. 
Arguments (only dir is required): @@ -32,33 +62,31 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, quiet: if True, be quiet during compilation legacy: if True, produce legacy pyc paths instead of PEP 3147 paths optimize: optimization level or -1 for level of the interpreter + workers: maximum number of parallel workers """ - if not quiet: - print('Listing {!r}...'.format(dir)) - try: - names = os.listdir(dir) - except OSError: - print("Can't list {!r}".format(dir)) - names = [] - names.sort() + files = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels, + ddir=ddir) success = 1 - for name in names: - if name == '__pycache__': - continue - fullname = os.path.join(dir, name) - if ddir is not None: - dfile = os.path.join(ddir, name) - else: - dfile = None - if not os.path.isdir(fullname): - if not compile_file(fullname, ddir, force, rx, quiet, + if workers is not None and workers != 1: + if workers < 0: + raise ValueError('workers must be greater or equal to 0') + if ProcessPoolExecutor is None: + raise NotImplementedError('multiprocessing support not available') + + workers = workers or None + with ProcessPoolExecutor(max_workers=workers) as executor: + results = executor.map(partial(compile_file, + ddir=ddir, force=force, + rx=rx, quiet=quiet, + legacy=legacy, + optimize=optimize), + files) + success = min(results, default=1) + else: + for file in files: + if not compile_file(file, ddir, force, rx, quiet, legacy, optimize): success = 0 - elif (maxlevels > 0 and name != os.curdir and name != os.pardir and - os.path.isdir(fullname) and not os.path.islink(fullname)): - if not compile_dir(fullname, maxlevels - 1, dfile, force, rx, - quiet, legacy, optimize): - success = 0 return success def compile_file(fullname, ddir=None, force=False, rx=None, quiet=False, @@ -196,8 +224,10 @@ def main(): help=('zero or more file and directory names ' 'to compile; if no arguments given, defaults ' 'to the equivalent of -l sys.path')) - args = parser.parse_args() + 
parser.add_argument('-j', '--workers', default=1, + type=int, help='Run compileall concurrently') + args = parser.parse_args() compile_dests = args.compile_dest if (args.ddir and (len(compile_dests) != 1 @@ -223,6 +253,9 @@ def main(): print("Error reading file list {}".format(args.flist)) return False + if args.workers is not None: + args.workers = args.workers or None + success = True try: if compile_dests: @@ -234,7 +267,7 @@ def main(): else: if not compile_dir(dest, maxlevels, args.ddir, args.force, args.rx, args.quiet, - args.legacy): + args.legacy, workers=args.workers): success = False return success else: diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index ba1765e6f87..877d6f27f56 100644 --- a/Lib/test/test_compileall.py +++ b/Lib/test/test_compileall.py @@ -10,6 +10,13 @@ import time import unittest import io +from unittest import mock, skipUnless +try: + from concurrent.futures import ProcessPoolExecutor + _have_multiprocessing = True +except ImportError: + _have_multiprocessing = False + from test import support, script_helper class CompileallTests(unittest.TestCase): @@ -106,6 +113,33 @@ class CompileallTests(unittest.TestCase): debug_override=not optimize) self.assertTrue(os.path.isfile(cached3)) + @mock.patch('compileall.ProcessPoolExecutor') + def test_compile_pool_called(self, pool_mock): + compileall.compile_dir(self.directory, quiet=True, workers=5) + self.assertTrue(pool_mock.called) + + def test_compile_workers_non_positive(self): + with self.assertRaisesRegex(ValueError, + "workers must be greater or equal to 0"): + compileall.compile_dir(self.directory, workers=-1) + + @mock.patch('compileall.ProcessPoolExecutor') + def test_compile_workers_cpu_count(self, pool_mock): + compileall.compile_dir(self.directory, quiet=True, workers=0) + self.assertEqual(pool_mock.call_args[1]['max_workers'], None) + + @mock.patch('compileall.ProcessPoolExecutor') + @mock.patch('compileall.compile_file') + def test_compile_one_worker(self, 
compile_file_mock, pool_mock): + compileall.compile_dir(self.directory, quiet=True) + self.assertFalse(pool_mock.called) + self.assertTrue(compile_file_mock.called) + + @mock.patch('compileall.ProcessPoolExecutor', new=None) + def test_compile_missing_multiprocessing(self): + with self.assertRaisesRegex(NotImplementedError, + "multiprocessing support not available"): + compileall.compile_dir(self.directory, quiet=True, workers=5) class EncodingTest(unittest.TestCase): """Issue 6716: compileall should escape source code when printing errors @@ -413,6 +447,29 @@ class CommandLineTests(unittest.TestCase): out = self.assertRunOK('badfilename') self.assertRegex(out, b"Can't list 'badfilename'") + @skipUnless(_have_multiprocessing, "requires multiprocessing") + def test_workers(self): + bar2fn = script_helper.make_script(self.directory, 'bar2', '') + files = [] + for suffix in range(5): + pkgdir = os.path.join(self.directory, 'foo{}'.format(suffix)) + os.mkdir(pkgdir) + fn = script_helper.make_script(pkgdir, '__init__', '') + files.append(script_helper.make_script(pkgdir, 'bar2', '')) + + self.assertRunOK(self.directory, '-j', '0') + self.assertCompiled(bar2fn) + for file in files: + self.assertCompiled(file) + + @mock.patch('compileall.compile_dir') + def test_workers_available_cores(self, compile_dir): + with mock.patch("sys.argv", + new=[sys.executable, self.directory, "-j0"]): + compileall.main() + self.assertTrue(compile_dir.called) + self.assertEqual(compile_dir.call_args[-1]['workers'], None) + if __name__ == "__main__": unittest.main()