Issue #16104: Allow compileall to do parallel bytecode compilation.

Both compileall.compile_dir() and the CLI for compileall now allow for
specifying how many workers to use (or 0 to use all CPUs).

Thanks to Claudiu Popa for the patch.
This commit is contained in:
Brett Cannon 2014-09-12 10:39:48 -04:00
parent a56411e5d4
commit f1a8df0ac9
4 changed files with 140 additions and 28 deletions

View File

@ -73,12 +73,18 @@ compile Python sources.
:program:`python -m compileall <directory> -r 0` is equivalent to
:program:`python -m compileall <directory> -l`.
.. cmdoption:: -j N
Use *N* workers to compile the files within the given directory.
If ``0`` is used, then the result of :func:`os.cpu_count()`
will be used.
.. versionchanged:: 3.2
Added the ``-i``, ``-b`` and ``-h`` options.
.. versionchanged:: 3.5
Added the ``-r`` option.
Added the ``-j`` and ``-r`` options.
There is no command-line option to control the optimization level used by the
:func:`compile` function, because the Python interpreter itself already
@ -87,7 +93,7 @@ provides the option: :program:`python -O -m compileall`.
Public functions
----------------
.. function:: compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, quiet=False, legacy=False, optimize=-1)
.. function:: compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, quiet=False, legacy=False, optimize=-1, workers=1)
Recursively descend the directory tree named by *dir*, compiling all :file:`.py`
files along the way.
@ -120,9 +126,18 @@ Public functions
*optimize* specifies the optimization level for the compiler. It is passed to
the built-in :func:`compile` function.
The argument *workers* specifies how many workers are used to
compile files in parallel. The default is to not use multiple workers.
If *workers* is ``0``, the result of :func:`os.cpu_count` is used as the
number of workers.
If the platform can't use multiple workers and the *workers* argument is given,
then a :exc:`NotImplementedError` will be raised.
If *workers* is lower than ``0``, a :exc:`ValueError` will be raised.
.. versionchanged:: 3.2
Added the *legacy* and *optimize* parameters.
.. versionchanged:: 3.5
Added the *workers* parameter.
.. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=False, legacy=False, optimize=-1)

View File

@ -134,6 +134,13 @@ New Modules
Improved Modules
================
compileall
----------
* :func:`compileall.compile_dir` and :mod:`compileall`'s command-line interface
can now do parallel bytecode compilation.
(Contributed by Claudiu Popa in :issue:`16104`).
doctest
-------

View File

@ -16,10 +16,40 @@ import importlib.util
import py_compile
import struct
try:
from concurrent.futures import ProcessPoolExecutor
except ImportError:
ProcessPoolExecutor = None
from functools import partial
__all__ = ["compile_dir","compile_file","compile_path"]
def _walk_dir(dir, ddir=None, maxlevels=10, quiet=False):
    """Yield the path of every non-directory entry found under *dir*.

    Entries of each directory are visited in sorted order.  Directories
    named ``__pycache__`` are skipped entirely, and symbolic links to
    directories are never followed.

    Arguments:

    dir:       directory to list
    ddir:      directory name to mirror while descending; it is only
               threaded through the recursive calls and does not change
               the yielded paths
    maxlevels: how many levels of sub-directories may still be entered
    quiet:     if true, do not print the 'Listing ...' progress line

    A directory that cannot be listed is reported on stdout (even when
    *quiet* is true) and treated as empty.
    """
    if not quiet:
        print('Listing {!r}...'.format(dir))
    try:
        names = os.listdir(dir)
    except OSError:
        print("Can't list {!r}".format(dir))
        names = []
    names.sort()
    for name in names:
        if name == '__pycache__':
            continue
        fullname = os.path.join(dir, name)
        if not os.path.isdir(fullname):
            yield fullname
            continue
        # From here on fullname is known to be a directory, so there is no
        # need to stat it a second time before deciding whether to descend.
        if (maxlevels > 0 and name != os.curdir and name != os.pardir
                and not os.path.islink(fullname)):
            # The mirrored name is only needed when we actually recurse.
            dfile = None if ddir is None else os.path.join(ddir, name)
            yield from _walk_dir(fullname, ddir=dfile,
                                 maxlevels=maxlevels - 1, quiet=quiet)
def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
quiet=False, legacy=False, optimize=-1):
quiet=False, legacy=False, optimize=-1, workers=1):
"""Byte-compile all modules in the given directory tree.
Arguments (only dir is required):
@ -32,33 +62,31 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
quiet: if True, be quiet during compilation
legacy: if True, produce legacy pyc paths instead of PEP 3147 paths
optimize: optimization level or -1 for level of the interpreter
workers: maximum number of parallel workers
"""
if not quiet:
print('Listing {!r}...'.format(dir))
try:
names = os.listdir(dir)
except OSError:
print("Can't list {!r}".format(dir))
names = []
names.sort()
files = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels,
ddir=ddir)
success = 1
for name in names:
if name == '__pycache__':
continue
fullname = os.path.join(dir, name)
if ddir is not None:
dfile = os.path.join(ddir, name)
if workers is not None and workers != 1:
if workers < 0:
raise ValueError('workers must be greater or equal to 0')
if ProcessPoolExecutor is None:
raise NotImplementedError('multiprocessing support not available')
workers = workers or None
with ProcessPoolExecutor(max_workers=workers) as executor:
results = executor.map(partial(compile_file,
ddir=ddir, force=force,
rx=rx, quiet=quiet,
legacy=legacy,
optimize=optimize),
files)
success = min(results, default=1)
else:
dfile = None
if not os.path.isdir(fullname):
if not compile_file(fullname, ddir, force, rx, quiet,
for file in files:
if not compile_file(file, ddir, force, rx, quiet,
legacy, optimize):
success = 0
elif (maxlevels > 0 and name != os.curdir and name != os.pardir and
os.path.isdir(fullname) and not os.path.islink(fullname)):
if not compile_dir(fullname, maxlevels - 1, dfile, force, rx,
quiet, legacy, optimize):
success = 0
return success
def compile_file(fullname, ddir=None, force=False, rx=None, quiet=False,
@ -196,8 +224,10 @@ def main():
help=('zero or more file and directory names '
'to compile; if no arguments given, defaults '
'to the equivalent of -l sys.path'))
args = parser.parse_args()
parser.add_argument('-j', '--workers', default=1,
type=int, help='Run compileall concurrently')
args = parser.parse_args()
compile_dests = args.compile_dest
if (args.ddir and (len(compile_dests) != 1
@ -223,6 +253,9 @@ def main():
print("Error reading file list {}".format(args.flist))
return False
if args.workers is not None:
args.workers = args.workers or None
success = True
try:
if compile_dests:
@ -234,7 +267,7 @@ def main():
else:
if not compile_dir(dest, maxlevels, args.ddir,
args.force, args.rx, args.quiet,
args.legacy):
args.legacy, workers=args.workers):
success = False
return success
else:

View File

@ -10,6 +10,13 @@ import time
import unittest
import io
from unittest import mock, skipUnless
try:
from concurrent.futures import ProcessPoolExecutor
_have_multiprocessing = True
except ImportError:
_have_multiprocessing = False
from test import support, script_helper
class CompileallTests(unittest.TestCase):
@ -106,6 +113,33 @@ class CompileallTests(unittest.TestCase):
debug_override=not optimize)
self.assertTrue(os.path.isfile(cached3))
@mock.patch('compileall.ProcessPoolExecutor')
def test_compile_pool_called(self, pool_mock):
    # Asking for more than one worker must construct the process pool.
    # ProcessPoolExecutor is replaced by a mock for the duration of the
    # test, so only the fact that it was called is checked.
    compileall.compile_dir(self.directory, quiet=True, workers=5)
    self.assertTrue(pool_mock.called)
def test_compile_workers_non_positive(self):
    # A negative worker count must be rejected with ValueError carrying
    # the exact message raised by compile_dir().  Despite the test name,
    # only a negative value is exercised here; workers=0 is a valid
    # request and is covered by test_compile_workers_cpu_count.
    with self.assertRaisesRegex(ValueError,
                                "workers must be greater or equal to 0"):
        compileall.compile_dir(self.directory, workers=-1)
@mock.patch('compileall.ProcessPoolExecutor')
def test_compile_workers_cpu_count(self, pool_mock):
    # workers=0 must reach the executor as max_workers=None, leaving the
    # pool size to the executor's own default.
    compileall.compile_dir(self.directory, quiet=True, workers=0)
    # call_args[1] is the keyword-argument dict of the most recent call.
    self.assertEqual(pool_mock.call_args[1]['max_workers'], None)
@mock.patch('compileall.ProcessPoolExecutor')
@mock.patch('compileall.compile_file')
def test_compile_one_worker(self, compile_file_mock, pool_mock):
    # Stacked patch decorators are applied bottom-up, so the mock for
    # compile_file comes first in the argument list and the mock for
    # ProcessPoolExecutor second.
    #
    # With the default workers=1 the pool must not be created and
    # compile_file() must be called directly.
    compileall.compile_dir(self.directory, quiet=True)
    self.assertFalse(pool_mock.called)
    self.assertTrue(compile_file_mock.called)
@mock.patch('compileall.ProcessPoolExecutor', new=None)
def test_compile_missing_multiprocessing(self):
    # Setting ProcessPoolExecutor to None reproduces the state compileall
    # is left in when the concurrent.futures import fails.  Requesting
    # several workers in that state must raise NotImplementedError.
    with self.assertRaisesRegex(NotImplementedError,
                                "multiprocessing support not available"):
        compileall.compile_dir(self.directory, quiet=True, workers=5)
class EncodingTest(unittest.TestCase):
"""Issue 6716: compileall should escape source code when printing errors
@ -413,6 +447,29 @@ class CommandLineTests(unittest.TestCase):
out = self.assertRunOK('badfilename')
self.assertRegex(out, b"Can't list 'badfilename'")
@skipUnless(_have_multiprocessing, "requires multiprocessing")
def test_workers(self):
    # End-to-end check of the -j option: run the command line with
    # '-j 0' over a tree of five packages and verify that the top-level
    # script and every package's bar2 module got compiled.
    bar2fn = script_helper.make_script(self.directory, 'bar2', '')
    files = []
    for suffix in range(5):
        pkgdir = os.path.join(self.directory, 'foo{}'.format(suffix))
        os.mkdir(pkgdir)
        # The package __init__ only needs to exist; its path is not
        # checked below, so the returned name is not kept.
        script_helper.make_script(pkgdir, '__init__', '')
        files.append(script_helper.make_script(pkgdir, 'bar2', ''))

    self.assertRunOK(self.directory, '-j', '0')
    self.assertCompiled(bar2fn)
    for file in files:
        self.assertCompiled(file)
@mock.patch('compileall.compile_dir')
def test_workers_available_cores(self, compile_dir):
    # Run main() in-process with '-j0' on a patched sys.argv and check
    # the keyword arguments it forwards to the mocked compile_dir().
    with mock.patch("sys.argv",
                    new=[sys.executable, self.directory, "-j0"]):
        compileall.main()
        self.assertTrue(compile_dir.called)
        # call_args[-1] is the keyword-argument dict of the last call;
        # main() is expected to have turned the 0 into None.
        # NOTE(review): compile_dir() as shown in this change only takes
        # the pool path when workers is not None, so a None forwarded by
        # main() appears to select the serial path -- confirm that '-j0'
        # really compiles in parallel.
        self.assertEqual(compile_dir.call_args[-1]['workers'], None)
if __name__ == "__main__":
unittest.main()