bpo-42398: Fix "make regen-all" race condition (GH-23362)

Fix a race condition in "make regen-all" when make -jN option is used
to run jobs in parallel. The clinic.py script now only use atomic
write to write files. Moveover, generated files are now left
unchanged if the content does not change, to not change the file
modification time.

The "make regen-all" command runs "make clinic" and "make
regen-importlib" targets:

* "make regen-importlib" builds object files (ex: Modules/_weakref.o)
  from source files (ex: Modules/_weakref.c) and clinic files (ex:
  Modules/clinic/_weakref.c.h)
* "make clinic" always rewrites all clinic files
  (ex: Modules/clinic/_weakref.c.h)

Since there is no dependency between "clinic" and "regen-importlib"
Makefile targets, these two targets can be run in parallel. Moreover,
half of clinic.py file writes are not atomic and so there is a race
condition when "make regen-all" runs jobs in parallel using make -jN
option (which can be passed in MAKEFLAGS environment variable).

Fix clinic.py to make all file writes atomic:

* Add write_file() function to ensure that all file writes are
  atomic: write into a temporary file and then use os.replace().
* Moreover, write_file() doesn't recreate or modify the file if the
  content does not change to avoid modifying the file modification
  file.
* Update test_clinic to verify these assertions with a functional
  test.
* Remove Clinic.force attribute which was no longer used, whereas
  Clinic.verify remains useful.
This commit is contained in:
Victor Stinner 2020-11-18 15:36:27 +01:00 committed by GitHub
parent ce04e7105b
commit 8fba9523cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 58 additions and 26 deletions

View File

@ -795,17 +795,29 @@ class ClinicExternalTest(TestCase):
maxDiff = None maxDiff = None
def test_external(self): def test_external(self):
# bpo-42398: Test that the destination file is left unchanged if the
# content does not change. Moreover, check also that the file
# modification time does not change in this case.
source = support.findfile('clinic.test') source = support.findfile('clinic.test')
with open(source, 'r', encoding='utf-8') as f: with open(source, 'r', encoding='utf-8') as f:
original = f.read() orig_contents = f.read()
with os_helper.temp_dir() as testdir:
testfile = os.path.join(testdir, 'clinic.test.c') with os_helper.temp_dir() as tmp_dir:
testfile = os.path.join(tmp_dir, 'clinic.test.c')
with open(testfile, 'w', encoding='utf-8') as f: with open(testfile, 'w', encoding='utf-8') as f:
f.write(original) f.write(orig_contents)
clinic.parse_file(testfile, force=True) old_mtime_ns = os.stat(testfile).st_mtime_ns
clinic.parse_file(testfile)
with open(testfile, 'r', encoding='utf-8') as f: with open(testfile, 'r', encoding='utf-8') as f:
result = f.read() new_contents = f.read()
self.assertEqual(result, original) new_mtime_ns = os.stat(testfile).st_mtime_ns
self.assertEqual(new_contents, orig_contents)
# Don't change the file modification time
# if the content does not change
self.assertEqual(new_mtime_ns, old_mtime_ns)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -0,0 +1,4 @@
Fix a race condition in "make regen-all" when make -jN option is used to run
jobs in parallel. The clinic.py script now only use atomic write to write
files. Moveover, generated files are now left unchanged if the content does not
change, to not change the file modification time.

View File

@ -1777,6 +1777,30 @@ legacy_converters = {}
# The callable should not call builtins.print. # The callable should not call builtins.print.
return_converters = {} return_converters = {}
def write_file(filename, new_contents):
try:
with open(filename, 'r', encoding="utf-8") as fp:
old_contents = fp.read()
if old_contents == new_contents:
# no change: avoid modifying the file modification time
return
except FileNotFoundError:
pass
# Atomic write using a temporary file and os.replace()
filename_new = f"{filename}.new"
with open(filename_new, "w", encoding="utf-8") as fp:
fp.write(new_contents)
try:
os.replace(filename_new, filename)
except:
os.unlink(filename_new)
raise
clinic = None clinic = None
class Clinic: class Clinic:
@ -1823,7 +1847,7 @@ impl_definition block
""" """
def __init__(self, language, printer=None, *, force=False, verify=True, filename=None): def __init__(self, language, printer=None, *, verify=True, filename=None):
# maps strings to Parser objects. # maps strings to Parser objects.
# (instantiated from the "parsers" global.) # (instantiated from the "parsers" global.)
self.parsers = {} self.parsers = {}
@ -1832,7 +1856,6 @@ impl_definition block
fail("Custom printers are broken right now") fail("Custom printers are broken right now")
self.printer = printer or BlockPrinter(language) self.printer = printer or BlockPrinter(language)
self.verify = verify self.verify = verify
self.force = force
self.filename = filename self.filename = filename
self.modules = collections.OrderedDict() self.modules = collections.OrderedDict()
self.classes = collections.OrderedDict() self.classes = collections.OrderedDict()
@ -1965,8 +1988,7 @@ impl_definition block
block.input = 'preserve\n' block.input = 'preserve\n'
printer_2 = BlockPrinter(self.language) printer_2 = BlockPrinter(self.language)
printer_2.print_block(block) printer_2.print_block(block)
with open(destination.filename, "wt") as f: write_file(destination.filename, printer_2.f.getvalue())
f.write(printer_2.f.getvalue())
continue continue
text = printer.f.getvalue() text = printer.f.getvalue()
@ -2018,7 +2040,10 @@ impl_definition block
return module, cls return module, cls
def parse_file(filename, *, force=False, verify=True, output=None, encoding='utf-8'): def parse_file(filename, *, verify=True, output=None):
if not output:
output = filename
extension = os.path.splitext(filename)[1][1:] extension = os.path.splitext(filename)[1][1:]
if not extension: if not extension:
fail("Can't extract file type for file " + repr(filename)) fail("Can't extract file type for file " + repr(filename))
@ -2028,7 +2053,7 @@ def parse_file(filename, *, force=False, verify=True, output=None, encoding='utf
except KeyError: except KeyError:
fail("Can't identify file type for file " + repr(filename)) fail("Can't identify file type for file " + repr(filename))
with open(filename, 'r', encoding=encoding) as f: with open(filename, 'r', encoding="utf-8") as f:
raw = f.read() raw = f.read()
# exit quickly if there are no clinic markers in the file # exit quickly if there are no clinic markers in the file
@ -2036,19 +2061,10 @@ def parse_file(filename, *, force=False, verify=True, output=None, encoding='utf
if not find_start_re.search(raw): if not find_start_re.search(raw):
return return
clinic = Clinic(language, force=force, verify=verify, filename=filename) clinic = Clinic(language, verify=verify, filename=filename)
cooked = clinic.parse(raw) cooked = clinic.parse(raw)
if (cooked == raw) and not force:
return
directory = os.path.dirname(filename) or '.' write_file(output, cooked)
with tempfile.TemporaryDirectory(prefix="clinic", dir=directory) as tmpdir:
bytes = cooked.encode(encoding)
tmpfilename = os.path.join(tmpdir, os.path.basename(filename))
with open(tmpfilename, "wb") as f:
f.write(bytes)
os.replace(tmpfilename, output or filename)
def compute_checksum(input, length=None): def compute_checksum(input, length=None):
@ -5105,7 +5121,7 @@ For more information see https://docs.python.org/3/howto/clinic.html""")
path = os.path.join(root, filename) path = os.path.join(root, filename)
if ns.verbose: if ns.verbose:
print(path) print(path)
parse_file(path, force=ns.force, verify=not ns.force) parse_file(path, verify=not ns.force)
return return
if not ns.filename: if not ns.filename:
@ -5121,7 +5137,7 @@ For more information see https://docs.python.org/3/howto/clinic.html""")
for filename in ns.filename: for filename in ns.filename:
if ns.verbose: if ns.verbose:
print(filename) print(filename)
parse_file(filename, output=ns.output, force=ns.force, verify=not ns.force) parse_file(filename, output=ns.output, verify=not ns.force)
if __name__ == "__main__": if __name__ == "__main__":