#11999: sync based on comparing mtimes, not mtime to system clock

This commit is contained in:
R David Murray 2011-05-06 21:56:22 -04:00
parent 58d6b1b7a4
commit 8b26c4b8ea
3 changed files with 58 additions and 51 deletions

View File

@ -234,19 +234,24 @@ class Maildir(Mailbox):
def __init__(self, dirname, factory=rfc822.Message, create=True):
"""Initialize a Maildir instance."""
Mailbox.__init__(self, dirname, factory, create)
self._paths = {
'tmp': os.path.join(self._path, 'tmp'),
'new': os.path.join(self._path, 'new'),
'cur': os.path.join(self._path, 'cur'),
}
if not os.path.exists(self._path):
if create:
os.mkdir(self._path, 0700)
os.mkdir(os.path.join(self._path, 'tmp'), 0700)
os.mkdir(os.path.join(self._path, 'new'), 0700)
os.mkdir(os.path.join(self._path, 'cur'), 0700)
for path in self._paths.values():
os.mkdir(path, 0o700)
else:
raise NoSuchMailboxError(self._path)
self._toc = {}
self._last_read = None # Records last time we read cur/new
# NOTE: we manually invalidate _last_read each time we do any
# modifications ourselves, otherwise we might get tripped up by
# bogus mtime behaviour on some systems (see issue #6896).
self._toc_mtimes = {}
for subdir in ('cur', 'new'):
self._toc_mtimes[subdir] = os.path.getmtime(self._paths[subdir])
self._last_read = time.time() # Records last time we read cur/new
self._skewfactor = 0.1 # Adjust if os/fs clocks are skewing
def add(self, message):
"""Add message and return assigned key."""
@ -283,15 +288,11 @@ class Maildir(Mailbox):
raise
if isinstance(message, MaildirMessage):
os.utime(dest, (os.path.getatime(dest), message.get_date()))
# Invalidate cached toc
self._last_read = None
return uniq
def remove(self, key):
"""Remove the keyed message; raise KeyError if it doesn't exist."""
os.remove(os.path.join(self._path, self._lookup(key)))
# Invalidate cached toc (only on success)
self._last_read = None
def discard(self, key):
"""If the keyed message exists, remove it."""
@ -326,8 +327,6 @@ class Maildir(Mailbox):
if isinstance(message, MaildirMessage):
os.utime(new_path, (os.path.getatime(new_path),
message.get_date()))
# Invalidate cached toc
self._last_read = None
def get_message(self, key):
"""Return a Message representation or raise a KeyError."""
@ -383,8 +382,8 @@ class Maildir(Mailbox):
def flush(self):
"""Write any pending changes to disk."""
# Maildir changes are always written immediately, so there's nothing
# to do except invalidate our cached toc.
self._last_read = None
# to do.
pass
def lock(self):
"""Lock the mailbox."""
@ -482,36 +481,39 @@ class Maildir(Mailbox):
def _refresh(self):
"""Update table of contents mapping."""
if self._last_read is not None:
for subdir in ('new', 'cur'):
mtime = os.path.getmtime(os.path.join(self._path, subdir))
if mtime > self._last_read:
break
else:
# If it has been less than two seconds since the last _refresh() call,
# we have to unconditionally re-read the mailbox just in case it has
# been modified, because os.path.mtime() has a 2 sec resolution in the
# most common worst case (FAT) and a 1 sec resolution typically. This
# results in a few unnecessary re-reads when _refresh() is called
# multiple times in that interval, but once the clock ticks over, we
# will only re-read as needed. Because the filesystem might be being
# served by an independent system with its own clock, we record and
# compare with the mtimes from the filesystem. Because the other
# system's clock might be skewing relative to our clock, we add an
# extra delta to our wait. The default is one tenth second, but is an
# instance variable and so can be adjusted if dealing with a
# particularly skewed or irregular system.
if time.time() - self._last_read > 2 + self._skewfactor:
refresh = False
for subdir in self._toc_mtimes:
mtime = os.path.getmtime(self._paths[subdir])
if mtime > self._toc_mtimes[subdir]:
refresh = True
self._toc_mtimes[subdir] = mtime
if not refresh:
return
# We record the current time - 1sec so that, if _refresh() is called
# again in the same second, we will always re-read the mailbox
# just in case it's been modified. (os.path.mtime() only has
# 1sec resolution.) This results in a few unnecessary re-reads
# when _refresh() is called multiple times in the same second,
# but once the clock ticks over, we will only re-read as needed.
now = time.time() - 1
# Refresh toc
self._toc = {}
def update_dir (subdir):
path = os.path.join(self._path, subdir)
for subdir in self._toc_mtimes:
path = self._paths[subdir]
for entry in os.listdir(path):
p = os.path.join(path, entry)
if os.path.isdir(p):
continue
uniq = entry.split(self.colon)[0]
self._toc[uniq] = os.path.join(subdir, entry)
update_dir('new')
update_dir('cur')
self._last_read = now
self._last_read = time.time()
def _lookup(self, key):
"""Use TOC to return subpath for given key, or raise a KeyError."""

View File

@ -751,27 +751,28 @@ class TestMaildir(TestMailbox):
self.assertFalse((perms & 0111)) # Execute bits should all be off.
def test_reread(self):
# Wait for 2 seconds
time.sleep(2)
# Initially, the mailbox has not been read and the time is null.
assert getattr(self._box, '_last_read', None) is None
# Put the last modified times more than two seconds into the past
# (because mtime may have only a two second granularity).
for subdir in ('cur', 'new'):
os.utime(os.path.join(self._box._path, subdir),
(time.time()-5,)*2)
# Refresh mailbox; the times should now be set to something.
self._box._refresh()
assert getattr(self._box, '_last_read', None) is not None
# Because mtime has a two second granularity in worst case (FAT), a
# refresh is done unconditionally if called for within
# two-second-plus-a-bit of the last one, just in case the mbox has
# changed; so now we have to wait for that interval to expire.
time.sleep(2.01 + self._box._skewfactor)
# Try calling _refresh() again; the modification times shouldn't have
# changed, so the mailbox should not be re-reading. Re-reading causes
# the ._toc attribute to be assigned a new dictionary object, so
# we'll check that the ._toc attribute isn't a different object.
# Re-reading causes the ._toc attribute to be assigned a new dictionary
# object, so we'll check that the ._toc attribute isn't a different
# object.
orig_toc = self._box._toc
def refreshed():
return self._box._toc is not orig_toc
time.sleep(1) # Wait 1sec to ensure time.time()'s value changes
self._box._refresh()
assert not refreshed()
self.assertFalse(refreshed())
# Now, write something into cur and remove it. This changes
# the mtime and should cause a re-read.
@ -780,7 +781,7 @@ class TestMaildir(TestMailbox):
f.close()
os.unlink(filename)
self._box._refresh()
assert refreshed()
self.assertTrue(refreshed())
class _TestMboxMMDF(TestMailbox):

View File

@ -77,6 +77,10 @@ Core and Builtins
Library
-------
- Issue 11999: fixed sporadic sync failure mailbox.Maildir due to its trying to
detect mtime changes by comparing to the system clock instead of to the
previous value of the mtime.
- Issue #10684: shutil.move used to delete a folder on case insensitive
filesystems when the source and destination name where the same except
for the case.