From f36151556f834deaa1cb0be3f1d6f3fdf73231eb Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Wed, 1 Jan 2003 21:51:37 +0000 Subject: [PATCH] A quicker astimezone() implementation, rehabilitating an earlier suggestion from Guido, along with a formal correctness proof of the trickiest bit. The intricacy of the proof reveals how delicate this is, but also how robust the conclusion: correctness doesn't rely on dst() returning +- one hour (not all real time zones do!), it only relies on: 1. That dst() returns a (any) non-zero value if and only if daylight time is in effect. and 2. That the tzinfo subclass implements a consistent notion of time zone. The meaning of "consistent" was a hidden assumption, which is now an explicit requirement in the docs. Alas, it's an unverifiable (by the datetime implementation) requirement, but so it goes. --- Doc/lib/libdatetime.tex | 29 +++-- Lib/test/test_datetime.py | 25 +++++ Modules/datetimemodule.c | 220 ++++++++++++++++++++++++++------------ 3 files changed, 201 insertions(+), 73 deletions(-) diff --git a/Doc/lib/libdatetime.tex b/Doc/lib/libdatetime.tex index ae6adba17f8..72f4ed95c20 100644 --- a/Doc/lib/libdatetime.tex +++ b/Doc/lib/libdatetime.tex @@ -887,10 +887,10 @@ implement all of them. the magnitude of the offset must be less than one day), or a \class{timedelta} object representing a whole number of minutes in the same range. Most implementations of \method{utcoffset()} - will probably look like: + will probably look like one of these two: \begin{verbatim} - return CONSTANT # fixed-offset class + return CONSTANT # fixed-offset class return CONSTANT + self.dst(dt) # daylight-aware class \end{verbatim} \end{methoddesc} @@ -905,12 +905,13 @@ implement all of them. rather than a fixed string primarily because some \class{tzinfo} objects will wish to return different names depending on the specific value of \var{dt} passed, especially if the \class{tzinfo} class is - accounting for DST. + accounting for daylight time. \end{methoddesc} \begin{methoddesc}{dst}{self, dt} - Return the DST offset, in minutes east of UTC, or \code{None} if - DST information isn't known. Return \code{0} if DST is not in effect. + Return the daylight savings time (DST) adjustment, in minutes east of + UTC, or \code{None} if DST information isn't known. Return \code{0} if + DST is not in effect. If DST is in effect, return the offset as an integer or \class{timedelta} object (see \method{utcoffset()} for details). Note that DST offset, if applicable, has @@ -919,7 +920,23 @@ implement all of them. unless you're interested in displaying DST info separately. For example, \method{datetimetz.timetuple()} calls its \member{tzinfo} member's \method{dst()} method to determine how the - \member{tm_isdst} flag should be set. + \member{tm_isdst} flag should be set, and + \method{datetimetz.astimezone()} calls \method{dst()} to account for + DST changes when crossing time zones. + + An instance \var{tz} of a \class{tzinfo} subclass that models both + standard and daylight times must be consistent in this sense: + + \code{tz.utcoffset(dt) - tz.dst(dt)} + + must return the same result for every \class{datetimetz} \var{dt} + in a given year with \code{dt.tzinfo==tz} For sane \class{tzinfo} + subclasses, this expression yields the time zone's "standard offset" + within the year, which should be the same across all days in the year. + The implementation of \method{datetimetz.astimezone()} relies on this, + but cannot detect violations; it's the programmer's responsibility to + ensure it. + \end{methoddesc} These methods are called by a \class{datetimetz} or \class{timetz} object, diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py index 4a7aa60729f..4fe2ad2344b 100644 --- a/Lib/test/test_datetime.py +++ b/Lib/test/test_datetime.py @@ -2703,6 +2703,31 @@ class TestTimezoneConversions(unittest.TestCase): # self.convert_between_tz_and_utc(Eastern, Central) # can't work # self.convert_between_tz_and_utc(Central, Eastern) # can't work + def test_tricky(self): + # 22:00 on day before daylight starts. + fourback = self.dston - timedelta(hours=4) + ninewest = FixedOffset(-9*60, "-0900", 0) + fourback = fourback.astimezone(ninewest) + # 22:00-0900 is 7:00 UTC == 2:00 EST == 3:00 DST. Since it's "after + # 2", we should get the 3 spelling. + # If we plug 22:00 the day before into Eastern, it "looks like std + # time", so its offset is returned as -5, and -5 - -9 = 4. Adding 4 + # to 22:00 lands on 2:00, which makes no sense in local time (the + # local clock jumps from 1 to 3). The point here is to make sure we + # get the 3 spelling. + expected = self.dston.replace(hour=3) + got = fourback.astimezone(Eastern).astimezone(None) + self.assertEqual(expected, got) + + # Similar, but map to 6:00 UTC == 1:00 EST == 2:00 DST. In that + # case we want the 1:00 spelling. + sixutc = self.dston.replace(hour=6).astimezone(utc_real) + # Now 6:00 "looks like daylight", so the offset wrt Eastern is -4, + # and adding -4-0 == -4 gives the 2:00 spelling. We want the 1:00 EST + # spelling. + expected = self.dston.replace(hour=1) + got = sixutc.astimezone(Eastern).astimezone(None) + self.assertEqual(expected, got) def test_suite(): allsuites = [unittest.makeSuite(klass, 'test') diff --git a/Modules/datetimemodule.c b/Modules/datetimemodule.c index fb1f9e1ee9d..8211a10996b 100644 --- a/Modules/datetimemodule.c +++ b/Modules/datetimemodule.c @@ -4753,8 +4753,9 @@ datetimetz_astimezone(PyDateTime_DateTimeTZ *self, PyObject *args, PyObject *result; PyObject *temp; - int selfoff, resoff, tempoff, total_added_to_result; + int selfoff, resoff, resdst, total_added_to_result; int none; + int delta; PyObject *tzinfo; static char *keywords[] = {"tz", NULL}; @@ -4788,36 +4789,17 @@ datetimetz_astimezone(PyDateTime_DateTimeTZ *self, PyObject *args, if (none) return result; - /* Add resoff-selfoff to result. */ - total_added_to_result = resoff - selfoff; - mm += total_added_to_result; - if ((mm < 0 || mm >= 60) && - normalize_datetime(&y, &m, &d, &hh, &mm, &ss, &us) < 0) - goto Fail; - temp = new_datetimetz(y, m, d, hh, mm, ss, us, tzinfo); - if (temp == NULL) - goto Fail; - Py_DECREF(result); - result = temp; - - /* If tz is a fixed-offset class, we're done, but we can't know - * whether it is. If it's a DST-aware class, and we're not near a - * DST boundary, we're also done. If we crossed a DST boundary, - * the offset will be different now, and that's our only clue. - * Unfortunately, we can be in trouble even if we didn't cross a - * DST boundary, if we landed on one of the DST "problem hours". + /* See the long comment block at the end of this file for an + * explanation of this algorithm. That it always works requires a + * pretty intricate proof. */ - tempoff = call_utcoffset(tzinfo, result, &none); - if (tempoff == -1 && PyErr_Occurred()) + resdst = call_dst(tzinfo, result, &none); + if (resdst == -1 && PyErr_Occurred()) goto Fail; - if (none) - goto Inconsistent; - - if (tempoff != resoff) { - /* We did cross a boundary. Try to correct. */ - const int delta = tempoff - resoff; - total_added_to_result += delta; - mm += delta; + /* None and 0 dst() results are the same to us here. Debatable. */ + total_added_to_result = resoff - resdst - selfoff; + if (total_added_to_result != 0) { + mm += total_added_to_result; if ((mm < 0 || mm >= 60) && normalize_datetime(&y, &m, &d, &hh, &mm, &ss, &us) < 0) goto Fail; @@ -4832,50 +4814,42 @@ datetimetz_astimezone(PyDateTime_DateTimeTZ *self, PyObject *args, goto Fail; if (none) goto Inconsistent; - } - /* If this is the first hour of DST, it may be a local time that - * doesn't make sense on the local clock, in which case the naive - * hour before it (in standard time) is equivalent and does make - * sense on the local clock. So force that. + } + + /* The distance now from self to result is + * self - result == naive(self) - selfoff - (naive(result) - resoff) == + * naive(self) - selfoff - + * ((naive(self) + total_added_to_result - resoff) == + * - selfoff - total_added_to_result + resoff. */ - hh -= 1; - if (hh < 0 && normalize_datetime(&y, &m, &d, &hh, &mm, &ss, &us) < 0) + delta = resoff - selfoff - total_added_to_result; + + /* Now self and result are the same UTC time iff delta is 0. + * If it is 0, we're done, although that takes some proving. + */ + if (delta == 0) + return result; + + total_added_to_result += delta; + mm += delta; + if ((mm < 0 || mm >= 60) && + normalize_datetime(&y, &m, &d, &hh, &mm, &ss, &us) < 0) goto Fail; + temp = new_datetimetz(y, m, d, hh, mm, ss, us, tzinfo); if (temp == NULL) goto Fail; - tempoff = call_utcoffset(tzinfo, temp, &none); - if (tempoff == -1 && PyErr_Occurred()) { - Py_DECREF(temp); - goto Fail; - } - if (none) { - Py_DECREF(temp); - goto Inconsistent; - } - /* Are temp and result really the same time? temp == result iff - * temp - tempoff == result - resoff, iff - * (result - HOUR) - tempoff = result - resoff, iff - * resoff - tempoff == HOUR - */ - if (resoff - tempoff == 60) { - /* use the local time that makes sense */ - Py_DECREF(result); - return temp; - } - Py_DECREF(temp); + Py_DECREF(result); + result = temp; - /* There's still a problem with the unspellable (in local time) - * hour after DST ends. If self and result map to the same UTC time - * time, we're OK, else the hour is unrepresentable in the tzinfo - * zone. The result's local time now is - * self + total_added_to_result, so self == result iff - * self - selfoff == result - resoff, iff - * self - selfoff == (self + total_added_to_result) - resoff, iff - * - selfoff == total_added_to_result - resoff, iff - * total_added_to_result == resoff - selfoff - */ - if (total_added_to_result == resoff - selfoff) + resoff = call_utcoffset(tzinfo, result, &none); + if (resoff == -1 && PyErr_Occurred()) + goto Fail; + if (none) + goto Inconsistent; + + if (resoff - selfoff == total_added_to_result) + /* self and result are the same UTC time */ return result; /* Else there's no way to spell self in zone tzinfo. */ @@ -5498,3 +5472,115 @@ initdatetime(void) if (us_per_hour == NULL || us_per_day == NULL || us_per_week == NULL) return; } + +/* --------------------------------------------------------------------------- +Some time zone algebra. For a datetimetz x, let + x.n = x stripped of its timezone -- its naive time. + x.o = x.utcoffset(), and assuming that doesn't raise an exception or + return None + x.d = x.dst(), and assuming that doesn't raise an exception or + return None + x.s = x's standard offset, x.o - x.d + +Now some derived rules, where k is a duration (timedelta). + +1. x.o = x.s + x.d + This follows from the definition of x.s. + +2. If x and y have the same tzinfo member, x.s == y.s. + This is actually a requirement, an assumption we need to make about + sane tzinfo classes. + +3. The naive UTC time corresponding to x is x.n - x.o. + This is again a requirement for a sane tzinfo class. + +4. (x+k).s = x.s + This follows from #2, and that datimetimetz+timedelta preserves tzinfo. + +5. (y+k).n = y.n + k + Again follows from how arithmetic is defined. + +Now we can explain x.astimezone(tz). Let's assume it's an interesting case +(meaning that the various tzinfo methods exist, and don't blow up or return +None when called). + +The function wants to return a datetimetz y with timezone tz, equivalent to x. + +By #3, we want + + y.n - y.o = x.n - x.o [1] + +The algorithm starts by attaching tz to x.n, and calling that y. So +x.n = y.n at the start. Then it wants to add a duration k to y, so that [1] +becomes true; in effect, we want to solve [2] for k: + + (y+k).n - (y+k).o = x.n - x.o [2] + +By #1, this is the same as + + (y+k).n - ((y+k).s + (y+k).d) = x.n - x.o [3] + +By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start. +Substituting that into [3], + + x.n + k - (y+k).s - (y+k).d = x.n - x.o; the x.n terms cancel, leaving + k - (y+k).s - (y+k).d = - x.o; rearranging, + k = (y+k).s - x.o - (y+k).d; by #4, (y+k).s == y.s, so + k = y.s - x.o - (y+k).d; then by #1, y.s = y.o - y.d, so + k = y.o - y.d - x.o - (y+k).d + +On the RHS, (y+k).d can't be computed directly, but all the rest can be, and +we approximate k by ignoring the (y+k).d term at first. Note that k can't +be very large, since all offset-returning methods return a duration of +magnitude less than 24 hours. For that reason, if y is firmly in std time, +(y+k).d must be 0, so ignoring it has no consequence then. + +In any case, the new value is + + z = y + y.o - y.d - x.o + +If + z.n - z.o = x.n - x.o [4] + +then, we have an equivalent time, and are almost done. The insecurity here is +at the start of daylight time. Picture US Eastern for concreteness. The wall +time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good +sense then. A sensible Eastern tzinfo class will consider such a time to be +EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST on the +day DST starts. We want to return the 1:MM EST spelling because that's +the only spelling that makes sense on the local wall clock. + +Claim: When [4] is true, we have "the right" spelling in this endcase. No +further adjustment is necessary. + +Proof: The right spelling has z.d = 0, and the wrong spelling has z.d != 0 +(for US Eastern, the wrong spelling has z.d = 60 minutes, but we can't assume +that all time zones work this way -- we can assume a time zone is in daylight +time iff dst() doesn't return 0). By [4], and recalling that z.o = z.s + z.d, + + z.n - z.s - z.d = x.n - x.o [5] + +Also + + z.n = (y + y.o - y.d - x.o).n by the construction of z, which equals + y.n + y.o - y.d - x.o by #5. + +Plugging that into [5], + + y.n + y.o - y.d - x.o - z.s - z.d = x.n - x.o; cancelling the x.o terms, + y.n + y.o - y.d - z.s - z.d = x.n; but x.n = y.n too, so they also cancel, + y.o - y.d - z.s - z.d = 0; then y.o = y.s + y.d, so + y.s + y.d - y.d - z.s - z.d = 0; then the y.d terms cancel, + y.s - z.s - z.d = 0; but y and z are in the same timezone, so by #2 + y.s = z.s, and they also cancel, leaving + - z.d = 0; or, + z.d = 0 + +Therefore z is the standard-time spelling, and there's nothing left to do in +this case. + +Note that we actually proved something stronger: when [4] is true, it must +also be true that z.dst() returns 0. + +XXX Flesh out the rest of the algorithm. +--------------------------------------------------------------------------- */