mirror of https://github.com/python/cpython
bpo-42663: Fix parsing TZ strings in zoneinfo module (GH-23825)
zoneinfo now supports the full range of values in the TZ string determined by RFC 8536 and detects all invalid formats. Both Python and C implementations now raise exceptions of the same type on invalid data.
This commit is contained in:
parent
12deda7633
commit
ab08ff7882
|
@ -1001,6 +1001,80 @@ class TZStrTest(ZoneInfoTestBase):
|
|||
|
||||
self.assertEqual(dt_act, dt_utc)
|
||||
|
||||
def test_extreme_tzstr(self):
|
||||
tzstrs = [
|
||||
# Extreme offset hour
|
||||
"AAA24",
|
||||
"AAA+24",
|
||||
"AAA-24",
|
||||
"AAA24BBB,J60/2,J300/2",
|
||||
"AAA+24BBB,J60/2,J300/2",
|
||||
"AAA-24BBB,J60/2,J300/2",
|
||||
"AAA4BBB24,J60/2,J300/2",
|
||||
"AAA4BBB+24,J60/2,J300/2",
|
||||
"AAA4BBB-24,J60/2,J300/2",
|
||||
# Extreme offset minutes
|
||||
"AAA4:00BBB,J60/2,J300/2",
|
||||
"AAA4:59BBB,J60/2,J300/2",
|
||||
"AAA4BBB5:00,J60/2,J300/2",
|
||||
"AAA4BBB5:59,J60/2,J300/2",
|
||||
# Extreme offset seconds
|
||||
"AAA4:00:00BBB,J60/2,J300/2",
|
||||
"AAA4:00:59BBB,J60/2,J300/2",
|
||||
"AAA4BBB5:00:00,J60/2,J300/2",
|
||||
"AAA4BBB5:00:59,J60/2,J300/2",
|
||||
# Extreme total offset
|
||||
"AAA24:59:59BBB5,J60/2,J300/2",
|
||||
"AAA-24:59:59BBB5,J60/2,J300/2",
|
||||
"AAA4BBB24:59:59,J60/2,J300/2",
|
||||
"AAA4BBB-24:59:59,J60/2,J300/2",
|
||||
# Extreme months
|
||||
"AAA4BBB,M12.1.1/2,M1.1.1/2",
|
||||
"AAA4BBB,M1.1.1/2,M12.1.1/2",
|
||||
# Extreme weeks
|
||||
"AAA4BBB,M1.5.1/2,M1.1.1/2",
|
||||
"AAA4BBB,M1.1.1/2,M1.5.1/2",
|
||||
# Extreme weekday
|
||||
"AAA4BBB,M1.1.6/2,M2.1.1/2",
|
||||
"AAA4BBB,M1.1.1/2,M2.1.6/2",
|
||||
# Extreme numeric offset
|
||||
"AAA4BBB,0/2,20/2",
|
||||
"AAA4BBB,0/2,0/14",
|
||||
"AAA4BBB,20/2,365/2",
|
||||
"AAA4BBB,365/2,365/14",
|
||||
# Extreme julian offset
|
||||
"AAA4BBB,J1/2,J20/2",
|
||||
"AAA4BBB,J1/2,J1/14",
|
||||
"AAA4BBB,J20/2,J365/2",
|
||||
"AAA4BBB,J365/2,J365/14",
|
||||
# Extreme transition hour
|
||||
"AAA4BBB,J60/167,J300/2",
|
||||
"AAA4BBB,J60/+167,J300/2",
|
||||
"AAA4BBB,J60/-167,J300/2",
|
||||
"AAA4BBB,J60/2,J300/167",
|
||||
"AAA4BBB,J60/2,J300/+167",
|
||||
"AAA4BBB,J60/2,J300/-167",
|
||||
# Extreme transition minutes
|
||||
"AAA4BBB,J60/2:00,J300/2",
|
||||
"AAA4BBB,J60/2:59,J300/2",
|
||||
"AAA4BBB,J60/2,J300/2:00",
|
||||
"AAA4BBB,J60/2,J300/2:59",
|
||||
# Extreme transition seconds
|
||||
"AAA4BBB,J60/2:00:00,J300/2",
|
||||
"AAA4BBB,J60/2:00:59,J300/2",
|
||||
"AAA4BBB,J60/2,J300/2:00:00",
|
||||
"AAA4BBB,J60/2,J300/2:00:59",
|
||||
# Extreme total transition time
|
||||
"AAA4BBB,J60/167:59:59,J300/2",
|
||||
"AAA4BBB,J60/-167:59:59,J300/2",
|
||||
"AAA4BBB,J60/2,J300/167:59:59",
|
||||
"AAA4BBB,J60/2,J300/-167:59:59",
|
||||
]
|
||||
|
||||
for tzstr in tzstrs:
|
||||
with self.subTest(tzstr=tzstr):
|
||||
self.zone_from_tzstr(tzstr)
|
||||
|
||||
def test_invalid_tzstr(self):
|
||||
invalid_tzstrs = [
|
||||
"PST8PDT", # DST but no transition specified
|
||||
|
@ -1008,16 +1082,33 @@ class TZStrTest(ZoneInfoTestBase):
|
|||
"GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST
|
||||
"GMT0+11,M3.2.0/2,M11.1.0/3", # Unquoted alphanumeric in DST
|
||||
"PST8PDT,M3.2.0/2", # Only one transition rule
|
||||
# Invalid offsets
|
||||
"STD+25",
|
||||
"STD-25",
|
||||
"STD+374",
|
||||
"STD+374DST,M3.2.0/2,M11.1.0/3",
|
||||
"STD+23DST+25,M3.2.0/2,M11.1.0/3",
|
||||
"STD-23DST-25,M3.2.0/2,M11.1.0/3",
|
||||
# Invalid offset hours
|
||||
"AAA168",
|
||||
"AAA+168",
|
||||
"AAA-168",
|
||||
"AAA168BBB,J60/2,J300/2",
|
||||
"AAA+168BBB,J60/2,J300/2",
|
||||
"AAA-168BBB,J60/2,J300/2",
|
||||
"AAA4BBB168,J60/2,J300/2",
|
||||
"AAA4BBB+168,J60/2,J300/2",
|
||||
"AAA4BBB-168,J60/2,J300/2",
|
||||
# Invalid offset minutes
|
||||
"AAA4:0BBB,J60/2,J300/2",
|
||||
"AAA4:100BBB,J60/2,J300/2",
|
||||
"AAA4BBB5:0,J60/2,J300/2",
|
||||
"AAA4BBB5:100,J60/2,J300/2",
|
||||
# Invalid offset seconds
|
||||
"AAA4:00:0BBB,J60/2,J300/2",
|
||||
"AAA4:00:100BBB,J60/2,J300/2",
|
||||
"AAA4BBB5:00:0,J60/2,J300/2",
|
||||
"AAA4BBB5:00:100,J60/2,J300/2",
|
||||
# Completely invalid dates
|
||||
"AAA4BBB,M1443339,M11.1.0/3",
|
||||
"AAA4BBB,M3.2.0/2,0349309483959c",
|
||||
"AAA4BBB,,J300/2",
|
||||
"AAA4BBB,z,J300/2",
|
||||
"AAA4BBB,J60/2,",
|
||||
"AAA4BBB,J60/2,z",
|
||||
# Invalid months
|
||||
"AAA4BBB,M13.1.1/2,M1.1.1/2",
|
||||
"AAA4BBB,M1.1.1/2,M13.1.1/2",
|
||||
|
@ -1037,6 +1128,26 @@ class TZStrTest(ZoneInfoTestBase):
|
|||
# Invalid julian offset
|
||||
"AAA4BBB,J0/2,J20/2",
|
||||
"AAA4BBB,J20/2,J366/2",
|
||||
# Invalid transition time
|
||||
"AAA4BBB,J60/2/3,J300/2",
|
||||
"AAA4BBB,J60/2,J300/2/3",
|
||||
# Invalid transition hour
|
||||
"AAA4BBB,J60/168,J300/2",
|
||||
"AAA4BBB,J60/+168,J300/2",
|
||||
"AAA4BBB,J60/-168,J300/2",
|
||||
"AAA4BBB,J60/2,J300/168",
|
||||
"AAA4BBB,J60/2,J300/+168",
|
||||
"AAA4BBB,J60/2,J300/-168",
|
||||
# Invalid transition minutes
|
||||
"AAA4BBB,J60/2:0,J300/2",
|
||||
"AAA4BBB,J60/2:100,J300/2",
|
||||
"AAA4BBB,J60/2,J300/2:0",
|
||||
"AAA4BBB,J60/2,J300/2:100",
|
||||
# Invalid transition seconds
|
||||
"AAA4BBB,J60/2:00:0,J300/2",
|
||||
"AAA4BBB,J60/2:00:100,J300/2",
|
||||
"AAA4BBB,J60/2,J300/2:00:0",
|
||||
"AAA4BBB,J60/2,J300/2:00:100",
|
||||
]
|
||||
|
||||
for invalid_tzstr in invalid_tzstrs:
|
||||
|
|
|
@ -517,8 +517,8 @@ class _DayOffset:
|
|||
__slots__ = ["d", "julian", "hour", "minute", "second"]
|
||||
|
||||
def __init__(self, d, julian, hour=2, minute=0, second=0):
|
||||
if not (0 + julian) <= d <= 365:
|
||||
min_day = 0 + julian
|
||||
min_day = 0 + julian # convert bool to int
|
||||
if not min_day <= d <= 365:
|
||||
raise ValueError(f"d must be in [{min_day}, 365], not: {d}")
|
||||
|
||||
self.d = d
|
||||
|
@ -560,11 +560,11 @@ class _CalendarOffset:
|
|||
)
|
||||
|
||||
def __init__(self, m, w, d, hour=2, minute=0, second=0):
|
||||
if not 0 < m <= 12:
|
||||
raise ValueError("m must be in (0, 12]")
|
||||
if not 1 <= m <= 12:
|
||||
raise ValueError("m must be in [1, 12]")
|
||||
|
||||
if not 0 < w <= 5:
|
||||
raise ValueError("w must be in (0, 5]")
|
||||
if not 1 <= w <= 5:
|
||||
raise ValueError("w must be in [1, 5]")
|
||||
|
||||
if not 0 <= d <= 6:
|
||||
raise ValueError("d must be in [0, 6]")
|
||||
|
@ -634,18 +634,21 @@ def _parse_tz_str(tz_str):
|
|||
|
||||
offset_str, *start_end_str = tz_str.split(",", 1)
|
||||
|
||||
# fmt: off
|
||||
parser_re = re.compile(
|
||||
r"(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
|
||||
r"((?P<stdoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?)" +
|
||||
r"((?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
|
||||
r"((?P<dstoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?))?" +
|
||||
r")?" + # dst
|
||||
r")?$" # stdoff
|
||||
r"""
|
||||
(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>)
|
||||
(?:
|
||||
(?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)
|
||||
(?:
|
||||
(?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>)
|
||||
(?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)?
|
||||
)? # dst
|
||||
)? # stdoff
|
||||
""",
|
||||
re.ASCII|re.VERBOSE
|
||||
)
|
||||
# fmt: on
|
||||
|
||||
m = parser_re.match(offset_str)
|
||||
m = parser_re.fullmatch(offset_str)
|
||||
|
||||
if m is None:
|
||||
raise ValueError(f"{tz_str} is not a valid TZ string")
|
||||
|
@ -696,16 +699,17 @@ def _parse_tz_str(tz_str):
|
|||
|
||||
|
||||
def _parse_dst_start_end(dststr):
|
||||
date, *time = dststr.split("/")
|
||||
if date[0] == "M":
|
||||
date, *time = dststr.split("/", 1)
|
||||
type = date[:1]
|
||||
if type == "M":
|
||||
n_is_julian = False
|
||||
m = re.match(r"M(\d{1,2})\.(\d).(\d)$", date)
|
||||
m = re.fullmatch(r"M(\d{1,2})\.(\d).(\d)", date, re.ASCII)
|
||||
if m is None:
|
||||
raise ValueError(f"Invalid dst start/end date: {dststr}")
|
||||
date_offset = tuple(map(int, m.groups()))
|
||||
offset = _CalendarOffset(*date_offset)
|
||||
else:
|
||||
if date[0] == "J":
|
||||
if type == "J":
|
||||
n_is_julian = True
|
||||
date = date[1:]
|
||||
else:
|
||||
|
@ -715,38 +719,54 @@ def _parse_dst_start_end(dststr):
|
|||
offset = _DayOffset(doy, n_is_julian)
|
||||
|
||||
if time:
|
||||
time_components = list(map(int, time[0].split(":")))
|
||||
n_components = len(time_components)
|
||||
if n_components < 3:
|
||||
time_components.extend([0] * (3 - n_components))
|
||||
offset.hour, offset.minute, offset.second = time_components
|
||||
offset.hour, offset.minute, offset.second = _parse_transition_time(time[0])
|
||||
|
||||
return offset
|
||||
|
||||
|
||||
def _parse_transition_time(time_str):
|
||||
match = re.fullmatch(
|
||||
r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
|
||||
time_str,
|
||||
re.ASCII
|
||||
)
|
||||
if match is None:
|
||||
raise ValueError(f"Invalid time: {time_str}")
|
||||
|
||||
h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))
|
||||
|
||||
if h > 167:
|
||||
raise ValueError(
|
||||
f"Hour must be in [0, 167]: {time_str}"
|
||||
)
|
||||
|
||||
if match.group("sign") == "-":
|
||||
h, m, s = -h, -m, -s
|
||||
|
||||
return h, m, s
|
||||
|
||||
|
||||
def _parse_tz_delta(tz_delta):
|
||||
match = re.match(
|
||||
r"(?P<sign>[+-])?(?P<h>\d{1,2})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
|
||||
match = re.fullmatch(
|
||||
r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
|
||||
tz_delta,
|
||||
re.ASCII
|
||||
)
|
||||
# Anything passed to this function should already have hit an equivalent
|
||||
# regular expression to find the section to parse.
|
||||
assert match is not None, tz_delta
|
||||
|
||||
h, m, s = (
|
||||
int(v) if v is not None else 0
|
||||
for v in map(match.group, ("h", "m", "s"))
|
||||
)
|
||||
h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))
|
||||
|
||||
total = h * 3600 + m * 60 + s
|
||||
|
||||
if not -86400 < total < 86400:
|
||||
if h > 24:
|
||||
raise ValueError(
|
||||
f"Offset must be strictly between -24h and +24h: {tz_delta}"
|
||||
f"Offset hours must be in [0, 24]: {tz_delta}"
|
||||
)
|
||||
|
||||
# Yes, +5 maps to an offset of -5h
|
||||
if match.group("sign") != "-":
|
||||
total *= -1
|
||||
total = -total
|
||||
|
||||
return total
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
:mod:`zoneinfo` now supports the full range of values in the TZ string
|
||||
determined by RFC 8536 and detects all invalid formats.
|
||||
Both Python and C implementations now raise exceptions of the same
|
||||
type on invalid data.
|
|
@ -61,21 +61,21 @@ struct TransitionRuleType {
|
|||
|
||||
typedef struct {
|
||||
TransitionRuleType base;
|
||||
uint8_t month;
|
||||
uint8_t week;
|
||||
uint8_t day;
|
||||
int8_t hour;
|
||||
int8_t minute;
|
||||
int8_t second;
|
||||
uint8_t month; /* 1 - 12 */
|
||||
uint8_t week; /* 1 - 5 */
|
||||
uint8_t day; /* 0 - 6 */
|
||||
int16_t hour; /* -167 - 167, RFC 8536 §3.3.1 */
|
||||
int8_t minute; /* signed 2 digits */
|
||||
int8_t second; /* signed 2 digits */
|
||||
} CalendarRule;
|
||||
|
||||
typedef struct {
|
||||
TransitionRuleType base;
|
||||
uint8_t julian;
|
||||
unsigned int day;
|
||||
int8_t hour;
|
||||
int8_t minute;
|
||||
int8_t second;
|
||||
uint8_t julian; /* 0, 1 */
|
||||
uint16_t day; /* 0 - 365 */
|
||||
int16_t hour; /* -167 - 167, RFC 8536 §3.3.1 */
|
||||
int8_t minute; /* signed 2 digits */
|
||||
int8_t second; /* signed 2 digits */
|
||||
} DayRule;
|
||||
|
||||
struct StrongCacheNode {
|
||||
|
@ -133,15 +133,14 @@ ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
|
|||
static int
|
||||
parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out);
|
||||
|
||||
static Py_ssize_t
|
||||
parse_abbr(const char *const p, PyObject **abbr);
|
||||
static Py_ssize_t
|
||||
parse_tz_delta(const char *const p, long *total_seconds);
|
||||
static Py_ssize_t
|
||||
parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
|
||||
int8_t *second);
|
||||
static Py_ssize_t
|
||||
parse_transition_rule(const char *const p, TransitionRuleType **out);
|
||||
static int
|
||||
parse_abbr(const char **p, PyObject **abbr);
|
||||
static int
|
||||
parse_tz_delta(const char **p, long *total_seconds);
|
||||
static int
|
||||
parse_transition_time(const char **p, int *hour, int *minute, int *second);
|
||||
static int
|
||||
parse_transition_rule(const char **p, TransitionRuleType **out);
|
||||
|
||||
static _ttinfo *
|
||||
find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
|
||||
|
@ -1327,14 +1326,14 @@ calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
|
|||
}
|
||||
|
||||
int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
|
||||
return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
|
||||
(int64_t)(self->minute * 60) + (int64_t)(self->second));
|
||||
return ordinal * 86400 + (int64_t)self->hour * 3600 +
|
||||
(int64_t)self->minute * 60 + self->second;
|
||||
}
|
||||
|
||||
/* Constructor for CalendarRule. */
|
||||
int
|
||||
calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
|
||||
int8_t minute, int8_t second, CalendarRule *out)
|
||||
calendarrule_new(int month, int week, int day, int hour,
|
||||
int minute, int second, CalendarRule *out)
|
||||
{
|
||||
// These bounds come from the POSIX standard, which describes an Mm.n.d
|
||||
// rule as:
|
||||
|
@ -1343,33 +1342,36 @@ calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
|
|||
// 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
|
||||
// may occur in either the fourth or the fifth week). Week 1 is the first
|
||||
// week in which the d'th day occurs. Day zero is Sunday.
|
||||
if (month <= 0 || month > 12) {
|
||||
PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
|
||||
if (month < 1 || month > 12) {
|
||||
PyErr_Format(PyExc_ValueError, "Month must be in [1, 12]");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (week <= 0 || week > 5) {
|
||||
PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
|
||||
if (week < 1 || week > 5) {
|
||||
PyErr_Format(PyExc_ValueError, "Week must be in [1, 5]");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// If the 'day' parameter type is changed to a signed type,
|
||||
// "day < 0" check must be added.
|
||||
if (/* day < 0 || */ day > 6) {
|
||||
if (day < 0 || day > 6) {
|
||||
PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (hour < -167 || hour > 167) {
|
||||
PyErr_Format(PyExc_ValueError, "Hour must be in [0, 167]");
|
||||
return -1;
|
||||
}
|
||||
|
||||
TransitionRuleType base = {&calendarrule_year_to_timestamp};
|
||||
|
||||
CalendarRule new_offset = {
|
||||
.base = base,
|
||||
.month = month,
|
||||
.week = week,
|
||||
.day = day,
|
||||
.hour = hour,
|
||||
.minute = minute,
|
||||
.second = second,
|
||||
.month = (uint8_t)month,
|
||||
.week = (uint8_t)week,
|
||||
.day = (uint8_t)day,
|
||||
.hour = (int16_t)hour,
|
||||
.minute = (int8_t)minute,
|
||||
.second = (int8_t)second,
|
||||
};
|
||||
|
||||
*out = new_offset;
|
||||
|
@ -1409,40 +1411,45 @@ dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
|
|||
// always transitions on a given calendar day (other than February 29th),
|
||||
// you would use a Julian day, e.g. J91 always refers to April 1st and J365
|
||||
// always refers to December 31st.
|
||||
unsigned int day = self->day;
|
||||
uint16_t day = self->day;
|
||||
if (self->julian && day >= 59 && is_leap_year(year)) {
|
||||
day += 1;
|
||||
}
|
||||
|
||||
return ((days_before_year + day) * 86400) + (self->hour * 3600) +
|
||||
(self->minute * 60) + self->second;
|
||||
return (days_before_year + day) * 86400 + (int64_t)self->hour * 3600 +
|
||||
(int64_t)self->minute * 60 + self->second;
|
||||
}
|
||||
|
||||
/* Constructor for DayRule. */
|
||||
static int
|
||||
dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
|
||||
int8_t second, DayRule *out)
|
||||
dayrule_new(int julian, int day, int hour, int minute,
|
||||
int second, DayRule *out)
|
||||
{
|
||||
// The POSIX standard specifies that Julian days must be in the range (1 <=
|
||||
// n <= 365) and that non-Julian (they call it "0-based Julian") days must
|
||||
// be in the range (0 <= n <= 365).
|
||||
if (day < julian || day > 365) {
|
||||
PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
|
||||
PyErr_Format(PyExc_ValueError, "day must be in [%d, 365], not: %d",
|
||||
julian, day);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (hour < -167 || hour > 167) {
|
||||
PyErr_Format(PyExc_ValueError, "Hour must be in [0, 167]");
|
||||
return -1;
|
||||
}
|
||||
|
||||
TransitionRuleType base = {
|
||||
&dayrule_year_to_timestamp,
|
||||
};
|
||||
|
||||
DayRule tmp = {
|
||||
.base = base,
|
||||
.julian = julian,
|
||||
.day = day,
|
||||
.hour = hour,
|
||||
.minute = minute,
|
||||
.second = second,
|
||||
.julian = (uint8_t)julian,
|
||||
.day = (int16_t)day,
|
||||
.hour = (int16_t)hour,
|
||||
.minute = (int8_t)minute,
|
||||
.second = (int8_t)second,
|
||||
};
|
||||
|
||||
*out = tmp;
|
||||
|
@ -1599,21 +1606,18 @@ parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out)
|
|||
const char *p = tz_str;
|
||||
|
||||
// Read the `std` abbreviation, which must be at least 3 characters long.
|
||||
Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
|
||||
if (num_chars < 1) {
|
||||
PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
|
||||
if (parse_abbr(&p, &std_abbr)) {
|
||||
if (!PyErr_Occurred()) {
|
||||
PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
|
||||
}
|
||||
goto error;
|
||||
}
|
||||
|
||||
p += num_chars;
|
||||
|
||||
// Now read the STD offset, which is required
|
||||
num_chars = parse_tz_delta(p, &std_offset);
|
||||
if (num_chars < 0) {
|
||||
if (parse_tz_delta(&p, &std_offset)) {
|
||||
PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
|
||||
goto error;
|
||||
}
|
||||
p += num_chars;
|
||||
|
||||
// If the string ends here, there is no DST, otherwise we must parse the
|
||||
// DST abbreviation and start and end dates and times.
|
||||
|
@ -1621,12 +1625,12 @@ parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out)
|
|||
goto complete;
|
||||
}
|
||||
|
||||
num_chars = parse_abbr(p, &dst_abbr);
|
||||
if (num_chars < 1) {
|
||||
PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
|
||||
if (parse_abbr(&p, &dst_abbr)) {
|
||||
if (!PyErr_Occurred()) {
|
||||
PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
|
||||
}
|
||||
goto error;
|
||||
}
|
||||
p += num_chars;
|
||||
|
||||
if (*p == ',') {
|
||||
// From the POSIX standard:
|
||||
|
@ -1636,14 +1640,11 @@ parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out)
|
|||
dst_offset = std_offset + 3600;
|
||||
}
|
||||
else {
|
||||
num_chars = parse_tz_delta(p, &dst_offset);
|
||||
if (num_chars < 0) {
|
||||
if (parse_tz_delta(&p, &dst_offset)) {
|
||||
PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
|
||||
tz_str_obj);
|
||||
goto error;
|
||||
}
|
||||
|
||||
p += num_chars;
|
||||
}
|
||||
|
||||
TransitionRuleType **transitions[2] = {&start, &end};
|
||||
|
@ -1656,14 +1657,12 @@ parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out)
|
|||
}
|
||||
p++;
|
||||
|
||||
num_chars = parse_transition_rule(p, transitions[i]);
|
||||
if (num_chars < 0) {
|
||||
if (parse_transition_rule(&p, transitions[i])) {
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"Malformed transition rule in TZ string: %R",
|
||||
tz_str_obj);
|
||||
goto error;
|
||||
}
|
||||
p += num_chars;
|
||||
}
|
||||
|
||||
if (*p != '\0') {
|
||||
|
@ -1698,21 +1697,25 @@ error:
|
|||
}
|
||||
|
||||
static int
|
||||
parse_uint(const char *const p, uint8_t *value)
|
||||
parse_digits(const char **p, int min, int max, int *value)
|
||||
{
|
||||
if (!Py_ISDIGIT(*p)) {
|
||||
return -1;
|
||||
assert(max <= 3);
|
||||
*value = 0;
|
||||
for (int i = 0; i < max; i++, (*p)++) {
|
||||
if (!Py_ISDIGIT(**p)) {
|
||||
return (i < min) ? -1 : 0;
|
||||
}
|
||||
*value *= 10;
|
||||
*value += (**p) - '0';
|
||||
}
|
||||
|
||||
*value = (*p) - '0';
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Parse the STD and DST abbreviations from a TZ string. */
|
||||
static Py_ssize_t
|
||||
parse_abbr(const char *const p, PyObject **abbr)
|
||||
static int
|
||||
parse_abbr(const char **p, PyObject **abbr)
|
||||
{
|
||||
const char *ptr = p;
|
||||
const char *ptr = *p;
|
||||
const char *str_start;
|
||||
const char *str_end;
|
||||
|
||||
|
@ -1741,7 +1744,7 @@ parse_abbr(const char *const p, PyObject **abbr)
|
|||
ptr++;
|
||||
}
|
||||
else {
|
||||
str_start = p;
|
||||
str_start = ptr;
|
||||
// From the POSIX standard:
|
||||
//
|
||||
// In the unquoted form, all characters in these fields shall be
|
||||
|
@ -1751,6 +1754,9 @@ parse_abbr(const char *const p, PyObject **abbr)
|
|||
ptr++;
|
||||
}
|
||||
str_end = ptr;
|
||||
if (str_end == str_start) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
*abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
|
||||
|
@ -1758,12 +1764,13 @@ parse_abbr(const char *const p, PyObject **abbr)
|
|||
return -1;
|
||||
}
|
||||
|
||||
return ptr - p;
|
||||
*p = ptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Parse a UTC offset from a TZ str. */
|
||||
static Py_ssize_t
|
||||
parse_tz_delta(const char *const p, long *total_seconds)
|
||||
static int
|
||||
parse_tz_delta(const char **p, long *total_seconds)
|
||||
{
|
||||
// From the POSIX spec:
|
||||
//
|
||||
|
@ -1778,75 +1785,30 @@ parse_tz_delta(const char *const p, long *total_seconds)
|
|||
// The POSIX spec says that the values for `hour` must be between 0 and 24
|
||||
// hours, but RFC 8536 §3.3.1 specifies that the hours part of the
|
||||
// transition times may be signed and range from -167 to 167.
|
||||
long sign = -1;
|
||||
long hours = 0;
|
||||
long minutes = 0;
|
||||
long seconds = 0;
|
||||
int hours = 0;
|
||||
int minutes = 0;
|
||||
int seconds = 0;
|
||||
|
||||
const char *ptr = p;
|
||||
char buff = *ptr;
|
||||
if (buff == '-' || buff == '+') {
|
||||
// Negative numbers correspond to *positive* offsets, from the spec:
|
||||
//
|
||||
// If preceded by a '-', the timezone shall be east of the Prime
|
||||
// Meridian; otherwise, it shall be west (which may be indicated by
|
||||
// an optional preceding '+' ).
|
||||
if (buff == '-') {
|
||||
sign = 1;
|
||||
}
|
||||
|
||||
ptr++;
|
||||
}
|
||||
|
||||
// The hour can be 1 or 2 numeric characters
|
||||
for (size_t i = 0; i < 2; ++i) {
|
||||
buff = *ptr;
|
||||
if (!Py_ISDIGIT(buff)) {
|
||||
if (i == 0) {
|
||||
return -1;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
hours *= 10;
|
||||
hours += buff - '0';
|
||||
ptr++;
|
||||
}
|
||||
|
||||
if (hours > 24 || hours < 0) {
|
||||
if (parse_transition_time(p, &hours, &minutes, &seconds)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Minutes and seconds always of the format ":dd"
|
||||
long *outputs[2] = {&minutes, &seconds};
|
||||
for (size_t i = 0; i < 2; ++i) {
|
||||
if (*ptr != ':') {
|
||||
goto complete;
|
||||
}
|
||||
ptr++;
|
||||
|
||||
for (size_t j = 0; j < 2; ++j) {
|
||||
buff = *ptr;
|
||||
if (!Py_ISDIGIT(buff)) {
|
||||
return -1;
|
||||
}
|
||||
*(outputs[i]) *= 10;
|
||||
*(outputs[i]) += buff - '0';
|
||||
ptr++;
|
||||
}
|
||||
if (hours > 24 || hours < -24) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
complete:
|
||||
*total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
|
||||
|
||||
return ptr - p;
|
||||
// Negative numbers correspond to *positive* offsets, from the spec:
|
||||
//
|
||||
// If preceded by a '-', the timezone shall be east of the Prime
|
||||
// Meridian; otherwise, it shall be west (which may be indicated by
|
||||
// an optional preceding '+' ).
|
||||
*total_seconds = -((hours * 3600L) + (minutes * 60) + seconds);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Parse the date portion of a transition rule. */
|
||||
static Py_ssize_t
|
||||
parse_transition_rule(const char *const p, TransitionRuleType **out)
|
||||
static int
|
||||
parse_transition_rule(const char **p, TransitionRuleType **out)
|
||||
{
|
||||
// The full transition rule indicates when to change back and forth between
|
||||
// STD and DST, and has the form:
|
||||
|
@ -1858,10 +1820,10 @@ parse_transition_rule(const char *const p, TransitionRuleType **out)
|
|||
// does not include the ',' at the end of the first rule.
|
||||
//
|
||||
// The POSIX spec states that if *time* is not given, the default is 02:00.
|
||||
const char *ptr = p;
|
||||
int8_t hour = 2;
|
||||
int8_t minute = 0;
|
||||
int8_t second = 0;
|
||||
const char *ptr = *p;
|
||||
int hour = 2;
|
||||
int minute = 0;
|
||||
int second = 0;
|
||||
|
||||
// Rules come in one of three flavors:
|
||||
//
|
||||
|
@ -1870,44 +1832,30 @@ parse_transition_rule(const char *const p, TransitionRuleType **out)
|
|||
// 3. Mm.n.d: Specifying by month, week and day-of-week.
|
||||
|
||||
if (*ptr == 'M') {
|
||||
uint8_t month, week, day;
|
||||
int month, week, day;
|
||||
ptr++;
|
||||
if (parse_uint(ptr, &month)) {
|
||||
|
||||
if (parse_digits(&ptr, 1, 2, &month)) {
|
||||
return -1;
|
||||
}
|
||||
ptr++;
|
||||
if (*ptr != '.') {
|
||||
uint8_t tmp;
|
||||
if (parse_uint(ptr, &tmp)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
month *= 10;
|
||||
month += tmp;
|
||||
ptr++;
|
||||
if (*ptr++ != '.') {
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint8_t *values[2] = {&week, &day};
|
||||
for (size_t i = 0; i < 2; ++i) {
|
||||
if (*ptr != '.') {
|
||||
return -1;
|
||||
}
|
||||
ptr++;
|
||||
|
||||
if (parse_uint(ptr, values[i])) {
|
||||
return -1;
|
||||
}
|
||||
ptr++;
|
||||
if (parse_digits(&ptr, 1, 1, &week)) {
|
||||
return -1;
|
||||
}
|
||||
if (*ptr++ != '.') {
|
||||
return -1;
|
||||
}
|
||||
if (parse_digits(&ptr, 1, 1, &day)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (*ptr == '/') {
|
||||
ptr++;
|
||||
Py_ssize_t num_chars =
|
||||
parse_transition_time(ptr, &hour, &minute, &second);
|
||||
if (num_chars < 0) {
|
||||
if (parse_transition_time(&ptr, &hour, &minute, &second)) {
|
||||
return -1;
|
||||
}
|
||||
ptr += num_chars;
|
||||
}
|
||||
|
||||
CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
|
||||
|
@ -1923,33 +1871,22 @@ parse_transition_rule(const char *const p, TransitionRuleType **out)
|
|||
*out = (TransitionRuleType *)rv;
|
||||
}
|
||||
else {
|
||||
uint8_t julian = 0;
|
||||
unsigned int day = 0;
|
||||
int julian = 0;
|
||||
int day = 0;
|
||||
if (*ptr == 'J') {
|
||||
julian = 1;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
if (!Py_ISDIGIT(*ptr)) {
|
||||
if (i == 0) {
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
day *= 10;
|
||||
day += (*ptr) - '0';
|
||||
ptr++;
|
||||
if (parse_digits(&ptr, 1, 3, &day)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (*ptr == '/') {
|
||||
ptr++;
|
||||
Py_ssize_t num_chars =
|
||||
parse_transition_time(ptr, &hour, &minute, &second);
|
||||
if (num_chars < 0) {
|
||||
if (parse_transition_time(&ptr, &hour, &minute, &second)) {
|
||||
return -1;
|
||||
}
|
||||
ptr += num_chars;
|
||||
}
|
||||
|
||||
DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
|
||||
|
@ -1964,13 +1901,13 @@ parse_transition_rule(const char *const p, TransitionRuleType **out)
|
|||
*out = (TransitionRuleType *)rv;
|
||||
}
|
||||
|
||||
return ptr - p;
|
||||
*p = ptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Parse the time portion of a transition rule (e.g. following an /) */
|
||||
static Py_ssize_t
|
||||
parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
|
||||
int8_t *second)
|
||||
static int
|
||||
parse_transition_time(const char **p, int *hour, int *minute, int *second)
|
||||
{
|
||||
// From the spec:
|
||||
//
|
||||
|
@ -1982,12 +1919,9 @@ parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
|
|||
// h[h][:mm[:ss]]
|
||||
//
|
||||
// RFC 8536 also allows transition times to be signed and to range from
|
||||
// -167 to +167, but the current version only supports [0, 99].
|
||||
//
|
||||
// TODO: Support the full range of transition hours.
|
||||
int8_t *components[3] = {hour, minute, second};
|
||||
const char *ptr = p;
|
||||
int8_t sign = 1;
|
||||
// -167 to +167.
|
||||
const char *ptr = *p;
|
||||
int sign = 1;
|
||||
|
||||
if (*ptr == '-' || *ptr == '+') {
|
||||
if (*ptr == '-') {
|
||||
|
@ -1996,32 +1930,31 @@ parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
|
|||
ptr++;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
if (i > 0) {
|
||||
if (*ptr != ':') {
|
||||
break;
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
// The hour can be 1 to 3 numeric characters
|
||||
if (parse_digits(&ptr, 1, 3, hour)) {
|
||||
return -1;
|
||||
}
|
||||
*hour *= sign;
|
||||
|
||||
uint8_t buff = 0;
|
||||
for (size_t j = 0; j < 2; j++) {
|
||||
if (!Py_ISDIGIT(*ptr)) {
|
||||
if (i == 0 && j > 0) {
|
||||
break;
|
||||
}
|
||||
// Minutes and seconds always of the format ":dd"
|
||||
if (*ptr == ':') {
|
||||
ptr++;
|
||||
if (parse_digits(&ptr, 2, 2, minute)) {
|
||||
return -1;
|
||||
}
|
||||
*minute *= sign;
|
||||
|
||||
if (*ptr == ':') {
|
||||
ptr++;
|
||||
if (parse_digits(&ptr, 2, 2, second)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
buff *= 10;
|
||||
buff += (*ptr) - '0';
|
||||
ptr++;
|
||||
*second *= sign;
|
||||
}
|
||||
|
||||
*(components[i]) = sign * buff;
|
||||
}
|
||||
|
||||
return ptr - p;
|
||||
*p = ptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Constructor for a _tzrule.
|
||||
|
@ -2376,8 +2309,8 @@ get_local_timestamp(PyObject *dt, int64_t *local_ts)
|
|||
}
|
||||
}
|
||||
|
||||
*local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
|
||||
(int64_t)(hour * 3600 + minute * 60 + second);
|
||||
*local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400L +
|
||||
(int64_t)(hour * 3600L + minute * 60 + second);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue