diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index bb32b149668..2904b691e3a 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -58,7 +58,7 @@ typedef struct { uint8_t month; uint8_t week; uint8_t day; - int8_t hour; + int16_t hour; int8_t minute; int8_t second; } CalendarRule; @@ -66,8 +66,8 @@ typedef struct { typedef struct { TransitionRuleType base; uint8_t julian; - unsigned int day; - int8_t hour; + uint16_t day; + int16_t hour; int8_t minute; int8_t second; } DayRule; @@ -118,15 +118,14 @@ ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff, static int parse_tz_str(PyObject *tz_str_obj, _tzrule *out); -static Py_ssize_t -parse_abbr(const char *const p, PyObject **abbr); -static Py_ssize_t -parse_tz_delta(const char *const p, long *total_seconds); -static Py_ssize_t -parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, - int8_t *second); -static Py_ssize_t -parse_transition_rule(const char *const p, TransitionRuleType **out); +static int +parse_abbr(const char **p, PyObject **abbr); +static int +parse_tz_delta(const char **p, long *total_seconds); +static int +parse_transition_time(const char **p, int *hour, int *minute, int *second); +static int +parse_transition_rule(const char **p, TransitionRuleType **out); static _ttinfo * find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year); @@ -1188,14 +1187,14 @@ calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year) } int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL; - return ((ordinal * 86400) + (int64_t)(self->hour * 3600) + + return ((ordinal * 86400L) + (int64_t)(self->hour * 3600L) + (int64_t)(self->minute * 60) + (int64_t)(self->second)); } /* Constructor for CalendarRule. */ int -calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour, - int8_t minute, int8_t second, CalendarRule *out) +calendarrule_new(int month, int week, int day, int hour, + int minute, int second, CalendarRule *out) { // These bounds come from the POSIX standard, which describes an Mm.n.d // rule as: @@ -1204,33 +1203,36 @@ calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour, // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which // may occur in either the fourth or the fifth week). Week 1 is the first // week in which the d'th day occurs. Day zero is Sunday. - if (month <= 0 || month > 12) { - PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]"); + if (month < 1 || month > 12) { + PyErr_Format(PyExc_ValueError, "Month must be in [1, 12]"); return -1; } - if (week <= 0 || week > 5) { - PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]"); + if (week < 1 || week > 5) { + PyErr_Format(PyExc_ValueError, "Week must be in [1, 5]"); return -1; } - // If the 'day' parameter type is changed to a signed type, - // "day < 0" check must be added. - if (/* day < 0 || */ day > 6) { + if (day < 0 || day > 6) { PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]"); return -1; } + if (hour < -167 || hour > 167) { + PyErr_Format(PyExc_ValueError, "Hour must be in [0, 167]"); + return -1; + } + TransitionRuleType base = {&calendarrule_year_to_timestamp}; CalendarRule new_offset = { .base = base, - .month = month, - .week = week, - .day = day, - .hour = hour, - .minute = minute, - .second = second, + .month = Py_SAFE_DOWNCAST(month, int, uint8_t), + .week = Py_SAFE_DOWNCAST(week, int, uint8_t), + .day = Py_SAFE_DOWNCAST(day, int, uint8_t), + .hour = Py_SAFE_DOWNCAST(hour, int, int16_t), + .minute = Py_SAFE_DOWNCAST(minute, int, int8_t), + .second = Py_SAFE_DOWNCAST(second, int, int8_t), }; *out = new_offset; @@ -1270,40 +1272,45 @@ dayrule_year_to_timestamp(TransitionRuleType *base_self, int year) // always transitions on a given calendar day (other than February 29th), // you would use a Julian day, e.g. J91 always refers to April 1st and J365 // always refers to December 31st. - unsigned int day = self->day; + uint16_t day = self->day; if (self->julian && day >= 59 && is_leap_year(year)) { day += 1; } - return ((days_before_year + day) * 86400) + (self->hour * 3600) + + return ((days_before_year + day) * 86400L) + (self->hour * 3600L) + (self->minute * 60) + self->second; } /* Constructor for DayRule. */ static int -dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute, - int8_t second, DayRule *out) +dayrule_new(int julian, int day, int hour, int minute, + int second, DayRule *out) { // The POSIX standard specifies that Julian days must be in the range (1 <= // n <= 365) and that non-Julian (they call it "0-based Julian") days must // be in the range (0 <= n <= 365). if (day < julian || day > 365) { - PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u", + PyErr_Format(PyExc_ValueError, "day must be in [%d, 365], not: %d", julian, day); return -1; } + if (hour < -167 || hour > 167) { + PyErr_Format(PyExc_ValueError, "Hour must be in [0, 167]"); + return -1; + } + TransitionRuleType base = { &dayrule_year_to_timestamp, }; DayRule tmp = { .base = base, - .julian = julian, - .day = day, - .hour = hour, - .minute = minute, - .second = second, + .julian = Py_SAFE_DOWNCAST(julian, int, uint8_t), + .day = Py_SAFE_DOWNCAST(day, int, int16_t), + .hour = Py_SAFE_DOWNCAST(hour, int, int16_t), + .minute = Py_SAFE_DOWNCAST(minute, int, int8_t), + .second = Py_SAFE_DOWNCAST(second, int, int8_t), }; *out = tmp; @@ -1453,28 +1460,25 @@ parse_tz_str(PyObject *tz_str_obj, _tzrule *out) long std_offset = 1 << 20; long dst_offset = 1 << 20; - char *tz_str = PyBytes_AsString(tz_str_obj); + const char *tz_str = PyBytes_AsString(tz_str_obj); if (tz_str == NULL) { return -1; } - char *p = tz_str; + const char *p = tz_str; // Read the `std` abbreviation, which must be at least 3 characters long. - Py_ssize_t num_chars = parse_abbr(p, &std_abbr); - if (num_chars < 1) { - PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj); + if (parse_abbr(&p, &std_abbr)) { + if (!PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj); + } goto error; } - p += num_chars; - // Now read the STD offset, which is required - num_chars = parse_tz_delta(p, &std_offset); - if (num_chars < 0) { + if (parse_tz_delta(&p, &std_offset)) { PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj); goto error; } - p += num_chars; // If the string ends here, there is no DST, otherwise we must parse the // DST abbreviation and start and end dates and times. @@ -1482,12 +1486,12 @@ parse_tz_str(PyObject *tz_str_obj, _tzrule *out) goto complete; } - num_chars = parse_abbr(p, &dst_abbr); - if (num_chars < 1) { - PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj); + if (parse_abbr(&p, &dst_abbr)) { + if (!PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj); + } goto error; } - p += num_chars; if (*p == ',') { // From the POSIX standard: @@ -1497,14 +1501,11 @@ parse_tz_str(PyObject *tz_str_obj, _tzrule *out) dst_offset = std_offset + 3600; } else { - num_chars = parse_tz_delta(p, &dst_offset); - if (num_chars < 0) { + if (parse_tz_delta(&p, &dst_offset)) { PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R", tz_str_obj); goto error; } - - p += num_chars; } TransitionRuleType **transitions[2] = {&start, &end}; @@ -1517,14 +1518,12 @@ parse_tz_str(PyObject *tz_str_obj, _tzrule *out) } p++; - num_chars = parse_transition_rule(p, transitions[i]); - if (num_chars < 0) { + if (parse_transition_rule(&p, transitions[i])) { PyErr_Format(PyExc_ValueError, "Malformed transition rule in TZ string: %R", tz_str_obj); goto error; } - p += num_chars; } if (*p != '\0') { @@ -1558,21 +1557,24 @@ error: } static int -parse_uint(const char *const p, uint8_t *value) +parse_digits(const char **p, int min, int max, int *value) { - if (!isdigit(*p)) { - return -1; + *value = 0; + for (int i = 0; i < max; i++, (*p)++) { + if (!isdigit(**p)) { + return (i < min) ? -1 : 0; + } + *value *= 10; + *value += (**p) - '0'; } - - *value = (*p) - '0'; return 0; } /* Parse the STD and DST abbreviations from a TZ string. */ -static Py_ssize_t -parse_abbr(const char *const p, PyObject **abbr) +static int +parse_abbr(const char **p, PyObject **abbr) { - const char *ptr = p; + const char *ptr = *p; char buff = *ptr; const char *str_start; const char *str_end; @@ -1601,7 +1603,7 @@ parse_abbr(const char *const p, PyObject **abbr) ptr++; } else { - str_start = p; + str_start = ptr; // From the POSIX standard: // // In the unquoted form, all characters in these fields shall be @@ -1611,6 +1613,9 @@ parse_abbr(const char *const p, PyObject **abbr) ptr++; } str_end = ptr; + if (str_end == str_start) { + return -1; + } } *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start); @@ -1618,12 +1623,13 @@ parse_abbr(const char *const p, PyObject **abbr) return -1; } - return ptr - p; + *p = ptr; + return 0; } /* Parse a UTC offset from a TZ str. */ -static Py_ssize_t -parse_tz_delta(const char *const p, long *total_seconds) +static int +parse_tz_delta(const char **p, long *total_seconds) { // From the POSIX spec: // @@ -1638,75 +1644,30 @@ parse_tz_delta(const char *const p, long *total_seconds) // The POSIX spec says that the values for `hour` must be between 0 and 24 // hours, but RFC 8536 ยง3.3.1 specifies that the hours part of the // transition times may be signed and range from -167 to 167. - long sign = -1; - long hours = 0; - long minutes = 0; - long seconds = 0; + int hours = 0; + int minutes = 0; + int seconds = 0; - const char *ptr = p; - char buff = *ptr; - if (buff == '-' || buff == '+') { - // Negative numbers correspond to *positive* offsets, from the spec: - // - // If preceded by a '-', the timezone shall be east of the Prime - // Meridian; otherwise, it shall be west (which may be indicated by - // an optional preceding '+' ). - if (buff == '-') { - sign = 1; - } - - ptr++; - } - - // The hour can be 1 or 2 numeric characters - for (size_t i = 0; i < 2; ++i) { - buff = *ptr; - if (!isdigit(buff)) { - if (i == 0) { - return -1; - } - else { - break; - } - } - - hours *= 10; - hours += buff - '0'; - ptr++; - } - - if (hours > 24 || hours < 0) { + if (parse_transition_time(p, &hours, &minutes, &seconds)) { return -1; } - // Minutes and seconds always of the format ":dd" - long *outputs[2] = {&minutes, &seconds}; - for (size_t i = 0; i < 2; ++i) { - if (*ptr != ':') { - goto complete; - } - ptr++; - - for (size_t j = 0; j < 2; ++j) { - buff = *ptr; - if (!isdigit(buff)) { - return -1; - } - *(outputs[i]) *= 10; - *(outputs[i]) += buff - '0'; - ptr++; - } + if (hours > 24 || hours < -24) { + return -1; } -complete: - *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds); - - return ptr - p; + // Negative numbers correspond to *positive* offsets, from the spec: + // + // If preceded by a '-', the timezone shall be east of the Prime + // Meridian; otherwise, it shall be west (which may be indicated by + // an optional preceding '+' ). + *total_seconds = -((hours * 3600L) + (minutes * 60) + seconds); + return 0; } /* Parse the date portion of a transition rule. */ -static Py_ssize_t -parse_transition_rule(const char *const p, TransitionRuleType **out) +static int +parse_transition_rule(const char **p, TransitionRuleType **out) { // The full transition rule indicates when to change back and forth between // STD and DST, and has the form: @@ -1718,10 +1679,10 @@ parse_transition_rule(const char *const p, TransitionRuleType **out) // does not include the ',' at the end of the first rule. // // The POSIX spec states that if *time* is not given, the default is 02:00. - const char *ptr = p; - int8_t hour = 2; - int8_t minute = 0; - int8_t second = 0; + const char *ptr = *p; + int hour = 2; + int minute = 0; + int second = 0; // Rules come in one of three flavors: // @@ -1730,44 +1691,30 @@ parse_transition_rule(const char *const p, TransitionRuleType **out) // 3. Mm.n.d: Specifying by month, week and day-of-week. if (*ptr == 'M') { - uint8_t month, week, day; + int month, week, day; ptr++; - if (parse_uint(ptr, &month)) { + + if (parse_digits(&ptr, 1, 2, &month)) { return -1; } - ptr++; - if (*ptr != '.') { - uint8_t tmp; - if (parse_uint(ptr, &tmp)) { - return -1; - } - - month *= 10; - month += tmp; - ptr++; + if (*ptr++ != '.') { + return -1; } - - uint8_t *values[2] = {&week, &day}; - for (size_t i = 0; i < 2; ++i) { - if (*ptr != '.') { - return -1; - } - ptr++; - - if (parse_uint(ptr, values[i])) { - return -1; - } - ptr++; + if (parse_digits(&ptr, 1, 1, &week)) { + return -1; + } + if (*ptr++ != '.') { + return -1; + } + if (parse_digits(&ptr, 1, 1, &day)) { + return -1; } if (*ptr == '/') { ptr++; - Py_ssize_t num_chars = - parse_transition_time(ptr, &hour, &minute, &second); - if (num_chars < 0) { + if (parse_transition_time(&ptr, &hour, &minute, &second)) { return -1; } - ptr += num_chars; } CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule)); @@ -1783,33 +1730,22 @@ parse_transition_rule(const char *const p, TransitionRuleType **out) *out = (TransitionRuleType *)rv; } else { - uint8_t julian = 0; - unsigned int day = 0; + int julian = 0; + int day = 0; if (*ptr == 'J') { julian = 1; ptr++; } - for (size_t i = 0; i < 3; ++i) { - if (!isdigit(*ptr)) { - if (i == 0) { - return -1; - } - break; - } - day *= 10; - day += (*ptr) - '0'; - ptr++; + if (parse_digits(&ptr, 1, 3, &day)) { + return -1; } if (*ptr == '/') { ptr++; - Py_ssize_t num_chars = - parse_transition_time(ptr, &hour, &minute, &second); - if (num_chars < 0) { + if (parse_transition_time(&ptr, &hour, &minute, &second)) { return -1; } - ptr += num_chars; } DayRule *rv = PyMem_Calloc(1, sizeof(DayRule)); @@ -1824,13 +1760,13 @@ parse_transition_rule(const char *const p, TransitionRuleType **out) *out = (TransitionRuleType *)rv; } - return ptr - p; + *p = ptr; + return 0; } /* Parse the time portion of a transition rule (e.g. following an /) */ -static Py_ssize_t -parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, - int8_t *second) +static int +parse_transition_time(const char **p, int *hour, int *minute, int *second) { // From the spec: // @@ -1842,12 +1778,9 @@ parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, // h[h][:mm[:ss]] // // RFC 8536 also allows transition times to be signed and to range from - // -167 to +167, but the current version only supports [0, 99]. - // - // TODO: Support the full range of transition hours. - int8_t *components[3] = {hour, minute, second}; - const char *ptr = p; - int8_t sign = 1; + // -167 to +167. + const char *ptr = *p; + int sign = 1; if (*ptr == '-' || *ptr == '+') { if (*ptr == '-') { @@ -1856,32 +1789,31 @@ parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, ptr++; } - for (size_t i = 0; i < 3; ++i) { - if (i > 0) { - if (*ptr != ':') { - break; - } - ptr++; - } + // The hour can be 1 to 3 numeric characters + if (parse_digits(&ptr, 1, 3, hour)) { + return -1; + } + *hour *= sign; - uint8_t buff = 0; - for (size_t j = 0; j < 2; j++) { - if (!isdigit(*ptr)) { - if (i == 0 && j > 0) { - break; - } + // Minutes and seconds always of the format ":dd" + if (*ptr == ':') { + ptr++; + if (parse_digits(&ptr, 2, 2, minute)) { + return -1; + } + *minute *= sign; + + if (*ptr == ':') { + ptr++; + if (parse_digits(&ptr, 2, 2, second)) { return -1; } - - buff *= 10; - buff += (*ptr) - '0'; - ptr++; + *second *= sign; } - - *(components[i]) = sign * buff; } - return ptr - p; + *p = ptr; + return 0; } /* Constructor for a _tzrule. @@ -2236,8 +2168,8 @@ get_local_timestamp(PyObject *dt, int64_t *local_ts) } } - *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 + - (int64_t)(hour * 3600 + minute * 60 + second); + *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400L + + (int64_t)(hour * 3600L + minute * 60 + second); return 0; }