bpo-15873: Implement [date][time].fromisoformat (#4699)

Closes bpo-15873.
This commit is contained in:
Paul Ganssle 2017-12-21 00:33:49 -05:00 committed by Alexander Belopolsky
parent 507434fd50
commit 09dc2f508c
5 changed files with 989 additions and 32 deletions

View File

@ -436,6 +436,21 @@ Other constructors, all class methods:
d``.
.. classmethod:: date.fromisoformat(date_string)
Return a :class:`date` corresponding to a *date_string* in the format emitted
by :meth:`date.isoformat`. Specifically, this function supports strings in
the format(s) ``YYYY-MM-DD``.
.. caution::
This does not support parsing arbitrary ISO 8601 strings - it is only intended
as the inverse operation of :meth:`date.isoformat`.
.. versionadded:: 3.7
Class attributes:
.. attribute:: date.min
@ -819,6 +834,21 @@ Other constructors, all class methods:
Added the *tzinfo* argument.
.. classmethod:: datetime.fromisoformat(date_string)
Return a :class:`datetime` corresponding to a *date_string* in one of the
formats emitted by :meth:`date.isoformat` and :meth:`datetime.isoformat`.
Specifically, this function supports strings in the format(s)
``YYYY-MM-DD[*HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]]``,
where ``*`` can match any single character.
.. caution::
This does not support parsing arbitrary ISO 8601 strings - it is only intended
as the inverse operation of :meth:`datetime.isoformat`.
.. versionadded:: 3.7
.. classmethod:: datetime.strptime(date_string, format)
Return a :class:`.datetime` corresponding to *date_string*, parsed according to
@ -1486,6 +1516,23 @@ In boolean contexts, a :class:`.time` object is always considered to be true.
error-prone and has been removed in Python 3.5. See :issue:`13936` for full
details.
Other constructor:
.. classmethod:: time.fromisoformat(time_string)
Return a :class:`time` corresponding to a *time_string* in one of the
formats emitted by :meth:`time.isoformat`. Specifically, this function supports
strings in the format(s) ``HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]``.
.. caution::
This does not support parsing arbitrary ISO 8601 strings - it is only intended
as the inverse operation of :meth:`time.isoformat`.
.. versionadded:: 3.7
Instance methods:
.. method:: time.replace(hour=self.hour, minute=self.minute, second=self.second, \
@ -1587,7 +1634,6 @@ Instance methods:
``self.tzinfo.tzname(None)``, or raises an exception if the latter doesn't
return ``None`` or a string object.
Example:
>>> from datetime import time, tzinfo, timedelta

View File

@ -173,6 +173,24 @@ def _format_time(hh, mm, ss, us, timespec='auto'):
else:
return fmt.format(hh, mm, ss, us)
def _format_offset(off):
    """Render a UTC offset as ``+HH:MM[:SS[.ffffff]]`` (or with ``-``).

    *off* is a timedelta or None.  None yields the empty string.  Seconds
    are emitted only when the offset is not a whole number of minutes, and
    microseconds only when they are non-zero.
    """
    if off is None:
        return ''

    if off.days < 0:
        sign, off = "-", -off
    else:
        sign = "+"

    hours, rest = divmod(off, timedelta(hours=1))
    minutes, rest = divmod(rest, timedelta(minutes=1))

    pieces = ["%s%02d:%02d" % (sign, hours, minutes)]
    # `rest` is the sub-minute remainder; it is truthy when either its
    # seconds or its microseconds are non-zero.
    if rest:
        pieces.append(":%02d" % rest.seconds)
        if rest.microseconds:
            pieces.append('.%06d' % rest.microseconds)
    return ''.join(pieces)
# Correctly substitute for %z and %Z escapes in strftime formats.
def _wrap_strftime(object, format, timetuple):
# Don't call utcoffset() or tzname() unless actually needed.
@ -237,6 +255,102 @@ def _wrap_strftime(object, format, timetuple):
newformat = "".join(newformat)
return _time.strftime(newformat, timetuple)
# Helpers for parsing the result of isoformat()
def _parse_isoformat_date(dtstr):
    """Parse a ``YYYY-MM-DD`` string into ``[year, month, day]``.

    Only the exact layout produced by date.isoformat() is accepted: ten
    characters, ASCII digits in the numeric fields and '-' separators.
    Range checking of the values is left to the caller.

    Raises ValueError on any deviation.
    """
    # Enforce the length here instead of trusting the caller: a shorter
    # string would otherwise raise IndexError or parse a one-digit field.
    if len(dtstr) != 10:
        raise ValueError('Invalid date length: %d' % len(dtstr))

    if dtstr[4] != '-':
        raise ValueError('Invalid date separator: %s' % dtstr[4])
    if dtstr[7] != '-':
        raise ValueError('Invalid date separator: %s' % dtstr[7])

    fields = (dtstr[0:4], dtstr[5:7], dtstr[8:10])
    for field in fields:
        # int() alone is too permissive for this format: it also accepts a
        # sign, surrounding whitespace, underscores and non-ASCII digits.
        if any(ch not in '0123456789' for ch in field):
            raise ValueError('Invalid date component: %s' % field)

    return [int(field) for field in fields]
def _parse_hh_mm_ss_ff(tstr):
    """Parse ``HH[:MM[:SS[.fff[fff]]]]`` into a 4-element list.

    Returns ``[hour, minute, second, microsecond]``; components missing
    from *tstr* are 0.  A 3-digit fraction is read as milliseconds.
    Range checking of the values is left to the caller.

    Raises ValueError if *tstr* does not match the format.
    """
    digits = '0123456789'
    len_str = len(tstr)

    time_comps = [0, 0, 0, 0]
    pos = 0
    for comp in range(0, 3):
        field = tstr[pos:pos+2]
        if len(field) < 2:
            raise ValueError('Incomplete time component')
        # int() alone would also accept things like ' 3', '+3' or
        # non-ASCII digits, none of which isoformat() can emit.
        if field[0] not in digits or field[1] not in digits:
            raise ValueError('Invalid time component: %s' % field)

        time_comps[comp] = int(field)

        pos += 2
        next_char = tstr[pos:pos+1]

        # Stop at the end of the string, or after the seconds field
        # (whatever follows seconds must be the fraction, checked below).
        if not next_char or comp >= 2:
            break

        if next_char != ':':
            raise ValueError('Invalid time separator: %c' % next_char)

        pos += 1

    if pos < len_str:
        if tstr[pos] != '.':
            raise ValueError('Invalid microsecond component')

        fraction = tstr[pos+1:]
        if len(fraction) not in (3, 6):
            raise ValueError('Invalid microsecond component')
        if any(ch not in digits for ch in fraction):
            raise ValueError('Invalid microsecond component')

        time_comps[3] = int(fraction)
        if len(fraction) == 3:
            time_comps[3] *= 1000

    return time_comps
def _parse_isoformat_time(tstr):
    """Parse ``HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]``.

    Returns ``[hour, minute, second, microsecond, tzinfo]`` where tzinfo is
    None when the string carries no UTC offset, ``timezone.utc`` when the
    offset is exactly zero, and a fixed-offset ``timezone`` otherwise.

    Raises ValueError if the string is malformed.
    """
    if len(tstr) < 2:
        raise ValueError('Isoformat time too short')

    # Locate the offset sign without a regex.  The first '-' is used if
    # there is one; only otherwise is the first '+' considered.
    sign_idx = tstr.find('-')
    if sign_idx < 0:
        sign_idx = tstr.find('+')

    if sign_idx < 0:
        fields = _parse_hh_mm_ss_ff(tstr)
        fields.append(None)
        return fields

    fields = _parse_hh_mm_ss_ff(tstr[:sign_idx])

    offset_str = tstr[sign_idx + 1:]
    # Valid time zone strings are:
    #
    # HH:MM               len: 5
    # HH:MM:SS            len: 8
    # HH:MM:SS.ffffff     len: 15
    if len(offset_str) not in (5, 8, 15):
        raise ValueError('Malformed time zone string')

    off_h, off_m, off_s, off_us = _parse_hh_mm_ss_ff(offset_str)
    if off_h == 0 and off_m == 0 and off_s == 0 and off_us == 0:
        tzi = timezone.utc
    else:
        tzsign = -1 if tstr[sign_idx] == '-' else 1
        td = timedelta(hours=off_h, minutes=off_m,
                       seconds=off_s, microseconds=off_us)
        tzi = timezone(tzsign * td)

    fields.append(tzi)
    return fields
# Just raise TypeError if the arg isn't None or a string.
def _check_tzname(name):
if name is not None and not isinstance(name, str):
@ -732,6 +846,19 @@ class date:
y, m, d = _ord2ymd(n)
return cls(y, m, d)
@classmethod
def fromisoformat(cls, date_string):
    """Construct a date from the output of date.isoformat().

    Raises TypeError if *date_string* is not a str, and ValueError if it
    is not a valid ``YYYY-MM-DD`` string.
    """
    if not isinstance(date_string, str):
        raise TypeError('fromisoformat: argument must be str')

    # Checked explicitly rather than with `assert`: assertions are removed
    # under -O, which would let strings with trailing characters through.
    if len(date_string) != 10:
        raise ValueError('Invalid isoformat string: %s' % date_string)

    try:
        return cls(*_parse_isoformat_date(date_string))
    except Exception:
        raise ValueError('Invalid isoformat string: %s' % date_string)
# Conversions to string
def __repr__(self):
@ -1190,22 +1317,10 @@ class time:
# Conversion to string
def _tzstr(self, sep=":"):
"""Return formatted timezone offset (+xx:xx) or None."""
def _tzstr(self):
"""Return formatted timezone offset (+xx:xx) or an empty string."""
off = self.utcoffset()
if off is not None:
if off.days < 0:
sign = "-"
off = -off
else:
sign = "+"
hh, mm = divmod(off, timedelta(hours=1))
mm, ss = divmod(mm, timedelta(minutes=1))
assert 0 <= hh < 24
off = "%s%02d%s%02d" % (sign, hh, sep, mm)
if ss:
off += ':%02d' % ss.seconds
return off
return _format_offset(off)
def __repr__(self):
"""Convert to formal string, for repr()."""
@ -1244,6 +1359,18 @@ class time:
__str__ = isoformat
@classmethod
def fromisoformat(cls, time_string):
    """Build a time from a string produced by time.isoformat().

    Raises TypeError for a non-str argument; any failure while parsing or
    constructing the object is reported as ValueError.
    """
    if isinstance(time_string, str):
        try:
            fields = _parse_isoformat_time(time_string)
            return cls(*fields)
        except Exception:
            raise ValueError('Invalid isoformat string: %s' % time_string)

    raise TypeError('fromisoformat: argument must be str')
def strftime(self, fmt):
"""Format using strftime(). The date part of the timestamp passed
to underlying strftime should not be used.
@ -1497,6 +1624,31 @@ class datetime(date):
time.hour, time.minute, time.second, time.microsecond,
tzinfo, fold=time.fold)
@classmethod
def fromisoformat(cls, date_string):
    """Construct a datetime from the output of datetime.isoformat().

    The first ten characters are parsed as the date, the eleventh is the
    (arbitrary) separator and anything after it is parsed as the time.

    Raises TypeError if *date_string* is not a str, and ValueError if it
    cannot be parsed.
    """
    if not isinstance(date_string, str):
        raise TypeError('fromisoformat: argument must be str')

    # Split this at the separator
    dstr = date_string[0:10]
    tstr = date_string[11:]

    # The date part has a fixed width.  Without this check a short string
    # either escapes as IndexError ('2009-04') or is silently accepted
    # with a one-digit day ('2009-04-1').
    if len(dstr) != 10:
        raise ValueError('Invalid isoformat string: %s' % date_string)

    try:
        date_components = _parse_isoformat_date(dstr)
    except ValueError:
        raise ValueError('Invalid isoformat string: %s' % date_string)

    if tstr:
        try:
            time_components = _parse_isoformat_time(tstr)
        except ValueError:
            raise ValueError('Invalid isoformat string: %s' % date_string)
    else:
        time_components = [0, 0, 0, 0, None]

    return cls(*(date_components + time_components))
def timetuple(self):
"Return local time tuple compatible with time.localtime()."
dst = self.dst()
@ -1673,18 +1825,10 @@ class datetime(date):
self._microsecond, timespec))
off = self.utcoffset()
if off is not None:
if off.days < 0:
sign = "-"
off = -off
else:
sign = "+"
hh, mm = divmod(off, timedelta(hours=1))
mm, ss = divmod(mm, timedelta(minutes=1))
s += "%s%02d:%02d" % (sign, hh, mm)
if ss:
assert not ss.microseconds
s += ":%02d" % ss.seconds
tz = _format_offset(off)
if tz:
s += tz
return s
def __repr__(self):
@ -2275,9 +2419,10 @@ else:
_check_date_fields, _check_int_field, _check_time_fields,
_check_tzinfo_arg, _check_tzname, _check_utc_offset, _cmp, _cmperror,
_date_class, _days_before_month, _days_before_year, _days_in_month,
_format_time, _is_leap, _isoweek1monday, _math, _ord2ymd,
_time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord,
_divide_and_round)
_format_time, _format_offset, _is_leap, _isoweek1monday, _math,
_ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord,
_divide_and_round, _parse_isoformat_date, _parse_isoformat_time,
_parse_hh_mm_ss_ff)
# XXX Since import * above excludes names that start with _,
# docstring does not get overwritten. In the future, it may be
# appropriate to maintain a single module level docstring and

View File

@ -49,7 +49,6 @@ OTHERSTUFF = (10, 34.5, "abc", {}, [], ())
INF = float("inf")
NAN = float("nan")
#############################################################################
# module tests
@ -1588,6 +1587,63 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase):
# blow up because other fields are insane.
self.theclass(base[:2] + bytes([ord_byte]) + base[3:])
def test_fromisoformat(self):
    """isoformat() output must round-trip through fromisoformat()."""
    ymd_cases = (
        (1, 1, 1),
        (1000, 2, 14),
        (1900, 1, 1),
        (2000, 2, 29),
        (2004, 11, 12),
        (2004, 4, 3),
        (2017, 5, 30),
    )

    for ymd in ymd_cases:
        expected = self.theclass(*ymd)
        label = expected.isoformat()
        with self.subTest(dt_str=label):
            round_tripped = self.theclass.fromisoformat(expected.isoformat())
            self.assertEqual(expected, round_tripped)
def test_fromisoformat_subclass(self):
    """fromisoformat() called on a subclass returns that subclass."""
    class DateSubclass(self.theclass):
        pass

    iso_text = DateSubclass(2014, 12, 14).isoformat()
    rebuilt = DateSubclass.fromisoformat(iso_text)

    self.assertIsInstance(rebuilt, DateSubclass)
def test_fromisoformat_fails(self):
    """fromisoformat() must raise ValueError for every invalid string."""
    bad_strs = [
        '',                 # Empty string
        '009-03-04',        # Not 10 characters
        '123456789',        # Not a date
        '200a-12-04',       # Invalid character in year
        '2009-1a-04',       # Invalid character in month
        '2009-12-0a',       # Invalid character in day
        '2009-01-32',       # Invalid day
        '2009-02-29',       # Invalid leap day
        '20090228',         # Valid ISO8601 output not from isoformat()
    ]

    for bad_str in bad_strs:
        # subTest, as in the other fromisoformat failure tests: every
        # string is checked and a failure names the offending input.
        with self.subTest(bad_str=bad_str):
            with self.assertRaises(ValueError):
                self.theclass.fromisoformat(bad_str)
def test_fromisoformat_fails_typeerror(self):
    """fromisoformat() must raise TypeError for non-str arguments."""
    import io

    bad_types = [b'2009-03-01', None, io.StringIO('2009-03-01')]

    for bad_type in bad_types:
        # subTest so that one failing type does not hide the others.
        with self.subTest(bad_type=bad_type):
            with self.assertRaises(TypeError):
                self.theclass.fromisoformat(bad_type)
#############################################################################
# datetime tests
@ -1675,6 +1731,36 @@ class TestDateTime(TestDate):
t = self.theclass(2, 3, 2, tzinfo=tz)
self.assertEqual(t.isoformat(), "0002-03-02T00:00:00+00:00:16")
def test_isoformat_timezone(self):
    """isoformat() appends the UTC offset, including seconds/microseconds."""
    tzoffsets = [
        ('05:00', timedelta(hours=5)),
        ('02:00', timedelta(hours=2)),
        ('06:27', timedelta(hours=6, minutes=27)),
        ('12:32:30', timedelta(hours=12, minutes=32, seconds=30)),
        ('02:04:09.123456',
         timedelta(hours=2, minutes=4, seconds=9, microseconds=123456)),
    ]

    tzinfos = [
        ('', None),
        ('+00:00', timezone.utc),
        ('+00:00', timezone(timedelta(0))),
    ]

    # Every offset is exercised with both signs.
    tzinfos += [
        (prefix + expected, timezone(sign * td))
        for expected, td in tzoffsets
        for prefix, sign in [('-', -1), ('+', 1)]
    ]

    dt_base = self.theclass(2016, 4, 1, 12, 37, 9)
    exp_base = '2016-04-01T12:37:09'

    for exp_tz, tzi in tzinfos:
        dt = dt_base.replace(tzinfo=tzi)
        exp = exp_base + exp_tz
        with self.subTest(tzi=tzi):
            # assertEqual rather than a bare assert: it is not stripped
            # under -O and reports both values on failure.
            self.assertEqual(dt.isoformat(), exp)
def test_format(self):
dt = self.theclass(2007, 9, 10, 4, 5, 1, 123)
self.assertEqual(dt.__format__(''), str(dt))
@ -2334,6 +2420,173 @@ class TestDateTime(TestDate):
self.assertEqual(dt2.newmeth(-7), dt1.year + dt1.month +
dt1.second - 7)
def test_fromisoformat_datetime(self):
    """isoformat() is reversible for a grid of dates, times and zones."""
    base_dates = (
        (1, 1, 1),
        (1900, 1, 1),
        (2004, 11, 12),
        (2017, 5, 30),
    )

    base_times = (
        (0, 0, 0, 0),
        (0, 0, 0, 241000),
        (0, 0, 0, 234567),
        (12, 30, 45, 234567),
    )

    separators = (' ', 'T')

    tzinfos = (
        None,
        timezone.utc,
        timezone(timedelta(hours=-5)),
        timezone(timedelta(hours=2)),
    )

    samples = []
    for date_tuple in base_dates:
        for time_tuple in base_times:
            for tzi in tzinfos:
                samples.append(
                    self.theclass(*date_tuple, *time_tuple, tzinfo=tzi))

    for sample in samples:
        for sep in separators:
            dtstr = sample.isoformat(sep=sep)

            with self.subTest(dtstr=dtstr):
                self.assertEqual(sample, self.theclass.fromisoformat(dtstr))
def test_fromisoformat_timezone(self):
    """Aware datetimes round-trip, including sub-minute UTC offsets."""
    base_dt = self.theclass(2014, 12, 30, 12, 30, 45, 217456)

    tzoffsets = [
        timedelta(hours=5), timedelta(hours=2),
        timedelta(hours=6, minutes=27),
        timedelta(hours=12, minutes=32, seconds=30),
        timedelta(hours=2, minutes=4, seconds=9, microseconds=123456)
    ]

    # Add the negative counterpart of every offset.
    tzoffsets += [-1 * td for td in tzoffsets]

    tzinfos = [None, timezone.utc,
               timezone(timedelta(hours=0))]

    tzinfos += [timezone(td) for td in tzoffsets]

    for tzi in tzinfos:
        dt = base_dt.replace(tzinfo=tzi)
        dtstr = dt.isoformat()

        with self.subTest(tstr=dtstr):
            dt_rt = self.theclass.fromisoformat(dtstr)
            # assertEqual rather than a bare assert: it is not stripped
            # under -O and reports both values on failure.
            self.assertEqual(dt, dt_rt)
def test_fromisoformat_separators(self):
    """Any single character is accepted as the date/time separator.

    The separators are grouped by the number of bytes they occupy in
    UTF-8.
    """
    separators = [
        ' ', 'T', '\u007f',     # 1-byte widths
        '\u0080', 'ʁ',          # 2-byte widths
        # The two 3-byte entries had degraded to empty strings, which are
        # not single-character separators; they are written as escapes so
        # they survive re-encoding.
        '\u16c7', '\u6642',     # 3-byte widths
        '🐍'                    # 4-byte widths
    ]

    for sep in separators:
        dt = self.theclass(2018, 1, 31, 23, 59, 47, 124789)
        dtstr = dt.isoformat(sep=sep)

        with self.subTest(dtstr=dtstr):
            dt_rt = self.theclass.fromisoformat(dtstr)
            self.assertEqual(dt, dt_rt)
def test_fromisoformat_ambiguous(self):
    """A '+' or '-' used as the date/time separator is not an offset.

    Covers strings such as 2018-01-31+12:15, where +12:15 is the time of
    day and not a time zone.
    """
    for sep in ('+', '-'):
        expected = self.theclass(2018, 1, 31, 12, 15)
        dtstr = expected.isoformat(sep=sep)

        with self.subTest(dtstr=dtstr):
            parsed = self.theclass.fromisoformat(dtstr)
            self.assertEqual(expected, parsed)
def test_fromisoformat_timespecs(self):
    """Every isoformat(timespec=...) variant parses back to the same value."""
    datetime_bases = (
        (2009, 12, 4, 8, 17, 45, 123456),
        (2009, 12, 4, 8, 17, 45, 0),
    )

    tzinfos = (
        None,
        timezone.utc,
        timezone(timedelta(hours=-5)),
        timezone(timedelta(hours=2)),
        timezone(timedelta(hours=6, minutes=27)),
    )

    timespecs = ('hours', 'minutes', 'seconds',
                 'milliseconds', 'microseconds')

    for ip, ts in enumerate(timespecs):
        # Number of leading fields to keep for this timespec.
        n_fields = 4 + ip
        for tzi in tzinfos:
            for dt_tuple in datetime_bases:
                if ts == 'milliseconds':
                    # Drop the sub-millisecond digits, which this
                    # timespec does not emit.
                    new_microseconds = 1000 * (dt_tuple[6] // 1000)
                    dt_tuple = dt_tuple[0:6] + (new_microseconds,)

                expected = self.theclass(*(dt_tuple[0:n_fields]), tzinfo=tzi)
                dtstr = expected.isoformat(timespec=ts)
                with self.subTest(dtstr=dtstr):
                    parsed = self.theclass.fromisoformat(dtstr)
                    self.assertEqual(expected, parsed)
def test_fromisoformat_fails_datetime(self):
    """fromisoformat() raises ValueError for each malformed datetime string."""
    bad_strs = (
        '',                             # Empty string
        '2009.04-19T03',                # Wrong first separator
        '2009-04.19T03',                # Wrong second separator
        '2009-04-19T0a',                # Invalid hours
        '2009-04-19T03:1a:45',          # Invalid minutes
        '2009-04-19T03:15:4a',          # Invalid seconds
        '2009-04-19T03;15:45',          # Bad first time separator
        '2009-04-19T03:15;45',          # Bad second time separator
        '2009-04-19T03:15:4500:00',     # Bad time zone separator
        '2009-04-19T03:15:45.2345',     # Too many digits for milliseconds
        '2009-04-19T03:15:45.1234567',  # Too many digits for microseconds
        '2009-04-19T03:15:45.123456+24:30',    # Invalid time zone offset
        '2009-04-19T03:15:45.123456-24:30',    # Invalid negative offset
        '2009-04-10ᛇᛇᛇᛇᛇ12:15',         # Too many unicode separators
        '2009-04-19T1',                 # Incomplete hours
        '2009-04-19T12:3',              # Incomplete minutes
        '2009-04-19T12:30:4',           # Incomplete seconds
        '2009-04-19T12:',               # Ends with time separator
        '2009-04-19T12:30:',            # Ends with time separator
        '2009-04-19T12:30:45.',         # Ends with fraction separator
        '2009-04-19T12:30:45.123456+',  # Ends with timezone separator
        '2009-04-19T12:30:45.123456-',  # Ends with timezone separator
        '2009-04-19T12:30:45.123456-05:00a',    # Extra text
        '2009-04-19T12:30:45.123-05:00a',       # Extra text
        '2009-04-19T12:30:45-05:00a',           # Extra text
    )

    parse = self.theclass.fromisoformat
    for bad_str in bad_strs:
        with self.subTest(bad_str=bad_str):
            with self.assertRaises(ValueError):
                parse(bad_str)
def test_fromisoformat_utc(self):
    """A zero offset parses to the timezone.utc singleton itself."""
    parsed = self.theclass.fromisoformat('2014-04-19T13:21:13+00:00')

    self.assertIs(parsed.tzinfo, timezone.utc)
def test_fromisoformat_subclass(self):
    """fromisoformat() on a subclass returns an equal subclass instance."""
    class DateTimeSubclass(self.theclass):
        pass

    offset = timezone(timedelta(hours=10, minutes=45))
    original = DateTimeSubclass(2014, 12, 14, 9, 30, 45, 457390,
                                tzinfo=offset)

    rebuilt = DateTimeSubclass.fromisoformat(original.isoformat())

    self.assertEqual(original, rebuilt)
    self.assertIsInstance(rebuilt, DateTimeSubclass)
class TestSubclassDateTime(TestDateTime):
theclass = SubclassDatetime
# Override tests not designed for subclass
@ -2517,6 +2770,36 @@ class TestTime(HarmlessMixedComparison, unittest.TestCase):
self.assertEqual(t.isoformat(timespec='microseconds'), "12:34:56.000000")
self.assertEqual(t.isoformat(timespec='auto'), "12:34:56")
def test_isoformat_timezone(self):
    """isoformat() appends the UTC offset, including seconds/microseconds."""
    tzoffsets = [
        ('05:00', timedelta(hours=5)),
        ('02:00', timedelta(hours=2)),
        ('06:27', timedelta(hours=6, minutes=27)),
        ('12:32:30', timedelta(hours=12, minutes=32, seconds=30)),
        ('02:04:09.123456',
         timedelta(hours=2, minutes=4, seconds=9, microseconds=123456)),
    ]

    tzinfos = [
        ('', None),
        ('+00:00', timezone.utc),
        ('+00:00', timezone(timedelta(0))),
    ]

    # Every offset is exercised with both signs.
    tzinfos += [
        (prefix + expected, timezone(sign * td))
        for expected, td in tzoffsets
        for prefix, sign in [('-', -1), ('+', 1)]
    ]

    t_base = self.theclass(12, 37, 9)
    exp_base = '12:37:09'

    for exp_tz, tzi in tzinfos:
        t = t_base.replace(tzinfo=tzi)
        exp = exp_base + exp_tz
        with self.subTest(tzi=tzi):
            # assertEqual rather than a bare assert: it is not stripped
            # under -O and reports both values on failure.
            self.assertEqual(t.isoformat(), exp)
def test_1653736(self):
# verify it doesn't accept extra keyword arguments
t = self.theclass(second=1)
@ -3055,6 +3338,133 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase):
t2 = t2.replace(tzinfo=Varies())
self.assertTrue(t1 < t2) # t1's offset counter still going up
def test_fromisoformat(self):
    """time.isoformat() output round-trips for naive and aware times."""
    hh = (9, 12, 20)
    mm = (5, 30)
    ss = (4, 45)
    usec = (0, 245000, 678901)

    # Two boundary values first, then the full cross product.
    time_examples = [(0, 0, 0, 0), (23, 59, 59, 999999)]
    time_examples.extend(itertools.product(hh, mm, ss, usec))

    tzinfos = (
        None,
        timezone.utc,
        timezone(timedelta(hours=2)),
        timezone(timedelta(hours=6, minutes=27)),
    )

    for ttup in time_examples:
        for tzi in tzinfos:
            expected = self.theclass(*ttup, tzinfo=tzi)
            tstr = expected.isoformat()

            with self.subTest(tstr=tstr):
                parsed = self.theclass.fromisoformat(tstr)
                self.assertEqual(expected, parsed)
def test_fromisoformat_timezone(self):
    """Aware times round-trip, including sub-minute UTC offsets."""
    base_time = self.theclass(12, 30, 45, 217456)

    tzoffsets = [
        timedelta(hours=5), timedelta(hours=2),
        timedelta(hours=6, minutes=27),
        timedelta(hours=12, minutes=32, seconds=30),
        timedelta(hours=2, minutes=4, seconds=9, microseconds=123456)
    ]

    # Add the negative counterpart of every offset.
    tzoffsets += [-1 * td for td in tzoffsets]

    tzinfos = [None, timezone.utc,
               timezone(timedelta(hours=0))]

    tzinfos += [timezone(td) for td in tzoffsets]

    for tzi in tzinfos:
        t = base_time.replace(tzinfo=tzi)
        tstr = t.isoformat()

        with self.subTest(tstr=tstr):
            t_rt = self.theclass.fromisoformat(tstr)
            # assertEqual rather than a bare assert: it is not stripped
            # under -O and reports both values on failure.
            self.assertEqual(t, t_rt)
def test_fromisoformat_timespecs(self):
    """Every isoformat(timespec=...) variant parses back to the same value."""
    time_bases = (
        (8, 17, 45, 123456),
        (8, 17, 45, 0),
    )

    tzinfos = (
        None,
        timezone.utc,
        timezone(timedelta(hours=-5)),
        timezone(timedelta(hours=2)),
        timezone(timedelta(hours=6, minutes=27)),
    )

    timespecs = ('hours', 'minutes', 'seconds',
                 'milliseconds', 'microseconds')

    for ip, ts in enumerate(timespecs):
        # Number of leading fields to keep for this timespec.
        n_fields = 1 + ip
        for tzi in tzinfos:
            for t_tuple in time_bases:
                if ts == 'milliseconds':
                    # Drop the sub-millisecond digits, which this
                    # timespec does not emit.
                    new_microseconds = 1000 * (t_tuple[-1] // 1000)
                    t_tuple = t_tuple[0:-1] + (new_microseconds,)

                expected = self.theclass(*(t_tuple[0:n_fields]), tzinfo=tzi)
                tstr = expected.isoformat(timespec=ts)
                with self.subTest(tstr=tstr):
                    parsed = self.theclass.fromisoformat(tstr)
                    self.assertEqual(expected, parsed)
def test_fromisoformat_fails(self):
    """fromisoformat() must raise ValueError for every invalid string."""
    bad_strs = [
        '',                         # Empty string
        '12:',                      # Ends on a separator
        '12:30:',                   # Ends on a separator
        '12:30:15.',                # Ends on a separator
        '1',                        # Incomplete hours
        '12:3',                     # Incomplete minutes
        '12:30:1',                  # Incomplete seconds
        '1a:30:45.334034',          # Invalid character in hours
        '12:a0:45.334034',          # Invalid character in minutes
        '12:30:a5.334034',          # Invalid character in seconds
        '12:30:45.1234',            # Too many digits for milliseconds
        '12:30:45.1234567',         # Too many digits for microseconds
        '12:30:45.123456+24:30',    # Invalid time zone offset
        '12:30:45.123456-24:30',    # Invalid negative offset
        # The non-ASCII look-alike characters in the next two entries had
        # been lost, so the strings no longer matched their comments; they
        # are written as escapes so they survive re-encoding.
        '12\uff1a30\uff1a45',       # Uses full-width unicode colons
        '12:30:45\u2024123456',     # Uses \u2024 in place of decimal point
        '12:30:45a',                # Extra at end of basic time
        '12:30:45.123a',            # Extra at end of millisecond time
        '12:30:45.123456a',         # Extra at end of microsecond time
        '12:30:45.123456+12:00:30a',    # Extra at end of full time
    ]

    for bad_str in bad_strs:
        with self.subTest(bad_str=bad_str):
            with self.assertRaises(ValueError):
                self.theclass.fromisoformat(bad_str)
def test_fromisoformat_fails_typeerror(self):
    """fromisoformat() must raise TypeError for non-str arguments."""
    import io

    bad_types = [b'12:30:45', None, io.StringIO('12:30:45')]

    for bad_type in bad_types:
        # subTest so that one failing type does not hide the others.
        with self.subTest(bad_type=bad_type):
            with self.assertRaises(TypeError):
                self.theclass.fromisoformat(bad_type)
def test_fromisoformat_subclass(self):
    """fromisoformat() on a subclass returns an equal subclass instance."""
    class TimeSubclass(self.theclass):
        pass

    original = TimeSubclass(12, 14, 45, 203745, tzinfo=timezone.utc)
    rebuilt = TimeSubclass.fromisoformat(original.isoformat())

    self.assertEqual(original, rebuilt)
    self.assertIsInstance(rebuilt, TimeSubclass)
def test_subclass_timetz(self):
class C(self.theclass):

View File

@ -0,0 +1,3 @@
Added new alternate constructors :meth:`datetime.datetime.fromisoformat`,
:meth:`datetime.time.fromisoformat` and :meth:`datetime.date.fromisoformat`
as the inverse operation of each class's respective ``isoformat`` methods.

View File

@ -663,6 +663,167 @@ set_date_fields(PyDateTime_Date *self, int y, int m, int d)
SET_DAY(self, d);
}
/* ---------------------------------------------------------------------------
* String parsing utilities and helper functions
*/
/* Read exactly num_digits ASCII decimal digits starting at ptr.
 *
 * Each digit is folded into *var as (*var * 10 + digit); *var is not reset
 * here, so the result includes whatever value it held on entry.
 *
 * Returns a pointer just past the last digit read, or NULL as soon as a
 * byte that is not '0'..'9' is met.
 */
static const char*
parse_digits(const char* ptr, int* var, size_t num_digits)
{
    size_t remaining = num_digits;

    while (remaining-- > 0) {
        /* Bytes below '0' wrap to a large unsigned value, so one
         * comparison rejects everything outside '0'..'9'. */
        unsigned int digit = (unsigned int)(*ptr - '0');
        ++ptr;
        if (digit > 9) {
            return NULL;
        }
        *var = (*var * 10) + (signed int)digit;
    }

    return ptr;
}
static int parse_isoformat_date(const char *dtstr,
                                int* year, int *month, int* day) {
    /* Parse "YYYY-MM-DD" as produced by date.isoformat().
     *
     * The parsed values are accumulated into *year, *month and *day.
     *
     * Return codes:
     *       0:  Success
     *      -1:  Failed to parse date component
     *      -2:  Failed to parse dateseparator
     */
    const char *cursor = parse_digits(dtstr, year, 4);
    if (cursor == NULL) {
        return -1;
    }
    if (*cursor != '-') {
        return -2;
    }

    cursor = parse_digits(cursor + 1, month, 2);
    if (cursor == NULL) {
        return -1;
    }
    if (*cursor != '-') {
        return -2;
    }

    if (parse_digits(cursor + 1, day, 2) == NULL) {
        return -1;
    }

    return 0;
}
static int
parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end,
                  int* hour, int* minute, int *second, int *microsecond) {
    /* Parse HH[:MM[:SS[.fff[fff]]]] from the segment [tstr, tstr_end).
     *
     * The byte at tstr_end is read, so it must be addressable.
     *
     * Return codes:
     *       0:  Success, the byte after the parsed text is NUL
     *       1:  Success, but the parsed text is followed by something else
     *      -3:  Failed to parse a numeric component
     *      -4:  Malformed or misplaced separator
     */
    const char *p = tstr;
    const char *p_end = tstr_end;
    int *vals[3] = {hour, minute, second};

    // Parse HH[:MM[:SS]]
    for (size_t i = 0; i < 3; ++i) {
        p = parse_digits(p, vals[i], 2);
        if (NULL == p) {
            return -3;
        }

        // Test for the end of the segment *before* consuming a separator,
        // so that a trailing ':' or '.' is never silently swallowed.
        if (p >= p_end) {
            return *p != '\0';
        }

        char c = *(p++);
        if (c == ':' && i < 2) {
            // A following component is mandatory; if the segment ends
            // here the next parse_digits() call fails with -3.
            continue;
        } else if (c == '.' && i == 2) {
            // A fraction is only valid after the seconds field.
            break;
        } else {
            return -4;  // Malformed time separator
        }
    }

    // Parse .fff[fff]
    size_t len_remains = p_end - p;
    if (!(len_remains == 6 || len_remains == 3)) {
        return -3;
    }

    p = parse_digits(p, microsecond, len_remains);
    if (NULL == p) {
        return -3;
    }

    if (len_remains == 3) {
        // Three digits are milliseconds
        *microsecond *= 1000;
    }

    // Return 1 if it's not the end of the string
    return *p != '\0';
}
static int
parse_isoformat_time(const char *dtstr, size_t dtlen,
                     int* hour, int *minute, int *second, int *microsecond,
                     int* tzoffset, int *tzmicrosecond) {
    // Parse the time portion of a datetime.isoformat() string
    //
    // On success with an offset, *tzoffset receives the signed offset in
    // whole seconds and *tzmicrosecond its signed sub-second part.
    //
    // Return codes:
    //      0:  Success (no tzoffset)
    //      1:  Success (with tzoffset)
    //     -3:  Failed to parse time component
    //     -4:  Failed to parse time separator
    //     -5:  Malformed timezone string
    const char *const end = dtstr + dtlen;

    // Find the first '+' or '-'; the first byte is always inspected.
    const char *sign_pos = dtstr;
    for (;;) {
        if (*sign_pos == '+' || *sign_pos == '-') {
            break;
        }
        if (++sign_pos >= end) {
            break;
        }
    }

    int status = parse_hh_mm_ss_ff(dtstr, sign_pos,
                                   hour, minute, second, microsecond);
    if (status < 0) {
        return status;
    }

    if (sign_pos == end) {
        // We know that there's no time zone, so if there's stuff at the
        // end of the string it's an error.
        return (status == 1) ? -5 : 0;
    }

    // Parse time zone component
    // Valid formats are:
    //    - +HH:MM                (len  6)
    //    - +HH:MM:SS             (len  9)
    //    - +HH:MM:SS.ffffff      (len 16)
    size_t zone_len = end - sign_pos;
    switch (zone_len) {
    case 6:
    case 9:
    case 16:
        break;
    default:
        return -5;
    }

    const int sign = (*sign_pos == '-') ? -1 : 1;
    int off_hour = 0, off_minute = 0, off_second = 0;
    status = parse_hh_mm_ss_ff(sign_pos + 1, end,
                               &off_hour, &off_minute, &off_second,
                               tzmicrosecond);

    *tzoffset = sign * ((off_hour * 3600) + (off_minute * 60) + off_second);
    *tzmicrosecond *= sign;

    return (status != 0) ? -5 : 1;
}
/* ---------------------------------------------------------------------------
* Create various objects, mostly without range checking.
*/
@ -1063,6 +1224,27 @@ append_keyword_fold(PyObject *repr, int fold)
return repr;
}
static inline PyObject *
tzinfo_from_isoformat_results(int rv, int tzoffset, int tz_useconds) {
PyObject *tzinfo;
if (rv == 1) {
// Create a timezone from offset in seconds (0 returns UTC)
if (tzoffset == 0) {
Py_INCREF(PyDateTime_TimeZone_UTC);
return PyDateTime_TimeZone_UTC;
}
PyObject *delta = new_delta(0, tzoffset, tz_useconds, 1);
tzinfo = new_timezone(delta, NULL);
Py_XDECREF(delta);
} else {
tzinfo = Py_None;
Py_INCREF(Py_None);
}
return tzinfo;
}
/* ---------------------------------------------------------------------------
* String format helpers.
*/
@ -2607,6 +2789,7 @@ date_fromtimestamp(PyObject *cls, PyObject *args)
return result;
}
/* Return new date from proleptic Gregorian ordinal. Raises ValueError if
* the ordinal is out of range.
*/
@ -2633,6 +2816,46 @@ date_fromordinal(PyObject *cls, PyObject *args)
return result;
}
/* Return the new date from a string as generated by date.isoformat().
 *
 * Raises TypeError if dtstr is not a str, and ValueError if it is not a
 * ten-character YYYY-MM-DD string.
 */
static PyObject *
date_fromisoformat(PyObject *cls, PyObject *dtstr) {
    assert(dtstr != NULL);

    if (!PyUnicode_Check(dtstr)) {
        PyErr_SetString(PyExc_TypeError, "fromisoformat: argument must be str");
        return NULL;
    }

    Py_ssize_t len;

    const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len);
    if (dt_ptr == NULL) {
        // The string could not be encoded; the exception is already set
        // and neither dt_ptr nor len may be used.
        return NULL;
    }

    int year = 0, month = 0, day = 0;

    int rv;
    if (len == 10) {
        rv = parse_isoformat_date(dt_ptr, &year, &month, &day);
    } else {
        rv = -1;
    }

    if (rv < 0) {
        PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s",
                     dt_ptr);
        return NULL;
    }

    PyObject *result;
    if ( (PyTypeObject*)cls == &PyDateTime_DateType ) {
        // Exact type: use the fast path constructor
        result = new_date_ex(year, month, day, (PyTypeObject*)cls);
    } else {
        // Subclass: go through its own constructor
        result = PyObject_CallFunction(cls, "iii", year, month, day);
    }

    return result;
}
/*
* Date arithmetic.
*/
@ -2925,6 +3148,10 @@ static PyMethodDef date_methods[] = {
PyDoc_STR("int -> date corresponding to a proleptic Gregorian "
"ordinal.")},
{"fromisoformat", (PyCFunction)date_fromisoformat, METH_O |
METH_CLASS,
PyDoc_STR("str -> Construct a date from the output of date.isoformat()")},
{"today", (PyCFunction)date_today, METH_NOARGS | METH_CLASS,
PyDoc_STR("Current date or datetime: same as "
"self.__class__.fromtimestamp(time.time()).")},
@ -3972,6 +4199,49 @@ time_replace(PyDateTime_Time *self, PyObject *args, PyObject *kw)
return clone;
}
/* Return a new time from a string as generated by time.isoformat().
 *
 * Raises TypeError if tstr is not a str, and ValueError if it cannot be
 * parsed.
 */
static PyObject *
time_fromisoformat(PyObject *cls, PyObject *tstr) {
    assert(tstr != NULL);

    if (!PyUnicode_Check(tstr)) {
        PyErr_SetString(PyExc_TypeError, "fromisoformat: argument must be str");
        return NULL;
    }

    Py_ssize_t len;
    const char *p = PyUnicode_AsUTF8AndSize(tstr, &len);
    if (p == NULL) {
        // The string could not be encoded; the exception is already set
        // and neither p nor len may be used.
        return NULL;
    }

    int hour = 0, minute = 0, second = 0, microsecond = 0;
    // tzoffset is only written when an offset is present, but it is always
    // passed on below, so it must start out initialised.
    int tzoffset = 0, tzimicrosecond = 0;
    int rv = parse_isoformat_time(p, len,
                                  &hour, &minute, &second, &microsecond,
                                  &tzoffset, &tzimicrosecond);

    if (rv < 0) {
        PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", p);
        return NULL;
    }

    PyObject *tzinfo = tzinfo_from_isoformat_results(rv, tzoffset,
                                                     tzimicrosecond);

    if (tzinfo == NULL) {
        return NULL;
    }

    PyObject *t;
    if ( (PyTypeObject *)cls == &PyDateTime_TimeType ) {
        // Exact type: use the fast path constructor
        t = new_time(hour, minute, second, microsecond, tzinfo, 0);
    } else {
        // Subclass: go through its own constructor
        t = PyObject_CallFunction(cls, "iiiiO",
                                  hour, minute, second, microsecond, tzinfo);
    }

    Py_DECREF(tzinfo);
    return t;
}
/* Pickle support, a simple use of __reduce__. */
/* Let basestate be the non-tzinfo data string.
@ -4041,6 +4311,9 @@ static PyMethodDef time_methods[] = {
{"replace", (PyCFunction)time_replace, METH_VARARGS | METH_KEYWORDS,
PyDoc_STR("Return time with new specified fields.")},
{"fromisoformat", (PyCFunction)time_fromisoformat, METH_O | METH_CLASS,
PyDoc_STR("string -> time from time.isoformat() output")},
{"__reduce_ex__", (PyCFunction)time_reduce_ex, METH_VARARGS,
PyDoc_STR("__reduce_ex__(proto) -> (cls, state)")},
@ -4506,6 +4779,82 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw)
return result;
}
/* Return a new datetime from a string as generated by datetime.isoformat().
 *
 * The first ten bytes are the date; the single character that follows is
 * the separator (any character) and the remainder is the time.
 *
 * Raises TypeError if dtstr is not a str, and ValueError if it cannot be
 * parsed.
 */
static PyObject *
datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {
    assert(dtstr != NULL);

    if (!PyUnicode_Check(dtstr)) {
        PyErr_SetString(PyExc_TypeError, "fromisoformat: argument must be str");
        return NULL;
    }

    Py_ssize_t len;
    const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len);
    if (dt_ptr == NULL) {
        // The string could not be encoded; the exception is already set
        // and neither dt_ptr nor len may be used.
        return NULL;
    }
    const char * p = dt_ptr;

    int year = 0, month = 0, day = 0;
    int hour = 0, minute = 0, second = 0, microsecond = 0;
    int tzoffset = 0, tzusec = 0;

    // date has a fixed length of 10
    int rv = parse_isoformat_date(p, &year, &month, &day);

    if (!rv && len > 10) {
        // Skip the date plus one separator character, which may occupy
        // one to four bytes.
        // In UTF-8, the length of multi-byte characters is encoded in the MSB
        if ((p[10] & 0x80) == 0) {
            p += 11;
        } else {
            switch(p[10] & 0xf0) {
                case 0xe0:
                    p += 13;
                    break;
                case 0xf0:
                    p += 14;
                    break;
                default:
                    p += 12;
                    break;
            }
        }

        len -= (p - dt_ptr);
        rv = parse_isoformat_time(p, len,
                                  &hour, &minute, &second, &microsecond,
                                  &tzoffset, &tzusec);
    }
    if (rv < 0) {
        PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr);
        return NULL;
    }

    PyObject* tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, tzusec);
    if (tzinfo == NULL) {
        return NULL;
    }

    PyObject* dt;
    if ( (PyTypeObject*)cls == &PyDateTime_DateTimeType ) {
        // Use the fast path constructor
        dt = new_datetime(year, month, day, hour, minute, second, microsecond,
                          tzinfo, 0);
    } else {
        // Subclass
        dt = PyObject_CallFunction(cls, "iiiiiiiO",
                                   year,
                                   month,
                                   day,
                                   hour,
                                   minute,
                                   second,
                                   microsecond,
                                   tzinfo);
    }

    Py_DECREF(tzinfo);
    return dt;
}
/*
* Destructor.
*/
@ -5519,6 +5868,10 @@ static PyMethodDef datetime_methods[] = {
METH_VARARGS | METH_KEYWORDS | METH_CLASS,
PyDoc_STR("date, time -> datetime with same date and time fields")},
{"fromisoformat", (PyCFunction)datetime_fromisoformat,
METH_O | METH_CLASS,
PyDoc_STR("string -> datetime from datetime.isoformat() output")},
/* Instance methods: */
{"date", (PyCFunction)datetime_getdate, METH_NOARGS,