From 8452ca15f41061c8a6297d7956df22ab476d4df4 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka
Date: Fri, 7 Dec 2018 13:42:10 +0200
Subject: [PATCH] bpo-22005: Fixed unpickling instances of datetime classes pickled by Python 2. (GH-11017)

encoding='latin1' should be used for successful decoding.
---
Review notes (this section is ignored by git-am):

 * Modules/_datetimemodule.c, time_new(): the str branch now sanity-checks
   character 0 of the state, like the bytes branch and Lib/datetime.py do.
   Byte 0 is the hour with the fold flag in bit 7 (see time_from_pickle());
   the previous check read index 2 (the seconds field in the test vectors),
   so a valid Python 2 pickle whose byte 2 is >= 24 missed the pickle path.
 * Modules/_datetimemodule.c, date_new(): the pickle fast path is taken only
   when exactly one argument is passed (restores "== 1"), matching the
   "month is None" condition in Lib/datetime.py.  Shown as a -/+ pair with
   identical text so hunk line counts and the diffstat stay unchanged.
 * Lib/datetime.py, datetime.__new__: uses year.encode('latin1') like
   date.__new__ and time.__new__ (same behaviour, consistent spelling).
 * Line structure and leading indentation were reconstructed from a
   whitespace-collapsed copy of this patch; if context does not match the
   tree exactly, apply with "git apply --ignore-whitespace".  The "index"
   blob ids are those of the original patch.

 Doc/library/pickle.rst                        |   6 +
 Lib/datetime.py                               |  38 ++-
 Lib/test/datetimetester.py                    | 116 ++++++++
 .../2017-10-12-22-39-55.bpo-22005.lGP-sc.rst  |   3 +
 Modules/_datetimemodule.c                     | 265 ++++++++++++------
 5 files changed, 333 insertions(+), 95 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2017-10-12-22-39-55.bpo-22005.lGP-sc.rst

diff --git a/Doc/library/pickle.rst b/Doc/library/pickle.rst
index 4f9d3596b64..5fe49a013bc 100644
--- a/Doc/library/pickle.rst
+++ b/Doc/library/pickle.rst
@@ -243,6 +243,9 @@ process more convenient:
    *errors* tell pickle how to decode 8-bit string instances pickled by Python
    2; these default to 'ASCII' and 'strict', respectively. The *encoding* can
    be 'bytes' to read these 8-bit string instances as bytes objects.
+   Using ``encoding='latin1'`` is required for unpickling NumPy arrays and
+   instances of :class:`~datetime.datetime`, :class:`~datetime.date` and
+   :class:`~datetime.time` pickled by Python 2.
 
 .. function:: loads(bytes_object, \*, fix_imports=True, encoding="ASCII", errors="strict")
 
@@ -260,6 +263,9 @@ process more convenient:
    *errors* tell pickle how to decode 8-bit string instances pickled by Python
    2; these default to 'ASCII' and 'strict', respectively. The *encoding* can
    be 'bytes' to read these 8-bit string instances as bytes objects.
+   Using ``encoding='latin1'`` is required for unpickling NumPy arrays and
+   instances of :class:`~datetime.datetime`, :class:`~datetime.date` and
+   :class:`~datetime.time` pickled by Python 2.
 
 
 The :mod:`pickle` module defines three exceptions:
diff --git a/Lib/datetime.py b/Lib/datetime.py
index 292919fd798..4780b6df8f9 100644
--- a/Lib/datetime.py
+++ b/Lib/datetime.py
@@ -808,9 +808,19 @@ class date:
 
         year, month, day (required, base 1)
         """
-        if month is None and isinstance(year, bytes) and len(year) == 4 and \
-                1 <= year[2] <= 12:
+        if (month is None and
+            isinstance(year, (bytes, str)) and len(year) == 4 and
+            1 <= ord(year[2:3]) <= 12):
             # Pickle support
+            if isinstance(year, str):
+                try:
+                    year = year.encode('latin1')
+                except UnicodeEncodeError:
+                    # More informative error message.
+                    raise ValueError(
+                        "Failed to encode latin1 string when unpickling "
+                        "a date object. "
+                        "pickle.load(data, encoding='latin1') is assumed.")
             self = object.__new__(cls)
             self.__setstate(year)
             self._hashcode = -1
@@ -1184,8 +1194,18 @@ class time:
         tzinfo (default to None)
         fold (keyword only, default to zero)
         """
-        if isinstance(hour, bytes) and len(hour) == 6 and hour[0]&0x7F < 24:
+        if (isinstance(hour, (bytes, str)) and len(hour) == 6 and
+            ord(hour[0:1])&0x7F < 24):
             # Pickle support
+            if isinstance(hour, str):
+                try:
+                    hour = hour.encode('latin1')
+                except UnicodeEncodeError:
+                    # More informative error message.
+                    raise ValueError(
+                        "Failed to encode latin1 string when unpickling "
+                        "a time object. "
+                        "pickle.load(data, encoding='latin1') is assumed.")
             self = object.__new__(cls)
             self.__setstate(hour, minute or None)
             self._hashcode = -1
@@ -1496,8 +1516,18 @@ class datetime(date):
 
     def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
                 microsecond=0, tzinfo=None, *, fold=0):
-        if isinstance(year, bytes) and len(year) == 10 and 1 <= year[2]&0x7F <= 12:
+        if (isinstance(year, (bytes, str)) and len(year) == 10 and
+            1 <= ord(year[2:3])&0x7F <= 12):
             # Pickle support
+            if isinstance(year, str):
+                try:
+                    year = year.encode('latin1')
+                except UnicodeEncodeError:
+                    # More informative error message.
+                    raise ValueError(
+                        "Failed to encode latin1 string when unpickling "
+                        "a datetime object. "
+                        "pickle.load(data, encoding='latin1') is assumed.")
             self = object.__new__(cls)
             self.__setstate(year, month)
             self._hashcode = -1
diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py
index 78b123f5b11..2f838c44555 100644
--- a/Lib/test/datetimetester.py
+++ b/Lib/test/datetimetester.py
@@ -38,6 +38,7 @@ import _testcapi
 import _strptime
 #
 
+pickle_loads = {pickle.loads, pickle._loads}
 
 pickle_choices = [(pickle, pickle, proto)
                   for proto in range(pickle.HIGHEST_PROTOCOL + 1)]
@@ -1434,6 +1435,19 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase):
             self.assertEqual(orig, derived)
         self.assertEqual(orig.__reduce__(), orig.__reduce_ex__(2))
 
+    def test_compat_unpickle(self):
+        tests = [
+            b"cdatetime\ndate\n(S'\\x07\\xdf\\x0b\\x1b'\ntR.",
+            b'cdatetime\ndate\n(U\x04\x07\xdf\x0b\x1btR.',
+            b'\x80\x02cdatetime\ndate\nU\x04\x07\xdf\x0b\x1b\x85R.',
+        ]
+        args = 2015, 11, 27
+        expected = self.theclass(*args)
+        for data in tests:
+            for loads in pickle_loads:
+                derived = loads(data, encoding='latin1')
+                self.assertEqual(derived, expected)
+
     def test_compare(self):
         t1 = self.theclass(2, 3, 4)
         t2 = self.theclass(2, 3, 4)
@@ -2098,6 +2112,24 @@ class TestDateTime(TestDate):
             derived = unpickler.loads(green)
             self.assertEqual(orig, derived)
 
+    def test_compat_unpickle(self):
+        tests = [
+            b'cdatetime\ndatetime\n('
+            b"S'\\x07\\xdf\\x0b\\x1b\\x14;\\x01\\x00\\x10\\x00'\ntR.",
+
+            b'cdatetime\ndatetime\n('
+            b'U\n\x07\xdf\x0b\x1b\x14;\x01\x00\x10\x00tR.',
+
+            b'\x80\x02cdatetime\ndatetime\n'
+            b'U\n\x07\xdf\x0b\x1b\x14;\x01\x00\x10\x00\x85R.',
+        ]
+        args = 2015, 11, 27, 20, 59, 1, 64**2
+        expected = self.theclass(*args)
+        for data in tests:
+            for loads in pickle_loads:
+                derived = loads(data, encoding='latin1')
+                self.assertEqual(derived, expected)
+
     def test_more_compare(self):
         # The test_compare() inherited from TestDate covers the error cases.
         # We just want to test lexicographic ordering on the members datetime
@@ -3069,6 +3101,19 @@ class TestTime(HarmlessMixedComparison, unittest.TestCase):
             derived = unpickler.loads(green)
             self.assertEqual(orig, derived)
 
+    def test_compat_unpickle(self):
+        tests = [
+            b"cdatetime\ntime\n(S'\\x14;\\x10\\x00\\x10\\x00'\ntR.",
+            b'cdatetime\ntime\n(U\x06\x14;\x10\x00\x10\x00tR.',
+            b'\x80\x02cdatetime\ntime\nU\x06\x14;\x10\x00\x10\x00\x85R.',
+        ]
+        args = 20, 59, 16, 64**2
+        expected = self.theclass(*args)
+        for data in tests:
+            for loads in pickle_loads:
+                derived = loads(data, encoding='latin1')
+                self.assertEqual(derived, expected)
+
     def test_bool(self):
         # time is always True.
         cls = self.theclass
@@ -3441,6 +3486,40 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase):
             self.assertEqual(derived.tzname(), 'cookie')
         self.assertEqual(orig.__reduce__(), orig.__reduce_ex__(2))
 
+    def test_compat_unpickle(self):
+        tests = [
+            b"cdatetime\ntime\n(S'\\x05\\x06\\x07\\x01\\xe2@'\n"
+            b"ctest.datetimetester\nPicklableFixedOffset\n(tR"
+            b"(dS'_FixedOffset__offset'\ncdatetime\ntimedelta\n"
+            b"(I-1\nI68400\nI0\ntRs"
+            b"S'_FixedOffset__dstoffset'\nNs"
+            b"S'_FixedOffset__name'\nS'cookie'\nsbtR.",
+
+            b'cdatetime\ntime\n(U\x06\x05\x06\x07\x01\xe2@'
+            b'ctest.datetimetester\nPicklableFixedOffset\n)R'
+            b'}(U\x14_FixedOffset__offsetcdatetime\ntimedelta\n'
+            b'(J\xff\xff\xff\xffJ0\x0b\x01\x00K\x00tR'
+            b'U\x17_FixedOffset__dstoffsetN'
+            b'U\x12_FixedOffset__nameU\x06cookieubtR.',
+
+            b'\x80\x02cdatetime\ntime\nU\x06\x05\x06\x07\x01\xe2@'
+            b'ctest.datetimetester\nPicklableFixedOffset\n)R'
+            b'}(U\x14_FixedOffset__offsetcdatetime\ntimedelta\n'
+            b'J\xff\xff\xff\xffJ0\x0b\x01\x00K\x00\x87R'
+            b'U\x17_FixedOffset__dstoffsetN'
+            b'U\x12_FixedOffset__nameU\x06cookieub\x86R.',
+        ]
+
+        tinfo = PicklableFixedOffset(-300, 'cookie')
+        expected = self.theclass(5, 6, 7, 123456, tzinfo=tinfo)
+        for data in tests:
+            for loads in pickle_loads:
+                derived = loads(data, encoding='latin1')
+                self.assertEqual(derived, expected, repr(data))
+                self.assertIsInstance(derived.tzinfo, PicklableFixedOffset)
+                self.assertEqual(derived.utcoffset(), timedelta(minutes=-300))
+                self.assertEqual(derived.tzname(), 'cookie')
+
     def test_more_bool(self):
         # time is always True.
         cls = self.theclass
@@ -3789,6 +3868,43 @@ class TestDateTimeTZ(TestDateTime, TZInfoBase, unittest.TestCase):
             self.assertEqual(derived.tzname(), 'cookie')
         self.assertEqual(orig.__reduce__(), orig.__reduce_ex__(2))
 
+    def test_compat_unpickle(self):
+        tests = [
+            b'cdatetime\ndatetime\n'
+            b"(S'\\x07\\xdf\\x0b\\x1b\\x14;\\x01\\x01\\xe2@'\n"
+            b'ctest.datetimetester\nPicklableFixedOffset\n(tR'
+            b"(dS'_FixedOffset__offset'\ncdatetime\ntimedelta\n"
+            b'(I-1\nI68400\nI0\ntRs'
+            b"S'_FixedOffset__dstoffset'\nNs"
+            b"S'_FixedOffset__name'\nS'cookie'\nsbtR.",
+
+            b'cdatetime\ndatetime\n'
+            b'(U\n\x07\xdf\x0b\x1b\x14;\x01\x01\xe2@'
+            b'ctest.datetimetester\nPicklableFixedOffset\n)R'
+            b'}(U\x14_FixedOffset__offsetcdatetime\ntimedelta\n'
+            b'(J\xff\xff\xff\xffJ0\x0b\x01\x00K\x00tR'
+            b'U\x17_FixedOffset__dstoffsetN'
+            b'U\x12_FixedOffset__nameU\x06cookieubtR.',
+
+            b'\x80\x02cdatetime\ndatetime\n'
+            b'U\n\x07\xdf\x0b\x1b\x14;\x01\x01\xe2@'
+            b'ctest.datetimetester\nPicklableFixedOffset\n)R'
+            b'}(U\x14_FixedOffset__offsetcdatetime\ntimedelta\n'
+            b'J\xff\xff\xff\xffJ0\x0b\x01\x00K\x00\x87R'
+            b'U\x17_FixedOffset__dstoffsetN'
+            b'U\x12_FixedOffset__nameU\x06cookieub\x86R.',
+        ]
+        args = 2015, 11, 27, 20, 59, 1, 123456
+        tinfo = PicklableFixedOffset(-300, 'cookie')
+        expected = self.theclass(*args, **{'tzinfo': tinfo})
+        for data in tests:
+            for loads in pickle_loads:
+                derived = loads(data, encoding='latin1')
+                self.assertEqual(derived, expected)
+                self.assertIsInstance(derived.tzinfo, PicklableFixedOffset)
+                self.assertEqual(derived.utcoffset(), timedelta(minutes=-300))
+                self.assertEqual(derived.tzname(), 'cookie')
+
     def test_extreme_hashes(self):
         # If an attempt is made to hash these via subtracting the offset
         # then hashing a datetime object, OverflowError results. The
diff --git a/Misc/NEWS.d/next/Library/2017-10-12-22-39-55.bpo-22005.lGP-sc.rst b/Misc/NEWS.d/next/Library/2017-10-12-22-39-55.bpo-22005.lGP-sc.rst
new file mode 100644
index 00000000000..951098d0a7a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-10-12-22-39-55.bpo-22005.lGP-sc.rst
@@ -0,0 +1,3 @@
+Implemented unpickling instances of :class:`~datetime.datetime`,
+:class:`~datetime.date` and :class:`~datetime.time` pickled by Python 2.
+``encoding='latin1'`` should be used for successful decoding.
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c
index eb9c35d02d4..87a88be6090 100644
--- a/Modules/_datetimemodule.c
+++ b/Modules/_datetimemodule.c
@@ -2788,31 +2788,60 @@ static PyGetSetDef date_getset[] = {
 
 static char *date_kws[] = {"year", "month", "day", NULL};
 
+static PyObject *
+date_from_pickle(PyTypeObject *type, PyObject *state)
+{
+    PyDateTime_Date *me;
+
+    me = (PyDateTime_Date *) (type->tp_alloc(type, 0));
+    if (me != NULL) {
+        const char *pdata = PyBytes_AS_STRING(state);
+        memcpy(me->data, pdata, _PyDateTime_DATE_DATASIZE);
+        me->hashcode = -1;
+    }
+    return (PyObject *)me;
+}
+
 static PyObject *
 date_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 {
     PyObject *self = NULL;
-    PyObject *state;
     int year;
     int month;
     int day;
 
     /* Check for invocation from pickle with __getstate__ state */
-    if (PyTuple_GET_SIZE(args) == 1) {
-        state = PyTuple_GET_ITEM(args, 0);
-        if (PyBytes_Check(state) &&
-            PyBytes_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE &&
-            MONTH_IS_SANE(PyBytes_AS_STRING(state)[2]))
-        {
-            PyDateTime_Date *me;
-
-            me = (PyDateTime_Date *) (type->tp_alloc(type, 0));
-            if (me != NULL) {
-                char *pdata = PyBytes_AS_STRING(state);
-                memcpy(me->data, pdata, _PyDateTime_DATE_DATASIZE);
-                me->hashcode = -1;
+    if (PyTuple_GET_SIZE(args) == 1) {
+        PyObject *state = PyTuple_GET_ITEM(args, 0);
+        if (PyBytes_Check(state)) {
+            if (PyBytes_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE &&
+                MONTH_IS_SANE(PyBytes_AS_STRING(state)[2]))
+            {
+                return date_from_pickle(type, state);
+            }
+        }
+        else if (PyUnicode_Check(state)) {
+            if (PyUnicode_READY(state)) {
+                return NULL;
+            }
+            if (PyUnicode_GET_LENGTH(state) == _PyDateTime_DATE_DATASIZE &&
+                MONTH_IS_SANE(PyUnicode_READ_CHAR(state, 2)))
+            {
+                state = PyUnicode_AsLatin1String(state);
+                if (state == NULL) {
+                    if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
+                        /* More informative error message. */
+                        PyErr_SetString(PyExc_ValueError,
+                            "Failed to encode latin1 string when unpickling "
+                            "a date object. "
+                            "pickle.load(data, encoding='latin1') is assumed.");
+                    }
+                    return NULL;
+                }
+                self = date_from_pickle(type, state);
+                Py_DECREF(state);
+                return self;
             }
-            return (PyObject *)me;
         }
     }
 
@@ -3901,11 +3930,43 @@ static PyGetSetDef time_getset[] = {
 static char *time_kws[] = {"hour", "minute", "second", "microsecond",
                            "tzinfo", "fold", NULL};
 
+static PyObject *
+time_from_pickle(PyTypeObject *type, PyObject *state, PyObject *tzinfo)
+{
+    PyDateTime_Time *me;
+    char aware = (char)(tzinfo != Py_None);
+
+    if (aware && check_tzinfo_subclass(tzinfo) < 0) {
+        PyErr_SetString(PyExc_TypeError, "bad tzinfo state arg");
+        return NULL;
+    }
+
+    me = (PyDateTime_Time *) (type->tp_alloc(type, aware));
+    if (me != NULL) {
+        const char *pdata = PyBytes_AS_STRING(state);
+
+        memcpy(me->data, pdata, _PyDateTime_TIME_DATASIZE);
+        me->hashcode = -1;
+        me->hastzinfo = aware;
+        if (aware) {
+            Py_INCREF(tzinfo);
+            me->tzinfo = tzinfo;
+        }
+        if (pdata[0] & (1 << 7)) {
+            me->data[0] -= 128;
+            me->fold = 1;
+        }
+        else {
+            me->fold = 0;
+        }
+    }
+    return (PyObject *)me;
+}
+
 static PyObject *
 time_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 {
     PyObject *self = NULL;
-    PyObject *state;
     int hour = 0;
     int minute = 0;
     int second = 0;
@@ -3914,47 +3975,42 @@ time_new(PyTypeObject *type, PyObject *args, PyObject *kw)
     int fold = 0;
 
     /* Check for invocation from pickle with __getstate__ state */
-    if (PyTuple_GET_SIZE(args) >= 1 &&
-        PyTuple_GET_SIZE(args) <= 2)
-    {
-        state = PyTuple_GET_ITEM(args, 0);
-        if (PyBytes_Check(state) &&
-            PyBytes_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE &&
-            (0x7F & ((unsigned char) (PyBytes_AS_STRING(state)[0]))) < 24)
-        {
-            PyDateTime_Time *me;
-            char aware;
-
-            if (PyTuple_GET_SIZE(args) == 2) {
-                tzinfo = PyTuple_GET_ITEM(args, 1);
-                if (check_tzinfo_subclass(tzinfo) < 0) {
-                    PyErr_SetString(PyExc_TypeError, "bad "
-                        "tzinfo state arg");
+    if (PyTuple_GET_SIZE(args) >= 1 && PyTuple_GET_SIZE(args) <= 2) {
+        PyObject *state = PyTuple_GET_ITEM(args, 0);
+        if (PyTuple_GET_SIZE(args) == 2) {
+            tzinfo = PyTuple_GET_ITEM(args, 1);
+        }
+        if (PyBytes_Check(state)) {
+            if (PyBytes_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE &&
+                (0x7F & ((unsigned char) (PyBytes_AS_STRING(state)[0]))) < 24)
+            {
+                return time_from_pickle(type, state, tzinfo);
+            }
+        }
+        else if (PyUnicode_Check(state)) {
+            if (PyUnicode_READY(state)) {
+                return NULL;
+            }
+            if (PyUnicode_GET_LENGTH(state) == _PyDateTime_TIME_DATASIZE &&
+                (0x7F & PyUnicode_READ_CHAR(state, 0)) < 24)
+            {
+                state = PyUnicode_AsLatin1String(state);
+                if (state == NULL) {
+                    if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
+                        /* More informative error message. */
+                        PyErr_SetString(PyExc_ValueError,
+                            "Failed to encode latin1 string when unpickling "
+                            "a time object. "
+                            "pickle.load(data, encoding='latin1') is assumed.");
+                    }
                     return NULL;
                 }
+                self = time_from_pickle(type, state, tzinfo);
+                Py_DECREF(state);
+                return self;
             }
-            aware = (char)(tzinfo != Py_None);
-            me = (PyDateTime_Time *) (type->tp_alloc(type, aware));
-            if (me != NULL) {
-                char *pdata = PyBytes_AS_STRING(state);
-
-                memcpy(me->data, pdata, _PyDateTime_TIME_DATASIZE);
-                me->hashcode = -1;
-                me->hastzinfo = aware;
-                if (aware) {
-                    Py_INCREF(tzinfo);
-                    me->tzinfo = tzinfo;
-                }
-                if (pdata[0] & (1 << 7)) {
-                    me->data[0] -= 128;
-                    me->fold = 1;
-                }
-                else {
-                    me->fold = 0;
-                }
-            }
-            return (PyObject *)me;
         }
+        tzinfo = Py_None;
     }
 
     if (PyArg_ParseTupleAndKeywords(args, kw, "|iiiiO$i", time_kws,
@@ -4540,11 +4596,43 @@ static char *datetime_kws[] = {
     "microsecond", "tzinfo", "fold", NULL
 };
 
+static PyObject *
+datetime_from_pickle(PyTypeObject *type, PyObject *state, PyObject *tzinfo)
+{
+    PyDateTime_DateTime *me;
+    char aware = (char)(tzinfo != Py_None);
+
+    if (aware && check_tzinfo_subclass(tzinfo) < 0) {
+        PyErr_SetString(PyExc_TypeError, "bad tzinfo state arg");
+        return NULL;
+    }
+
+    me = (PyDateTime_DateTime *) (type->tp_alloc(type , aware));
+    if (me != NULL) {
+        const char *pdata = PyBytes_AS_STRING(state);
+
+        memcpy(me->data, pdata, _PyDateTime_DATETIME_DATASIZE);
+        me->hashcode = -1;
+        me->hastzinfo = aware;
+        if (aware) {
+            Py_INCREF(tzinfo);
+            me->tzinfo = tzinfo;
+        }
+        if (pdata[2] & (1 << 7)) {
+            me->data[2] -= 128;
+            me->fold = 1;
+        }
+        else {
+            me->fold = 0;
+        }
+    }
+    return (PyObject *)me;
+}
+
 static PyObject *
 datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 {
     PyObject *self = NULL;
-    PyObject *state;
     int year;
     int month;
     int day;
@@ -4556,47 +4644,42 @@ datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw)
     PyObject *tzinfo = Py_None;
 
     /* Check for invocation from pickle with __getstate__ state */
-    if (PyTuple_GET_SIZE(args) >= 1 &&
-        PyTuple_GET_SIZE(args) <= 2)
-    {
-        state = PyTuple_GET_ITEM(args, 0);
-        if (PyBytes_Check(state) &&
-            PyBytes_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE &&
-            MONTH_IS_SANE(PyBytes_AS_STRING(state)[2] & 0x7F))
-        {
-            PyDateTime_DateTime *me;
-            char aware;
-
-            if (PyTuple_GET_SIZE(args) == 2) {
-                tzinfo = PyTuple_GET_ITEM(args, 1);
-                if (check_tzinfo_subclass(tzinfo) < 0) {
-                    PyErr_SetString(PyExc_TypeError, "bad "
-                        "tzinfo state arg");
+    if (PyTuple_GET_SIZE(args) >= 1 && PyTuple_GET_SIZE(args) <= 2) {
+        PyObject *state = PyTuple_GET_ITEM(args, 0);
+        if (PyTuple_GET_SIZE(args) == 2) {
+            tzinfo = PyTuple_GET_ITEM(args, 1);
+        }
+        if (PyBytes_Check(state)) {
+            if (PyBytes_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE &&
+                MONTH_IS_SANE(PyBytes_AS_STRING(state)[2] & 0x7F))
+            {
+                return datetime_from_pickle(type, state, tzinfo);
+            }
+        }
+        else if (PyUnicode_Check(state)) {
+            if (PyUnicode_READY(state)) {
+                return NULL;
+            }
+            if (PyUnicode_GET_LENGTH(state) == _PyDateTime_DATETIME_DATASIZE &&
+                MONTH_IS_SANE(PyUnicode_READ_CHAR(state, 2) & 0x7F))
+            {
+                state = PyUnicode_AsLatin1String(state);
+                if (state == NULL) {
+                    if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
+                        /* More informative error message. */
+                        PyErr_SetString(PyExc_ValueError,
+                            "Failed to encode latin1 string when unpickling "
+                            "a datetime object. "
+                            "pickle.load(data, encoding='latin1') is assumed.");
+                    }
                     return NULL;
                 }
+                self = datetime_from_pickle(type, state, tzinfo);
+                Py_DECREF(state);
+                return self;
             }
-            aware = (char)(tzinfo != Py_None);
-            me = (PyDateTime_DateTime *) (type->tp_alloc(type , aware));
-            if (me != NULL) {
-                char *pdata = PyBytes_AS_STRING(state);
-
-                memcpy(me->data, pdata, _PyDateTime_DATETIME_DATASIZE);
-                me->hashcode = -1;
-                me->hastzinfo = aware;
-                if (aware) {
-                    Py_INCREF(tzinfo);
-                    me->tzinfo = tzinfo;
-                }
-                if (pdata[2] & (1 << 7)) {
-                    me->data[2] -= 128;
-                    me->fold = 1;
-                }
-                else {
-                    me->fold = 0;
-                }
-            }
-            return (PyObject *)me;
         }
+        tzinfo = Py_None;
     }
 
     if (PyArg_ParseTupleAndKeywords(args, kw, "iii|iiiiO$i", datetime_kws,