mirror of https://github.com/python/cpython
gh-52551: Fix encoding issues in strftime() (GH-125193)
Fix time.strftime(), the strftime() method and formatting of the datetime classes datetime, date and time. * Characters not encodable in the current locale are now acceptable in the format string. * Surrogate pairs and sequences of surrogateescape-encoded bytes are no longer recombined. * An embedded null character no longer terminates the format string. This also fixes gh-78662 and gh-124531.
This commit is contained in:
parent
0cb20f2e7e
commit
ad3eac1963
|
@ -2949,11 +2949,32 @@ class TestDateTime(TestDate):
|
||||||
self.assertEqual(t.strftime("%z"), "-0200" + z)
|
self.assertEqual(t.strftime("%z"), "-0200" + z)
|
||||||
self.assertEqual(t.strftime("%:z"), "-02:00:" + z)
|
self.assertEqual(t.strftime("%:z"), "-02:00:" + z)
|
||||||
|
|
||||||
# bpo-34482: Check that surrogates don't cause a crash.
|
def test_strftime_special(self):
|
||||||
try:
|
t = self.theclass(2004, 12, 31, 6, 22, 33, 47)
|
||||||
t.strftime('%y\ud800%m %H\ud800%M')
|
s1 = t.strftime('%c')
|
||||||
except UnicodeEncodeError:
|
s2 = t.strftime('%B')
|
||||||
pass
|
# gh-52551, gh-78662: Unicode strings should pass through strftime,
|
||||||
|
# independently from locale.
|
||||||
|
self.assertEqual(t.strftime('\U0001f40d'), '\U0001f40d')
|
||||||
|
self.assertEqual(t.strftime('\U0001f4bb%c\U0001f40d%B'), f'\U0001f4bb{s1}\U0001f40d{s2}')
|
||||||
|
self.assertEqual(t.strftime('%c\U0001f4bb%B\U0001f40d'), f'{s1}\U0001f4bb{s2}\U0001f40d')
|
||||||
|
# Lone surrogates should pass through.
|
||||||
|
self.assertEqual(t.strftime('\ud83d'), '\ud83d')
|
||||||
|
self.assertEqual(t.strftime('\udc0d'), '\udc0d')
|
||||||
|
self.assertEqual(t.strftime('\ud83d%c\udc0d%B'), f'\ud83d{s1}\udc0d{s2}')
|
||||||
|
self.assertEqual(t.strftime('%c\ud83d%B\udc0d'), f'{s1}\ud83d{s2}\udc0d')
|
||||||
|
self.assertEqual(t.strftime('%c\udc0d%B\ud83d'), f'{s1}\udc0d{s2}\ud83d')
|
||||||
|
# Surrogate pairs should not recombine.
|
||||||
|
self.assertEqual(t.strftime('\ud83d\udc0d'), '\ud83d\udc0d')
|
||||||
|
self.assertEqual(t.strftime('%c\ud83d\udc0d%B'), f'{s1}\ud83d\udc0d{s2}')
|
||||||
|
# Surrogate-escaped bytes should not recombine.
|
||||||
|
self.assertEqual(t.strftime('\udcf0\udc9f\udc90\udc8d'), '\udcf0\udc9f\udc90\udc8d')
|
||||||
|
self.assertEqual(t.strftime('%c\udcf0\udc9f\udc90\udc8d%B'), f'{s1}\udcf0\udc9f\udc90\udc8d{s2}')
|
||||||
|
# gh-124531: The null character should not terminate the format string.
|
||||||
|
self.assertEqual(t.strftime('\0'), '\0')
|
||||||
|
self.assertEqual(t.strftime('\0'*1000), '\0'*1000)
|
||||||
|
self.assertEqual(t.strftime('\0%c\0%B'), f'\0{s1}\0{s2}')
|
||||||
|
self.assertEqual(t.strftime('%c\0%B\0'), f'{s1}\0{s2}\0')
|
||||||
|
|
||||||
def test_extract(self):
|
def test_extract(self):
|
||||||
dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234)
|
dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234)
|
||||||
|
@ -3736,6 +3757,33 @@ class TestTime(HarmlessMixedComparison, unittest.TestCase):
|
||||||
# gh-85432: The parameter was named "fmt" in the pure-Python impl.
|
# gh-85432: The parameter was named "fmt" in the pure-Python impl.
|
||||||
t.strftime(format="%f")
|
t.strftime(format="%f")
|
||||||
|
|
||||||
|
def test_strftime_special(self):
|
||||||
|
t = self.theclass(1, 2, 3, 4)
|
||||||
|
s1 = t.strftime('%I%p%Z')
|
||||||
|
s2 = t.strftime('%X')
|
||||||
|
# gh-52551, gh-78662: Unicode strings should pass through strftime,
|
||||||
|
# independently from locale.
|
||||||
|
self.assertEqual(t.strftime('\U0001f40d'), '\U0001f40d')
|
||||||
|
self.assertEqual(t.strftime('\U0001f4bb%I%p%Z\U0001f40d%X'), f'\U0001f4bb{s1}\U0001f40d{s2}')
|
||||||
|
self.assertEqual(t.strftime('%I%p%Z\U0001f4bb%X\U0001f40d'), f'{s1}\U0001f4bb{s2}\U0001f40d')
|
||||||
|
# Lone surrogates should pass through.
|
||||||
|
self.assertEqual(t.strftime('\ud83d'), '\ud83d')
|
||||||
|
self.assertEqual(t.strftime('\udc0d'), '\udc0d')
|
||||||
|
self.assertEqual(t.strftime('\ud83d%I%p%Z\udc0d%X'), f'\ud83d{s1}\udc0d{s2}')
|
||||||
|
self.assertEqual(t.strftime('%I%p%Z\ud83d%X\udc0d'), f'{s1}\ud83d{s2}\udc0d')
|
||||||
|
self.assertEqual(t.strftime('%I%p%Z\udc0d%X\ud83d'), f'{s1}\udc0d{s2}\ud83d')
|
||||||
|
# Surrogate pairs should not recombine.
|
||||||
|
self.assertEqual(t.strftime('\ud83d\udc0d'), '\ud83d\udc0d')
|
||||||
|
self.assertEqual(t.strftime('%I%p%Z\ud83d\udc0d%X'), f'{s1}\ud83d\udc0d{s2}')
|
||||||
|
# Surrogate-escaped bytes should not recombine.
|
||||||
|
self.assertEqual(t.strftime('\udcf0\udc9f\udc90\udc8d'), '\udcf0\udc9f\udc90\udc8d')
|
||||||
|
self.assertEqual(t.strftime('%I%p%Z\udcf0\udc9f\udc90\udc8d%X'), f'{s1}\udcf0\udc9f\udc90\udc8d{s2}')
|
||||||
|
# gh-124531: The null character should not terminate the format string.
|
||||||
|
self.assertEqual(t.strftime('\0'), '\0')
|
||||||
|
self.assertEqual(t.strftime('\0'*1000), '\0'*1000)
|
||||||
|
self.assertEqual(t.strftime('\0%I%p%Z\0%X'), f'\0{s1}\0{s2}')
|
||||||
|
self.assertEqual(t.strftime('%I%p%Z\0%X\0'), f'{s1}\0{s2}\0')
|
||||||
|
|
||||||
def test_format(self):
|
def test_format(self):
|
||||||
t = self.theclass(1, 2, 3, 4)
|
t = self.theclass(1, 2, 3, 4)
|
||||||
self.assertEqual(t.__format__(''), str(t))
|
self.assertEqual(t.__format__(''), str(t))
|
||||||
|
@ -4259,9 +4307,8 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase):
|
||||||
self.assertRaises(TypeError, t.strftime, "%Z")
|
self.assertRaises(TypeError, t.strftime, "%Z")
|
||||||
|
|
||||||
# Issue #6697:
|
# Issue #6697:
|
||||||
if '_Fast' in self.__class__.__name__:
|
Badtzname.tz = '\ud800'
|
||||||
Badtzname.tz = '\ud800'
|
self.assertEqual(t.strftime("%Z"), '\ud800')
|
||||||
self.assertRaises(ValueError, t.strftime, "%Z")
|
|
||||||
|
|
||||||
def test_hash_edge_cases(self):
|
def test_hash_edge_cases(self):
|
||||||
# Offsets that overflow a basic time.
|
# Offsets that overflow a basic time.
|
||||||
|
|
|
@ -181,8 +181,33 @@ class TimeTestCase(unittest.TestCase):
|
||||||
self.fail('conversion specifier: %r failed.' % format)
|
self.fail('conversion specifier: %r failed.' % format)
|
||||||
|
|
||||||
self.assertRaises(TypeError, time.strftime, b'%S', tt)
|
self.assertRaises(TypeError, time.strftime, b'%S', tt)
|
||||||
# embedded null character
|
|
||||||
self.assertRaises(ValueError, time.strftime, '%S\0', tt)
|
def test_strftime_special(self):
|
||||||
|
tt = time.gmtime(self.t)
|
||||||
|
s1 = time.strftime('%c', tt)
|
||||||
|
s2 = time.strftime('%B', tt)
|
||||||
|
# gh-52551, gh-78662: Unicode strings should pass through strftime,
|
||||||
|
# independently from locale.
|
||||||
|
self.assertEqual(time.strftime('\U0001f40d', tt), '\U0001f40d')
|
||||||
|
self.assertEqual(time.strftime('\U0001f4bb%c\U0001f40d%B', tt), f'\U0001f4bb{s1}\U0001f40d{s2}')
|
||||||
|
self.assertEqual(time.strftime('%c\U0001f4bb%B\U0001f40d', tt), f'{s1}\U0001f4bb{s2}\U0001f40d')
|
||||||
|
# Lone surrogates should pass through.
|
||||||
|
self.assertEqual(time.strftime('\ud83d', tt), '\ud83d')
|
||||||
|
self.assertEqual(time.strftime('\udc0d', tt), '\udc0d')
|
||||||
|
self.assertEqual(time.strftime('\ud83d%c\udc0d%B', tt), f'\ud83d{s1}\udc0d{s2}')
|
||||||
|
self.assertEqual(time.strftime('%c\ud83d%B\udc0d', tt), f'{s1}\ud83d{s2}\udc0d')
|
||||||
|
self.assertEqual(time.strftime('%c\udc0d%B\ud83d', tt), f'{s1}\udc0d{s2}\ud83d')
|
||||||
|
# Surrogate pairs should not recombine.
|
||||||
|
self.assertEqual(time.strftime('\ud83d\udc0d', tt), '\ud83d\udc0d')
|
||||||
|
self.assertEqual(time.strftime('%c\ud83d\udc0d%B', tt), f'{s1}\ud83d\udc0d{s2}')
|
||||||
|
# Surrogate-escaped bytes should not recombine.
|
||||||
|
self.assertEqual(time.strftime('\udcf0\udc9f\udc90\udc8d', tt), '\udcf0\udc9f\udc90\udc8d')
|
||||||
|
self.assertEqual(time.strftime('%c\udcf0\udc9f\udc90\udc8d%B', tt), f'{s1}\udcf0\udc9f\udc90\udc8d{s2}')
|
||||||
|
# gh-124531: The null character should not terminate the format string.
|
||||||
|
self.assertEqual(time.strftime('\0', tt), '\0')
|
||||||
|
self.assertEqual(time.strftime('\0'*1000, tt), '\0'*1000)
|
||||||
|
self.assertEqual(time.strftime('\0%c\0%B', tt), f'\0{s1}\0{s2}')
|
||||||
|
self.assertEqual(time.strftime('%c\0%B\0', tt), f'{s1}\0{s2}\0')
|
||||||
|
|
||||||
def _bounds_checking(self, func):
|
def _bounds_checking(self, func):
|
||||||
# Make sure that strftime() checks the bounds of the various parts
|
# Make sure that strftime() checks the bounds of the various parts
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
Fix encoding issues in :func:`time.strftime`, the
|
||||||
|
:meth:`~datetime.datetime.strftime` method of the :mod:`datetime` classes
|
||||||
|
:class:`~datetime.datetime`, :class:`~datetime.date` and
|
||||||
|
:class:`~datetime.time` and formatting of these classes. Characters not
|
||||||
|
encodable in the current locale are now acceptable in the format string.
|
||||||
|
Surrogate pairs and sequences of surrogateescape-encoded bytes are no longer
|
||||||
|
recombined. An embedded null character no longer terminates the format
|
||||||
|
string.
|
|
@ -1747,7 +1747,7 @@ make_somezreplacement(PyObject *object, char *sep, PyObject *tzinfoarg)
|
||||||
PyObject *tzinfo = get_tzinfo_member(object);
|
PyObject *tzinfo = get_tzinfo_member(object);
|
||||||
|
|
||||||
if (tzinfo == Py_None || tzinfo == NULL) {
|
if (tzinfo == Py_None || tzinfo == NULL) {
|
||||||
return PyBytes_FromStringAndSize(NULL, 0);
|
return PyUnicode_FromStringAndSize(NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(tzinfoarg != NULL);
|
assert(tzinfoarg != NULL);
|
||||||
|
@ -1758,7 +1758,7 @@ make_somezreplacement(PyObject *object, char *sep, PyObject *tzinfoarg)
|
||||||
tzinfoarg) < 0)
|
tzinfoarg) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
return PyBytes_FromStringAndSize(buf, strlen(buf));
|
return PyUnicode_FromString(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -1815,7 +1815,7 @@ make_freplacement(PyObject *object)
|
||||||
else
|
else
|
||||||
sprintf(freplacement, "%06d", 0);
|
sprintf(freplacement, "%06d", 0);
|
||||||
|
|
||||||
return PyBytes_FromStringAndSize(freplacement, strlen(freplacement));
|
return PyUnicode_FromString(freplacement);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* I sure don't want to reproduce the strftime code from the time module,
|
/* I sure don't want to reproduce the strftime code from the time module,
|
||||||
|
@ -1836,94 +1836,60 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
|
||||||
PyObject *Zreplacement = NULL; /* py string, replacement for %Z */
|
PyObject *Zreplacement = NULL; /* py string, replacement for %Z */
|
||||||
PyObject *freplacement = NULL; /* py string, replacement for %f */
|
PyObject *freplacement = NULL; /* py string, replacement for %f */
|
||||||
|
|
||||||
const char *pin; /* pointer to next char in input format */
|
|
||||||
Py_ssize_t flen; /* length of input format */
|
|
||||||
char ch; /* next char in input format */
|
|
||||||
|
|
||||||
PyObject *newfmt = NULL; /* py string, the output format */
|
|
||||||
char *pnew; /* pointer to available byte in output format */
|
|
||||||
size_t totalnew; /* number bytes total in output format buffer,
|
|
||||||
exclusive of trailing \0 */
|
|
||||||
size_t usednew; /* number bytes used so far in output format buffer */
|
|
||||||
|
|
||||||
const char *ptoappend; /* ptr to string to append to output buffer */
|
|
||||||
Py_ssize_t ntoappend; /* # of bytes to append to output buffer */
|
|
||||||
|
|
||||||
#ifdef Py_NORMALIZE_CENTURY
|
|
||||||
/* Buffer of maximum size of formatted year permitted by long. */
|
|
||||||
char buf[SIZEOF_LONG * 5 / 2 + 2
|
|
||||||
#ifdef Py_STRFTIME_C99_SUPPORT
|
|
||||||
/* Need 6 more to accommodate dashes, 2-digit month and day for %F. */
|
|
||||||
+ 6
|
|
||||||
#endif
|
|
||||||
];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
assert(object && format && timetuple);
|
assert(object && format && timetuple);
|
||||||
assert(PyUnicode_Check(format));
|
assert(PyUnicode_Check(format));
|
||||||
/* Convert the input format to a C string and size */
|
|
||||||
pin = PyUnicode_AsUTF8AndSize(format, &flen);
|
|
||||||
if (!pin)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
PyObject *strftime = _PyImport_GetModuleAttrString("time", "strftime");
|
PyObject *strftime = _PyImport_GetModuleAttrString("time", "strftime");
|
||||||
if (strftime == NULL) {
|
if (strftime == NULL) {
|
||||||
goto Done;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Scan the input format, looking for %z/%Z/%f escapes, building
|
/* Scan the input format, looking for %z/%Z/%f escapes, building
|
||||||
* a new format. Since computing the replacements for those codes
|
* a new format. Since computing the replacements for those codes
|
||||||
* is expensive, don't unless they're actually used.
|
* is expensive, don't unless they're actually used.
|
||||||
*/
|
*/
|
||||||
if (flen > INT_MAX - 1) {
|
|
||||||
PyErr_NoMemory();
|
|
||||||
goto Done;
|
|
||||||
}
|
|
||||||
|
|
||||||
totalnew = flen + 1; /* realistic if no %z/%Z */
|
_PyUnicodeWriter writer;
|
||||||
newfmt = PyBytes_FromStringAndSize(NULL, totalnew);
|
_PyUnicodeWriter_Init(&writer);
|
||||||
if (newfmt == NULL) goto Done;
|
writer.overallocate = 1;
|
||||||
pnew = PyBytes_AsString(newfmt);
|
|
||||||
usednew = 0;
|
|
||||||
|
|
||||||
while ((ch = *pin++) != '\0') {
|
Py_ssize_t flen = PyUnicode_GET_LENGTH(format);
|
||||||
if (ch != '%') {
|
Py_ssize_t i = 0;
|
||||||
ptoappend = pin - 1;
|
Py_ssize_t start = 0;
|
||||||
ntoappend = 1;
|
Py_ssize_t end = 0;
|
||||||
|
while (i != flen) {
|
||||||
|
i = PyUnicode_FindChar(format, '%', i, flen, 1);
|
||||||
|
if (i < 0) {
|
||||||
|
assert(!PyErr_Occurred());
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
else if ((ch = *pin++) == '\0') {
|
end = i;
|
||||||
/* Null byte follows %, copy only '%'.
|
i++;
|
||||||
*
|
if (i == flen) {
|
||||||
* Back the pin up one char so that we catch the null check
|
break;
|
||||||
* the next time through the loop.*/
|
|
||||||
pin--;
|
|
||||||
ptoappend = pin - 1;
|
|
||||||
ntoappend = 1;
|
|
||||||
}
|
}
|
||||||
|
Py_UCS4 ch = PyUnicode_READ_CHAR(format, i);
|
||||||
|
i++;
|
||||||
/* A % has been seen and ch is the character after it. */
|
/* A % has been seen and ch is the character after it. */
|
||||||
else if (ch == 'z') {
|
PyObject *replacement = NULL;
|
||||||
|
if (ch == 'z') {
|
||||||
/* %z -> +HHMM */
|
/* %z -> +HHMM */
|
||||||
if (zreplacement == NULL) {
|
if (zreplacement == NULL) {
|
||||||
zreplacement = make_somezreplacement(object, "", tzinfoarg);
|
zreplacement = make_somezreplacement(object, "", tzinfoarg);
|
||||||
if (zreplacement == NULL)
|
if (zreplacement == NULL)
|
||||||
goto Done;
|
goto Error;
|
||||||
}
|
}
|
||||||
assert(zreplacement != NULL);
|
replacement = zreplacement;
|
||||||
assert(PyBytes_Check(zreplacement));
|
|
||||||
ptoappend = PyBytes_AS_STRING(zreplacement);
|
|
||||||
ntoappend = PyBytes_GET_SIZE(zreplacement);
|
|
||||||
}
|
}
|
||||||
else if (ch == ':' && *pin == 'z' && pin++) {
|
else if (ch == ':' && i < flen && PyUnicode_READ_CHAR(format, i) == 'z') {
|
||||||
/* %:z -> +HH:MM */
|
/* %:z -> +HH:MM */
|
||||||
|
i++;
|
||||||
if (colonzreplacement == NULL) {
|
if (colonzreplacement == NULL) {
|
||||||
colonzreplacement = make_somezreplacement(object, ":", tzinfoarg);
|
colonzreplacement = make_somezreplacement(object, ":", tzinfoarg);
|
||||||
if (colonzreplacement == NULL)
|
if (colonzreplacement == NULL)
|
||||||
goto Done;
|
goto Error;
|
||||||
}
|
}
|
||||||
assert(colonzreplacement != NULL);
|
replacement = colonzreplacement;
|
||||||
assert(PyBytes_Check(colonzreplacement));
|
|
||||||
ptoappend = PyBytes_AS_STRING(colonzreplacement);
|
|
||||||
ntoappend = PyBytes_GET_SIZE(colonzreplacement);
|
|
||||||
}
|
}
|
||||||
else if (ch == 'Z') {
|
else if (ch == 'Z') {
|
||||||
/* format tzname */
|
/* format tzname */
|
||||||
|
@ -1931,26 +1897,18 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
|
||||||
Zreplacement = make_Zreplacement(object,
|
Zreplacement = make_Zreplacement(object,
|
||||||
tzinfoarg);
|
tzinfoarg);
|
||||||
if (Zreplacement == NULL)
|
if (Zreplacement == NULL)
|
||||||
goto Done;
|
goto Error;
|
||||||
}
|
}
|
||||||
assert(Zreplacement != NULL);
|
replacement = Zreplacement;
|
||||||
assert(PyUnicode_Check(Zreplacement));
|
|
||||||
ptoappend = PyUnicode_AsUTF8AndSize(Zreplacement,
|
|
||||||
&ntoappend);
|
|
||||||
if (ptoappend == NULL)
|
|
||||||
goto Done;
|
|
||||||
}
|
}
|
||||||
else if (ch == 'f') {
|
else if (ch == 'f') {
|
||||||
/* format microseconds */
|
/* format microseconds */
|
||||||
if (freplacement == NULL) {
|
if (freplacement == NULL) {
|
||||||
freplacement = make_freplacement(object);
|
freplacement = make_freplacement(object);
|
||||||
if (freplacement == NULL)
|
if (freplacement == NULL)
|
||||||
goto Done;
|
goto Error;
|
||||||
}
|
}
|
||||||
assert(freplacement != NULL);
|
replacement = freplacement;
|
||||||
assert(PyBytes_Check(freplacement));
|
|
||||||
ptoappend = PyBytes_AS_STRING(freplacement);
|
|
||||||
ntoappend = PyBytes_GET_SIZE(freplacement);
|
|
||||||
}
|
}
|
||||||
#ifdef Py_NORMALIZE_CENTURY
|
#ifdef Py_NORMALIZE_CENTURY
|
||||||
else if (ch == 'Y' || ch == 'G'
|
else if (ch == 'Y' || ch == 'G'
|
||||||
|
@ -1961,100 +1919,102 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
|
||||||
/* 0-pad year with century as necessary */
|
/* 0-pad year with century as necessary */
|
||||||
PyObject *item = PySequence_GetItem(timetuple, 0);
|
PyObject *item = PySequence_GetItem(timetuple, 0);
|
||||||
if (item == NULL) {
|
if (item == NULL) {
|
||||||
goto Done;
|
goto Error;
|
||||||
}
|
}
|
||||||
long year_long = PyLong_AsLong(item);
|
long year_long = PyLong_AsLong(item);
|
||||||
Py_DECREF(item);
|
Py_DECREF(item);
|
||||||
if (year_long == -1 && PyErr_Occurred()) {
|
if (year_long == -1 && PyErr_Occurred()) {
|
||||||
goto Done;
|
goto Error;
|
||||||
}
|
}
|
||||||
/* Note that datetime(1000, 1, 1).strftime('%G') == '1000' so year
|
/* Note that datetime(1000, 1, 1).strftime('%G') == '1000' so year
|
||||||
1000 for %G can go on the fast path. */
|
1000 for %G can go on the fast path. */
|
||||||
if (year_long >= 1000) {
|
if (year_long >= 1000) {
|
||||||
goto PassThrough;
|
continue;
|
||||||
}
|
}
|
||||||
if (ch == 'G') {
|
if (ch == 'G') {
|
||||||
PyObject *year_str = PyObject_CallFunction(strftime, "sO",
|
PyObject *year_str = PyObject_CallFunction(strftime, "sO",
|
||||||
"%G", timetuple);
|
"%G", timetuple);
|
||||||
if (year_str == NULL) {
|
if (year_str == NULL) {
|
||||||
goto Done;
|
goto Error;
|
||||||
}
|
}
|
||||||
PyObject *year = PyNumber_Long(year_str);
|
PyObject *year = PyNumber_Long(year_str);
|
||||||
Py_DECREF(year_str);
|
Py_DECREF(year_str);
|
||||||
if (year == NULL) {
|
if (year == NULL) {
|
||||||
goto Done;
|
goto Error;
|
||||||
}
|
}
|
||||||
year_long = PyLong_AsLong(year);
|
year_long = PyLong_AsLong(year);
|
||||||
Py_DECREF(year);
|
Py_DECREF(year);
|
||||||
if (year_long == -1 && PyErr_Occurred()) {
|
if (year_long == -1 && PyErr_Occurred()) {
|
||||||
goto Done;
|
goto Error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ntoappend = PyOS_snprintf(buf, sizeof(buf),
|
/* Buffer of maximum size of formatted year permitted by long.
|
||||||
|
* +6 to accommodate dashes, 2-digit month and day for %F. */
|
||||||
|
char buf[SIZEOF_LONG * 5 / 2 + 2 + 6];
|
||||||
|
Py_ssize_t n = PyOS_snprintf(buf, sizeof(buf),
|
||||||
#ifdef Py_STRFTIME_C99_SUPPORT
|
#ifdef Py_STRFTIME_C99_SUPPORT
|
||||||
ch == 'F' ? "%04ld-%%m-%%d" :
|
ch == 'F' ? "%04ld-%%m-%%d" :
|
||||||
#endif
|
#endif
|
||||||
"%04ld", year_long);
|
"%04ld", year_long);
|
||||||
#ifdef Py_STRFTIME_C99_SUPPORT
|
#ifdef Py_STRFTIME_C99_SUPPORT
|
||||||
if (ch == 'C') {
|
if (ch == 'C') {
|
||||||
ntoappend -= 2;
|
n -= 2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
ptoappend = buf;
|
if (_PyUnicodeWriter_WriteSubstring(&writer, format, start, end) < 0) {
|
||||||
|
goto Error;
|
||||||
|
}
|
||||||
|
start = i;
|
||||||
|
if (_PyUnicodeWriter_WriteASCIIString(&writer, buf, n) < 0) {
|
||||||
|
goto Error;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
else {
|
else {
|
||||||
/* percent followed by something else */
|
/* percent followed by something else */
|
||||||
#ifdef Py_NORMALIZE_CENTURY
|
|
||||||
PassThrough:
|
|
||||||
#endif
|
|
||||||
ptoappend = pin - 2;
|
|
||||||
ntoappend = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Append the ntoappend chars starting at ptoappend to
|
|
||||||
* the new format.
|
|
||||||
*/
|
|
||||||
if (ntoappend == 0)
|
|
||||||
continue;
|
continue;
|
||||||
assert(ptoappend != NULL);
|
|
||||||
assert(ntoappend > 0);
|
|
||||||
while (usednew + ntoappend > totalnew) {
|
|
||||||
if (totalnew > (PY_SSIZE_T_MAX >> 1)) { /* overflow */
|
|
||||||
PyErr_NoMemory();
|
|
||||||
goto Done;
|
|
||||||
}
|
|
||||||
totalnew <<= 1;
|
|
||||||
if (_PyBytes_Resize(&newfmt, totalnew) < 0)
|
|
||||||
goto Done;
|
|
||||||
pnew = PyBytes_AsString(newfmt) + usednew;
|
|
||||||
}
|
}
|
||||||
memcpy(pnew, ptoappend, ntoappend);
|
assert(replacement != NULL);
|
||||||
pnew += ntoappend;
|
assert(PyUnicode_Check(replacement));
|
||||||
usednew += ntoappend;
|
if (_PyUnicodeWriter_WriteSubstring(&writer, format, start, end) < 0) {
|
||||||
assert(usednew <= totalnew);
|
goto Error;
|
||||||
|
}
|
||||||
|
start = i;
|
||||||
|
if (_PyUnicodeWriter_WriteStr(&writer, replacement) < 0) {
|
||||||
|
goto Error;
|
||||||
|
}
|
||||||
} /* end while() */
|
} /* end while() */
|
||||||
|
|
||||||
if (_PyBytes_Resize(&newfmt, usednew) < 0)
|
PyObject *newformat;
|
||||||
goto Done;
|
if (start == 0) {
|
||||||
{
|
_PyUnicodeWriter_Dealloc(&writer);
|
||||||
PyObject *format;
|
newformat = Py_NewRef(format);
|
||||||
|
}
|
||||||
format = PyUnicode_FromString(PyBytes_AS_STRING(newfmt));
|
else {
|
||||||
if (format != NULL) {
|
if (_PyUnicodeWriter_WriteSubstring(&writer, format, start, flen) < 0) {
|
||||||
result = PyObject_CallFunctionObjArgs(strftime,
|
goto Error;
|
||||||
format, timetuple, NULL);
|
}
|
||||||
Py_DECREF(format);
|
newformat = _PyUnicodeWriter_Finish(&writer);
|
||||||
|
if (newformat == NULL) {
|
||||||
|
goto Done;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
result = PyObject_CallFunctionObjArgs(strftime,
|
||||||
|
newformat, timetuple, NULL);
|
||||||
|
Py_DECREF(newformat);
|
||||||
|
|
||||||
Done:
|
Done:
|
||||||
Py_XDECREF(freplacement);
|
Py_XDECREF(freplacement);
|
||||||
Py_XDECREF(zreplacement);
|
Py_XDECREF(zreplacement);
|
||||||
Py_XDECREF(colonzreplacement);
|
Py_XDECREF(colonzreplacement);
|
||||||
Py_XDECREF(Zreplacement);
|
Py_XDECREF(Zreplacement);
|
||||||
Py_XDECREF(newfmt);
|
|
||||||
Py_XDECREF(strftime);
|
Py_XDECREF(strftime);
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
|
Error:
|
||||||
|
_PyUnicodeWriter_Dealloc(&writer);
|
||||||
|
goto Done;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------------
|
/* ---------------------------------------------------------------------------
|
||||||
|
|
|
@ -775,28 +775,101 @@ the C library strftime function.\n"
|
||||||
#define time_strlen strlen
|
#define time_strlen strlen
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
time_strftime1(time_char **outbuf, size_t *bufsize,
|
||||||
|
time_char *format, size_t fmtlen,
|
||||||
|
struct tm *tm)
|
||||||
|
{
|
||||||
|
size_t buflen;
|
||||||
|
#if defined(MS_WINDOWS) && !defined(HAVE_WCSFTIME)
|
||||||
|
/* check that the format string contains only valid directives */
|
||||||
|
for (const time_char *f = strchr(format, '%');
|
||||||
|
f != NULL;
|
||||||
|
f = strchr(f + 2, '%'))
|
||||||
|
{
|
||||||
|
if (f[1] == '#')
|
||||||
|
++f; /* not documented by python, */
|
||||||
|
if (f[1] == '\0')
|
||||||
|
break;
|
||||||
|
if ((f[1] == 'y') && tm->tm_year < 0) {
|
||||||
|
PyErr_SetString(PyExc_ValueError,
|
||||||
|
"format %y requires year >= 1900 on Windows");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#elif (defined(_AIX) || (defined(__sun) && defined(__SVR4))) && defined(HAVE_WCSFTIME)
|
||||||
|
for (const time_char *f = wcschr(format, '%');
|
||||||
|
f != NULL;
|
||||||
|
f = wcschr(f + 2, '%'))
|
||||||
|
{
|
||||||
|
if (f[1] == L'\0')
|
||||||
|
break;
|
||||||
|
/* Issue #19634: On AIX, wcsftime("y", (1899, 1, 1, 0, 0, 0, 0, 0, 0))
|
||||||
|
returns "0/" instead of "99" */
|
||||||
|
if (f[1] == L'y' && tm->tm_year < 0) {
|
||||||
|
PyErr_SetString(PyExc_ValueError,
|
||||||
|
"format %y requires year >= 1900 on AIX");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* I hate these functions that presume you know how big the output
|
||||||
|
* will be ahead of time...
|
||||||
|
*/
|
||||||
|
while (1) {
|
||||||
|
if (*bufsize > PY_SSIZE_T_MAX/sizeof(time_char)) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
*outbuf = (time_char *)PyMem_Realloc(*outbuf,
|
||||||
|
*bufsize*sizeof(time_char));
|
||||||
|
if (*outbuf == NULL) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
|
||||||
|
errno = 0;
|
||||||
|
#endif
|
||||||
|
_Py_BEGIN_SUPPRESS_IPH
|
||||||
|
buflen = format_time(*outbuf, *bufsize, format, tm);
|
||||||
|
_Py_END_SUPPRESS_IPH
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
|
||||||
|
/* VisualStudio .NET 2005 does this properly */
|
||||||
|
if (buflen == 0 && errno == EINVAL) {
|
||||||
|
PyErr_SetString(PyExc_ValueError, "Invalid format string");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (buflen == 0 && *bufsize < 256 * fmtlen) {
|
||||||
|
*bufsize += *bufsize;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* If the buffer is 256 times as long as the format,
|
||||||
|
it's probably not failing for lack of room!
|
||||||
|
More likely, the format yields an empty result,
|
||||||
|
e.g. an empty format, or %Z when the timezone
|
||||||
|
is unknown. */
|
||||||
|
#ifdef HAVE_WCSFTIME
|
||||||
|
return PyUnicode_FromWideChar(*outbuf, buflen);
|
||||||
|
#else
|
||||||
|
return PyUnicode_DecodeLocaleAndSize(*outbuf, buflen, "surrogateescape");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
time_strftime(PyObject *module, PyObject *args)
|
time_strftime(PyObject *module, PyObject *args)
|
||||||
{
|
{
|
||||||
PyObject *tup = NULL;
|
PyObject *tup = NULL;
|
||||||
struct tm buf;
|
struct tm buf;
|
||||||
const time_char *fmt;
|
|
||||||
#ifdef HAVE_WCSFTIME
|
|
||||||
wchar_t *format;
|
|
||||||
#else
|
|
||||||
PyObject *format;
|
|
||||||
#endif
|
|
||||||
PyObject *format_arg;
|
PyObject *format_arg;
|
||||||
size_t fmtlen, buflen;
|
Py_ssize_t format_size;
|
||||||
time_char *outbuf = NULL;
|
time_char *format, *outbuf = NULL;
|
||||||
size_t i;
|
size_t fmtlen, bufsize = 1024;
|
||||||
PyObject *ret = NULL;
|
|
||||||
|
|
||||||
memset((void *) &buf, '\0', sizeof(buf));
|
memset((void *) &buf, '\0', sizeof(buf));
|
||||||
|
|
||||||
/* Will always expect a unicode string to be passed as format.
|
|
||||||
Given that there's no str type anymore in py3k this seems safe.
|
|
||||||
*/
|
|
||||||
if (!PyArg_ParseTuple(args, "U|O:strftime", &format_arg, &tup))
|
if (!PyArg_ParseTuple(args, "U|O:strftime", &format_arg, &tup))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -834,101 +907,63 @@ time_strftime(PyObject *module, PyObject *args)
|
||||||
else if (buf.tm_isdst > 1)
|
else if (buf.tm_isdst > 1)
|
||||||
buf.tm_isdst = 1;
|
buf.tm_isdst = 1;
|
||||||
|
|
||||||
#ifdef HAVE_WCSFTIME
|
format_size = PyUnicode_GET_LENGTH(format_arg);
|
||||||
format = PyUnicode_AsWideCharString(format_arg, NULL);
|
if ((size_t)format_size > PY_SSIZE_T_MAX/sizeof(time_char) - 1) {
|
||||||
if (format == NULL)
|
PyErr_NoMemory();
|
||||||
return NULL;
|
return NULL;
|
||||||
fmt = format;
|
}
|
||||||
#else
|
format = PyMem_Malloc((format_size + 1)*sizeof(time_char));
|
||||||
/* Convert the unicode string to an ascii one */
|
if (format == NULL) {
|
||||||
format = PyUnicode_EncodeLocale(format_arg, "surrogateescape");
|
PyErr_NoMemory();
|
||||||
if (format == NULL)
|
|
||||||
return NULL;
|
return NULL;
|
||||||
fmt = PyBytes_AS_STRING(format);
|
}
|
||||||
#endif
|
_PyUnicodeWriter writer;
|
||||||
|
_PyUnicodeWriter_Init(&writer);
|
||||||
|
writer.overallocate = 1;
|
||||||
|
Py_ssize_t i = 0;
|
||||||
|
while (i < format_size) {
|
||||||
|
fmtlen = 0;
|
||||||
|
for (; i < format_size; i++) {
|
||||||
|
Py_UCS4 c = PyUnicode_READ_CHAR(format_arg, i);
|
||||||
|
if (!c || c > 127) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
format[fmtlen++] = (char)c;
|
||||||
|
}
|
||||||
|
if (fmtlen) {
|
||||||
|
format[fmtlen] = 0;
|
||||||
|
PyObject *unicode = time_strftime1(&outbuf, &bufsize,
|
||||||
|
format, fmtlen, &buf);
|
||||||
|
if (unicode == NULL) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
if (_PyUnicodeWriter_WriteStr(&writer, unicode) < 0) {
|
||||||
|
Py_DECREF(unicode);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
Py_DECREF(unicode);
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(MS_WINDOWS) && !defined(HAVE_WCSFTIME)
|
Py_ssize_t start = i;
|
||||||
/* check that the format string contains only valid directives */
|
for (; i < format_size; i++) {
|
||||||
for (outbuf = strchr(fmt, '%');
|
Py_UCS4 c = PyUnicode_READ_CHAR(format_arg, i);
|
||||||
outbuf != NULL;
|
if (c == '%') {
|
||||||
outbuf = strchr(outbuf+2, '%'))
|
break;
|
||||||
{
|
}
|
||||||
if (outbuf[1] == '#')
|
}
|
||||||
++outbuf; /* not documented by python, */
|
if (_PyUnicodeWriter_WriteSubstring(&writer, format_arg, start, i) < 0) {
|
||||||
if (outbuf[1] == '\0')
|
goto error;
|
||||||
break;
|
|
||||||
if ((outbuf[1] == 'y') && buf.tm_year < 0) {
|
|
||||||
PyErr_SetString(PyExc_ValueError,
|
|
||||||
"format %y requires year >= 1900 on Windows");
|
|
||||||
Py_DECREF(format);
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif (defined(_AIX) || (defined(__sun) && defined(__SVR4))) && defined(HAVE_WCSFTIME)
|
|
||||||
for (outbuf = wcschr(fmt, '%');
|
|
||||||
outbuf != NULL;
|
|
||||||
outbuf = wcschr(outbuf+2, '%'))
|
|
||||||
{
|
|
||||||
if (outbuf[1] == L'\0')
|
|
||||||
break;
|
|
||||||
/* Issue #19634: On AIX, wcsftime("y", (1899, 1, 1, 0, 0, 0, 0, 0, 0))
|
|
||||||
returns "0/" instead of "99" */
|
|
||||||
if (outbuf[1] == L'y' && buf.tm_year < 0) {
|
|
||||||
PyErr_SetString(PyExc_ValueError,
|
|
||||||
"format %y requires year >= 1900 on AIX");
|
|
||||||
PyMem_Free(format);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
fmtlen = time_strlen(fmt);
|
PyMem_Free(outbuf);
|
||||||
|
|
||||||
/* I hate these functions that presume you know how big the output
|
|
||||||
* will be ahead of time...
|
|
||||||
*/
|
|
||||||
for (i = 1024; ; i += i) {
|
|
||||||
outbuf = (time_char *)PyMem_Malloc(i*sizeof(time_char));
|
|
||||||
if (outbuf == NULL) {
|
|
||||||
PyErr_NoMemory();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
|
|
||||||
errno = 0;
|
|
||||||
#endif
|
|
||||||
_Py_BEGIN_SUPPRESS_IPH
|
|
||||||
buflen = format_time(outbuf, i, fmt, &buf);
|
|
||||||
_Py_END_SUPPRESS_IPH
|
|
||||||
#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
|
|
||||||
/* VisualStudio .NET 2005 does this properly */
|
|
||||||
if (buflen == 0 && errno == EINVAL) {
|
|
||||||
PyErr_SetString(PyExc_ValueError, "Invalid format string");
|
|
||||||
PyMem_Free(outbuf);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
if (buflen > 0 || i >= 256 * fmtlen) {
|
|
||||||
/* If the buffer is 256 times as long as the format,
|
|
||||||
it's probably not failing for lack of room!
|
|
||||||
More likely, the format yields an empty result,
|
|
||||||
e.g. an empty format, or %Z when the timezone
|
|
||||||
is unknown. */
|
|
||||||
#ifdef HAVE_WCSFTIME
|
|
||||||
ret = PyUnicode_FromWideChar(outbuf, buflen);
|
|
||||||
#else
|
|
||||||
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, "surrogateescape");
|
|
||||||
#endif
|
|
||||||
PyMem_Free(outbuf);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
PyMem_Free(outbuf);
|
|
||||||
}
|
|
||||||
#ifdef HAVE_WCSFTIME
|
|
||||||
PyMem_Free(format);
|
PyMem_Free(format);
|
||||||
#else
|
return _PyUnicodeWriter_Finish(&writer);
|
||||||
Py_DECREF(format);
|
error:
|
||||||
#endif
|
PyMem_Free(outbuf);
|
||||||
return ret;
|
PyMem_Free(format);
|
||||||
|
_PyUnicodeWriter_Dealloc(&writer);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef time_char
|
#undef time_char
|
||||||
|
|
Loading…
Reference in New Issue