mirror of https://github.com/python/cpython
Rewrite PyBytes_FromFormatV() using _PyBytesWriter API
* Add much more unit tests on PyBytes_FromFormatV() * Remove the first loop to compute the length of the output string * Use _PyBytesWriter to handle the bytes buffer, use overallocation * Cleanup the code to make simpler and easier to review
This commit is contained in:
parent
b6d84832bf
commit
03dab786b2
|
@ -783,25 +783,93 @@ class BytesTest(BaseBytesTest, unittest.TestCase):
|
||||||
# Test PyBytes_FromFormat()
|
# Test PyBytes_FromFormat()
|
||||||
def test_from_format(self):
|
def test_from_format(self):
|
||||||
test.support.import_module('ctypes')
|
test.support.import_module('ctypes')
|
||||||
from ctypes import pythonapi, py_object, c_int, c_char_p
|
_testcapi = test.support.import_module('_testcapi')
|
||||||
|
from ctypes import pythonapi, py_object
|
||||||
|
from ctypes import (
|
||||||
|
c_int, c_uint,
|
||||||
|
c_long, c_ulong,
|
||||||
|
c_size_t, c_ssize_t,
|
||||||
|
c_char_p)
|
||||||
|
|
||||||
PyBytes_FromFormat = pythonapi.PyBytes_FromFormat
|
PyBytes_FromFormat = pythonapi.PyBytes_FromFormat
|
||||||
PyBytes_FromFormat.restype = py_object
|
PyBytes_FromFormat.restype = py_object
|
||||||
|
|
||||||
|
# basic tests
|
||||||
self.assertEqual(PyBytes_FromFormat(b'format'),
|
self.assertEqual(PyBytes_FromFormat(b'format'),
|
||||||
b'format')
|
b'format')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'Hello %s !', b'world'),
|
||||||
|
b'Hello world !')
|
||||||
|
|
||||||
|
# test formatters
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'c=%c', c_int(0)),
|
||||||
|
b'c=\0')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'c=%c', c_int(ord('@'))),
|
||||||
|
b'c=@')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'c=%c', c_int(255)),
|
||||||
|
b'c=\xff')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'd=%d ld=%ld zd=%zd',
|
||||||
|
c_int(1), c_long(2),
|
||||||
|
c_size_t(3)),
|
||||||
|
b'd=1 ld=2 zd=3')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'd=%d ld=%ld zd=%zd',
|
||||||
|
c_int(-1), c_long(-2),
|
||||||
|
c_size_t(-3)),
|
||||||
|
b'd=-1 ld=-2 zd=-3')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'u=%u lu=%lu zu=%zu',
|
||||||
|
c_uint(123), c_ulong(456),
|
||||||
|
c_size_t(789)),
|
||||||
|
b'u=123 lu=456 zu=789')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'i=%i', c_int(123)),
|
||||||
|
b'i=123')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'i=%i', c_int(-123)),
|
||||||
|
b'i=-123')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'x=%x', c_int(0xabc)),
|
||||||
|
b'x=abc')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'ptr=%p',
|
||||||
|
c_char_p(0xabcdef)),
|
||||||
|
b'ptr=0xabcdef')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b's=%s', c_char_p(b'cstr')),
|
||||||
|
b's=cstr')
|
||||||
|
|
||||||
|
# test minimum and maximum integer values
|
||||||
|
size_max = c_size_t(-1).value
|
||||||
|
for formatstr, ctypes_type, value, py_formatter in (
|
||||||
|
(b'%d', c_int, _testcapi.INT_MIN, str),
|
||||||
|
(b'%d', c_int, _testcapi.INT_MAX, str),
|
||||||
|
(b'%ld', c_long, _testcapi.LONG_MIN, str),
|
||||||
|
(b'%ld', c_long, _testcapi.LONG_MAX, str),
|
||||||
|
(b'%lu', c_ulong, _testcapi.ULONG_MAX, str),
|
||||||
|
(b'%zd', c_ssize_t, _testcapi.PY_SSIZE_T_MIN, str),
|
||||||
|
(b'%zd', c_ssize_t, _testcapi.PY_SSIZE_T_MAX, str),
|
||||||
|
(b'%zu', c_size_t, size_max, str),
|
||||||
|
(b'%p', c_char_p, size_max, lambda value: '%#x' % value),
|
||||||
|
):
|
||||||
|
self.assertEqual(PyBytes_FromFormat(formatstr, ctypes_type(value)),
|
||||||
|
py_formatter(value).encode('ascii')),
|
||||||
|
|
||||||
|
# width and precision (width is currently ignored)
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'%5s', b'a'),
|
||||||
|
b'a')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'%.3s', b'abcdef'),
|
||||||
|
b'abc')
|
||||||
|
|
||||||
|
# '%%' formatter
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'%%'),
|
||||||
|
b'%')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'[%%]'),
|
||||||
|
b'[%]')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'%%%c', c_int(ord('_'))),
|
||||||
|
b'%_')
|
||||||
|
self.assertEqual(PyBytes_FromFormat(b'%%s'),
|
||||||
|
b'%s')
|
||||||
|
|
||||||
|
# Invalid formats and partial formatting
|
||||||
self.assertEqual(PyBytes_FromFormat(b'%'), b'%')
|
self.assertEqual(PyBytes_FromFormat(b'%'), b'%')
|
||||||
self.assertEqual(PyBytes_FromFormat(b'%%'), b'%')
|
self.assertEqual(PyBytes_FromFormat(b'x=%i y=%', c_int(2), c_int(3)),
|
||||||
self.assertEqual(PyBytes_FromFormat(b'%%s'), b'%s')
|
b'x=2 y=%')
|
||||||
self.assertEqual(PyBytes_FromFormat(b'[%%]'), b'[%]')
|
|
||||||
self.assertEqual(PyBytes_FromFormat(b'%%%c', c_int(ord('_'))), b'%_')
|
|
||||||
|
|
||||||
self.assertEqual(PyBytes_FromFormat(b'c:%c', c_int(255)),
|
# Issue #19969: %c must raise OverflowError for values
|
||||||
b'c:\xff')
|
# not in the range [0; 255]
|
||||||
self.assertEqual(PyBytes_FromFormat(b's:%s', c_char_p(b'cstr')),
|
|
||||||
b's:cstr')
|
|
||||||
|
|
||||||
# Issue #19969
|
|
||||||
self.assertRaises(OverflowError,
|
self.assertRaises(OverflowError,
|
||||||
PyBytes_FromFormat, b'%c', c_int(-1))
|
PyBytes_FromFormat, b'%c', c_int(-1))
|
||||||
self.assertRaises(OverflowError,
|
self.assertRaises(OverflowError,
|
||||||
|
|
|
@ -174,190 +174,184 @@ PyBytes_FromString(const char *str)
|
||||||
PyObject *
|
PyObject *
|
||||||
PyBytes_FromFormatV(const char *format, va_list vargs)
|
PyBytes_FromFormatV(const char *format, va_list vargs)
|
||||||
{
|
{
|
||||||
va_list count;
|
|
||||||
Py_ssize_t n = 0;
|
|
||||||
const char* f;
|
|
||||||
char *s;
|
char *s;
|
||||||
PyObject* string;
|
const char *f;
|
||||||
|
const char *p;
|
||||||
|
Py_ssize_t prec;
|
||||||
|
int longflag;
|
||||||
|
int size_tflag;
|
||||||
|
/* Longest 64-bit formatted numbers:
|
||||||
|
- "18446744073709551615\0" (21 bytes)
|
||||||
|
- "-9223372036854775808\0" (21 bytes)
|
||||||
|
Decimal takes the most space (it isn't enough for octal.)
|
||||||
|
|
||||||
Py_VA_COPY(count, vargs);
|
Longest 64-bit pointer representation:
|
||||||
/* step 1: figure out how large a buffer we need */
|
"0xffffffffffffffff\0" (19 bytes). */
|
||||||
for (f = format; *f; f++) {
|
char buffer[21];
|
||||||
if (*f == '%') {
|
_PyBytesWriter writer;
|
||||||
const char* p = f;
|
|
||||||
while (*++f && *f != '%' && !Py_ISALPHA(*f))
|
|
||||||
;
|
|
||||||
|
|
||||||
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
|
_PyBytesWriter_Init(&writer);
|
||||||
* they don't affect the amount of space we reserve.
|
|
||||||
*/
|
|
||||||
if ((*f == 'l' || *f == 'z') &&
|
|
||||||
(f[1] == 'd' || f[1] == 'u'))
|
|
||||||
++f;
|
|
||||||
|
|
||||||
switch (*f) {
|
s = _PyBytesWriter_Alloc(&writer, strlen(format));
|
||||||
case 'c':
|
if (s == NULL)
|
||||||
{
|
|
||||||
int c = va_arg(count, int);
|
|
||||||
if (c < 0 || c > 255) {
|
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
|
||||||
"PyBytes_FromFormatV(): %c format "
|
|
||||||
"expects an integer in range [0; 255]");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
n++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case '%':
|
|
||||||
n++;
|
|
||||||
break;
|
|
||||||
case 'd': case 'u': case 'i': case 'x':
|
|
||||||
(void) va_arg(count, int);
|
|
||||||
/* 20 bytes is enough to hold a 64-bit
|
|
||||||
integer. Decimal takes the most space.
|
|
||||||
This isn't enough for octal. */
|
|
||||||
n += 20;
|
|
||||||
break;
|
|
||||||
case 's':
|
|
||||||
s = va_arg(count, char*);
|
|
||||||
n += strlen(s);
|
|
||||||
break;
|
|
||||||
case 'p':
|
|
||||||
(void) va_arg(count, int);
|
|
||||||
/* maximum 64-bit pointer representation:
|
|
||||||
* 0xffffffffffffffff
|
|
||||||
* so 19 characters is enough.
|
|
||||||
* XXX I count 18 -- what's the extra for?
|
|
||||||
*/
|
|
||||||
n += 19;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
/* if we stumble upon an unknown
|
|
||||||
formatting code, copy the rest of
|
|
||||||
the format string to the output
|
|
||||||
string. (we cannot just skip the
|
|
||||||
code, since there's no way to know
|
|
||||||
what's in the argument list) */
|
|
||||||
n += strlen(p);
|
|
||||||
goto expand;
|
|
||||||
}
|
|
||||||
} else
|
|
||||||
n++;
|
|
||||||
}
|
|
||||||
expand:
|
|
||||||
/* step 2: fill the buffer */
|
|
||||||
/* Since we've analyzed how much space we need for the worst case,
|
|
||||||
use sprintf directly instead of the slower PyOS_snprintf. */
|
|
||||||
string = PyBytes_FromStringAndSize(NULL, n);
|
|
||||||
if (!string)
|
|
||||||
return NULL;
|
return NULL;
|
||||||
|
writer.overallocate = 1;
|
||||||
|
|
||||||
s = PyBytes_AsString(string);
|
#define WRITE_BYTES(str) \
|
||||||
|
do { \
|
||||||
|
s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
|
||||||
|
if (s == NULL) \
|
||||||
|
goto error; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
for (f = format; *f; f++) {
|
for (f = format; *f; f++) {
|
||||||
if (*f == '%') {
|
if (*f != '%') {
|
||||||
const char* p = f++;
|
*s++ = *f;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
p = f++;
|
||||||
|
|
||||||
|
/* ignore the width (ex: 10 in "%10s") */
|
||||||
|
while (Py_ISDIGIT(*f))
|
||||||
|
f++;
|
||||||
|
|
||||||
|
/* parse the precision (ex: 10 in "%.10s") */
|
||||||
|
prec = 0;
|
||||||
|
if (*f == '.') {
|
||||||
|
f++;
|
||||||
|
for (; Py_ISDIGIT(*f); f++) {
|
||||||
|
prec = (prec * 10) + (*f - '0');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (*f && *f != '%' && !Py_ISALPHA(*f))
|
||||||
|
f++;
|
||||||
|
|
||||||
|
/* handle the long flag ('l'), but only for %ld and %lu.
|
||||||
|
others can be added when necessary. */
|
||||||
|
longflag = 0;
|
||||||
|
if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
|
||||||
|
longflag = 1;
|
||||||
|
++f;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* handle the size_t flag ('z'). */
|
||||||
|
size_tflag = 0;
|
||||||
|
if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
|
||||||
|
size_tflag = 1;
|
||||||
|
++f;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* substract bytes preallocated for the format string
|
||||||
|
(ex: 2 for "%s") */
|
||||||
|
writer.min_size -= (f - p + 1);
|
||||||
|
|
||||||
|
switch (*f) {
|
||||||
|
case 'c':
|
||||||
|
{
|
||||||
|
int c = va_arg(vargs, int);
|
||||||
|
if (c < 0 || c > 255) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"PyBytes_FromFormatV(): %c format "
|
||||||
|
"expects an integer in range [0; 255]");
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
writer.min_size++;
|
||||||
|
*s++ = (unsigned char)c;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 'd':
|
||||||
|
if (longflag)
|
||||||
|
sprintf(buffer, "%ld", va_arg(vargs, long));
|
||||||
|
else if (size_tflag)
|
||||||
|
sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
|
||||||
|
va_arg(vargs, Py_ssize_t));
|
||||||
|
else
|
||||||
|
sprintf(buffer, "%d", va_arg(vargs, int));
|
||||||
|
assert(strlen(buffer) < sizeof(buffer));
|
||||||
|
WRITE_BYTES(buffer);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'u':
|
||||||
|
if (longflag)
|
||||||
|
sprintf(buffer, "%lu",
|
||||||
|
va_arg(vargs, unsigned long));
|
||||||
|
else if (size_tflag)
|
||||||
|
sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
|
||||||
|
va_arg(vargs, size_t));
|
||||||
|
else
|
||||||
|
sprintf(buffer, "%u",
|
||||||
|
va_arg(vargs, unsigned int));
|
||||||
|
assert(strlen(buffer) < sizeof(buffer));
|
||||||
|
WRITE_BYTES(buffer);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'i':
|
||||||
|
sprintf(buffer, "%i", va_arg(vargs, int));
|
||||||
|
assert(strlen(buffer) < sizeof(buffer));
|
||||||
|
WRITE_BYTES(buffer);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'x':
|
||||||
|
sprintf(buffer, "%x", va_arg(vargs, int));
|
||||||
|
assert(strlen(buffer) < sizeof(buffer));
|
||||||
|
WRITE_BYTES(buffer);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 's':
|
||||||
|
{
|
||||||
Py_ssize_t i;
|
Py_ssize_t i;
|
||||||
int longflag = 0;
|
|
||||||
int size_tflag = 0;
|
p = va_arg(vargs, char*);
|
||||||
/* parse the width.precision part (we're only
|
i = strlen(p);
|
||||||
interested in the precision value, if any) */
|
if (prec > 0 && i > prec)
|
||||||
n = 0;
|
i = prec;
|
||||||
while (Py_ISDIGIT(*f))
|
s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
|
||||||
n = (n*10) + *f++ - '0';
|
if (s == NULL)
|
||||||
if (*f == '.') {
|
goto error;
|
||||||
f++;
|
break;
|
||||||
n = 0;
|
}
|
||||||
while (Py_ISDIGIT(*f))
|
|
||||||
n = (n*10) + *f++ - '0';
|
case 'p':
|
||||||
|
sprintf(buffer, "%p", va_arg(vargs, void*));
|
||||||
|
assert(strlen(buffer) < sizeof(buffer));
|
||||||
|
/* %p is ill-defined: ensure leading 0x. */
|
||||||
|
if (buffer[1] == 'X')
|
||||||
|
buffer[1] = 'x';
|
||||||
|
else if (buffer[1] != 'x') {
|
||||||
|
memmove(buffer+2, buffer, strlen(buffer)+1);
|
||||||
|
buffer[0] = '0';
|
||||||
|
buffer[1] = 'x';
|
||||||
}
|
}
|
||||||
while (*f && *f != '%' && !Py_ISALPHA(*f))
|
WRITE_BYTES(buffer);
|
||||||
f++;
|
break;
|
||||||
/* handle the long flag, but only for %ld and %lu.
|
|
||||||
others can be added when necessary. */
|
case '%':
|
||||||
if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
|
writer.min_size++;
|
||||||
longflag = 1;
|
*s++ = '%';
|
||||||
++f;
|
break;
|
||||||
}
|
|
||||||
/* handle the size_t flag. */
|
default:
|
||||||
if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
|
if (*f == 0) {
|
||||||
size_tflag = 1;
|
/* fix min_size if we reached the end of the format string */
|
||||||
++f;
|
writer.min_size++;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (*f) {
|
/* invalid format string: copy unformatted string and exit */
|
||||||
case 'c':
|
WRITE_BYTES(p);
|
||||||
{
|
return _PyBytesWriter_Finish(&writer, s);
|
||||||
int c = va_arg(vargs, int);
|
}
|
||||||
/* c has been checked for overflow in the first step */
|
|
||||||
*s++ = (unsigned char)c;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 'd':
|
|
||||||
if (longflag)
|
|
||||||
sprintf(s, "%ld", va_arg(vargs, long));
|
|
||||||
else if (size_tflag)
|
|
||||||
sprintf(s, "%" PY_FORMAT_SIZE_T "d",
|
|
||||||
va_arg(vargs, Py_ssize_t));
|
|
||||||
else
|
|
||||||
sprintf(s, "%d", va_arg(vargs, int));
|
|
||||||
s += strlen(s);
|
|
||||||
break;
|
|
||||||
case 'u':
|
|
||||||
if (longflag)
|
|
||||||
sprintf(s, "%lu",
|
|
||||||
va_arg(vargs, unsigned long));
|
|
||||||
else if (size_tflag)
|
|
||||||
sprintf(s, "%" PY_FORMAT_SIZE_T "u",
|
|
||||||
va_arg(vargs, size_t));
|
|
||||||
else
|
|
||||||
sprintf(s, "%u",
|
|
||||||
va_arg(vargs, unsigned int));
|
|
||||||
s += strlen(s);
|
|
||||||
break;
|
|
||||||
case 'i':
|
|
||||||
sprintf(s, "%i", va_arg(vargs, int));
|
|
||||||
s += strlen(s);
|
|
||||||
break;
|
|
||||||
case 'x':
|
|
||||||
sprintf(s, "%x", va_arg(vargs, int));
|
|
||||||
s += strlen(s);
|
|
||||||
break;
|
|
||||||
case 's':
|
|
||||||
p = va_arg(vargs, char*);
|
|
||||||
i = strlen(p);
|
|
||||||
if (n > 0 && i > n)
|
|
||||||
i = n;
|
|
||||||
Py_MEMCPY(s, p, i);
|
|
||||||
s += i;
|
|
||||||
break;
|
|
||||||
case 'p':
|
|
||||||
sprintf(s, "%p", va_arg(vargs, void*));
|
|
||||||
/* %p is ill-defined: ensure leading 0x. */
|
|
||||||
if (s[1] == 'X')
|
|
||||||
s[1] = 'x';
|
|
||||||
else if (s[1] != 'x') {
|
|
||||||
memmove(s+2, s, strlen(s)+1);
|
|
||||||
s[0] = '0';
|
|
||||||
s[1] = 'x';
|
|
||||||
}
|
|
||||||
s += strlen(s);
|
|
||||||
break;
|
|
||||||
case '%':
|
|
||||||
*s++ = '%';
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
strcpy(s, p);
|
|
||||||
s += strlen(s);
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
} else
|
|
||||||
*s++ = *f;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
end:
|
#undef WRITE_BYTES
|
||||||
_PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
|
|
||||||
return string;
|
return _PyBytesWriter_Finish(&writer, s);
|
||||||
|
|
||||||
|
error:
|
||||||
|
_PyBytesWriter_Dealloc(&writer);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
|
|
Loading…
Reference in New Issue