bpo-41486: zlib uses an UINT32_MAX sliding window for the output buffer (GH-26143)

* zlib uses an UINT32_MAX sliding window for the output buffer

These functions have an initial output buffer size parameter:
- zlib.decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE)
- zlib.Decompress.flush([length])

If the initial size > UINT32_MAX, use a UINT32_MAX sliding window instead of clamping to UINT32_MAX.
This also speeds up the case where the initial size equals the actual size.

This fixes a memory consumption and copying performance regression in earlier 3.10 beta releases if someone used an output buffer larger than 4GiB with zlib.decompress.

Reviewed-by: Gregory P. Smith
This commit is contained in:
Ma Lin 2021-07-05 09:10:44 +08:00 committed by GitHub
parent 5644c7b3ff
commit a9a69bb3ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 117 additions and 30 deletions

View File

@ -0,0 +1,3 @@
Fix a memory consumption and copying performance regression in earlier 3.10
beta releases if someone used an output buffer larger than 4GiB with
zlib.decompress on input data that expands that large.

View File

@ -30,28 +30,6 @@ OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
return allocated;
}
/* Set up the output buffer with a caller-chosen size for the first block,
   clamped to the largest block zlib accepts.
   On success, return value >= 0
   On failure, return -1 */
static inline Py_ssize_t
OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, Py_ssize_t init_size,
                          Bytef **next_out, uint32_t *avail_out)
{
    /* The maximum block size accepted by zlib is UINT32_MAX. The sign test
       comes first so the (size_t) cast is only evaluated for non-negative
       sizes; negative sizes are forwarded unchanged. A 32-bit build never
       takes this branch. */
    const int too_big = (init_size >= 0) && ((size_t)init_size > UINT32_MAX);
    if (too_big) {
        init_size = UINT32_MAX;
    }

    const Py_ssize_t allocated =
        _BlocksOutputBuffer_InitWithSize(buffer, init_size, (void **)next_out);

    /* Stored unconditionally, i.e. also when `allocated` is negative. */
    *avail_out = (uint32_t)allocated;
    return allocated;
}
/* On success, return value >= 0
On failure, return -1 */
static inline Py_ssize_t
@ -84,6 +62,106 @@ OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
_BlocksOutputBuffer_OnError(buffer);
}
/* The max buffer size accepted by zlib is UINT32_MAX, but the initial buffer
   size `init_size` may exceed it in a 64-bit build. These wrapper functions
   maintain a UINT32_MAX sliding window over the first block:
    1. OutputBuffer_WindowInitWithSize()
    2. OutputBuffer_WindowGrow()
    3. OutputBuffer_WindowFinish()
    4. OutputBuffer_WindowOnError()

   ==== is the sliding window:
    1. ====------
           ^ next_posi, left_bytes is 6
    2. ----====--
               ^ next_posi, left_bytes is 2
    3. --------==
                 ^ next_posi, left_bytes is 0  */
typedef struct {
/* Number of bytes of the first block that lie past the current window,
   i.e. that have not yet been handed to zlib. */
Py_ssize_t left_bytes;
/* Address where the next window starts (one past the end of the current
   window). */
Bytef *next_posi;
} _Uint32Window;
/* Initialize the buffer with an initial buffer size and hand at most
   UINT32_MAX bytes of the first block to zlib as the first window.
   On success, return value >= 0
   On failure, return value < 0 */
static inline Py_ssize_t
OutputBuffer_WindowInitWithSize(_BlocksOutputBuffer *buffer, _Uint32Window *window,
                                Py_ssize_t init_size,
                                Bytef **next_out, uint32_t *avail_out)
{
    const Py_ssize_t total = _BlocksOutputBuffer_InitWithSize(
        buffer, init_size, (void **)next_out);
    if (total < 0) {
        /* Initialization failed: *avail_out and *window are left untouched. */
        return total;
    }

    /* The first window is the whole block when it fits in a uint32_t,
       otherwise the leading UINT32_MAX bytes of it. */
    const size_t cap = UINT32_MAX;
    const size_t first = ((size_t)total < cap) ? (size_t)total : cap;

    *avail_out = (uint32_t)first;
    window->next_posi = *next_out + first;
    window->left_bytes = total - (Py_ssize_t)first;
    return total;
}
/* Grow the buffer: while part of the first block has not been exposed yet,
   slide the window forward inside it; otherwise append a new block.
   On success, return value >= 0
   On failure, return value < 0 */
static inline Py_ssize_t
OutputBuffer_WindowGrow(_BlocksOutputBuffer *buffer, _Uint32Window *window,
                        Bytef **next_out, uint32_t *avail_out)
{
    /* The current window must be completely filled, otherwise moving on
       would leave a gap in the data. If inlined, this check could be
       optimized away. */
    if (*avail_out != 0) {
        PyErr_SetString(PyExc_SystemError,
                        "*avail_out != 0 in OutputBuffer_WindowGrow().");
        return -1;
    }

    if (window->left_bytes <= 0) {
        /* The first block is used up. Only the first block may be larger
           than UINT32_MAX, so from here on every request goes to the
           blocks buffer. *avail_out is 0 at this point (checked above). */
        assert(window->left_bytes == 0);
        const Py_ssize_t grown = _BlocksOutputBuffer_Grow(
            buffer, (void **)next_out, (Py_ssize_t)*avail_out);
        *avail_out = (uint32_t)grown;
        return grown;
    }

    /* Slide the UINT32_MAX window to the next part of the first block. */
    const size_t cap = UINT32_MAX;
    const size_t remaining = (size_t)window->left_bytes;
    const size_t step = (remaining < cap) ? remaining : cap;

    *next_out = window->next_posi;
    *avail_out = (uint32_t)step;
    window->next_posi += step;
    window->left_bytes -= (Py_ssize_t)step;
    return (Py_ssize_t)step;
}
/* Finish the buffer.
   On success, return a bytes object
   On failure, return NULL */
static inline PyObject *
OutputBuffer_WindowFinish(_BlocksOutputBuffer *buffer, _Uint32Window *window,
                          uint32_t avail_out)
{
    /* The unused tail is what zlib left free in the current window plus the
       part of the first block that no window ever covered. */
    return _BlocksOutputBuffer_Finish(
        buffer, window->left_bytes + (Py_ssize_t)avail_out);
}
/* Error-path counterpart of the other window wrappers: forwards to
   _BlocksOutputBuffer_OnError(). `window` is accepted so the signature
   matches the other OutputBuffer_Window*() functions, but it is not used. */
static inline void
OutputBuffer_WindowOnError(_BlocksOutputBuffer *buffer, _Uint32Window *window)
{
    (void)window;
    _BlocksOutputBuffer_OnError(buffer);
}
#define ENTER_ZLIB(obj) do { \
if (!PyThread_acquire_lock((obj)->lock, 0)) { \
@ -344,6 +422,7 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
int err, flush;
z_stream zst;
_BlocksOutputBuffer buffer = {.list = NULL};
_Uint32Window window; // output buffer's UINT32_MAX sliding window
zlibstate *state = get_zlib_state(module);
@ -354,7 +433,8 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
bufsize = 1;
}
if (OutputBuffer_InitWithSize(&buffer, bufsize, &zst.next_out, &zst.avail_out) < 0) {
if (OutputBuffer_WindowInitWithSize(&buffer, &window, bufsize,
&zst.next_out, &zst.avail_out) < 0) {
goto error;
}
@ -387,7 +467,8 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
do {
if (zst.avail_out == 0) {
if (OutputBuffer_Grow(&buffer, &zst.next_out, &zst.avail_out) < 0) {
if (OutputBuffer_WindowGrow(&buffer, &window,
&zst.next_out, &zst.avail_out) < 0) {
inflateEnd(&zst);
goto error;
}
@ -430,13 +511,13 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
goto error;
}
RetVal = OutputBuffer_Finish(&buffer, zst.avail_out);
RetVal = OutputBuffer_WindowFinish(&buffer, &window, zst.avail_out);
if (RetVal != NULL) {
return RetVal;
}
error:
OutputBuffer_OnError(&buffer);
OutputBuffer_WindowOnError(&buffer, &window);
return NULL;
}
@ -1171,6 +1252,7 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
PyObject *RetVal;
Py_ssize_t ibuflen;
_BlocksOutputBuffer buffer = {.list = NULL};
_Uint32Window window; // output buffer's UINT32_MAX sliding window
PyObject *module = PyType_GetModule(cls);
if (module == NULL) {
@ -1193,7 +1275,8 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
self->zst.next_in = data.buf;
ibuflen = data.len;
if (OutputBuffer_InitWithSize(&buffer, length, &self->zst.next_out, &self->zst.avail_out) < 0) {
if (OutputBuffer_WindowInitWithSize(&buffer, &window, length,
&self->zst.next_out, &self->zst.avail_out) < 0) {
goto abort;
}
@ -1203,7 +1286,8 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
do {
if (self->zst.avail_out == 0) {
if (OutputBuffer_Grow(&buffer, &self->zst.next_out, &self->zst.avail_out) < 0) {
if (OutputBuffer_WindowGrow(&buffer, &window,
&self->zst.next_out, &self->zst.avail_out) < 0) {
goto abort;
}
}
@ -1248,13 +1332,13 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
}
}
RetVal = OutputBuffer_Finish(&buffer, self->zst.avail_out);
RetVal = OutputBuffer_WindowFinish(&buffer, &window, self->zst.avail_out);
if (RetVal != NULL) {
goto success;
}
abort:
OutputBuffer_OnError(&buffer);
OutputBuffer_WindowOnError(&buffer, &window);
RetVal = NULL;
success:
PyBuffer_Release(&data);