diff --git a/Misc/NEWS b/Misc/NEWS index d27ce303d64..03c4569acae 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -229,6 +229,9 @@ Library Extension Modules ----------------- +- Issue #13159: FileIO, BZ2File, and the built-in file class now use a + linear-time buffer growth strategy instead of a quadratic one. + - Issue #13070: Fix a crash when a TextIOWrapper caught in a reference cycle would be finalized after the reference to its underlying BufferedRWPair's writer got cleared by the GC. diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 9f9044860fb..0048240b791 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -42,12 +42,6 @@ #define SMALLCHUNK BUFSIZ #endif -#if SIZEOF_INT < 4 -#define BIGCHUNK (512 * 32) -#else -#define BIGCHUNK (512 * 1024) -#endif - typedef struct { PyObject_HEAD int fd; @@ -528,15 +522,10 @@ new_buffersize(fileio *self, size_t currentsize) } } #endif - if (currentsize > SMALLCHUNK) { - /* Keep doubling until we reach BIGCHUNK; - then keep adding BIGCHUNK. */ - if (currentsize <= BIGCHUNK) - return currentsize + currentsize; - else - return currentsize + BIGCHUNK; - } - return currentsize + SMALLCHUNK; + /* Expand the buffer by an amount proportional to the current size, + giving us amortized linear-time behavior. Use a less-than-double + growth factor to avoid excessive allocation. */ + return currentsize + (currentsize >> 3) + 6; } static PyObject * diff --git a/Modules/bz2module.c b/Modules/bz2module.c index de2e20bbf7a..9c59c041692 100644 --- a/Modules/bz2module.c +++ b/Modules/bz2module.c @@ -224,25 +224,14 @@ Util_CatchBZ2Error(int bzerror) #define SMALLCHUNK BUFSIZ #endif -#if SIZEOF_INT < 4 -#define BIGCHUNK (512 * 32) -#else -#define BIGCHUNK (512 * 1024) -#endif - /* This is a hacked version of Python's fileobject.c:new_buffersize(). */ static size_t Util_NewBufferSize(size_t currentsize) { - if (currentsize > SMALLCHUNK) { - /* Keep doubling until we reach BIGCHUNK; - then keep adding BIGCHUNK. */ - if (currentsize <= BIGCHUNK) - return currentsize + currentsize; - else - return currentsize + BIGCHUNK; - } - return currentsize + SMALLCHUNK; + /* Expand the buffer by an amount proportional to the current size, + giving us amortized linear-time behavior. Use a less-than-double + growth factor to avoid excessive allocation. */ + return currentsize + (currentsize >> 3) + 6; } /* This is a hacked version of Python's fileobject.c:get_line(). */ diff --git a/Objects/fileobject.c b/Objects/fileobject.c index edd839e8b0d..737ebb76350 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -992,12 +992,6 @@ file_isatty(PyFileObject *f) #define SMALLCHUNK BUFSIZ #endif -#if SIZEOF_INT < 4 -#define BIGCHUNK (512 * 32) -#else -#define BIGCHUNK (512 * 1024) -#endif - static size_t new_buffersize(PyFileObject *f, size_t currentsize) { @@ -1026,15 +1020,10 @@ new_buffersize(PyFileObject *f, size_t currentsize) /* Add 1 so if the file were to grow we'd notice. */ } #endif - if (currentsize > SMALLCHUNK) { - /* Keep doubling until we reach BIGCHUNK; - then keep adding BIGCHUNK. */ - if (currentsize <= BIGCHUNK) - return currentsize + currentsize; - else - return currentsize + BIGCHUNK; - } - return currentsize + SMALLCHUNK; + /* Expand the buffer by an amount proportional to the current size, + giving us amortized linear-time behavior. Use a less-than-double + growth factor to avoid excessive allocation. */ + return currentsize + (currentsize >> 3) + 6; } #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN