Give Python a debug-mode pymalloc, much as sketched on Python-Dev.

When WITH_PYMALLOC is defined, define PYMALLOC_DEBUG to enable the debug
allocator.  This can be done independent of build type (release or debug).
A debug build automatically defines PYMALLOC_DEBUG when pymalloc is
enabled.  It's a detected error to define PYMALLOC_DEBUG when pymalloc
isn't enabled.

Two debugging entry points defined only under PYMALLOC_DEBUG:

+ _PyMalloc_DebugCheckAddress(const void *p) can be used (e.g., from gdb)
  to sanity-check a memory block obtained from pymalloc.  It sprays
  info to stderr (see next) and dies via Py_FatalError if the block is
  detectably damaged.

+ _PyMalloc_DebugDumpAddress(const void *p) can be used to spray info
  about a debug memory block to stderr.

There's a tiny start at implementing "API family" checks, but it isn't
good for anything yet.

_PyMalloc_DebugRealloc() has been optimized to do little when the new
size is <= old size.  However, if the new size is larger, it really
can't call the underlying realloc() routine without either violating its
contract, or knowing something non-trivial about how the underlying
realloc() works.  A memcpy is always done in this case.

This was a disaster for one (and only one) of the std tests:  test_bufio
creates single text file lines up to a million characters long.  On
Windows, fileobject.c's get_line() uses the horridly funky
getline_via_fgets(), which keeps growing and growing a string object
hoping to find a newline.  It grew the string object 1000 bytes each
time, so for a million-character string it took approximately forever
(I gave up after a few minutes).

So, also:

fileobject.c, getline_via_fgets():  When a single line is outrageously
long, grow the string object at a mildly exponential rate, instead of
just 1000 bytes at a time.

That's enough so that a debug-build test_bufio finishes in about 5 seconds
on my Win98SE box.  I'm curious to try this on Win2K, because it has very
different memory behavior than Win9X, and test_bufio always took a factor
of 10 longer to complete on Win2K.  It *could* be that the endless
reallocs were simply killing it on Win2K even in the release build.
This commit is contained in:
Tim Peters 2002-03-23 10:03:50 +00:00
parent 91cc17d20e
commit ddea208be9
4 changed files with 346 additions and 28 deletions

View File

@ -61,6 +61,15 @@
#include "pyport.h"
/* Reconcile the pymalloc debugging switches:
 *   - when pymalloc is in use, a Py_DEBUG build turns PYMALLOC_DEBUG on
 *     automatically (unless it was already requested explicitly);
 *   - when pymalloc is not in use, asking for PYMALLOC_DEBUG is rejected
 *     at compile time.
 */
#ifdef WITH_PYMALLOC
#  if defined(Py_DEBUG) && !defined(PYMALLOC_DEBUG)
#    define PYMALLOC_DEBUG
#  endif
#else
#  ifdef PYMALLOC_DEBUG
#    error "PYMALLOC_DEBUG requires WITH_PYMALLOC"
#  endif
#endif
#include "pymem.h"
#include "object.h"

View File

@ -89,20 +89,34 @@ extern DL_IMPORT(void) PyMem_Free(void *);
it is recommended to write the test explicitly in the code.
Note that according to ANSI C, free(NULL) has no effect. */
/* pymalloc (private to the interpreter) */
#ifdef WITH_PYMALLOC
DL_IMPORT(void *) _PyMalloc_Malloc(size_t nbytes);
DL_IMPORT(void *) _PyMalloc_Realloc(void *p, size_t nbytes);
DL_IMPORT(void) _PyMalloc_Free(void *p);
#ifdef PYMALLOC_DEBUG
DL_IMPORT(void *) _PyMalloc_DebugMalloc(size_t nbytes, int family);
DL_IMPORT(void *) _PyMalloc_DebugRealloc(void *p, size_t nbytes, int family);
DL_IMPORT(void) _PyMalloc_DebugFree(void *p, int family);
DL_IMPORT(void) _PyMalloc_DebugDumpAddress(const void *p);
DL_IMPORT(void) _PyMalloc_DebugCheckAddress(const void *p);
#define _PyMalloc_MALLOC(N) _PyMalloc_DebugMalloc(N, 0)
#define _PyMalloc_REALLOC(P, N) _PyMalloc_DebugRealloc(P, N, 0)
#define _PyMalloc_FREE(P) _PyMalloc_DebugFree(P, 0)
#else /* WITH_PYMALLOC && ! PYMALLOC_DEBUG */
#define _PyMalloc_MALLOC _PyMalloc_Malloc
#define _PyMalloc_REALLOC _PyMalloc_Realloc
#define _PyMalloc_FREE _PyMalloc_Free
#else
#endif
#else /* ! WITH_PYMALLOC */
#define _PyMalloc_MALLOC PyMem_MALLOC
#define _PyMalloc_REALLOC PyMem_REALLOC
#define _PyMalloc_FREE PyMem_FREE
#endif
#endif /* WITH_PYMALLOC */
#ifdef __cplusplus

View File

@ -772,13 +772,9 @@ getline_via_fgets(FILE *fp)
* cautions about boosting that. 300 was chosen because the worst real-life
* text-crunching job reported on Python-Dev was a mail-log crawler where over
* half the lines were 254 chars.
* INCBUFSIZE is the amount by which we grow the buffer, if MAXBUFSIZE isn't
* enough. It doesn't much matter what this is set to: we only get here for
* absurdly long lines anyway.
*/
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
#define INCBUFSIZE 1000
char* p; /* temp */
char buf[MAXBUFSIZE];
PyObject* v; /* the string object result */
@ -786,6 +782,7 @@ getline_via_fgets(FILE *fp)
char* pvend; /* address one beyond last free slot */
size_t nfree; /* # of free buffer slots; pvend-pvfree */
size_t total_v_size; /* total # of slots in buffer */
size_t increment; /* amount to increment the buffer */
/* Optimize for normal case: avoid _PyString_Resize if at all
* possible via first reading into stack buffer "buf".
@ -853,7 +850,7 @@ getline_via_fgets(FILE *fp)
/* The stack buffer isn't big enough; malloc a string object and read
* into its buffer.
*/
total_v_size = MAXBUFSIZE + INCBUFSIZE;
total_v_size = MAXBUFSIZE << 1;
v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
if (v == NULL)
return v;
@ -897,7 +894,8 @@ getline_via_fgets(FILE *fp)
}
/* expand buffer and try again */
assert(*(pvend-1) == '\0');
total_v_size += INCBUFSIZE;
increment = total_v_size >> 2; /* mild exponential growth */
total_v_size += increment;
if (total_v_size > INT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"line is longer than a Python string can hold");
@ -907,14 +905,13 @@ getline_via_fgets(FILE *fp)
if (_PyString_Resize(&v, (int)total_v_size) < 0)
return NULL;
/* overwrite the trailing null byte */
pvfree = BUF(v) + (total_v_size - INCBUFSIZE - 1);
pvfree = BUF(v) + (total_v_size - increment - 1);
}
if (BUF(v) + total_v_size != p)
_PyString_Resize(&v, p - BUF(v));
return v;
#undef INITBUFSIZE
#undef MAXBUFSIZE
#undef INCBUFSIZE
}
#endif /* ifdef USE_FGETS_IN_GETLINE */

View File

@ -623,24 +623,10 @@ _PyMalloc_Realloc(void *p, size_t nbytes)
return (void *)bp;
}
/* calloc */
/* -- unused --
void *
_PyMalloc_Calloc(size_t nbel, size_t elsz)
{
void *p;
size_t nbytes;
nbytes = nbel * elsz;
p = _PyMalloc_Malloc(nbytes);
if (p != NULL)
memset(p, 0, nbytes);
return p;
}
*/
#else /* ! WITH_PYMALLOC */
/*==========================================================================*/
/* pymalloc not enabled: Redirect the entry points to the PyMem family. */
void *
_PyMalloc_Malloc(size_t n)
{
@ -686,3 +672,315 @@ _PyMalloc_Del(PyObject *op)
{
_PyMalloc_FREE(op);
}
#ifdef PYMALLOC_DEBUG
/*==========================================================================*/
/* A x-platform debugging allocator.
 *
 * The debug entry points below wrap _PyMalloc_Malloc/_PyMalloc_Free and
 * surround every block they hand out with bookkeeping bytes.  The three
 * fill patterns defined here are what those routines write into memory,
 * so that a memory dump shows at a glance which state a byte is in.
 */
#define PYMALLOC_CLEANBYTE 0xCB /* fills freshly allocated memory the caller hasn't written yet */
#define PYMALLOC_DEADBYTE 0xDB /* fills memory that was freed, or cut off by a shrinking realloc */
#define PYMALLOC_FORBIDDENBYTE 0xFB /* pad bytes on both sides of a block; must never change */
static ulong serialno = 0; /* bumped by every debug malloc and realloc call; stored in the block's tail */
/* Read 4 bytes at p as a big-endian ulong. */
static ulong
read4(const void *p)
{
const unsigned char *q = (unsigned char *)p;
return ((ulong)q[0] << 24) |
((ulong)q[1] << 16) |
((ulong)q[2] << 8) |
(ulong)q[3];
}
/* Store the low 32 bits of n at p in big-endian order: the most
   significant of those four bytes goes to p[0], the least to p[3].
   Any higher bits of n are dropped. */
static void
write4(void *p, ulong n)
{
    unsigned char *out = (unsigned char *)p;
    int shift;

    for (shift = 24; shift >= 0; shift -= 8)
        *out++ = (unsigned char)((n >> shift) & 0xff);
}
/* Verify that the debug block at p was handed out by API family `family`.
   The family id is the single byte stored 4 bytes before p.  On a
   mismatch, dump the block to stderr and abort via Py_FatalError.
   p must not be NULL. */
static void
check_family(const void *p, int family)
{
    const uchar *block = (const uchar *)p;
    char message[200];
    int recorded;

    assert(p != NULL);
    recorded = (int)block[-4];
    if (recorded == family)
        return;

    /* XXX better msg */
    PyOS_snprintf(message, sizeof(message),
                  "free or realloc from family #%d called, "
                  "but block was allocated by family #%d",
                  family, recorded);
    _PyMalloc_DebugDumpAddress(p);
    Py_FatalError(message);
}
/* Debug-mode malloc.

   Asks the underlying allocator for nbytes + 16 bytes and decorates the
   result.  Calling the underlying allocator's result p, and the requested
   size n, the layout is:

   p[0:4]
       n, the number of bytes originally asked for, as a 4-byte unsigned
       big-endian integer (big-endian is easier to read in a memory dump).
   p[4]
       The API "family" this call belongs to.  XXX todo XXX
   p[5:8]
       Copies of PYMALLOC_FORBIDDENBYTE.  Used to catch under- writes
       and reads.
   p[8:8+n]
       The requested memory, filled with copies of PYMALLOC_CLEANBYTE so
       that references to uninitialized memory stand out.  &p[8] is what
       the caller gets back.  Note that this is 8-byte aligned if PyMalloc
       handled the request itself.
   p[8+n:8+n+4]
       Copies of PYMALLOC_FORBIDDENBYTE.  Used to catch over- writes
       and reads.
   p[8+n+4:8+n+8]
       A serial number, incremented by 1 on each call to
       _PyMalloc_DebugMalloc and _PyMalloc_DebugRealloc, as a 4-byte
       unsigned big-endian integer.  If "bad memory" is detected later,
       the serial number gives an excellent way to set a breakpoint on the
       next run, to capture the instant at which this block was passed out.

   Returns NULL if the padded size overflows, doesn't fit in 4 bytes, or
   the underlying allocator fails.
*/
void *
_PyMalloc_DebugMalloc(size_t nbytes, int family)
{
    uchar *base;    /* what the underlying malloc returned */
    uchar *tail;    /* base + 8 + nbytes: first byte past the user data */
    size_t needed;  /* nbytes + 16 */

    assert(family == 0);

    ++serialno;
    needed = nbytes + 16;
    if (needed < nbytes) {
        /* the addition wrapped around */
        return NULL;
    }
    if ((needed >> 31) > 1) {
        /* The size can't be represented in 4 bytes.
           Obscure:  testing (needed >> 32) != 0 instead isn't an option,
           because C doesn't define what a right-shift by 32 does when
           size_t is a 32-bit type.  At least C guarantees size_t is an
           unsigned type. */
        return NULL;
    }

    base = _PyMalloc_Malloc(needed);    /* XXX derive from family */
    if (base == NULL)
        return NULL;

    /* Header: size, family id, then three leading pad bytes. */
    write4(base, nbytes);
    base[4] = (uchar)family;
    memset(base + 5, PYMALLOC_FORBIDDENBYTE, 3);

    /* Body: mark everything as not-yet-initialized. */
    if (nbytes > 0)
        memset(base + 8, PYMALLOC_CLEANBYTE, nbytes);

    /* Trailer: four pad bytes, then the serial number. */
    tail = base + 8 + nbytes;
    memset(tail, PYMALLOC_FORBIDDENBYTE, 4);
    write4(tail + 4, serialno);

    return base + 8;
}
/* Debug-mode free.

   A NULL p is ignored.  Otherwise the block's family byte and its pad
   bytes on both ends are validated (a damaged block is dumped to stderr
   and the process dies via Py_FatalError), the size stored in the header
   is used to overwrite the caller-visible bytes with PYMALLOC_DEADBYTE,
   and finally the whole block, starting 8 bytes before p, is handed back
   to the underlying free.
*/
void
_PyMalloc_DebugFree(void *p, int family)
{
    uchar *data = (uchar *)p;
    size_t size;

    assert(family == 0);
    if (p != NULL) {
        check_family(p, family);
        _PyMalloc_DebugCheckAddress(p);

        size = read4(data - 8);
        if (size > 0)
            memset(data, PYMALLOC_DEADBYTE, size);
        _PyMalloc_Free(data - 8);    /* XXX derive from family */
    }
}
/* Debug-mode realloc.

   p         block previously returned by the debug allocator, or NULL
   nbytes    new size requested
   family    API family id (only 0 is supported so far)

   A NULL p behaves like _PyMalloc_DebugMalloc(nbytes, family).  Otherwise
   the block is validated first (family byte and pad bytes; a damaged
   block is dumped to stderr and the process dies via Py_FatalError).

   - Same size:  only the serial number in the trailer is refreshed, and
     p is returned.
   - Smaller:    the block is trimmed in place.  The cut-off bytes are
     filled with PYMALLOC_DEADBYTE, the header size is updated and a new
     trailer is written right after the surviving data; p is returned.
   - Larger:     a new debug block is allocated, the old contents are
     copied over, and the old block is freed.  The underlying realloc
     can't be used here without knowing how it works internally, so a
     copy is always made.

   Returns the (possibly moved) block, or NULL if more memory was needed
   and could not be obtained.  In that case the original block is left
   untouched and is still owned by the caller, as with the C library's
   realloc.
*/
void *
_PyMalloc_DebugRealloc(void *p, size_t nbytes, int family)
{
    uchar *q = (uchar *)p;
    size_t original_nbytes;
    uchar *fresh;    /* new memory block, if needed */

    assert(family == 0);

    if (p == NULL)
        return _PyMalloc_DebugMalloc(nbytes, family);

    check_family(p, family);
    _PyMalloc_DebugCheckAddress(p);

    original_nbytes = read4(q-8);
    if (nbytes == original_nbytes) {
        /* note that this case is likely to be common due to the
           way Python appends to lists */
        ++serialno;
        write4(q + nbytes + 4, serialno);
        return p;
    }

    if (nbytes < original_nbytes) {
        /* shrinking -- leave the guts alone, except to
           fill the excess with DEADBYTE */
        const size_t excess = original_nbytes - nbytes;
        ++serialno;
        write4(q-8, nbytes);
        /* kill the excess bytes plus the trailing 8 pad bytes */
        memset(q + nbytes, PYMALLOC_DEADBYTE, excess + 8);
        /* rebuild the trailer right behind the surviving data */
        q += nbytes;
        q[0] = q[1] = q[2] = q[3] = PYMALLOC_FORBIDDENBYTE;
        write4(q+4, serialno);
        return p;
    }

    /* More memory is needed:  get it, copy over the first original_nbytes
       of the original data, and free the original memory. */
    fresh = (uchar *)_PyMalloc_DebugMalloc(nbytes, family);
    if (fresh == NULL) {
        /* Allocation failed.  The original block must stay valid:
           callers are entitled to keep using it or to free it
           themselves, so freeing it here would set up a use-after-free
           or a double free. */
        return NULL;
    }
    if (original_nbytes > 0)
        memcpy(fresh, p, original_nbytes);
    _PyMalloc_DebugFree(p, family);
    return fresh;
}
/* Sanity-check the debug block whose user data starts at p.

   Complains if p is NULL, if any of the 3 pad bytes just before p isn't
   PYMALLOC_FORBIDDENBYTE, or if any of the 4 pad bytes following the
   user data (located via the size stored in the header) isn't.  When a
   problem is found the block is dumped to stderr and the process dies
   via Py_FatalError; otherwise the function returns silently.
*/
void
_PyMalloc_DebugCheckAddress(const void *p)
{
    const uchar *data = (const uchar *)p;
    char *complaint = NULL;

    if (p == NULL) {
        complaint = "didn't expect a NULL pointer";
    }
    else if (data[-3] != PYMALLOC_FORBIDDENBYTE ||
             data[-2] != PYMALLOC_FORBIDDENBYTE ||
             data[-1] != PYMALLOC_FORBIDDENBYTE) {
        complaint = "bad leading pad byte";
    }
    else {
        /* The leading pad looks sane, so trust the header's size
           enough to go looking for the trailing pad. */
        const ulong size = read4(data - 8);
        const uchar *tail = data + size;
        int k = 0;

        while (k < 4 && tail[k] == PYMALLOC_FORBIDDENBYTE)
            ++k;
        if (k < 4)
            complaint = "bad trailing pad byte";
    }

    if (complaint == NULL)
        return;
    _PyMalloc_DebugDumpAddress(p);
    Py_FatalError(complaint);
}
/* Describe the debug block whose user data starts at p on stderr.

   Reports, in this order:  the address, the size recorded in the header,
   the API family byte, the state of the 3 leading and the 4 trailing pad
   bytes (listing each byte individually when any of them is damaged),
   the serial number stored in the trailer, and up to 8 data bytes from
   each end of the user data.  For a NULL p only the address line is
   printed.  Nothing is validated beyond what is printed; this function
   never aborts on its own.
*/
void
_PyMalloc_DebugDumpAddress(const void *p)
{
    const uchar *cursor = (const uchar *)p;
    const uchar *tail;
    ulong size, serial;
    int k;

    fprintf(stderr, "Debug memory block at address p=%p:\n", p);
    if (p == NULL)
        return;

    size = read4(cursor - 8);
    fprintf(stderr, " %lu bytes originally allocated\n", size);
    fprintf(stderr, " from API family #%d\n", cursor[-4]);

    /* In case this is nuts, check the pad bytes before trying to read up
       the serial number (the address deref could blow up). */

    fputs(" the 3 pad bytes at p-3 are ", stderr);
    if (cursor[-3] != PYMALLOC_FORBIDDENBYTE ||
        cursor[-2] != PYMALLOC_FORBIDDENBYTE ||
        cursor[-1] != PYMALLOC_FORBIDDENBYTE) {
        fprintf(stderr, "not all PYMALLOC_FORBIDDENBYTE (0x%02x):\n",
                PYMALLOC_FORBIDDENBYTE);
        for (k = 3; k >= 1; --k) {
            const uchar byte = cursor[-k];
            fprintf(stderr, " at p-%d: 0x%02x", k, byte);
            if (byte != PYMALLOC_FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }
    }
    else {
        fputs("PYMALLOC_FORBIDDENBYTE, as expected\n", stderr);
    }

    tail = cursor + size;
    fprintf(stderr, " the 4 pad bytes at tail=%p are ", tail);
    if (tail[0] != PYMALLOC_FORBIDDENBYTE ||
        tail[1] != PYMALLOC_FORBIDDENBYTE ||
        tail[2] != PYMALLOC_FORBIDDENBYTE ||
        tail[3] != PYMALLOC_FORBIDDENBYTE) {
        fprintf(stderr, "not all PYMALLOC_FORBIDDENBYTE (0x%02x):\n",
                PYMALLOC_FORBIDDENBYTE);
        for (k = 0; k < 4; ++k) {
            const uchar byte = tail[k];
            fprintf(stderr, " at tail+%d: 0x%02x",
                    k, byte);
            if (byte != PYMALLOC_FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }
    }
    else {
        fputs("PYMALLOC_FORBIDDENBYTE, as expected\n", stderr);
    }

    serial = read4(tail + 4);
    fprintf(stderr, " the block was made by call #%lu to "
            "debug malloc/realloc\n", serial);

    if (size == 0)
        return;

    fputs(" data at p:", stderr);
    /* print up to 8 bytes at the start */
    for (k = 0; cursor < tail && k < 8; ++k, ++cursor)
        fprintf(stderr, " %02x", *cursor);
    /* and up to 8 at the end */
    if (cursor < tail) {
        if (tail - cursor > 8) {
            /* skip the middle of a long block */
            fputs(" ...", stderr);
            cursor = tail - 8;
        }
        for (; cursor < tail; ++cursor)
            fprintf(stderr, " %02x", *cursor);
    }
    fputc('\n', stderr);
}
#endif /* PYMALLOC_DEBUG */