PyNode_AddChild(): Do aggressive over-allocation when the number of
children gets large, to avoid severe platform realloc() degeneration
in extreme cases (like test_longexp). Bugfix candidate.

This was doing extremely timid over-allocation, just rounding up to
the nearest multiple of 3. Now, so long as the number of children is
<= 128, it rounds up to a multiple of 4, but via a much faster method.
When the number of children exceeds 128, though, and more space is
needed, it doubles the capacity. This is aggressive over-allocation.

SF patch <http://www.python.org/sf/578297> has Andrew MacIntyre using
PyMalloc in the parser to overcome platform malloc problems in
test_longexp on OS/2 EMX. Jack Jansen notes there that it didn't help
him on the Mac, because the Mac has problems with frequent ever-growing
reallocs, not just with gazillions of teensy mallocs.

Win98 has no visible problems with test_longexp, but I tried boosting
the test-case size and soon got "senseless" MemoryErrors out of it, and
soon after crashed the OS: as I've seen in many other contexts before,
while the Win98 realloc remains zippy in bad cases, it leads to extreme
fragmentation of user address space, to the point that the OS barfs.

I don't yet know whether this fixes Jack's Mac problems, but it does
cure Win98's problems when boosting the test case size. It also speeds
test_longexp in its unaltered state.
commit 755ebea23b
parent 059ed83cc3
Parser/node.c

@@ -18,25 +18,58 @@ PyNode_New(int type)
         return n;
 }
 
-#define XXX 3 /* Node alignment factor to speed up realloc */
-#define XXXROUNDUP(n) ((n) == 1 ? 1 : ((n) + XXX - 1) / XXX * XXX)
+/* See comments at XXXROUNDUP below. */
+static int
+fancy_roundup(int n)
+{
+        /* Round up to the closest power of 2 >= n. */
+        int result = 256;
+        assert(n > 128);
+        while (result < n)
+                result <<= 1;
+        return result;
+}
+
+/* A gimmick to make massive numbers of reallocs quicker. The result is
+ * a number >= the input. For n=0 we must return 0.
+ * For n=1, we return 1, to avoid wasting memory in common 1-child nodes
+ * (XXX are those actually common?).
+ * Else for n <= 128, round up to the closest multiple of 4. Why 4?
+ * Rounding up to a multiple of an exact power of 2 is very efficient.
+ * Else call fancy_roundup() to grow proportionately to n. We've got an
+ * extreme case then (like test_longexp.py), and on many platforms doing
+ * anything less than proportional growth leads to exorbitant runtime
+ * (e.g., MacPython), or extreme fragmentation of user address space (e.g.,
+ * Win98).
+ * This would be straightforward if a node stored its current capacity. The
+ * code is tricky to avoid that.
+ */
+#define XXXROUNDUP(n) ((n) == 1 ? 1 : \
+               (n) <= 128 ? (((n) + 3) & ~3) : \
+               fancy_roundup(n))
+
 
 int
 PyNode_AddChild(register node *n1, int type, char *str, int lineno)
 {
-        register int nch = n1->n_nchildren;
-        register int nch1 = nch+1;
-        register node *n;
+        const int nch = n1->n_nchildren;
+        int current_capacity;
+        int required_capacity;
+        node *n;
+
         if (nch == INT_MAX || nch < 0)
                 return E_OVERFLOW;
-        if (XXXROUNDUP(nch) < nch1) {
+
+        current_capacity = XXXROUNDUP(nch);
+        required_capacity = XXXROUNDUP(nch + 1);
+        if (current_capacity < required_capacity) {
                 n = n1->n_child;
-                nch1 = XXXROUNDUP(nch1);
-                PyMem_RESIZE(n, node, nch1);
+                PyMem_RESIZE(n, node, required_capacity);
                 if (n == NULL)
                         return E_NOMEM;
                 n1->n_child = n;
         }
+
         n = &n1->n_child[n1->n_nchildren++];
         n->n_type = type;
         n->n_str = str;
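To see the new policy in action outside the parser, here is a small
standalone program (a sketch, not part of the commit: fancy_roundup()
and XXXROUNDUP are copied from the diff above, while the main() driver
and its sample values are illustrative only):

/* Standalone sketch -- fancy_roundup() and XXXROUNDUP as committed,
 * plus a throwaway driver to print what the macro produces. */
#include <assert.h>
#include <stdio.h>

static int
fancy_roundup(int n)
{
        /* Round up to the closest power of 2 >= n (only reached for n > 128). */
        int result = 256;
        assert(n > 128);
        while (result < n)
                result <<= 1;
        return result;
}

#define XXXROUNDUP(n) ((n) == 1 ? 1 : \
               (n) <= 128 ? (((n) + 3) & ~3) : \
               fancy_roundup(n))

int
main(void)
{
        /* (n + 3) & ~3 adds 3 and clears the two low bits: that is
         * "round up to a multiple of 4" in one add and one mask, much
         * cheaper than the old divide-and-multiply by 3. */
        int samples[] = {0, 1, 2, 5, 127, 128, 129, 1000};
        size_t i;
        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); ++i)
                printf("XXXROUNDUP(%d) = %d\n",
                       samples[i], XXXROUNDUP(samples[i]));
        return 0;
}

For these inputs it prints 0, 1, 4, 8, 128, 128, 256, 1024: multiples
of 4 through 128, powers of 2 beyond, exactly the shape the comment
block describes.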
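The comment block's remark that the code is "tricky" because a node
never stores its capacity boils down to an invariant: the child array
was last resized to XXXROUNDUP of the then-current child count, so a
realloc is needed exactly when XXXROUNDUP(nch) < XXXROUNDUP(nch + 1).
Assuming the same fancy_roundup()/XXXROUNDUP definitions as in the
sketch above, this replacement driver (again illustrative only) shows
how rarely that fires:

/* Simulates PyNode_AddChild()'s capacity test; assumes the
 * fancy_roundup() and XXXROUNDUP definitions from the sketch above. */
int
main(void)
{
        int nch;
        for (nch = 0; nch < 2048; ++nch) {
                int current_capacity = XXXROUNDUP(nch);
                int required_capacity = XXXROUNDUP(nch + 1);
                if (current_capacity < required_capacity)
                        printf("child %d triggers realloc to %d slots\n",
                               nch + 1, required_capacity);
        }
        return 0;
}

Up through 128 children a resize fires on roughly every fourth
addition (to the next multiple of 4); past that, only children 129,
257, 513, and 1025 trigger one, each doubling the capacity, so reaching
2048 children costs a few dozen reallocs rather than the hundreds the
old multiple-of-3 scheme needed.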