cpython/Lib/test/sortperf.py

170 lines
4.6 KiB
Python
Raw Normal View History

1998-05-10 15:27:29 -03:00
"""Sort performance test.
See main() for command line syntax.
See tabulate() for output format.
"""
1998-05-10 15:20:05 -03:00
import sys
import time
2002-04-10 11:54:39 -03:00
import random
1998-05-10 15:20:05 -03:00
import marshal
import tempfile
import os
td = tempfile.gettempdir()
def randfloats(n):
"""Return a list of n random floats in [0, 1)."""
# Generating floats is expensive, so this writes them out to a file in
# a temp directory. If the file already exists, it just reads them
# back in and shuffles them a bit.
1998-05-10 15:20:05 -03:00
fn = os.path.join(td, "rr%06d" % n)
try:
fp = open(fn, "rb")
except IOError:
r = random.random
result = [r() for i in xrange(n)]
1998-05-10 15:20:05 -03:00
try:
try:
fp = open(fn, "wb")
marshal.dump(result, fp)
fp.close()
fp = None
finally:
if fp:
try:
os.unlink(fn)
except os.error:
pass
except IOError, msg:
print "can't write", fn, ":", msg
else:
result = marshal.load(fp)
fp.close()
# Shuffle it a bit...
for i in range(10):
i = random.randrange(n)
1998-05-10 15:20:05 -03:00
temp = result[:i]
del result[:i]
temp.reverse()
result.extend(temp)
1998-05-10 15:20:05 -03:00
del temp
assert len(result) == n
1998-05-10 15:20:05 -03:00
return result
def flush():
1998-05-10 15:20:05 -03:00
sys.stdout.flush()
def doit(L):
t0 = time.clock()
L.sort()
t1 = time.clock()
print "%6.2f" % (t1-t0),
flush()
1998-05-10 15:20:05 -03:00
def tabulate(r):
1998-05-10 15:27:29 -03:00
"""Tabulate sort speed for lists of various sizes.
The sizes are 2**i for i in r (the argument, a list).
The output displays i, 2**i, and the time to sort arrays of 2**i
floating point numbers with the following properties:
*sort: random data
\sort: descending data
/sort: ascending data
3sort: ascending, then 3 random exchanges
+sort: ascending, then 10 random at the end
%sort: ascending, then randomly replace 1% of the elements w/ random values
1998-05-10 15:27:29 -03:00
~sort: many duplicates
=sort: all equal
!sort: worst case scenario
1998-05-10 15:27:29 -03:00
"""
cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"])
fmt = ("%2s %7s" + " %6s"*len(cases))
print fmt % (("i", "2**i") + cases)
1998-05-10 15:20:05 -03:00
for i in r:
n = 1 << i
L = randfloats(n)
print "%2d %7d" % (i, n),
flush()
1998-05-10 15:20:05 -03:00
doit(L) # *sort
L.reverse()
doit(L) # \sort
doit(L) # /sort
# Do 3 random exchanges.
for dummy in range(3):
i1 = random.randrange(n)
i2 = random.randrange(n)
L[i1], L[i2] = L[i2], L[i1]
doit(L) # 3sort
# Replace the last 10 with random floats.
if n >= 10:
L[-10:] = [random.random() for dummy in range(10)]
doit(L) # +sort
# Replace 1% of the elements at random.
for dummy in xrange(n // 100):
L[random.randrange(n)] = random.random()
doit(L) # %sort
# Arrange for lots of duplicates.
1998-05-10 15:20:05 -03:00
if n > 4:
1998-05-12 10:21:31 -03:00
del L[4:]
L = L * (n // 4)
# Force the elements to be distinct objects, else timings can be
# artificially low.
1998-05-12 10:21:31 -03:00
L = map(lambda x: --x, L)
1998-05-10 15:20:05 -03:00
doit(L) # ~sort
1998-05-12 10:21:31 -03:00
del L
# All equal. Again, force the elements to be distinct objects.
L = map(abs, [-0.5] * n)
doit(L) # =sort
del L
# This one looks like [3, 2, 1, 0, 0, 1, 2, 3]. It was a bad case
# for an older implementation of quicksort, which used the median
# of the first, last and middle elements as the pivot.
half = n // 2
L = range(half - 1, -1, -1)
L.extend(range(half))
# Force to float, so that the timings are comparable. This is
# significantly faster if we leave tham as ints.
L = map(float, L)
doit(L) # !sort
1998-05-10 15:20:05 -03:00
print
def main():
1998-05-10 15:27:29 -03:00
"""Main program when invoked as a script.
One argument: tabulate a single row.
Two arguments: tabulate a range (inclusive).
Extra arguments are used to seed the random generator.
"""
1998-05-10 15:20:05 -03:00
# default range (inclusive)
k1 = 15
k2 = 20
1998-05-10 15:20:05 -03:00
if sys.argv[1:]:
1998-05-10 15:27:29 -03:00
# one argument: single point
2001-02-09 07:51:27 -04:00
k1 = k2 = int(sys.argv[1])
1998-05-10 15:20:05 -03:00
if sys.argv[2:]:
1998-05-10 15:27:29 -03:00
# two arguments: specify range
2001-02-09 07:51:27 -04:00
k2 = int(sys.argv[2])
1998-05-10 15:20:05 -03:00
if sys.argv[3:]:
# derive random seed from remaining arguments
x = 1
1998-05-10 15:20:05 -03:00
for a in sys.argv[3:]:
x = 69069 * x + hash(a)
random.seed(x)
1998-05-10 15:27:29 -03:00
r = range(k1, k2+1) # include the end point
1998-05-10 15:20:05 -03:00
tabulate(r)
if __name__ == '__main__':
main()