cpython/Lib/test/sortperf.py
Guido van Rossum 805365ee39 Merged revisions 55007-55179 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/p3yk

........
  r55077 | guido.van.rossum | 2007-05-02 11:54:37 -0700 (Wed, 02 May 2007) | 2 lines

  Use the new print syntax, at least.
........
  r55142 | fred.drake | 2007-05-04 21:27:30 -0700 (Fri, 04 May 2007) | 1 line

  remove old cruftiness
........
  r55143 | fred.drake | 2007-05-04 21:52:16 -0700 (Fri, 04 May 2007) | 1 line

  make this work with the new Python
........
  r55162 | neal.norwitz | 2007-05-06 22:29:18 -0700 (Sun, 06 May 2007) | 1 line

  Get asdl code gen working with Python 2.3.  Should continue to work with 3.0
........
  r55164 | neal.norwitz | 2007-05-07 00:00:38 -0700 (Mon, 07 May 2007) | 1 line

  Verify checkins to p3yk (sic) branch go to 3000 list.
........
  r55166 | neal.norwitz | 2007-05-07 00:12:35 -0700 (Mon, 07 May 2007) | 1 line

  Fix this test so it runs again by importing warnings_test properly.
........
  r55167 | neal.norwitz | 2007-05-07 01:03:22 -0700 (Mon, 07 May 2007) | 8 lines

  So long xrange.  range() now supports values that are outside
  -sys.maxint to sys.maxint.  floats raise a TypeError.

  This has been sitting for a long time.  It probably has some problems and
  needs cleanup.  Objects/rangeobject.c now uses 4-space indents since
  it is almost completely new.
........
  r55171 | guido.van.rossum | 2007-05-07 10:21:26 -0700 (Mon, 07 May 2007) | 4 lines

  Fix two tests that were previously depending on significant spaces
  at the end of a line (and before that on Python 2.x print behavior
  that has no exact equivalent in 3.0).
........
2007-05-07 22:24:25 +00:00

170 lines
4.7 KiB
Python

"""Sort performance test.
See main() for command line syntax.
See tabulate() for output format.
"""
import sys
import time
import random
import marshal
import tempfile
import os
# Directory holding the cached random-float files used by randfloats().
td = tempfile.gettempdir()


def _load_floats(fn, n):
    """Return the list of n floats cached in file fn, or None if unusable.

    None is returned when the file cannot be opened, when marshal cannot
    decode it, or when it does not hold a list of exactly n items.
    """
    try:
        fp = open(fn, "rb")
    except IOError:
        return None
    # NOTE(review): marshal is not hardened against malicious data, and the
    # cache name inside the temp directory is predictable.  Acceptable for a
    # developer benchmark; do not reuse this pattern on untrusted files.
    with fp:
        try:
            values = marshal.load(fp)
        except (EOFError, ValueError, TypeError):
            # This is how marshal reports truncated or incompatible data.
            return None
    if not isinstance(values, list) or len(values) != n:
        return None
    return values


def _save_floats(fn, values):
    """Best-effort write of values to file fn in marshal format.

    An IOError is reported on stdout and otherwise ignored; any other
    exception propagates.  A file that was opened but not written
    completely is removed, so no partial cache is left behind.
    """
    try:
        fp = open(fn, "wb")
    except IOError as msg:
        # Nothing was created, so there is nothing to clean up.
        print("can't write", fn, ":", msg)
        return
    complete = False
    try:
        with fp:
            marshal.dump(values, fp)
        complete = True
    except IOError as msg:
        print("can't write", fn, ":", msg)
    finally:
        # The file is already closed here, so unlinking also works on
        # platforms that refuse to remove open files.
        if not complete:
            try:
                os.unlink(fn)
            except OSError:
                pass


def randfloats(n):
    """Return a list of n random floats in [0, 1).

    Generating floats is expensive, so they are written to the file
    "rr%06d" % n in the directory td.  If a usable file already exists it
    is read back instead; an unreadable or mismatching file is regenerated.
    Either way the list is then shuffled a bit by moving a randomly sized,
    reversed prefix to the end ten times.
    """
    fn = os.path.join(td, "rr%06d" % n)
    result = _load_floats(fn, n)
    if result is None:
        r = random.random
        result = [r() for i in range(n)]
        _save_floats(fn, result)
    # Shuffle it a bit...  (randrange(0) would raise, and an empty list
    # has nothing to shuffle anyway.)
    if n:
        for dummy in range(10):
            i = random.randrange(n)
            head = result[:i]
            del result[:i]
            head.reverse()
            result.extend(head)
    assert len(result) == n
    return result
def flush():
    """Flush sys.stdout so partially printed rows show up immediately."""
    sys.stdout.flush()


def doit(L):
    """Sort the list L in place and print how long the sort took.

    The elapsed time in seconds is printed with two decimals, followed by
    a space instead of a newline, and stdout is flushed.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented high-resolution replacement for timing short intervals.
    t0 = time.perf_counter()
    L.sort()
    t1 = time.perf_counter()
    print("%6.2f" % (t1-t0), end=' ')
    flush()
def tabulate(r):
    r"""Tabulate sort speed for lists of various sizes.

    The sizes are 2**i for i in r (the argument, an iterable of ints).

    The output displays i, 2**i, and the time to sort arrays of 2**i
    floating point numbers with the following properties:

    *sort: random data
    \sort: descending data
    /sort: ascending data
    3sort: ascending, then 3 random exchanges
    +sort: ascending, then 10 random at the end
    %sort: ascending, then randomly replace 1% of the elements w/ random values
    ~sort: many duplicates
    =sort: all equal
    !sort: worst case scenario

    """
    cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"])
    fmt = ("%2s %7s" + " %6s"*len(cases))
    print(fmt % (("i", "2**i") + cases))
    for i in r:
        n = 1 << i
        L = randfloats(n)
        print("%2d %7d" % (i, n), end=' ')
        flush()
        doit(L) # *sort
        L.reverse()
        doit(L) # \sort
        doit(L) # /sort

        # Do 3 random exchanges.
        for dummy in range(3):
            i1 = random.randrange(n)
            i2 = random.randrange(n)
            L[i1], L[i2] = L[i2], L[i1]
        doit(L) # 3sort

        # Replace the last 10 with random floats.
        if n >= 10:
            L[-10:] = [random.random() for dummy in range(10)]
        doit(L) # +sort

        # Replace 1% of the elements at random.
        for dummy in range(n // 100):
            L[random.randrange(n)] = random.random()
        doit(L) # %sort

        # Arrange for lots of duplicates.
        if n > 4:
            del L[4:]
            L = L * (n // 4)
            # Force the elements to be distinct objects, else timings can be
            # artificially low.  map() is lazy in Python 3, so build a real
            # list: doit() needs list.sort().
            L = list(map(lambda x: --x, L))
        doit(L) # ~sort
        del L

        # All equal.  Again, force the elements to be distinct objects.
        L = list(map(abs, [-0.5] * n))
        doit(L) # =sort
        del L

        # This one looks like [3, 2, 1, 0, 0, 1, 2, 3].  It was a bad case
        # for an older implementation of quicksort, which used the median
        # of the first, last and middle elements as the pivot.
        half = n // 2
        # range() objects have no extend(); materialize a list first.
        L = list(range(half - 1, -1, -1))
        L.extend(range(half))
        # Force to float, so that the timings are comparable.  This is
        # significantly faster if we leave them as ints.
        L = list(map(float, L))
        doit(L) # !sort
        print()
def main():
    """Main program when invoked as a script.

    No arguments: tabulate the default range, 15 through 20.
    One argument: tabulate a single row.
    Two arguments: tabulate a range (inclusive).
    Extra arguments are used to seed the random generator.

    Raises ValueError if the first or second argument is not an integer.
    """
    args = sys.argv[1:]
    # default range (inclusive)
    k1 = 15
    k2 = 20
    if args:
        # one argument: single point
        k1 = k2 = int(args[0])
    if len(args) >= 2:
        # two arguments: specify range
        k2 = int(args[1])
    seed_args = args[2:]
    if seed_args:
        # Seed from the text of the remaining arguments.  hash() of a str is
        # randomized per process, so it cannot give a repeatable seed;
        # random.seed() accepts a str directly and is deterministic.
        # Command-line arguments cannot contain NUL, so the join is
        # unambiguous.
        random.seed("\0".join(seed_args))
    r = range(k1, k2+1)                 # include the end point
    tabulate(r)


if __name__ == '__main__':
    main()