cpython/Lib/test/sortperf.py

170 lines
4.7 KiB
Python
Raw Normal View History

1998-05-11 02:27:29 +08:00
"""Sort performance test.
See main() for command line syntax.
See tabulate() for output format.
"""
1998-05-11 02:20:05 +08:00
import sys
import time
2002-04-10 22:54:39 +08:00
import random
1998-05-11 02:20:05 +08:00
import marshal
import tempfile
import os
# Directory in which generated float lists are cached between runs.
td = tempfile.gettempdir()


def _load_cached_floats(fn, n):
    """Return the list of n values cached in file fn, or None if unusable.

    None is returned when the file cannot be opened, cannot be decoded by
    marshal (for example because it is empty or truncated), or does not
    hold a list of exactly n items.
    """
    # NOTE(security): marshal is not designed to be safe against malicious
    # input, and fn is a predictable name inside a temp directory that may
    # be shared with other users.  Only run this where that directory is
    # trusted.
    try:
        with open(fn, "rb") as fp:
            cached = marshal.load(fp)
    except (OSError, EOFError, ValueError, TypeError):
        return None
    if isinstance(cached, list) and len(cached) == n:
        return cached
    return None


def _store_floats(fn, values):
    """Best-effort write of values to file fn using marshal.

    A failure to write is reported on stdout rather than raised, because
    the cache is only an optimisation.
    """
    try:
        fp = open(fn, "wb")
        try:
            with fp:
                marshal.dump(values, fp)
        except BaseException:
            # The file was created but may be incomplete; remove it so a
            # later run does not try to read a partial cache.
            try:
                os.unlink(fn)
            except OSError:
                pass
            raise
    except OSError as msg:
        print("can't write", fn, ":", msg)


def randfloats(n):
    """Return a list of n random floats in [0, 1).

    Generating floats is expensive, so the list is written out to a file
    in the temp directory ``td``.  If a usable file already exists, its
    contents are read back in and shuffled a bit instead of being
    regenerated.  An unreadable or mismatched cache file is ignored and
    replaced with freshly generated data.
    """
    fn = os.path.join(td, "rr%06d" % n)
    result = _load_cached_floats(fn, n)
    if result is None:
        r = random.random
        result = [r() for _ in range(n)]
        _store_floats(fn, result)
    elif n > 0:
        # Shuffle it a bit: ten times, move a random-length prefix to the
        # end of the list in reversed order.  (Skipped for an empty list,
        # where randrange(0) would raise ValueError.)
        for _ in range(10):
            cut = random.randrange(n)
            head = result[:cut]
            head.reverse()
            result = result[cut:] + head
    assert len(result) == n
    return result
def flush():
    """Flush sys.stdout so partially printed table rows appear at once."""
    sys.stdout.flush()
def doit(L):
    """Sort the list L in place and print how long the sort took.

    The elapsed time in seconds is printed as a ``%6.2f`` field followed
    by a single space and no newline, and stdout is flushed so the value
    is visible immediately.  Nothing is returned.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # high-resolution timer intended for measuring short durations.
    t0 = time.perf_counter()
    L.sort()
    t1 = time.perf_counter()
    print("%6.2f" % (t1 - t0), end=' ', flush=True)
1998-05-11 02:20:05 +08:00
def tabulate(r):
    r"""Tabulate sort speed for lists of various sizes.

    The sizes are 2**i for i in r (the argument, an iterable of integer
    exponents).

    The output displays i, 2**i, and the time to sort arrays of 2**i
    floating point numbers with the following properties:

    *sort: random data
    \sort: descending data
    /sort: ascending data
    3sort: ascending, then 3 random exchanges
    +sort: ascending, then 10 random at the end
    %sort: ascending, then randomly replace 1% of the elements w/ random values
    ~sort: many duplicates
    =sort: all equal
    !sort: worst case scenario

    One header line is printed first, then one row per value of i.
    """
    cases = tuple(ch + "sort" for ch in r"*\/3+%~=!")
    fmt = ("%2s %7s" + " %6s" * len(cases))
    print(fmt % (("i", "2**i") + cases))
    for i in r:
        n = 1 << i
        L = randfloats(n)
        print("%2d %7d" % (i, n), end=' ')
        flush()
        doit(L)  # *sort
        # Each doit() call leaves L sorted ascending, so reversing it here
        # produces descending input for the next measurement.
        L.reverse()
        doit(L)  # \sort
        doit(L)  # /sort

        # Do 3 random exchanges.
        for dummy in range(3):
            i1 = random.randrange(n)
            i2 = random.randrange(n)
            L[i1], L[i2] = L[i2], L[i1]
        doit(L)  # 3sort

        # Replace the last 10 with random floats.
        if n >= 10:
            L[-10:] = [random.random() for dummy in range(10)]
        doit(L)  # +sort

        # Replace 1% of the elements at random.
        for dummy in range(n // 100):
            L[random.randrange(n)] = random.random()
        doit(L)  # %sort

        # Arrange for lots of duplicates.
        if n > 4:
            del L[4:]
            L = L * (n // 4)
            # Force the elements to be distinct objects, else timings can be
            # artificially low.
            L = list(map(lambda x: --x, L))
        doit(L)  # ~sort
        del L

        # All equal.  Again, force the elements to be distinct objects.
        L = list(map(abs, [-0.5] * n))
        doit(L)  # =sort
        del L

        # This one looks like [3, 2, 1, 0, 0, 1, 2, 3].  It was a bad case
        # for an older implementation of quicksort, which used the median
        # of the first, last and middle elements as the pivot.
        half = n // 2
        L = list(range(half - 1, -1, -1))
        L.extend(range(half))
        # Force to float, so that the timings are comparable.  This is
        # significantly faster if we leave them as ints.
        L = list(map(float, L))
        doit(L)  # !sort
        print()
1998-05-11 02:20:05 +08:00
def main():
    """Main program when invoked as a script.

    One argument: tabulate a single row.
    Two arguments: tabulate a range (inclusive).
    Extra arguments are used to seed the random generator.

    With no arguments the range 15..20 (inclusive) is tabulated.  The
    first two arguments must be integers; int() raises ValueError
    otherwise.
    """
    # default range (inclusive)
    k1 = 15
    k2 = 20
    if sys.argv[1:]:
        # one argument: single point
        k1 = k2 = int(sys.argv[1])
        if sys.argv[2:]:
            # two arguments: specify range
            k2 = int(sys.argv[2])
            if sys.argv[3:]:
                # Derive the random seed from the remaining arguments.
                # hash() of a str is randomised per process, so it cannot
                # give a reproducible seed; fold in the argument's bytes
                # as an integer instead.
                x = 1
                for a in sys.argv[3:]:
                    raw = a.encode("utf-8", "surrogateescape")
                    x = 69069 * x + int.from_bytes(raw, "big")
                random.seed(x)
    r = range(k1, k2 + 1)  # include the end point
    tabulate(r)
# Run the benchmark only when executed as a script, not when imported.
if __name__ == '__main__':
    main()