mirror of
https://github.com/python/cpython.git
synced 2025-01-19 15:05:15 +08:00
613 lines
20 KiB
Python
Executable File
613 lines
20 KiB
Python
Executable File
#
|
|
# Class for profiling python code. rev 1.0 6/2/94
|
|
#
|
|
# Based on prior profile module by Sjoerd Mullender...
|
|
# which was hacked somewhat by: Guido van Rossum
|
|
#
|
|
# See profile.doc for more information
|
|
|
|
|
|
# Copyright 1994, by InfoSeek Corporation, all rights reserved.
|
|
# Written by James Roskind
|
|
#
|
|
# Permission to use, copy, modify, and distribute this Python software
|
|
# and its associated documentation for any purpose (subject to the
|
|
# restriction in the following sentence) without fee is hereby granted,
|
|
# provided that the above copyright notice appears in all copies, and
|
|
# that both that copyright notice and this permission notice appear in
|
|
# supporting documentation, and that the name of InfoSeek not be used in
|
|
# advertising or publicity pertaining to distribution of the software
|
|
# without specific, written prior permission. This permission is
|
|
# explicitly restricted to the copying and modification of the software
|
|
# to remain in Python, compiled Python, or other languages (such as C)
|
|
# wherein the modified or derived code is exclusively imported into a
|
|
# Python module.
|
|
#
|
|
# INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
|
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
# FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
|
|
# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
|
|
# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
|
|
# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
|
|
|
|
import sys
|
|
import os
|
|
import time
|
|
import string
|
|
import marshal
|
|
|
|
|
|
# Global variables
|
|
func_norm_dict = {}
|
|
func_norm_counter = 0
|
|
pid_string = `os.getpid()`
|
|
|
|
|
|
# Optimized intermodule references
|
|
ostimes = os.times
|
|
|
|
|
|
# Sample timer for use with
|
|
#i_count = 0
|
|
#def integer_timer():
|
|
# global i_count
|
|
# i_count = i_count + 1
|
|
# return i_count
|
|
#itimes = integer_timer # replace with C coded timer returning integers
|
|
|
|
#**************************************************************************
|
|
# The following are the static member functions for the profiler class
|
|
# Note that an instance of Profile() is *not* needed to call them.
|
|
#**************************************************************************
|
|
|
|
|
|
# simplified user interface
|
|
def run(statement, *args):
|
|
prof = Profile()
|
|
try:
|
|
prof = prof.run(statement)
|
|
except SystemExit:
|
|
pass
|
|
if args:
|
|
prof.dump_stats(args[0])
|
|
else:
|
|
return prof.print_stats()
|
|
|
|
# print help
|
|
def help():
|
|
for dirname in sys.path:
|
|
fullname = os.path.join(dirname, 'profile.doc')
|
|
if os.path.exists(fullname):
|
|
sts = os.system('${PAGER-more} '+fullname)
|
|
if sts: print '*** Pager exit status:', sts
|
|
break
|
|
else:
|
|
print 'Sorry, can\'t find the help file "profile.doc"',
|
|
print 'along the Python search path'
|
|
|
|
|
|
#**************************************************************************
|
|
# class Profile documentation:
|
|
#**************************************************************************
|
|
# self.cur is always a tuple. Each such tuple corresponds to a stack
|
|
# frame that is currently active (self.cur[-2]). The following are the
|
|
# definitions of its members. We use this external "parallel stack" to
|
|
# avoid contaminating the program that we are profiling. (old profiler
|
|
# used to write into the frames local dictionary!!) Derived classes
|
|
# can change the definition of some entries, as long as they leave
|
|
# [-2:] intact.
|
|
#
|
|
# [ 0] = Time that needs to be charged to the parent frame's function. It is
|
|
# used so that a function call will not have to access the timing data
|
|
# for the parents frame.
|
|
# [ 1] = Total time spent in this frame's function, excluding time in
|
|
# subfunctions
|
|
# [ 2] = Cumulative time spent in this frame's function, including time in
|
|
# all subfunctions to this frame.
|
|
# [-3] = Name of the function that corresonds to this frame.
|
|
# [-2] = Actual frame that we correspond to (used to sync exception handling)
|
|
# [-1] = Our parent 6-tuple (corresonds to frame.f_back)
|
|
#**************************************************************************
|
|
# Timing data for each function is stored as a 5-tuple in the dictionary
|
|
# self.timings[]. The index is always the name stored in self.cur[4].
|
|
# The following are the definitions of the members:
|
|
#
|
|
# [0] = The number of times this function was called, not counting direct
|
|
# or indirect recursion,
|
|
# [1] = Number of times this function appears on the stack, minus one
|
|
# [2] = Total time spent internal to this function
|
|
# [3] = Cumulative time that this function was present on the stack. In
|
|
# non-recursive functions, this is the total execution time from start
|
|
# to finish of each invocation of a function, including time spent in
|
|
# all subfunctions.
|
|
# [5] = A dictionary indicating for each function name, the number of times
|
|
# it was called by us.
|
|
#**************************************************************************
|
|
# We produce function names via a repr() call on the f_code object during
|
|
# profiling. This save a *lot* of CPU time. This results in a string that
|
|
# always looks like:
|
|
# <code object main at 87090, file "/a/lib/python-local/myfib.py", line 76>
|
|
# After we "normalize it, it is a tuple of filename, line, function-name.
|
|
# We wait till we are done profiling to do the normalization.
|
|
# *IF* this repr format changes, then only the normalization routine should
|
|
# need to be fixed.
|
|
#**************************************************************************
|
|
class Profile:
|
|
|
|
def __init__(self, *arg):
|
|
self.timings = {}
|
|
self.cur = None
|
|
self.cmd = ""
|
|
|
|
self.dispatch = { \
|
|
'call' : self.trace_dispatch_call, \
|
|
'return' : self.trace_dispatch_return, \
|
|
'exception': self.trace_dispatch_exception, \
|
|
}
|
|
|
|
if not arg:
|
|
self.timer = os.times
|
|
self.dispatcher = self.trace_dispatch
|
|
else:
|
|
self.timer = arg[0]
|
|
t = self.timer() # test out timer function
|
|
try:
|
|
if len(t) == 2:
|
|
self.dispatcher = self.trace_dispatch
|
|
else:
|
|
self.dispatcher = self.trace_dispatch_r
|
|
except:
|
|
self.dispatcher = self.trace_dispatch_i
|
|
self.t = self.get_time()
|
|
self.simulate_call('profiler')
|
|
|
|
|
|
def get_time(self): # slow simulation of method to acquire time
|
|
t = self.timer()
|
|
if type(t) == type(()) or type(t) == type([]):
|
|
t = reduce(lambda x,y: x+y, t, 0)
|
|
return t
|
|
|
|
|
|
# Heavily optimized dispatch routine for os.times() timer
|
|
|
|
def trace_dispatch(self, frame, event, arg):
|
|
t = self.timer()
|
|
t = t[0] + t[1] - self.t # No Calibration constant
|
|
# t = t[0] + t[1] - self.t - .00053 # Calibration constant
|
|
|
|
if self.dispatch[event](frame,t):
|
|
t = self.timer()
|
|
self.t = t[0] + t[1]
|
|
else:
|
|
r = self.timer()
|
|
self.t = r[0] + r[1] - t # put back unrecorded delta
|
|
return
|
|
|
|
|
|
|
|
# Dispatch routine for best timer program (return = scalar integer)
|
|
|
|
def trace_dispatch_i(self, frame, event, arg):
|
|
t = self.timer() - self.t # - 1 # Integer calibration constant
|
|
if self.dispatch[event](frame,t):
|
|
self.t = self.timer()
|
|
else:
|
|
self.t = self.timer() - t # put back unrecorded delta
|
|
return
|
|
|
|
|
|
# SLOW generic dispatch rountine for timer returning lists of numbers
|
|
|
|
def trace_dispatch_l(self, frame, event, arg):
|
|
t = self.get_time() - self.t
|
|
|
|
if self.dispatch[event](frame,t):
|
|
self.t = self.get_time()
|
|
else:
|
|
self.t = self.get_time()-t # put back unrecorded delta
|
|
return
|
|
|
|
|
|
def trace_dispatch_exception(self, frame, t):
|
|
rt, rtt, rct, rfn, rframe, rcur = self.cur
|
|
if (not rframe is frame) and rcur:
|
|
return self.trace_dispatch_return(rframe, t)
|
|
return 0
|
|
|
|
|
|
def trace_dispatch_call(self, frame, t):
|
|
fn = `frame.f_code`
|
|
|
|
# The following should be about the best approach, but
|
|
# we would need a function that maps from id() back to
|
|
# the actual code object.
|
|
# fn = id(frame.f_code)
|
|
# Note we would really use our own function, which would
|
|
# return the code address, *and* bump the ref count. We
|
|
# would then fix up the normalize function to do the
|
|
# actualy repr(fn) call.
|
|
|
|
# The following is an interesting alternative
|
|
# It doesn't do as good a job, and it doesn't run as
|
|
# fast 'cause repr() is written in C, and this is Python.
|
|
#fcode = frame.f_code
|
|
#code = fcode.co_code
|
|
#if ord(code[0]) == 127: # == SET_LINENO
|
|
# # see "opcode.h" in the Python source
|
|
# fn = (fcode.co_filename, ord(code[1]) | \
|
|
# ord(code[2]) << 8, fcode.co_name)
|
|
#else:
|
|
# fn = (fcode.co_filename, 0, fcode.co_name)
|
|
|
|
self.cur = (t, 0, 0, fn, frame, self.cur)
|
|
if self.timings.has_key(fn):
|
|
cc, ns, tt, ct, callers = self.timings[fn]
|
|
self.timings[fn] = cc, ns + 1, tt, ct, callers
|
|
else:
|
|
self.timings[fn] = 0, 0, 0, 0, {}
|
|
return 1
|
|
|
|
def trace_dispatch_return(self, frame, t):
|
|
# if not frame is self.cur[-2]: raise "Bad return", self.cur[3]
|
|
|
|
# Prefix "r" means part of the Returning or exiting frame
|
|
# Prefix "p" means part of the Previous or older frame
|
|
|
|
rt, rtt, rct, rfn, frame, rcur = self.cur
|
|
rtt = rtt + t
|
|
sft = rtt + rct
|
|
|
|
pt, ptt, pct, pfn, pframe, pcur = rcur
|
|
self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
|
|
|
|
cc, ns, tt, ct, callers = self.timings[rfn]
|
|
if not ns:
|
|
ct = ct + sft
|
|
cc = cc + 1
|
|
if callers.has_key(pfn):
|
|
callers[pfn] = callers[pfn] + 1 # hack: gather more
|
|
# stats such as the amount of time added to ct courtesy
|
|
# of this specific call, and the contribution to cc
|
|
# courtesy of this call.
|
|
else:
|
|
callers[pfn] = 1
|
|
self.timings[rfn] = cc, ns - 1, tt+rtt, ct, callers
|
|
|
|
return 1
|
|
|
|
# The next few function play with self.cmd. By carefully preloading
|
|
# our paralell stack, we can force the profiled result to include
|
|
# an arbitrary string as the name of the calling function.
|
|
# We use self.cmd as that string, and the resulting stats look
|
|
# very nice :-).
|
|
|
|
def set_cmd(self, cmd):
|
|
if self.cur[-1]: return # already set
|
|
self.cmd = cmd
|
|
self.simulate_call(cmd)
|
|
|
|
class fake_code:
|
|
def __init__(self, filename, line, name):
|
|
self.co_filename = filename
|
|
self.co_line = line
|
|
self.co_name = name
|
|
self.co_code = '\0' # anything but 127
|
|
|
|
def __repr__(self):
|
|
return (self.co_filename, self.co_line, self.co_name)
|
|
|
|
class fake_frame:
|
|
def __init__(self, code, prior):
|
|
self.f_code = code
|
|
self.f_back = prior
|
|
|
|
def simulate_call(self, name):
|
|
code = self.fake_code('profile', 0, name)
|
|
if self.cur:
|
|
pframe = self.cur[-2]
|
|
else:
|
|
pframe = None
|
|
frame = self.fake_frame(code, pframe)
|
|
a = self.dispatch['call'](frame, 0)
|
|
return
|
|
|
|
# collect stats from pending stack, including getting final
|
|
# timings for self.cmd frame.
|
|
|
|
def simulate_cmd_complete(self):
|
|
t = self.get_time() - self.t
|
|
while self.cur[-1]:
|
|
# We *can* cause assertion errors here if
|
|
# dispatch_trace_return checks for a frame match!
|
|
a = self.dispatch['return'](self.cur[-2], t)
|
|
t = 0
|
|
self.t = self.get_time() - t
|
|
|
|
|
|
def print_stats(self):
|
|
import pstats
|
|
pstats.Stats(self).strip_dirs().sort_stats(-1). \
|
|
print_stats()
|
|
|
|
def dump_stats(self, file):
|
|
f = open(file, 'w')
|
|
self.create_stats()
|
|
marshal.dump(self.stats, f)
|
|
f.close()
|
|
|
|
def create_stats(self):
|
|
self.simulate_cmd_complete()
|
|
self.snapshot_stats()
|
|
|
|
def snapshot_stats(self):
|
|
self.stats = {}
|
|
for func in self.timings.keys():
|
|
cc, ns, tt, ct, callers = self.timings[func]
|
|
nor_func = self.func_normalize(func)
|
|
nor_callers = {}
|
|
nc = 0
|
|
for func_caller in callers.keys():
|
|
nor_callers[self.func_normalize(func_caller)]=\
|
|
callers[func_caller]
|
|
nc = nc + callers[func_caller]
|
|
self.stats[nor_func] = cc, nc, tt, ct, nor_callers
|
|
|
|
|
|
# Override the following function if you can figure out
|
|
# a better name for the binary f_code entries. I just normalize
|
|
# them sequentially in a dictionary. It would be nice if we could
|
|
# *really* see the name of the underlying C code :-). Sometimes
|
|
# you can figure out what-is-what by looking at caller and callee
|
|
# lists (and knowing what your python code does).
|
|
|
|
def func_normalize(self, func_name):
|
|
global func_norm_dict
|
|
global func_norm_counter
|
|
global func_sequence_num
|
|
|
|
if func_norm_dict.has_key(func_name):
|
|
return func_norm_dict[func_name]
|
|
if type(func_name) == type(""):
|
|
long_name = string.split(func_name)
|
|
file_name = long_name[6][1:-2]
|
|
func = long_name[2]
|
|
lineno = long_name[8][:-1]
|
|
if '?' == func: # Until I find out how to may 'em...
|
|
file_name = 'python'
|
|
func_norm_counter = func_norm_counter + 1
|
|
func = pid_string + ".C." + `func_norm_counter`
|
|
result = file_name , string.atoi(lineno) , func
|
|
else:
|
|
result = func_name
|
|
func_norm_dict[func_name] = result
|
|
return result
|
|
|
|
|
|
# The following two methods can be called by clients to use
|
|
# a profiler to profile a statement, given as a string.
|
|
|
|
def run(self, cmd):
|
|
import __main__
|
|
dict = __main__.__dict__
|
|
self.runctx(cmd, dict, dict)
|
|
return self
|
|
|
|
def runctx(self, cmd, globals, locals):
|
|
self.set_cmd(cmd)
|
|
sys.setprofile(self.trace_dispatch)
|
|
try:
|
|
exec cmd in globals, locals
|
|
finally:
|
|
sys.setprofile(None)
|
|
|
|
# This method is more useful to profile a single function call.
|
|
def runcall(self, func, *args):
|
|
self.set_cmd(`func`)
|
|
sys.setprofile(self.trace_dispatch)
|
|
try:
|
|
apply(func, args)
|
|
finally:
|
|
sys.setprofile(None)
|
|
return self
|
|
|
|
|
|
#******************************************************************
|
|
# The following calculates the overhead for using a profiler. The
|
|
# problem is that it takes a fair amount of time for the profiler
|
|
# to stop the stopwatch (from the time it recieves an event).
|
|
# Similarly, there is a delay from the time that the profiler
|
|
# re-starts the stopwatch before the user's code really gets to
|
|
# continue. The following code tries to measure the difference on
|
|
# a per-event basis. The result can the be placed in the
|
|
# Profile.dispatch_event() routine for the given platform. Note
|
|
# that this difference is only significant if there are a lot of
|
|
# events, and relatively little user code per event. For example,
|
|
# code with small functions will typically benefit from having the
|
|
# profiler calibrated for the current platform. This *could* be
|
|
# done on the fly during init() time, but it is not worth the
|
|
# effort. Also note that if too large a value specified, then
|
|
# execution time on some functions will actually appear as a
|
|
# negative number. It is *normal* for some functions (with very
|
|
# low call counts) to have such negative stats, even if the
|
|
# calibration figure is "correct."
|
|
#
|
|
# One alternative to profile-time calibration adjustments (i.e.,
|
|
# adding in the magic little delta during each event) is to track
|
|
# more carefully the number of events (and cumulatively, the number
|
|
# of events during sub functions) that are seen. If this were
|
|
# done, then the arithmetic could be done after the fact (i.e., at
|
|
# display time). Currintly, we track only call/return events.
|
|
# These values can be deduced by examining the callees and callers
|
|
# vectors for each functions. Hence we *can* almost correct the
|
|
# internal time figure at print time (note that we currently don't
|
|
# track exception event processing counts). Unfortunately, there
|
|
# is currently no similar information for cumulative sub-function
|
|
# time. It would not be hard to "get all this info" at profiler
|
|
# time. Specifically, we would have to extend the tuples to keep
|
|
# counts of this in each frame, and then extend the defs of timing
|
|
# tuples to include the significant two figures. I'm a bit fearful
|
|
# that this additional feature will slow the heavily optimized
|
|
# event/time ratio (i.e., the profiler would run slower, fur a very
|
|
# low "value added" feature.)
|
|
#
|
|
# Plugging in the calibration constant doesn't slow down the
|
|
# profiler very much, and the accuracy goes way up.
|
|
#**************************************************************
|
|
|
|
def calibrate(self, m):
|
|
n = m
|
|
s = self.timer()
|
|
while n:
|
|
self.simple()
|
|
n = n - 1
|
|
f = self.timer()
|
|
my_simple = f[0]+f[1]-s[0]-s[1]
|
|
#print "Simple =", my_simple,
|
|
|
|
n = m
|
|
s = self.timer()
|
|
while n:
|
|
self.instrumented()
|
|
n = n - 1
|
|
f = self.timer()
|
|
my_inst = f[0]+f[1]-s[0]-s[1]
|
|
# print "Instrumented =", my_inst
|
|
avg_cost = (my_inst - my_simple)/m
|
|
#print "Delta/call =", avg_cost, "(profiler fixup constant)"
|
|
return avg_cost
|
|
|
|
# simulate a program with no profiler activity
|
|
def simple(self):
|
|
a = 1
|
|
pass
|
|
|
|
# simulate a program with call/return event processing
|
|
def instrumented(self):
|
|
a = 1
|
|
self.profiler_simulation(a, a, a)
|
|
|
|
# simulate an event processing activity (from user's perspective)
|
|
def profiler_simulation(self, x, y, z):
|
|
t = self.timer()
|
|
t = t[0] + t[1]
|
|
self.ut = t
|
|
|
|
|
|
|
|
#****************************************************************************
|
|
# OldProfile class documentation
|
|
#****************************************************************************
|
|
#
|
|
# The following derived profiler simulates the old style profile, providing
|
|
# errant results on recursive functions. The reason for the usefulnes of this
|
|
# profiler is that it runs faster (i.e., less overhead). It still creates
|
|
# all the caller stats, and is quite useful when there is *no* recursion
|
|
# in the user's code.
|
|
#
|
|
# This code also shows how easy it is to create a modified profiler.
|
|
#****************************************************************************
|
|
class OldProfile(Profile):
|
|
def trace_dispatch_exception(self, frame, t):
|
|
rt, rtt, rct, rfn, rframe, rcur = self.cur
|
|
if rcur and not rframe is frame:
|
|
return self.trace_dispatch_return(rframe, t)
|
|
return 0
|
|
|
|
def trace_dispatch_call(self, frame, t):
|
|
fn = `frame.f_code`
|
|
|
|
self.cur = (t, 0, 0, fn, frame, self.cur)
|
|
if self.timings.has_key(fn):
|
|
tt, ct, callers = self.timings[fn]
|
|
self.timings[fn] = tt, ct, callers
|
|
else:
|
|
self.timings[fn] = 0, 0, {}
|
|
return 1
|
|
|
|
def trace_dispatch_return(self, frame, t):
|
|
rt, rtt, rct, rfn, frame, rcur = self.cur
|
|
rtt = rtt + t
|
|
sft = rtt + rct
|
|
|
|
pt, ptt, pct, pfn, pframe, pcur = rcur
|
|
self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
|
|
|
|
tt, ct, callers = self.timings[rfn]
|
|
if callers.has_key(pfn):
|
|
callers[pfn] = callers[pfn] + 1
|
|
else:
|
|
callers[pfn] = 1
|
|
self.timings[rfn] = tt+rtt, ct + sft, callers
|
|
|
|
return 1
|
|
|
|
|
|
def snapshot_stats(self):
|
|
self.stats = {}
|
|
for func in self.timings.keys():
|
|
tt, ct, callers = self.timings[func]
|
|
nor_func = self.func_normalize(func)
|
|
nor_callers = {}
|
|
nc = 0
|
|
for func_caller in callers.keys():
|
|
nor_callers[self.func_normalize(func_caller)]=\
|
|
callers[func_caller]
|
|
nc = nc + callers[func_caller]
|
|
self.stats[nor_func] = nc, nc, tt, ct, nor_callers
|
|
|
|
|
|
|
|
#****************************************************************************
|
|
# HotProfile class documentation
|
|
#****************************************************************************
|
|
#
|
|
# This profiler is the fastest derived profile example. It does not
|
|
# calculate caller-callee relationships, and does not calculate cumulative
|
|
# time under a function. It only calculates time spent in a function, so
|
|
# it runs very quickly (re: very low overhead)
|
|
#****************************************************************************
|
|
class HotProfile(Profile):
|
|
def trace_dispatch_exception(self, frame, t):
|
|
rt, rtt, rfn, rframe, rcur = self.cur
|
|
if rcur and not rframe is frame:
|
|
return self.trace_dispatch_return(rframe, t)
|
|
return 0
|
|
|
|
def trace_dispatch_call(self, frame, t):
|
|
self.cur = (t, 0, frame, self.cur)
|
|
return 1
|
|
|
|
def trace_dispatch_return(self, frame, t):
|
|
rt, rtt, frame, rcur = self.cur
|
|
|
|
rfn = `frame.f_code`
|
|
|
|
pt, ptt, pframe, pcur = rcur
|
|
self.cur = pt, ptt+rt, pframe, pcur
|
|
|
|
if self.timings.has_key(rfn):
|
|
nc, tt = self.timings[rfn]
|
|
self.timings[rfn] = nc + 1, rt + rtt + tt
|
|
else:
|
|
self.timings[rfn] = 1, rt + rtt
|
|
|
|
return 1
|
|
|
|
|
|
def snapshot_stats(self):
|
|
self.stats = {}
|
|
for func in self.timings.keys():
|
|
nc, tt = self.timings[func]
|
|
nor_func = self.func_normalize(func)
|
|
self.stats[nor_func] = nc, nc, tt, 0, {}
|
|
|
|
|
|
|
|
#****************************************************************************
|
|
def Stats(*args):
|
|
print 'Report generating functions are in the "pstats" module\a'
|