# After we "normalize it, it is a tuple of filename, line, function-name. # We wait till we are done profiling to do the normalization. # *IF* this repr format changes, then only the normalization routine should # need to be fixed. #************************************************************************** class Profile: def __init__(self, *arg): self.timings = {} self.cur = None self.cmd = "" self.dispatch = { \ 'call' : self.trace_dispatch_call, \ 'return' : self.trace_dispatch_return, \ 'exception': self.trace_dispatch_exception, \ } if not arg: self.timer = os.times self.dispatcher = self.trace_dispatch else: self.timer = arg[0] t = self.timer() # test out timer function try: if len(t) == 2: self.dispatcher = self.trace_dispatch else: self.dispatcher = self.trace_dispatch_r except: self.dispatcher = self.trace_dispatch_i self.t = self.get_time() self.simulate_call('profiler') def get_time(self): # slow simulation of method to acquire time t = self.timer() if type(t) == type(()) or type(t) == type([]): t = reduce(lambda x,y: x+y, t, 0) return t # Heavily optimized dispatch routine for os.times() timer def trace_dispatch(self, frame, event, arg): t = self.timer() t = t[0] + t[1] - self.t # No Calibration constant # t = t[0] + t[1] - self.t - .00053 # Calibration constant if self.dispatch[event](frame,t): t = self.timer() self.t = t[0] + t[1] else: r = self.timer() self.t = r[0] + r[1] - t # put back unrecorded delta return # Dispatch routine for best timer program (return = scalar integer) def trace_dispatch_i(self, frame, event, arg): t = self.timer() - self.t # - 1 # Integer calibration constant if self.dispatch[event](frame,t): self.t = self.timer() else: self.t = self.timer() - t # put back unrecorded delta return # SLOW generic dispatch rountine for timer returning lists of numbers def trace_dispatch_l(self, frame, event, arg): t = self.get_time() - self.t if self.dispatch[event](frame,t): self.t = self.get_time() else: self.t = self.get_time()-t # put back unrecorded delta return def trace_dispatch_exception(self, frame, t): rt, rtt, rct, rfn, rframe, rcur = self.cur if (not rframe is frame) and rcur: return self.trace_dispatch_return(rframe, t) return 0 def trace_dispatch_call(self, frame, t): fn = `frame.f_code` # The following should be about the best approach, but # we would need a function that maps from id() back to # the actual code object. # fn = id(frame.f_code) # Note we would really use our own function, which would # return the code address, *and* bump the ref count. We # would then fix up the normalize function to do the # actualy repr(fn) call. # The following is an interesting alternative # It doesn't do as good a job, and it doesn't run as # fast 'cause repr() is written in C, and this is Python. 
    def trace_dispatch_exception(self, frame, t):
        rt, rtt, rct, rfn, rframe, rcur = self.cur
        if (not rframe is frame) and rcur:
            return self.trace_dispatch_return(rframe, t)
        return 0

    def trace_dispatch_call(self, frame, t):
        fn = `frame.f_code`
        # The following should be about the best approach, but
        # we would need a function that maps from id() back to
        # the actual code object.
        # fn = id(frame.f_code)
        # Note we would really use our own function, which would
        # return the code address, *and* bump the ref count.  We
        # would then fix up the normalize function to do the
        # actual repr(fn) call.

        # The following is an interesting alternative.  It doesn't
        # do as good a job, and it doesn't run as fast 'cause repr()
        # is written in C, and this is Python.
        # fcode = frame.f_code
        # code = fcode.co_code
        # if ord(code[0]) == 127:  # == SET_LINENO
        #     # see "opcode.h" in the Python source
        #     fn = (fcode.co_filename,
        #           ord(code[1]) | ord(code[2]) << 8, fcode.co_name)
        # else:
        #     fn = (fcode.co_filename, 0, fcode.co_name)

        self.cur = (t, 0, 0, fn, frame, self.cur)
        if self.timings.has_key(fn):
            cc, ns, tt, ct, callers = self.timings[fn]
            self.timings[fn] = cc, ns + 1, tt, ct, callers
        else:
            self.timings[fn] = 0, 0, 0, 0, {}
        return 1

    def trace_dispatch_return(self, frame, t):
        # if not frame is self.cur[-2]: raise "Bad return", self.cur[3]

        # Prefix "r" means part of the Returning or exiting frame
        # Prefix "p" means part of the Previous or older frame

        rt, rtt, rct, rfn, frame, rcur = self.cur
        rtt = rtt + t
        sft = rtt + rct

        pt, ptt, pct, pfn, pframe, pcur = rcur
        self.cur = pt, ptt + rt, pct + sft, pfn, pframe, pcur

        cc, ns, tt, ct, callers = self.timings[rfn]
        if not ns:
            ct = ct + sft
            cc = cc + 1
        if callers.has_key(pfn):
            callers[pfn] = callers[pfn] + 1  # hack: gather more
            # stats such as the amount of time added to ct courtesy
            # of this specific call, and the contribution to cc
            # courtesy of this call.
        else:
            callers[pfn] = 1
        self.timings[rfn] = cc, ns - 1, tt + rtt, ct, callers
        return 1
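    # For reference, each record on the self.cur stack has the shape
    #
    #     (rt, rtt, rct, rfn, rframe, rcur)
    #
    # where rt is the slice of time charged back to the caller at the
    # moment of the call event, rtt accumulates time spent inside the
    # frame itself, rct accumulates time spent in its callees, rfn is
    # the function id, rframe is the live frame, and rcur links to the
    # caller's record.  On return, sft = rtt + rct is the full cost of
    # the call; it is folded into the caller's callee total, and into
    # ct in self.timings only when ns == 0, i.e. for non-recursive
    # ("primitive") invocations.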
    # The next few functions play with self.cmd.  By carefully preloading
    # our parallel stack, we can force the profiled result to include
    # an arbitrary string as the name of the calling function.
    # We use self.cmd as that string, and the resulting stats look
    # very nice :-).

    def set_cmd(self, cmd):
        if self.cur[-1]: return  # already set
        self.cmd = cmd
        self.simulate_call(cmd)

    class fake_code:
        def __init__(self, filename, line, name):
            self.co_filename = filename
            self.co_line = line
            self.co_name = name
            self.co_code = '\0'  # anything but 127

        def __repr__(self):
            return (self.co_filename, self.co_line, self.co_name)

    class fake_frame:
        def __init__(self, code, prior):
            self.f_code = code
            self.f_back = prior

    def simulate_call(self, name):
        code = self.fake_code('profile', 0, name)
        if self.cur:
            pframe = self.cur[-2]
        else:
            pframe = None
        frame = self.fake_frame(code, pframe)
        a = self.dispatch['call'](frame, 0)
        return

    # collect stats from pending stack, including getting final
    # timings for the self.cmd frame.

    def simulate_cmd_complete(self):
        t = self.get_time() - self.t
        while self.cur[-1]:
            # We *can* cause assertion errors here if
            # dispatch_trace_return checks for a frame match!
            a = self.dispatch['return'](self.cur[-2], t)
            t = 0
        self.t = self.get_time() - t

    def print_stats(self):
        import pstats
        pstats.Stats(self).strip_dirs().sort_stats(-1).print_stats()

    def dump_stats(self, file):
        f = open(file, 'w')
        self.create_stats()
        marshal.dump(self.stats, f)
        f.close()

    def create_stats(self):
        self.simulate_cmd_complete()
        self.snapshot_stats()

    def snapshot_stats(self):
        self.stats = {}
        for func in self.timings.keys():
            cc, ns, tt, ct, callers = self.timings[func]
            nor_func = self.func_normalize(func)
            nor_callers = {}
            nc = 0
            for func_caller in callers.keys():
                nor_callers[self.func_normalize(func_caller)] = \
                    callers[func_caller]
                nc = nc + callers[func_caller]
            self.stats[nor_func] = cc, nc, tt, ct, nor_callers

    # Override the following function if you can figure out
    # a better name for the binary f_code entries.  I just normalize
    # them sequentially in a dictionary.  It would be nice if we could
    # *really* see the name of the underlying C code :-).  Sometimes
    # you can figure out what-is-what by looking at caller and callee
    # lists (and knowing what your python code does).

    def func_normalize(self, func_name):
        global func_norm_dict
        global func_norm_counter
        global func_sequence_num

        if func_norm_dict.has_key(func_name):
            return func_norm_dict[func_name]
        if type(func_name) == type(""):
            # Parse the repr of a code object, which (at the time)
            # looked like:
            #     <code object name at address, file "filename", line N>
            long_name = string.split(func_name)
            file_name = long_name[6][1:-2]  # strip the quotes and comma
            func = long_name[2]
            lineno = long_name[8][:-1]      # strip the trailing '>'
            if '?' == func:  # until I find out how to map 'em...
                file_name = 'python'
                func_norm_counter = func_norm_counter + 1
                func = pid_string + ".C." + `func_norm_counter`
            result = file_name, string.atoi(lineno), func
        else:
            result = func_name
        func_norm_dict[func_name] = result
        return result

    # The following two methods can be called by clients to use
    # a profiler to profile a statement, given as a string.

    def run(self, cmd):
        import __main__
        dict = __main__.__dict__
        self.runctx(cmd, dict, dict)
        return self

    def runctx(self, cmd, globals, locals):
        self.set_cmd(cmd)
        sys.setprofile(self.dispatcher)
        try:
            exec(cmd, globals, locals)
        finally:
            sys.setprofile(None)

    # This method is more useful to profile a single function call.

    def runcall(self, func, *args):
        self.set_cmd(func.__name__)
        sys.setprofile(self.dispatcher)
        try:
            apply(func, args)
        finally:
            sys.setprofile(None)
        return self
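    # A minimal usage sketch (hypothetical function and file names):
    #
    #     p = Profile()
    #     p.run('hairy_function(42)')    # profile a statement string
    #     p.print_stats()                # or p.dump_stats('prof.out')
    #
    # runcall() profiles a direct call instead, and returns self, so
    # the calls can be chained:
    #
    #     Profile().runcall(hairy_function, 42).print_stats()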
    #******************************************************************
    # The following calculates the overhead for using a profiler.  The
    # problem is that it takes a fair amount of time for the profiler
    # to stop the stopwatch (from the time it receives an event).
    # Similarly, there is a delay from the time that the profiler
    # re-starts the stopwatch before the user's code really gets to
    # continue.  The following code tries to measure the difference on
    # a per-event basis.  The result can then be placed in the
    # Profile.dispatch_event() routine for the given platform.  Note
    # that this difference is only significant if there are a lot of
    # events, and relatively little user code per event.  For example,
    # code with small functions will typically benefit from having the
    # profiler calibrated for the current platform.  This *could* be
    # done on the fly during init() time, but it is not worth the
    # effort.  Also note that if too large a value is specified, then
    # execution time on some functions will actually appear as a
    # negative number.  It is *normal* for some functions (with very
    # low call counts) to have such negative stats, even if the
    # calibration figure is "correct."
    #
    # One alternative to profile-time calibration adjustments (i.e.,
    # adding in the magic little delta during each event) is to track
    # more carefully the number of events (and cumulatively, the number
    # of events during sub functions) that are seen.  If this were
    # done, then the arithmetic could be done after the fact (i.e., at
    # display time).  Currently, we track only call/return events.
    # These values can be deduced by examining the callees and callers
    # vectors for each function.  Hence we *can* almost correct the
    # internal time figure at print time (note that we currently don't
    # track exception event processing counts).  Unfortunately, there
    # is currently no similar information for cumulative sub-function
    # time.  It would not be hard to "get all this info" at profiler
    # time.  Specifically, we would have to extend the tuples to keep
    # counts of this in each frame, and then extend the defs of timing
    # tuples to include the significant two figures.  I'm a bit fearful
    # that this additional feature will slow the heavily optimized
    # event/time ratio (i.e., the profiler would run slower, for a very
    # low "value added" feature.)
    #
    # Plugging in the calibration constant doesn't slow down the
    # profiler very much, and the accuracy goes way up.
    #******************************************************************

    def calibrate(self, m):
        n = m
        s = self.timer()
        while n:
            self.simple()
            n = n - 1
        f = self.timer()
        my_simple = f[0] + f[1] - s[0] - s[1]
        # print "Simple =", my_simple,

        n = m
        s = self.timer()
        while n:
            self.instrumented()
            n = n - 1
        f = self.timer()
        my_inst = f[0] + f[1] - s[0] - s[1]
        # print "Instrumented =", my_inst
        avg_cost = (my_inst - my_simple) / m
        # print "Delta/call =", avg_cost, "(profiler fixup constant)"
        return avg_cost

    # simulate a program with no profiler activity
    def simple(self):
        a = 1
        pass

    # simulate a program with call/return event processing
    def instrumented(self):
        a = 1
        self.profiler_simulation(a, a, a)

    # simulate an event processing activity (from user's perspective)
    def profiler_simulation(self, x, y, z):
        t = self.timer()
        t = t[0] + t[1]
        self.ut = t
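    # Calibration sketch (meaningful only for timers that, like the
    # default os.times, return a tuple the code indexes into):
    #
    #     delta = Profile().calibrate(10000)
    #     # ...then fold the constant into trace_dispatch by hand, e.g.
    #     # t = t[0] + t[1] - self.t - delta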
#****************************************************************************
# OldProfile class documentation
#****************************************************************************
#
# The following derived profiler simulates the old style profile, providing
# errant results on recursive functions.  This profiler is useful because
# it runs faster (i.e., less overhead).  It still creates all the caller
# stats, and is quite useful when there is *no* recursion in the user's
# code.
#
# This code also shows how easy it is to create a modified profiler.
#****************************************************************************
class OldProfile(Profile):
    def trace_dispatch_exception(self, frame, t):
        rt, rtt, rct, rfn, rframe, rcur = self.cur
        if rcur and not rframe is frame:
            return self.trace_dispatch_return(rframe, t)
        return 0

    def trace_dispatch_call(self, frame, t):
        fn = `frame.f_code`

        self.cur = (t, 0, 0, fn, frame, self.cur)
        if self.timings.has_key(fn):
            tt, ct, callers = self.timings[fn]
            self.timings[fn] = tt, ct, callers
        else:
            self.timings[fn] = 0, 0, {}
        return 1

    def trace_dispatch_return(self, frame, t):
        rt, rtt, rct, rfn, frame, rcur = self.cur
        rtt = rtt + t
        sft = rtt + rct

        pt, ptt, pct, pfn, pframe, pcur = rcur
        self.cur = pt, ptt + rt, pct + sft, pfn, pframe, pcur

        tt, ct, callers = self.timings[rfn]
        if callers.has_key(pfn):
            callers[pfn] = callers[pfn] + 1
        else:
            callers[pfn] = 1
        self.timings[rfn] = tt + rtt, ct + sft, callers
        return 1

    def snapshot_stats(self):
        self.stats = {}
        for func in self.timings.keys():
            tt, ct, callers = self.timings[func]
            nor_func = self.func_normalize(func)
            nor_callers = {}
            nc = 0
            for func_caller in callers.keys():
                nor_callers[self.func_normalize(func_caller)] = \
                    callers[func_caller]
                nc = nc + callers[func_caller]
            self.stats[nor_func] = nc, nc, tt, ct, nor_callers

#****************************************************************************
# HotProfile class documentation
#****************************************************************************
#
# This profiler is the fastest derived profile example.  It does not
# calculate caller-callee relationships, and does not calculate cumulative
# time under a function.  It only calculates time spent in a function, so
# it runs very quickly (i.e., very low overhead).
#****************************************************************************
class HotProfile(Profile):
    def trace_dispatch_exception(self, frame, t):
        rt, rtt, rframe, rcur = self.cur
        if rcur and not rframe is frame:
            return self.trace_dispatch_return(rframe, t)
        return 0

    def trace_dispatch_call(self, frame, t):
        self.cur = (t, 0, frame, self.cur)
        return 1

    def trace_dispatch_return(self, frame, t):
        rt, rtt, frame, rcur = self.cur
        rfn = `frame.f_code`

        pt, ptt, pframe, pcur = rcur
        self.cur = pt, ptt + rt, pframe, pcur

        if self.timings.has_key(rfn):
            nc, tt = self.timings[rfn]
            self.timings[rfn] = nc + 1, rt + rtt + tt
        else:
            self.timings[rfn] = 1, rt + rtt
        return 1

    def snapshot_stats(self):
        self.stats = {}
        for func in self.timings.keys():
            nc, tt = self.timings[func]
            nor_func = self.func_normalize(func)
            self.stats[nor_func] = nc, nc, tt, 0, {}

#****************************************************************************
def Stats(*args):
    print 'Report generating functions are in the "pstats" module\a'
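# Choosing among the three classes, briefly: Profile produces full,
# recursion-correct statistics; OldProfile is lighter-weight but gives
# errant results on recursive functions; HotProfile is the fastest and
# records only per-function internal time.  A sketch (hypothetical
# function name):
#
#     HotProfile().runcall(inner_loop).print_stats()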