123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440 |
- from __future__ import print_function
- import os
- import sys
- import csv
- from pprint import pprint
- from collections import deque
- try:
- long # Python 2
- except NameError:
- long = int # Python 3
- # trace.hpp
- REGION_FLAG_IMPL_MASK = 15 << 16
- REGION_FLAG_IMPL_IPP = 1 << 16
- REGION_FLAG_IMPL_OPENCL = 2 << 16
- DEBUG = False
- if DEBUG:
- dprint = print
- dpprint = pprint
- else:
- def dprint(args, **kwargs):
- pass
- def dpprint(args, **kwargs):
- pass
- def tryNum(s):
- if s.startswith('0x'):
- try:
- return int(s, 16)
- except ValueError:
- pass
- try:
- return int(s)
- except ValueError:
- pass
- if sys.version_info[0] < 3:
- try:
- return long(s)
- except ValueError:
- pass
- return s
- def formatTimestamp(t):
- return "%.3f" % (t * 1e-6)
- try:
- from statistics import median
- except ImportError:
- def median(lst):
- sortedLst = sorted(lst)
- lstLen = len(lst)
- index = (lstLen - 1) // 2
- if (lstLen % 2):
- return sortedLst[index]
- else:
- return (sortedLst[index] + sortedLst[index + 1]) * 0.5
- def getCXXFunctionName(spec):
- def dropParams(spec):
- pos = len(spec) - 1
- depth = 0
- while pos >= 0:
- if spec[pos] == ')':
- depth = depth + 1
- elif spec[pos] == '(':
- depth = depth - 1
- if depth == 0:
- if pos == 0 or spec[pos - 1] in ['#', ':']:
- res = dropParams(spec[pos+1:-1])
- return (spec[:pos] + res[0], res[1])
- return (spec[:pos], spec[pos:])
- pos = pos - 1
- return (spec, '')
- def extractName(spec):
- pos = len(spec) - 1
- inName = False
- while pos >= 0:
- if spec[pos] == ' ':
- if inName:
- return spec[pos+1:]
- elif spec[pos].isalnum():
- inName = True
- pos = pos - 1
- return spec
- if spec.startswith('IPP') or spec.startswith('OpenCL'):
- prefix_size = len('IPP') if spec.startswith('IPP') else len('OpenCL')
- prefix = spec[:prefix_size]
- if prefix_size < len(spec) and spec[prefix_size] in ['#', ':']:
- prefix = prefix + spec[prefix_size]
- prefix_size = prefix_size + 1
- begin = prefix_size
- while begin < len(spec):
- if spec[begin].isalnum() or spec[begin] in ['_', ':']:
- break
- begin = begin + 1
- if begin == len(spec):
- return spec
- end = begin
- while end < len(spec):
- if not (spec[end].isalnum() or spec[end] in ['_', ':']):
- break
- end = end + 1
- return prefix + spec[begin:end]
- spec = spec.replace(') const', ')') # const methods
- (ret_type_name, params) = dropParams(spec)
- name = extractName(ret_type_name)
- if 'operator' in name:
- return name + params
- if name.startswith('&'):
- return name[1:]
- return name
- stack_size = 10
- class Trace:
- def __init__(self, filename=None):
- self.tasks = {}
- self.tasks_list = []
- self.locations = {}
- self.threads_stack = {}
- self.pending_files = deque()
- if filename:
- self.load(filename)
- class TraceTask:
- def __init__(self, threadID, taskID, locationID, beginTimestamp):
- self.threadID = threadID
- self.taskID = taskID
- self.locationID = locationID
- self.beginTimestamp = beginTimestamp
- self.endTimestamp = None
- self.parentTaskID = None
- self.parentThreadID = None
- self.childTask = []
- self.selfTimeIPP = 0
- self.selfTimeOpenCL = 0
- self.totalTimeIPP = 0
- self.totalTimeOpenCL = 0
- def __repr__(self):
- return "TID={} ID={} loc={} parent={}:{} begin={} end={} IPP={}/{} OpenCL={}/{}".format(
- self.threadID, self.taskID, self.locationID, self.parentThreadID, self.parentTaskID,
- self.beginTimestamp, self.endTimestamp, self.totalTimeIPP, self.selfTimeIPP, self.totalTimeOpenCL, self.selfTimeOpenCL)
- class TraceLocation:
- def __init__(self, locationID, filename, line, name, flags):
- self.locationID = locationID
- self.filename = os.path.split(filename)[1]
- self.line = line
- self.name = getCXXFunctionName(name)
- self.flags = flags
- def __str__(self):
- return "{}#{}:{}".format(self.name, self.filename, self.line)
- def __repr__(self):
- return "ID={} {}:{}:{}".format(self.locationID, self.filename, self.line, self.name)
- def parse_file(self, filename):
- dprint("Process file: '{}'".format(filename))
- with open(filename) as infile:
- for line in infile:
- line = str(line).strip()
- if line[0] == "#":
- if line.startswith("#thread file:"):
- name = str(line.split(':', 1)[1]).strip()
- self.pending_files.append(os.path.join(os.path.split(filename)[0], name))
- continue
- self.parse_line(line)
- def parse_line(self, line):
- opts = line.split(',')
- dpprint(opts)
- if opts[0] == 'l':
- opts = list(csv.reader([line]))[0] # process quote more
- locationID = int(opts[1])
- filename = str(opts[2])
- line = int(opts[3])
- name = opts[4]
- flags = tryNum(opts[5])
- self.locations[locationID] = self.TraceLocation(locationID, filename, line, name, flags)
- return
- extra_opts = {}
- for e in opts[5:]:
- if not '=' in e:
- continue
- (k, v) = e.split('=')
- extra_opts[k] = tryNum(v)
- if extra_opts:
- dpprint(extra_opts)
- threadID = None
- taskID = None
- locationID = None
- ts = None
- if opts[0] in ['b', 'e']:
- threadID = int(opts[1])
- taskID = int(opts[4])
- locationID = int(opts[3])
- ts = tryNum(opts[2])
- thread_stack = None
- currentTask = (None, None)
- if threadID is not None:
- if not threadID in self.threads_stack:
- thread_stack = deque()
- self.threads_stack[threadID] = thread_stack
- else:
- thread_stack = self.threads_stack[threadID]
- currentTask = None if not thread_stack else thread_stack[-1]
- t = (threadID, taskID)
- if opts[0] == 'b':
- assert not t in self.tasks, "Duplicate task: " + str(t) + repr(self.tasks[t])
- task = self.TraceTask(threadID, taskID, locationID, ts)
- self.tasks[t] = task
- self.tasks_list.append(task)
- thread_stack.append((threadID, taskID))
- if currentTask:
- task.parentThreadID = currentTask[0]
- task.parentTaskID = currentTask[1]
- if 'parentThread' in extra_opts:
- task.parentThreadID = extra_opts['parentThread']
- if 'parent' in extra_opts:
- task.parentTaskID = extra_opts['parent']
- if opts[0] == 'e':
- task = self.tasks[t]
- task.endTimestamp = ts
- if 'tIPP' in extra_opts:
- task.selfTimeIPP = extra_opts['tIPP']
- if 'tOCL' in extra_opts:
- task.selfTimeOpenCL = extra_opts['tOCL']
- thread_stack.pop()
- def load(self, filename):
- self.pending_files.append(filename)
- if DEBUG:
- with open(filename, 'r') as f:
- print(f.read(), end='')
- while self.pending_files:
- self.parse_file(self.pending_files.pop())
- def getParentTask(self, task):
- return self.tasks.get((task.parentThreadID, task.parentTaskID), None)
- def process(self):
- self.tasks_list.sort(key=lambda x: x.beginTimestamp)
- parallel_for_location = None
- for (id, l) in self.locations.items():
- if l.name == 'parallel_for':
- parallel_for_location = l.locationID
- break
- for task in self.tasks_list:
- try:
- task.duration = task.endTimestamp - task.beginTimestamp
- task.selfDuration = task.duration
- except:
- task.duration = None
- task.selfDuration = None
- task.totalTimeIPP = task.selfTimeIPP
- task.totalTimeOpenCL = task.selfTimeOpenCL
- dpprint(self.tasks)
- dprint("Calculate total times")
- for task in self.tasks_list:
- parentTask = self.getParentTask(task)
- if parentTask:
- parentTask.selfDuration = parentTask.selfDuration - task.duration
- parentTask.childTask.append(task)
- timeIPP = task.selfTimeIPP
- timeOpenCL = task.selfTimeOpenCL
- while parentTask:
- if parentTask.locationID == parallel_for_location: # TODO parallel_for
- break
- parentLocation = self.locations[parentTask.locationID]
- if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
- parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
- timeIPP = 0
- else:
- parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
- if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
- parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
- timeOpenCL = 0
- else:
- parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
- parentTask = self.getParentTask(parentTask)
- dpprint(self.tasks)
- dprint("Calculate total times (parallel_for)")
- for task in self.tasks_list:
- if task.locationID == parallel_for_location:
- task.selfDuration = 0
- childDuration = sum([t.duration for t in task.childTask])
- if task.duration == 0 or childDuration == 0:
- continue
- timeCoef = task.duration / float(childDuration)
- childTimeIPP = sum([t.totalTimeIPP for t in task.childTask])
- childTimeOpenCL = sum([t.totalTimeOpenCL for t in task.childTask])
- if childTimeIPP == 0 and childTimeOpenCL == 0:
- continue
- timeIPP = childTimeIPP * timeCoef
- timeOpenCL = childTimeOpenCL * timeCoef
- parentTask = task
- while parentTask:
- parentLocation = self.locations[parentTask.locationID]
- if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
- parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
- timeIPP = 0
- else:
- parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
- if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
- parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
- timeOpenCL = 0
- else:
- parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
- parentTask = self.getParentTask(parentTask)
- dpprint(self.tasks)
- dprint("Done")
- def dump(self, max_entries):
- assert isinstance(max_entries, int)
- class CallInfo():
- def __init__(self, callID):
- self.callID = callID
- self.totalTimes = []
- self.selfTimes = []
- self.threads = set()
- self.selfTimesIPP = []
- self.selfTimesOpenCL = []
- self.totalTimesIPP = []
- self.totalTimesOpenCL = []
- calls = {}
- for currentTask in self.tasks_list:
- task = currentTask
- callID = []
- for i in range(stack_size):
- callID.append(task.locationID)
- task = self.getParentTask(task)
- if not task:
- break
- callID = tuple(callID)
- if not callID in calls:
- call = CallInfo(callID)
- calls[callID] = call
- else:
- call = calls[callID]
- call.totalTimes.append(currentTask.duration)
- call.selfTimes.append(currentTask.selfDuration)
- call.threads.add(currentTask.threadID)
- call.selfTimesIPP.append(currentTask.selfTimeIPP)
- call.selfTimesOpenCL.append(currentTask.selfTimeOpenCL)
- call.totalTimesIPP.append(currentTask.totalTimeIPP)
- call.totalTimesOpenCL.append(currentTask.totalTimeOpenCL)
- dpprint(self.tasks)
- dpprint(self.locations)
- dpprint(calls)
- calls_self_sum = {k: sum(v.selfTimes) for (k, v) in calls.items()}
- calls_total_sum = {k: sum(v.totalTimes) for (k, v) in calls.items()}
- calls_median = {k: median(v.selfTimes) for (k, v) in calls.items()}
- calls_sorted = sorted(calls.keys(), key=lambda x: calls_self_sum[x], reverse=True)
- calls_self_sum_IPP = {k: sum(v.selfTimesIPP) for (k, v) in calls.items()}
- calls_total_sum_IPP = {k: sum(v.totalTimesIPP) for (k, v) in calls.items()}
- calls_self_sum_OpenCL = {k: sum(v.selfTimesOpenCL) for (k, v) in calls.items()}
- calls_total_sum_OpenCL = {k: sum(v.totalTimesOpenCL) for (k, v) in calls.items()}
- if max_entries > 0 and len(calls_sorted) > max_entries:
- calls_sorted = calls_sorted[:max_entries]
- def formatPercents(p):
- if p is not None:
- return "{:>3d}".format(int(p*100))
- return ''
- name_width = 70
- timestamp_width = 12
- def fmtTS():
- return '{:>' + str(timestamp_width) + '}'
- fmt = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
- fmt2 = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
- print(fmt.format("ID", "name", "count", "thr", "min", "max", "median", "avg", "*self*", "IPP", "%", "OpenCL", "%"))
- print(fmt2.format("", "", "", "", "t-min", "t-max", "t-median", "t-avg", "total", "t-IPP", "%", "t-OpenCL", "%"))
- for (index, callID) in enumerate(calls_sorted):
- call_self_times = calls[callID].selfTimes
- loc0 = self.locations[callID[0]]
- loc_array = [] # [str(callID)]
- for (i, l) in enumerate(callID):
- loc = self.locations[l]
- loc_array.append(loc.name if i > 0 else str(loc))
- loc_str = '|'.join(loc_array)
- if len(loc_str) > name_width: loc_str = loc_str[:name_width-3]+'...'
- print(fmt.format(index + 1, loc_str, len(call_self_times),
- len(calls[callID].threads),
- formatTimestamp(min(call_self_times)),
- formatTimestamp(max(call_self_times)),
- formatTimestamp(calls_median[callID]),
- formatTimestamp(sum(call_self_times)/float(len(call_self_times))),
- formatTimestamp(sum(call_self_times)),
- formatTimestamp(calls_self_sum_IPP[callID]),
- formatPercents(calls_self_sum_IPP[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
- formatTimestamp(calls_self_sum_OpenCL[callID]),
- formatPercents(calls_self_sum_OpenCL[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
- ))
- call_total_times = calls[callID].totalTimes
- print(fmt2.format("", "", "", "",
- formatTimestamp(min(call_total_times)),
- formatTimestamp(max(call_total_times)),
- formatTimestamp(median(call_total_times)),
- formatTimestamp(sum(call_total_times)/float(len(call_total_times))),
- formatTimestamp(sum(call_total_times)),
- formatTimestamp(calls_total_sum_IPP[callID]),
- formatPercents(calls_total_sum_IPP[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
- formatTimestamp(calls_total_sum_OpenCL[callID]),
- formatPercents(calls_total_sum_OpenCL[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
- ))
- print()
- if __name__ == "__main__":
- tracefile = sys.argv[1] if len(sys.argv) > 1 else 'OpenCVTrace.txt'
- count = int(sys.argv[2]) if len(sys.argv) > 2 else 10
- trace = Trace(tracefile)
- trace.process()
- trace.dump(max_entries = count)
- print("OK")
|