trace_profiler.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. from __future__ import print_function
  2. import os
  3. import sys
  4. import csv
  5. from pprint import pprint
  6. from collections import deque
  7. try:
  8. long # Python 2
  9. except NameError:
  10. long = int # Python 3
  11. # trace.hpp
  12. REGION_FLAG_IMPL_MASK = 15 << 16
  13. REGION_FLAG_IMPL_IPP = 1 << 16
  14. REGION_FLAG_IMPL_OPENCL = 2 << 16
  15. DEBUG = False
  16. if DEBUG:
  17. dprint = print
  18. dpprint = pprint
  19. else:
  20. def dprint(args, **kwargs):
  21. pass
  22. def dpprint(args, **kwargs):
  23. pass
  24. def tryNum(s):
  25. if s.startswith('0x'):
  26. try:
  27. return int(s, 16)
  28. except ValueError:
  29. pass
  30. try:
  31. return int(s)
  32. except ValueError:
  33. pass
  34. if sys.version_info[0] < 3:
  35. try:
  36. return long(s)
  37. except ValueError:
  38. pass
  39. return s
  40. def formatTimestamp(t):
  41. return "%.3f" % (t * 1e-6)
  42. try:
  43. from statistics import median
  44. except ImportError:
  45. def median(lst):
  46. sortedLst = sorted(lst)
  47. lstLen = len(lst)
  48. index = (lstLen - 1) // 2
  49. if (lstLen % 2):
  50. return sortedLst[index]
  51. else:
  52. return (sortedLst[index] + sortedLst[index + 1]) * 0.5
  53. def getCXXFunctionName(spec):
  54. def dropParams(spec):
  55. pos = len(spec) - 1
  56. depth = 0
  57. while pos >= 0:
  58. if spec[pos] == ')':
  59. depth = depth + 1
  60. elif spec[pos] == '(':
  61. depth = depth - 1
  62. if depth == 0:
  63. if pos == 0 or spec[pos - 1] in ['#', ':']:
  64. res = dropParams(spec[pos+1:-1])
  65. return (spec[:pos] + res[0], res[1])
  66. return (spec[:pos], spec[pos:])
  67. pos = pos - 1
  68. return (spec, '')
  69. def extractName(spec):
  70. pos = len(spec) - 1
  71. inName = False
  72. while pos >= 0:
  73. if spec[pos] == ' ':
  74. if inName:
  75. return spec[pos+1:]
  76. elif spec[pos].isalnum():
  77. inName = True
  78. pos = pos - 1
  79. return spec
  80. if spec.startswith('IPP') or spec.startswith('OpenCL'):
  81. prefix_size = len('IPP') if spec.startswith('IPP') else len('OpenCL')
  82. prefix = spec[:prefix_size]
  83. if prefix_size < len(spec) and spec[prefix_size] in ['#', ':']:
  84. prefix = prefix + spec[prefix_size]
  85. prefix_size = prefix_size + 1
  86. begin = prefix_size
  87. while begin < len(spec):
  88. if spec[begin].isalnum() or spec[begin] in ['_', ':']:
  89. break
  90. begin = begin + 1
  91. if begin == len(spec):
  92. return spec
  93. end = begin
  94. while end < len(spec):
  95. if not (spec[end].isalnum() or spec[end] in ['_', ':']):
  96. break
  97. end = end + 1
  98. return prefix + spec[begin:end]
  99. spec = spec.replace(') const', ')') # const methods
  100. (ret_type_name, params) = dropParams(spec)
  101. name = extractName(ret_type_name)
  102. if 'operator' in name:
  103. return name + params
  104. if name.startswith('&'):
  105. return name[1:]
  106. return name
  107. stack_size = 10
  108. class Trace:
  109. def __init__(self, filename=None):
  110. self.tasks = {}
  111. self.tasks_list = []
  112. self.locations = {}
  113. self.threads_stack = {}
  114. self.pending_files = deque()
  115. if filename:
  116. self.load(filename)
  117. class TraceTask:
  118. def __init__(self, threadID, taskID, locationID, beginTimestamp):
  119. self.threadID = threadID
  120. self.taskID = taskID
  121. self.locationID = locationID
  122. self.beginTimestamp = beginTimestamp
  123. self.endTimestamp = None
  124. self.parentTaskID = None
  125. self.parentThreadID = None
  126. self.childTask = []
  127. self.selfTimeIPP = 0
  128. self.selfTimeOpenCL = 0
  129. self.totalTimeIPP = 0
  130. self.totalTimeOpenCL = 0
  131. def __repr__(self):
  132. return "TID={} ID={} loc={} parent={}:{} begin={} end={} IPP={}/{} OpenCL={}/{}".format(
  133. self.threadID, self.taskID, self.locationID, self.parentThreadID, self.parentTaskID,
  134. self.beginTimestamp, self.endTimestamp, self.totalTimeIPP, self.selfTimeIPP, self.totalTimeOpenCL, self.selfTimeOpenCL)
  135. class TraceLocation:
  136. def __init__(self, locationID, filename, line, name, flags):
  137. self.locationID = locationID
  138. self.filename = os.path.split(filename)[1]
  139. self.line = line
  140. self.name = getCXXFunctionName(name)
  141. self.flags = flags
  142. def __str__(self):
  143. return "{}#{}:{}".format(self.name, self.filename, self.line)
  144. def __repr__(self):
  145. return "ID={} {}:{}:{}".format(self.locationID, self.filename, self.line, self.name)
  146. def parse_file(self, filename):
  147. dprint("Process file: '{}'".format(filename))
  148. with open(filename) as infile:
  149. for line in infile:
  150. line = str(line).strip()
  151. if line[0] == "#":
  152. if line.startswith("#thread file:"):
  153. name = str(line.split(':', 1)[1]).strip()
  154. self.pending_files.append(os.path.join(os.path.split(filename)[0], name))
  155. continue
  156. self.parse_line(line)
  157. def parse_line(self, line):
  158. opts = line.split(',')
  159. dpprint(opts)
  160. if opts[0] == 'l':
  161. opts = list(csv.reader([line]))[0] # process quote more
  162. locationID = int(opts[1])
  163. filename = str(opts[2])
  164. line = int(opts[3])
  165. name = opts[4]
  166. flags = tryNum(opts[5])
  167. self.locations[locationID] = self.TraceLocation(locationID, filename, line, name, flags)
  168. return
  169. extra_opts = {}
  170. for e in opts[5:]:
  171. if not '=' in e:
  172. continue
  173. (k, v) = e.split('=')
  174. extra_opts[k] = tryNum(v)
  175. if extra_opts:
  176. dpprint(extra_opts)
  177. threadID = None
  178. taskID = None
  179. locationID = None
  180. ts = None
  181. if opts[0] in ['b', 'e']:
  182. threadID = int(opts[1])
  183. taskID = int(opts[4])
  184. locationID = int(opts[3])
  185. ts = tryNum(opts[2])
  186. thread_stack = None
  187. currentTask = (None, None)
  188. if threadID is not None:
  189. if not threadID in self.threads_stack:
  190. thread_stack = deque()
  191. self.threads_stack[threadID] = thread_stack
  192. else:
  193. thread_stack = self.threads_stack[threadID]
  194. currentTask = None if not thread_stack else thread_stack[-1]
  195. t = (threadID, taskID)
  196. if opts[0] == 'b':
  197. assert not t in self.tasks, "Duplicate task: " + str(t) + repr(self.tasks[t])
  198. task = self.TraceTask(threadID, taskID, locationID, ts)
  199. self.tasks[t] = task
  200. self.tasks_list.append(task)
  201. thread_stack.append((threadID, taskID))
  202. if currentTask:
  203. task.parentThreadID = currentTask[0]
  204. task.parentTaskID = currentTask[1]
  205. if 'parentThread' in extra_opts:
  206. task.parentThreadID = extra_opts['parentThread']
  207. if 'parent' in extra_opts:
  208. task.parentTaskID = extra_opts['parent']
  209. if opts[0] == 'e':
  210. task = self.tasks[t]
  211. task.endTimestamp = ts
  212. if 'tIPP' in extra_opts:
  213. task.selfTimeIPP = extra_opts['tIPP']
  214. if 'tOCL' in extra_opts:
  215. task.selfTimeOpenCL = extra_opts['tOCL']
  216. thread_stack.pop()
  217. def load(self, filename):
  218. self.pending_files.append(filename)
  219. if DEBUG:
  220. with open(filename, 'r') as f:
  221. print(f.read(), end='')
  222. while self.pending_files:
  223. self.parse_file(self.pending_files.pop())
  224. def getParentTask(self, task):
  225. return self.tasks.get((task.parentThreadID, task.parentTaskID), None)
  226. def process(self):
  227. self.tasks_list.sort(key=lambda x: x.beginTimestamp)
  228. parallel_for_location = None
  229. for (id, l) in self.locations.items():
  230. if l.name == 'parallel_for':
  231. parallel_for_location = l.locationID
  232. break
  233. for task in self.tasks_list:
  234. try:
  235. task.duration = task.endTimestamp - task.beginTimestamp
  236. task.selfDuration = task.duration
  237. except:
  238. task.duration = None
  239. task.selfDuration = None
  240. task.totalTimeIPP = task.selfTimeIPP
  241. task.totalTimeOpenCL = task.selfTimeOpenCL
  242. dpprint(self.tasks)
  243. dprint("Calculate total times")
  244. for task in self.tasks_list:
  245. parentTask = self.getParentTask(task)
  246. if parentTask:
  247. parentTask.selfDuration = parentTask.selfDuration - task.duration
  248. parentTask.childTask.append(task)
  249. timeIPP = task.selfTimeIPP
  250. timeOpenCL = task.selfTimeOpenCL
  251. while parentTask:
  252. if parentTask.locationID == parallel_for_location: # TODO parallel_for
  253. break
  254. parentLocation = self.locations[parentTask.locationID]
  255. if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
  256. parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
  257. timeIPP = 0
  258. else:
  259. parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
  260. if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
  261. parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
  262. timeOpenCL = 0
  263. else:
  264. parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
  265. parentTask = self.getParentTask(parentTask)
  266. dpprint(self.tasks)
  267. dprint("Calculate total times (parallel_for)")
  268. for task in self.tasks_list:
  269. if task.locationID == parallel_for_location:
  270. task.selfDuration = 0
  271. childDuration = sum([t.duration for t in task.childTask])
  272. if task.duration == 0 or childDuration == 0:
  273. continue
  274. timeCoef = task.duration / float(childDuration)
  275. childTimeIPP = sum([t.totalTimeIPP for t in task.childTask])
  276. childTimeOpenCL = sum([t.totalTimeOpenCL for t in task.childTask])
  277. if childTimeIPP == 0 and childTimeOpenCL == 0:
  278. continue
  279. timeIPP = childTimeIPP * timeCoef
  280. timeOpenCL = childTimeOpenCL * timeCoef
  281. parentTask = task
  282. while parentTask:
  283. parentLocation = self.locations[parentTask.locationID]
  284. if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
  285. parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
  286. timeIPP = 0
  287. else:
  288. parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
  289. if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
  290. parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
  291. timeOpenCL = 0
  292. else:
  293. parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
  294. parentTask = self.getParentTask(parentTask)
  295. dpprint(self.tasks)
  296. dprint("Done")
  297. def dump(self, max_entries):
  298. assert isinstance(max_entries, int)
  299. class CallInfo():
  300. def __init__(self, callID):
  301. self.callID = callID
  302. self.totalTimes = []
  303. self.selfTimes = []
  304. self.threads = set()
  305. self.selfTimesIPP = []
  306. self.selfTimesOpenCL = []
  307. self.totalTimesIPP = []
  308. self.totalTimesOpenCL = []
  309. calls = {}
  310. for currentTask in self.tasks_list:
  311. task = currentTask
  312. callID = []
  313. for i in range(stack_size):
  314. callID.append(task.locationID)
  315. task = self.getParentTask(task)
  316. if not task:
  317. break
  318. callID = tuple(callID)
  319. if not callID in calls:
  320. call = CallInfo(callID)
  321. calls[callID] = call
  322. else:
  323. call = calls[callID]
  324. call.totalTimes.append(currentTask.duration)
  325. call.selfTimes.append(currentTask.selfDuration)
  326. call.threads.add(currentTask.threadID)
  327. call.selfTimesIPP.append(currentTask.selfTimeIPP)
  328. call.selfTimesOpenCL.append(currentTask.selfTimeOpenCL)
  329. call.totalTimesIPP.append(currentTask.totalTimeIPP)
  330. call.totalTimesOpenCL.append(currentTask.totalTimeOpenCL)
  331. dpprint(self.tasks)
  332. dpprint(self.locations)
  333. dpprint(calls)
  334. calls_self_sum = {k: sum(v.selfTimes) for (k, v) in calls.items()}
  335. calls_total_sum = {k: sum(v.totalTimes) for (k, v) in calls.items()}
  336. calls_median = {k: median(v.selfTimes) for (k, v) in calls.items()}
  337. calls_sorted = sorted(calls.keys(), key=lambda x: calls_self_sum[x], reverse=True)
  338. calls_self_sum_IPP = {k: sum(v.selfTimesIPP) for (k, v) in calls.items()}
  339. calls_total_sum_IPP = {k: sum(v.totalTimesIPP) for (k, v) in calls.items()}
  340. calls_self_sum_OpenCL = {k: sum(v.selfTimesOpenCL) for (k, v) in calls.items()}
  341. calls_total_sum_OpenCL = {k: sum(v.totalTimesOpenCL) for (k, v) in calls.items()}
  342. if max_entries > 0 and len(calls_sorted) > max_entries:
  343. calls_sorted = calls_sorted[:max_entries]
  344. def formatPercents(p):
  345. if p is not None:
  346. return "{:>3d}".format(int(p*100))
  347. return ''
  348. name_width = 70
  349. timestamp_width = 12
  350. def fmtTS():
  351. return '{:>' + str(timestamp_width) + '}'
  352. fmt = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
  353. fmt2 = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
  354. print(fmt.format("ID", "name", "count", "thr", "min", "max", "median", "avg", "*self*", "IPP", "%", "OpenCL", "%"))
  355. print(fmt2.format("", "", "", "", "t-min", "t-max", "t-median", "t-avg", "total", "t-IPP", "%", "t-OpenCL", "%"))
  356. for (index, callID) in enumerate(calls_sorted):
  357. call_self_times = calls[callID].selfTimes
  358. loc0 = self.locations[callID[0]]
  359. loc_array = [] # [str(callID)]
  360. for (i, l) in enumerate(callID):
  361. loc = self.locations[l]
  362. loc_array.append(loc.name if i > 0 else str(loc))
  363. loc_str = '|'.join(loc_array)
  364. if len(loc_str) > name_width: loc_str = loc_str[:name_width-3]+'...'
  365. print(fmt.format(index + 1, loc_str, len(call_self_times),
  366. len(calls[callID].threads),
  367. formatTimestamp(min(call_self_times)),
  368. formatTimestamp(max(call_self_times)),
  369. formatTimestamp(calls_median[callID]),
  370. formatTimestamp(sum(call_self_times)/float(len(call_self_times))),
  371. formatTimestamp(sum(call_self_times)),
  372. formatTimestamp(calls_self_sum_IPP[callID]),
  373. formatPercents(calls_self_sum_IPP[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
  374. formatTimestamp(calls_self_sum_OpenCL[callID]),
  375. formatPercents(calls_self_sum_OpenCL[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
  376. ))
  377. call_total_times = calls[callID].totalTimes
  378. print(fmt2.format("", "", "", "",
  379. formatTimestamp(min(call_total_times)),
  380. formatTimestamp(max(call_total_times)),
  381. formatTimestamp(median(call_total_times)),
  382. formatTimestamp(sum(call_total_times)/float(len(call_total_times))),
  383. formatTimestamp(sum(call_total_times)),
  384. formatTimestamp(calls_total_sum_IPP[callID]),
  385. formatPercents(calls_total_sum_IPP[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
  386. formatTimestamp(calls_total_sum_OpenCL[callID]),
  387. formatPercents(calls_total_sum_OpenCL[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
  388. ))
  389. print()
  390. if __name__ == "__main__":
  391. tracefile = sys.argv[1] if len(sys.argv) > 1 else 'OpenCVTrace.txt'
  392. count = int(sys.argv[2]) if len(sys.argv) > 2 else 10
  393. trace = Trace(tracefile)
  394. trace.process()
  395. trace.dump(max_entries = count)
  396. print("OK")