gapi_parsers_tests_common.hpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. // This file is part of OpenCV project.
  2. // It is subject to the license terms in the LICENSE file found in the top-level directory
  3. // of this distribution and at http://opencv.org/license.html.
  4. //
  5. // Copyright (C) 2020 Intel Corporation
  6. #ifndef OPENCV_GAPI_PARSERS_TESTS_COMMON_HPP
  7. #define OPENCV_GAPI_PARSERS_TESTS_COMMON_HPP
  8. #include "gapi_tests_common.hpp"
  9. #include "../../include/opencv2/gapi/infer/parsers.hpp"
  10. namespace opencv_test
  11. {
  12. class ParserSSDTest
  13. {
  14. public:
  15. cv::Mat generateSSDoutput(const cv::Size& in_sz)
  16. {
  17. constexpr int maxN = 200;
  18. constexpr int objSize = 7;
  19. std::vector<int> dims{ 1, 1, maxN, objSize };
  20. cv::Mat mat(dims, CV_32FC1);
  21. auto data = mat.ptr<float>();
  22. for (int i = 0; i < maxN; ++i)
  23. {
  24. float* it = data + i * objSize;
  25. auto ssdIt = generateItem(i, in_sz);
  26. it[0] = ssdIt.image_id;
  27. it[1] = ssdIt.label;
  28. it[2] = ssdIt.confidence;
  29. it[3] = ssdIt.rc_left;
  30. it[4] = ssdIt.rc_top;
  31. it[5] = ssdIt.rc_right;
  32. it[6] = ssdIt.rc_bottom;
  33. }
  34. return mat;
  35. }
  36. void parseSSDref(const cv::Mat& in_ssd_result,
  37. const cv::Size& in_size,
  38. const float confidence_threshold,
  39. const bool alignment_to_square,
  40. const bool filter_out_of_bounds,
  41. std::vector<cv::Rect>& out_boxes)
  42. {
  43. out_boxes.clear();
  44. const auto &in_ssd_dims = in_ssd_result.size;
  45. CV_Assert(in_ssd_dims.dims() == 4u);
  46. const int MAX_PROPOSALS = in_ssd_dims[2];
  47. const int OBJECT_SIZE = in_ssd_dims[3];
  48. CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size
  49. const float *data = in_ssd_result.ptr<float>();
  50. cv::Rect surface({0,0}, in_size), rc;
  51. float image_id, confidence;
  52. int label;
  53. for (int i = 0; i < MAX_PROPOSALS; ++i)
  54. {
  55. std::tie(rc, image_id, confidence, label)
  56. = extract(data + i*OBJECT_SIZE, in_size);
  57. if (image_id < 0.f)
  58. {
  59. break; // marks end-of-detections
  60. }
  61. if (confidence < confidence_threshold)
  62. {
  63. continue; // skip objects with low confidence
  64. }
  65. if (alignment_to_square)
  66. {
  67. adjustBoundingBox(rc);
  68. }
  69. const auto clipped_rc = rc & surface;
  70. if (filter_out_of_bounds)
  71. {
  72. if (clipped_rc.area() != rc.area())
  73. {
  74. continue;
  75. }
  76. }
  77. out_boxes.emplace_back(clipped_rc);
  78. }
  79. }
  80. void parseSSDBLref(const cv::Mat& in_ssd_result,
  81. const cv::Size& in_size,
  82. const float confidence_threshold,
  83. const int filter_label,
  84. std::vector<cv::Rect>& out_boxes,
  85. std::vector<int>& out_labels)
  86. {
  87. out_boxes.clear();
  88. out_labels.clear();
  89. const auto &in_ssd_dims = in_ssd_result.size;
  90. CV_Assert(in_ssd_dims.dims() == 4u);
  91. const int MAX_PROPOSALS = in_ssd_dims[2];
  92. const int OBJECT_SIZE = in_ssd_dims[3];
  93. CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size
  94. cv::Rect surface({0,0}, in_size), rc;
  95. float image_id, confidence;
  96. int label;
  97. const float *data = in_ssd_result.ptr<float>();
  98. for (int i = 0; i < MAX_PROPOSALS; i++)
  99. {
  100. std::tie(rc, image_id, confidence, label)
  101. = extract(data + i*OBJECT_SIZE, in_size);
  102. if (image_id < 0.f)
  103. {
  104. break; // marks end-of-detections
  105. }
  106. if (confidence < confidence_threshold ||
  107. (filter_label != -1 && label != filter_label))
  108. {
  109. continue; // filter out object classes if filter is specified
  110. }
  111. out_boxes.emplace_back(rc & surface);
  112. out_labels.emplace_back(label);
  113. }
  114. }
  115. private:
  116. void adjustBoundingBox(cv::Rect& boundingBox)
  117. {
  118. auto w = boundingBox.width;
  119. auto h = boundingBox.height;
  120. boundingBox.x -= static_cast<int>(0.067 * w);
  121. boundingBox.y -= static_cast<int>(0.028 * h);
  122. boundingBox.width += static_cast<int>(0.15 * w);
  123. boundingBox.height += static_cast<int>(0.13 * h);
  124. if (boundingBox.width < boundingBox.height)
  125. {
  126. auto dx = (boundingBox.height - boundingBox.width);
  127. boundingBox.x -= dx / 2;
  128. boundingBox.width += dx;
  129. }
  130. else
  131. {
  132. auto dy = (boundingBox.width - boundingBox.height);
  133. boundingBox.y -= dy / 2;
  134. boundingBox.height += dy;
  135. }
  136. }
  137. std::tuple<cv::Rect, float, float, int> extract(const float* it,
  138. const cv::Size& in_size)
  139. {
  140. float image_id = it[0];
  141. int label = static_cast<int>(it[1]);
  142. float confidence = it[2];
  143. float rc_left = it[3];
  144. float rc_top = it[4];
  145. float rc_right = it[5];
  146. float rc_bottom = it[6];
  147. cv::Rect rc; // map relative coordinates to the original image scale
  148. rc.x = static_cast<int>(rc_left * in_size.width);
  149. rc.y = static_cast<int>(rc_top * in_size.height);
  150. rc.width = static_cast<int>(rc_right * in_size.width) - rc.x;
  151. rc.height = static_cast<int>(rc_bottom * in_size.height) - rc.y;
  152. return std::make_tuple(rc, image_id, confidence, label);
  153. }
  154. int randInRange(const int start, const int end)
  155. {
  156. GAPI_Assert(start <= end);
  157. return theRNG().uniform(start, end);
  158. }
  159. cv::Rect generateBox(const cv::Size& in_sz)
  160. {
  161. // Generated rectangle can reside outside of the initial image by border pixels
  162. constexpr int border = 10;
  163. constexpr int minW = 16;
  164. constexpr int minH = 16;
  165. cv::Rect box;
  166. box.width = randInRange(minW, in_sz.width + 2*border);
  167. box.height = randInRange(minH, in_sz.height + 2*border);
  168. box.x = randInRange(-border, in_sz.width + border - box.width);
  169. box.y = randInRange(-border, in_sz.height + border - box.height);
  170. return box;
  171. }
  172. struct SSDitem
  173. {
  174. float image_id = 0.0f;
  175. float label = 0.0f;
  176. float confidence = 0.0f;
  177. float rc_left = 0.0f;
  178. float rc_top = 0.0f;
  179. float rc_right = 0.0f;
  180. float rc_bottom = 0.0f;
  181. };
  182. SSDitem generateItem(const int i, const cv::Size& in_sz)
  183. {
  184. const auto normalize = [](int v, int range) { return static_cast<float>(v) / range; };
  185. SSDitem it;
  186. it.image_id = static_cast<float>(i);
  187. it.label = static_cast<float>(randInRange(0, 9));
  188. it.confidence = theRNG().uniform(0.f, 1.f);
  189. auto box = generateBox(in_sz);
  190. it.rc_left = normalize(box.x, in_sz.width);
  191. it.rc_right = normalize(box.x + box.width, in_sz.width);
  192. it.rc_top = normalize(box.y, in_sz.height);
  193. it.rc_bottom = normalize(box.y + box.height, in_sz.height);
  194. return it;
  195. }
  196. };
  197. class ParserYoloTest
  198. {
  199. public:
  200. cv::Mat generateYoloOutput(const int num_classes, std::pair<bool,int> dims_config = {false, 4})
  201. {
  202. bool one_dim = false;
  203. int num_dims = 0;
  204. std::tie(one_dim, num_dims) = dims_config;
  205. GAPI_Assert(num_dims <= 4);
  206. GAPI_Assert((!one_dim && num_dims >= 3) ||
  207. ( one_dim && num_dims >= 1));
  208. std::vector<int> dims(num_dims, 1);
  209. if (one_dim) {
  210. dims.back() = (num_classes+5)*5*13*13;
  211. } else {
  212. dims.back() = (num_classes+5)*5;
  213. dims[num_dims-2] = 13;
  214. dims[num_dims-3] = 13;
  215. }
  216. cv::Mat mat(dims, CV_32FC1);
  217. auto data = mat.ptr<float>();
  218. const size_t range = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int>());
  219. cv::RNG& rng = theRNG();
  220. for (size_t i = 0; i < range; ++i)
  221. {
  222. data[i] = rng.uniform(0.f, 1.f);
  223. }
  224. return mat;
  225. }
  226. void parseYoloRef(const cv::Mat& in_yolo_result,
  227. const cv::Size& in_size,
  228. const float confidence_threshold,
  229. const float nms_threshold,
  230. const int num_classes,
  231. const std::vector<float>& anchors,
  232. std::vector<cv::Rect>& out_boxes,
  233. std::vector<int>& out_labels)
  234. {
  235. YoloParams params;
  236. constexpr auto side_square = 13 * 13;
  237. this->m_out = in_yolo_result.ptr<float>();
  238. this->m_side = 13;
  239. this->m_lcoords = params.coords;
  240. this->m_lclasses = num_classes;
  241. std::vector<Detection> detections;
  242. for (int i = 0; i < side_square; ++i)
  243. {
  244. for (int b = 0; b < params.num; ++b)
  245. {
  246. float scale = this->scale(i, b);
  247. if (scale < confidence_threshold)
  248. {
  249. continue;
  250. }
  251. double x = this->x(i, b);
  252. double y = this->y(i, b);
  253. double height = this->height(i, b, anchors[2 * b + 1]);
  254. double width = this->width(i, b, anchors[2 * b]);
  255. for (int label = 0; label < num_classes; ++label)
  256. {
  257. float prob = scale * classConf(i,b,label);
  258. if (prob < confidence_threshold)
  259. {
  260. continue;
  261. }
  262. auto box = toBox(x, y, height, width, in_size);
  263. detections.emplace_back(Detection(box, prob, label));
  264. }
  265. }
  266. }
  267. std::stable_sort(std::begin(detections), std::end(detections),
  268. [](const Detection& a, const Detection& b)
  269. {
  270. return a.conf > b.conf;
  271. });
  272. if (nms_threshold < 1.0f)
  273. {
  274. for (const auto& d : detections)
  275. {
  276. if (std::end(out_boxes) ==
  277. std::find_if(std::begin(out_boxes), std::end(out_boxes),
  278. [&d, nms_threshold](const cv::Rect& r)
  279. {
  280. float rectOverlap = 1.f - static_cast<float>(jaccardDistance(r, d.rect));
  281. return rectOverlap > nms_threshold;
  282. }))
  283. {
  284. out_boxes. emplace_back(d.rect);
  285. out_labels.emplace_back(d.label);
  286. }
  287. }
  288. }
  289. else
  290. {
  291. for (const auto& d: detections)
  292. {
  293. out_boxes. emplace_back(d.rect);
  294. out_labels.emplace_back(d.label);
  295. }
  296. }
  297. }
  298. private:
  299. struct Detection
  300. {
  301. Detection(const cv::Rect& in_rect, const float in_conf, const int in_label)
  302. : rect(in_rect), conf(in_conf), label(in_label)
  303. {}
  304. cv::Rect rect;
  305. float conf = 0.0f;
  306. int label = 0;
  307. };
  308. struct YoloParams
  309. {
  310. int num = 5;
  311. int coords = 4;
  312. };
  313. float scale(const int i, const int b)
  314. {
  315. int obj_index = index(i, b, m_lcoords);
  316. return m_out[obj_index];
  317. }
  318. double x(const int i, const int b)
  319. {
  320. int box_index = index(i, b, 0);
  321. int col = i % m_side;
  322. return (col + m_out[box_index]) / m_side;
  323. }
  324. double y(const int i, const int b)
  325. {
  326. int box_index = index(i, b, 0);
  327. int row = i / m_side;
  328. return (row + m_out[box_index + m_side * m_side]) / m_side;
  329. }
  330. double width(const int i, const int b, const float anchor)
  331. {
  332. int box_index = index(i, b, 0);
  333. return std::exp(m_out[box_index + 2 * m_side * m_side]) * anchor / m_side;
  334. }
  335. double height(const int i, const int b, const float anchor)
  336. {
  337. int box_index = index(i, b, 0);
  338. return std::exp(m_out[box_index + 3 * m_side * m_side]) * anchor / m_side;
  339. }
  340. float classConf(const int i, const int b, const int label)
  341. {
  342. int class_index = index(i, b, m_lcoords + 1 + label);
  343. return m_out[class_index];
  344. }
  345. cv::Rect toBox(const double x, const double y, const double h, const double w, const cv::Size& in_sz)
  346. {
  347. auto h_scale = in_sz.height;
  348. auto w_scale = in_sz.width;
  349. cv::Rect r;
  350. r.x = static_cast<int>((x - w / 2) * w_scale);
  351. r.y = static_cast<int>((y - h / 2) * h_scale);
  352. r.width = static_cast<int>(w * w_scale);
  353. r.height = static_cast<int>(h * h_scale);
  354. return r;
  355. }
  356. int index(const int i, const int b, const int entry)
  357. {
  358. return b * m_side * m_side * (m_lcoords + m_lclasses + 1) + entry * m_side * m_side + i;
  359. }
  360. const float* m_out = nullptr;
  361. int m_side = 0, m_lcoords = 0, m_lclasses = 0;
  362. };
  363. } // namespace opencv_test
  364. #endif // OPENCV_GAPI_PARSERS_TESTS_COMMON_HPP