// face_detection_mtcnn.cpp (30 KB)
  #include <algorithm>
  #include <array>
  #include <cctype>
  #include <cmath>
  #include <iostream>
  #include <limits>
  #include <numeric>
  #include <stdexcept>
  #include <string>
  #include <tuple>
  #include <utility>
  #include <vector>

  #include <opencv2/gapi.hpp>
  #include <opencv2/gapi/core.hpp>
  #include <opencv2/gapi/imgproc.hpp>
  #include <opencv2/gapi/cpu/gcpukernel.hpp>
  #include <opencv2/gapi/infer.hpp>
  #include <opencv2/gapi/infer/ie.hpp>
  #include <opencv2/gapi/streaming/cap.hpp>
  #include <opencv2/gapi/gopaque.hpp>
  #include <opencv2/highgui.hpp>
  // One-line description printed by the command-line parser's help output.
  const std::string about =
      "This is an OpenCV-based version of OMZ MTCNN Face Detection example";

  // Command-line option table in cv::CommandLineParser syntax:
  // "{ name | default | description }".
  const std::string keys =
      "{ h help | | Print this help message }"
      "{ input | | Path to the input video file }"
      "{ mtcnnpm | mtcnn-p.xml | Path to OpenVINO MTCNN P (Proposal) detection model (.xml)}"
      "{ mtcnnpd | CPU | Target device for the MTCNN P (e.g. CPU, GPU, VPU, ...) }"
      "{ mtcnnrm | mtcnn-r.xml | Path to OpenVINO MTCNN R (Refinement) detection model (.xml)}"
      "{ mtcnnrd | CPU | Target device for the MTCNN R (e.g. CPU, GPU, VPU, ...) }"
      "{ mtcnnom | mtcnn-o.xml | Path to OpenVINO MTCNN O (Output) detection model (.xml)}"
      "{ mtcnnod | CPU | Target device for the MTCNN O (e.g. CPU, GPU, VPU, ...) }"
      "{ thrp | 0.6 | MTCNN P confidence threshold}"
      "{ thrr | 0.7 | MTCNN R confidence threshold}"
      "{ thro | 0.7 | MTCNN O confidence threshold}"
      "{ half_scale | false | MTCNN P use half scale pyramid}"
      "{ queue_capacity | 1 | Streaming executor queue capacity. Calculated automatically if 0}"
      ;
  36. namespace {
  37. std::string weights_path(const std::string& model_path) {
  38. const auto EXT_LEN = 4u;
  39. const auto sz = model_path.size();
  40. CV_Assert(sz > EXT_LEN);
  41. const auto ext = model_path.substr(sz - EXT_LEN);
  42. CV_Assert(cv::toLowerCase(ext) == ".xml");
  43. return model_path.substr(0u, sz - EXT_LEN) + ".bin";
  44. }
  45. //////////////////////////////////////////////////////////////////////
  46. } // anonymous namespace
  47. namespace custom {
  48. namespace {
  49. // Define custom structures and operations
  50. #define NUM_REGRESSIONS 4
  51. #define NUM_PTS 5
  52. struct BBox {
  53. int x1;
  54. int y1;
  55. int x2;
  56. int y2;
  57. cv::Rect getRect() const { return cv::Rect(x1,
  58. y1,
  59. x2 - x1,
  60. y2 - y1); }
  61. BBox getSquare() const {
  62. BBox bbox;
  63. float bboxWidth = static_cast<float>(x2 - x1);
  64. float bboxHeight = static_cast<float>(y2 - y1);
  65. float side = std::max(bboxWidth, bboxHeight);
  66. bbox.x1 = static_cast<int>(static_cast<float>(x1) + (bboxWidth - side) * 0.5f);
  67. bbox.y1 = static_cast<int>(static_cast<float>(y1) + (bboxHeight - side) * 0.5f);
  68. bbox.x2 = static_cast<int>(static_cast<float>(bbox.x1) + side);
  69. bbox.y2 = static_cast<int>(static_cast<float>(bbox.y1) + side);
  70. return bbox;
  71. }
  72. };
  73. struct Face {
  74. BBox bbox;
  75. float score;
  76. std::array<float, NUM_REGRESSIONS> regression;
  77. std::array<float, 2 * NUM_PTS> ptsCoords;
  78. static void applyRegression(std::vector<Face>& faces, bool addOne = false) {
  79. for (auto& face : faces) {
  80. float bboxWidth =
  81. face.bbox.x2 - face.bbox.x1 + static_cast<float>(addOne);
  82. float bboxHeight =
  83. face.bbox.y2 - face.bbox.y1 + static_cast<float>(addOne);
  84. face.bbox.x1 = static_cast<int>(static_cast<float>(face.bbox.x1) + (face.regression[1] * bboxWidth));
  85. face.bbox.y1 = static_cast<int>(static_cast<float>(face.bbox.y1) + (face.regression[0] * bboxHeight));
  86. face.bbox.x2 = static_cast<int>(static_cast<float>(face.bbox.x2) + (face.regression[3] * bboxWidth));
  87. face.bbox.y2 = static_cast<int>(static_cast<float>(face.bbox.y2) + (face.regression[2] * bboxHeight));
  88. }
  89. }
  90. static void bboxes2Squares(std::vector<Face>& faces) {
  91. for (auto& face : faces) {
  92. face.bbox = face.bbox.getSquare();
  93. }
  94. }
  95. static std::vector<Face> runNMS(std::vector<Face>& faces, const float threshold,
  96. const bool useMin = false) {
  97. std::vector<Face> facesNMS;
  98. if (faces.empty()) {
  99. return facesNMS;
  100. }
  101. std::sort(faces.begin(), faces.end(), [](const Face& f1, const Face& f2) {
  102. return f1.score > f2.score;
  103. });
  104. std::vector<int> indices(faces.size());
  105. std::iota(indices.begin(), indices.end(), 0);
  106. while (indices.size() > 0) {
  107. const int idx = indices[0];
  108. facesNMS.push_back(faces[idx]);
  109. std::vector<int> tmpIndices = indices;
  110. indices.clear();
  111. const float area1 = static_cast<float>(faces[idx].bbox.x2 - faces[idx].bbox.x1 + 1) *
  112. static_cast<float>(faces[idx].bbox.y2 - faces[idx].bbox.y1 + 1);
  113. for (size_t i = 1; i < tmpIndices.size(); ++i) {
  114. int tmpIdx = tmpIndices[i];
  115. const float interX1 = static_cast<float>(std::max(faces[idx].bbox.x1, faces[tmpIdx].bbox.x1));
  116. const float interY1 = static_cast<float>(std::max(faces[idx].bbox.y1, faces[tmpIdx].bbox.y1));
  117. const float interX2 = static_cast<float>(std::min(faces[idx].bbox.x2, faces[tmpIdx].bbox.x2));
  118. const float interY2 = static_cast<float>(std::min(faces[idx].bbox.y2, faces[tmpIdx].bbox.y2));
  119. const float bboxWidth = std::max(0.0f, (interX2 - interX1 + 1));
  120. const float bboxHeight = std::max(0.0f, (interY2 - interY1 + 1));
  121. const float interArea = bboxWidth * bboxHeight;
  122. const float area2 = static_cast<float>(faces[tmpIdx].bbox.x2 - faces[tmpIdx].bbox.x1 + 1) *
  123. static_cast<float>(faces[tmpIdx].bbox.y2 - faces[tmpIdx].bbox.y1 + 1);
  124. float overlap = 0.0;
  125. if (useMin) {
  126. overlap = interArea / std::min(area1, area2);
  127. } else {
  128. overlap = interArea / (area1 + area2 - interArea);
  129. }
  130. if (overlap <= threshold) {
  131. indices.push_back(tmpIdx);
  132. }
  133. }
  134. }
  135. return facesNMS;
  136. }
  137. };
  138. const float P_NET_WINDOW_SIZE = 12.0f;
  139. std::vector<Face> buildFaces(const cv::Mat& scores,
  140. const cv::Mat& regressions,
  141. const float scaleFactor,
  142. const float threshold) {
  143. auto w = scores.size[3];
  144. auto h = scores.size[2];
  145. auto size = w * h;
  146. const float* scores_data = scores.ptr<float>();
  147. scores_data += size;
  148. const float* reg_data = regressions.ptr<float>();
  149. auto out_side = std::max(h, w);
  150. auto in_side = 2 * out_side + 11;
  151. float stride = 0.0f;
  152. if (out_side != 1)
  153. {
  154. stride = static_cast<float>(in_side - P_NET_WINDOW_SIZE) / static_cast<float>(out_side - 1);
  155. }
  156. std::vector<Face> boxes;
  157. for (int i = 0; i < size; i++) {
  158. if (scores_data[i] >= (threshold)) {
  159. float y = static_cast<float>(i / w);
  160. float x = static_cast<float>(i - w * y);
  161. Face faceInfo;
  162. BBox& faceBox = faceInfo.bbox;
  163. faceBox.x1 = std::max(0, static_cast<int>((x * stride) / scaleFactor));
  164. faceBox.y1 = std::max(0, static_cast<int>((y * stride) / scaleFactor));
  165. faceBox.x2 = static_cast<int>((x * stride + P_NET_WINDOW_SIZE - 1.0f) / scaleFactor);
  166. faceBox.y2 = static_cast<int>((y * stride + P_NET_WINDOW_SIZE - 1.0f) / scaleFactor);
  167. faceInfo.regression[0] = reg_data[i];
  168. faceInfo.regression[1] = reg_data[i + size];
  169. faceInfo.regression[2] = reg_data[i + 2 * size];
  170. faceInfo.regression[3] = reg_data[i + 3 * size];
  171. faceInfo.score = scores_data[i];
  172. boxes.push_back(faceInfo);
  173. }
  174. }
  175. return boxes;
  176. }
  177. // Define networks for this sample
  178. using GMat2 = std::tuple<cv::GMat, cv::GMat>;
  179. using GMat3 = std::tuple<cv::GMat, cv::GMat, cv::GMat>;
  180. using GMats = cv::GArray<cv::GMat>;
  181. using GRects = cv::GArray<cv::Rect>;
  182. using GSize = cv::GOpaque<cv::Size>;
  183. G_API_NET(MTCNNRefinement,
  184. <GMat2(cv::GMat)>,
  185. "sample.custom.mtcnn_refinement");
  186. G_API_NET(MTCNNOutput,
  187. <GMat3(cv::GMat)>,
  188. "sample.custom.mtcnn_output");
  189. using GFaces = cv::GArray<Face>;
  190. G_API_OP(BuildFaces,
  191. <GFaces(cv::GMat, cv::GMat, float, float)>,
  192. "sample.custom.mtcnn.build_faces") {
  193. static cv::GArrayDesc outMeta(const cv::GMatDesc&,
  194. const cv::GMatDesc&,
  195. const float,
  196. const float) {
  197. return cv::empty_array_desc();
  198. }
  199. };
  200. G_API_OP(RunNMS,
  201. <GFaces(GFaces, float, bool)>,
  202. "sample.custom.mtcnn.run_nms") {
  203. static cv::GArrayDesc outMeta(const cv::GArrayDesc&,
  204. const float, const bool) {
  205. return cv::empty_array_desc();
  206. }
  207. };
  208. G_API_OP(AccumulatePyramidOutputs,
  209. <GFaces(GFaces, GFaces)>,
  210. "sample.custom.mtcnn.accumulate_pyramid_outputs") {
  211. static cv::GArrayDesc outMeta(const cv::GArrayDesc&,
  212. const cv::GArrayDesc&) {
  213. return cv::empty_array_desc();
  214. }
  215. };
  216. G_API_OP(ApplyRegression,
  217. <GFaces(GFaces, bool)>,
  218. "sample.custom.mtcnn.apply_regression") {
  219. static cv::GArrayDesc outMeta(const cv::GArrayDesc&, const bool) {
  220. return cv::empty_array_desc();
  221. }
  222. };
  223. G_API_OP(BBoxesToSquares,
  224. <GFaces(GFaces)>,
  225. "sample.custom.mtcnn.bboxes_to_squares") {
  226. static cv::GArrayDesc outMeta(const cv::GArrayDesc&) {
  227. return cv::empty_array_desc();
  228. }
  229. };
  230. G_API_OP(R_O_NetPreProcGetROIs,
  231. <GRects(GFaces, GSize)>,
  232. "sample.custom.mtcnn.bboxes_r_o_net_preproc_get_rois") {
  233. static cv::GArrayDesc outMeta(const cv::GArrayDesc&, const cv::GOpaqueDesc&) {
  234. return cv::empty_array_desc();
  235. }
  236. };
  237. G_API_OP(RNetPostProc,
  238. <GFaces(GFaces, GMats, GMats, float)>,
  239. "sample.custom.mtcnn.rnet_postproc") {
  240. static cv::GArrayDesc outMeta(const cv::GArrayDesc&,
  241. const cv::GArrayDesc&,
  242. const cv::GArrayDesc&,
  243. const float) {
  244. return cv::empty_array_desc();
  245. }
  246. };
  247. G_API_OP(ONetPostProc,
  248. <GFaces(GFaces, GMats, GMats, GMats, float)>,
  249. "sample.custom.mtcnn.onet_postproc") {
  250. static cv::GArrayDesc outMeta(const cv::GArrayDesc&,
  251. const cv::GArrayDesc&,
  252. const cv::GArrayDesc&,
  253. const cv::GArrayDesc&,
  254. const float) {
  255. return cv::empty_array_desc();
  256. }
  257. };
  258. G_API_OP(SwapFaces,
  259. <GFaces(GFaces)>,
  260. "sample.custom.mtcnn.swap_faces") {
  261. static cv::GArrayDesc outMeta(const cv::GArrayDesc&) {
  262. return cv::empty_array_desc();
  263. }
  264. };
  265. //Custom kernels implementation
  266. GAPI_OCV_KERNEL(OCVBuildFaces, BuildFaces) {
  267. static void run(const cv::Mat & in_scores,
  268. const cv::Mat & in_regresssions,
  269. const float scaleFactor,
  270. const float threshold,
  271. std::vector<Face> &out_faces) {
  272. out_faces = buildFaces(in_scores, in_regresssions, scaleFactor, threshold);
  273. }
  274. };// GAPI_OCV_KERNEL(BuildFaces)
  275. GAPI_OCV_KERNEL(OCVRunNMS, RunNMS) {
  276. static void run(const std::vector<Face> &in_faces,
  277. const float threshold,
  278. const bool useMin,
  279. std::vector<Face> &out_faces) {
  280. std::vector<Face> in_faces_copy = in_faces;
  281. out_faces = Face::runNMS(in_faces_copy, threshold, useMin);
  282. }
  283. };// GAPI_OCV_KERNEL(RunNMS)
  284. GAPI_OCV_KERNEL(OCVAccumulatePyramidOutputs, AccumulatePyramidOutputs) {
  285. static void run(const std::vector<Face> &total_faces,
  286. const std::vector<Face> &in_faces,
  287. std::vector<Face> &out_faces) {
  288. out_faces = total_faces;
  289. out_faces.insert(out_faces.end(), in_faces.begin(), in_faces.end());
  290. }
  291. };// GAPI_OCV_KERNEL(AccumulatePyramidOutputs)
  292. GAPI_OCV_KERNEL(OCVApplyRegression, ApplyRegression) {
  293. static void run(const std::vector<Face> &in_faces,
  294. const bool addOne,
  295. std::vector<Face> &out_faces) {
  296. std::vector<Face> in_faces_copy = in_faces;
  297. Face::applyRegression(in_faces_copy, addOne);
  298. out_faces.clear();
  299. out_faces.insert(out_faces.end(), in_faces_copy.begin(), in_faces_copy.end());
  300. }
  301. };// GAPI_OCV_KERNEL(ApplyRegression)
  302. GAPI_OCV_KERNEL(OCVBBoxesToSquares, BBoxesToSquares) {
  303. static void run(const std::vector<Face> &in_faces,
  304. std::vector<Face> &out_faces) {
  305. std::vector<Face> in_faces_copy = in_faces;
  306. Face::bboxes2Squares(in_faces_copy);
  307. out_faces.clear();
  308. out_faces.insert(out_faces.end(), in_faces_copy.begin(), in_faces_copy.end());
  309. }
  310. };// GAPI_OCV_KERNEL(BBoxesToSquares)
  311. GAPI_OCV_KERNEL(OCVR_O_NetPreProcGetROIs, R_O_NetPreProcGetROIs) {
  312. static void run(const std::vector<Face> &in_faces,
  313. const cv::Size & in_image_size,
  314. std::vector<cv::Rect> &outs) {
  315. outs.clear();
  316. for (const auto& face : in_faces) {
  317. cv::Rect tmp_rect = face.bbox.getRect();
  318. //Compare to transposed sizes width<->height
  319. tmp_rect &= cv::Rect(tmp_rect.x, tmp_rect.y, in_image_size.height - tmp_rect.x, in_image_size.width - tmp_rect.y) &
  320. cv::Rect(0, 0, in_image_size.height, in_image_size.width);
  321. outs.push_back(tmp_rect);
  322. }
  323. }
  324. };// GAPI_OCV_KERNEL(R_O_NetPreProcGetROIs)
  325. GAPI_OCV_KERNEL(OCVRNetPostProc, RNetPostProc) {
  326. static void run(const std::vector<Face> &in_faces,
  327. const std::vector<cv::Mat> &in_scores,
  328. const std::vector<cv::Mat> &in_regresssions,
  329. const float threshold,
  330. std::vector<Face> &out_faces) {
  331. out_faces.clear();
  332. for (unsigned int k = 0; k < in_faces.size(); ++k) {
  333. const float* scores_data = in_scores[k].ptr<float>();
  334. const float* reg_data = in_regresssions[k].ptr<float>();
  335. if (scores_data[1] >= threshold) {
  336. Face info = in_faces[k];
  337. info.score = scores_data[1];
  338. std::copy_n(reg_data, NUM_REGRESSIONS, info.regression.begin());
  339. out_faces.push_back(info);
  340. }
  341. }
  342. }
  343. };// GAPI_OCV_KERNEL(RNetPostProc)
  344. GAPI_OCV_KERNEL(OCVONetPostProc, ONetPostProc) {
  345. static void run(const std::vector<Face> &in_faces,
  346. const std::vector<cv::Mat> &in_scores,
  347. const std::vector<cv::Mat> &in_regresssions,
  348. const std::vector<cv::Mat> &in_landmarks,
  349. const float threshold,
  350. std::vector<Face> &out_faces) {
  351. out_faces.clear();
  352. for (unsigned int k = 0; k < in_faces.size(); ++k) {
  353. const float* scores_data = in_scores[k].ptr<float>();
  354. const float* reg_data = in_regresssions[k].ptr<float>();
  355. const float* landmark_data = in_landmarks[k].ptr<float>();
  356. if (scores_data[1] >= threshold) {
  357. Face info = in_faces[k];
  358. info.score = scores_data[1];
  359. for (size_t i = 0; i < 4; ++i) {
  360. info.regression[i] = reg_data[i];
  361. }
  362. float w = info.bbox.x2 - info.bbox.x1 + 1.0f;
  363. float h = info.bbox.y2 - info.bbox.y1 + 1.0f;
  364. for (size_t p = 0; p < NUM_PTS; ++p) {
  365. info.ptsCoords[2 * p] =
  366. info.bbox.x1 + static_cast<float>(landmark_data[NUM_PTS + p]) * w - 1;
  367. info.ptsCoords[2 * p + 1] = info.bbox.y1 + static_cast<float>(landmark_data[p]) * h - 1;
  368. }
  369. out_faces.push_back(info);
  370. }
  371. }
  372. }
  373. };// GAPI_OCV_KERNEL(ONetPostProc)
  374. GAPI_OCV_KERNEL(OCVSwapFaces, SwapFaces) {
  375. static void run(const std::vector<Face> &in_faces,
  376. std::vector<Face> &out_faces) {
  377. std::vector<Face> in_faces_copy = in_faces;
  378. out_faces.clear();
  379. if (!in_faces_copy.empty()) {
  380. for (size_t i = 0; i < in_faces_copy.size(); ++i) {
  381. std::swap(in_faces_copy[i].bbox.x1, in_faces_copy[i].bbox.y1);
  382. std::swap(in_faces_copy[i].bbox.x2, in_faces_copy[i].bbox.y2);
  383. for (size_t p = 0; p < NUM_PTS; ++p) {
  384. std::swap(in_faces_copy[i].ptsCoords[2 * p], in_faces_copy[i].ptsCoords[2 * p + 1]);
  385. }
  386. }
  387. out_faces = in_faces_copy;
  388. }
  389. }
  390. };// GAPI_OCV_KERNEL(SwapFaces)
  391. } // anonymous namespace
  392. } // namespace custom
  393. namespace vis {
  394. namespace {
  395. void bbox(const cv::Mat& m, const cv::Rect& rc) {
  396. cv::rectangle(m, rc, cv::Scalar{ 0,255,0 }, 2, cv::LINE_8, 0);
  397. };
  398. using rectPoints = std::pair<cv::Rect, std::vector<cv::Point>>;
  399. static cv::Mat drawRectsAndPoints(const cv::Mat& img,
  400. const std::vector<rectPoints> data) {
  401. cv::Mat outImg;
  402. img.copyTo(outImg);
  403. for (const auto& el : data) {
  404. vis::bbox(outImg, el.first);
  405. auto pts = el.second;
  406. for (size_t i = 0; i < pts.size(); ++i) {
  407. cv::circle(outImg, pts[i], 3, cv::Scalar(0, 255, 255), 1);
  408. }
  409. }
  410. return outImg;
  411. }
  412. } // anonymous namespace
  413. } // namespace vis
  414. //Infer helper function
  415. namespace {
  416. static inline std::tuple<cv::GMat, cv::GMat> run_mtcnn_p(cv::GMat &in, const std::string &id) {
  417. cv::GInferInputs inputs;
  418. inputs["data"] = in;
  419. auto outputs = cv::gapi::infer<cv::gapi::Generic>(id, inputs);
  420. auto regressions = outputs.at("conv4-2");
  421. auto scores = outputs.at("prob1");
  422. return std::make_tuple(regressions, scores);
  423. }
  424. static inline std::string get_pnet_level_name(const cv::Size &in_size) {
  425. return "MTCNNProposal_" + std::to_string(in_size.width) + "x" + std::to_string(in_size.height);
  426. }
  427. int calculate_scales(const cv::Size &input_size, std::vector<double> &out_scales, std::vector<cv::Size> &out_sizes ) {
  428. //calculate multi - scale and limit the maxinum side to 1000
  429. //pr_scale: limit the maxinum side to 1000, < 1.0
  430. double pr_scale = 1.0;
  431. double h = static_cast<double>(input_size.height);
  432. double w = static_cast<double>(input_size.width);
  433. if (std::min(w, h) > 1000)
  434. {
  435. pr_scale = 1000.0 / std::min(h, w);
  436. w = w * pr_scale;
  437. h = h * pr_scale;
  438. }
  439. else if (std::max(w, h) < 1000)
  440. {
  441. w = w * pr_scale;
  442. h = h * pr_scale;
  443. }
  444. //multi - scale
  445. out_scales.clear();
  446. out_sizes.clear();
  447. const double factor = 0.709;
  448. int factor_count = 0;
  449. double minl = std::min(h, w);
  450. while (minl >= 12)
  451. {
  452. const double current_scale = pr_scale * std::pow(factor, factor_count);
  453. cv::Size current_size(static_cast<int>(static_cast<double>(input_size.width) * current_scale),
  454. static_cast<int>(static_cast<double>(input_size.height) * current_scale));
  455. out_scales.push_back(current_scale);
  456. out_sizes.push_back(current_size);
  457. minl *= factor;
  458. factor_count += 1;
  459. }
  460. return factor_count;
  461. }
  462. int calculate_half_scales(const cv::Size &input_size, std::vector<double>& out_scales, std::vector<cv::Size>& out_sizes) {
  463. double pr_scale = 0.5;
  464. const double h = static_cast<double>(input_size.height);
  465. const double w = static_cast<double>(input_size.width);
  466. //multi - scale
  467. out_scales.clear();
  468. out_sizes.clear();
  469. const double factor = 0.5;
  470. int factor_count = 0;
  471. double minl = std::min(h, w);
  472. while (minl >= 12.0*2.0)
  473. {
  474. const double current_scale = pr_scale;
  475. cv::Size current_size(static_cast<int>(static_cast<double>(input_size.width) * current_scale),
  476. static_cast<int>(static_cast<double>(input_size.height) * current_scale));
  477. out_scales.push_back(current_scale);
  478. out_sizes.push_back(current_size);
  479. minl *= factor;
  480. factor_count += 1;
  481. pr_scale *= 0.5;
  482. }
  483. return factor_count;
  484. }
  // Upper bound on the number of pyramid levels; sizes the per-level arrays in main().
  constexpr int MAX_PYRAMID_LEVELS = 13;
  486. //////////////////////////////////////////////////////////////////////
  487. } // anonymous namespace
  488. int main(int argc, char* argv[]) {
  489. cv::CommandLineParser cmd(argc, argv, keys);
  490. cmd.about(about);
  491. if (cmd.has("help")) {
  492. cmd.printMessage();
  493. return 0;
  494. }
  495. const auto input_file_name = cmd.get<std::string>("input");
  496. const auto model_path_p = cmd.get<std::string>("mtcnnpm");
  497. const auto target_dev_p = cmd.get<std::string>("mtcnnpd");
  498. const auto conf_thresh_p = cmd.get<float>("thrp");
  499. const auto model_path_r = cmd.get<std::string>("mtcnnrm");
  500. const auto target_dev_r = cmd.get<std::string>("mtcnnrd");
  501. const auto conf_thresh_r = cmd.get<float>("thrr");
  502. const auto model_path_o = cmd.get<std::string>("mtcnnom");
  503. const auto target_dev_o = cmd.get<std::string>("mtcnnod");
  504. const auto conf_thresh_o = cmd.get<float>("thro");
  505. const auto use_half_scale = cmd.get<bool>("half_scale");
  506. const auto streaming_queue_capacity = cmd.get<unsigned int>("queue_capacity");
  507. std::vector<cv::Size> level_size;
  508. std::vector<double> scales;
  509. //MTCNN input size
  510. cv::VideoCapture cap;
  511. cap.open(input_file_name);
  512. if (!cap.isOpened())
  513. CV_Assert(false);
  514. auto in_rsz = cv::Size{ static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH)),
  515. static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT)) };
  516. //Calculate scales, number of pyramid levels and sizes for PNet pyramid
  517. auto pyramid_levels = use_half_scale ? calculate_half_scales(in_rsz, scales, level_size) :
  518. calculate_scales(in_rsz, scales, level_size);
  519. CV_Assert(pyramid_levels <= MAX_PYRAMID_LEVELS);
  520. //Proposal part of MTCNN graph
  521. //Preprocessing BGR2RGB + transpose (NCWH is expected instead of NCHW)
  522. cv::GMat in_original;
  523. cv::GMat in_originalRGB = cv::gapi::BGR2RGB(in_original);
  524. cv::GMat in_transposedRGB = cv::gapi::transpose(in_originalRGB);
  525. cv::GOpaque<cv::Size> in_sz = cv::gapi::streaming::size(in_original);
  526. cv::GMat regressions[MAX_PYRAMID_LEVELS];
  527. cv::GMat scores[MAX_PYRAMID_LEVELS];
  528. cv::GArray<custom::Face> nms_p_faces[MAX_PYRAMID_LEVELS];
  529. cv::GArray<custom::Face> total_faces[MAX_PYRAMID_LEVELS];
  530. //The very first PNet pyramid layer to init total_faces[0]
  531. std::tie(regressions[0], scores[0]) = run_mtcnn_p(in_transposedRGB, get_pnet_level_name(level_size[0]));
  532. cv::GArray<custom::Face> faces0 = custom::BuildFaces::on(scores[0], regressions[0], static_cast<float>(scales[0]), conf_thresh_p);
  533. cv::GArray<custom::Face> final_p_faces_for_bb2squares = custom::ApplyRegression::on(faces0, true);
  534. cv::GArray<custom::Face> final_faces_pnet0 = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares);
  535. total_faces[0] = custom::RunNMS::on(final_faces_pnet0, 0.5f, false);
  536. //The rest PNet pyramid layers to accumlate all layers result in total_faces[PYRAMID_LEVELS - 1]]
  537. for (int i = 1; i < pyramid_levels; ++i)
  538. {
  539. std::tie(regressions[i], scores[i]) = run_mtcnn_p(in_transposedRGB, get_pnet_level_name(level_size[i]));
  540. cv::GArray<custom::Face> faces = custom::BuildFaces::on(scores[i], regressions[i], static_cast<float>(scales[i]), conf_thresh_p);
  541. cv::GArray<custom::Face> final_p_faces_for_bb2squares_i = custom::ApplyRegression::on(faces, true);
  542. cv::GArray<custom::Face> final_faces_pnet_i = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares_i);
  543. nms_p_faces[i] = custom::RunNMS::on(final_faces_pnet_i, 0.5f, false);
  544. total_faces[i] = custom::AccumulatePyramidOutputs::on(total_faces[i - 1], nms_p_faces[i]);
  545. }
  546. //Proposal post-processing
  547. cv::GArray<custom::Face> final_faces_pnet = custom::RunNMS::on(total_faces[pyramid_levels - 1], 0.7f, true);
  548. //Refinement part of MTCNN graph
  549. cv::GArray<cv::Rect> faces_roi_pnet = custom::R_O_NetPreProcGetROIs::on(final_faces_pnet, in_sz);
  550. cv::GArray<cv::GMat> regressionsRNet, scoresRNet;
  551. std::tie(regressionsRNet, scoresRNet) = cv::gapi::infer<custom::MTCNNRefinement>(faces_roi_pnet, in_transposedRGB);
  552. //Refinement post-processing
  553. cv::GArray<custom::Face> rnet_post_proc_faces = custom::RNetPostProc::on(final_faces_pnet, scoresRNet, regressionsRNet, conf_thresh_r);
  554. cv::GArray<custom::Face> nms07_r_faces_total = custom::RunNMS::on(rnet_post_proc_faces, 0.7f, false);
  555. cv::GArray<custom::Face> final_r_faces_for_bb2squares = custom::ApplyRegression::on(nms07_r_faces_total, true);
  556. cv::GArray<custom::Face> final_faces_rnet = custom::BBoxesToSquares::on(final_r_faces_for_bb2squares);
  557. //Output part of MTCNN graph
  558. cv::GArray<cv::Rect> faces_roi_rnet = custom::R_O_NetPreProcGetROIs::on(final_faces_rnet, in_sz);
  559. cv::GArray<cv::GMat> regressionsONet, scoresONet, landmarksONet;
  560. std::tie(regressionsONet, landmarksONet, scoresONet) = cv::gapi::infer<custom::MTCNNOutput>(faces_roi_rnet, in_transposedRGB);
  561. //Output post-processing
  562. cv::GArray<custom::Face> onet_post_proc_faces = custom::ONetPostProc::on(final_faces_rnet, scoresONet, regressionsONet, landmarksONet, conf_thresh_o);
  563. cv::GArray<custom::Face> final_o_faces_for_nms07 = custom::ApplyRegression::on(onet_post_proc_faces, true);
  564. cv::GArray<custom::Face> nms07_o_faces_total = custom::RunNMS::on(final_o_faces_for_nms07, 0.7f, true);
  565. cv::GArray<custom::Face> final_faces_onet = custom::SwapFaces::on(nms07_o_faces_total);
  566. cv::GComputation graph_mtcnn(cv::GIn(in_original), cv::GOut(cv::gapi::copy(in_original), final_faces_onet));
  567. // MTCNN Refinement detection network
  568. auto mtcnnr_net = cv::gapi::ie::Params<custom::MTCNNRefinement>{
  569. model_path_r, // path to topology IR
  570. weights_path(model_path_r), // path to weights
  571. target_dev_r, // device specifier
  572. }.cfgOutputLayers({ "conv5-2", "prob1" }).cfgInputLayers({ "data" });
  573. // MTCNN Output detection network
  574. auto mtcnno_net = cv::gapi::ie::Params<custom::MTCNNOutput>{
  575. model_path_o, // path to topology IR
  576. weights_path(model_path_o), // path to weights
  577. target_dev_o, // device specifier
  578. }.cfgOutputLayers({ "conv6-2", "conv6-3", "prob1" }).cfgInputLayers({ "data" });
  579. auto networks_mtcnn = cv::gapi::networks(mtcnnr_net, mtcnno_net);
  580. // MTCNN Proposal detection network
  581. for (int i = 0; i < pyramid_levels; ++i)
  582. {
  583. std::string net_id = get_pnet_level_name(level_size[i]);
  584. std::vector<size_t> reshape_dims = { 1, 3, (size_t)level_size[i].width, (size_t)level_size[i].height };
  585. cv::gapi::ie::Params<cv::gapi::Generic> mtcnnp_net{
  586. net_id, // tag
  587. model_path_p, // path to topology IR
  588. weights_path(model_path_p), // path to weights
  589. target_dev_p, // device specifier
  590. };
  591. mtcnnp_net.cfgInputReshape({ {"data", reshape_dims} });
  592. networks_mtcnn += cv::gapi::networks(mtcnnp_net);
  593. }
  594. auto kernels_mtcnn = cv::gapi::kernels< custom::OCVBuildFaces
  595. , custom::OCVRunNMS
  596. , custom::OCVAccumulatePyramidOutputs
  597. , custom::OCVApplyRegression
  598. , custom::OCVBBoxesToSquares
  599. , custom::OCVR_O_NetPreProcGetROIs
  600. , custom::OCVRNetPostProc
  601. , custom::OCVONetPostProc
  602. , custom::OCVSwapFaces
  603. >();
  604. auto mtcnn_args = cv::compile_args(networks_mtcnn, kernels_mtcnn);
  605. if (streaming_queue_capacity != 0)
  606. mtcnn_args += cv::compile_args(cv::gapi::streaming::queue_capacity{ streaming_queue_capacity });
  607. auto pipeline_mtcnn = graph_mtcnn.compileStreaming(std::move(mtcnn_args));
  608. std::cout << "Reading " << input_file_name << std::endl;
  609. // Input stream
  610. auto in_src = cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input_file_name);
  611. // Set the pipeline source & start the pipeline
  612. pipeline_mtcnn.setSource(cv::gin(in_src));
  613. pipeline_mtcnn.start();
  614. // Declare the output data & run the processing loop
  615. cv::TickMeter tm;
  616. cv::Mat image;
  617. std::vector<custom::Face> out_faces;
  618. tm.start();
  619. int frames = 0;
  620. while (pipeline_mtcnn.pull(cv::gout(image, out_faces))) {
  621. frames++;
  622. std::cout << "Final Faces Size " << out_faces.size() << std::endl;
  623. std::vector<vis::rectPoints> data;
  624. // show the image with faces in it
  625. for (const auto& out_face : out_faces) {
  626. std::vector<cv::Point> pts;
  627. for (size_t p = 0; p < NUM_PTS; ++p) {
  628. pts.push_back(
  629. cv::Point(static_cast<int>(out_face.ptsCoords[2 * p]), static_cast<int>(out_face.ptsCoords[2 * p + 1])));
  630. }
  631. auto rect = out_face.bbox.getRect();
  632. auto d = std::make_pair(rect, pts);
  633. data.push_back(d);
  634. }
  635. // Visualize results on the frame
  636. auto resultImg = vis::drawRectsAndPoints(image, data);
  637. tm.stop();
  638. const auto fps_str = std::to_string(frames / tm.getTimeSec()) + " FPS";
  639. cv::putText(resultImg, fps_str, { 0,32 }, cv::FONT_HERSHEY_SIMPLEX, 1.0, { 0,255,0 }, 2);
  640. cv::imshow("Out", resultImg);
  641. cv::waitKey(1);
  642. out_faces.clear();
  643. tm.start();
  644. }
  645. tm.stop();
  646. std::cout << "Processed " << frames << " frames"
  647. << " (" << frames / tm.getTimeSec() << " FPS)" << std::endl;
  648. return 0;
  649. }