// face_detect.cpp
// OpenCV sample: face detection with YuNet (FaceDetectorYN) on images or a
// video/camera stream, plus optional face recognition with SFace
// (FaceRecognizerSF) when two input images are given.
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/objdetect.hpp>

#include <cctype>
#include <iostream>

using namespace cv;
using namespace std;
  8. static
  9. void visualize(Mat& input, int frame, Mat& faces, double fps, int thickness = 2)
  10. {
  11. std::string fpsString = cv::format("FPS : %.2f", (float)fps);
  12. if (frame >= 0)
  13. cout << "Frame " << frame << ", ";
  14. cout << "FPS: " << fpsString << endl;
  15. for (int i = 0; i < faces.rows; i++)
  16. {
  17. // Print results
  18. cout << "Face " << i
  19. << ", top-left coordinates: (" << faces.at<float>(i, 0) << ", " << faces.at<float>(i, 1) << "), "
  20. << "box width: " << faces.at<float>(i, 2) << ", box height: " << faces.at<float>(i, 3) << ", "
  21. << "score: " << cv::format("%.2f", faces.at<float>(i, 14))
  22. << endl;
  23. // Draw bounding box
  24. rectangle(input, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness);
  25. // Draw landmarks
  26. circle(input, Point2i(int(faces.at<float>(i, 4)), int(faces.at<float>(i, 5))), 2, Scalar(255, 0, 0), thickness);
  27. circle(input, Point2i(int(faces.at<float>(i, 6)), int(faces.at<float>(i, 7))), 2, Scalar(0, 0, 255), thickness);
  28. circle(input, Point2i(int(faces.at<float>(i, 8)), int(faces.at<float>(i, 9))), 2, Scalar(0, 255, 0), thickness);
  29. circle(input, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255, 0, 255), thickness);
  30. circle(input, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar(0, 255, 255), thickness);
  31. }
  32. putText(input, fpsString, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
  33. }
  34. int main(int argc, char** argv)
  35. {
  36. CommandLineParser parser(argc, argv,
  37. "{help h | | Print this message}"
  38. "{image1 i1 | | Path to the input image1. Omit for detecting through VideoCapture}"
  39. "{image2 i2 | | Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm}"
  40. "{video v | 0 | Path to the input video}"
  41. "{scale sc | 1.0 | Scale factor used to resize input video frames}"
  42. "{fd_model fd | face_detection_yunet_2021dec.onnx| Path to the model. Download yunet.onnx in https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet}"
  43. "{fr_model fr | face_recognition_sface_2021dec.onnx | Path to the face recognition model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface}"
  44. "{score_threshold | 0.9 | Filter out faces of score < score_threshold}"
  45. "{nms_threshold | 0.3 | Suppress bounding boxes of iou >= nms_threshold}"
  46. "{top_k | 5000 | Keep top_k bounding boxes before NMS}"
  47. "{save s | false | Set true to save results. This flag is invalid when using camera}"
  48. );
  49. if (parser.has("help"))
  50. {
  51. parser.printMessage();
  52. return 0;
  53. }
  54. String fd_modelPath = parser.get<String>("fd_model");
  55. String fr_modelPath = parser.get<String>("fr_model");
  56. float scoreThreshold = parser.get<float>("score_threshold");
  57. float nmsThreshold = parser.get<float>("nms_threshold");
  58. int topK = parser.get<int>("top_k");
  59. bool save = parser.get<bool>("save");
  60. float scale = parser.get<float>("scale");
  61. double cosine_similar_thresh = 0.363;
  62. double l2norm_similar_thresh = 1.128;
  63. //! [initialize_FaceDetectorYN]
  64. // Initialize FaceDetectorYN
  65. Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
  66. //! [initialize_FaceDetectorYN]
  67. TickMeter tm;
  68. // If input is an image
  69. if (parser.has("image1"))
  70. {
  71. String input1 = parser.get<String>("image1");
  72. Mat image1 = imread(samples::findFile(input1));
  73. if (image1.empty())
  74. {
  75. std::cerr << "Cannot read image: " << input1 << std::endl;
  76. return 2;
  77. }
  78. int imageWidth = int(image1.cols * scale);
  79. int imageHeight = int(image1.rows * scale);
  80. resize(image1, image1, Size(imageWidth, imageHeight));
  81. tm.start();
  82. //! [inference]
  83. // Set input size before inference
  84. detector->setInputSize(image1.size());
  85. Mat faces1;
  86. detector->detect(image1, faces1);
  87. if (faces1.rows < 1)
  88. {
  89. std::cerr << "Cannot find a face in " << input1 << std::endl;
  90. return 1;
  91. }
  92. //! [inference]
  93. tm.stop();
  94. // Draw results on the input image
  95. visualize(image1, -1, faces1, tm.getFPS());
  96. // Save results if save is true
  97. if (save)
  98. {
  99. cout << "Saving result.jpg...\n";
  100. imwrite("result.jpg", image1);
  101. }
  102. // Visualize results
  103. imshow("image1", image1);
  104. pollKey(); // handle UI events to show content
  105. if (parser.has("image2"))
  106. {
  107. String input2 = parser.get<String>("image2");
  108. Mat image2 = imread(samples::findFile(input2));
  109. if (image2.empty())
  110. {
  111. std::cerr << "Cannot read image2: " << input2 << std::endl;
  112. return 2;
  113. }
  114. tm.reset();
  115. tm.start();
  116. detector->setInputSize(image2.size());
  117. Mat faces2;
  118. detector->detect(image2, faces2);
  119. if (faces2.rows < 1)
  120. {
  121. std::cerr << "Cannot find a face in " << input2 << std::endl;
  122. return 1;
  123. }
  124. tm.stop();
  125. visualize(image2, -1, faces2, tm.getFPS());
  126. if (save)
  127. {
  128. cout << "Saving result2.jpg...\n";
  129. imwrite("result2.jpg", image2);
  130. }
  131. imshow("image2", image2);
  132. pollKey();
  133. //! [initialize_FaceRecognizerSF]
  134. // Initialize FaceRecognizerSF
  135. Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "");
  136. //! [initialize_FaceRecognizerSF]
  137. //! [facerecognizer]
  138. // Aligning and cropping facial image through the first face of faces detected.
  139. Mat aligned_face1, aligned_face2;
  140. faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
  141. faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);
  142. // Run feature extraction with given aligned_face
  143. Mat feature1, feature2;
  144. faceRecognizer->feature(aligned_face1, feature1);
  145. feature1 = feature1.clone();
  146. faceRecognizer->feature(aligned_face2, feature2);
  147. feature2 = feature2.clone();
  148. //! [facerecognizer]
  149. //! [match]
  150. double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
  151. double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
  152. //! [match]
  153. if (cos_score >= cosine_similar_thresh)
  154. {
  155. std::cout << "They have the same identity;";
  156. }
  157. else
  158. {
  159. std::cout << "They have different identities;";
  160. }
  161. std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";
  162. if (L2_score <= l2norm_similar_thresh)
  163. {
  164. std::cout << "They have the same identity;";
  165. }
  166. else
  167. {
  168. std::cout << "They have different identities.";
  169. }
  170. std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
  171. }
  172. cout << "Press any key to exit..." << endl;
  173. waitKey(0);
  174. }
  175. else
  176. {
  177. int frameWidth, frameHeight;
  178. VideoCapture capture;
  179. std::string video = parser.get<string>("video");
  180. if (video.size() == 1 && isdigit(video[0]))
  181. capture.open(parser.get<int>("video"));
  182. else
  183. capture.open(samples::findFileOrKeep(video)); // keep GStreamer pipelines
  184. if (capture.isOpened())
  185. {
  186. frameWidth = int(capture.get(CAP_PROP_FRAME_WIDTH) * scale);
  187. frameHeight = int(capture.get(CAP_PROP_FRAME_HEIGHT) * scale);
  188. cout << "Video " << video
  189. << ": width=" << frameWidth
  190. << ", height=" << frameHeight
  191. << endl;
  192. }
  193. else
  194. {
  195. cout << "Could not initialize video capturing: " << video << "\n";
  196. return 1;
  197. }
  198. detector->setInputSize(Size(frameWidth, frameHeight));
  199. cout << "Press 'SPACE' to save frame, any other key to exit..." << endl;
  200. int nFrame = 0;
  201. for (;;)
  202. {
  203. // Get frame
  204. Mat frame;
  205. if (!capture.read(frame))
  206. {
  207. cerr << "Can't grab frame! Stop\n";
  208. break;
  209. }
  210. resize(frame, frame, Size(frameWidth, frameHeight));
  211. // Inference
  212. Mat faces;
  213. tm.start();
  214. detector->detect(frame, faces);
  215. tm.stop();
  216. Mat result = frame.clone();
  217. // Draw results on the input image
  218. visualize(result, nFrame, faces, tm.getFPS());
  219. // Visualize results
  220. imshow("Live", result);
  221. int key = waitKey(1);
  222. bool saveFrame = save;
  223. if (key == ' ')
  224. {
  225. saveFrame = true;
  226. key = 0; // handled
  227. }
  228. if (saveFrame)
  229. {
  230. std::string frame_name = cv::format("frame_%05d.png", nFrame);
  231. std::string result_name = cv::format("result_%05d.jpg", nFrame);
  232. cout << "Saving '" << frame_name << "' and '" << result_name << "' ...\n";
  233. imwrite(frame_name, frame);
  234. imwrite(result_name, result);
  235. }
  236. ++nFrame;
  237. if (key > 0)
  238. break;
  239. }
  240. cout << "Processed " << nFrame << " frames" << endl;
  241. }
  242. cout << "Done." << endl;
  243. return 0;
  244. }