// infer_ie_onnx_hybrid.cpp
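//
// A hybrid G-API pipeline: face detection runs through the OpenVINO
// Inference Engine (IE) backend, while emotion recognition runs through
// the ONNX backend, both within a single streaming computation.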

#include <algorithm>
#include <chrono>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"

#include "opencv2/gapi.hpp"
#include "opencv2/gapi/core.hpp"
#include "opencv2/gapi/imgproc.hpp"
#include "opencv2/gapi/infer.hpp"
#include "opencv2/gapi/infer/ie.hpp"
#include "opencv2/gapi/infer/onnx.hpp"
#include "opencv2/gapi/cpu/gcpukernel.hpp"
#include "opencv2/gapi/streaming/cap.hpp"
#include "opencv2/gapi/streaming/desync.hpp"
namespace {
const std::string keys =
    "{ h help |   | print this help message }"
    "{ input  |   | Path to an input video file }"
    "{ fdm    |   | IE face detection model IR }"
    "{ fdw    |   | IE face detection model weights }"
    "{ fdd    |   | IE face detection device }"
    "{ emom   |   | ONNX emotions recognition model }"
    "{ output |   | (Optional) Path to an output video file }"
    ;
} // namespace
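// An illustrative invocation (the binary name and model paths below are
// placeholders; substitute your own build target and model files):
//   $ ./infer_ie_onnx_hybrid \
//         --input=input.mp4 \
//         --fdm=face-detection-adas-0001.xml \
//         --fdw=face-detection-adas-0001.bin \
//         --fdd=CPU \
//         --emom=emotion-ferplus-8.onnx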
namespace custom {
G_API_NET(Faces,    <cv::GMat(cv::GMat)>, "face-detector");
G_API_NET(Emotions, <cv::GMat(cv::GMat)>, "emotions-recognition");
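// G_API_NET declares a network "type": its inference signature (a GMat in,
// a GMat out) plus a unique tag. These types are later bound to concrete
// backend parameters (cv::gapi::ie::Params / cv::gapi::onnx::Params) in main().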
G_API_OP(PostProc, <cv::GArray<cv::Rect>(cv::GMat, cv::GMat)>, "custom.fd_postproc") {
    static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GMatDesc &) {
        return cv::empty_array_desc();
    }
};
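// The kernel below assumes an SSD-like detector output: a blob where every
// detection is a run of 7 floats [image_id, label, confidence, left, top,
// right, bottom], with box coordinates normalized to [0,1]. It decodes that
// blob into pixel-space cv::Rects, which is why the frame is also passed in.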
GAPI_OCV_KERNEL(OCVPostProc, PostProc) {
    static void run(const cv::Mat &in_ssd_result,
                    const cv::Mat &in_frame,
                    std::vector<cv::Rect> &out_faces) {
        const int MAX_PROPOSALS = 200;
        const int OBJECT_SIZE   = 7;
        const cv::Size upscale = in_frame.size();
        const cv::Rect surface({0,0}, upscale);
        out_faces.clear();
        const float *data = in_ssd_result.ptr<float>();
        for (int i = 0; i < MAX_PROPOSALS; i++) {
            const float image_id   = data[i * OBJECT_SIZE + 0]; // batch id
            const float confidence = data[i * OBJECT_SIZE + 2];
            const float rc_left    = data[i * OBJECT_SIZE + 3];
            const float rc_top     = data[i * OBJECT_SIZE + 4];
            const float rc_right   = data[i * OBJECT_SIZE + 5];
            const float rc_bottom  = data[i * OBJECT_SIZE + 6];
            if (image_id < 0.f) { // indicates end of detections
                break;
            }
            if (confidence < 0.5f) { // drop weak detections
                continue;
            }
            // Convert normalized coordinates to pixels and clip to the frame
            cv::Rect rc;
            rc.x      = static_cast<int>(rc_left   * upscale.width);
            rc.y      = static_cast<int>(rc_top    * upscale.height);
            rc.width  = static_cast<int>(rc_right  * upscale.width)  - rc.x;
            rc.height = static_cast<int>(rc_bottom * upscale.height) - rc.y;
            out_faces.push_back(rc & surface);
        }
    }
};
} // namespace custom
namespace labels {
// Labels as defined in
// https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus
const std::string emotions[] = {
    "neutral", "happiness", "surprise", "sadness", "anger", "disgust", "fear", "contempt"
};
namespace {
template<typename Iter>
std::vector<float> softmax(Iter begin, Iter end) {
    std::vector<float> prob(end - begin, 0.f);
    std::transform(begin, end, prob.begin(), [](float x) { return std::exp(x); });
    float sum = std::accumulate(prob.begin(), prob.end(), 0.0f);
    for (int i = 0; i < static_cast<int>(prob.size()); i++)
        prob[i] /= sum;
    return prob;
}
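// The emotion model outputs raw scores (logits) for the 8 classes above,
// so softmax() is applied to obtain probabilities before taking the argmax.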
void DrawResults(cv::Mat &frame,
                 const std::vector<cv::Rect> &faces,
                 const std::vector<cv::Mat> &out_emotions) {
    CV_Assert(faces.size() == out_emotions.size());
    for (auto it = faces.begin(); it != faces.end(); ++it) {
        const auto idx = std::distance(faces.begin(), it);
        const auto &rc = *it;
        const float *emotions_data = out_emotions[idx].ptr<float>();
        auto sm = softmax(emotions_data, emotions_data + 8);
        const auto emo_id = std::max_element(sm.begin(), sm.end()) - sm.begin();
        const int ATTRIB_OFFSET = 15;
        cv::rectangle(frame, rc, {0, 255, 0}, 4);
        cv::putText(frame, emotions[emo_id],
                    cv::Point(rc.x, rc.y - ATTRIB_OFFSET),
                    cv::FONT_HERSHEY_COMPLEX_SMALL,
                    1,
                    cv::Scalar(0, 0, 255));
        std::cout << emotions[emo_id] << " at " << rc << std::endl;
    }
}
} // anonymous namespace
} // namespace labels
int main(int argc, char *argv[])
{
    cv::CommandLineParser cmd(argc, argv, keys);
    if (cmd.has("help")) {
        cmd.printMessage();
        return 0;
    }
    const std::string input  = cmd.get<std::string>("input");
    const std::string output = cmd.get<std::string>("output");
    // OpenVINO face detection parameters
    auto det_net = cv::gapi::ie::Params<custom::Faces> {
        cmd.get<std::string>("fdm"), // path to the topology IR
        cmd.get<std::string>("fdw"), // path to the weights
        cmd.get<std::string>("fdd"), // device specifier
    };
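    // Params<custom::Faces> above binds the IR files and the target device
    // to the Faces network type; the IE backend takes the input/output layer
    // information from the model itself.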
    // ONNX emotions recognition parameters
    auto emo_net = cv::gapi::onnx::Params<custom::Emotions> {
        cmd.get<std::string>("emom"), // path to the ONNX model
    }.cfgNormalize({false}); // the model accepts FP32 input in the 0..255 range
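    // cfgNormalize({false}) disables the backend's default input
    // normalization (one flag per model input): emotion-ferplus expects
    // raw 0..255 values rather than a [0,1] range.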
    auto kernels  = cv::gapi::kernels<custom::OCVPostProc>();
    auto networks = cv::gapi::networks(det_net, emo_net);
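    // Graph construction: cv::gapi::copy() forwards the input frame straight
    // to the output, while desync() splits off a branch which the streaming
    // executor is allowed to run at its own (possibly slower) pace.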
    cv::GMat in;
    cv::GMat bgr = cv::gapi::copy(in);
    cv::GMat frame = cv::gapi::streaming::desync(bgr);
    cv::GMat detections = cv::gapi::infer<custom::Faces>(frame);
    cv::GArray<cv::Rect> faces = custom::PostProc::on(detections, frame);
    cv::GArray<cv::GMat> emotions = cv::gapi::infer<custom::Emotions>(faces, frame);
    auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(bgr, faces, emotions))
        .compileStreaming(cv::compile_args(kernels, networks));
    auto in_src = cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input);
    pipeline.setSource(cv::gin(in_src));

    cv::util::optional<cv::Mat> out_frame;
    cv::util::optional<std::vector<cv::Rect>> out_faces;
    cv::util::optional<std::vector<cv::Mat>> out_emotions;
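    // Because of desync, every pipeline output is optional: a single pull()
    // may carry a fresh frame, fresh inference results, or both.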
    cv::Mat last_mat;
    std::vector<cv::Rect> last_faces;
    std::vector<cv::Mat> last_emotions;
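    // Cache the most recent results so frames that arrive without fresh
    // detections are annotated with the last known ones.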
    cv::VideoWriter writer;
    cv::TickMeter tm;
    std::size_t frames = 0u;
    tm.start();
    pipeline.start();
    while (pipeline.pull(cv::gout(out_frame, out_faces, out_emotions))) {
        if (out_faces && out_emotions) {
            last_faces    = *out_faces;
            last_emotions = *out_emotions;
        }
        if (out_frame) {
            // Count only pulls which carry a new frame: with desync,
            // pull() may also fire for inference-only updates
            ++frames;
            last_mat = *out_frame;
            labels::DrawResults(last_mat, last_faces, last_emotions);
            if (!output.empty()) {
                if (!writer.isOpened()) {
                    const auto sz = cv::Size{last_mat.cols, last_mat.rows};
                    writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz);
                    CV_Assert(writer.isOpened());
                }
                writer << last_mat;
            }
        }
        if (!last_mat.empty()) {
            cv::imshow("Out", last_mat);
            cv::waitKey(1);
        }
    }
    tm.stop();
    std::cout << "Processed " << frames << " frames"
              << " (" << frames / tm.getTimeSec() << " FPS)" << std::endl;
    return 0;
}