// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "test_precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include "npy_blob.hpp"

namespace opencv_test { namespace {

template<typename TString>
static std::string _tf(TString filename, bool required = true)
{
    String rootFolder = "dnn/";
    return findDataFile(rootFolder + filename, required);
}

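// Test fixture for the high-level dnn model API. Each helper below builds the
// corresponding wrapper (DetectionModel, ClassificationModel, KeypointsModel,
// SegmentationModel, TextRecognitionModel, TextDetectionModel_DB/_EAST), runs it
// on a test image and compares the output with reference data for the current
// (backend, target) pair.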
class Test_Model : public DNNTestLayer
{
public:
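    // Runs DetectionModel::detect() and compares class ids, confidences and
    // bounding boxes with the reference detections via normAssertDetections().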
    void testDetectModel(const std::string& weights, const std::string& cfg,
                         const std::string& imgPath, const std::vector<int>& refClassIds,
                         const std::vector<float>& refConfidences,
                         const std::vector<Rect2d>& refBoxes,
                         double scoreDiff, double iouDiff,
                         double confThreshold = 0.24, double nmsThreshold = 0.0,
                         const Size& size = {-1, -1}, Scalar mean = Scalar(),
                         double scale = 1.0, bool swapRB = false, bool crop = false,
                         bool nmsAcrossClasses = false)
    {
        checkBackend();

        Mat frame = imread(imgPath);
        DetectionModel model(weights, cfg);

        model.setInputSize(size).setInputMean(mean).setInputScale(scale)
             .setInputSwapRB(swapRB).setInputCrop(crop);

        model.setPreferableBackend(backend);
        model.setPreferableTarget(target);

        model.setNmsAcrossClasses(nmsAcrossClasses);

        std::vector<int> classIds;
        std::vector<float> confidences;
        std::vector<Rect> boxes;

        model.detect(frame, classIds, confidences, boxes, confThreshold, nmsThreshold);

        std::vector<Rect2d> boxesDouble(boxes.size());
        for (int i = 0; i < boxes.size(); i++) {
            boxesDouble[i] = boxes[i];
        }

        normAssertDetections(refClassIds, refConfidences, refBoxes, classIds,
                             confidences, boxesDouble, "",
                             confThreshold, scoreDiff, iouDiff);
    }

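    // Runs ClassificationModel::classify() on a single image and checks the
    // top-1 class id exactly and the confidence within +/- norm.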
    void testClassifyModel(const std::string& weights, const std::string& cfg,
                           const std::string& imgPath, std::pair<int, float> ref, float norm,
                           const Size& size = {-1, -1}, Scalar mean = Scalar(),
                           double scale = 1.0, bool swapRB = false, bool crop = false)
    {
        checkBackend();

        Mat frame = imread(imgPath);
        ClassificationModel model(weights, cfg);
        model.setInputSize(size).setInputMean(mean).setInputScale(scale)
             .setInputSwapRB(swapRB).setInputCrop(crop);

        std::pair<int, float> prediction = model.classify(frame);
        EXPECT_EQ(prediction.first, ref.first);
        ASSERT_NEAR(prediction.second, ref.second, norm);
    }

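    // Runs KeypointsModel::estimate() with a 0.5 confidence threshold and
    // compares the flattened keypoint coordinates with the expected matrix.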
    void testKeypointsModel(const std::string& weights, const std::string& cfg,
                            const Mat& frame, const Mat& exp, float norm,
                            const Size& size = {-1, -1}, Scalar mean = Scalar(),
                            double scale = 1.0, bool swapRB = false, bool crop = false)
    {
        checkBackend();
        std::vector<Point2f> points;

        KeypointsModel model(weights, cfg);
        model.setInputSize(size).setInputMean(mean).setInputScale(scale)
             .setInputSwapRB(swapRB).setInputCrop(crop);

        model.setPreferableBackend(backend);
        model.setPreferableTarget(target);

        points = model.estimate(frame, 0.5);

        Mat out = Mat(points).reshape(1);
        normAssert(exp, out, "", norm, norm);
    }

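    // Runs SegmentationModel::segment() and compares the resulting class-id
    // mask with a reference mask loaded as a grayscale image.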
    void testSegmentationModel(const std::string& weights_file, const std::string& config_file,
                               const std::string& inImgPath, const std::string& outImgPath,
                               float norm, const Size& size = {-1, -1}, Scalar mean = Scalar(),
                               double scale = 1.0, bool swapRB = false, bool crop = false)
    {
        checkBackend();

        Mat frame = imread(inImgPath);
        Mat mask;
        Mat exp = imread(outImgPath, 0);

        SegmentationModel model(weights_file, config_file);
        model.setInputSize(size).setInputMean(mean).setInputScale(scale)
             .setInputSwapRB(swapRB).setInputCrop(crop);

        model.segment(frame, mask);
        normAssert(mask, exp, "", norm, norm);
    }

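    // Recognizes text on the full frame and on two identical ROIs; all three
    // results must match the expected sequence.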
    void testTextRecognitionModel(const std::string& weights, const std::string& cfg,
                                  const std::string& imgPath, const std::string& seq,
                                  const std::string& decodeType, const std::vector<std::string>& vocabulary,
                                  const Size& size = {-1, -1}, Scalar mean = Scalar(),
                                  double scale = 1.0, bool swapRB = false, bool crop = false)
    {
        checkBackend();

        Mat frame = imread(imgPath, IMREAD_GRAYSCALE);

        TextRecognitionModel model(weights, cfg);
        model.setDecodeType(decodeType)
             .setVocabulary(vocabulary)
             .setInputSize(size).setInputMean(mean).setInputScale(scale)
             .setInputSwapRB(swapRB).setInputCrop(crop);

        model.setPreferableBackend(backend);
        model.setPreferableTarget(target);

        std::string result = model.recognize(frame);
        EXPECT_EQ(result, seq) << "Full frame: " << imgPath;

        std::vector<Rect> rois;
        rois.push_back(Rect(0, 0, frame.cols, frame.rows));
        rois.push_back(Rect(0, 0, frame.cols, frame.rows));  // twice
        std::vector<std::string> results;
        model.recognize(frame, rois, results);
        EXPECT_EQ((size_t)2u, results.size()) << "ROI: " << imgPath;
        EXPECT_EQ(results[0], seq) << "ROI[0]: " << imgPath;
        EXPECT_EQ(results[1], seq) << "ROI[1]: " << imgPath;
    }

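    // Exercises the DB text detector through both APIs: detectTextRectangles()
    // (rotated rectangles, converted here to integer quadrangles) and detect()
    // (quadrangles directly); both outputs are compared with the ground truth.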
    void testTextDetectionModelByDB(const std::string& weights, const std::string& cfg,
                                    const std::string& imgPath, const std::vector<std::vector<Point>>& gt,
                                    float binThresh, float polyThresh,
                                    uint maxCandidates, double unclipRatio,
                                    const Size& size = {-1, -1}, Scalar mean = Scalar(),
                                    double scale = 1.0, bool swapRB = false, bool crop = false)
    {
        checkBackend();

        Mat frame = imread(imgPath);

        TextDetectionModel_DB model(weights, cfg);
        model.setBinaryThreshold(binThresh)
             .setPolygonThreshold(polyThresh)
             .setUnclipRatio(unclipRatio)
             .setMaxCandidates(maxCandidates)
             .setInputSize(size).setInputMean(mean).setInputScale(scale)
             .setInputSwapRB(swapRB).setInputCrop(crop);

        model.setPreferableBackend(backend);
        model.setPreferableTarget(target);

        // 1. Check common TextDetectionModel API through RotatedRect
        std::vector<cv::RotatedRect> results;
        model.detectTextRectangles(frame, results);

        EXPECT_GT(results.size(), (size_t)0);

        std::vector< std::vector<Point> > contours;
        for (size_t i = 0; i < results.size(); i++)
        {
            const RotatedRect& box = results[i];
            Mat contour;
            boxPoints(box, contour);
            std::vector<Point> contour2i(4);
            for (int i = 0; i < 4; i++)
            {
                contour2i[i].x = cvRound(contour.at<float>(i, 0));
                contour2i[i].y = cvRound(contour.at<float>(i, 1));
            }
            contours.push_back(contour2i);
        }
#if 0  // test debug
        Mat result = frame.clone();
        drawContours(result, contours, -1, Scalar(0, 0, 255), 1);
        imshow("result", result);  // imwrite("result.png", result);
        waitKey(0);
#endif
        normAssertTextDetections(gt, contours, "", 0.05f);

        // 2. Check quadrangle-based API
        // std::vector< std::vector<Point> > contours;
        model.detect(frame, contours);
#if 0  // test debug
        Mat result = frame.clone();
        drawContours(result, contours, -1, Scalar(0, 0, 255), 1);
        imshow("result_contours", result);  // imwrite("result_contours.png", result);
        waitKey(0);
#endif
        normAssertTextDetections(gt, contours, "", 0.05f);
    }

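    // Exercises the EAST text detector: exactly one rotated rectangle is
    // expected, and its center, size and angle are compared with the ground
    // truth within the given tolerances (pixels / degrees).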
    void testTextDetectionModelByEAST(
            const std::string& weights, const std::string& cfg,
            const std::string& imgPath, const std::vector<RotatedRect>& gt,
            float confThresh, float nmsThresh,
            const Size& size = {-1, -1}, Scalar mean = Scalar(),
            double scale = 1.0, bool swapRB = false, bool crop = false,
            double eps_center = 5/*pixels*/, double eps_size = 5/*pixels*/, double eps_angle = 1
    )
    {
        checkBackend();

        Mat frame = imread(imgPath);

        TextDetectionModel_EAST model(weights, cfg);
        model.setConfidenceThreshold(confThresh)
             .setNMSThreshold(nmsThresh)
             .setInputSize(size).setInputMean(mean).setInputScale(scale)
             .setInputSwapRB(swapRB).setInputCrop(crop);

        model.setPreferableBackend(backend);
        model.setPreferableTarget(target);

        std::vector<cv::RotatedRect> results;
        model.detectTextRectangles(frame, results);

        EXPECT_EQ(results.size(), (size_t)1);
        for (size_t i = 0; i < results.size(); i++)
        {
            const RotatedRect& box = results[i];
#if 0  // test debug
            Mat contour;
            boxPoints(box, contour);
            std::vector<Point> contour2i(4);
            for (int i = 0; i < 4; i++)
            {
                contour2i[i].x = cvRound(contour.at<float>(i, 0));
                contour2i[i].y = cvRound(contour.at<float>(i, 1));
            }
            std::vector< std::vector<Point> > contours;
            contours.push_back(contour2i);

            Mat result = frame.clone();
            drawContours(result, contours, -1, Scalar(0, 0, 255), 1);
            imshow("result", result);  // imwrite("result.png", result);
            waitKey(0);
#endif
            const RotatedRect& gtBox = gt[i];
            EXPECT_NEAR(box.center.x, gtBox.center.x, eps_center);
            EXPECT_NEAR(box.center.y, gtBox.center.y, eps_center);
            EXPECT_NEAR(box.size.width, gtBox.size.width, eps_size);
            EXPECT_NEAR(box.size.height, gtBox.size.height, eps_size);
            EXPECT_NEAR(box.angle, gtBox.angle, eps_angle);
        }
    }
};

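// Each TEST_P case below runs once for every (backend, target) pair supplied by
// dnnBackendsAndTargets() (see INSTANTIATE_TEST_CASE_P at the end of the file).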
TEST_P(Test_Model, Classify)
{
    std::pair<int, float> ref(652, 0.641789);

    std::string img_path = _tf("grace_hopper_227.png");
    std::string config_file = _tf("bvlc_alexnet.prototxt");
    std::string weights_file = _tf("bvlc_alexnet.caffemodel", false);

    Size size{227, 227};
    float norm = 1e-4;

    testClassifyModel(weights_file, config_file, img_path, ref, norm, size);
}

TEST_P(Test_Model, DetectRegion)
{
    applyTestTag(
        CV_TEST_TAG_LONG,
        CV_TEST_TAG_MEMORY_2GB
    );

#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
    // accuracy
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif

#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000)  // nGraph compilation failure
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif

#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
    // FIXIT DNN_BACKEND_INFERENCE_ENGINE is misused
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
#endif

#if defined(INF_ENGINE_RELEASE)
    if (target == DNN_TARGET_MYRIAD
        && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
#endif

    std::vector<int> refClassIds = {6, 1, 11};
    std::vector<float> refConfidences = {0.750469f, 0.780879f, 0.901615f};
    std::vector<Rect2d> refBoxes = {Rect2d(240, 53, 135, 72),
                                    Rect2d(112, 109, 192, 200),
                                    Rect2d(58, 141, 117, 249)};

    std::string img_path = _tf("dog416.png");
    std::string weights_file = _tf("yolo-voc.weights", false);
    std::string config_file = _tf("yolo-voc.cfg");

    double scale = 1.0 / 255.0;
    Size size{416, 416};
    bool swapRB = true;

    double confThreshold = 0.24;
    double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 0.397 : 0.4;
    double scoreDiff = 8e-5, iouDiff = 1e-5;
    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16)
    {
        scoreDiff = 1e-2;
        iouDiff = 1.6e-2;
    }

    testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences,
                    refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size,
                    Scalar(), scale, swapRB);
}

TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses)
{
    applyTestTag(
        CV_TEST_TAG_LONG,
        CV_TEST_TAG_MEMORY_2GB
    );

#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
    // accuracy
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif

#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000)  // nGraph compilation failure
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif

#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
#endif

#if defined(INF_ENGINE_RELEASE)
    if (target == DNN_TARGET_MYRIAD
        && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
#endif

    std::vector<int> refClassIds = { 6, 11 };
    std::vector<float> refConfidences = { 0.750469f, 0.901615f };
    std::vector<Rect2d> refBoxes = { Rect2d(240, 53, 135, 72),
                                     Rect2d(58, 141, 117, 249) };

    std::string img_path = _tf("dog416.png");
    std::string weights_file = _tf("yolo-voc.weights", false);
    std::string config_file = _tf("yolo-voc.cfg");

    double scale = 1.0 / 255.0;
    Size size{ 416, 416 };
    bool swapRB = true;
    bool crop = false;
    bool nmsAcrossClasses = true;

    double confThreshold = 0.24;
    double nmsThreshold = 0.15;  // same value for all targets
    double scoreDiff = 8e-5, iouDiff = 1e-5;
    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16)
    {
        scoreDiff = 1e-2;
        iouDiff = 1.6e-2;
    }

    testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences,
                    refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size,
                    Scalar(), scale, swapRB, crop,
                    nmsAcrossClasses);
}

TEST_P(Test_Model, DetectionOutput)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
    // Exception: Function contains several inputs and outputs with one friendly name! (HETERO bug?)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif

#if defined(INF_ENGINE_RELEASE)
    // FIXIT DNN_BACKEND_INFERENCE_ENGINE is misused
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);

    if (target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
#endif

    std::vector<int> refClassIds = {7, 12};
    std::vector<float> refConfidences = {0.991359f, 0.94786f};
    std::vector<Rect2d> refBoxes = {Rect2d(491, 81, 212, 98),
                                    Rect2d(132, 223, 207, 344)};

    std::string img_path = _tf("dog416.png");
    std::string weights_file = _tf("resnet50_rfcn_final.caffemodel", false);
    std::string config_file = _tf("rfcn_pascal_voc_resnet50.prototxt");

    Scalar mean = Scalar(102.9801, 115.9465, 122.7717);
    Size size{800, 600};

    double scoreDiff = default_l1, iouDiff = 1e-5;
    float confThreshold = 0.8;
    double nmsThreshold = 0.0;
    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CUDA_FP16)
    {
        if (backend == DNN_BACKEND_OPENCV)
            scoreDiff = 4e-3;
        else
            scoreDiff = 2e-2;
        iouDiff = 1.8e-1;
    }

    testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
                    scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean);
}

TEST_P(Test_Model, DetectionMobilenetSSD)
{
    Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy"));
    ref = ref.reshape(1, ref.size[2]);

    std::string img_path = _tf("street.png");
    Mat frame = imread(img_path);
    int frameWidth = frame.cols;
    int frameHeight = frame.rows;

    std::vector<int> refClassIds;
    std::vector<float> refConfidences;
    std::vector<Rect2d> refBoxes;
    for (int i = 0; i < ref.rows; i++)
    {
        refClassIds.emplace_back(ref.at<float>(i, 1));
        refConfidences.emplace_back(ref.at<float>(i, 2));
        int left = ref.at<float>(i, 3) * frameWidth;
        int top = ref.at<float>(i, 4) * frameHeight;
        int right = ref.at<float>(i, 5) * frameWidth;
        int bottom = ref.at<float>(i, 6) * frameHeight;
        int width = right - left + 1;
        int height = bottom - top + 1;
        refBoxes.emplace_back(left, top, width, height);
    }

    std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false);
    std::string config_file = _tf("MobileNetSSD_deploy.prototxt");

    Scalar mean = Scalar(127.5, 127.5, 127.5);
    double scale = 1.0 / 127.5;
    Size size{300, 300};

    double scoreDiff = 1e-5, iouDiff = 1e-5;
    if (target == DNN_TARGET_OPENCL_FP16)
    {
        scoreDiff = 1.7e-2;
        iouDiff = 6.91e-2;
    }
    else if (target == DNN_TARGET_MYRIAD)
    {
        scoreDiff = 0.017;
        if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
            iouDiff = 0.1;
    }
    else if (target == DNN_TARGET_CUDA_FP16)
    {
        scoreDiff = 0.002;
        iouDiff = 1e-2;
    }
    float confThreshold = FLT_MIN;
    double nmsThreshold = 0.0;

    testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
                    scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean, scale);
}

TEST_P(Test_Model, Keypoints_pose)
{
    if (target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
#ifdef HAVE_INF_ENGINE
    if (target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif

    Mat inp = imread(_tf("pose.png"));
    std::string weights = _tf("onnx/models/lightweight_pose_estimation_201912.onnx", false);
    float kpdata[] = {
        237.65625f, 78.25f, 237.65625f, 136.9375f,
        190.125f, 136.9375f, 142.59375f, 195.625f, 79.21875f, 176.0625f, 285.1875f, 117.375f,
        348.5625f, 195.625f, 396.09375f, 176.0625f, 205.96875f, 313.0f, 205.96875f, 430.375f,
        205.96875f, 528.1875f, 269.34375f, 293.4375f, 253.5f, 430.375f, 237.65625f, 528.1875f,
        221.8125f, 58.6875f, 253.5f, 58.6875f, 205.96875f, 78.25f, 253.5f, 58.6875f
    };
    Mat exp(18, 2, CV_32FC1, kpdata);

    Size size{256, 256};
    float norm = 1e-4;
    double scale = 1.0/255;
    Scalar mean = Scalar(128, 128, 128);
    bool swapRB = false;

    // Ref. Range: [58.6875, 508.625]
    if (target == DNN_TARGET_CUDA_FP16)
        norm = 20;  // l1 = 1.5, lInf = 20

    testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB);
}

TEST_P(Test_Model, Keypoints_face)
{
#if defined(INF_ENGINE_RELEASE)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif

    Mat inp = imread(_tf("gray_face.png"), 0);
    std::string weights = _tf("onnx/models/facial_keypoints.onnx", false);
    Mat exp = blobFromNPY(_tf("facial_keypoints_exp.npy"));

    Size size{224, 224};
    double scale = 1.0/255;
    Scalar mean = Scalar();
    bool swapRB = false;

    // Ref. Range: [-1.1784188, 1.7758257]
    float norm = 1e-4;
    if (target == DNN_TARGET_OPENCL_FP16)
        norm = 5e-3;
    if (target == DNN_TARGET_MYRIAD)
    {
        // Myriad2: l1 = 0.0004, lInf = 0.002
        // MyriadX: l1 = 0.003, lInf = 0.009
        norm = 0.009;
    }
    if (target == DNN_TARGET_CUDA_FP16)
        norm = 0.004;  // l1 = 0.0006, lInf = 0.004

    testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB);
}

TEST_P(Test_Model, Detection_normalized)
{
    std::string img_path = _tf("grace_hopper_227.png");
    std::vector<int> refClassIds = {15};
    std::vector<float> refConfidences = {0.999222f};
    std::vector<Rect2d> refBoxes = {Rect2d(0, 4, 227, 222)};

    std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false);
    std::string config_file = _tf("MobileNetSSD_deploy.prototxt");

    Scalar mean = Scalar(127.5, 127.5, 127.5);
    double scale = 1.0 / 127.5;
    Size size{300, 300};

    double scoreDiff = 1e-5, iouDiff = 1e-5;
    float confThreshold = FLT_MIN;
    double nmsThreshold = 0.0;
    if (target == DNN_TARGET_CUDA)
    {
        scoreDiff = 3e-4;
        iouDiff = 0.018;
    }
    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16)
    {
        scoreDiff = 5e-3;
        iouDiff = 0.09;
    }
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020040000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
    {
        scoreDiff = 0.02;
        iouDiff = 0.1f;
    }
#endif

    testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
                    scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean, scale);
}

TEST_P(Test_Model, Segmentation)
{
    applyTestTag(
        CV_TEST_TAG_MEMORY_2GB
    );

    std::string inp = _tf("dog416.png");
    std::string config_file = _tf("fcn8s-heavy-pascal.prototxt");
    std::string weights_file = _tf("fcn8s-heavy-pascal.caffemodel", false);
    std::string exp = _tf("segmentation_exp.png");

    Size size{128, 128};
    float norm = 0;
    double scale = 1.0;
    Scalar mean = Scalar();
    bool swapRB = false;

    testSegmentationModel(weights_file, config_file, inp, exp, norm, size, mean, scale, swapRB);
}

TEST_P(Test_Model, TextRecognition)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
    // IE Exception: Ngraph operation Reshape with name 71 has dynamic output shape on 0 port, but CPU plug-in supports only static shape
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
        applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
                     CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
        );
#endif

    std::string imgPath = _tf("text_rec_test.png");
    std::string weightPath = _tf("onnx/models/crnn.onnx", false);
    std::string seq = "welcome";

    Size size{100, 32};
    double scale = 1.0 / 127.5;
    Scalar mean = Scalar(127.5);
    std::string decodeType = "CTC-greedy";
    std::vector<std::string> vocabulary = {"0","1","2","3","4","5","6","7","8","9",
                                           "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"};

    testTextRecognitionModel(weightPath, "", imgPath, seq, decodeType, vocabulary, size, mean, scale);
}

TEST_P(Test_Model, TextRecognitionWithCTCPrefixBeamSearch)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
    // IE Exception: Ngraph operation Reshape with name 71 has dynamic output shape on 0 port, but CPU plug-in supports only static shape
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
        applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
                     CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
        );
#endif

    std::string imgPath = _tf("text_rec_test.png");
    std::string weightPath = _tf("onnx/models/crnn.onnx", false);
    std::string seq = "welcome";

    Size size{100, 32};
    double scale = 1.0 / 127.5;
    Scalar mean = Scalar(127.5);
    std::string decodeType = "CTC-prefix-beam-search";
    std::vector<std::string> vocabulary = {"0","1","2","3","4","5","6","7","8","9",
                                           "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"};

    testTextRecognitionModel(weightPath, "", imgPath, seq, decodeType, vocabulary, size, mean, scale);
}

TEST_P(Test_Model, TextDetectionByDB)
{
    if (target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);

    std::string imgPath = _tf("text_det_test1.png");
    std::string weightPath = _tf("onnx/models/DB_TD500_resnet50.onnx", false);

    // GroundTruth
    std::vector<std::vector<Point>> gt = {
        { Point(142, 193), Point(136, 164), Point(213, 150), Point(219, 178) },
        { Point(136, 165), Point(122, 114), Point(319, 71), Point(330, 122) }
    };

    Size size{736, 736};
    double scale = 1.0 / 255.0;
    Scalar mean = Scalar(122.67891434, 116.66876762, 104.00698793);

    float binThresh = 0.3;
    float polyThresh = 0.5;
    uint maxCandidates = 200;
    double unclipRatio = 2.0;

    testTextDetectionModelByDB(weightPath, "", imgPath, gt, binThresh, polyThresh, maxCandidates, unclipRatio, size, mean, scale);
}

TEST_P(Test_Model, TextDetectionByEAST)
{
    std::string imgPath = _tf("text_det_test2.jpg");
    std::string weightPath = _tf("frozen_east_text_detection.pb", false);

    // GroundTruth
    std::vector<RotatedRect> gt = {
        RotatedRect(Point2f(657.55f, 409.5f), Size2f(316.84f, 62.45f), -4.79)
    };

    // Model parameters
    Size size{320, 320};
    double scale = 1.0;
    Scalar mean = Scalar(123.68, 116.78, 103.94);
    bool swapRB = true;

    // Detection algorithm parameters
    float confThresh = 0.5;
    float nmsThresh = 0.4;

    double eps_center = 5/*pixels*/;
    double eps_size = 5/*pixels*/;
    double eps_angle = 1;

    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_MYRIAD)
    {
        eps_center = 10;
        eps_size = 25;
        eps_angle = 3;
    }

    testTextDetectionModelByEAST(weightPath, "", imgPath, gt, confThresh, nmsThresh, size, mean, scale, swapRB, false/*crop*/,
                                 eps_center, eps_size, eps_angle
    );
}

INSTANTIATE_TEST_CASE_P(/**/, Test_Model, dnnBackendsAndTargets());

}} // namespace