// human_parsing.cpp
//
// this sample demonstrates parsing (segmenting) human body parts from an image using opencv's dnn,
// based on https://github.com/Engineering-Course/LIP_JPPNet
//
// get the pretrained model from: https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0
//
#include <iostream>

#include <opencv2/dnn.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>

using namespace cv;
  11. static Mat parse_human(const Mat &image, const std::string &model, int backend=dnn::DNN_BACKEND_DEFAULT, int target=dnn::DNN_TARGET_CPU) {
  12. // this network expects an image and a flipped copy as input
  13. Mat flipped;
  14. flip(image, flipped, 1);
  15. std::vector<Mat> batch;
  16. batch.push_back(image);
  17. batch.push_back(flipped);
  18. Mat blob = dnn::blobFromImages(batch, 1.0, Size(), Scalar(104.00698793, 116.66876762, 122.67891434));
  19. dnn::Net net = dnn::readNet(model);
  20. net.setPreferableBackend(backend);
  21. net.setPreferableTarget(target);
  22. net.setInput(blob);
  23. Mat out = net.forward();
  24. // expected output: [2, 20, 384, 384], (2 lists(orig, flipped) of 20 body part heatmaps 384x384)
  25. // LIP classes:
  26. // 0 Background, 1 Hat, 2 Hair, 3 Glove, 4 Sunglasses, 5 UpperClothes, 6 Dress, 7 Coat, 8 Socks, 9 Pants
  27. // 10 Jumpsuits, 11 Scarf, 12 Skirt, 13 Face, 14 LeftArm, 15 RightArm, 16 LeftLeg, 17 RightLeg, 18 LeftShoe. 19 RightShoe
  28. Vec3b colors[] = {
  29. Vec3b(0, 0, 0), Vec3b(128, 0, 0), Vec3b(255, 0, 0), Vec3b(0, 85, 0), Vec3b(170, 0, 51), Vec3b(255, 85, 0),
  30. Vec3b(0, 0, 85), Vec3b(0, 119, 221), Vec3b(85, 85, 0), Vec3b(0, 85, 85), Vec3b(85, 51, 0), Vec3b(52, 86, 128),
  31. Vec3b(0, 128, 0), Vec3b(0, 0, 255), Vec3b(51, 170, 221), Vec3b(0, 255, 255), Vec3b(85, 255, 170),
  32. Vec3b(170, 255, 85), Vec3b(255, 255, 0), Vec3b(255, 170, 0)
  33. };
  34. Mat segm(image.size(), CV_8UC3, Scalar(0,0,0));
  35. Mat maxval(image.size(), CV_32F, Scalar(0));
  36. // iterate over body part heatmaps (LIP classes)
  37. for (int i=0; i<out.size[1]; i++) {
  38. // resize heatmaps to original image size
  39. // "head" is the original image result, "tail" the flipped copy
  40. Mat head, h(out.size[2], out.size[3], CV_32F, out.ptr<float>(0,i));
  41. resize(h, head, image.size());
  42. // we have to swap the last 3 pairs in the "tail" list
  43. static int tail_order[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,15,14,17,16,19,18};
  44. Mat tail, t(out.size[2], out.size[3], CV_32F, out.ptr<float>(1,tail_order[i]));
  45. resize(t, tail, image.size());
  46. flip(tail, tail, 1);
  47. // mix original and flipped result
  48. Mat avg = (head + tail) * 0.5;
  49. // write color if prob value > maxval
  50. Mat cmask;
  51. compare(avg, maxval, cmask, CMP_GT);
  52. segm.setTo(colors[i], cmask);
  53. // keep largest values for next iteration
  54. max(avg, maxval, maxval);
  55. }
  56. cvtColor(segm, segm, COLOR_RGB2BGR);
  57. return segm;
  58. }
  59. int main(int argc, char**argv)
  60. {
  61. CommandLineParser parser(argc,argv,
  62. "{help h | | show help screen / args}"
  63. "{image i | | person image to process }"
  64. "{model m |lip_jppnet_384.pb| network model}"
  65. "{backend b | 0 | Choose one of computation backends: "
  66. "0: automatically (by default), "
  67. "1: Halide language (http://halide-lang.org/), "
  68. "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
  69. "3: OpenCV implementation, "
  70. "4: VKCOM, "
  71. "5: CUDA }"
  72. "{target t | 0 | Choose one of target computation devices: "
  73. "0: CPU target (by default), "
  74. "1: OpenCL, "
  75. "2: OpenCL fp16 (half-float precision), "
  76. "3: VPU, "
  77. "4: Vulkan, "
  78. "6: CUDA, "
  79. "7: CUDA fp16 (half-float preprocess) }"
  80. );
  81. if (argc == 1 || parser.has("help"))
  82. {
  83. parser.printMessage();
  84. return 0;
  85. }
  86. std::string model = parser.get<std::string>("model");
  87. std::string image = parser.get<std::string>("image");
  88. int backend = parser.get<int>("backend");
  89. int target = parser.get<int>("target");
  90. Mat input = imread(image);
  91. Mat segm = parse_human(input, model, backend, target);
  92. imshow("human parsing", segm);
  93. waitKey();
  94. return 0;
  95. }