// tracking_by_matching.cpp

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>   // resize, rectangle, putText
#include <opencv2/highgui.hpp>
#include <opencv2/tracking/tracking_by_matching.hpp>
#include <iostream>

#ifdef HAVE_OPENCV_DNN
#include <opencv2/dnn.hpp>

using namespace std;
using namespace cv;
using namespace cv::detail::tracking;
using namespace cv::detail::tracking::tbm;
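
// Command-line options understood by cv::CommandLineParser; the default value of each option
// sits between the second pair of '|' separators (see help() below for a description of each).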
static const char* keys =
{ "{video_name         |    | video name                                }"
  "{start_frame        |0   | Start frame                               }"
  "{frame_step         |1   | Frame step                                }"
  "{detector_model     |    | Path to detector's Caffe model            }"
  "{detector_weights   |    | Path to detector's Caffe weights          }"
  "{desired_class_id   |-1  | The desired class that should be tracked  }"
};
static void help()
{
    cout << "\nThis example shows the functionality of the \"Tracking-by-Matching\" approach:\n"
            "a detector is used to detect objects on frames,\n"
            "matching is used to find correspondences between new detections and tracked objects.\n"
            "Detection is performed by a DNN detection network every `--frame_step`-th frame.\n"
            "Pass the network's .prototxt file as the parameter `--detector_model`, and its .caffemodel file"
            " as the parameter `--detector_weights`.\n"
            "(An example of such a detection network is the popular MobileNet_SSD network trained on the VOC dataset.)\n"
            "If the `--desired_class_id` parameter is set, the detection results are filtered by the class id"
            " returned by the detection network.\n"
            "(That is, if the detection net was trained on the VOC dataset, then to track pedestrians set --desired_class_id=15.)\n"
            "An example <video_name> can be found in opencv_extra/testdata/cv/tracking/\n"
            "Call:\n"
            "./example_tracking_tracking_by_matching --video_name=<video_name> --detector_model=<detector_model_path> --detector_weights=<detector_weights_path> \\\n"
            "    [--start_frame=<start_frame>] \\\n"
            "    [--frame_step=<frame_step>] \\\n"
            "    [--desired_class_id=<desired_class_id>]\n"
         << endl;

    cout << "\n\nHot keys: \n"
            "\tq - quit the program\n"
            "\tp - pause/resume video\n";
}
cv::Ptr<ITrackerByMatching> createTrackerByMatchingWithFastDescriptor();
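
// Thin wrapper around a Caffe SSD-style detection network loaded through the OpenCV DNN module.
// The model is read once in the constructor; detect() is then called per frame and returns the
// detections as tbm::TrackedObjects, ready to be fed into the tracker.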
class DnnObjectDetector
{
public:
    DnnObjectDetector(const String& net_caffe_model_path, const String& net_caffe_weights_path,
                      int desired_class_id=-1,
                      float confidence_threshold = 0.2,
                      // the following parameters are the defaults for the popular MobileNet_SSD Caffe model
                      const String& net_input_name="data",
                      const String& net_output_name="detection_out",
                      double net_scalefactor=0.007843,
                      const Size& net_size = Size(300,300),
                      const Scalar& net_mean = Scalar(127.5, 127.5, 127.5),
                      bool net_swapRB=false)
        :desired_class_id(desired_class_id),
        confidence_threshold(confidence_threshold),
        net_input_name(net_input_name),
        net_output_name(net_output_name),
        net_scalefactor(net_scalefactor),
        net_size(net_size),
        net_mean(net_mean),
        net_swapRB(net_swapRB)
    {
        net = dnn::readNetFromCaffe(net_caffe_model_path, net_caffe_weights_path);
        if (net.empty())
            CV_Error(Error::StsError, "Cannot read Caffe net");
    }
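
    // Run the network on one frame and convert each row of the SSD-style output blob
    // ([image_id, class_id, confidence, xmin, ymin, xmax, ymax], with relative coordinates)
    // into a TrackedObject, skipping low-confidence and wrong-class detections.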
    TrackedObjects detect(const cv::Mat& frame, int frame_idx)
    {
        Mat resized_frame;
        resize(frame, resized_frame, net_size);
        Mat inputBlob = cv::dnn::blobFromImage(resized_frame, net_scalefactor, net_size, net_mean, net_swapRB);

        net.setInput(inputBlob, net_input_name);
        Mat detection = net.forward(net_output_name);
        Mat detection_as_mat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

        TrackedObjects res;
        for (int i = 0; i < detection_as_mat.rows; i++)
        {
            float cur_confidence = detection_as_mat.at<float>(i, 2);
            int cur_class_id = static_cast<int>(detection_as_mat.at<float>(i, 1));
            int x_left = static_cast<int>(detection_as_mat.at<float>(i, 3) * frame.cols);
            int y_bottom = static_cast<int>(detection_as_mat.at<float>(i, 4) * frame.rows);
            int x_right = static_cast<int>(detection_as_mat.at<float>(i, 5) * frame.cols);
            int y_top = static_cast<int>(detection_as_mat.at<float>(i, 6) * frame.rows);

            Rect cur_rect(x_left, y_bottom, (x_right - x_left), (y_top - y_bottom));

            if (cur_confidence < confidence_threshold)
                continue;
            if ((desired_class_id >= 0) && (cur_class_id != desired_class_id))
                continue;

            // clipping by frame size
            cur_rect = cur_rect & Rect(Point(), frame.size());
            if (cur_rect.empty())
                continue;

            TrackedObject cur_obj(cur_rect, cur_confidence, frame_idx, -1);
            res.push_back(cur_obj);
        }
        return res;
    }
private:
    cv::dnn::Net net;
    int desired_class_id;
    float confidence_threshold;
    String net_input_name;
    String net_output_name;
    double net_scalefactor;
    Size net_size;
    Scalar net_mean;
    bool net_swapRB;
};
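
// Create a tracker-by-matching instance with default TrackerParams and a cheap ("fast")
// appearance model: each detection is described by a 16x32 resized image patch, and patches
// are compared with template matching. Only the fast descriptor/distance pair is configured here.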
cv::Ptr<ITrackerByMatching>
createTrackerByMatchingWithFastDescriptor() {
    tbm::TrackerParams params;

    cv::Ptr<ITrackerByMatching> tracker = createTrackerByMatching(params);

    std::shared_ptr<IImageDescriptor> descriptor_fast =
        std::make_shared<ResizedImageDescriptor>(
            cv::Size(16, 32), cv::InterpolationFlags::INTER_LINEAR);
    std::shared_ptr<IDescriptorDistance> distance_fast =
        std::make_shared<MatchTemplateDistance>();

    tracker->setDescriptorFast(descriptor_fast);
    tracker->setDistanceFast(distance_fast);

    return tracker;
}
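
// Entry point: parse the command line, open the video, create the detector and the tracker,
// run the per-frame processing loop, and report the average processing FPS at the end.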
int main( int argc, char** argv ){
    CommandLineParser parser( argc, argv, keys );

    cv::Ptr<ITrackerByMatching> tracker = createTrackerByMatchingWithFastDescriptor();

    String video_name = parser.get<String>("video_name");
    int start_frame = parser.get<int>("start_frame");
    int frame_step = parser.get<int>("frame_step");
    String detector_model = parser.get<String>("detector_model");
    String detector_weights = parser.get<String>("detector_weights");
    int desired_class_id = parser.get<int>("desired_class_id");

    if( video_name.empty() || detector_model.empty() || detector_weights.empty() )
    {
        help();
        return -1;
    }

    // open the capture
    VideoCapture cap;
    cap.open( video_name );
    cap.set( CAP_PROP_POS_FRAMES, start_frame );

    if( !cap.isOpened() )
    {
        help();
        cout << "***Could not initialize capturing...***\n";
        cout << "Current parameter values: \n";
        parser.printMessage();
        return -1;
    }

    // If you use the popular MobileNet_SSD detector, the default parameters may be used.
    // Otherwise, set your own parameters (net_mean, net_scalefactor, etc).
    DnnObjectDetector detector(detector_model, detector_weights, desired_class_id);

    Mat frame;
    namedWindow( "Tracking by Matching", WINDOW_AUTOSIZE );

    int frame_counter = -1;
    int64 time_total = 0;
    bool paused = false;
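
    // Main loop: grab a frame, run the detector on every `frame_step`-th frame, feed the
    // detections and a timestamp to the tracker, then draw the tracks and detections.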
    for ( ;; )
    {
        if( paused )
        {
            char c = (char) waitKey(30);
            if (c == 'p')
                paused = !paused;
            if (c == 'q')
                break;
            continue;
        }

        cap >> frame;
        if(frame.empty()){
            break;
        }

        frame_counter++;
        if (frame_counter < start_frame)
            continue;
        if (frame_counter % frame_step != 0)
            continue;

        int64 frame_time = getTickCount();

        TrackedObjects detections = detector.detect(frame, frame_counter);

        // timestamp in milliseconds (the video is assumed to run at 30 fps)
        uint64_t cur_timestamp = static_cast<uint64_t>(1000.0 / 30 * frame_counter);
        tracker->process(frame, detections, cur_timestamp);

        frame_time = getTickCount() - frame_time;
        time_total += frame_time;

        // Draw the colored "worms" (tracks).
        frame = tracker->drawActiveTracks(frame);

        // Draw all detected objects on the frame in BLUE.
        for (const auto &detection : detections) {
            cv::rectangle(frame, detection.rect, cv::Scalar(255, 0, 0), 3);
        }

        // Draw only the tracked detections in RED and print the track ID and
        // detection confidence.
        for (const auto &detection : tracker->trackedDetections()) {
            cv::rectangle(frame, detection.rect, cv::Scalar(0, 0, 255), 3);
            std::string text = std::to_string(detection.object_id) +
                " conf: " + std::to_string(detection.confidence);
            cv::putText(frame, text, detection.rect.tl(), cv::FONT_HERSHEY_COMPLEX,
                        1.0, cv::Scalar(0, 0, 255), 3);
        }

        imshow( "Tracking by Matching", frame );

        char c = (char) waitKey( 2 );
        if (c == 'q')
            break;
        if (c == 'p')
            paused = !paused;
    }
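
    // Average processing FPS over the frames that were actually processed.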
    double fps = frame_counter / (time_total / getTickFrequency());
    cout << "FPS: " << fps << endl;

    return 0;
}
#else // #ifdef HAVE_OPENCV_DNN
int main(int, char**){
    CV_Error(cv::Error::StsNotImplemented, "At the moment the sample 'tracking_by_matching' can work only when the opencv_dnn module is built.");
}
#endif // #ifdef HAVE_OPENCV_DNN