123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165 |
- #include <iostream>
- #include <fstream>
- #include <opencv2/imgproc.hpp>
- #include <opencv2/highgui.hpp>
- #include <opencv2/dnn/dnn.hpp>
- using namespace cv;
- using namespace cv::dnn;
// Command-line option table handed to cv::CommandLineParser in main().
// Each entry is written as "{ name alias | default value | help text }".
// Note: main() only reads `inputImage` when `evaluate` is false, and it has no
// camera-capture path, so the help text states that the image is required.
std::string keys =
    "{ help h | | Print help message. }"
    "{ inputImage i | | Path to an input image. Required unless evaluate is true. }"
    "{ modelPath mp | | Path to a binary .onnx file contains trained DB detector model. "
    "Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}"
    "{ inputHeight ih |736| image height of the model input. It should be multiple by 32.}"
    "{ inputWidth iw |736| image width of the model input. It should be multiple by 32.}"
    "{ binaryThreshold bt |0.3| Confidence threshold of the binary map. }"
    "{ polygonThreshold pt |0.5| Confidence threshold of polygons. }"
    "{ maxCandidate max |200| Max candidates of polygons. }"
    "{ unclipRatio ratio |2.0| unclip ratio. }"
    "{ evaluate e |false| false: predict with input images; true: evaluate on benchmarks. }"
    "{ evalDataPath edp | | Path to benchmarks for evaluation. "
    "Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}";
/**
 * Splits `s` into the fields separated by `delimiter`.
 *
 * @param s         String to split.
 * @param delimiter Separator character.
 * @param elems     Output vector; it is cleared first and then filled with
 *                  the fields in order of appearance.
 *
 * Empty fields caused by a leading delimiter or by consecutive delimiters are
 * kept. An empty field after a trailing delimiter is dropped, and an empty
 * input produces no fields at all.
 */
static
void split(const std::string& s, char delimiter, std::vector<std::string>& elems)
{
    elems.clear();
    std::size_t prev_pos = 0;
    std::size_t pos = 0;
    while ((pos = s.find(delimiter, prev_pos)) != std::string::npos)
    {
        elems.emplace_back(s.substr(prev_pos, pos - prev_pos));
        prev_pos = pos + 1;
    }
    // Whatever follows the last delimiter is the final field; skip it when empty.
    if (prev_pos < s.size())
        elems.emplace_back(s.substr(prev_pos));
}
- int main(int argc, char** argv)
- {
- // Parse arguments
- CommandLineParser parser(argc, argv, keys);
- parser.about("Use this script to run the official PyTorch implementation (https://github.com/MhLiao/DB) of "
- "Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947)\n"
- "The current version of this script is a variant of the original network without deformable convolution");
- if (argc == 1 || parser.has("help"))
- {
- parser.printMessage();
- return 0;
- }
- float binThresh = parser.get<float>("binaryThreshold");
- float polyThresh = parser.get<float>("polygonThreshold");
- uint maxCandidates = parser.get<uint>("maxCandidate");
- String modelPath = parser.get<String>("modelPath");
- double unclipRatio = parser.get<double>("unclipRatio");
- int height = parser.get<int>("inputHeight");
- int width = parser.get<int>("inputWidth");
- if (!parser.check())
- {
- parser.printErrors();
- return 1;
- }
- // Load the network
- CV_Assert(!modelPath.empty());
- TextDetectionModel_DB detector(modelPath);
- detector.setBinaryThreshold(binThresh)
- .setPolygonThreshold(polyThresh)
- .setUnclipRatio(unclipRatio)
- .setMaxCandidates(maxCandidates);
- double scale = 1.0 / 255.0;
- Size inputSize = Size(width, height);
- Scalar mean = Scalar(122.67891434, 116.66876762, 104.00698793);
- detector.setInputParams(scale, inputSize, mean);
- // Create a window
- static const std::string winName = "TextDetectionModel";
- if (parser.get<bool>("evaluate")) {
- // for evaluation
- String evalDataPath = parser.get<String>("evalDataPath");
- CV_Assert(!evalDataPath.empty());
- String testListPath = evalDataPath + "/test_list.txt";
- std::ifstream testList;
- testList.open(testListPath);
- CV_Assert(testList.is_open());
- // Create a window for showing groundtruth
- static const std::string winNameGT = "GT";
- String testImgPath;
- while (std::getline(testList, testImgPath)) {
- String imgPath = evalDataPath + "/test_images/" + testImgPath;
- std::cout << "Image Path: " << imgPath << std::endl;
- Mat frame = imread(samples::findFile(imgPath), IMREAD_COLOR);
- CV_Assert(!frame.empty());
- Mat src = frame.clone();
- // Inference
- std::vector<std::vector<Point>> results;
- detector.detect(frame, results);
- polylines(frame, results, true, Scalar(0, 255, 0), 2);
- imshow(winName, frame);
- // load groundtruth
- String imgName = testImgPath.substr(0, testImgPath.length() - 4);
- String gtPath = evalDataPath + "/test_gts/" + imgName + ".txt";
- // std::cout << gtPath << std::endl;
- std::ifstream gtFile;
- gtFile.open(gtPath);
- CV_Assert(gtFile.is_open());
- std::vector<std::vector<Point>> gts;
- String gtLine;
- while (std::getline(gtFile, gtLine)) {
- size_t splitLoc = gtLine.find_last_of(',');
- String text = gtLine.substr(splitLoc+1);
- if ( text == "###\r" || text == "1") {
- // ignore difficult instances
- continue;
- }
- gtLine = gtLine.substr(0, splitLoc);
- std::vector<std::string> v;
- split(gtLine, ',', v);
- std::vector<int> loc;
- std::vector<Point> pts;
- for (auto && s : v) {
- loc.push_back(atoi(s.c_str()));
- }
- for (size_t i = 0; i < loc.size() / 2; i++) {
- pts.push_back(Point(loc[2 * i], loc[2 * i + 1]));
- }
- gts.push_back(pts);
- }
- polylines(src, gts, true, Scalar(0, 255, 0), 2);
- imshow(winNameGT, src);
- waitKey();
- }
- } else {
- // Open an image file
- CV_Assert(parser.has("inputImage"));
- Mat frame = imread(samples::findFile(parser.get<String>("inputImage")));
- CV_Assert(!frame.empty());
- // Detect
- std::vector<std::vector<Point>> results;
- detector.detect(frame, results);
- polylines(frame, results, true, Scalar(0, 255, 0), 2);
- imshow(winName, frame);
- waitKey();
- }
- return 0;
- }
|