text_recognition_cnn.cpp 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. #include <opencv2/text.hpp>
  2. #include <opencv2/highgui.hpp>
  3. #include <opencv2/imgproc.hpp>
  4. #include <opencv2/dnn.hpp>
  5. #include <iostream>
  6. #include <fstream>
  7. using namespace cv;
  8. using namespace std;
  9. namespace
  10. {
  /**
   * Prints the demo banner, the command-line usage and the model download
   * hints to standard output.
   *
   * @param progFname Name of the executable as it should appear in the usage
   *                  line (the caller passes argv[0]).
   */
  void printHelpStr(const std::string& progFname)
  {
      std::cout
          << " Demo of text recognition CNN for text detection.\n"
          << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015\n\n"
          // The program consumes a single positional argument, the image to
          // process; it never writes an output file, so none is advertised.
          << " Usage: " << progFname << " <input_image>\n"
          << " Caffe Model files (textbox.prototxt, TextBoxes_icdar13.caffemodel)\n"
          << " must be in the current directory. See the documentation of text::TextDetectorCNN class to get download links.\n"
          << " Obtaining text recognition Caffe Model files in linux shell:\n"
          << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel\n"
          << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt\n"
          << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt\n"
          // One trailing blank line, and a single flush for the whole message.
          << std::endl;
  }
  /**
   * Tells whether a file can be opened for reading.
   *
   * @param filename Path to probe.
   * @return true when an input stream constructed on the path is in a good
   *         state, false otherwise.
   */
  bool fileExists (const std::string& filename)
  {
      // The temporary stream is closed again as soon as the expression ends.
      return std::ifstream(filename.c_str()).good();
  }
  28. void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, std::vector<int>& indexes)
  29. {
  30. for (size_t i = 0; i < indexes.size(); i++)
  31. {
  32. if (src.type() == CV_8UC3)
  33. {
  34. Rect currrentBox = groups[indexes[i]];
  35. rectangle(src, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA);
  36. String label = format("%.2f", probs[indexes[i]]);
  37. std::cout << "text box: " << currrentBox << " confidence: " << probs[indexes[i]] << "\n";
  38. int baseLine = 0;
  39. Size labelSize = getTextSize(label, FONT_HERSHEY_PLAIN, 1, 1, &baseLine);
  40. int yLeftBottom = std::max(currrentBox.y, labelSize.height);
  41. rectangle(src, Point(currrentBox.x, yLeftBottom - labelSize.height),
  42. Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED);
  43. putText(src, label, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA);
  44. }
  45. else
  46. rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
  47. }
  48. }
  49. }
  50. int main(int argc, const char * argv[])
  51. {
  52. if (argc < 2)
  53. {
  54. printHelpStr(argv[0]);
  55. cout << "Insufiecient parameters. Aborting!" << endl;
  56. exit(1);
  57. }
  58. const string modelArch = "textbox.prototxt";
  59. const string moddelWeights = "TextBoxes_icdar13.caffemodel";
  60. if (!fileExists(modelArch) || !fileExists(moddelWeights))
  61. {
  62. printHelpStr(argv[0]);
  63. cout << "Model files not found in the current directory. Aborting!" << endl;
  64. exit(1);
  65. }
  66. Mat image = imread(String(argv[1]), IMREAD_COLOR);
  67. cout << "Starting Text Box Demo" << endl;
  68. Ptr<text::TextDetectorCNN> textSpotter =
  69. text::TextDetectorCNN::create(modelArch, moddelWeights);
  70. vector<Rect> bbox;
  71. vector<float> outProbabillities;
  72. textSpotter->detect(image, bbox, outProbabillities);
  73. std::vector<int> indexes;
  74. cv::dnn::NMSBoxes(bbox, outProbabillities, 0.4f, 0.5f, indexes);
  75. Mat image_copy = image.clone();
  76. textbox_draw(image_copy, bbox, outProbabillities, indexes);
  77. imshow("Text detection", image_copy);
  78. image_copy = image.clone();
  79. Ptr<text::OCRHolisticWordRecognizer> wordSpotter =
  80. text::OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt");
  81. for(size_t i = 0; i < indexes.size(); i++)
  82. {
  83. Mat wordImg;
  84. cvtColor(image(bbox[indexes[i]]), wordImg, COLOR_BGR2GRAY);
  85. string word;
  86. vector<float> confs;
  87. wordSpotter->run(wordImg, word, NULL, NULL, &confs);
  88. Rect currrentBox = bbox[indexes[i]];
  89. rectangle(image_copy, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA);
  90. int baseLine = 0;
  91. Size labelSize = getTextSize(word, FONT_HERSHEY_PLAIN, 1, 1, &baseLine);
  92. int yLeftBottom = std::max(currrentBox.y, labelSize.height);
  93. rectangle(image_copy, Point(currrentBox.x, yLeftBottom - labelSize.height),
  94. Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED);
  95. putText(image_copy, word, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA);
  96. }
  97. imshow("Text recognition", image_copy);
  98. cout << "Recognition finished. Press any key to exit.\n";
  99. waitKey();
  100. return 0;
  101. }