// segmented_word_recognition.cpp (4.1 KB)

  /*
   * segmented_word_recognition.cpp
   *
   * A demo program on segmented word recognition.
   * Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.
   *
   * Created on: Jul 31, 2015
   * Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
   */
  #include <algorithm>
  #include <iostream>
  #include <string>
  #include <vector>

  #include "opencv2/core/utility.hpp"
  #include "opencv2/highgui.hpp"
  #include "opencv2/imgproc.hpp"
  #include "opencv2/text.hpp"

  using namespace std;
  using namespace cv;
  using namespace text;
  18. int main(int argc, char* argv[]) {
  19. const String keys =
  20. "{help h usage ? | | print this message.}"
  21. "{@image | | source image for recognition.}"
  22. "{@mask | | binary segmentation mask where each contour is a character.}"
  23. "{lexicon lex l | | (optional) lexicon provided as a list of comma separated words.}"
  24. ;
  25. CommandLineParser parser(argc, argv, keys);
  26. parser.about("\nSegmented word recognition.\nA demo program on segmented word recognition. Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.\n");
  27. String filename1 = parser.get<String>(0);
  28. String filename2 = parser.get<String>(1);
  29. parser.printMessage();
  30. cout << endl << endl;
  31. if ((parser.has("help")) || (filename1.size()==0))
  32. {
  33. return 0;
  34. }
  35. if (!parser.check())
  36. {
  37. parser.printErrors();
  38. return 0;
  39. }
  40. Mat image = imread(filename1);
  41. Mat mask;
  42. if (filename2.size() > 0)
  43. mask = imread(filename2);
  44. else
  45. image.copyTo(mask);
  46. // be sure the mask is a binary image
  47. cvtColor(mask, mask, COLOR_BGR2GRAY);
  48. threshold(mask, mask, 128., 255, THRESH_BINARY);
  49. // character recognition vocabulary
  50. string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
  51. // Emission probabilities for the HMM language model (identity matrix by default)
  52. Mat emissionProbabilities = Mat::eye((int)voc.size(), (int)voc.size(), CV_64FC1);
  53. // Bigram transition probabilities for the HMM language model
  54. Mat transitionProbabilities;
  55. string lex = parser.get<string>("lex");
  56. if (lex.size()>0)
  57. {
  58. // Build tailored language model for the provided lexicon
  59. vector<string> lexicon;
  60. size_t pos = 0;
  61. string delimiter = ",";
  62. std::string token;
  63. while ((pos = lex.find(delimiter)) != std::string::npos) {
  64. token = lex.substr(0, pos);
  65. lexicon.push_back(token);
  66. lex.erase(0, pos + delimiter.length());
  67. }
  68. lexicon.push_back(lex);
  69. createOCRHMMTransitionsTable(voc,lexicon,transitionProbabilities);
  70. } else {
  71. // Or load the generic language model (from Aspell English dictionary)
  72. FileStorage fs("./OCRHMM_transitions_table.xml", FileStorage::READ);
  73. fs["transition_probabilities"] >> transitionProbabilities;
  74. fs.release();
  75. }
  76. Ptr<OCRTesseract> ocrTes = OCRTesseract::create();
  77. Ptr<OCRHMMDecoder> ocrNM = OCRHMMDecoder::create(
  78. loadOCRHMMClassifierNM("./OCRHMM_knn_model_data.xml.gz"),
  79. voc, transitionProbabilities, emissionProbabilities);
  80. Ptr<OCRHMMDecoder> ocrCNN = OCRHMMDecoder::create(
  81. loadOCRHMMClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
  82. voc, transitionProbabilities, emissionProbabilities);
  83. std::string output;
  84. double t_r = (double)getTickCount();
  85. ocrTes->run(mask, output);
  86. output.erase(remove(output.begin(), output.end(), '\n'), output.end());
  87. cout << " OCR_Tesseract output \"" << output << "\". Done in "
  88. << ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
  89. t_r = (double)getTickCount();
  90. ocrNM->run(mask, output);
  91. cout << " OCR_NM output \"" << output << "\". Done in "
  92. << ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
  93. t_r = (double)getTickCount();
  94. ocrCNN->run(image, mask, output);
  95. cout << " OCR_CNN output \"" << output << "\". Done in "
  96. << ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
  97. }