cropped_word_recognition.cpp 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  /*
   * cropped_word_recognition.cpp
   *
   * A demo program of text recognition in a given cropped word.
   * Shows the use of the OCRBeamSearchDecoder class API using the provided default classifier.
   *
   * Created on: Jul 9, 2015
   * Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
   */
  10. #include "opencv2/text.hpp"
  11. #include "opencv2/core/utility.hpp"
  12. #include "opencv2/highgui.hpp"
  13. #include "opencv2/imgproc.hpp"
  14. #include <iostream>
  15. using namespace std;
  16. using namespace cv;
  17. using namespace cv::text;
  18. int main(int argc, char* argv[])
  19. {
  20. cout << endl << argv[0] << endl << endl;
  21. cout << "A demo program of Scene Text cropped word Recognition: " << endl;
  22. cout << "Shows the use of the OCRBeamSearchDecoder class using the Single Layer CNN character classifier described in:" << endl;
  23. cout << "Coates, Adam, et al. \"Text detection and character recognition in scene images with unsupervised feature learning.\" ICDAR 2011." << endl << endl;
  24. Mat image;
  25. if(argc>1)
  26. image = imread(argv[1]);
  27. else
  28. {
  29. cout << " Usage: " << argv[0] << " <input_image>" << endl << endl;
  30. return(0);
  31. }
  32. string vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; // must have the same order as the classifier output classes
  33. vector<string> lexicon; // a list of words expected to be found on the input image
  34. lexicon.push_back(string("abb"));
  35. lexicon.push_back(string("riser"));
  36. lexicon.push_back(string("CHINA"));
  37. lexicon.push_back(string("HERE"));
  38. lexicon.push_back(string("President"));
  39. lexicon.push_back(string("smash"));
  40. lexicon.push_back(string("KUALA"));
  41. lexicon.push_back(string("Produkt"));
  42. lexicon.push_back(string("NINTENDO"));
  43. // Create tailored language model a small given lexicon
  44. Mat transition_p;
  45. createOCRHMMTransitionsTable(vocabulary,lexicon,transition_p);
  46. // An alternative would be to load the default generic language model
  47. // (created from ispell 42869 English words list)
  48. /*Mat transition_p;
  49. string filename = "OCRHMM_transitions_table.xml";
  50. FileStorage fs(filename, FileStorage::READ);
  51. fs["transition_probabilities"] >> transition_p;
  52. fs.release();*/
  53. Mat emission_p = Mat::eye(62,62,CV_64FC1);
  54. // Notice we set here a beam size of 50. This is much faster than using the default value (500).
  55. // 50 works well with our tiny lexicon example, but may not with larger dictionaries.
  56. Ptr<OCRBeamSearchDecoder> ocr = OCRBeamSearchDecoder::create(
  57. loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
  58. vocabulary, transition_p, emission_p, OCR_DECODER_VITERBI, 50);
  59. double t_r = (double)getTickCount();
  60. string output;
  61. vector<Rect> boxes;
  62. vector<string> words;
  63. vector<float> confidences;
  64. ocr->run(image, output, &boxes, &words, &confidences, OCR_LEVEL_WORD);
  65. cout << "OCR output = \"" << output << "\". Decoded in "
  66. << ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl << endl;
  67. return 0;
  68. }