// tr_svt_benchmark.cpp
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2014, Itseez Inc, all rights reserved.
  14. // Third party copyrights are property of their respective owners.
  15. //
  16. // Redistribution and use in source and binary forms, with or without modification,
  17. // are permitted provided that the following conditions are met:
  18. //
  19. // * Redistribution's of source code must retain the above copyright notice,
  20. // this list of conditions and the following disclaimer.
  21. //
  22. // * Redistribution's in binary form must reproduce the above copyright notice,
  23. // this list of conditions and the following disclaimer in the documentation
  24. // and/or other materials provided with the distribution.
  25. //
  26. // * The name of the copyright holders may not be used to endorse or promote products
  27. // derived from this software without specific prior written permission.
  28. //
  29. // This software is provided by the copyright holders and contributors "as is" and
  30. // any express or implied warranties, including, but not limited to, the implied
  31. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32. // In no event shall the Itseez Inc or contributors be liable for any direct,
  33. // indirect, incidental, special, exemplary, or consequential damages
  34. // (including, but not limited to, procurement of substitute goods or services;
  35. // loss of use, data, or profits; or business interruption) however caused
  36. // and on any theory of liability, whether in contract, strict liability,
  37. // or tort (including negligence or otherwise) arising in any way out of
  38. // the use of this software, even if advised of the possibility of such damage.
  39. //
  40. //M*/
#include <iostream>
#include <opencv2/opencv_modules.hpp>
#ifdef HAVE_OPENCV_TEXT
#include "opencv2/datasets/tr_svt.hpp"
#include <opencv2/core.hpp>
#include "opencv2/text.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include <algorithm> // std::min, std::remove, std::transform, std::find
#include <cctype> // std::toupper
#include <cstdio>
#include <cstdlib> // atoi
#include <string>
#include <vector>
  53. using namespace std;
  54. using namespace cv;
  55. using namespace cv::datasets;
  56. using namespace cv::text;
  57. //Calculate edit distance between two words
  58. size_t edit_distance(const string& A, const string& B);
  59. size_t min(size_t x, size_t y, size_t z);
  60. bool isRepetitive(const string& s);
  61. bool sort_by_length(const string &a, const string &b);
  62. //Draw ER's in an image via floodFill
  63. void er_draw(vector<Mat> &channels, vector<vector<ERStat> > &regions, vector<Vec2i> group, Mat& segmentation);
  64. size_t min(size_t x, size_t y, size_t z)
  65. {
  66. return x < y ? min(x,z) : min(y,z);
  67. }
  68. size_t edit_distance(const string& A, const string& B)
  69. {
  70. size_t NA = A.size();
  71. size_t NB = B.size();
  72. vector< vector<size_t> > M(NA + 1, vector<size_t>(NB + 1));
  73. for (size_t a = 0; a <= NA; ++a)
  74. M[a][0] = a;
  75. for (size_t b = 0; b <= NB; ++b)
  76. M[0][b] = b;
  77. for (size_t a = 1; a <= NA; ++a)
  78. for (size_t b = 1; b <= NB; ++b)
  79. {
  80. size_t x = M[a-1][b] + 1;
  81. size_t y = M[a][b-1] + 1;
  82. size_t z = M[a-1][b-1] + (A[a-1] == B[b-1] ? 0 : 1);
  83. M[a][b] = min(x,y,z);
  84. }
  85. return M[A.size()][B.size()];
  86. }
  87. bool sort_by_length(const string &a, const string &b){return (a.size()>b.size());}
  88. bool isRepetitive(const string& s)
  89. {
  90. int count = 0;
  91. for (int i=0; i<(int)s.size(); i++)
  92. {
  93. if ((s[i] == 'i') ||
  94. (s[i] == 'l') ||
  95. (s[i] == 'I'))
  96. count++;
  97. }
  98. if (count > ((int)s.size()+1)/2)
  99. {
  100. return true;
  101. }
  102. return false;
  103. }
  104. void er_draw(vector<Mat> &channels, vector<vector<ERStat> > &regions, vector<Vec2i> group, Mat& segmentation)
  105. {
  106. for (int r=0; r<(int)group.size(); r++)
  107. {
  108. ERStat er = regions[group[r][0]][group[r][1]];
  109. if (er.parent != NULL) // deprecate the root region
  110. {
  111. int newMaskVal = 255;
  112. int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
  113. floodFill(channels[group[r][0]],segmentation,Point(er.pixel%channels[group[r][0]].cols,er.pixel/channels[group[r][0]].cols),
  114. Scalar(255),0,Scalar(er.level),Scalar(0),flags);
  115. }
  116. }
  117. }
  118. // std::toupper is int->int
  119. static char char_toupper(char ch)
  120. {
  121. return (char)std::toupper((int)ch);
  122. }
  123. int main(int argc, char *argv[])
  124. {
  125. const char *keys =
  126. "{ help h usage ? | | show this message }"
  127. "{ path p |true| path to dataset xml files }";
  128. CommandLineParser parser(argc, argv, keys);
  129. string path(parser.get<string>("path"));
  130. if (parser.has("help") || path=="true")
  131. {
  132. parser.printMessage();
  133. return -1;
  134. }
  135. // loading train & test images description
  136. Ptr<TR_svt> dataset = TR_svt::create();
  137. dataset->load(path);
  138. vector<double> f1Each;
  139. unsigned int correctNum = 0;
  140. unsigned int returnedNum = 0;
  141. unsigned int returnedCorrectNum = 0;
  142. vector< Ptr<Object> >& test = dataset->getTest();
  143. unsigned int num = 0;
  144. for (vector< Ptr<Object> >::iterator itT=test.begin(); itT!=test.end(); ++itT)
  145. {
  146. TR_svtObj *example = static_cast<TR_svtObj *>((*itT).get());
  147. num++;
  148. printf("processed image: %u, name: %s\n", num, example->fileName.c_str());
  149. correctNum += example->tags.size();
  150. /* printf("\ntags:\n");
  151. for (vector<tag>::iterator it=example->tags.begin(); it!=example->tags.end(); ++it)
  152. {
  153. tag &t = (*it);
  154. printf("%s\nx: %u, y: %u, width: %u, height: %u\n",
  155. t.value.c_str(), t.x, t.y, t.x+t.width, t.y+t.height);
  156. }*/
  157. unsigned int correctNumEach = example->tags.size();
  158. unsigned int returnedNumEach = 0;
  159. unsigned int returnedCorrectNumEach = 0;
  160. Mat image = imread((path+example->fileName).c_str());
  161. /*Text Detection*/
  162. // Extract channels to be processed individually
  163. vector<Mat> channels;
  164. Mat grey;
  165. cvtColor(image,grey,COLOR_RGB2GRAY);
  166. // Notice here we are only using grey channel, see textdetection.cpp for example with more channels
  167. channels.push_back(grey);
  168. channels.push_back(255-grey);
  169. // Create ERFilter objects with the 1st and 2nd stage default classifiers
  170. Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),8,0.00015f,0.13f,0.2f,true,0.1f);
  171. Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.5);
  172. vector<vector<ERStat> > regions(channels.size());
  173. // Apply the default cascade classifier to each independent channel (could be done in parallel)
  174. for (int c=0; c<(int)channels.size(); c++)
  175. {
  176. er_filter1->run(channels[c], regions[c]);
  177. er_filter2->run(channels[c], regions[c]);
  178. }
  179. // Detect character groups
  180. vector< vector<Vec2i> > nm_region_groups;
  181. vector<Rect> nm_boxes;
  182. erGrouping(image, channels, regions, nm_region_groups, nm_boxes, ERGROUPING_ORIENTATION_HORIZ);
  183. /*Text Recognition (OCR)*/
  184. Ptr<OCRTesseract> ocr = OCRTesseract::create();
  185. for (int i=0; i<(int)nm_boxes.size(); i++)
  186. {
  187. Mat group_img = Mat::zeros(image.rows+2, image.cols+2, CV_8UC1);
  188. er_draw(channels, regions, nm_region_groups[i], group_img);
  189. group_img(nm_boxes[i]).copyTo(group_img);
  190. copyMakeBorder(group_img,group_img,15,15,15,15,BORDER_CONSTANT,Scalar(0));
  191. string output;
  192. vector<Rect> boxes;
  193. vector<string> words;
  194. vector<float> confidences;
  195. ocr->run(group_img, output, &boxes, &words, &confidences, OCR_LEVEL_WORD);
  196. output.erase(remove(output.begin(), output.end(), '\n'), output.end());
  197. //cout << "OCR output = \"" << output << "\" length = " << output.size() << endl;
  198. if (output.size() < 3)
  199. continue;
  200. for (int j=0; j<(int)boxes.size(); j++)
  201. {
  202. boxes[j].x += nm_boxes[i].x-15;
  203. boxes[j].y += nm_boxes[i].y-15;
  204. //cout << " word = " << words[j] << "\t confidence = " << confidences[j] << endl;
  205. if ((words[j].size() < 2) || (confidences[j] < 51) ||
  206. ((words[j].size()==2) && (words[j][0] == words[j][1])) ||
  207. ((words[j].size()< 4) && (confidences[j] < 60)) ||
  208. isRepetitive(words[j]))
  209. {
  210. continue;
  211. }
  212. std::transform(words[j].begin(), words[j].end(), words[j].begin(), char_toupper);
  213. if (find(example->lex.begin(), example->lex.end(), words[j]) == example->lex.end())
  214. {
  215. continue;
  216. }
  217. returnedNum++;
  218. returnedNumEach++;
  219. /*printf("%s\nx: %u, y: %u, width: %u, height: %u\n",
  220. words[j].c_str(), boxes[j].tl().x, boxes[j].tl().y, boxes[j].br().x, boxes[j].br().y);*/
  221. for (vector<tag>::iterator it=example->tags.begin(); it!=example->tags.end(); ++it)
  222. {
  223. tag &t = (*it);
  224. if (t.value==words[j] &&
  225. !(boxes[j].tl().x > t.x+t.width || boxes[j].br().x < t.x ||
  226. boxes[j].tl().y > t.y+t.height || boxes[j].br().y < t.y))
  227. {
  228. returnedCorrectNum++;
  229. returnedCorrectNumEach++;
  230. break;
  231. }
  232. }
  233. }
  234. }
  235. double p = 0.0;
  236. if (0 != returnedNumEach)
  237. {
  238. p = 1.0*returnedCorrectNumEach/returnedNumEach;
  239. }
  240. double r = 0.0;
  241. if (0 != correctNumEach)
  242. {
  243. r = 1.0*returnedCorrectNumEach/correctNumEach;
  244. }
  245. double f1 = 0.0;
  246. if (0 != p+r)
  247. {
  248. f1 = 2*(p*r)/(p+r);
  249. }
  250. //printf("|%f|\n", f1);
  251. f1Each.push_back(f1);
  252. }
  253. double p = 1.0*returnedCorrectNum/returnedNum;
  254. double r = 1.0*returnedCorrectNum/correctNum;
  255. double f1 = 2*(p*r)/(p+r);
  256. printf("f1: %f\n", f1);
  257. /*double f1 = 0.0;
  258. for (vector<double>::iterator it=f1Each.begin(); it!=f1Each.end(); ++it)
  259. {
  260. f1 += *it;
  261. }
  262. f1 /= f1Each.size();
  263. printf("mean f1: %f\n", f1);*/
  264. return 0;
  265. }
  266. #else
  267. int main()
  268. {
  269. std::cerr << "OpenCV was built without text module" << std::endl;
  270. return 0;
  271. }
  272. #endif // HAVE_OPENCV_TEXT