#!/usr/bin/python
"""textdetection.py -- demo of the Extremal Region Filter text detector.

Runs the two-stage Extremal Region (ER) filter described in

    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition,
    CVPR 2012

on a single image and displays the detected text boxes.

Usage:
    textdetection.py <path_to_image_to_be_processed>

The classifier models ``trained_classifierNM1.xml`` and
``trained_classifierNM2.xml`` are looked up in the current working directory
first and, failing that, in the directory containing this script.
"""
import os
import sys

import numpy as np  # noqa: F401 -- kept from the original script's imports

NM1_CLASSIFIER = 'trained_classifierNM1.xml'
NM2_CLASSIFIER = 'trained_classifierNM2.xml'


def find_classifier(filename, script_dir):
    """Return a usable path for a classifier model, or None if it is missing.

    The current working directory is tried first (this is where the script
    has always looked), then ``script_dir``.
    """
    if os.path.isfile(filename):
        return filename
    candidate = os.path.join(script_dir, filename)
    if os.path.isfile(candidate):
        return candidate
    return None


def add_negative_channels(channels):
    """Return ``channels`` followed by the negative of all but the last one.

    The negatives (``255 - channel``) allow detection of ER- regions, i.e.
    bright regions over a dark background.  The last channel is left out on
    purpose, as in the upstream sample -- NOTE(review): with the default
    computeNMChannels mode that channel is the gradient magnitude; confirm
    if a different mode is ever used.
    """
    channels = list(channels)
    return channels + [255 - channel for channel in channels[:-1]]


def main(argv=None):
    """Run the demo and return a process exit status (0 on success).

    ``argv`` defaults to ``sys.argv``; ``argv[1]`` is the image to process.
    """
    argv = sys.argv if argv is None else argv

    print('\ntextdetection.py')
    print(' A demo script of the Extremal Region Filter algorithm described in:')
    print(' Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012\n')

    if len(argv) < 2:
        print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
        return 1

    # Imported here rather than at module level so that the usage message
    # above and the helpers in this module work without OpenCV installed.
    import cv2 as cv

    # cv.imread signals failure by returning None instead of raising.
    img = cv.imread(argv[1])
    if img is None:
        print(' (ERROR) Could not read image: ' + str(argv[1]) + '\n')
        return 1

    script_dir = os.path.dirname(argv[0])
    nm1_path = find_classifier(NM1_CLASSIFIER, script_dir)
    nm2_path = find_classifier(NM2_CLASSIFIER, script_dir)
    missing = [name
               for name, path in ((NM1_CLASSIFIER, nm1_path),
                                  (NM2_CLASSIFIER, nm2_path))
               if path is None]
    if missing:
        print(' (ERROR) Classifier file(s) not found: ' + ', '.join(missing) + '\n')
        return 1

    # for visualization
    vis = img.copy()

    # Extract channels to be processed individually, then append the
    # negative channels to detect ER- (bright regions over dark background).
    channels = add_negative_channels(cv.text.computeNMChannels(img))

    # Apply the default cascade classifier to each independent channel
    # (could be done in parallel).
    # NM1 arguments, per the OpenCV docs: thresholdDelta, minArea, maxArea,
    # minProbability, nonMaxSuppression, minProbabilityDiff.
    erc1 = cv.text.loadClassifierNM1(nm1_path)
    er1 = cv.text.createERFilterNM1(erc1, 16, 0.00015, 0.13, 0.2, True, 0.1)

    # NM2 argument, per the OpenCV docs: minProbability.
    erc2 = cv.text.loadClassifierNM2(nm2_path)
    er2 = cv.text.createERFilterNM2(erc2, 0.5)

    print("Extracting Class Specific Extremal Regions from " + str(len(channels)) + " channels ...")
    print(" (...) this may take a while (...)")

    for channel in channels:
        regions = cv.text.detectRegions(channel, er1, er2)
        rects = cv.text.erGrouping(img, channel, [r.tolist() for r in regions])
        # Alternative (any-orientation grouping, needs its own model):
        # rects = cv.text.erGrouping(img, channel, [x.tolist() for x in regions],
        #     cv.text.ERGROUPING_ORIENTATION_ANY,
        #     '../../GSoC2014/opencv_contrib/modules/text/samples/trained_classifier_erGrouping.xml',
        #     0.5)

        # Visualization: a thick black box under a thin white one keeps the
        # outline visible on both light and dark backgrounds.
        for rect in rects:
            x, y, w, h = [int(v) for v in rect[:4]]
            cv.rectangle(vis, (x, y), (x + w, y + h), (0, 0, 0), 2)
            cv.rectangle(vis, (x, y), (x + w, y + h), (255, 255, 255), 1)

    # Visualization
    cv.imshow("Text detection result", vis)
    cv.waitKey(0)
    return 0


if __name__ == '__main__':
    sys.exit(main())