CLprocessor.cpp 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. #define __CL_ENABLE_EXCEPTIONS
  2. #define CL_USE_DEPRECATED_OPENCL_1_1_APIS /*let's give a chance for OpenCL 1.1 devices*/
  3. #include <CL/cl.hpp>
  4. #include <GLES2/gl2.h>
  5. #include <EGL/egl.h>
  6. #include <opencv2/core.hpp>
  7. #include <opencv2/imgproc.hpp>
  8. #include <opencv2/core/ocl.hpp>
  9. #include "common.hpp"
  10. const char oclProgB2B[] = "// clBuffer to clBuffer";
  11. const char oclProgI2B[] = "// clImage to clBuffer";
  12. const char oclProgI2I[] = \
  13. "__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; \n" \
  14. "\n" \
  15. "__kernel void Laplacian( \n" \
  16. " __read_only image2d_t imgIn, \n" \
  17. " __write_only image2d_t imgOut \n" \
  18. " ) { \n" \
  19. " \n" \
  20. " const int2 pos = {get_global_id(0), get_global_id(1)}; \n" \
  21. " \n" \
  22. " float4 sum = (float4) 0.0f; \n" \
  23. " sum += read_imagef(imgIn, sampler, pos + (int2)(-1,0)); \n" \
  24. " sum += read_imagef(imgIn, sampler, pos + (int2)(+1,0)); \n" \
  25. " sum += read_imagef(imgIn, sampler, pos + (int2)(0,-1)); \n" \
  26. " sum += read_imagef(imgIn, sampler, pos + (int2)(0,+1)); \n" \
  27. " sum -= read_imagef(imgIn, sampler, pos) * 4; \n" \
  28. " \n" \
  29. " write_imagef(imgOut, pos, sum*10); \n" \
  30. "} \n";
  31. void dumpCLinfo()
  32. {
  33. LOGD("*** OpenCL info ***");
  34. try
  35. {
  36. std::vector<cl::Platform> platforms;
  37. cl::Platform::get(&platforms);
  38. LOGD("OpenCL info: Found %d OpenCL platforms", platforms.size());
  39. for (int i = 0; i < platforms.size(); ++i)
  40. {
  41. std::string name = platforms[i].getInfo<CL_PLATFORM_NAME>();
  42. std::string version = platforms[i].getInfo<CL_PLATFORM_VERSION>();
  43. std::string profile = platforms[i].getInfo<CL_PLATFORM_PROFILE>();
  44. std::string extensions = platforms[i].getInfo<CL_PLATFORM_EXTENSIONS>();
  45. LOGD( "OpenCL info: Platform[%d] = %s, ver = %s, prof = %s, ext = %s",
  46. i, name.c_str(), version.c_str(), profile.c_str(), extensions.c_str() );
  47. }
  48. std::vector<cl::Device> devices;
  49. platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
  50. for (int i = 0; i < devices.size(); ++i)
  51. {
  52. std::string name = devices[i].getInfo<CL_DEVICE_NAME>();
  53. std::string extensions = devices[i].getInfo<CL_DEVICE_EXTENSIONS>();
  54. cl_ulong type = devices[i].getInfo<CL_DEVICE_TYPE>();
  55. LOGD( "OpenCL info: Device[%d] = %s (%s), ext = %s",
  56. i, name.c_str(), (type==CL_DEVICE_TYPE_GPU ? "GPU" : "CPU"), extensions.c_str() );
  57. }
  58. }
  59. catch(const cl::Error& e)
  60. {
  61. LOGE( "OpenCL info: error while gathering OpenCL info: %s (%d)", e.what(), e.err() );
  62. }
  63. catch(const std::exception& e)
  64. {
  65. LOGE( "OpenCL info: error while gathering OpenCL info: %s", e.what() );
  66. }
  67. catch(...)
  68. {
  69. LOGE( "OpenCL info: unknown error while gathering OpenCL info" );
  70. }
  71. LOGD("*******************");
  72. }
  73. cl::Context theContext;
  74. cl::CommandQueue theQueue;
  75. cl::Program theProgB2B, theProgI2B, theProgI2I;
  76. bool haveOpenCL = false;
  77. extern "C" void initCL()
  78. {
  79. dumpCLinfo();
  80. EGLDisplay mEglDisplay = eglGetCurrentDisplay();
  81. if (mEglDisplay == EGL_NO_DISPLAY)
  82. LOGE("initCL: eglGetCurrentDisplay() returned 'EGL_NO_DISPLAY', error = %x", eglGetError());
  83. EGLContext mEglContext = eglGetCurrentContext();
  84. if (mEglContext == EGL_NO_CONTEXT)
  85. LOGE("initCL: eglGetCurrentContext() returned 'EGL_NO_CONTEXT', error = %x", eglGetError());
  86. cl_context_properties props[] =
  87. { CL_GL_CONTEXT_KHR, (cl_context_properties) mEglContext,
  88. CL_EGL_DISPLAY_KHR, (cl_context_properties) mEglDisplay,
  89. CL_CONTEXT_PLATFORM, 0,
  90. 0 };
  91. try
  92. {
  93. haveOpenCL = false;
  94. cl::Platform p = cl::Platform::getDefault();
  95. std::string ext = p.getInfo<CL_PLATFORM_EXTENSIONS>();
  96. if(ext.find("cl_khr_gl_sharing") == std::string::npos)
  97. LOGE("Warning: CL-GL sharing isn't supported by PLATFORM");
  98. props[5] = (cl_context_properties) p();
  99. theContext = cl::Context(CL_DEVICE_TYPE_GPU, props);
  100. std::vector<cl::Device> devs = theContext.getInfo<CL_CONTEXT_DEVICES>();
  101. LOGD("Context returned %d devices, taking the 1st one", devs.size());
  102. ext = devs[0].getInfo<CL_DEVICE_EXTENSIONS>();
  103. if(ext.find("cl_khr_gl_sharing") == std::string::npos)
  104. LOGE("Warning: CL-GL sharing isn't supported by DEVICE");
  105. theQueue = cl::CommandQueue(theContext, devs[0]);
  106. cl::Program::Sources src(1, std::make_pair(oclProgI2I, sizeof(oclProgI2I)));
  107. theProgI2I = cl::Program(theContext, src);
  108. theProgI2I.build(devs);
  109. cv::ocl::attachContext(p.getInfo<CL_PLATFORM_NAME>(), p(), theContext(), devs[0]());
  110. if( cv::ocl::useOpenCL() )
  111. LOGD("OpenCV+OpenCL works OK!");
  112. else
  113. LOGE("Can't init OpenCV with OpenCL TAPI");
  114. haveOpenCL = true;
  115. }
  116. catch(const cl::Error& e)
  117. {
  118. LOGE("cl::Error: %s (%d)", e.what(), e.err());
  119. }
  120. catch(const std::exception& e)
  121. {
  122. LOGE("std::exception: %s", e.what());
  123. }
  124. catch(...)
  125. {
  126. LOGE( "OpenCL info: unknown error while initializing OpenCL stuff" );
  127. }
  128. LOGD("initCL completed");
  129. }
  130. extern "C" void closeCL()
  131. {
  132. }
  133. #define GL_TEXTURE_2D 0x0DE1
  134. void procOCL_I2I(int texIn, int texOut, int w, int h)
  135. {
  136. LOGD("Processing OpenCL Direct (image2d)");
  137. if(!haveOpenCL)
  138. {
  139. LOGE("OpenCL isn't initialized");
  140. return;
  141. }
  142. LOGD("procOCL_I2I(%d, %d, %d, %d)", texIn, texOut, w, h);
  143. cl::ImageGL imgIn (theContext, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, texIn);
  144. cl::ImageGL imgOut(theContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, texOut);
  145. std::vector < cl::Memory > images;
  146. images.push_back(imgIn);
  147. images.push_back(imgOut);
  148. int64_t t = getTimeMs();
  149. theQueue.enqueueAcquireGLObjects(&images);
  150. theQueue.finish();
  151. LOGD("enqueueAcquireGLObjects() costs %d ms", getTimeInterval(t));
  152. t = getTimeMs();
  153. cl::Kernel Laplacian(theProgI2I, "Laplacian"); //TODO: may be done once
  154. Laplacian.setArg(0, imgIn);
  155. Laplacian.setArg(1, imgOut);
  156. theQueue.finish();
  157. LOGD("Kernel() costs %d ms", getTimeInterval(t));
  158. t = getTimeMs();
  159. theQueue.enqueueNDRangeKernel(Laplacian, cl::NullRange, cl::NDRange(w, h), cl::NullRange);
  160. theQueue.finish();
  161. LOGD("enqueueNDRangeKernel() costs %d ms", getTimeInterval(t));
  162. t = getTimeMs();
  163. theQueue.enqueueReleaseGLObjects(&images);
  164. theQueue.finish();
  165. LOGD("enqueueReleaseGLObjects() costs %d ms", getTimeInterval(t));
  166. }
  167. void procOCL_OCV(int texIn, int texOut, int w, int h)
  168. {
  169. LOGD("Processing OpenCL via OpenCV");
  170. if(!haveOpenCL)
  171. {
  172. LOGE("OpenCL isn't initialized");
  173. return;
  174. }
  175. int64_t t = getTimeMs();
  176. cl::ImageGL imgIn (theContext, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, texIn);
  177. std::vector < cl::Memory > images(1, imgIn);
  178. theQueue.enqueueAcquireGLObjects(&images);
  179. theQueue.finish();
  180. cv::UMat uIn, uOut, uTmp;
  181. cv::ocl::convertFromImage(imgIn(), uIn);
  182. LOGD("loading texture data to OpenCV UMat costs %d ms", getTimeInterval(t));
  183. theQueue.enqueueReleaseGLObjects(&images);
  184. t = getTimeMs();
  185. //cv::blur(uIn, uOut, cv::Size(5, 5));
  186. cv::Laplacian(uIn, uTmp, CV_8U);
  187. cv:multiply(uTmp, 10, uOut);
  188. cv::ocl::finish();
  189. LOGD("OpenCV processing costs %d ms", getTimeInterval(t));
  190. t = getTimeMs();
  191. cl::ImageGL imgOut(theContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, texOut);
  192. images.clear();
  193. images.push_back(imgOut);
  194. theQueue.enqueueAcquireGLObjects(&images);
  195. cl_mem clBuffer = (cl_mem)uOut.handle(cv::ACCESS_READ);
  196. cl_command_queue q = (cl_command_queue)cv::ocl::Queue::getDefault().ptr();
  197. size_t offset = 0;
  198. size_t origin[3] = { 0, 0, 0 };
  199. size_t region[3] = { w, h, 1 };
  200. CV_Assert(clEnqueueCopyBufferToImage (q, clBuffer, imgOut(), offset, origin, region, 0, NULL, NULL) == CL_SUCCESS);
  201. theQueue.enqueueReleaseGLObjects(&images);
  202. cv::ocl::finish();
  203. LOGD("uploading results to texture costs %d ms", getTimeInterval(t));
  204. }
  205. void drawFrameProcCPU(int w, int h, int texOut)
  206. {
  207. LOGD("Processing on CPU");
  208. int64_t t;
  209. // let's modify pixels in FBO texture in C++ code (on CPU)
  210. static cv::Mat m;
  211. m.create(h, w, CV_8UC4);
  212. // read
  213. t = getTimeMs();
  214. // expecting FBO to be bound
  215. glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, m.data);
  216. LOGD("glReadPixels() costs %d ms", getTimeInterval(t));
  217. // modify
  218. t = getTimeMs();
  219. cv::Laplacian(m, m, CV_8U);
  220. m *= 10;
  221. LOGD("Laplacian() costs %d ms", getTimeInterval(t));
  222. // write back
  223. glActiveTexture(GL_TEXTURE0);
  224. glBindTexture(GL_TEXTURE_2D, texOut);
  225. t = getTimeMs();
  226. glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, m.data);
  227. LOGD("glTexSubImage2D() costs %d ms", getTimeInterval(t));
  228. }
  229. enum ProcMode {PROC_MODE_NO_PROC=0, PROC_MODE_CPU=1, PROC_MODE_OCL_DIRECT=2, PROC_MODE_OCL_OCV=3};
  230. extern "C" void processFrame(int tex1, int tex2, int w, int h, int mode)
  231. {
  232. switch(mode)
  233. {
  234. //case PROC_MODE_NO_PROC:
  235. case PROC_MODE_CPU:
  236. drawFrameProcCPU(w, h, tex2);
  237. break;
  238. case PROC_MODE_OCL_DIRECT:
  239. procOCL_I2I(tex1, tex2, w, h);
  240. break;
  241. case PROC_MODE_OCL_OCV:
  242. procOCL_OCV(tex1, tex2, w, h);
  243. break;
  244. default:
  245. LOGE("Unexpected processing mode: %d", mode);
  246. }
  247. }