warp.cpp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15. // Third party copyrights are property of their respective owners.
  16. //
  17. // Redistribution and use in source and binary forms, with or without modification,
  18. // are permitted provided that the following conditions are met:
  19. //
  20. // * Redistribution's of source code must retain the above copyright notice,
  21. // this list of conditions and the following disclaimer.
  22. //
  23. // * Redistribution's in binary form must reproduce the above copyright notice,
  24. // this list of conditions and the following disclaimer in the documentation
  25. // and/or other materials provided with the distribution.
  26. //
  27. // * The name of the copyright holders may not be used to endorse or promote products
  28. // derived from this software without specific prior written permission.
  29. //
  30. // This software is provided by the copyright holders and contributors "as is" and
  31. // any express or implied warranties, including, but not limited to, the implied
  32. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33. // In no event shall the Intel Corporation or contributors be liable for any direct,
  34. // indirect, incidental, special, exemplary, or consequential damages
  35. // (including, but not limited to, procurement of substitute goods or services;
  36. // loss of use, data, or profits; or business interruption) however caused
  37. // and on any theory of liability, whether in contract, strict liability,
  38. // or tort (including negligence or otherwise) arising in any way out of
  39. // the use of this software, even if advised of the possibility of such damage.
  40. //
  41. //M*/
  42. #include "precomp.hpp"
  43. using namespace cv;
  44. using namespace cv::cuda;
  45. #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
  46. void cv::cuda::warpAffine(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
  47. void cv::cuda::buildWarpAffineMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
  48. void cv::cuda::warpPerspective(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
  49. void cv::cuda::buildWarpPerspectiveMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
  50. void cv::cuda::rotate(InputArray, OutputArray, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
  51. #else // HAVE_CUDA
  52. namespace cv { namespace cuda { namespace device
  53. {
  54. namespace imgproc
  55. {
  56. void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);
  57. template <typename T>
  58. void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
  59. int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
  60. void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);
  61. template <typename T>
  62. void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
  63. int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
  64. }
  65. }}}
  66. void cv::cuda::buildWarpAffineMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
  67. {
  68. using namespace cv::cuda::device::imgproc;
  69. Mat M = _M.getMat();
  70. CV_Assert( M.rows == 2 && M.cols == 3 );
  71. _xmap.create(dsize, CV_32FC1);
  72. _ymap.create(dsize, CV_32FC1);
  73. GpuMat xmap = _xmap.getGpuMat();
  74. GpuMat ymap = _ymap.getGpuMat();
  75. float coeffs[2 * 3];
  76. Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);
  77. if (inverse)
  78. M.convertTo(coeffsMat, coeffsMat.type());
  79. else
  80. {
  81. cv::Mat iM;
  82. invertAffineTransform(M, iM);
  83. iM.convertTo(coeffsMat, coeffsMat.type());
  84. }
  85. buildWarpAffineMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
  86. }
  87. void cv::cuda::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
  88. {
  89. using namespace cv::cuda::device::imgproc;
  90. Mat M = _M.getMat();
  91. CV_Assert( M.rows == 3 && M.cols == 3 );
  92. _xmap.create(dsize, CV_32FC1);
  93. _ymap.create(dsize, CV_32FC1);
  94. GpuMat xmap = _xmap.getGpuMat();
  95. GpuMat ymap = _ymap.getGpuMat();
  96. float coeffs[3 * 3];
  97. Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);
  98. if (inverse)
  99. M.convertTo(coeffsMat, coeffsMat.type());
  100. else
  101. {
  102. cv::Mat iM;
  103. invert(M, iM);
  104. iM.convertTo(coeffsMat, coeffsMat.type());
  105. }
  106. buildWarpPerspectiveMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
  107. }
  108. namespace
  109. {
  110. template <int DEPTH> struct NppWarpFunc
  111. {
  112. typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
  113. typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst,
  114. int dstStep, NppiRect dstRoi, const double coeffs[][3],
  115. int interpolation);
  116. };
  117. template <int DEPTH, typename NppWarpFunc<DEPTH>::func_t func> struct NppWarp
  118. {
  119. typedef typename NppWarpFunc<DEPTH>::npp_type npp_type;
  120. static void call(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream)
  121. {
  122. static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
  123. NppiSize srcsz;
  124. srcsz.height = src.rows;
  125. srcsz.width = src.cols;
  126. NppiRect srcroi;
  127. srcroi.x = 0;
  128. srcroi.y = 0;
  129. srcroi.height = src.rows;
  130. srcroi.width = src.cols;
  131. NppiRect dstroi;
  132. dstroi.x = 0;
  133. dstroi.y = 0;
  134. dstroi.height = dst.rows;
  135. dstroi.width = dst.cols;
  136. cv::cuda::NppStreamHandler h(stream);
  137. nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
  138. dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi,
  139. coeffs, npp_inter[interpolation]) );
  140. if (stream == 0)
  141. cudaSafeCall( cudaDeviceSynchronize() );
  142. }
  143. };
  144. }
  145. void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
  146. {
  147. GpuMat src = _src.getGpuMat();
  148. Mat M = _M.getMat();
  149. CV_Assert( M.rows == 2 && M.cols == 3 );
  150. const int interpolation = flags & INTER_MAX;
  151. CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
  152. CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
  153. CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
  154. _dst.create(dsize, src.type());
  155. GpuMat dst = _dst.getGpuMat();
  156. Size wholeSize;
  157. Point ofs;
  158. src.locateROI(wholeSize, ofs);
  159. static const bool useNppTab[6][4][3] =
  160. {
  161. {
  162. {false, false, true},
  163. {false, false, false},
  164. {false, true, true},
  165. {false, false, false}
  166. },
  167. {
  168. {false, false, false},
  169. {false, false, false},
  170. {false, false, false},
  171. {false, false, false}
  172. },
  173. {
  174. {false, true, true},
  175. {false, false, false},
  176. {false, true, true},
  177. {false, false, false}
  178. },
  179. {
  180. {false, false, false},
  181. {false, false, false},
  182. {false, false, false},
  183. {false, false, false}
  184. },
  185. {
  186. {false, true, true},
  187. {false, false, false},
  188. {false, true, true},
  189. {false, false, true}
  190. },
  191. {
  192. {false, true, true},
  193. {false, false, false},
  194. {false, true, true},
  195. {false, false, true}
  196. }
  197. };
  198. bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
  199. // NPP bug on float data
  200. useNpp = useNpp && src.depth() != CV_32F;
  201. if (useNpp)
  202. {
  203. typedef void (*func_t)(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);
  204. static const func_t funcs[2][6][4] =
  205. {
  206. {
  207. {NppWarp<CV_8U, nppiWarpAffine_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffine_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffine_8u_C4R>::call},
  208. {0, 0, 0, 0},
  209. {NppWarp<CV_16U, nppiWarpAffine_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffine_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffine_16u_C4R>::call},
  210. {0, 0, 0, 0},
  211. {NppWarp<CV_32S, nppiWarpAffine_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffine_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffine_32s_C4R>::call},
  212. {NppWarp<CV_32F, nppiWarpAffine_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffine_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffine_32f_C4R>::call}
  213. },
  214. {
  215. {NppWarp<CV_8U, nppiWarpAffineBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffineBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffineBack_8u_C4R>::call},
  216. {0, 0, 0, 0},
  217. {NppWarp<CV_16U, nppiWarpAffineBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffineBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffineBack_16u_C4R>::call},
  218. {0, 0, 0, 0},
  219. {NppWarp<CV_32S, nppiWarpAffineBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffineBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffineBack_32s_C4R>::call},
  220. {NppWarp<CV_32F, nppiWarpAffineBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffineBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffineBack_32f_C4R>::call}
  221. }
  222. };
  223. dst.setTo(borderValue, stream);
  224. double coeffs[2][3];
  225. Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);
  226. M.convertTo(coeffsMat, coeffsMat.type());
  227. const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
  228. CV_Assert(func != 0);
  229. func(src, dst, coeffs, interpolation, StreamAccessor::getStream(stream));
  230. }
  231. else
  232. {
  233. using namespace cv::cuda::device::imgproc;
  234. typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
  235. int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
  236. static const func_t funcs[6][4] =
  237. {
  238. {warpAffine_gpu<uchar> , 0 /*warpAffine_gpu<uchar2>*/ , warpAffine_gpu<uchar3> , warpAffine_gpu<uchar4> },
  239. {0 /*warpAffine_gpu<schar>*/, 0 /*warpAffine_gpu<char2>*/ , 0 /*warpAffine_gpu<char3>*/, 0 /*warpAffine_gpu<char4>*/},
  240. {warpAffine_gpu<ushort> , 0 /*warpAffine_gpu<ushort2>*/, warpAffine_gpu<ushort3> , warpAffine_gpu<ushort4> },
  241. {warpAffine_gpu<short> , 0 /*warpAffine_gpu<short2>*/ , warpAffine_gpu<short3> , warpAffine_gpu<short4> },
  242. {0 /*warpAffine_gpu<int>*/ , 0 /*warpAffine_gpu<int2>*/ , 0 /*warpAffine_gpu<int3>*/ , 0 /*warpAffine_gpu<int4>*/ },
  243. {warpAffine_gpu<float> , 0 /*warpAffine_gpu<float2>*/ , warpAffine_gpu<float3> , warpAffine_gpu<float4> }
  244. };
  245. const func_t func = funcs[src.depth()][src.channels() - 1];
  246. CV_Assert(func != 0);
  247. float coeffs[2 * 3];
  248. Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);
  249. if (flags & WARP_INVERSE_MAP)
  250. M.convertTo(coeffsMat, coeffsMat.type());
  251. else
  252. {
  253. cv::Mat iM;
  254. invertAffineTransform(M, iM);
  255. iM.convertTo(coeffsMat, coeffsMat.type());
  256. }
  257. Scalar_<float> borderValueFloat;
  258. borderValueFloat = borderValue;
  259. func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
  260. dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
  261. }
  262. }
  263. void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
  264. {
  265. GpuMat src = _src.getGpuMat();
  266. Mat M = _M.getMat();
  267. CV_Assert( M.rows == 3 && M.cols == 3 );
  268. const int interpolation = flags & INTER_MAX;
  269. CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
  270. CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
  271. CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP) ;
  272. _dst.create(dsize, src.type());
  273. GpuMat dst = _dst.getGpuMat();
  274. Size wholeSize;
  275. Point ofs;
  276. src.locateROI(wholeSize, ofs);
  277. static const bool useNppTab[6][4][3] =
  278. {
  279. {
  280. {false, false, true},
  281. {false, false, false},
  282. {false, true, true},
  283. {false, false, false}
  284. },
  285. {
  286. {false, false, false},
  287. {false, false, false},
  288. {false, false, false},
  289. {false, false, false}
  290. },
  291. {
  292. {false, true, true},
  293. {false, false, false},
  294. {false, true, true},
  295. {false, false, false}
  296. },
  297. {
  298. {false, false, false},
  299. {false, false, false},
  300. {false, false, false},
  301. {false, false, false}
  302. },
  303. {
  304. {false, true, true},
  305. {false, false, false},
  306. {false, true, true},
  307. {false, false, true}
  308. },
  309. {
  310. {false, true, true},
  311. {false, false, false},
  312. {false, true, true},
  313. {false, false, true}
  314. }
  315. };
  316. bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
  317. // NPP bug on float data
  318. useNpp = useNpp && src.depth() != CV_32F;
  319. if (useNpp)
  320. {
  321. typedef void (*func_t)(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);
  322. static const func_t funcs[2][6][4] =
  323. {
  324. {
  325. {NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
  326. {0, 0, 0, 0},
  327. {NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
  328. {0, 0, 0, 0},
  329. {NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
  330. {NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
  331. },
  332. {
  333. {NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
  334. {0, 0, 0, 0},
  335. {NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
  336. {0, 0, 0, 0},
  337. {NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
  338. {NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
  339. }
  340. };
  341. dst.setTo(borderValue, stream);
  342. double coeffs[3][3];
  343. Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
  344. M.convertTo(coeffsMat, coeffsMat.type());
  345. const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
  346. CV_Assert(func != 0);
  347. func(src, dst, coeffs, interpolation, StreamAccessor::getStream(stream));
  348. }
  349. else
  350. {
  351. using namespace cv::cuda::device::imgproc;
  352. typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
  353. int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
  354. static const func_t funcs[6][4] =
  355. {
  356. {warpPerspective_gpu<uchar> , 0 /*warpPerspective_gpu<uchar2>*/ , warpPerspective_gpu<uchar3> , warpPerspective_gpu<uchar4> },
  357. {0 /*warpPerspective_gpu<schar>*/, 0 /*warpPerspective_gpu<char2>*/ , 0 /*warpPerspective_gpu<char3>*/, 0 /*warpPerspective_gpu<char4>*/},
  358. {warpPerspective_gpu<ushort> , 0 /*warpPerspective_gpu<ushort2>*/, warpPerspective_gpu<ushort3> , warpPerspective_gpu<ushort4> },
  359. {warpPerspective_gpu<short> , 0 /*warpPerspective_gpu<short2>*/ , warpPerspective_gpu<short3> , warpPerspective_gpu<short4> },
  360. {0 /*warpPerspective_gpu<int>*/ , 0 /*warpPerspective_gpu<int2>*/ , 0 /*warpPerspective_gpu<int3>*/ , 0 /*warpPerspective_gpu<int4>*/ },
  361. {warpPerspective_gpu<float> , 0 /*warpPerspective_gpu<float2>*/ , warpPerspective_gpu<float3> , warpPerspective_gpu<float4> }
  362. };
  363. const func_t func = funcs[src.depth()][src.channels() - 1];
  364. CV_Assert(func != 0);
  365. float coeffs[3 * 3];
  366. Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);
  367. if (flags & WARP_INVERSE_MAP)
  368. M.convertTo(coeffsMat, coeffsMat.type());
  369. else
  370. {
  371. cv::Mat iM;
  372. invert(M, iM);
  373. iM.convertTo(coeffsMat, coeffsMat.type());
  374. }
  375. Scalar_<float> borderValueFloat;
  376. borderValueFloat = borderValue;
  377. func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
  378. dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
  379. }
  380. }
  381. ////////////////////////////////////////////////////////////////////////
  382. // rotate
  383. namespace
  384. {
  385. template <int DEPTH> struct NppRotateFunc
  386. {
  387. typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
  388. typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI,
  389. npp_type* pDst, int nDstStep, NppiRect oDstROI,
  390. double nAngle, double nShiftX, double nShiftY, int eInterpolation);
  391. };
  392. template <int DEPTH, typename NppRotateFunc<DEPTH>::func_t func> struct NppRotate
  393. {
  394. typedef typename NppRotateFunc<DEPTH>::npp_type npp_type;
  395. static void call(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream)
  396. {
  397. CV_UNUSED(dsize);
  398. static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
  399. NppStreamHandler h(stream);
  400. NppiSize srcsz;
  401. srcsz.height = src.rows;
  402. srcsz.width = src.cols;
  403. NppiRect srcroi;
  404. srcroi.x = srcroi.y = 0;
  405. srcroi.height = src.rows;
  406. srcroi.width = src.cols;
  407. NppiRect dstroi;
  408. dstroi.x = dstroi.y = 0;
  409. dstroi.height = dst.rows;
  410. dstroi.width = dst.cols;
  411. nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
  412. dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
  413. if (stream == 0)
  414. cudaSafeCall( cudaDeviceSynchronize() );
  415. }
  416. };
  417. }
  418. void cv::cuda::rotate(InputArray _src, OutputArray _dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
  419. {
  420. typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream);
  421. static const func_t funcs[6][4] =
  422. {
  423. {NppRotate<CV_8U, nppiRotate_8u_C1R>::call, 0, NppRotate<CV_8U, nppiRotate_8u_C3R>::call, NppRotate<CV_8U, nppiRotate_8u_C4R>::call},
  424. {0,0,0,0},
  425. {NppRotate<CV_16U, nppiRotate_16u_C1R>::call, 0, NppRotate<CV_16U, nppiRotate_16u_C3R>::call, NppRotate<CV_16U, nppiRotate_16u_C4R>::call},
  426. {0,0,0,0},
  427. {0,0,0,0},
  428. {NppRotate<CV_32F, nppiRotate_32f_C1R>::call, 0, NppRotate<CV_32F, nppiRotate_32f_C3R>::call, NppRotate<CV_32F, nppiRotate_32f_C4R>::call}
  429. };
  430. GpuMat src = _src.getGpuMat();
  431. CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
  432. CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
  433. CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
  434. _dst.create(dsize, src.type());
  435. GpuMat dst = _dst.getGpuMat();
  436. dst.setTo(Scalar::all(0), stream);
  437. funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
  438. }
  439. #endif // HAVE_CUDA