// test_arithm.cpp — OpenCV CUDA arithmetic tests (GEMM, MulSpectrums, Dft, Convolve)
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15. // Third party copyrights are property of their respective owners.
  16. //
  17. // Redistribution and use in source and binary forms, with or without modification,
  18. // are permitted provided that the following conditions are met:
  19. //
  20. // * Redistribution's of source code must retain the above copyright notice,
  21. // this list of conditions and the following disclaimer.
  22. //
  23. // * Redistribution's in binary form must reproduce the above copyright notice,
  24. // this list of conditions and the following disclaimer in the documentation
  25. // and/or other materials provided with the distribution.
  26. //
  27. // * The name of the copyright holders may not be used to endorse or promote products
  28. // derived from this software without specific prior written permission.
  29. //
  30. // This software is provided by the copyright holders and contributors "as is" and
  31. // any express or implied warranties, including, but not limited to, the implied
  32. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33. // In no event shall the Intel Corporation or contributors be liable for any direct,
  34. // indirect, incidental, special, exemplary, or consequential damages
  35. // (including, but not limited to, procurement of substitute goods or services;
  36. // loss of use, data, or profits; or business interruption) however caused
  37. // and on any theory of liability, whether in contract, strict liability,
  38. // or tort (including negligence or otherwise) arising in any way out of
  39. // the use of this software, even if advised of the possibility of such damage.
  40. //
  41. //M*/
  42. #include "test_precomp.hpp"
  43. #ifdef HAVE_CUDA
  44. namespace opencv_test { namespace {
  45. //////////////////////////////////////////////////////////////////////////////
  46. // GEMM
  47. #ifdef HAVE_CUBLAS
// Bit-flag parameter type wrapping the cv::gemm transpose flags so they print readably in test names.
CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
// All transpose-flag combinations exercised by the GEMM tests.
// NOTE(review): GEMM_2_T | GEMM_3_T (without GEMM_1_T) is not in the list — confirm intentional.
#define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
// Fixture for cv::cuda::gemm: parameterized over device, matrix size,
// element type, transpose flags, and whole-matrix vs. ROI evaluation.
PARAM_TEST_CASE(GEMM, cv::cuda::DeviceInfo, cv::Size, MatType, GemmFlags, UseRoi)
{
    cv::cuda::DeviceInfo devInfo; // CUDA device the test runs on
    cv::Size size;                // dimensions of the input matrices
    int type;                     // OpenCV matrix type (e.g. CV_32FC1)
    int flags;                    // combination of cv::GEMM_*_T flags
    bool useRoi;                  // if true, inputs/outputs are submatrix ROIs

    virtual void SetUp()
    {
        devInfo = GET_PARAM(0);
        size = GET_PARAM(1);
        type = GET_PARAM(2);
        flags = GET_PARAM(3);
        useRoi = GET_PARAM(4);

        // Make the selected device current before any GPU allocations.
        cv::cuda::setDevice(devInfo.deviceID());
    }
};
  67. CUDA_TEST_P(GEMM, Accuracy)
  68. {
  69. cv::Mat src1 = randomMat(size, type, -10.0, 10.0);
  70. cv::Mat src2 = randomMat(size, type, -10.0, 10.0);
  71. cv::Mat src3 = randomMat(size, type, -10.0, 10.0);
  72. double alpha = randomDouble(-10.0, 10.0);
  73. double beta = randomDouble(-10.0, 10.0);
  74. if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
  75. {
  76. try
  77. {
  78. cv::cuda::GpuMat dst;
  79. cv::cuda::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
  80. }
  81. catch (const cv::Exception& e)
  82. {
  83. ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
  84. }
  85. }
  86. else if (type == CV_64FC2 && flags != 0)
  87. {
  88. try
  89. {
  90. cv::cuda::GpuMat dst;
  91. cv::cuda::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
  92. }
  93. catch (const cv::Exception& e)
  94. {
  95. ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
  96. }
  97. }
  98. else
  99. {
  100. cv::cuda::GpuMat dst = createMat(size, type, useRoi);
  101. cv::cuda::gemm(loadMat(src1, useRoi), loadMat(src2, useRoi), alpha, loadMat(src3, useRoi), beta, dst, flags);
  102. cv::Mat dst_gold;
  103. cv::gemm(src1, src2, alpha, src3, beta, dst_gold, flags);
  104. EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1e-10);
  105. }
  106. }
// Instantiate GEMM over every device, size, the four float/complex types,
// all tested transpose-flag combinations, and whole-matrix vs. submatrix inputs.
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, GEMM, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatType(CV_32FC1), MatType(CV_32FC2), MatType(CV_64FC1), MatType(CV_64FC2)),
    ALL_GEMM_FLAGS,
    WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////
// MulSpectrums

// Bit-flag parameter type wrapping the cv::dft flag values used by the spectrum/DFT tests.
CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
// Fixture for cv::cuda::mulSpectrums / mulAndScaleSpectrums:
// parameterized over device, spectrum size, and DFT flags.
PARAM_TEST_CASE(MulSpectrums, cv::cuda::DeviceInfo, cv::Size, DftFlags)
{
    cv::cuda::DeviceInfo devInfo; // CUDA device the test runs on
    cv::Size size;                // spectrum dimensions
    int flag;                     // DFT flag forwarded to mulSpectrums
    cv::Mat a, b;                 // random complex (CV_32FC2) input spectra

    virtual void SetUp()
    {
        devInfo = GET_PARAM(0);
        size = GET_PARAM(1);
        flag = GET_PARAM(2);

        cv::cuda::setDevice(devInfo.deviceID());

        // Inputs are generated once per test in the fixture.
        a = randomMat(size, CV_32FC2);
        b = randomMat(size, CV_32FC2);
    }
};
  132. CUDA_TEST_P(MulSpectrums, Simple)
  133. {
  134. cv::cuda::GpuMat c;
  135. cv::cuda::mulSpectrums(loadMat(a), loadMat(b), c, flag, false);
  136. cv::Mat c_gold;
  137. cv::mulSpectrums(a, b, c_gold, flag, false);
  138. EXPECT_MAT_NEAR(c_gold, c, 1e-2);
  139. }
  140. CUDA_TEST_P(MulSpectrums, Scaled)
  141. {
  142. float scale = 1.f / size.area();
  143. cv::cuda::GpuMat c;
  144. cv::cuda::mulAndScaleSpectrums(loadMat(a), loadMat(b), c, flag, scale, false);
  145. cv::Mat c_gold;
  146. cv::mulSpectrums(a, b, c_gold, flag, false);
  147. c_gold.convertTo(c_gold, c_gold.type(), scale);
  148. EXPECT_MAT_NEAR(c_gold, c, 1e-2);
  149. }
// Only the default and DFT_ROWS flag variants are exercised here.
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, MulSpectrums, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
  154. ////////////////////////////////////////////////////////////////////////////
  155. // Dft
// Fixture for the cv::cuda::dft tests; parameterized on the device only —
// sizes and flags are generated inside each test body.
struct Dft : testing::TestWithParam<cv::cuda::DeviceInfo>
{
    cv::cuda::DeviceInfo devInfo; // CUDA device the test runs on

    virtual void SetUp()
    {
        devInfo = GetParam();

        // Make the selected device current before any GPU allocations.
        cv::cuda::setDevice(devInfo.deviceID());
    }
};
  165. namespace
  166. {
  167. void testC2C(const std::string& hint, int cols, int rows, int flags, bool inplace)
  168. {
  169. SCOPED_TRACE(hint);
  170. cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);
  171. cv::Mat b_gold;
  172. cv::dft(a, b_gold, flags);
  173. cv::cuda::GpuMat d_b;
  174. cv::cuda::GpuMat d_b_data;
  175. if (inplace)
  176. {
  177. d_b_data.create(1, a.size().area(), CV_32FC2);
  178. d_b = cv::cuda::GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
  179. }
  180. cv::cuda::dft(loadMat(a), d_b, cv::Size(cols, rows), flags);
  181. EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
  182. ASSERT_EQ(CV_32F, d_b.depth());
  183. ASSERT_EQ(2, d_b.channels());
  184. EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
  185. }
  186. }
  187. CUDA_TEST_P(Dft, C2C)
  188. {
  189. int cols = randomInt(2, 100);
  190. int rows = randomInt(2, 100);
  191. for (int i = 0; i < 2; ++i)
  192. {
  193. bool inplace = i != 0;
  194. testC2C("no flags", cols, rows, 0, inplace);
  195. testC2C("no flags 0 1", cols, rows + 1, 0, inplace);
  196. testC2C("no flags 1 0", cols, rows + 1, 0, inplace);
  197. testC2C("no flags 1 1", cols + 1, rows, 0, inplace);
  198. testC2C("DFT_INVERSE", cols, rows, cv::DFT_INVERSE, inplace);
  199. testC2C("DFT_ROWS", cols, rows, cv::DFT_ROWS, inplace);
  200. testC2C("single col", 1, rows, 0, inplace);
  201. testC2C("single row", cols, 1, 0, inplace);
  202. testC2C("single col inversed", 1, rows, cv::DFT_INVERSE, inplace);
  203. testC2C("single row inversed", cols, 1, cv::DFT_INVERSE, inplace);
  204. testC2C("single row DFT_ROWS", cols, 1, cv::DFT_ROWS, inplace);
  205. testC2C("size 1 2", 1, 2, 0, inplace);
  206. testC2C("size 2 1", 2, 1, 0, inplace);
  207. }
  208. }
  209. CUDA_TEST_P(Dft, Algorithm)
  210. {
  211. int cols = randomInt(2, 100);
  212. int rows = randomInt(2, 100);
  213. int flags = 0 | DFT_COMPLEX_INPUT;
  214. cv::Ptr<cv::cuda::DFT> dft = cv::cuda::createDFT(cv::Size(cols, rows), flags);
  215. for (int i = 0; i < 5; ++i)
  216. {
  217. SCOPED_TRACE("dft algorithm");
  218. cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);
  219. cv::cuda::GpuMat d_b;
  220. cv::cuda::GpuMat d_b_data;
  221. dft->compute(loadMat(a), d_b);
  222. cv::Mat b_gold;
  223. cv::dft(a, b_gold, flags);
  224. ASSERT_EQ(CV_32F, d_b.depth());
  225. ASSERT_EQ(2, d_b.channels());
  226. EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
  227. }
  228. }
namespace
{
    // Round-trips a real matrix through a forward real-to-complex DFT and an
    // inverse complex-to-real DFT on the GPU, expecting to recover the input.
    // When `inplace` is set, both destinations are pre-bound to caller-owned
    // buffers and the test checks that no reallocation occurred.
    void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)
    {
        SCOPED_TRACE(hint);

        cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC1, 0.0, 10.0);

        cv::cuda::GpuMat d_b, d_c;           // forward spectrum / recovered real result
        cv::cuda::GpuMat d_b_data, d_c_data; // backing buffers for the in-place variant
        if (inplace)
        {
            if (a.cols == 1)
            {
                // Single-column input: the packed spectrum has rows/2 + 1 rows
                // (the transform runs along the column).
                d_b_data.create(1, (a.rows / 2 + 1) * a.cols, CV_32FC2);
                d_b = cv::cuda::GpuMat(a.rows / 2 + 1, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
            }
            else
            {
                // General case: the packed spectrum has cols/2 + 1 columns per row.
                d_b_data.create(1, a.rows * (a.cols / 2 + 1), CV_32FC2);
                d_b = cv::cuda::GpuMat(a.rows, a.cols / 2 + 1, CV_32FC2, d_b_data.ptr(), (a.cols / 2 + 1) * d_b_data.elemSize());
            }
            // The inverse-transform destination matches the original real layout.
            d_c_data.create(1, a.size().area(), CV_32F);
            d_c = cv::cuda::GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
        }

        // Forward R2C, then inverse C2R with normalization (DFT_SCALE).
        cv::cuda::dft(loadMat(a), d_b, cv::Size(cols, rows), 0);
        cv::cuda::dft(d_b, d_c, cv::Size(cols, rows), cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);

        // In-place variant must have kept the caller-provided buffers.
        EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
        EXPECT_TRUE(!inplace || d_c.ptr() == d_c_data.ptr());

        ASSERT_EQ(CV_32F, d_c.depth());
        ASSERT_EQ(1, d_c.channels());

        cv::Mat c(d_c);
        EXPECT_MAT_NEAR(a, c, rows * cols * 1e-5);
    }
}
  262. CUDA_TEST_P(Dft, R2CThenC2R)
  263. {
  264. int cols = randomInt(2, 100);
  265. int rows = randomInt(2, 100);
  266. testR2CThenC2R("sanity", cols, rows, false);
  267. testR2CThenC2R("sanity 0 1", cols, rows + 1, false);
  268. testR2CThenC2R("sanity 1 0", cols + 1, rows, false);
  269. testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, false);
  270. testR2CThenC2R("single col", 1, rows, false);
  271. testR2CThenC2R("single col 1", 1, rows + 1, false);
  272. testR2CThenC2R("single row", cols, 1, false);
  273. testR2CThenC2R("single row 1", cols + 1, 1, false);
  274. testR2CThenC2R("sanity", cols, rows, true);
  275. testR2CThenC2R("sanity 0 1", cols, rows + 1, true);
  276. testR2CThenC2R("sanity 1 0", cols + 1, rows, true);
  277. testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, true);
  278. testR2CThenC2R("single row", cols, 1, true);
  279. testR2CThenC2R("single row 1", cols + 1, 1, true);
  280. }
  281. INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Dft, ALL_DEVICES);
  282. ////////////////////////////////////////////////////////
  283. // Convolve
  284. namespace
  285. {
  286. void convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false)
  287. {
  288. // reallocate the output array if needed
  289. C.create(std::abs(A.rows - B.rows) + 1, std::abs(A.cols - B.cols) + 1, A.type());
  290. cv::Size dftSize;
  291. // compute the size of DFT transform
  292. dftSize.width = cv::getOptimalDFTSize(A.cols + B.cols - 1);
  293. dftSize.height = cv::getOptimalDFTSize(A.rows + B.rows - 1);
  294. // allocate temporary buffers and initialize them with 0s
  295. cv::Mat tempA(dftSize, A.type(), cv::Scalar::all(0));
  296. cv::Mat tempB(dftSize, B.type(), cv::Scalar::all(0));
  297. // copy A and B to the top-left corners of tempA and tempB, respectively
  298. cv::Mat roiA(tempA, cv::Rect(0, 0, A.cols, A.rows));
  299. A.copyTo(roiA);
  300. cv::Mat roiB(tempB, cv::Rect(0, 0, B.cols, B.rows));
  301. B.copyTo(roiB);
  302. // now transform the padded A & B in-place;
  303. // use "nonzeroRows" hint for faster processing
  304. cv::dft(tempA, tempA, 0, A.rows);
  305. cv::dft(tempB, tempB, 0, B.rows);
  306. // multiply the spectrums;
  307. // the function handles packed spectrum representations well
  308. cv::mulSpectrums(tempA, tempB, tempA, 0, ccorr);
  309. // transform the product back from the frequency domain.
  310. // Even though all the result rows will be non-zero,
  311. // you need only the first C.rows of them, and thus you
  312. // pass nonzeroRows == C.rows
  313. cv::dft(tempA, tempA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows);
  314. // now copy the result back to C.
  315. tempA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C);
  316. }
  317. IMPLEMENT_PARAM_CLASS(KSize, int)
  318. IMPLEMENT_PARAM_CLASS(Ccorr, bool)
  319. }
// Fixture for cv::cuda::Convolution: parameterized over device, image size,
// square kernel size, and convolution vs. cross-correlation mode.
PARAM_TEST_CASE(Convolve, cv::cuda::DeviceInfo, cv::Size, KSize, Ccorr)
{
    cv::cuda::DeviceInfo devInfo; // CUDA device the test runs on
    cv::Size size;                // source image dimensions
    int ksize;                    // kernel is ksize x ksize
    bool ccorr;                   // true = cross-correlation, false = convolution

    virtual void SetUp()
    {
        devInfo = GET_PARAM(0);
        size = GET_PARAM(1);
        ksize = GET_PARAM(2);
        ccorr = GET_PARAM(3);

        // Make the selected device current before any GPU allocations.
        cv::cuda::setDevice(devInfo.deviceID());
    }
};
  335. CUDA_TEST_P(Convolve, Accuracy)
  336. {
  337. cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0);
  338. cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);
  339. cv::Ptr<cv::cuda::Convolution> conv = cv::cuda::createConvolution();
  340. cv::cuda::GpuMat dst;
  341. conv->convolve(loadMat(src), loadMat(kernel), dst, ccorr);
  342. cv::Mat dst_gold;
  343. convolveDFT(src, kernel, dst_gold, ccorr);
  344. EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);
  345. }
// Kernel sizes span small through large odd sizes; both correlation modes are covered.
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Convolve, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(KSize(3), KSize(7), KSize(11), KSize(17), KSize(19), KSize(23), KSize(45)),
    testing::Values(Ccorr(false), Ccorr(true))));
  351. #endif // HAVE_CUBLAS
  352. }} // namespace
  353. #endif // HAVE_CUDA