test_scan.cu 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. #include "test_precomp.hpp"
  2. using namespace cv;
  3. using namespace cv::cudev;
  4. using namespace cvtest;
  5. // BlockScanInt
  // Test kernel for the int block scan: every thread replaces its own element of
  // `data` with the value blockScanInclusive returns for that element.
  //
  // The tests in this file launch it as one block of THREADS_NUM threads over a
  // buffer of THREADS_NUM ints. Scratch space lives in static shared memory:
  //   - CV_CUDEV_ARCH >= 300: (THREADS_NUM - 1) / WARP_SIZE + 1 entries
  //   - otherwise:            THREADS_NUM entries
  template <int THREADS_NUM>
  __global__ void int_kernel(int* data)
  {
  #if CV_CUDEV_ARCH >= 300
      const int scratch_len = (THREADS_NUM - 1) / WARP_SIZE + 1;
  #else
      const int scratch_len = THREADS_NUM;
  #endif
      __shared__ int smem[scratch_len];

      // Index of this thread as reported by Block::threadLineId(); it is used both
      // as the element index into `data` and as the thread id handed to the scan.
      const uint idx = Block::threadLineId();

      const int scanned = blockScanInclusive<THREADS_NUM>(data[idx], smem, idx);
      data[idx] = scanned;
  }
  // Defines the gtest case BlockScanInt.BlockSize<block_size>.
  //
  // The case uploads a 1 x block_size row of random CV_32SC1 values, turns the
  // host copy into its running (inclusive) sum, runs int_kernel<block_size> as a
  // single block of block_size threads on the device copy, and requires the two
  // results to match exactly.
  //
  // block_size must be an integer literal: it is pasted into the test name and
  // used as a template argument.
  #define BLOCK_SCAN_INT_TEST(block_size) \
  TEST(BlockScanInt, BlockSize##block_size) \
  { \
      Mat src = randomMat(Size(block_size, 1), CV_32SC1, 0, 1024); \
      \
      GpuMat d_src; \
      d_src.upload(src); \
      \
      /* Host reference: in-place inclusive prefix sum over the single row. */ \
      for (int col = 1; col < block_size; col++) \
          src.at<int>(0, col) += src.at<int>(0, col - 1); \
      \
      int_kernel<block_size><<<1, block_size>>>((int*)d_src.data); \
      \
      /* The launch statement itself returns nothing, so query the launch */ \
      /* status explicitly before waiting for the kernel to finish. */ \
      CV_CUDEV_SAFE_CALL(cudaGetLastError()); \
      CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize()); \
      \
      EXPECT_MAT_NEAR(d_src, src, 0); \
  }
  // Instantiate the int scan test for block sizes from 29 up to 1024. The list
  // mixes powers of two (32 ... 1024) with sizes that are not multiples of 32.

  // up to 32 threads
  BLOCK_SCAN_INT_TEST(29)
  BLOCK_SCAN_INT_TEST(30)
  BLOCK_SCAN_INT_TEST(32)

  // 33 .. 64 threads
  BLOCK_SCAN_INT_TEST(40)
  BLOCK_SCAN_INT_TEST(41)
  BLOCK_SCAN_INT_TEST(59)
  BLOCK_SCAN_INT_TEST(60)
  BLOCK_SCAN_INT_TEST(64)

  // 65 .. 128 threads
  BLOCK_SCAN_INT_TEST(70)
  BLOCK_SCAN_INT_TEST(71)
  BLOCK_SCAN_INT_TEST(109)
  BLOCK_SCAN_INT_TEST(110)
  BLOCK_SCAN_INT_TEST(128)

  // 129 .. 256 threads
  BLOCK_SCAN_INT_TEST(130)
  BLOCK_SCAN_INT_TEST(131)
  BLOCK_SCAN_INT_TEST(189)
  BLOCK_SCAN_INT_TEST(200)
  BLOCK_SCAN_INT_TEST(256)

  // 257 .. 512 threads
  BLOCK_SCAN_INT_TEST(300)
  BLOCK_SCAN_INT_TEST(311)
  BLOCK_SCAN_INT_TEST(489)
  BLOCK_SCAN_INT_TEST(500)
  BLOCK_SCAN_INT_TEST(512)

  // 513 .. 1024 threads
  BLOCK_SCAN_INT_TEST(600)
  BLOCK_SCAN_INT_TEST(611)
  BLOCK_SCAN_INT_TEST(1024)
  61. // BlockScanDouble
  // Test kernel for the double block scan: every thread replaces its own element
  // of `data` with the value blockScanInclusive returns for that element.
  //
  // The tests in this file launch it as one block of THREADS_NUM threads over a
  // buffer of THREADS_NUM doubles. Scratch space lives in static shared memory:
  //   - CV_CUDEV_ARCH >= 300: (THREADS_NUM - 1) / WARP_SIZE + 1 entries
  //   - otherwise:            THREADS_NUM entries
  template <int THREADS_NUM>
  __global__ void double_kernel(double* data)
  {
  #if CV_CUDEV_ARCH >= 300
      const int scratch_len = (THREADS_NUM - 1) / WARP_SIZE + 1;
  #else
      const int scratch_len = THREADS_NUM;
  #endif
      __shared__ double smem[scratch_len];

      // Index of this thread as reported by Block::threadLineId(); it is used both
      // as the element index into `data` and as the thread id handed to the scan.
      const uint idx = Block::threadLineId();

      const double scanned = blockScanInclusive<THREADS_NUM>(data[idx], smem, idx);
      data[idx] = scanned;
  }
  // Defines the gtest case BlockScanDouble.BlockSize<block_size>.
  //
  // The case uploads a 1 x block_size row of random CV_64FC1 values, turns the
  // host copy into its running (inclusive) sum, runs double_kernel<block_size> as
  // a single block of block_size threads on the device copy, and requires the two
  // results to agree within 1e-10.
  //
  // block_size must be an integer literal: it is pasted into the test name and
  // used as a template argument.
  #define BLOCK_SCAN_DOUBLE_TEST(block_size) \
  TEST(BlockScanDouble, BlockSize##block_size) \
  { \
      Mat src = randomMat(Size(block_size, 1), CV_64FC1, 0.0, 1.0); \
      \
      GpuMat d_src; \
      d_src.upload(src); \
      \
      /* Host reference: in-place inclusive prefix sum over the single row. */ \
      for (int col = 1; col < block_size; col++) \
          src.at<double>(0, col) += src.at<double>(0, col - 1); \
      \
      double_kernel<block_size><<<1, block_size>>>((double*)d_src.data); \
      \
      /* The launch statement itself returns nothing, so query the launch */ \
      /* status explicitly before waiting for the kernel to finish. */ \
      CV_CUDEV_SAFE_CALL(cudaGetLastError()); \
      CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize()); \
      \
      EXPECT_MAT_NEAR(d_src, src, 1e-10); \
  }
  // Instantiate the double scan test for block sizes from 29 up to 1024. The list
  // mixes powers of two (32 ... 1024) with sizes that are not multiples of 32.

  // up to 32 threads
  BLOCK_SCAN_DOUBLE_TEST(29)
  BLOCK_SCAN_DOUBLE_TEST(30)
  BLOCK_SCAN_DOUBLE_TEST(32)

  // 33 .. 64 threads
  BLOCK_SCAN_DOUBLE_TEST(40)
  BLOCK_SCAN_DOUBLE_TEST(41)
  BLOCK_SCAN_DOUBLE_TEST(59)
  BLOCK_SCAN_DOUBLE_TEST(60)
  BLOCK_SCAN_DOUBLE_TEST(64)

  // 65 .. 128 threads
  BLOCK_SCAN_DOUBLE_TEST(70)
  BLOCK_SCAN_DOUBLE_TEST(71)
  BLOCK_SCAN_DOUBLE_TEST(109)
  BLOCK_SCAN_DOUBLE_TEST(110)
  BLOCK_SCAN_DOUBLE_TEST(128)

  // 129 .. 256 threads
  BLOCK_SCAN_DOUBLE_TEST(130)
  BLOCK_SCAN_DOUBLE_TEST(131)
  BLOCK_SCAN_DOUBLE_TEST(189)
  BLOCK_SCAN_DOUBLE_TEST(200)
  BLOCK_SCAN_DOUBLE_TEST(256)

  // 257 .. 512 threads
  BLOCK_SCAN_DOUBLE_TEST(300)
  BLOCK_SCAN_DOUBLE_TEST(311)
  BLOCK_SCAN_DOUBLE_TEST(489)
  BLOCK_SCAN_DOUBLE_TEST(500)
  BLOCK_SCAN_DOUBLE_TEST(512)

  // 513 .. 1024 threads
  BLOCK_SCAN_DOUBLE_TEST(600)
  BLOCK_SCAN_DOUBLE_TEST(611)
  BLOCK_SCAN_DOUBLE_TEST(1024)