test_intrin_utils.hpp 58 KB


  1. // This file is part of OpenCV project.
  2. // It is subject to the license terms in the LICENSE file found in the top-level directory
  3. // of this distribution and at http://opencv.org/license.html.
  4. // This file is not standalone.
  5. // It is included with these active namespaces:
  6. //namespace opencv_test { namespace hal { namespace intrinXXX {
  7. //CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
  8. void test_hal_intrin_uint8();
  9. void test_hal_intrin_int8();
  10. void test_hal_intrin_uint16();
  11. void test_hal_intrin_int16();
  12. void test_hal_intrin_uint32();
  13. void test_hal_intrin_int32();
  14. void test_hal_intrin_uint64();
  15. void test_hal_intrin_int64();
  16. void test_hal_intrin_float32();
  17. void test_hal_intrin_float64();
  18. void test_hal_intrin_float16();
  19. #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
  20. template <typename R> struct Data;
  21. template <int N> struct initializer;
  22. template <> struct initializer<64>
  23. {
  24. template <typename R> static R init(const Data<R> & d)
  25. {
  26. return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15],
  27. d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31],
  28. d[32], d[33], d[34], d[35], d[36], d[37], d[38], d[39], d[40], d[41], d[42], d[43], d[44], d[45], d[46], d[47],
  29. d[48], d[49], d[50], d[51], d[52], d[53], d[54], d[55], d[56], d[57], d[58], d[59], d[60], d[61], d[62], d[63]);
  30. }
  31. };
  32. template <> struct initializer<32>
  33. {
  34. template <typename R> static R init(const Data<R> & d)
  35. {
  36. return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15],
  37. d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31]);
  38. }
  39. };
  40. template <> struct initializer<16>
  41. {
  42. template <typename R> static R init(const Data<R> & d)
  43. {
  44. return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]);
  45. }
  46. };
  47. template <> struct initializer<8>
  48. {
  49. template <typename R> static R init(const Data<R> & d)
  50. {
  51. return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]);
  52. }
  53. };
  54. template <> struct initializer<4>
  55. {
  56. template <typename R> static R init(const Data<R> & d)
  57. {
  58. return R(d[0], d[1], d[2], d[3]);
  59. }
  60. };
  61. template <> struct initializer<2>
  62. {
  63. template <typename R> static R init(const Data<R> & d)
  64. {
  65. return R(d[0], d[1]);
  66. }
  67. };
  68. //==================================================================================================
  69. template <typename R> struct Data
  70. {
  71. typedef typename R::lane_type LaneType;
  72. typedef typename V_TypeTraits<LaneType>::int_type int_type;
  73. Data()
  74. {
  75. for (int i = 0; i < R::nlanes; ++i)
  76. d[i] = (LaneType)(i + 1);
  77. }
  78. Data(LaneType val)
  79. {
  80. fill(val);
  81. }
  82. Data(const R & r)
  83. {
  84. *this = r;
  85. }
  86. operator R ()
  87. {
  88. return initializer<R::nlanes>().init(*this);
  89. }
  90. Data<R> & operator=(const R & r)
  91. {
  92. v_store(d, r);
  93. return *this;
  94. }
  95. template <typename T> Data<R> & operator*=(T m)
  96. {
  97. for (int i = 0; i < R::nlanes; ++i)
  98. d[i] *= (LaneType)m;
  99. return *this;
  100. }
  101. template <typename T> Data<R> & operator+=(T m)
  102. {
  103. for (int i = 0; i < R::nlanes; ++i)
  104. d[i] += (LaneType)m;
  105. return *this;
  106. }
  107. void fill(LaneType val, int s, int c = R::nlanes)
  108. {
  109. for (int i = s; i < c; ++i)
  110. d[i] = val;
  111. }
  112. void fill(LaneType val)
  113. {
  114. fill(val, 0);
  115. }
  116. void reverse()
  117. {
  118. for (int i = 0; i < R::nlanes / 2; ++i)
  119. std::swap(d[i], d[R::nlanes - i - 1]);
  120. }
  121. const LaneType & operator[](int i) const
  122. {
  123. #if 0 // TODO: strange bug - AVX2 tests are failed with this
  124. CV_CheckGE(i, 0, ""); CV_CheckLT(i, (int)R::nlanes, "");
  125. #else
  126. CV_Assert(i >= 0 && i < R::nlanes);
  127. #endif
  128. return d[i];
  129. }
  130. LaneType & operator[](int i)
  131. {
  132. CV_CheckGE(i, 0, ""); CV_CheckLT(i, (int)R::nlanes, "");
  133. return d[i];
  134. }
  135. int_type as_int(int i) const
  136. {
  137. CV_CheckGE(i, 0, ""); CV_CheckLT(i, (int)R::nlanes, "");
  138. union
  139. {
  140. LaneType l;
  141. int_type i;
  142. } v;
  143. v.l = d[i];
  144. return v.i;
  145. }
  146. const LaneType * mid() const
  147. {
  148. return d + R::nlanes / 2;
  149. }
  150. LaneType * mid()
  151. {
  152. return d + R::nlanes / 2;
  153. }
  154. LaneType sum(int s, int c)
  155. {
  156. LaneType res = 0;
  157. for (int i = s; i < s + c; ++i)
  158. res += d[i];
  159. return res;
  160. }
  161. LaneType sum()
  162. {
  163. return sum(0, R::nlanes);
  164. }
  165. bool operator==(const Data<R> & other) const
  166. {
  167. for (int i = 0; i < R::nlanes; ++i)
  168. if (d[i] != other.d[i])
  169. return false;
  170. return true;
  171. }
  172. void clear()
  173. {
  174. fill(0);
  175. }
  176. bool isZero() const
  177. {
  178. return isValue(0);
  179. }
  180. bool isValue(uchar val) const
  181. {
  182. for (int i = 0; i < R::nlanes; ++i)
  183. if (d[i] != val)
  184. return false;
  185. return true;
  186. }
  187. LaneType d[R::nlanes];
  188. };
  189. template<typename R> struct AlignedData
  190. {
  191. Data<R> CV_DECL_ALIGNED(CV_SIMD_WIDTH) a; // aligned
  192. char dummy;
  193. Data<R> u; // unaligned
  194. };
  195. template <typename R> std::ostream & operator<<(std::ostream & out, const Data<R> & d)
  196. {
  197. out << "{ ";
  198. for (int i = 0; i < R::nlanes; ++i)
  199. {
  200. // out << std::hex << +V_TypeTraits<typename R::lane_type>::reinterpret_int(d.d[i]);
  201. out << +d.d[i];
  202. if (i + 1 < R::nlanes)
  203. out << ", ";
  204. }
  205. out << " }";
  206. return out;
  207. }
  208. template<typename T> static inline void EXPECT_COMPARE_EQ_(const T a, const T b)
  209. {
  210. EXPECT_EQ(a, b);
  211. }
  212. template<> inline void EXPECT_COMPARE_EQ_<float>(const float a, const float b)
  213. {
  214. EXPECT_FLOAT_EQ( a, b );
  215. }
  216. template<> inline void EXPECT_COMPARE_EQ_<double>(const double a, const double b)
  217. {
  218. EXPECT_DOUBLE_EQ( a, b );
  219. }
  220. // pack functions do not do saturation when converting from 64-bit types
  221. template<typename T, typename W>
  222. inline T pack_saturate_cast(W a) { return saturate_cast<T>(a); }
  223. template<>
  224. inline int pack_saturate_cast<int, int64>(int64 a) { return static_cast<int>(a); }
  225. template<>
  226. inline unsigned pack_saturate_cast<unsigned, uint64>(uint64 a) { return static_cast<unsigned>(a); }
  227. template<typename R> struct TheTest
  228. {
  229. typedef typename R::lane_type LaneType;
  230. template <typename T1, typename T2>
  231. static inline void EXPECT_COMPARE_EQ(const T1 a, const T2 b)
  232. {
  233. EXPECT_COMPARE_EQ_<LaneType>((LaneType)a, (LaneType)b);
  234. }
  235. TheTest & test_loadstore()
  236. {
  237. AlignedData<R> data;
  238. AlignedData<R> out;
  239. // check if addresses are aligned and unaligned respectively
  240. EXPECT_EQ((size_t)0, (size_t)&data.a.d % CV_SIMD_WIDTH);
  241. EXPECT_NE((size_t)0, (size_t)&data.u.d % CV_SIMD_WIDTH);
  242. EXPECT_EQ((size_t)0, (size_t)&out.a.d % CV_SIMD_WIDTH);
  243. EXPECT_NE((size_t)0, (size_t)&out.u.d % CV_SIMD_WIDTH);
  244. // check some initialization methods
  245. R r1 = data.a;
  246. R r2 = vx_load(data.u.d);
  247. R r3 = vx_load_aligned(data.a.d);
  248. R r4(r2);
  249. EXPECT_EQ(data.a[0], r1.get0());
  250. EXPECT_EQ(data.u[0], r2.get0());
  251. EXPECT_EQ(data.a[0], r3.get0());
  252. EXPECT_EQ(data.u[0], r4.get0());
  253. R r_low = vx_load_low((LaneType*)data.u.d);
  254. EXPECT_EQ(data.u[0], r_low.get0());
  255. v_store(out.u.d, r_low);
  256. for (int i = 0; i < R::nlanes/2; ++i)
  257. {
  258. SCOPED_TRACE(cv::format("i=%d", i));
  259. EXPECT_EQ((LaneType)data.u[i], (LaneType)out.u[i]);
  260. }
  261. R r_low_align8byte = vx_load_low((LaneType*)((char*)data.u.d + (CV_SIMD_WIDTH / 2)));
  262. EXPECT_EQ(data.u[R::nlanes/2], r_low_align8byte.get0());
  263. v_store(out.u.d, r_low_align8byte);
  264. for (int i = 0; i < R::nlanes/2; ++i)
  265. {
  266. SCOPED_TRACE(cv::format("i=%d", i));
  267. EXPECT_EQ((LaneType)data.u[i + R::nlanes/2], (LaneType)out.u[i]);
  268. }
  269. // check some store methods
  270. out.u.clear();
  271. out.a.clear();
  272. v_store(out.u.d, r1);
  273. v_store_aligned(out.a.d, r2);
  274. EXPECT_EQ(data.a, out.a);
  275. EXPECT_EQ(data.u, out.u);
  276. // check more store methods
  277. Data<R> d, res(0);
  278. R r5 = d;
  279. v_store_high(res.mid(), r5);
  280. v_store_low(res.d, r5);
  281. EXPECT_EQ(d, res);
  282. // check halves load correctness
  283. res.clear();
  284. R r6 = vx_load_halves(d.d, d.mid());
  285. v_store(res.d, r6);
  286. EXPECT_EQ(d, res);
  287. // zero, all
  288. Data<R> resZ, resV;
  289. resZ.fill((LaneType)0);
  290. resV.fill((LaneType)8);
  291. for (int i = 0; i < R::nlanes; ++i)
  292. {
  293. SCOPED_TRACE(cv::format("i=%d", i));
  294. EXPECT_EQ((LaneType)0, resZ[i]);
  295. EXPECT_EQ((LaneType)8, resV[i]);
  296. }
  297. // reinterpret_as
  298. v_uint8 vu8 = v_reinterpret_as_u8(r1); out.a.clear(); v_store((uchar*)out.a.d, vu8); EXPECT_EQ(data.a, out.a);
  299. v_int8 vs8 = v_reinterpret_as_s8(r1); out.a.clear(); v_store((schar*)out.a.d, vs8); EXPECT_EQ(data.a, out.a);
  300. v_uint16 vu16 = v_reinterpret_as_u16(r1); out.a.clear(); v_store((ushort*)out.a.d, vu16); EXPECT_EQ(data.a, out.a);
  301. v_int16 vs16 = v_reinterpret_as_s16(r1); out.a.clear(); v_store((short*)out.a.d, vs16); EXPECT_EQ(data.a, out.a);
  302. v_uint32 vu32 = v_reinterpret_as_u32(r1); out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a);
  303. v_int32 vs32 = v_reinterpret_as_s32(r1); out.a.clear(); v_store((int*)out.a.d, vs32); EXPECT_EQ(data.a, out.a);
  304. v_uint64 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a);
  305. v_int64 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a);
  306. v_float32 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a);
  307. #if CV_SIMD_64F
  308. v_float64 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a);
  309. #endif
  310. #if CV_SIMD_WIDTH == 16
  311. R setall_res1 = v_setall((LaneType)5);
  312. R setall_res2 = v_setall<LaneType>(6);
  313. #elif CV_SIMD_WIDTH == 32
  314. R setall_res1 = v256_setall((LaneType)5);
  315. R setall_res2 = v256_setall<LaneType>(6);
  316. #elif CV_SIMD_WIDTH == 64
  317. R setall_res1 = v512_setall((LaneType)5);
  318. R setall_res2 = v512_setall<LaneType>(6);
  319. #else
  320. #error "Configuration error"
  321. #endif
  322. #if CV_SIMD_WIDTH > 0
  323. Data<R> setall_res1_; v_store(setall_res1_.d, setall_res1);
  324. Data<R> setall_res2_; v_store(setall_res2_.d, setall_res2);
  325. for (int i = 0; i < R::nlanes; ++i)
  326. {
  327. SCOPED_TRACE(cv::format("i=%d", i));
  328. EXPECT_EQ((LaneType)5, setall_res1_[i]);
  329. EXPECT_EQ((LaneType)6, setall_res2_[i]);
  330. }
  331. #endif
  332. R vx_setall_res1 = vx_setall((LaneType)11);
  333. R vx_setall_res2 = vx_setall<LaneType>(12);
  334. Data<R> vx_setall_res1_; v_store(vx_setall_res1_.d, vx_setall_res1);
  335. Data<R> vx_setall_res2_; v_store(vx_setall_res2_.d, vx_setall_res2);
  336. for (int i = 0; i < R::nlanes; ++i)
  337. {
  338. SCOPED_TRACE(cv::format("i=%d", i));
  339. EXPECT_EQ((LaneType)11, vx_setall_res1_[i]);
  340. EXPECT_EQ((LaneType)12, vx_setall_res2_[i]);
  341. }
  342. #if CV_SIMD_WIDTH == 16
  343. {
  344. uint64 a = CV_BIG_INT(0x7fffffffffffffff);
  345. uint64 b = (uint64)CV_BIG_INT(0xcfffffffffffffff);
  346. v_uint64x2 uint64_vec(a, b);
  347. EXPECT_EQ(a, uint64_vec.get0());
  348. EXPECT_EQ(b, v_extract_n<1>(uint64_vec));
  349. }
  350. {
  351. int64 a = CV_BIG_INT(0x7fffffffffffffff);
  352. int64 b = CV_BIG_INT(-1);
  353. v_int64x2 int64_vec(a, b);
  354. EXPECT_EQ(a, int64_vec.get0());
  355. EXPECT_EQ(b, v_extract_n<1>(int64_vec));
  356. }
  357. #endif
  358. return *this;
  359. }
  360. TheTest & test_interleave()
  361. {
  362. Data<R> data1, data2, data3, data4;
  363. data2 += 20;
  364. data3 += 40;
  365. data4 += 60;
  366. R a = data1, b = data2, c = data3;
  367. R d = data1, e = data2, f = data3, g = data4;
  368. LaneType buf3[R::nlanes * 3];
  369. LaneType buf4[R::nlanes * 4];
  370. v_store_interleave(buf3, a, b, c);
  371. v_store_interleave(buf4, d, e, f, g);
  372. Data<R> z(0);
  373. a = b = c = d = e = f = g = z;
  374. v_load_deinterleave(buf3, a, b, c);
  375. v_load_deinterleave(buf4, d, e, f, g);
  376. for (int i = 0; i < R::nlanes; ++i)
  377. {
  378. SCOPED_TRACE(cv::format("i=%d", i));
  379. EXPECT_EQ(data1, Data<R>(a));
  380. EXPECT_EQ(data2, Data<R>(b));
  381. EXPECT_EQ(data3, Data<R>(c));
  382. EXPECT_EQ(data1, Data<R>(d));
  383. EXPECT_EQ(data2, Data<R>(e));
  384. EXPECT_EQ(data3, Data<R>(f));
  385. EXPECT_EQ(data4, Data<R>(g));
  386. }
  387. return *this;
  388. }
  389. // float32x4 only
  390. TheTest & test_interleave_2channel()
  391. {
  392. Data<R> data1, data2;
  393. data2 += 20;
  394. R a = data1, b = data2;
  395. LaneType buf2[R::nlanes * 2];
  396. v_store_interleave(buf2, a, b);
  397. Data<R> z(0);
  398. a = b = z;
  399. v_load_deinterleave(buf2, a, b);
  400. for (int i = 0; i < R::nlanes; ++i)
  401. {
  402. SCOPED_TRACE(cv::format("i=%d", i));
  403. EXPECT_EQ(data1, Data<R>(a));
  404. EXPECT_EQ(data2, Data<R>(b));
  405. }
  406. return *this;
  407. }
  408. // v_expand and v_load_expand
  409. TheTest & test_expand()
  410. {
  411. typedef typename V_RegTraits<R>::w_reg Rx2;
  412. Data<R> dataA;
  413. R a = dataA;
  414. Data<Rx2> resB = vx_load_expand(dataA.d);
  415. Rx2 c, d, e, f;
  416. v_expand(a, c, d);
  417. e = v_expand_low(a);
  418. f = v_expand_high(a);
  419. Data<Rx2> resC = c, resD = d, resE = e, resF = f;
  420. const int n = Rx2::nlanes;
  421. for (int i = 0; i < n; ++i)
  422. {
  423. SCOPED_TRACE(cv::format("i=%d", i));
  424. EXPECT_EQ(dataA[i], resB[i]);
  425. EXPECT_EQ(dataA[i], resC[i]);
  426. EXPECT_EQ(dataA[i + n], resD[i]);
  427. EXPECT_EQ(dataA[i], resE[i]);
  428. EXPECT_EQ(dataA[i + n], resF[i]);
  429. }
  430. return *this;
  431. }
  432. TheTest & test_expand_q()
  433. {
  434. typedef typename V_RegTraits<R>::q_reg Rx4;
  435. Data<R> data;
  436. Data<Rx4> out = vx_load_expand_q(data.d);
  437. const int n = Rx4::nlanes;
  438. for (int i = 0; i < n; ++i)
  439. {
  440. SCOPED_TRACE(cv::format("i=%d", i));
  441. EXPECT_EQ(data[i], out[i]);
  442. }
  443. return *this;
  444. }
  445. TheTest & test_addsub()
  446. {
  447. Data<R> dataA, dataB;
  448. dataB.reverse();
  449. R a = dataA, b = dataB;
  450. Data<R> resC = a + b, resD = a - b;
  451. for (int i = 0; i < R::nlanes; ++i)
  452. {
  453. SCOPED_TRACE(cv::format("i=%d", i));
  454. EXPECT_EQ(saturate_cast<LaneType>(dataA[i] + dataB[i]), resC[i]);
  455. EXPECT_EQ(saturate_cast<LaneType>(dataA[i] - dataB[i]), resD[i]);
  456. }
  457. return *this;
  458. }
  459. TheTest & test_arithm_wrap()
  460. {
  461. Data<R> dataA, dataB;
  462. dataB.reverse();
  463. R a = dataA, b = dataB;
  464. Data<R> resC = v_add_wrap(a, b),
  465. resD = v_sub_wrap(a, b),
  466. resE = v_mul_wrap(a, b);
  467. for (int i = 0; i < R::nlanes; ++i)
  468. {
  469. SCOPED_TRACE(cv::format("i=%d", i));
  470. EXPECT_EQ((LaneType)(dataA[i] + dataB[i]), resC[i]);
  471. EXPECT_EQ((LaneType)(dataA[i] - dataB[i]), resD[i]);
  472. EXPECT_EQ((LaneType)(dataA[i] * dataB[i]), resE[i]);
  473. }
  474. return *this;
  475. }
  476. TheTest & test_mul()
  477. {
  478. Data<R> dataA, dataB;
  479. dataA[1] = static_cast<LaneType>(std::numeric_limits<LaneType>::max());
  480. dataB.reverse();
  481. R a = dataA, b = dataB;
  482. Data<R> resC = a * b;
  483. for (int i = 0; i < R::nlanes; ++i)
  484. {
  485. SCOPED_TRACE(cv::format("i=%d", i));
  486. EXPECT_EQ(saturate_cast<LaneType>(dataA[i] * dataB[i]), resC[i]);
  487. }
  488. return *this;
  489. }
  490. TheTest & test_div()
  491. {
  492. Data<R> dataA, dataB;
  493. dataB.reverse();
  494. R a = dataA, b = dataB;
  495. Data<R> resC = a / b;
  496. for (int i = 0; i < R::nlanes; ++i)
  497. {
  498. SCOPED_TRACE(cv::format("i=%d", i));
  499. EXPECT_EQ(dataA[i] / dataB[i], resC[i]);
  500. }
  501. return *this;
  502. }
  503. TheTest & test_mul_expand()
  504. {
  505. typedef typename V_RegTraits<R>::w_reg Rx2;
  506. Data<R> dataA, dataB(2);
  507. R a = dataA, b = dataB;
  508. Rx2 c, d;
  509. v_mul_expand(a, b, c, d);
  510. Data<Rx2> resC = c, resD = d;
  511. const int n = R::nlanes / 2;
  512. for (int i = 0; i < n; ++i)
  513. {
  514. SCOPED_TRACE(cv::format("i=%d", i));
  515. EXPECT_EQ((typename Rx2::lane_type)dataA[i] * dataB[i], resC[i]);
  516. EXPECT_EQ((typename Rx2::lane_type)dataA[i + n] * dataB[i + n], resD[i]);
  517. }
  518. return *this;
  519. }
  520. TheTest & test_mul_hi()
  521. {
  522. // typedef typename V_RegTraits<R>::w_reg Rx2;
  523. Data<R> dataA, dataB(32767);
  524. R a = dataA, b = dataB;
  525. R c = v_mul_hi(a, b);
  526. Data<R> resC = c;
  527. const int n = R::nlanes / 2;
  528. for (int i = 0; i < n; ++i)
  529. {
  530. SCOPED_TRACE(cv::format("i=%d", i));
  531. EXPECT_EQ((typename R::lane_type)((dataA[i] * dataB[i]) >> 16), resC[i]);
  532. }
  533. return *this;
  534. }
  535. TheTest & test_abs()
  536. {
  537. typedef typename V_RegTraits<R>::u_reg Ru;
  538. typedef typename Ru::lane_type u_type;
  539. Data<R> dataA, dataB(10);
  540. R a = dataA, b = dataB;
  541. a = a - b;
  542. Data<Ru> resC = v_abs(a);
  543. for (int i = 0; i < Ru::nlanes; ++i)
  544. {
  545. SCOPED_TRACE(cv::format("i=%d", i));
  546. EXPECT_EQ((u_type)std::abs(dataA[i] - dataB[i]), resC[i]);
  547. }
  548. return *this;
  549. }
  550. template <int s>
  551. TheTest & test_shift()
  552. {
  553. SCOPED_TRACE(s);
  554. Data<R> dataA;
  555. dataA[0] = static_cast<LaneType>(std::numeric_limits<LaneType>::max());
  556. R a = dataA;
  557. Data<R> resB = a << s, resC = v_shl<s>(a), resD = a >> s, resE = v_shr<s>(a);
  558. for (int i = 0; i < R::nlanes; ++i)
  559. {
  560. SCOPED_TRACE(cv::format("i=%d", i));
  561. EXPECT_EQ(static_cast<LaneType>(dataA[i] << s), resB[i]);
  562. EXPECT_EQ(static_cast<LaneType>(dataA[i] << s), resC[i]);
  563. EXPECT_EQ(static_cast<LaneType>(dataA[i] >> s), resD[i]);
  564. EXPECT_EQ(static_cast<LaneType>(dataA[i] >> s), resE[i]);
  565. }
  566. return *this;
  567. }
  568. TheTest & test_cmp()
  569. {
  570. Data<R> dataA, dataB;
  571. dataB.reverse();
  572. dataB += 1;
  573. R a = dataA, b = dataB;
  574. Data<R> resC = (a == b);
  575. Data<R> resD = (a != b);
  576. Data<R> resE = (a > b);
  577. Data<R> resF = (a >= b);
  578. Data<R> resG = (a < b);
  579. Data<R> resH = (a <= b);
  580. for (int i = 0; i < R::nlanes; ++i)
  581. {
  582. SCOPED_TRACE(cv::format("i=%d", i));
  583. EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
  584. EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
  585. EXPECT_EQ(dataA[i] > dataB[i], resE[i] != 0);
  586. EXPECT_EQ(dataA[i] >= dataB[i], resF[i] != 0);
  587. EXPECT_EQ(dataA[i] < dataB[i], resG[i] != 0);
  588. EXPECT_EQ(dataA[i] <= dataB[i], resH[i] != 0);
  589. }
  590. return *this;
  591. }
  592. TheTest & test_dotprod()
  593. {
  594. typedef typename V_RegTraits<R>::w_reg Rx2;
  595. typedef typename Rx2::lane_type w_type;
  596. Data<R> dataA, dataB;
  597. dataA += std::numeric_limits<LaneType>::max() - R::nlanes;
  598. dataB += std::numeric_limits<LaneType>::min() + R::nlanes;
  599. R a = dataA, b = dataB;
  600. Data<Rx2> dataC;
  601. dataC += std::numeric_limits<w_type>::is_signed ?
  602. std::numeric_limits<w_type>::min() :
  603. std::numeric_limits<w_type>::max() - R::nlanes * (dataB[0] + 1);
  604. Rx2 c = dataC;
  605. Data<Rx2> resD = v_dotprod(a, b),
  606. resE = v_dotprod(a, b, c);
  607. const int n = R::nlanes / 2;
  608. w_type sumAB = 0, sumABC = 0, tmp_sum;
  609. for (int i = 0; i < n; ++i)
  610. {
  611. SCOPED_TRACE(cv::format("i=%d", i));
  612. tmp_sum = (w_type)dataA[i*2] * (w_type)dataB[i*2] +
  613. (w_type)dataA[i*2 + 1] * (w_type)dataB[i*2 + 1];
  614. sumAB += tmp_sum;
  615. EXPECT_EQ(tmp_sum, resD[i]);
  616. tmp_sum = tmp_sum + dataC[i];
  617. sumABC += tmp_sum;
  618. EXPECT_EQ(tmp_sum, resE[i]);
  619. }
  620. w_type resF = v_reduce_sum(v_dotprod_fast(a, b)),
  621. resG = v_reduce_sum(v_dotprod_fast(a, b, c));
  622. EXPECT_EQ(sumAB, resF);
  623. EXPECT_EQ(sumABC, resG);
  624. return *this;
  625. }
  626. TheTest & test_dotprod_expand()
  627. {
  628. typedef typename V_RegTraits<R>::q_reg Rx4;
  629. typedef typename Rx4::lane_type l4_type;
  630. Data<R> dataA, dataB;
  631. dataA += std::numeric_limits<LaneType>::max() - R::nlanes;
  632. dataB += std::numeric_limits<LaneType>::min() + R::nlanes;
  633. R a = dataA, b = dataB;
  634. Data<Rx4> dataC;
  635. Rx4 c = dataC;
  636. Data<Rx4> resD = v_dotprod_expand(a, b),
  637. resE = v_dotprod_expand(a, b, c);
  638. l4_type sumAB = 0, sumABC = 0, tmp_sum;
  639. for (int i = 0; i < Rx4::nlanes; ++i)
  640. {
  641. SCOPED_TRACE(cv::format("i=%d", i));
  642. tmp_sum = (l4_type)dataA[i*4] * (l4_type)dataB[i*4] +
  643. (l4_type)dataA[i*4 + 1] * (l4_type)dataB[i*4 + 1] +
  644. (l4_type)dataA[i*4 + 2] * (l4_type)dataB[i*4 + 2] +
  645. (l4_type)dataA[i*4 + 3] * (l4_type)dataB[i*4 + 3];
  646. sumAB += tmp_sum;
  647. EXPECT_EQ(tmp_sum, resD[i]);
  648. tmp_sum = tmp_sum + dataC[i];
  649. sumABC += tmp_sum;
  650. EXPECT_EQ(tmp_sum, resE[i]);
  651. }
  652. l4_type resF = v_reduce_sum(v_dotprod_expand_fast(a, b)),
  653. resG = v_reduce_sum(v_dotprod_expand_fast(a, b, c));
  654. EXPECT_EQ(sumAB, resF);
  655. EXPECT_EQ(sumABC, resG);
  656. return *this;
  657. }
  658. TheTest & test_dotprod_expand_f64()
  659. {
  660. #if CV_SIMD_64F
  661. Data<R> dataA, dataB;
  662. dataA += std::numeric_limits<LaneType>::max() - R::nlanes;
  663. dataB += std::numeric_limits<LaneType>::min();
  664. R a = dataA, b = dataB;
  665. Data<v_float64> dataC;
  666. v_float64 c = dataC;
  667. Data<v_float64> resA = v_dotprod_expand(a, a),
  668. resB = v_dotprod_expand(b, b),
  669. resC = v_dotprod_expand(a, b, c);
  670. const int n = R::nlanes / 2;
  671. for (int i = 0; i < n; ++i)
  672. {
  673. SCOPED_TRACE(cv::format("i=%d", i));
  674. EXPECT_COMPARE_EQ((double)dataA[i*2] * (double)dataA[i*2] +
  675. (double)dataA[i*2 + 1] * (double)dataA[i*2 + 1], resA[i]);
  676. EXPECT_COMPARE_EQ((double)dataB[i*2] * (double)dataB[i*2] +
  677. (double)dataB[i*2 + 1] * (double)dataB[i*2 + 1], resB[i]);
  678. EXPECT_COMPARE_EQ((double)dataA[i*2] * (double)dataB[i*2] +
  679. (double)dataA[i*2 + 1] * (double)dataB[i*2 + 1] + dataC[i], resC[i]);
  680. }
  681. #endif
  682. return *this;
  683. }
  684. TheTest & test_logic()
  685. {
  686. Data<R> dataA, dataB(2);
  687. R a = dataA, b = dataB;
  688. Data<R> resC = a & b, resD = a | b, resE = a ^ b, resF = ~a;
  689. for (int i = 0; i < R::nlanes; ++i)
  690. {
  691. SCOPED_TRACE(cv::format("i=%d", i));
  692. EXPECT_EQ(dataA[i] & dataB[i], resC[i]);
  693. EXPECT_EQ(dataA[i] | dataB[i], resD[i]);
  694. EXPECT_EQ(dataA[i] ^ dataB[i], resE[i]);
  695. EXPECT_EQ((LaneType)~dataA[i], resF[i]);
  696. }
  697. return *this;
  698. }
  699. TheTest & test_sqrt_abs()
  700. {
  701. Data<R> dataA, dataD;
  702. dataD *= -1.0;
  703. R a = dataA, d = dataD;
  704. Data<R> resB = v_sqrt(a), resC = v_invsqrt(a), resE = v_abs(d);
  705. for (int i = 0; i < R::nlanes; ++i)
  706. {
  707. SCOPED_TRACE(cv::format("i=%d", i));
  708. EXPECT_COMPARE_EQ((float)std::sqrt(dataA[i]), (float)resB[i]);
  709. EXPECT_COMPARE_EQ((float)(1/std::sqrt(dataA[i])), (float)resC[i]);
  710. EXPECT_COMPARE_EQ((float)abs(dataA[i]), (float)resE[i]);
  711. }
  712. return *this;
  713. }
  714. TheTest & test_min_max()
  715. {
  716. Data<R> dataA, dataB;
  717. dataB.reverse();
  718. R a = dataA, b = dataB;
  719. Data<R> resC = v_min(a, b), resD = v_max(a, b);
  720. for (int i = 0; i < R::nlanes; ++i)
  721. {
  722. SCOPED_TRACE(cv::format("i=%d", i));
  723. EXPECT_EQ(std::min(dataA[i], dataB[i]), resC[i]);
  724. EXPECT_EQ(std::max(dataA[i], dataB[i]), resD[i]);
  725. }
  726. return *this;
  727. }
  728. TheTest & test_popcount()
  729. {
  730. typedef typename V_RegTraits<R>::u_reg Ru;
  731. static unsigned popcountTable[] = {
  732. 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, //0x00-0x0f
  733. 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x10-0x1f
  734. 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x20-0x2f
  735. 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x30-0x3f
  736. 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x40-0x4f
  737. 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x50-0x5f
  738. 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x60-0x6f
  739. 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, //0x70-0x7f
  740. 1 //0x80
  741. };
  742. Data<R> dataA;
  743. R a = dataA;
  744. Data<Ru> resB = v_popcount(a);
  745. for (int i = 0; i < Ru::nlanes; ++i)
  746. EXPECT_EQ(popcountTable[i + 1], resB[i]);
  747. return *this;
  748. }
  749. TheTest & test_absdiff()
  750. {
  751. typedef typename V_RegTraits<R>::u_reg Ru;
  752. typedef typename Ru::lane_type u_type;
  753. Data<R> dataA(std::numeric_limits<LaneType>::max()),
  754. dataB(std::numeric_limits<LaneType>::min());
  755. dataA[0] = (LaneType)-1;
  756. dataB[0] = 1;
  757. dataA[1] = 2;
  758. dataB[1] = (LaneType)-2;
  759. R a = dataA, b = dataB;
  760. Data<Ru> resC = v_absdiff(a, b);
  761. const u_type mask = std::numeric_limits<LaneType>::is_signed ? (u_type)(1 << (sizeof(u_type)*8 - 1)) : 0;
  762. for (int i = 0; i < Ru::nlanes; ++i)
  763. {
  764. SCOPED_TRACE(cv::format("i=%d", i));
  765. u_type uA = dataA[i] ^ mask;
  766. u_type uB = dataB[i] ^ mask;
  767. EXPECT_EQ(uA > uB ? uA - uB : uB - uA, resC[i]);
  768. }
  769. return *this;
  770. }
  771. TheTest & test_float_absdiff()
  772. {
  773. Data<R> dataA(std::numeric_limits<LaneType>::max()),
  774. dataB(std::numeric_limits<LaneType>::min());
  775. dataA[0] = -1;
  776. dataB[0] = 1;
  777. dataA[1] = 2;
  778. dataB[1] = -2;
  779. R a = dataA, b = dataB;
  780. Data<R> resC = v_absdiff(a, b);
  781. for (int i = 0; i < R::nlanes; ++i)
  782. {
  783. SCOPED_TRACE(cv::format("i=%d", i));
  784. EXPECT_EQ(dataA[i] > dataB[i] ? dataA[i] - dataB[i] : dataB[i] - dataA[i], resC[i]);
  785. }
  786. return *this;
  787. }
  788. TheTest & test_absdiffs()
  789. {
  790. Data<R> dataA(std::numeric_limits<LaneType>::max()),
  791. dataB(std::numeric_limits<LaneType>::min());
  792. dataA[0] = (LaneType)-1;
  793. dataB[0] = 1;
  794. dataA[1] = 2;
  795. dataB[1] = (LaneType)-2;
  796. R a = dataA, b = dataB;
  797. Data<R> resC = v_absdiffs(a, b);
  798. for (int i = 0; i < R::nlanes; ++i)
  799. {
  800. EXPECT_EQ(saturate_cast<LaneType>(std::abs(dataA[i] - dataB[i])), resC[i]);
  801. }
  802. return *this;
  803. }
  804. TheTest & test_reduce()
  805. {
  806. Data<R> dataA;
  807. int sum = 0;
  808. for (int i = 0; i < R::nlanes; ++i)
  809. {
  810. sum += (int)(dataA[i]); // To prevent a constant overflow with int8
  811. }
  812. R a = dataA;
  813. EXPECT_EQ((LaneType)1, (LaneType)v_reduce_min(a));
  814. EXPECT_EQ((LaneType)(R::nlanes), (LaneType)v_reduce_max(a));
  815. EXPECT_EQ((int)(sum), (int)v_reduce_sum(a));
  816. dataA[0] += R::nlanes;
  817. R an = dataA;
  818. EXPECT_EQ((LaneType)2, (LaneType)v_reduce_min(an));
  819. return *this;
  820. }
  821. TheTest & test_reduce_sad()
  822. {
  823. Data<R> dataA, dataB(R::nlanes/2);
  824. R a = dataA;
  825. R b = dataB;
  826. EXPECT_EQ((unsigned)(R::nlanes*R::nlanes/4), v_reduce_sad(a, b));
  827. return *this;
  828. }
  829. TheTest & test_mask()
  830. {
  831. typedef typename V_RegTraits<R>::int_reg int_reg;
  832. typedef typename V_RegTraits<int_reg>::u_reg uint_reg;
  833. typedef typename int_reg::lane_type int_type;
  834. typedef typename uint_reg::lane_type uint_type;
  835. Data<R> dataA, dataB(0), dataC, dataD(1), dataE(2);
  836. dataA[1] *= (LaneType)-1;
  837. union
  838. {
  839. LaneType l;
  840. uint_type ui;
  841. }
  842. all1s;
  843. all1s.ui = (uint_type)-1;
  844. LaneType mask_one = all1s.l;
  845. dataB[R::nlanes - 1] = mask_one;
  846. R l = dataB;
  847. dataB[1] = mask_one;
  848. dataB[R::nlanes / 2] = mask_one;
  849. dataC *= (LaneType)-1;
  850. R a = dataA, b = dataB, c = dataC, d = dataD, e = dataE;
  851. dataC[R::nlanes - 1] = 0;
  852. R nl = dataC;
  853. EXPECT_EQ(2, v_signmask(a));
  854. #if CV_SIMD_WIDTH <= 32
  855. EXPECT_EQ(2 | (1 << (R::nlanes / 2)) | (1 << (R::nlanes - 1)), v_signmask(b));
  856. #endif
  857. EXPECT_EQ(false, v_check_all(a));
  858. EXPECT_EQ(false, v_check_all(b));
  859. EXPECT_EQ(true, v_check_all(c));
  860. EXPECT_EQ(false, v_check_all(nl));
  861. EXPECT_EQ(true, v_check_any(a));
  862. EXPECT_EQ(true, v_check_any(b));
  863. EXPECT_EQ(true, v_check_any(c));
  864. EXPECT_EQ(true, v_check_any(l));
  865. R f = v_select(b, d, e);
  866. Data<R> resF = f;
  867. for (int i = 0; i < R::nlanes; ++i)
  868. {
  869. SCOPED_TRACE(cv::format("i=%d", i));
  870. int_type m2 = dataB.as_int(i);
  871. EXPECT_EQ((dataD.as_int(i) & m2) | (dataE.as_int(i) & ~m2), resF.as_int(i));
  872. }
  873. return *this;
  874. }
  875. template <int s>
  876. TheTest & test_pack()
  877. {
  878. SCOPED_TRACE(s);
  879. typedef typename V_RegTraits<R>::w_reg Rx2;
  880. typedef typename Rx2::lane_type w_type;
  881. Data<Rx2> dataA, dataB;
  882. dataA += std::numeric_limits<LaneType>::is_signed ? -10 : 10;
  883. dataB *= 10;
  884. dataB[0] = static_cast<w_type>(std::numeric_limits<LaneType>::max()) + 17; // to check saturation
  885. Rx2 a = dataA, b = dataB;
  886. Data<R> resC = v_pack(a, b);
  887. Data<R> resD = v_rshr_pack<s>(a, b);
  888. Data<R> resE(0);
  889. v_pack_store(resE.d, b);
  890. Data<R> resF(0);
  891. v_rshr_pack_store<s>(resF.d, b);
  892. const int n = Rx2::nlanes;
  893. const w_type add = (w_type)1 << (s - 1);
  894. for (int i = 0; i < n; ++i)
  895. {
  896. SCOPED_TRACE(cv::format("i=%d", i));
  897. EXPECT_EQ(pack_saturate_cast<LaneType>(dataA[i]), resC[i]);
  898. EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resC[i + n]);
  899. EXPECT_EQ(pack_saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]);
  900. EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resD[i + n]);
  901. EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resE[i]);
  902. EXPECT_EQ((LaneType)0, resE[i + n]);
  903. EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resF[i]);
  904. EXPECT_EQ((LaneType)0, resF[i + n]);
  905. }
  906. return *this;
  907. }
  908. template <int s>
  909. TheTest & test_pack_u()
  910. {
  911. SCOPED_TRACE(s);
  912. //typedef typename V_RegTraits<LaneType>::w_type LaneType_w;
  913. typedef typename V_RegTraits<R>::w_reg R2;
  914. typedef typename V_RegTraits<R2>::int_reg Ri2;
  915. typedef typename Ri2::lane_type w_type;
  916. Data<Ri2> dataA, dataB;
  917. dataA += -10;
  918. dataB *= 10;
  919. dataB[0] = static_cast<w_type>(std::numeric_limits<LaneType>::max()) + 17; // to check saturation
  920. Ri2 a = dataA, b = dataB;
  921. Data<R> resC = v_pack_u(a, b);
  922. Data<R> resD = v_rshr_pack_u<s>(a, b);
  923. Data<R> resE(0);
  924. v_pack_u_store(resE.d, b);
  925. Data<R> resF(0);
  926. v_rshr_pack_u_store<s>(resF.d, b);
  927. const int n = Ri2::nlanes;
  928. const w_type add = (w_type)1 << (s - 1);
  929. for (int i = 0; i < n; ++i)
  930. {
  931. SCOPED_TRACE(cv::format("i=%d", i));
  932. EXPECT_EQ(pack_saturate_cast<LaneType>(dataA[i]), resC[i]);
  933. EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resC[i + n]);
  934. EXPECT_EQ(pack_saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]);
  935. EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resD[i + n]);
  936. EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resE[i]);
  937. EXPECT_EQ((LaneType)0, resE[i + n]);
  938. EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resF[i]);
  939. EXPECT_EQ((LaneType)0, resF[i + n]);
  940. }
  941. return *this;
  942. }
  943. // v_uint8 only
  944. TheTest & test_pack_b()
  945. {
  946. // 16-bit
  947. Data<R> dataA, dataB;
  948. dataB.fill(0, R::nlanes / 2);
  949. R a = dataA, b = dataB;
  950. Data<R> maskA = a == b, maskB = a != b;
  951. a = maskA; b = maskB;
  952. Data<R> res = v_pack_b(v_reinterpret_as_u16(a), v_reinterpret_as_u16(b));
  953. for (int i = 0; i < v_uint16::nlanes; ++i)
  954. {
  955. SCOPED_TRACE(cv::format("i=%d", i));
  956. EXPECT_EQ(maskA[i * 2], res[i]);
  957. EXPECT_EQ(maskB[i * 2], res[i + v_uint16::nlanes]);
  958. }
  959. // 32-bit
  960. Data<R> dataC, dataD;
  961. dataD.fill(0, R::nlanes / 2);
  962. R c = dataC, d = dataD;
  963. Data<R> maskC = c == d, maskD = c != d;
  964. c = maskC; d = maskD;
  965. res = v_pack_b
  966. (
  967. v_reinterpret_as_u32(a), v_reinterpret_as_u32(b),
  968. v_reinterpret_as_u32(c), v_reinterpret_as_u32(d)
  969. );
  970. for (int i = 0; i < v_uint32::nlanes; ++i)
  971. {
  972. SCOPED_TRACE(cv::format("i=%d", i));
  973. EXPECT_EQ(maskA[i * 4], res[i]);
  974. EXPECT_EQ(maskB[i * 4], res[i + v_uint32::nlanes]);
  975. EXPECT_EQ(maskC[i * 4], res[i + v_uint32::nlanes * 2]);
  976. EXPECT_EQ(maskD[i * 4], res[i + v_uint32::nlanes * 3]);
  977. }
  978. // 64-bit
  979. Data<R> dataE, dataF, dataG(0), dataH(0xFF);
  980. dataF.fill(0, R::nlanes / 2);
  981. R e = dataE, f = dataF, g = dataG, h = dataH;
  982. Data<R> maskE = e == f, maskF = e != f;
  983. e = maskE; f = maskF;
  984. res = v_pack_b
  985. (
  986. v_reinterpret_as_u64(a), v_reinterpret_as_u64(b),
  987. v_reinterpret_as_u64(c), v_reinterpret_as_u64(d),
  988. v_reinterpret_as_u64(e), v_reinterpret_as_u64(f),
  989. v_reinterpret_as_u64(g), v_reinterpret_as_u64(h)
  990. );
  991. for (int i = 0; i < v_uint64::nlanes; ++i)
  992. {
  993. SCOPED_TRACE(cv::format("i=%d", i));
  994. EXPECT_EQ(maskA[i * 8], res[i]);
  995. EXPECT_EQ(maskB[i * 8], res[i + v_uint64::nlanes]);
  996. EXPECT_EQ(maskC[i * 8], res[i + v_uint64::nlanes * 2]);
  997. EXPECT_EQ(maskD[i * 8], res[i + v_uint64::nlanes * 3]);
  998. EXPECT_EQ(maskE[i * 8], res[i + v_uint64::nlanes * 4]);
  999. EXPECT_EQ(maskF[i * 8], res[i + v_uint64::nlanes * 5]);
  1000. EXPECT_EQ(dataG[i * 8], res[i + v_uint64::nlanes * 6]);
  1001. EXPECT_EQ(dataH[i * 8], res[i + v_uint64::nlanes * 7]);
  1002. }
  1003. return *this;
  1004. }
  1005. TheTest & test_unpack()
  1006. {
  1007. Data<R> dataA, dataB;
  1008. dataB *= 10;
  1009. R a = dataA, b = dataB;
  1010. R c, d, e, f, lo, hi;
  1011. v_zip(a, b, c, d);
  1012. v_recombine(a, b, e, f);
  1013. lo = v_combine_low(a, b);
  1014. hi = v_combine_high(a, b);
  1015. Data<R> resC = c, resD = d, resE = e, resF = f, resLo = lo, resHi = hi;
  1016. const int n = R::nlanes/2;
  1017. for (int i = 0; i < n; ++i)
  1018. {
  1019. SCOPED_TRACE(cv::format("i=%d", i));
  1020. EXPECT_EQ(dataA[i], resC[i*2]);
  1021. EXPECT_EQ(dataB[i], resC[i*2+1]);
  1022. EXPECT_EQ(dataA[i+n], resD[i*2]);
  1023. EXPECT_EQ(dataB[i+n], resD[i*2+1]);
  1024. EXPECT_EQ(dataA[i], resE[i]);
  1025. EXPECT_EQ(dataB[i], resE[i+n]);
  1026. EXPECT_EQ(dataA[i+n], resF[i]);
  1027. EXPECT_EQ(dataB[i+n], resF[i+n]);
  1028. EXPECT_EQ(dataA[i], resLo[i]);
  1029. EXPECT_EQ(dataB[i], resLo[i+n]);
  1030. EXPECT_EQ(dataA[i+n], resHi[i]);
  1031. EXPECT_EQ(dataB[i+n], resHi[i+n]);
  1032. }
  1033. return *this;
  1034. }
  1035. TheTest & test_reverse()
  1036. {
  1037. Data<R> dataA;
  1038. R a = dataA;
  1039. Data<R> resB = v_reverse(a);
  1040. for (int i = 0; i < R::nlanes; ++i)
  1041. {
  1042. SCOPED_TRACE(cv::format("i=%d", i));
  1043. EXPECT_EQ(dataA[R::nlanes - i - 1], resB[i]);
  1044. }
  1045. return *this;
  1046. }
  1047. template<int s>
  1048. TheTest & test_extract()
  1049. {
  1050. SCOPED_TRACE(s);
  1051. Data<R> dataA, dataB;
  1052. dataB *= 10;
  1053. R a = dataA, b = dataB;
  1054. Data<R> resC = v_extract<s>(a, b);
  1055. for (int i = 0; i < R::nlanes; ++i)
  1056. {
  1057. SCOPED_TRACE(cv::format("i=%d", i));
  1058. if (i + s >= R::nlanes)
  1059. EXPECT_EQ(dataB[i - R::nlanes + s], resC[i]);
  1060. else
  1061. EXPECT_EQ(dataA[i + s], resC[i]);
  1062. }
  1063. return *this;
  1064. }
  1065. template<int s>
  1066. TheTest & test_rotate()
  1067. {
  1068. SCOPED_TRACE(s);
  1069. Data<R> dataA, dataB;
  1070. dataB *= 10;
  1071. R a = dataA, b = dataB;
  1072. Data<R> resC = v_rotate_right<s>(a);
  1073. Data<R> resD = v_rotate_right<s>(a, b);
  1074. Data<R> resE = v_rotate_left<s>(a);
  1075. Data<R> resF = v_rotate_left<s>(a, b);
  1076. for (int i = 0; i < R::nlanes; ++i)
  1077. {
  1078. SCOPED_TRACE(cv::format("i=%d", i));
  1079. if (i + s >= R::nlanes)
  1080. {
  1081. EXPECT_EQ((LaneType)0, resC[i]);
  1082. EXPECT_EQ(dataB[i - R::nlanes + s], resD[i]);
  1083. EXPECT_EQ((LaneType)0, resE[i - R::nlanes + s]);
  1084. EXPECT_EQ(dataB[i], resF[i - R::nlanes + s]);
  1085. }
  1086. else
  1087. {
  1088. EXPECT_EQ(dataA[i + s], resC[i]);
  1089. EXPECT_EQ(dataA[i + s], resD[i]);
  1090. EXPECT_EQ(dataA[i], resE[i + s]);
  1091. EXPECT_EQ(dataA[i], resF[i + s]);
  1092. }
  1093. }
  1094. return *this;
  1095. }
  1096. template<int s>
  1097. TheTest & test_extract_n()
  1098. {
  1099. SCOPED_TRACE(s);
  1100. Data<R> dataA;
  1101. LaneType test_value = (LaneType)(s + 50);
  1102. dataA[s] = test_value;
  1103. R a = dataA;
  1104. LaneType res = v_extract_n<s>(a);
  1105. EXPECT_EQ(test_value, res);
  1106. return *this;
  1107. }
  1108. template<int s>
  1109. TheTest & test_broadcast_element()
  1110. {
  1111. SCOPED_TRACE(s);
  1112. Data<R> dataA;
  1113. LaneType test_value = (LaneType)(s + 50);
  1114. dataA[s] = test_value;
  1115. R a = dataA;
  1116. Data<R> res = v_broadcast_element<s>(a);
  1117. for (int i = 0; i < R::nlanes; ++i)
  1118. {
  1119. SCOPED_TRACE(i);
  1120. EXPECT_EQ(test_value, res[i]);
  1121. }
  1122. return *this;
  1123. }
  1124. TheTest & test_float_math()
  1125. {
  1126. typedef typename V_RegTraits<R>::round_reg Ri;
  1127. Data<R> data1, data2, data3;
  1128. data1 *= 1.1;
  1129. data2 += 10;
  1130. R a1 = data1, a2 = data2, a3 = data3;
  1131. Data<Ri> resB = v_round(a1),
  1132. resC = v_trunc(a1),
  1133. resD = v_floor(a1),
  1134. resE = v_ceil(a1);
  1135. Data<R> resF = v_magnitude(a1, a2),
  1136. resG = v_sqr_magnitude(a1, a2),
  1137. resH = v_muladd(a1, a2, a3);
  1138. for (int i = 0; i < R::nlanes; ++i)
  1139. {
  1140. SCOPED_TRACE(cv::format("i=%d", i));
  1141. EXPECT_EQ(cvRound(data1[i]), resB[i]);
  1142. EXPECT_EQ((typename Ri::lane_type)data1[i], resC[i]);
  1143. EXPECT_EQ(cvFloor(data1[i]), resD[i]);
  1144. EXPECT_EQ(cvCeil(data1[i]), resE[i]);
  1145. EXPECT_COMPARE_EQ(std::sqrt(data1[i]*data1[i] + data2[i]*data2[i]), resF[i]);
  1146. EXPECT_COMPARE_EQ(data1[i]*data1[i] + data2[i]*data2[i], resG[i]);
  1147. EXPECT_COMPARE_EQ(data1[i]*data2[i] + data3[i], resH[i]);
  1148. }
  1149. return *this;
  1150. }
  1151. TheTest & test_float_cvt32()
  1152. {
  1153. typedef v_float32 Rt;
  1154. Data<R> dataA;
  1155. dataA *= 1.1;
  1156. R a = dataA;
  1157. Rt b = v_cvt_f32(a);
  1158. Data<Rt> resB = b;
  1159. int n = std::min<int>(Rt::nlanes, R::nlanes);
  1160. for (int i = 0; i < n; ++i)
  1161. {
  1162. SCOPED_TRACE(cv::format("i=%d", i));
  1163. EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]);
  1164. }
  1165. return *this;
  1166. }
  1167. TheTest & test_float_cvt64()
  1168. {
  1169. #if CV_SIMD_64F
  1170. typedef v_float64 Rt;
  1171. Data<R> dataA;
  1172. dataA *= 1.1;
  1173. R a = dataA;
  1174. Rt b = v_cvt_f64(a);
  1175. Rt c = v_cvt_f64_high(a);
  1176. Data<Rt> resB = b;
  1177. Data<Rt> resC = c;
  1178. int n = std::min<int>(Rt::nlanes, R::nlanes);
  1179. for (int i = 0; i < n; ++i)
  1180. {
  1181. SCOPED_TRACE(cv::format("i=%d", i));
  1182. EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]);
  1183. }
  1184. for (int i = 0; i < n; ++i)
  1185. {
  1186. SCOPED_TRACE(cv::format("i=%d", i));
  1187. EXPECT_EQ((typename Rt::lane_type)dataA[i+n], resC[i]);
  1188. }
  1189. #endif
  1190. return *this;
  1191. }
  1192. TheTest & test_cvt64_double()
  1193. {
  1194. #if CV_SIMD_64F
  1195. Data<R> dataA(std::numeric_limits<LaneType>::max()),
  1196. dataB(std::numeric_limits<LaneType>::min());
  1197. dataB += R::nlanes;
  1198. R a = dataA, b = dataB;
  1199. v_float64 c = v_cvt_f64(a), d = v_cvt_f64(b);
  1200. Data<v_float64> resC = c;
  1201. Data<v_float64> resD = d;
  1202. for (int i = 0; i < R::nlanes; ++i)
  1203. {
  1204. SCOPED_TRACE(cv::format("i=%d", i));
  1205. EXPECT_EQ((double)dataA[i], resC[i]);
  1206. EXPECT_EQ((double)dataB[i], resD[i]);
  1207. }
  1208. #endif
  1209. return *this;
  1210. }
  1211. TheTest & test_matmul()
  1212. {
  1213. Data<R> dataV, dataA, dataB, dataC, dataD;
  1214. dataB.reverse();
  1215. dataC += 2;
  1216. dataD *= 0.3;
  1217. R v = dataV, a = dataA, b = dataB, c = dataC, d = dataD;
  1218. Data<R> res = v_matmul(v, a, b, c, d);
  1219. for (int i = 0; i < R::nlanes; i += 4)
  1220. {
  1221. for (int j = i; j < i + 4; ++j)
  1222. {
  1223. SCOPED_TRACE(cv::format("i=%d j=%d", i, j));
  1224. LaneType val = dataV[i] * dataA[j]
  1225. + dataV[i + 1] * dataB[j]
  1226. + dataV[i + 2] * dataC[j]
  1227. + dataV[i + 3] * dataD[j];
  1228. EXPECT_COMPARE_EQ(val, res[j]);
  1229. }
  1230. }
  1231. Data<R> resAdd = v_matmuladd(v, a, b, c, d);
  1232. for (int i = 0; i < R::nlanes; i += 4)
  1233. {
  1234. for (int j = i; j < i + 4; ++j)
  1235. {
  1236. SCOPED_TRACE(cv::format("i=%d j=%d", i, j));
  1237. LaneType val = dataV[i] * dataA[j]
  1238. + dataV[i + 1] * dataB[j]
  1239. + dataV[i + 2] * dataC[j]
  1240. + dataD[j];
  1241. EXPECT_COMPARE_EQ(val, resAdd[j]);
  1242. }
  1243. }
  1244. return *this;
  1245. }
  1246. TheTest & test_transpose()
  1247. {
  1248. Data<R> dataA, dataB, dataC, dataD;
  1249. dataB *= 5;
  1250. dataC *= 10;
  1251. dataD *= 15;
  1252. R a = dataA, b = dataB, c = dataC, d = dataD;
  1253. R e, f, g, h;
  1254. v_transpose4x4(a, b, c, d,
  1255. e, f, g, h);
  1256. Data<R> res[4] = {e, f, g, h};
  1257. for (int i = 0; i < R::nlanes; i += 4)
  1258. {
  1259. for (int j = 0; j < 4; ++j)
  1260. {
  1261. SCOPED_TRACE(cv::format("i=%d j=%d", i, j));
  1262. EXPECT_EQ(dataA[i + j], res[j][i]);
  1263. EXPECT_EQ(dataB[i + j], res[j][i + 1]);
  1264. EXPECT_EQ(dataC[i + j], res[j][i + 2]);
  1265. EXPECT_EQ(dataD[i + j], res[j][i + 3]);
  1266. }
  1267. }
  1268. return *this;
  1269. }
  1270. TheTest & test_reduce_sum4()
  1271. {
  1272. Data<R> dataA, dataB, dataC, dataD;
  1273. dataB *= 0.01f;
  1274. dataC *= 0.001f;
  1275. dataD *= 0.002f;
  1276. R a = dataA, b = dataB, c = dataC, d = dataD;
  1277. Data<R> res = v_reduce_sum4(a, b, c, d);
  1278. for (int i = 0; i < R::nlanes; i += 4)
  1279. {
  1280. SCOPED_TRACE(cv::format("i=%d", i));
  1281. EXPECT_COMPARE_EQ(dataA.sum(i, 4), res[i]);
  1282. EXPECT_COMPARE_EQ(dataB.sum(i, 4), res[i + 1]);
  1283. EXPECT_COMPARE_EQ(dataC.sum(i, 4), res[i + 2]);
  1284. EXPECT_COMPARE_EQ(dataD.sum(i, 4), res[i + 3]);
  1285. }
  1286. return *this;
  1287. }
  1288. TheTest & test_loadstore_fp16_f32()
  1289. {
  1290. printf("test_loadstore_fp16_f32 ...\n");
  1291. AlignedData<v_uint16> data; data.a.clear();
  1292. data.a.d[0] = 0x3c00; // 1.0
  1293. data.a.d[R::nlanes - 1] = (unsigned short)0xc000; // -2.0
  1294. AlignedData<v_float32> data_f32; data_f32.a.clear();
  1295. AlignedData<v_uint16> out;
  1296. R r1 = vx_load_expand((const cv::float16_t*)data.a.d);
  1297. R r2(r1);
  1298. EXPECT_EQ(1.0f, r1.get0());
  1299. v_store(data_f32.a.d, r2);
  1300. EXPECT_EQ(-2.0f, data_f32.a.d[R::nlanes - 1]);
  1301. out.a.clear();
  1302. v_pack_store((cv::float16_t*)out.a.d, r2);
  1303. for (int i = 0; i < R::nlanes; ++i)
  1304. {
  1305. EXPECT_EQ(data.a[i], out.a[i]) << "i=" << i;
  1306. }
  1307. return *this;
  1308. }
  #if 0
  // Disabled: this whole region is compiled out by the `#if 0` above.

  // Load/store round trip for a native float16 register type.
  TheTest & test_loadstore_fp16()
  {
      printf("test_loadstore_fp16 ...\n");
      AlignedData<R> input;
      AlignedData<R> output;

      // check if addresses are aligned and unaligned respectively
      EXPECT_EQ((size_t)0, (size_t)&input.a.d % CV_SIMD_WIDTH);
      EXPECT_NE((size_t)0, (size_t)&input.u.d % CV_SIMD_WIDTH);
      EXPECT_EQ((size_t)0, (size_t)&output.a.d % CV_SIMD_WIDTH);
      EXPECT_NE((size_t)0, (size_t)&output.u.d % CV_SIMD_WIDTH);

      // check some initialization methods
      R fromUnaligned = input.u;
      R fromExpand = vx_load_expand((const float16_t*)input.a.d);
      R fromCopy(fromExpand);
      EXPECT_EQ(input.u[0], fromUnaligned.get0());
      EXPECT_EQ(input.a[0], fromExpand.get0());
      EXPECT_EQ(input.a[0], fromCopy.get0());

      // check some store methods
      output.a.clear();
      v_store(output.a.d, fromUnaligned);
      EXPECT_EQ(input.a, output.a);

      return *this;
  }

  // float32 -> float16 -> float32 conversion round trip.
  TheTest & test_float_cvt_fp16()
  {
      printf("test_float_cvt_fp16 ...\n");
      AlignedData<v_float32> input;

      // check conversion
      v_float32 original = vx_load(input.a.d);
      v_float16 asHalf = v_cvt_f16(original, vx_setzero_f32());
      v_float32 restored = v_cvt_f32(asHalf);
      EXPECT_EQ(0x3c00, asHalf.get0());
      EXPECT_EQ(restored.get0(), original.get0());

      return *this;
  }
  #endif
  #if CV_SIMD_64F
  // Checks lane-wise == and != on R in two scenarios:
  //  1. the vectors differ in lane 0 only;
  //  2. every lane of both vectors holds (LaneType)-1.
  // A result lane counts as "true" when it is non-zero.
  TheTest & test_cmp64()
  {
      Data<R> dataA, dataB;

      // Scenario 1: make the inputs equal, then perturb lane 0 of A.
      for (int i = 0; i < R::nlanes; ++i)
      {
          dataA[i] = dataB[i];
      }
      dataA[0]++;

      // Registers are loaded only once the data is final; an earlier load
      // would be overwritten before it is ever read.
      R a = dataA;
      R b = dataB;

      Data<R> resC = (a == b);
      Data<R> resD = (a != b);

      for (int i = 0; i < R::nlanes; ++i)
      {
          SCOPED_TRACE(cv::format("i=%d", i));
          EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
          EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
      }

      // Scenario 2: all lanes set to (LaneType)-1 in both inputs.
      for (int i = 0; i < R::nlanes; ++i)
      {
          dataA[i] = dataB[i] = (LaneType)-1;
      }

      a = dataA;
      b = dataB;

      resC = (a == b);
      resD = (a != b);

      for (int i = 0; i < R::nlanes; ++i)
      {
          SCOPED_TRACE(cv::format("i=%d", i));
          EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
          EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
      }
      return *this;
  }
  #endif
  1381. };
  // Prints "SIMD<bits>: <function>", where <bits> is 8 * sizeof(v_uint8) and
  // the second field is CV__TRACE_FUNCTION (presumably the enclosing
  // function's name - confirm). The `type` argument is not used by the
  // expansion. The expansion already ends with ';'.
  #if 1
  #define DUMP_ENTRY(type) \
      printf("SIMD%d: %s\n", 8 * static_cast<int>(sizeof(v_uint8)), CV__TRACE_FUNCTION);
  #endif
  1385. //============= 8-bit integer =====================================================================
  1386. void test_hal_intrin_uint8()
  1387. {
  1388. DUMP_ENTRY(v_uint8);
  1389. typedef v_uint8 R;
  1390. TheTest<v_uint8>()
  1391. .test_loadstore()
  1392. .test_interleave()
  1393. .test_expand()
  1394. .test_expand_q()
  1395. .test_addsub()
  1396. .test_arithm_wrap()
  1397. .test_mul()
  1398. .test_mul_expand()
  1399. .test_cmp()
  1400. .test_logic()
  1401. .test_dotprod_expand()
  1402. .test_min_max()
  1403. .test_absdiff()
  1404. .test_reduce()
  1405. .test_reduce_sad()
  1406. .test_mask()
  1407. .test_popcount()
  1408. .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
  1409. .test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
  1410. .test_pack_b()
  1411. .test_unpack()
  1412. .test_reverse()
  1413. .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
  1414. .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
  1415. .test_extract_n<0>().test_extract_n<1>().test_extract_n<R::nlanes - 1>()
  1416. //.test_broadcast_element<0>().test_broadcast_element<1>().test_broadcast_element<R::nlanes - 1>()
  1417. #if CV_SIMD_WIDTH == 32
  1418. .test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>()
  1419. .test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>()
  1420. .test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>()
  1421. .test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>()
  1422. #endif
  1423. ;
  1424. }
  1425. void test_hal_intrin_int8()
  1426. {
  1427. DUMP_ENTRY(v_int8);
  1428. typedef v_int8 R;
  1429. TheTest<v_int8>()
  1430. .test_loadstore()
  1431. .test_interleave()
  1432. .test_expand()
  1433. .test_expand_q()
  1434. .test_addsub()
  1435. .test_arithm_wrap()
  1436. .test_mul()
  1437. .test_mul_expand()
  1438. .test_cmp()
  1439. .test_logic()
  1440. .test_dotprod_expand()
  1441. .test_min_max()
  1442. .test_absdiff()
  1443. .test_absdiffs()
  1444. .test_abs()
  1445. .test_reduce()
  1446. .test_reduce_sad()
  1447. .test_mask()
  1448. .test_popcount()
  1449. .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
  1450. .test_unpack()
  1451. .test_reverse()
  1452. .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
  1453. .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
  1454. .test_extract_n<0>().test_extract_n<1>().test_extract_n<R::nlanes - 1>()
  1455. //.test_broadcast_element<0>().test_broadcast_element<1>().test_broadcast_element<R::nlanes - 1>()
  1456. ;
  1457. }
  1458. //============= 16-bit integer =====================================================================
  1459. void test_hal_intrin_uint16()
  1460. {
  1461. DUMP_ENTRY(v_uint16);
  1462. typedef v_uint16 R;
  1463. TheTest<v_uint16>()
  1464. .test_loadstore()
  1465. .test_interleave()
  1466. .test_expand()
  1467. .test_addsub()
  1468. .test_arithm_wrap()
  1469. .test_mul()
  1470. .test_mul_expand()
  1471. .test_mul_hi()
  1472. .test_cmp()
  1473. .test_shift<1>()
  1474. .test_shift<8>()
  1475. .test_dotprod_expand()
  1476. .test_logic()
  1477. .test_min_max()
  1478. .test_absdiff()
  1479. .test_reduce()
  1480. .test_reduce_sad()
  1481. .test_mask()
  1482. .test_popcount()
  1483. .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
  1484. .test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
  1485. .test_unpack()
  1486. .test_reverse()
  1487. .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
  1488. .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
  1489. .test_extract_n<0>().test_extract_n<1>().test_extract_n<R::nlanes - 1>()
  1490. //.test_broadcast_element<0>().test_broadcast_element<1>().test_broadcast_element<R::nlanes - 1>()
  1491. ;
  1492. }
  1493. void test_hal_intrin_int16()
  1494. {
  1495. DUMP_ENTRY(v_int16);
  1496. typedef v_int16 R;
  1497. TheTest<v_int16>()
  1498. .test_loadstore()
  1499. .test_interleave()
  1500. .test_expand()
  1501. .test_addsub()
  1502. .test_arithm_wrap()
  1503. .test_mul()
  1504. .test_mul_expand()
  1505. .test_mul_hi()
  1506. .test_cmp()
  1507. .test_shift<1>()
  1508. .test_shift<8>()
  1509. .test_dotprod()
  1510. .test_dotprod_expand()
  1511. .test_logic()
  1512. .test_min_max()
  1513. .test_absdiff()
  1514. .test_absdiffs()
  1515. .test_abs()
  1516. .test_reduce()
  1517. .test_reduce_sad()
  1518. .test_mask()
  1519. .test_popcount()
  1520. .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
  1521. .test_unpack()
  1522. .test_reverse()
  1523. .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
  1524. .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
  1525. .test_extract_n<0>().test_extract_n<1>().test_extract_n<R::nlanes - 1>()
  1526. //.test_broadcast_element<0>().test_broadcast_element<1>().test_broadcast_element<R::nlanes - 1>()
  1527. ;
  1528. }
  1529. //============= 32-bit integer =====================================================================
  1530. void test_hal_intrin_uint32()
  1531. {
  1532. DUMP_ENTRY(v_uint32);
  1533. typedef v_uint32 R;
  1534. TheTest<v_uint32>()
  1535. .test_loadstore()
  1536. .test_interleave()
  1537. .test_expand()
  1538. .test_addsub()
  1539. .test_mul()
  1540. .test_mul_expand()
  1541. .test_cmp()
  1542. .test_shift<1>()
  1543. .test_shift<8>()
  1544. .test_logic()
  1545. .test_min_max()
  1546. .test_absdiff()
  1547. .test_reduce()
  1548. .test_reduce_sad()
  1549. .test_mask()
  1550. .test_popcount()
  1551. .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
  1552. .test_unpack()
  1553. .test_reverse()
  1554. .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
  1555. .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
  1556. .test_extract_n<0>().test_extract_n<1>().test_extract_n<R::nlanes - 1>()
  1557. .test_broadcast_element<0>().test_broadcast_element<1>().test_broadcast_element<R::nlanes - 1>()
  1558. .test_transpose()
  1559. ;
  1560. }
  1561. void test_hal_intrin_int32()
  1562. {
  1563. DUMP_ENTRY(v_int32);
  1564. typedef v_int32 R;
  1565. TheTest<v_int32>()
  1566. .test_loadstore()
  1567. .test_interleave()
  1568. .test_expand()
  1569. .test_addsub()
  1570. .test_mul()
  1571. .test_abs()
  1572. .test_cmp()
  1573. .test_popcount()
  1574. .test_shift<1>().test_shift<8>()
  1575. .test_dotprod()
  1576. .test_dotprod_expand_f64()
  1577. .test_logic()
  1578. .test_min_max()
  1579. .test_absdiff()
  1580. .test_reduce()
  1581. .test_reduce_sad()
  1582. .test_mask()
  1583. .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
  1584. .test_unpack()
  1585. .test_reverse()
  1586. .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
  1587. .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
  1588. .test_extract_n<0>().test_extract_n<1>().test_extract_n<R::nlanes - 1>()
  1589. .test_broadcast_element<0>().test_broadcast_element<1>().test_broadcast_element<R::nlanes - 1>()
  1590. .test_float_cvt32()
  1591. .test_float_cvt64()
  1592. .test_transpose()
  1593. ;
  1594. }
  1595. //============= 64-bit integer =====================================================================
  1596. void test_hal_intrin_uint64()
  1597. {
  1598. DUMP_ENTRY(v_uint64);
  1599. typedef v_uint64 R;
  1600. TheTest<v_uint64>()
  1601. .test_loadstore()
  1602. .test_addsub()
  1603. #if CV_SIMD_64F
  1604. .test_cmp64()
  1605. #endif
  1606. .test_shift<1>().test_shift<8>()
  1607. .test_logic()
  1608. .test_reverse()
  1609. .test_extract<0>().test_extract<1>()
  1610. .test_rotate<0>().test_rotate<1>()
  1611. .test_extract_n<0>().test_extract_n<1>().test_extract_n<R::nlanes - 1>()
  1612. //.test_broadcast_element<0>().test_broadcast_element<1>().test_broadcast_element<R::nlanes - 1>()
  1613. ;
  1614. }
  1615. void test_hal_intrin_int64()
  1616. {
  1617. DUMP_ENTRY(v_int64);
  1618. typedef v_int64 R;
  1619. TheTest<v_int64>()
  1620. .test_loadstore()
  1621. .test_addsub()
  1622. #if CV_SIMD_64F
  1623. .test_cmp64()
  1624. #endif
  1625. .test_shift<1>().test_shift<8>()
  1626. .test_logic()
  1627. .test_reverse()
  1628. .test_extract<0>().test_extract<1>()
  1629. .test_rotate<0>().test_rotate<1>()
  1630. .test_extract_n<0>().test_extract_n<1>().test_extract_n<R::nlanes - 1>()
  1631. //.test_broadcast_element<0>().test_broadcast_element<1>().test_broadcast_element<R::nlanes - 1>()
  1632. .test_cvt64_double()
  1633. ;
  1634. }
  1635. //============= Floating point =====================================================================
  1636. void test_hal_intrin_float32()
  1637. {
  1638. DUMP_ENTRY(v_float32);
  1639. typedef v_float32 R;
  1640. TheTest<v_float32>()
  1641. .test_loadstore()
  1642. .test_interleave()
  1643. .test_interleave_2channel()
  1644. .test_addsub()
  1645. .test_mul()
  1646. .test_div()
  1647. .test_cmp()
  1648. .test_sqrt_abs()
  1649. .test_min_max()
  1650. .test_float_absdiff()
  1651. .test_reduce()
  1652. .test_reduce_sad()
  1653. .test_mask()
  1654. .test_unpack()
  1655. .test_float_math()
  1656. .test_float_cvt64()
  1657. .test_matmul()
  1658. .test_transpose()
  1659. .test_reduce_sum4()
  1660. .test_reverse()
  1661. .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
  1662. .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
  1663. .test_extract_n<0>().test_extract_n<1>().test_extract_n<R::nlanes - 1>()
  1664. .test_broadcast_element<0>().test_broadcast_element<1>().test_broadcast_element<R::nlanes - 1>()
  1665. #if CV_SIMD_WIDTH == 32
  1666. .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
  1667. .test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()
  1668. #endif
  1669. ;
  1670. }
  1671. void test_hal_intrin_float64()
  1672. {
  1673. DUMP_ENTRY(v_float64);
  1674. #if CV_SIMD_64F
  1675. typedef v_float64 R;
  1676. TheTest<v_float64>()
  1677. .test_loadstore()
  1678. .test_addsub()
  1679. .test_mul()
  1680. .test_div()
  1681. .test_cmp()
  1682. .test_sqrt_abs()
  1683. .test_min_max()
  1684. .test_float_absdiff()
  1685. .test_mask()
  1686. .test_unpack()
  1687. .test_float_math()
  1688. .test_float_cvt32()
  1689. .test_reverse()
  1690. .test_extract<0>().test_extract<1>()
  1691. .test_rotate<0>().test_rotate<1>()
  1692. .test_extract_n<0>().test_extract_n<1>().test_extract_n<R::nlanes - 1>()
  1693. //.test_broadcast_element<0>().test_broadcast_element<1>().test_broadcast_element<R::nlanes - 1>()
  1694. #if CV_SIMD_WIDTH == 32
  1695. .test_extract<2>().test_extract<3>()
  1696. .test_rotate<2>().test_rotate<3>()
  1697. #endif
  1698. ;
  1699. #endif
  1700. }
  1701. void test_hal_intrin_float16()
  1702. {
  1703. DUMP_ENTRY(v_float16);
  1704. #if CV_FP16
  1705. TheTest<v_float32>()
  1706. .test_loadstore_fp16_f32()
  1707. #if CV_SIMD_FP16
  1708. .test_loadstore_fp16()
  1709. .test_float_cvt_fp16()
  1710. #endif
  1711. ;
  1712. #else
  1713. std::cout << "SKIP: CV_FP16 is not available" << std::endl;
  1714. #endif
  1715. }
  1716. /*#if defined(CV_CPU_DISPATCH_MODE_FP16) && CV_CPU_DISPATCH_MODE == FP16
  1717. void test_hal_intrin_float16()
  1718. {
  1719. TheTest<v_float16>()
  1720. .test_loadstore_fp16()
  1721. .test_float_cvt_fp16()
  1722. ;
  1723. }
  1724. #endif*/
  1725. #endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
  1726. //CV_CPU_OPTIMIZATION_NAMESPACE_END
  1727. //}}} // namespace