// op_diagvec_meat.hpp
  // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
  // Copyright 2008-2016 National ICT Australia (NICTA)
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  // http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  // ------------------------------------------------------------------------
  //! \addtogroup op_diagvec
  //! @{
  17. template<typename T1>
  18. inline
  19. void
  20. op_diagvec::apply(Mat<typename T1::elem_type>& out, const Op<T1, op_diagvec>& X)
  21. {
  22. arma_extra_debug_sigprint();
  23. typedef typename T1::elem_type eT;
  24. const Proxy<T1> P(X.m);
  25. if(P.is_alias(out) == false)
  26. {
  27. op_diagvec::apply_proxy(out, P);
  28. }
  29. else
  30. {
  31. Mat<eT> tmp;
  32. op_diagvec::apply_proxy(tmp, P);
  33. out.steal_mem(tmp);
  34. }
  35. }
  36. template<typename T1>
  37. inline
  38. void
  39. op_diagvec::apply_proxy(Mat<typename T1::elem_type>& out, const Proxy<T1>& P)
  40. {
  41. arma_extra_debug_sigprint();
  42. typedef typename T1::elem_type eT;
  43. const uword n_rows = P.get_n_rows();
  44. const uword n_cols = P.get_n_cols();
  45. const uword len = (std::min)(n_rows, n_cols);
  46. out.set_size(len, 1);
  47. eT* out_mem = out.memptr();
  48. uword i,j;
  49. for(i=0, j=1; j < len; i+=2, j+=2)
  50. {
  51. const eT tmp_i = P.at(i, i);
  52. const eT tmp_j = P.at(j, j);
  53. out_mem[i] = tmp_i;
  54. out_mem[j] = tmp_j;
  55. }
  56. if(i < len)
  57. {
  58. out_mem[i] = P.at(i, i);
  59. }
  60. }
  61. template<typename T1, typename T2>
  62. inline
  63. void
  64. op_diagvec::apply(Mat<typename T1::elem_type>& actual_out, const Op< Glue<T1,T2,glue_times>, op_diagvec>& X, const typename arma_not_cx<typename T1::elem_type>::result* junk)
  65. {
  66. arma_extra_debug_sigprint();
  67. arma_ignore(junk);
  68. typedef typename T1::elem_type eT;
  69. const partial_unwrap<T1> UA(X.m.A);
  70. const partial_unwrap<T2> UB(X.m.B);
  71. const typename partial_unwrap<T1>::stored_type& A = UA.M;
  72. const typename partial_unwrap<T2>::stored_type& B = UB.M;
  73. arma_debug_assert_trans_mul_size< partial_unwrap<T1>::do_trans, partial_unwrap<T2>::do_trans >(A.n_rows, A.n_cols, B.n_rows, B.n_cols, "matrix multiplication");
  74. if( (A.n_elem == 0) || (B.n_elem == 0) ) { actual_out.reset(); return; }
  75. const bool use_alpha = partial_unwrap<T1>::do_times || partial_unwrap<T2>::do_times;
  76. const eT alpha = use_alpha ? (UA.get_val() * UB.get_val()) : eT(0);
  77. const bool is_alias = (UA.is_alias(actual_out) || UB.is_alias(actual_out));
  78. Mat<eT> tmp;
  79. Mat<eT>& out = (is_alias) ? tmp : actual_out;
  80. const uword A_n_rows = A.n_rows;
  81. const uword A_n_cols = A.n_cols;
  82. const uword B_n_rows = B.n_rows;
  83. const uword B_n_cols = B.n_cols;
  84. if( (partial_unwrap<T1>::do_trans == false) && (partial_unwrap<T2>::do_trans == false) )
  85. {
  86. arma_extra_debug_print("trans_A = false; trans_B = false;");
  87. const uword N = (std::min)(A_n_rows, B_n_cols);
  88. out.set_size(N,1);
  89. eT* out_mem = out.memptr();
  90. for(uword k=0; k < N; ++k)
  91. {
  92. eT acc1 = eT(0);
  93. eT acc2 = eT(0);
  94. const eT* B_colptr = B.colptr(k);
  95. // condition: A_n_cols = B_n_rows
  96. uword j;
  97. for(j=1; j < A_n_cols; j+=2)
  98. {
  99. const uword i = (j-1);
  100. const eT tmp_i = B_colptr[i];
  101. const eT tmp_j = B_colptr[j];
  102. acc1 += A.at(k, i) * tmp_i;
  103. acc2 += A.at(k, j) * tmp_j;
  104. }
  105. const uword i = (j-1);
  106. if(i < A_n_cols)
  107. {
  108. acc1 += A.at(k, i) * B_colptr[i];
  109. }
  110. const eT acc = acc1 + acc2;
  111. out_mem[k] = (use_alpha) ? eT(alpha * acc) : eT(acc);
  112. }
  113. }
  114. else
  115. if( (partial_unwrap<T1>::do_trans == true ) && (partial_unwrap<T2>::do_trans == false) )
  116. {
  117. arma_extra_debug_print("trans_A = true; trans_B = false;");
  118. const uword N = (std::min)(A_n_cols, B_n_cols);
  119. out.set_size(N,1);
  120. eT* out_mem = out.memptr();
  121. for(uword k=0; k < N; ++k)
  122. {
  123. const eT* A_colptr = A.colptr(k);
  124. const eT* B_colptr = B.colptr(k);
  125. // condition: A_n_rows = B_n_rows
  126. const eT acc = op_dot::direct_dot(A_n_rows, A_colptr, B_colptr);
  127. out_mem[k] = (use_alpha) ? eT(alpha * acc) : eT(acc);
  128. }
  129. }
  130. else
  131. if( (partial_unwrap<T1>::do_trans == false) && (partial_unwrap<T2>::do_trans == true ) )
  132. {
  133. arma_extra_debug_print("trans_A = false; trans_B = true;");
  134. const uword N = (std::min)(A_n_rows, B_n_rows);
  135. out.set_size(N,1);
  136. eT* out_mem = out.memptr();
  137. for(uword k=0; k < N; ++k)
  138. {
  139. eT acc = eT(0);
  140. // condition: A_n_cols = B_n_cols
  141. for(uword i=0; i < A_n_cols; ++i)
  142. {
  143. acc += A.at(k,i) * B.at(k,i);
  144. }
  145. out_mem[k] = (use_alpha) ? eT(alpha * acc) : eT(acc);
  146. }
  147. }
  148. else
  149. if( (partial_unwrap<T1>::do_trans == true ) && (partial_unwrap<T2>::do_trans == true ) )
  150. {
  151. arma_extra_debug_print("trans_A = true; trans_B = true;");
  152. const uword N = (std::min)(A_n_cols, B_n_rows);
  153. out.set_size(N,1);
  154. eT* out_mem = out.memptr();
  155. for(uword k=0; k < N; ++k)
  156. {
  157. eT acc = eT(0);
  158. const eT* A_colptr = A.colptr(k);
  159. // condition: A_n_rows = B_n_cols
  160. for(uword i=0; i < A_n_rows; ++i)
  161. {
  162. acc += A_colptr[i] * B.at(k,i);
  163. }
  164. out_mem[k] = (use_alpha) ? eT(alpha * acc) : eT(acc);
  165. }
  166. }
  167. if(is_alias) { actual_out.steal_mem(tmp); }
  168. }
  169. template<typename T1, typename T2>
  170. inline
  171. void
  172. op_diagvec::apply(Mat<typename T1::elem_type>& actual_out, const Op< Glue<T1,T2,glue_times>, op_diagvec>& X, const typename arma_cx_only<typename T1::elem_type>::result* junk)
  173. {
  174. arma_extra_debug_sigprint();
  175. arma_ignore(junk);
  176. typedef typename T1::pod_type T;
  177. typedef typename T1::elem_type eT;
  178. const partial_unwrap<T1> UA(X.m.A);
  179. const partial_unwrap<T2> UB(X.m.B);
  180. const typename partial_unwrap<T1>::stored_type& A = UA.M;
  181. const typename partial_unwrap<T2>::stored_type& B = UB.M;
  182. arma_debug_assert_trans_mul_size< partial_unwrap<T1>::do_trans, partial_unwrap<T2>::do_trans >(A.n_rows, A.n_cols, B.n_rows, B.n_cols, "matrix multiplication");
  183. if( (A.n_elem == 0) || (B.n_elem == 0) ) { actual_out.reset(); return; }
  184. const bool use_alpha = partial_unwrap<T1>::do_times || partial_unwrap<T2>::do_times;
  185. const eT alpha = use_alpha ? (UA.get_val() * UB.get_val()) : eT(0);
  186. const bool is_alias = (UA.is_alias(actual_out) || UB.is_alias(actual_out));
  187. Mat<eT> tmp;
  188. Mat<eT>& out = (is_alias) ? tmp : actual_out;
  189. const uword A_n_rows = A.n_rows;
  190. const uword A_n_cols = A.n_cols;
  191. const uword B_n_rows = B.n_rows;
  192. const uword B_n_cols = B.n_cols;
  193. if( (partial_unwrap<T1>::do_trans == false) && (partial_unwrap<T2>::do_trans == false) )
  194. {
  195. arma_extra_debug_print("trans_A = false; trans_B = false;");
  196. const uword N = (std::min)(A_n_rows, B_n_cols);
  197. out.set_size(N,1);
  198. eT* out_mem = out.memptr();
  199. for(uword k=0; k < N; ++k)
  200. {
  201. T acc_real = T(0);
  202. T acc_imag = T(0);
  203. const eT* B_colptr = B.colptr(k);
  204. // condition: A_n_cols = B_n_rows
  205. for(uword i=0; i < A_n_cols; ++i)
  206. {
  207. // acc += A.at(k, i) * B_colptr[i];
  208. const std::complex<T>& xx = A.at(k, i);
  209. const std::complex<T>& yy = B_colptr[i];
  210. const T a = xx.real();
  211. const T b = xx.imag();
  212. const T c = yy.real();
  213. const T d = yy.imag();
  214. acc_real += (a*c) - (b*d);
  215. acc_imag += (a*d) + (b*c);
  216. }
  217. const eT acc = std::complex<T>(acc_real, acc_imag);
  218. out_mem[k] = (use_alpha) ? eT(alpha * acc) : eT(acc);
  219. }
  220. }
  221. else
  222. if( (partial_unwrap<T1>::do_trans == true) && (partial_unwrap<T2>::do_trans == false) )
  223. {
  224. arma_extra_debug_print("trans_A = true; trans_B = false;");
  225. const uword N = (std::min)(A_n_cols, B_n_cols);
  226. out.set_size(N,1);
  227. eT* out_mem = out.memptr();
  228. for(uword k=0; k < N; ++k)
  229. {
  230. T acc_real = T(0);
  231. T acc_imag = T(0);
  232. const eT* A_colptr = A.colptr(k);
  233. const eT* B_colptr = B.colptr(k);
  234. // condition: A_n_rows = B_n_rows
  235. for(uword i=0; i < A_n_rows; ++i)
  236. {
  237. // acc += std::conj(A_colptr[i]) * B_colptr[i];
  238. const std::complex<T>& xx = A_colptr[i];
  239. const std::complex<T>& yy = B_colptr[i];
  240. const T a = xx.real();
  241. const T b = xx.imag();
  242. const T c = yy.real();
  243. const T d = yy.imag();
  244. // take into account the complex conjugate of xx
  245. acc_real += (a*c) + (b*d);
  246. acc_imag += (a*d) - (b*c);
  247. }
  248. const eT acc = std::complex<T>(acc_real, acc_imag);
  249. out_mem[k] = (use_alpha) ? eT(alpha * acc) : eT(acc);
  250. }
  251. }
  252. else
  253. if( (partial_unwrap<T1>::do_trans == false) && (partial_unwrap<T2>::do_trans == true) )
  254. {
  255. arma_extra_debug_print("trans_A = false; trans_B = true;");
  256. const uword N = (std::min)(A_n_rows, B_n_rows);
  257. out.set_size(N,1);
  258. eT* out_mem = out.memptr();
  259. for(uword k=0; k < N; ++k)
  260. {
  261. T acc_real = T(0);
  262. T acc_imag = T(0);
  263. // condition: A_n_cols = B_n_cols
  264. for(uword i=0; i < A_n_cols; ++i)
  265. {
  266. // acc += A.at(k,i) * std::conj(B.at(k,i));
  267. const std::complex<T>& xx = A.at(k, i);
  268. const std::complex<T>& yy = B.at(k, i);
  269. const T a = xx.real();
  270. const T b = xx.imag();
  271. const T c = yy.real();
  272. const T d = -yy.imag(); // take the conjugate
  273. acc_real += (a*c) - (b*d);
  274. acc_imag += (a*d) + (b*c);
  275. }
  276. const eT acc = std::complex<T>(acc_real, acc_imag);
  277. out_mem[k] = (use_alpha) ? eT(alpha * acc) : eT(acc);
  278. }
  279. }
  280. else
  281. if( (partial_unwrap<T1>::do_trans == true) && (partial_unwrap<T2>::do_trans == true) )
  282. {
  283. arma_extra_debug_print("trans_A = true; trans_B = true;");
  284. const uword N = (std::min)(A_n_cols, B_n_rows);
  285. out.set_size(N,1);
  286. eT* out_mem = out.memptr();
  287. for(uword k=0; k < N; ++k)
  288. {
  289. T acc_real = T(0);
  290. T acc_imag = T(0);
  291. const eT* A_colptr = A.colptr(k);
  292. // condition: A_n_rows = B_n_cols
  293. for(uword i=0; i < A_n_rows; ++i)
  294. {
  295. // acc += std::conj(A_colptr[i]) * std::conj(B.at(k,i));
  296. const std::complex<T>& xx = A_colptr[i];
  297. const std::complex<T>& yy = B.at(k, i);
  298. const T a = xx.real();
  299. const T b = -xx.imag(); // take the conjugate
  300. const T c = yy.real();
  301. const T d = -yy.imag(); // take the conjugate
  302. acc_real += (a*c) - (b*d);
  303. acc_imag += (a*d) + (b*c);
  304. }
  305. const eT acc = std::complex<T>(acc_real, acc_imag);
  306. out_mem[k] = (use_alpha) ? eT(alpha * acc) : eT(acc);
  307. }
  308. }
  309. if(is_alias) { actual_out.steal_mem(tmp); }
  310. }
  //
  //
  //
  314. template<typename T1>
  315. inline
  316. void
  317. op_diagvec2::apply(Mat<typename T1::elem_type>& out, const Op<T1, op_diagvec2>& X)
  318. {
  319. arma_extra_debug_sigprint();
  320. typedef typename T1::elem_type eT;
  321. const uword a = X.aux_uword_a;
  322. const uword b = X.aux_uword_b;
  323. const uword row_offset = (b > 0) ? a : 0;
  324. const uword col_offset = (b == 0) ? a : 0;
  325. const Proxy<T1> P(X.m);
  326. if(P.is_alias(out) == false)
  327. {
  328. op_diagvec2::apply_proxy(out, P, row_offset, col_offset);
  329. }
  330. else
  331. {
  332. Mat<eT> tmp;
  333. op_diagvec2::apply_proxy(tmp, P, row_offset, col_offset);
  334. out.steal_mem(tmp);
  335. }
  336. }
  337. template<typename T1>
  338. inline
  339. void
  340. op_diagvec2::apply_proxy(Mat<typename T1::elem_type>& out, const Proxy<T1>& P, const uword row_offset, const uword col_offset)
  341. {
  342. arma_extra_debug_sigprint();
  343. typedef typename T1::elem_type eT;
  344. const uword n_rows = P.get_n_rows();
  345. const uword n_cols = P.get_n_cols();
  346. arma_debug_check
  347. (
  348. ((row_offset > 0) && (row_offset >= n_rows)) || ((col_offset > 0) && (col_offset >= n_cols)),
  349. "diagvec(): requested diagonal is out of bounds"
  350. );
  351. const uword len = (std::min)(n_rows - row_offset, n_cols - col_offset);
  352. out.set_size(len, 1);
  353. eT* out_mem = out.memptr();
  354. uword i,j;
  355. for(i=0, j=1; j < len; i+=2, j+=2)
  356. {
  357. const eT tmp_i = P.at( i + row_offset, i + col_offset );
  358. const eT tmp_j = P.at( j + row_offset, j + col_offset );
  359. out_mem[i] = tmp_i;
  360. out_mem[j] = tmp_j;
  361. }
  362. if(i < len)
  363. {
  364. out_mem[i] = P.at( i + row_offset, i + col_offset );
  365. }
  366. }
  //! @}