fft_engine.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
  2. // Copyright 2008-2016 National ICT Australia (NICTA)
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // ------------------------------------------------------------------------
  16. //
  17. // This file includes portions of Kiss FFT software,
  18. // licensed under the following conditions.
  19. //
  20. // Copyright (c) 2003-2010 Mark Borgerding
  21. //
  22. // All rights reserved.
  23. //
  24. // Redistribution and use in source and binary forms, with or without modification,
  25. // are permitted provided that the following conditions are met:
  26. //
  27. // * Redistributions of source code must retain the above copyright notice,
  28. // this list of conditions and the following disclaimer.
  29. //
  30. // * Redistributions in binary form must reproduce the above copyright notice,
  31. // this list of conditions and the following disclaimer in the documentation
  32. // and/or other materials provided with the distribution.
  33. //
  34. // * Neither the author nor the names of any contributors may be used to
  35. // endorse or promote products derived from this software without specific
  36. // prior written permission.
  37. //
  38. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  39. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  40. // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  41. // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  42. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  43. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  44. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  45. // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  46. // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  47. // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  48. // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  49. //
  50. // ------------------------------------------------------------------------
  51. //! \addtogroup fft_engine
  52. //! @{
  53. template<typename cx_type, uword fixed_N, bool> struct fft_store {};
  54. template<typename cx_type, uword fixed_N>
  55. struct fft_store<cx_type, fixed_N, true>
  56. {
  57. static const uword N = fixed_N;
  58. arma_aligned cx_type coeffs_array[fixed_N];
  59. inline fft_store() {}
  60. inline fft_store(uword) {}
  61. arma_inline cx_type* coeffs_ptr() { return &coeffs_array[0]; }
  62. arma_inline const cx_type* coeffs_ptr() const { return &coeffs_array[0]; }
  63. };
  64. template<typename cx_type, uword fixed_N>
  65. struct fft_store<cx_type, fixed_N, false>
  66. {
  67. const uword N;
  68. podarray<cx_type> coeffs_array;
  69. inline fft_store() : N(0) {}
  70. inline fft_store(uword in_N) : N(in_N) { coeffs_array.set_size(N); }
  71. arma_inline cx_type* coeffs_ptr() { return coeffs_array.memptr(); }
  72. arma_inline const cx_type* coeffs_ptr() const { return coeffs_array.memptr(); }
  73. };
  74. template<typename cx_type, bool inverse, uword fixed_N = 0>
  75. class fft_engine : public fft_store<cx_type, fixed_N, (fixed_N > 0)>
  76. {
  77. public:
  78. typedef typename get_pod_type<cx_type>::result T;
  79. using fft_store<cx_type, fixed_N, (fixed_N > 0)>::N;
  80. using fft_store<cx_type, fixed_N, (fixed_N > 0)>::coeffs_ptr;
  81. podarray<uword> residue;
  82. podarray<uword> radix;
  83. podarray<cx_type> tmp_array;
  84. template<bool fill>
  85. inline
  86. uword
  87. calc_radix()
  88. {
  89. uword i = 0;
  90. for(uword n = N, r=4; n >= 2; ++i)
  91. {
  92. while( (n % r) > 0 )
  93. {
  94. switch(r)
  95. {
  96. case 2: r = 3; break;
  97. case 4: r = 2; break;
  98. default: r += 2; break;
  99. }
  100. if(r*r > n) { r = n; }
  101. }
  102. n /= r;
  103. if(fill)
  104. {
  105. residue[i] = n;
  106. radix[i] = r;
  107. }
  108. }
  109. return i;
  110. }
  111. inline
  112. fft_engine(const uword in_N)
  113. : fft_store< cx_type, fixed_N, (fixed_N > 0) >(in_N)
  114. {
  115. arma_extra_debug_sigprint();
  116. const uword len = calc_radix<false>();
  117. residue.set_size(len);
  118. radix.set_size(len);
  119. calc_radix<true>();
  120. // calculate the constant coefficients
  121. cx_type* coeffs = coeffs_ptr();
  122. const T k = T( (inverse) ? +2 : -2 ) * std::acos( T(-1) ) / T(N);
  123. for(uword i=0; i < N; ++i) { coeffs[i] = std::exp( cx_type(T(0), i*k) ); }
  124. }
  125. arma_hot
  126. inline
  127. void
  128. butterfly_2(cx_type* Y, const uword stride, const uword m)
  129. {
  130. arma_extra_debug_sigprint();
  131. const cx_type* coeffs = coeffs_ptr();
  132. for(uword i=0; i < m; ++i)
  133. {
  134. const cx_type t = Y[i+m] * coeffs[i*stride];
  135. Y[i+m] = Y[i] - t;
  136. Y[i ] += t;
  137. }
  138. }
  139. arma_hot
  140. inline
  141. void
  142. butterfly_3(cx_type* Y, const uword stride, const uword m)
  143. {
  144. arma_extra_debug_sigprint();
  145. arma_aligned cx_type tmp[5];
  146. cx_type* coeffs1 = coeffs_ptr();
  147. cx_type* coeffs2 = coeffs1;
  148. const T coeff_sm_imag = coeffs1[stride*m].imag();
  149. const uword n = m*2;
  150. // TODO: rearrange the indices within tmp[] into a more sane order
  151. for(uword i = m; i > 0; --i)
  152. {
  153. tmp[1] = Y[m] * (*coeffs1);
  154. tmp[2] = Y[n] * (*coeffs2);
  155. tmp[0] = tmp[1] - tmp[2];
  156. tmp[0] *= coeff_sm_imag;
  157. tmp[3] = tmp[1] + tmp[2];
  158. Y[m] = cx_type( (Y[0].real() - (T(0.5)*tmp[3].real())), (Y[0].imag() - (T(0.5)*tmp[3].imag())) );
  159. Y[0] += tmp[3];
  160. Y[n] = cx_type( (Y[m].real() + tmp[0].imag()), (Y[m].imag() - tmp[0].real()) );
  161. Y[m] += cx_type( -tmp[0].imag(), tmp[0].real() );
  162. Y++;
  163. coeffs1 += stride;
  164. coeffs2 += stride*2;
  165. }
  166. }
  167. arma_hot
  168. inline
  169. void
  170. butterfly_4(cx_type* Y, const uword stride, const uword m)
  171. {
  172. arma_extra_debug_sigprint();
  173. arma_aligned cx_type tmp[7];
  174. const cx_type* coeffs = coeffs_ptr();
  175. const uword m2 = m*2;
  176. const uword m3 = m*3;
  177. // TODO: rearrange the indices within tmp[] into a more sane order
  178. for(uword i=0; i < m; ++i)
  179. {
  180. tmp[0] = Y[i + m ] * coeffs[i*stride ];
  181. tmp[2] = Y[i + m3] * coeffs[i*stride*3];
  182. tmp[3] = tmp[0] + tmp[2];
  183. //tmp[4] = tmp[0] - tmp[2];
  184. //tmp[4] = (inverse) ? cx_type( -(tmp[4].imag()), tmp[4].real() ) : cx_type( tmp[4].imag(), -tmp[4].real() );
  185. tmp[4] = (inverse)
  186. ? cx_type( (tmp[2].imag() - tmp[0].imag()), (tmp[0].real() - tmp[2].real()) )
  187. : cx_type( (tmp[0].imag() - tmp[2].imag()), (tmp[2].real() - tmp[0].real()) );
  188. tmp[1] = Y[i + m2] * coeffs[i*stride*2];
  189. tmp[5] = Y[i] - tmp[1];
  190. Y[i ] += tmp[1];
  191. Y[i + m2] = Y[i] - tmp[3];
  192. Y[i ] += tmp[3];
  193. Y[i + m ] = tmp[5] + tmp[4];
  194. Y[i + m3] = tmp[5] - tmp[4];
  195. }
  196. }
  197. inline
  198. arma_hot
  199. void
  200. butterfly_5(cx_type* Y, const uword stride, const uword m)
  201. {
  202. arma_extra_debug_sigprint();
  203. arma_aligned cx_type tmp[13];
  204. const cx_type* coeffs = coeffs_ptr();
  205. const T a_real = coeffs[stride*1*m].real();
  206. const T a_imag = coeffs[stride*1*m].imag();
  207. const T b_real = coeffs[stride*2*m].real();
  208. const T b_imag = coeffs[stride*2*m].imag();
  209. cx_type* Y0 = Y;
  210. cx_type* Y1 = Y + 1*m;
  211. cx_type* Y2 = Y + 2*m;
  212. cx_type* Y3 = Y + 3*m;
  213. cx_type* Y4 = Y + 4*m;
  214. for(uword i=0; i < m; ++i)
  215. {
  216. tmp[0] = (*Y0);
  217. tmp[1] = (*Y1) * coeffs[stride*1*i];
  218. tmp[2] = (*Y2) * coeffs[stride*2*i];
  219. tmp[3] = (*Y3) * coeffs[stride*3*i];
  220. tmp[4] = (*Y4) * coeffs[stride*4*i];
  221. tmp[7] = tmp[1] + tmp[4];
  222. tmp[8] = tmp[2] + tmp[3];
  223. tmp[9] = tmp[2] - tmp[3];
  224. tmp[10] = tmp[1] - tmp[4];
  225. (*Y0) += tmp[7];
  226. (*Y0) += tmp[8];
  227. tmp[5] = tmp[0] + cx_type( ( (tmp[7].real() * a_real) + (tmp[8].real() * b_real) ), ( (tmp[7].imag() * a_real) + (tmp[8].imag() * b_real) ) );
  228. tmp[6] = cx_type( ( (tmp[10].imag() * a_imag) + (tmp[9].imag() * b_imag) ), ( -(tmp[10].real() * a_imag) - (tmp[9].real() * b_imag) ) );
  229. (*Y1) = tmp[5] - tmp[6];
  230. (*Y4) = tmp[5] + tmp[6];
  231. tmp[11] = tmp[0] + cx_type( ( (tmp[7].real() * b_real) + (tmp[8].real() * a_real) ), ( (tmp[7].imag() * b_real) + (tmp[8].imag() * a_real) ) );
  232. tmp[12] = cx_type( ( -(tmp[10].imag() * b_imag) + (tmp[9].imag() * a_imag) ), ( (tmp[10].real() * b_imag) - (tmp[9].real() * a_imag) ) );
  233. (*Y2) = tmp[11] + tmp[12];
  234. (*Y3) = tmp[11] - tmp[12];
  235. Y0++;
  236. Y1++;
  237. Y2++;
  238. Y3++;
  239. Y4++;
  240. }
  241. }
  242. arma_hot
  243. inline
  244. void
  245. butterfly_N(cx_type* Y, const uword stride, const uword m, const uword r)
  246. {
  247. arma_extra_debug_sigprint();
  248. const cx_type* coeffs = coeffs_ptr();
  249. tmp_array.set_min_size(r);
  250. cx_type* tmp = tmp_array.memptr();
  251. for(uword u=0; u < m; ++u)
  252. {
  253. uword k = u;
  254. for(uword v=0; v < r; ++v)
  255. {
  256. tmp[v] = Y[k];
  257. k += m;
  258. }
  259. k = u;
  260. for(uword v=0; v < r; ++v)
  261. {
  262. Y[k] = tmp[0];
  263. uword j = 0;
  264. for(uword w=1; w < r; ++w)
  265. {
  266. j += stride * k;
  267. if(j >= N) { j -= N; }
  268. Y[k] += tmp[w] * coeffs[j];
  269. }
  270. k += m;
  271. }
  272. }
  273. }
  274. inline
  275. void
  276. run(cx_type* Y, const cx_type* X, const uword stage = 0, const uword stride = 1)
  277. {
  278. arma_extra_debug_sigprint();
  279. const uword m = residue[stage];
  280. const uword r = radix[stage];
  281. const cx_type *Y_end = Y + r*m;
  282. if(m == 1)
  283. {
  284. for(cx_type* Yi = Y; Yi != Y_end; Yi++, X += stride) { (*Yi) = (*X); }
  285. }
  286. else
  287. {
  288. const uword next_stage = stage + 1;
  289. const uword next_stride = stride * r;
  290. for(cx_type* Yi = Y; Yi != Y_end; Yi += m, X += stride) { run(Yi, X, next_stage, next_stride); }
  291. }
  292. switch(r)
  293. {
  294. case 2: butterfly_2(Y, stride, m ); break;
  295. case 3: butterfly_3(Y, stride, m ); break;
  296. case 4: butterfly_4(Y, stride, m ); break;
  297. case 5: butterfly_5(Y, stride, m ); break;
  298. default: butterfly_N(Y, stride, m, r); break;
  299. }
  300. }
  301. };
  302. //! @}