ImathMatrixAlgo.cpp 42 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252
  1. ///////////////////////////////////////////////////////////////////////////
  2. //
  3. // Copyright (c) 2002-2012, Industrial Light & Magic, a division of Lucas
  4. // Digital Ltd. LLC
  5. //
  6. // All rights reserved.
  7. //
  8. // Redistribution and use in source and binary forms, with or without
  9. // modification, are permitted provided that the following conditions are
  10. // met:
  11. // * Redistributions of source code must retain the above copyright
  12. // notice, this list of conditions and the following disclaimer.
  13. // * Redistributions in binary form must reproduce the above
  14. // copyright notice, this list of conditions and the following disclaimer
  15. // in the documentation and/or other materials provided with the
  16. // distribution.
  17. // * Neither the name of Industrial Light & Magic nor the names of
  18. // its contributors may be used to endorse or promote products derived
  19. // from this software without specific prior written permission.
  20. //
  21. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. //
  33. ///////////////////////////////////////////////////////////////////////////
  34. //----------------------------------------------------------------------------
  35. //
  36. // Implementation of non-template items declared in ImathMatrixAlgo.h
  37. //
  38. //----------------------------------------------------------------------------
  39. #include "ImathMatrixAlgo.h"
  40. #include <cmath>
  41. #include <algorithm>
  42. #if defined(OPENEXR_DLL)
  43. #define EXPORT_CONST __declspec(dllexport)
  44. #else
  45. #define EXPORT_CONST const
  46. #endif
  47. IMATH_INTERNAL_NAMESPACE_SOURCE_ENTER
  48. EXPORT_CONST M33f identity33f ( 1, 0, 0,
  49. 0, 1, 0,
  50. 0, 0, 1);
  51. EXPORT_CONST M33d identity33d ( 1, 0, 0,
  52. 0, 1, 0,
  53. 0, 0, 1);
  54. EXPORT_CONST M44f identity44f ( 1, 0, 0, 0,
  55. 0, 1, 0, 0,
  56. 0, 0, 1, 0,
  57. 0, 0, 0, 1);
  58. EXPORT_CONST M44d identity44d ( 1, 0, 0, 0,
  59. 0, 1, 0, 0,
  60. 0, 0, 1, 0,
  61. 0, 0, 0, 1);
  62. namespace
  63. {
  64. class KahanSum
  65. {
  66. public:
  67. KahanSum() : _total(0), _correction(0) {}
  68. void
  69. operator+= (const double val)
  70. {
  71. const double y = val - _correction;
  72. const double t = _total + y;
  73. _correction = (t - _total) - y;
  74. _total = t;
  75. }
  76. double get() const
  77. {
  78. return _total;
  79. }
  80. private:
  81. double _total;
  82. double _correction;
  83. };
  84. }
  85. template <typename T>
  86. M44d
  87. procrustesRotationAndTranslation (const Vec3<T>* A, const Vec3<T>* B, const T* weights, const size_t numPoints, const bool doScale)
  88. {
  89. if (numPoints == 0)
  90. return M44d();
  91. // Always do the accumulation in double precision:
  92. V3d Acenter (0.0);
  93. V3d Bcenter (0.0);
  94. double weightsSum = 0.0;
  95. if (weights == 0)
  96. {
  97. for (int i = 0; i < numPoints; ++i)
  98. {
  99. Acenter += (V3d) A[i];
  100. Bcenter += (V3d) B[i];
  101. }
  102. weightsSum = (double) numPoints;
  103. }
  104. else
  105. {
  106. for (int i = 0; i < numPoints; ++i)
  107. {
  108. const double w = weights[i];
  109. weightsSum += w;
  110. Acenter += w * (V3d) A[i];
  111. Bcenter += w * (V3d) B[i];
  112. }
  113. }
  114. if (weightsSum == 0)
  115. return M44d();
  116. Acenter /= weightsSum;
  117. Bcenter /= weightsSum;
  118. //
  119. // Find Q such that |Q*A - B| (actually A-Acenter and B-Bcenter, weighted)
  120. // is minimized in the least squares sense.
  121. // From Golub/Van Loan, p.601
  122. //
  123. // A,B are 3xn
  124. // Let C = B A^T (where A is 3xn and B^T is nx3, so C is 3x3)
  125. // Compute the SVD: C = U D V^T (U,V rotations, D diagonal).
  126. // Throw away the D part, and return Q = U V^T
  127. M33d C (0.0);
  128. if (weights == 0)
  129. {
  130. for (int i = 0; i < numPoints; ++i)
  131. C += outerProduct ((V3d) B[i] - Bcenter, (V3d) A[i] - Acenter);
  132. }
  133. else
  134. {
  135. for (int i = 0; i < numPoints; ++i)
  136. {
  137. const double w = weights[i];
  138. C += outerProduct (w * ((V3d) B[i] - Bcenter), (V3d) A[i] - Acenter);
  139. }
  140. }
  141. M33d U, V;
  142. V3d S;
  143. jacobiSVD (C, U, S, V, IMATH_INTERNAL_NAMESPACE::limits<double>::epsilon(), true);
  144. // We want Q.transposed() here since we are going to be using it in the
  145. // Imath style (multiplying vectors on the right, v' = v*A^T):
  146. const M33d Qt = V * U.transposed();
  147. double s = 1.0;
  148. if (doScale && numPoints > 1)
  149. {
  150. // Finding a uniform scale: let us assume the Q is completely fixed
  151. // at this point (solving for both simultaneously seems much harder).
  152. // We are trying to compute (again, per Golub and van Loan)
  153. // min || s*A*Q - B ||_F
  154. // Notice that we've jammed a uniform scale in front of the Q.
  155. // Now, the Frobenius norm (the least squares norm over matrices)
  156. // has the neat property that it is equivalent to minimizing the trace
  157. // of M^T*M (see your friendly neighborhood linear algebra text for a
  158. // derivation). Thus, we can expand this out as
  159. // min tr (s*A*Q - B)^T*(s*A*Q - B)
  160. // = min tr(Q^T*A^T*s*s*A*Q) + tr(B^T*B) - 2*tr(Q^T*A^T*s*B) by linearity of the trace
  161. // = min s^2 tr(A^T*A) + tr(B^T*B) - 2*s*tr(Q^T*A^T*B) using the fact that the trace is invariant
  162. // under similarity transforms Q*M*Q^T
  163. // If we differentiate w.r.t. s and set this to 0, we get
  164. // 0 = 2*s*tr(A^T*A) - 2*tr(Q^T*A^T*B)
  165. // so
  166. // 2*s*tr(A^T*A) = 2*s*tr(Q^T*A^T*B)
  167. // s = tr(Q^T*A^T*B) / tr(A^T*A)
  168. KahanSum traceATA;
  169. if (weights == 0)
  170. {
  171. for (int i = 0; i < numPoints; ++i)
  172. traceATA += ((V3d) A[i] - Acenter).length2();
  173. }
  174. else
  175. {
  176. for (int i = 0; i < numPoints; ++i)
  177. traceATA += ((double) weights[i]) * ((V3d) A[i] - Acenter).length2();
  178. }
  179. KahanSum traceBATQ;
  180. for (int i = 0; i < 3; ++i)
  181. for (int j = 0; j < 3; ++j)
  182. traceBATQ += Qt[j][i] * C[i][j];
  183. s = traceBATQ.get() / traceATA.get();
  184. }
  185. // Q is the rotation part of what we want to return.
  186. // The entire transform is:
  187. // (translate origin to Bcenter) * Q * (translate Acenter to origin)
  188. // last first
  189. // The effect of this on a point is:
  190. // (translate origin to Bcenter) * Q * (translate Acenter to origin) * point
  191. // = (translate origin to Bcenter) * Q * (-Acenter + point)
  192. // = (translate origin to Bcenter) * (-Q*Acenter + Q*point)
  193. // = (translate origin to Bcenter) * (translate Q*Acenter to origin) * Q*point
  194. // = (translate Q*Acenter to Bcenter) * Q*point
  195. // So what we want to return is:
  196. // (translate Q*Acenter to Bcenter) * Q
  197. //
  198. // In block form, this is:
  199. // [ 1 0 0 | ] [ 0 ] [ 1 0 0 | ] [ 1 0 0 | ] [ | ] [ ]
  200. // [ 0 1 0 tb ] [ s*Q 0 ] [ 0 1 0 -ta ] = [ 0 1 0 tb ] [ s*Q -s*Q*ta ] = [ Q tb-s*Q*ta ]
  201. // [ 0 0 1 | ] [ 0 ] [ 0 0 1 | ] [ 0 0 1 | ] [ | ] [ ]
  202. // [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ]
  203. // (ofc the whole thing is transposed for Imath).
  204. const V3d translate = Bcenter - s*Acenter*Qt;
  205. return M44d (s*Qt.x[0][0], s*Qt.x[0][1], s*Qt.x[0][2], T(0),
  206. s*Qt.x[1][0], s*Qt.x[1][1], s*Qt.x[1][2], T(0),
  207. s*Qt.x[2][0], s*Qt.x[2][1], s*Qt.x[2][2], T(0),
  208. translate.x, translate.y, translate.z, T(1));
  209. } // procrustesRotationAndTranslation
  210. template <typename T>
  211. M44d
  212. procrustesRotationAndTranslation (const Vec3<T>* A, const Vec3<T>* B, const size_t numPoints, const bool doScale)
  213. {
  214. return procrustesRotationAndTranslation (A, B, (const T*) 0, numPoints, doScale);
  215. } // procrustesRotationAndTranslation
  216. template IMATH_EXPORT M44d procrustesRotationAndTranslation (const V3d* from, const V3d* to, const size_t numPoints, const bool doScale);
  217. template IMATH_EXPORT M44d procrustesRotationAndTranslation (const V3f* from, const V3f* to, const size_t numPoints, const bool doScale);
  218. template IMATH_EXPORT M44d procrustesRotationAndTranslation (const V3d* from, const V3d* to, const double* weights, const size_t numPoints, const bool doScale);
  219. template IMATH_EXPORT M44d procrustesRotationAndTranslation (const V3f* from, const V3f* to, const float* weights, const size_t numPoints, const bool doScale);
  220. namespace
  221. {
  222. // Applies the 2x2 Jacobi rotation
  223. // [ c s 0 ] [ 1 0 0 ] [ c 0 s ]
  224. // [ -s c 0 ] or [ 0 c s ] or [ 0 1 0 ]
  225. // [ 0 0 1 ] [ 0 -s c ] [ -s 0 c ]
  226. // from the right; that is, computes
  227. // J * A
  228. // for the Jacobi rotation J and the matrix A. This is efficient because we
  229. // only need to touch exactly the 2 columns that are affected, so we never
  230. // need to explicitly construct the J matrix.
  231. template <typename T, int j, int k>
  232. void
  233. jacobiRotateRight (IMATH_INTERNAL_NAMESPACE::Matrix33<T>& A,
  234. const T c,
  235. const T s)
  236. {
  237. for (int i = 0; i < 3; ++i)
  238. {
  239. const T tau1 = A[i][j];
  240. const T tau2 = A[i][k];
  241. A[i][j] = c * tau1 - s * tau2;
  242. A[i][k] = s * tau1 + c * tau2;
  243. }
  244. }
  245. template <typename T>
  246. void
  247. jacobiRotateRight (IMATH_INTERNAL_NAMESPACE::Matrix44<T>& A,
  248. const int j,
  249. const int k,
  250. const T c,
  251. const T s)
  252. {
  253. for (int i = 0; i < 4; ++i)
  254. {
  255. const T tau1 = A[i][j];
  256. const T tau2 = A[i][k];
  257. A[i][j] = c * tau1 - s * tau2;
  258. A[i][k] = s * tau1 + c * tau2;
  259. }
  260. }
  261. // This routine solves the 2x2 SVD:
  262. // [ c1 s1 ] [ w x ] [ c2 s2 ] [ d1 0 ]
  263. // [ ] [ ] [ ] = [ ]
  264. // [ -s1 c1 ] [ y z ] [ -s2 c2 ] [ 0 d2 ]
  265. // where
  266. // [ w x ]
  267. // A = [ ]
  268. // [ y z ]
  269. // is the subset of A consisting of the [j,k] entries, A([j k], [j k]) in
  270. // Matlab parlance. The method is the 'USVD' algorithm described in the
  271. // following paper:
  272. // 'Computation of the Singular Value Decomposition using Mesh-Connected Processors'
  273. // by Richard P. Brent, Franklin T. Luk, and Charles Van Loan
  274. // It breaks the computation into two steps: the first symmetrizes the matrix,
  275. // and the second diagonalizes the symmetric matrix.
  276. template <typename T, int j, int k, int l>
  277. bool
  278. twoSidedJacobiRotation (IMATH_INTERNAL_NAMESPACE::Matrix33<T>& A,
  279. IMATH_INTERNAL_NAMESPACE::Matrix33<T>& U,
  280. IMATH_INTERNAL_NAMESPACE::Matrix33<T>& V,
  281. const T tol)
  282. {
  283. // Load everything into local variables to make things easier on the
  284. // optimizer:
  285. const T w = A[j][j];
  286. const T x = A[j][k];
  287. const T y = A[k][j];
  288. const T z = A[k][k];
  289. // We will keep track of whether we're actually performing any rotations,
  290. // since if the matrix is already diagonal we'll end up with the identity
  291. // as our Jacobi rotation and we can short-circuit.
  292. bool changed = false;
  293. // The first step is to symmetrize the 2x2 matrix,
  294. // [ c s ]^T [ w x ] = [ p q ]
  295. // [ -s c ] [ y z ] [ q r ]
  296. T mu_1 = w + z;
  297. T mu_2 = x - y;
  298. T c, s;
  299. if (std::abs(mu_2) <= tol*std::abs(mu_1)) // Already symmetric (to tolerance)
  300. { // Note that the <= is important here
  301. c = T(1); // because we want to bypass the computation
  302. s = T(0); // of rho if mu_1 = mu_2 = 0.
  303. const T p = w;
  304. const T r = z;
  305. mu_1 = r - p;
  306. mu_2 = x + y;
  307. }
  308. else
  309. {
  310. const T rho = mu_1 / mu_2;
  311. s = T(1) / std::sqrt (T(1) + rho*rho); // TODO is there a native inverse square root function?
  312. if (rho < 0)
  313. s = -s;
  314. c = s * rho;
  315. mu_1 = s * (x + y) + c * (z - w); // = r - p
  316. mu_2 = T(2) * (c * x - s * z); // = 2*q
  317. changed = true;
  318. }
  319. // The second stage diagonalizes,
  320. // [ c2 s2 ]^T [ p q ] [ c2 s2 ] = [ d1 0 ]
  321. // [ -s2 c2 ] [ q r ] [ -s2 c2 ] [ 0 d2 ]
  322. T c_2, s_2;
  323. if (std::abs(mu_2) <= tol*std::abs(mu_1))
  324. {
  325. c_2 = T(1);
  326. s_2 = T(0);
  327. }
  328. else
  329. {
  330. const T rho_2 = mu_1 / mu_2;
  331. T t_2 = T(1) / (std::abs(rho_2) + std::sqrt(1 + rho_2*rho_2));
  332. if (rho_2 < 0)
  333. t_2 = -t_2;
  334. c_2 = T(1) / std::sqrt (T(1) + t_2*t_2);
  335. s_2 = c_2 * t_2;
  336. changed = true;
  337. }
  338. const T c_1 = c_2 * c - s_2 * s;
  339. const T s_1 = s_2 * c + c_2 * s;
  340. if (!changed)
  341. {
  342. // We've decided that the off-diagonal entries are already small
  343. // enough, so we'll set them to zero. This actually appears to result
  344. // in smaller errors than leaving them be, possibly because it prevents
  345. // us from trying to do extra rotations later that we don't need.
  346. A[k][j] = 0;
  347. A[j][k] = 0;
  348. return false;
  349. }
  350. const T d_1 = c_1*(w*c_2 - x*s_2) - s_1*(y*c_2 - z*s_2);
  351. const T d_2 = s_1*(w*s_2 + x*c_2) + c_1*(y*s_2 + z*c_2);
  352. // For the entries we just zeroed out, we'll just set them to 0, since
  353. // they should be 0 up to machine precision.
  354. A[j][j] = d_1;
  355. A[k][k] = d_2;
  356. A[k][j] = 0;
  357. A[j][k] = 0;
  358. // Rotate the entries that _weren't_ involved in the 2x2 SVD:
  359. {
  360. // Rotate on the left by
  361. // [ c1 s1 0 ]^T [ c1 0 s1 ]^T [ 1 0 0 ]^T
  362. // [ -s1 c1 0 ] or [ 0 1 0 ] or [ 0 c1 s1 ]
  363. // [ 0 0 1 ] [ -s1 0 c1 ] [ 0 -s1 c1 ]
  364. // This has the effect of adding the (weighted) ith and jth _rows_ to
  365. // each other.
  366. const T tau1 = A[j][l];
  367. const T tau2 = A[k][l];
  368. A[j][l] = c_1 * tau1 - s_1 * tau2;
  369. A[k][l] = s_1 * tau1 + c_1 * tau2;
  370. }
  371. {
  372. // Rotate on the right by
  373. // [ c2 s2 0 ] [ c2 0 s2 ] [ 1 0 0 ]
  374. // [ -s2 c2 0 ] or [ 0 1 0 ] or [ 0 c2 s2 ]
  375. // [ 0 0 1 ] [ -s2 0 c2 ] [ 0 -s2 c2 ]
  376. // This has the effect of adding the (weighted) ith and jth _columns_ to
  377. // each other.
  378. const T tau1 = A[l][j];
  379. const T tau2 = A[l][k];
  380. A[l][j] = c_2 * tau1 - s_2 * tau2;
  381. A[l][k] = s_2 * tau1 + c_2 * tau2;
  382. }
  383. // Now apply the rotations to U and V:
  384. // Remember that we have
  385. // R1^T * A * R2 = D
  386. // This is in the 2x2 case, but after doing a bunch of these
  387. // we will get something like this for the 3x3 case:
  388. // ... R1b^T * R1a^T * A * R2a * R2b * ... = D
  389. // ----------------- ---------------
  390. // = U^T = V
  391. // So,
  392. // U = R1a * R1b * ...
  393. // V = R2a * R2b * ...
  394. jacobiRotateRight<T, j, k> (U, c_1, s_1);
  395. jacobiRotateRight<T, j, k> (V, c_2, s_2);
  396. return true;
  397. }
  398. template <typename T>
  399. bool
  400. twoSidedJacobiRotation (IMATH_INTERNAL_NAMESPACE::Matrix44<T>& A,
  401. int j,
  402. int k,
  403. IMATH_INTERNAL_NAMESPACE::Matrix44<T>& U,
  404. IMATH_INTERNAL_NAMESPACE::Matrix44<T>& V,
  405. const T tol)
  406. {
  407. // Load everything into local variables to make things easier on the
  408. // optimizer:
  409. const T w = A[j][j];
  410. const T x = A[j][k];
  411. const T y = A[k][j];
  412. const T z = A[k][k];
  413. // We will keep track of whether we're actually performing any rotations,
  414. // since if the matrix is already diagonal we'll end up with the identity
  415. // as our Jacobi rotation and we can short-circuit.
  416. bool changed = false;
  417. // The first step is to symmetrize the 2x2 matrix,
  418. // [ c s ]^T [ w x ] = [ p q ]
  419. // [ -s c ] [ y z ] [ q r ]
  420. T mu_1 = w + z;
  421. T mu_2 = x - y;
  422. T c, s;
  423. if (std::abs(mu_2) <= tol*std::abs(mu_1)) // Already symmetric (to tolerance)
  424. { // Note that the <= is important here
  425. c = T(1); // because we want to bypass the computation
  426. s = T(0); // of rho if mu_1 = mu_2 = 0.
  427. const T p = w;
  428. const T r = z;
  429. mu_1 = r - p;
  430. mu_2 = x + y;
  431. }
  432. else
  433. {
  434. const T rho = mu_1 / mu_2;
  435. s = T(1) / std::sqrt (T(1) + rho*rho); // TODO is there a native inverse square root function?
  436. if (rho < 0)
  437. s = -s;
  438. c = s * rho;
  439. mu_1 = s * (x + y) + c * (z - w); // = r - p
  440. mu_2 = T(2) * (c * x - s * z); // = 2*q
  441. changed = true;
  442. }
  443. // The second stage diagonalizes,
  444. // [ c2 s2 ]^T [ p q ] [ c2 s2 ] = [ d1 0 ]
  445. // [ -s2 c2 ] [ q r ] [ -s2 c2 ] [ 0 d2 ]
  446. T c_2, s_2;
  447. if (std::abs(mu_2) <= tol*std::abs(mu_1))
  448. {
  449. c_2 = T(1);
  450. s_2 = T(0);
  451. }
  452. else
  453. {
  454. const T rho_2 = mu_1 / mu_2;
  455. T t_2 = T(1) / (std::abs(rho_2) + std::sqrt(1 + rho_2*rho_2));
  456. if (rho_2 < 0)
  457. t_2 = -t_2;
  458. c_2 = T(1) / std::sqrt (T(1) + t_2*t_2);
  459. s_2 = c_2 * t_2;
  460. changed = true;
  461. }
  462. const T c_1 = c_2 * c - s_2 * s;
  463. const T s_1 = s_2 * c + c_2 * s;
  464. if (!changed)
  465. {
  466. // We've decided that the off-diagonal entries are already small
  467. // enough, so we'll set them to zero. This actually appears to result
  468. // in smaller errors than leaving them be, possibly because it prevents
  469. // us from trying to do extra rotations later that we don't need.
  470. A[k][j] = 0;
  471. A[j][k] = 0;
  472. return false;
  473. }
  474. const T d_1 = c_1*(w*c_2 - x*s_2) - s_1*(y*c_2 - z*s_2);
  475. const T d_2 = s_1*(w*s_2 + x*c_2) + c_1*(y*s_2 + z*c_2);
  476. // For the entries we just zeroed out, we'll just set them to 0, since
  477. // they should be 0 up to machine precision.
  478. A[j][j] = d_1;
  479. A[k][k] = d_2;
  480. A[k][j] = 0;
  481. A[j][k] = 0;
  482. // Rotate the entries that _weren't_ involved in the 2x2 SVD:
  483. for (int l = 0; l < 4; ++l)
  484. {
  485. if (l == j || l == k)
  486. continue;
  487. // Rotate on the left by
  488. // [ 1 ]
  489. // [ . ]
  490. // [ c2 s2 ] j
  491. // [ 1 ]
  492. // [ -s2 c2 ] k
  493. // [ . ]
  494. // [ 1 ]
  495. // j k
  496. //
  497. // This has the effect of adding the (weighted) ith and jth _rows_ to
  498. // each other.
  499. const T tau1 = A[j][l];
  500. const T tau2 = A[k][l];
  501. A[j][l] = c_1 * tau1 - s_1 * tau2;
  502. A[k][l] = s_1 * tau1 + c_1 * tau2;
  503. }
  504. for (int l = 0; l < 4; ++l)
  505. {
  506. // We set the A[j/k][j/k] entries already
  507. if (l == j || l == k)
  508. continue;
  509. // Rotate on the right by
  510. // [ 1 ]
  511. // [ . ]
  512. // [ c2 s2 ] j
  513. // [ 1 ]
  514. // [ -s2 c2 ] k
  515. // [ . ]
  516. // [ 1 ]
  517. // j k
  518. //
  519. // This has the effect of adding the (weighted) ith and jth _columns_ to
  520. // each other.
  521. const T tau1 = A[l][j];
  522. const T tau2 = A[l][k];
  523. A[l][j] = c_2 * tau1 - s_2 * tau2;
  524. A[l][k] = s_2 * tau1 + c_2 * tau2;
  525. }
  526. // Now apply the rotations to U and V:
  527. // Remember that we have
  528. // R1^T * A * R2 = D
  529. // This is in the 2x2 case, but after doing a bunch of these
  530. // we will get something like this for the 3x3 case:
  531. // ... R1b^T * R1a^T * A * R2a * R2b * ... = D
  532. // ----------------- ---------------
  533. // = U^T = V
  534. // So,
  535. // U = R1a * R1b * ...
  536. // V = R2a * R2b * ...
  537. jacobiRotateRight (U, j, k, c_1, s_1);
  538. jacobiRotateRight (V, j, k, c_2, s_2);
  539. return true;
  540. }
  541. template <typename T>
  542. void
  543. swapColumns (IMATH_INTERNAL_NAMESPACE::Matrix33<T>& A, int j, int k)
  544. {
  545. for (int i = 0; i < 3; ++i)
  546. std::swap (A[i][j], A[i][k]);
  547. }
  548. template <typename T>
  549. T
  550. maxOffDiag (const IMATH_INTERNAL_NAMESPACE::Matrix33<T>& A)
  551. {
  552. T result = 0;
  553. result = std::max (result, std::abs (A[0][1]));
  554. result = std::max (result, std::abs (A[0][2]));
  555. result = std::max (result, std::abs (A[1][0]));
  556. result = std::max (result, std::abs (A[1][2]));
  557. result = std::max (result, std::abs (A[2][0]));
  558. result = std::max (result, std::abs (A[2][1]));
  559. return result;
  560. }
  561. template <typename T>
  562. T
  563. maxOffDiag (const IMATH_INTERNAL_NAMESPACE::Matrix44<T>& A)
  564. {
  565. T result = 0;
  566. for (int i = 0; i < 4; ++i)
  567. {
  568. for (int j = 0; j < 4; ++j)
  569. {
  570. if (i != j)
  571. result = std::max (result, std::abs (A[i][j]));
  572. }
  573. }
  574. return result;
  575. }
  576. template <typename T>
  577. void
  578. twoSidedJacobiSVD (IMATH_INTERNAL_NAMESPACE::Matrix33<T> A,
  579. IMATH_INTERNAL_NAMESPACE::Matrix33<T>& U,
  580. IMATH_INTERNAL_NAMESPACE::Vec3<T>& S,
  581. IMATH_INTERNAL_NAMESPACE::Matrix33<T>& V,
  582. const T tol,
  583. const bool forcePositiveDeterminant)
  584. {
  585. // The two-sided Jacobi SVD works by repeatedly zeroing out
  586. // off-diagonal entries of the matrix, 2 at a time. Basically,
  587. // we can take our 3x3 matrix,
  588. // [* * *]
  589. // [* * *]
  590. // [* * *]
  591. // and use a pair of orthogonal transforms to zero out, say, the
  592. // pair of entries (0, 1) and (1, 0):
  593. // [ c1 s1 ] [* * *] [ c2 s2 ] [* *]
  594. // [-s1 c1 ] [* * *] [-s2 c2 ] = [ * *]
  595. // [ 1] [* * *] [ 1] [* * *]
  596. // When we go to zero out the next pair of entries (say, (0, 2) and (2, 0))
  597. // then we don't expect those entries to stay 0:
  598. // [ c1 s1 ] [* *] [ c2 s2 ] [* * ]
  599. // [-s1 c1 ] [ * *] [-s2 c2 ] = [* * *]
  600. // [ 1] [* * *] [ 1] [ * *]
  601. // However, if we keep doing this, we'll find that the off-diagonal entries
  602. // converge to 0 fairly quickly (convergence should be roughly cubic). The
  603. // result is a diagonal A matrix and a bunch of orthogonal transforms:
  604. // [* * *] [* ]
  605. // L1 L2 ... Ln [* * *] Rn ... R2 R1 = [ * ]
  606. // [* * *] [ *]
  607. // ------------ ------- ------------ -------
  608. // U^T A V S
  609. // This turns out to be highly accurate because (1) orthogonal transforms
  610. // are extremely stable to compute and apply (this is why QR factorization
  611. // works so well, FWIW) and because (2) by applying everything to the original
  612. // matrix A instead of computing (A^T * A) we avoid any precision loss that
  613. // would result from that.
  614. U.makeIdentity();
  615. V.makeIdentity();
  616. const int maxIter = 20; // In case we get really unlucky, prevents infinite loops
  617. const T absTol = tol * maxOffDiag (A); // Tolerance is in terms of the maximum
  618. if (absTol != 0) // _off-diagonal_ entry.
  619. {
  620. int numIter = 0;
  621. do
  622. {
  623. ++numIter;
  624. bool changed = twoSidedJacobiRotation<T, 0, 1, 2> (A, U, V, tol);
  625. changed = twoSidedJacobiRotation<T, 0, 2, 1> (A, U, V, tol) || changed;
  626. changed = twoSidedJacobiRotation<T, 1, 2, 0> (A, U, V, tol) || changed;
  627. if (!changed)
  628. break;
  629. } while (maxOffDiag(A) > absTol && numIter < maxIter);
  630. }
  631. // The off-diagonal entries are (effectively) 0, so whatever's left on the
  632. // diagonal are the singular values:
  633. S.x = A[0][0];
  634. S.y = A[1][1];
  635. S.z = A[2][2];
  636. // Nothing thus far has guaranteed that the singular values are positive,
  637. // so let's go back through and flip them if not (since by contract we are
  638. // supposed to return all positive SVs):
  639. for (int i = 0; i < 3; ++i)
  640. {
  641. if (S[i] < 0)
  642. {
  643. // If we flip S[i], we need to flip the corresponding column of U
  644. // (we could also pick V if we wanted; it doesn't really matter):
  645. S[i] = -S[i];
  646. for (int j = 0; j < 3; ++j)
  647. U[j][i] = -U[j][i];
  648. }
  649. }
  650. // Order the singular values from largest to smallest; this requires
  651. // exactly two passes through the data using bubble sort:
  652. for (int i = 0; i < 2; ++i)
  653. {
  654. for (int j = 0; j < (2 - i); ++j)
  655. {
  656. // No absolute values necessary since we already ensured that
  657. // they're positive:
  658. if (S[j] < S[j+1])
  659. {
  660. // If we swap singular values we also have to swap
  661. // corresponding columns in U and V:
  662. std::swap (S[j], S[j+1]);
  663. swapColumns (U, j, j+1);
  664. swapColumns (V, j, j+1);
  665. }
  666. }
  667. }
  668. if (forcePositiveDeterminant)
  669. {
  670. // We want to guarantee that the returned matrices always have positive
  671. // determinant. We can do this by adding the appropriate number of
  672. // matrices of the form:
  673. // [ 1 ]
  674. // L = [ 1 ]
  675. // [ -1 ]
  676. // Note that L' = L and L*L = Identity. Thus we can add:
  677. // U*L*L*S*V = (U*L)*(L*S)*V
  678. // if U has a negative determinant, and
  679. // U*S*L*L*V = U*(S*L)*(L*V)
  680. // if V has a neg. determinant.
  681. if (U.determinant() < 0)
  682. {
  683. for (int i = 0; i < 3; ++i)
  684. U[i][2] = -U[i][2];
  685. S.z = -S.z;
  686. }
  687. if (V.determinant() < 0)
  688. {
  689. for (int i = 0; i < 3; ++i)
  690. V[i][2] = -V[i][2];
  691. S.z = -S.z;
  692. }
  693. }
  694. }
  695. template <typename T>
  696. void
  697. twoSidedJacobiSVD (IMATH_INTERNAL_NAMESPACE::Matrix44<T> A,
  698. IMATH_INTERNAL_NAMESPACE::Matrix44<T>& U,
  699. IMATH_INTERNAL_NAMESPACE::Vec4<T>& S,
  700. IMATH_INTERNAL_NAMESPACE::Matrix44<T>& V,
  701. const T tol,
  702. const bool forcePositiveDeterminant)
  703. {
  704. // Please see the Matrix33 version for a detailed description of the algorithm.
  705. U.makeIdentity();
  706. V.makeIdentity();
  707. const int maxIter = 20; // In case we get really unlucky, prevents infinite loops
  708. const T absTol = tol * maxOffDiag (A); // Tolerance is in terms of the maximum
  709. if (absTol != 0) // _off-diagonal_ entry.
  710. {
  711. int numIter = 0;
  712. do
  713. {
  714. ++numIter;
  715. bool changed = twoSidedJacobiRotation (A, 0, 1, U, V, tol);
  716. changed = twoSidedJacobiRotation (A, 0, 2, U, V, tol) || changed;
  717. changed = twoSidedJacobiRotation (A, 0, 3, U, V, tol) || changed;
  718. changed = twoSidedJacobiRotation (A, 1, 2, U, V, tol) || changed;
  719. changed = twoSidedJacobiRotation (A, 1, 3, U, V, tol) || changed;
  720. changed = twoSidedJacobiRotation (A, 2, 3, U, V, tol) || changed;
  721. if (!changed)
  722. break;
  723. } while (maxOffDiag(A) > absTol && numIter < maxIter);
  724. }
  725. // The off-diagonal entries are (effectively) 0, so whatever's left on the
  726. // diagonal are the singular values:
  727. S[0] = A[0][0];
  728. S[1] = A[1][1];
  729. S[2] = A[2][2];
  730. S[3] = A[3][3];
  731. // Nothing thus far has guaranteed that the singular values are positive,
  732. // so let's go back through and flip them if not (since by contract we are
  733. // supposed to return all positive SVs):
  734. for (int i = 0; i < 4; ++i)
  735. {
  736. if (S[i] < 0)
  737. {
  738. // If we flip S[i], we need to flip the corresponding column of U
  739. // (we could also pick V if we wanted; it doesn't really matter):
  740. S[i] = -S[i];
  741. for (int j = 0; j < 4; ++j)
  742. U[j][i] = -U[j][i];
  743. }
  744. }
  745. // Order the singular values from largest to smallest using insertion sort:
  746. for (int i = 1; i < 4; ++i)
  747. {
  748. const IMATH_INTERNAL_NAMESPACE::Vec4<T> uCol (U[0][i], U[1][i], U[2][i], U[3][i]);
  749. const IMATH_INTERNAL_NAMESPACE::Vec4<T> vCol (V[0][i], V[1][i], V[2][i], V[3][i]);
  750. const T sVal = S[i];
  751. int j = i - 1;
  752. while (std::abs (S[j]) < std::abs (sVal))
  753. {
  754. for (int k = 0; k < 4; ++k)
  755. U[k][j+1] = U[k][j];
  756. for (int k = 0; k < 4; ++k)
  757. V[k][j+1] = V[k][j];
  758. S[j+1] = S[j];
  759. --j;
  760. if (j < 0)
  761. break;
  762. }
  763. for (int k = 0; k < 4; ++k)
  764. U[k][j+1] = uCol[k];
  765. for (int k = 0; k < 4; ++k)
  766. V[k][j+1] = vCol[k];
  767. S[j+1] = sVal;
  768. }
  769. if (forcePositiveDeterminant)
  770. {
  771. // We want to guarantee that the returned matrices always have positive
  772. // determinant. We can do this by adding the appropriate number of
  773. // matrices of the form:
  774. // [ 1 ]
  775. // L = [ 1 ]
  776. // [ 1 ]
  777. // [ -1 ]
  778. // Note that L' = L and L*L = Identity. Thus we can add:
  779. // U*L*L*S*V = (U*L)*(L*S)*V
  780. // if U has a negative determinant, and
  781. // U*S*L*L*V = U*(S*L)*(L*V)
  782. // if V has a neg. determinant.
  783. if (U.determinant() < 0)
  784. {
  785. for (int i = 0; i < 4; ++i)
  786. U[i][3] = -U[i][3];
  787. S[3] = -S[3];
  788. }
  789. if (V.determinant() < 0)
  790. {
  791. for (int i = 0; i < 4; ++i)
  792. V[i][3] = -V[i][3];
  793. S[3] = -S[3];
  794. }
  795. }
  796. }
  797. }
  798. template <typename T>
  799. void
  800. jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix33<T>& A,
  801. IMATH_INTERNAL_NAMESPACE::Matrix33<T>& U,
  802. IMATH_INTERNAL_NAMESPACE::Vec3<T>& S,
  803. IMATH_INTERNAL_NAMESPACE::Matrix33<T>& V,
  804. const T tol,
  805. const bool forcePositiveDeterminant)
  806. {
  807. twoSidedJacobiSVD (A, U, S, V, tol, forcePositiveDeterminant);
  808. }
  809. template <typename T>
  810. void
  811. jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix44<T>& A,
  812. IMATH_INTERNAL_NAMESPACE::Matrix44<T>& U,
  813. IMATH_INTERNAL_NAMESPACE::Vec4<T>& S,
  814. IMATH_INTERNAL_NAMESPACE::Matrix44<T>& V,
  815. const T tol,
  816. const bool forcePositiveDeterminant)
  817. {
  818. twoSidedJacobiSVD (A, U, S, V, tol, forcePositiveDeterminant);
  819. }
  820. template IMATH_EXPORT void jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix33<float>& A,
  821. IMATH_INTERNAL_NAMESPACE::Matrix33<float>& U,
  822. IMATH_INTERNAL_NAMESPACE::Vec3<float>& S,
  823. IMATH_INTERNAL_NAMESPACE::Matrix33<float>& V,
  824. const float tol,
  825. const bool forcePositiveDeterminant);
  826. template IMATH_EXPORT void jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix33<double>& A,
  827. IMATH_INTERNAL_NAMESPACE::Matrix33<double>& U,
  828. IMATH_INTERNAL_NAMESPACE::Vec3<double>& S,
  829. IMATH_INTERNAL_NAMESPACE::Matrix33<double>& V,
  830. const double tol,
  831. const bool forcePositiveDeterminant);
  832. template IMATH_EXPORT void jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix44<float>& A,
  833. IMATH_INTERNAL_NAMESPACE::Matrix44<float>& U,
  834. IMATH_INTERNAL_NAMESPACE::Vec4<float>& S,
  835. IMATH_INTERNAL_NAMESPACE::Matrix44<float>& V,
  836. const float tol,
  837. const bool forcePositiveDeterminant);
  838. template IMATH_EXPORT void jacobiSVD (const IMATH_INTERNAL_NAMESPACE::Matrix44<double>& A,
  839. IMATH_INTERNAL_NAMESPACE::Matrix44<double>& U,
  840. IMATH_INTERNAL_NAMESPACE::Vec4<double>& S,
  841. IMATH_INTERNAL_NAMESPACE::Matrix44<double>& V,
  842. const double tol,
  843. const bool forcePositiveDeterminant);
  844. namespace
  845. {
  846. template <int j, int k, typename TM>
  847. inline
  848. void
  849. jacobiRotateRight (TM& A,
  850. const typename TM::BaseType s,
  851. const typename TM::BaseType tau)
  852. {
  853. typedef typename TM::BaseType T;
  854. for (unsigned int i = 0; i < TM::dimensions(); ++i)
  855. {
  856. const T nu1 = A[i][j];
  857. const T nu2 = A[i][k];
  858. A[i][j] -= s * (nu2 + tau * nu1);
  859. A[i][k] += s * (nu1 - tau * nu2);
  860. }
  861. }
  862. template <int j, int k, int l, typename T>
  863. bool
  864. jacobiRotation (Matrix33<T>& A,
  865. Matrix33<T>& V,
  866. Vec3<T>& Z,
  867. const T tol)
  868. {
  869. // Load everything into local variables to make things easier on the
  870. // optimizer:
  871. const T x = A[j][j];
  872. const T y = A[j][k];
  873. const T z = A[k][k];
  874. // The first stage diagonalizes,
  875. // [ c s ]^T [ x y ] [ c -s ] = [ d1 0 ]
  876. // [ -s c ] [ y z ] [ s c ] [ 0 d2 ]
  877. const T mu1 = z - x;
  878. const T mu2 = 2 * y;
  879. if (std::abs(mu2) <= tol*std::abs(mu1))
  880. {
  881. // We've decided that the off-diagonal entries are already small
  882. // enough, so we'll set them to zero. This actually appears to result
  883. // in smaller errors than leaving them be, possibly because it prevents
  884. // us from trying to do extra rotations later that we don't need.
  885. A[j][k] = 0;
  886. return false;
  887. }
  888. const T rho = mu1 / mu2;
  889. const T t = (rho < 0 ? T(-1) : T(1)) / (std::abs(rho) + std::sqrt(1 + rho*rho));
  890. const T c = T(1) / std::sqrt (T(1) + t*t);
  891. const T s = t * c;
  892. const T tau = s / (T(1) + c);
  893. const T h = t * y;
  894. // Update diagonal elements.
  895. Z[j] -= h;
  896. Z[k] += h;
  897. A[j][j] -= h;
  898. A[k][k] += h;
  899. // For the entries we just zeroed out, we'll just set them to 0, since
  900. // they should be 0 up to machine precision.
  901. A[j][k] = 0;
  902. // We only update upper triagnular elements of A, since
  903. // A is supposed to be symmetric.
  904. T& offd1 = l < j ? A[l][j] : A[j][l];
  905. T& offd2 = l < k ? A[l][k] : A[k][l];
  906. const T nu1 = offd1;
  907. const T nu2 = offd2;
  908. offd1 = nu1 - s * (nu2 + tau * nu1);
  909. offd2 = nu2 + s * (nu1 - tau * nu2);
  910. // Apply rotation to V
  911. jacobiRotateRight<j, k> (V, s, tau);
  912. return true;
  913. }
  914. template <int j, int k, int l1, int l2, typename T>
  915. bool
  916. jacobiRotation (Matrix44<T>& A,
  917. Matrix44<T>& V,
  918. Vec4<T>& Z,
  919. const T tol)
  920. {
  921. const T x = A[j][j];
  922. const T y = A[j][k];
  923. const T z = A[k][k];
  924. const T mu1 = z - x;
  925. const T mu2 = T(2) * y;
  926. // Let's see if rho^(-1) = mu2 / mu1 is less than tol
  927. // This test also checks if rho^2 will overflow
  928. // when tol^(-1) < sqrt(limits<T>::max()).
  929. if (std::abs(mu2) <= tol*std::abs(mu1))
  930. {
  931. A[j][k] = 0;
  932. return true;
  933. }
  934. const T rho = mu1 / mu2;
  935. const T t = (rho < 0 ? T(-1) : T(1)) / (std::abs(rho) + std::sqrt(1 + rho*rho));
  936. const T c = T(1) / std::sqrt (T(1) + t*t);
  937. const T s = c * t;
  938. const T tau = s / (T(1) + c);
  939. const T h = t * y;
  940. Z[j] -= h;
  941. Z[k] += h;
  942. A[j][j] -= h;
  943. A[k][k] += h;
  944. A[j][k] = 0;
  945. {
  946. T& offd1 = l1 < j ? A[l1][j] : A[j][l1];
  947. T& offd2 = l1 < k ? A[l1][k] : A[k][l1];
  948. const T nu1 = offd1;
  949. const T nu2 = offd2;
  950. offd1 -= s * (nu2 + tau * nu1);
  951. offd2 += s * (nu1 - tau * nu2);
  952. }
  953. {
  954. T& offd1 = l2 < j ? A[l2][j] : A[j][l2];
  955. T& offd2 = l2 < k ? A[l2][k] : A[k][l2];
  956. const T nu1 = offd1;
  957. const T nu2 = offd2;
  958. offd1 -= s * (nu2 + tau * nu1);
  959. offd2 += s * (nu1 - tau * nu2);
  960. }
  961. jacobiRotateRight<j, k> (V, s, tau);
  962. return true;
  963. }
  964. template <typename TM>
  965. inline
  966. typename TM::BaseType
  967. maxOffDiagSymm (const TM& A)
  968. {
  969. typedef typename TM::BaseType T;
  970. T result = 0;
  971. for (unsigned int i = 0; i < TM::dimensions(); ++i)
  972. for (unsigned int j = i+1; j < TM::dimensions(); ++j)
  973. result = std::max (result, std::abs (A[i][j]));
  974. return result;
  975. }
  976. } // namespace
  977. template <typename T>
  978. void
  979. jacobiEigenSolver (Matrix33<T>& A,
  980. Vec3<T>& S,
  981. Matrix33<T>& V,
  982. const T tol)
  983. {
  984. V.makeIdentity();
  985. for(int i = 0; i < 3; ++i) {
  986. S[i] = A[i][i];
  987. }
  988. const int maxIter = 20; // In case we get really unlucky, prevents infinite loops
  989. const T absTol = tol * maxOffDiagSymm (A); // Tolerance is in terms of the maximum
  990. if (absTol != 0) // _off-diagonal_ entry.
  991. {
  992. int numIter = 0;
  993. do
  994. {
  995. // Z is for accumulating small changes (h) to diagonal entries
  996. // of A for one sweep. Adding h's directly to A might cause
  997. // a cancellation effect when h is relatively very small to
  998. // the corresponding diagonal entry of A and
  999. // this will increase numerical errors
  1000. Vec3<T> Z(0, 0, 0);
  1001. ++numIter;
  1002. bool changed = jacobiRotation<0, 1, 2> (A, V, Z, tol);
  1003. changed = jacobiRotation<0, 2, 1> (A, V, Z, tol) || changed;
  1004. changed = jacobiRotation<1, 2, 0> (A, V, Z, tol) || changed;
  1005. // One sweep passed. Add accumulated changes (Z) to singular values (S)
  1006. // Update diagonal elements of A for better accuracy as well.
  1007. for(int i = 0; i < 3; ++i) {
  1008. A[i][i] = S[i] += Z[i];
  1009. }
  1010. if (!changed)
  1011. break;
  1012. } while (maxOffDiagSymm(A) > absTol && numIter < maxIter);
  1013. }
  1014. }
  1015. template <typename T>
  1016. void
  1017. jacobiEigenSolver (Matrix44<T>& A,
  1018. Vec4<T>& S,
  1019. Matrix44<T>& V,
  1020. const T tol)
  1021. {
  1022. V.makeIdentity();
  1023. for(int i = 0; i < 4; ++i) {
  1024. S[i] = A[i][i];
  1025. }
  1026. const int maxIter = 20; // In case we get really unlucky, prevents infinite loops
  1027. const T absTol = tol * maxOffDiagSymm (A); // Tolerance is in terms of the maximum
  1028. if (absTol != 0) // _off-diagonal_ entry.
  1029. {
  1030. int numIter = 0;
  1031. do
  1032. {
  1033. ++numIter;
  1034. Vec4<T> Z(0, 0, 0, 0);
  1035. bool changed = jacobiRotation<0, 1, 2, 3> (A, V, Z, tol);
  1036. changed = jacobiRotation<0, 2, 1, 3> (A, V, Z, tol) || changed;
  1037. changed = jacobiRotation<0, 3, 1, 2> (A, V, Z, tol) || changed;
  1038. changed = jacobiRotation<1, 2, 0, 3> (A, V, Z, tol) || changed;
  1039. changed = jacobiRotation<1, 3, 0, 2> (A, V, Z, tol) || changed;
  1040. changed = jacobiRotation<2, 3, 0, 1> (A, V, Z, tol) || changed;
  1041. for(int i = 0; i < 4; ++i) {
  1042. A[i][i] = S[i] += Z[i];
  1043. }
  1044. if (!changed)
  1045. break;
  1046. } while (maxOffDiagSymm(A) > absTol && numIter < maxIter);
  1047. }
  1048. }
  1049. template <typename TM, typename TV>
  1050. void
  1051. maxEigenVector (TM& A, TV& V)
  1052. {
  1053. TV S;
  1054. TM MV;
  1055. jacobiEigenSolver(A, S, MV);
  1056. int maxIdx(0);
  1057. for(unsigned int i = 1; i < TV::dimensions(); ++i)
  1058. {
  1059. if(std::abs(S[i]) > std::abs(S[maxIdx]))
  1060. maxIdx = i;
  1061. }
  1062. for(unsigned int i = 0; i < TV::dimensions(); ++i)
  1063. V[i] = MV[i][maxIdx];
  1064. }
  1065. template <typename TM, typename TV>
  1066. void
  1067. minEigenVector (TM& A, TV& V)
  1068. {
  1069. TV S;
  1070. TM MV;
  1071. jacobiEigenSolver(A, S, MV);
  1072. int minIdx(0);
  1073. for(unsigned int i = 1; i < TV::dimensions(); ++i)
  1074. {
  1075. if(std::abs(S[i]) < std::abs(S[minIdx]))
  1076. minIdx = i;
  1077. }
  1078. for(unsigned int i = 0; i < TV::dimensions(); ++i)
  1079. V[i] = MV[i][minIdx];
  1080. }
  1081. template IMATH_EXPORT void jacobiEigenSolver (Matrix33<float>& A,
  1082. Vec3<float>& S,
  1083. Matrix33<float>& V,
  1084. const float tol);
  1085. template IMATH_EXPORT void jacobiEigenSolver (Matrix33<double>& A,
  1086. Vec3<double>& S,
  1087. Matrix33<double>& V,
  1088. const double tol);
  1089. template IMATH_EXPORT void jacobiEigenSolver (Matrix44<float>& A,
  1090. Vec4<float>& S,
  1091. Matrix44<float>& V,
  1092. const float tol);
  1093. template IMATH_EXPORT void jacobiEigenSolver (Matrix44<double>& A,
  1094. Vec4<double>& S,
  1095. Matrix44<double>& V,
  1096. const double tol);
  1097. template IMATH_EXPORT void maxEigenVector (Matrix33<float>& A,
  1098. Vec3<float>& S);
  1099. template IMATH_EXPORT void maxEigenVector (Matrix44<float>& A,
  1100. Vec4<float>& S);
  1101. template IMATH_EXPORT void maxEigenVector (Matrix33<double>& A,
  1102. Vec3<double>& S);
  1103. template IMATH_EXPORT void maxEigenVector (Matrix44<double>& A,
  1104. Vec4<double>& S);
  1105. template IMATH_EXPORT void minEigenVector (Matrix33<float>& A,
  1106. Vec3<float>& S);
  1107. template IMATH_EXPORT void minEigenVector (Matrix44<float>& A,
  1108. Vec4<float>& S);
  1109. template IMATH_EXPORT void minEigenVector (Matrix33<double>& A,
  1110. Vec3<double>& S);
  1111. template IMATH_EXPORT void minEigenVector (Matrix44<double>& A,
  1112. Vec4<double>& S);
  1113. IMATH_INTERNAL_NAMESPACE_SOURCE_EXIT