half.h 17 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761
  1. ///////////////////////////////////////////////////////////////////////////
  2. //
  3. // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
  4. // Digital Ltd. LLC
  5. //
  6. // All rights reserved.
  7. //
  8. // Redistribution and use in source and binary forms, with or without
  9. // modification, are permitted provided that the following conditions are
  10. // met:
  11. // * Redistributions of source code must retain the above copyright
  12. // notice, this list of conditions and the following disclaimer.
  13. // * Redistributions in binary form must reproduce the above
  14. // copyright notice, this list of conditions and the following disclaimer
  15. // in the documentation and/or other materials provided with the
  16. // distribution.
  17. // * Neither the name of Industrial Light & Magic nor the names of
  18. // its contributors may be used to endorse or promote products derived
  19. // from this software without specific prior written permission.
  20. //
  21. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. //
  33. ///////////////////////////////////////////////////////////////////////////
  34. // Primary authors:
  35. // Florian Kainz <kainz@ilm.com>
  36. // Rod Bogart <rgb@ilm.com>
  37. //---------------------------------------------------------------------------
  38. //
  39. // half -- a 16-bit floating point number class:
  40. //
  41. // Type half can represent positive and negative numbers whose
  42. // magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
  43. // error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
  44. // with an absolute error of 6.0e-8. All integers from -2048 to
  45. // +2048 can be represented exactly.
  46. //
  47. // Type half behaves (almost) like the built-in C++ floating point
  48. // types. In arithmetic expressions, half, float and double can be
  49. // mixed freely. Here are a few examples:
  50. //
  51. // half a (3.5);
  52. // float b (a + sqrt (a));
  53. // a += b;
  54. // b += a;
  55. // b = a + 7;
  56. //
  57. // Conversions from half to float are lossless; all half numbers
  58. // are exactly representable as floats.
  59. //
  60. // Conversions from float to half may not preserve a float's value
  61. // exactly. If a float is not representable as a half, then the
  62. // float value is rounded to the nearest representable half. If a
  63. // float value is exactly in the middle between the two closest
  64. // representable half values, then the float value is rounded to
  65. // the closest half whose least significant bit is zero.
  66. //
  67. // Overflows during float-to-half conversions cause arithmetic
  68. // exceptions. An overflow occurs when the float value to be
  69. // converted is too large to be represented as a half, or if the
  70. // float value is an infinity or a NAN.
  71. //
  72. // The implementation of type half makes the following assumptions
  73. // about the implementation of the built-in C++ types:
  74. //
  75. // float is an IEEE 754 single-precision number
  76. // sizeof (float) == 4
  77. // sizeof (unsigned int) == sizeof (float)
  78. // alignof (unsigned int) == alignof (float)
  79. // sizeof (unsigned short) == 2
  80. //
  81. //---------------------------------------------------------------------------
  82. #ifndef _HALF_H_
  83. #define _HALF_H_
  84. #include "halfExport.h" // for definition of HALF_EXPORT
  85. #include <iostream>
  86. class half
  87. {
  88. public:
  89. //-------------
  90. // Constructors
  91. //-------------
  92. half (); // no initialization
  93. half (float f);
  94. //--------------------
  95. // Conversion to float
  96. //--------------------
  97. operator float () const;
  98. //------------
  99. // Unary minus
  100. //------------
  101. half operator - () const;
  102. //-----------
  103. // Assignment
  104. //-----------
  105. half & operator = (half h);
  106. half & operator = (float f);
  107. half & operator += (half h);
  108. half & operator += (float f);
  109. half & operator -= (half h);
  110. half & operator -= (float f);
  111. half & operator *= (half h);
  112. half & operator *= (float f);
  113. half & operator /= (half h);
  114. half & operator /= (float f);
  115. //---------------------------------------------------------
  116. // Round to n-bit precision (n should be between 0 and 10).
  117. // After rounding, the significand's 10-n least significant
  118. // bits will be zero.
  119. //---------------------------------------------------------
  120. half round (unsigned int n) const;
  121. //--------------------------------------------------------------------
  122. // Classification:
  123. //
  124. // h.isFinite() returns true if h is a normalized number,
  125. // a denormalized number or zero
  126. //
  127. // h.isNormalized() returns true if h is a normalized number
  128. //
  129. // h.isDenormalized() returns true if h is a denormalized number
  130. //
  131. // h.isZero() returns true if h is zero
  132. //
  133. // h.isNan() returns true if h is a NAN
  134. //
  135. // h.isInfinity() returns true if h is a positive
  136. // or a negative infinity
  137. //
  138. // h.isNegative() returns true if the sign bit of h
  139. // is set (negative)
  140. //--------------------------------------------------------------------
  141. bool isFinite () const;
  142. bool isNormalized () const;
  143. bool isDenormalized () const;
  144. bool isZero () const;
  145. bool isNan () const;
  146. bool isInfinity () const;
  147. bool isNegative () const;
  148. //--------------------------------------------
  149. // Special values
  150. //
  151. // posInf() returns +infinity
  152. //
  153. // negInf() returns -infinity
  154. //
  155. // qNan() returns a NAN with the bit
  156. // pattern 0111111111111111
  157. //
  158. // sNan() returns a NAN with the bit
  159. // pattern 0111110111111111
  160. //--------------------------------------------
  161. static half posInf ();
  162. static half negInf ();
  163. static half qNan ();
  164. static half sNan ();
  165. //--------------------------------------
  166. // Access to the internal representation
  167. //--------------------------------------
  168. HALF_EXPORT unsigned short bits () const;
  169. HALF_EXPORT void setBits (unsigned short bits);
  170. public:
  171. union uif
  172. {
  173. unsigned int i;
  174. float f;
  175. };
  176. private:
  177. HALF_EXPORT static short convert (int i);
  178. HALF_EXPORT static float overflow ();
  179. unsigned short _h;
  180. HALF_EXPORT static const uif _toFloat[1 << 16];
  181. HALF_EXPORT static const unsigned short _eLut[1 << 9];
  182. };
  183. //-----------
  184. // Stream I/O
  185. //-----------
  186. HALF_EXPORT std::ostream & operator << (std::ostream &os, half h);
  187. HALF_EXPORT std::istream & operator >> (std::istream &is, half &h);
  188. //----------
  189. // Debugging
  190. //----------
  191. HALF_EXPORT void printBits (std::ostream &os, half h);
  192. HALF_EXPORT void printBits (std::ostream &os, float f);
  193. HALF_EXPORT void printBits (char c[19], half h);
  194. HALF_EXPORT void printBits (char c[35], float f);
  //-------------------------------------------------------------------------
  // Limits
  //
  // Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
  // constants, but at least one other compiler (gcc 2.96) produces incorrect
  // results if they are.
  //-------------------------------------------------------------------------

  #if (defined _WIN32 || defined _WIN64) && defined _MSC_VER

    #define HALF_MIN	5.96046448e-08f	// Smallest positive half

    #define HALF_NRM_MIN	6.10351562e-05f	// Smallest positive normalized half

    #define HALF_MAX	65504.0f	// Largest positive half

    #define HALF_EPSILON	0.00097656f	// Smallest positive e for which
  					// half (1.0 + e) != half (1.0)
  #else

    #define HALF_MIN	5.96046448e-08	// Smallest positive half

    #define HALF_NRM_MIN	6.10351562e-05	// Smallest positive normalized half

    #define HALF_MAX	65504.0		// Largest positive half

    #define HALF_EPSILON	0.00097656	// Smallest positive e for which
  					// half (1.0 + e) != half (1.0)
  #endif


  #define HALF_MANT_DIG	11		// Number of digits in mantissa
  					// (significand + hidden leading 1)

  #define HALF_DIG	3		// Number of base 10 digits that
  					// can be represented without change:
  					// floor ((HALF_MANT_DIG - 1) * log10 (2))
  					// = floor (3.01...) = 3

  #define HALF_DECIMAL_DIG	5	// Number of base-10 digits that are
  					// necessary to uniquely represent all
  					// distinct values:
  					// ceil (1 + HALF_MANT_DIG * log10 (2))
  					// = ceil (4.31...) = 5

  #define HALF_RADIX	2		// Base of the exponent

  #define HALF_MIN_EXP	(-13)		// Minimum negative integer such that
  					// HALF_RADIX raised to the power of
  					// one less than that integer is a
  					// normalized half

  #define HALF_MAX_EXP	16		// Maximum positive integer such that
  					// HALF_RADIX raised to the power of
  					// one less than that integer is a
  					// normalized half

  #define HALF_MIN_10_EXP	(-4)		// Minimum negative integer such
  					// that 10 raised to that power is
  					// a normalized half

  #define HALF_MAX_10_EXP	4		// Maximum positive integer such
  					// that 10 raised to that power is
  					// a normalized half
  237. //---------------------------------------------------------------------------
  238. //
  239. // Implementation --
  240. //
  241. // Representation of a float:
  242. //
  243. // We assume that a float, f, is an IEEE 754 single-precision
  244. // floating point number, whose bits are arranged as follows:
  245. //
  //      31 (msb)
  //      |
  //      | 30     23
  //      | |      |
  //      | |      | 22                    0 (lsb)
  //      | |      | |                     |
  //      X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
  //
  //      s e        m
  255. //
  // s is the sign-bit, e is the exponent and m is the significand.
  //
  // If e is between 1 and 254, f is a normalized number:
  //
  //      f = (-1)^s * 2^(e-127) * 1.m
  //
  // If e is 0, and m is not zero, f is a denormalized number:
  //
  //      f = (-1)^s * 2^(-126) * 0.m
  267. //
  268. // If e and m are both zero, f is zero:
  269. //
  270. // f = 0.0
  271. //
  272. // If e is 255, f is an "infinity" or "not a number" (NAN),
  273. // depending on whether m is zero or not.
  274. //
  275. // Examples:
  276. //
  277. // 0 00000000 00000000000000000000000 = 0.0
  278. // 0 01111110 00000000000000000000000 = 0.5
  279. // 0 01111111 00000000000000000000000 = 1.0
  280. // 0 10000000 00000000000000000000000 = 2.0
  281. // 0 10000000 10000000000000000000000 = 3.0
  282. // 1 10000101 11110000010000000000000 = -124.0625
  283. // 0 11111111 00000000000000000000000 = +infinity
  284. // 1 11111111 00000000000000000000000 = -infinity
  285. // 0 11111111 10000000000000000000000 = NAN
  286. // 1 11111111 11111111111111111111111 = NAN
  287. //
  288. // Representation of a half:
  289. //
  290. // Here is the bit-layout for a half number, h:
  291. //
  //      15 (msb)
  //      |
  //      | 14  10
  //      | |   |
  //      | |   | 9        0 (lsb)
  //      | |   | |        |
  //      X XXXXX XXXXXXXXXX
  //
  //      s e     m
  301. //
  // s is the sign-bit, e is the exponent and m is the significand.
  //
  // If e is between 1 and 30, h is a normalized number:
  //
  //      h = (-1)^s * 2^(e-15) * 1.m
  //
  // If e is 0, and m is not zero, h is a denormalized number:
  //
  //      h = (-1)^s * 2^(-14) * 0.m
  313. //
  314. // If e and m are both zero, h is zero:
  315. //
  316. // h = 0.0
  317. //
  318. // If e is 31, h is an "infinity" or "not a number" (NAN),
  319. // depending on whether m is zero or not.
  320. //
  321. // Examples:
  322. //
  323. // 0 00000 0000000000 = 0.0
  324. // 0 01110 0000000000 = 0.5
  325. // 0 01111 0000000000 = 1.0
  326. // 0 10000 0000000000 = 2.0
  327. // 0 10000 1000000000 = 3.0
  328. // 1 10101 1111000001 = -124.0625
  329. // 0 11111 0000000000 = +infinity
  330. // 1 11111 0000000000 = -infinity
  331. // 0 11111 1000000000 = NAN
  332. // 1 11111 1111111111 = NAN
  333. //
  334. // Conversion:
  335. //
  336. // Converting from a float to a half requires some non-trivial bit
  337. // manipulations. In some cases, this makes conversion relatively
  338. // slow, but the most common case is accelerated via table lookups.
  339. //
  340. // Converting back from a half to a float is easier because we don't
  341. // have to do any rounding. In addition, there are only 65536
  342. // different half numbers; we can convert each of those numbers once
  343. // and store the results in a table. Later, all conversions can be
  344. // done using only simple table lookups.
  345. //
  346. //---------------------------------------------------------------------------
  347. //--------------------
  348. // Simple constructors
  349. //--------------------
  350. inline
  351. half::half ()
  352. {
  353. // no initialization
  354. }
  355. //----------------------------
  356. // Half-from-float constructor
  357. //----------------------------
  358. inline
  359. half::half (float f)
  360. {
  361. uif x;
  362. x.f = f;
  363. if (f == 0)
  364. {
  365. //
  366. // Common special case - zero.
  367. // Preserve the zero's sign bit.
  368. //
  369. _h = (x.i >> 16);
  370. }
  371. else
  372. {
  373. //
  374. // We extract the combined sign and exponent, e, from our
  375. // floating-point number, f. Then we convert e to the sign
  376. // and exponent of the half number via a table lookup.
  377. //
  378. // For the most common case, where a normalized half is produced,
  379. // the table lookup returns a non-zero value; in this case, all
  380. // we have to do is round f's significand to 10 bits and combine
  381. // the result with e.
  382. //
  383. // For all other cases (overflow, zeroes, denormalized numbers
  384. // resulting from underflow, infinities and NANs), the table
  385. // lookup returns zero, and we call a longer, non-inline function
  386. // to do the float-to-half conversion.
  387. //
  388. int e = (x.i >> 23) & 0x000001ff;
  389. e = _eLut[e];
  390. if (e)
  391. {
  392. //
  393. // Simple case - round the significand, m, to 10
  394. // bits and combine it with the sign and exponent.
  395. //
  396. int m = x.i & 0x007fffff;
  397. _h = (unsigned short)(e + ((m + 0x00000fff + ((m >> 13) & 1)) >> 13));
  398. }
  399. else
  400. {
  401. //
  402. // Difficult case - call a function.
  403. //
  404. _h = convert (x.i);
  405. }
  406. }
  407. }
  408. //------------------------------------------
  409. // Half-to-float conversion via table lookup
  410. //------------------------------------------
  411. inline
  412. half::operator float () const
  413. {
  414. return _toFloat[_h].f;
  415. }
  416. //-------------------------
  417. // Round to n-bit precision
  418. //-------------------------
  419. inline half
  420. half::round (unsigned int n) const
  421. {
  422. //
  423. // Parameter check.
  424. //
  425. if (n >= 10)
  426. return *this;
  427. //
  428. // Disassemble h into the sign, s,
  429. // and the combined exponent and significand, e.
  430. //
  431. unsigned short s = _h & 0x8000;
  432. unsigned short e = _h & 0x7fff;
  433. //
  434. // Round the exponent and significand to the nearest value
  435. // where ones occur only in the (10-n) most significant bits.
  436. // Note that the exponent adjusts automatically if rounding
  437. // up causes the significand to overflow.
  438. //
  439. e >>= 9 - n;
  440. e += e & 1;
  441. e <<= 9 - n;
  442. //
  443. // Check for exponent overflow.
  444. //
  445. if (e >= 0x7c00)
  446. {
  447. //
  448. // Overflow occurred -- truncate instead of rounding.
  449. //
  450. e = _h;
  451. e >>= 10 - n;
  452. e <<= 10 - n;
  453. }
  454. //
  455. // Put the original sign bit back.
  456. //
  457. half h;
  458. h._h = s | e;
  459. return h;
  460. }
  461. //-----------------------
  462. // Other inline functions
  463. //-----------------------
  464. inline half
  465. half::operator - () const
  466. {
  467. half h;
  468. h._h = _h ^ 0x8000;
  469. return h;
  470. }
  471. inline half &
  472. half::operator = (half h)
  473. {
  474. _h = h._h;
  475. return *this;
  476. }
  477. inline half &
  478. half::operator = (float f)
  479. {
  480. *this = half (f);
  481. return *this;
  482. }
  483. inline half &
  484. half::operator += (half h)
  485. {
  486. *this = half (float (*this) + float (h));
  487. return *this;
  488. }
  489. inline half &
  490. half::operator += (float f)
  491. {
  492. *this = half (float (*this) + f);
  493. return *this;
  494. }
  495. inline half &
  496. half::operator -= (half h)
  497. {
  498. *this = half (float (*this) - float (h));
  499. return *this;
  500. }
  501. inline half &
  502. half::operator -= (float f)
  503. {
  504. *this = half (float (*this) - f);
  505. return *this;
  506. }
  507. inline half &
  508. half::operator *= (half h)
  509. {
  510. *this = half (float (*this) * float (h));
  511. return *this;
  512. }
  513. inline half &
  514. half::operator *= (float f)
  515. {
  516. *this = half (float (*this) * f);
  517. return *this;
  518. }
  519. inline half &
  520. half::operator /= (half h)
  521. {
  522. *this = half (float (*this) / float (h));
  523. return *this;
  524. }
  525. inline half &
  526. half::operator /= (float f)
  527. {
  528. *this = half (float (*this) / f);
  529. return *this;
  530. }
  531. inline bool
  532. half::isFinite () const
  533. {
  534. unsigned short e = (_h >> 10) & 0x001f;
  535. return e < 31;
  536. }
  537. inline bool
  538. half::isNormalized () const
  539. {
  540. unsigned short e = (_h >> 10) & 0x001f;
  541. return e > 0 && e < 31;
  542. }
  543. inline bool
  544. half::isDenormalized () const
  545. {
  546. unsigned short e = (_h >> 10) & 0x001f;
  547. unsigned short m = _h & 0x3ff;
  548. return e == 0 && m != 0;
  549. }
  550. inline bool
  551. half::isZero () const
  552. {
  553. return (_h & 0x7fff) == 0;
  554. }
  555. inline bool
  556. half::isNan () const
  557. {
  558. unsigned short e = (_h >> 10) & 0x001f;
  559. unsigned short m = _h & 0x3ff;
  560. return e == 31 && m != 0;
  561. }
  562. inline bool
  563. half::isInfinity () const
  564. {
  565. unsigned short e = (_h >> 10) & 0x001f;
  566. unsigned short m = _h & 0x3ff;
  567. return e == 31 && m == 0;
  568. }
  569. inline bool
  570. half::isNegative () const
  571. {
  572. return (_h & 0x8000) != 0;
  573. }
  574. inline half
  575. half::posInf ()
  576. {
  577. half h;
  578. h._h = 0x7c00;
  579. return h;
  580. }
  581. inline half
  582. half::negInf ()
  583. {
  584. half h;
  585. h._h = 0xfc00;
  586. return h;
  587. }
  588. inline half
  589. half::qNan ()
  590. {
  591. half h;
  592. h._h = 0x7fff;
  593. return h;
  594. }
  595. inline half
  596. half::sNan ()
  597. {
  598. half h;
  599. h._h = 0x7dff;
  600. return h;
  601. }
  602. inline unsigned short
  603. half::bits () const
  604. {
  605. return _h;
  606. }
  607. inline void
  608. half::setBits (unsigned short bits)
  609. {
  610. _h = bits;
  611. }
  612. #endif