half.cpp 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. ///////////////////////////////////////////////////////////////////////////
  2. //
  3. // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
  4. // Digital Ltd. LLC
  5. //
  6. // All rights reserved.
  7. //
  8. // Redistribution and use in source and binary forms, with or without
  9. // modification, are permitted provided that the following conditions are
  10. // met:
  11. // * Redistributions of source code must retain the above copyright
  12. // notice, this list of conditions and the following disclaimer.
  13. // * Redistributions in binary form must reproduce the above
  14. // copyright notice, this list of conditions and the following disclaimer
  15. // in the documentation and/or other materials provided with the
  16. // distribution.
  17. // * Neither the name of Industrial Light & Magic nor the names of
  18. // its contributors may be used to endorse or promote products derived
  19. // from this software without specific prior written permission.
  20. //
  21. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. //
  33. ///////////////////////////////////////////////////////////////////////////
  34. // Primary authors:
  35. // Florian Kainz <kainz@ilm.com>
  36. // Rod Bogart <rgb@ilm.com>
  37. //---------------------------------------------------------------------------
  38. //
  39. // class half --
  40. // implementation of non-inline members
  41. //
  42. //---------------------------------------------------------------------------
  43. #include <assert.h>
  44. #include "half.h"
  45. using namespace std;
  46. //-------------------------------------------------------------
  47. // Lookup tables for half-to-float and float-to-half conversion
  48. //-------------------------------------------------------------
  49. HALF_EXPORT const half::uif half::_toFloat[1 << 16] =
  50. #include "toFloat.h"
  51. HALF_EXPORT const unsigned short half::_eLut[1 << 9] =
  52. #include "eLut.h"
  53. //-----------------------------------------------
  54. // Overflow handler for float-to-half conversion;
  55. // generates a hardware floating-point overflow,
  56. // which may be trapped by the operating system.
  57. //-----------------------------------------------
  58. HALF_EXPORT float
  59. half::overflow ()
  60. {
  61. volatile float f = 1e10;
  62. for (int i = 0; i < 10; i++)
  63. f *= f; // this will overflow before
  64. // the for­loop terminates
  65. return f;
  66. }
  67. //-----------------------------------------------------
  68. // Float-to-half conversion -- general case, including
  69. // zeroes, denormalized numbers and exponent overflows.
  70. //-----------------------------------------------------
  71. HALF_EXPORT short
  72. half::convert (int i)
  73. {
  74. //
  75. // Our floating point number, f, is represented by the bit
  76. // pattern in integer i. Disassemble that bit pattern into
  77. // the sign, s, the exponent, e, and the significand, m.
  78. // Shift s into the position where it will go in in the
  79. // resulting half number.
  80. // Adjust e, accounting for the different exponent bias
  81. // of float and half (127 versus 15).
  82. //
  83. int s = (i >> 16) & 0x00008000;
  84. int e = ((i >> 23) & 0x000000ff) - (127 - 15);
  85. int m = i & 0x007fffff;
  86. //
  87. // Now reassemble s, e and m into a half:
  88. //
  89. if (e <= 0)
  90. {
  91. if (e < -10)
  92. {
  93. //
  94. // E is less than -10. The absolute value of f is
  95. // less than HALF_MIN (f may be a small normalized
  96. // float, a denormalized float or a zero).
  97. //
  98. // We convert f to a half zero with the same sign as f.
  99. //
  100. return s;
  101. }
  102. //
  103. // E is between -10 and 0. F is a normalized float
  104. // whose magnitude is less than HALF_NRM_MIN.
  105. //
  106. // We convert f to a denormalized half.
  107. //
  108. //
  109. // Add an explicit leading 1 to the significand.
  110. //
  111. m = m | 0x00800000;
  112. //
  113. // Round to m to the nearest (10+e)-bit value (with e between
  114. // -10 and 0); in case of a tie, round to the nearest even value.
  115. //
  116. // Rounding may cause the significand to overflow and make
  117. // our number normalized. Because of the way a half's bits
  118. // are laid out, we don't have to treat this case separately;
  119. // the code below will handle it correctly.
  120. //
  121. int t = 14 - e;
  122. int a = (1 << (t - 1)) - 1;
  123. int b = (m >> t) & 1;
  124. m = (m + a + b) >> t;
  125. //
  126. // Assemble the half from s, e (zero) and m.
  127. //
  128. return s | m;
  129. }
  130. else if (e == 0xff - (127 - 15))
  131. {
  132. if (m == 0)
  133. {
  134. //
  135. // F is an infinity; convert f to a half
  136. // infinity with the same sign as f.
  137. //
  138. return s | 0x7c00;
  139. }
  140. else
  141. {
  142. //
  143. // F is a NAN; we produce a half NAN that preserves
  144. // the sign bit and the 10 leftmost bits of the
  145. // significand of f, with one exception: If the 10
  146. // leftmost bits are all zero, the NAN would turn
  147. // into an infinity, so we have to set at least one
  148. // bit in the significand.
  149. //
  150. m >>= 13;
  151. return s | 0x7c00 | m | (m == 0);
  152. }
  153. }
  154. else
  155. {
  156. //
  157. // E is greater than zero. F is a normalized float.
  158. // We try to convert f to a normalized half.
  159. //
  160. //
  161. // Round to m to the nearest 10-bit value. In case of
  162. // a tie, round to the nearest even value.
  163. //
  164. m = m + 0x00000fff + ((m >> 13) & 1);
  165. if (m & 0x00800000)
  166. {
  167. m = 0; // overflow in significand,
  168. e += 1; // adjust exponent
  169. }
  170. //
  171. // Handle exponent overflow
  172. //
  173. if (e > 30)
  174. {
  175. overflow (); // Cause a hardware floating point overflow;
  176. return s | 0x7c00; // if this returns, the half becomes an
  177. } // infinity with the same sign as f.
  178. //
  179. // Assemble the half from s, e and m.
  180. //
  181. return s | (e << 10) | (m >> 13);
  182. }
  183. }
  184. //---------------------
  185. // Stream I/O operators
  186. //---------------------
  187. HALF_EXPORT ostream &
  188. operator << (ostream &os, half h)
  189. {
  190. os << float (h);
  191. return os;
  192. }
  193. HALF_EXPORT istream &
  194. operator >> (istream &is, half &h)
  195. {
  196. float f;
  197. is >> f;
  198. h = half (f);
  199. return is;
  200. }
  201. //---------------------------------------
  202. // Functions to print the bit-layout of
  203. // floats and halfs, mostly for debugging
  204. //---------------------------------------
  205. HALF_EXPORT void
  206. printBits (ostream &os, half h)
  207. {
  208. unsigned short b = h.bits();
  209. for (int i = 15; i >= 0; i--)
  210. {
  211. os << (((b >> i) & 1)? '1': '0');
  212. if (i == 15 || i == 10)
  213. os << ' ';
  214. }
  215. }
  216. HALF_EXPORT void
  217. printBits (ostream &os, float f)
  218. {
  219. half::uif x;
  220. x.f = f;
  221. for (int i = 31; i >= 0; i--)
  222. {
  223. os << (((x.i >> i) & 1)? '1': '0');
  224. if (i == 31 || i == 23)
  225. os << ' ';
  226. }
  227. }
  228. HALF_EXPORT void
  229. printBits (char c[19], half h)
  230. {
  231. unsigned short b = h.bits();
  232. for (int i = 15, j = 0; i >= 0; i--, j++)
  233. {
  234. c[j] = (((b >> i) & 1)? '1': '0');
  235. if (i == 15 || i == 10)
  236. c[++j] = ' ';
  237. }
  238. c[18] = 0;
  239. }
  240. HALF_EXPORT void
  241. printBits (char c[35], float f)
  242. {
  243. half::uif x;
  244. x.f = f;
  245. for (int i = 31, j = 0; i >= 0; i--, j++)
  246. {
  247. c[j] = (((x.i >> i) & 1)? '1': '0');
  248. if (i == 31 || i == 23)
  249. c[++j] = ' ';
  250. }
  251. c[34] = 0;
  252. }