// memory.hpp
  1. // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
  2. // Copyright 2008-2016 National ICT Australia (NICTA)
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // ------------------------------------------------------------------------
  15. //! \addtogroup memory
  16. //! @{
  17. class memory
  18. {
  19. public:
  20. inline arma_deprecated static uword enlarge_to_mult_of_chunksize(const uword n_elem);
  21. template<typename eT> inline arma_malloc static eT* acquire(const uword n_elem);
  22. template<typename eT> inline arma_deprecated static eT* acquire_chunked(const uword n_elem);
  23. template<typename eT> arma_inline static void release(eT* mem);
  24. template<typename eT> arma_inline static bool is_aligned(const eT* mem);
  25. template<typename eT> arma_inline static void mark_as_aligned( eT*& mem);
  26. template<typename eT> arma_inline static void mark_as_aligned(const eT*& mem);
  27. };
  28. //! no longer used; this function will be removed
  29. inline
  30. arma_deprecated
  31. uword
  32. memory::enlarge_to_mult_of_chunksize(const uword n_elem)
  33. {
  34. return n_elem;
  35. }
  36. template<typename eT>
  37. inline
  38. arma_malloc
  39. eT*
  40. memory::acquire(const uword n_elem)
  41. {
  42. if(n_elem == 0) { return NULL; }
  43. arma_debug_check
  44. (
  45. ( size_t(n_elem) > (std::numeric_limits<size_t>::max() / sizeof(eT)) ),
  46. "arma::memory::acquire(): requested size is too large"
  47. );
  48. eT* out_memptr;
  49. #if defined(ARMA_USE_TBB_ALLOC)
  50. {
  51. out_memptr = (eT *) scalable_malloc(sizeof(eT)*n_elem);
  52. }
  53. #elif defined(ARMA_USE_MKL_ALLOC)
  54. {
  55. out_memptr = (eT *) mkl_malloc( sizeof(eT)*n_elem, 32 );
  56. }
  57. #elif defined(ARMA_HAVE_POSIX_MEMALIGN)
  58. {
  59. eT* memptr = NULL;
  60. const size_t n_bytes = sizeof(eT)*size_t(n_elem);
  61. const size_t alignment = (n_bytes >= size_t(1024)) ? size_t(32) : size_t(16);
  62. // TODO: investigate apparent memory leak when using alignment >= 64 (as shown on Fedora 28, glibc 2.27)
  63. int status = posix_memalign((void **)&memptr, ( (alignment >= sizeof(void*)) ? alignment : sizeof(void*) ), n_bytes);
  64. out_memptr = (status == 0) ? memptr : NULL;
  65. }
  66. #elif defined(_MSC_VER)
  67. {
  68. //out_memptr = (eT *) malloc(sizeof(eT)*n_elem);
  69. //out_memptr = (eT *) _aligned_malloc( sizeof(eT)*n_elem, 16 ); // lives in malloc.h
  70. const size_t n_bytes = sizeof(eT)*size_t(n_elem);
  71. const size_t alignment = (n_bytes >= size_t(1024)) ? size_t(32) : size_t(16);
  72. out_memptr = (eT *) _aligned_malloc( n_bytes, alignment );
  73. }
  74. #else
  75. {
  76. //return ( new(std::nothrow) eT[n_elem] );
  77. out_memptr = (eT *) malloc(sizeof(eT)*n_elem);
  78. }
  79. #endif
  80. // TODO: for mingw, use __mingw_aligned_malloc
  81. arma_check_bad_alloc( (out_memptr == NULL), "arma::memory::acquire(): out of memory" );
  82. return out_memptr;
  83. }
  84. //! no longer used; this function will be removed; replace with call to memory::acquire()
  85. template<typename eT>
  86. inline
  87. arma_deprecated
  88. eT*
  89. memory::acquire_chunked(const uword n_elem)
  90. {
  91. return memory::acquire<eT>(n_elem);
  92. }
  93. template<typename eT>
  94. arma_inline
  95. void
  96. memory::release(eT* mem)
  97. {
  98. if(mem == NULL) { return; }
  99. #if defined(ARMA_USE_TBB_ALLOC)
  100. {
  101. scalable_free( (void *)(mem) );
  102. }
  103. #elif defined(ARMA_USE_MKL_ALLOC)
  104. {
  105. mkl_free( (void *)(mem) );
  106. }
  107. #elif defined(ARMA_HAVE_POSIX_MEMALIGN)
  108. {
  109. free( (void *)(mem) );
  110. }
  111. #elif defined(_MSC_VER)
  112. {
  113. //free( (void *)(mem) );
  114. _aligned_free( (void *)(mem) );
  115. }
  116. #else
  117. {
  118. //delete [] mem;
  119. free( (void *)(mem) );
  120. }
  121. #endif
  122. // TODO: for mingw, use __mingw_aligned_free
  123. }
  124. template<typename eT>
  125. arma_inline
  126. bool
  127. memory::is_aligned(const eT* mem)
  128. {
  129. #if (defined(ARMA_HAVE_ICC_ASSUME_ALIGNED) || defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)) && !defined(ARMA_DONT_CHECK_ALIGNMENT)
  130. {
  131. return (sizeof(std::size_t) >= sizeof(eT*)) ? ((std::size_t(mem) & 0x0F) == 0) : false;
  132. }
  133. #else
  134. {
  135. arma_ignore(mem);
  136. return false;
  137. }
  138. #endif
  139. }
  140. template<typename eT>
  141. arma_inline
  142. void
  143. memory::mark_as_aligned(eT*& mem)
  144. {
  145. #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED)
  146. {
  147. __assume_aligned(mem, 16);
  148. }
  149. #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)
  150. {
  151. mem = (eT*)__builtin_assume_aligned(mem, 16);
  152. }
  153. #else
  154. {
  155. arma_ignore(mem);
  156. }
  157. #endif
  158. // TODO: MSVC? __assume( (mem & 0x0F) == 0 );
  159. //
  160. // http://comments.gmane.org/gmane.comp.gcc.patches/239430
  161. // GCC __builtin_assume_aligned is similar to ICC's __assume_aligned,
  162. // so for lvalue first argument ICC's __assume_aligned can be emulated using
  163. // #define __assume_aligned(lvalueptr, align) lvalueptr = __builtin_assume_aligned (lvalueptr, align)
  164. //
  165. // http://www.inf.ethz.ch/personal/markusp/teaching/263-2300-ETH-spring11/slides/class19.pdf
  166. // http://software.intel.com/sites/products/documentation/hpc/composerxe/en-us/cpp/lin/index.htm
  167. // http://d3f8ykwhia686p.cloudfront.net/1live/intel/CompilerAutovectorizationGuide.pdf
  168. }
  169. template<typename eT>
  170. arma_inline
  171. void
  172. memory::mark_as_aligned(const eT*& mem)
  173. {
  174. #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED)
  175. {
  176. __assume_aligned(mem, 16);
  177. }
  178. #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)
  179. {
  180. mem = (const eT*)__builtin_assume_aligned(mem, 16);
  181. }
  182. #else
  183. {
  184. arma_ignore(mem);
  185. }
  186. #endif
  187. }
  188. //! @}