AssignEvaluator.h

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_ASSIGN_EVALUATOR_H
#define EIGEN_ASSIGN_EVALUATOR_H

namespace Eigen {

// This implementation is based on Assign.h

namespace internal {

/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/

// copy_using_evaluator_traits is based on assign_traits

template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
struct copy_using_evaluator_traits
{
  typedef typename DstEvaluator::XprType Dst;
  typedef typename Dst::Scalar DstScalar;

  enum {
    DstFlags = DstEvaluator::Flags,
    SrcFlags = SrcEvaluator::Flags
  };

public:
  enum {
    DstAlignment = DstEvaluator::Alignment,
    SrcAlignment = SrcEvaluator::Alignment,
    DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
  };

private:
  enum {
    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
              : int(Dst::RowsAtCompileTime),
    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
                 : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
                 : int(Dst::MaxRowsAtCompileTime),
    RestrictedInnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(InnerSize,MaxPacketSize),
    RestrictedLinearSize = EIGEN_SIZE_MIN_PREFER_FIXED(Dst::SizeAtCompileTime,MaxPacketSize),
    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
    MaxSizeAtCompileTime = Dst::SizeAtCompileTime
  };

  // TODO distinguish between linear traversal and inner-traversals
  typedef typename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType;
  typedef typename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType;

  enum {
    LinearPacketSize = unpacket_traits<LinearPacketType>::size,
    InnerPacketSize = unpacket_traits<InnerPacketType>::size
  };

public:
  enum {
    LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
    InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
  };

private:
  enum {
    DstIsRowMajor = DstFlags&RowMajorBit,
    SrcIsRowMajor = SrcFlags&RowMajorBit,
    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
    MightVectorize = bool(StorageOrdersAgree)
                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
                  && bool(functor_traits<AssignFunc>::PacketAccess),
    MayInnerVectorize = MightVectorize
                     && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
                     && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
                     && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
    MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
                      && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
    MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
                     && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
         in a fixed-size matrix.
         However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
  };

public:
  enum {
    Traversal = int(Dst::SizeAtCompileTime) == 0 ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time.
              : (int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize)) ? int(LinearVectorizedTraversal)
              : int(MayInnerVectorize)  ? int(InnerVectorizedTraversal)
              : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
              : int(MayLinearize)       ? int(LinearTraversal)
                                        : int(DefaultTraversal),
    Vectorized = int(Traversal) == InnerVectorizedTraversal
              || int(Traversal) == LinearVectorizedTraversal
              || int(Traversal) == SliceVectorizedTraversal
  };

  typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;

private:
  enum {
    ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
                     : Vectorized ? InnerPacketSize
                     : 1,
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
                       && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
    MayUnrollInner = int(InnerSize) != Dynamic
                  && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
  };

public:
  enum {
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
              ? (
                  int(MayUnrollCompletely) ? int(CompleteUnrolling)
                : int(MayUnrollInner)      ? int(InnerUnrolling)
                                           : int(NoUnrolling)
                )
              : int(Traversal) == int(LinearVectorizedTraversal)
              ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
                  ? int(CompleteUnrolling)
                  : int(NoUnrolling) )
              : int(Traversal) == int(LinearTraversal)
              ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
                                            : int(NoUnrolling) )
#if EIGEN_UNALIGNED_VECTORIZE
              : int(Traversal) == int(SliceVectorizedTraversal)
              ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
                                       : int(NoUnrolling) )
#endif
              : int(NoUnrolling)
  };

#ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
    std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(DstAlignment)
    EIGEN_DEBUG_VAR(SrcAlignment)
    EIGEN_DEBUG_VAR(LinearRequiredAlignment)
    EIGEN_DEBUG_VAR(InnerRequiredAlignment)
    EIGEN_DEBUG_VAR(JointAlignment)
    EIGEN_DEBUG_VAR(InnerSize)
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(LinearPacketSize)
    EIGEN_DEBUG_VAR(InnerPacketSize)
    EIGEN_DEBUG_VAR(ActualPacketSize)
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearize)
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
    EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
    EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
#endif
};
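
// Illustrative sketch (not a prescribed workflow): the traits above are not used directly by
// client code, but the strategy they select can be inspected by defining EIGEN_DEBUG_ASSIGN
// before including Eigen, since the kernel constructor in Part 4 then calls
// AssignmentTraits::debug() for every dense assignment. A hypothetical snippet, whose exact
// output depends on the scalar type, SIMD target and alignment of the build:
//   #define EIGEN_DEBUG_ASSIGN
//   #include <Eigen/Core>
//   int main() {
//     Eigen::MatrixXf a(64,64), b = Eigen::MatrixXf::Ones(64,64);
//     a = b + b;  // prints DstFlags/SrcFlags and the selected Traversal and Unrolling to std::cerr
//   }
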
/***************************************************************************
* Part 2 : meta-unrollers
***************************************************************************/

/************************
*** Default traversal ***
************************/

template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.assignCoeffByOuterInner(outer, inner);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.assignCoeffByOuterInner(outer, Index_);
    copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
};
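
// For intuition: with a 2x2 destination (InnerSizeAtCompileTime==2),
// copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, 4>::run(kernel)
// expands at compile time into
//   kernel.assignCoeffByOuterInner(0,0); kernel.assignCoeffByOuterInner(0,1);
//   kernel.assignCoeffByOuterInner(1,0); kernel.assignCoeffByOuterInner(1,1);
// with outer = Index / InnerSizeAtCompileTime and inner = Index % InnerSizeAtCompileTime,
// and the <Kernel, Stop, Stop> partial specializations terminating the recursion.
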
/***********************
*** Linear traversal ***
***********************/

template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
  {
    kernel.assignCoeff(Index);
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

/**************************
*** Inner vectorization ***
**************************/

template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;
  typedef typename Kernel::PacketType PacketType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime,
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling
{
  typedef typename Kernel::PacketType PacketType;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
  }
};

template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
};

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

// dense_assignment_loop is based on assign_impl

template<typename Kernel,
         int Traversal = Kernel::AssignmentTraits::Traversal,
         int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop;

/************************
***** Special Cases *****
************************/

// Zero-sized assignment is a no-op.
template<typename Kernel, int Unrolling>
struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
{
  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& /*kernel*/)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,
                        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
  }
};

/************************
*** Default traversal ***
************************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
  {
    for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
      for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
        kernel.assignCoeffByOuterInner(outer, inner);
      }
    }
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
  }
};

/***************************
*** Linear vectorization ***
***************************/
// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
// of the non-vectorizable beginning and ending parts
template <bool IsAligned = false>
struct unaligned_dense_assignment_loop
{
  // if IsAligned = true, then do nothing
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
};

template <>
struct unaligned_dense_assignment_loop<false>
{
  // MSVC must not inline this function. If it does, it fails to optimize the
  // packet access path.
  // FIXME check which version exhibits this issue
#if EIGEN_COMP_MSVC
  template <typename Kernel>
  static EIGEN_DONT_INLINE void run(Kernel &kernel,
                                    Index start,
                                    Index end)
#else
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
                                                        Index start,
                                                        Index end)
#endif
  {
    for (Index index = start; index < end; ++index)
      kernel.assignCoeff(index);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
      packetSize = unpacket_traits<PacketType>::size,
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
                                                            : int(Kernel::AssignmentTraits::DstAlignment),
      srcAlignment = Kernel::AssignmentTraits::JointAlignment
    };
    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

    unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);

    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
      kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);

    unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
  }
};
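
// In other words, the linear-vectorized loop above proceeds in three phases:
//   [0, alignedStart)          scalar assignments until the destination pointer is packet-aligned
//                              (skipped entirely when dstIsAligned),
//   [alignedStart, alignedEnd) full packets, packetSize coefficients at a time,
//   [alignedEnd, size)         scalar assignments for the remaining tail.
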
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::PacketType PacketType;

    enum { size = DstXprType::SizeAtCompileTime,
           packetSize = unpacket_traits<PacketType>::size,
           alignedSize = (int(size)/packetSize)*packetSize };

    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
  }
};
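
// Here the split between the packet part and the scalar tail is known at compile time:
// e.g. with size==7 and packetSize==4, alignedSize==4, so indices 0..3 are handled by the
// innervec unroller and indices 4..6 coefficient-wise by the default unroller.
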
/**************************
*** Inner vectorization ***
**************************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
  typedef typename Kernel::PacketType PacketType;
  enum {
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index packetSize = unpacket_traits<PacketType>::size;
    for(Index outer = 0; outer < outerSize; ++outer)
      for(Index inner = 0; inner < innerSize; inner+=packetSize)
        kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::AssignmentTraits Traits;
    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
                                                   Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
  }
};

/***********************
*** Linear traversal ***
***********************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    for(Index i = 0; i < size; ++i)
      kernel.assignCoeff(i);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

/**************************
*** Slice vectorization ***
***************************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      packetSize = unpacket_traits<PacketType>::size,
      requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
      alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = alignable ? int(requestedAlignment)
                               : int(Kernel::AssignmentTraits::DstAlignment)
    };
    const Scalar *dst_ptr = kernel.dstDataPtr();
    if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
    {
      // the pointer is not aligned to the scalar size, so alignment is not possible
      return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
    }
    const Index packetAlignedMask = packetSize - 1;
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);

    for(Index outer = 0; outer < outerSize; ++outer)
    {
      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
      // do the non-vectorizable part of the assignment
      for(Index inner = 0; inner<alignedStart ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);
      // do the vectorizable part of the assignment
      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
      // do the non-vectorizable part of the assignment
      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);
      alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
    }
  }
};
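
// Note on the loop above: because consecutive inner segments (e.g. the columns of a block in a
// larger column-major matrix) are generally not equally aligned, alignedStart is recomputed for
// the next outer iteration via alignedStep = (packetSize - outerStride % packetSize) & packetAlignedMask,
// so each segment gets its own scalar head, packet body and scalar tail.
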
#if EIGEN_UNALIGNED_VECTORIZE
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::PacketType PacketType;

    enum { innerSize = DstXprType::InnerSizeAtCompileTime,
           packetSize = unpacket_traits<PacketType>::size,
           vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
           size = DstXprType::SizeAtCompileTime };

    for(Index outer = 0; outer < kernel.outerSize(); ++outer)
    {
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);
    }
  }
};
#endif

/***************************************************************************
* Part 4 : Generic dense assignment kernel
***************************************************************************/

// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
// to another dense writable evaluator.
// It is parametrized by the two evaluators and by the actual assignment functor.
// This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible.
// One can customize the assignment by using this generic dense_assignment_kernel with different
// functors, or by completely overloading it, bypassing the functor.
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
class generic_dense_assignment_kernel
{
protected:
  typedef typename DstEvaluatorTypeT::XprType DstXprType;
  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
public:
  typedef DstEvaluatorTypeT DstEvaluatorType;
  typedef SrcEvaluatorTypeT SrcEvaluatorType;
  typedef typename DstEvaluatorType::Scalar Scalar;
  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
  typedef typename AssignmentTraits::PacketType PacketType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
    : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
  {
#ifdef EIGEN_DEBUG_ASSIGN
    AssignmentTraits::debug();
#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_dstExpr.size(); }
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const EIGEN_NOEXCEPT { return m_dstExpr.innerSize(); }
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const EIGEN_NOEXCEPT { return m_dstExpr.outerSize(); }
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dstExpr.rows(); }
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_dstExpr.cols(); }
  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return m_dstExpr.outerStride(); }

  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() EIGEN_NOEXCEPT { return m_dst; }
  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }

  /// Assign src(row,col) to dst(row,col) through the assignment functor.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
  {
    m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
  }

  /// \sa assignCoeff(Index,Index)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
  {
    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
  }

  /// \sa assignCoeff(Index,Index)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
    assignCoeff(row, col);
  }

  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
  }

  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
  }

  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
  }

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::RowsAtCompileTime) == 1 ? 0
         : int(Traits::ColsAtCompileTime) == 1 ? inner
         : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
         : inner;
  }

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::ColsAtCompileTime) == 1 ? 0
         : int(Traits::RowsAtCompileTime) == 1 ? inner
         : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
         : outer;
  }

  EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
  {
    return m_dstExpr.data();
  }

protected:
  DstEvaluatorType& m_dst;
  const SrcEvaluatorType& m_src;
  const Functor &m_functor;
  // TODO find a way to avoid the need for the original expression
  DstXprType& m_dstExpr;
};
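
// Minimal sketch of how the pieces fit together (this mirrors call_dense_assignment_loop in Part 5):
//   typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
//   SrcEvaluatorType srcEvaluator(src);
//   DstEvaluatorType dstEvaluator(dst);
//   Kernel kernel(dstEvaluator, srcEvaluator, func, dst);
//   dense_assignment_loop<Kernel>::run(kernel);  // the loop in Part 3 is selected from Kernel::AssignmentTraits
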
// Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the
// PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
// when computing the product.
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
class restricted_packet_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn>
{
protected:
  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
public:
  typedef typename Base::Scalar Scalar;
  typedef typename Base::DstXprType DstXprType;
  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
  typedef typename AssignmentTraits::PacketType PacketType;

  EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
    : Base(dst, src, func, dstExpr)
  {
  }
};
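
// Compared to its base class, the only difference is that AssignmentTraits (and hence PacketType)
// is re-derived with MaxPacketSize = 4, so loops built on this kernel should not use packets wider
// than 4 coefficients even when the SIMD target would allow it.
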
/***************************************************************************
* Part 5 : Entry point for dense rectangular assignment
***************************************************************************/

template<typename DstXprType,typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
{
  EIGEN_ONLY_USED_FOR_DEBUG(dst);
  EIGEN_ONLY_USED_FOR_DEBUG(src);
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
}

template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
{
  Index dstRows = src.rows();
  Index dstCols = src.cols();
  if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
    dst.resize(dstRows, dstCols);
  eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
}

template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
  typedef evaluator<DstXprType> DstEvaluatorType;
  typedef evaluator<SrcXprType> SrcEvaluatorType;

  SrcEvaluatorType srcEvaluator(src);

  // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
  // we need to resize the destination after the source evaluator has been created.
  resize_if_allowed(dst, src, func);

  DstEvaluatorType dstEvaluator(dst);

  typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());

  dense_assignment_loop<Kernel>::run(kernel);
}

// Specialization for filling the destination with a constant value.
#ifndef EIGEN_GPU_COMPILE_PHASE
template<typename DstXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src, const internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>& func)
{
  resize_if_allowed(dst, src, func);
  std::fill_n(dst.data(), dst.size(), src.functor()());
}
#endif

template<typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}

/***************************************************************************
* Part 6 : Generic assignment
***************************************************************************/

// Based on the respective shapes of the destination and source,
// the class AssignmentKind determines the kind of assignment mechanism.
// AssignmentKind must define a Kind typedef.
template<typename DstShape, typename SrcShape> struct AssignmentKind;

// Assignment kind defined in this file:
struct Dense2Dense {};
struct EigenBase2EigenBase {};

template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };

// This is the main assignment class
template< typename DstXprType, typename SrcXprType, typename Functor,
          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
          typename EnableIf = void>
struct Assignment;

// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
// does not have to bother about these annoying details.
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src)
{
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}

template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(const Dst& dst, const Src& src)
{
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}

// Deal with "assume-aliasing"
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
  typename plain_matrix_type<Src>::type tmp(src);
  call_assignment_no_alias(dst, tmp, func);
}

template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
  call_assignment_no_alias(dst, src, func);
}

// by-pass "assume-aliasing"
// When there is no aliasing, we require that 'dst' has been properly resized
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
  call_assignment_no_alias(dst.expression(), src, func);
}
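
// Dispatch summary for the call_assignment overloads above: when the source expression
// advertises "assume-aliasing" (as matrix products typically do), the source is first evaluated
// into a temporary plain matrix and then copied; otherwise, and always for dst.noalias() = src,
// the assignment goes straight to call_assignment_no_alias without a temporary.
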
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
  enum {
    NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
                      || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
                      ) && int(Dst::SizeAtCompileTime) != 1
  };

  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
  ActualDstType actualDst(dst);

  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);

  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
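
// The NeedToTranspose logic above implements automatic transposition of vectors: for instance,
// assigning a 1xN row-vector expression to an Nx1 column vector wraps the destination in
// Transpose<Dst> so that the element-wise copy still lines up; for all other shapes the
// destination is used as-is and the usual size checks apply.
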
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
  typedef evaluator<Dst> DstEvaluatorType;
  typedef evaluator<Src> SrcEvaluatorType;
  typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Func> Kernel;

  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);

  SrcEvaluatorType srcEvaluator(src);
  resize_if_allowed(dst, src, func);

  DstEvaluatorType dstEvaluator(dst);
  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());

  dense_assignment_loop<Kernel>::run(kernel);
}

template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src)
{
  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}

template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);

  Assignment<Dst,Src,Func>::run(dst, src, func);
}

template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}

// forward declaration
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);

// Generic Dense to Dense assignment
// Note that the last template argument "Weak" is needed to make it possible to perform
// both partial specialization+SFINAE without ambiguous specialization
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
{
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
  {
#ifndef EIGEN_NO_DEBUG
    internal::check_for_aliasing(dst, src);
#endif

    call_dense_assignment_loop(dst, src, func);
  }
};
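
// This Dense2Dense specialization is the path taken by ordinary dense expressions such as
// "m1 = m2 + m3": after the runtime aliasing check (compiled out with EIGEN_NO_DEBUG) it simply
// forwards to call_dense_assignment_loop, i.e. to the strategy machinery of Parts 1-4.
// Other destination/source shapes can hook into the same entry points by specializing Assignment
// for their own Kind.
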
// Generic assignment through evalTo.
// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
// Note that the last template argument "Weak" is needed to make it possible to perform
// both partial specialization+SFINAE without ambiguous specialization
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
{
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    src.evalTo(dst);
  }
  // NOTE The following two functions are templated to avoid their instantiation if not needed
  //      This is needed because some expressions support only evalTo and/or have 'void' as scalar type.
  template<typename SrcScalarType>
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    src.addTo(dst);
  }

  template<typename SrcScalarType>
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    src.subTo(dst);
  }
};

} // namespace internal
} // end namespace Eigen

#endif // EIGEN_ASSIGN_EVALUATOR_H