$darkmode
Eigen  5.0.1-dev
AssignEvaluator.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
5 // Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
6 // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
7 //
8 // This Source Code Form is subject to the terms of the Mozilla
9 // Public License v. 2.0. If a copy of the MPL was not distributed
10 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 
12 #ifndef EIGEN_ASSIGN_EVALUATOR_H
13 #define EIGEN_ASSIGN_EVALUATOR_H
14 
15 // IWYU pragma: private
16 #include "./InternalHeaderCheck.h"
17 
18 namespace Eigen {
19 
20 // This implementation is based on Assign.h
21 
22 namespace internal {
23 
24 /***************************************************************************
25  * Part 1 : the logic deciding a strategy for traversal and unrolling *
26  ***************************************************************************/
27 
28 // copy_using_evaluator_traits is based on assign_traits
29 
// Compile-time decision logic for a dense assignment of the expression behind SrcEvaluator
// into the expression behind DstEvaluator through the functor AssignFunc.
// The public results are the constants `Traversal`, `Unrolling`, `Vectorized`,
// `UsePacketSegment` and the alias `PacketType`.
// MaxPacketSize (Dynamic by default) is combined with the maximum sizes through
// min_size_prefer_fixed before the result is handed to find_best_packet.
30 template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = Dynamic>
31 struct copy_using_evaluator_traits {
32  using Src = typename SrcEvaluator::XprType;
33  using Dst = typename DstEvaluator::XprType;
34  using DstScalar = typename Dst::Scalar;
35 
36  static constexpr int DstFlags = DstEvaluator::Flags;
37  static constexpr int SrcFlags = SrcEvaluator::Flags;
38 
39  public:
40  static constexpr int DstAlignment = DstEvaluator::Alignment;
41  static constexpr int SrcAlignment = SrcEvaluator::Alignment;
42  static constexpr int JointAlignment = plain_enum_min(DstAlignment, SrcAlignment);
43  static constexpr bool DstHasDirectAccess = bool(DstFlags & DirectAccessBit);
44  static constexpr bool SrcIsRowMajor = bool(SrcFlags & RowMajorBit);
45  static constexpr bool DstIsRowMajor = bool(DstFlags & RowMajorBit);
46  static constexpr bool IsVectorAtCompileTime = Dst::IsVectorAtCompileTime;
47  static constexpr int RowsAtCompileTime = size_prefer_fixed(Src::RowsAtCompileTime, Dst::RowsAtCompileTime);
48  static constexpr int ColsAtCompileTime = size_prefer_fixed(Src::ColsAtCompileTime, Dst::ColsAtCompileTime);
49  static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime);
50  static constexpr int MaxRowsAtCompileTime =
51  min_size_prefer_fixed(Src::MaxRowsAtCompileTime, Dst::MaxRowsAtCompileTime);
52  static constexpr int MaxColsAtCompileTime =
53  min_size_prefer_fixed(Src::MaxColsAtCompileTime, Dst::MaxColsAtCompileTime);
54  static constexpr int MaxSizeAtCompileTime =
55  min_size_prefer_fixed(Src::MaxSizeAtCompileTime, Dst::MaxSizeAtCompileTime);
 // The inner dimension is the whole size for vectors; otherwise it is the column count for a
 // row-major destination and the row count for a column-major one.
56  static constexpr int InnerSizeAtCompileTime = IsVectorAtCompileTime ? SizeAtCompileTime
57  : DstIsRowMajor ? ColsAtCompileTime
58  : RowsAtCompileTime;
59  static constexpr int MaxInnerSizeAtCompileTime = IsVectorAtCompileTime ? MaxSizeAtCompileTime
60  : DstIsRowMajor ? MaxColsAtCompileTime
61  : MaxRowsAtCompileTime;
62  static constexpr int RestrictedInnerSize = min_size_prefer_fixed(MaxInnerSizeAtCompileTime, MaxPacketSize);
63  static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize);
64  static constexpr int OuterStride = outer_stride_at_compile_time<Dst>::ret;
65 
66  // TODO distinguish between linear traversal and inner-traversals
67  using LinearPacketType = typename find_best_packet<DstScalar, RestrictedLinearSize>::type;
68  using InnerPacketType = typename find_best_packet<DstScalar, RestrictedInnerSize>::type;
69 
70  static constexpr int LinearPacketSize = unpacket_traits<LinearPacketType>::size;
71  static constexpr int InnerPacketSize = unpacket_traits<InnerPacketType>::size;
72 
73  public:
74  static constexpr int LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment;
75  static constexpr int InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment;
76 
77  private:
 // Any vectorized path requires identical storage orders, ActualPacketAccessBit set on both
 // sides, and an assignment functor whose functor_traits report PacketAccess.
78  static constexpr bool StorageOrdersAgree = DstIsRowMajor == SrcIsRowMajor;
79  static constexpr bool MightVectorize = StorageOrdersAgree && bool(DstFlags & SrcFlags & ActualPacketAccessBit) &&
80  bool(functor_traits<AssignFunc>::PacketAccess);
 // Inner vectorization additionally needs a compile-time inner size and outer stride that are
 // both multiples of the inner packet size, and enough joint alignment unless
 // EIGEN_UNALIGNED_VECTORIZE is enabled.
81  static constexpr bool MayInnerVectorize = MightVectorize && (InnerSizeAtCompileTime != Dynamic) &&
82  (InnerSizeAtCompileTime % InnerPacketSize == 0) &&
83  (OuterStride != Dynamic) && (OuterStride % InnerPacketSize == 0) &&
84  (EIGEN_UNALIGNED_VECTORIZE || JointAlignment >= InnerRequiredAlignment);
 // Single-index traversal requires LinearAccessBit on both sides and matching storage orders.
85  static constexpr bool MayLinearize = StorageOrdersAgree && (DstFlags & SrcFlags & LinearAccessBit);
 // Linear vectorization needs direct access to the destination; the alignment requirement is
 // waived for dynamic maximum sizes, and fixed maximum sizes must hold at least one packet.
86  static constexpr bool MayLinearVectorize =
87  MightVectorize && MayLinearize && DstHasDirectAccess &&
88  (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment) || MaxSizeAtCompileTime == Dynamic) &&
89  (MaxSizeAtCompileTime == Dynamic || MaxSizeAtCompileTime >= LinearPacketSize);
90  /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
91  so it's only good for large enough sizes. */
92  static constexpr int InnerSizeThreshold = (EIGEN_UNALIGNED_VECTORIZE ? 1 : 3) * InnerPacketSize;
93  static constexpr bool MaySliceVectorize =
94  MightVectorize && DstHasDirectAccess &&
95  (MaxInnerSizeAtCompileTime == Dynamic || MaxInnerSizeAtCompileTime >= InnerSizeThreshold);
96  /* slice vectorization can be slow, so we only want it if the slices are big, which is
97  indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
98  in a fixed-size matrix
99  However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
100 
101  public:
 // Selection order: zero-size no-op; linear vectorization when its packet is strictly wider
 // than the inner packet; inner vectorization; linear vectorization; slice vectorization;
 // scalar linear traversal; and finally the default outer/inner scalar traversal.
102  static constexpr int Traversal = SizeAtCompileTime == 0 ? AllAtOnceTraversal
103  : (MayLinearVectorize && (LinearPacketSize > InnerPacketSize))
104  ? LinearVectorizedTraversal
105  : MayInnerVectorize ? InnerVectorizedTraversal
106  : MayLinearVectorize ? LinearVectorizedTraversal
107  : MaySliceVectorize ? SliceVectorizedTraversal
108  : MayLinearize ? LinearTraversal
109  : DefaultTraversal;
110  static constexpr bool Vectorized = Traversal == InnerVectorizedTraversal || Traversal == LinearVectorizedTraversal ||
111  Traversal == SliceVectorizedTraversal;
112 
 // The linear packet type is used only for linear vectorization; every other traversal
 // reports the inner packet type.
113  using PacketType = std::conditional_t<Traversal == LinearVectorizedTraversal, LinearPacketType, InnerPacketType>;
114 
115  private:
 // The unrolling budget is EIGEN_UNROLLING_LIMIT scaled by the packet size (1 when not
 // vectorized); the cost of an assignment is the coefficient count times the summed
 // destination and source read costs.
116  static constexpr int ActualPacketSize = Vectorized ? unpacket_traits<PacketType>::size : 1;
117  static constexpr int UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize;
118  static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost);
119  static constexpr bool MayUnrollCompletely =
120  (SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
121  static constexpr bool MayUnrollInner =
122  (InnerSizeAtCompileTime != Dynamic) && (InnerSizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
123 
124  public:
 // Unrolling allowed per traversal:
 //  - inner-vectorized / default: complete, else inner, else none;
 //  - linear-vectorized: complete only, and only when unaligned vectorization is enabled or the
 //    destination alignment satisfies LinearRequiredAlignment;
 //  - linear: complete or none;
 //  - slice-vectorized: inner unrolling, compiled in only when EIGEN_UNALIGNED_VECTORIZE is non-zero;
 //  - anything else: none.
125  static constexpr int Unrolling =
126  (Traversal == InnerVectorizedTraversal || Traversal == DefaultTraversal)
127  ? (MayUnrollCompletely ? CompleteUnrolling
128  : MayUnrollInner ? InnerUnrolling
129  : NoUnrolling)
130  : Traversal == LinearVectorizedTraversal
131  ? (MayUnrollCompletely && (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment))
132  ? CompleteUnrolling
133  : NoUnrolling)
134  : Traversal == LinearTraversal ? (MayUnrollCompletely ? CompleteUnrolling : NoUnrolling)
135 #if EIGEN_UNALIGNED_VECTORIZE
136  : Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? InnerUnrolling : NoUnrolling)
137 #endif
138  : NoUnrolling;
 // Forwards has_packet_segment for the selected packet type; the assignment loops in this file
 // use it to choose between packet-segment and per-coefficient handling of leftover coefficients.
139  static constexpr bool UsePacketSegment = has_packet_segment<PacketType>::value;
140 
141 #ifdef EIGEN_DEBUG_ASSIGN
 // Debug helper (only compiled with EIGEN_DEBUG_ASSIGN): writes the flags, the intermediate
 // predicates and the chosen Traversal/Unrolling to std::cerr.
142  static void debug() {
143  std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
144  std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
145  std::cerr.setf(std::ios::hex, std::ios::basefield);
146  std::cerr << "DstFlags"
147  << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
148  std::cerr << "SrcFlags"
149  << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
150  std::cerr.unsetf(std::ios::hex);
151  EIGEN_DEBUG_VAR(DstAlignment)
152  EIGEN_DEBUG_VAR(SrcAlignment)
153  EIGEN_DEBUG_VAR(LinearRequiredAlignment)
154  EIGEN_DEBUG_VAR(InnerRequiredAlignment)
155  EIGEN_DEBUG_VAR(JointAlignment)
156  EIGEN_DEBUG_VAR(InnerSizeAtCompileTime)
157  EIGEN_DEBUG_VAR(MaxInnerSizeAtCompileTime)
158  EIGEN_DEBUG_VAR(LinearPacketSize)
159  EIGEN_DEBUG_VAR(InnerPacketSize)
160  EIGEN_DEBUG_VAR(ActualPacketSize)
161  EIGEN_DEBUG_VAR(StorageOrdersAgree)
162  EIGEN_DEBUG_VAR(MightVectorize)
163  EIGEN_DEBUG_VAR(MayLinearize)
164  EIGEN_DEBUG_VAR(MayInnerVectorize)
165  EIGEN_DEBUG_VAR(MayLinearVectorize)
166  EIGEN_DEBUG_VAR(MaySliceVectorize)
167  std::cerr << "Traversal"
168  << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
169  EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
170  EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
171  EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
172  EIGEN_DEBUG_VAR(UnrollingLimit)
173  EIGEN_DEBUG_VAR(MayUnrollCompletely)
174  EIGEN_DEBUG_VAR(MayUnrollInner)
175  std::cerr << "Unrolling"
176  << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
177  std::cerr << std::endl;
178  }
179 #endif
180 };
181 
182 /***************************************************************************
183  * Part 2 : meta-unrollers
184  ***************************************************************************/
185 
186 /************************
187 *** Default traversal ***
188 ************************/
189 
190 template <typename Kernel, int Index_, int Stop>
191 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling {
192  static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
193  static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
194 
195  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
196  kernel.assignCoeffByOuterInner(Outer, Inner);
197  copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
198  }
199 };
200 
201 template <typename Kernel, int Stop>
202 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop> {
203  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
204 };
205 
206 template <typename Kernel, int Index_, int Stop>
207 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling {
208  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer) {
209  kernel.assignCoeffByOuterInner(outer, Index_);
210  copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_ + 1, Stop>::run(kernel, outer);
211  }
212 };
213 
214 template <typename Kernel, int Stop>
215 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> {
216  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
217 };
218 
219 /***********************
220 *** Linear traversal ***
221 ***********************/
222 
223 template <typename Kernel, int Index_, int Stop>
224 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling {
225  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
226  kernel.assignCoeff(Index_);
227  copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
228  }
229 };
230 
231 template <typename Kernel, int Stop>
232 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop> {
233  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
234 };
235 
236 /**************************
237 *** Inner vectorization ***
238 **************************/
239 
240 template <typename Kernel, int Index_, int Stop>
241 struct copy_using_evaluator_innervec_CompleteUnrolling {
242  using PacketType = typename Kernel::PacketType;
243  static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
244  static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
245  static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
246  static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
247  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
248 
249  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
250  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(Outer, Inner);
251  copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
252  }
253 };
254 
255 template <typename Kernel, int Stop>
256 struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop> {
257  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
258 };
259 
260 template <typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
261 struct copy_using_evaluator_innervec_InnerUnrolling {
262  using PacketType = typename Kernel::PacketType;
263  static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
264 
265  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
266  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
267  copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel,
268  outer);
269  }
270 };
271 
272 template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
273 struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment> {
274  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
275 };
276 
277 template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment, bool UsePacketSegment>
278 struct copy_using_evaluator_innervec_segment {
279  using PacketType = typename Kernel::PacketType;
280 
281  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
282  kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Start, 0,
283  Stop - Start);
284  }
285 };
286 
287 template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment>
288 struct copy_using_evaluator_innervec_segment<Kernel, Start, Stop, SrcAlignment, DstAlignment,
289  /*UsePacketSegment*/ false>
290  : copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Start, Stop> {};
291 
292 template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
293 struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
294  /*UsePacketSegment*/ true> {
295  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
296 };
297 
298 template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
299 struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
300  /*UsePacketSegment*/ false> {
301  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
302 };
303 
304 /***************************************************************************
305  * Part 3 : implementation of all cases
306  ***************************************************************************/
307 
308 // dense_assignment_loop is based on assign_impl
309 
310 template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
311  int Unrolling = Kernel::AssignmentTraits::Unrolling>
312 struct dense_assignment_loop_impl;
313 
314 template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
315  int Unrolling = Kernel::AssignmentTraits::Unrolling>
316 struct dense_assignment_loop {
317  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
318 #ifdef __cpp_lib_is_constant_evaluated
319  if (internal::is_constant_evaluated())
320  dense_assignment_loop_impl<Kernel, Traversal == AllAtOnceTraversal ? AllAtOnceTraversal : DefaultTraversal,
321  NoUnrolling>::run(kernel);
322  else
323 #endif
324  dense_assignment_loop_impl<Kernel, Traversal, Unrolling>::run(kernel);
325  }
326 };
327 
328 /************************
329 ***** Special Cases *****
330 ************************/
331 
332 // Zero-sized assignment is a no-op.
333 template <typename Kernel, int Unrolling>
334 struct dense_assignment_loop_impl<Kernel, AllAtOnceTraversal, Unrolling> {
335  static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
336 
337  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& /*kernel*/) {
338  EIGEN_STATIC_ASSERT(SizeAtCompileTime == 0, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
339  }
340 };
341 
342 /************************
343 *** Default traversal ***
344 ************************/
345 
346 template <typename Kernel>
347 struct dense_assignment_loop_impl<Kernel, DefaultTraversal, NoUnrolling> {
348  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& kernel) {
349  for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
350  for (Index inner = 0; inner < kernel.innerSize(); ++inner) {
351  kernel.assignCoeffByOuterInner(outer, inner);
352  }
353  }
354  }
355 };
356 
357 template <typename Kernel>
358 struct dense_assignment_loop_impl<Kernel, DefaultTraversal, CompleteUnrolling> {
359  static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
360 
361  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
362  copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
363  }
364 };
365 
366 template <typename Kernel>
367 struct dense_assignment_loop_impl<Kernel, DefaultTraversal, InnerUnrolling> {
368  static constexpr int InnerSizeAtCompileTime = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
369 
370  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
371  const Index outerSize = kernel.outerSize();
372  for (Index outer = 0; outer < outerSize; ++outer)
373  copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, InnerSizeAtCompileTime>::run(kernel, outer);
374  }
375 };
376 
377 /***************************
378 *** Linear vectorization ***
379 ***************************/
380 
381 // The goal of unaligned_dense_assignment_loop is simply to factor out the handling
382 // of the non-vectorizable beginning and ending parts.
383 
384 template <typename PacketType, int DstAlignment, int SrcAlignment, bool UsePacketSegment, bool Skip>
385 struct unaligned_dense_assignment_loop {
386  // if Skip == true, then do nothing
387  template <typename Kernel>
388  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*start*/, Index /*end*/) {}
389  template <typename Kernel>
390  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*outer*/,
391  Index /*innerStart*/, Index /*innerEnd*/) {}
392 };
393 
394 template <typename PacketType, int DstAlignment, int SrcAlignment>
395 struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ true,
396  /*Skip*/ false> {
397  template <typename Kernel>
398  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) {
399  Index count = end - start;
400  eigen_assert(count <= unpacket_traits<PacketType>::size);
401  if (count > 0) kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(start, 0, count);
402  }
403  template <typename Kernel>
404  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index start, Index end) {
405  Index count = end - start;
406  eigen_assert(count <= unpacket_traits<PacketType>::size);
407  if (count > 0)
408  kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, start, 0, count);
409  }
410 };
411 
412 template <typename PacketType, int DstAlignment, int SrcAlignment>
413 struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ false,
414  /*Skip*/ false> {
415  template <typename Kernel>
416  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) {
417  for (Index index = start; index < end; ++index) kernel.assignCoeff(index);
418  }
419  template <typename Kernel>
420  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index innerStart,
421  Index innerEnd) {
422  for (Index inner = innerStart; inner < innerEnd; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
423  }
424 };
425 
426 template <typename Kernel, int Index_, int Stop>
427 struct copy_using_evaluator_linearvec_CompleteUnrolling {
428  using PacketType = typename Kernel::PacketType;
429  static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
430  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
431  static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
432 
433  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
434  kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(Index_);
435  copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
436  }
437 };
438 
439 template <typename Kernel, int Stop>
440 struct copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, Stop, Stop> {
441  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
442 };
443 
444 template <typename Kernel, int Index_, int Stop, bool UsePacketSegment>
445 struct copy_using_evaluator_linearvec_segment {
446  using PacketType = typename Kernel::PacketType;
447  static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
448  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
449 
450  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
451  kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(Index_, 0, Stop - Index_);
452  }
453 };
454 
455 template <typename Kernel, int Index_, int Stop>
456 struct copy_using_evaluator_linearvec_segment<Kernel, Index_, Stop, /*UsePacketSegment*/ false>
457  : copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_, Stop> {};
458 
459 template <typename Kernel, int Stop>
460 struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ true> {
461  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
462 };
463 
464 template <typename Kernel, int Stop>
465 struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ false> {
466  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
467 };
468 
469 template <typename Kernel>
470 struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, NoUnrolling> {
471  using Scalar = typename Kernel::Scalar;
472  using PacketType = typename Kernel::PacketType;
473  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
474  static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
475  static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
476  static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
477  static constexpr bool Alignable =
478  (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
479  static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
480  static constexpr bool DstIsAligned = DstAlignment >= Alignment;
481  static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
482 
483  using head_loop =
484  unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, UsePacketSegment, DstIsAligned>;
485  using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, SrcAlignment, UsePacketSegment, false>;
486 
487  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
488  const Index size = kernel.size();
489  const Index alignedStart = DstIsAligned ? 0 : first_aligned<Alignment>(kernel.dstDataPtr(), size);
490  const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
491 
492  head_loop::run(kernel, 0, alignedStart);
493 
494  for (Index index = alignedStart; index < alignedEnd; index += PacketSize)
495  kernel.template assignPacket<Alignment, SrcAlignment, PacketType>(index);
496 
497  tail_loop::run(kernel, alignedEnd, size);
498  }
499 };
500 
501 template <typename Kernel>
502 struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, CompleteUnrolling> {
503  using PacketType = typename Kernel::PacketType;
504  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
505  static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime;
506  static constexpr int AlignedSize = numext::round_down(Size, PacketSize);
507  static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
508 
509  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
510  copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, AlignedSize>::run(kernel);
511  copy_using_evaluator_linearvec_segment<Kernel, AlignedSize, Size, UsePacketSegment>::run(kernel);
512  }
513 };
514 
515 /**************************
516 *** Inner vectorization ***
517 **************************/
518 
519 template <typename Kernel>
520 struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, NoUnrolling> {
521  using PacketType = typename Kernel::PacketType;
522  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
523  static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
524  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
525 
526  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
527  const Index innerSize = kernel.innerSize();
528  const Index outerSize = kernel.outerSize();
529  for (Index outer = 0; outer < outerSize; ++outer)
530  for (Index inner = 0; inner < innerSize; inner += PacketSize)
531  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
532  }
533 };
534 
535 template <typename Kernel>
536 struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, CompleteUnrolling> {
537  static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
538 
539  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
540  copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
541  }
542 };
543 
544 template <typename Kernel>
545 struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, InnerUnrolling> {
546  static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
547  static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
548  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
549 
550  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
551  const Index outerSize = kernel.outerSize();
552  for (Index outer = 0; outer < outerSize; ++outer)
553  copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, InnerSize, SrcAlignment, DstAlignment>::run(kernel,
554  outer);
555  }
556 };
557 
558 /***********************
559 *** Linear traversal ***
560 ***********************/
561 
562 template <typename Kernel>
563 struct dense_assignment_loop_impl<Kernel, LinearTraversal, NoUnrolling> {
564  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
565  const Index size = kernel.size();
566  for (Index i = 0; i < size; ++i) kernel.assignCoeff(i);
567  }
568 };
569 
570 template <typename Kernel>
571 struct dense_assignment_loop_impl<Kernel, LinearTraversal, CompleteUnrolling> {
572  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
573  copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, Kernel::AssignmentTraits::SizeAtCompileTime>::run(
574  kernel);
575  }
576 };
577 
578 /**************************
579 *** Slice vectorization ***
580 ***************************/
581 
582 template <typename Kernel>
583 struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, NoUnrolling> {
584  using Scalar = typename Kernel::Scalar;
585  using PacketType = typename Kernel::PacketType;
586  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
587  static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
588  static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
589  static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
590  static constexpr bool Alignable =
591  (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
592  static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
593  static constexpr bool DstIsAligned = DstAlignment >= Alignment;
594  static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
595 
596  using head_loop = unaligned_dense_assignment_loop<PacketType, DstAlignment, Unaligned, UsePacketSegment, !Alignable>;
597  using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, Unaligned, UsePacketSegment, false>;
598 
599  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
600  const Scalar* dst_ptr = kernel.dstDataPtr();
601  const Index innerSize = kernel.innerSize();
602  const Index outerSize = kernel.outerSize();
603  const Index alignedStep = Alignable ? (PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0;
604  Index alignedStart = ((!Alignable) || DstIsAligned) ? 0 : internal::first_aligned<Alignment>(dst_ptr, innerSize);
605 
606  for (Index outer = 0; outer < outerSize; ++outer) {
607  const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize);
608 
609  head_loop::run(kernel, outer, 0, alignedStart);
610 
611  // do the vectorizable part of the assignment
612  for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize)
613  kernel.template assignPacketByOuterInner<Alignment, Unaligned, PacketType>(outer, inner);
614 
615  tail_loop::run(kernel, outer, alignedEnd, innerSize);
616 
617  alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize);
618  }
619  }
620 };
621 
#if EIGEN_UNALIGNED_VECTORIZE
// Slice vectorized traversal with a compile-time unrolled inner dimension (only available
// when EIGEN_UNALIGNED_VECTORIZE is non-zero). Each outer slice is copied with unaligned
// packets over [0, VectorizableSize), followed by the leftover [VectorizableSize, InnerSize).
template <typename Kernel>
struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, InnerUnrolling> {
  using PacketType = typename Kernel::PacketType;
  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
  static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
  static constexpr int VectorizableSize = numext::round_down(InnerSize, PacketSize);
  static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;

  using packet_loop = copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, VectorizableSize, Unaligned, Unaligned>;
  using packet_segment_loop = copy_using_evaluator_innervec_segment<Kernel, VectorizableSize, InnerSize, Unaligned,
                                                                    Unaligned, UsePacketSegment>;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
    // The outer extent does not change while assigning, so query it once instead of on every
    // iteration (as the other loop implementations in this file already do).
    const Index outerSize = kernel.outerSize();
    for (Index outer = 0; outer < outerSize; ++outer) {
      packet_loop::run(kernel, outer);
      packet_segment_loop::run(kernel, outer);
    }
  }
};
#endif
643 
644 /***************************************************************************
645  * Part 4 : Generic dense assignment kernel
646  ***************************************************************************/
647 
648 // This class generalize the assignment of a coefficient (or packet) from one dense evaluator
649 // to another dense writable evaluator.
650 // It is parametrized by the two evaluators, and the actual assignment functor.
651 // This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
652 // One can customize the assignment using this generic dense_assignment_kernel with different
653 // functors, or by completely overloading it, by-passing a functor.
654 template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
655 class generic_dense_assignment_kernel {
656  protected:
657  typedef typename DstEvaluatorTypeT::XprType DstXprType;
658  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
659 
660  public:
661  typedef DstEvaluatorTypeT DstEvaluatorType;
662  typedef SrcEvaluatorTypeT SrcEvaluatorType;
663  typedef typename DstEvaluatorType::Scalar Scalar;
664  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
665  typedef typename AssignmentTraits::PacketType PacketType;
666 
667  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst,
668  const SrcEvaluatorType& src,
669  const Functor& func,
670  DstXprType& dstExpr)
671  : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) {
672 #ifdef EIGEN_DEBUG_ASSIGN
673  AssignmentTraits::debug();
674 #endif
675  }
676 
677  EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return m_dstExpr.size(); }
678  EIGEN_DEVICE_FUNC constexpr Index innerSize() const noexcept { return m_dstExpr.innerSize(); }
679  EIGEN_DEVICE_FUNC constexpr Index outerSize() const noexcept { return m_dstExpr.outerSize(); }
680  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_dstExpr.rows(); }
681  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_dstExpr.cols(); }
682  EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_dstExpr.outerStride(); }
683 
684  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() noexcept { return m_dst; }
685  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const noexcept { return m_src; }
686 
688  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) {
689  m_functor.assignCoeff(m_dst.coeffRef(row, col), m_src.coeff(row, col));
690  }
691 
693  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) {
694  m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
695  }
696 
698  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeffByOuterInner(Index outer, Index inner) {
699  Index row = rowIndexByOuterInner(outer, inner);
700  Index col = colIndexByOuterInner(outer, inner);
701  assignCoeff(row, col);
702  }
703 
704  template <int StoreMode, int LoadMode, typename Packet>
705  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) {
706  m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row, col),
707  m_src.template packet<LoadMode, Packet>(row, col));
708  }
709 
710  template <int StoreMode, int LoadMode, typename Packet>
711  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) {
712  m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode, Packet>(index));
713  }
714 
715  template <int StoreMode, int LoadMode, typename Packet>
716  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) {
717  Index row = rowIndexByOuterInner(outer, inner);
718  Index col = colIndexByOuterInner(outer, inner);
719  assignPacket<StoreMode, LoadMode, Packet>(row, col);
720  }
721 
722  template <int StoreMode, int LoadMode, typename Packet>
723  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index row, Index col, Index begin, Index count) {
724  m_functor.template assignPacketSegment<StoreMode>(
725  &m_dst.coeffRef(row, col), m_src.template packetSegment<LoadMode, Packet>(row, col, begin, count), begin,
726  count);
727  }
728 
729  template <int StoreMode, int LoadMode, typename Packet>
730  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index index, Index begin, Index count) {
731  m_functor.template assignPacketSegment<StoreMode>(
732  &m_dst.coeffRef(index), m_src.template packetSegment<LoadMode, Packet>(index, begin, count), begin, count);
733  }
734 
735  template <int StoreMode, int LoadMode, typename Packet>
736  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegmentByOuterInner(Index outer, Index inner, Index begin,
737  Index count) {
738  Index row = rowIndexByOuterInner(outer, inner);
739  Index col = colIndexByOuterInner(outer, inner);
740  assignPacketSegment<StoreMode, LoadMode, Packet>(row, col, begin, count);
741  }
742 
743  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) {
744  typedef typename DstEvaluatorType::ExpressionTraits Traits;
745  return int(Traits::RowsAtCompileTime) == 1 ? 0
746  : int(Traits::ColsAtCompileTime) == 1 ? inner
747  : int(DstEvaluatorType::Flags) & RowMajorBit ? outer
748  : inner;
749  }
750 
751  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) {
752  typedef typename DstEvaluatorType::ExpressionTraits Traits;
753  return int(Traits::ColsAtCompileTime) == 1 ? 0
754  : int(Traits::RowsAtCompileTime) == 1 ? inner
755  : int(DstEvaluatorType::Flags) & RowMajorBit ? inner
756  : outer;
757  }
758 
759  EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const { return m_dstExpr.data(); }
760 
761  protected:
762  DstEvaluatorType& m_dst;
763  const SrcEvaluatorType& m_src;
764  const Functor& m_functor;
765  // TODO find a way to avoid the needs of the original expression
766  DstXprType& m_dstExpr;
767 };
768 
769 // Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the
770 // PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
771 // when computing the product.
772 
773 template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
774 class restricted_packet_dense_assignment_kernel
775  : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> {
776  protected:
777  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
778 
779  public:
780  typedef typename Base::Scalar Scalar;
781  typedef typename Base::DstXprType DstXprType;
782  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
783  typedef typename AssignmentTraits::PacketType PacketType;
784 
785  EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT& dst, const SrcEvaluatorTypeT& src,
786  const Functor& func, DstXprType& dstExpr)
787  : Base(dst, src, func, dstExpr) {}
788 };
789 
790 /***************************************************************************
791  * Part 5 : Entry point for dense rectangular assignment
792  ***************************************************************************/
793 
794 template <typename DstXprType, typename SrcXprType, typename Functor>
795 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
796  const Functor& /*func*/) {
797  EIGEN_ONLY_USED_FOR_DEBUG(dst);
798  EIGEN_ONLY_USED_FOR_DEBUG(src);
799  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
800 }
801 
802 template <typename DstXprType, typename SrcXprType, typename T1, typename T2>
803 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
804  const internal::assign_op<T1, T2>& /*func*/) {
805  Index dstRows = src.rows();
806  Index dstCols = src.cols();
807  if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols);
808  eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
809 }
810 
811 template <typename DstXprType, typename SrcXprType, typename Functor>
812 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src,
813  const Functor& func) {
814  typedef evaluator<DstXprType> DstEvaluatorType;
815  typedef evaluator<SrcXprType> SrcEvaluatorType;
816 
817  SrcEvaluatorType srcEvaluator(src);
818 
819  // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
820  // we need to resize the destination after the source evaluator has been created.
821  resize_if_allowed(dst, src, func);
822 
823  DstEvaluatorType dstEvaluator(dst);
824 
825  typedef generic_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Functor> Kernel;
826  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
827 
828  dense_assignment_loop<Kernel>::run(kernel);
829 }
830 
831 template <typename DstXprType, typename SrcXprType>
832 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) {
833  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>());
834 }
835 
836 /***************************************************************************
837  * Part 6 : Generic assignment
838  ***************************************************************************/
839 
840 // Based on the respective shapes of the destination and source,
841 // the class AssignmentKind determine the kind of assignment mechanism.
842 // AssignmentKind must define a Kind typedef.
843 template <typename DstShape, typename SrcShape>
844 struct AssignmentKind;
845 
846 // Assignment kind defined in this file:
847 struct Dense2Dense {};
848 struct EigenBase2EigenBase {};
849 
850 template <typename, typename>
851 struct AssignmentKind {
852  typedef EigenBase2EigenBase Kind;
853 };
854 template <>
855 struct AssignmentKind<DenseShape, DenseShape> {
856  typedef Dense2Dense Kind;
857 };
858 
859 // This is the main assignment class
860 template <typename DstXprType, typename SrcXprType, typename Functor,
861  typename Kind = typename AssignmentKind<typename evaluator_traits<DstXprType>::Shape,
862  typename evaluator_traits<SrcXprType>::Shape>::Kind,
863  typename EnableIf = void>
864 struct Assignment;
865 
866 // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic
867 // transposition. Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite
868 // complicated. So this intermediate function removes everything related to "assume-aliasing" such that Assignment does
869 // not have to bother about these annoying details.
870 
871 template <typename Dst, typename Src>
872 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(Dst& dst, const Src& src) {
873  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
874 }
875 template <typename Dst, typename Src>
876 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const Src& src) {
877  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
878 }
879 
880 // Deal with "assume-aliasing"
881 template <typename Dst, typename Src, typename Func>
882 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
883  Dst& dst, const Src& src, const Func& func, std::enable_if_t<evaluator_assume_aliasing<Src>::value, void*> = 0) {
884  typename plain_matrix_type<Src>::type tmp(src);
885  call_assignment_no_alias(dst, tmp, func);
886 }
887 
888 template <typename Dst, typename Src, typename Func>
889 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
890  Dst& dst, const Src& src, const Func& func, std::enable_if_t<!evaluator_assume_aliasing<Src>::value, void*> = 0) {
891  call_assignment_no_alias(dst, src, func);
892 }
893 
894 // by-pass "assume-aliasing"
895 // When there is no aliasing, we require that 'dst' has been properly resized
896 template <typename Dst, template <typename> class StorageBase, typename Src, typename Func>
897 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(NoAlias<Dst, StorageBase>& dst, const Src& src,
898  const Func& func) {
899  call_assignment_no_alias(dst.expression(), src, func);
900 }
901 
902 template <typename Dst, typename Src, typename Func>
903 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src,
904  const Func& func) {
905  enum {
906  NeedToTranspose = ((int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) ||
907  (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) &&
908  int(Dst::SizeAtCompileTime) != 1
909  };
910 
911  typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst> ActualDstTypeCleaned;
912  typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst&> ActualDstType;
913  ActualDstType actualDst(dst);
914 
915  // TODO check whether this is the right place to perform these checks:
916  EIGEN_STATIC_ASSERT_LVALUE(Dst)
917  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src)
918  EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar);
919 
920  Assignment<ActualDstTypeCleaned, Src, Func>::run(actualDst, src, func);
921 }
922 
923 template <typename Dst, typename Src, typename Func>
924 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src,
925  const Func& func) {
926  typedef evaluator<Dst> DstEvaluatorType;
927  typedef evaluator<Src> SrcEvaluatorType;
928  typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Func> Kernel;
929 
930  EIGEN_STATIC_ASSERT_LVALUE(Dst)
931  EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
932 
933  SrcEvaluatorType srcEvaluator(src);
934  resize_if_allowed(dst, src, func);
935 
936  DstEvaluatorType dstEvaluator(dst);
937  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
938 
939  dense_assignment_loop<Kernel>::run(kernel);
940 }
941 
942 template <typename Dst, typename Src>
943 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src) {
944  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
945 }
946 
947 template <typename Dst, typename Src, typename Func>
948 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src,
949  const Func& func) {
950  // TODO check whether this is the right place to perform these checks:
951  EIGEN_STATIC_ASSERT_LVALUE(Dst)
952  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst, Src)
953  EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
954 
955  Assignment<Dst, Src, Func>::run(dst, src, func);
956 }
957 template <typename Dst, typename Src>
958 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) {
959  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
960 }
961 
962 // forward declaration
963 template <typename Dst, typename Src>
964 EIGEN_DEVICE_FUNC void check_for_aliasing(const Dst& dst, const Src& src);
965 
966 // Generic Dense to Dense assignment
967 // Note that the last template argument "Weak" is needed to make it possible to perform
968 // both partial specialization+SFINAE without ambiguous specialization
969 template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
970 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak> {
971  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(DstXprType& dst, const SrcXprType& src,
972  const Functor& func) {
973 #ifndef EIGEN_NO_DEBUG
974  if (!internal::is_constant_evaluated()) {
975  internal::check_for_aliasing(dst, src);
976  }
977 #endif
978 
979  call_dense_assignment_loop(dst, src, func);
980  }
981 };
982 
983 template <typename DstXprType, typename SrcPlainObject, typename Weak>
984 struct Assignment<DstXprType, CwiseNullaryOp<scalar_constant_op<typename DstXprType::Scalar>, SrcPlainObject>,
985  assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>, Dense2Dense, Weak> {
986  using Scalar = typename DstXprType::Scalar;
987  using NullaryOp = scalar_constant_op<Scalar>;
988  using SrcXprType = CwiseNullaryOp<NullaryOp, SrcPlainObject>;
989  using Functor = assign_op<Scalar, Scalar>;
990  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
991  const Functor& /*func*/) {
992  eigen_fill_impl<DstXprType>::run(dst, src);
993  }
994 };
995 
996 template <typename DstXprType, typename SrcPlainObject, typename Weak>
997 struct Assignment<DstXprType, CwiseNullaryOp<scalar_zero_op<typename DstXprType::Scalar>, SrcPlainObject>,
998  assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>, Dense2Dense, Weak> {
999  using Scalar = typename DstXprType::Scalar;
1000  using NullaryOp = scalar_zero_op<Scalar>;
1001  using SrcXprType = CwiseNullaryOp<NullaryOp, SrcPlainObject>;
1002  using Functor = assign_op<Scalar, Scalar>;
1003  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
1004  const Functor& /*func*/) {
1005  eigen_zero_impl<DstXprType>::run(dst, src);
1006  }
1007 };
1008 
1009 // Generic assignment through evalTo.
1010 // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
1011 // Note that the last template argument "Weak" is needed to make it possible to perform
1012 // both partial specialization+SFINAE without ambiguous specialization
1013 template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
1014 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak> {
1015  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1016  DstXprType& dst, const SrcXprType& src,
1017  const internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>& /*func*/) {
1018  Index dstRows = src.rows();
1019  Index dstCols = src.cols();
1020  if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
1021 
1022  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
1023  src.evalTo(dst);
1024  }
1025 
1026  // NOTE The following two functions are templated to avoid their instantiation if not needed
1027  // This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
1028  template <typename SrcScalarType>
1029  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1030  DstXprType& dst, const SrcXprType& src,
1031  const internal::add_assign_op<typename DstXprType::Scalar, SrcScalarType>& /*func*/) {
1032  Index dstRows = src.rows();
1033  Index dstCols = src.cols();
1034  if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
1035 
1036  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
1037  src.addTo(dst);
1038  }
1039 
1040  template <typename SrcScalarType>
1041  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1042  DstXprType& dst, const SrcXprType& src,
1043  const internal::sub_assign_op<typename DstXprType::Scalar, SrcScalarType>& /*func*/) {
1044  Index dstRows = src.rows();
1045  Index dstCols = src.cols();
1046  if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
1047 
1048  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
1049  src.subTo(dst);
1050  }
1051 };
1052 
1053 } // namespace internal
1054 
1055 } // end namespace Eigen
1056 
1057 #endif // EIGEN_ASSIGN_EVALUATOR_H
static constexpr lastp1_t end
Definition: IndexedViewHelper.h:79
const unsigned int DirectAccessBit
Definition: Constants.h:159
Namespace containing all symbols from the Eigen library.
Definition: B01_Experimental.dox:1
Definition: Constants.h:235
const unsigned int RowMajorBit
Definition: Constants.h:70
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82
const int Dynamic
Definition: Constants.h:25
const unsigned int ActualPacketAccessBit
Definition: Constants.h:108
const unsigned int LinearAccessBit
Definition: Constants.h:133