$darkmode
Eigen  5.0.1-dev
ProductEvaluators.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
6 // Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
7 //
8 // This Source Code Form is subject to the terms of the Mozilla
9 // Public License v. 2.0. If a copy of the MPL was not distributed
10 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 
12 #ifndef EIGEN_PRODUCTEVALUATORS_H
13 #define EIGEN_PRODUCTEVALUATORS_H
14 
15 // IWYU pragma: private
16 #include "./InternalHeaderCheck.h"
17 
18 namespace Eigen {
19 
20 namespace internal {
21 
30 template <typename Lhs, typename Rhs, int Options>
31 struct evaluator<Product<Lhs, Rhs, Options>> : public product_evaluator<Product<Lhs, Rhs, Options>> {
32  typedef Product<Lhs, Rhs, Options> XprType;
33  typedef product_evaluator<XprType> Base;
34 
35  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
36 };
37 
38 // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
39 // TODO we should apply that rule only if that's really helpful
40 template <typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
41 struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
42  const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
43  const Product<Lhs, Rhs, DefaultProduct>>> {
44  static const bool value = true;
45 };
46 template <typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
47 struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
48  const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
49  const Product<Lhs, Rhs, DefaultProduct>>>
50  : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1, Lhs, product), Rhs, DefaultProduct>> {
51  typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
52  const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
53  const Product<Lhs, Rhs, DefaultProduct>>
54  XprType;
55  typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1, Lhs, product), Rhs, DefaultProduct>> Base;
56 
57  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
58  : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {}
59 };
60 
61 template <typename Lhs, typename Rhs, int DiagIndex>
62 struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex>>
63  : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>> {
64  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
65  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>> Base;
66 
67  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
68  : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
69  Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), xpr.index())) {}
70 };
71 
72 // Helper class to perform a matrix product with the destination at hand.
73 // Depending on the sizes of the factors, there are different evaluation strategies
74 // as controlled by internal::product_type.
75 template <typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits<Lhs>::Shape,
76  typename RhsShape = typename evaluator_traits<Rhs>::Shape,
77  int ProductType = internal::product_type<Lhs, Rhs>::value>
78 struct generic_product_impl;
79 
80 template <typename Lhs, typename Rhs>
81 struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct>> {
82  static const bool value = true;
83 };
84 
85 // This is the default evaluator implementation for products:
86 // It creates a temporary and call generic_product_impl
87 template <typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
88 struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
89  : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject> {
90  typedef Product<Lhs, Rhs, Options> XprType;
91  typedef typename XprType::PlainObject PlainObject;
92  typedef evaluator<PlainObject> Base;
93  enum { Flags = Base::Flags | EvalBeforeNestingBit };
94 
95  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
96  : m_result(xpr.rows(), xpr.cols()) {
97  internal::construct_at<Base>(this, m_result);
98 
99  // FIXME shall we handle nested_eval here?,
100  // if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in
101  // permutation_matrix_product, transposition_matrix_product, etc.)
102  // typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
103  // typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
104  // typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
105  // typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;
106  //
107  // const LhsNested lhs(xpr.lhs());
108  // const RhsNested rhs(xpr.rhs());
109  //
110  // generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
111 
112  generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
113  }
114 
115  protected:
116  PlainObject m_result;
117 };
118 
119 // The following three shortcuts are enabled only if the scalar types match exactly.
120 // TODO: we could enable them for different scalar types when the product is not vectorized.
121 
122 // Dense = Product
123 template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
124 struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::assign_op<Scalar, Scalar>, Dense2Dense,
125  std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
126  typedef Product<Lhs, Rhs, Options> SrcXprType;
127  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
128  const internal::assign_op<Scalar, Scalar>&) {
129  Index dstRows = src.rows();
130  Index dstCols = src.cols();
131  if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
132  // FIXME shall we handle nested_eval here?
133  generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
134  }
135 };
136 
137 // Dense += Product
138 template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
139 struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::add_assign_op<Scalar, Scalar>, Dense2Dense,
140  std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
141  typedef Product<Lhs, Rhs, Options> SrcXprType;
142  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
143  const internal::add_assign_op<Scalar, Scalar>&) {
144  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
145  // FIXME shall we handle nested_eval here?
146  generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
147  }
148 };
149 
150 // Dense -= Product
151 template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
152 struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::sub_assign_op<Scalar, Scalar>, Dense2Dense,
153  std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
154  typedef Product<Lhs, Rhs, Options> SrcXprType;
155  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
156  const internal::sub_assign_op<Scalar, Scalar>&) {
157  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
158  // FIXME shall we handle nested_eval here?
159  generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
160  }
161 };
162 
163 // Dense ?= scalar * Product
164 // TODO we should apply that rule if that's really helpful
165 // for instance, this is not good for inner products
166 template <typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis,
167  typename Plain>
168 struct Assignment<DstXprType,
169  CwiseBinaryOp<internal::scalar_product_op<ScalarBis, Scalar>,
170  const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>, Plain>,
171  const Product<Lhs, Rhs, DefaultProduct>>,
172  AssignFunc, Dense2Dense> {
173  typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis, Scalar>,
174  const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>, Plain>,
175  const Product<Lhs, Rhs, DefaultProduct>>
176  SrcXprType;
177  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
178  const AssignFunc& func) {
179  call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs()) * src.rhs().rhs(), func);
180  }
181 };
182 
183 //----------------------------------------
184 // Catch "Dense ?= xpr + Product<>" expression to save one temporary
185 // FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
186 
187 template <typename OtherXpr, typename Lhs, typename Rhs>
188 struct evaluator_assume_aliasing<
189  CwiseBinaryOp<
190  internal::scalar_sum_op<typename OtherXpr::Scalar, typename Product<Lhs, Rhs, DefaultProduct>::Scalar>,
191  const OtherXpr, const Product<Lhs, Rhs, DefaultProduct>>,
192  DenseShape> {
193  static const bool value = true;
194 };
195 
196 template <typename OtherXpr, typename Lhs, typename Rhs>
197 struct evaluator_assume_aliasing<
198  CwiseBinaryOp<
199  internal::scalar_difference_op<typename OtherXpr::Scalar, typename Product<Lhs, Rhs, DefaultProduct>::Scalar>,
200  const OtherXpr, const Product<Lhs, Rhs, DefaultProduct>>,
201  DenseShape> {
202  static const bool value = true;
203 };
204 
205 template <typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
206 struct assignment_from_xpr_op_product {
207  template <typename SrcXprType, typename InitialFunc>
208  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
209  const InitialFunc& /*func*/) {
210  call_assignment_no_alias(dst, src.lhs(), Func1());
211  call_assignment_no_alias(dst, src.rhs(), Func2());
212  }
213 };
214 
215 #define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP, BINOP, ASSIGN_OP2) \
216  template <typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, \
217  typename SrcScalar, typename OtherScalar, typename ProdScalar> \
218  struct Assignment<DstXprType, \
219  CwiseBinaryOp<internal::BINOP<OtherScalar, ProdScalar>, const OtherXpr, \
220  const Product<Lhs, Rhs, DefaultProduct>>, \
221  internal::ASSIGN_OP<DstScalar, SrcScalar>, Dense2Dense> \
222  : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs, Rhs, DefaultProduct>, \
223  internal::ASSIGN_OP<DstScalar, OtherScalar>, \
224  internal::ASSIGN_OP2<DstScalar, ProdScalar>> {}
225 
226 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op, add_assign_op);
227 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op, scalar_sum_op, add_assign_op);
228 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_sum_op, sub_assign_op);
229 
230 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op, sub_assign_op);
231 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op, scalar_difference_op, sub_assign_op);
232 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_difference_op, add_assign_op);
233 
234 //----------------------------------------
235 
236 template <typename Lhs, typename Rhs>
237 struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, InnerProduct> {
238  using impl = default_inner_product_impl<Lhs, Rhs, false>;
239  template <typename Dst>
240  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
241  dst.coeffRef(0, 0) = impl::run(lhs, rhs);
242  }
243 
244  template <typename Dst>
245  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
246  dst.coeffRef(0, 0) += impl::run(lhs, rhs);
247  }
248 
249  template <typename Dst>
250  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
251  dst.coeffRef(0, 0) -= impl::run(lhs, rhs);
252  }
253 };
254 
255 /***********************************************************************
256  * Implementation of outer dense * dense vector product
257  ***********************************************************************/
258 
259 // Column major result
260 template <typename Dst, typename Lhs, typename Rhs, typename Func>
261 void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func,
262  const false_type&) {
263  evaluator<Rhs> rhsEval(rhs);
264  ei_declare_local_nested_eval(Lhs, lhs, Rhs::SizeAtCompileTime, actual_lhs);
265  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
266  // FIXME not very good if rhs is real and lhs complex while alpha is real too
267  const Index cols = dst.cols();
268  for (Index j = 0; j < cols; ++j) func(dst.col(j), rhsEval.coeff(Index(0), j) * actual_lhs);
269 }
270 
271 // Row major result
272 template <typename Dst, typename Lhs, typename Rhs, typename Func>
273 void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func,
274  const true_type&) {
275  evaluator<Lhs> lhsEval(lhs);
276  ei_declare_local_nested_eval(Rhs, rhs, Lhs::SizeAtCompileTime, actual_rhs);
277  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
278  // FIXME not very good if lhs is real and rhs complex while alpha is real too
279  const Index rows = dst.rows();
280  for (Index i = 0; i < rows; ++i) func(dst.row(i), lhsEval.coeff(i, Index(0)) * actual_rhs);
281 }
282 
283 template <typename Lhs, typename Rhs>
284 struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, OuterProduct> {
285  template <typename T>
286  struct is_row_major : bool_constant<(int(T::Flags) & RowMajorBit)> {};
287  typedef typename Product<Lhs, Rhs>::Scalar Scalar;
288 
289  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
290  struct set {
291  template <typename Dst, typename Src>
292  EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
293  dst.const_cast_derived() = src;
294  }
295  };
296  struct add {
298  template <typename Dst, typename Src>
299  EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
300  dst.const_cast_derived() += src;
301  }
302  };
303  struct sub {
304  template <typename Dst, typename Src>
305  EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
306  dst.const_cast_derived() -= src;
307  }
308  };
310  struct adds {
311  Scalar m_scale;
313  explicit adds(const Scalar& s) : m_scale(s) {}
315  template <typename Dst, typename Src>
316  void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
317  dst.const_cast_derived() += m_scale * src;
318  }
319  };
320 
321  template <typename Dst>
322  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
323  internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
324  }
325 
326  template <typename Dst>
327  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
328  internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
329  }
330 
331  template <typename Dst>
332  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
333  internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
334  }
335 
336  template <typename Dst>
337  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs,
338  const Scalar& alpha) {
339  internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
340  }
341 };
342 
343 // This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
344 template <typename Lhs, typename Rhs, typename Derived>
345 struct generic_product_impl_base {
346  typedef typename Product<Lhs, Rhs>::Scalar Scalar;
347 
348  template <typename Dst>
349  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
350  dst.setZero();
351  scaleAndAddTo(dst, lhs, rhs, Scalar(1));
352  }
353 
354  template <typename Dst>
355  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
356  scaleAndAddTo(dst, lhs, rhs, Scalar(1));
357  }
358 
359  template <typename Dst>
360  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
361  scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
362  }
363 
364  template <typename Dst>
365  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs,
366  const Scalar& alpha) {
367  Derived::scaleAndAddTo(dst, lhs, rhs, alpha);
368  }
369 };
370 
371 template <typename Lhs, typename Rhs>
372 struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemvProduct>
373  : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemvProduct>> {
374  typedef typename nested_eval<Lhs, 1>::type LhsNested;
375  typedef typename nested_eval<Rhs, 1>::type RhsNested;
376  typedef typename Product<Lhs, Rhs>::Scalar Scalar;
377  enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
378  typedef internal::remove_all_t<std::conditional_t<int(Side) == OnTheRight, LhsNested, RhsNested>> MatrixType;
379 
380  template <typename Dest>
381  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs,
382  const Scalar& alpha) {
383  // Fallback to inner product if both the lhs and rhs is a runtime vector.
384  if (lhs.rows() == 1 && rhs.cols() == 1) {
385  dst.coeffRef(0, 0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
386  return;
387  }
388  LhsNested actual_lhs(lhs);
389  RhsNested actual_rhs(rhs);
390  internal::gemv_dense_selector<Side, (int(MatrixType::Flags) & RowMajorBit) ? RowMajor : ColMajor,
391  bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(actual_lhs,
392  actual_rhs, dst,
393  alpha);
394  }
395 };
396 
397 template <typename Lhs, typename Rhs>
398 struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode> {
399  typedef typename Product<Lhs, Rhs>::Scalar Scalar;
400 
401  template <typename Dst>
402  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
403  // Same as: dst.noalias() = lhs.lazyProduct(rhs);
404  // but easier on the compiler side
405  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar, Scalar>());
406  }
407 
408  template <typename Dst>
409  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
410  // dst.noalias() += lhs.lazyProduct(rhs);
411  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar, Scalar>());
412  }
413 
414  template <typename Dst>
415  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
416  // dst.noalias() -= lhs.lazyProduct(rhs);
417  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar, Scalar>());
418  }
419 
420  // This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
421  // This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:
422  // dst {,+,-}= (s1*A)*(B*s2)
423  // will be rewritten as:
424  // dst {,+,-}= (s1*s2) * (A.lazyProduct(B))
425  // There are at least four benefits of doing so:
426  // 1 - huge performance gain for heap-allocated matrix types as it save costly allocations.
427  // 2 - it is faster than simply by-passing the heap allocation through stack allocation.
428  // 3 - it makes this fallback consistent with the heavy GEMM routine.
429  // 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
430  // (see https://stackoverflow.com/questions/54738495)
431  // For small fixed sizes matrices, however, the gains are less obvious, it is sometimes x2 faster, but sometimes x3
432  // slower, and the behavior depends also a lot on the compiler... This is why this re-writing strategy is currently
433  // enabled only when falling back from the main GEMM.
434  template <typename Dst, typename Func>
435  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs,
436  const Func& func) {
437  enum {
438  HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,
439  ConjLhs = blas_traits<Lhs>::NeedToConjugate,
440  ConjRhs = blas_traits<Rhs>::NeedToConjugate
441  };
442  // FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
443  // this is important for real*complex_mat
444  Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);
445 
446  eval_dynamic_impl(dst, blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
447  blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(), func, actualAlpha,
448  bool_constant<HasScalarFactor>());
449  }
450 
451  protected:
452  template <typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
453  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs,
454  const Func& func, const Scalar& s /* == 1 */,
455  false_type) {
456  EIGEN_UNUSED_VARIABLE(s);
457  eigen_internal_assert(numext::is_exactly_one(s));
458  call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
459  }
460 
461  template <typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
462  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs,
463  const Func& func, const Scalar& s, true_type) {
464  call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);
465  }
466 };
467 
468 // This specialization enforces the use of a coefficient-based evaluation strategy
469 template <typename Lhs, typename Rhs>
470 struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, LazyCoeffBasedProductMode>
471  : generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode> {};
472 
473 // Case 2: Evaluate coeff by coeff
474 //
475 // This is mostly taken from CoeffBasedProduct.h
476 // The main difference is that we add an extra argument to the etor_product_*_impl::run() function
477 // for the inner dimension of the product, because evaluator object do not know their size.
478 
479 template <int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
480 struct etor_product_coeff_impl;
481 
482 template <int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
483 struct etor_product_packet_impl;
484 
485 template <typename Lhs, typename Rhs, int ProductTag>
486 struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
487  : evaluator_base<Product<Lhs, Rhs, LazyProduct>> {
488  typedef Product<Lhs, Rhs, LazyProduct> XprType;
489  typedef typename XprType::Scalar Scalar;
490  typedef typename XprType::CoeffReturnType CoeffReturnType;
491 
492  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
493  : m_lhs(xpr.lhs()),
494  m_rhs(xpr.rhs()),
495  m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that!
496  m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable
497  // them when not needed, or perhaps declare them on the fly on the packet method... We
498  // have experiment to check what's best.
499  m_innerDim(xpr.lhs().cols()) {
500  EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
501  EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
502  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
503 #if 0
504  std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n";
505  std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n";
506  std::cerr << "LhsAlignment= " << LhsAlignment << "\n";
507  std::cerr << "RhsAlignment= " << RhsAlignment << "\n";
508  std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n";
509  std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n";
510  std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n";
511  std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n";
512  std::cerr << "Alignment= " << Alignment << "\n";
513  std::cerr << "Flags= " << Flags << "\n";
514 #endif
515  }
516 
517  // Everything below here is taken from CoeffBasedProduct.h
518 
519  typedef typename internal::nested_eval<Lhs, Rhs::ColsAtCompileTime>::type LhsNested;
520  typedef typename internal::nested_eval<Rhs, Lhs::RowsAtCompileTime>::type RhsNested;
521 
522  typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
523  typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;
524 
525  typedef evaluator<LhsNestedCleaned> LhsEtorType;
526  typedef evaluator<RhsNestedCleaned> RhsEtorType;
527 
528  enum {
529  RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
530  ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
531  InnerSize = min_size_prefer_fixed(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
532  MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
533  MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
534  };
535 
536  typedef typename find_best_packet<Scalar, RowsAtCompileTime>::type LhsVecPacketType;
537  typedef typename find_best_packet<Scalar, ColsAtCompileTime>::type RhsVecPacketType;
538 
539  enum {
540 
541  LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
542  RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
543  CoeffReadCost = InnerSize == 0 ? NumTraits<Scalar>::ReadCost
544  : InnerSize == Dynamic
545  ? HugeCost
546  : InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost)) +
547  (InnerSize - 1) * NumTraits<Scalar>::AddCost,
548 
549  Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
550 
551  LhsFlags = LhsEtorType::Flags,
552  RhsFlags = RhsEtorType::Flags,
553 
554  LhsRowMajor = LhsFlags & RowMajorBit,
555  RhsRowMajor = RhsFlags & RowMajorBit,
556 
557  LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
558  RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
559 
560  // Here, we don't care about alignment larger than the usable packet size.
561  LhsAlignment =
562  plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize* int(sizeof(typename LhsNestedCleaned::Scalar))),
563  RhsAlignment =
564  plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize* int(sizeof(typename RhsNestedCleaned::Scalar))),
565 
566  SameType = is_same<typename LhsNestedCleaned::Scalar, typename RhsNestedCleaned::Scalar>::value,
567 
568  CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1),
569  CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1),
570 
571  EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1
572  : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1)
573  ? 0
574  : (bool(RhsRowMajor) && !CanVectorizeLhs),
575 
576  Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) |
577  (EvalToRowMajor ? RowMajorBit : 0)
578  // TODO enable vectorization for mixed types
579  | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) |
580  (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
581 
582  LhsOuterStrideBytes =
583  int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
584  RhsOuterStrideBytes =
585  int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
586 
587  Alignment = bool(CanVectorizeLhs)
588  ? (LhsOuterStrideBytes <= 0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment)) != 0
589  ? 0
590  : LhsAlignment)
591  : bool(CanVectorizeRhs)
592  ? (RhsOuterStrideBytes <= 0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment)) != 0
593  ? 0
594  : RhsAlignment)
595  : 0,
596 
597  /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
598  * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
599  * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
600  * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
601  */
602  CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) &&
603  (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) &&
604  (int(InnerSize) % packet_traits<Scalar>::size == 0)
605  };
606 
607  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const {
608  return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum();
609  }
610 
611  /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
612  * which is why we don't set the LinearAccessBit.
613  * TODO: this seems possible when the result is a vector
614  */
615  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const {
616  const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
617  const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
618  return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum();
619  }
620 
621  template <int LoadMode, typename PacketType>
622  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index row, Index col) const {
623  PacketType res;
624  typedef etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
625  Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>
626  PacketImpl;
627  PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
628  return res;
629  }
630 
631  template <int LoadMode, typename PacketType>
632  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index index) const {
633  const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
634  const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
635  return packet<LoadMode, PacketType>(row, col);
636  }
637 
638  template <int LoadMode, typename PacketType>
639  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index row, Index col, Index begin,
640  Index count) const {
641  PacketType res;
642  typedef etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
643  Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>
644  PacketImpl;
645  PacketImpl::run_segment(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res, begin, count);
646  return res;
647  }
648 
649  template <int LoadMode, typename PacketType>
650  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index index, Index begin, Index count) const {
651  const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
652  const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
653  return packetSegment<LoadMode, PacketType>(row, col, begin, count);
654  }
655 
656  protected:
657  add_const_on_value_type_t<LhsNested> m_lhs;
658  add_const_on_value_type_t<RhsNested> m_rhs;
659 
660  LhsEtorType m_lhsImpl;
661  RhsEtorType m_rhsImpl;
662 
663  // TODO: Get rid of m_innerDim if known at compile time
664  Index m_innerDim;
665 };
666 
667 template <typename Lhs, typename Rhs>
668 struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
669  : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape> {
670  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
671  typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
672  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
673  enum { Flags = Base::Flags | EvalBeforeNestingBit };
674  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
675  : Base(BaseProduct(xpr.lhs(), xpr.rhs())) {}
676 };
677 
678 /****************************************
679 *** Coeff based product, Packet path ***
680 ****************************************/
681 
682 template <int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
683 struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> {
684  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
685  Index innerDim, Packet& res) {
686  etor_product_packet_impl<RowMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs,
687  innerDim, res);
688  res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex - 1))),
689  rhs.template packet<LoadMode, Packet>(Index(UnrollingIndex - 1), col), res);
690  }
691  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
692  Index innerDim, Packet& res, Index begin, Index count) {
693  etor_product_packet_impl<RowMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run_segment(
694  row, col, lhs, rhs, innerDim, res, begin, count);
695  res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex - 1))),
696  rhs.template packetSegment<LoadMode, Packet>(Index(UnrollingIndex - 1), col, begin, count), res);
697  }
698 };
699 
700 template <int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
701 struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> {
702  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
703  Index innerDim, Packet& res) {
704  etor_product_packet_impl<ColMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs,
705  innerDim, res);
706  res = pmadd(lhs.template packet<LoadMode, Packet>(row, Index(UnrollingIndex - 1)),
707  pset1<Packet>(rhs.coeff(Index(UnrollingIndex - 1), col)), res);
708  }
709  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
710  Index innerDim, Packet& res, Index begin, Index count) {
711  etor_product_packet_impl<ColMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run_segment(
712  row, col, lhs, rhs, innerDim, res, begin, count);
713  res = pmadd(lhs.template packetSegment<LoadMode, Packet>(row, Index(UnrollingIndex - 1), begin, count),
714  pset1<Packet>(rhs.coeff(Index(UnrollingIndex - 1), col)), res);
715  }
716 };
717 
718 template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
719 struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> {
720  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
721  Index /*innerDim*/, Packet& res) {
722  res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))), rhs.template packet<LoadMode, Packet>(Index(0), col));
723  }
724  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
725  Index /*innerDim*/, Packet& res, Index begin,
726  Index count) {
727  res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),
728  rhs.template packetSegment<LoadMode, Packet>(Index(0), col, begin, count));
729  }
730 };
731 
732 template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
733 struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> {
734  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
735  Index /*innerDim*/, Packet& res) {
736  res = pmul(lhs.template packet<LoadMode, Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
737  }
738  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
739  Index /*innerDim*/, Packet& res, Index begin,
740  Index count) {
741  res = pmul(lhs.template packetSegment<LoadMode, Packet>(row, Index(0), begin, count),
742  pset1<Packet>(rhs.coeff(Index(0), col)));
743  }
744 };
745 
746 template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
747 struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> {
748  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
749  const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
750  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
751  }
752  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
753  const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res,
754  Index /*begin*/, Index /*count*/) {
755  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
756  }
757 };
758 
759 template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
760 struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> {
761  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
762  const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
763  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
764  }
765  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
766  const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res,
767  Index /*begin*/, Index /*count*/) {
768  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
769  }
770 };
771 
772 template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
773 struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> {
774  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
775  Index innerDim, Packet& res) {
776  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
777  for (Index i = 0; i < innerDim; ++i)
778  res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode, Packet>(i, col), res);
779  }
780  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
781  Index innerDim, Packet& res, Index begin, Index count) {
782  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
783  for (Index i = 0; i < innerDim; ++i)
784  res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packetSegment<LoadMode, Packet>(i, col, begin, count),
785  res);
786  }
787 };
788 
789 template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
790 struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> {
791  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
792  Index innerDim, Packet& res) {
793  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
794  for (Index i = 0; i < innerDim; ++i)
795  res = pmadd(lhs.template packet<LoadMode, Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
796  }
797  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
798  Index innerDim, Packet& res, Index begin, Index count) {
799  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
800  for (Index i = 0; i < innerDim; ++i)
801  res = pmadd(lhs.template packetSegment<LoadMode, Packet>(row, i, begin, count), pset1<Packet>(rhs.coeff(i, col)),
802  res);
803  }
804 };
805 
806 /***************************************************************************
807  * Triangular products
808  ***************************************************************************/
809 template <int Mode, bool LhsIsTriangular, typename Lhs, bool LhsIsVector, typename Rhs, bool RhsIsVector>
810 struct triangular_product_impl;
811 
812 template <typename Lhs, typename Rhs, int ProductTag>
813 struct generic_product_impl<Lhs, Rhs, TriangularShape, DenseShape, ProductTag>
814  : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, TriangularShape, DenseShape, ProductTag>> {
815  typedef typename Product<Lhs, Rhs>::Scalar Scalar;
816 
817  template <typename Dest>
818  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
819  triangular_product_impl<Lhs::Mode, true, typename Lhs::MatrixType, false, Rhs, Rhs::ColsAtCompileTime == 1>::run(
820  dst, lhs.nestedExpression(), rhs, alpha);
821  }
822 };
823 
824 template <typename Lhs, typename Rhs, int ProductTag>
825 struct generic_product_impl<Lhs, Rhs, DenseShape, TriangularShape, ProductTag>
826  : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, TriangularShape, ProductTag>> {
827  typedef typename Product<Lhs, Rhs>::Scalar Scalar;
828 
829  template <typename Dest>
830  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
831  triangular_product_impl<Rhs::Mode, false, Lhs, Lhs::RowsAtCompileTime == 1, typename Rhs::MatrixType, false>::run(
832  dst, lhs, rhs.nestedExpression(), alpha);
833  }
834 };
835 
836 /***************************************************************************
837  * SelfAdjoint products
838  ***************************************************************************/
839 template <typename Lhs, int LhsMode, bool LhsIsVector, typename Rhs, int RhsMode, bool RhsIsVector>
840 struct selfadjoint_product_impl;
841 
842 template <typename Lhs, typename Rhs, int ProductTag>
843 struct generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>
844  : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>> {
845  typedef typename Product<Lhs, Rhs>::Scalar Scalar;
846 
847  template <typename Dest>
848  static EIGEN_DEVICE_FUNC void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
849  selfadjoint_product_impl<typename Lhs::MatrixType, Lhs::Mode, false, Rhs, 0, Rhs::ColsAtCompileTime == 1>::run(
850  dst, lhs.nestedExpression(), rhs, alpha);
851  }
852 };
853 
854 template <typename Lhs, typename Rhs, int ProductTag>
855 struct generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>
856  : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>> {
857  typedef typename Product<Lhs, Rhs>::Scalar Scalar;
858 
859  template <typename Dest>
860  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
861  selfadjoint_product_impl<Lhs, 0, Lhs::RowsAtCompileTime == 1, typename Rhs::MatrixType, Rhs::Mode, false>::run(
862  dst, lhs, rhs.nestedExpression(), alpha);
863  }
864 };
865 
866 /***************************************************************************
867  * Diagonal products
868  ***************************************************************************/
869 
870 template <typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
871 struct diagonal_product_evaluator_base : evaluator_base<Derived> {
872  typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
873 
874  public:
875  enum {
876  CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) +
877  int(evaluator<DiagonalType>::CoeffReadCost),
878 
879  MatrixFlags = evaluator<MatrixType>::Flags,
880  DiagFlags = evaluator<DiagonalType>::Flags,
881 
882  StorageOrder_ = (Derived::MaxRowsAtCompileTime == 1 && Derived::MaxColsAtCompileTime != 1) ? RowMajor
883  : (Derived::MaxColsAtCompileTime == 1 && Derived::MaxRowsAtCompileTime != 1) ? ColMajor
884  : MatrixFlags & RowMajorBit ? RowMajor
885  : ColMajor,
886  SameStorageOrder_ = int(StorageOrder_) == ((MatrixFlags & RowMajorBit) ? RowMajor : ColMajor),
887 
888  ScalarAccessOnDiag_ = !((int(StorageOrder_) == ColMajor && int(ProductOrder) == OnTheLeft) ||
889  (int(StorageOrder_) == RowMajor && int(ProductOrder) == OnTheRight)),
890  SameTypes_ = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
891  // FIXME currently we need same types, but in the future the next rule should be the one
892  // Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (SameTypes_ &&
893  // bool(int(DiagFlags)&PacketAccessBit))),
894  Vectorizable_ = bool(int(MatrixFlags) & PacketAccessBit) && SameTypes_ &&
895  (SameStorageOrder_ || (MatrixFlags & LinearAccessBit) == LinearAccessBit) &&
896  (ScalarAccessOnDiag_ || (bool(int(DiagFlags) & PacketAccessBit))),
897  LinearAccessMask_ =
898  (MatrixType::RowsAtCompileTime == 1 || MatrixType::ColsAtCompileTime == 1) ? LinearAccessBit : 0,
899  Flags =
900  ((HereditaryBits | LinearAccessMask_) & (unsigned int)(MatrixFlags)) | (Vectorizable_ ? PacketAccessBit : 0),
901  Alignment = evaluator<MatrixType>::Alignment,
902 
903  AsScalarProduct =
904  (DiagonalType::SizeAtCompileTime == 1) ||
905  (DiagonalType::SizeAtCompileTime == Dynamic && MatrixType::RowsAtCompileTime == 1 &&
906  ProductOrder == OnTheLeft) ||
907  (DiagonalType::SizeAtCompileTime == Dynamic && MatrixType::ColsAtCompileTime == 1 && ProductOrder == OnTheRight)
908  };
909 
910  EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType& mat, const DiagonalType& diag)
911  : m_diagImpl(diag), m_matImpl(mat) {
912  EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
913  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
914  }
915 
916  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const {
917  if (AsScalarProduct)
918  return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
919  else
920  return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
921  }
922 
923  protected:
924  template <int LoadMode, typename PacketType>
925  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const {
926  return internal::pmul(m_matImpl.template packet<LoadMode, PacketType>(row, col),
927  internal::pset1<PacketType>(m_diagImpl.coeff(id)));
928  }
929 
930  template <int LoadMode, typename PacketType>
931  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const {
932  enum {
933  InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
934  DiagonalPacketLoadMode = plain_enum_min(
935  LoadMode,
936  ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
937  };
938  return internal::pmul(m_matImpl.template packet<LoadMode, PacketType>(row, col),
939  m_diagImpl.template packet<DiagonalPacketLoadMode, PacketType>(id));
940  }
941 
942  template <int LoadMode, typename PacketType>
943  EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count,
944  internal::true_type) const {
945  return internal::pmul(m_matImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
946  internal::pset1<PacketType>(m_diagImpl.coeff(id)));
947  }
948 
949  template <int LoadMode, typename PacketType>
950  EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count,
951  internal::false_type) const {
952  enum {
953  InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
954  DiagonalPacketLoadMode = plain_enum_min(
955  LoadMode,
956  ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
957  };
958  return internal::pmul(m_matImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
959  m_diagImpl.template packetSegment<DiagonalPacketLoadMode, PacketType>(id, begin, count));
960  }
961 
962  evaluator<DiagonalType> m_diagImpl;
963  evaluator<MatrixType> m_matImpl;
964 };
965 
966 // diagonal * dense
967 template <typename Lhs, typename Rhs, int ProductKind, int ProductTag>
968 struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
969  : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
970  OnTheLeft> {
971  typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
972  OnTheLeft>
973  Base;
974  using Base::coeff;
975  using Base::m_diagImpl;
976  using Base::m_matImpl;
977  typedef typename Base::Scalar Scalar;
978 
979  typedef Product<Lhs, Rhs, ProductKind> XprType;
980  typedef typename XprType::PlainObject PlainObject;
981  typedef typename Lhs::DiagonalVectorType DiagonalType;
982 
983  static constexpr int StorageOrder = Base::StorageOrder_;
984  using IsRowMajor_t = bool_constant<StorageOrder == RowMajor>;
985 
986  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}
987 
988  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
989  return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
990  }
991 
992 #ifndef EIGEN_GPUCC
993  template <int LoadMode, typename PacketType>
994  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
995  // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
996  // See also similar calls below.
997  return this->template packet_impl<LoadMode, PacketType>(row, col, row, IsRowMajor_t());
998  }
999 
1000  template <int LoadMode, typename PacketType>
1001  EIGEN_STRONG_INLINE PacketType packet(Index idx) const {
1002  return packet<LoadMode, PacketType>(int(StorageOrder) == ColMajor ? idx : 0,
1003  int(StorageOrder) == ColMajor ? 0 : idx);
1004  }
1005 
1006  template <int LoadMode, typename PacketType>
1007  EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
1008  // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
1009  // See also similar calls below.
1010  return this->template packet_segment_impl<LoadMode, PacketType>(row, col, row, begin, count, IsRowMajor_t());
1011  }
1012 
1013  template <int LoadMode, typename PacketType>
1014  EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const {
1015  return packetSegment<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx,
1016  begin, count);
1017  }
1018 #endif
1019 };
1020 
1021 // dense * diagonal
1022 template <typename Lhs, typename Rhs, int ProductKind, int ProductTag>
1023 struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
1024  : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
1025  OnTheRight> {
1026  typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
1027  OnTheRight>
1028  Base;
1029  using Base::coeff;
1030  using Base::m_diagImpl;
1031  using Base::m_matImpl;
1032  typedef typename Base::Scalar Scalar;
1033 
1034  typedef Product<Lhs, Rhs, ProductKind> XprType;
1035  typedef typename XprType::PlainObject PlainObject;
1036 
1037  static constexpr int StorageOrder = Base::StorageOrder_;
1038  using IsColMajor_t = bool_constant<StorageOrder == ColMajor>;
1039 
1040  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {}
1041 
1042  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
1043  return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
1044  }
1045 
1046 #ifndef EIGEN_GPUCC
1047  template <int LoadMode, typename PacketType>
1048  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
1049  return this->template packet_impl<LoadMode, PacketType>(row, col, col, IsColMajor_t());
1050  }
1051 
1052  template <int LoadMode, typename PacketType>
1053  EIGEN_STRONG_INLINE PacketType packet(Index idx) const {
1054  return packet<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx);
1055  }
1056 
1057  template <int LoadMode, typename PacketType>
1058  EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
1059  return this->template packet_segment_impl<LoadMode, PacketType>(row, col, col, begin, count, IsColMajor_t());
1060  }
1061 
1062  template <int LoadMode, typename PacketType>
1063  EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const {
1064  return packetSegment<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx,
1065  begin, count);
1066  }
1067 #endif
1068 };
1069 
1070 /***************************************************************************
1071  * Products with permutation matrices
1072  ***************************************************************************/
1073 
1079 template <typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
1080 struct permutation_matrix_product;
1081 
1082 template <typename ExpressionType, int Side, bool Transposed>
1083 struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape> {
1084  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
1085  typedef remove_all_t<MatrixType> MatrixTypeCleaned;
1086 
1087  template <typename Dest, typename PermutationType>
1088  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm,
1089  const ExpressionType& xpr) {
1090  MatrixType mat(xpr);
1091  const Index n = Side == OnTheLeft ? mat.rows() : mat.cols();
1092  // FIXME we need an is_same for expression that is not sensitive to constness. For instance
1093  // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
1094  // if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
1095  if (is_same_dense(dst, mat)) {
1096  // apply the permutation inplace
1097  Matrix<bool, PermutationType::RowsAtCompileTime, 1, 0, PermutationType::MaxRowsAtCompileTime> mask(perm.size());
1098  mask.fill(false);
1099  Index r = 0;
1100  while (r < perm.size()) {
1101  // search for the next seed
1102  while (r < perm.size() && mask[r]) r++;
1103  if (r >= perm.size()) break;
1104  // we got one, let's follow it until we are back to the seed
1105  Index k0 = r++;
1106  Index kPrev = k0;
1107  mask.coeffRef(k0) = true;
1108  for (Index k = perm.indices().coeff(k0); k != k0; k = perm.indices().coeff(k)) {
1109  Block<Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime,
1110  Side == OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
1111  .swap(Block < Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime,
1112  Side == OnTheRight
1113  ? 1
1114  : Dest::ColsAtCompileTime > (dst, ((Side == OnTheLeft) ^ Transposed) ? k0 : kPrev));
1115 
1116  mask.coeffRef(k) = true;
1117  kPrev = k;
1118  }
1119  }
1120  } else {
1121  for (Index i = 0; i < n; ++i) {
1122  Block<Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side == OnTheRight ? 1 : Dest::ColsAtCompileTime>(
1123  dst, ((Side == OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
1124 
1125  =
1126 
1127  Block < const MatrixTypeCleaned,
1128  Side == OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,
1129  Side == OnTheRight ? 1
1130  : MatrixTypeCleaned::ColsAtCompileTime >
1131  (mat, ((Side == OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
1132  }
1133  }
1134  }
1135 };
1136 
1137 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1138 struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag> {
1139  template <typename Dest>
1140  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1141  permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
1142  }
1143 };
1144 
1145 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1146 struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag> {
1147  template <typename Dest>
1148  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1149  permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1150  }
1151 };
1152 
1153 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1154 struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag> {
1155  template <typename Dest>
1156  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs) {
1157  permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1158  }
1159 };
1160 
1161 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1162 struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag> {
1163  template <typename Dest>
1164  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs) {
1165  permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1166  }
1167 };
1168 
1169 /***************************************************************************
1170  * Products with transpositions matrices
1171  ***************************************************************************/
1172 
1173 // FIXME could we unify Transpositions and Permutation into a single "shape"??
1174 
1179 template <typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
1180 struct transposition_matrix_product {
1181  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
1182  typedef remove_all_t<MatrixType> MatrixTypeCleaned;
1183 
1184  template <typename Dest, typename TranspositionType>
1185  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr,
1186  const ExpressionType& xpr) {
1187  MatrixType mat(xpr);
1188  typedef typename TranspositionType::StorageIndex StorageIndex;
1189  const Index size = tr.size();
1190  StorageIndex j = 0;
1191 
1192  if (!is_same_dense(dst, mat)) dst = mat;
1193 
1194  for (Index k = (Transposed ? size - 1 : 0); Transposed ? k >= 0 : k < size; Transposed ? --k : ++k)
1195  if (Index(j = tr.coeff(k)) != k) {
1196  if (Side == OnTheLeft)
1197  dst.row(k).swap(dst.row(j));
1198  else if (Side == OnTheRight)
1199  dst.col(k).swap(dst.col(j));
1200  }
1201  }
1202 };
1203 
1204 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1205 struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag> {
1206  template <typename Dest>
1207  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1208  transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
1209  }
1210 };
1211 
1212 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1213 struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag> {
1214  template <typename Dest>
1215  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1216  transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1217  }
1218 };
1219 
1220 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1221 struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag> {
1222  template <typename Dest>
1223  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs) {
1224  transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1225  }
1226 };
1227 
1228 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1229 struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag> {
1230  template <typename Dest>
1231  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs) {
1232  transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1233  }
1234 };
1235 
1236 /***************************************************************************
1237  * skew symmetric products
1238  * for now we just call the generic implementation
1239  ***************************************************************************/
1240 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1241 struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, MatrixShape, ProductTag> {
1242  template <typename Dest>
1243  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1244  generic_product_impl<typename Lhs::DenseMatrixType, Rhs, DenseShape, MatrixShape, ProductTag>::evalTo(dst, lhs,
1245  rhs);
1246  }
1247 };
1248 
1249 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1250 struct generic_product_impl<Lhs, Rhs, MatrixShape, SkewSymmetricShape, ProductTag> {
1251  template <typename Dest>
1252  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1253  generic_product_impl<Lhs, typename Rhs::DenseMatrixType, MatrixShape, DenseShape, ProductTag>::evalTo(dst, lhs,
1254  rhs);
1255  }
1256 };
1257 
1258 template <typename Lhs, typename Rhs, int ProductTag>
1259 struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, SkewSymmetricShape, ProductTag> {
1260  template <typename Dest>
1261  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1262  generic_product_impl<typename Lhs::DenseMatrixType, typename Rhs::DenseMatrixType, DenseShape, DenseShape,
1263  ProductTag>::evalTo(dst, lhs, rhs);
1264  }
1265 };
1266 
1267 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1268 struct generic_product_impl<Lhs, Rhs, MatrixShape, HomogeneousShape, ProductTag>
1269  : generic_product_impl<Lhs, typename Rhs::PlainObject, MatrixShape, DenseShape, ProductTag> {};
1270 
1271 template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1272 struct generic_product_impl<Lhs, Rhs, HomogeneousShape, MatrixShape, ProductTag>
1273  : generic_product_impl<typename Lhs::PlainObject, Rhs, DenseShape, MatrixShape, ProductTag> {};
1274 
1275 template <typename Lhs, typename Rhs, int ProductTag>
1276 struct generic_product_impl<Lhs, Rhs, PermutationShape, HomogeneousShape, ProductTag>
1277  : generic_product_impl<Lhs, Rhs, PermutationShape, DenseShape, ProductTag> {};
1278 
1279 template <typename Lhs, typename Rhs, int ProductTag>
1280 struct generic_product_impl<Lhs, Rhs, HomogeneousShape, PermutationShape, ProductTag>
1281  : generic_product_impl<Lhs, Rhs, DenseShape, PermutationShape, ProductTag> {};
1282 
1283 } // end namespace internal
1284 
1285 } // end namespace Eigen
1286 
1287 #endif // EIGEN_PRODUCT_EVALUATORS_H
Definition: Constants.h:318
const int HugeCost
Definition: Constants.h:48
Definition: Constants.h:333
Definition: Constants.h:237
Namespace containing all symbols from the Eigen library.
Definition: B01_Experimental.dox:1
const unsigned int RowMajorBit
Definition: Constants.h:70
const unsigned int PacketAccessBit
Definition: Constants.h:97
void operator()(const Dst &dst, const Src &src) const
Definition: ProductEvaluators.h:316
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82
std::enable_if_t< std::is_base_of< DenseBase< std::decay_t< DerivedA > >, std::decay_t< DerivedA > >::value &&std::is_base_of< DenseBase< std::decay_t< DerivedB > >, std::decay_t< DerivedB > >::value, void > swap(DerivedA &&a, DerivedB &&b)
Definition: DenseBase.h:667
Definition: Constants.h:331
Definition: Constants.h:320
const int Dynamic
Definition: Constants.h:25
const unsigned int EvalBeforeNestingBit
Definition: Constants.h:74
const unsigned int ActualPacketAccessBit
Definition: Constants.h:108
const unsigned int LinearAccessBit
Definition: Constants.h:133