$darkmode
Eigen  5.0.1-dev
PartialReduxEvaluator.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2011-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_PARTIALREDUX_H
11 #define EIGEN_PARTIALREDUX_H
12 
13 // IWYU pragma: private
14 #include "./InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 
20 /***************************************************************************
21  *
22  * This file provides evaluators for partial reductions.
23  * There are two modes:
24  *
25  * - scalar path: simply calls the respective function on the column or row.
26  * -> nothing special here, all the tricky part is handled by the return
27  * types of VectorwiseOp's members. They embed the functor calling the
28  * respective DenseBase's member function.
29  *
30  * - vectorized path: implements packet-wise reductions followed by
31  * some (optional) processing of the outcome, e.g., division by n for mean.
32  *
33  * For the vectorized path let's observe that the packet-size and outer-unrolling
34  * are both decided by the assignment logic. So all we have to do is to decide
35  * on the inner unrolling.
36  *
37  * For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h,
38  * but we need to be careful to specify the correct increment.
39  *
40  ***************************************************************************/
41 
42 /* logic deciding a strategy for unrolling of vectorized paths */
43 template <typename Func, typename Evaluator>
44 struct packetwise_redux_traits {
45  enum {
46  OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime,
47  Cost = OuterSize == Dynamic ? HugeCost
48  : OuterSize * Evaluator::CoeffReadCost + (OuterSize - 1) * functor_traits<Func>::Cost,
49  Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling
50  };
51 };
52 
53 /* Value to be returned when size==0 , by default let's return 0 */
54 template <typename PacketType, typename Func>
55 EIGEN_DEVICE_FUNC PacketType packetwise_redux_empty_value(const Func&) {
56  const typename unpacket_traits<PacketType>::type zero(0);
57  return pset1<PacketType>(zero);
58 }
59 
60 /* For products the default is 1 */
61 template <typename PacketType, typename Scalar>
62 EIGEN_DEVICE_FUNC PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar, Scalar>&) {
63  return pset1<PacketType>(Scalar(1));
64 }
65 
66 /* Perform the actual reduction */
67 template <typename Func, typename Evaluator, int Unrolling = packetwise_redux_traits<Func, Evaluator>::Unrolling>
68 struct packetwise_redux_impl;
69 
70 /* Perform the actual reduction with unrolling */
71 template <typename Func, typename Evaluator>
72 struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling> {
73  typedef redux_novec_unroller<Func, Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
74  typedef typename Evaluator::Scalar Scalar;
75 
76  template <typename PacketType>
77  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func& func, Index /*size*/) {
78  return redux_vec_unroller<Func, Evaluator, 0,
79  packetwise_redux_traits<Func, Evaluator>::OuterSize>::template run<PacketType>(eval,
80  func);
81  }
82 };
83 
84 /* Add a specialization of redux_vec_unroller for size==0 at compiletime.
85  * This specialization is not required for general reductions, which is
86  * why it is defined here.
87  */
88 template <typename Func, typename Evaluator, Index Start>
89 struct redux_vec_unroller<Func, Evaluator, Start, 0> {
90  template <typename PacketType>
91  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator&, const Func& f) {
92  return packetwise_redux_empty_value<PacketType>(f);
93  }
94 };
95 
96 /* Perform the actual reduction for dynamic sizes */
97 template <typename Func, typename Evaluator>
98 struct packetwise_redux_impl<Func, Evaluator, NoUnrolling> {
99  typedef typename Evaluator::Scalar Scalar;
100  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
101 
102  template <typename PacketType>
103  EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size) {
104  if (size == 0) return packetwise_redux_empty_value<PacketType>(func);
105 
106  const Index size4 = 1 + numext::round_down(size - 1, 4);
107  PacketType p = eval.template packetByOuterInner<Unaligned, PacketType>(0, 0);
108  // This loop is optimized for instruction pipelining:
109  // - each iteration generates two independent instructions
110  // - thanks to branch prediction and out-of-order execution we have independent instructions across loops
111  for (Index i = 1; i < size4; i += 4)
112  p = func.packetOp(
113  p, func.packetOp(func.packetOp(eval.template packetByOuterInner<Unaligned, PacketType>(i + 0, 0),
114  eval.template packetByOuterInner<Unaligned, PacketType>(i + 1, 0)),
115  func.packetOp(eval.template packetByOuterInner<Unaligned, PacketType>(i + 2, 0),
116  eval.template packetByOuterInner<Unaligned, PacketType>(i + 3, 0))));
117  for (Index i = size4; i < size; ++i)
118  p = func.packetOp(p, eval.template packetByOuterInner<Unaligned, PacketType>(i, 0));
119  return p;
120  }
121 };
122 
123 template <typename Func, typename Evaluator>
124 struct packetwise_segment_redux_impl {
125  typedef typename Evaluator::Scalar Scalar;
126  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
127 
128  template <typename PacketType>
129  EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size, Index begin,
130  Index count) {
131  if (size == 0) return packetwise_redux_empty_value<PacketType>(func);
132 
133  PacketType p = eval.template packetSegmentByOuterInner<Unaligned, PacketType>(0, 0, begin, count);
134  for (Index i = 1; i < size; ++i)
135  p = func.packetOp(p, eval.template packetSegmentByOuterInner<Unaligned, PacketType>(i, 0, begin, count));
136  return p;
137  }
138 };
139 
140 template <typename ArgType, typename MemberOp, int Direction>
141 struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
142  : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> > {
143  typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
144  typedef typename internal::nested_eval<ArgType, 1>::type ArgTypeNested;
145  typedef add_const_on_value_type_t<ArgTypeNested> ConstArgTypeNested;
146  typedef internal::remove_all_t<ArgTypeNested> ArgTypeNestedCleaned;
147  typedef typename ArgType::Scalar InputScalar;
148  typedef typename XprType::Scalar Scalar;
149  enum {
150  TraversalSize = Direction == int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
151  };
152  typedef typename MemberOp::template Cost<int(TraversalSize)> CostOpType;
153  enum {
154  CoeffReadCost = TraversalSize == Dynamic ? HugeCost
155  : TraversalSize == 0
156  ? 1
157  : int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value),
158 
159  ArgFlags_ = evaluator<ArgType>::Flags,
160 
161  Vectorizable_ = bool(int(ArgFlags_) & PacketAccessBit) && bool(MemberOp::Vectorizable) &&
162  (Direction == int(Vertical) ? bool(ArgFlags_ & RowMajorBit) : (ArgFlags_ & RowMajorBit) == 0) &&
163  (TraversalSize != 0),
164 
165  Flags = (traits<XprType>::Flags & RowMajorBit) | (evaluator<ArgType>::Flags & (HereditaryBits & (~RowMajorBit))) |
166  (Vectorizable_ ? PacketAccessBit : 0) | LinearAccessBit,
167 
168  Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
169  };
170 
171  EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) {
172  EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize == Dynamic ? HugeCost
173  : (TraversalSize == 0 ? 1 : int(CostOpType::value)));
174  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
175  }
176 
177  typedef typename XprType::CoeffReturnType CoeffReturnType;
178 
179  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const {
180  return coeff(Direction == Vertical ? j : i);
181  }
182 
183  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const {
184  return m_functor(m_arg.template subVector<DirectionType(Direction)>(index));
185  }
186 
187  template <int LoadMode, typename PacketType>
188  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index i, Index j) const {
189  return packet<LoadMode, PacketType>(Direction == Vertical ? j : i);
190  }
191 
192  template <int LoadMode, typename PacketType>
193  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC PacketType packet(Index idx) const {
194  static constexpr int PacketSize = internal::unpacket_traits<PacketType>::size;
195  static constexpr int PanelRows = Direction == Vertical ? ArgType::RowsAtCompileTime : PacketSize;
196  static constexpr int PanelCols = Direction == Vertical ? PacketSize : ArgType::ColsAtCompileTime;
197  using PanelType = Block<const ArgTypeNestedCleaned, PanelRows, PanelCols, true /* InnerPanel */>;
198  using PanelEvaluator = typename internal::redux_evaluator<PanelType>;
199  using BinaryOp = typename MemberOp::BinaryOp;
200  using Impl = internal::packetwise_redux_impl<BinaryOp, PanelEvaluator>;
201 
202  // FIXME
203  // See bug 1612, currently if PacketSize==1 (i.e. complex<double> with 128bits registers) then the storage-order of
204  // panel get reversed and methods like packetByOuterInner do not make sense anymore in this context. So let's just
205  // by pass "vectorization" in this case:
206  if (PacketSize == 1) return internal::pset1<PacketType>(coeff(idx));
207 
208  Index startRow = Direction == Vertical ? 0 : idx;
209  Index startCol = Direction == Vertical ? idx : 0;
210  Index numRows = Direction == Vertical ? m_arg.rows() : PacketSize;
211  Index numCols = Direction == Vertical ? PacketSize : m_arg.cols();
212 
213  PanelType panel(m_arg, startRow, startCol, numRows, numCols);
214  PanelEvaluator panel_eval(panel);
215  PacketType p = Impl::template run<PacketType>(panel_eval, m_functor.binaryFunc(), m_arg.outerSize());
216  return p;
217  }
218 
219  template <int LoadMode, typename PacketType>
220  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index i, Index j, Index begin, Index count) const {
221  return packetSegment<LoadMode, PacketType>(Direction == Vertical ? j : i, begin, count);
222  }
223 
224  template <int LoadMode, typename PacketType>
225  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC PacketType packetSegment(Index idx, Index begin, Index count) const {
226  static constexpr int PanelRows = Direction == Vertical ? ArgType::RowsAtCompileTime : Dynamic;
227  static constexpr int PanelCols = Direction == Vertical ? Dynamic : ArgType::ColsAtCompileTime;
228  using PanelType = Block<const ArgTypeNestedCleaned, PanelRows, PanelCols, true /* InnerPanel */>;
229  using PanelEvaluator = typename internal::redux_evaluator<PanelType>;
230  using BinaryOp = typename MemberOp::BinaryOp;
231  using Impl = internal::packetwise_segment_redux_impl<BinaryOp, PanelEvaluator>;
232 
233  Index startRow = Direction == Vertical ? 0 : idx;
234  Index startCol = Direction == Vertical ? idx : 0;
235  Index numRows = Direction == Vertical ? m_arg.rows() : begin + count;
236  Index numCols = Direction == Vertical ? begin + count : m_arg.cols();
237 
238  PanelType panel(m_arg, startRow, startCol, numRows, numCols);
239  PanelEvaluator panel_eval(panel);
240  PacketType p = Impl::template run<PacketType>(panel_eval, m_functor.binaryFunc(), m_arg.outerSize(), begin, count);
241  return p;
242  }
243 
244  protected:
245  ConstArgTypeNested m_arg;
246  const MemberOp m_functor;
247 };
248 
249 } // end namespace internal
250 
251 } // end namespace Eigen
252 
253 #endif // EIGEN_PARTIALREDUX_H
const int HugeCost
Definition: Constants.h:48
Definition: Constants.h:266
DirectionType
Definition: Constants.h:263
Namespace containing all symbols from the Eigen library.
Definition: B01_Experimental.dox:1
const unsigned int RowMajorBit
Definition: Constants.h:70
const unsigned int PacketAccessBit
Definition: Constants.h:97
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82
const int Dynamic
Definition: Constants.h:25
const unsigned int LinearAccessBit
Definition: Constants.h:133