$darkmode
Eigen-unsupported  5.0.1-dev
TensorConversion.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
12 
13 // IWYU pragma: private
14 #include "./InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 template <typename TargetType, typename XprType>
20 struct traits<TensorConversionOp<TargetType, XprType> > {
21  // Type promotion to handle the case where the types of the lhs and the rhs are different.
22  typedef TargetType Scalar;
23  typedef typename traits<XprType>::StorageKind StorageKind;
24  typedef typename traits<XprType>::Index Index;
25  typedef typename XprType::Nested Nested;
26  typedef std::remove_reference_t<Nested> Nested_;
27  static constexpr int NumDimensions = traits<XprType>::NumDimensions;
28  static constexpr int Layout = traits<XprType>::Layout;
29  enum { Flags = 0 };
30  typedef typename TypeConversion<Scalar, typename traits<XprType>::PointerType>::type PointerType;
31 };
32 
33 template <typename TargetType, typename XprType>
34 struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense> {
35  typedef const TensorConversionOp<TargetType, XprType>& type;
36 };
37 
38 template <typename TargetType, typename XprType>
39 struct nested<TensorConversionOp<TargetType, XprType>, 1,
40  typename eval<TensorConversionOp<TargetType, XprType> >::type> {
41  typedef TensorConversionOp<TargetType, XprType> type;
42 };
43 
44 } // end namespace internal
45 
46 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
47 struct PacketConverter;
48 
49 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
50 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 1> {
51  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {}
52 
53  template <int LoadMode, typename Index>
54  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
55  return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
56  }
57 
58  private:
59  const TensorEvaluator& m_impl;
60 };
61 
62 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
63 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
64  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {}
65 
66  template <int LoadMode, typename Index>
67  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
68  const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
69 
70  SrcPacket src1 = m_impl.template packet<LoadMode>(index);
71  SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
72  TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
73  return result;
74  }
75 
76  private:
77  const TensorEvaluator& m_impl;
78 };
79 
80 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
81 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
82  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {}
83 
84  template <int LoadMode, typename Index>
85  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
86  const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
87 
88  SrcPacket src1 = m_impl.template packet<LoadMode>(index);
89  SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
90  SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
91  SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
92  TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
93  return result;
94  }
95 
96  private:
97  const TensorEvaluator& m_impl;
98 };
99 
100 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
101 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 8, 1> {
102  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {}
103 
104  template <int LoadMode, typename Index>
105  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
106  const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
107 
108  SrcPacket src1 = m_impl.template packet<LoadMode>(index);
109  SrcPacket src2 = m_impl.template packet<LoadMode>(index + 1 * SrcPacketSize);
110  SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
111  SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
112  SrcPacket src5 = m_impl.template packet<LoadMode>(index + 4 * SrcPacketSize);
113  SrcPacket src6 = m_impl.template packet<LoadMode>(index + 5 * SrcPacketSize);
114  SrcPacket src7 = m_impl.template packet<LoadMode>(index + 6 * SrcPacketSize);
115  SrcPacket src8 = m_impl.template packet<LoadMode>(index + 7 * SrcPacketSize);
116  TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4, src5, src6, src7, src8);
117  return result;
118  }
119 
120  private:
121  const TensorEvaluator& m_impl;
122 };
123 
124 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int TgtCoeffRatio>
125 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, TgtCoeffRatio> {
126  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl)
127  : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}
128 
129  template <int LoadMode, typename Index>
130  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
131  const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
132  // Only call m_impl.packet() when we have direct access to the underlying data. This
133  // ensures that we don't compute the subexpression twice. We may however load some
134  // coefficients twice, but in practice this doesn't negatively impact performance.
135  if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
136  // Force unaligned memory loads since we can't ensure alignment anymore
137  return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
138  } else {
139  const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
140  typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
141  typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
142  internal::scalar_cast_op<SrcType, TgtType> converter;
143  EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
144  EIGEN_UNROLL_LOOP
145  for (int i = 0; i < TgtPacketSize; ++i) {
146  values[i] = converter(m_impl.coeff(index + i));
147  }
148  TgtPacket rslt = internal::pload<TgtPacket>(values);
149  return rslt;
150  }
151  }
152 
153  private:
154  const TensorEvaluator& m_impl;
155  const typename TensorEvaluator::Index m_maxIndex;
156 };
157 
165 template <typename TargetType, typename XprType>
166 class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors> {
167  public:
168  typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
169  typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
170  typedef typename internal::traits<TensorConversionOp>::Index Index;
171  typedef typename internal::nested<TensorConversionOp>::type Nested;
172  typedef Scalar CoeffReturnType;
173  typedef typename NumTraits<Scalar>::Real RealScalar;
174 
175  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) : m_xpr(xpr) {}
176 
177  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }
178 
179  protected:
180  typename XprType::Nested m_xpr;
181 };
182 
183 template <bool SameType, typename Eval, typename EvalPointerType>
184 struct ConversionSubExprEval {
185  static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) {
186  impl.evalSubExprsIfNeeded(NULL);
187  return true;
188  }
189 };
190 
191 template <typename Eval, typename EvalPointerType>
192 struct ConversionSubExprEval<true, Eval, EvalPointerType> {
193  static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) { return impl.evalSubExprsIfNeeded(data); }
194 };
195 
196 #ifdef EIGEN_USE_THREADS
197 template <bool SameType, typename Eval, typename EvalPointerType, typename EvalSubExprsCallback>
198 struct ConversionSubExprEvalAsync {
199  static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType, EvalSubExprsCallback done) {
200  impl.evalSubExprsIfNeededAsync(nullptr, std::move(done));
201  }
202 };
203 
204 template <typename Eval, typename EvalPointerType, typename EvalSubExprsCallback>
205 struct ConversionSubExprEvalAsync<true, Eval, EvalPointerType, EvalSubExprsCallback> {
206  static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType data, EvalSubExprsCallback done) {
207  impl.evalSubExprsIfNeededAsync(data, std::move(done));
208  }
209 };
210 #endif
211 
212 namespace internal {
213 
214 template <typename SrcType, typename TargetType, bool IsSameT>
215 struct CoeffConv {
216  template <typename ArgType, typename Device>
217  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl,
218  Index index) {
219  internal::scalar_cast_op<SrcType, TargetType> converter;
220  return converter(impl.coeff(index));
221  }
222 };
223 
224 template <typename SrcType, typename TargetType>
225 struct CoeffConv<SrcType, TargetType, true> {
226  template <typename ArgType, typename Device>
227  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl,
228  Index index) {
229  return impl.coeff(index);
230  }
231 };
232 
233 template <typename SrcPacket, typename TargetPacket, int LoadMode, bool ActuallyVectorize, bool IsSameT>
234 struct PacketConv {
235  typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
236  typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
237 
238  static constexpr int PacketSize = internal::unpacket_traits<TargetPacket>::size;
239 
240  template <typename ArgType, typename Device>
241  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
242  Index index) {
243  internal::scalar_cast_op<SrcType, TargetType> converter;
244  EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
245  EIGEN_UNROLL_LOOP
246  for (int i = 0; i < PacketSize; ++i) {
247  values[i] = converter(impl.coeff(index + i));
248  }
249  TargetPacket rslt = internal::pload<TargetPacket>(values);
250  return rslt;
251  }
252 };
253 
254 template <typename SrcPacket, typename TargetPacket, int LoadMode, bool IsSameT>
255 struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
256  typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
257  typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
258 
259  template <typename ArgType, typename Device>
260  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
261  Index index) {
262  const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
263  const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
264  PacketConverter<TensorEvaluator<ArgType, Device>, SrcPacket, TargetPacket, SrcCoeffRatio, TgtCoeffRatio> converter(
265  impl);
266  return converter.template packet<LoadMode>(index);
267  }
268 };
269 
270 template <typename SrcPacket, typename TargetPacket, int LoadMode>
271 struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/false, /*IsSameT=*/true> {
272  typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
273  static constexpr int PacketSize = internal::unpacket_traits<TargetPacket>::size;
274 
275  template <typename ArgType, typename Device>
276  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
277  Index index) {
278  EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
279  for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index + i);
280  return internal::pload<TargetPacket>(values);
281  }
282 };
283 
284 template <typename SrcPacket, typename TargetPacket, int LoadMode>
285 struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/true, /*IsSameT=*/true> {
286  template <typename ArgType, typename Device>
287  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
288  Index index) {
289  return impl.template packet<LoadMode>(index);
290  }
291 };
292 
293 } // namespace internal
294 
295 // Eval as rvalue
296 template <typename TargetType, typename ArgType, typename Device>
297 struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> {
298  typedef TensorConversionOp<TargetType, ArgType> XprType;
299  typedef typename XprType::Index Index;
300  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
301  typedef TargetType Scalar;
302  typedef TargetType CoeffReturnType;
303  typedef internal::remove_all_t<typename internal::traits<ArgType>::Scalar> SrcType;
304  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
305  typedef typename PacketType<SrcType, Device>::type PacketSourceType;
306  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
307  static constexpr bool IsSameType = internal::is_same<TargetType, SrcType>::value;
308  typedef StorageMemory<CoeffReturnType, Device> Storage;
309  typedef typename Storage::Type EvaluatorPointerType;
310 
311  enum {
312  IsAligned = false,
313  PacketAccess =
314 #ifndef EIGEN_USE_SYCL
315  true,
316 #else
317  TensorEvaluator<ArgType, Device>::PacketAccess &
318  internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
319 #endif
320  BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
321  PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
322  RawAccess = false
323  };
324 
325  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
326  static constexpr int NumDims = internal::array_size<Dimensions>::value;
327 
328  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
329  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
330  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
331 
332  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock ArgTensorBlock;
333 
334  struct TensorConversionOpBlockFactory {
335  template <typename ArgXprType>
336  struct XprType {
337  typedef TensorConversionOp<TargetType, const ArgXprType> type;
338  };
339 
340  template <typename ArgXprType>
341  typename XprType<ArgXprType>::type expr(const ArgXprType& expr) const {
342  return typename XprType<ArgXprType>::type(expr);
343  }
344  };
345 
346  typedef internal::TensorUnaryExprBlock<TensorConversionOpBlockFactory, ArgTensorBlock> TensorBlock;
347  //===--------------------------------------------------------------------===//
348 
349  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device) {}
350 
351  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }
352 
353  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
354  return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, EvaluatorPointerType>::run(m_impl, data);
355  }
356 
357 #ifdef EIGEN_USE_THREADS
358  template <typename EvalSubExprsCallback>
359  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType data, EvalSubExprsCallback done) {
360  ConversionSubExprEvalAsync<IsSameType, TensorEvaluator<ArgType, Device>, EvaluatorPointerType,
361  EvalSubExprsCallback>::run(m_impl, data, std::move(done));
362  }
363 #endif
364 
365  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }
366 
367  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
368  return internal::CoeffConv<SrcType, TargetType, IsSameType>::run(m_impl, index);
369  }
370 
371  template <int LoadMode>
372  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
373  // If we are not going to do the cast, we just need to check that base
374  // TensorEvaluator has packet access. Otherwise we also need to make sure,
375  // that we have an implementation of vectorized cast.
376  const bool Vectorizable = IsSameType ? TensorEvaluator<ArgType, Device>::PacketAccess
377  : int(TensorEvaluator<ArgType, Device>::PacketAccess) &
378  int(internal::type_casting_traits<SrcType, TargetType>::VectorizedCast);
379 
380  return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode, Vectorizable, IsSameType>::run(m_impl,
381  index);
382  }
383 
384  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
385  const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
386  if (vectorized) {
387  const double SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
388  const double TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
389  return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
390  TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
391  } else {
392  return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
393  }
394  }
395 
396  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
397  return m_impl.getResourceRequirements();
398  }
399 
400  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
401  bool /*root_of_expr_ast*/ = false) const {
402  return TensorBlock(m_impl.block(desc, scratch), TensorConversionOpBlockFactory());
403  }
404 
405  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
406 
408  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
409 
410  protected:
411  TensorEvaluator<ArgType, Device> m_impl;
412 };
413 
414 } // end namespace Eigen
415 
416 #endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
Namespace containing all symbols from the Eigen library.
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
Tensor conversion class. This class makes it possible to vectorize type casting operations when the number of scalars per packet in the source and the destination type differ.
Definition: TensorConversion.h:166
The tensor base class.
Definition: TensorForwardDeclarations.h:68