$darkmode
Eigen  5.0.1-dev
GenericPacketMath.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_GENERIC_PACKET_MATH_H
12 #define EIGEN_GENERIC_PACKET_MATH_H
13 
14 // IWYU pragma: private
15 #include "./InternalHeaderCheck.h"
16 
17 namespace Eigen {
18 
19 namespace internal {
20 
29 #ifndef EIGEN_DEBUG_ALIGNED_LOAD
30 #define EIGEN_DEBUG_ALIGNED_LOAD
31 #endif
32 
33 #ifndef EIGEN_DEBUG_UNALIGNED_LOAD
34 #define EIGEN_DEBUG_UNALIGNED_LOAD
35 #endif
36 
37 #ifndef EIGEN_DEBUG_ALIGNED_STORE
38 #define EIGEN_DEBUG_ALIGNED_STORE
39 #endif
40 
41 #ifndef EIGEN_DEBUG_UNALIGNED_STORE
42 #define EIGEN_DEBUG_UNALIGNED_STORE
43 #endif
44 
45 struct default_packet_traits {
46  enum {
47  // Ops that are implemented for most types.
48  HasAdd = 1,
49  HasSub = 1,
50  HasShift = 1,
51  HasMul = 1,
52  HasNegate = 1,
53  HasAbs = 1,
54  HasAbs2 = 1,
55  HasMin = 1,
56  HasMax = 1,
57  HasConj = 1,
58  HasSetLinear = 1,
59  HasSign = 1,
60  // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet
61  // types
62  HasRound = 1,
63 
64  HasArg = 0,
65  HasAbsDiff = 0,
66  HasBlend = 0,
67  // This flag is used to indicate whether packet comparison is supported.
68  // pcmp_eq and pcmp_lt should be defined for it to be true.
69  HasCmp = 0,
70 
71  HasDiv = 0,
72  HasReciprocal = 0,
73  HasSqrt = 0,
74  HasRsqrt = 0,
75  HasCbrt = 0,
76  HasExp = 0,
77  HasExpm1 = 0,
78  HasLog = 0,
79  HasLog1p = 0,
80  HasLog10 = 0,
81  HasPow = 0,
82  HasSin = 0,
83  HasCos = 0,
84  HasTan = 0,
85  HasASin = 0,
86  HasACos = 0,
87  HasATan = 0,
88  HasATanh = 0,
89  HasSinh = 0,
90  HasCosh = 0,
91  HasTanh = 0,
92  HasLGamma = 0,
93  HasDiGamma = 0,
94  HasZeta = 0,
95  HasPolygamma = 0,
96  HasErf = 0,
97  HasErfc = 0,
98  HasNdtri = 0,
99  HasBessel = 0,
100  HasIGamma = 0,
101  HasIGammaDerA = 0,
102  HasGammaSampleDerAlpha = 0,
103  HasIGammac = 0,
104  HasBetaInc = 0
105  };
106 };
107 
108 template <typename T>
109 struct packet_traits : default_packet_traits {
110  typedef T type;
111  typedef T half;
112  enum {
113  Vectorizable = 0,
114  size = 1,
115  AlignedOnScalar = 0,
116  };
117  enum {
118  HasAdd = 0,
119  HasSub = 0,
120  HasMul = 0,
121  HasNegate = 0,
122  HasAbs = 0,
123  HasAbs2 = 0,
124  HasMin = 0,
125  HasMax = 0,
126  HasConj = 0,
127  HasSetLinear = 0
128  };
129 };
130 
131 template <typename T>
132 struct packet_traits<const T> : packet_traits<T> {};
133 
134 template <typename T>
135 struct unpacket_traits {
136  typedef T type;
137  typedef T half;
138  typedef typename numext::get_integer_by_size<sizeof(T)>::signed_type integer_packet;
139  enum {
140  size = 1,
141  alignment = alignof(T),
142  vectorizable = false,
143  masked_load_available = false,
144  masked_store_available = false
145  };
146 };
147 
148 template <typename T>
149 struct unpacket_traits<const T> : unpacket_traits<T> {};
150 
154 template <typename Packet>
155 struct is_scalar {
156  using Scalar = typename unpacket_traits<Packet>::type;
157  enum { value = internal::is_same<Packet, Scalar>::value };
158 };
159 
160 // automatically and succinctly define combinations of pcast<SrcPacket,TgtPacket> when
161 // 1) the packets are the same type, or
162 // 2) the packets differ only in sign.
163 // In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast)
164 template <typename SrcPacket, typename TgtPacket,
165  bool Scalar = is_scalar<SrcPacket>::value && is_scalar<TgtPacket>::value>
166 struct is_degenerate_helper : is_same<SrcPacket, TgtPacket> {};
167 template <>
168 struct is_degenerate_helper<int8_t, uint8_t, true> : std::true_type {};
169 template <>
170 struct is_degenerate_helper<int16_t, uint16_t, true> : std::true_type {};
171 template <>
172 struct is_degenerate_helper<int32_t, uint32_t, true> : std::true_type {};
173 template <>
174 struct is_degenerate_helper<int64_t, uint64_t, true> : std::true_type {};
175 
176 template <typename SrcPacket, typename TgtPacket>
177 struct is_degenerate_helper<SrcPacket, TgtPacket, false> {
178  using SrcScalar = typename unpacket_traits<SrcPacket>::type;
179  static constexpr int SrcSize = unpacket_traits<SrcPacket>::size;
180  using TgtScalar = typename unpacket_traits<TgtPacket>::type;
181  static constexpr int TgtSize = unpacket_traits<TgtPacket>::size;
182  static constexpr bool value = is_degenerate_helper<SrcScalar, TgtScalar, true>::value && (SrcSize == TgtSize);
183 };
184 
185 // is_degenerate<T1,T2>::value == is_degenerate<T2,T1>::value
186 template <typename SrcPacket, typename TgtPacket>
187 struct is_degenerate {
188  static constexpr bool value =
189  is_degenerate_helper<SrcPacket, TgtPacket>::value || is_degenerate_helper<TgtPacket, SrcPacket>::value;
190 };
191 
192 template <typename Packet>
193 struct is_half {
194  using Scalar = typename unpacket_traits<Packet>::type;
195  static constexpr int Size = unpacket_traits<Packet>::size;
196  using DefaultPacket = typename packet_traits<Scalar>::type;
197  static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size;
198  static constexpr bool value = Size != 1 && Size < DefaultSize;
199 };
200 
201 template <typename Src, typename Tgt>
202 struct type_casting_traits {
203  enum {
204  VectorizedCast =
205  is_degenerate<Src, Tgt>::value && packet_traits<Src>::Vectorizable && packet_traits<Tgt>::Vectorizable,
206  SrcCoeffRatio = 1,
207  TgtCoeffRatio = 1
208  };
209 };
210 
211 // provides a succinct template to define vectorized casting traits with respect to the largest accessible packet types
212 template <typename Src, typename Tgt>
213 struct vectorized_type_casting_traits {
214  enum : int {
215  DefaultSrcPacketSize = packet_traits<Src>::size,
216  DefaultTgtPacketSize = packet_traits<Tgt>::size,
217  VectorizedCast = 1,
218  SrcCoeffRatio = plain_enum_max(DefaultTgtPacketSize / DefaultSrcPacketSize, 1),
219  TgtCoeffRatio = plain_enum_max(DefaultSrcPacketSize / DefaultTgtPacketSize, 1)
220  };
221 };
222 
225 template <typename T, int unique_id = 0>
226 struct eigen_packet_wrapper {
227  EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
228  EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
229  EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default;
230  EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T& v) : m_val(v) {}
231  EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T& v) {
232  m_val = v;
233  return *this;
234  }
235 
236  T m_val;
237 };
238 
239 template <typename Target, typename Packet, bool IsSame = is_same<Target, Packet>::value>
240 struct preinterpret_generic;
241 
242 template <typename Target, typename Packet>
243 struct preinterpret_generic<Target, Packet, false> {
244  // the packets are not the same, attempt scalar bit_cast
245  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) {
246  return numext::bit_cast<Target, Packet>(a);
247  }
248 };
249 
250 template <typename Packet>
251 struct preinterpret_generic<Packet, Packet, true> {
252  // the packets are the same type: do nothing
253  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
254 };
255 
256 template <typename ComplexPacket>
257 struct preinterpret_generic<typename unpacket_traits<ComplexPacket>::as_real, ComplexPacket, false> {
258  using RealPacket = typename unpacket_traits<ComplexPacket>::as_real;
259  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealPacket run(const ComplexPacket& a) { return a.v; }
260 };
261 
263 template <typename Target, typename Packet>
264 EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) {
265  return preinterpret_generic<Target, Packet>::run(a);
266 }
267 
268 template <typename SrcPacket, typename TgtPacket, bool Degenerate = is_degenerate<SrcPacket, TgtPacket>::value,
269  bool TgtIsHalf = is_half<TgtPacket>::value>
270 struct pcast_generic;
271 
272 template <typename SrcPacket, typename TgtPacket>
273 struct pcast_generic<SrcPacket, TgtPacket, false, false> {
274  // the packets are not degenerate: attempt scalar static_cast
275  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
276  return cast_impl<SrcPacket, TgtPacket>::run(a);
277  }
278 };
279 
280 template <typename Packet>
281 struct pcast_generic<Packet, Packet, true, false> {
282  // the packets are the same: do nothing
283  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
284 };
285 
286 template <typename SrcPacket, typename TgtPacket, bool TgtIsHalf>
287 struct pcast_generic<SrcPacket, TgtPacket, true, TgtIsHalf> {
288  // the packets are degenerate: preinterpret is equivalent to pcast
289  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return preinterpret<TgtPacket>(a); }
290 };
291 
293 template <typename SrcPacket, typename TgtPacket>
294 EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) {
295  return pcast_generic<SrcPacket, TgtPacket>::run(a);
296 }
297 template <typename SrcPacket, typename TgtPacket>
298 EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) {
299  return pcast_generic<SrcPacket, TgtPacket>::run(a, b);
300 }
301 template <typename SrcPacket, typename TgtPacket>
302 EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c,
303  const SrcPacket& d) {
304  return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d);
305 }
306 template <typename SrcPacket, typename TgtPacket>
307 EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d,
308  const SrcPacket& e, const SrcPacket& f, const SrcPacket& g,
309  const SrcPacket& h) {
310  return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d, e, f, g, h);
311 }
312 
313 template <typename SrcPacket, typename TgtPacket>
314 struct pcast_generic<SrcPacket, TgtPacket, false, true> {
315  // TgtPacket is a half packet of some other type
316  // perform cast and truncate result
317  using DefaultTgtPacket = typename is_half<TgtPacket>::DefaultPacket;
318  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
319  return preinterpret<TgtPacket>(pcast<SrcPacket, DefaultTgtPacket>(a));
320  }
321 };
322 
324 template <typename Packet>
325 EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) {
326  return a + b;
327 }
328 // Avoid compiler warning for boolean algebra.
329 template <>
330 EIGEN_DEVICE_FUNC inline bool padd(const bool& a, const bool& b) {
331  return a || b;
332 }
333 
338 template <typename Packet>
339 EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_fpops_available, Packet> padd(
340  const Packet& a, const Packet& b, typename unpacket_traits<Packet>::mask_t umask);
341 
343 template <typename Packet>
344 EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) {
345  return a - b;
346 }
347 
349 template <typename Packet>
350 EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) {
351  EIGEN_STATIC_ASSERT((!is_same<typename unpacket_traits<Packet>::type, bool>::value),
352  NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
353  return numext::negate(a);
354 }
355 
357 template <typename Packet>
358 EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) {
359  return numext::conj(a);
360 }
361 
363 template <typename Packet>
364 EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) {
365  return a * b;
366 }
367 // Avoid compiler warning for boolean algebra.
368 template <>
369 EIGEN_DEVICE_FUNC inline bool pmul(const bool& a, const bool& b) {
370  return a && b;
371 }
372 
374 template <typename Packet>
375 EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) {
376  return a / b;
377 }
378 // Avoid compiler warning for boolean algebra.
379 template <>
380 EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) {
381  return a && b;
382 }
383 
384 // In the generic packet case, memset to all one bits.
385 template <typename Packet, typename EnableIf = void>
386 struct ptrue_impl {
387  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
388  Packet b;
389  memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
390  return b;
391  }
392 };
393 
394 // Use a value of one for scalars.
395 template <typename Scalar>
396 struct ptrue_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
397  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&) { return Scalar(1); }
398 };
399 
400 // For booleans, we can only directly set a valid `bool` value to avoid UB.
401 template <>
402 struct ptrue_impl<bool, void> {
403  static EIGEN_DEVICE_FUNC inline bool run(const bool&) { return true; }
404 };
405 
407 template <typename Packet>
408 EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) {
409  return ptrue_impl<Packet>::run(a);
410 }
411 
412 // In the general packet case, memset to zero.
413 template <typename Packet, typename EnableIf = void>
414 struct pzero_impl {
415  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
416  Packet b;
417  memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
418  return b;
419  }
420 };
421 
422 // For scalars, explicitly set to Scalar(0), since the underlying representation
423 // for zero may not consist of all-zero bits.
424 template <typename T>
425 struct pzero_impl<T, std::enable_if_t<is_scalar<T>::value>> {
426  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(0); }
427 };
428 
430 template <typename Packet>
431 EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) {
432  return pzero_impl<Packet>::run(a);
433 }
434 
435 template <typename T>
436 struct bit_and {
437  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; }
438 };
439 
440 template <typename T>
441 struct bit_or {
442  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; }
443 };
444 
445 template <typename T>
446 struct bit_xor {
447  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; }
448 };
449 
450 template <typename T>
451 struct bit_not {
452  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; }
453 };
454 
455 template <>
456 struct bit_and<bool> {
457  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a && b; }
458 };
459 
460 template <>
461 struct bit_or<bool> {
462  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a || b; }
463 };
464 
465 template <>
466 struct bit_xor<bool> {
467  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a != b; }
468 };
469 
470 template <>
471 struct bit_not<bool> {
472  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; }
473 };
474 
475 // Use operators &, |, ^, ~.
476 template <typename T>
477 struct operator_bitwise_helper {
478  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
479  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
480  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
481  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
482 };
483 
484 // Apply binary operations byte-by-byte
485 template <typename T>
486 struct bytewise_bitwise_helper {
487  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
488  return binary(a, b, bit_and<unsigned char>());
489  }
490  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return binary(a, b, bit_or<unsigned char>()); }
491  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
492  return binary(a, b, bit_xor<unsigned char>());
493  }
494  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return unary(a, bit_not<unsigned char>()); }
495 
496  private:
497  template <typename Op>
498  EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
499  const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
500  T c;
501  unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
502  for (size_t i = 0; i < sizeof(T); ++i) {
503  *c_ptr++ = op(*a_ptr++);
504  }
505  return c;
506  }
507 
508  template <typename Op>
509  EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
510  const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
511  const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
512  T c;
513  unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
514  for (size_t i = 0; i < sizeof(T); ++i) {
515  *c_ptr++ = op(*a_ptr++, *b_ptr++);
516  }
517  return c;
518  }
519 };
520 
521 // In the general case, use byte-by-byte manipulation.
522 template <typename T, typename EnableIf = void>
523 struct bitwise_helper : public bytewise_bitwise_helper<T> {};
524 
525 // For integers or non-trivial scalars, use binary operators.
526 template <typename T>
527 struct bitwise_helper<T, typename std::enable_if_t<is_scalar<T>::value &&
528  (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>>
529  : public operator_bitwise_helper<T> {};
530 
532 template <typename Packet>
533 EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) {
534  return bitwise_helper<Packet>::bitwise_and(a, b);
535 }
536 
538 template <typename Packet>
539 EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) {
540  return bitwise_helper<Packet>::bitwise_or(a, b);
541 }
542 
544 template <typename Packet>
545 EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) {
546  return bitwise_helper<Packet>::bitwise_xor(a, b);
547 }
548 
550 template <typename Packet>
551 EIGEN_DEVICE_FUNC inline Packet pnot(const Packet& a) {
552  return bitwise_helper<Packet>::bitwise_not(a);
553 }
554 
556 template <typename Packet>
557 EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) {
558  return pand(a, pnot(b));
559 }
560 
562 template <typename Packet>
563 EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
564  return a < b ? ptrue(a) : pzero(a);
565 }
566 
568 template <typename Packet>
569 EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
570  return a == b ? ptrue(a) : pzero(a);
571 }
572 
574 template <typename Packet>
575 EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
576  return por(pcmp_eq(a, b), pcmp_lt(a, b));
577 }
578 
580 template <typename Packet>
581 EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
582  return a >= b ? pzero(a) : ptrue(a);
583 }
584 
585 // In the general case, use bitwise select.
586 template <typename Packet, bool is_scalar = is_scalar<Packet>::value>
587 struct pselect_impl {
588  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
589  return por(pand(a, mask), pandnot(b, mask));
590  }
591 };
592 
593 // For scalars, use ternary select.
594 template <typename Packet>
595 struct pselect_impl<Packet, true> {
596  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
597  return numext::select(mask, a, b);
598  }
599 };
600 
602 template <typename Packet>
603 EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) {
604  return pselect_impl<Packet>::run(mask, a, b);
605 }
606 
607 template <>
608 EIGEN_DEVICE_FUNC inline bool pselect<bool>(const bool& cond, const bool& a, const bool& b) {
609  return cond ? a : b;
610 }
611 
614 template <int NaNPropagation, bool IsInteger>
615 struct pminmax_impl {
616  template <typename Packet, typename Op>
617  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
618  return op(a, b);
619  }
620 };
621 
624 template <>
625 struct pminmax_impl<PropagateNaN, false> {
626  template <typename Packet, typename Op>
627  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
628  Packet not_nan_mask_a = pcmp_eq(a, a);
629  Packet not_nan_mask_b = pcmp_eq(b, b);
630  return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), b), a);
631  }
632 };
633 
637 template <>
638 struct pminmax_impl<PropagateNumbers, false> {
639  template <typename Packet, typename Op>
640  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
641  Packet not_nan_mask_a = pcmp_eq(a, a);
642  Packet not_nan_mask_b = pcmp_eq(b, b);
643  return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), a), b);
644  }
645 };
646 
647 #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& aa, const Type& bb) { return Func(aa, bb); }
648 
651 template <typename Packet>
652 EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
653  return numext::mini(a, b);
654 }
655 
658 template <int NaNPropagation, typename Packet>
659 EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
660  constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
661  return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
662 }
663 
666 template <typename Packet>
667 EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
668  return numext::maxi(a, b);
669 }
670 
673 template <int NaNPropagation, typename Packet>
674 EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
675  constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
676  return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax<Packet>)));
677 }
678 
680 template <typename Packet>
681 EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) {
682  return numext::abs(a);
683 }
684 template <>
685 EIGEN_DEVICE_FUNC inline unsigned int pabs(const unsigned int& a) {
686  return a;
687 }
688 template <>
689 EIGEN_DEVICE_FUNC inline unsigned long pabs(const unsigned long& a) {
690  return a;
691 }
692 template <>
693 EIGEN_DEVICE_FUNC inline unsigned long long pabs(const unsigned long long& a) {
694  return a;
695 }
696 
698 template <typename Packet>
699 EIGEN_DEVICE_FUNC inline Packet paddsub(const Packet& a, const Packet& b) {
700  return pselect(peven_mask(a), padd(a, b), psub(a, b));
701 }
702 
704 template <typename Packet>
705 EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) {
706  using numext::arg;
707  return arg(a);
708 }
709 
711 template <int N, typename T>
712 EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) {
713  return numext::arithmetic_shift_right(a, N);
714 }
715 
717 template <int N, typename T>
718 EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) {
719  return numext::logical_shift_right(a, N);
720 }
721 
723 template <int N, typename T>
724 EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) {
725  return numext::logical_shift_left(a, N);
726 }
727 
731 template <typename Packet>
732 EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
733  int exp;
734  EIGEN_USING_STD(frexp);
735  Packet result = static_cast<Packet>(frexp(a, &exp));
736  exponent = static_cast<Packet>(exp);
737  return result;
738 }
739 
743 template <typename Packet>
744 EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent) {
745  EIGEN_USING_STD(ldexp)
746  return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
747 }
748 
750 template <typename Packet>
751 EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) {
752  return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b));
753 }
754 
756 template <typename Packet>
757 EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits<Packet>::type* from) {
758  return *from;
759 }
760 
765 template <typename Packet>
766 EIGEN_DEVICE_FUNC inline Packet pload_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
767  const Index offset = 0) {
768  const Index packet_size = unpacket_traits<Packet>::size;
769  eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
770  typedef typename unpacket_traits<Packet>::type Scalar;
771  EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
772  for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
773  elements[i] = from[i - offset];
774  }
775  return pload<Packet>(elements);
776 }
777 
779 template <typename Packet>
780 EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits<Packet>::type* from) {
781  return *from;
782 }
783 
786 template <typename Packet>
787 EIGEN_DEVICE_FUNC inline Packet ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
788  const Index offset = 0) {
789  const Index packet_size = unpacket_traits<Packet>::size;
790  eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
791  typedef typename unpacket_traits<Packet>::type Scalar;
792  EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
793  for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
794  elements[i] = from[i - offset];
795  }
796  return pload<Packet>(elements);
797 }
798 
803 template <typename Packet>
804 EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_load_available, Packet> ploadu(
805  const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
806 
808 template <typename Packet>
809 EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits<Packet>::type& a) {
810  return a;
811 }
812 
814 template <typename Packet, typename BitsType>
815 EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a);
816 
818 template <typename Packet>
819 EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits<Packet>::type* a) {
820  return pset1<Packet>(*a);
821 }
822 
828 template <typename Packet>
829 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) {
830  return *from;
831 }
832 
839 template <typename Packet>
840 EIGEN_DEVICE_FUNC inline Packet ploadquad(const typename unpacket_traits<Packet>::type* from) {
841  return pload1<Packet>(from);
842 }
843 
853 template <typename Packet>
854 EIGEN_DEVICE_FUNC inline void pbroadcast4(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1,
855  Packet& a2, Packet& a3) {
856  a0 = pload1<Packet>(a + 0);
857  a1 = pload1<Packet>(a + 1);
858  a2 = pload1<Packet>(a + 2);
859  a3 = pload1<Packet>(a + 3);
860 }
861 
869 template <typename Packet>
870 EIGEN_DEVICE_FUNC inline void pbroadcast2(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1) {
871  a0 = pload1<Packet>(a + 0);
872  a1 = pload1<Packet>(a + 1);
873 }
874 
876 template <typename Packet>
877 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits<Packet>::type& a) {
878  return a;
879 }
880 
881 template <typename Packet, typename EnableIf = void>
882 struct peven_mask_impl {
883  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet&) {
884  typedef typename unpacket_traits<Packet>::type Scalar;
885  const size_t n = unpacket_traits<Packet>::size;
886  EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
887  for (size_t i = 0; i < n; ++i) {
888  memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
889  }
890  return ploadu<Packet>(elements);
891  }
892 };
893 
894 template <typename Scalar>
895 struct peven_mask_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
896  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar&) { return Scalar(1); }
897 };
898 
901 template <typename Packet>
902 EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& a) {
903  return peven_mask_impl<Packet>::run(a);
904 }
905 
907 template <typename Scalar, typename Packet>
908 EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) {
909  (*to) = from;
910 }
911 
915 template <typename Scalar, typename Packet>
916 EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
917  const Index packet_size = unpacket_traits<Packet>::size;
918  eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
919  EIGEN_ALIGN_MAX Scalar elements[packet_size];
920  pstore<Scalar>(elements, from);
921  for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
922  to[i] = elements[i + offset];
923  }
924 }
925 
927 template <typename Scalar, typename Packet>
928 EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) {
929  (*to) = from;
930 }
931 
933 template <typename Scalar, typename Packet>
934 EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
935  const Index packet_size = unpacket_traits<Packet>::size;
936  eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
937  EIGEN_ALIGN_MAX Scalar elements[packet_size];
938  pstore<Scalar>(elements, from);
939  for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
940  to[i] = elements[i + offset];
941  }
942 }
943 
948 template <typename Scalar, typename Packet>
949 EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_store_available, void> pstoreu(
950  Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
951 
952 template <typename Scalar, typename Packet>
953 EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) {
954  return ploadu<Packet>(from);
955 }
956 
957 template <typename Scalar, typename Packet>
958 EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n) {
959  const Index packet_size = unpacket_traits<Packet>::size;
960  EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
961  for (Index i = 0; i < numext::mini(n, packet_size); i++) {
962  elements[i] = from[i * stride];
963  }
964  return pload<Packet>(elements);
965 }
966 
967 template <typename Scalar, typename Packet>
968 EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) {
969  pstore(to, from);
970 }
971 
972 template <typename Scalar, typename Packet>
973 EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n) {
974  const Index packet_size = unpacket_traits<Packet>::size;
975  EIGEN_ALIGN_MAX Scalar elements[packet_size];
976  pstore<Scalar>(elements, from);
977  for (Index i = 0; i < numext::mini(n, packet_size); i++) {
978  to[i * stride] = elements[i];
979  }
980 }
981 
983 template <typename Scalar>
984 EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) {
985 #if defined(EIGEN_HIP_DEVICE_COMPILE)
986  // do nothing
987 #elif defined(EIGEN_CUDA_ARCH)
988 #if defined(__LP64__) || EIGEN_OS_WIN64
989  // 64-bit pointer operand constraint for inlined asm
990  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
991 #else
992  // 32-bit pointer operand constraint for inlined asm
993  asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
994 #endif
995 #elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
996  __builtin_prefetch(addr);
997 #endif
998 }
999 
1001 template <typename Packet>
1002 EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) {
1003  return a;
1004 }
1005 
1007 template <typename Packet>
1008 EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
1009  return Packet(numext::imag(a), numext::real(a));
1010 }
1011 
1012 /**************************
1013  * Special math functions
1014  ***************************/
1015 
1017 template <typename Packet>
1018 EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) {
1019  return pandnot(ptrue(a), pcmp_eq(a, a));
1020 }
1021 
1023 template <typename Packet>
1024 EIGEN_DEVICE_FUNC inline Packet pisinf(const Packet& a) {
1025  using Scalar = typename unpacket_traits<Packet>::type;
1026  constexpr Scalar inf = NumTraits<Scalar>::infinity();
1027  return pcmp_eq(pabs(a), pset1<Packet>(inf));
1028 }
1029 
1031 template <typename Packet>
1032 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) {
1033  EIGEN_USING_STD(sin);
1034  return sin(a);
1035 }
1036 
1038 template <typename Packet>
1039 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet& a) {
1040  EIGEN_USING_STD(cos);
1041  return cos(a);
1042 }
1043 
1045 template <typename Packet>
1046 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet& a) {
1047  EIGEN_USING_STD(tan);
1048  return tan(a);
1049 }
1050 
1052 template <typename Packet>
1053 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet& a) {
1054  EIGEN_USING_STD(asin);
1055  return asin(a);
1056 }
1057 
1059 template <typename Packet>
1060 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) {
1061  EIGEN_USING_STD(acos);
1062  return acos(a);
1063 }
1064 
1066 template <typename Packet>
1067 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet& a) {
1068  EIGEN_USING_STD(sinh);
1069  return sinh(a);
1070 }
1071 
1073 template <typename Packet>
1074 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet& a) {
1075  EIGEN_USING_STD(cosh);
1076  return cosh(a);
1077 }
1078 
1080 template <typename Packet>
1081 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet& a) {
1082  EIGEN_USING_STD(atan);
1083  return atan(a);
1084 }
1085 
1087 template <typename Packet>
1088 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) {
1089  EIGEN_USING_STD(tanh);
1090  return tanh(a);
1091 }
1092 
1094 template <typename Packet>
1095 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& a) {
1096  EIGEN_USING_STD(atanh);
1097  return atanh(a);
1098 }
1099 
1101 template <typename Packet>
1102 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) {
1103  return numext::exp(a);
1104 }
1105 
1107 template <typename Packet>
1108 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) {
1109  return numext::exp2(a);
1110 }
1111 
1113 template <typename Packet>
1114 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet& a) {
1115  return numext::expm1(a);
1116 }
1117 
1119 template <typename Packet>
1120 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) {
1121  EIGEN_USING_STD(log);
1122  return log(a);
1123 }
1124 
1126 template <typename Packet>
1127 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet& a) {
1128  return numext::log1p(a);
1129 }
1130 
1132 template <typename Packet>
1133 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) {
1134  EIGEN_USING_STD(log10);
1135  return log10(a);
1136 }
1137 
1139 template <typename Packet>
1140 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) {
1141  using Scalar = typename internal::unpacket_traits<Packet>::type;
1142  using RealScalar = typename NumTraits<Scalar>::Real;
1143  return pmul(pset1<Packet>(Scalar(RealScalar(EIGEN_LOG2E))), plog(a));
1144 }
1145 
1147 template <typename Packet>
1148 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) {
1149  return numext::sqrt(a);
1150 }
1151 
1153 template <typename Packet>
1154 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& a) {
1155  return numext::cbrt(a);
1156 }
1157 
1158 template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1159  bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1160 struct nearest_integer_packetop_impl {
1161  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); }
1162  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); }
1163  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); }
1164  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); }
1165  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); }
1166 };
1167 
1169 template <typename Packet>
1170 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) {
1171  return nearest_integer_packetop_impl<Packet>::run_round(a);
1172 }
1173 
1175 template <typename Packet>
1176 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) {
1177  return nearest_integer_packetop_impl<Packet>::run_floor(a);
1178 }
1179 
1182 template <typename Packet>
1183 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) {
1184  return nearest_integer_packetop_impl<Packet>::run_rint(a);
1185 }
1186 
1188 template <typename Packet>
1189 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) {
1190  return nearest_integer_packetop_impl<Packet>::run_ceil(a);
1191 }
1192 
1194 template <typename Packet>
1195 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) {
1196  return nearest_integer_packetop_impl<Packet>::run_trunc(a);
1197 }
1198 
1199 template <typename Packet, typename EnableIf = void>
1200 struct psign_impl {
1201  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { return numext::sign(a); }
1202 };
1203 
1205 template <typename Packet>
1206 EIGEN_DEVICE_FUNC inline Packet psign(const Packet& a) {
1207  return psign_impl<Packet>::run(a);
1208 }
1209 
1210 template <>
1211 EIGEN_DEVICE_FUNC inline bool psign(const bool& a) {
1212  return a;
1213 }
1214 
1216 template <typename Packet>
1217 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) {
1218  return a;
1219 }
1220 
1225 template <typename Packet>
1226 EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size % 8) == 0,
1227  typename unpacket_traits<Packet>::half, Packet>
1228 predux_half_dowto4(const Packet& a) {
1229  return a;
1230 }
1231 
1232 // Slow generic implementation of Packet reduction.
1233 template <typename Packet, typename Op>
1234 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_helper(const Packet& a, Op op) {
1235  typedef typename unpacket_traits<Packet>::type Scalar;
1236  const size_t n = unpacket_traits<Packet>::size;
1237  EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
1238  pstoreu<Scalar>(elements, a);
1239  for (size_t k = n / 2; k > 0; k /= 2) {
1240  for (size_t i = 0; i < k; ++i) {
1241  elements[i] = op(elements[i], elements[i + k]);
1242  }
1243  }
1244  return elements[0];
1245 }
1246 
1248 template <typename Packet>
1249 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a) {
1250  return a;
1251 }
1252 
1254 template <typename Packet>
1255 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) {
1256  typedef typename unpacket_traits<Packet>::type Scalar;
1257  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
1258 }
1259 
1261 template <typename Packet>
1262 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1263  typedef typename unpacket_traits<Packet>::type Scalar;
1264  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<Scalar>)));
1265 }
1266 
1268 template <typename Packet>
1269 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1270  typedef typename unpacket_traits<Packet>::type Scalar;
1271  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<Scalar>)));
1272 }
1273 
1274 template <int NaNPropagation, typename Packet>
1275 struct predux_min_max_helper_impl {
1276  using Scalar = typename unpacket_traits<Packet>::type;
1277  static constexpr bool UsePredux_ = NaNPropagation == PropagateFast || NumTraits<Scalar>::IsInteger;
1278  template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
1279  static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
1280  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
1281  }
1282  template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
1283  static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
1284  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
1285  }
1286  template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
1287  static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
1288  return predux_min(a);
1289  }
1290  template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
1291  static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
1292  return predux_max(a);
1293  }
1294 };
1295 
1296 template <int NaNPropagation, typename Packet>
1297 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1298  return predux_min_max_helper_impl<NaNPropagation, Packet>::run_min(a);
1299 }
1300 
1301 template <int NaNPropagation, typename Packet>
1302 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1303  return predux_min_max_helper_impl<NaNPropagation, Packet>::run_max(a);
1304 }
1305 
1306 #undef EIGEN_BINARY_OP_NAN_PROPAGATION
1307 
1311 // not needed yet
1312 // template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
1313 // { return bool(a); }
1314 
1318 template <typename Packet>
1319 EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) {
1320  // Dirty but generic implementation where "true" is assumed to be non 0 and all the sames.
1321  // It is expected that "true" is either:
1322  // - Scalar(1)
1323  // - bits full of ones (NaN for floats),
1324  // - or first bit equals to 1 (1 for ints, smallest denormal for floats).
1325  // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
1326  typedef typename unpacket_traits<Packet>::type Scalar;
1327  return numext::not_equal_strict(predux(a), Scalar(0));
1328 }
1329 
1330 /***************************************************************************
1331  * The following functions might not have to be overwritten for vectorized types
1332  ***************************************************************************/
1333 
1334 template <typename Packet, typename EnableIf = void>
1335 struct pmadd_impl {
1336  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
1337  return padd(pmul(a, b), c);
1338  }
1339  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
1340  return psub(pmul(a, b), c);
1341  }
1342  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
1343  return psub(c, pmul(a, b));
1344  }
1345  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
1346  return pnegate(pmadd(a, b, c));
1347  }
1348 };
1349 
1350 template <typename Scalar>
1351 struct pmadd_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value && NumTraits<Scalar>::IsSigned>> {
1352  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
1353  return numext::madd<Scalar>(a, b, c);
1354  }
1355  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
1356  return numext::madd<Scalar>(a, b, Scalar(-c));
1357  }
1358  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
1359  return numext::madd<Scalar>(Scalar(-a), b, c);
1360  }
1361  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
1362  return -Scalar(numext::madd<Scalar>(a, b, c));
1363  }
1364 };
1365 
1366 // Multiply-add instructions.
1368 template <typename Packet>
1369 EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
1370  return pmadd_impl<Packet>::pmadd(a, b, c);
1371 }
1372 
1374 template <typename Packet>
1375 EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
1376  return pmadd_impl<Packet>::pmsub(a, b, c);
1377 }
1378 
1380 template <typename Packet>
1381 EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
1382  return pmadd_impl<Packet>::pnmadd(a, b, c);
1383 }
1384 
1386 template <typename Packet>
1387 EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
1388  return pmadd_impl<Packet>::pnmsub(a, b, c);
1389 }
1390 
1393 // NOTE: this function must really be templated on the packet type (think about different packet types for the same
1394 // scalar type)
1395 template <typename Packet>
1396 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) {
1397  pstore(to, pset1<Packet>(a));
1398 }
1399 
1402 template <typename Packet, int Alignment>
1403 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) {
1404  if (Alignment >= unpacket_traits<Packet>::alignment)
1405  return pload<Packet>(from);
1406  else
1407  return ploadu<Packet>(from);
1408 }
1409 
1412 template <typename Packet, int Alignment>
1413 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits<Packet>::type* from,
1414  const Index n, const Index offset = 0) {
1415  if (Alignment >= unpacket_traits<Packet>::alignment)
1416  return pload_partial<Packet>(from, n, offset);
1417  else
1418  return ploadu_partial<Packet>(from, n, offset);
1419 }
1420 
1423 template <typename Scalar, typename Packet, int Alignment>
1424 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) {
1425  if (Alignment >= unpacket_traits<Packet>::alignment)
1426  pstore(to, from);
1427  else
1428  pstoreu(to, from);
1429 }
1430 
1433 template <typename Scalar, typename Packet, int Alignment>
1434 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n,
1435  const Index offset = 0) {
1436  if (Alignment >= unpacket_traits<Packet>::alignment)
1437  pstore_partial(to, from, n, offset);
1438  else
1439  pstoreu_partial(to, from, n, offset);
1440 }
1441 
1447 template <typename Packet, int LoadMode>
1448 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) {
1449  return ploadt<Packet, LoadMode>(from);
1450 }
1451 
1452 /***************************************************************************
1453  * Fast complex products (GCC generates a function call which is very slow)
1454  ***************************************************************************/
1455 
1456 // Eigen+CUDA does not support complexes.
1457 #if !defined(EIGEN_GPUCC)
1458 
1459 template <>
1460 inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) {
1461  return std::complex<float>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1462 }
1463 
1464 template <>
1465 inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) {
1466  return std::complex<double>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1467 }
1468 
1469 #endif
1470 
1471 /***************************************************************************
1472  * PacketBlock, that is a collection of N packets where the number of words
1473  * in the packet is a multiple of N.
1474  ***************************************************************************/
1475 template <typename Packet, int N = unpacket_traits<Packet>::size>
1476 struct PacketBlock {
1477  Packet packet[N];
1478 };
1479 
1480 template <typename Packet>
1481 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet, 1>& /*kernel*/) {
1482  // Nothing to do in the scalar case, i.e. a 1x1 matrix.
1483 }
1484 
1485 /***************************************************************************
1486  * Selector, i.e. vector of N boolean values used to select (i.e. blend)
1487  * words from 2 packets.
1488  ***************************************************************************/
1489 template <size_t N>
1490 struct Selector {
1491  bool select[N];
1492 };
1493 
1494 template <typename Packet>
1495 EIGEN_DEVICE_FUNC inline Packet pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket,
1496  const Packet& thenPacket, const Packet& elsePacket) {
1497  return ifPacket.select[0] ? thenPacket : elsePacket;
1498 }
1499 
1501 template <typename Packet>
1502 EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
1503  using Scalar = typename unpacket_traits<Packet>::type;
1504  return pdiv(pset1<Packet>(Scalar(1)), a);
1505 }
1506 
1508 template <typename Packet>
1509 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet prsqrt(const Packet& a) {
1510  return preciprocal<Packet>(psqrt(a));
1511 }
1512 
1513 template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1514  bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1515 struct psignbit_impl;
1516 template <typename Packet, bool IsInteger>
1517 struct psignbit_impl<Packet, true, IsInteger> {
1518  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
1519 };
1520 template <typename Packet>
1521 struct psignbit_impl<Packet, false, false> {
1522  // generic implementation if not specialized in PacketMath.h
1523  // slower than arithmetic shift
1524  typedef typename unpacket_traits<Packet>::type Scalar;
1525  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
1526  const Packet cst_pos_one = pset1<Packet>(Scalar(1));
1527  const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
1528  return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
1529  }
1530 };
1531 template <typename Packet>
1532 struct psignbit_impl<Packet, false, true> {
1533  // generic implementation for integer packets
1534  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
1535 };
1537 template <typename Packet>
1538 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet psignbit(const Packet& a) {
1539  return psignbit_impl<Packet>::run(a);
1540 }
1541 
1543 template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1544 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1545  return numext::atan2(y, x);
1546 }
1547 
1549 template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1550 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1551  typedef typename internal::unpacket_traits<Packet>::type Scalar;
1552 
1553  // See https://en.cppreference.com/w/cpp/numeric/math/atan2
1554  // for how corner cases are supposed to be handled according to the
1555  // IEEE floating-point standard (IEC 60559).
1556  const Packet kSignMask = pset1<Packet>(-Scalar(0));
1557  const Packet kZero = pzero(x);
1558  const Packet kOne = pset1<Packet>(Scalar(1));
1559  const Packet kPi = pset1<Packet>(Scalar(EIGEN_PI));
1560 
1561  const Packet x_has_signbit = psignbit(x);
1562  const Packet y_signmask = pand(y, kSignMask);
1563  const Packet x_signmask = pand(x, kSignMask);
1564  const Packet result_signmask = pxor(y_signmask, x_signmask);
1565  const Packet shift = por(pand(x_has_signbit, kPi), y_signmask);
1566 
1567  const Packet x_and_y_are_same = pcmp_eq(pabs(x), pabs(y));
1568  const Packet x_and_y_are_zero = pcmp_eq(por(x, y), kZero);
1569 
1570  Packet arg = pdiv(y, x);
1571  arg = pselect(x_and_y_are_same, por(kOne, result_signmask), arg);
1572  arg = pselect(x_and_y_are_zero, result_signmask, arg);
1573 
1574  Packet result = patan(arg);
1575  result = padd(result, shift);
1576  return result;
1577 }
1578 
1580 template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1581 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1582  return Packet(numext::arg(a));
1583 }
1584 
1586 template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1587 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1588  EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex,
1589  THIS METHOD IS FOR COMPLEX TYPES ONLY)
1590  using RealPacket = typename unpacket_traits<Packet>::as_real;
1591  // a // r i r i ...
1592  RealPacket aflip = pcplxflip(a).v; // i r i r ...
1593  RealPacket result = patan2(aflip, a.v); // atan2 crap atan2 crap ...
1594  return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ...
1595 }
1596 
1599 template <typename Packet>
1600 EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1601  Index count) {
1602  using Scalar = typename unpacket_traits<Packet>::type;
1603  constexpr Index PacketSize = unpacket_traits<Packet>::size;
1604  eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
1605  Scalar aux[PacketSize] = {};
1606  for (Index k = begin; k < begin + count; k++) {
1607  aux[k] = from[k];
1608  }
1609  return ploadu<Packet>(aux);
1610 }
1611 
1614 template <typename Packet>
1615 EIGEN_DEVICE_FUNC inline Packet ploadSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1616  Index count) {
1617  return ploaduSegment<Packet>(from, begin, count);
1618 }
1619 
1623 template <typename Scalar, typename Packet>
1624 EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1625  constexpr Index PacketSize = unpacket_traits<Packet>::size;
1626  eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
1627  Scalar aux[PacketSize];
1628  pstoreu<Scalar, Packet>(aux, from);
1629  for (Index k = begin; k < begin + count; k++) {
1630  to[k] = aux[k];
1631  }
1632 }
1633 
1637 template <typename Scalar, typename Packet>
1638 EIGEN_DEVICE_FUNC inline void pstoreSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1639  return pstoreuSegment(to, from, begin, count);
1640 }
1641 
1644 template <typename Packet, int Alignment>
1645 EIGEN_DEVICE_FUNC inline Packet ploadtSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1646  Index count) {
1647  constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
1648  if (Alignment >= RequiredAlignment) {
1649  return ploadSegment<Packet>(from, begin, count);
1650  } else {
1651  return ploaduSegment<Packet>(from, begin, count);
1652  }
1653 }
1654 
1657 template <typename Scalar, typename Packet, int Alignment>
1658 EIGEN_DEVICE_FUNC inline void pstoretSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1659  constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
1660  if (Alignment >= RequiredAlignment) {
1661  pstoreSegment<Scalar, Packet>(to, from, begin, count);
1662  } else {
1663  pstoreuSegment<Scalar, Packet>(to, from, begin, count);
1664  }
1665 }
1666 
1667 #ifndef EIGEN_NO_IO
1668 
1669 template <typename Packet>
1670 class StreamablePacket {
1671  public:
1672  using Scalar = typename unpacket_traits<Packet>::type;
1673  StreamablePacket(const Packet& packet) { pstoreu(v_, packet); }
1674 
1675  friend std::ostream& operator<<(std::ostream& os, const StreamablePacket& packet) {
1676  os << "{" << packet.v_[0];
1677  for (int i = 1; i < unpacket_traits<Packet>::size; ++i) {
1678  os << "," << packet.v_[i];
1679  }
1680  os << "}";
1681  return os;
1682  }
1683 
1684  private:
1685  Scalar v_[unpacket_traits<Packet>::size];
1686 };
1687 
1691 template <typename Packet>
1692 StreamablePacket<Packet> postream(const Packet& packet) {
1693  return StreamablePacket<Packet>(packet);
1694 }
1695 
1696 #endif // EIGEN_NO_IO
1697 
1698 } // end namespace internal
1699 
1700 } // end namespace Eigen
1701 
1702 #endif // EIGEN_GENERIC_PACKET_MATH_H
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tanh_op< typename Derived::Scalar >, const Derived > tanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sinh_op< typename Derived::Scalar >, const Derived > sinh(const Eigen::ArrayBase< Derived > &x)
Definition: Constants.h:340
Definition: Constants.h:344
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_arg_op< typename Derived::Scalar >, const Derived > arg(const Eigen::ArrayBase< Derived > &x)
Namespace containing all symbols from the Eigen library.
Definition: B01_Experimental.dox:1
Definition: BFloat16.h:231
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_asin_op< typename Derived::Scalar >, const Derived > asin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_acos_op< typename Derived::Scalar >, const Derived > acos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cos_op< typename Derived::Scalar >, const Derived > cos(const Eigen::ArrayBase< Derived > &x)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cosh_op< typename Derived::Scalar >, const Derived > cosh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log_op< typename Derived::Scalar >, const Derived > log(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tan_op< typename Derived::Scalar >, const Derived > tan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atanh_op< typename Derived::Scalar >, const Derived > atanh(const Eigen::ArrayBase< Derived > &x)
Definition: Constants.h:342
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atan_op< typename Derived::Scalar >, const Derived > atan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sin_op< typename Derived::Scalar >, const Derived > sin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_exp_op< typename Derived::Scalar >, const Derived > exp(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log10_op< typename Derived::Scalar >, const Derived > log10(const Eigen::ArrayBase< Derived > &x)