10 #ifndef EIGEN_REDUCTIONS_AVX512_H 11 #define EIGEN_REDUCTIONS_AVX512_H 14 #include "../../InternalHeaderCheck.h" 23 EIGEN_STRONG_INLINE
int predux(
const Packet16i& a) {
24 return _mm512_reduce_add_epi32(a);
28 EIGEN_STRONG_INLINE
int predux_mul(
const Packet16i& a) {
29 return _mm512_reduce_mul_epi32(a);
33 EIGEN_STRONG_INLINE
int predux_min(
const Packet16i& a) {
34 return _mm512_reduce_min_epi32(a);
38 EIGEN_STRONG_INLINE
int predux_max(
const Packet16i& a) {
39 return _mm512_reduce_max_epi32(a);
43 EIGEN_STRONG_INLINE
bool predux_any(
const Packet16i& a) {
44 return _mm512_reduce_or_epi32(a) != 0;
50 EIGEN_STRONG_INLINE int64_t predux(
const Packet8l& a) {
51 return _mm512_reduce_add_epi64(a);
61 EIGEN_STRONG_INLINE int64_t predux_mul(
const Packet8l& a) {
62 Packet4l lane0 = _mm512_extracti64x4_epi64(a, 0);
63 Packet4l lane1 = _mm512_extracti64x4_epi64(a, 1);
64 return predux_mul(pmul(lane0, lane1));
68 EIGEN_STRONG_INLINE int64_t predux_mul<Packet8l>(
const Packet8l& a) {
69 return _mm512_reduce_mul_epi64(a);
74 EIGEN_STRONG_INLINE int64_t predux_min(
const Packet8l& a) {
75 return _mm512_reduce_min_epi64(a);
79 EIGEN_STRONG_INLINE int64_t predux_max(
const Packet8l& a) {
80 return _mm512_reduce_max_epi64(a);
84 EIGEN_STRONG_INLINE
bool predux_any(
const Packet8l& a) {
85 return _mm512_reduce_or_epi64(a) != 0;
91 EIGEN_STRONG_INLINE
float predux(
const Packet16f& a) {
92 return _mm512_reduce_add_ps(a);
96 EIGEN_STRONG_INLINE
float predux_mul(
const Packet16f& a) {
97 return _mm512_reduce_mul_ps(a);
101 EIGEN_STRONG_INLINE
float predux_min(
const Packet16f& a) {
102 return _mm512_reduce_min_ps(a);
106 EIGEN_STRONG_INLINE
float predux_min<PropagateNumbers>(
const Packet16f& a) {
107 Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
108 Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
109 return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lane0, lane1));
113 EIGEN_STRONG_INLINE
float predux_min<PropagateNaN>(
const Packet16f& a) {
114 Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
115 Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
116 return predux_min<PropagateNaN>(pmin<PropagateNaN>(lane0, lane1));
120 EIGEN_STRONG_INLINE
float predux_max(
const Packet16f& a) {
121 return _mm512_reduce_max_ps(a);
125 EIGEN_STRONG_INLINE
float predux_max<PropagateNumbers>(
const Packet16f& a) {
126 Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
127 Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
128 return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lane0, lane1));
132 EIGEN_STRONG_INLINE
float predux_max<PropagateNaN>(
const Packet16f& a) {
133 Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
134 Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
135 return predux_max<PropagateNaN>(pmax<PropagateNaN>(lane0, lane1));
139 EIGEN_STRONG_INLINE
bool predux_any(
const Packet16f& a) {
140 return _mm512_reduce_or_epi32(_mm512_castps_si512(a)) != 0;
146 EIGEN_STRONG_INLINE
double predux(
const Packet8d& a) {
147 return _mm512_reduce_add_pd(a);
151 EIGEN_STRONG_INLINE
double predux_mul(
const Packet8d& a) {
152 return _mm512_reduce_mul_pd(a);
156 EIGEN_STRONG_INLINE
double predux_min(
const Packet8d& a) {
157 return _mm512_reduce_min_pd(a);
161 EIGEN_STRONG_INLINE
double predux_min<PropagateNumbers>(
const Packet8d& a) {
162 Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
163 Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
164 return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lane0, lane1));
168 EIGEN_STRONG_INLINE
double predux_min<PropagateNaN>(
const Packet8d& a) {
169 Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
170 Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
171 return predux_min<PropagateNaN>(pmin<PropagateNaN>(lane0, lane1));
175 EIGEN_STRONG_INLINE
double predux_max(
const Packet8d& a) {
176 return _mm512_reduce_max_pd(a);
180 EIGEN_STRONG_INLINE
double predux_max<PropagateNumbers>(
const Packet8d& a) {
181 Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
182 Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
183 return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lane0, lane1));
187 EIGEN_STRONG_INLINE
double predux_max<PropagateNaN>(
const Packet8d& a) {
188 Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
189 Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
190 return predux_max<PropagateNaN>(pmax<PropagateNaN>(lane0, lane1));
194 EIGEN_STRONG_INLINE
bool predux_any(
const Packet8d& a) {
195 return _mm512_reduce_or_epi64(_mm512_castpd_si512(a)) != 0;
198 #ifndef EIGEN_VECTORIZE_AVX512FP16 202 EIGEN_STRONG_INLINE half predux(
const Packet16h& from) {
203 return half(predux(half2float(from)));
207 EIGEN_STRONG_INLINE half predux_mul(
const Packet16h& from) {
208 return half(predux_mul(half2float(from)));
212 EIGEN_STRONG_INLINE half predux_min(
const Packet16h& from) {
213 return half(predux_min(half2float(from)));
217 EIGEN_STRONG_INLINE half predux_min<PropagateNumbers>(
const Packet16h& from) {
218 return half(predux_min<PropagateNumbers>(half2float(from)));
222 EIGEN_STRONG_INLINE half predux_min<PropagateNaN>(
const Packet16h& from) {
223 return half(predux_min<PropagateNaN>(half2float(from)));
227 EIGEN_STRONG_INLINE half predux_max(
const Packet16h& from) {
228 return half(predux_max(half2float(from)));
232 EIGEN_STRONG_INLINE half predux_max<PropagateNumbers>(
const Packet16h& from) {
233 return half(predux_max<PropagateNumbers>(half2float(from)));
237 EIGEN_STRONG_INLINE half predux_max<PropagateNaN>(
const Packet16h& from) {
238 return half(predux_max<PropagateNaN>(half2float(from)));
242 EIGEN_STRONG_INLINE
bool predux_any(
const Packet16h& a) {
243 return predux_any<Packet8i>(a.m_val);
250 EIGEN_STRONG_INLINE bfloat16 predux(
const Packet16bf& from) {
251 return static_cast<bfloat16
>(predux<Packet16f>(Bf16ToF32(from)));
255 EIGEN_STRONG_INLINE bfloat16 predux_mul(
const Packet16bf& from) {
256 return static_cast<bfloat16
>(predux_mul<Packet16f>(Bf16ToF32(from)));
260 EIGEN_STRONG_INLINE bfloat16 predux_min(
const Packet16bf& from) {
261 return static_cast<bfloat16
>(predux_min<Packet16f>(Bf16ToF32(from)));
265 EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNumbers>(
const Packet16bf& from) {
266 return static_cast<bfloat16
>(predux_min<PropagateNumbers>(Bf16ToF32(from)));
270 EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNaN>(
const Packet16bf& from) {
271 return static_cast<bfloat16
>(predux_min<PropagateNaN>(Bf16ToF32(from)));
275 EIGEN_STRONG_INLINE bfloat16 predux_max(
const Packet16bf& from) {
276 return static_cast<bfloat16
>(predux_max(Bf16ToF32(from)));
280 EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNumbers>(
const Packet16bf& from) {
281 return static_cast<bfloat16
>(predux_max<PropagateNumbers>(Bf16ToF32(from)));
285 EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNaN>(
const Packet16bf& from) {
286 return static_cast<bfloat16
>(predux_max<PropagateNaN>(Bf16ToF32(from)));
290 EIGEN_STRONG_INLINE
bool predux_any(
const Packet16bf& a) {
291 return predux_any<Packet8i>(a.m_val);
#endif  // EIGEN_REDUCTIONS_AVX512_H

// Eigen: namespace containing all symbols from the Eigen library.
// Definition: B01_Experimental.dox:1