10 #ifndef EIGEN_REDUCTIONS_AVX_H 11 #define EIGEN_REDUCTIONS_AVX_H 14 #include "../../InternalHeaderCheck.h" 23 EIGEN_STRONG_INLINE
int predux(
const Packet8i& a) {
24 Packet4i lo = _mm256_castsi256_si128(a);
25 Packet4i hi = _mm256_extractf128_si256(a, 1);
26 return predux(padd(lo, hi));
30 EIGEN_STRONG_INLINE
int predux_mul(
const Packet8i& a) {
31 Packet4i lo = _mm256_castsi256_si128(a);
32 Packet4i hi = _mm256_extractf128_si256(a, 1);
33 return predux_mul(pmul(lo, hi));
37 EIGEN_STRONG_INLINE
int predux_min(
const Packet8i& a) {
38 Packet4i lo = _mm256_castsi256_si128(a);
39 Packet4i hi = _mm256_extractf128_si256(a, 1);
40 return predux_min(pmin(lo, hi));
44 EIGEN_STRONG_INLINE
int predux_max(
const Packet8i& a) {
45 Packet4i lo = _mm256_castsi256_si128(a);
46 Packet4i hi = _mm256_extractf128_si256(a, 1);
47 return predux_max(pmax(lo, hi));
51 EIGEN_STRONG_INLINE
bool predux_any(
const Packet8i& a) {
52 #ifdef EIGEN_VECTORIZE_AVX2 53 return _mm256_movemask_epi8(a) != 0x0;
55 return _mm256_movemask_ps(_mm256_castsi256_ps(a)) != 0x0;
62 EIGEN_STRONG_INLINE uint32_t predux(
const Packet8ui& a) {
63 Packet4ui lo = _mm256_castsi256_si128(a);
64 Packet4ui hi = _mm256_extractf128_si256(a, 1);
65 return predux(padd(lo, hi));
69 EIGEN_STRONG_INLINE uint32_t predux_mul(
const Packet8ui& a) {
70 Packet4ui lo = _mm256_castsi256_si128(a);
71 Packet4ui hi = _mm256_extractf128_si256(a, 1);
72 return predux_mul(pmul(lo, hi));
76 EIGEN_STRONG_INLINE uint32_t predux_min(
const Packet8ui& a) {
77 Packet4ui lo = _mm256_castsi256_si128(a);
78 Packet4ui hi = _mm256_extractf128_si256(a, 1);
79 return predux_min(pmin(lo, hi));
83 EIGEN_STRONG_INLINE uint32_t predux_max(
const Packet8ui& a) {
84 Packet4ui lo = _mm256_castsi256_si128(a);
85 Packet4ui hi = _mm256_extractf128_si256(a, 1);
86 return predux_max(pmax(lo, hi));
90 EIGEN_STRONG_INLINE
bool predux_any(
const Packet8ui& a) {
91 #ifdef EIGEN_VECTORIZE_AVX2 92 return _mm256_movemask_epi8(a) != 0x0;
94 return _mm256_movemask_ps(_mm256_castsi256_ps(a)) != 0x0;
#ifdef EIGEN_VECTORIZE_AVX2

// Reduces the int64_t lanes of a Packet4l to one value: the two 128-bit
// halves are added lane-wise with padd, then the Packet2l predux finishes.
template <>
EIGEN_STRONG_INLINE int64_t predux(const Packet4l& a) {
  const Packet2l lo = _mm256_castsi256_si128(a);       // lower 128 bits
  const Packet2l hi = _mm256_extractf128_si256(a, 1);  // upper 128 bits
  return predux(padd(lo, hi));
}

// Returns true when any 64-bit lane of `a` has its top bit set; the packet is
// reinterpreted as four doubles so that _mm256_movemask_pd can collect them.
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4l& a) {
  return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0x0;
}

// Unsigned 64-bit reduction: converts the packet to Packet4l, reuses the
// signed predux, and casts the scalar result back to uint64_t.
template <>
EIGEN_STRONG_INLINE uint64_t predux(const Packet4ul& a) {
  return static_cast<uint64_t>(predux(Packet4l(a)));
}

// Returns true when any 64-bit lane of `a` has its top bit set; the packet is
// reinterpreted as four doubles so that _mm256_movemask_pd can collect them.
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4ul& a) {
  return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0x0;
}

// NOTE(review): the guard is assumed to end here, since only the 64-bit
// integer packets are declared under it in this view -- confirm against the
// upstream header.
#endif  // EIGEN_VECTORIZE_AVX2
131 EIGEN_STRONG_INLINE
float predux(
const Packet8f& a) {
132 Packet4f lo = _mm256_castps256_ps128(a);
133 Packet4f hi = _mm256_extractf128_ps(a, 1);
134 return predux(padd(lo, hi));
138 EIGEN_STRONG_INLINE
float predux_mul(
const Packet8f& a) {
139 Packet4f lo = _mm256_castps256_ps128(a);
140 Packet4f hi = _mm256_extractf128_ps(a, 1);
141 return predux_mul(pmul(lo, hi));
145 EIGEN_STRONG_INLINE
float predux_min(
const Packet8f& a) {
146 Packet4f lo = _mm256_castps256_ps128(a);
147 Packet4f hi = _mm256_extractf128_ps(a, 1);
148 return predux_min(pmin(lo, hi));
152 EIGEN_STRONG_INLINE
float predux_min<PropagateNumbers>(
const Packet8f& a) {
153 Packet4f lo = _mm256_castps256_ps128(a);
154 Packet4f hi = _mm256_extractf128_ps(a, 1);
155 return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lo, hi));
159 EIGEN_STRONG_INLINE
float predux_min<PropagateNaN>(
const Packet8f& a) {
160 Packet4f lo = _mm256_castps256_ps128(a);
161 Packet4f hi = _mm256_extractf128_ps(a, 1);
162 return predux_min<PropagateNaN>(pmin<PropagateNaN>(lo, hi));
166 EIGEN_STRONG_INLINE
float predux_max(
const Packet8f& a) {
167 Packet4f lo = _mm256_castps256_ps128(a);
168 Packet4f hi = _mm256_extractf128_ps(a, 1);
169 return predux_max(pmax(lo, hi));
173 EIGEN_STRONG_INLINE
float predux_max<PropagateNumbers>(
const Packet8f& a) {
174 Packet4f lo = _mm256_castps256_ps128(a);
175 Packet4f hi = _mm256_extractf128_ps(a, 1);
176 return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lo, hi));
180 EIGEN_STRONG_INLINE
float predux_max<PropagateNaN>(
const Packet8f& a) {
181 Packet4f lo = _mm256_castps256_ps128(a);
182 Packet4f hi = _mm256_extractf128_ps(a, 1);
183 return predux_max<PropagateNaN>(pmax<PropagateNaN>(lo, hi));
187 EIGEN_STRONG_INLINE
bool predux_any(
const Packet8f& a) {
188 return _mm256_movemask_ps(a) != 0x0;
194 EIGEN_STRONG_INLINE
double predux(
const Packet4d& a) {
195 Packet2d lo = _mm256_castpd256_pd128(a);
196 Packet2d hi = _mm256_extractf128_pd(a, 1);
197 return predux(padd(lo, hi));
201 EIGEN_STRONG_INLINE
double predux_mul(
const Packet4d& a) {
202 Packet2d lo = _mm256_castpd256_pd128(a);
203 Packet2d hi = _mm256_extractf128_pd(a, 1);
204 return predux_mul(pmul(lo, hi));
208 EIGEN_STRONG_INLINE
double predux_min(
const Packet4d& a) {
209 Packet2d lo = _mm256_castpd256_pd128(a);
210 Packet2d hi = _mm256_extractf128_pd(a, 1);
211 return predux_min(pmin(lo, hi));
215 EIGEN_STRONG_INLINE
double predux_min<PropagateNumbers>(
const Packet4d& a) {
216 Packet2d lo = _mm256_castpd256_pd128(a);
217 Packet2d hi = _mm256_extractf128_pd(a, 1);
218 return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lo, hi));
222 EIGEN_STRONG_INLINE
double predux_min<PropagateNaN>(
const Packet4d& a) {
223 Packet2d lo = _mm256_castpd256_pd128(a);
224 Packet2d hi = _mm256_extractf128_pd(a, 1);
225 return predux_min<PropagateNaN>(pmin<PropagateNaN>(lo, hi));
229 EIGEN_STRONG_INLINE
double predux_max(
const Packet4d& a) {
230 Packet2d lo = _mm256_castpd256_pd128(a);
231 Packet2d hi = _mm256_extractf128_pd(a, 1);
232 return predux_max(pmax(lo, hi));
236 EIGEN_STRONG_INLINE
double predux_max<PropagateNumbers>(
const Packet4d& a) {
237 Packet2d lo = _mm256_castpd256_pd128(a);
238 Packet2d hi = _mm256_extractf128_pd(a, 1);
239 return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lo, hi));
243 EIGEN_STRONG_INLINE
double predux_max<PropagateNaN>(
const Packet4d& a) {
244 Packet2d lo = _mm256_castpd256_pd128(a);
245 Packet2d hi = _mm256_extractf128_pd(a, 1);
246 return predux_max<PropagateNaN>(pmax<PropagateNaN>(lo, hi));
250 EIGEN_STRONG_INLINE
bool predux_any(
const Packet4d& a) {
251 return _mm256_movemask_pd(a) != 0x0;
255 #ifndef EIGEN_VECTORIZE_AVX512FP16 258 EIGEN_STRONG_INLINE half predux(
const Packet8h& a) {
259 return static_cast<half
>(predux(half2float(a)));
263 EIGEN_STRONG_INLINE half predux_mul(
const Packet8h& a) {
264 return static_cast<half
>(predux_mul(half2float(a)));
268 EIGEN_STRONG_INLINE half predux_min(
const Packet8h& a) {
269 return static_cast<half
>(predux_min(half2float(a)));
273 EIGEN_STRONG_INLINE half predux_min<PropagateNumbers>(
const Packet8h& a) {
274 return static_cast<half
>(predux_min<PropagateNumbers>(half2float(a)));
278 EIGEN_STRONG_INLINE half predux_min<PropagateNaN>(
const Packet8h& a) {
279 return static_cast<half
>(predux_min<PropagateNaN>(half2float(a)));
283 EIGEN_STRONG_INLINE half predux_max(
const Packet8h& a) {
284 return static_cast<half
>(predux_max(half2float(a)));
288 EIGEN_STRONG_INLINE half predux_max<PropagateNumbers>(
const Packet8h& a) {
289 return static_cast<half
>(predux_max<PropagateNumbers>(half2float(a)));
293 EIGEN_STRONG_INLINE half predux_max<PropagateNaN>(
const Packet8h& a) {
294 return static_cast<half
>(predux_max<PropagateNaN>(half2float(a)));
298 EIGEN_STRONG_INLINE
bool predux_any(
const Packet8h& a) {
299 return _mm_movemask_epi8(a) != 0;
301 #endif // EIGEN_VECTORIZE_AVX512FP16 306 EIGEN_STRONG_INLINE bfloat16 predux(
const Packet8bf& a) {
307 return static_cast<bfloat16
>(predux<Packet8f>(Bf16ToF32(a)));
311 EIGEN_STRONG_INLINE bfloat16 predux_mul(
const Packet8bf& a) {
312 return static_cast<bfloat16
>(predux_mul<Packet8f>(Bf16ToF32(a)));
316 EIGEN_STRONG_INLINE bfloat16 predux_min(
const Packet8bf& a) {
317 return static_cast<bfloat16
>(predux_min(Bf16ToF32(a)));
321 EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNumbers>(
const Packet8bf& a) {
322 return static_cast<bfloat16
>(predux_min<PropagateNumbers>(Bf16ToF32(a)));
326 EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNaN>(
const Packet8bf& a) {
327 return static_cast<bfloat16
>(predux_min<PropagateNaN>(Bf16ToF32(a)));
331 EIGEN_STRONG_INLINE bfloat16 predux_max(
const Packet8bf& a) {
332 return static_cast<bfloat16
>(predux_max<Packet8f>(Bf16ToF32(a)));
336 EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNumbers>(
const Packet8bf& a) {
337 return static_cast<bfloat16
>(predux_max<PropagateNumbers>(Bf16ToF32(a)));
341 EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNaN>(
const Packet8bf& a) {
342 return static_cast<bfloat16
>(predux_max<PropagateNaN>(Bf16ToF32(a)));
346 EIGEN_STRONG_INLINE
bool predux_any(
const Packet8bf& a) {
347 return _mm_movemask_epi8(a) != 0;
#endif  // EIGEN_REDUCTIONS_AVX_H
// Doxygen cross-reference note: "Eigen" is the namespace containing all symbols from the Eigen library (definition: B01_Experimental.dox:1).