$darkmode
Eigen  5.0.1-dev
Reductions.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2025 Charlie Schlosser <cs.schlosser@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_REDUCTIONS_SSE_H
11 #define EIGEN_REDUCTIONS_SSE_H
12 
13 // IWYU pragma: private
14 #include "../../InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 
20 template <typename Packet>
21 struct sse_add_wrapper {
22  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return padd<Packet>(a, b); }
23 };
24 
25 template <typename Packet>
26 struct sse_mul_wrapper {
27  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return pmul<Packet>(a, b); }
28 };
29 
30 template <typename Packet>
31 struct sse_min_wrapper {
32  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return pmin<Packet>(a, b); }
33 };
34 
35 template <int NaNPropagation, typename Packet>
36 struct sse_min_prop_wrapper {
37  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) {
38  return pmin<NaNPropagation, Packet>(a, b);
39  }
40 };
41 
42 template <typename Packet>
43 struct sse_max_wrapper {
44  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return pmax<Packet>(a, b); }
45 };
46 
47 template <int NaNPropagation, typename Packet>
48 struct sse_max_prop_wrapper {
49  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) {
50  return pmax<NaNPropagation, Packet>(a, b);
51  }
52 };
53 
// Primary template: declared only. The definitions in this file are partial
// specializations on the packet type, each providing a static run(a) that
// combines the lanes of `a` using Op::packetOp.
template <typename Packet, typename Op> struct sse_predux_common;
56 
57 template <typename Packet>
58 struct sse_predux_impl : sse_predux_common<Packet, sse_add_wrapper<Packet>> {};
59 
60 template <typename Packet>
61 struct sse_predux_mul_impl : sse_predux_common<Packet, sse_mul_wrapper<Packet>> {};
62 
63 template <typename Packet>
64 struct sse_predux_min_impl : sse_predux_common<Packet, sse_min_wrapper<Packet>> {};
65 
66 template <int NaNPropagation, typename Packet>
67 struct sse_predux_min_prop_impl : sse_predux_common<Packet, sse_min_prop_wrapper<NaNPropagation, Packet>> {};
68 
69 template <typename Packet>
70 struct sse_predux_max_impl : sse_predux_common<Packet, sse_max_wrapper<Packet>> {};
71 
72 template <int NaNPropagation, typename Packet>
73 struct sse_predux_max_prop_impl : sse_predux_common<Packet, sse_max_prop_wrapper<NaNPropagation, Packet>> {};
74 
75 /* -- -- -- -- -- -- -- -- -- -- -- -- Packet16b -- -- -- -- -- -- -- -- -- -- -- -- */
76 
77 template <>
78 EIGEN_STRONG_INLINE bool predux(const Packet16b& a) {
79  Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a, a));
80  return (pfirst(tmp) != 0) || (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) != 0);
81 }
82 
83 template <>
84 EIGEN_STRONG_INLINE bool predux_mul(const Packet16b& a) {
85  Packet4i tmp = _mm_and_si128(a, _mm_unpackhi_epi64(a, a));
86  return ((pfirst<Packet4i>(tmp) == 0x01010101) && (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) == 0x01010101));
87 }
88 
89 template <>
90 EIGEN_STRONG_INLINE bool predux_min(const Packet16b& a) {
91  return predux_mul(a);
92 }
93 
94 template <>
95 EIGEN_STRONG_INLINE bool predux_max(const Packet16b& a) {
96  return predux(a);
97 }
98 
99 template <>
100 EIGEN_STRONG_INLINE bool predux_any(const Packet16b& a) {
101  return predux(a);
102 }
103 
104 /* -- -- -- -- -- -- -- -- -- -- -- -- Packet4i -- -- -- -- -- -- -- -- -- -- -- -- */
105 
106 template <typename Op>
107 struct sse_predux_common<Packet4i, Op> {
108  static EIGEN_STRONG_INLINE int run(const Packet4i& a) {
109  Packet4i tmp;
110  tmp = Op::packetOp(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3)));
111  tmp = Op::packetOp(tmp, _mm_unpackhi_epi32(tmp, tmp));
112  return _mm_cvtsi128_si32(tmp);
113  }
114 };
115 
116 template <>
117 EIGEN_STRONG_INLINE int predux(const Packet4i& a) {
118  return sse_predux_impl<Packet4i>::run(a);
119 }
120 
121 template <>
122 EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) {
123  return sse_predux_mul_impl<Packet4i>::run(a);
124 }
125 
126 #ifdef EIGEN_VECTORIZE_SSE4_1
127 template <>
128 EIGEN_STRONG_INLINE int predux_min(const Packet4i& a) {
129  return sse_predux_min_impl<Packet4i>::run(a);
130 }
131 
132 template <>
133 EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) {
134  return sse_predux_max_impl<Packet4i>::run(a);
135 }
136 #endif
137 
138 template <>
139 EIGEN_STRONG_INLINE bool predux_any(const Packet4i& a) {
140  return _mm_movemask_ps(_mm_castsi128_ps(a)) != 0x0;
141 }
142 
143 /* -- -- -- -- -- -- -- -- -- -- -- -- Packet4ui -- -- -- -- -- -- -- -- -- -- -- -- */
144 
145 template <typename Op>
146 struct sse_predux_common<Packet4ui, Op> {
147  static EIGEN_STRONG_INLINE uint32_t run(const Packet4ui& a) {
148  Packet4ui tmp;
149  tmp = Op::packetOp(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3)));
150  tmp = Op::packetOp(tmp, _mm_unpackhi_epi32(tmp, tmp));
151  return static_cast<uint32_t>(_mm_cvtsi128_si32(tmp));
152  }
153 };
154 
155 template <>
156 EIGEN_STRONG_INLINE uint32_t predux(const Packet4ui& a) {
157  return sse_predux_impl<Packet4ui>::run(a);
158 }
159 
160 template <>
161 EIGEN_STRONG_INLINE uint32_t predux_mul(const Packet4ui& a) {
162  return sse_predux_mul_impl<Packet4ui>::run(a);
163 }
164 
165 #ifdef EIGEN_VECTORIZE_SSE4_1
166 template <>
167 EIGEN_STRONG_INLINE uint32_t predux_min(const Packet4ui& a) {
168  return sse_predux_min_impl<Packet4ui>::run(a);
169 }
170 
171 template <>
172 EIGEN_STRONG_INLINE uint32_t predux_max(const Packet4ui& a) {
173  return sse_predux_max_impl<Packet4ui>::run(a);
174 }
175 #endif
176 
177 template <>
178 EIGEN_STRONG_INLINE bool predux_any(const Packet4ui& a) {
179  return _mm_movemask_ps(_mm_castsi128_ps(a)) != 0x0;
180 }
181 
182 /* -- -- -- -- -- -- -- -- -- -- -- -- Packet2l -- -- -- -- -- -- -- -- -- -- -- -- */
183 
184 template <typename Op>
185 struct sse_predux_common<Packet2l, Op> {
186  static EIGEN_STRONG_INLINE int64_t run(const Packet2l& a) {
187  Packet2l tmp;
188  tmp = Op::packetOp(a, _mm_unpackhi_epi64(a, a));
189  return pfirst(tmp);
190  }
191 };
192 
193 template <>
194 EIGEN_STRONG_INLINE int64_t predux(const Packet2l& a) {
195  return sse_predux_impl<Packet2l>::run(a);
196 }
197 
198 template <>
199 EIGEN_STRONG_INLINE bool predux_any(const Packet2l& a) {
200  return _mm_movemask_pd(_mm_castsi128_pd(a)) != 0x0;
201 }
202 
203 /* -- -- -- -- -- -- -- -- -- -- -- -- Packet4f -- -- -- -- -- -- -- -- -- -- -- -- */
204 
205 template <typename Op>
206 struct sse_predux_common<Packet4f, Op> {
207  static EIGEN_STRONG_INLINE float run(const Packet4f& a) {
208  Packet4f tmp;
209  tmp = Op::packetOp(a, _mm_movehl_ps(a, a));
210 #ifdef EIGEN_VECTORIZE_SSE3
211  tmp = Op::packetOp(tmp, _mm_movehdup_ps(tmp));
212 #else
213  tmp = Op::packetOp(tmp, _mm_shuffle_ps(tmp, tmp, 1));
214 #endif
215  return _mm_cvtss_f32(tmp);
216  }
217 };
218 
219 template <>
220 EIGEN_STRONG_INLINE float predux(const Packet4f& a) {
221  return sse_predux_impl<Packet4f>::run(a);
222 }
223 
224 template <>
225 EIGEN_STRONG_INLINE float predux_mul(const Packet4f& a) {
226  return sse_predux_mul_impl<Packet4f>::run(a);
227 }
228 
229 template <>
230 EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) {
231  return sse_predux_min_impl<Packet4f>::run(a);
232 }
233 
234 template <>
235 EIGEN_STRONG_INLINE float predux_min<PropagateNumbers>(const Packet4f& a) {
236  return sse_predux_min_prop_impl<PropagateNumbers, Packet4f>::run(a);
237 }
238 
239 template <>
240 EIGEN_STRONG_INLINE float predux_min<PropagateNaN>(const Packet4f& a) {
241  return sse_predux_min_prop_impl<PropagateNaN, Packet4f>::run(a);
242 }
243 
244 template <>
245 EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) {
246  return sse_predux_max_impl<Packet4f>::run(a);
247 }
248 
249 template <>
250 EIGEN_STRONG_INLINE float predux_max<PropagateNumbers>(const Packet4f& a) {
251  return sse_predux_max_prop_impl<PropagateNumbers, Packet4f>::run(a);
252 }
253 
254 template <>
255 EIGEN_STRONG_INLINE float predux_max<PropagateNaN>(const Packet4f& a) {
256  return sse_predux_max_prop_impl<PropagateNaN, Packet4f>::run(a);
257 }
258 
259 template <>
260 EIGEN_STRONG_INLINE bool predux_any(const Packet4f& a) {
261  return _mm_movemask_ps(a) != 0x0;
262 }
263 
264 /* -- -- -- -- -- -- -- -- -- -- -- -- Packet2d -- -- -- -- -- -- -- -- -- -- -- -- */
265 
266 template <typename Op>
267 struct sse_predux_common<Packet2d, Op> {
268  static EIGEN_STRONG_INLINE double run(const Packet2d& a) {
269  Packet2d tmp;
270  tmp = Op::packetOp(a, _mm_unpackhi_pd(a, a));
271  return _mm_cvtsd_f64(tmp);
272  }
273 };
274 
275 template <>
276 EIGEN_STRONG_INLINE double predux(const Packet2d& a) {
277  return sse_predux_impl<Packet2d>::run(a);
278 }
279 
280 template <>
281 EIGEN_STRONG_INLINE double predux_mul(const Packet2d& a) {
282  return sse_predux_mul_impl<Packet2d>::run(a);
283 }
284 
285 template <>
286 EIGEN_STRONG_INLINE double predux_min(const Packet2d& a) {
287  return sse_predux_min_impl<Packet2d>::run(a);
288 }
289 
290 template <>
291 EIGEN_STRONG_INLINE double predux_min<PropagateNumbers>(const Packet2d& a) {
292  return sse_predux_min_prop_impl<PropagateNumbers, Packet2d>::run(a);
293 }
294 
295 template <>
296 EIGEN_STRONG_INLINE double predux_min<PropagateNaN>(const Packet2d& a) {
297  return sse_predux_min_prop_impl<PropagateNaN, Packet2d>::run(a);
298 }
299 
300 template <>
301 EIGEN_STRONG_INLINE double predux_max(const Packet2d& a) {
302  return sse_predux_max_impl<Packet2d>::run(a);
303 }
304 
305 template <>
306 EIGEN_STRONG_INLINE double predux_max<PropagateNumbers>(const Packet2d& a) {
307  return sse_predux_max_prop_impl<PropagateNumbers, Packet2d>::run(a);
308 }
309 
310 template <>
311 EIGEN_STRONG_INLINE double predux_max<PropagateNaN>(const Packet2d& a) {
312  return sse_predux_max_prop_impl<PropagateNaN, Packet2d>::run(a);
313 }
314 
315 template <>
316 EIGEN_STRONG_INLINE bool predux_any(const Packet2d& a) {
317  return _mm_movemask_pd(a) != 0x0;
318 }
319 
320 } // end namespace internal
321 
322 } // end namespace Eigen
323 
324 #endif // EIGEN_REDUCTIONS_SSE_H
Namespace containing all symbols from the Eigen library.
Definition: B01_Experimental.dox:1