$darkmode
Eigen  5.0.1-dev
ConfigureVectorization.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2020, Arm Limited and Contributors
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_CONFIGURE_VECTORIZATION_H
12 #define EIGEN_CONFIGURE_VECTORIZATION_H
13 
14 //------------------------------------------------------------------------------------------
15 // Static and dynamic alignment control
16 //
17 // The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
18 // as the maximal boundary in bytes on which dynamically and statically allocated data may be aligned, respectively.
19 // The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
20 // a default value is automatically computed based on architecture, compiler, and OS.
21 //
22 // This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
23 // to be used to declare statically aligned buffers.
24 //------------------------------------------------------------------------------------------
25 
26 /* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
27  * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
28  * so that vectorization doesn't affect binary compatibility.
29  *
30  * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
31  * vectorized and non-vectorized code.
32  */
// Spelling of the "align to n bytes" specifier and of the "alignment of x" query.
// The standard alignas/alignof keywords are used unless EIGEN_CUDACC is defined,
// in which case the __align__/__alignof spellings are used instead.
#ifndef EIGEN_CUDACC
#define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
#define EIGEN_ALIGNOF(x) alignof(x)
#else
#define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
#define EIGEN_ALIGNOF(x) __alignof(x)
#endif
40 
41 // Align to the boundary that avoids false sharing.
42 // https://en.cppreference.com/w/cpp/thread/hardware_destructive_interference_size
43 // There is a bug in android NDK < r26 where the macro is defined but std::hardware_destructive_interference_size
44 // still does not exist.
45 #if defined(__cpp_lib_hardware_interference_size) && __cpp_lib_hardware_interference_size >= 201603 && \
46  (!EIGEN_OS_ANDROID || __NDK_MAJOR__ + 0 >= 26)
47 #include <new>
48 #define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(std::hardware_destructive_interference_size)
49 #else
50 // Overalign for the cache line size of 128 bytes (Apple M1)
51 #define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(128)
52 #endif
53 
// EIGEN_IDEAL_MAX_ALIGN_BYTES: the alignment (in bytes) Eigen would ideally use,
// chosen from the instruction-set macros predefined by the compiler.
#if !defined(EIGEN_DONT_VECTORIZE)
#if defined(__AVX512F__)
// 64 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
#elif defined(__AVX__)
// 32 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
#elif defined(__HVX__) && (__HVX_LENGTH__ == 128)
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 128
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#endif
#elif defined(EIGEN_GPUCC)
// The user explicitly disabled vectorization, but GPU code is always vectorized
// and requires memory alignment for statically allocated buffers.
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#else
// If the user explicitly disables vectorization, then we also disable alignment.
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
#endif
74 
75 // EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
76 #define EIGEN_MIN_ALIGN_BYTES 16
77 
78 // Defines the boundary (in bytes) on which the data needs to be aligned. Note
79 // that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
80 // aligned at all regardless of the value of this #define.
81 
82 #if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && \
83  EIGEN_MAX_STATIC_ALIGN_BYTES > 0
84 #error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
85 #endif
86 
87 // EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated
88 // They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
89 #if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
90 #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
91 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
92 #endif
93 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
94 #endif
95 
96 #ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
97 
98 // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
99 
100 // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
101 // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
102 // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
103 // certain common platform (compiler+architecture combinations) to avoid these problems.
104 // Only static alignment is really problematic (relies on nonstandard compiler extensions),
105 // try to keep heap alignment even when we have to disable static alignment.
106 #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || \
107  EIGEN_ARCH_MIPS || EIGEN_ARCH_LOONGARCH64)
108 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
109 #else
110 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
111 #endif
112 
113 // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
114 #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT && !EIGEN_COMP_SUNCC && !EIGEN_OS_QNX
115 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
116 #else
117 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
118 #endif
119 
120 #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
121 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
122 #else
123 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
124 #endif
125 
126 #endif
127 
128 // If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES
129 #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES < EIGEN_MAX_STATIC_ALIGN_BYTES
130 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
131 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
132 #endif
133 
134 #if EIGEN_MAX_STATIC_ALIGN_BYTES == 0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
135 #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
136 #endif
137 
138 // At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
139 // It takes into account both the user choice to explicitly enable/disable alignment (by setting
140 // EIGEN_MAX_STATIC_ALIGN_BYTES) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only
141 // EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
142 
143 // Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
144 #define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
145 #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
146 #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
147 #define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
148 #if EIGEN_MAX_STATIC_ALIGN_BYTES > 0
149 #define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
150 #else
151 #define EIGEN_ALIGN_MAX
152 #endif
153 
154 // Dynamic alignment control
155 
156 #if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES > 0
157 #error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
158 #endif
159 
160 #ifdef EIGEN_DONT_ALIGN
161 #ifdef EIGEN_MAX_ALIGN_BYTES
162 #undef EIGEN_MAX_ALIGN_BYTES
163 #endif
164 #define EIGEN_MAX_ALIGN_BYTES 0
165 #elif !defined(EIGEN_MAX_ALIGN_BYTES)
166 #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
167 #endif
168 
169 #if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
170 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
171 #else
172 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
173 #endif
174 
175 #ifndef EIGEN_UNALIGNED_VECTORIZE
176 #define EIGEN_UNALIGNED_VECTORIZE 1
177 #endif
178 
179 //----------------------------------------------------------------------
180 
181 // if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
182 // account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
183 #if EIGEN_MAX_ALIGN_BYTES == 0
184 #ifndef EIGEN_DONT_VECTORIZE
185 #define EIGEN_DONT_VECTORIZE
186 #endif
187 #endif
188 
189 // The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be
190 // removed as gcc 4.1 and msvc 2008 are not supported anyways.
191 #if EIGEN_COMP_MSVC
192 #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
193 // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
194 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
195 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
196 #endif
197 #else
198 #if defined(__SSE2__)
199 #define EIGEN_SSE2_ON_NON_MSVC
200 #endif
201 #endif
202 
203 #if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
204 
205 #if defined(EIGEN_SSE2_ON_NON_MSVC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
206 
207 // Defines symbols for compile-time detection of which instructions are
208 // used.
209 // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
210 #define EIGEN_VECTORIZE
211 #define EIGEN_VECTORIZE_SSE
212 #define EIGEN_VECTORIZE_SSE2
213 
214 // Detect sse3/ssse3/sse4:
215 // gcc and icc defines __SSE3__, ...
216 // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
217 // want to force the use of those instructions with msvc.
218 #ifdef __SSE3__
219 #define EIGEN_VECTORIZE_SSE3
220 #endif
221 #ifdef __SSSE3__
222 #define EIGEN_VECTORIZE_SSSE3
223 #endif
224 #ifdef __SSE4_1__
225 #define EIGEN_VECTORIZE_SSE4_1
226 #endif
227 #ifdef __SSE4_2__
228 #define EIGEN_VECTORIZE_SSE4_2
229 #endif
230 #ifdef __AVX__
231 #if !defined(EIGEN_USE_SYCL) && !EIGEN_COMP_EMSCRIPTEN
232 #define EIGEN_VECTORIZE_AVX
233 #endif
234 #define EIGEN_VECTORIZE_SSE3
235 #define EIGEN_VECTORIZE_SSSE3
236 #define EIGEN_VECTORIZE_SSE4_1
237 #define EIGEN_VECTORIZE_SSE4_2
238 #endif
239 #ifdef __AVX2__
240 #ifndef EIGEN_USE_SYCL
241 #define EIGEN_VECTORIZE_AVX2
242 #define EIGEN_VECTORIZE_AVX
243 #endif
244 #define EIGEN_VECTORIZE_SSE3
245 #define EIGEN_VECTORIZE_SSSE3
246 #define EIGEN_VECTORIZE_SSE4_1
247 #define EIGEN_VECTORIZE_SSE4_2
248 #endif
249 #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
250 // MSVC does not expose a switch dedicated for FMA
251 // For MSVC, AVX2 => FMA
252 #define EIGEN_VECTORIZE_FMA
253 #endif
254 #if defined(__AVX512F__)
255 #ifndef EIGEN_VECTORIZE_FMA
256 #if EIGEN_COMP_GNUC
257 #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
258 #else
259 #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
260 #endif
261 #endif
262 #ifndef EIGEN_USE_SYCL
263 #define EIGEN_VECTORIZE_AVX512
264 #define EIGEN_VECTORIZE_AVX2
265 #define EIGEN_VECTORIZE_AVX
266 #endif
267 #define EIGEN_VECTORIZE_FMA
268 #define EIGEN_VECTORIZE_SSE3
269 #define EIGEN_VECTORIZE_SSSE3
270 #define EIGEN_VECTORIZE_SSE4_1
271 #define EIGEN_VECTORIZE_SSE4_2
272 #ifndef EIGEN_USE_SYCL
273 #ifdef __AVX512DQ__
274 #define EIGEN_VECTORIZE_AVX512DQ
275 #endif
276 #ifdef __AVX512ER__
277 #define EIGEN_VECTORIZE_AVX512ER
278 #endif
279 #ifdef __AVX512BF16__
280 #define EIGEN_VECTORIZE_AVX512BF16
281 #endif
282 #ifdef __AVX512VL__
283 #define EIGEN_VECTORIZE_AVX512VL
284 #endif
285 #ifdef __AVX512FP16__
286 #ifdef __AVX512VL__
287 #define EIGEN_VECTORIZE_AVX512FP16
288 // Built-in _Float16.
289 #define EIGEN_HAS_BUILTIN_FLOAT16 1
290 #else
291 #if EIGEN_COMP_GNUC
292 #error Please add -mavx512vl to your compiler flags: compiling with -mavx512fp16 alone without AVX512-VL is not supported.
293 #else
294 #error Please enable AVX512-VL in your compiler flags (e.g. -mavx512vl): compiling with AVX512-FP16 alone without AVX512-VL is not supported.
295 #endif
296 #endif
297 #endif
298 #endif
299 #endif
300 
301 // Disable AVX support on broken xcode versions
302 #if (EIGEN_COMP_CLANGAPPLE == 11000033) && (__MAC_OS_X_VERSION_MIN_REQUIRED == 101500)
303 // A nasty bug in the clang compiler shipped with XCode 11.0 shows up in a common compilation situation:
304 // building with the Mac deployment target set to macOS 10.15. See https://trac.macports.org/ticket/58776#no1
305 #ifdef EIGEN_VECTORIZE_AVX
306 #undef EIGEN_VECTORIZE_AVX
307 #warning \
308  "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
309 #ifdef EIGEN_VECTORIZE_AVX2
310 #undef EIGEN_VECTORIZE_AVX2
311 #endif
312 #ifdef EIGEN_VECTORIZE_FMA
313 #undef EIGEN_VECTORIZE_FMA
314 #endif
315 #ifdef EIGEN_VECTORIZE_AVX512
316 #undef EIGEN_VECTORIZE_AVX512
317 #endif
318 #ifdef EIGEN_VECTORIZE_AVX512DQ
319 #undef EIGEN_VECTORIZE_AVX512DQ
320 #endif
321 #ifdef EIGEN_VECTORIZE_AVX512ER
322 #undef EIGEN_VECTORIZE_AVX512ER
323 #endif
324 #endif
325 // NOTE: Confirmed test failures in XCode 11.0 and XCode 11.2 with -macosx-version-min=10.15 and AVX.
326 // NOTE: Using -macosx-version-min=10.15 with XCode 11.0 results in runtime segmentation faults in many tests;
327 // XCode 11.2 produces core dumps in 3 tests.
328 // NOTE: Using -macosx-version-min=10.14 produces functioning and passing tests in all cases.
329 // NOTE: __clang_version__ "11.0.0 (clang-1100.0.33.8)" (XCode 11.0) produces many segfaulting and core dumping
330 // tests with -macosx-version-min=10.15 and AVX.
331 // NOTE: __clang_version__ "11.0.0 (clang-1100.0.33.12)" (XCode 11.2) produces 3 core dumping tests with
332 // -macosx-version-min=10.15 and AVX.
333 #endif
334 
335 // include files
336 
337 // This extern "C" works around a MINGW-w64 compilation issue
338 // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
339 // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
340 // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
341 // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
342 // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
343 // notice that since these are C headers, the extern "C" is theoretically needed anyways.
344 extern "C" {
345 // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
346 // Doing so triggers some issues with ICC. However, old gcc versions seem not to have this file, thus:
347 #if EIGEN_COMP_ICC >= 1110 || EIGEN_COMP_EMSCRIPTEN
348 #include <immintrin.h>
349 #else
350 #include <mmintrin.h>
351 #include <emmintrin.h>
352 #include <xmmintrin.h>
353 #ifdef EIGEN_VECTORIZE_SSE3
354 #include <pmmintrin.h>
355 #endif
356 #ifdef EIGEN_VECTORIZE_SSSE3
357 #include <tmmintrin.h>
358 #endif
359 #ifdef EIGEN_VECTORIZE_SSE4_1
360 #include <smmintrin.h>
361 #endif
362 #ifdef EIGEN_VECTORIZE_SSE4_2
363 #include <nmmintrin.h>
364 #endif
365 #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
366 #include <immintrin.h>
367 #endif
368 #endif
369 } // end extern "C"
370 
371 #elif defined(__VSX__) && !defined(__APPLE__)
372 
373 #define EIGEN_VECTORIZE
374 #define EIGEN_VECTORIZE_VSX 1
375 #define EIGEN_VECTORIZE_FMA
376 #include <altivec.h>
377 // We need to #undef all these ugly tokens defined in <altivec.h>
378 // => use __vector instead of vector
379 #undef bool
380 #undef vector
381 #undef pixel
382 
383 #elif defined __ALTIVEC__
384 
385 #define EIGEN_VECTORIZE
386 #define EIGEN_VECTORIZE_ALTIVEC
387 #define EIGEN_VECTORIZE_FMA
388 #include <altivec.h>
389 // We need to #undef all these ugly tokens defined in <altivec.h>
390 // => use __vector instead of vector
391 #undef bool
392 #undef vector
393 #undef pixel
394 
395 #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
396 
397 #define EIGEN_VECTORIZE
398 #define EIGEN_VECTORIZE_NEON
399 #include <arm_neon.h>
400 
401 // We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and
402 // will not select the backend automatically
403 #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
404 
405 #define EIGEN_VECTORIZE
406 #define EIGEN_VECTORIZE_SVE
407 #include <arm_sve.h>
408 
// Since we depend on knowing the SVE vector length at compile time, we need
// to ensure a fixed length is set. EIGEN_ARM64_SVE_VL is taken directly from
// __ARM_FEATURE_SVE_BITS; if that macro is missing the build is rejected.
#if defined(__ARM_FEATURE_SVE_BITS)
#define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
#else
#error "Eigen requires a fixed SVE vector length but __ARM_FEATURE_SVE_BITS is not defined (e.g. compile with -msve-vector-bits=<bits>)."
#endif
416 
417 #elif (defined __s390x__ && defined __VEC__)
418 
419 #define EIGEN_VECTORIZE
420 #define EIGEN_VECTORIZE_ZVECTOR
421 #include <vecintrin.h>
422 
423 #elif defined __mips_msa
424 
425 // Limit MSA optimizations to little-endian CPUs for now.
426 // TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
427 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
428 #if defined(__LP64__)
429 #define EIGEN_MIPS_64
430 #else
431 #define EIGEN_MIPS_32
432 #endif
433 #define EIGEN_VECTORIZE
434 #define EIGEN_VECTORIZE_MSA
435 #include <msa.h>
436 #endif
437 
438 #elif (defined __loongarch64 && defined __loongarch_sx)
439 
440 #define EIGEN_VECTORIZE
441 #define EIGEN_VECTORIZE_LSX
442 #include <lsxintrin.h>
443 
444 #elif defined __HVX__ && (__HVX_LENGTH__ == 128)
445 
446 #define EIGEN_VECTORIZE
447 #define EIGEN_VECTORIZE_HVX
448 #include <hexagon_types.h>
449 
450 #endif
451 #endif
452 
453 // Following the Arm ACLE, arm_neon.h should also include arm_fp16.h but not all
454 // compilers seem to follow this. We therefore include it explicitly.
455 // See also: https://bugs.llvm.org/show_bug.cgi?id=47955
456 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
457 #include <arm_fp16.h>
458 #endif
459 
460 // Enable FMA for ARM.
461 #if defined(__ARM_FEATURE_FMA)
462 #define EIGEN_VECTORIZE_FMA
463 #endif
464 
465 #if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3, 8, 0))
466 // We can use the optimized fp16 to float and float to fp16 conversion routines
467 #define EIGEN_HAS_FP16_C
468 
469 #if EIGEN_COMP_GNUC
470 // Make sure immintrin.h is included, even if e.g. vectorization is
471 // explicitly disabled (see also issue #2395).
472 // Note that FP16C intrinsics for gcc and clang are included by immintrin.h,
473 // as opposed to emmintrin.h as suggested by Intel:
474 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
475 #include <immintrin.h>
476 #endif
477 #endif
478 
479 #if defined EIGEN_CUDACC
480 #define EIGEN_VECTORIZE_GPU
481 #include <vector_types.h>
482 #if EIGEN_CUDA_SDK_VER >= 70500
483 #define EIGEN_HAS_CUDA_FP16
484 #endif
485 #endif
486 
487 #if defined(EIGEN_HAS_CUDA_FP16)
488 #include <cuda_runtime_api.h>
489 #include <cuda_fp16.h>
490 #endif
491 
492 #if defined(EIGEN_HIPCC)
493 #define EIGEN_VECTORIZE_GPU
494 #include <hip/hip_vector_types.h>
495 #define EIGEN_HAS_HIP_FP16
496 #include <hip/hip_fp16.h>
497 #define EIGEN_HAS_HIP_BF16
498 #include <hip/hip_bfloat16.h>
499 #endif
500 
502 // IWYU pragma: private
503 #include "../InternalHeaderCheck.h"
504 
505 namespace Eigen {
506 
// Returns a static, human-readable, comma-separated list of the SIMD instruction
// sets selected through the EIGEN_VECTORIZE_* macros, or "None" when none of the
// macros tested below is defined.
//
// Only the first matching branch is reported, so the x86 branches are ordered
// from the widest instruction set down to SSE2. The returned pointer refers to
// a string literal and must not be freed or modified.
inline static const char *SimdInstructionSetsInUse(void) {
#if defined(EIGEN_VECTORIZE_AVX512)
  return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_AVX)
  // The separator after "AVX" was missing, which produced "AVX SSE, ...".
  return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_2)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_1)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
#elif defined(EIGEN_VECTORIZE_SSSE3)
  return "SSE, SSE2, SSE3, SSSE3";
#elif defined(EIGEN_VECTORIZE_SSE3)
  return "SSE, SSE2, SSE3";
#elif defined(EIGEN_VECTORIZE_SSE2)
  return "SSE, SSE2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
  return "AltiVec";
#elif defined(EIGEN_VECTORIZE_VSX)
  return "VSX";
#elif defined(EIGEN_VECTORIZE_NEON)
  return "ARM NEON";
#elif defined(EIGEN_VECTORIZE_SVE)
  return "ARM SVE";
#elif defined(EIGEN_VECTORIZE_ZVECTOR)
  return "S390X ZVECTOR";
#elif defined(EIGEN_VECTORIZE_MSA)
  return "MIPS MSA";
#elif defined(EIGEN_VECTORIZE_LSX)
  return "LOONGARCH64 LSX";
#elif defined(EIGEN_VECTORIZE_HVX)
  // EIGEN_VECTORIZE_HVX is set by the backend selection above but previously
  // had no branch here, so HVX builds were reported as "None".
  return "HEXAGON HVX";
#else
  return "None";
#endif
}
540 
541 } // end namespace Eigen
542 
543 #endif // EIGEN_CONFIGURE_VECTORIZATION_H
Namespace containing all symbols from the Eigen library.
Definition: B01_Experimental.dox:1