2 #ifdef EIGEN_POWER_USE_PREFETCH 3 #define EIGEN_POWER_PREFETCH(p) prefetch(p) 5 #define EIGEN_POWER_PREFETCH(p) 8 #if defined(_ARCH_PWR9) || defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) 9 #define USE_PARTIAL_PACKETS 13 #include "../../InternalHeaderCheck.h" 19 template <
typename Scalar,
typename Packet,
typename DataMapper, const Index accRows, const Index accCols>
20 EIGEN_ALWAYS_INLINE
void gemm_extra_row(
const DataMapper& res,
const Scalar* lhs_base,
const Scalar* rhs_base,
22 Index remaining_rows,
const Packet& pAlpha,
const Packet& pMask);
24 template <
typename Scalar,
typename Packet,
typename DataMapper, const Index accCols>
25 EIGEN_ALWAYS_INLINE
void gemm_extra_cols(
const DataMapper& res,
const Scalar* blockA,
const Scalar* blockB,
Index depth,
30 template <
typename Packet>
31 EIGEN_ALWAYS_INLINE Packet bmask(
const Index remaining_rows);
33 template <
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
const Index accRows,
34 const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
35 EIGEN_ALWAYS_INLINE
void gemm_complex_extra_row(
const DataMapper& res,
const Scalar* lhs_base,
const Scalar* rhs_base,
37 Index rows,
Index remaining_rows,
const Packet& pAlphaReal,
38 const Packet& pAlphaImag,
const Packet& pMask);
40 template <
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
const Index accCols,
41 bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
42 EIGEN_ALWAYS_INLINE
void gemm_complex_extra_cols(
const DataMapper& res,
const Scalar* blockA,
const Scalar* blockB,
45 const Packet& pAlphaReal,
const Packet& pAlphaImag,
48 template <
typename DataMapper>
49 EIGEN_ALWAYS_INLINE
void convertArrayBF16toF32(
float* result,
Index cols,
Index rows,
const DataMapper& src);
51 template <const Index size,
bool non_unit_str
ide, Index delta>
52 EIGEN_ALWAYS_INLINE
void storeBF16fromResult(bfloat16* dst, Packet8bf data,
Index resInc,
Index extra = 0);
54 template <
bool non_unit_str
ide = false>
55 EIGEN_ALWAYS_INLINE
void convertArrayPointerBF16toF32(
float* result,
Index cols,
Index rows, bfloat16* src,
58 template <
bool rhsExtraCols,
bool lhsExtraRows>
59 EIGEN_ALWAYS_INLINE
void storeResults(Packet4f (&acc)[4],
Index rows,
const Packet4f pAlpha,
float* result,
62 template <Index num_acc,
bool extraRows, Index size = 4>
63 EIGEN_ALWAYS_INLINE
void outputVecColResults(Packet4f (&acc)[num_acc][size],
float* result, Packet4f pAlpha,
66 template <Index num_acc, Index size = 4>
67 EIGEN_ALWAYS_INLINE
void outputVecResults(Packet4f (&acc)[num_acc][size],
float* result, Packet4f pAlpha);
69 template <
typename RhsMapper,
bool linear>
70 EIGEN_ALWAYS_INLINE Packet8bf loadColData(RhsMapper& rhs,
Index j);
72 template <
typename Packet>
73 EIGEN_ALWAYS_INLINE Packet ploadLhs(
const __UNPACK_TYPE__(Packet) * lhs);
75 template <
typename DataMapper,
typename Packet,
const Index accCols,
int StorageOrder,
bool Complex,
int N,
77 EIGEN_ALWAYS_INLINE
void bload(PacketBlock<Packet, N*(Complex ? 2 : 1)>& acc,
const DataMapper& res,
Index row,
80 template <
typename DataMapper,
typename Packet,
int N>
81 EIGEN_ALWAYS_INLINE
void bstore(PacketBlock<Packet, N>& acc,
const DataMapper& res,
Index row);
83 #ifdef USE_PARTIAL_PACKETS 84 template <
typename DataMapper,
typename Packet, const Index accCols,
bool Complex, Index N,
bool full = true>
85 EIGEN_ALWAYS_INLINE
void bload_partial(PacketBlock<Packet, N*(Complex ? 2 : 1)>& acc,
const DataMapper& res,
Index row,
88 template <
typename DataMapper,
typename Packet, Index N>
89 EIGEN_ALWAYS_INLINE
void bstore_partial(PacketBlock<Packet, N>& acc,
const DataMapper& res,
Index row,
Index elements);
92 template <
typename Packet,
int N>
93 EIGEN_ALWAYS_INLINE
void bscale(PacketBlock<Packet, N>& acc, PacketBlock<Packet, N>& accZ,
const Packet& pAlpha);
95 template <
typename Packet,
int N,
bool mask>
96 EIGEN_ALWAYS_INLINE
void bscale(PacketBlock<Packet, N>& acc, PacketBlock<Packet, N>& accZ,
const Packet& pAlpha,
99 template <
typename Packet,
int N,
bool mask>
100 EIGEN_ALWAYS_INLINE
void bscalec(PacketBlock<Packet, N>& aReal, PacketBlock<Packet, N>& aImag,
const Packet& bReal,
101 const Packet& bImag, PacketBlock<Packet, N>& cReal, PacketBlock<Packet, N>& cImag,
102 const Packet& pMask);
104 template <
typename Packet,
typename Packetc,
int N,
bool full>
105 EIGEN_ALWAYS_INLINE
void bcouple(PacketBlock<Packet, N>& taccReal, PacketBlock<Packet, N>& taccImag,
106 PacketBlock<Packetc, N * 2>& tRes, PacketBlock<Packetc, N>& acc1,
107 PacketBlock<Packetc, N>& acc2);
109 #define MICRO_NORMAL(iter) (accCols == accCols2) || (unroll_factor != (iter + 1)) 111 #define MICRO_UNROLL_ITER1(func, N) \ 112 switch (remaining_rows) { \ 118 if (sizeof(Scalar) == sizeof(float)) { \ 123 if (sizeof(Scalar) == sizeof(float)) { \ 129 #ifdef USE_PARTIAL_PACKETS 130 #define MICRO_UNROLL_ITER(func, N) \ 131 if (remaining_rows) { \ 137 #define MICRO_NORMAL_PARTIAL(iter) full || (unroll_factor != (iter + 1)) 139 #define MICRO_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N) 142 #define MICRO_COMPLEX_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N) 144 #define MICRO_NORMAL_COLS(iter, a, b) ((MICRO_NORMAL(iter)) ? a : b) 146 #define MICRO_LOAD1(lhs_ptr, iter) \ 147 if (unroll_factor > iter) { \ 148 lhsV##iter = ploadLhs<Packet>(lhs_ptr##iter); \ 149 lhs_ptr##iter += MICRO_NORMAL_COLS(iter, accCols, accCols2); \ 151 EIGEN_UNUSED_VARIABLE(lhsV##iter); \ 154 #define MICRO_LOAD_ONE(iter) MICRO_LOAD1(lhs_ptr, iter) 156 #define MICRO_COMPLEX_LOAD_ONE(iter) \ 157 if (!LhsIsReal && (unroll_factor > iter)) { \ 158 lhsVi##iter = ploadLhs<Packet>(lhs_ptr_real##iter + MICRO_NORMAL_COLS(iter, imag_delta, imag_delta2)); \ 160 EIGEN_UNUSED_VARIABLE(lhsVi##iter); \ 162 MICRO_LOAD1(lhs_ptr_real, iter) 164 #define MICRO_SRC_PTR1(lhs_ptr, advRows, iter) \ 165 if (unroll_factor > iter) { \ 166 lhs_ptr##iter = lhs_base + (row + (iter * accCols)) * strideA * advRows - \ 167 MICRO_NORMAL_COLS(iter, 0, (accCols - accCols2) * offsetA); \ 169 EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \ 172 #define MICRO_SRC_PTR_ONE(iter) MICRO_SRC_PTR1(lhs_ptr, 1, iter) 174 #define MICRO_COMPLEX_SRC_PTR_ONE(iter) MICRO_SRC_PTR1(lhs_ptr_real, advanceRows, iter) 176 #define MICRO_PREFETCH1(lhs_ptr, iter) \ 177 if (unroll_factor > iter) { \ 178 EIGEN_POWER_PREFETCH(lhs_ptr##iter); \ 181 #define MICRO_PREFETCH_ONE(iter) MICRO_PREFETCH1(lhs_ptr, iter) 183 #define MICRO_COMPLEX_PREFETCH_ONE(iter) MICRO_PREFETCH1(lhs_ptr_real, iter) 185 #ifdef USE_PARTIAL_PACKETS 186 #define MICRO_UPDATE_MASK 188 #define MICRO_UPDATE_MASK EIGEN_UNUSED_VARIABLE(pMask); 191 #define MICRO_UPDATE \ 192 if (accCols == accCols2) { \ 194 EIGEN_UNUSED_VARIABLE(offsetA); \ 195 row += unroll_factor * accCols; \ 198 #define MICRO_COMPLEX_UPDATE \ 200 if (LhsIsReal || (accCols == accCols2)) { \ 201 EIGEN_UNUSED_VARIABLE(imag_delta2); \ Namespace containing all symbols from the Eigen library.
Definition: B01_Experimental.dox:1
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82