2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2020 Research Organization for Information Science and Technology (RIST).
5 * Copyright (c) 2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
38 * armv8+sve support to GROMACS was contributed by the Research Organization for
39 * Information Science and Technology (RIST).
42 #ifndef GMX_SIMD_IMPL_ARM_SVE_SIMD_FLOAT_H
43 #define GMX_SIMD_IMPL_ARM_SVE_SIMD_FLOAT_H
53 #include "gromacs/math/utilities.h"
63 SimdFloat(const float f
) { this->simdInternal_
= svdup_f32(f
); }
65 SimdFloat(svfloat32_t simd
) : simdInternal_(simd
) {}
67 float32_t simdInternal_
__attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE
/ 8)));
75 SimdFInt32(const int32_t i
) { this->simdInternal_
= svdup_s32(i
); }
77 SimdFInt32(svint32_t simd
) : simdInternal_(simd
) {}
79 int32_t simdInternal_
__attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE
/ 8)));
87 SimdFBool(const bool b
)
89 this->simdInternal_
= svdup_n_u32_x(svptrue_b32(), b
? 0xFFFFFFFF : 0);
92 SimdFBool(svbool_t simd
) { this->simdInternal_
= svdup_n_u32_z(simd
, 0xFFFFFFFF); }
94 SimdFBool(svuint32_t simd
) : simdInternal_(simd
) {}
96 uint32_t simdInternal_
__attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE
/ 8)));
104 SimdFIBool(const bool b
)
106 this->simdInternal_
= svdup_n_u32_x(svptrue_b32(), b
? 0xFFFFFFFF : 0);
109 SimdFIBool(svbool_t simd
) { this->simdInternal_
= svdup_n_u32_z(simd
, 0xFFFFFFFF); }
111 SimdFIBool(svuint32_t simd
) : simdInternal_(simd
) {}
113 uint32_t simdInternal_
__attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE
/ 8)));
116 static inline SimdFloat gmx_simdcall
simdLoad(const float* m
, SimdFloatTag
= {})
118 assert(0 == (std::size_t(m
) % GMX_SIMD_ALIGNMENT
));
119 svbool_t pg
= svptrue_b32();
120 return { svld1_f32(pg
, m
) };
123 static inline SimdFloat gmx_simdcall
simdLoad(SimdFloat
* m
, int offset
, SimdFloatTag
= {})
125 assert(0 == (std::size_t(m
) % GMX_SIMD_ALIGNMENT
));
126 svbool_t pg
= svptrue_b32();
127 return { svld1_f32(pg
, reinterpret_cast<float*>(m
) + offset
* svcntw()) };
130 static inline SimdFloat gmx_simdcall
simdLoadFloat(const float* m
)
132 assert(0 == (std::size_t(m
) % GMX_SIMD_ALIGNMENT
));
133 svbool_t pg
= svptrue_b32();
134 return { svld1_f32(pg
, m
) };
137 static inline void gmx_simdcall
store(float* m
, SimdFloat a
)
139 assert(0 == (std::size_t(m
) % GMX_SIMD_ALIGNMENT
));
140 svbool_t pg
= svptrue_b32();
141 svst1_f32(pg
, m
, a
.simdInternal_
);
144 static inline SimdFloat gmx_simdcall
simdLoadU(const float* m
, SimdFloatTag
= {})
146 svbool_t pg
= svptrue_b32();
147 return { svld1_f32(pg
, m
) };
150 static inline void gmx_simdcall
storeU(float* m
, SimdFloat a
)
152 svbool_t pg
= svptrue_b32();
153 svst1_f32(pg
, m
, a
.simdInternal_
);
156 static inline SimdFloat gmx_simdcall
setZeroF()
158 return { svdup_f32(0.0f
) };
161 static inline void gmx_simdcall
simdIncr(SimdFloat
*& p
, SimdFloatTag
)
163 p
= reinterpret_cast<SimdFloat
*>(reinterpret_cast<uint64_t>(p
) + svcntw());
166 static inline SimdFInt32 gmx_simdcall
simdLoad(const std::int32_t* m
, SimdFInt32Tag
)
168 assert(0 == (std::size_t(m
) % GMX_SIMD_ALIGNMENT
));
169 svbool_t pg
= svptrue_b32();
170 return { svld1_s32(pg
, m
) };
173 static inline void gmx_simdcall
store(std::int32_t* m
, SimdFInt32 a
)
175 assert(0 == (std::size_t(m
) % GMX_SIMD_ALIGNMENT
));
176 svbool_t pg
= svptrue_b32();
177 svst1_s32(pg
, m
, a
.simdInternal_
);
180 static inline SimdFInt32 gmx_simdcall
simdLoadU(const std::int32_t* m
, SimdFInt32Tag
)
182 svbool_t pg
= svptrue_b32();
183 return { svld1_s32(pg
, m
) };
186 static inline void gmx_simdcall
storeU(std::int32_t* m
, SimdFInt32 a
)
188 svbool_t pg
= svptrue_b32();
189 svst1_s32(pg
, m
, a
.simdInternal_
);
192 static inline SimdFInt32 gmx_simdcall
setZeroFI()
194 return { svdup_s32(0) };
198 gmx_simdcall
static inline std::int32_t extract(SimdFInt32 a
)
200 svbool_t pg
= svwhilelt_b32(0, index
);
201 return svlasta_s32(pg
, a
.simdInternal_
);
205 gmx_simdcall
static inline float extract(SimdFloat a
)
207 svbool_t pg
= svwhilelt_b32(0, index
);
208 return svlasta_f32(pg
, a
.simdInternal_
);
211 static inline SimdFloat gmx_simdcall
operator&(SimdFloat a
, SimdFloat b
)
213 svbool_t pg
= svptrue_b32();
214 return { svreinterpret_f32_s32(svand_s32_x(pg
, svreinterpret_s32_f32(a
.simdInternal_
),
215 svreinterpret_s32_f32(b
.simdInternal_
))) };
218 static inline SimdFloat gmx_simdcall
andNot(SimdFloat a
, SimdFloat b
)
220 svbool_t pg
= svptrue_b32();
221 return { svreinterpret_f32_s32(svbic_s32_x(pg
, svreinterpret_s32_f32(b
.simdInternal_
),
222 svreinterpret_s32_f32(a
.simdInternal_
))) };
225 static inline SimdFloat gmx_simdcall
operator|(SimdFloat a
, SimdFloat b
)
227 svbool_t pg
= svptrue_b32();
228 return { svreinterpret_f32_s32(svorr_s32_x(pg
, svreinterpret_s32_f32(a
.simdInternal_
),
229 svreinterpret_s32_f32(b
.simdInternal_
))) };
232 static inline SimdFloat gmx_simdcall
operator^(SimdFloat a
, SimdFloat b
)
234 svbool_t pg
= svptrue_b32();
235 return { svreinterpret_f32_s32(sveor_s32_x(pg
, svreinterpret_s32_f32(a
.simdInternal_
),
236 svreinterpret_s32_f32(b
.simdInternal_
))) };
239 static inline SimdFloat gmx_simdcall
operator+(SimdFloat a
, SimdFloat b
)
241 svbool_t pg
= svptrue_b32();
242 return { svadd_f32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
245 static inline SimdFloat gmx_simdcall
operator-(SimdFloat a
, SimdFloat b
)
247 svbool_t pg
= svptrue_b32();
248 return { svsub_f32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
251 static inline SimdFloat gmx_simdcall
operator-(SimdFloat a
)
253 svbool_t pg
= svptrue_b32();
254 return { svneg_f32_x(pg
, a
.simdInternal_
) };
257 static inline SimdFloat gmx_simdcall
operator*(SimdFloat a
, SimdFloat b
)
259 svbool_t pg
= svptrue_b32();
260 return { svmul_f32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
263 static inline SimdFloat gmx_simdcall
fma(SimdFloat a
, SimdFloat b
, SimdFloat c
)
265 svbool_t pg
= svptrue_b32();
266 return { svmad_f32_x(pg
, a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
269 static inline SimdFloat gmx_simdcall
fms(SimdFloat a
, SimdFloat b
, SimdFloat c
)
271 svbool_t pg
= svptrue_b32();
272 return { svnmsb_f32_x(pg
, a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
275 static inline SimdFloat gmx_simdcall
fnma(SimdFloat a
, SimdFloat b
, SimdFloat c
)
277 svbool_t pg
= svptrue_b32();
278 return { svmsb_f32_x(pg
, a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
281 static inline SimdFloat gmx_simdcall
fnms(SimdFloat a
, SimdFloat b
, SimdFloat c
)
283 svbool_t pg
= svptrue_b32();
284 return { svnmad_f32_x(pg
, a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
287 static inline SimdFloat gmx_simdcall
rsqrt(SimdFloat x
)
289 return { svrsqrte_f32(x
.simdInternal_
) };
292 // The SIMD implementation seems to overflow when we square lu for
293 // values close to FLOAT_MAX, so we fall back on the version in
294 // simd_math.h, which is probably slightly slower.
295 #if GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_FLOAT
296 static inline SimdFloat gmx_simdcall
rsqrtIter(SimdFloat lu
, SimdFloat x
)
298 svbool_t pg
= svptrue_b32();
299 svfloat32_t tmp1
, tmp2
;
300 tmp1
= svmul_f32_x(pg
, x
.simdInternal_
, lu
.simdInternal_
);
301 tmp2
= svmul_n_f32_x(pg
, lu
.simdInternal_
, -0.5f
);
302 tmp1
= svmad_n_f32_x(pg
, tmp1
, lu
.simdInternal_
, -3.0f
);
303 return { svmul_f32_x(pg
, tmp1
, tmp2
) };
308 static inline SimdFloat gmx_simdcall
rcp(SimdFloat x
)
310 return { svrecpe_f32(x
.simdInternal_
) };
313 static inline SimdFloat gmx_simdcall
rcpIter(SimdFloat lu
, SimdFloat x
)
315 svbool_t pg
= svptrue_b32();
316 return { svmul_f32_x(pg
, lu
.simdInternal_
, svrecps_f32(lu
.simdInternal_
, x
.simdInternal_
)) };
319 static inline SimdFloat gmx_simdcall
maskAdd(SimdFloat a
, SimdFloat b
, SimdFBool m
)
321 svbool_t pg
= svcmpne_n_u32(svptrue_b32(), m
.simdInternal_
, 0);
322 return { svadd_f32_m(pg
, a
.simdInternal_
, b
.simdInternal_
) };
325 static inline SimdFloat gmx_simdcall
maskzMul(SimdFloat a
, SimdFloat b
, SimdFBool m
)
327 svbool_t pg
= svcmpne_n_u32(svptrue_b32(), m
.simdInternal_
, 0);
328 return { svmul_f32_z(pg
, a
.simdInternal_
, b
.simdInternal_
) };
331 static inline SimdFloat gmx_simdcall
maskzFma(SimdFloat a
, SimdFloat b
, SimdFloat c
, SimdFBool m
)
333 svbool_t pg
= svcmpne_n_u32(svptrue_b32(), m
.simdInternal_
, 0);
334 return { svmad_f32_z(pg
, a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
337 static inline SimdFloat gmx_simdcall
maskzRsqrt(SimdFloat x
, SimdFBool m
)
339 svbool_t pg
= svcmpne_n_u32(svptrue_b32(), m
.simdInternal_
, 0);
340 // The result will always be correct since we mask the result with m, but
341 // for debug builds we also want to make sure not to generate FP exceptions
343 x
.simdInternal_
= svsel_f32(pg
, x
.simdInternal_
, svdup_n_f32(1.0f
));
345 return { svreinterpret_f32_u32(
346 svand_n_u32_z(pg
, svreinterpret_u32_f32(svrsqrte_f32(x
.simdInternal_
)), 0xFFFFFFFF)) };
349 static inline SimdFloat gmx_simdcall
maskzRcp(SimdFloat x
, SimdFBool m
)
351 svbool_t pg
= svcmpne_n_u32(svptrue_b32(), m
.simdInternal_
, 0);
352 // The result will always be correct since we mask the result with m, but
353 // for debug builds we also want to make sure not to generate FP exceptions
355 x
.simdInternal_
= svsel_f32(pg
, x
.simdInternal_
, svdup_n_f32(1.0f
));
357 return { svreinterpret_f32_u32(
358 svand_n_u32_z(pg
, svreinterpret_u32_f32(svrecpe_f32(x
.simdInternal_
)), 0xFFFFFFFF)) };
361 static inline SimdFloat gmx_simdcall
abs(SimdFloat x
)
363 svbool_t pg
= svptrue_b32();
364 return { svabs_f32_x(pg
, x
.simdInternal_
) };
367 static inline SimdFloat gmx_simdcall
max(SimdFloat a
, SimdFloat b
)
369 svbool_t pg
= svptrue_b32();
370 return { svmax_f32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
373 static inline SimdFloat gmx_simdcall
min(SimdFloat a
, SimdFloat b
)
375 svbool_t pg
= svptrue_b32();
376 return { svmin_f32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
379 // Round and trunc operations are defined at the end of this file, since they
380 // need to use float-to-integer and integer-to-float conversions.
382 template<MathOptimization opt
= MathOptimization::Safe
>
383 static inline SimdFloat gmx_simdcall
frexp(SimdFloat value
, SimdFInt32
* exponent
)
385 svbool_t pg
= svptrue_b32();
386 const svint32_t exponentMask
= svdup_n_s32(0x7F800000);
387 const svint32_t mantissaMask
= svdup_n_s32(0x807FFFFF);
388 const svint32_t exponentBias
= svdup_n_s32(126); // add 1 to make our definition identical to frexp()
389 const svfloat32_t half
= svdup_n_f32(0.5f
);
392 iExponent
= svand_s32_x(pg
, svreinterpret_s32_f32(value
.simdInternal_
), exponentMask
);
393 iExponent
= svsub_s32_x(
394 pg
, svreinterpret_s32_u32(svlsr_n_u32_x(pg
, svreinterpret_u32_s32(iExponent
), 23)), exponentBias
);
395 exponent
->simdInternal_
= iExponent
;
397 return { svreinterpret_f32_s32(svorr_s32_x(
398 pg
, svand_s32_x(pg
, svreinterpret_s32_f32(value
.simdInternal_
), mantissaMask
),
399 svreinterpret_s32_f32(half
))) };
402 template<MathOptimization opt
= MathOptimization::Safe
>
403 static inline SimdFloat gmx_simdcall
ldexp(SimdFloat value
, SimdFInt32 exponent
)
405 svbool_t pg
= svptrue_b32();
406 const svint32_t exponentBias
= svdup_n_s32(127);
407 svint32_t iExponent
= svadd_s32_x(pg
, exponent
.simdInternal_
, exponentBias
);
409 if (opt
== MathOptimization::Safe
)
411 // Make sure biased argument is not negative
412 iExponent
= svmax_n_s32_x(pg
, iExponent
, 0);
415 iExponent
= svlsl_n_s32_x(pg
, iExponent
, 23);
417 return { svmul_f32_x(pg
, value
.simdInternal_
, svreinterpret_f32_s32(iExponent
)) };
420 static inline float gmx_simdcall
reduce(SimdFloat a
)
422 svbool_t pg
= svptrue_b32();
423 return svaddv_f32(pg
, a
.simdInternal_
);
426 static inline SimdFBool gmx_simdcall
operator==(SimdFloat a
, SimdFloat b
)
428 svbool_t pg
= svptrue_b32();
429 return { svcmpeq_f32(pg
, a
.simdInternal_
, b
.simdInternal_
) };
432 static inline SimdFBool gmx_simdcall
operator!=(SimdFloat a
, SimdFloat b
)
434 svbool_t pg
= svptrue_b32();
435 return { svcmpne_f32(pg
, a
.simdInternal_
, b
.simdInternal_
) };
438 static inline SimdFBool gmx_simdcall
operator<(SimdFloat a
, SimdFloat b
)
440 svbool_t pg
= svptrue_b32();
441 return { svcmplt_f32(pg
, a
.simdInternal_
, b
.simdInternal_
) };
444 static inline SimdFBool gmx_simdcall
operator<=(SimdFloat a
, SimdFloat b
)
446 svbool_t pg
= svptrue_b32();
447 return { svcmple_f32(pg
, a
.simdInternal_
, b
.simdInternal_
) };
450 static inline SimdFBool gmx_simdcall
testBits(SimdFloat a
)
452 svbool_t pg
= svptrue_b32();
453 return { svcmpne_n_s32(pg
, svreinterpret_s32_f32(a
.simdInternal_
), 0) };
456 static inline SimdFBool gmx_simdcall
operator&&(SimdFBool a
, SimdFBool b
)
458 svbool_t pg
= svptrue_b32();
459 return { svand_u32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
462 static inline SimdFBool gmx_simdcall
operator||(SimdFBool a
, SimdFBool b
)
464 svbool_t pg
= svptrue_b32();
465 return { svorr_u32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
468 static inline bool gmx_simdcall
anyTrue(SimdFBool a
)
470 svbool_t pg
= svptrue_b32();
471 return svptest_any(pg
, svcmpne_n_u32(svptrue_b32(), a
.simdInternal_
, 0));
474 static inline bool gmx_simdcall
extractFirst(SimdFBool a
)
476 svbool_t pg
= svptrue_b32();
477 return svptest_first(pg
, svcmpne_n_u32(svptrue_b32(), a
.simdInternal_
, 0));
480 static inline SimdFloat gmx_simdcall
selectByMask(SimdFloat a
, SimdFBool m
)
482 svbool_t pg
= svptrue_b32();
483 return { svreinterpret_f32_u32(svand_u32_x(pg
, svreinterpret_u32_f32(a
.simdInternal_
), m
.simdInternal_
)) };
486 static inline SimdFloat gmx_simdcall
selectByNotMask(SimdFloat a
, SimdFBool m
)
488 svbool_t pg
= svcmpeq_n_u32(svptrue_b32(), m
.simdInternal_
, 0);
489 return { svsel_f32(pg
, a
.simdInternal_
, svdup_f32(0.0f
)) };
492 static inline SimdFloat gmx_simdcall
blend(SimdFloat a
, SimdFloat b
, SimdFBool sel
)
494 svbool_t pg
= svcmpne_n_u32(svptrue_b32(), sel
.simdInternal_
, 0);
495 return { svsel_f32(pg
, b
.simdInternal_
, a
.simdInternal_
) };
498 static inline SimdFInt32 gmx_simdcall
operator&(SimdFInt32 a
, SimdFInt32 b
)
500 svbool_t pg
= svptrue_b32();
501 return { svand_s32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
504 static inline SimdFInt32 gmx_simdcall
andNot(SimdFInt32 a
, SimdFInt32 b
)
506 svbool_t pg
= svptrue_b32();
507 return { svbic_s32_x(pg
, b
.simdInternal_
, a
.simdInternal_
) };
510 static inline SimdFInt32 gmx_simdcall
operator|(SimdFInt32 a
, SimdFInt32 b
)
512 svbool_t pg
= svptrue_b32();
513 return { svorr_s32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
516 static inline SimdFInt32 gmx_simdcall
operator^(SimdFInt32 a
, SimdFInt32 b
)
518 svbool_t pg
= svptrue_b32();
519 return { sveor_s32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
522 static inline SimdFInt32 gmx_simdcall
operator+(SimdFInt32 a
, SimdFInt32 b
)
524 svbool_t pg
= svptrue_b32();
525 return { svadd_s32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
528 static inline SimdFInt32 gmx_simdcall
operator-(SimdFInt32 a
, SimdFInt32 b
)
530 svbool_t pg
= svptrue_b32();
531 return { svsub_s32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
534 static inline SimdFInt32 gmx_simdcall
operator*(SimdFInt32 a
, SimdFInt32 b
)
536 svbool_t pg
= svptrue_b32();
537 return { svmul_s32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
540 static inline SimdFIBool gmx_simdcall
operator==(SimdFInt32 a
, SimdFInt32 b
)
542 svbool_t pg
= svptrue_b32();
543 return { svcmpeq_s32(pg
, a
.simdInternal_
, b
.simdInternal_
) };
546 static inline SimdFIBool gmx_simdcall
testBits(SimdFInt32 a
)
548 svbool_t pg
= svptrue_b32();
549 return { svcmpne_n_s32(pg
, a
.simdInternal_
, (int32_t)0) };
552 static inline SimdFIBool gmx_simdcall
operator<(SimdFInt32 a
, SimdFInt32 b
)
554 svbool_t pg
= svptrue_b32();
555 return { svcmplt_s32(pg
, a
.simdInternal_
, b
.simdInternal_
) };
558 static inline SimdFIBool gmx_simdcall
operator&&(SimdFIBool a
, SimdFIBool b
)
560 svbool_t pg
= svptrue_b32();
561 return { svand_u32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
564 static inline SimdFIBool gmx_simdcall
operator||(SimdFIBool a
, SimdFIBool b
)
566 svbool_t pg
= svptrue_b32();
567 return { svorr_u32_x(pg
, a
.simdInternal_
, b
.simdInternal_
) };
570 static inline bool gmx_simdcall
anyTrue(SimdFIBool a
)
572 svbool_t pg
= svptrue_b32();
573 return svptest_any(pg
, svcmpne_n_u32(pg
, a
.simdInternal_
, 0));
576 static inline SimdFInt32 gmx_simdcall
selectByMask(SimdFInt32 a
, SimdFIBool m
)
578 svbool_t pg
= svptrue_b32();
579 return { svand_s32_x(pg
, a
.simdInternal_
, svreinterpret_s32_u32(m
.simdInternal_
)) };
582 static inline SimdFInt32 gmx_simdcall
selectByNotMask(SimdFInt32 a
, SimdFIBool m
)
584 svbool_t pg
= svcmpeq_n_u32(svptrue_b32(), m
.simdInternal_
, 0);
585 return { svadd_n_s32_z(pg
, a
.simdInternal_
, 0) };
588 static inline SimdFInt32 gmx_simdcall
blend(SimdFInt32 a
, SimdFInt32 b
, SimdFIBool sel
)
590 svbool_t pg
= svcmpne_n_u32(svptrue_b32(), sel
.simdInternal_
, 0);
591 return { svsel_s32(pg
, b
.simdInternal_
, a
.simdInternal_
) };
594 static inline SimdFInt32 gmx_simdcall
cvtR2I(SimdFloat a
)
596 svbool_t pg
= svptrue_b32();
597 return { svcvt_s32_x(pg
, svrinta_f32_x(pg
, a
.simdInternal_
)) };
600 static inline SimdFInt32 gmx_simdcall
cvttR2I(SimdFloat a
)
602 svbool_t pg
= svptrue_b32();
603 return { svcvt_s32_x(pg
, a
.simdInternal_
) };
606 static inline SimdFloat gmx_simdcall
cvtI2R(SimdFInt32 a
)
608 svbool_t pg
= svptrue_b32();
609 return { svcvt_f32_x(pg
, a
.simdInternal_
) };
612 static inline SimdFIBool gmx_simdcall
cvtB2IB(SimdFBool a
)
614 return { a
.simdInternal_
};
617 static inline SimdFBool gmx_simdcall
cvtIB2B(SimdFIBool a
)
619 return { a
.simdInternal_
};
622 static inline SimdFloat gmx_simdcall
round(SimdFloat x
)
624 svbool_t pg
= svptrue_b32();
625 return { svrinta_f32_x(pg
, x
.simdInternal_
) };
628 static inline SimdFloat gmx_simdcall
trunc(SimdFloat x
)
630 return cvtI2R(cvttR2I(x
));
635 #endif // GMX_SIMD_IMPL_ARM_SVE_SIMD_FLOAT_H