Make sure frexp() returns the correct result for argument 0.0
[gromacs.git] / src / gromacs / simd / impl_arm_sve / impl_arm_sve_simd_double.h
blob04cf849e46c882504e3cd7107be4e846963d31a2
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2020 Research Organization for Information Science and Technology (RIST).
5 * Copyright (c) 2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
38 * armv8+sve support to GROMACS was contributed by the Research Organization for
39 * Information Science and Technology (RIST).
42 #ifndef GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H
43 #define GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H
45 #include "config.h"
47 #include <cassert>
48 #include <cstddef>
49 #include <cstdint>
51 #include <arm_sve.h>
53 #include "gromacs/math/utilities.h"
55 #include "impl_arm_sve_simd_float.h"
// Governing predicates with every 64-bit lane active. Both expand to the same
// svptrue_b64() call; separate names are kept for double and integer code.
#define SVE_DOUBLE_MASK (svptrue_b64())
#define SVE_DINT32_MASK (svptrue_b64())
60 namespace gmx
63 class SimdDouble
65 public:
66 SimdDouble() {}
68 SimdDouble(const double d) { this->simdInternal_ = svdup_f64(d); }
70 SimdDouble(svfloat64_t simd) : simdInternal_(simd) {}
72 float64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
75 class SimdDInt32
77 public:
78 SimdDInt32() {}
80 SimdDInt32(const int32_t i) { this->simdInternal_ = svdup_s64(i); }
82 SimdDInt32(svint64_t simd) : simdInternal_(simd) {}
84 int64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
87 class SimdDBool
89 public:
90 SimdDBool() {}
92 SimdDBool(const bool b)
94 this->simdInternal_ = svdup_n_u64_x(svptrue_b64(), b ? 0xFFFFFFFFFFFFFFFF : 0);
97 SimdDBool(svbool_t simd) { this->simdInternal_ = svdup_n_u64_z(simd, 0xFFFFFFFFFFFFFFFF); }
99 SimdDBool(svuint64_t simd) : simdInternal_(simd) {}
101 uint64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
104 class SimdDIBool
106 public:
107 SimdDIBool() {}
109 SimdDIBool(const bool b)
111 this->simdInternal_ = svdup_n_u64_x(svptrue_b64(), b ? 0xFFFFFFFFFFFFFFFF : 0);
114 SimdDIBool(svbool_t simd) { this->simdInternal_ = svdup_n_u64_z(simd, 0xFFFFFFFFFFFFFFFF); }
116 SimdDIBool(svuint64_t simd) : simdInternal_(simd) {}
118 uint64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
121 static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
123 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
124 svbool_t pg = SVE_DOUBLE_MASK;
125 return { svld1_f64(pg, m) };
128 static inline SimdDouble gmx_simdcall simdLoad(SimdDouble* m, int offset, SimdDoubleTag = {})
130 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
131 svbool_t pg = SVE_DOUBLE_MASK;
132 return { svld1_f64(pg, reinterpret_cast<double*>(m) + offset * svcntd()) };
135 static inline SimdDouble gmx_simdcall simdLoadDouble(const double* m)
137 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
138 svbool_t pg = SVE_DOUBLE_MASK;
139 return { svld1_f64(pg, m) };
142 static inline void gmx_simdcall store(double* m, SimdDouble a)
144 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
145 svbool_t pg = SVE_DOUBLE_MASK;
146 svst1_f64(pg, m, a.simdInternal_);
149 static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
151 svbool_t pg = SVE_DOUBLE_MASK;
152 return { svld1_f64(pg, m) };
155 static inline void gmx_simdcall storeU(double* m, SimdDouble a)
157 svbool_t pg = SVE_DOUBLE_MASK;
158 svst1_f64(pg, m, a.simdInternal_);
161 static inline SimdDouble gmx_simdcall setZeroD()
163 return { svdup_f64(0.0) };
166 static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
168 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
169 svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
170 return { svunpklo_s64(svld1_s32(pg, m)) };
173 static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 a)
175 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
176 svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
177 svst1_s32(pg, m,
178 svuzp1(svreinterpret_s32_s64(a.simdInternal_), svreinterpret_s32_s64(a.simdInternal_)));
181 static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
183 svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
184 return { svunpklo_s64(svld1_s32(pg, m)) };
187 static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
189 svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
190 svst1_s32(pg, m,
191 svuzp1(svreinterpret_s32_s64(a.simdInternal_), svreinterpret_s32_s64(a.simdInternal_)));
194 static inline SimdDInt32 gmx_simdcall setZeroDI()
196 return { svdup_s64(0) };
199 template<int index>
200 gmx_simdcall static inline std::int32_t extract(SimdDInt32 a)
202 svbool_t pg = svwhilelt_b64(0, index);
203 return svlasta_s64(pg, a.simdInternal_);
206 template<int index>
207 gmx_simdcall static inline double extract(SimdDouble a)
209 svbool_t pg = svwhilelt_b64(0, index);
210 return svlasta_f64(pg, a.simdInternal_);
213 static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
215 svbool_t pg = svptrue_b64();
216 return { svreinterpret_f64_s64(svand_s64_x(pg, svreinterpret_s64_f64(a.simdInternal_),
217 svreinterpret_s64_f64(b.simdInternal_))) };
220 static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
222 svbool_t pg = svptrue_b64();
223 return { svreinterpret_f64_s64(svbic_s64_x(pg, svreinterpret_s64_f64(b.simdInternal_),
224 svreinterpret_s64_f64(a.simdInternal_))) };
227 static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
229 svbool_t pg = svptrue_b64();
230 return { svreinterpret_f64_s64(svorr_s64_x(pg, svreinterpret_s64_f64(a.simdInternal_),
231 svreinterpret_s64_f64(b.simdInternal_))) };
234 static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
236 svbool_t pg = svptrue_b64();
237 return { svreinterpret_f64_s64(sveor_s64_x(pg, svreinterpret_s64_f64(a.simdInternal_),
238 svreinterpret_s64_f64(b.simdInternal_))) };
241 static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
243 svbool_t pg = svptrue_b64();
244 return { svadd_f64_x(pg, a.simdInternal_, b.simdInternal_) };
247 static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
249 svbool_t pg = svptrue_b64();
250 return { svsub_f64_x(pg, a.simdInternal_, b.simdInternal_) };
253 static inline SimdDouble gmx_simdcall operator-(SimdDouble a)
255 svbool_t pg = svptrue_b64();
256 return { svneg_f64_x(pg, a.simdInternal_) };
259 static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
261 svbool_t pg = svptrue_b64();
262 return { svmul_f64_x(pg, a.simdInternal_, b.simdInternal_) };
265 static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
267 svbool_t pg = svptrue_b64();
268 return { svmad_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
271 static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
273 svbool_t pg = svptrue_b64();
274 return { svnmsb_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
277 static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
279 svbool_t pg = svptrue_b64();
280 return { svmsb_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
283 static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
285 svbool_t pg = svptrue_b64();
286 return { svnmad_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
289 static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
291 return { svrsqrte_f64(x.simdInternal_) };
// Native refinement step for 1/sqrt(x): lu * svrsqrts(lu * lu, x), where lu is
// the current estimate. Only compiled when GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_DOUBLE
// is nonzero.
//
// The native implementation seems to overflow when lu is squared for values
// close to the largest representable number, so the version in simd_math.h
// (probably slightly slower) is the one normally used.
//
// The previous body called NEON intrinsics (vmulq_f64, vrsqrtsq_f32, vmulq_f32)
// on SVE data and could not compile if the guard was ever enabled; it now uses
// the SVE equivalents.
#if GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_DOUBLE
static inline SimdDouble gmx_simdcall rsqrtIter(SimdDouble lu, SimdDouble x)
{
    svbool_t          pg        = svptrue_b64();
    const svfloat64_t luSquared = svmul_f64_x(pg, lu.simdInternal_, lu.simdInternal_);
    return { svmul_f64_x(pg, lu.simdInternal_, svrsqrts_f64(luSquared, x.simdInternal_)) };
}
#endif
305 static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
307 return { svrecpe_f64(x.simdInternal_) };
310 static inline SimdDouble gmx_simdcall rcpIter(SimdDouble lu, SimdDouble x)
312 svbool_t pg = svptrue_b64();
313 return { svmul_f64_x(pg, lu.simdInternal_, svrecps_f64(lu.simdInternal_, x.simdInternal_)) };
316 static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
318 svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
319 return { svadd_f64_m(pg, a.simdInternal_, b.simdInternal_) };
322 static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
324 svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
325 return { svmul_f64_z(pg, a.simdInternal_, b.simdInternal_) };
328 static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
330 svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
331 return { svmad_f64_z(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
334 static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
336 svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
337 // The result will always be correct since we mask the result with m, but
338 // for debug builds we also want to make sure not to generate FP exceptions
339 #ifndef NDEBUG
340 x.simdInternal_ = svsel_f64(pg, x.simdInternal_, svdup_n_f64(1.0));
341 #endif
342 return { svreinterpret_f64_u64(svand_n_u64_z(
343 pg, svreinterpret_u64_f64(svrsqrte_f64(x.simdInternal_)), 0xFFFFFFFFFFFFFFFF)) };
346 static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
348 svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
349 // The result will always be correct since we mask the result with m, but
350 // for debug builds we also want to make sure not to generate FP exceptions
351 #ifndef NDEBUG
352 x.simdInternal_ = svsel_f64(m, x.simdInternal_, svdup_n_f64(1.0));
353 #endif
354 return { svreinterpret_f64_u64(svand_n_u64_z(
355 pg, svreinterpret_u64_f64(svrecpe_f64(x.simdInternal_)), 0xFFFFFFFFFFFFFFFF)) };
358 static inline SimdDouble gmx_simdcall abs(SimdDouble x)
360 svbool_t pg = svptrue_b64();
361 return { svabs_f64_x(pg, x.simdInternal_) };
364 static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
366 svbool_t pg = svptrue_b64();
367 return { svmax_f64_x(pg, a.simdInternal_, b.simdInternal_) };
370 static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
372 svbool_t pg = svptrue_b64();
373 return { svmin_f64_x(pg, a.simdInternal_, b.simdInternal_) };
376 // Round and trunc operations are defined at the end of this file, since they
377 // need to use double-to-integer and integer-to-double conversions.
379 template<MathOptimization opt = MathOptimization::Safe>
380 static inline SimdDouble gmx_simdcall frexp(SimdDouble value, SimdDInt32* exponent)
382 svbool_t pg = svptrue_b64();
383 const svint64_t exponentMask = svdup_n_s64(0x7FF0000000000000LL);
384 const svint64_t mantissaMask = svdup_n_s64(0x800FFFFFFFFFFFFFLL);
385 const svint64_t exponentBias = svdup_n_s64(1022LL); // add 1 to make our definition identical to frexp()
386 const svfloat64_t half = svdup_n_f64(0.5);
387 svint64_t iExponent;
389 iExponent = svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), exponentMask);
390 // iExponent = svsub_s64_x(pg, svlsr_n_s64_x(pg, iExponent, 52), exponentBias);
391 iExponent = svsub_s64_x(
392 pg, svreinterpret_s64_u64(svlsr_n_u64_x(pg, svreinterpret_u64_s64(iExponent), 52)), exponentBias);
394 exponent->simdInternal_ = iExponent;
396 return { svreinterpret_f64_s64(svorr_s64_x(
397 pg, svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), mantissaMask),
398 svreinterpret_s64_f64(half))) };
401 template<MathOptimization opt = MathOptimization::Safe>
402 static inline SimdDouble gmx_simdcall ldexp(SimdDouble value, SimdDInt32 exponent)
404 svbool_t pg = svptrue_b64();
405 const svint64_t exponentBias = svdup_n_s64(1023);
406 svint64_t iExponent = svadd_s64_x(pg, exponent.simdInternal_, exponentBias);
408 if (opt == MathOptimization::Safe)
410 // Make sure biased argument is not negative
411 iExponent = svmax_n_s64_x(pg, iExponent, 0);
414 iExponent = svlsl_n_s64_x(pg, iExponent, 52);
416 return { svmul_f64_x(pg, value.simdInternal_, svreinterpret_f64_s64(iExponent)) };
419 static inline double gmx_simdcall reduce(SimdDouble a)
421 svbool_t pg = svptrue_b64();
422 return svadda_f64(pg, 0.0f, a.simdInternal_);
425 static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
427 svbool_t pg = svptrue_b64();
428 return { svcmpeq_f64(pg, a.simdInternal_, b.simdInternal_) };
431 static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
433 svbool_t pg = svptrue_b64();
434 return { svcmpne_f64(pg, a.simdInternal_, b.simdInternal_) };
437 static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
439 svbool_t pg = svptrue_b64();
440 return { svcmplt_f64(pg, a.simdInternal_, b.simdInternal_) };
443 static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
445 svbool_t pg = svptrue_b64();
446 return { svcmple_f64(pg, a.simdInternal_, b.simdInternal_) };
449 static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
451 svbool_t pg = svptrue_b64();
452 return { svcmpne_n_s64(pg, svreinterpret_s64_f64(a.simdInternal_), 0) };
455 static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
457 svbool_t pg = svptrue_b64();
458 return { svand_u64_x(pg, a.simdInternal_, b.simdInternal_) };
461 static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
463 svbool_t pg = svptrue_b64();
464 return { svorr_u64_x(pg, a.simdInternal_, b.simdInternal_) };
467 static inline bool gmx_simdcall anyTrue(SimdDBool a)
469 svbool_t pg = svptrue_b64();
470 return svptest_any(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
473 static inline bool gmx_simdcall extractFirst(SimdDBool a)
475 svbool_t pg = svptrue_b64();
476 return svptest_first(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
479 static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool m)
481 svbool_t pg = svptrue_b64();
482 return { svreinterpret_f64_u64(svand_u64_x(pg, svreinterpret_u64_f64(a.simdInternal_), m.simdInternal_)) };
485 static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool m)
487 svbool_t pg = svcmpeq_n_u64(svptrue_b64(), m.simdInternal_, 0);
488 return { svsel_f64(pg, a.simdInternal_, svdup_f64(0.0f)) };
491 static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
493 svbool_t pg = svcmpne_n_u64(svptrue_b64(), sel.simdInternal_, 0);
494 return { svsel_f64(pg, b.simdInternal_, a.simdInternal_) };
497 static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
499 svbool_t pg = svptrue_b64();
500 return { svand_s64_x(pg, a.simdInternal_, b.simdInternal_) };
503 static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
505 svbool_t pg = svptrue_b64();
506 return { svbic_s64_x(pg, b.simdInternal_, a.simdInternal_) };
509 static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
511 svbool_t pg = svptrue_b64();
512 return { svorr_s64_x(pg, a.simdInternal_, b.simdInternal_) };
515 static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
517 svbool_t pg = svptrue_b64();
518 return { sveor_s64_x(pg, a.simdInternal_, b.simdInternal_) };
521 static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
523 svbool_t pg = svptrue_b64();
524 return { svadd_s64_x(pg, a.simdInternal_, b.simdInternal_) };
527 static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
529 svbool_t pg = svptrue_b64();
530 return { svsub_s64_x(pg, a.simdInternal_, b.simdInternal_) };
533 static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
535 svbool_t pg = svptrue_b64();
536 return { svmul_s64_x(pg, a.simdInternal_, b.simdInternal_) };
539 static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
541 svbool_t pg = svptrue_b64();
542 return { svcmpeq_s64(pg, a.simdInternal_, b.simdInternal_) };
545 static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
547 svbool_t pg = svptrue_b64();
548 return { svcmpne_n_s64(pg, a.simdInternal_, (int64_t)0) };
551 static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
553 svbool_t pg = svptrue_b64();
554 return { svcmplt_s64(pg, a.simdInternal_, b.simdInternal_) };
557 static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
559 svbool_t pg = svptrue_b64();
560 return { svand_u64_x(pg, a.simdInternal_, b.simdInternal_) };
563 static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
565 svbool_t pg = svptrue_b64();
566 return { svorr_u64_x(pg, a.simdInternal_, b.simdInternal_) };
569 static inline bool gmx_simdcall anyTrue(SimdDIBool a)
571 svbool_t pg = svptrue_b64();
572 return svptest_any(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
575 static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool m)
577 svbool_t pg = svptrue_b64();
578 return { svand_s64_x(pg, a.simdInternal_, svreinterpret_s64_u64(m.simdInternal_)) };
581 static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool m)
583 svbool_t pg = svcmpeq_n_u64(svptrue_b64(), m.simdInternal_, 0);
584 return { svadd_n_s64_z(pg, a.simdInternal_, 0) };
587 static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
589 svbool_t pg = svcmpne_n_u64(svptrue_b64(), sel.simdInternal_, 0);
590 return { svsel_s64(pg, b.simdInternal_, a.simdInternal_) };
593 static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
595 svbool_t pg = svptrue_b64();
596 return { svcvt_s64_x(pg, svrinta_f64_x(pg, a.simdInternal_)) };
599 static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
601 // FIXME ???
602 svbool_t pg = svptrue_b64();
603 return { svcvt_s64_x(pg, a.simdInternal_) };
606 static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
608 svbool_t pg = svptrue_b64();
609 return { svcvt_f64_x(pg, a.simdInternal_) };
612 static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
614 return { a.simdInternal_ };
617 static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
619 return { a.simdInternal_ };
622 static inline SimdDouble gmx_simdcall round(SimdDouble x)
624 svbool_t pg = svptrue_b64();
625 return { svrinta_f64_x(pg, x.simdInternal_) };
628 static inline SimdDouble gmx_simdcall trunc(SimdDouble x)
630 return cvtI2R(cvttR2I(x));
633 static inline void gmx_simdcall cvtF2DD(SimdFloat gmx_unused f,
634 SimdDouble gmx_unused* d0,
635 SimdDouble gmx_unused* d1)
637 assert(GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH);
638 svbool_t pg = svptrue_b32();
639 d0->simdInternal_ = svcvt_f64_f32_x(pg, svzip1(f.simdInternal_, f.simdInternal_));
640 d1->simdInternal_ = svcvt_f64_f32_x(pg, svzip2(f.simdInternal_, f.simdInternal_));
643 static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
645 svbool_t pg = svptrue_b64();
646 assert(GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH);
647 return { svuzp1_f32(svcvt_f32_f64_x(pg, d0.simdInternal_), svcvt_f32_f64_x(pg, d1.simdInternal_)) };
650 } // namespace gmx
652 #endif // GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H