/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2020 Research Organization for Information Science and Technology (RIST).
 * Copyright (c) 2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */

/*
 * armv8+sve support to GROMACS was contributed by the Research Organization for
 * Information Science and Technology (RIST).
 */

#ifndef GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H
#define GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H

#include "config.h"

#include <cassert>
#include <cstddef>
#include <cstdint>

#include <arm_sve.h>

#include "gromacs/math/utilities.h"

#include "impl_arm_sve_simd_float.h"

#define SVE_DOUBLE_MASK svptrue_b64()
#define SVE_DINT32_MASK svptrue_b64()
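
// Every SVE memory and arithmetic intrinsic takes a governing predicate
// (svbool_t) that selects the active lanes; the two masks above simply enable
// all 64-bit lanes, which is what the full-width double and double-int
// operations below need.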

namespace gmx
{

class SimdDouble
{
public:
    SimdDouble() {}

    SimdDouble(const double d) { this->simdInternal_ = svdup_f64(d); }

    SimdDouble(svfloat64_t simd) : simdInternal_(simd) {}

    float64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
};

class SimdDInt32
{
public:
    SimdDInt32() {}

    SimdDInt32(const int32_t i) { this->simdInternal_ = svdup_s64(i); }

    SimdDInt32(svint64_t simd) : simdInternal_(simd) {}

    int64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
};

class SimdDBool
{
public:
    SimdDBool() {}

    SimdDBool(const bool b)
    {
        this->simdInternal_ = svdup_n_u64_x(svptrue_b64(), b ? 0xFFFFFFFFFFFFFFFF : 0);
    }

    SimdDBool(svbool_t simd) { this->simdInternal_ = svdup_n_u64_z(simd, 0xFFFFFFFFFFFFFFFF); }

    SimdDBool(svuint64_t simd) : simdInternal_(simd) {}

    uint64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
};

class SimdDIBool
{
public:
    SimdDIBool() {}

    SimdDIBool(const bool b)
    {
        this->simdInternal_ = svdup_n_u64_x(svptrue_b64(), b ? 0xFFFFFFFFFFFFFFFF : 0);
    }

    SimdDIBool(svbool_t simd) { this->simdInternal_ = svdup_n_u64_z(simd, 0xFFFFFFFFFFFFFFFF); }

    SimdDIBool(svuint64_t simd) : simdInternal_(simd) {}

    uint64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
};

static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
{
    assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
    svbool_t pg = SVE_DOUBLE_MASK;
    return { svld1_f64(pg, m) };
}

static inline SimdDouble gmx_simdcall simdLoad(SimdDouble* m, int offset, SimdDoubleTag = {})
{
    assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
    svbool_t pg = SVE_DOUBLE_MASK;
    return { svld1_f64(pg, reinterpret_cast<double*>(m) + offset * svcntd()) };
}

static inline SimdDouble gmx_simdcall simdLoadDouble(const double* m)
{
    assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
    svbool_t pg = SVE_DOUBLE_MASK;
    return { svld1_f64(pg, m) };
}

static inline void gmx_simdcall store(double* m, SimdDouble a)
{
    assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
    svbool_t pg = SVE_DOUBLE_MASK;
    svst1_f64(pg, m, a.simdInternal_);
}

static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
{
    svbool_t pg = SVE_DOUBLE_MASK;
    return { svld1_f64(pg, m) };
}

static inline void gmx_simdcall storeU(double* m, SimdDouble a)
{
    svbool_t pg = SVE_DOUBLE_MASK;
    svst1_f64(pg, m, a.simdInternal_);
}

static inline SimdDouble gmx_simdcall setZeroD()
{
    return { svdup_f64(0.0) };
}

static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
{
    assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
    svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
    return { svunpklo_s64(svld1_s32(pg, m)) };
}
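
// SimdDInt32 keeps one 32-bit integer per 64-bit lane: the load above reads
// GMX_SIMD_DINT32_WIDTH packed 32-bit values under a while-predicate and widens
// them with svunpklo; the stores below undo this by de-interleaving with svuzp1
// before writing the packed 32-bit values back.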

static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 a)
{
    assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
    svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
    svst1_s32(pg, m,
              svuzp1(svreinterpret_s32_s64(a.simdInternal_), svreinterpret_s32_s64(a.simdInternal_)));
}

static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
{
    svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
    return { svunpklo_s64(svld1_s32(pg, m)) };
}

static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
{
    svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
    svst1_s32(pg, m,
              svuzp1(svreinterpret_s32_s64(a.simdInternal_), svreinterpret_s32_s64(a.simdInternal_)));
}

static inline SimdDInt32 gmx_simdcall setZeroDI()
{
    return { svdup_s64(0) };
}

template<int index>
gmx_simdcall static inline std::int32_t extract(SimdDInt32 a)
{
    svbool_t pg = svwhilelt_b64(0, index);
    return svlasta_s64(pg, a.simdInternal_);
}

template<int index>
gmx_simdcall static inline double extract(SimdDouble a)
{
    svbool_t pg = svwhilelt_b64(0, index);
    return svlasta_f64(pg, a.simdInternal_);
}
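
// svwhilelt_b64(0, index) yields a predicate covering lanes 0..index-1, and
// svlasta returns the element just past the last active lane, i.e. lane
// 'index'. Together they extract an arbitrary lane without a memory round-trip.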

static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svreinterpret_f64_s64(svand_s64_x(pg, svreinterpret_s64_f64(a.simdInternal_),
                                               svreinterpret_s64_f64(b.simdInternal_))) };
}

static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svreinterpret_f64_s64(svbic_s64_x(pg, svreinterpret_s64_f64(b.simdInternal_),
                                               svreinterpret_s64_f64(a.simdInternal_))) };
}

static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svreinterpret_f64_s64(svorr_s64_x(pg, svreinterpret_s64_f64(a.simdInternal_),
                                               svreinterpret_s64_f64(b.simdInternal_))) };
}

static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svreinterpret_f64_s64(sveor_s64_x(pg, svreinterpret_s64_f64(a.simdInternal_),
                                               svreinterpret_s64_f64(b.simdInternal_))) };
}

static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svadd_f64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svsub_f64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall operator-(SimdDouble a)
{
    svbool_t pg = svptrue_b64();
    return { svneg_f64_x(pg, a.simdInternal_) };
}

static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svmul_f64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
{
    svbool_t pg = svptrue_b64();
    return { svmad_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
{
    svbool_t pg = svptrue_b64();
    return { svnmsb_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
{
    svbool_t pg = svptrue_b64();
    return { svmsb_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
{
    svbool_t pg = svptrue_b64();
    return { svnmad_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
{
    return { svrsqrte_f64(x.simdInternal_) };
}

// The SIMD implementation seems to overflow when we square lu for
// values close to FLOAT_MAX, so we fall back on the version in
// simd_math.h, which is probably slightly slower.
#if GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_DOUBLE
static inline SimdDouble gmx_simdcall rsqrtIter(SimdDouble lu, SimdDouble x)
{
    return { vmulq_f64(lu.simdInternal_,
                       vrsqrtsq_f32(vmulq_f32(lu.simdInternal_, lu.simdInternal_), x.simdInternal_)) };
}
#endif
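
// Note: the guarded block above appears to be compiled out (the comment above
// indicates GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_DOUBLE is not set for SVE), and its
// body still uses NEON-style vmulq/vrsqrtsq intrinsics. The generic fallback in
// simd_math.h applies the usual Newton-Raphson step
//     lu' = lu * (3 - x * lu^2) / 2,
// which roughly doubles the number of correct bits per iteration.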

static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
{
    return { svrecpe_f64(x.simdInternal_) };
}

static inline SimdDouble gmx_simdcall rcpIter(SimdDouble lu, SimdDouble x)
{
    svbool_t pg = svptrue_b64();
    return { svmul_f64_x(pg, lu.simdInternal_, svrecps_f64(lu.simdInternal_, x.simdInternal_)) };
}
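
// svrecps_f64(lu, x) computes (2 - lu * x), so the multiply above performs one
// Newton-Raphson refinement of the reciprocal estimate: lu' = lu * (2 - lu * x).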

static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
{
    svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
    return { svadd_f64_m(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
{
    svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
    return { svmul_f64_z(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
{
    svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
    return { svmad_f64_z(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
{
    svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
    // The result will always be correct since we mask the result with m, but
    // for debug builds we also want to make sure not to generate FP exceptions
#ifndef NDEBUG
    x.simdInternal_ = svsel_f64(pg, x.simdInternal_, svdup_n_f64(1.0));
#endif
    return { svreinterpret_f64_u64(svand_n_u64_z(
            pg, svreinterpret_u64_f64(svrsqrte_f64(x.simdInternal_)), 0xFFFFFFFFFFFFFFFF)) };
}

static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
{
    svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
    // The result will always be correct since we mask the result with m, but
    // for debug builds we also want to make sure not to generate FP exceptions
#ifndef NDEBUG
    x.simdInternal_ = svsel_f64(pg, x.simdInternal_, svdup_n_f64(1.0));
#endif
    return { svreinterpret_f64_u64(svand_n_u64_z(
            pg, svreinterpret_u64_f64(svrecpe_f64(x.simdInternal_)), 0xFFFFFFFFFFFFFFFF)) };
}

static inline SimdDouble gmx_simdcall abs(SimdDouble x)
{
    svbool_t pg = svptrue_b64();
    return { svabs_f64_x(pg, x.simdInternal_) };
}

static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svmax_f64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svmin_f64_x(pg, a.simdInternal_, b.simdInternal_) };
}

// Round and trunc operations are defined at the end of this file, since they
// need to use double-to-integer and integer-to-double conversions.

template<MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble gmx_simdcall frexp(SimdDouble value, SimdDInt32* exponent)
{
    svbool_t          pg           = svptrue_b64();
    const svint64_t   exponentMask = svdup_n_s64(0x7FF0000000000000LL);
    const svint64_t   mantissaMask = svdup_n_s64(0x800FFFFFFFFFFFFFLL);
    const svint64_t   exponentBias = svdup_n_s64(1022LL); // add 1 to make our definition identical to frexp()
    const svfloat64_t half         = svdup_n_f64(0.5);
    svint64_t         iExponent;

    iExponent = svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), exponentMask);
    iExponent = svsub_s64_x(
            pg, svreinterpret_s64_u64(svlsr_n_u64_x(pg, svreinterpret_u64_s64(iExponent), 52)),
            exponentBias);

    exponent->simdInternal_ = iExponent;

    return { svreinterpret_f64_s64(svorr_s64_x(
            pg, svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), mantissaMask),
            svreinterpret_s64_f64(half))) };
}
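
// Worked example: 8.0 has biased exponent field 1026 (= 1023 + 3). Shifting
// right by 52 and subtracting 1022 gives exponent = 4, while masking to the
// sign/mantissa bits and OR-ing in the bit pattern of 0.5 gives fraction = 0.5,
// so that 8.0 == 0.5 * 2^4, exactly the std::frexp convention.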

template<MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble gmx_simdcall ldexp(SimdDouble value, SimdDInt32 exponent)
{
    svbool_t        pg           = svptrue_b64();
    const svint64_t exponentBias = svdup_n_s64(1023);
    svint64_t       iExponent    = svadd_s64_x(pg, exponent.simdInternal_, exponentBias);

    if (opt == MathOptimization::Safe)
    {
        // Make sure biased argument is not negative
        iExponent = svmax_n_s64_x(pg, iExponent, 0);
    }

    iExponent = svlsl_n_s64_x(pg, iExponent, 52);

    return { svmul_f64_x(pg, value.simdInternal_, svreinterpret_f64_s64(iExponent)) };
}
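
// The shift builds the double 2^e directly in its bit pattern: (e + 1023) << 52
// is a double with exponent e and mantissa 1.0. E.g. ldexp(0.75, 4) forms
// (4 + 1023) << 52 == 16.0 and returns 0.75 * 16.0 = 12.0.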

static inline double gmx_simdcall reduce(SimdDouble a)
{
    svbool_t pg = svptrue_b64();
    return svadda_f64(pg, 0.0, a.simdInternal_);
}
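
// svadda performs a strictly ordered (left-to-right) horizontal add starting
// from the scalar 0.0, so the reduction is bit-reproducible across vector
// lengths at the cost of being serial.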

static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svcmpeq_f64(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svcmpne_f64(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svcmplt_f64(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
{
    svbool_t pg = svptrue_b64();
    return { svcmple_f64(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
{
    svbool_t pg = svptrue_b64();
    return { svcmpne_n_s64(pg, svreinterpret_s64_f64(a.simdInternal_), 0) };
}

static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
{
    svbool_t pg = svptrue_b64();
    return { svand_u64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
{
    svbool_t pg = svptrue_b64();
    return { svorr_u64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline bool gmx_simdcall anyTrue(SimdDBool a)
{
    svbool_t pg = svptrue_b64();
    return svptest_any(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
}

static inline bool gmx_simdcall extractFirst(SimdDBool a)
{
    svbool_t pg = svptrue_b64();
    return svptest_first(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
}

static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool m)
{
    svbool_t pg = svptrue_b64();
    return { svreinterpret_f64_u64(
            svand_u64_x(pg, svreinterpret_u64_f64(a.simdInternal_), m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool m)
{
    svbool_t pg = svcmpeq_n_u64(svptrue_b64(), m.simdInternal_, 0);
    return { svsel_f64(pg, a.simdInternal_, svdup_f64(0.0)) };
}

static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
{
    svbool_t pg = svcmpne_n_u64(svptrue_b64(), sel.simdInternal_, 0);
    return { svsel_f64(pg, b.simdInternal_, a.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
{
    svbool_t pg = svptrue_b64();
    return { svand_s64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
{
    svbool_t pg = svptrue_b64();
    return { svbic_s64_x(pg, b.simdInternal_, a.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
{
    svbool_t pg = svptrue_b64();
    return { svorr_s64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
{
    svbool_t pg = svptrue_b64();
    return { sveor_s64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
{
    svbool_t pg = svptrue_b64();
    return { svadd_s64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
{
    svbool_t pg = svptrue_b64();
    return { svsub_s64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
{
    svbool_t pg = svptrue_b64();
    return { svmul_s64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
{
    svbool_t pg = svptrue_b64();
    return { svcmpeq_s64(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
{
    svbool_t pg = svptrue_b64();
    return { svcmpne_n_s64(pg, a.simdInternal_, (int64_t)0) };
}

static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
{
    svbool_t pg = svptrue_b64();
    return { svcmplt_s64(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
{
    svbool_t pg = svptrue_b64();
    return { svand_u64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
{
    svbool_t pg = svptrue_b64();
    return { svorr_u64_x(pg, a.simdInternal_, b.simdInternal_) };
}

static inline bool gmx_simdcall anyTrue(SimdDIBool a)
{
    svbool_t pg = svptrue_b64();
    return svptest_any(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
}

static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool m)
{
    svbool_t pg = svptrue_b64();
    return { svand_s64_x(pg, a.simdInternal_, svreinterpret_s64_u64(m.simdInternal_)) };
}

static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool m)
{
    svbool_t pg = svcmpeq_n_u64(svptrue_b64(), m.simdInternal_, 0);
    return { svadd_n_s64_z(pg, a.simdInternal_, 0) };
}

static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
{
    svbool_t pg = svcmpne_n_u64(svptrue_b64(), sel.simdInternal_, 0);
    return { svsel_s64(pg, b.simdInternal_, a.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
{
    svbool_t pg = svptrue_b64();
    return { svcvt_s64_x(pg, svrinta_f64_x(pg, a.simdInternal_)) };
}

static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
{
    svbool_t pg = svptrue_b64();
    return { svcvt_s64_x(pg, a.simdInternal_) };
}
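
// svcvt from double to integer rounds toward zero, so cvttR2I truncates
// directly, while cvtR2I first rounds to nearest with svrinta; trunc() below
// reuses this pair to truncate and convert back.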

static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
{
    svbool_t pg = svptrue_b64();
    return { svcvt_f64_x(pg, a.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
{
    return { a.simdInternal_ };
}

static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
{
    return { a.simdInternal_ };
}

static inline SimdDouble gmx_simdcall round(SimdDouble x)
{
    svbool_t pg = svptrue_b64();
    return { svrinta_f64_x(pg, x.simdInternal_) };
}

static inline SimdDouble gmx_simdcall trunc(SimdDouble x)
{
    return cvtI2R(cvttR2I(x));
}

static inline void gmx_simdcall cvtF2DD(SimdFloat gmx_unused f,
                                        SimdDouble gmx_unused* d0,
                                        SimdDouble gmx_unused* d1)
{
    assert(GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH);
    svbool_t pg       = svptrue_b32();
    d0->simdInternal_ = svcvt_f64_f32_x(pg, svzip1(f.simdInternal_, f.simdInternal_));
    d1->simdInternal_ = svcvt_f64_f32_x(pg, svzip2(f.simdInternal_, f.simdInternal_));
}

static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
{
    svbool_t pg = svptrue_b64();
    assert(GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH);
    return { svuzp1_f32(svcvt_f32_f64_x(pg, d0.simdInternal_), svcvt_f32_f64_x(pg, d1.simdInternal_)) };
}
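
// svzip1/svzip2 duplicate each float into both 32-bit halves of a 64-bit lane,
// so the even element of every lane holds the value that svcvt_f64_f32 widens;
// svuzp1 inverts this after narrowing by gathering the even 32-bit elements of
// the two converted vectors back into one packed float vector.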

} // namespace gmx

#endif // GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H