Make sure frexp() returns the correct result for argument 0.0
[gromacs.git] src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd_simd_double.h
blob ee263238be20bcff5fe20b037e75de2168417544
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#ifndef GMX_SIMD_IMPL_ARM_NEON_ASIMD_SIMD_DOUBLE_H
#define GMX_SIMD_IMPL_ARM_NEON_ASIMD_SIMD_DOUBLE_H

#include "config.h"

#include <cassert>
#include <cstddef>
#include <cstdint>

#include <arm_neon.h>

#include "gromacs/math/utilities.h"

#include "impl_arm_neon_asimd_simd_float.h"

namespace gmx
{

class SimdDouble
{
public:
    SimdDouble() {}

    SimdDouble(double d) : simdInternal_(vdupq_n_f64(d)) {}

    // Internal utility constructor to simplify return statements
    SimdDouble(float64x2_t simd) : simdInternal_(simd) {}

    float64x2_t simdInternal_;
};

class SimdDInt32
{
public:
    SimdDInt32() {}

    SimdDInt32(std::int32_t i) : simdInternal_(vdup_n_s32(i)) {}

    // Internal utility constructor to simplify return statements
    SimdDInt32(int32x2_t simd) : simdInternal_(simd) {}

    int32x2_t simdInternal_;
};

class SimdDBool
{
public:
    SimdDBool() {}

    SimdDBool(bool b) : simdInternal_(vdupq_n_u64(b ? 0xFFFFFFFFFFFFFFFF : 0)) {}

    // Internal utility constructor to simplify return statements
    SimdDBool(uint64x2_t simd) : simdInternal_(simd) {}

    uint64x2_t simdInternal_;
};

class SimdDIBool
{
public:
    SimdDIBool() {}

    SimdDIBool(bool b) : simdInternal_(vdup_n_u32(b ? 0xFFFFFFFF : 0)) {}

    // Internal utility constructor to simplify return statements
    SimdDIBool(uint32x2_t simd) : simdInternal_(simd) {}

    uint32x2_t simdInternal_;
};

static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
{
    assert(std::size_t(m) % 16 == 0);
    return { vld1q_f64(m) };
}

static inline void gmx_simdcall store(double* m, SimdDouble a)
{
    assert(std::size_t(m) % 16 == 0);
    vst1q_f64(m, a.simdInternal_);
}

static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
{
    return { vld1q_f64(m) };
}

static inline void gmx_simdcall storeU(double* m, SimdDouble a)
{
    vst1q_f64(m, a.simdInternal_);
}

static inline SimdDouble gmx_simdcall setZeroD()
{
    return { vdupq_n_f64(0.0) };
}

static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
{
    assert(std::size_t(m) % 8 == 0);
    return { vld1_s32(m) };
}

static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 a)
{
    assert(std::size_t(m) % 8 == 0);
    vst1_s32(m, a.simdInternal_);
}

static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
{
    return { vld1_s32(m) };
}

static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
{
    vst1_s32(m, a.simdInternal_);
}

static inline SimdDInt32 gmx_simdcall setZeroDI()
{
    return { vdup_n_s32(0) };
}

template<int index>
gmx_simdcall static inline std::int32_t extract(SimdDInt32 a)
{
    return vget_lane_s32(a.simdInternal_, index);
}

static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
{
    return { float64x2_t(vandq_s64(int64x2_t(a.simdInternal_), int64x2_t(b.simdInternal_))) };
}

static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
{
    return { float64x2_t(vbicq_s64(int64x2_t(b.simdInternal_), int64x2_t(a.simdInternal_))) };
}

static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
{
    return { float64x2_t(vorrq_s64(int64x2_t(a.simdInternal_), int64x2_t(b.simdInternal_))) };
}

static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
{
    return { float64x2_t(veorq_s64(int64x2_t(a.simdInternal_), int64x2_t(b.simdInternal_))) };
}

static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
{
    return { vaddq_f64(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
{
    return { vsubq_f64(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall operator-(SimdDouble x)
{
    return { vnegq_f64(x.simdInternal_) };
}

static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
{
    return { vmulq_f64(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vfmaq_f64(c.simdInternal_, b.simdInternal_, a.simdInternal_) };
}

static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vnegq_f64(vfmsq_f64(c.simdInternal_, b.simdInternal_, a.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vfmsq_f64(c.simdInternal_, b.simdInternal_, a.simdInternal_) };
}

static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vnegq_f64(vfmaq_f64(c.simdInternal_, b.simdInternal_, a.simdInternal_)) };
}
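
// Note on operand order: NEON's vfmaq_f64(c, b, a) computes c + b*a and
// vfmsq_f64(c, b, a) computes c - b*a, so the four wrappers above evaluate to
//   fma  = a*b + c,   fms  = a*b - c,   fnma = c - a*b,   fnms = -a*b - c.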

static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
{
    return { vrsqrteq_f64(x.simdInternal_) };
}

static inline SimdDouble gmx_simdcall rsqrtIter(SimdDouble lu, SimdDouble x)
{
    return { vmulq_f64(lu.simdInternal_,
                       vrsqrtsq_f64(vmulq_f64(lu.simdInternal_, lu.simdInternal_), x.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
{
    return { vrecpeq_f64(x.simdInternal_) };
}

static inline SimdDouble gmx_simdcall rcpIter(SimdDouble lu, SimdDouble x)
{
    return { vmulq_f64(lu.simdInternal_, vrecpsq_f64(lu.simdInternal_, x.simdInternal_)) };
}
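
// For reference: vrsqrtsq_f64(a, b) returns (3 - a*b) / 2 and vrecpsq_f64(a, b)
// returns 2 - a*b, so each iteration helper above performs one Newton-Raphson
// refinement step on the current estimate lu:
//   rsqrtIter: lu' = lu * (3 - x*lu*lu) / 2   (refining 1/sqrt(x))
//   rcpIter:   lu' = lu * (2 - x*lu)          (refining 1/x)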

static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
{
    float64x2_t addend = float64x2_t(vandq_u64(uint64x2_t(b.simdInternal_), m.simdInternal_));

    return { vaddq_f64(a.simdInternal_, addend) };
}

static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
{
    float64x2_t prod = vmulq_f64(a.simdInternal_, b.simdInternal_);

    return { float64x2_t(vandq_u64(uint64x2_t(prod), m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
{
    float64x2_t prod = vfmaq_f64(c.simdInternal_, b.simdInternal_, a.simdInternal_);

    return { float64x2_t(vandq_u64(uint64x2_t(prod), m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
{
    // The result will always be correct since we mask the result with m, but
    // for debug builds we also want to make sure not to generate FP exceptions
#ifndef NDEBUG
    x.simdInternal_ = vbslq_f64(m.simdInternal_, x.simdInternal_, vdupq_n_f64(1.0));
#endif
    return { float64x2_t(vandq_u64(uint64x2_t(vrsqrteq_f64(x.simdInternal_)), m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
{
    // The result will always be correct since we mask the result with m, but
    // for debug builds we also want to make sure not to generate FP exceptions
#ifndef NDEBUG
    x.simdInternal_ = vbslq_f64(m.simdInternal_, x.simdInternal_, vdupq_n_f64(1.0));
#endif
    return { float64x2_t(vandq_u64(uint64x2_t(vrecpeq_f64(x.simdInternal_)), m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall abs(SimdDouble x)
{
    return { vabsq_f64(x.simdInternal_) };
}

static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
{
    return { vmaxq_f64(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
{
    return { vminq_f64(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall round(SimdDouble x)
{
    return { vrndnq_f64(x.simdInternal_) };
}

static inline SimdDouble gmx_simdcall trunc(SimdDouble x)
{
    return { vrndq_f64(x.simdInternal_) };
}

template<MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent)
{
    const float64x2_t exponentMask = float64x2_t(vdupq_n_s64(0x7FF0000000000000LL));
    const float64x2_t mantissaMask = float64x2_t(vdupq_n_s64(0x800FFFFFFFFFFFFFLL));

    const int64x2_t exponentBias = vdupq_n_s64(1022); // add 1 to make our definition identical to frexp()
    const float64x2_t half       = vdupq_n_f64(0.5);
    int64x2_t         iExponent;

    iExponent               = vandq_s64(int64x2_t(value.simdInternal_), int64x2_t(exponentMask));
    iExponent               = vsubq_s64(vshrq_n_s64(iExponent, 52), exponentBias);
    exponent->simdInternal_ = vmovn_s64(iExponent);

    return { float64x2_t(vorrq_s64(
            vandq_s64(int64x2_t(value.simdInternal_), int64x2_t(mantissaMask)), int64x2_t(half))) };
}
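
// The decomposition above relies on the IEEE-754 binary64 layout (sign in bit 63,
// biased exponent in bits 52-62, fraction in bits 0-51). A scalar sketch of the
// same bit manipulation, purely illustrative (bit_cast is shorthand here, not
// part of this header):
//   int64_t bits     = bit_cast<int64_t>(v);
//   int     e        = int((bits & 0x7FF0000000000000LL) >> 52) - 1022;
//   double  mantissa = bit_cast<double>((bits & 0x800FFFFFFFFFFFFFLL)
//                                       | bit_cast<int64_t>(0.5));
// This gives v == mantissa * 2^e with |mantissa| in [0.5, 1) for normalized,
// nonzero v; an argument of 0.0 has an all-zero exponent field and needs
// separate handling to return exponent 0.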

template<MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble ldexp(SimdDouble value, SimdDInt32 exponent)
{
    const int32x2_t exponentBias = vdup_n_s32(1023);
    int32x2_t       iExponent    = vadd_s32(exponent.simdInternal_, exponentBias);
    int64x2_t       iExponent64;

    if (opt == MathOptimization::Safe)
    {
        // Make sure biased argument is not negative
        iExponent = vmax_s32(iExponent, vdup_n_s32(0));
    }

    iExponent64 = vmovl_s32(iExponent);
    iExponent64 = vshlq_n_s64(iExponent64, 52);

    return { vmulq_f64(value.simdInternal_, float64x2_t(iExponent64)) };
}
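
// A scalar sketch of the same idea, purely illustrative: for exponents in the
// normal range the bit pattern (e + 1023) << 52 reinterpreted as a double is
// exactly 2^e, so
//   double scale = bit_cast<double>(int64_t(e + 1023) << 52);  // == 2^e
//   return value * scale;
// Clamping the biased exponent at zero in the Safe path makes the scale factor
// become 0.0 for very negative e, instead of letting the left shift push set
// bits into the sign and exponent fields and produce a garbage scale.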

static inline double gmx_simdcall reduce(SimdDouble a)
{
    float64x2_t b = vpaddq_f64(a.simdInternal_, a.simdInternal_);
    return vgetq_lane_f64(b, 0);
}

static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
{
    return { vceqq_f64(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
{
    return { vreinterpretq_u64_u32(
            vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a.simdInternal_, b.simdInternal_)))) };
}

static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
{
    return { vcltq_f64(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
{
    return { vcleq_f64(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
{
    return { vtstq_s64(int64x2_t(a.simdInternal_), int64x2_t(a.simdInternal_)) };
}

static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
{
    return { vandq_u64(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
{
    return { vorrq_u64(a.simdInternal_, b.simdInternal_) };
}

static inline bool gmx_simdcall anyTrue(SimdDBool a)
{
    return (vmaxvq_u32((uint32x4_t)(a.simdInternal_)) != 0);
}
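
// Each 64-bit boolean lane is either all ones or all zeros, so reinterpreting
// the mask as four 32-bit lanes and taking their horizontal maximum with
// vmaxvq_u32 is nonzero exactly when at least one double lane is true.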

static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool m)
{
    return { float64x2_t(vandq_u64(uint64x2_t(a.simdInternal_), m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool m)
{
    return { float64x2_t(vbicq_u64(uint64x2_t(a.simdInternal_), m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
{
    return { vbslq_f64(sel.simdInternal_, b.simdInternal_, a.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
{
    return { vand_s32(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
{
    return { vbic_s32(b.simdInternal_, a.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
{
    return { vorr_s32(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
{
    return { veor_s32(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
{
    return { vadd_s32(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
{
    return { vsub_s32(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
{
    return { vmul_s32(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
{
    return { vceq_s32(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
{
    return { vtst_s32(a.simdInternal_, a.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
{
    return { vclt_s32(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
{
    return { vand_u32(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
{
    return { vorr_u32(a.simdInternal_, b.simdInternal_) };
}

static inline bool gmx_simdcall anyTrue(SimdDIBool a)
{
    return (vmaxv_u32(a.simdInternal_) != 0);
}

static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool m)
{
    return { vand_s32(a.simdInternal_, vreinterpret_s32_u32(m.simdInternal_)) };
}

static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool m)
{
    return { vbic_s32(a.simdInternal_, vreinterpret_s32_u32(m.simdInternal_)) };
}

static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
{
    return { vbsl_s32(sel.simdInternal_, b.simdInternal_, a.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
{
    return { vmovn_s64(vcvtnq_s64_f64(a.simdInternal_)) };
}

static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
{
    return { vmovn_s64(vcvtq_s64_f64(a.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
{
    return { vcvtq_f64_s64(vmovl_s32(a.simdInternal_)) };
}

static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
{
    return { vqmovn_u64(a.simdInternal_) };
}

static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
{
    return { vorrq_u64(vmovl_u32(a.simdInternal_), vshlq_n_u64(vmovl_u32(a.simdInternal_), 32)) };
}
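
// cvtIB2B widens each 32-bit integer boolean to a 64-bit double boolean:
// vmovl_u32 zero-extends the two lanes, and OR-ing with a copy shifted left by
// 32 replicates the all-ones/all-zeros pattern into the upper half of each lane.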

static inline void gmx_simdcall cvtF2DD(SimdFloat f, SimdDouble* d0, SimdDouble* d1)
{
    d0->simdInternal_ = vcvt_f64_f32(vget_low_f32(f.simdInternal_));
    d1->simdInternal_ = vcvt_high_f64_f32(f.simdInternal_);
}

static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble d0, SimdDouble d1)
{
    return { vcvt_high_f32_f64(vcvt_f32_f64(d0.simdInternal_), d1.simdInternal_) };
}

} // namespace gmx

#endif // GMX_SIMD_IMPL_ARM_NEON_ASIMD_SIMD_DOUBLE_H