Make sure frexp() returns correct results for argument 0.0
[gromacs.git] / src / gromacs / simd / impl_ibm_vsx / impl_ibm_vsx_simd_double.h
blob 3c6f3009b17249895b49c14fa763f15e814972c3
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */

#ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H
#define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H

#include "config.h"

#include <cstdint>

#include "gromacs/math/utilities.h"
#include "gromacs/utility/basedefinitions.h"

#include "impl_ibm_vsx_definitions.h"

namespace gmx
{

class SimdDouble
{
public:
    SimdDouble() {}

    // gcc-4.9 does not recognize that we use the parameter
    SimdDouble(double gmx_unused d) : simdInternal_(vec_splats(d)) {}

    // Internal utility constructor to simplify return statements
    SimdDouble(__vector double simd) : simdInternal_(simd) {}

    __vector double simdInternal_;
};

class SimdDInt32
{
public:
    SimdDInt32() {}

    // gcc-4.9 does not recognize that we use the parameter
    SimdDInt32(std::int32_t gmx_unused i) : simdInternal_(vec_splats(i)) {}

    // Internal utility constructor to simplify return statements
    SimdDInt32(__vector signed int simd) : simdInternal_(simd) {}

    __vector signed int simdInternal_;
};

class SimdDBool
{
public:
    SimdDBool() {}

    SimdDBool(bool b) :
        simdInternal_(reinterpret_cast<__vector vsxBool long long>(vec_splats(b ? 0xFFFFFFFFFFFFFFFFULL : 0)))
    {
    }

    // Internal utility constructor to simplify return statements
    SimdDBool(__vector vsxBool long long simd) : simdInternal_(simd) {}

    __vector vsxBool long long simdInternal_;
};

class SimdDIBool
{
public:
    SimdDIBool() {}

    SimdDIBool(bool b) :
        simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats(b ? 0xFFFFFFFF : 0)))
    {
    }

    // Internal utility constructor to simplify return statements
    SimdDIBool(__vector vsxBool int simd) : simdInternal_(simd) {}

    __vector vsxBool int simdInternal_;
};

// Note that the interfaces we use here have been a mess in xlc;
// currently version 13.1.5 is required.

static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
{
    return {
#if defined(__ibmxl__)
        vec_ld(0, m)
#else
#    if __GNUC__ < 7
        *reinterpret_cast<const __vector double*>(m)
#    else
        vec_vsx_ld(0, m)
#    endif
#endif
    };
}

static inline void gmx_simdcall store(double* m, SimdDouble a)
{
#if defined(__ibmxl__)
    vec_st(a.simdInternal_, 0, m);
#else
#    if __GNUC__ < 7
    *reinterpret_cast<__vector double*>(m) = a.simdInternal_;
#    else
    vec_vsx_st(a.simdInternal_, 0, m);
#    endif
#endif
}

static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
{
    return {
#if defined(__ibmxl__)
        vec_xl(0, m)
#else
#    if __GNUC__ < 7
        *reinterpret_cast<const __vector double*>(m)
#    else
        vec_vsx_ld(0, m)
#    endif
#endif
    };
}

static inline void gmx_simdcall storeU(double* m, SimdDouble a)
{
#if defined(__ibmxl__)
    vec_xst(a.simdInternal_, 0, m);
#else
#    if __GNUC__ < 7
    *reinterpret_cast<__vector double*>(m) = a.simdInternal_;
#    else
    vec_vsx_st(a.simdInternal_, 0, m);
#    endif
#endif
}

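// The load/store variants above differ only in which intrinsic spelling each
// supported compiler provides (vec_ld/vec_st and vec_xl/vec_xst on xlc,
// vec_vsx_ld/vec_vsx_st or a plain vector dereference on gcc); every branch
// moves the same 16 bytes, i.e. two doubles, per call.
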
static inline SimdDouble gmx_simdcall setZeroD()
{
    return { vec_splats(0.0) };
}

static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
{
    __vector signed int t0, t1;
    const __vector unsigned char perm = { 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19, 16, 17, 18, 19 };
    t0 = vec_splats(m[0]);
    t1 = vec_splats(m[1]);
    return { vec_perm(t0, t1, perm) };
}

// gcc-4.9 does not understand that arguments to vec_extract() are used
static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 gmx_unused x)
{
    m[0] = vec_extract(x.simdInternal_, 0);
    m[1] = vec_extract(x.simdInternal_, 2);
}

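// Storage convention: a SimdDInt32 holds its two significant 32-bit integers
// in vector elements 0 and 2. The load permute above builds {m0, m0, m1, m1},
// so each value is also mirrored into elements 1 and 3, while store() and
// extract() read elements 0 and 2 back out.
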
static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
{
    return simdLoad(m, SimdDInt32Tag());
}

static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
{
    return store(m, a);
}

static inline SimdDInt32 gmx_simdcall setZeroDI()
{
    return { vec_splats(static_cast<int>(0)) };
}

// gcc-4.9 does not detect that vec_extract() uses its argument
template<int index>
static inline std::int32_t gmx_simdcall extract(SimdDInt32 gmx_unused a)
{
    return vec_extract(a.simdInternal_, 2 * index);
}

static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
{
    return { vec_and(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
{
    return { vec_andc(b.simdInternal_, a.simdInternal_) };
}

static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
{
    return { vec_or(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
{
    return { vec_xor(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
{
    return { vec_add(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
{
    return { vec_sub(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall operator-(SimdDouble x)
{
    return { -x.simdInternal_ };
}

static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
{
    return { vec_mul(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
{
    return { vec_rsqrte(x.simdInternal_) };
}

static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
{
    return { vec_re(x.simdInternal_) };
}

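// vec_rsqrte() and vec_re() are limited-accuracy hardware estimate
// instructions; the generic gmx SIMD math routines refine these estimates
// with Newton-Raphson iterations where full double precision is required.
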
static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
{
    return { vec_add(a.simdInternal_,
                     vec_and(b.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_))) };
}

static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
{
    SimdDouble prod = a * b;

    return { vec_and(prod.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
{
    SimdDouble prod = fma(a, b, c);

    return { vec_and(prod.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
{
#ifndef NDEBUG
    x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
#endif
    return { vec_and(vec_rsqrte(x.simdInternal_), reinterpret_cast<__vector double>(m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
{
#ifndef NDEBUG
    x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
#endif
    return { vec_and(vec_re(x.simdInternal_), reinterpret_cast<__vector double>(m.simdInternal_)) };
}

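// The NDEBUG-guarded vec_sel() in maskzRsqrt() and maskzRcp() replaces
// masked-out lanes with 1.0 before the estimate, so debug builds never
// evaluate rsqrt/rcp of 0.0 in lanes whose result is discarded by the final
// vec_and() anyway.
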
static inline SimdDouble gmx_simdcall abs(SimdDouble x)
{
    return { vec_abs(x.simdInternal_) };
}

static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
{
    return { vec_max(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
{
    return { vec_min(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall round(SimdDouble x)
{
#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    // gcc up to at least version 4.9 does not have vec_round() in double precision - use inline asm
    __vector double res;
    __asm__("xvrdpi %x0,%x1" : "=wd"(res) : "wd"(x.simdInternal_));
    return { res };
#else
    return { vec_round(x.simdInternal_) };
#endif
}

static inline SimdDouble gmx_simdcall trunc(SimdDouble x)
{
    return { vec_trunc(x.simdInternal_) };
}

template<MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent)
{
    const __vector double exponentMask =
            reinterpret_cast<__vector double>(vec_splats(0x7FF0000000000000ULL));
    const __vector signed int exponentBias = vec_splats(1022);
    const __vector double half             = vec_splats(0.5);
    __vector signed int iExponent;

    __vector vsxBool long long valueIsZero =
            vec_cmpeq(value.simdInternal_, reinterpret_cast<__vector double>(vec_splats(0.0)));

    iExponent = reinterpret_cast<__vector signed int>(vec_and(value.simdInternal_, exponentMask));
    // The data is in the upper half of each double (corresponding to elements 1 and 3).
    // First shift 52-32=20 bits, and then permute to swap element 0 with 1 and element 2 with 3.
    // For big endian they are in opposite order, so then we simply skip the swap.
    iExponent = vec_sr(iExponent, vec_splats(20U));
#ifndef __BIG_ENDIAN__
    const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
    iExponent = vec_perm(iExponent, iExponent, perm);
#endif
    iExponent = vec_sub(iExponent, exponentBias);
    iExponent = vec_andc(iExponent, reinterpret_cast<__vector int>(valueIsZero));

    __vector double result = vec_or(vec_andc(value.simdInternal_, exponentMask), half);
    result                 = vec_sel(result, value.simdInternal_, valueIsZero);

    exponent->simdInternal_ = iExponent;

    return { result };
}

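// frexp() decomposes each lane as value = fraction * 2^exponent with
// |fraction| in [0.5, 1), like std::frexp: the stored exponent field is
// shifted down, moved into integer elements 0/2, and rebiased by 1022, while
// the fraction is formed by replacing the exponent bits with those of 0.5.
// The valueIsZero mask makes frexp(0.0) return a fraction of 0.0 and an
// exponent of 0, instead of the -1022 the plain bit manipulation would give.
// For example, frexp(8.0, &e) yields 0.5 with e = 4 in every populated lane.
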
template<MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble ldexp(SimdDouble value, SimdDInt32 exponent)
{
    const __vector signed int exponentBias = vec_splats(1023);
    __vector signed int iExponent;
#ifdef __BIG_ENDIAN__
    const __vector unsigned char perm = { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 16, 17, 18, 19 };
#else
    const __vector unsigned char perm = { 16, 17, 18, 19, 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11 };
#endif

    iExponent = vec_add(exponent.simdInternal_, exponentBias);

    if (opt == MathOptimization::Safe)
    {
        // Make sure biased argument is not negative
        iExponent = vec_max(iExponent, vec_splat_s32(0));
    }

    // exponent is now present in pairs of integers; 0011.
    // Elements 0/2 already correspond to the upper half of each double,
    // so we only need to shift by another 52-32=20 bits.
    // The remaining elements are set to zero.
    iExponent = vec_sl(iExponent, vec_splats(20U));
    iExponent = vec_perm(iExponent, vec_splats(0), perm);

    return { vec_mul(value.simdInternal_, reinterpret_cast<__vector double>(iExponent)) };
}

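// ldexp() multiplies each lane by 2^exponent by constructing the scale factor
// directly as a bit pattern: the biased exponent (e + 1023) is shifted left by
// 20 bits and permuted into the high half of each double, with all other bits
// zero. For example, e = 3 gives the biased value 1026 = 0x402, hence the
// 64-bit pattern 0x4020000000000000, which is 8.0. In the Safe variant the
// biased exponent is clamped at zero, so strongly negative exponents flush
// the scale factor to 0.0 instead of wrapping around.
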
static inline double gmx_simdcall reduce(SimdDouble x)
{
    const __vector unsigned char perm = { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
#ifdef __xlC__
    /* old xlc version 12 does not understand vec_perm() with double arguments */
    x.simdInternal_ = vec_add(
            x.simdInternal_, reinterpret_cast<__vector double>(vec_perm(
                                     reinterpret_cast<__vector signed int>(x.simdInternal_),
                                     reinterpret_cast<__vector signed int>(x.simdInternal_), perm)));
#else
    x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm));
#endif
    return vec_extract(x.simdInternal_, 0);
}

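// reduce() sums the two doubles by permuting them into swapped order and
// adding, so both lanes end up holding a+b; element 0 is then extracted as
// the scalar result. The xlc-12 branch performs the same permute on the value
// reinterpreted as integers because that compiler lacked vec_perm() for
// double vectors.
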
static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
{
    return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
{
    return { reinterpret_cast<__vector vsxBool long long>(vec_or(
            reinterpret_cast<__vector signed int>(vec_cmpgt(a.simdInternal_, b.simdInternal_)),
            reinterpret_cast<__vector signed int>(vec_cmplt(a.simdInternal_, b.simdInternal_)))) };
}

static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
{
    return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
{
    return { vec_cmple(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
{
#ifdef __POWER8_VECTOR__
    // Power8 VSX has proper support for operations on long long integers
    return { vec_cmpgt(reinterpret_cast<__vector unsigned long long>(a.simdInternal_), vec_splats(0ULL)) };
#else
    // No support for long long operations.
    // Start with comparing 32-bit subfields bitwise by casting to integers
    __vector vsxBool int tmp =
            vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U));

    // Shuffle low/high 32-bit fields of tmp into tmp2
    const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
    __vector vsxBool int tmp2         = vec_perm(tmp, tmp, perm);

    // Return the OR'd parts of tmp and tmp2
    return { reinterpret_cast<__vector vsxBool long long>(vec_or(tmp, tmp2)) };
#endif
}

static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
{
    return { reinterpret_cast<__vector vsxBool long long>(
            vec_and(reinterpret_cast<__vector signed int>(a.simdInternal_),
                    reinterpret_cast<__vector signed int>(b.simdInternal_))) };
}

static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
{
    return { reinterpret_cast<__vector vsxBool long long>(
            vec_or(reinterpret_cast<__vector signed int>(a.simdInternal_),
                   reinterpret_cast<__vector signed int>(b.simdInternal_))) };
}

static inline bool gmx_simdcall anyTrue(SimdDBool a)
{
    return vec_any_ne(reinterpret_cast<__vector vsxBool int>(a.simdInternal_),
                      reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
}

static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool m)
{
    return { vec_and(a.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool m)
{
    return { vec_andc(a.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
{
    return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_and(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_andc(b.simdInternal_, a.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_or(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_xor(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_add(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_sub(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
{
    return { a.simdInternal_ * b.simdInternal_ };
}

static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
{
    return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U)) };
}

static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
{
    return { vec_and(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
{
    return { vec_or(a.simdInternal_, b.simdInternal_) };
}

static inline bool gmx_simdcall anyTrue(SimdDIBool a)
{
    return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
}

static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool m)
{
    return { vec_and(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
}

static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool m)
{
    return { vec_andc(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
}

static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
{
    return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
{
#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    // gcc up to at least version 6.1 is missing intrinsics for converting double to/from int - use inline asm
    const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
    __vector double ix;

    __asm__("xvcvdpsxws %x0,%x1" : "=wa"(ix) : "wd"(a.simdInternal_));

    return { reinterpret_cast<__vector signed int>(vec_perm(ix, ix, perm)) };
#else
    return { vec_cts(a.simdInternal_, 0) };
#endif
}

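// cvttR2I() truncates towards zero (xvcvdpsxws / vec_cts), and the permute in
// the gcc branch rearranges the converted words to match SimdDInt32's storage
// convention. cvtR2I() below obtains round-to-nearest behaviour by rounding
// in double precision first and then truncating.
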
static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
{
    return cvttR2I(round(a));
}

static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
{
#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    // gcc up to at least version 4.9 is missing intrinsics for converting double to/from int - use inline asm
    __vector double x;
#    ifndef __BIG_ENDIAN__
    const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
    a.simdInternal_ = vec_perm(a.simdInternal_, a.simdInternal_, perm);
#    endif

    __asm__("xvcvsxwdp %x0,%x1" : "=wd"(x) : "wa"(a.simdInternal_));

    return { x };
#else
    return { vec_ctd(a.simdInternal_, 0) };
#endif
}

static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
{
    return { reinterpret_cast<__vector vsxBool int>(a.simdInternal_) };
}

static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
{
    return { reinterpret_cast<__vector vsxBool long long>(a.simdInternal_) };
}

static inline void gmx_simdcall cvtF2DD(SimdFloat f, SimdDouble* d0, SimdDouble* d1)
{
    __vector float fA, fB;
    fA = vec_mergeh(f.simdInternal_, f.simdInternal_); /* 0011 */
    fB = vec_mergel(f.simdInternal_, f.simdInternal_); /* 2233 */
#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    // gcc-4.9 is missing double-to-float/float-to-double conversions.
    __asm__("xvcvspdp %x0,%x1" : "=wd"(d0->simdInternal_) : "wf"(fA));
    __asm__("xvcvspdp %x0,%x1" : "=wd"(d1->simdInternal_) : "wf"(fB));
#else
    d0->simdInternal_ = vec_cvf(fA); /* 01 */
    d1->simdInternal_ = vec_cvf(fB); /* 23 */
#endif
}

static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble d0, SimdDouble d1)
{
    __vector float fA, fB, fC, fD, fE;
#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    // gcc-4.9 is missing double-to-float/float-to-double conversions.
    __asm__("xvcvdpsp %x0,%x1" : "=wf"(fA) : "wd"(d0.simdInternal_));
    __asm__("xvcvdpsp %x0,%x1" : "=wf"(fB) : "wd"(d1.simdInternal_));
#else
    fA = vec_cvf(d0.simdInternal_); /* 0x1x */
    fB = vec_cvf(d1.simdInternal_); /* 2x3x */
#endif
    fC = vec_mergeh(fA, fB); /* 02xx */
    fD = vec_mergel(fA, fB); /* 13xx */
    fE = vec_mergeh(fC, fD); /* 0123 */
    return { fE };
}

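// In the lane diagrams above, digits denote which original float/double value
// occupies an element and 'x' marks a don't-care slot: cvtF2DD() widens the
// four packed floats into two double vectors {0,1} and {2,3}, while cvtDD2F()
// narrows two double vectors back and uses the merge operations to compact
// the four results into consecutive float elements {0,1,2,3}.
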
static inline SimdDouble gmx_simdcall copysign(SimdDouble x, SimdDouble y)
{
#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    __vector double res;
    __asm__("xvcpsgndp %x0,%x1,%x2" : "=wd"(res) : "wd"(y.simdInternal_), "wd"(x.simdInternal_));
    return { res };
#else
    return { vec_cpsgn(y.simdInternal_, x.simdInternal_) };
#endif
}

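// copysign(x, y) returns the magnitude of x with the sign of y. The sign
// source y is passed as the first input because xvcpsgndp (and the matching
// vec_cpsgn() call) takes the sign from its first operand and the magnitude
// from its second.
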
} // namespace gmx

#endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H