2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5 * Copyright (c) 2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H
38 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H
42 #include "gromacs/math/utilities.h"
43 #include "gromacs/utility/basedefinitions.h"
45 #include "impl_ibm_vsx_definitions.h"
55 // gcc-4.9 does not recognize that we use the parameter
56 SimdDouble(double gmx_unused d
) : simdInternal_(vec_splats(d
)) {}
58 // Internal utility constructor to simplify return statements
59 SimdDouble(__vector
double simd
) : simdInternal_(simd
) {}
61 __vector
double simdInternal_
;
69 // gcc-4.9 does not recognize that we use the parameter
70 SimdDInt32(std::int32_t gmx_unused i
) : simdInternal_(vec_splats(i
)) {}
72 // Internal utility constructor to simplify return statements
73 SimdDInt32(__vector
signed int simd
) : simdInternal_(simd
) {}
75 __vector
signed int simdInternal_
;
84 simdInternal_(reinterpret_cast<__vector vsxBool
long long>(vec_splats(b
? 0xFFFFFFFFFFFFFFFFULL
: 0)))
88 // Internal utility constructor to simplify return statements
89 SimdDBool(__vector vsxBool
long long simd
) : simdInternal_(simd
) {}
91 __vector vsxBool
long long simdInternal_
;
100 simdInternal_(reinterpret_cast<__vector vsxBool
int>(vec_splats(b
? 0xFFFFFFFF : 0)))
104 // Internal utility constructor to simplify return statements
105 SimdDIBool(__vector vsxBool
int simd
) : simdInternal_(simd
) {}
107 __vector vsxBool
int simdInternal_
;
110 // Note that the interfaces we use here have been a mess in xlc;
111 // currently version 13.1.5 is required.
113 static inline SimdDouble gmx_simdcall
simdLoad(const double* m
, SimdDoubleTag
= {})
117 #if defined(__ibmxl__)
121 *reinterpret_cast<const __vector
double*>(m
)
129 static inline void gmx_simdcall
store(double* m
, SimdDouble a
)
131 #if defined(__ibmxl__)
132 vec_st(a
.simdInternal_
, 0, m
);
135 *reinterpret_cast<__vector
double*>(m
) = a
.simdInternal_
;
137 vec_vsx_st(a
.simdInternal_
, 0, m
);
142 static inline SimdDouble gmx_simdcall
simdLoadU(const double* m
, SimdDoubleTag
= {})
146 #if defined(__ibmxl__)
150 *reinterpret_cast<const __vector
double*>(m
)
158 static inline void gmx_simdcall
storeU(double* m
, SimdDouble a
)
160 #if defined(__ibmxl__)
161 vec_xst(a
.simdInternal_
, 0, m
);
164 *reinterpret_cast<__vector
double*>(m
) = a
.simdInternal_
;
166 vec_vsx_st(a
.simdInternal_
, 0, m
);
171 static inline SimdDouble gmx_simdcall
setZeroD()
173 return { vec_splats(0.0) };
176 static inline SimdDInt32 gmx_simdcall
simdLoad(const std::int32_t* m
, SimdDInt32Tag
)
178 __vector
signed int t0
, t1
;
179 const __vector
unsigned char perm
= { 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19, 16, 17, 18, 19 };
180 t0
= vec_splats(m
[0]);
181 t1
= vec_splats(m
[1]);
182 return { vec_perm(t0
, t1
, perm
) };
185 // gcc-4.9 does not understand that arguments to vec_extract() are used
186 static inline void gmx_simdcall
store(std::int32_t* m
, SimdDInt32 gmx_unused x
)
188 m
[0] = vec_extract(x
.simdInternal_
, 0);
189 m
[1] = vec_extract(x
.simdInternal_
, 2);
192 static inline SimdDInt32 gmx_simdcall
simdLoadU(const std::int32_t* m
, SimdDInt32Tag
)
194 return simdLoad(m
, SimdDInt32Tag());
197 static inline void gmx_simdcall
storeU(std::int32_t* m
, SimdDInt32 a
)
202 static inline SimdDInt32 gmx_simdcall
setZeroDI()
204 return { vec_splats(static_cast<int>(0)) };
207 // gcc-4.9 does not detect that vec_extract() uses its argument
209 static inline std::int32_t gmx_simdcall
extract(SimdDInt32 gmx_unused a
)
211 return vec_extract(a
.simdInternal_
, 2 * index
);
214 static inline SimdDouble gmx_simdcall
operator&(SimdDouble a
, SimdDouble b
)
216 return { vec_and(a
.simdInternal_
, b
.simdInternal_
) };
219 static inline SimdDouble gmx_simdcall
andNot(SimdDouble a
, SimdDouble b
)
221 return { vec_andc(b
.simdInternal_
, a
.simdInternal_
) };
224 static inline SimdDouble gmx_simdcall
operator|(SimdDouble a
, SimdDouble b
)
226 return { vec_or(a
.simdInternal_
, b
.simdInternal_
) };
229 static inline SimdDouble gmx_simdcall
operator^(SimdDouble a
, SimdDouble b
)
231 return { vec_xor(a
.simdInternal_
, b
.simdInternal_
) };
234 static inline SimdDouble gmx_simdcall
operator+(SimdDouble a
, SimdDouble b
)
236 return { vec_add(a
.simdInternal_
, b
.simdInternal_
) };
239 static inline SimdDouble gmx_simdcall
operator-(SimdDouble a
, SimdDouble b
)
241 return { vec_sub(a
.simdInternal_
, b
.simdInternal_
) };
244 static inline SimdDouble gmx_simdcall
operator-(SimdDouble x
)
246 return { -x
.simdInternal_
};
249 static inline SimdDouble gmx_simdcall
operator*(SimdDouble a
, SimdDouble b
)
251 return { vec_mul(a
.simdInternal_
, b
.simdInternal_
) };
254 static inline SimdDouble gmx_simdcall
fma(SimdDouble a
, SimdDouble b
, SimdDouble c
)
256 return { vec_madd(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
259 static inline SimdDouble gmx_simdcall
fms(SimdDouble a
, SimdDouble b
, SimdDouble c
)
261 return { vec_msub(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
264 static inline SimdDouble gmx_simdcall
fnma(SimdDouble a
, SimdDouble b
, SimdDouble c
)
266 return { vec_nmsub(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
269 static inline SimdDouble gmx_simdcall
fnms(SimdDouble a
, SimdDouble b
, SimdDouble c
)
271 return { vec_nmadd(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
274 static inline SimdDouble gmx_simdcall
rsqrt(SimdDouble x
)
276 return { vec_rsqrte(x
.simdInternal_
) };
279 static inline SimdDouble gmx_simdcall
rcp(SimdDouble x
)
281 return { vec_re(x
.simdInternal_
) };
284 static inline SimdDouble gmx_simdcall
maskAdd(SimdDouble a
, SimdDouble b
, SimdDBool m
)
286 return { vec_add(a
.simdInternal_
,
287 vec_and(b
.simdInternal_
, reinterpret_cast<__vector
double>(m
.simdInternal_
))) };
290 static inline SimdDouble gmx_simdcall
maskzMul(SimdDouble a
, SimdDouble b
, SimdDBool m
)
292 SimdDouble prod
= a
* b
;
294 return { vec_and(prod
.simdInternal_
, reinterpret_cast<__vector
double>(m
.simdInternal_
)) };
297 static inline SimdDouble gmx_simdcall
maskzFma(SimdDouble a
, SimdDouble b
, SimdDouble c
, SimdDBool m
)
299 SimdDouble prod
= fma(a
, b
, c
);
301 return { vec_and(prod
.simdInternal_
, reinterpret_cast<__vector
double>(m
.simdInternal_
)) };
304 static inline SimdDouble gmx_simdcall
maskzRsqrt(SimdDouble x
, SimdDBool m
)
307 x
.simdInternal_
= vec_sel(vec_splats(1.0), x
.simdInternal_
, m
.simdInternal_
);
309 return { vec_and(vec_rsqrte(x
.simdInternal_
), reinterpret_cast<__vector
double>(m
.simdInternal_
)) };
312 static inline SimdDouble gmx_simdcall
maskzRcp(SimdDouble x
, SimdDBool m
)
315 x
.simdInternal_
= vec_sel(vec_splats(1.0), x
.simdInternal_
, m
.simdInternal_
);
317 return { vec_and(vec_re(x
.simdInternal_
), reinterpret_cast<__vector
double>(m
.simdInternal_
)) };
320 static inline SimdDouble gmx_simdcall
abs(SimdDouble x
)
322 return { vec_abs(x
.simdInternal_
) };
325 static inline SimdDouble gmx_simdcall
max(SimdDouble a
, SimdDouble b
)
327 return { vec_max(a
.simdInternal_
, b
.simdInternal_
) };
330 static inline SimdDouble gmx_simdcall
min(SimdDouble a
, SimdDouble b
)
332 return { vec_min(a
.simdInternal_
, b
.simdInternal_
) };
335 static inline SimdDouble gmx_simdcall
round(SimdDouble x
)
337 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
338 // gcc up to at least version 4.9 does not have vec_round() in double precision - use inline asm
340 __asm__("xvrdpi %x0,%x1" : "=wd"(res
) : "wd"(x
.simdInternal_
));
343 return { vec_round(x
.simdInternal_
) };
347 static inline SimdDouble gmx_simdcall
trunc(SimdDouble x
)
349 return { vec_trunc(x
.simdInternal_
) };
352 template<MathOptimization opt
= MathOptimization::Safe
>
353 static inline SimdDouble
frexp(SimdDouble value
, SimdDInt32
* exponent
)
355 const __vector
double exponentMask
=
356 reinterpret_cast<__vector
double>(vec_splats(0x7FF0000000000000ULL
));
357 const __vector
signed int exponentBias
= vec_splats(1022);
358 const __vector
double half
= vec_splats(0.5);
359 __vector
signed int iExponent
;
361 __vector vsxBool
long long valueIsZero
=
362 vec_cmpeq(value
.simdInternal_
, reinterpret_cast<__vector
double>(vec_splats(0.0)));
364 iExponent
= reinterpret_cast<__vector
signed int>(vec_and(value
.simdInternal_
, exponentMask
));
365 // The data is in the upper half of each double (corresponding to elements 1 and 3).
366 // First shift 52-32=20bits, and then permute to swap element 0 with 1 and element 2 with 3
367 // For big endian they are in opposite order, so then we simply skip the swap.
368 iExponent
= vec_sr(iExponent
, vec_splats(20U));
369 #ifndef __BIG_ENDIAN__
370 const __vector
unsigned char perm
= { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
371 iExponent
= vec_perm(iExponent
, iExponent
, perm
);
373 iExponent
= vec_sub(iExponent
, exponentBias
);
374 iExponent
= vec_andc(iExponent
, reinterpret_cast<__vector
int>(valueIsZero
));
376 __vector
double result
= vec_or(vec_andc(value
.simdInternal_
, exponentMask
), half
);
377 result
= vec_sel(result
, value
.simdInternal_
, valueIsZero
);
379 exponent
->simdInternal_
= iExponent
;
384 template<MathOptimization opt
= MathOptimization::Safe
>
385 static inline SimdDouble
ldexp(SimdDouble value
, SimdDInt32 exponent
)
387 const __vector
signed int exponentBias
= vec_splats(1023);
388 __vector
signed int iExponent
;
389 #ifdef __BIG_ENDIAN__
390 const __vector
unsigned char perm
= { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 16, 17, 18, 19 };
392 const __vector
unsigned char perm
= { 16, 17, 18, 19, 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11 };
395 iExponent
= vec_add(exponent
.simdInternal_
, exponentBias
);
397 if (opt
== MathOptimization::Safe
)
399 // Make sure biased argument is not negative
400 iExponent
= vec_max(iExponent
, vec_splat_s32(0));
403 // exponent is now present in pairs of integers; 0011.
404 // Elements 0/2 already correspond to the upper half of each double,
405 // so we only need to shift by another 52-32=20 bits.
406 // The remaining elements are set to zero.
407 iExponent
= vec_sl(iExponent
, vec_splats(20U));
408 iExponent
= vec_perm(iExponent
, vec_splats(0), perm
);
410 return { vec_mul(value
.simdInternal_
, reinterpret_cast<__vector
double>(iExponent
)) };
413 static inline double gmx_simdcall
reduce(SimdDouble x
)
415 const __vector
unsigned char perm
= { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
417 /* old xlc version 12 does not understand vec_perm() with double arguments */
418 x
.simdInternal_
= vec_add(
419 x
.simdInternal_
, reinterpret_cast<__vector
double>(vec_perm(
420 reinterpret_cast<__vector
signed int>(x
.simdInternal_
),
421 reinterpret_cast<__vector
signed int>(x
.simdInternal_
), perm
)));
423 x
.simdInternal_
= vec_add(x
.simdInternal_
, vec_perm(x
.simdInternal_
, x
.simdInternal_
, perm
));
425 return vec_extract(x
.simdInternal_
, 0);
428 static inline SimdDBool gmx_simdcall
operator==(SimdDouble a
, SimdDouble b
)
430 return { vec_cmpeq(a
.simdInternal_
, b
.simdInternal_
) };
433 static inline SimdDBool gmx_simdcall
operator!=(SimdDouble a
, SimdDouble b
)
435 return { reinterpret_cast<__vector vsxBool
long long>(vec_or(
436 reinterpret_cast<__vector
signed int>(vec_cmpgt(a
.simdInternal_
, b
.simdInternal_
)),
437 reinterpret_cast<__vector
signed int>(vec_cmplt(a
.simdInternal_
, b
.simdInternal_
)))) };
440 static inline SimdDBool gmx_simdcall
operator<(SimdDouble a
, SimdDouble b
)
442 return { vec_cmplt(a
.simdInternal_
, b
.simdInternal_
) };
445 static inline SimdDBool gmx_simdcall
operator<=(SimdDouble a
, SimdDouble b
)
447 return { vec_cmple(a
.simdInternal_
, b
.simdInternal_
) };
450 static inline SimdDBool gmx_simdcall
testBits(SimdDouble a
)
452 #ifdef __POWER8_VECTOR__
453 // Power8 VSX has proper support for operations on long long integers
454 return { vec_cmpgt(reinterpret_cast<__vector
unsigned long long>(a
.simdInternal_
), vec_splats(0ULL)) };
456 // No support for long long operations.
457 // Start with comparing 32-bit subfields bitwise by casting to integers
458 __vector vsxBool
int tmp
=
459 vec_cmpgt(reinterpret_cast<__vector
unsigned int>(a
.simdInternal_
), vec_splats(0U));
461 // Shuffle low/high 32-bit fields of tmp into tmp2
462 const __vector
unsigned char perm
= { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
463 __vector vsxBool
int tmp2
= vec_perm(tmp
, tmp
, perm
);
465 // Return the or:d parts of tmp & tmp2
466 return { reinterpret_cast<__vector vsxBool
long long>(vec_or(tmp
, tmp2
)) };
470 static inline SimdDBool gmx_simdcall
operator&&(SimdDBool a
, SimdDBool b
)
472 return { reinterpret_cast<__vector vsxBool
long long>(
473 vec_and(reinterpret_cast<__vector
signed int>(a
.simdInternal_
),
474 reinterpret_cast<__vector
signed int>(b
.simdInternal_
))) };
477 static inline SimdDBool gmx_simdcall
operator||(SimdDBool a
, SimdDBool b
)
479 return { reinterpret_cast<__vector vsxBool
long long>(
480 vec_or(reinterpret_cast<__vector
signed int>(a
.simdInternal_
),
481 reinterpret_cast<__vector
signed int>(b
.simdInternal_
))) };
484 static inline bool gmx_simdcall
anyTrue(SimdDBool a
)
486 return vec_any_ne(reinterpret_cast<__vector vsxBool
int>(a
.simdInternal_
),
487 reinterpret_cast<__vector vsxBool
int>(vec_splats(0)));
490 static inline SimdDouble gmx_simdcall
selectByMask(SimdDouble a
, SimdDBool m
)
492 return { vec_and(a
.simdInternal_
, reinterpret_cast<__vector
double>(m
.simdInternal_
)) };
495 static inline SimdDouble gmx_simdcall
selectByNotMask(SimdDouble a
, SimdDBool m
)
497 return { vec_andc(a
.simdInternal_
, reinterpret_cast<__vector
double>(m
.simdInternal_
)) };
500 static inline SimdDouble gmx_simdcall
blend(SimdDouble a
, SimdDouble b
, SimdDBool sel
)
502 return { vec_sel(a
.simdInternal_
, b
.simdInternal_
, sel
.simdInternal_
) };
505 static inline SimdDInt32 gmx_simdcall
operator&(SimdDInt32 a
, SimdDInt32 b
)
507 return { vec_and(a
.simdInternal_
, b
.simdInternal_
) };
510 static inline SimdDInt32 gmx_simdcall
andNot(SimdDInt32 a
, SimdDInt32 b
)
512 return { vec_andc(b
.simdInternal_
, a
.simdInternal_
) };
515 static inline SimdDInt32 gmx_simdcall
operator|(SimdDInt32 a
, SimdDInt32 b
)
517 return { vec_or(a
.simdInternal_
, b
.simdInternal_
) };
520 static inline SimdDInt32 gmx_simdcall
operator^(SimdDInt32 a
, SimdDInt32 b
)
522 return { vec_xor(a
.simdInternal_
, b
.simdInternal_
) };
525 static inline SimdDInt32 gmx_simdcall
operator+(SimdDInt32 a
, SimdDInt32 b
)
527 return { vec_add(a
.simdInternal_
, b
.simdInternal_
) };
530 static inline SimdDInt32 gmx_simdcall
operator-(SimdDInt32 a
, SimdDInt32 b
)
532 return { vec_sub(a
.simdInternal_
, b
.simdInternal_
) };
535 static inline SimdDInt32 gmx_simdcall
operator*(SimdDInt32 a
, SimdDInt32 b
)
537 return { a
.simdInternal_
* b
.simdInternal_
};
540 static inline SimdDIBool gmx_simdcall
operator==(SimdDInt32 a
, SimdDInt32 b
)
542 return { vec_cmpeq(a
.simdInternal_
, b
.simdInternal_
) };
545 static inline SimdDIBool gmx_simdcall
testBits(SimdDInt32 a
)
547 return { vec_cmpgt(reinterpret_cast<__vector
unsigned int>(a
.simdInternal_
), vec_splats(0U)) };
550 static inline SimdDIBool gmx_simdcall
operator<(SimdDInt32 a
, SimdDInt32 b
)
552 return { vec_cmplt(a
.simdInternal_
, b
.simdInternal_
) };
555 static inline SimdDIBool gmx_simdcall
operator&&(SimdDIBool a
, SimdDIBool b
)
557 return { vec_and(a
.simdInternal_
, b
.simdInternal_
) };
560 static inline SimdDIBool gmx_simdcall
operator||(SimdDIBool a
, SimdDIBool b
)
562 return { vec_or(a
.simdInternal_
, b
.simdInternal_
) };
565 static inline bool gmx_simdcall
anyTrue(SimdDIBool a
)
567 return vec_any_ne(a
.simdInternal_
, reinterpret_cast<__vector vsxBool
int>(vec_splats(0)));
570 static inline SimdDInt32 gmx_simdcall
selectByMask(SimdDInt32 a
, SimdDIBool m
)
572 return { vec_and(a
.simdInternal_
, reinterpret_cast<__vector
signed int>(m
.simdInternal_
)) };
575 static inline SimdDInt32 gmx_simdcall
selectByNotMask(SimdDInt32 a
, SimdDIBool m
)
577 return { vec_andc(a
.simdInternal_
, reinterpret_cast<__vector
signed int>(m
.simdInternal_
)) };
580 static inline SimdDInt32 gmx_simdcall
blend(SimdDInt32 a
, SimdDInt32 b
, SimdDIBool sel
)
582 return { vec_sel(a
.simdInternal_
, b
.simdInternal_
, sel
.simdInternal_
) };
585 static inline SimdDInt32 gmx_simdcall
cvttR2I(SimdDouble a
)
587 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
588 // gcc up to at least version 6.1 is missing intrinsics for converting double to/from int - use inline asm
589 const __vector
unsigned char perm
= { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
592 __asm__("xvcvdpsxws %x0,%x1" : "=wa"(ix
) : "wd"(a
.simdInternal_
));
594 return { reinterpret_cast<__vector
signed int>(vec_perm(ix
, ix
, perm
)) };
596 return { vec_cts(a
.simdInternal_
, 0) };
600 static inline SimdDInt32 gmx_simdcall
cvtR2I(SimdDouble a
)
602 return cvttR2I(round(a
));
605 static inline SimdDouble gmx_simdcall
cvtI2R(SimdDInt32 a
)
607 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
608 // gcc up to at least version 4.9 is missing intrinsics for converting double to/from int - use inline asm
610 # ifndef __BIG_ENDIAN__
611 const __vector
unsigned char perm
= { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
612 a
.simdInternal_
= vec_perm(a
.simdInternal_
, a
.simdInternal_
, perm
);
615 __asm__("xvcvsxwdp %x0,%x1" : "=wd"(x
) : "wa"(a
.simdInternal_
));
619 return { vec_ctd(a
.simdInternal_
, 0) };
623 static inline SimdDIBool gmx_simdcall
cvtB2IB(SimdDBool a
)
625 return { reinterpret_cast<__vector vsxBool
int>(a
.simdInternal_
) };
628 static inline SimdDBool gmx_simdcall
cvtIB2B(SimdDIBool a
)
630 return { reinterpret_cast<__vector vsxBool
long long>(a
.simdInternal_
) };
633 static inline void gmx_simdcall
cvtF2DD(SimdFloat f
, SimdDouble
* d0
, SimdDouble
* d1
)
635 __vector
float fA
, fB
;
636 fA
= vec_mergeh(f
.simdInternal_
, f
.simdInternal_
); /* 0011 */
637 fB
= vec_mergel(f
.simdInternal_
, f
.simdInternal_
); /* 2233 */
638 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
639 // gcc-4.9 is missing double-to-float/float-to-double conversions.
640 __asm__("xvcvspdp %x0,%x1" : "=wd"(d0
->simdInternal_
) : "wf"(fA
));
641 __asm__("xvcvspdp %x0,%x1" : "=wd"(d1
->simdInternal_
) : "wf"(fB
));
643 d0
->simdInternal_
= vec_cvf(fA
); /* 01 */
644 d1
->simdInternal_
= vec_cvf(fB
); /* 23 */
648 static inline SimdFloat gmx_simdcall
cvtDD2F(SimdDouble d0
, SimdDouble d1
)
650 __vector
float fA
, fB
, fC
, fD
, fE
;
651 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
652 // gcc-4.9 is missing double-to-float/float-to-double conversions.
653 __asm__("xvcvdpsp %x0,%x1" : "=wf"(fA
) : "wd"(d0
.simdInternal_
));
654 __asm__("xvcvdpsp %x0,%x1" : "=wf"(fB
) : "wd"(d1
.simdInternal_
));
656 fA
= vec_cvf(d0
.simdInternal_
); /* 0x1x */
657 fB
= vec_cvf(d1
.simdInternal_
); /* 2x3x */
659 fC
= vec_mergeh(fA
, fB
); /* 02xx */
660 fD
= vec_mergel(fA
, fB
); /* 13xx */
661 fE
= vec_mergeh(fC
, fD
); /* 0123 */
665 static inline SimdDouble gmx_simdcall
copysign(SimdDouble x
, SimdDouble y
)
667 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
669 __asm__("xvcpsgndp %x0,%x1,%x2" : "=wd"(res
) : "wd"(y
.simdInternal_
), "wd"(x
.simdInternal_
));
672 return { vec_cpsgn(y
.simdInternal_
, x
.simdInternal_
) };
678 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H