2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5 * Copyright (c) 2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
38 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
42 #include "gromacs/math/utilities.h"
43 #include "gromacs/utility/basedefinitions.h"
45 #include "impl_ibm_vsx_definitions.h"
55 // gcc-4.9 does not recognize that we use the parameter
56 SimdFloat(float gmx_unused f
) : simdInternal_(vec_splats(f
)) {}
58 // Internal utility constructor to simplify return statements
59 SimdFloat(__vector
float simd
) : simdInternal_(simd
) {}
61 __vector
float simdInternal_
;
69 // gcc-4.9 does not recognize that we use the parameter
70 SimdFInt32(std::int32_t gmx_unused i
) : simdInternal_(vec_splats(i
)) {}
72 // Internal utility constructor to simplify return statements
73 SimdFInt32(__vector
signed int simd
) : simdInternal_(simd
) {}
75 __vector
signed int simdInternal_
;
84 simdInternal_(reinterpret_cast<__vector vsxBool
int>(vec_splats(b
? 0xFFFFFFFF : 0)))
88 // Internal utility constructor to simplify return statements
89 SimdFBool(__vector vsxBool
int simd
) : simdInternal_(simd
) {}
91 __vector vsxBool
int simdInternal_
;
100 simdInternal_(reinterpret_cast<__vector vsxBool
int>(vec_splats(b
? 0xFFFFFFFF : 0)))
104 // Internal utility constructor to simplify return statements
105 SimdFIBool(__vector vsxBool
int simd
) : simdInternal_(simd
) {}
107 __vector vsxBool
int simdInternal_
;
110 // Note that the interfaces we use here have been a mess in xlc;
111 // currently version 13.1.5 is required.
113 static inline SimdFloat gmx_simdcall
simdLoad(const float* m
, SimdFloatTag
= {})
115 return { *reinterpret_cast<const __vector
float*>(m
) };
118 static inline void gmx_simdcall
store(float* m
, SimdFloat a
)
120 *reinterpret_cast<__vector
float*>(m
) = a
.simdInternal_
;
123 static inline SimdFloat gmx_simdcall
simdLoadU(const float* m
, SimdFloatTag
= {})
128 *reinterpret_cast<const __vector
float*>(m
)
135 static inline void gmx_simdcall
storeU(float* m
, SimdFloat a
)
138 *reinterpret_cast<__vector
float*>(m
) = a
.simdInternal_
;
140 vec_xst(a
.simdInternal_
, 0, m
);
144 static inline SimdFloat gmx_simdcall
setZeroF()
146 return { vec_splats(0.0F
) };
149 static inline SimdFInt32 gmx_simdcall
simdLoad(const std::int32_t* m
, SimdFInt32Tag
)
151 return { *reinterpret_cast<const __vector
int*>(m
) };
154 static inline void gmx_simdcall
store(std::int32_t* m
, SimdFInt32 a
)
156 *reinterpret_cast<__vector
int*>(m
) = a
.simdInternal_
;
159 static inline SimdFInt32 gmx_simdcall
simdLoadU(const std::int32_t* m
, SimdFInt32Tag
)
164 *reinterpret_cast<const __vector
int*>(m
)
171 static inline void gmx_simdcall
storeU(std::int32_t* m
, SimdFInt32 a
)
174 *reinterpret_cast<__vector
int*>(m
) = a
.simdInternal_
;
176 vec_xst(a
.simdInternal_
, 0, m
);
180 static inline SimdFInt32 gmx_simdcall
setZeroFI()
182 return { vec_splats(static_cast<int>(0)) };
185 // gcc-4.9 does not detect that vec_extract() uses its argument
187 static inline std::int32_t gmx_simdcall
extract(SimdFInt32 gmx_unused a
)
189 return vec_extract(a
.simdInternal_
, index
);
192 static inline SimdFloat gmx_simdcall
operator&(SimdFloat a
, SimdFloat b
)
194 return { vec_and(a
.simdInternal_
, b
.simdInternal_
) };
197 static inline SimdFloat gmx_simdcall
andNot(SimdFloat a
, SimdFloat b
)
199 return { vec_andc(b
.simdInternal_
, a
.simdInternal_
) };
202 static inline SimdFloat gmx_simdcall
operator|(SimdFloat a
, SimdFloat b
)
204 return { vec_or(a
.simdInternal_
, b
.simdInternal_
) };
207 static inline SimdFloat gmx_simdcall
operator^(SimdFloat a
, SimdFloat b
)
209 return { vec_xor(a
.simdInternal_
, b
.simdInternal_
) };
212 static inline SimdFloat gmx_simdcall
operator+(SimdFloat a
, SimdFloat b
)
214 return { vec_add(a
.simdInternal_
, b
.simdInternal_
) };
217 static inline SimdFloat gmx_simdcall
operator-(SimdFloat a
, SimdFloat b
)
219 return { vec_sub(a
.simdInternal_
, b
.simdInternal_
) };
222 static inline SimdFloat gmx_simdcall
operator-(SimdFloat x
)
224 return { -x
.simdInternal_
};
227 static inline SimdFloat gmx_simdcall
operator*(SimdFloat a
, SimdFloat b
)
229 return { vec_mul(a
.simdInternal_
, b
.simdInternal_
) };
232 static inline SimdFloat gmx_simdcall
fma(SimdFloat a
, SimdFloat b
, SimdFloat c
)
234 return { vec_madd(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
237 static inline SimdFloat gmx_simdcall
fms(SimdFloat a
, SimdFloat b
, SimdFloat c
)
239 return { vec_msub(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
242 static inline SimdFloat gmx_simdcall
fnma(SimdFloat a
, SimdFloat b
, SimdFloat c
)
244 return { vec_nmsub(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
247 static inline SimdFloat gmx_simdcall
fnms(SimdFloat a
, SimdFloat b
, SimdFloat c
)
249 return { vec_nmadd(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
) };
252 static inline SimdFloat gmx_simdcall
rsqrt(SimdFloat x
)
254 return { vec_rsqrte(x
.simdInternal_
) };
257 static inline SimdFloat gmx_simdcall
rcp(SimdFloat x
)
259 return { vec_re(x
.simdInternal_
) };
262 static inline SimdFloat gmx_simdcall
maskAdd(SimdFloat a
, SimdFloat b
, SimdFBool m
)
264 return { vec_add(a
.simdInternal_
,
265 vec_and(b
.simdInternal_
, reinterpret_cast<__vector
float>(m
.simdInternal_
))) };
268 static inline SimdFloat gmx_simdcall
maskzMul(SimdFloat a
, SimdFloat b
, SimdFBool m
)
270 SimdFloat prod
= a
* b
;
272 return { vec_and(prod
.simdInternal_
, reinterpret_cast<__vector
float>(m
.simdInternal_
)) };
275 static inline SimdFloat gmx_simdcall
maskzFma(SimdFloat a
, SimdFloat b
, SimdFloat c
, SimdFBool m
)
277 SimdFloat prod
= fma(a
, b
, c
);
279 return { vec_and(prod
.simdInternal_
, reinterpret_cast<__vector
float>(m
.simdInternal_
)) };
282 static inline SimdFloat gmx_simdcall
maskzRsqrt(SimdFloat x
, SimdFBool m
)
285 x
.simdInternal_
= vec_sel(vec_splats(1.0F
), x
.simdInternal_
, m
.simdInternal_
);
287 return { vec_and(vec_rsqrte(x
.simdInternal_
), reinterpret_cast<__vector
float>(m
.simdInternal_
)) };
290 static inline SimdFloat gmx_simdcall
maskzRcp(SimdFloat x
, SimdFBool m
)
293 x
.simdInternal_
= vec_sel(vec_splats(1.0F
), x
.simdInternal_
, m
.simdInternal_
);
295 return { vec_and(vec_re(x
.simdInternal_
), reinterpret_cast<__vector
float>(m
.simdInternal_
)) };
298 static inline SimdFloat gmx_simdcall
abs(SimdFloat x
)
300 return { vec_abs(x
.simdInternal_
) };
303 static inline SimdFloat gmx_simdcall
max(SimdFloat a
, SimdFloat b
)
305 return { vec_max(a
.simdInternal_
, b
.simdInternal_
) };
308 static inline SimdFloat gmx_simdcall
min(SimdFloat a
, SimdFloat b
)
310 return { vec_min(a
.simdInternal_
, b
.simdInternal_
) };
313 static inline SimdFloat gmx_simdcall
round(SimdFloat x
)
315 return { vec_round(x
.simdInternal_
) };
318 static inline SimdFloat gmx_simdcall
trunc(SimdFloat x
)
320 return { vec_trunc(x
.simdInternal_
) };
323 template<MathOptimization opt
= MathOptimization::Safe
>
324 static inline SimdFloat gmx_simdcall
frexp(SimdFloat value
, SimdFInt32
* exponent
)
326 const __vector
float exponentMask
= reinterpret_cast<__vector
float>(vec_splats(0x7F800000U
));
327 const __vector
signed int exponentBias
= vec_splats(126);
328 const __vector
float half
= vec_splats(0.5F
);
329 __vector
signed int iExponent
;
331 __vector vsxBool
int valueIsZero
=
332 vec_cmpeq(value
.simdInternal_
, reinterpret_cast<__vector
float>(vec_splats(0.0)));
334 iExponent
= reinterpret_cast<__vector
signed int>(vec_and(value
.simdInternal_
, exponentMask
));
335 iExponent
= vec_sub(vec_sr(iExponent
, vec_splats(23U)), exponentBias
);
336 iExponent
= vec_andc(iExponent
, reinterpret_cast<__vector
int>(valueIsZero
));
338 __vector
float result
= vec_or(vec_andc(value
.simdInternal_
, exponentMask
), half
);
339 result
= vec_sel(result
, value
.simdInternal_
, valueIsZero
);
341 exponent
->simdInternal_
= iExponent
;
346 template<MathOptimization opt
= MathOptimization::Safe
>
347 static inline SimdFloat gmx_simdcall
ldexp(SimdFloat value
, SimdFInt32 exponent
)
349 const __vector
signed int exponentBias
= vec_splats(127);
350 __vector
signed int iExponent
;
352 iExponent
= vec_add(exponent
.simdInternal_
, exponentBias
);
354 if (opt
== MathOptimization::Safe
)
356 // Make sure biased argument is not negative
357 iExponent
= vec_max(iExponent
, vec_splat_s32(0));
360 iExponent
= vec_sl(iExponent
, vec_splats(23U));
362 return { vec_mul(value
.simdInternal_
, reinterpret_cast<__vector
float>(iExponent
)) };
365 static inline float gmx_simdcall
reduce(SimdFloat x
)
367 const __vector
unsigned char perm1
= { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
368 const __vector
unsigned char perm2
= { 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3 };
370 x
.simdInternal_
= vec_add(x
.simdInternal_
, vec_perm(x
.simdInternal_
, x
.simdInternal_
, perm1
));
371 x
.simdInternal_
= vec_add(x
.simdInternal_
, vec_perm(x
.simdInternal_
, x
.simdInternal_
, perm2
));
372 return vec_extract(x
.simdInternal_
, 0);
375 static inline SimdFBool gmx_simdcall
operator==(SimdFloat a
, SimdFloat b
)
377 return { vec_cmpeq(a
.simdInternal_
, b
.simdInternal_
) };
380 static inline SimdFBool gmx_simdcall
operator!=(SimdFloat a
, SimdFloat b
)
382 return { vec_or(vec_cmpgt(a
.simdInternal_
, b
.simdInternal_
),
383 vec_cmplt(a
.simdInternal_
, b
.simdInternal_
)) };
386 static inline SimdFBool gmx_simdcall
operator<(SimdFloat a
, SimdFloat b
)
388 return { vec_cmplt(a
.simdInternal_
, b
.simdInternal_
) };
391 static inline SimdFBool gmx_simdcall
operator<=(SimdFloat a
, SimdFloat b
)
393 return { vec_cmple(a
.simdInternal_
, b
.simdInternal_
) };
396 static inline SimdFBool gmx_simdcall
testBits(SimdFloat a
)
398 return { vec_cmpgt(reinterpret_cast<__vector
unsigned int>(a
.simdInternal_
), vec_splats(0U)) };
401 static inline SimdFBool gmx_simdcall
operator&&(SimdFBool a
, SimdFBool b
)
403 return { vec_and(a
.simdInternal_
, b
.simdInternal_
) };
406 static inline SimdFBool gmx_simdcall
operator||(SimdFBool a
, SimdFBool b
)
408 return { vec_or(a
.simdInternal_
, b
.simdInternal_
) };
411 static inline bool gmx_simdcall
anyTrue(SimdFBool a
)
413 return vec_any_ne(a
.simdInternal_
, reinterpret_cast<__vector vsxBool
int>(vec_splats(0)));
416 static inline SimdFloat gmx_simdcall
selectByMask(SimdFloat a
, SimdFBool m
)
418 return { vec_and(a
.simdInternal_
, reinterpret_cast<__vector
float>(m
.simdInternal_
)) };
421 static inline SimdFloat gmx_simdcall
selectByNotMask(SimdFloat a
, SimdFBool m
)
423 return { vec_andc(a
.simdInternal_
, reinterpret_cast<__vector
float>(m
.simdInternal_
)) };
426 static inline SimdFloat gmx_simdcall
blend(SimdFloat a
, SimdFloat b
, SimdFBool sel
)
428 return { vec_sel(a
.simdInternal_
, b
.simdInternal_
, sel
.simdInternal_
) };
431 static inline SimdFInt32 gmx_simdcall
operator&(SimdFInt32 a
, SimdFInt32 b
)
433 return { vec_and(a
.simdInternal_
, b
.simdInternal_
) };
436 static inline SimdFInt32 gmx_simdcall
andNot(SimdFInt32 a
, SimdFInt32 b
)
438 return { vec_andc(b
.simdInternal_
, a
.simdInternal_
) };
441 static inline SimdFInt32 gmx_simdcall
operator|(SimdFInt32 a
, SimdFInt32 b
)
443 return { vec_or(a
.simdInternal_
, b
.simdInternal_
) };
446 static inline SimdFInt32 gmx_simdcall
operator^(SimdFInt32 a
, SimdFInt32 b
)
448 return { vec_xor(a
.simdInternal_
, b
.simdInternal_
) };
451 static inline SimdFInt32 gmx_simdcall
operator+(SimdFInt32 a
, SimdFInt32 b
)
453 return { vec_add(a
.simdInternal_
, b
.simdInternal_
) };
456 static inline SimdFInt32 gmx_simdcall
operator-(SimdFInt32 a
, SimdFInt32 b
)
458 return { vec_sub(a
.simdInternal_
, b
.simdInternal_
) };
461 static inline SimdFInt32 gmx_simdcall
operator*(SimdFInt32 a
, SimdFInt32 b
)
463 return { a
.simdInternal_
* b
.simdInternal_
};
466 static inline SimdFIBool gmx_simdcall
operator==(SimdFInt32 a
, SimdFInt32 b
)
468 return { vec_cmpeq(a
.simdInternal_
, b
.simdInternal_
) };
471 static inline SimdFIBool gmx_simdcall
testBits(SimdFInt32 a
)
473 return { vec_cmpgt(reinterpret_cast<__vector
unsigned int>(a
.simdInternal_
), vec_splats(0U)) };
476 static inline SimdFIBool gmx_simdcall
operator<(SimdFInt32 a
, SimdFInt32 b
)
478 return { vec_cmplt(a
.simdInternal_
, b
.simdInternal_
) };
481 static inline SimdFIBool gmx_simdcall
operator&&(SimdFIBool a
, SimdFIBool b
)
483 return { vec_and(a
.simdInternal_
, b
.simdInternal_
) };
486 static inline SimdFIBool gmx_simdcall
operator||(SimdFIBool a
, SimdFIBool b
)
488 return { vec_or(a
.simdInternal_
, b
.simdInternal_
) };
491 static inline bool gmx_simdcall
anyTrue(SimdFIBool a
)
493 return vec_any_ne(a
.simdInternal_
, reinterpret_cast<__vector vsxBool
int>(vec_splats(0)));
496 static inline SimdFInt32 gmx_simdcall
selectByMask(SimdFInt32 a
, SimdFIBool m
)
498 return { vec_and(a
.simdInternal_
, reinterpret_cast<__vector
signed int>(m
.simdInternal_
)) };
501 static inline SimdFInt32 gmx_simdcall
selectByNotMask(SimdFInt32 a
, SimdFIBool m
)
503 return { vec_andc(a
.simdInternal_
, reinterpret_cast<__vector
signed int>(m
.simdInternal_
)) };
506 static inline SimdFInt32 gmx_simdcall
blend(SimdFInt32 a
, SimdFInt32 b
, SimdFIBool sel
)
508 return { vec_sel(a
.simdInternal_
, b
.simdInternal_
, sel
.simdInternal_
) };
511 static inline SimdFInt32 gmx_simdcall
cvtR2I(SimdFloat a
)
513 return { vec_cts(vec_round(a
.simdInternal_
), 0) };
516 static inline SimdFInt32 gmx_simdcall
cvttR2I(SimdFloat a
)
518 return { vec_cts(a
.simdInternal_
, 0) };
521 static inline SimdFloat gmx_simdcall
cvtI2R(SimdFInt32 a
)
523 return { vec_ctf(a
.simdInternal_
, 0) };
526 static inline SimdFIBool gmx_simdcall
cvtB2IB(SimdFBool a
)
528 return { a
.simdInternal_
};
531 static inline SimdFBool gmx_simdcall
cvtIB2B(SimdFIBool a
)
533 return { a
.simdInternal_
};
536 static inline SimdFloat gmx_simdcall
copysign(SimdFloat x
, SimdFloat y
)
538 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
540 __asm__("xvcpsgnsp %x0,%x1,%x2" : "=wf"(res
) : "wf"(y
.simdInternal_
), "wf"(x
.simdInternal_
));
543 return { vec_cpsgn(y
.simdInternal_
, x
.simdInternal_
) };
549 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H