src/gromacs/simd/impl_ibm_qpx/impl_ibm_qpx_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H
  37 #define GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H
  38
  39 #include <math.h>
  40 #ifdef __clang__
  41 #include <qpxmath.h>
  42 #endif
  43
  44 #include "impl_ibm_qpx_common.h"
  45
  46 /****************************************************
  47  *      SINGLE PRECISION SIMD IMPLEMENTATION        *
  48  ****************************************************/
  49 #define gmx_simd_float_t          vector4double
  50 #ifdef NDEBUG
  51 #    define gmx_simd_load_f(m)    vec_ld(0, (float *)(m))
  52 #    define gmx_simd_store_f(m, a) vec_st(a, 0, (float *)(m))
  53 #else
  54 #    define gmx_simd_load_f(m)    vec_lda(0, (float *)(m))
  55 #    define gmx_simd_store_f(m, a) vec_sta(a, 0, (float *)(m))
  56 #endif
  57 #    define gmx_simd_load1_f(m)   vec_lds(0, (float *)(m))
  58 #define gmx_simd_set1_f(x)        vec_splats(x)
  59 /* No support for unaligned load/store */
  60 #define gmx_simd_setzero_f        gmx_simd_setzero_ibm_qpx
  61 #define gmx_simd_add_f(a, b)       vec_add(a, b)
  62 #define gmx_simd_sub_f(a, b)       vec_sub(a, b)
  63 #define gmx_simd_mul_f(a, b)       vec_mul(a, b)
  64 #define gmx_simd_fmadd_f(a, b, c)   vec_madd(a, b, c)
  65 #define gmx_simd_fmsub_f(a, b, c)   vec_msub(a, b, c)
  66 /* IBM uses an alternative FMA definition, so -a*b+c=-(a*b-c) is "nmsub" */
  67 #define gmx_simd_fnmadd_f(a, b, c)  vec_nmsub(a, b, c)
  68 /* IBM uses an alternative FMA definition, so -a*b-c=-(a*b+c) is "nmadd" */
  69 #define gmx_simd_fnmsub_f(a, b, c)  vec_nmadd(a, b, c)
  70 /* gmx_simd_and_f not supported - no bitwise logical ops */
  71 /* gmx_simd_andnot_f not supported - no bitwise logical ops */
  72 /* gmx_simd_or_f not supported - no bitwise logical ops */
  73 /* gmx_simd_xor_f not supported - no bitwise logical ops */
  74 #define gmx_simd_rsqrt_f(a)       vec_rsqrte(a)
  75 #define gmx_simd_rcp_f(a)         vec_re(a)
  76 #define gmx_simd_fabs_f(a)        vec_abs(a)
  77 #define gmx_simd_fneg_f           gmx_simd_fneg_ibm_qpx
  78 #define gmx_simd_max_f(a, b)       vec_sel(b, a, vec_sub(a, b))
  79 #define gmx_simd_min_f(a, b)       vec_sel(b, a, vec_sub(b, a))
  80 /* Note: It is critical to use vec_cfid(vec_ctid(a)) for the implementation
  81  * of gmx_simd_round_f(), since vec_round() does not adhere to the FP control
  82  * word rounding scheme. We rely on float-to-float and float-to-integer
  83  * rounding being the same for half-way values in a few algorithms.
  84  */
  85 #define gmx_simd_round_f(a)       vec_cfid(vec_ctid(a))
  86 #define gmx_simd_trunc_f(a)       vec_trunc(a)
  87 #define gmx_simd_fraction_f(x)    vec_sub(x, vec_trunc(x))
  88 #define gmx_simd_get_exponent_f(a) gmx_simd_get_exponent_ibm_qpx(a)
  89 #define gmx_simd_get_mantissa_f(a) gmx_simd_get_mantissa_ibm_qpx(a)
  90 #define gmx_simd_set_exponent_f(a) gmx_simd_set_exponent_ibm_qpx(a)
  91 /* integer datatype corresponding to float: gmx_simd_fint32_t */
  92 #define gmx_simd_fint32_t         vector4double
  93 #ifdef NDEBUG
  94 #    define gmx_simd_load_fi(m)   vec_ldia(0, (int *)(m))
  95 #else
  96 #    define gmx_simd_load_fi(m)   vec_ldiaa(0, (int *)(m))
  97 #endif
  98 #define gmx_simd_set1_fi(i)       gmx_simd_set1_int_ibm_qpx(i)
  99 #define gmx_simd_store_fi(m, x)    vec_st(x, 0, (int *)(m))
 100 #define gmx_simd_setzero_fi       gmx_simd_setzero_ibm_qpx
 101 #define gmx_simd_cvt_f2i(a)       vec_ctiw(a)
 102 #define gmx_simd_cvtt_f2i(a)      vec_ctiwz(a)
 103 #define gmx_simd_cvt_i2f(a)       vec_cfid(a)
 104 /* Integer simd extract not available */
 105 /* Integer logical ops on gmx_simd_fint32_t not supported */
 106 /* Integer arithmetic ops on gmx_simd_fint32_t not supported */
 107 /* Boolean & comparison operations on gmx_simd_float_t */
 108 #define gmx_simd_fbool_t          vector4double
 109 #define gmx_simd_cmpeq_f(a, b)     vec_cmpeq(a, b)
 110 #define gmx_simd_cmplt_f(a, b)     vec_cmplt((a), (b))
 111 #define gmx_simd_cmple_f(a, b)     gmx_simd_or_fb(vec_cmpeq(a, b), vec_cmplt(a, b))
 112 #define gmx_simd_and_fb(a, b)      vec_and(a, b)
 113 #define gmx_simd_or_fb(a, b)       vec_or(a, b)
 114 #define gmx_simd_anytrue_fb(a)    gmx_simd_anytrue_bool_ibm_qpx(a)
 115 #define gmx_simd_blendzero_f(a, sel) vec_sel(vec_splats(0.0), a, sel)
 116 #define gmx_simd_blendnotzero_f(a, sel) vec_sel(a, vec_splats(0.0), sel)
 117 #define gmx_simd_blendv_f(a, b, sel)  vec_sel(a, b, sel)
 118 #define gmx_simd_reduce_f(a)       gmx_simd_reduce_ibm_qpx(a)
 119
 120
 121 /* Boolean & comparison operations on gmx_simd_fint32_t not supported */
 122 /* Conversions between different booleans not supported */
 123
 124 /* Note: Since QPX registers are always double internally, the single
 125  * precision defines use several double precision helper functions.
 126  */
 127
 128 #endif /* GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H */