src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_SPARC64_HPC_ACE_H
  37 #define GMX_SIMD_IMPL_SPARC64_HPC_ACE_H
  38
  39 #include <math.h>
  40
  41 /* Fujitsu header borrows the name from SSE2, since some instructions have aliases.
  42  * Environment/compiler version GM-1.2.0-17 seems to be buggy; when -Xg is
  43  * defined to enable GNUC extensions, this sets _ISOC99_SOURCE, which in
  44  * turn causes all intrinsics to be declared inline _instead_ of static. This
  45  * leads to duplicate symbol errors at link time.
  46  * To work around this we unset this before including the HPC-ACE header, and
  47  * reset the value afterwards.
  48  */
  49 #ifdef _ISOC99_SOURCE
  50 #    undef _ISOC99_SOURCE
  51 #    define SAVE_ISOC99_SOURCE
  52 #endif
  53
  54 #include <emmintrin.h>
  55
  56 #ifdef SAVE_ISOC99_SOURCE
  57 #    define _ISOC99_SOURCE
  58 #    undef SAVE_ISOC99_SOURCE
  59 #endif
  60
  61
  62 /* Sparc64 HPC-ACE SIMD instruction wrappers
  63  *
  64  * Please see documentation in gromacs/simd/simd.h for defines.
  65  */
  66
  67 /* Capability definitions for Sparc64 HPC-ACE */
  68 /* HPC-ACE is actually double-only on the register level, but we also implement
  69  * a single-precision interface where we only offer single-precision accuracy
  70  * in math functions - this can save quite a few cycles.
  71  */
     /* Each flag below is either defined (feature offered by this implementation)
      * or explicitly undefined (feature not offered). The meaning of the
      * individual flags is documented in gromacs/simd/simd.h.
      */
  72 #define GMX_SIMD_HAVE_FLOAT
  73 #define GMX_SIMD_HAVE_DOUBLE
  74 #define GMX_SIMD_HAVE_HARDWARE
     /* Unaligned load/store are not advertised. Note that gmx_simd_loadu_f/_d
      * (and the integer loadu variants) are still defined further down, as plain
      * aliases of the corresponding regular loads.
      */
  75 #undef  GMX_SIMD_HAVE_LOADU
  76 #undef  GMX_SIMD_HAVE_STOREU
  77 #define GMX_SIMD_HAVE_LOGICAL
  78 #define GMX_SIMD_HAVE_FMA
     /* No dedicated fraction support is advertised; gmx_simd_fraction_f/_d below
      * are composed from a subtraction and a truncation.
      */
  79 #undef  GMX_SIMD_HAVE_FRACTION
     /* Integer types: extraction and logical ops are provided, arithmetic is not. */
  80 #define GMX_SIMD_HAVE_FINT32
  81 #define GMX_SIMD_HAVE_FINT32_EXTRACT
  82 #define GMX_SIMD_HAVE_FINT32_LOGICAL
  83 #undef  GMX_SIMD_HAVE_FINT32_ARITHMETICS
  84 #define GMX_SIMD_HAVE_DINT32
  85 #define GMX_SIMD_HAVE_DINT32_EXTRACT
  86 #define GMX_SIMD_HAVE_DINT32_LOGICAL
  87 #undef  GMX_SIMD_HAVE_DINT32_ARITHMETICS
     /* No SIMD4 types: both precisions are only 2 elements wide here. */
  88 #undef  GMX_SIMD4_HAVE_FLOAT
  89 #undef  GMX_SIMD4_HAVE_DOUBLE
  90
  91 /* Implementation details */
     /* Number of elements per SIMD variable. All four types are 2 wide because
      * they are all represented by the same _fjsp_v2r8 register type.
      */
  92 #define GMX_SIMD_FLOAT_WIDTH         2
  93 #define GMX_SIMD_DOUBLE_WIDTH        2
  94 #define GMX_SIMD_FINT32_WIDTH        2
  95 #define GMX_SIMD_DINT32_WIDTH        2
     /* NOTE(review): presumably the number of accurate bits delivered by the
      * approximate _fjsp_rsqrta_v2r8 / _fjsp_rcpa_v2r8 intrinsics used for
      * gmx_simd_rsqrt_* / gmx_simd_rcp_* - confirm against simd.h and the
      * Fujitsu intrinsics documentation.
      */
  96 #define GMX_SIMD_RSQRT_BITS         10
  97 #define GMX_SIMD_RCP_BITS            9
  98
  99 /* HPC-ACE is a bit strange; some instructions like
 100  * shifts only work on _integer_ versions of SIMD
 101  * registers, but there are no intrinsics to load
 102  * or convert, or even to cast. The only way to use
 103  * them is to declare unions with the SIMD integer
 104  * type. However, this will lead to extra load ops,
 105  * and the normal real-to-int and int-to-real
 106  * conversions work purely on the v2r8 fp regs.
 107  * Since our most common usage is to convert and
 108  * then extract the result for table lookups, we
 109  * define the gmx_simd_fint32_t datatype to use
 110  * the v2r8 rather than v2i8 SIMD type.
 111  */
 112
 113 /****************************************************
 114  *      SINGLE PRECISION SIMD IMPLEMENTATION        *
 115  ****************************************************/
     /* gmx_simd_float_t uses the same _fjsp_v2r8 register type as the double
      * precision interface. Only load and store differ: they go through helper
      * functions that convert to/from a packed single-precision pair in memory.
      */
 116 #define gmx_simd_float_t          _fjsp_v2r8
 117 #define gmx_simd_load_f           gmx_simd_load_f_sparc64_hpc_ace
 118 #define gmx_simd_load1_f(m)       _fjsp_set_v2r8((*m), (*m))
 119 #define gmx_simd_set1_f(a)        _fjsp_set_v2r8(a, a)
 120 #define gmx_simd_store_f          gmx_simd_store_f_sparc64_hpc_ace
     /* Plain alias of gmx_simd_load_f; GMX_SIMD_HAVE_LOADU is left undefined. */
 121 #define gmx_simd_loadu_f          gmx_simd_load_f
 122 /* No unaligned store of gmx_simd_float_t */
 123 #define gmx_simd_setzero_f        _fjsp_setzero_v2r8
 124 #define gmx_simd_add_f            _fjsp_add_v2r8
 125 #define gmx_simd_sub_f            _fjsp_sub_v2r8
 126 #define gmx_simd_mul_f            _fjsp_mul_v2r8
     /* Fused multiply-add family. The names are deliberately crossed for the
      * negated forms: gmx fnmadd maps to _fjsp_nmsub and gmx fnmsub maps to
      * _fjsp_nmadd. NOTE(review): presumably the Fujitsu intrinsics negate the
      * whole fused expression rather than only the product - confirm in the
      * intrinsics manual before changing.
      */
 127 #define gmx_simd_fmadd_f(a, b, c)   _fjsp_madd_v2r8(a, b, c)
 128 #define gmx_simd_fmsub_f(a, b, c)   _fjsp_msub_v2r8(a, b, c)
 129 #define gmx_simd_fnmadd_f(a, b, c)  _fjsp_nmsub_v2r8(a, b, c)
 130 #define gmx_simd_fnmsub_f(a, b, c)  _fjsp_nmadd_v2r8(a, b, c)
 131 #define gmx_simd_and_f            _fjsp_and_v2r8
 132 #define gmx_simd_andnot_f         _fjsp_andnot1_v2r8
 133 #define gmx_simd_or_f             _fjsp_or_v2r8
 134 #define gmx_simd_xor_f            _fjsp_xor_v2r8
 135 #define gmx_simd_rsqrt_f          _fjsp_rsqrta_v2r8
 136 #define gmx_simd_rcp_f            _fjsp_rcpa_v2r8
 137 #define gmx_simd_fabs_f(x)        _fjsp_abs_v2r8(x)
 138 #define gmx_simd_fneg_f(x)        _fjsp_neg_v2r8(x)
 139 #define gmx_simd_max_f            _fjsp_max_v2r8
 140 #define gmx_simd_min_f            _fjsp_min_v2r8
     /* Rounding, truncation and exponent/mantissa handling simply reuse the
      * double-precision versions, since the register type is identical.
      */
 141 #define gmx_simd_round_f(x)       gmx_simd_round_d(x)
 142 #define gmx_simd_trunc_f(x)       gmx_simd_trunc_d(x)
     /* Composed from sub + trunc; GMX_SIMD_HAVE_FRACTION is left undefined. */
 143 #define gmx_simd_fraction_f(x)    gmx_simd_sub_f(x, gmx_simd_trunc_f(x))
 144 #define gmx_simd_get_exponent_f   gmx_simd_get_exponent_d_sparc64_hpc_ace
 145 #define gmx_simd_get_mantissa_f   gmx_simd_get_mantissa_d_sparc64_hpc_ace
 146 #define gmx_simd_set_exponent_f   gmx_simd_set_exponent_d_sparc64_hpc_ace
 147 /* integer datatype corresponding to float: gmx_simd_fint32_t */
     /* The integer type is also _fjsp_v2r8 (a floating-point register type), as
      * explained in the HPC-ACE note before this section. All integer
      * operations forward to the helpers shared with gmx_simd_dint32_t.
      */
 148 #define gmx_simd_fint32_t         _fjsp_v2r8
 149 #define gmx_simd_load_fi(m)       gmx_simd_load_di_sparc64_hpc_ace(m)
 150 #define gmx_simd_set1_fi(i)       gmx_simd_set1_di_sparc64_hpc_ace(i)
 151 #define gmx_simd_store_fi(m, x)   gmx_simd_store_di_sparc64_hpc_ace(m, x)
 152 #define gmx_simd_loadu_fi         gmx_simd_load_fi
 153 /* No unaligned store of gmx_simd_fint32_t */
 154 #define gmx_simd_setzero_fi       _fjsp_setzero_v2r8
 155 #define gmx_simd_cvt_f2i          gmx_simd_cvt_d2i
 156 #define gmx_simd_cvtt_f2i         _fjsp_dtox_v2r8
 157 #define gmx_simd_cvt_i2f          _fjsp_xtod_v2r8
 158 #define gmx_simd_extract_fi      gmx_simd_extract_di_sparc64_hpc_ace
 159 /* Integer logical ops on gmx_simd_fint32_t */
 160 /* Shifts are horrible since they require memory re-loads. */
 161 #define gmx_simd_slli_fi          gmx_simd_slli_di_sparc64_hpc_ace
 162 #define gmx_simd_srli_fi          gmx_simd_srli_di_sparc64_hpc_ace
 163 #define gmx_simd_and_fi           _fjsp_and_v2r8
 164 #define gmx_simd_andnot_fi(a, b)   _fjsp_andnot1_v2r8(a, b)
 165 #define gmx_simd_or_fi            _fjsp_or_v2r8
 166 #define gmx_simd_xor_fi           _fjsp_xor_v2r8
 167 /* No integer arithmetic ops on gmx_simd_fint32_t */
 168 /* Boolean & comparison operations on gmx_simd_float_t */
     /* Booleans are again _fjsp_v2r8 values, so boolean and/or and the
      * zero-blend are ordinary bitwise operations on the register.
      */
 169 #define gmx_simd_fbool_t          _fjsp_v2r8
 170 #define gmx_simd_cmpeq_f          _fjsp_cmpeq_v2r8
 171 #define gmx_simd_cmplt_f          _fjsp_cmplt_v2r8
 172 #define gmx_simd_cmple_f          _fjsp_cmple_v2r8
 173 #define gmx_simd_and_fb           _fjsp_and_v2r8
 174 #define gmx_simd_or_fb            _fjsp_or_v2r8
 175 #define gmx_simd_anytrue_fb       gmx_simd_anytrue_d_sparc64_hpc_ace
 176 #define gmx_simd_blendzero_f      _fjsp_and_v2r8
     /* Argument order is intentionally changed when forwarding: blendnotzero
      * passes (sel, a) and blendv passes (b, a, s) to the intrinsic.
      */
 177 #define gmx_simd_blendnotzero_f(a, sel) _fjsp_andnot1_v2r8(sel, a)
 178 #define gmx_simd_blendv_f(a, b, s) _fjsp_selmov_v2r8(b, a, s)
 179 #define gmx_simd_reduce_f(a)       gmx_simd_reduce_d_sparc64_hpc_ace(a)
 180 /* No boolean & comparison operations on gmx_simd_fint32_t */
 181 /* No conversions between different booleans */
 182
 183 /****************************************************
 184  *      DOUBLE PRECISION SIMD IMPLEMENTATION        *
 185  ****************************************************/
     /* Double precision maps almost one-to-one onto the _fjsp_*_v2r8 intrinsics;
      * only the operations without a direct intrinsic go through the
      * *_sparc64_hpc_ace helper functions defined later in this file.
      */
 186 #define gmx_simd_double_t          _fjsp_v2r8
 187 #define gmx_simd_load_d            _fjsp_load_v2r8
 188 #define gmx_simd_load1_d(m)        _fjsp_set_v2r8((*m), (*m))
 189 #define gmx_simd_set1_d(a)         _fjsp_set_v2r8(a, a)
 190 #define gmx_simd_store_d           _fjsp_store_v2r8
     /* Plain alias of gmx_simd_load_d; GMX_SIMD_HAVE_LOADU is left undefined. */
 191 #define gmx_simd_loadu_d           gmx_simd_load_d
 192 /* No unaligned store of gmx_simd_double_t */
 193 #define gmx_simd_setzero_d         _fjsp_setzero_v2r8
 194 #define gmx_simd_add_d             _fjsp_add_v2r8
 195 #define gmx_simd_sub_d             _fjsp_sub_v2r8
 196 #define gmx_simd_mul_d             _fjsp_mul_v2r8
     /* Fused multiply-add family. The names are deliberately crossed for the
      * negated forms: gmx fnmadd maps to _fjsp_nmsub and gmx fnmsub maps to
      * _fjsp_nmadd. NOTE(review): presumably the Fujitsu intrinsics negate the
      * whole fused expression rather than only the product - confirm in the
      * intrinsics manual before changing.
      */
 197 #define gmx_simd_fmadd_d(a, b, c)   _fjsp_madd_v2r8(a, b, c)
 198 #define gmx_simd_fmsub_d(a, b, c)   _fjsp_msub_v2r8(a, b, c)
 199 #define gmx_simd_fnmadd_d(a, b, c)  _fjsp_nmsub_v2r8(a, b, c)
 200 #define gmx_simd_fnmsub_d(a, b, c)  _fjsp_nmadd_v2r8(a, b, c)
 201 #define gmx_simd_and_d             _fjsp_and_v2r8
 202 #define gmx_simd_andnot_d          _fjsp_andnot1_v2r8
 203 #define gmx_simd_or_d              _fjsp_or_v2r8
 204 #define gmx_simd_xor_d             _fjsp_xor_v2r8
 205 #define gmx_simd_rsqrt_d(x)        _fjsp_rsqrta_v2r8(x)
 206 #define gmx_simd_rcp_d(x)          _fjsp_rcpa_v2r8(x)
 207 #define gmx_simd_fabs_d(x)         _fjsp_abs_v2r8(x)
 208 #define gmx_simd_fneg_d(x)         _fjsp_neg_v2r8(x)
 209 #define gmx_simd_max_d             _fjsp_max_v2r8
 210 #define gmx_simd_min_d             _fjsp_min_v2r8
     /* round/trunc are implemented as a round trip through the integer
      * representation (rounding or truncating conversion, then back to real).
      */
 211 #define gmx_simd_round_d(x)        gmx_simd_cvt_i2d(gmx_simd_cvt_d2i(x))
 212 #define gmx_simd_trunc_d(x)        gmx_simd_cvt_i2d(gmx_simd_cvtt_d2i(x))
     /* Composed from sub + trunc; GMX_SIMD_HAVE_FRACTION is left undefined. */
 213 #define gmx_simd_fraction_d(x)     gmx_simd_sub_d(x, gmx_simd_trunc_d(x))
 214 #define gmx_simd_get_exponent_d    gmx_simd_get_exponent_d_sparc64_hpc_ace
 215 #define gmx_simd_get_mantissa_d    gmx_simd_get_mantissa_d_sparc64_hpc_ace
 216 #define gmx_simd_set_exponent_d    gmx_simd_set_exponent_d_sparc64_hpc_ace
 217 /* integer datatype corresponding to double: gmx_simd_dint32_t */
     /* The integer type is also _fjsp_v2r8 (a floating-point register type), as
      * explained in the HPC-ACE note before the single-precision section.
      */
 218 #define gmx_simd_dint32_t          _fjsp_v2r8
 219 #define gmx_simd_load_di(m)        gmx_simd_load_di_sparc64_hpc_ace(m)
 220 #define gmx_simd_set1_di(i)        gmx_simd_set1_di_sparc64_hpc_ace(i)
 221 #define gmx_simd_store_di(m, x)    gmx_simd_store_di_sparc64_hpc_ace(m, x)
 222 #define gmx_simd_loadu_di          gmx_simd_load_di
 223 /* No unaligned store of gmx_simd_dint32_t */
 224 #define gmx_simd_setzero_di        _fjsp_setzero_v2r8
     /* cvt_d2i rounds via a helper; cvtt_d2i uses the conversion intrinsic directly. */
 225 #define gmx_simd_cvt_d2i           gmx_simd_cvt_d2i_sparc64_hpc_ace
 226 #define gmx_simd_cvtt_d2i          _fjsp_dtox_v2r8
 227 #define gmx_simd_cvt_i2d           _fjsp_xtod_v2r8
 228 #define gmx_simd_extract_di        gmx_simd_extract_di_sparc64_hpc_ace
 229 /* Integer logical ops on gmx_simd_dint32_t */
 230 #define gmx_simd_slli_di           gmx_simd_slli_di_sparc64_hpc_ace
 231 #define gmx_simd_srli_di           gmx_simd_srli_di_sparc64_hpc_ace
 232 #define gmx_simd_and_di            _fjsp_and_v2r8
 233 #define gmx_simd_andnot_di         _fjsp_andnot1_v2r8
 234 #define gmx_simd_or_di             _fjsp_or_v2r8
 235 #define gmx_simd_xor_di            _fjsp_xor_v2r8
 236 /* No integer arithmetic ops on gmx_simd_dint32_t */
 237 /* Boolean & comparison operations on gmx_simd_double_t */
     /* Booleans are again _fjsp_v2r8 values, so boolean and/or and the
      * zero-blend are ordinary bitwise operations on the register.
      */
 238 #define gmx_simd_dbool_t           _fjsp_v2r8
 239 #define gmx_simd_cmpeq_d           _fjsp_cmpeq_v2r8
 240 #define gmx_simd_cmplt_d           _fjsp_cmplt_v2r8
 241 #define gmx_simd_cmple_d           _fjsp_cmple_v2r8
 242 #define gmx_simd_and_db            _fjsp_and_v2r8
 243 #define gmx_simd_or_db             _fjsp_or_v2r8
 244 #define gmx_simd_anytrue_db         gmx_simd_anytrue_d_sparc64_hpc_ace
 245 #define gmx_simd_blendzero_d        _fjsp_and_v2r8
     /* Argument order is intentionally changed when forwarding: blendnotzero
      * passes (sel, a) and blendv passes (b, a, sel) to the intrinsic.
      */
 246 #define gmx_simd_blendnotzero_d(a, sel)  _fjsp_andnot1_v2r8(sel, a)
 247 #define gmx_simd_blendv_d(a, b, sel) _fjsp_selmov_v2r8(b, a, sel)
 248 #define gmx_simd_reduce_d(a)        gmx_simd_reduce_d_sparc64_hpc_ace(a)
 249
 250 /* No boolean & comparison operations on gmx_simd_dint32_t */
 251 /* Float/double conversion */
     /* Both precisions share the _fjsp_v2r8 register type, so converting
      * between them is the identity.
      */
 252 #define gmx_simd_cvt_f2d(f)         (f)
 253 #define gmx_simd_cvt_d2f(d)         (d)
 254
 255
 256 /****************************************************
 257  * SINGLE PRECISION IMPLEMENTATION HELPER FUNCTIONS *
 258  ****************************************************/
 259 static gmx_inline gmx_simd_float_t
 260 gmx_simd_load_f_sparc64_hpc_ace(const float *m)
 261 {
 262     /* We are not allowed to cast single-to-double registers, but we can
 263      * masquerade the memory location as a variable of type _fjsp_v2r4.
 264      */
 265     const _fjsp_v2r4 *p = (const _fjsp_v2r4 *)m;
 266     _fjsp_v2r4        simd;
 267
 268     simd = *p;
 269     return _fjsp_stod_v2r8(simd);
 270 }
 271
 272 static gmx_inline void
 273 gmx_simd_store_f_sparc64_hpc_ace(float *m, gmx_simd_float_t x)
 274 {
 275     /* We are not allowed to cast single-to-double registers, but we can
 276      * masquerade the memory location as a variable of type _fjsp_v2r4.
 277      */
 278     _fjsp_v2r4 *p = (_fjsp_v2r4 *)m;
 279     *p = _fjsp_dtos_v2r4(x);
 280 }
 281
 282 static gmx_inline gmx_simd_dint32_t
 283 gmx_simd_load_di_sparc64_hpc_ace(const int *m)
 284 {
 285     union
 286     {
 287         _fjsp_v2r8       simd;
 288         long long int    i[2];
 289     }
 290     conv;
 291
 292     conv.i[0] = m[0];
 293     conv.i[1] = m[1];
 294
 295     return _fjsp_load_v2r8( (double *) &(conv.simd) );
 296 }
 297
 298 static gmx_inline void
 299 gmx_simd_store_di_sparc64_hpc_ace(int *m, gmx_simd_dint32_t x)
 300 {
 301     union
 302     {
 303         _fjsp_v2r8       simd;
 304         long long int    i[2];
 305     }
 306     conv;
 307
 308     _fjsp_store_v2r8( (double *) &(conv.simd), x );
 309
 310     m[0] = conv.i[0];
 311     m[1] = conv.i[1];
 312 }
 313
 314 static gmx_inline gmx_simd_dint32_t
 315 gmx_simd_set1_di_sparc64_hpc_ace(int i)
 316 {
 317     union
 318     {
 319         _fjsp_v2r8       simd;
 320         long long int    i[2];
 321     }
 322     conv;
 323
 324     conv.i[0] = i;
 325     conv.i[1] = i;
 326
 327     return _fjsp_load_v2r8( (double *) &(conv.simd) );
 328 }
 329
 330 static gmx_inline int
 331 gmx_simd_extract_di_sparc64_hpc_ace(gmx_simd_dint32_t x, int i)
 332 {
 333     long long int res;
 334     /* This conditional should be optimized away at compile time */
 335     if (i == 0)
 336     {
 337         _fjsp_storel_v2r8((double *)&res, x);
 338     }
 339     else
 340     {
 341         _fjsp_storeh_v2r8((double *)&res, x);
 342     }
 343     return (int)res;
 344 }
 345
 346 static gmx_inline gmx_simd_dint32_t
 347 gmx_simd_slli_di_sparc64_hpc_ace(gmx_simd_dint32_t x, int i)
 348 {
 349     _fjsp_v2i8 ix = *((_fjsp_v2i8 *)&x);
 350     ix = _fjsp_slli_v2i8(ix, i);
 351     x  = *((_fjsp_v2r8 *)&ix);
 352     return x;
 353 }
 354
 355 static gmx_inline gmx_simd_dint32_t
 356 gmx_simd_srli_di_sparc64_hpc_ace(gmx_simd_dint32_t x, int i)
 357 {
 358     _fjsp_v2i8 ix = *((_fjsp_v2i8 *)&x);
 359     ix = _fjsp_srli_v2i8(ix, i);
 360     x  = *((_fjsp_v2r8 *)&ix);
 361     return x;
 362 }
 363
 364 static gmx_inline gmx_simd_dint32_t
 365 gmx_simd_cvt_d2i_sparc64_hpc_ace(gmx_simd_double_t x)
 366 {
 367     _fjsp_v2r8 signbit = _fjsp_set_v2r8(-0.0, -0.0);
 368     _fjsp_v2r8 half    = _fjsp_set_v2r8(0.5, 0.5);
 369
 370     x = _fjsp_add_v2r8(x, _fjsp_or_v2r8(_fjsp_and_v2r8(signbit, x), half));
 371     return _fjsp_dtox_v2r8(x);
 372 }
 373
 374 static gmx_inline int
 375 gmx_simd_anytrue_d_sparc64_hpc_ace(gmx_simd_dbool_t x)
 376 {
 377     long long int i;
 378     x = _fjsp_or_v2r8(x, _fjsp_unpackhi_v2r8(x, x));
 379     _fjsp_storel_v2r8((double *)&i, x);
 380     return (i != 0LL);
 381 }
 382
 383 static gmx_inline double
 384 gmx_simd_reduce_d_sparc64_hpc_ace(gmx_simd_double_t x)
 385 {
 386     double d;
 387     x = _fjsp_add_v2r8(x, _fjsp_unpackhi_v2r8(x, x));
 388     _fjsp_storel_v2r8(&d, x);
 389     return d;
 390 }
 391
 392
 393 static gmx_inline gmx_simd_double_t
 394 gmx_simd_get_exponent_d_sparc64_hpc_ace(gmx_simd_double_t x)
 395 {
 396     /* HPC-ACE cannot cast _fjsp_v2r8 to _fjsp_v4i4, so to perform shifts we
 397      * would need to store and reload. Since we are only operating on two
 398      * numbers it is likely more efficient to do the operations directly on
 399      * normal registers.
 400      */
 401     const gmx_int64_t    expmask   = 0x7ff0000000000000LL;
 402     const gmx_int64_t    expbias   = 1023LL;
 403
 404     union
 405     {
 406         _fjsp_v2r8       simd;
 407         long long int    i[2];
 408     }
 409     conv;
 410
 411     _fjsp_store_v2r8( (double *) &conv.simd, x);
 412     conv.i[0] = ((conv.i[0] & expmask) >> 52) - expbias;
 413     conv.i[1] = ((conv.i[1] & expmask) >> 52) - expbias;
 414     x         = _fjsp_load_v2r8( (double *) &conv.simd);
 415     return _fjsp_xtod_v2r8(x);
 416 }
 417
 418 static gmx_inline gmx_simd_double_t
 419 gmx_simd_get_mantissa_d_sparc64_hpc_ace(gmx_simd_double_t x)
 420 {
 421     gmx_int64_t       mantmask[2] = {0x000fffffffffffffLL, 0x000fffffffffffffLL};
 422     gmx_simd_double_t one         = _fjsp_set_v2r8(1.0, 1.0);
 423
 424     x = _fjsp_and_v2r8(x, _fjsp_load_v2r8((double *)mantmask));
 425     return _fjsp_or_v2r8(x, one);
 426 }
 427
 428 static gmx_inline gmx_simd_double_t
 429 gmx_simd_set_exponent_d_sparc64_hpc_ace(gmx_simd_double_t x)
 430 {
 431     const gmx_int64_t    expbias   = 1023;
 432     union
 433     {
 434         _fjsp_v2r8       simd;
 435         long long int    i[2];
 436     }
 437     conv;
 438
 439
 440     _fjsp_store_v2r8( (double *) &conv.simd, gmx_simd_cvt_d2i_sparc64_hpc_ace(x));
 441     conv.i[0] = (conv.i[0] + expbias) << 52;
 442     conv.i[1] = (conv.i[1] + expbias) << 52;
 443
 444     return _fjsp_load_v2r8( (double *) &conv.simd);
 445 }
 446
 447
 448 /* No SIMD4 support, since both single & double are only 2-wide */
 449
 450
 451 #endif /* GMX_SIMD_IMPL_SPARC64_HPC_ACE_H */