src/gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_X86_SSE4_1_SIMD_FLOAT_H
  37 #define GMX_SIMD_IMPL_X86_SSE4_1_SIMD_FLOAT_H
  38
  39 #include "config.h"
  40
  41 #include <smmintrin.h>
  42
  43 #include "gromacs/simd/impl_x86_sse2/impl_x86_sse2_simd_float.h"
  44
  45 namespace gmx
  46 {
  47
  48 template<int index>
  49 static inline std::int32_t gmx_simdcall
  50 extract(SimdFInt32 a)
  51 {
  52     return _mm_extract_epi32(a.simdInternal_, index);
  53 }
  54
  55 static inline SimdFloat
  56 maskzRsqrt(SimdFloat x, SimdFBool m)
  57 {
  58 #ifndef NDEBUG
  59     x.simdInternal_ = _mm_blendv_ps(_mm_set1_ps(1.0f), x.simdInternal_, m.simdInternal_);
  60 #endif
  61     return {
  62                _mm_and_ps(_mm_rsqrt_ps(x.simdInternal_), m.simdInternal_)
  63     };
  64 }
  65
  66 static inline SimdFloat
  67 maskzRcp(SimdFloat x, SimdFBool m)
  68 {
  69 #ifndef NDEBUG
  70     x.simdInternal_ = _mm_blendv_ps(_mm_set1_ps(1.0f), x.simdInternal_, m.simdInternal_);
  71 #endif
  72     return {
  73                _mm_and_ps(_mm_rcp_ps(x.simdInternal_), m.simdInternal_)
  74     };
  75 }
  76
  77 static inline SimdFloat gmx_simdcall
  78 round(SimdFloat x)
  79 {
  80     return {
  81                _mm_round_ps(x.simdInternal_, _MM_FROUND_NINT)
  82     };
  83 }
  84
  85 static inline SimdFloat gmx_simdcall
  86 trunc(SimdFloat x)
  87 {
  88     return {
  89                _mm_round_ps(x.simdInternal_, _MM_FROUND_TRUNC)
  90     };
  91 }
  92
  93 static inline SimdFloat gmx_simdcall
  94 blend(SimdFloat a, SimdFloat b, SimdFBool sel)
  95 {
  96     return {
  97                _mm_blendv_ps(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
  98     };
  99 }
 100
 101 static inline SimdFInt32 gmx_simdcall
 102 operator*(SimdFInt32 a, SimdFInt32 b)
 103 {
 104     return {
 105                _mm_mullo_epi32(a.simdInternal_, b.simdInternal_)
 106     };
 107 }
 108
 109 static inline SimdFInt32 gmx_simdcall
 110 blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
 111 {
 112     return {
 113                _mm_blendv_epi8(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
 114     };
 115 }
 116
 117 template <MathOptimization opt = MathOptimization::Safe>
 118 static inline SimdFloat gmx_simdcall
 119 ldexp(SimdFloat value, SimdFInt32 exponent)
 120 {
 121     const __m128i exponentBias = _mm_set1_epi32(127);
 122     __m128i       iExponent;
 123
 124     iExponent = _mm_add_epi32(exponent.simdInternal_, exponentBias);
 125
 126     if (opt == MathOptimization::Safe)
 127     {
 128         // Make sure biased argument is not negative
 129         iExponent = _mm_max_epi32(iExponent, _mm_setzero_si128());
 130     }
 131
 132     iExponent = _mm_slli_epi32( iExponent, 23);
 133
 134     return {
 135                _mm_mul_ps(value.simdInternal_, _mm_castsi128_ps(iExponent))
 136     };
 137 }
 138
 139 }      // namespace gmx
 140
 141 #endif // GMX_SIMD_IMPL_X86_SSE4_1_SIMD_FLOAT_H