2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_X86_AVX2_256_SIMD_DOUBLE_H
37 #define GMX_SIMD_IMPL_X86_AVX2_256_SIMD_DOUBLE_H
41 #include <immintrin.h>
43 #include "impl_x86_avx2_256_common.h"
/****************************************************
 *      DOUBLE PRECISION SIMD IMPLEMENTATION        *
 ****************************************************/

/* Each name is first #undef'ed so that any earlier definition (presumably
 * coming from the common header included above - confirm there) is replaced
 * by the mapping given here.
 */

/* Fused multiply-add family: mapped straight onto the FMA intrinsics. */
#undef  gmx_simd_fmadd_d
#define gmx_simd_fmadd_d           _mm256_fmadd_pd
#undef  gmx_simd_fmsub_d
#define gmx_simd_fmsub_d           _mm256_fmsub_pd
#undef  gmx_simd_fnmadd_d
#define gmx_simd_fnmadd_d          _mm256_fnmadd_pd
#undef  gmx_simd_fnmsub_d
#define gmx_simd_fnmsub_d          _mm256_fnmsub_pd

/* Exponent extraction/construction: implemented by helpers in this file. */
#undef  gmx_simd_get_exponent_d
#define gmx_simd_get_exponent_d    gmx_simd_get_exponent_d_avx2_256
#undef  gmx_simd_set_exponent_d
#define gmx_simd_set_exponent_d    gmx_simd_set_exponent_d_avx2_256

/* Boolean conversions between the double-width and integer-width masks:
 * implemented by helpers in this file.
 */
#undef  gmx_simd_cvt_db2dib
#define gmx_simd_cvt_db2dib        gmx_simd_cvt_db2dib_avx2_256
#undef  gmx_simd_cvt_dib2db
#define gmx_simd_cvt_dib2db        gmx_simd_cvt_dib2db_avx2_256
65 /*********************************************************
66 * SIMD DOUBLE PRECISION IMPLEMENTATION HELPER FUNCTIONS *
67 *********************************************************/
68 static gmx_inline gmx_simd_double_t gmx_simdcall
69 gmx_simd_get_exponent_d_avx2_256(gmx_simd_double_t x
)
71 const __m256d expmask
= _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FF0000000000000LL
));
72 const __m256i expbias
= _mm256_set1_epi64x(1023LL);
76 iexp
= _mm256_castpd_si256(_mm256_and_pd(x
, expmask
));
77 iexp
= _mm256_sub_epi64(_mm256_srli_epi64(iexp
, 52), expbias
);
78 iexp
= _mm256_shuffle_epi32(iexp
, _MM_SHUFFLE(3, 1, 2, 0));
80 iexp128
= _mm256_extractf128_si256(iexp
, 1);
81 iexp128
= _mm_unpacklo_epi64(_mm256_castsi256_si128(iexp
), iexp128
);
82 return _mm256_cvtepi32_pd(iexp128
);
85 static gmx_inline gmx_simd_double_t gmx_simdcall
86 gmx_simd_set_exponent_d_avx2_256(gmx_simd_double_t x
)
88 const __m256i expbias
= _mm256_set1_epi64x(1023LL);
89 __m256i iexp
= _mm256_cvtepi32_epi64(_mm256_cvtpd_epi32(x
));
91 iexp
= _mm256_slli_epi64(_mm256_add_epi64(iexp
, expbias
), 52);
92 return _mm256_castsi256_pd(iexp
);
95 static gmx_inline gmx_simd_dibool_t gmx_simdcall
96 gmx_simd_cvt_db2dib_avx2_256(gmx_simd_dbool_t a
)
98 __m128i ia
= _mm256_castsi256_si128(_mm256_castpd_si256(a
));
99 __m128i ib
= _mm256_extractf128_si256(_mm256_castpd_si256(a
), 0x1);
101 ia
= _mm_packs_epi32(ia
, ib
);
106 static gmx_inline gmx_simd_dbool_t gmx_simdcall
107 gmx_simd_cvt_dib2db_avx2_256(gmx_simd_dibool_t ia
)
109 __m128d lo
= _mm_castsi128_pd(_mm_unpacklo_epi32(ia
, ia
));
110 __m128d hi
= _mm_castsi128_pd(_mm_unpackhi_epi32(ia
, ia
));
112 return _mm256_insertf128_pd(_mm256_castpd128_pd256(lo
), hi
, 0x1);
115 #endif /* GMX_SIMD_IMPL_X86_AVX2_256_SIMD_DOUBLE_H */