added Verlet scheme and NxN non-bonded functionality
[gromacs.git] / include / gmx_avx_double.h
blob190b4ccc0dab1bac888795e83614726d3561819c
1 /*
2 * This source code is part of
4 * G R O M A C S
6 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 * Copyright (c) 2001-2012, The GROMACS Development Team
9 * Gromacs is a library for molecular simulation and trajectory analysis,
10 * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 * a full list of developers and information, check out http://www.gromacs.org
13 * This program is free software; you can redistribute it and/or modify it under
14 * the terms of the GNU Lesser General Public License as published by the Free
15 * Software Foundation; either version 2 of the License, or (at your option) any
16 * later version.
17 * As a special exception, you may use this file as part of a free software
18 * library without restriction. Specifically, if other files instantiate
19 * templates or use macros or inline functions from this file, or you compile
20 * this file and link it with other files to produce an executable, this
21 * file does not by itself cause the resulting executable to be covered by
22 * the GNU Lesser General Public License.
24 * In plain-speak: do not worry about classes/macros/templates either - only
25 * changes to the library have to be LGPL, not an application linking with it.
27 * To help fund GROMACS development, we humbly ask that you cite
28 * the papers people have written on it - you can find them on the website!
 */
30 #ifndef _gmx_avx_double_h_
31 #define _gmx_avx_double_h_
33 /* We require AVX now! */
35 #include <immintrin.h> /* AVX */
/* Approximate 1/sqrt(x) for each of the four doubles packed in x.
 *
 * There is no double precision AVX rsqrt instruction, so the starting
 * estimate is taken from the single precision _mm_rsqrt_ps: x is narrowed
 * to float, looked up, and the estimate is widened back to double.
 * That estimate is then refined with two Newton-Raphson steps,
 *
 *     lu_new = 0.5 * lu * (3 - x*lu*lu),
 *
 * carried out in double precision.
 *
 * The input passes through _mm256_cvtpd_ps before the lookup, and nothing
 * here special-cases zero, negative values, or values that cannot be
 * represented in single precision.
 * NOTE(review): the accuracy of the result depends on the accuracy of the
 * hardware rsqrt estimate; confirm that two refinement steps are enough
 * for callers that need the last bits of a double.
 */
static inline __m256d
gmx_mm256_invsqrt_pd(__m256d x)
{
    const __m256d half  = _mm256_set1_pd(0.5);
    const __m256d three = _mm256_set1_pd(3.0);

    /* Single precision table lookup, converted back to double */
    __m256d lu = _mm256_cvtps_pd(_mm_rsqrt_ps(_mm256_cvtpd_ps(x)));

    /* First Newton-Raphson refinement */
    lu = _mm256_mul_pd(half, _mm256_mul_pd(_mm256_sub_pd(three, _mm256_mul_pd(_mm256_mul_pd(lu, lu), x)), lu));

    /* Second Newton-Raphson refinement */
    return _mm256_mul_pd(half, _mm256_mul_pd(_mm256_sub_pd(three, _mm256_mul_pd(_mm256_mul_pd(lu, lu), x)), lu));
}
/* Element-wise sum of squares of three packed-double registers.
 *
 * Element i of the result is (dx[i]*dx[i] + dy[i]*dy[i]) + dz[i]*dz[i],
 * added in exactly that order. When dx, dy and dz hold the x, y and z
 * components of four vectors, this is the squared length of each vector.
 */
static inline __m256d
gmx_mm256_calc_rsq_pd(__m256d dx, __m256d dy, __m256d dz)
{
    return _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(dx, dx), _mm256_mul_pd(dy, dy)), _mm256_mul_pd(dz, dz));
}
/* Element-wise sum of four 256-bit (ymm) registers of packed doubles,
 * evaluated pairwise as (t0+t1) + (t2+t3). Each argument is expanded
 * exactly once.
 */
#define gmx_mm256_sum4_pd(t0,t1,t2,t3) _mm256_add_pd(_mm256_add_pd((t0),(t1)),_mm256_add_pd((t2),(t3)))
61 #endif /* _gmx_avx_double_h_ */