Fixed LJ-14 error in free energy
[gromacs.git] / include / gmx_avx_single.h
blobf0697e1021b6ab6ed4303688ca0f56497ccfb3a2
1 /*
2 * This source code is part of
4 * G R O M A C S
6 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 * Copyright (c) 2001-2012, The GROMACS Development Team
9 * Gromacs is a library for molecular simulation and trajectory analysis,
10 * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 * a full list of developers and information, check out http://www.gromacs.org
13 * This program is free software; you can redistribute it and/or modify it under
14 * the terms of the GNU Lesser General Public License as published by the Free
15 * Software Foundation; either version 2 of the License, or (at your option) any
16 * later version.
17 * As a special exception, you may use this file as part of a free software
18 * library without restriction. Specifically, if other files instantiate
19 * templates or use macros or inline functions from this file, or you compile
20 * this file and link it with other files to produce an executable, this
21 * file does not by itself cause the resulting executable to be covered by
22 * the GNU Lesser General Public License.
24 * In plain-speak: do not worry about classes/macros/templates either - only
25 * changes to the library have to be LGPL, not an application linking with it.
27 * To help fund GROMACS development, we humbly ask that you cite
28 * the papers people have written on it - you can find them on the website!
30 #ifndef _gmx_avx_single_h_
31 #define _gmx_avx_single_h_
33 /* We require AVX now! */
35 #include <immintrin.h> /* AVX */
/* Compute 1/sqrt(x) independently in each of the eight single-precision
 * lanes of x.
 *
 * The estimate returned by _mm256_rsqrt_ps() is refined with one
 * Newton-Raphson step:  y1 = 0.5 * y0 * (3 - x * y0 * y0).
 *
 * No special handling is done here for zero, negative or non-finite
 * input lanes; whatever the intrinsics produce is propagated.
 */
static inline __m256
gmx_mm256_invsqrt_ps(__m256 x)
{
    /* Broadcast the iteration constants to all eight lanes */
    const __m256 half  = _mm256_set1_ps(0.5f);
    const __m256 three = _mm256_set1_ps(3.0f);

    /* Initial estimate of 1/sqrt(x) */
    __m256 lu = _mm256_rsqrt_ps(x);

    /* Evaluated as half * ((three - (lu*lu)*x) * lu) */
    return _mm256_mul_ps(half, _mm256_mul_ps(_mm256_sub_ps(three, _mm256_mul_ps(_mm256_mul_ps(lu, lu), x)), lu));
}
/* Return dx*dx + dy*dy + dz*dz, computed independently in each of the
 * eight single-precision lanes.
 *
 * The sum is evaluated as (dx*dx + dy*dy) + dz*dz.
 */
static inline __m256
gmx_mm256_calc_rsq_ps(__m256 dx, __m256 dy, __m256 dz)
{
    return _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(dx, dx), _mm256_mul_ps(dy, dy)), _mm256_mul_ps(dz, dz));
}
/* Lane-wise sum of four 256-bit (ymm) registers, evaluated as
 * (t0 + t1) + (t2 + t3). Each argument is expanded exactly once.
 */
#define gmx_mm256_sum4_ps(t0,t1,t2,t3) _mm256_add_ps(_mm256_add_ps((t0),(t1)),_mm256_add_ps((t2),(t3)))
57 #endif /* _gmx_avx_single_h_ */