added Verlet scheme and NxN non-bonded functionality
[gromacs.git] / include / gmx_x86_simd_macros.h
blobb896d396baadfa88505d90541c207911e91ccac9
/*
 * This source code is part of
 *
 * G R O M A C S
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2012, The GROMACS Development Team
 *
 * Gromacs is a library for molecular simulation and trajectory analysis,
 * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 * a full list of developers and information, check out http://www.gromacs.org
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option) any
 * later version.
 * As a special exception, you may use this file as part of a free software
 * library without restriction. Specifically, if other files instantiate
 * templates or use macros or inline functions from this file, or you compile
 * this file and link it with other files to produce an executable, this
 * file does not by itself cause the resulting executable to be covered by
 * the GNU Lesser General Public License.
 *
 * In plain-speak: do not worry about classes/macros/templates either - only
 * changes to the library have to be LGPL, not an application linking with it.
 *
 * To help fund GROMACS development, we humbly ask that you cite
 * the papers people have written on it - you can find them on the website!
 */
/* Undefine all defines used below so we can include this file multiple times
 * with different settings from the same source file.
 */

/* NOTE: floor and blend are NOT available with SSE2 only acceleration */
/* Reset every macro this header can define, so that a later inclusion with a
 * different width/precision selection starts from a clean slate.
 * #undef on a name that is not currently defined is a no-op, so the list
 * covers the union of the names defined by the 128-bit and 256-bit branches.
 */
#undef GMX_X86_SIMD_WIDTH_HERE

#undef gmx_epi32

#undef gmx_mm_pr

#undef gmx_load_pr
#undef gmx_load1_pr
#undef gmx_set1_pr
#undef gmx_setzero_pr
#undef gmx_store_pr
#undef gmx_storeu_pr

#undef gmx_add_pr
#undef gmx_sub_pr
#undef gmx_mul_pr
#undef gmx_max_pr
/* gmx_cmpneq_pr is only defined by the GMX_MM256_HERE branch; without this
 * #undef it would stay visible after a subsequent 128-bit inclusion.
 */
#undef gmx_cmpneq_pr
#undef gmx_cmplt_pr
#undef gmx_and_pr
#undef gmx_or_pr
#undef gmx_andnot_pr

#undef gmx_floor_pr
#undef gmx_blendv_pr

#undef gmx_movemask_pr

#undef gmx_mm_castsi128_pr
/* 256-bit counterpart of the cast above; only defined for GMX_MM256_HERE. */
#undef gmx_mm_castsi256_pr

#undef gmx_cvttpr_epi32
#undef gmx_cvtepi32_pr

#undef gmx_invsqrt_pr
#undef gmx_calc_rsq_pr
#undef gmx_sum4_pr
/* By defining GMX_MM128_HERE or GMX_MM256_HERE before including this file,
 * the same code, written with the defines below, can be compiled for either
 * 128 or 256 bit wide SSE or AVX instructions.
 * The gmx_ prefix is replaced by _mm_ or _mm256_ (SSE or AVX).
 * The _pr suffix is replaced by _ps or _pd (single or double precision).
 * Note that compiler settings will decide if 128-bit intrinsics will
 * be translated into SSE or AVX instructions.
 */
83 #if !defined GMX_MM128_HERE && !defined GMX_MM256_HERE
84 "You should define GMX_MM128_HERE or GMX_MM256_HERE"
85 #endif
/* Selecting both widths at once is contradictory: the two branches below
 * would define the same macro names differently. Fail with an explicit
 * #error rather than relying on a stray string literal to break the build.
 */
#if defined GMX_MM128_HERE && defined GMX_MM256_HERE
#error "You should not define both GMX_MM128_HERE and GMX_MM256_HERE"
#endif
#ifdef GMX_MM128_HERE

/* 128-bit wide mapping: the integer vector type is the same for both
 * floating-point precisions.
 */
#define gmx_epi32                 __m128i

#ifdef GMX_DOUBLE

/* ---- double precision: every gmx_*_pr name maps to a _mm_*_pd name ---- */

#include "gmx_x86_simd_double.h"

/* Number of reals per SIMD register in this configuration */
#define GMX_X86_SIMD_WIDTH_HERE   2

#define gmx_mm_pr                 __m128d

/* Load, broadcast and store */
#define gmx_load_pr               _mm_load_pd
#define gmx_load1_pr              _mm_load1_pd
#define gmx_set1_pr               _mm_set1_pd
#define gmx_setzero_pr            _mm_setzero_pd
#define gmx_store_pr              _mm_store_pd
#define gmx_storeu_pr             _mm_storeu_pd

/* Arithmetic, comparison and bitwise logic */
#define gmx_add_pr                _mm_add_pd
#define gmx_sub_pr                _mm_sub_pd
#define gmx_mul_pr                _mm_mul_pd
#define gmx_max_pr                _mm_max_pd
#define gmx_cmplt_pr              _mm_cmplt_pd
#define gmx_and_pr                _mm_and_pd
#define gmx_or_pr                 _mm_or_pd
#define gmx_andnot_pr             _mm_andnot_pd

/* floor and blendv are not available with SSE2-only acceleration */
#define gmx_floor_pr              _mm_floor_pd
#define gmx_blendv_pr             _mm_blendv_pd

#define gmx_movemask_pr           _mm_movemask_pd

/* Integer-vector to real-vector cast (gmx_ wrapper, not a raw intrinsic) */
#define gmx_mm_castsi128_pr       gmx_mm_castsi128_pd

/* Real <-> 32-bit integer conversions (cvtt = truncating) */
#define gmx_cvttpr_epi32          _mm_cvttpd_epi32
#define gmx_cvtepi32_pr           _mm_cvtepi32_pd

/* Math helpers; presumably provided by the header included above */
#define gmx_invsqrt_pr            gmx_mm_invsqrt_pd
#define gmx_calc_rsq_pr           gmx_mm_calc_rsq_pd
#define gmx_sum4_pr               gmx_mm_sum4_pd

#else /* GMX_DOUBLE */

/* ---- single precision: every gmx_*_pr name maps to a _mm_*_ps name ---- */

#include "gmx_x86_simd_single.h"

/* Number of reals per SIMD register in this configuration */
#define GMX_X86_SIMD_WIDTH_HERE   4

#define gmx_mm_pr                 __m128

/* Load, broadcast and store */
#define gmx_load_pr               _mm_load_ps
#define gmx_load1_pr              _mm_load1_ps
#define gmx_set1_pr               _mm_set1_ps
#define gmx_setzero_pr            _mm_setzero_ps
#define gmx_store_pr              _mm_store_ps
#define gmx_storeu_pr             _mm_storeu_ps

/* Arithmetic, comparison and bitwise logic */
#define gmx_add_pr                _mm_add_ps
#define gmx_sub_pr                _mm_sub_ps
#define gmx_mul_pr                _mm_mul_ps
#define gmx_max_pr                _mm_max_ps
#define gmx_cmplt_pr              _mm_cmplt_ps
#define gmx_and_pr                _mm_and_ps
#define gmx_or_pr                 _mm_or_ps
#define gmx_andnot_pr             _mm_andnot_ps

/* floor and blendv are not available with SSE2-only acceleration */
#define gmx_floor_pr              _mm_floor_ps
#define gmx_blendv_pr             _mm_blendv_ps

#define gmx_movemask_pr           _mm_movemask_ps

/* Integer-vector to real-vector cast (gmx_ wrapper, not a raw intrinsic) */
#define gmx_mm_castsi128_pr       gmx_mm_castsi128_ps

/* Real <-> 32-bit integer conversions (cvtt = truncating) */
#define gmx_cvttpr_epi32          _mm_cvttps_epi32
#define gmx_cvtepi32_pr           _mm_cvtepi32_ps

/* Math helpers; presumably provided by the header included above */
#define gmx_invsqrt_pr            gmx_mm_invsqrt_ps
#define gmx_calc_rsq_pr           gmx_mm_calc_rsq_ps
#define gmx_sum4_pr               gmx_mm_sum4_ps

#endif /* GMX_DOUBLE */

#endif /* GMX_MM128_HERE */
#ifdef GMX_MM256_HERE

/* 256-bit wide (AVX) mapping.
 * NOTE(review): in the double-precision branch the AVX conversions between
 * doubles and 32-bit integers (_mm256_cvttpd_epi32 / _mm256_cvtepi32_pd)
 * work on __m128i, not on the __m256i that gmx_epi32 names here - confirm
 * how callers declare their integer vectors before relying on gmx_epi32.
 */
#define gmx_epi32                 __m256i

#ifndef GMX_DOUBLE

/* ---- single precision: gmx_*_pr maps to _mm256_*_ps ---- */

#include "gmx_x86_simd_single.h"

/* Number of reals per SIMD register in this configuration */
#define GMX_X86_SIMD_WIDTH_HERE   8

#define gmx_mm_pr                 __m256

/* Load, broadcast and store */
#define gmx_load_pr               _mm256_load_ps
/* Broadcast of a single value from memory, expressed as set1 of element 0 */
#define gmx_load1_pr(x)           _mm256_set1_ps((x)[0])
#define gmx_set1_pr               _mm256_set1_ps
#define gmx_setzero_pr            _mm256_setzero_ps
#define gmx_store_pr              _mm256_store_ps
#define gmx_storeu_pr             _mm256_storeu_ps

/* Arithmetic, comparison and bitwise logic */
#define gmx_add_pr                _mm256_add_ps
#define gmx_sub_pr                _mm256_sub_ps
#define gmx_mul_pr                _mm256_mul_ps
#define gmx_max_pr                _mm256_max_ps
/* Not-equal (ordered, non-signaling) */
#define gmx_cmpneq_pr(x,y)        _mm256_cmp_ps(x,y,0x0c)
/* Less-than (ordered, non-signaling) */
#define gmx_cmplt_pr(x,y)         _mm256_cmp_ps(x,y,0x11)
#define gmx_and_pr                _mm256_and_ps
#define gmx_or_pr                 _mm256_or_ps
#define gmx_andnot_pr             _mm256_andnot_ps

#define gmx_floor_pr              _mm256_floor_ps
#define gmx_blendv_pr             _mm256_blendv_ps

#define gmx_movemask_pr           _mm256_movemask_ps

/* Integer-vector to real-vector cast */
#define gmx_mm_castsi256_pr       _mm256_castsi256_ps

/* Real <-> 32-bit integer conversions (cvtt = truncating).
 * gmx_cvtepi32_pr mirrors the 128-bit branch, which defines both directions.
 */
#define gmx_cvttpr_epi32          _mm256_cvttps_epi32
#define gmx_cvtepi32_pr           _mm256_cvtepi32_ps

/* Math helpers; presumably provided by the header included above */
#define gmx_invsqrt_pr            gmx_mm256_invsqrt_ps
#define gmx_calc_rsq_pr           gmx_mm256_calc_rsq_ps
#define gmx_sum4_pr               gmx_mm256_sum4_ps

#else /* ifndef GMX_DOUBLE */

/* ---- double precision: gmx_*_pr maps to _mm256_*_pd ---- */

#include "gmx_x86_simd_double.h"

/* Number of reals per SIMD register in this configuration */
#define GMX_X86_SIMD_WIDTH_HERE   4

#define gmx_mm_pr                 __m256d

/* Load, broadcast and store */
#define gmx_load_pr               _mm256_load_pd
/* Broadcast of a single value from memory, expressed as set1 of element 0 */
#define gmx_load1_pr(x)           _mm256_set1_pd((x)[0])
#define gmx_set1_pr               _mm256_set1_pd
#define gmx_setzero_pr            _mm256_setzero_pd
#define gmx_store_pr              _mm256_store_pd
#define gmx_storeu_pr             _mm256_storeu_pd

/* Arithmetic, comparison and bitwise logic */
#define gmx_add_pr                _mm256_add_pd
#define gmx_sub_pr                _mm256_sub_pd
#define gmx_mul_pr                _mm256_mul_pd
#define gmx_max_pr                _mm256_max_pd
/* Not-equal (ordered, non-signaling) */
#define gmx_cmpneq_pr(x,y)        _mm256_cmp_pd(x,y,0x0c)
/* Less-than (ordered, non-signaling) */
#define gmx_cmplt_pr(x,y)         _mm256_cmp_pd(x,y,0x11)
#define gmx_and_pr                _mm256_and_pd
#define gmx_or_pr                 _mm256_or_pd
#define gmx_andnot_pr             _mm256_andnot_pd

#define gmx_floor_pr              _mm256_floor_pd
#define gmx_blendv_pr             _mm256_blendv_pd

#define gmx_movemask_pr           _mm256_movemask_pd

/* Integer-vector to real-vector cast */
#define gmx_mm_castsi256_pr       _mm256_castsi256_pd

/* Real <-> 32-bit integer conversions (cvtt = truncating).
 * gmx_cvtepi32_pr mirrors the 128-bit branch, which defines both directions.
 * Both intrinsics use a 128-bit integer vector for the four 32-bit values.
 */
#define gmx_cvttpr_epi32          _mm256_cvttpd_epi32
#define gmx_cvtepi32_pr           _mm256_cvtepi32_pd

/* Math helpers; presumably provided by the header included above */
#define gmx_invsqrt_pr            gmx_mm256_invsqrt_pd
#define gmx_calc_rsq_pr           gmx_mm256_calc_rsq_pd
#define gmx_sum4_pr               gmx_mm256_sum4_pd

#endif /* ifndef GMX_DOUBLE */

#endif /* GMX_MM256_HERE */