introduced general 4-wide SIMD support
[gromacs.git] / include / gmx_simd_ref.h
blob082132ed5c7cea2e461252a896933e421a2fe4e4
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2012, The GROMACS Development Team
6 * Copyright (c) 2012,2013, by the GROMACS development team, led by
7 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
8 * others, as listed in the AUTHORS file in the top-level source
9 * directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 #ifndef _gmx_simd_ref_h_
39 #define _gmx_simd_ref_h_
41 /* This file contains a reference plain-C implementation of arbitrary width.
42 * This code is only useful for testing and documentation.
43 * The SIMD width is set by defining GMX_SIMD_REF_WIDTH before including.
47 #ifndef GMX_SIMD_REF_WIDTH
48 #error "GMX_SIMD_REF_WIDTH should be defined before including gmx_simd_ref.h"
49 #endif
51 #include <math.h>
53 /* float/double SIMD register type */
54 typedef struct {
55 real r[GMX_SIMD_REF_WIDTH];
56 } gmx_simd_ref_pr;
58 /* boolean SIMD register type */
59 typedef struct {
60 char r[GMX_SIMD_REF_WIDTH];
61 } gmx_simd_ref_pb;
63 /* integer SIMD register type, only for table indexing and exclusion masks */
64 typedef struct {
65 int r[GMX_SIMD_REF_WIDTH];
66 } gmx_simd_ref_epi32;
67 #define GMX_SIMD_REF_EPI32_WIDTH GMX_SIMD_REF_WIDTH
69 /* Load GMX_SIMD_REF_WIDTH reals for memory starting at r */
70 static gmx_inline gmx_simd_ref_pr
71 gmx_simd_ref_load_pr(const real *r)
73 gmx_simd_ref_pr a;
74 int i;
76 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
78 a.r[i] = r[i];
81 return a;
84 /* Set all SIMD register elements to *r */
85 static gmx_inline gmx_simd_ref_pr
86 gmx_simd_ref_load1_pr(const real *r)
88 gmx_simd_ref_pr a;
89 int i;
91 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
93 a.r[i] = *r;
96 return a;
99 /* Set all SIMD register elements to r */
100 static gmx_inline gmx_simd_ref_pr
101 gmx_simd_ref_set1_pr(real r)
103 gmx_simd_ref_pr a;
104 int i;
106 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
108 a.r[i] = r;
111 return a;
114 /* Set all SIMD register elements to 0 */
115 static gmx_inline gmx_simd_ref_pr
116 gmx_simd_ref_setzero_pr()
118 gmx_simd_ref_pr a;
119 int i;
121 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
123 a.r[i] = 0.0;
126 return a;
129 static gmx_inline void
130 gmx_simd_ref_store_pr(real *dest, gmx_simd_ref_pr src)
132 int i;
134 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
136 dest[i] = src.r[i];
140 static gmx_inline gmx_simd_ref_pr
141 gmx_simd_ref_add_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
143 gmx_simd_ref_pr c;
144 int i;
146 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
148 c.r[i] = a.r[i] + b.r[i];
151 return c;
154 static gmx_inline gmx_simd_ref_pr
155 gmx_simd_ref_sub_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
157 gmx_simd_ref_pr c;
158 int i;
160 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
162 c.r[i] = a.r[i] - b.r[i];
165 return c;
168 static gmx_inline gmx_simd_ref_pr
169 gmx_simd_ref_mul_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
171 gmx_simd_ref_pr c;
172 int i;
174 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
176 c.r[i] = a.r[i]*b.r[i];
179 return c;
182 static gmx_inline gmx_simd_ref_pr
183 gmx_simd_ref_madd_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
185 gmx_simd_ref_pr d;
186 int i;
188 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
190 d.r[i] = a.r[i]*b.r[i] + c.r[i];
193 return d;
196 static gmx_inline gmx_simd_ref_pr
197 gmx_simd_ref_nmsub_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
199 gmx_simd_ref_pr d;
200 int i;
202 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
204 d.r[i] = -a.r[i]*b.r[i] + c.r[i];
207 return d;
210 static gmx_inline gmx_simd_ref_pr
211 gmx_simd_ref_max_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
213 gmx_simd_ref_pr c;
214 int i;
216 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
218 c.r[i] = (a.r[i] >= b.r[i] ? a.r[i] : b.r[i]);
221 return c;
224 static gmx_inline gmx_simd_ref_pr
225 gmx_simd_ref_blendzero_pr(gmx_simd_ref_pr a, gmx_simd_ref_pb b)
227 gmx_simd_ref_pr c;
228 int i;
230 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
232 c.r[i] = (b.r[i] ? a.r[i] : 0.0);
235 return c;
238 /* Note that this reference implementation rounds away from zero,
239 * whereas most SIMD intrinsics will round to nearest even. Since this
240 * function is only used for periodic image calculations, the rounding
241 * of mantissas close to 0.5 is irrelevant, except in testing. This
242 * could be fixed by using rint/rintf, but the bigger problem is that
243 * MSVC does not support full C99, and none of the round or rint
244 * functions are defined. It's much easier to approximately implement
245 * round() than rint(), so we do that and hope we never get bitten in
246 * testing. (Thanks, Microsoft.)
248 static gmx_inline gmx_simd_ref_pr
249 gmx_simd_ref_round_pr(gmx_simd_ref_pr a)
251 gmx_simd_ref_pr b;
252 int i;
254 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
256 #ifdef _MSC_VER
257 int temp = (a.r[i] >= 0.)
258 ? (a.r[i] + 0.5)
259 : (a.r[i] - 0.5);
260 b.r[i] = (real) temp;
261 #elif defined GMX_DOUBLE
262 b.r[i] = round(a.r[i]);
263 #else
264 b.r[i] = roundf(a.r[i]);
265 #endif
268 return b;
271 /* Not required, only used to speed up the nbnxn tabulated PME kernels */
272 static gmx_inline gmx_simd_ref_pr
273 gmx_simd_ref_floor_pr(gmx_simd_ref_pr a)
275 gmx_simd_ref_pr b;
276 int i;
278 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
280 #ifdef GMX_DOUBLE
281 b.r[i] = floor(a.r[i]);
282 #else
283 b.r[i] = floorf(a.r[i]);
284 #endif
287 return b;
290 /* Not required, only used when blendv is faster than comparison */
291 static gmx_inline gmx_simd_ref_pr
292 gmx_simd_ref_blendv_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
294 gmx_simd_ref_pr d;
295 int i;
297 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
299 d.r[i] = (c.r[i] >= 0) ? a.r[i] : b.r[i];
302 return d;
305 /* Copy the sign of a to b, assumes b >= 0 for efficiency */
306 static gmx_inline gmx_simd_ref_pr
307 gmx_simd_ref_cpsgn_nonneg_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
309 gmx_simd_ref_pr c;
310 int i;
312 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
314 c.r[i] = (a.r[i] >= 0) ? b.r[i] : -b.r[i];
317 return c;
320 /* Very specific operation required in the non-bonded kernels */
321 static gmx_inline gmx_simd_ref_pr
322 gmx_simd_ref_masknot_add_pr(gmx_simd_ref_pb a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
324 gmx_simd_ref_pr d;
325 int i;
327 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
329 d.r[i] = a.r[i] ? b.r[i] : b.r[i] + c.r[i];
332 return d;
335 /* Comparison */
336 static gmx_inline gmx_simd_ref_pb
337 gmx_simd_ref_cmplt_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
339 gmx_simd_ref_pb c;
340 int i;
342 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
344 c.r[i] = (a.r[i] < b.r[i]);
347 return c;
350 /* Logical AND on SIMD booleans */
351 static gmx_inline gmx_simd_ref_pb
352 gmx_simd_ref_and_pb(gmx_simd_ref_pb a, gmx_simd_ref_pb b)
354 gmx_simd_ref_pb c;
355 int i;
357 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
359 c.r[i] = (a.r[i] && b.r[i]);
362 return c;
365 /* Logical OR on SIMD booleans */
366 static gmx_inline gmx_simd_ref_pb
367 gmx_simd_ref_or_pb(gmx_simd_ref_pb a, gmx_simd_ref_pb b)
369 gmx_simd_ref_pb c;
370 int i;
372 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
374 c.r[i] = (a.r[i] || b.r[i]);
377 return c;
380 /* Returns a single int (0/1) which tells if any of the booleans is True */
381 static gmx_inline int
382 gmx_simd_ref_anytrue_pb(gmx_simd_ref_pb a)
384 int anytrue;
385 int i;
387 anytrue = 0;
388 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
390 if (a.r[i])
392 anytrue = 1;
396 return anytrue;
399 /* Conversions only used for PME table lookup */
400 static gmx_inline gmx_simd_ref_epi32
401 gmx_simd_ref_cvttpr_epi32(gmx_simd_ref_pr a)
403 gmx_simd_ref_epi32 b;
404 int i;
406 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
408 b.r[i] = (int)a.r[i];
411 return b;
414 /* These two function only need to be approximate, Newton-Raphson iteration
415 * is used for full accuracy in gmx_invsqrt_pr and gmx_inv_pr.
417 static gmx_inline gmx_simd_ref_pr
418 gmx_simd_ref_rsqrt_pr(gmx_simd_ref_pr a)
420 gmx_simd_ref_pr b;
421 int i;
423 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
425 #ifdef GMX_DOUBLE
426 b.r[i] = 1.0/sqrt(a.r[i]);
427 #else
428 b.r[i] = 1.0/sqrtf(a.r[i]);
429 #endif
432 return b;
435 static gmx_inline gmx_simd_ref_pr
436 gmx_simd_ref_rcp_pr(gmx_simd_ref_pr a)
438 gmx_simd_ref_pr b;
439 int i;
441 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
443 b.r[i] = 1.0/a.r[i];
446 return b;
449 static gmx_inline gmx_simd_ref_pr
450 gmx_simd_ref_exp_pr(gmx_simd_ref_pr a)
452 gmx_simd_ref_pr b;
453 int i;
455 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
457 #ifdef GMX_DOUBLE
458 b.r[i] = exp(a.r[i]);
459 #else
460 b.r[i] = expf(a.r[i]);
461 #endif
464 return b;
467 static gmx_inline gmx_simd_ref_pr
468 gmx_simd_ref_sqrt_pr(gmx_simd_ref_pr a)
470 gmx_simd_ref_pr b;
471 int i;
473 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
475 #ifdef GMX_DOUBLE
476 b.r[i] = sqrt(a.r[i]);
477 #else
478 b.r[i] = sqrtf(a.r[i]);
479 #endif
482 return b;
485 static gmx_inline int
486 gmx_simd_ref_sincos_pr(gmx_simd_ref_pr a,
487 gmx_simd_ref_pr *s, gmx_simd_ref_pr *c)
489 int i;
491 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
493 s->r[i] = sin(a.r[i]);
494 c->r[i] = cos(a.r[i]);
497 return 0;
500 static gmx_inline gmx_simd_ref_pr
501 gmx_simd_ref_acos_pr(gmx_simd_ref_pr a)
503 gmx_simd_ref_pr b;
504 int i;
506 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
508 b.r[i] = acos(a.r[i]);
511 return b;
514 static gmx_inline gmx_simd_ref_pr
515 gmx_simd_ref_atan2_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
517 gmx_simd_ref_pr c;
518 int i;
520 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
522 c.r[i] = atan2(a.r[i], b.r[i]);
525 return c;
528 #endif /* _gmx_simd_ref_h_ */