Split SIMD implementations into 4 files
[gromacs.git] / src / gromacs / simd / simd.h
blobe6ef8753dd98f0c1750ef29ff1c9cae81a7350dd
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal
37 * \defgroup module_simd SIMD intrinsics interface (simd)
38 * \ingroup group_utilitymodules
40 * \brief Provides an architecture-independent way of doing SIMD coding.
42 * Overview of the SIMD implementation is provided in \ref page_simd.
43 * The details are documented in simd.h and the reference implementation
44 * impl_reference.h.
46 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
49 #ifndef GMX_SIMD_SIMD_H
50 #define GMX_SIMD_SIMD_H
52 /*! \libinternal \file
54 * \brief Definitions, capabilities, and wrappers for SIMD module.
56 * The macros in this file are intended to be used for writing
57 * architecture-independent SIMD intrinsics code.
58 * To support a new architecture, adding a new sub-include with macros here
59 * should be (nearly) all that is needed.
61 * The defines in this top-level file will set default Gromacs real precision
62 * operations to either single or double precision based on whether
63 * GMX_DOUBLE is defined. The actual implementation - including e.g.
64 * conversion operations specifically between single and double - is documented
65 * in impl_reference.h.
67 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
69 * \inlibraryapi
70 * \ingroup module_simd
73 #include "config.h"
75 #include <stddef.h>
77 #include "gromacs/utility/basedefinitions.h"
79 /* Forward declarations so memory allocation can be used in implementations */
80 static gmx_inline float * gmx_simd_align_f(float *p);
81 static gmx_inline double * gmx_simd_align_d(double *p);
82 static gmx_inline int * gmx_simd_align_fi(int *p);
83 static gmx_inline int * gmx_simd_align_di(int *p);
84 static gmx_inline float * gmx_simd4_align_f(float *p);
85 static gmx_inline double * gmx_simd4_align_d(double *p);
87 /*! \cond libapi */
88 /*! \addtogroup module_simd */
89 /*! \{ */
91 /*! \name SIMD predefined macros to describe high-level capabilities
93 * These macros are used to describe the features available in default
94 * Gromacs real precision. They are set from the lower-level implementation
95 * files that have macros describing single and double precision individually,
96 * as well as the implementation details.
97 * \{
100 /* Intel MIC is a bit special since it is a co-processor. This means the rest
101 * of GROMACS (which runs on the CPU) can use a default SIMD set like AVX.
102 * All functions in this SIMD module are static, so it will work perfectly fine
103 * to include this file with different SIMD definitions for different files.
105 #if GMX_SIMD_X86_AVX_512ER
106 # include "impl_x86_avx_512er/impl_x86_avx_512er.h"
107 #elif GMX_SIMD_X86_AVX_512F
108 # include "impl_x86_avx_512f/impl_x86_avx_512f.h"
109 #elif GMX_SIMD_X86_MIC
110 # include "impl_intel_mic/impl_intel_mic.h"
111 #elif GMX_SIMD_X86_AVX2_256
112 # include "impl_x86_avx2_256/impl_x86_avx2_256.h"
113 #elif GMX_SIMD_X86_AVX_256
114 # include "impl_x86_avx_256/impl_x86_avx_256.h"
115 #elif GMX_SIMD_X86_AVX_128_FMA
116 # include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
117 #elif GMX_SIMD_X86_SSE4_1
118 # include "impl_x86_sse4_1/impl_x86_sse4_1.h"
119 #elif GMX_SIMD_X86_SSE2
120 # include "impl_x86_sse2/impl_x86_sse2.h"
121 #elif GMX_SIMD_ARM_NEON
122 # include "impl_arm_neon/impl_arm_neon.h"
123 #elif GMX_SIMD_ARM_NEON_ASIMD
124 # include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
125 #elif GMX_SIMD_IBM_QPX
126 # include "impl_ibm_qpx/impl_ibm_qpx.h"
127 #elif GMX_SIMD_IBM_VMX
128 # include "impl_ibm_vmx/impl_ibm_vmx.h"
129 #elif GMX_SIMD_IBM_VSX
130 # include "impl_ibm_vsx/impl_ibm_vsx.h"
131 #elif GMX_SIMD_SPARC64_HPC_ACE
132 # include "impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h"
133 #elif (GMX_SIMD_REFERENCE || defined DOXYGEN)
134 /* Plain C SIMD reference implementation, also serves as documentation. */
135 # include "impl_reference/impl_reference.h"
136 #else
137 # include "impl_none/impl_none.h"
138 #endif
140 /* These convenience macros are ugly hacks where some source files still make
141 * assumptions about the SIMD architecture. They will be removed as we implement
142 * the new verlet kernels, but for now we need them, and to make sure they
143 * always have values 0 or 1 we define them here rather than in the implementations.
145 #define GMX_SIMD_X86_AVX2_256_OR_HIGHER (GMX_SIMD_X86_AVX2_256)
146 #define GMX_SIMD_X86_AVX_256_OR_HIGHER (GMX_SIMD_X86_AVX2_256_OR_HIGHER || GMX_SIMD_X86_AVX_256)
147 #define GMX_SIMD_X86_AVX_128_FMA_OR_HIGHER (GMX_SIMD_X86_AVX_128_FMA)
148 #define GMX_SIMD_X86_SSE4_1_OR_HIGHER (GMX_SIMD_X86_AVX_256_OR_HIGHER || GMX_SIMD_X86_AVX_128_FMA_OR_HIGHER || GMX_SIMD_X86_SSE4_1)
149 #define GMX_SIMD_X86_SSE2_OR_HIGHER (GMX_SIMD_X86_SSE4_1_OR_HIGHER || GMX_SIMD_X86_SSE2)
151 /*! \} */
153 /*! \name SIMD memory alignment operations
154 * \{
157 /*! \brief
158 * Align a float pointer for usage with SIMD instructions.
160 * You should typically \a not call this function directly (unless you explicitly
161 * want single precision even when GMX_DOUBLE is set), but use the
162 * \ref gmx_simd_align_r macro to align memory in default Gromacs real precision.
164 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_FLOAT_WIDTH extra elements.
166 * \return Aligned pointer (>=p) suitable for loading/storing float fp SIMD.
167 * If \ref GMX_SIMD_HAVE_FLOAT is not set, p will be returned unchanged.
169 * Start by allocating an extra \ref GMX_SIMD_FLOAT_WIDTH float elements of memory,
170 * and then call this function. The returned pointer will be greater or equal
171 * to the one you provided, and point to an address inside your provided memory
172 * that is aligned to the SIMD width.
174 static gmx_inline float *
175 gmx_simd_align_f(float *p)
177 #if GMX_SIMD_HAVE_FLOAT
178 return (float *)(((size_t)((p)+GMX_SIMD_FLOAT_WIDTH-1)) & (~((size_t)(GMX_SIMD_FLOAT_WIDTH*sizeof(float)-1))));
179 #else
180 return p;
181 #endif
184 /*! \brief
185 * Align a double pointer for usage with SIMD instructions.
187 * You should typically \a not call this function directly (unless you explicitly
188 * want double precision even when GMX_DOUBLE is not set), but use the
189 * \ref gmx_simd_align_r macro to align memory in default Gromacs real precision.
191 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_DOUBLE_WIDTH extra elements.
193 * \return Aligned pointer (>=p) suitable for loading/storing double fp SIMD.
194 * If \ref GMX_SIMD_HAVE_DOUBLE is not set, p will be returned unchanged.
196 * Start by allocating an extra \ref GMX_SIMD_DOUBLE_WIDTH double elements of memory,
197 * and then call this function. The returned pointer will be greater or equal
198 * to the one you provided, and point to an address inside your provided memory
199 * that is aligned to the SIMD width.
201 static gmx_inline double *
202 gmx_simd_align_d(double *p)
204 #if GMX_SIMD_HAVE_DOUBLE
205 return (double *)(((size_t)((p)+GMX_SIMD_DOUBLE_WIDTH-1)) & (~((size_t)(GMX_SIMD_DOUBLE_WIDTH*sizeof(double)-1))));
206 #else
207 return p;
208 #endif
211 /*! \brief
212 * Align a (float) integer pointer for usage with SIMD instructions.
214 * You should typically \a not call this function directly (unless you explicitly
215 * want integers corresponding to single precision even when GMX_DOUBLE is
216 * set), but use the \ref gmx_simd_align_i macro to align integer memory
217 * corresponding to Gromacs default floating-point precision.
219 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_FINT32_WIDTH extra elements.
221 * \return Aligned pointer (>=p) suitable for loading/storing float-integer SIMD.
222 * If \ref GMX_SIMD_HAVE_FINT32 is not set, p will be returned unchanged.
224 * This routine provides aligned memory for usage with \ref gmx_simd_fint32_t. You
225 * should have allocated an extra \ref GMX_SIMD_FINT32_WIDTH * sizeof(int) bytes. The
226 * reason why we need to separate float-integer vs. double-integer is that the
227 * width of registers after conversions from the floating-point types might not
228 * be identical, or even supported, in both cases.
230 static gmx_inline int *
231 gmx_simd_align_fi(int *p)
233 #if GMX_SIMD_HAVE_FINT32
234 return (int *)(((size_t)((p)+GMX_SIMD_FINT32_WIDTH-1)) & (~((size_t)(GMX_SIMD_FINT32_WIDTH*sizeof(int)-1))));
235 #else
236 return p;
237 #endif
240 /*! \brief
241 * Align a (double) integer pointer for usage with SIMD instructions.
243 * You should typically \a not call this function directly (unless you explicitly
244 * want integers corresponding to doublele precision even when GMX_DOUBLE is
245 * not set), but use the \ref gmx_simd_align_i macro to align integer memory
246 * corresponding to Gromacs default floating-point precision.
248 * \param p Pointer to memory, allocate at least \ref GMX_SIMD_DINT32_WIDTH extra elements.
250 * \return Aligned pointer (>=p) suitable for loading/storing double-integer SIMD.
251 * If \ref GMX_SIMD_HAVE_DINT32 is not set, p will be returned unchanged.
253 * This routine provides aligned memory for usage with \ref gmx_simd_dint32_t. You
254 * should have allocated an extra \ref GMX_SIMD_DINT32_WIDTH*sizeof(int) bytes. The
255 * reason why we need to separate float-integer vs. double-integer is that the
256 * width of registers after conversions from the floating-point types might not
257 * be identical, or even supported, in both cases.
259 static gmx_inline int *
260 gmx_simd_align_di(int *p)
262 #if GMX_SIMD_HAVE_DINT32
263 return (int *)(((size_t)((p)+GMX_SIMD_DINT32_WIDTH-1)) & (~((size_t)(GMX_SIMD_DINT32_WIDTH*sizeof(int)-1))));
264 #else
265 return p;
266 #endif
269 /*! \brief
270 * Align a float pointer for usage with SIMD4 instructions.
272 * You should typically \a not call this function directly (unless you explicitly
273 * want single precision even when GMX_DOUBLE is set), but use the
274 * \ref gmx_simd4_align_r macro to align memory in default Gromacs real precision.
276 * \param p Pointer to memory, allocate at least \ref GMX_SIMD4_WIDTH extra elements.
278 * \return Aligned pointer (>=p) suitable for loading/storing float SIMD.
279 * If \ref GMX_SIMD4_HAVE_FLOAT is not set, p will be returned unchanged.
281 * This routine provides aligned memory for usage with \ref gmx_simd4_float_t.
282 * should have allocated an extra \ref GMX_SIMD4_WIDTH * sizeof(float) bytes.
284 static gmx_inline float *
285 gmx_simd4_align_f(float *p)
287 #if GMX_SIMD4_HAVE_FLOAT
288 return (float *)(((size_t)((p)+GMX_SIMD4_WIDTH-1)) & (~((size_t)(GMX_SIMD4_WIDTH*sizeof(float)-1))));
289 #else
290 return p;
291 #endif
294 /*! \brief
295 * Align a double pointer for usage with SIMD4 instructions.
297 * You should typically \a not call this function directly (unless you explicitly
298 * want double precision even when GMX_DOUBLE is not set), but use the
299 * \ref gmx_simd4_align_r macro to align memory in default Gromacs real precision.
301 * \param p Pointer to memory, allocate at least \ref GMX_SIMD4_WIDTH extra elements.
303 * \return Aligned pointer (>=p) suitable for loading/storing float SIMD.
304 * If \ref GMX_SIMD4_HAVE_DOUBLE is not set, p will be returned unchanged.
306 * This routine provides aligned memory for usage with \ref gmx_simd4_double_t.
307 * should have allocated an extra \ref GMX_SIMD4_WIDTH * sizeof(double) bytes.
309 static gmx_inline double *
310 gmx_simd4_align_d(double *p)
312 #if GMX_SIMD4_HAVE_DOUBLE
313 return (double *)(((size_t)((p)+GMX_SIMD4_WIDTH-1)) & (~((size_t)(GMX_SIMD4_WIDTH*sizeof(double)-1))));
314 #else
315 return p;
316 #endif
319 /*! \} */
322 /* Define Gromacs "real" precision macros depending on Gromacs config. Note
323 * that conversions float-to-double and v.v. are not included here since they
324 * are not precision-dependent - find them in the implementation files.
326 #ifdef GMX_DOUBLE
327 /* Double floating-point. The documentation is in the float part below */
328 # define gmx_simd_real_t gmx_simd_double_t
329 # define gmx_simd_load_r gmx_simd_load_d
330 # define gmx_simd_load1_r gmx_simd_load1_d
331 # define gmx_simd_set1_r gmx_simd_set1_d
332 # define gmx_simd_store_r gmx_simd_store_d
333 # define gmx_simd_loadu_r gmx_simd_loadu_d
334 # define gmx_simd_storeu_r gmx_simd_storeu_d
335 # define gmx_simd_setzero_r gmx_simd_setzero_d
336 # define gmx_simd_add_r gmx_simd_add_d
337 # define gmx_simd_sub_r gmx_simd_sub_d
338 # define gmx_simd_mul_r gmx_simd_mul_d
339 # define gmx_simd_fmadd_r gmx_simd_fmadd_d
340 # define gmx_simd_fmsub_r gmx_simd_fmsub_d
341 # define gmx_simd_fnmadd_r gmx_simd_fnmadd_d
342 # define gmx_simd_fnmsub_r gmx_simd_fnmsub_d
343 # define gmx_simd_and_r gmx_simd_and_d
344 # define gmx_simd_andnot_r gmx_simd_andnot_d
345 # define gmx_simd_or_r gmx_simd_or_d
346 # define gmx_simd_xor_r gmx_simd_xor_d
347 # define gmx_simd_rsqrt_r gmx_simd_rsqrt_d
348 # define gmx_simd_rcp_r gmx_simd_rcp_d
349 # define gmx_simd_fabs_r gmx_simd_fabs_d
350 # define gmx_simd_fneg_r gmx_simd_fneg_d
351 # define gmx_simd_max_r gmx_simd_max_d
352 # define gmx_simd_min_r gmx_simd_min_d
353 # define gmx_simd_round_r gmx_simd_round_d
354 # define gmx_simd_trunc_r gmx_simd_trunc_d
355 # define gmx_simd_fraction_r gmx_simd_fraction_d
356 # define gmx_simd_get_exponent_r gmx_simd_get_exponent_d
357 # define gmx_simd_get_mantissa_r gmx_simd_get_mantissa_d
358 # define gmx_simd_set_exponent_r gmx_simd_set_exponent_d
359 /* Double integer and conversions */
360 # define gmx_simd_int32_t gmx_simd_dint32_t
361 # define gmx_simd_load_i gmx_simd_load_di
362 # define gmx_simd_set1_i gmx_simd_set1_di
363 # define gmx_simd_store_i gmx_simd_store_di
364 # define gmx_simd_loadu_i gmx_simd_loadu_di
365 # define gmx_simd_storeu_i gmx_simd_storeu_di
366 # define gmx_simd_setzero_i gmx_simd_setzero_di
367 # define gmx_simd_cvt_r2i gmx_simd_cvt_d2i
368 # define gmx_simd_cvtt_r2i gmx_simd_cvtt_d2i
369 # define gmx_simd_cvt_i2r gmx_simd_cvt_i2d
370 # define gmx_simd_extract_i gmx_simd_extract_di
371 # define gmx_simd_slli_i gmx_simd_slli_di
372 # define gmx_simd_srli_i gmx_simd_srli_di
373 # define gmx_simd_and_i gmx_simd_and_di
374 # define gmx_simd_andnot_i gmx_simd_andnot_di
375 # define gmx_simd_or_i gmx_simd_or_di
376 # define gmx_simd_xor_i gmx_simd_xor_di
377 # define gmx_simd_add_i gmx_simd_add_di
378 # define gmx_simd_sub_i gmx_simd_sub_di
379 # define gmx_simd_mul_i gmx_simd_mul_di
380 /* Double booleans and selection */
381 # define gmx_simd_bool_t gmx_simd_dbool_t
382 # define gmx_simd_cmpeq_r gmx_simd_cmpeq_d
383 # define gmx_simd_cmplt_r gmx_simd_cmplt_d
384 # define gmx_simd_cmple_r gmx_simd_cmple_d
385 # define gmx_simd_and_b gmx_simd_and_db
386 # define gmx_simd_or_b gmx_simd_or_db
387 # define gmx_simd_anytrue_b gmx_simd_anytrue_db
388 # define gmx_simd_blendzero_r gmx_simd_blendzero_d
389 # define gmx_simd_blendnotzero_r gmx_simd_blendnotzero_d
390 # define gmx_simd_blendv_r gmx_simd_blendv_d
391 # define gmx_simd_reduce_r gmx_simd_reduce_d
392 # define gmx_simd_ibool_t gmx_simd_dibool_t
393 # define gmx_simd_cmpeq_i gmx_simd_cmpeq_di
394 # define gmx_simd_cmplt_i gmx_simd_cmplt_di
395 # define gmx_simd_and_ib gmx_simd_and_dib
396 # define gmx_simd_or_ib gmx_simd_or_dib
397 # define gmx_simd_anytrue_ib gmx_simd_anytrue_dib
398 # define gmx_simd_blendzero_i gmx_simd_blendzero_di
399 # define gmx_simd_blendnotzero_i gmx_simd_blendnotzero_di
400 # define gmx_simd_blendv_i gmx_simd_blendv_di
401 /* Conversions between integer and double floating-point booleans */
402 # define gmx_simd_cvt_b2ib gmx_simd_cvt_db2dib
403 # define gmx_simd_cvt_ib2b gmx_simd_cvt_dib2db
405 /* SIMD4 double fp - we only support a subset of SIMD instructions for SIMD4 */
406 # define gmx_simd4_real_t gmx_simd4_double_t
407 # define gmx_simd4_load_r gmx_simd4_load_d
408 # define gmx_simd4_load1_r gmx_simd4_load1_d
409 # define gmx_simd4_set1_r gmx_simd4_set1_d
410 # define gmx_simd4_store_r gmx_simd4_store_d
411 # define gmx_simd4_loadu_r gmx_simd4_loadu_d
412 # define gmx_simd4_storeu_r gmx_simd4_storeu_d
413 # define gmx_simd4_setzero_r gmx_simd4_setzero_d
414 # define gmx_simd4_add_r gmx_simd4_add_d
415 # define gmx_simd4_sub_r gmx_simd4_sub_d
416 # define gmx_simd4_mul_r gmx_simd4_mul_d
417 # define gmx_simd4_fmadd_r gmx_simd4_fmadd_d
418 # define gmx_simd4_fmsub_r gmx_simd4_fmsub_d
419 # define gmx_simd4_fnmadd_r gmx_simd4_fnmadd_d
420 # define gmx_simd4_fnmsub_r gmx_simd4_fnmsub_d
421 # define gmx_simd4_and_r gmx_simd4_and_d
422 # define gmx_simd4_andnot_r gmx_simd4_andnot_d
423 # define gmx_simd4_or_r gmx_simd4_or_d
424 # define gmx_simd4_xor_r gmx_simd4_xor_d
425 # define gmx_simd4_rsqrt_r gmx_simd4_rsqrt_d
426 # define gmx_simd4_fabs_r gmx_simd4_fabs_d
427 # define gmx_simd4_fneg_r gmx_simd4_fneg_d
428 # define gmx_simd4_max_r gmx_simd4_max_d
429 # define gmx_simd4_min_r gmx_simd4_min_d
430 # define gmx_simd4_round_r gmx_simd4_round_d
431 # define gmx_simd4_trunc_r gmx_simd4_trunc_d
432 # define gmx_simd4_dotproduct3_r gmx_simd4_dotproduct3_d
433 # define gmx_simd4_bool_t gmx_simd4_dbool_t
434 # define gmx_simd4_cmpeq_r gmx_simd4_cmpeq_d
435 # define gmx_simd4_cmplt_r gmx_simd4_cmplt_d
436 # define gmx_simd4_cmple_r gmx_simd4_cmple_d
437 # define gmx_simd4_and_b gmx_simd4_and_db
438 # define gmx_simd4_or_b gmx_simd4_or_db
439 # define gmx_simd4_anytrue_b gmx_simd4_anytrue_db
440 # define gmx_simd4_blendzero_r gmx_simd4_blendzero_d
441 # define gmx_simd4_blendnotzero_r gmx_simd4_blendnotzero_d
442 # define gmx_simd4_blendv_r gmx_simd4_blendv_d
443 # define gmx_simd4_reduce_r gmx_simd4_reduce_d
445 /* Memory allocation */
446 # define gmx_simd_align_r gmx_simd_align_d
447 # define gmx_simd_align_i gmx_simd_align_di
448 # define gmx_simd4_align_r gmx_simd4_align_d
450 # define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_DOUBLE
451 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH
452 # define GMX_SIMD_HAVE_INT32 GMX_SIMD_HAVE_DINT32
453 # define GMX_SIMD_INT32_WIDTH GMX_SIMD_DINT32_WIDTH
454 # define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_DINT32_EXTRACT
455 # define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_DINT32_LOGICAL
456 # define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_DINT32_ARITHMETICS
457 # define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_DOUBLE
459 #else /* GMX_DOUBLE */
461 /*! \name SIMD data types
463 * The actual storage of these types is implementation dependent. The
464 * documentation is generated from the reference implementation, but for
465 * normal usage this will likely not be what you are using.
466 * \{
468 /*! \brief Real precision floating-point SIMD datatype.
470 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
472 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_double_t
473 * internally, otherwise \ref gmx_simd_float_t.
475 # define gmx_simd_real_t gmx_simd_float_t
477 /*! \brief 32-bit integer SIMD type.
479 * This type is only available if \ref GMX_SIMD_HAVE_INT32 is 1.
481 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dint32_t
482 * internally, otherwise \ref gmx_simd_fint32_t. This might seem a strange
483 * implementation detail, but it is because some SIMD implementations use
484 * different types/widths of integers registers when converting from
485 * double vs. single precision floating point. As long as you just use
486 * this type you will not have to worry about precision.
488 # define gmx_simd_int32_t gmx_simd_fint32_t
490 /*! \brief Boolean SIMD type for usage with \ref gmx_simd_real_t.
492 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
494 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dbool_t
495 * internally, otherwise \ref gmx_simd_fbool_t. This is necessary since some
496 * SIMD implementations use bitpatterns for marking truth, so single-
497 * vs. double precision booleans are not necessarily exchangable.
498 * As long as you just use this type you will not have to worry about precision.
500 * See \ref gmx_simd_ibool_t for an explanation of real vs. integer booleans.
502 # define gmx_simd_bool_t gmx_simd_fbool_t
504 /*! \brief Boolean SIMD type for usage with \ref gmx_simd_int32_t.
506 * This type is only available if \ref GMX_SIMD_HAVE_INT32 is 1.
508 * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dibool_t
509 * internally, otherwise \ref gmx_simd_fibool_t. This is necessary since some
510 * SIMD implementations use bitpatterns for marking truth, so single-
511 * vs. double precision booleans are not necessarily exchangable, and while
512 * a double-precision boolean might be represented with a 64-bit mask, the
513 * corresponding integer might only use a 32-bit mask.
515 * We provide conversion routines for these cases, so the only thing you need to
516 * keep in mind is to use \ref gmx_simd_bool_t when working with
517 * \ref gmx_simd_real_t while you pick \ref gmx_simd_ibool_t when working with
518 * \ref gmx_simd_int32_t.
520 * To convert between them, use \ref gmx_simd_cvt_b2ib and \ref gmx_simd_cvt_ib2b.
522 # define gmx_simd_ibool_t gmx_simd_fibool_t
525 /*! \}
526 * \name SIMD load/store operations on gmx_simd_real_t
528 * \note Unaligned load/stores are only available when
529 * \ref GMX_SIMD_HAVE_LOADU and \ref GMX_SIMD_HAVE_STOREU are set, respectively.
530 * \{
533 /*! \brief Load \ref GMX_SIMD_REAL_WIDTH values from aligned memory to \ref gmx_simd_real_t
535 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load_d,
536 * otherwise \ref gmx_simd_load_f.
538 * \copydetails gmx_simd_load_f
540 # define gmx_simd_load_r gmx_simd_load_f
542 /*! \brief Set all elements in \ref gmx_simd_real_t from single value in memory.
544 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load1_d,
545 * otherwise \ref gmx_simd_load1_f.
547 * \copydetails gmx_simd_load1_f
549 # define gmx_simd_load1_r gmx_simd_load1_f
551 /*! \brief Set all elements in \ref gmx_simd_real_t from a scalar.
553 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set1_d,
554 * otherwise \ref gmx_simd_set1_f.
556 * \copydetails gmx_simd_set1_f
558 # define gmx_simd_set1_r gmx_simd_set1_f
560 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_real_t to aligned memory.
562 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_store_d,
563 * otherwise \ref gmx_simd_store_f.
565 * \copydetails gmx_simd_store_f
567 # define gmx_simd_store_r gmx_simd_store_f
569 /*! \brief Load \ref GMX_SIMD_REAL_WIDTH values from unaligned memory to \ref gmx_simd_real_t.
571 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_loadu_d,
572 * otherwise \ref gmx_simd_loadu_f.
574 * \copydetails gmx_simd_loadu_f
576 # define gmx_simd_loadu_r gmx_simd_loadu_f
578 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_real_t to unaligned memory.
580 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_storeu_d,
581 * otherwise \ref gmx_simd_storeu_f.
583 * \copydetails gmx_simd_storeu_f
585 # define gmx_simd_storeu_r gmx_simd_storeu_f
587 /*! \brief Set all elements in \ref gmx_simd_real_t to 0.0.
589 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_setzero_d,
590 * otherwise \ref gmx_simd_setzero_f.
592 * \copydetails gmx_simd_setzero_f
594 # define gmx_simd_setzero_r gmx_simd_setzero_f
596 /*! \}
597 * \name SIMD load/store operations on gmx_simd_int32_t
599 * \note Unaligned load/stores are only available when
600 * \ref GMX_SIMD_HAVE_LOADU and \ref GMX_SIMD_HAVE_STOREU are set, respectively.
601 * \{
604 /*! \brief Load \ref GMX_SIMD_INT32_WIDTH values from aligned memory to \ref gmx_simd_int32_t .
606 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_load_di ,
607 * otherwise \ref gmx_simd_load_fi .
609 * \copydetails gmx_simd_load_fi
611 # define gmx_simd_load_i gmx_simd_load_fi
613 /*! \brief Set all elements in \ref gmx_simd_int32_t from a single integer.
615 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set1_di ,
616 * otherwise \ref gmx_simd_set1_fi .
618 * \copydetails gmx_simd_set1_fi
620 # define gmx_simd_set1_i gmx_simd_set1_fi
622 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_int32_t to aligned memory.
624 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_store_di ,
625 * otherwise \ref gmx_simd_store_fi .
627 * \copydetails gmx_simd_store_fi
629 # define gmx_simd_store_i gmx_simd_store_fi
631 /*! \brief Load \ref GMX_SIMD_REAL_WIDTH values from unaligned memory to \ref gmx_simd_int32_t.
633 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_loadu_di ,
634 * otherwise \ref gmx_simd_loadu_fi .
636 * \copydetails gmx_simd_loadu_fi
638 # define gmx_simd_loadu_i gmx_simd_loadu_fi
640 /*! \brief Store \ref GMX_SIMD_REAL_WIDTH values from \ref gmx_simd_int32_t to unaligned memory.
642 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_storeu_di ,
643 * otherwise \ref gmx_simd_storeu_fi .
645 * \copydetails gmx_simd_storeu_fi
647 # define gmx_simd_storeu_i gmx_simd_storeu_fi
649 /*! \brief Extract single integer from \ref gmx_simd_int32_t element.
651 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_extract_di ,
652 * otherwise \ref gmx_simd_extract_fi .
654 * \copydetails gmx_simd_extract_fi
656 # define gmx_simd_extract_i gmx_simd_extract_fi
658 /*! \brief Set all elements in \ref gmx_simd_int32_t to 0.
660 * If GMX_DOUBLE is defined, it will be aliased to \ref gmx_simd_setzero_di ,
661 * otherwise \ref gmx_simd_setzero_fi .
663 * \copydetails gmx_simd_setzero_fi
665 # define gmx_simd_setzero_i gmx_simd_setzero_fi
668 /*! \}
669 * \name SIMD floating-point logical operations on gmx_simd_real_t
671 * These instructions are available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
672 * \{
675 /*! \brief Bitwise \a and on two \ref gmx_simd_real_t.
677 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_d,
678 * otherwise \ref gmx_simd_and_f.
680 * \copydetails gmx_simd_and_f
682 # define gmx_simd_and_r gmx_simd_and_f
684 /*! \brief Bitwise \a and-not on two \ref gmx_simd_real_t; 1st arg is complemented.
686 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_andnot_d,
687 * otherwise \ref gmx_simd_andnot_f.
689 * \copydetails gmx_simd_andnot_f
691 # define gmx_simd_andnot_r gmx_simd_andnot_f
693 /*! \brief Bitwise \a or on two \ref gmx_simd_real_t.
695 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_d,
696 * otherwise \ref gmx_simd_or_f.
698 * \copydetails gmx_simd_or_f
700 # define gmx_simd_or_r gmx_simd_or_f
702 /*! \brief Bitwise \a exclusive-or on two \ref gmx_simd_real_t.
704 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_xor_d,
705 * otherwise \ref gmx_simd_xor_f.
707 * \copydetails gmx_simd_xor_f
709 # define gmx_simd_xor_r gmx_simd_xor_f
711 /*! \}
712 * \name SIMD floating-point arithmetic operations on gmx_simd_real_t
713 * \{
716 /*! \brief SIMD a+b for two \ref gmx_simd_real_t.
718 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_add_d,
719 * otherwise \ref gmx_simd_add_f.
721 * \copydetails gmx_simd_add_f
723 # define gmx_simd_add_r gmx_simd_add_f
725 /*! \brief SIMD a-b for two \ref gmx_simd_real_t.
727 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_sub_d,
728 * otherwise \ref gmx_simd_sub_f.
730 * \copydetails gmx_simd_sub_f
732 # define gmx_simd_sub_r gmx_simd_sub_f
734 /*! \brief SIMD a*b for two \ref gmx_simd_real_t.
736 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_mul_d,
737 * otherwise \ref gmx_simd_mul_f.
739 * \copydetails gmx_simd_mul_f
741 # define gmx_simd_mul_r gmx_simd_mul_f
743 /*! \brief SIMD a*b+c for three \ref gmx_simd_real_t.
745 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fmadd_d,
746 * otherwise \ref gmx_simd_fmadd_f.
748 * \copydetails gmx_simd_fmadd_f
750 # define gmx_simd_fmadd_r gmx_simd_fmadd_f
752 /*! \brief SIMD a*b-c for three \ref gmx_simd_real_t.
754 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fmsub_d,
755 * otherwise \ref gmx_simd_fmsub_f.
757 * \copydetails gmx_simd_fmsub_f
759 # define gmx_simd_fmsub_r gmx_simd_fmsub_f
761 /*! \brief SIMD -a*b+c for three \ref gmx_simd_real_t.
763 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fnmadd_d,
764 * otherwise \ref gmx_simd_fnmadd_f.
766 * \copydetails gmx_simd_fnmadd_f
768 # define gmx_simd_fnmadd_r gmx_simd_fnmadd_f
770 /*! \brief SIMD -a*b-c for three \ref gmx_simd_real_t.
772 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fnmsub_d,
773 * otherwise \ref gmx_simd_fnmsub_f.
775 * \copydetails gmx_simd_fnmsub_f
777 # define gmx_simd_fnmsub_r gmx_simd_fnmsub_f
779 /*! \brief SIMD table lookup for 1/sqrt(x) approximation.
781 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_rsqrt_d,
782 * otherwise \ref gmx_simd_rsqrt_f.
784 * \copydetails gmx_simd_rsqrt_f
786 # define gmx_simd_rsqrt_r gmx_simd_rsqrt_f
788 /*! \brief SIMD table lookup for 1/x approximation.
790 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_rcp_d,
791 * otherwise \ref gmx_simd_rcp_f.
793 * \copydetails gmx_simd_rcp_f
795 # define gmx_simd_rcp_r gmx_simd_rcp_f
797 /*! \brief SIMD fabs(x) for \ref gmx_simd_real_t.
799 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fabs_d,
800 * otherwise \ref gmx_simd_fabs_f.
802 * \copydetails gmx_simd_fabs_f
804 # define gmx_simd_fabs_r gmx_simd_fabs_f
806 /*! \brief SIMD -x for \ref gmx_simd_real_t.
808 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fneg_d,
809 * otherwise \ref gmx_simd_fneg_f.
811 * \copydetails gmx_simd_fneg_f
813 # define gmx_simd_fneg_r gmx_simd_fneg_f
815 /*! \brief SIMD max(a,b) for each element in \ref gmx_simd_real_t.
817 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_max_d,
818 * otherwise \ref gmx_simd_max_f.
820 * \copydetails gmx_simd_max_f
822 # define gmx_simd_max_r gmx_simd_max_f
824 /*! \brief SIMD min(a,b) for each element in \ref gmx_simd_real_t.
826 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_min_d,
827 * otherwise \ref gmx_simd_min_f.
829 * \copydetails gmx_simd_min_f
831 # define gmx_simd_min_r gmx_simd_min_f
833 /*! \brief Round \ref gmx_simd_real_t to nearest int, return \ref gmx_simd_real_t.
835 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_round_d,
836 * otherwise \ref gmx_simd_round_f.
838 * \copydetails gmx_simd_round_f
840 # define gmx_simd_round_r gmx_simd_round_f
842 /*! \brief Truncate \ref gmx_simd_real_t towards 0, return \ref gmx_simd_real_t.
844 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_trunc_d,
845 * otherwise \ref gmx_simd_trunc_f.
847 * \copydetails gmx_simd_trunc_f
849 # define gmx_simd_trunc_r gmx_simd_trunc_f
851 /*! \brief SIMD Fraction, i.e. x-trunc(x) for \ref gmx_simd_real_t.
853 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_fraction_d,
854 * otherwise \ref gmx_simd_fraction_f.
856 * \copydetails gmx_simd_fraction_f
858 # define gmx_simd_fraction_r gmx_simd_fraction_f
860 /*! \brief Return the FP exponent of a SIMD \ref gmx_simd_real_t as a \ref gmx_simd_real_t.
862 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_get_exponent_d,
863 * otherwise \ref gmx_simd_get_exponent_f.
865 * \copydetails gmx_simd_get_exponent_f
867 # define gmx_simd_get_exponent_r gmx_simd_get_exponent_f
869 /*! \brief Return the FP mantissa of a SIMD \ref gmx_simd_real_t as a \ref gmx_simd_real_t.
871 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_get_mantissa_d,
872 * otherwise \ref gmx_simd_get_mantissa_f.
874 * \copydetails gmx_simd_get_mantissa_f
876 # define gmx_simd_get_mantissa_r gmx_simd_get_mantissa_f
878 /*! \brief Set the exponent of a SIMD \ref gmx_simd_real_t from a \ref gmx_simd_real_t.
880 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_set_exponent_d,
881 * otherwise \ref gmx_simd_set_exponent_f.
883 * \copydetails gmx_simd_set_exponent_f
885 # define gmx_simd_set_exponent_r gmx_simd_set_exponent_f
887 /*! \}
888 * \name SIMD comparison, boolean, and select operations for gmx_simd_real_t
889 * \{
892 /*! \brief SIMD a==b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
894 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmpeq_d,
895 * otherwise \ref gmx_simd_cmpeq_f.
897 * \copydetails gmx_simd_cmpeq_f
899 # define gmx_simd_cmpeq_r gmx_simd_cmpeq_f
901 /*! \brief SIMD a<b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
903 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmplt_d,
904 * otherwise \ref gmx_simd_cmplt_f.
906 * \copydetails gmx_simd_cmplt_f
908 # define gmx_simd_cmplt_r gmx_simd_cmplt_f
910 /*! \brief SIMD a<=b for \ref gmx_simd_real_t. Returns a \ref gmx_simd_bool_t.
912 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmple_d,
913 * otherwise \ref gmx_simd_cmple_f.
915 * \copydetails gmx_simd_cmple_f
917 # define gmx_simd_cmple_r gmx_simd_cmple_f
919 /*! \brief For each element, the result boolean is true if both arguments are true
921 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_db,
922 * otherwise \ref gmx_simd_and_fb.
924 * \copydetails gmx_simd_and_fb
926 # define gmx_simd_and_b gmx_simd_and_fb
928 /*! \brief For each element, the result boolean is true if either argument is true
930 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_db,
931 * otherwise \ref gmx_simd_or_fb.
933 * \copydetails gmx_simd_or_fb
935 # define gmx_simd_or_b gmx_simd_or_fb
937 /*! \brief Return nonzero if any element in gmx_simd_bool_t is true, otherwise 0.
939 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_anytrue_db,
940 * otherwise \ref gmx_simd_anytrue_fb.
942 * \copydetails gmx_simd_anytrue_fb
944 # define gmx_simd_anytrue_b gmx_simd_anytrue_fb
946 /*! \brief Selects elements from \ref gmx_simd_real_t where boolean is true, otherwise 0.
948 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendzero_d,
949 * otherwise \ref gmx_simd_blendzero_f.
951 * \copydetails gmx_simd_blendzero_f
953 * \sa gmx_simd_blendzero_i
955 # define gmx_simd_blendzero_r gmx_simd_blendzero_f
957 /*! \brief Selects elements from \ref gmx_simd_real_t where boolean is false, otherwise 0.
959 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendnotzero_d,
960 * otherwise \ref gmx_simd_blendnotzero_f.
962 * \copydetails gmx_simd_blendnotzero_f
964 # define gmx_simd_blendnotzero_r gmx_simd_blendnotzero_f
966 /*! \brief Selects from 2nd real SIMD arg where boolean is true, otherwise 1st arg.
968 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendv_d,
969 * otherwise \ref gmx_simd_blendv_f.
971 * \copydetails gmx_simd_blendv_f
973 # define gmx_simd_blendv_r gmx_simd_blendv_f
975 /*! \brief Return sum of all elements in SIMD floating-point variable.
977 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_reduce_d,
978 * otherwise \ref gmx_simd_reduce_f.
980 * \copydetails gmx_simd_reduce_f
982 # define gmx_simd_reduce_r gmx_simd_reduce_f
984 /*! \}
985 * \name SIMD integer logical operations on gmx_simd_int32_t
987 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_LOGICAL is 1.
988 * \{
991 /*! \brief Shift each element in \ref gmx_simd_int32_t left by immediate
993 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_slli_di,
994 * otherwise \ref gmx_simd_slli_fi.
996 * \copydetails gmx_simd_slli_fi
998 # define gmx_simd_slli_i gmx_simd_slli_fi
1000 /*! \brief Shift each element in \ref gmx_simd_int32_t right by immediate
1002 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_srli_di,
1003 * otherwise \ref gmx_simd_srli_fi.
1005 * \copydetails gmx_simd_srli_fi
1007 # define gmx_simd_srli_i gmx_simd_srli_fi
1009 /*! \brief Bitwise \a and on two \ref gmx_simd_int32_t.
1011 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_di,
1012 * otherwise \ref gmx_simd_and_fi.
1014 * \copydetails gmx_simd_and_fi
1016 # define gmx_simd_and_i gmx_simd_and_fi
1018 /*! \brief Bitwise \a and-not on two \ref gmx_simd_int32_t; 1st arg is complemented.
1020 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_andnot_di,
1021 * otherwise \ref gmx_simd_andnot_fi.
1023 * \copydetails gmx_simd_andnot_fi
1025 # define gmx_simd_andnot_i gmx_simd_andnot_fi
1027 /*! \brief Bitwise \a or on two \ref gmx_simd_int32_t.
1029 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_di,
1030 * otherwise \ref gmx_simd_or_fi.
1032 * \copydetails gmx_simd_or_fi
1034 # define gmx_simd_or_i gmx_simd_or_fi
1036 /*! \brief Bitwise \a xor on two \ref gmx_simd_int32_t.
1038 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_xor_di,
1039 * otherwise \ref gmx_simd_xor_fi.
1041 * \copydetails gmx_simd_xor_fi
1043 # define gmx_simd_xor_i gmx_simd_xor_fi
1045 /*! \}
1046 * \name SIMD integer arithmetic operations on gmx_simd_int32_t
1048 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
1049 * \{
1052 /*! \brief SIMD a+b for two \ref gmx_simd_int32_t.
1054 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_add_di,
1055 * otherwise \ref gmx_simd_add_fi.
1057 * \copydetails gmx_simd_add_fi
1059 # define gmx_simd_add_i gmx_simd_add_fi
1061 /*! \brief SIMD a-b for two \ref gmx_simd_int32_t.
1063 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_sub_di,
1064 * otherwise \ref gmx_simd_sub_fi.
1066 * \copydetails gmx_simd_sub_fi
1068 # define gmx_simd_sub_i gmx_simd_sub_fi
1070 /*! \brief SIMD a*b for two \ref gmx_simd_int32_t.
1072 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_mul_di,
1073 * otherwise \ref gmx_simd_mul_fi.
1075 * \copydetails gmx_simd_mul_fi
1077 # define gmx_simd_mul_i gmx_simd_mul_fi
1079 /*! \}
1080 * \name SIMD integer comparison, booleans, and selection on gmx_simd_int32_t
1082 * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
1083 * \{
1086 /*! \brief Returns boolean describing whether a==b, for \ref gmx_simd_int32_t
1088 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmpeq_di,
1089 * otherwise \ref gmx_simd_cmpeq_fi.
1091 * \copydetails gmx_simd_cmpeq_fi
1093 # define gmx_simd_cmpeq_i gmx_simd_cmpeq_fi
1095 /*! \brief Returns boolean describing whether a<b, for \ref gmx_simd_int32_t
1097 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cmplt_di,
1098 * otherwise \ref gmx_simd_cmplt_fi.
1100 * \copydetails gmx_simd_cmplt_fi
1102 # define gmx_simd_cmplt_i gmx_simd_cmplt_fi
1104 /*! \brief For each element, the result boolean is true if both arguments are true
1106 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_and_dib,
1107 * otherwise \ref gmx_simd_and_fib.
1109 * \copydetails gmx_simd_and_fib
1111 # define gmx_simd_and_ib gmx_simd_and_fib
1113 /*! \brief For each element, the result boolean is true if either argument is true.
1115 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_dib,
1116 * otherwise \ref gmx_simd_or_fib.
1118 * \copydetails gmx_simd_or_fib
1120 # define gmx_simd_or_ib gmx_simd_or_fib
1122 /*! \brief Return nonzero if any element in gmx_simd_ibool_t is true, otherwise 0.
1124 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_anytrue_dib,
1125 * otherwise \ref gmx_simd_anytrue_fib.
1127 * \copydetails gmx_simd_anytrue_fib
1129 # define gmx_simd_anytrue_ib gmx_simd_anytrue_fib
1131 /*! \brief Selects elements from \ref gmx_simd_int32_t where boolean is true, otherwise 0.
1133 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendzero_di,
1134 * otherwise \ref gmx_simd_blendzero_fi.
1136 * \copydetails gmx_simd_blendzero_fi
1138 # define gmx_simd_blendzero_i gmx_simd_blendzero_fi
1140 /*! \brief Selects elements from \ref gmx_simd_int32_t where boolean is false, otherwise 0.
1142 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendnotzero_di,
1143 * otherwise \ref gmx_simd_blendnotzero_fi.
1145 * \copydetails gmx_simd_blendnotzero_fi
1147 # define gmx_simd_blendnotzero_i gmx_simd_blendnotzero_fi
1149 /*! \brief Selects from 2nd int SIMD arg where boolean is true, otherwise 1st arg.
1151 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_blendv_di,
1152 * otherwise \ref gmx_simd_blendv_fi.
1154 * \copydetails gmx_simd_blendv_fi
1156 # define gmx_simd_blendv_i gmx_simd_blendv_fi
1158 /*! \}
1159 * \name SIMD conversion operations
1161 * These instructions are available when both types involved in the conversion
1162 * are defined, e.g. if \ref GMX_SIMD_HAVE_REAL and \ref GMX_SIMD_HAVE_INT32
1163 * are 1 for real-to-integer conversion.
1164 * \{
1167 /*! \brief Convert gmx_simd_real_t to gmx_simd_int32_t, round to nearest integer.
1169 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_d2i,
1170 * otherwise \ref gmx_simd_cvt_f2i.
1172 * \copydetails gmx_simd_cvt_f2i
1174 # define gmx_simd_cvt_r2i gmx_simd_cvt_f2i
1176 /*! \brief Convert gmx_simd_real_t to gmx_simd_int32_t, truncate towards zero
1178 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvtt_d2i,
1179 * otherwise \ref gmx_simd_cvtt_f2i.
1181 * \copydetails gmx_simd_cvtt_f2i
1183 # define gmx_simd_cvtt_r2i gmx_simd_cvtt_f2i
1185 /*! \brief Convert gmx_simd_int32_t to gmx_simd_real_t
1187 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_i2d,
1188 * otherwise \ref gmx_simd_cvt_i2f.
1190 * \copydetails gmx_simd_cvt_i2f
1192 # define gmx_simd_cvt_i2r gmx_simd_cvt_i2f
1194 /*! \brief Convert from gmx_simd_bool_t to gmx_simd_ibool_t
1196 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_db2dib,
1197 * otherwise \ref gmx_simd_cvt_fb2fib.
1199 * \copydetails gmx_simd_cvt_fb2fib
1201 # define gmx_simd_cvt_b2ib gmx_simd_cvt_fb2fib
1203 /*! \brief Convert from gmx_simd_ibool_t to gmx_simd_bool_t
1205 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_cvt_dib2db,
1206 * otherwise \ref gmx_simd_cvt_fib2fb.
1208 * \copydetails gmx_simd_cvt_fib2fb
1210 # define gmx_simd_cvt_ib2b gmx_simd_cvt_fib2fb
1213 /*! \}
1214 * \name SIMD memory alignment operations
1215 * \{
1218 /*! \brief Align real memory for SIMD usage.
1220 * This routine will only align memory if \ref GMX_SIMD_HAVE_REAL is 1.
1221 * Otherwise the original pointer will be returned.
1223 * Start by allocating an extra \ref GMX_SIMD_REAL_WIDTH float elements of memory,
1224 * and then call this function. The returned pointer will be greater or equal
1225 * to the one you provided, and point to an address inside your provided memory
1226 * that is aligned to the SIMD width.
1228 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_align_d,
1229 * otherwise \ref gmx_simd_align_f. For detailed documentation, see the
1230 * precision-specific implementation routines.
1232 # define gmx_simd_align_r gmx_simd_align_f
1234 /*! \brief Align integer memory for SIMD usage.
1236 * This routine will only align memory if \ref GMX_SIMD_HAVE_INT32 is 1.
1237 * Otherwise the original pointer will be returned.
1239 * Start by allocating an extra \ref GMX_SIMD_INT32_WIDTH elements of memory,
1240 * and then call this function. The returned pointer will be greater or equal
1241 * to the one you provided, and point to an address inside your provided memory
1242 * that is aligned to the SIMD width.
1244 * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_align_di,
1245 * otherwise \ref gmx_simd_align_fi. For detailed documentation, see the
1246 * precision-specific implementation routines.
1248 # define gmx_simd_align_i gmx_simd_align_fi
1250 /*! \} */
1252 /*! \name SIMD4 - constant width-four SIMD datatypes
1254 * These operations are only meant to be used for a few coordinate
1255 * manipulation and grid interpolation routines, so we only support a subset
1256 * of operations for SIMD4. To avoid repeating all the documentation from
1257 * the generic width SIMD routines, we only provide brief documentation for
1258 * these operations. Follow the link to the implementation documentation or the
1259 * reference to the corresponding generic SIMD routine. The format will be
1260 * exactly the same, but they have SIMD replaced with SIMD4.
1261 * \{
1264 /*! \brief SIMD real datatype guaranteed to be 4 elements wide, if available.
1266 * All the SIMD4 datatypes and operations behave like their counterparts for
1267 * the generic SIMD implementation, but they might be implemented with different
1268 * registers, or not supported at all. It is important that you check the
1269 * define \ref GMX_SIMD4_HAVE_REAL before using it.
1271 * Just as the normal SIMD operations, all SIMD4 types and routines will
1272 * be aliased to either single or double precision ones based on whether
1273 * GMX_DOUBLE is defined.
1275 * \note There is no support for integer or math operations in SIMD4.
1277 # define gmx_simd4_real_t gmx_simd4_float_t
1279 /*! \brief Boolean for \ref gmx_simd4_real_t comparision/selection */
1280 # define gmx_simd4_bool_t gmx_simd4_fbool_t
1282 /*! \brief Load aligned data to gmx_simd4_real_t.
1284 * \copydetails gmx_simd4_load_f
1286 # define gmx_simd4_load_r gmx_simd4_load_f
1288 /*! \brief Load single element to gmx_simd4_real_t
1290 * \copydetails gmx_simd4_load1_f
1292 # define gmx_simd4_load1_r gmx_simd4_load1_f
1294 /*! \brief Set gmx_simd4_real_t from scalar value
1296 * \copydetails gmx_simd4_set1_f
1298 # define gmx_simd4_set1_r gmx_simd4_set1_f
1300 /*! \brief store aligned data from gmx_simd4_real_t
1302 * \copydetails gmx_simd4_store_f
1304 # define gmx_simd4_store_r gmx_simd4_store_f
1306 /*! \brief Load unaligned data to gmx_simd4_real_t
1308 * \copydetails gmx_simd4_loadu_f
1310 # define gmx_simd4_loadu_r gmx_simd4_loadu_f
1312 /*! \brief Store unaligned data from gmx_simd4_real_t
1314 * \copydetails gmx_simd4_storeu_f
1316 # define gmx_simd4_storeu_r gmx_simd4_storeu_f
1318 /*! \brief Set all elements in gmx_simd4_real_t to 0.0
1320 * \copydetails gmx_simd4_setzero_f
1322 # define gmx_simd4_setzero_r gmx_simd4_setzero_f
1324 /*! \brief Bitwise and for two gmx_simd4_real_t
1326 * \copydetails gmx_simd4_and_f
1328 # define gmx_simd4_and_r gmx_simd4_and_f
1330 /*! \brief Bitwise and-not for two gmx_simd4_real_t. 1st arg is complemented.
1332 * \copydetails gmx_simd4_andnot_f
1334 # define gmx_simd4_andnot_r gmx_simd4_andnot_f
1336 /*! \brief Bitwise or for two gmx_simd4_real_t
1338 * \copydetails gmx_simd4_or_f
1340 # define gmx_simd4_or_r gmx_simd4_or_f
1342 /*! \brief Bitwise xor for two gmx_simd4_real_t
1344 * \copydetails gmx_simd4_xor_f
1346 # define gmx_simd4_xor_r gmx_simd4_xor_f
1348 /*! \brief a+b for \ref gmx_simd4_real_t
1350 * \copydetails gmx_simd4_add_f
1352 # define gmx_simd4_add_r gmx_simd4_add_f
1354 /*! \brief a-b for \ref gmx_simd4_real_t
1356 * \copydetails gmx_simd4_sub_f
1358 # define gmx_simd4_sub_r gmx_simd4_sub_f
1360 /*! \brief a*b for \ref gmx_simd4_real_t
1362 * \copydetails gmx_simd4_mul_f
1364 # define gmx_simd4_mul_r gmx_simd4_mul_f
1366 /*! \brief a*b+c for \ref gmx_simd4_real_t
1368 * \copydetails gmx_simd4_fmadd_f
1370 # define gmx_simd4_fmadd_r gmx_simd4_fmadd_f
1372 /*! \brief a*b-c for \ref gmx_simd4_real_t
1374 * \copydetails gmx_simd4_fmsub_f
1376 # define gmx_simd4_fmsub_r gmx_simd4_fmsub_f
1378 /*! \brief -a*b+c for \ref gmx_simd4_real_t
1380 * \copydetails gmx_simd4_fnmadd_f
1382 # define gmx_simd4_fnmadd_r gmx_simd4_fnmadd_f
1384 /*! \brief -a*b-c for \ref gmx_simd4_real_t
1386 * \copydetails gmx_simd4_fnmsub_f
1388 # define gmx_simd4_fnmsub_r gmx_simd4_fnmsub_f
1390 /*! \brief 1/sqrt(x) approximate lookup for \ref gmx_simd4_real_t
1392 * \copydetails gmx_simd4_rsqrt_f
1394 # define gmx_simd4_rsqrt_r gmx_simd4_rsqrt_f
1396 /*! \brief fabs(x) for \ref gmx_simd4_real_t
1398 * \copydetails gmx_simd4_fabs_f
1400 # define gmx_simd4_fabs_r gmx_simd4_fabs_f
1402 /*! \brief Change sign (-x) for \ref gmx_simd4_real_t
1404 * \copydetails gmx_simd4_fneg_f
1406 # define gmx_simd4_fneg_r gmx_simd4_fneg_f
1408 /*! \brief Select maximum of each pair of elements from args for \ref gmx_simd4_real_t
1410 * \copydetails gmx_simd4_max_f
1412 # define gmx_simd4_max_r gmx_simd4_max_f
1414 /*! \brief Select minimum of each pair of elements from args for \ref gmx_simd4_real_t
1416 * \copydetails gmx_simd4_min_f
1418 # define gmx_simd4_min_r gmx_simd4_min_f
1420 /*! \brief Round \ref gmx_simd4_real_t to nearest integer, return \ref gmx_simd4_real_t
1422 * \copydetails gmx_simd4_round_f
1424 # define gmx_simd4_round_r gmx_simd4_round_f
1426 /*! \brief Truncate \ref gmx_simd4_real_t towards zero, return \ref gmx_simd4_real_t
1428 * \copydetails gmx_simd4_trunc_f
1430 # define gmx_simd4_trunc_r gmx_simd4_trunc_f
1432 /*! \brief Scalar product of first three elements of two \ref gmx_simd4_real_t *
1434 * \copydetails gmx_simd4_dotproduct3_f
1436 # define gmx_simd4_dotproduct3_r gmx_simd4_dotproduct3_f
1438 /*! \brief Return booleans whether a==b for each element two \ref gmx_simd4_real_t
1440 * \copydetails gmx_simd4_cmpeq_f
1442 # define gmx_simd4_cmpeq_r gmx_simd4_cmpeq_f
1443 /*! \brief Return booleans whether a<b for each element two \ref gmx_simd4_real_t
1445 * \copydetails gmx_simd4_cmplt_f
1447 # define gmx_simd4_cmplt_r gmx_simd4_cmplt_f
1448 /*! \brief Return booleans whether a<=b for each element two \ref gmx_simd4_real_t
1450 * \copydetails gmx_simd4_cmple_f
1452 # define gmx_simd4_cmple_r gmx_simd4_cmple_f
1454 /*! \brief Logical and for two \ref gmx_simd4_bool_t
1456 * \copydetails gmx_simd4_and_fb
1458 # define gmx_simd4_and_b gmx_simd4_and_fb
1459 /*! \brief Logical or for two \ref gmx_simd4_bool_t
1461 * \copydetails gmx_simd4_or_fb
1463 # define gmx_simd4_or_b gmx_simd4_or_fb
1465 /*! \brief Return nonzero if any element in \ref gmx_simd4_bool_t is true, otherwise 0
1467 * \copydetails gmx_simd4_anytrue_fb
1469 # define gmx_simd4_anytrue_b gmx_simd4_anytrue_fb
1471 /*! \brief Selects from 2nd real SIMD4 arg where boolean is true, otherwise 1st arg
1473 * \copydetails gmx_simd4_blendzero_f
1475 # define gmx_simd4_blendzero_r gmx_simd4_blendzero_f
1477 /*! \brief Selects from 2nd real SIMD4 arg where boolean is false, otherwise 1st arg
1479 * \copydetails gmx_simd4_blendnotzero_f
1481 # define gmx_simd4_blendnotzero_r gmx_simd4_blendnotzero_f
1483 /*! \brief Selects from 2nd real SIMD4 arg where boolean is true, otherwise 1st arg
1485 * \copydetails gmx_simd4_blendv_f
1487 # define gmx_simd4_blendv_r gmx_simd4_blendv_f
1489 /*! \brief Return sum of all elements in SIMD4 floating-point variable.
1491 * \copydetails gmx_simd4_reduce_f
1493 # define gmx_simd4_reduce_r gmx_simd4_reduce_f
1495 /*! \brief Align real memory for SIMD4 usage.
1497 * \copydetails gmx_simd4_align_f
1499 # define gmx_simd4_align_r gmx_simd4_align_f
1501 /*! \} */
1503 /*! \name SIMD predefined macros to describe high-level capabilities
1504 * \{
1507 /*! \brief 1 if gmx_simd_real_t is available, otherwise 0.
1509 * if GMX_DOUBLE is defined, this will be aliased to
1510 * \ref GMX_SIMD_HAVE_DOUBLE, otherwise GMX_SIMD_HAVE_FLOAT.
1512 # define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_FLOAT
1514 /*! \brief Width of gmx_simd_real_t.
1516 * if GMX_DOUBLE is defined, this will be aliased to
1517 * \ref GMX_SIMD_DOUBLE_WIDTH, otherwise GMX_SIMD_FLOAT_WIDTH.
1519 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH
1521 /*! \brief 1 if gmx_simd_int32_t is available, otherwise 0.
1523 * if GMX_DOUBLE is defined, this will be aliased to
1524 * \ref GMX_SIMD_HAVE_DINT32, otherwise GMX_SIMD_HAVE_FINT32.
1526 # define GMX_SIMD_HAVE_INT32 GMX_SIMD_HAVE_FINT32
1528 /*! \brief Width of gmx_simd_int32_t.
1530 * if GMX_DOUBLE is defined, this will be aliased to
1531 * \ref GMX_SIMD_DINT32_WIDTH, otherwise GMX_SIMD_FINT32_WIDTH.
1533 # define GMX_SIMD_INT32_WIDTH GMX_SIMD_FINT32_WIDTH
1535 /*! \brief 1 if gmx_simd_extract_i() is available, otherwise 0.
1537 * if GMX_DOUBLE is defined, this will correspond to
1538 * \ref GMX_SIMD_HAVE_DINT32_EXTRACT, otherwise GMX_SIMD_HAVE_FINT32_EXTRACT.
1540 # define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_FINT32_EXTRACT
1542 /*! \brief 1 if logical ops are supported on gmx_simd_int32_t, otherwise 0.
1544 * if GMX_DOUBLE is defined, this will correspond to
1545 * \ref GMX_SIMD_HAVE_DINT32_LOGICAL, otherwise GMX_SIMD_HAVE_FINT32_LOGICAL.
1547 # define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_FINT32_LOGICAL
1549 /*! \brief 1 if arithmetic ops are supported on gmx_simd_int32_t, otherwise 0.
1551 * if GMX_DOUBLE is defined, this will be aliased to
1552 * \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS, otherwise GMX_SIMD_HAVE_FINT32_ARITHMETICS.
1554 # define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_FINT32_ARITHMETICS
1556 /*! \brief 1 if gmx_simd4_real_t is available, otherwise 0.
1558 * if GMX_DOUBLE is defined, this will be aliased to
1559 * \ref GMX_SIMD4_HAVE_DOUBLE, otherwise GMX_SIMD4_HAVE_FLOAT.
1561 # define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_FLOAT
1564 /*! \} */
1566 #endif /* GMX_DOUBLE */
1568 /*! \} */
1569 /*! \endcond */
1571 #if 0
1572 /* Finally, a hack to cover a possible corner case of using an
1573 explicit GMX_SIMD_HAVE_FLOAT or GMX_SIMD_HAVE_DOUBLE, rather than
1574 GMX_SIMD_HAVE_REAL.
1576 Such code is expected to include simd.h to get those symbols
1577 defined, but the actual definitions are in the implemention headers
1578 included by simd.h. check-source.py is not a full preprocessor, so
1579 it does not see the definitions in the implementation headers as
1580 belonging to simd.h, thus it cannot check that simd.h is being used
1581 correctly in the above hypothetical corner case. However, the
1582 checker also does not parse #if 0, so we can fool the checker into
1583 thinking that definition occurs here, and that will work well
1584 enough.
1586 If there's ever other kinds of SIMD code that might have the same
1587 problem, we might want to add other variables here.
1589 # define GMX_SIMD_HAVE_FLOAT 1
1590 # define GMX_SIMD_HAVE_DOUBLE 1
1592 #endif /* 0 */
1594 #endif /* GMX_SIMD_SIMD_H */