Fixed synchronization counter update for pwr6 kernels
[gromacs.git] / src / gmxlib / nonbonded / nb_kerneltype.h
blob5b88a901714289df389de53c3864fe03600c8df0
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 * Gromacs 4.0 Copyright (c) 1991-2003
4 * David van der Spoel, Erik Lindahl, University of Groningen.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * To help us fund GROMACS development, we humbly ask that you cite
12 * the research papers on the package. Check out http://www.gromacs.org
14 * And Hey:
15 * Gnomes, ROck Monsters And Chili Sauce
18 #ifndef _NB_KERNEL_H_
19 #define _NB_KERNEL_H_
21 #include <types/simple.h>
23 /** \file
24 * \brief Function interfaces and nonbonded kernel meta-data.
26 * \internal
28 * This file contains the definitions of the call sequences and
29 * parameter documetation for the nonbonded kernels, both the
30 * optimized level1/level2 kernels in Fortran or C, and the more
31 * general level0 normal and free energy kernels.
32 * It also defines an information structure used to record
33 * data about nonbonded kernels, their type of call sequence,
34 * and the number of flops used in the outer and inner loops.
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 #if 0
41 } /* fixes auto-indentation problems */
42 #endif
47 /* Temporary structure to be able to pass 2 GB data through the
48 * work data pointer argument.
50 typedef struct
52 real gb_epsilon_solvent; /* Epsilon for solvent */
53 real epsilon_r; /* Epsilon for inner dielectric */
54 real * gpol; /* Polarization energy group */
55 } gmx_gbdata_t;
58 /** Interface to level1 optimized nonbonded kernels.
60 * \internal
62 * To simplify the jungle of nonbonded function calls in earlier
63 * versions of Gromacs, we have changed most nonbonded kernels
64 * to use the same call sequence, even if some of the parameters
65 * are ununsed in a particular kernel.
67 * This function call sequence is used by all level1 kernels. When this is
68 * written it is actually used by the level2 kernels too, but that might
69 * change in the future - don't assume they are identical.
70 * All arguments are passed as pointers to make to interface identical across
71 * the Fortran and C implementations. For the same reason (Fortran & assembly)
72 * we cannot use structures.
74 * The nonbonded kernels should never be called directly - use
75 * the setup and wrapper routines in gmx_nonbonded.h instead.
77 * \param nri Number of neighborlists/i-particles/outer-particles.
78 * The outer loop of the nonbonded kernel will iterate
79 * over these indices. This number must always
80 * be provided,
81 * \param iinr Atom number (starting on 0) for each index 0..(nri-1)
82 * in the outer loop. These are the 'owner'/'parent'
83 * particles of each neighborlist. It is quite common
84 * that a particle occurs several times with different
85 * shift indices. This is the field 'ilist' in
86 * the neighborlist. This array must always be provided.
87 * \param jindex This array specifies the index of the first j neighbor
88 * in the array jjnr[] which belongs to the corresponding
89 * i atom. The j neighbors for all lists are merged into
90 * one continous array, so we also get the first index for
91 * the next list in the next element. The size of this list
92 * is nri+1, and the last position contains the number of
93 * entries in the j list. Confused? It is actually pretty
94 * straightforward; for index <tt>i</tt> the i atom is
95 * <tt>iinr[i]</tt>, and the atom indices of the neighbors
96 * to interact with are in positions <tt>jindex[i]</tt>
97 * through <tt>(jindex[i+1]-1)</tt> in <tt>jjnr[]</tt>.
98 * This corresponds to ilist_jindex in the neighborlist.
99 * This array must always be provided.
100 * \param jjnr Array with the j particle neighbors of list i in positions
101 * <tt>jindex[i]</tt> through <tt>(jindex[i+1]-1)</tt>. This
102 * field is called jlist in the neighborlist structure.
103 * This array must always be provided.
104 * \param shift Array with shift vector index corresponding to each
105 * iinr neighborlist. In most codes, periodic boundary
106 * conditions are applied by checking pairwise distances
107 * of particles, and correcting differences of more than
108 * half a box. This is costly (latencies) and unnecessary
109 * since we already decided which periodic copy to use during
110 * neighborsearching. We solve this by introducing
111 * \a shiftvectors and a \a shiftindex. The shiftvectors is
112 * normally an array of 3*3*3 vectors, corresponding to
113 * displacements +- one box unit in one or more directions.
114 * The central box (no displacement) is always in the middle,
115 * index 13 in the case of 3*3*3 shift vectors.
116 * Imagine a particle interacting with two particles in the
117 * current box, and three particles in the box to the left.
118 * We represent this with two ilist entries, one with shift
119 * index=13 (central box) and one with shift index=12.
120 * all neighbors in each sublist will have the same shift,
121 * so in the nonbonded interaction, we only have to add the
122 * shift vector to the outer (i) particle coordinates before
123 * starting the loop over neighbors. This extracts periodic
124 * boundary condition calculations from the inner loops,
125 * increasing performance significantly. This array must
126 * always be provided, but if you do not use any periodic
127 * boundary conditions you could set all elements to zero
128 * and provide a shiftvec array of length three, with all
129 * elements there too being zero.
130 * \param shiftvec The shift vectors for each index. Since the code needs to
131 * interface with Fortran or assembly, we have to use
132 * pointers to float/double instead of the gmx_rvec_t type.
133 * The x/y/z shift distances for shift index idx are in
134 * <tt>shiftvec[3*idx]</tt>, <tt>shiftvec[3*idx+1]</tt>,
135 * and <tt>shiftvec[3*idx+2]</tt>. This is fully binary
136 * compatible with an array of gmx_rvec_t, so you can simply
137 * use a typecast when calling. The length of the array is
138 * three times the number of shift indices. This array
139 * must be provided, but see the comment for shift.
140 * \param fshift Forces from 'other periodic boxes' for each shift index.
141 * The shift concept does not affect the forces on individual
142 * atoms, but it will have an effect on the virial. To
143 * account for this, we add the total force on the i particle
144 * in each outer loop to the current shift index in this
145 * array, which has exactly the same dimensions as the
146 * shiftvec array above. The Gromacs manual describes how
147 * this is used to calculate the effect in the virial.
148 * Dimension is identical to shiftvec.
149 * For kernels that do not calculate forces this can be NULL.
150 * \param gid Energy group index for each i neighborlist index; this
151 * corresponds to ilist_groupindex in the neighborlist and is
152 * used to decompose energies into groupwise contributions.
153 * If an i particle belongs to group A, and has interactions
154 * both with groups A,B, and C, you will get three different
155 * i list entries with energy group indices corresponding to
156 * A-A, A-B, and A-C neighbors, in addition to possible
157 * copies due to different shift indices. The energies will
158 * be added to this index (i.e. index=gid[i]) in the vc and
159 * vnb arrays. This array must always be provided.
160 * If you don't want to decompose energies all groups should
161 * be set to index 0 (length or the array nri).
162 * \param pos Coordinates of atoms, starting on 0 as all arrays. To
163 * interface with fortran and assembly this is a simple list
164 * of floats, with x/y/z of atom n in positions
165 * <tt>pos[3*n]</tt>, <tt>pos[3*n+1]</tt>, and
166 * <tt>pos[3*n+2]</tt>. This is binary compatible with,
167 * and can be typecast from, an array of gmx_rvec_t.
168 * This array must always be provided.
169 * \param faction Forces of atoms. To interface with fortran and assembly
170 * this is a simple list of floats, with x/y/z of atom n in
171 * positions <tt>pos[3*n]</tt>, <tt>pos[3*n+1]</tt>, and
172 * <tt>pos[3*n+2]</tt>. This is binary compatible with,
173 * and can be typecast from, an array of gmx_rvec_t.
174 * For kernels that do not calculate forces this can be NULL.
175 * \param charge Array with the charge of atom n in position n.
176 * If you are calling a kernel without coulomb interaction
177 * this parameter can safely be set to NULL.
178 * \param facel Factor to multiply all electrostatic interactions with;
179 * this is essentially 1/(4*pi*epsilon). For kernels without
180 * coulomb interaction the value will not be used. In the
181 * fortran version (pass-by-reference) it can be NULL in
182 * that case.
183 * \param krf The 'k' constant for reaction-field electrostatic
184 * interactions. Can be zero/NULL for non-RF interactions.
185 * \param crf The 'c' constant for reaction-field electrostatic
186 * interactions. Can be zero/NULL for non-RF interactions.
187 * \param vc List of Coulomb energies, the length is equal to the
188 * number of energy group combinations. As described for
189 * gid above, the coulomb energy of each outer/i-particle
190 * list will be added to vc[gid[i]]. Can be NULL for
191 * kernels without electrostatic interaction.
192 * \param type Array with the index of the Van der Waals type for each
193 * atom, starting on 0 . This is used to look up the VdW
194 * parameters. When no VdW interactions are calculated
195 * (i.e. only Coulomb), this can be set to NULL.
196 * \param ntype Number of VdW types, used to look up the VdW parameters.
197 * This parameter can be zero (NULL for pass-by-reference)
198 * if the kernel does not include VdW interactions.
199 * \param vdwparam Array with Van der Waals parameters. The contents and
200 * size depends on the number of parameters required for
201 * the selected VdW interaction (3 for B-ham, 2 for LJ, etc).
202 * There are a total of ntype*ntype pair combinations, and
203 * the size of this array is the number of such pairs times
204 * the number of parameters.
205 * With 'nparam' parameters, and 'ntype' types in total,
206 * the first parameter for interactions between atoms of VdW
207 * type 'ti' and VdW type 'tj' is located in
208 * <tt>vdwparam[nparam*(ntype*ti+tj)]</tt>, and the
209 * additional parameters follow directly in the next
210 * position(s). Note that the array corresponds to a
211 * symmetric matrix - you should get the same parameters if
212 * you swap ti and tj. This parameter can be NULL
213 * if the kernel does not include VdW interactions.
214 * \param vvdw List of Van der Waals energies, the length is equal to
215 * the number of energy group combinations. As described
216 * for gid above, the coulomb energy of each outer list
217 * will be added to vc[gid[i]]. Can be NULL for
218 * kernels without electrostatic interaction.
219 * \param tabscale Distance between table points in the vftab table.
220 * This is always the same for Coulomb and VdW when both
221 * are tabulated. If the kernel does not include any
222 * tabulated interactions it can be zero, or NULL when
223 * passed-by-reference.
224 * \param vftab Table data for Coulomb and/or VdW interactions.
225 * Each 'point' in the table consists of the four
226 * floating-point numbers Y,F,G,H as described in the
227 * Gromacs manual. If the kernel only tabulates the
228 * Coulomb interaction this is followed by the next
229 * point. If both Coulomb and Van der Waals interactions
230 * are tabulated it is instead followed by another
231 * four numbers for the dispersion table at the same
232 * point, and then finally the repulsion table, i.e.
233 * a total of 12 numbers per table point. If only
234 * Van der Waals interactions are tabulated the
235 * Dispersion and Repulsion table (in that order) use
236 * a total of 8 floating-point numbers per point.
237 * This array only needs to be provided for kernels with
238 * tabulated interactions - otherwise it can be NULL.
239 * \param invsqrta Array with the inverse square root of the born radius
240 * of each atom. This can safely be NULL when the kernel
241 * does not calculate Generalized Born interactions.
242 * \param dvda Array where the derivative of the potential with respect
243 * to the born radius for each atom will be written. This
244 * is necessary to calculate the effect of born radius
245 * derivatives on forces. However, just as for the force
246 * arrays it must be provided even when not calculating
247 * forces, since an implementation might not provide a
248 * non-force version of the routine. When the kernel does
249 * not calculate Generalized Born interactions it can
250 * safely be set to NULL, though.
251 * \param gbtabscale Distance between (scaled) table points for tabulated
252 * Generalized Born interactions. If the kernel does not
253 * calculate Generalized Born interactions it can be zero,
254 * or NULL when passed-by-reference. Note that this is
255 * usually different from the standard (non-GB) table scale.
256 * \param gbtab Table data for Generalized Born Coulomb interactions.
257 * Since these interactions contain parameters that
258 * enter in a non-trivial way I have introduced a trick
259 * where the tabulated interaction is a function of
260 * the distance scaled with the square roots of the two
261 * born radii. Apart from this it contains similar
262 * data (Y,F,G,H) as the normal Coulomb/VdW tables.
263 * See comments in the table code for details. This
264 * can safely be set to NULL if you are not using
265 * Generalized Born interactions.
266 * \param nthreads Number of threads calling the kernel concurrently.
267 * This value is only used to optimize the chunk size
268 * of the neighborlists processed by each thread, so
269 * it won't cause incorrect results if you get it wrong.
270 * \param count Pointer to a common counter to synchronize
271 * multithreaded execution. This is normally the counter
272 * in the neighborlist. It must be set to zero before
273 * calling the kernel to get correct results. Note that
274 * this is necessary even for a single thread if Gromacs
275 * is compiled with thread support (meaning it is
276 * a mandatory parameter).
277 * \param mtx Pointer to a mutex protecting the counter, masked as
278 * a pointer-to-void since thread support is optional.
279 * However, if Gromacs is compiled with threads you \a must
280 * provide a mutex - it would hurt performance to check
281 * it for each iteration in the nonbonded kernel. The
282 * mutex is normally created automatically by the
283 * neighborlist initialization routine gmx_nlist_init().
284 * (Yet another reason not to call kernels directly).
285 * \param outeriter The number of iterations performed in the outer loop
286 * of the kernel will be written to this integer. If
287 * multiple threads are calling the loop this will be
288 * the part of nri handled by this thread.
289 * This value is only used to get accurate flop accounting.
290 * Since we cannot check for NULL pointers in Fortran it must
291 * always be provided, but you can discard
292 * the result if you don't need it.
293 * \param inneriter The number of iterations performed in the inner loop
294 * of the kernel will be written to this integer; compare
295 * with nouter. Since we cannot check for NULL pointers in
296 * Fortran it must always be provided, but you can discard
297 * the result if you don't need it.
298 * \param work Double precision work array - to be used for optimization.
300 * \warning There is \a very little error control inside
301 * the nonbonded kernels in order to maximize performance.
302 * It cannot be overemphasized that you should never call
303 * them directly, but rely on the higher-level routines.
305 typedef void
306 nb_kernel_t(int * nri,
307 int * iinr,
308 int * jindex,
309 int * jjnr,
310 int * shift,
311 real * shiftvec,
312 real * fshift,
313 int * gid,
314 real * pos,
315 real * faction,
316 real * charge,
317 real * facel,
318 real * krf,
319 real * crf,
320 real * vc,
321 int * type,
322 int * ntype,
323 real * vdwparam,
324 real * vvdw,
325 real * tabscale,
326 real * vftab,
327 real * invsqrta,
328 real * dvda,
329 real * gbtabscale,
330 real * gbtab,
331 int * nthreads,
332 int * count,
333 void * mtx,
334 int * outeriter,
335 int * inneriter,
336 real * work);
340 #ifdef __cplusplus
342 #endif
345 #endif /* _NB_KERNEL_H_ */