Remove all unnecessary HAVE_CONFIG_H
[gromacs.git] / src / gromacs / mdlib / forcerec.c
blobe0fd0d8eb813a45c68feb17d99d0404c1d32aff9
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "config.h"
39 #include <assert.h>
40 #include <math.h>
41 #include <string.h>
43 #include "typedefs.h"
44 #include "types/commrec.h"
45 #include "gromacs/math/vec.h"
46 #include "gromacs/math/utilities.h"
47 #include "macros.h"
48 #include "macros.h"
49 #include "gromacs/math/units.h"
50 #include "force.h"
51 #include "tables.h"
52 #include "nonbonded.h"
53 #include "names.h"
54 #include "network.h"
55 #include "ns.h"
56 #include "txtdump.h"
57 #include "coulomb.h"
58 #include "md_support.h"
59 #include "md_logging.h"
60 #include "domdec.h"
61 #include "qmmm.h"
62 #include "copyrite.h"
63 #include "gromacs/topology/mtop_util.h"
64 #include "nbnxn_simd.h"
65 #include "nbnxn_search.h"
66 #include "nbnxn_atomdata.h"
67 #include "nbnxn_consts.h"
68 #include "gmx_omp_nthreads.h"
69 #include "gmx_detect_hardware.h"
70 #include "inputrec.h"
72 #include "gromacs/pbcutil/ishift.h"
73 #include "gromacs/pbcutil/pbc.h"
74 #include "gromacs/utility/fatalerror.h"
75 #include "gromacs/utility/smalloc.h"
77 #include "types/nbnxn_cuda_types_ext.h"
78 #include "gpu_utils.h"
79 #include "nbnxn_cuda_data_mgmt.h"
80 #include "pmalloc_cuda.h"
82 t_forcerec *mk_forcerec(void)
84 t_forcerec *fr;
86 snew(fr, 1);
88 return fr;
91 #ifdef DEBUG
92 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
94 int i, j;
96 for (i = 0; (i < atnr); i++)
98 for (j = 0; (j < atnr); j++)
100 fprintf(fp, "%2d - %2d", i, j);
101 if (bBHAM)
103 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
104 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
106 else
108 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
109 C12(nbfp, atnr, i, j)/12.0);
114 #endif
116 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
118 real *nbfp;
119 int i, j, k, atnr;
121 atnr = idef->atnr;
122 if (bBHAM)
124 snew(nbfp, 3*atnr*atnr);
125 for (i = k = 0; (i < atnr); i++)
127 for (j = 0; (j < atnr); j++, k++)
129 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
130 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
131 /* nbfp now includes the 6.0 derivative prefactor */
132 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
136 else
138 snew(nbfp, 2*atnr*atnr);
139 for (i = k = 0; (i < atnr); i++)
141 for (j = 0; (j < atnr); j++, k++)
143 /* nbfp now includes the 6.0/12.0 derivative prefactors */
144 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
145 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
150 return nbfp;
153 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
155 int i, j, k, atnr;
156 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
157 real *grid;
159 /* For LJ-PME simulations, we correct the energies with the reciprocal space
160 * inside of the cut-off. To do this the non-bonded kernels needs to have
161 * access to the C6-values used on the reciprocal grid in pme.c
164 atnr = idef->atnr;
165 snew(grid, 2*atnr*atnr);
166 for (i = k = 0; (i < atnr); i++)
168 for (j = 0; (j < atnr); j++, k++)
170 c6i = idef->iparams[i*(atnr+1)].lj.c6;
171 c12i = idef->iparams[i*(atnr+1)].lj.c12;
172 c6j = idef->iparams[j*(atnr+1)].lj.c6;
173 c12j = idef->iparams[j*(atnr+1)].lj.c12;
174 c6 = sqrt(c6i * c6j);
175 if (fr->ljpme_combination_rule == eljpmeLB
176 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
178 sigmai = pow(c12i / c6i, 1.0/6.0);
179 sigmaj = pow(c12j / c6j, 1.0/6.0);
180 epsi = c6i * c6i / c12i;
181 epsj = c6j * c6j / c12j;
182 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
184 /* Store the elements at the same relative positions as C6 in nbfp in order
185 * to simplify access in the kernels
187 grid[2*(atnr*i+j)] = c6*6.0;
190 return grid;
193 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
195 real *nbfp;
196 int i, j, k, atnr;
197 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
198 real c6, c12;
200 atnr = idef->atnr;
201 snew(nbfp, 2*atnr*atnr);
202 for (i = 0; i < atnr; ++i)
204 for (j = 0; j < atnr; ++j)
206 c6i = idef->iparams[i*(atnr+1)].lj.c6;
207 c12i = idef->iparams[i*(atnr+1)].lj.c12;
208 c6j = idef->iparams[j*(atnr+1)].lj.c6;
209 c12j = idef->iparams[j*(atnr+1)].lj.c12;
210 c6 = sqrt(c6i * c6j);
211 c12 = sqrt(c12i * c12j);
212 if (comb_rule == eCOMB_ARITHMETIC
213 && !gmx_numzero(c6) && !gmx_numzero(c12))
215 sigmai = pow(c12i / c6i, 1.0/6.0);
216 sigmaj = pow(c12j / c6j, 1.0/6.0);
217 epsi = c6i * c6i / c12i;
218 epsj = c6j * c6j / c12j;
219 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
220 c12 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
222 C6(nbfp, atnr, i, j) = c6*6.0;
223 C12(nbfp, atnr, i, j) = c12*12.0;
226 return nbfp;
229 /* This routine sets fr->solvent_opt to the most common solvent in the
230 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
231 * the fr->solvent_type array with the correct type (or esolNO).
233 * Charge groups that fulfill the conditions but are not identical to the
234 * most common one will be marked as esolNO in the solvent_type array.
236 * TIP3p is identical to SPC for these purposes, so we call it
237 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
239 * NOTE: QM particle should not
240 * become an optimized solvent. Not even if there is only one charge
241 * group in the Qm
244 typedef struct
246 int model;
247 int count;
248 int vdwtype[4];
249 real charge[4];
250 } solvent_parameters_t;
252 static void
253 check_solvent_cg(const gmx_moltype_t *molt,
254 int cg0,
255 int nmol,
256 const unsigned char *qm_grpnr,
257 const t_grps *qm_grps,
258 t_forcerec * fr,
259 int *n_solvent_parameters,
260 solvent_parameters_t **solvent_parameters_p,
261 int cginfo,
262 int *cg_sp)
264 const t_blocka *excl;
265 t_atom *atom;
266 int j, k;
267 int j0, j1, nj;
268 gmx_bool perturbed;
269 gmx_bool has_vdw[4];
270 gmx_bool match;
271 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
272 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
273 int tjA;
274 gmx_bool qm;
275 solvent_parameters_t *solvent_parameters;
277 /* We use a list with parameters for each solvent type.
278 * Every time we discover a new molecule that fulfills the basic
279 * conditions for a solvent we compare with the previous entries
280 * in these lists. If the parameters are the same we just increment
281 * the counter for that type, and otherwise we create a new type
282 * based on the current molecule.
284 * Once we've finished going through all molecules we check which
285 * solvent is most common, and mark all those molecules while we
286 * clear the flag on all others.
289 solvent_parameters = *solvent_parameters_p;
291 /* Mark the cg first as non optimized */
292 *cg_sp = -1;
294 /* Check if this cg has no exclusions with atoms in other charge groups
295 * and all atoms inside the charge group excluded.
296 * We only have 3 or 4 atom solvent loops.
298 if (GET_CGINFO_EXCL_INTER(cginfo) ||
299 !GET_CGINFO_EXCL_INTRA(cginfo))
301 return;
304 /* Get the indices of the first atom in this charge group */
305 j0 = molt->cgs.index[cg0];
306 j1 = molt->cgs.index[cg0+1];
308 /* Number of atoms in our molecule */
309 nj = j1 - j0;
311 if (debug)
313 fprintf(debug,
314 "Moltype '%s': there are %d atoms in this charge group\n",
315 *molt->name, nj);
318 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
319 * otherwise skip it.
321 if (nj < 3 || nj > 4)
323 return;
326 /* Check if we are doing QM on this group */
327 qm = FALSE;
328 if (qm_grpnr != NULL)
330 for (j = j0; j < j1 && !qm; j++)
332 qm = (qm_grpnr[j] < qm_grps->nr - 1);
335 /* Cannot use solvent optimization with QM */
336 if (qm)
338 return;
341 atom = molt->atoms.atom;
343 /* Still looks like a solvent, time to check parameters */
345 /* If it is perturbed (free energy) we can't use the solvent loops,
346 * so then we just skip to the next molecule.
348 perturbed = FALSE;
350 for (j = j0; j < j1 && !perturbed; j++)
352 perturbed = PERTURBED(atom[j]);
355 if (perturbed)
357 return;
360 /* Now it's only a question if the VdW and charge parameters
361 * are OK. Before doing the check we compare and see if they are
362 * identical to a possible previous solvent type.
363 * First we assign the current types and charges.
365 for (j = 0; j < nj; j++)
367 tmp_vdwtype[j] = atom[j0+j].type;
368 tmp_charge[j] = atom[j0+j].q;
371 /* Does it match any previous solvent type? */
372 for (k = 0; k < *n_solvent_parameters; k++)
374 match = TRUE;
377 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
378 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
379 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
381 match = FALSE;
384 /* Check that types & charges match for all atoms in molecule */
385 for (j = 0; j < nj && match == TRUE; j++)
387 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
389 match = FALSE;
391 if (tmp_charge[j] != solvent_parameters[k].charge[j])
393 match = FALSE;
396 if (match == TRUE)
398 /* Congratulations! We have a matched solvent.
399 * Flag it with this type for later processing.
401 *cg_sp = k;
402 solvent_parameters[k].count += nmol;
404 /* We are done with this charge group */
405 return;
409 /* If we get here, we have a tentative new solvent type.
410 * Before we add it we must check that it fulfills the requirements
411 * of the solvent optimized loops. First determine which atoms have
412 * VdW interactions.
414 for (j = 0; j < nj; j++)
416 has_vdw[j] = FALSE;
417 tjA = tmp_vdwtype[j];
419 /* Go through all other tpes and see if any have non-zero
420 * VdW parameters when combined with this one.
422 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
424 /* We already checked that the atoms weren't perturbed,
425 * so we only need to check state A now.
427 if (fr->bBHAM)
429 has_vdw[j] = (has_vdw[j] ||
430 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
431 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
432 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
434 else
436 /* Standard LJ */
437 has_vdw[j] = (has_vdw[j] ||
438 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
439 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
444 /* Now we know all we need to make the final check and assignment. */
445 if (nj == 3)
447 /* So, is it an SPC?
448 * For this we require thatn all atoms have charge,
449 * the charges on atom 2 & 3 should be the same, and only
450 * atom 1 might have VdW.
452 if (has_vdw[1] == FALSE &&
453 has_vdw[2] == FALSE &&
454 tmp_charge[0] != 0 &&
455 tmp_charge[1] != 0 &&
456 tmp_charge[2] == tmp_charge[1])
458 srenew(solvent_parameters, *n_solvent_parameters+1);
459 solvent_parameters[*n_solvent_parameters].model = esolSPC;
460 solvent_parameters[*n_solvent_parameters].count = nmol;
461 for (k = 0; k < 3; k++)
463 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
464 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
467 *cg_sp = *n_solvent_parameters;
468 (*n_solvent_parameters)++;
471 else if (nj == 4)
473 /* Or could it be a TIP4P?
474 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
475 * Only atom 1 mght have VdW.
477 if (has_vdw[1] == FALSE &&
478 has_vdw[2] == FALSE &&
479 has_vdw[3] == FALSE &&
480 tmp_charge[0] == 0 &&
481 tmp_charge[1] != 0 &&
482 tmp_charge[2] == tmp_charge[1] &&
483 tmp_charge[3] != 0)
485 srenew(solvent_parameters, *n_solvent_parameters+1);
486 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
487 solvent_parameters[*n_solvent_parameters].count = nmol;
488 for (k = 0; k < 4; k++)
490 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
491 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
494 *cg_sp = *n_solvent_parameters;
495 (*n_solvent_parameters)++;
499 *solvent_parameters_p = solvent_parameters;
502 static void
503 check_solvent(FILE * fp,
504 const gmx_mtop_t * mtop,
505 t_forcerec * fr,
506 cginfo_mb_t *cginfo_mb)
508 const t_block * cgs;
509 const t_block * mols;
510 const gmx_moltype_t *molt;
511 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
512 int n_solvent_parameters;
513 solvent_parameters_t *solvent_parameters;
514 int **cg_sp;
515 int bestsp, bestsol;
517 if (debug)
519 fprintf(debug, "Going to determine what solvent types we have.\n");
522 mols = &mtop->mols;
524 n_solvent_parameters = 0;
525 solvent_parameters = NULL;
526 /* Allocate temporary array for solvent type */
527 snew(cg_sp, mtop->nmolblock);
529 cg_offset = 0;
530 at_offset = 0;
531 for (mb = 0; mb < mtop->nmolblock; mb++)
533 molt = &mtop->moltype[mtop->molblock[mb].type];
534 cgs = &molt->cgs;
535 /* Here we have to loop over all individual molecules
536 * because we need to check for QMMM particles.
538 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
539 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
540 nmol = mtop->molblock[mb].nmol/nmol_ch;
541 for (mol = 0; mol < nmol_ch; mol++)
543 cgm = mol*cgs->nr;
544 am = mol*cgs->index[cgs->nr];
545 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
547 check_solvent_cg(molt, cg_mol, nmol,
548 mtop->groups.grpnr[egcQMMM] ?
549 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
550 &mtop->groups.grps[egcQMMM],
552 &n_solvent_parameters, &solvent_parameters,
553 cginfo_mb[mb].cginfo[cgm+cg_mol],
554 &cg_sp[mb][cgm+cg_mol]);
557 cg_offset += cgs->nr;
558 at_offset += cgs->index[cgs->nr];
561 /* Puh! We finished going through all charge groups.
562 * Now find the most common solvent model.
565 /* Most common solvent this far */
566 bestsp = -2;
567 for (i = 0; i < n_solvent_parameters; i++)
569 if (bestsp == -2 ||
570 solvent_parameters[i].count > solvent_parameters[bestsp].count)
572 bestsp = i;
576 if (bestsp >= 0)
578 bestsol = solvent_parameters[bestsp].model;
580 else
582 bestsol = esolNO;
585 #ifdef DISABLE_WATER_NLIST
586 bestsol = esolNO;
587 #endif
589 fr->nWatMol = 0;
590 for (mb = 0; mb < mtop->nmolblock; mb++)
592 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
593 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
594 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
596 if (cg_sp[mb][i] == bestsp)
598 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
599 fr->nWatMol += nmol;
601 else
603 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
606 sfree(cg_sp[mb]);
608 sfree(cg_sp);
610 if (bestsol != esolNO && fp != NULL)
612 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
613 esol_names[bestsol],
614 solvent_parameters[bestsp].count);
617 sfree(solvent_parameters);
618 fr->solvent_opt = bestsol;
621 enum {
622 acNONE = 0, acCONSTRAINT, acSETTLE
625 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
626 t_forcerec *fr, gmx_bool bNoSolvOpt,
627 gmx_bool *bFEP_NonBonded,
628 gmx_bool *bExcl_IntraCGAll_InterCGNone)
630 const t_block *cgs;
631 const t_blocka *excl;
632 const gmx_moltype_t *molt;
633 const gmx_molblock_t *molb;
634 cginfo_mb_t *cginfo_mb;
635 gmx_bool *type_VDW;
636 int *cginfo;
637 int cg_offset, a_offset, cgm, am;
638 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
639 int *a_con;
640 int ftype;
641 int ia;
642 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
644 ncg_tot = ncg_mtop(mtop);
645 snew(cginfo_mb, mtop->nmolblock);
647 snew(type_VDW, fr->ntype);
648 for (ai = 0; ai < fr->ntype; ai++)
650 type_VDW[ai] = FALSE;
651 for (j = 0; j < fr->ntype; j++)
653 type_VDW[ai] = type_VDW[ai] ||
654 fr->bBHAM ||
655 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
656 C12(fr->nbfp, fr->ntype, ai, j) != 0;
660 *bFEP_NonBonded = FALSE;
661 *bExcl_IntraCGAll_InterCGNone = TRUE;
663 excl_nalloc = 10;
664 snew(bExcl, excl_nalloc);
665 cg_offset = 0;
666 a_offset = 0;
667 for (mb = 0; mb < mtop->nmolblock; mb++)
669 molb = &mtop->molblock[mb];
670 molt = &mtop->moltype[molb->type];
671 cgs = &molt->cgs;
672 excl = &molt->excls;
674 /* Check if the cginfo is identical for all molecules in this block.
675 * If so, we only need an array of the size of one molecule.
676 * Otherwise we make an array of #mol times #cgs per molecule.
678 bId = TRUE;
679 am = 0;
680 for (m = 0; m < molb->nmol; m++)
682 am = m*cgs->index[cgs->nr];
683 for (cg = 0; cg < cgs->nr; cg++)
685 a0 = cgs->index[cg];
686 a1 = cgs->index[cg+1];
687 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
688 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
690 bId = FALSE;
692 if (mtop->groups.grpnr[egcQMMM] != NULL)
694 for (ai = a0; ai < a1; ai++)
696 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
697 mtop->groups.grpnr[egcQMMM][a_offset +ai])
699 bId = FALSE;
706 cginfo_mb[mb].cg_start = cg_offset;
707 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
708 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
709 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
710 cginfo = cginfo_mb[mb].cginfo;
712 /* Set constraints flags for constrained atoms */
713 snew(a_con, molt->atoms.nr);
714 for (ftype = 0; ftype < F_NRE; ftype++)
716 if (interaction_function[ftype].flags & IF_CONSTRAINT)
718 int nral;
720 nral = NRAL(ftype);
721 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
723 int a;
725 for (a = 0; a < nral; a++)
727 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
728 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
734 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
736 cgm = m*cgs->nr;
737 am = m*cgs->index[cgs->nr];
738 for (cg = 0; cg < cgs->nr; cg++)
740 a0 = cgs->index[cg];
741 a1 = cgs->index[cg+1];
743 /* Store the energy group in cginfo */
744 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
745 SET_CGINFO_GID(cginfo[cgm+cg], gid);
747 /* Check the intra/inter charge group exclusions */
748 if (a1-a0 > excl_nalloc)
750 excl_nalloc = a1 - a0;
751 srenew(bExcl, excl_nalloc);
753 /* bExclIntraAll: all intra cg interactions excluded
754 * bExclInter: any inter cg interactions excluded
756 bExclIntraAll = TRUE;
757 bExclInter = FALSE;
758 bHaveVDW = FALSE;
759 bHaveQ = FALSE;
760 bHavePerturbedAtoms = FALSE;
761 for (ai = a0; ai < a1; ai++)
763 /* Check VDW and electrostatic interactions */
764 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
765 type_VDW[molt->atoms.atom[ai].typeB]);
766 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
767 molt->atoms.atom[ai].qB != 0);
769 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
771 /* Clear the exclusion list for atom ai */
772 for (aj = a0; aj < a1; aj++)
774 bExcl[aj-a0] = FALSE;
776 /* Loop over all the exclusions of atom ai */
777 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
779 aj = excl->a[j];
780 if (aj < a0 || aj >= a1)
782 bExclInter = TRUE;
784 else
786 bExcl[aj-a0] = TRUE;
789 /* Check if ai excludes a0 to a1 */
790 for (aj = a0; aj < a1; aj++)
792 if (!bExcl[aj-a0])
794 bExclIntraAll = FALSE;
798 switch (a_con[ai])
800 case acCONSTRAINT:
801 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
802 break;
803 case acSETTLE:
804 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
805 break;
806 default:
807 break;
810 if (bExclIntraAll)
812 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
814 if (bExclInter)
816 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
818 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
820 /* The size in cginfo is currently only read with DD */
821 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
823 if (bHaveVDW)
825 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
827 if (bHaveQ)
829 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
831 if (bHavePerturbedAtoms && fr->efep != efepNO)
833 SET_CGINFO_FEP(cginfo[cgm+cg]);
834 *bFEP_NonBonded = TRUE;
836 /* Store the charge group size */
837 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
839 if (!bExclIntraAll || bExclInter)
841 *bExcl_IntraCGAll_InterCGNone = FALSE;
846 sfree(a_con);
848 cg_offset += molb->nmol*cgs->nr;
849 a_offset += molb->nmol*cgs->index[cgs->nr];
851 sfree(bExcl);
853 /* the solvent optimizer is called after the QM is initialized,
854 * because we don't want to have the QM subsystemto become an
855 * optimized solvent
858 check_solvent(fplog, mtop, fr, cginfo_mb);
860 if (getenv("GMX_NO_SOLV_OPT"))
862 if (fplog)
864 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
865 "Disabling all solvent optimization\n");
867 fr->solvent_opt = esolNO;
869 if (bNoSolvOpt)
871 fr->solvent_opt = esolNO;
873 if (!fr->solvent_opt)
875 for (mb = 0; mb < mtop->nmolblock; mb++)
877 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
879 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
884 return cginfo_mb;
887 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
889 int ncg, mb, cg;
890 int *cginfo;
892 ncg = cgi_mb[nmb-1].cg_end;
893 snew(cginfo, ncg);
894 mb = 0;
895 for (cg = 0; cg < ncg; cg++)
897 while (cg >= cgi_mb[mb].cg_end)
899 mb++;
901 cginfo[cg] =
902 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
905 return cginfo;
908 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
910 /*This now calculates sum for q and c6*/
911 double qsum, q2sum, q, c6sum, c6;
912 int mb, nmol, i;
913 const t_atoms *atoms;
915 qsum = 0;
916 q2sum = 0;
917 c6sum = 0;
918 for (mb = 0; mb < mtop->nmolblock; mb++)
920 nmol = mtop->molblock[mb].nmol;
921 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
922 for (i = 0; i < atoms->nr; i++)
924 q = atoms->atom[i].q;
925 qsum += nmol*q;
926 q2sum += nmol*q*q;
927 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
928 c6sum += nmol*c6;
931 fr->qsum[0] = qsum;
932 fr->q2sum[0] = q2sum;
933 fr->c6sum[0] = c6sum;
935 if (fr->efep != efepNO)
937 qsum = 0;
938 q2sum = 0;
939 c6sum = 0;
940 for (mb = 0; mb < mtop->nmolblock; mb++)
942 nmol = mtop->molblock[mb].nmol;
943 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
944 for (i = 0; i < atoms->nr; i++)
946 q = atoms->atom[i].qB;
947 qsum += nmol*q;
948 q2sum += nmol*q*q;
949 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
950 c6sum += nmol*c6;
952 fr->qsum[1] = qsum;
953 fr->q2sum[1] = q2sum;
954 fr->c6sum[1] = c6sum;
957 else
959 fr->qsum[1] = fr->qsum[0];
960 fr->q2sum[1] = fr->q2sum[0];
961 fr->c6sum[1] = fr->c6sum[0];
963 if (log)
965 if (fr->efep == efepNO)
967 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
969 else
971 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
972 fr->qsum[0], fr->qsum[1]);
977 void update_forcerec(t_forcerec *fr, matrix box)
979 if (fr->eeltype == eelGRF)
981 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
982 fr->rcoulomb, fr->temp, fr->zsquare, box,
983 &fr->kappa, &fr->k_rf, &fr->c_rf);
987 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
989 const t_atoms *atoms, *atoms_tpi;
990 const t_blocka *excl;
991 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
992 gmx_int64_t npair, npair_ij, tmpi, tmpj;
993 double csix, ctwelve;
994 int ntp, *typecount;
995 gmx_bool bBHAM;
996 real *nbfp;
997 real *nbfp_comb = NULL;
999 ntp = fr->ntype;
1000 bBHAM = fr->bBHAM;
1001 nbfp = fr->nbfp;
1003 /* For LJ-PME, we want to correct for the difference between the
1004 * actual C6 values and the C6 values used by the LJ-PME based on
1005 * combination rules. */
1007 if (EVDW_PME(fr->vdwtype))
1009 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1010 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1011 for (tpi = 0; tpi < ntp; ++tpi)
1013 for (tpj = 0; tpj < ntp; ++tpj)
1015 C6(nbfp_comb, ntp, tpi, tpj) =
1016 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1017 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1020 nbfp = nbfp_comb;
1022 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1024 csix = 0;
1025 ctwelve = 0;
1026 npair = 0;
1027 nexcl = 0;
1028 if (!fr->n_tpi)
1030 /* Count the types so we avoid natoms^2 operations */
1031 snew(typecount, ntp);
1032 gmx_mtop_count_atomtypes(mtop, q, typecount);
1034 for (tpi = 0; tpi < ntp; tpi++)
1036 for (tpj = tpi; tpj < ntp; tpj++)
1038 tmpi = typecount[tpi];
1039 tmpj = typecount[tpj];
1040 if (tpi != tpj)
1042 npair_ij = tmpi*tmpj;
1044 else
1046 npair_ij = tmpi*(tmpi - 1)/2;
1048 if (bBHAM)
1050 /* nbfp now includes the 6.0 derivative prefactor */
1051 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1053 else
1055 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1056 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1057 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1059 npair += npair_ij;
1062 sfree(typecount);
1063 /* Subtract the excluded pairs.
1064 * The main reason for substracting exclusions is that in some cases
1065 * some combinations might never occur and the parameters could have
1066 * any value. These unused values should not influence the dispersion
1067 * correction.
1069 for (mb = 0; mb < mtop->nmolblock; mb++)
1071 nmol = mtop->molblock[mb].nmol;
1072 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1073 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1074 for (i = 0; (i < atoms->nr); i++)
1076 if (q == 0)
1078 tpi = atoms->atom[i].type;
1080 else
1082 tpi = atoms->atom[i].typeB;
1084 j1 = excl->index[i];
1085 j2 = excl->index[i+1];
1086 for (j = j1; j < j2; j++)
1088 k = excl->a[j];
1089 if (k > i)
1091 if (q == 0)
1093 tpj = atoms->atom[k].type;
1095 else
1097 tpj = atoms->atom[k].typeB;
1099 if (bBHAM)
1101 /* nbfp now includes the 6.0 derivative prefactor */
1102 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1104 else
1106 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1107 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1108 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1110 nexcl += nmol;
1116 else
1118 /* Only correct for the interaction of the test particle
1119 * with the rest of the system.
1121 atoms_tpi =
1122 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1124 npair = 0;
1125 for (mb = 0; mb < mtop->nmolblock; mb++)
1127 nmol = mtop->molblock[mb].nmol;
1128 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1129 for (j = 0; j < atoms->nr; j++)
1131 nmolc = nmol;
1132 /* Remove the interaction of the test charge group
1133 * with itself.
1135 if (mb == mtop->nmolblock-1)
1137 nmolc--;
1139 if (mb == 0 && nmol == 1)
1141 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1144 if (q == 0)
1146 tpj = atoms->atom[j].type;
1148 else
1150 tpj = atoms->atom[j].typeB;
1152 for (i = 0; i < fr->n_tpi; i++)
1154 if (q == 0)
1156 tpi = atoms_tpi->atom[i].type;
1158 else
1160 tpi = atoms_tpi->atom[i].typeB;
1162 if (bBHAM)
1164 /* nbfp now includes the 6.0 derivative prefactor */
1165 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1167 else
1169 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1170 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1171 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1173 npair += nmolc;
1178 if (npair - nexcl <= 0 && fplog)
1180 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1181 csix = 0;
1182 ctwelve = 0;
1184 else
1186 csix /= npair - nexcl;
1187 ctwelve /= npair - nexcl;
1189 if (debug)
1191 fprintf(debug, "Counted %d exclusions\n", nexcl);
1192 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1193 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1195 fr->avcsix[q] = csix;
1196 fr->avctwelve[q] = ctwelve;
1199 if (EVDW_PME(fr->vdwtype))
1201 sfree(nbfp_comb);
1204 if (fplog != NULL)
1206 if (fr->eDispCorr == edispcAllEner ||
1207 fr->eDispCorr == edispcAllEnerPres)
1209 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1210 fr->avcsix[0], fr->avctwelve[0]);
1212 else
1214 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1220 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1221 const gmx_mtop_t *mtop)
1223 const t_atoms *at1, *at2;
1224 int mt1, mt2, i, j, tpi, tpj, ntypes;
1225 real b, bmin;
1226 real *nbfp;
1228 if (fplog)
1230 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1232 nbfp = fr->nbfp;
1233 ntypes = fr->ntype;
1235 bmin = -1;
1236 fr->bham_b_max = 0;
1237 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1239 at1 = &mtop->moltype[mt1].atoms;
1240 for (i = 0; (i < at1->nr); i++)
1242 tpi = at1->atom[i].type;
1243 if (tpi >= ntypes)
1245 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1248 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1250 at2 = &mtop->moltype[mt2].atoms;
1251 for (j = 0; (j < at2->nr); j++)
1253 tpj = at2->atom[j].type;
1254 if (tpj >= ntypes)
1256 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1258 b = BHAMB(nbfp, ntypes, tpi, tpj);
1259 if (b > fr->bham_b_max)
1261 fr->bham_b_max = b;
1263 if ((b < bmin) || (bmin == -1))
1265 bmin = b;
1271 if (fplog)
1273 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1274 bmin, fr->bham_b_max);
1278 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1279 t_forcerec *fr, real rtab,
1280 const t_commrec *cr,
1281 const char *tabfn, char *eg1, char *eg2,
1282 t_nblists *nbl)
1284 char buf[STRLEN];
1285 int i, j;
1287 if (tabfn == NULL)
1289 if (debug)
1291 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1293 return;
1296 sprintf(buf, "%s", tabfn);
1297 if (eg1 && eg2)
1299 /* Append the two energy group names */
1300 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1301 eg1, eg2, ftp2ext(efXVG));
1303 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1304 /* Copy the contents of the table to separate coulomb and LJ tables too,
1305 * to improve cache performance.
1307 /* For performance reasons we want
1308 * the table data to be aligned to 16-byte. The pointers could be freed
1309 * but currently aren't.
1311 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1312 nbl->table_elec.format = nbl->table_elec_vdw.format;
1313 nbl->table_elec.r = nbl->table_elec_vdw.r;
1314 nbl->table_elec.n = nbl->table_elec_vdw.n;
1315 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1316 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1317 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1318 nbl->table_elec.ninteractions = 1;
1319 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1320 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1322 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1323 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1324 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1325 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1326 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1327 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1328 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1329 nbl->table_vdw.ninteractions = 2;
1330 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1331 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1333 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1335 for (j = 0; j < 4; j++)
1337 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1339 for (j = 0; j < 8; j++)
1341 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1346 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1347 int *ncount, int **count)
1349 const gmx_moltype_t *molt;
1350 const t_ilist *il;
1351 int mt, ftype, stride, i, j, tabnr;
1353 for (mt = 0; mt < mtop->nmoltype; mt++)
1355 molt = &mtop->moltype[mt];
1356 for (ftype = 0; ftype < F_NRE; ftype++)
1358 if (ftype == ftype1 || ftype == ftype2)
1360 il = &molt->ilist[ftype];
1361 stride = 1 + NRAL(ftype);
1362 for (i = 0; i < il->nr; i += stride)
1364 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1365 if (tabnr < 0)
1367 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1369 if (tabnr >= *ncount)
1371 srenew(*count, tabnr+1);
1372 for (j = *ncount; j < tabnr+1; j++)
1374 (*count)[j] = 0;
1376 *ncount = tabnr+1;
1378 (*count)[tabnr]++;
1385 static bondedtable_t *make_bonded_tables(FILE *fplog,
1386 int ftype1, int ftype2,
1387 const gmx_mtop_t *mtop,
1388 const char *basefn, const char *tabext)
1390 int i, ncount, *count;
1391 char tabfn[STRLEN];
1392 bondedtable_t *tab;
1394 tab = NULL;
1396 ncount = 0;
1397 count = NULL;
1398 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1400 if (ncount > 0)
1402 snew(tab, ncount);
1403 for (i = 0; i < ncount; i++)
1405 if (count[i] > 0)
1407 sprintf(tabfn, "%s", basefn);
1408 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1409 tabext, i, ftp2ext(efXVG));
1410 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1413 sfree(count);
1416 return tab;
1419 void forcerec_set_ranges(t_forcerec *fr,
1420 int ncg_home, int ncg_force,
1421 int natoms_force,
1422 int natoms_force_constr, int natoms_f_novirsum)
1424 fr->cg0 = 0;
1425 fr->hcg = ncg_home;
1427 /* fr->ncg_force is unused in the standard code,
1428 * but it can be useful for modified code dealing with charge groups.
1430 fr->ncg_force = ncg_force;
1431 fr->natoms_force = natoms_force;
1432 fr->natoms_force_constr = natoms_force_constr;
1434 if (fr->natoms_force_constr > fr->nalloc_force)
1436 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1438 if (fr->bTwinRange)
1440 srenew(fr->f_twin, fr->nalloc_force);
1444 if (fr->bF_NoVirSum)
1446 fr->f_novirsum_n = natoms_f_novirsum;
1447 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1449 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1450 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1453 else
1455 fr->f_novirsum_n = 0;
1459 static real cutoff_inf(real cutoff)
1461 if (cutoff == 0)
1463 cutoff = GMX_CUTOFF_INF;
1466 return cutoff;
1469 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1470 t_forcerec *fr, const t_inputrec *ir,
1471 const char *tabfn, const gmx_mtop_t *mtop,
1472 matrix box)
1474 char buf[STRLEN];
1475 int i, j;
1477 if (tabfn == NULL)
1479 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1480 return;
1483 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1485 sprintf(buf, "%s", tabfn);
1486 for (i = 0; i < ir->adress->n_tf_grps; i++)
1488 j = ir->adress->tf_table_index[i]; /* get energy group index */
1489 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1490 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1491 if (fp)
1493 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1495 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1500 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1502 gmx_bool bAllvsAll;
1504 bAllvsAll =
1506 ir->rlist == 0 &&
1507 ir->rcoulomb == 0 &&
1508 ir->rvdw == 0 &&
1509 ir->ePBC == epbcNONE &&
1510 ir->vdwtype == evdwCUT &&
1511 ir->coulombtype == eelCUT &&
1512 ir->efep == efepNO &&
1513 (ir->implicit_solvent == eisNO ||
1514 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1515 ir->gb_algorithm == egbHCT ||
1516 ir->gb_algorithm == egbOBC))) &&
1517 getenv("GMX_NO_ALLVSALL") == NULL
1520 if (bAllvsAll && ir->opts.ngener > 1)
1522 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1524 if (bPrintNote)
1526 if (MASTER(cr))
1528 fprintf(stderr, "\n%s\n", note);
1530 if (fp != NULL)
1532 fprintf(fp, "\n%s\n", note);
1535 bAllvsAll = FALSE;
1538 if (bAllvsAll && fp && MASTER(cr))
1540 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1543 return bAllvsAll;
1547 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1549 int t, i;
1551 /* These thread local data structures are used for bondeds only */
1552 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1554 if (fr->nthreads > 1)
1556 snew(fr->f_t, fr->nthreads);
1557 /* Thread 0 uses the global force and energy arrays */
1558 for (t = 1; t < fr->nthreads; t++)
1560 fr->f_t[t].f = NULL;
1561 fr->f_t[t].f_nalloc = 0;
1562 snew(fr->f_t[t].fshift, SHIFTS);
1563 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1564 for (i = 0; i < egNR; i++)
1566 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1573 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1574 const t_commrec *cr,
1575 const t_inputrec *ir,
1576 gmx_bool bGPU)
1578 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1580 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1581 bGPU ? "GPUs" : "SIMD kernels",
1582 bGPU ? "CPU only" : "plain-C kernels");
1583 return FALSE;
1586 return TRUE;
1590 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1591 int *kernel_type,
1592 int *ewald_excl)
1594 *kernel_type = nbnxnk4x4_PlainC;
1595 *ewald_excl = ewaldexclTable;
1597 #ifdef GMX_NBNXN_SIMD
1599 #ifdef GMX_NBNXN_SIMD_4XN
1600 *kernel_type = nbnxnk4xN_SIMD_4xN;
1601 #endif
1602 #ifdef GMX_NBNXN_SIMD_2XNN
1603 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1604 #endif
1606 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1607 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1608 * Currently this is based on the SIMD acceleration choice,
1609 * but it might be better to decide this at runtime based on CPU.
1611 * 4xN calculates more (zero) interactions, but has less pair-search
1612 * work and much better kernel instruction scheduling.
1614 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1615 * which doesn't have FMA, both the analytical and tabulated Ewald
1616 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1617 * 2x(4+4) because it results in significantly fewer pairs.
1618 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1619 * 10% with HT, 50% without HT. As we currently don't detect the actual
1620 * use of HT, use 4x8 to avoid a potential performance hit.
1621 * On Intel Haswell 4x8 is always faster.
1623 *kernel_type = nbnxnk4xN_SIMD_4xN;
1625 #ifndef GMX_SIMD_HAVE_FMA
1626 if (EEL_PME_EWALD(ir->coulombtype) ||
1627 EVDW_PME(ir->vdwtype))
1629 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1630 * There are enough instructions to make 2x(4+4) efficient.
1632 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1634 #endif
1635 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1638 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1640 #ifdef GMX_NBNXN_SIMD_4XN
1641 *kernel_type = nbnxnk4xN_SIMD_4xN;
1642 #else
1643 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1644 #endif
1646 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1648 #ifdef GMX_NBNXN_SIMD_2XNN
1649 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1650 #else
1651 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1652 #endif
1655 /* Analytical Ewald exclusion correction is only an option in
1656 * the SIMD kernel.
1657 * Since table lookup's don't parallelize with SIMD, analytical
1658 * will probably always be faster for a SIMD width of 8 or more.
1659 * With FMA analytical is sometimes faster for a width if 4 as well.
1660 * On BlueGene/Q, this is faster regardless of precision.
1661 * In single precision, this is faster on Bulldozer.
1663 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1664 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1665 defined GMX_SIMD_IBM_QPX
1666 *ewald_excl = ewaldexclAnalytical;
1667 #endif
1668 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1670 *ewald_excl = ewaldexclTable;
1672 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1674 *ewald_excl = ewaldexclAnalytical;
1678 #endif /* GMX_NBNXN_SIMD */
1682 const char *lookup_nbnxn_kernel_name(int kernel_type)
1684 const char *returnvalue = NULL;
1685 switch (kernel_type)
1687 case nbnxnkNotSet:
1688 returnvalue = "not set";
1689 break;
1690 case nbnxnk4x4_PlainC:
1691 returnvalue = "plain C";
1692 break;
1693 case nbnxnk4xN_SIMD_4xN:
1694 case nbnxnk4xN_SIMD_2xNN:
1695 #ifdef GMX_NBNXN_SIMD
1696 #if defined GMX_SIMD_X86_SSE2
1697 returnvalue = "SSE2";
1698 #elif defined GMX_SIMD_X86_SSE4_1
1699 returnvalue = "SSE4.1";
1700 #elif defined GMX_SIMD_X86_AVX_128_FMA
1701 returnvalue = "AVX_128_FMA";
1702 #elif defined GMX_SIMD_X86_AVX_256
1703 returnvalue = "AVX_256";
1704 #elif defined GMX_SIMD_X86_AVX2_256
1705 returnvalue = "AVX2_256";
1706 #else
1707 returnvalue = "SIMD";
1708 #endif
1709 #else /* GMX_NBNXN_SIMD */
1710 returnvalue = "not available";
1711 #endif /* GMX_NBNXN_SIMD */
1712 break;
1713 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1714 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1716 case nbnxnkNR:
1717 default:
1718 gmx_fatal(FARGS, "Illegal kernel type selected");
1719 returnvalue = NULL;
1720 break;
1722 return returnvalue;
1725 static void pick_nbnxn_kernel(FILE *fp,
1726 const t_commrec *cr,
1727 gmx_bool use_simd_kernels,
1728 gmx_bool bUseGPU,
1729 gmx_bool bEmulateGPU,
1730 const t_inputrec *ir,
1731 int *kernel_type,
1732 int *ewald_excl,
1733 gmx_bool bDoNonbonded)
1735 assert(kernel_type);
1737 *kernel_type = nbnxnkNotSet;
1738 *ewald_excl = ewaldexclTable;
1740 if (bEmulateGPU)
1742 *kernel_type = nbnxnk8x8x8_PlainC;
1744 if (bDoNonbonded)
1746 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1749 else if (bUseGPU)
1751 *kernel_type = nbnxnk8x8x8_CUDA;
1754 if (*kernel_type == nbnxnkNotSet)
1756 /* LJ PME with LB combination rule does 7 mesh operations.
1757 * This so slow that we don't compile SIMD non-bonded kernels for that.
1759 if (use_simd_kernels &&
1760 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1762 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1764 else
1766 *kernel_type = nbnxnk4x4_PlainC;
1770 if (bDoNonbonded && fp != NULL)
1772 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1773 lookup_nbnxn_kernel_name(*kernel_type),
1774 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1775 nbnxn_kernel_to_cj_size(*kernel_type));
1777 if (nbnxnk4x4_PlainC == *kernel_type ||
1778 nbnxnk8x8x8_PlainC == *kernel_type)
1780 md_print_warn(cr, fp,
1781 "WARNING: Using the slow %s kernels. This should\n"
1782 "not happen during routine usage on supported platforms.\n\n",
1783 lookup_nbnxn_kernel_name(*kernel_type));
1788 static void pick_nbnxn_resources(const t_commrec *cr,
1789 const gmx_hw_info_t *hwinfo,
1790 gmx_bool bDoNonbonded,
1791 gmx_bool *bUseGPU,
1792 gmx_bool *bEmulateGPU,
1793 const gmx_gpu_opt_t *gpu_opt)
1795 gmx_bool bEmulateGPUEnvVarSet;
1796 char gpu_err_str[STRLEN];
1798 *bUseGPU = FALSE;
1800 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1802 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1803 * GPUs (currently) only handle non-bonded calculations, we will
1804 * automatically switch to emulation if non-bonded calculations are
1805 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1806 * way to turn off GPU initialization, data movement, and cleanup.
1808 * GPU emulation can be useful to assess the performance one can expect by
1809 * adding GPU(s) to the machine. The conditional below allows this even
1810 * if mdrun is compiled without GPU acceleration support.
1811 * Note that you should freezing the system as otherwise it will explode.
1813 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1814 (!bDoNonbonded &&
1815 gpu_opt->ncuda_dev_use > 0));
1817 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1819 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1821 /* Each PP node will use the intra-node id-th device from the
1822 * list of detected/selected GPUs. */
1823 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1824 &hwinfo->gpu_info, gpu_opt))
1826 /* At this point the init should never fail as we made sure that
1827 * we have all the GPUs we need. If it still does, we'll bail. */
1828 gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
1829 cr->nodeid,
1830 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1831 cr->rank_pp_intranode),
1832 gpu_err_str);
1835 /* Here we actually turn on hardware GPU acceleration */
1836 *bUseGPU = TRUE;
1840 gmx_bool uses_simple_tables(int cutoff_scheme,
1841 nonbonded_verlet_t *nbv,
1842 int group)
1844 gmx_bool bUsesSimpleTables = TRUE;
1845 int grp_index;
1847 switch (cutoff_scheme)
1849 case ecutsGROUP:
1850 bUsesSimpleTables = TRUE;
1851 break;
1852 case ecutsVERLET:
1853 assert(NULL != nbv && NULL != nbv->grp);
1854 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1855 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1856 break;
1857 default:
1858 gmx_incons("unimplemented");
1860 return bUsesSimpleTables;
1863 static void init_ewald_f_table(interaction_const_t *ic,
1864 gmx_bool bUsesSimpleTables,
1865 real rtab)
1867 real maxr;
1869 if (bUsesSimpleTables)
1871 /* Get the Ewald table spacing based on Coulomb and/or LJ
1872 * Ewald coefficients and rtol.
1874 ic->tabq_scale = ewald_spline3_table_scale(ic);
1876 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1877 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1879 else
1881 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1882 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1883 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1886 sfree_aligned(ic->tabq_coul_FDV0);
1887 sfree_aligned(ic->tabq_coul_F);
1888 sfree_aligned(ic->tabq_coul_V);
1890 sfree_aligned(ic->tabq_vdw_FDV0);
1891 sfree_aligned(ic->tabq_vdw_F);
1892 sfree_aligned(ic->tabq_vdw_V);
1894 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1896 /* Create the original table data in FDV0 */
1897 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1898 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1899 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1900 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1901 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1904 if (EVDW_PME(ic->vdwtype))
1906 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1907 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1908 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1909 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1910 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1914 void init_interaction_const_tables(FILE *fp,
1915 interaction_const_t *ic,
1916 gmx_bool bUsesSimpleTables,
1917 real rtab)
1919 real spacing;
1921 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1923 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1925 if (fp != NULL)
1927 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1928 1/ic->tabq_scale, ic->tabq_size);
1933 static void clear_force_switch_constants(shift_consts_t *sc)
1935 sc->c2 = 0;
1936 sc->c3 = 0;
1937 sc->cpot = 0;
1940 static void force_switch_constants(real p,
1941 real rsw, real rc,
1942 shift_consts_t *sc)
1944 /* Here we determine the coefficient for shifting the force to zero
1945 * between distance rsw and the cut-off rc.
1946 * For a potential of r^-p, we have force p*r^-(p+1).
1947 * But to save flops we absorb p in the coefficient.
1948 * Thus we get:
1949 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1950 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1952 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1953 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1954 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1957 static void potential_switch_constants(real rsw, real rc,
1958 switch_consts_t *sc)
1960 /* The switch function is 1 at rsw and 0 at rc.
1961 * The derivative and second derivate are zero at both ends.
1962 * rsw = max(r - r_switch, 0)
1963 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1964 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1965 * force = force*dsw - potential*sw
1966 * potential *= sw
1968 sc->c3 = -10*pow(rc - rsw, -3);
1969 sc->c4 = 15*pow(rc - rsw, -4);
1970 sc->c5 = -6*pow(rc - rsw, -5);
1973 static void
1974 init_interaction_const(FILE *fp,
1975 const t_commrec gmx_unused *cr,
1976 interaction_const_t **interaction_const,
1977 const t_forcerec *fr,
1978 real rtab)
1980 interaction_const_t *ic;
1981 gmx_bool bUsesSimpleTables = TRUE;
1983 snew(ic, 1);
1985 /* Just allocate something so we can free it */
1986 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1987 snew_aligned(ic->tabq_coul_F, 16, 32);
1988 snew_aligned(ic->tabq_coul_V, 16, 32);
1990 ic->rlist = fr->rlist;
1991 ic->rlistlong = fr->rlistlong;
1993 /* Lennard-Jones */
1994 ic->vdwtype = fr->vdwtype;
1995 ic->vdw_modifier = fr->vdw_modifier;
1996 ic->rvdw = fr->rvdw;
1997 ic->rvdw_switch = fr->rvdw_switch;
1998 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1999 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
2000 ic->sh_lj_ewald = 0;
2001 clear_force_switch_constants(&ic->dispersion_shift);
2002 clear_force_switch_constants(&ic->repulsion_shift);
2004 switch (ic->vdw_modifier)
2006 case eintmodPOTSHIFT:
2007 /* Only shift the potential, don't touch the force */
2008 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2009 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
2010 if (EVDW_PME(ic->vdwtype))
2012 real crc2;
2014 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2015 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2017 break;
2018 case eintmodFORCESWITCH:
2019 /* Switch the force, switch and shift the potential */
2020 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2021 &ic->dispersion_shift);
2022 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2023 &ic->repulsion_shift);
2024 break;
2025 case eintmodPOTSWITCH:
2026 /* Switch the potential and force */
2027 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2028 &ic->vdw_switch);
2029 break;
2030 case eintmodNONE:
2031 case eintmodEXACTCUTOFF:
2032 /* Nothing to do here */
2033 break;
2034 default:
2035 gmx_incons("unimplemented potential modifier");
2038 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2040 /* Electrostatics */
2041 ic->eeltype = fr->eeltype;
2042 ic->coulomb_modifier = fr->coulomb_modifier;
2043 ic->rcoulomb = fr->rcoulomb;
2044 ic->epsilon_r = fr->epsilon_r;
2045 ic->epsfac = fr->epsfac;
2046 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2048 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2050 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2052 else
2054 ic->sh_ewald = 0;
2057 /* Reaction-field */
2058 if (EEL_RF(ic->eeltype))
2060 ic->epsilon_rf = fr->epsilon_rf;
2061 ic->k_rf = fr->k_rf;
2062 ic->c_rf = fr->c_rf;
2064 else
2066 /* For plain cut-off we might use the reaction-field kernels */
2067 ic->epsilon_rf = ic->epsilon_r;
2068 ic->k_rf = 0;
2069 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2071 ic->c_rf = 1/ic->rcoulomb;
2073 else
2075 ic->c_rf = 0;
2079 if (fp != NULL)
2081 real dispersion_shift;
2083 dispersion_shift = ic->dispersion_shift.cpot;
2084 if (EVDW_PME(ic->vdwtype))
2086 dispersion_shift -= ic->sh_lj_ewald;
2088 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2089 ic->repulsion_shift.cpot, dispersion_shift);
2091 if (ic->eeltype == eelCUT)
2093 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2095 else if (EEL_PME(ic->eeltype))
2097 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2099 fprintf(fp, "\n");
2102 *interaction_const = ic;
2104 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2106 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2108 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2109 * also sharing texture references. To keep the code simple, we don't
2110 * treat texture references as shared resources, but this means that
2111 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2112 * Hence, to ensure that the non-bonded kernels don't start before all
2113 * texture binding operations are finished, we need to wait for all ranks
2114 * to arrive here before continuing.
2116 * Note that we could omit this barrier if GPUs are not shared (or
2117 * texture objects are used), but as this is initialization code, there
2118 * is not point in complicating things.
2120 #ifdef GMX_THREAD_MPI
2121 if (PAR(cr))
2123 gmx_barrier(cr);
2125 #endif /* GMX_THREAD_MPI */
2128 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2129 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2132 static void init_nb_verlet(FILE *fp,
2133 nonbonded_verlet_t **nb_verlet,
2134 gmx_bool bFEP_NonBonded,
2135 const t_inputrec *ir,
2136 const t_forcerec *fr,
2137 const t_commrec *cr,
2138 const char *nbpu_opt)
2140 nonbonded_verlet_t *nbv;
2141 int i;
2142 char *env;
2143 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2145 nbnxn_alloc_t *nb_alloc;
2146 nbnxn_free_t *nb_free;
2148 snew(nbv, 1);
2150 pick_nbnxn_resources(cr, fr->hwinfo,
2151 fr->bNonbonded,
2152 &nbv->bUseGPU,
2153 &bEmulateGPU,
2154 fr->gpu_opt);
2156 nbv->nbs = NULL;
2158 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2159 for (i = 0; i < nbv->ngrp; i++)
2161 nbv->grp[i].nbl_lists.nnbl = 0;
2162 nbv->grp[i].nbat = NULL;
2163 nbv->grp[i].kernel_type = nbnxnkNotSet;
2165 if (i == 0) /* local */
2167 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2168 nbv->bUseGPU, bEmulateGPU, ir,
2169 &nbv->grp[i].kernel_type,
2170 &nbv->grp[i].ewald_excl,
2171 fr->bNonbonded);
2173 else /* non-local */
2175 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2177 /* Use GPU for local, select a CPU kernel for non-local */
2178 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2179 FALSE, FALSE, ir,
2180 &nbv->grp[i].kernel_type,
2181 &nbv->grp[i].ewald_excl,
2182 fr->bNonbonded);
2184 bHybridGPURun = TRUE;
2186 else
2188 /* Use the same kernel for local and non-local interactions */
2189 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2190 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2195 if (nbv->bUseGPU)
2197 /* init the NxN GPU data; the last argument tells whether we'll have
2198 * both local and non-local NB calculation on GPU */
2199 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2200 &fr->hwinfo->gpu_info, fr->gpu_opt,
2201 cr->rank_pp_intranode,
2202 (nbv->ngrp > 1) && !bHybridGPURun);
2204 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2206 char *end;
2208 nbv->min_ci_balanced = strtol(env, &end, 10);
2209 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2211 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2214 if (debug)
2216 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2217 nbv->min_ci_balanced);
2220 else
2222 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2223 if (debug)
2225 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2226 nbv->min_ci_balanced);
2230 else
2232 nbv->min_ci_balanced = 0;
2235 *nb_verlet = nbv;
2237 nbnxn_init_search(&nbv->nbs,
2238 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2239 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2240 bFEP_NonBonded,
2241 gmx_omp_nthreads_get(emntNonbonded));
2243 for (i = 0; i < nbv->ngrp; i++)
2245 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2247 nb_alloc = &pmalloc;
2248 nb_free = &pfree;
2250 else
2252 nb_alloc = NULL;
2253 nb_free = NULL;
2256 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2257 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2258 /* 8x8x8 "non-simple" lists are ATM always combined */
2259 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2260 nb_alloc, nb_free);
2262 if (i == 0 ||
2263 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2265 gmx_bool bSimpleList;
2266 int enbnxninitcombrule;
2268 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2270 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2272 /* Plain LJ cut-off: we can optimize with combination rules */
2273 enbnxninitcombrule = enbnxninitcombruleDETECT;
2275 else if (fr->vdwtype == evdwPME)
2277 /* LJ-PME: we need to use a combination rule for the grid */
2278 if (fr->ljpme_combination_rule == eljpmeGEOM)
2280 enbnxninitcombrule = enbnxninitcombruleGEOM;
2282 else
2284 enbnxninitcombrule = enbnxninitcombruleLB;
2287 else
2289 /* We use a full combination matrix: no rule required */
2290 enbnxninitcombrule = enbnxninitcombruleNONE;
2294 snew(nbv->grp[i].nbat, 1);
2295 nbnxn_atomdata_init(fp,
2296 nbv->grp[i].nbat,
2297 nbv->grp[i].kernel_type,
2298 enbnxninitcombrule,
2299 fr->ntype, fr->nbfp,
2300 ir->opts.ngener,
2301 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2302 nb_alloc, nb_free);
2304 else
2306 nbv->grp[i].nbat = nbv->grp[0].nbat;
2311 void init_forcerec(FILE *fp,
2312 const output_env_t oenv,
2313 t_forcerec *fr,
2314 t_fcdata *fcd,
2315 const t_inputrec *ir,
2316 const gmx_mtop_t *mtop,
2317 const t_commrec *cr,
2318 matrix box,
2319 const char *tabfn,
2320 const char *tabafn,
2321 const char *tabpfn,
2322 const char *tabbfn,
2323 const char *nbpu_opt,
2324 gmx_bool bNoSolvOpt,
2325 real print_force)
2327 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2328 real rtab;
2329 char *env;
2330 double dbl;
2331 const t_block *cgs;
2332 gmx_bool bGenericKernelOnly;
2333 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2334 gmx_bool bFEP_NonBonded;
2335 t_nblists *nbl;
2336 int *nm_ind, egp_flags;
2338 if (fr->hwinfo == NULL)
2340 /* Detect hardware, gather information.
2341 * In mdrun, hwinfo has already been set before calling init_forcerec.
2342 * Here we ignore GPUs, as tools will not use them anyhow.
2344 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2347 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2348 fr->use_simd_kernels = TRUE;
2350 fr->bDomDec = DOMAINDECOMP(cr);
2352 natoms = mtop->natoms;
2354 if (check_box(ir->ePBC, box))
2356 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2359 /* Test particle insertion ? */
2360 if (EI_TPI(ir->eI))
2362 /* Set to the size of the molecule to be inserted (the last one) */
2363 /* Because of old style topologies, we have to use the last cg
2364 * instead of the last molecule type.
2366 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2367 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2368 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2370 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2373 else
2375 fr->n_tpi = 0;
2378 /* Copy AdResS parameters */
2379 if (ir->bAdress)
2381 fr->adress_type = ir->adress->type;
2382 fr->adress_const_wf = ir->adress->const_wf;
2383 fr->adress_ex_width = ir->adress->ex_width;
2384 fr->adress_hy_width = ir->adress->hy_width;
2385 fr->adress_icor = ir->adress->icor;
2386 fr->adress_site = ir->adress->site;
2387 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2388 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2391 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2392 for (i = 0; i < ir->adress->n_energy_grps; i++)
2394 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2397 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2398 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2399 for (i = 0; i < fr->n_adress_tf_grps; i++)
2401 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2403 copy_rvec(ir->adress->refs, fr->adress_refs);
2405 else
2407 fr->adress_type = eAdressOff;
2408 fr->adress_do_hybridpairs = FALSE;
2411 /* Copy the user determined parameters */
2412 fr->userint1 = ir->userint1;
2413 fr->userint2 = ir->userint2;
2414 fr->userint3 = ir->userint3;
2415 fr->userint4 = ir->userint4;
2416 fr->userreal1 = ir->userreal1;
2417 fr->userreal2 = ir->userreal2;
2418 fr->userreal3 = ir->userreal3;
2419 fr->userreal4 = ir->userreal4;
2421 /* Shell stuff */
2422 fr->fc_stepsize = ir->fc_stepsize;
2424 /* Free energy */
2425 fr->efep = ir->efep;
2426 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2427 if (ir->fepvals->bScCoul)
2429 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2430 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2432 else
2434 fr->sc_alphacoul = 0;
2435 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2437 fr->sc_power = ir->fepvals->sc_power;
2438 fr->sc_r_power = ir->fepvals->sc_r_power;
2439 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2441 env = getenv("GMX_SCSIGMA_MIN");
2442 if (env != NULL)
2444 dbl = 0;
2445 sscanf(env, "%lf", &dbl);
2446 fr->sc_sigma6_min = pow(dbl, 6);
2447 if (fp)
2449 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2453 fr->bNonbonded = TRUE;
2454 if (getenv("GMX_NO_NONBONDED") != NULL)
2456 /* turn off non-bonded calculations */
2457 fr->bNonbonded = FALSE;
2458 md_print_warn(cr, fp,
2459 "Found environment variable GMX_NO_NONBONDED.\n"
2460 "Disabling nonbonded calculations.\n");
2463 bGenericKernelOnly = FALSE;
2465 /* We now check in the NS code whether a particular combination of interactions
2466 * can be used with water optimization, and disable it if that is not the case.
2469 if (getenv("GMX_NB_GENERIC") != NULL)
2471 if (fp != NULL)
2473 fprintf(fp,
2474 "Found environment variable GMX_NB_GENERIC.\n"
2475 "Disabling all interaction-specific nonbonded kernels, will only\n"
2476 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2478 bGenericKernelOnly = TRUE;
2481 if (bGenericKernelOnly == TRUE)
2483 bNoSolvOpt = TRUE;
2486 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2488 fr->use_simd_kernels = FALSE;
2489 if (fp != NULL)
2491 fprintf(fp,
2492 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2493 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2497 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2499 /* Check if we can/should do all-vs-all kernels */
2500 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2501 fr->AllvsAll_work = NULL;
2502 fr->AllvsAll_workgb = NULL;
2504 /* All-vs-all kernels have not been implemented in 4.6, and
2505 * the SIMD group kernels are also buggy in this case. Non-SIMD
2506 * group kernels are OK. See Redmine #1249. */
2507 if (fr->bAllvsAll)
2509 fr->bAllvsAll = FALSE;
2510 fr->use_simd_kernels = FALSE;
2511 if (fp != NULL)
2513 fprintf(fp,
2514 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2515 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2516 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2517 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2518 "we are proceeding by disabling all CPU architecture-specific\n"
2519 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2520 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2524 /* Neighbour searching stuff */
2525 fr->cutoff_scheme = ir->cutoff_scheme;
2526 fr->bGrid = (ir->ns_type == ensGRID);
2527 fr->ePBC = ir->ePBC;
2529 if (fr->cutoff_scheme == ecutsGROUP)
2531 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2532 "removed in a future release when 'verlet' supports all interaction forms.\n";
2534 if (MASTER(cr))
2536 fprintf(stderr, "\n%s\n", note);
2538 if (fp != NULL)
2540 fprintf(fp, "\n%s\n", note);
2544 /* Determine if we will do PBC for distances in bonded interactions */
2545 if (fr->ePBC == epbcNONE)
2547 fr->bMolPBC = FALSE;
2549 else
2551 if (!DOMAINDECOMP(cr))
2553 /* The group cut-off scheme and SHAKE assume charge groups
2554 * are whole, but not using molpbc is faster in most cases.
2556 if (fr->cutoff_scheme == ecutsGROUP ||
2557 (ir->eConstrAlg == econtSHAKE &&
2558 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2559 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2561 fr->bMolPBC = ir->bPeriodicMols;
2563 else
2565 fr->bMolPBC = TRUE;
2566 if (getenv("GMX_USE_GRAPH") != NULL)
2568 fr->bMolPBC = FALSE;
2569 if (fp)
2571 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2576 else
2578 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2581 fr->bGB = (ir->implicit_solvent == eisGBSA);
2583 fr->rc_scaling = ir->refcoord_scaling;
2584 copy_rvec(ir->posres_com, fr->posres_com);
2585 copy_rvec(ir->posres_comB, fr->posres_comB);
2586 fr->rlist = cutoff_inf(ir->rlist);
2587 fr->rlistlong = cutoff_inf(ir->rlistlong);
2588 fr->eeltype = ir->coulombtype;
2589 fr->vdwtype = ir->vdwtype;
2590 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2592 fr->coulomb_modifier = ir->coulomb_modifier;
2593 fr->vdw_modifier = ir->vdw_modifier;
2595 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2596 switch (fr->eeltype)
2598 case eelCUT:
2599 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2600 break;
2602 case eelRF:
2603 case eelGRF:
2604 case eelRF_NEC:
2605 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2606 break;
2608 case eelRF_ZERO:
2609 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2610 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2611 break;
2613 case eelSWITCH:
2614 case eelSHIFT:
2615 case eelUSER:
2616 case eelENCADSHIFT:
2617 case eelPMESWITCH:
2618 case eelPMEUSER:
2619 case eelPMEUSERSWITCH:
2620 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2621 break;
2623 case eelPME:
2624 case eelEWALD:
2625 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2626 break;
2628 default:
2629 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2630 break;
2633 /* Vdw: Translate from mdp settings to kernel format */
2634 switch (fr->vdwtype)
2636 case evdwCUT:
2637 if (fr->bBHAM)
2639 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2641 else
2643 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2645 break;
2646 case evdwPME:
2647 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2648 break;
2650 case evdwSWITCH:
2651 case evdwSHIFT:
2652 case evdwUSER:
2653 case evdwENCADSHIFT:
2654 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2655 break;
2657 default:
2658 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2659 break;
2662 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2663 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2664 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2666 fr->rvdw = cutoff_inf(ir->rvdw);
2667 fr->rvdw_switch = ir->rvdw_switch;
2668 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2669 fr->rcoulomb_switch = ir->rcoulomb_switch;
2671 fr->bTwinRange = fr->rlistlong > fr->rlist;
2672 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2674 fr->reppow = mtop->ffparams.reppow;
2676 if (ir->cutoff_scheme == ecutsGROUP)
2678 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2679 && !EVDW_PME(fr->vdwtype));
2680 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2681 fr->bcoultab = !(fr->eeltype == eelCUT ||
2682 fr->eeltype == eelEWALD ||
2683 fr->eeltype == eelPME ||
2684 fr->eeltype == eelRF ||
2685 fr->eeltype == eelRF_ZERO);
2687 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2688 * going to be faster to tabulate the interaction than calling the generic kernel.
2689 * However, if generic kernels have been requested we keep things analytically.
2691 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
2692 fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
2693 bGenericKernelOnly == FALSE)
2695 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2697 fr->bcoultab = TRUE;
2698 /* Once we tabulate electrostatics, we can use the switch function for LJ,
2699 * which would otherwise need two tables.
2703 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2704 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2705 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2706 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2708 if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
2710 fr->bcoultab = TRUE;
2714 if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
2716 fr->bcoultab = TRUE;
2718 if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
2720 fr->bvdwtab = TRUE;
2723 if (getenv("GMX_REQUIRE_TABLES"))
2725 fr->bvdwtab = TRUE;
2726 fr->bcoultab = TRUE;
2729 if (fp)
2731 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2732 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2735 if (fr->bvdwtab == TRUE)
2737 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2738 fr->nbkernel_vdw_modifier = eintmodNONE;
2740 if (fr->bcoultab == TRUE)
2742 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2743 fr->nbkernel_elec_modifier = eintmodNONE;
2747 if (ir->cutoff_scheme == ecutsVERLET)
2749 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2751 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2753 fr->bvdwtab = FALSE;
2754 fr->bcoultab = FALSE;
2757 /* Tables are used for direct ewald sum */
2758 if (fr->bEwald)
2760 if (EEL_PME(ir->coulombtype))
2762 if (fp)
2764 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2766 if (ir->coulombtype == eelP3M_AD)
2768 please_cite(fp, "Hockney1988");
2769 please_cite(fp, "Ballenegger2012");
2771 else
2773 please_cite(fp, "Essmann95a");
2776 if (ir->ewald_geometry == eewg3DC)
2778 if (fp)
2780 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2782 please_cite(fp, "In-Chul99a");
2785 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2786 init_ewald_tab(&(fr->ewald_table), ir, fp);
2787 if (fp)
2789 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2790 1/fr->ewaldcoeff_q);
2794 if (EVDW_PME(ir->vdwtype))
2796 if (fp)
2798 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2800 please_cite(fp, "Essmann95a");
2801 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2802 if (fp)
2804 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2805 1/fr->ewaldcoeff_lj);
2809 /* Electrostatics */
2810 fr->epsilon_r = ir->epsilon_r;
2811 fr->epsilon_rf = ir->epsilon_rf;
2812 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2814 /* Parameters for generalized RF */
2815 fr->zsquare = 0.0;
2816 fr->temp = 0.0;
2818 if (fr->eeltype == eelGRF)
2820 init_generalized_rf(fp, mtop, ir, fr);
2823 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2824 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2825 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2826 IR_ELEC_FIELD(*ir) ||
2827 (fr->adress_icor != eAdressICOff)
2830 if (fr->cutoff_scheme == ecutsGROUP &&
2831 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2833 /* Count the total number of charge groups */
2834 fr->cg_nalloc = ncg_mtop(mtop);
2835 srenew(fr->cg_cm, fr->cg_nalloc);
2837 if (fr->shift_vec == NULL)
2839 snew(fr->shift_vec, SHIFTS);
2842 if (fr->fshift == NULL)
2844 snew(fr->fshift, SHIFTS);
2847 if (fr->nbfp == NULL)
2849 fr->ntype = mtop->ffparams.atnr;
2850 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2851 if (EVDW_PME(fr->vdwtype))
2853 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2857 /* Copy the energy group exclusions */
2858 fr->egp_flags = ir->opts.egp_flags;
2860 /* Van der Waals stuff */
2861 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2863 if (fr->rvdw_switch >= fr->rvdw)
2865 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2866 fr->rvdw_switch, fr->rvdw);
2868 if (fp)
2870 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2871 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2872 fr->rvdw_switch, fr->rvdw);
2876 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2878 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2881 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2883 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2886 if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2888 gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2891 if (fp)
2893 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2894 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2897 fr->eDispCorr = ir->eDispCorr;
2898 if (ir->eDispCorr != edispcNO)
2900 set_avcsixtwelve(fp, fr, mtop);
2903 if (fr->bBHAM)
2905 set_bham_b_max(fp, fr, mtop);
2908 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2910 /* Copy the GBSA data (radius, volume and surftens for each
2911 * atomtype) from the topology atomtype section to forcerec.
2913 snew(fr->atype_radius, fr->ntype);
2914 snew(fr->atype_vol, fr->ntype);
2915 snew(fr->atype_surftens, fr->ntype);
2916 snew(fr->atype_gb_radius, fr->ntype);
2917 snew(fr->atype_S_hct, fr->ntype);
2919 if (mtop->atomtypes.nr > 0)
2921 for (i = 0; i < fr->ntype; i++)
2923 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2925 for (i = 0; i < fr->ntype; i++)
2927 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2929 for (i = 0; i < fr->ntype; i++)
2931 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2933 for (i = 0; i < fr->ntype; i++)
2935 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2937 for (i = 0; i < fr->ntype; i++)
2939 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2943 /* Generate the GB table if needed */
2944 if (fr->bGB)
2946 #ifdef GMX_DOUBLE
2947 fr->gbtabscale = 2000;
2948 #else
2949 fr->gbtabscale = 500;
2950 #endif
2952 fr->gbtabr = 100;
2953 fr->gbtab = make_gb_table(oenv, fr);
2955 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2957 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2958 if (!DOMAINDECOMP(cr))
2960 make_local_gb(cr, fr->born, ir->gb_algorithm);
2964 /* Set the charge scaling */
2965 if (fr->epsilon_r != 0)
2967 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2969 else
2971 /* eps = 0 is infinite dieletric: no coulomb interactions */
2972 fr->epsfac = 0;
2975 /* Reaction field constants */
2976 if (EEL_RF(fr->eeltype))
2978 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2979 fr->rcoulomb, fr->temp, fr->zsquare, box,
2980 &fr->kappa, &fr->k_rf, &fr->c_rf);
2983 /*This now calculates sum for q and c6*/
2984 set_chargesum(fp, fr, mtop);
2986 /* if we are using LR electrostatics, and they are tabulated,
2987 * the tables will contain modified coulomb interactions.
2988 * Since we want to use the non-shifted ones for 1-4
2989 * coulombic interactions, we must have an extra set of tables.
2992 /* Construct tables.
2993 * A little unnecessary to make both vdw and coul tables sometimes,
2994 * but what the heck... */
2996 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2997 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2999 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
3000 fr->coulomb_modifier != eintmodNONE ||
3001 fr->vdw_modifier != eintmodNONE ||
3002 fr->bBHAM || fr->bEwald) &&
3003 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
3004 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
3005 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
3007 negp_pp = ir->opts.ngener - ir->nwall;
3008 negptable = 0;
3009 if (!bMakeTables)
3011 bSomeNormalNbListsAreInUse = TRUE;
3012 fr->nnblists = 1;
3014 else
3016 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
3017 for (egi = 0; egi < negp_pp; egi++)
3019 for (egj = egi; egj < negp_pp; egj++)
3021 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3022 if (!(egp_flags & EGP_EXCL))
3024 if (egp_flags & EGP_TABLE)
3026 negptable++;
3028 else
3030 bSomeNormalNbListsAreInUse = TRUE;
3035 if (bSomeNormalNbListsAreInUse)
3037 fr->nnblists = negptable + 1;
3039 else
3041 fr->nnblists = negptable;
3043 if (fr->nnblists > 1)
3045 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3049 if (ir->adress)
3051 fr->nnblists *= 2;
3054 snew(fr->nblists, fr->nnblists);
3056 /* This code automatically gives table length tabext without cut-off's,
3057 * in that case grompp should already have checked that we do not need
3058 * normal tables and we only generate tables for 1-4 interactions.
3060 rtab = ir->rlistlong + ir->tabext;
3062 if (bMakeTables)
3064 /* make tables for ordinary interactions */
3065 if (bSomeNormalNbListsAreInUse)
3067 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3068 if (ir->adress)
3070 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3072 if (!bMakeSeparate14Table)
3074 fr->tab14 = fr->nblists[0].table_elec_vdw;
3076 m = 1;
3078 else
3080 m = 0;
3082 if (negptable > 0)
3084 /* Read the special tables for certain energy group pairs */
3085 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3086 for (egi = 0; egi < negp_pp; egi++)
3088 for (egj = egi; egj < negp_pp; egj++)
3090 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3091 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3093 nbl = &(fr->nblists[m]);
3094 if (fr->nnblists > 1)
3096 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3098 /* Read the table file with the two energy groups names appended */
3099 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3100 *mtop->groups.grpname[nm_ind[egi]],
3101 *mtop->groups.grpname[nm_ind[egj]],
3102 &fr->nblists[m]);
3103 if (ir->adress)
3105 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3106 *mtop->groups.grpname[nm_ind[egi]],
3107 *mtop->groups.grpname[nm_ind[egj]],
3108 &fr->nblists[fr->nnblists/2+m]);
3110 m++;
3112 else if (fr->nnblists > 1)
3114 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3120 else if ((fr->eDispCorr != edispcNO) &&
3121 ((fr->vdw_modifier == eintmodPOTSWITCH) ||
3122 (fr->vdw_modifier == eintmodFORCESWITCH) ||
3123 (fr->vdw_modifier == eintmodPOTSHIFT)))
3125 /* Tables might not be used for the potential modifier interactions per se, but
3126 * we still need them to evaluate switch/shift dispersion corrections in this case.
3128 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3131 if (bMakeSeparate14Table)
3133 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3134 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3135 GMX_MAKETABLES_14ONLY);
3138 /* Read AdResS Thermo Force table if needed */
3139 if (fr->adress_icor == eAdressICThermoForce)
3141 /* old todo replace */
3143 if (ir->adress->n_tf_grps > 0)
3145 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3148 else
3150 /* load the default table */
3151 snew(fr->atf_tabs, 1);
3152 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3156 /* Wall stuff */
3157 fr->nwall = ir->nwall;
3158 if (ir->nwall && ir->wall_type == ewtTABLE)
3160 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3163 if (fcd && tabbfn)
3165 fcd->bondtab = make_bonded_tables(fp,
3166 F_TABBONDS, F_TABBONDSNC,
3167 mtop, tabbfn, "b");
3168 fcd->angletab = make_bonded_tables(fp,
3169 F_TABANGLES, -1,
3170 mtop, tabbfn, "a");
3171 fcd->dihtab = make_bonded_tables(fp,
3172 F_TABDIHS, -1,
3173 mtop, tabbfn, "d");
3175 else
3177 if (debug)
3179 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3183 /* QM/MM initialization if requested
3185 if (ir->bQMMM)
3187 fprintf(stderr, "QM/MM calculation requested.\n");
3190 fr->bQMMM = ir->bQMMM;
3191 fr->qr = mk_QMMMrec();
3193 /* Set all the static charge group info */
3194 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3195 &bFEP_NonBonded,
3196 &fr->bExcl_IntraCGAll_InterCGNone);
3197 if (DOMAINDECOMP(cr))
3199 fr->cginfo = NULL;
3201 else
3203 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3206 if (!DOMAINDECOMP(cr))
3208 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3209 mtop->natoms, mtop->natoms, mtop->natoms);
3212 fr->print_force = print_force;
3215 /* coarse load balancing vars */
3216 fr->t_fnbf = 0.;
3217 fr->t_wait = 0.;
3218 fr->timesteps = 0;
3220 /* Initialize neighbor search */
3221 init_ns(fp, cr, &fr->ns, fr, mtop);
3223 if (cr->duty & DUTY_PP)
3225 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3227 if (ir->bAdress)
3229 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3234 /* Initialize the thread working data for bonded interactions */
3235 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3237 snew(fr->excl_load, fr->nthreads+1);
3239 if (fr->cutoff_scheme == ecutsVERLET)
3241 if (ir->rcoulomb != ir->rvdw)
3243 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3246 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3249 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3250 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3252 if (ir->eDispCorr != edispcNO)
3254 calc_enervirdiff(fp, ir->eDispCorr, fr);
3258 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3259 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3260 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3262 void pr_forcerec(FILE *fp, t_forcerec *fr)
3264 int i;
3266 pr_real(fp, fr->rlist);
3267 pr_real(fp, fr->rcoulomb);
3268 pr_real(fp, fr->fudgeQQ);
3269 pr_bool(fp, fr->bGrid);
3270 pr_bool(fp, fr->bTwinRange);
3271 /*pr_int(fp,fr->cg0);
3272 pr_int(fp,fr->hcg);*/
3273 for (i = 0; i < fr->nnblists; i++)
3275 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3277 pr_real(fp, fr->rcoulomb_switch);
3278 pr_real(fp, fr->rcoulomb);
3280 fflush(fp);
3283 void forcerec_set_excl_load(t_forcerec *fr,
3284 const gmx_localtop_t *top)
3286 const int *ind, *a;
3287 int t, i, j, ntot, n, ntarget;
3289 ind = top->excls.index;
3290 a = top->excls.a;
3292 ntot = 0;
3293 for (i = 0; i < top->excls.nr; i++)
3295 for (j = ind[i]; j < ind[i+1]; j++)
3297 if (a[j] > i)
3299 ntot++;
3304 fr->excl_load[0] = 0;
3305 n = 0;
3306 i = 0;
3307 for (t = 1; t <= fr->nthreads; t++)
3309 ntarget = (ntot*t)/fr->nthreads;
3310 while (i < top->excls.nr && n < ntarget)
3312 for (j = ind[i]; j < ind[i+1]; j++)
3314 if (a[j] > i)
3316 n++;
3319 i++;
3321 fr->excl_load[t] = i;