Move types/ishift.h to pbcutil/
[gromacs.git] / src / gromacs / mdlib / forcerec.c
blobc999e409144979472660e9954fed3ef6eae50806
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
41 #include <assert.h>
42 #include <math.h>
43 #include <string.h>
45 #include "typedefs.h"
46 #include "types/commrec.h"
47 #include "gromacs/math/vec.h"
48 #include "gromacs/math/utilities.h"
49 #include "macros.h"
50 #include "macros.h"
51 #include "gromacs/math/units.h"
52 #include "force.h"
53 #include "tables.h"
54 #include "nonbonded.h"
55 #include "names.h"
56 #include "network.h"
57 #include "ns.h"
58 #include "txtdump.h"
59 #include "coulomb.h"
60 #include "md_support.h"
61 #include "md_logging.h"
62 #include "domdec.h"
63 #include "qmmm.h"
64 #include "copyrite.h"
65 #include "mtop_util.h"
66 #include "nbnxn_simd.h"
67 #include "nbnxn_search.h"
68 #include "nbnxn_atomdata.h"
69 #include "nbnxn_consts.h"
70 #include "gmx_omp_nthreads.h"
71 #include "gmx_detect_hardware.h"
72 #include "inputrec.h"
74 #include "gromacs/pbcutil/ishift.h"
75 #include "gromacs/pbcutil/pbc.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/smalloc.h"
79 #include "types/nbnxn_cuda_types_ext.h"
80 #include "gpu_utils.h"
81 #include "nbnxn_cuda_data_mgmt.h"
82 #include "pmalloc_cuda.h"
84 t_forcerec *mk_forcerec(void)
86 t_forcerec *fr;
88 snew(fr, 1);
90 return fr;
93 #ifdef DEBUG
94 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
96 int i, j;
98 for (i = 0; (i < atnr); i++)
100 for (j = 0; (j < atnr); j++)
102 fprintf(fp, "%2d - %2d", i, j);
103 if (bBHAM)
105 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
106 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
108 else
110 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
111 C12(nbfp, atnr, i, j)/12.0);
116 #endif
118 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
120 real *nbfp;
121 int i, j, k, atnr;
123 atnr = idef->atnr;
124 if (bBHAM)
126 snew(nbfp, 3*atnr*atnr);
127 for (i = k = 0; (i < atnr); i++)
129 for (j = 0; (j < atnr); j++, k++)
131 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
132 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
133 /* nbfp now includes the 6.0 derivative prefactor */
134 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
138 else
140 snew(nbfp, 2*atnr*atnr);
141 for (i = k = 0; (i < atnr); i++)
143 for (j = 0; (j < atnr); j++, k++)
145 /* nbfp now includes the 6.0/12.0 derivative prefactors */
146 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
147 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
152 return nbfp;
155 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
157 int i, j, k, atnr;
158 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
159 real *grid;
161 /* For LJ-PME simulations, we correct the energies with the reciprocal space
162 * inside of the cut-off. To do this the non-bonded kernels needs to have
163 * access to the C6-values used on the reciprocal grid in pme.c
166 atnr = idef->atnr;
167 snew(grid, 2*atnr*atnr);
168 for (i = k = 0; (i < atnr); i++)
170 for (j = 0; (j < atnr); j++, k++)
172 c6i = idef->iparams[i*(atnr+1)].lj.c6;
173 c12i = idef->iparams[i*(atnr+1)].lj.c12;
174 c6j = idef->iparams[j*(atnr+1)].lj.c6;
175 c12j = idef->iparams[j*(atnr+1)].lj.c12;
176 c6 = sqrt(c6i * c6j);
177 if (fr->ljpme_combination_rule == eljpmeLB
178 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
180 sigmai = pow(c12i / c6i, 1.0/6.0);
181 sigmaj = pow(c12j / c6j, 1.0/6.0);
182 epsi = c6i * c6i / c12i;
183 epsj = c6j * c6j / c12j;
184 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
186 /* Store the elements at the same relative positions as C6 in nbfp in order
187 * to simplify access in the kernels
189 grid[2*(atnr*i+j)] = c6*6.0;
192 return grid;
195 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
197 real *nbfp;
198 int i, j, k, atnr;
199 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
200 real c6, c12;
202 atnr = idef->atnr;
203 snew(nbfp, 2*atnr*atnr);
204 for (i = 0; i < atnr; ++i)
206 for (j = 0; j < atnr; ++j)
208 c6i = idef->iparams[i*(atnr+1)].lj.c6;
209 c12i = idef->iparams[i*(atnr+1)].lj.c12;
210 c6j = idef->iparams[j*(atnr+1)].lj.c6;
211 c12j = idef->iparams[j*(atnr+1)].lj.c12;
212 c6 = sqrt(c6i * c6j);
213 c12 = sqrt(c12i * c12j);
214 if (comb_rule == eCOMB_ARITHMETIC
215 && !gmx_numzero(c6) && !gmx_numzero(c12))
217 sigmai = pow(c12i / c6i, 1.0/6.0);
218 sigmaj = pow(c12j / c6j, 1.0/6.0);
219 epsi = c6i * c6i / c12i;
220 epsj = c6j * c6j / c12j;
221 c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
222 c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
224 C6(nbfp, atnr, i, j) = c6*6.0;
225 C12(nbfp, atnr, i, j) = c12*12.0;
228 return nbfp;
231 /* This routine sets fr->solvent_opt to the most common solvent in the
232 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
233 * the fr->solvent_type array with the correct type (or esolNO).
235 * Charge groups that fulfill the conditions but are not identical to the
236 * most common one will be marked as esolNO in the solvent_type array.
238 * TIP3p is identical to SPC for these purposes, so we call it
239 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
241 * NOTE: QM particle should not
242 * become an optimized solvent. Not even if there is only one charge
243 * group in the Qm
246 typedef struct
248 int model;
249 int count;
250 int vdwtype[4];
251 real charge[4];
252 } solvent_parameters_t;
254 static void
255 check_solvent_cg(const gmx_moltype_t *molt,
256 int cg0,
257 int nmol,
258 const unsigned char *qm_grpnr,
259 const t_grps *qm_grps,
260 t_forcerec * fr,
261 int *n_solvent_parameters,
262 solvent_parameters_t **solvent_parameters_p,
263 int cginfo,
264 int *cg_sp)
266 const t_blocka *excl;
267 t_atom *atom;
268 int j, k;
269 int j0, j1, nj;
270 gmx_bool perturbed;
271 gmx_bool has_vdw[4];
272 gmx_bool match;
273 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
274 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
275 int tjA;
276 gmx_bool qm;
277 solvent_parameters_t *solvent_parameters;
279 /* We use a list with parameters for each solvent type.
280 * Every time we discover a new molecule that fulfills the basic
281 * conditions for a solvent we compare with the previous entries
282 * in these lists. If the parameters are the same we just increment
283 * the counter for that type, and otherwise we create a new type
284 * based on the current molecule.
286 * Once we've finished going through all molecules we check which
287 * solvent is most common, and mark all those molecules while we
288 * clear the flag on all others.
291 solvent_parameters = *solvent_parameters_p;
293 /* Mark the cg first as non optimized */
294 *cg_sp = -1;
296 /* Check if this cg has no exclusions with atoms in other charge groups
297 * and all atoms inside the charge group excluded.
298 * We only have 3 or 4 atom solvent loops.
300 if (GET_CGINFO_EXCL_INTER(cginfo) ||
301 !GET_CGINFO_EXCL_INTRA(cginfo))
303 return;
306 /* Get the indices of the first atom in this charge group */
307 j0 = molt->cgs.index[cg0];
308 j1 = molt->cgs.index[cg0+1];
310 /* Number of atoms in our molecule */
311 nj = j1 - j0;
313 if (debug)
315 fprintf(debug,
316 "Moltype '%s': there are %d atoms in this charge group\n",
317 *molt->name, nj);
320 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
321 * otherwise skip it.
323 if (nj < 3 || nj > 4)
325 return;
328 /* Check if we are doing QM on this group */
329 qm = FALSE;
330 if (qm_grpnr != NULL)
332 for (j = j0; j < j1 && !qm; j++)
334 qm = (qm_grpnr[j] < qm_grps->nr - 1);
337 /* Cannot use solvent optimization with QM */
338 if (qm)
340 return;
343 atom = molt->atoms.atom;
345 /* Still looks like a solvent, time to check parameters */
347 /* If it is perturbed (free energy) we can't use the solvent loops,
348 * so then we just skip to the next molecule.
350 perturbed = FALSE;
352 for (j = j0; j < j1 && !perturbed; j++)
354 perturbed = PERTURBED(atom[j]);
357 if (perturbed)
359 return;
362 /* Now it's only a question if the VdW and charge parameters
363 * are OK. Before doing the check we compare and see if they are
364 * identical to a possible previous solvent type.
365 * First we assign the current types and charges.
367 for (j = 0; j < nj; j++)
369 tmp_vdwtype[j] = atom[j0+j].type;
370 tmp_charge[j] = atom[j0+j].q;
373 /* Does it match any previous solvent type? */
374 for (k = 0; k < *n_solvent_parameters; k++)
376 match = TRUE;
379 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
380 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
381 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
383 match = FALSE;
386 /* Check that types & charges match for all atoms in molecule */
387 for (j = 0; j < nj && match == TRUE; j++)
389 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
391 match = FALSE;
393 if (tmp_charge[j] != solvent_parameters[k].charge[j])
395 match = FALSE;
398 if (match == TRUE)
400 /* Congratulations! We have a matched solvent.
401 * Flag it with this type for later processing.
403 *cg_sp = k;
404 solvent_parameters[k].count += nmol;
406 /* We are done with this charge group */
407 return;
411 /* If we get here, we have a tentative new solvent type.
412 * Before we add it we must check that it fulfills the requirements
413 * of the solvent optimized loops. First determine which atoms have
414 * VdW interactions.
416 for (j = 0; j < nj; j++)
418 has_vdw[j] = FALSE;
419 tjA = tmp_vdwtype[j];
421 /* Go through all other tpes and see if any have non-zero
422 * VdW parameters when combined with this one.
424 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
426 /* We already checked that the atoms weren't perturbed,
427 * so we only need to check state A now.
429 if (fr->bBHAM)
431 has_vdw[j] = (has_vdw[j] ||
432 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
433 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
434 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
436 else
438 /* Standard LJ */
439 has_vdw[j] = (has_vdw[j] ||
440 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
441 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
446 /* Now we know all we need to make the final check and assignment. */
447 if (nj == 3)
449 /* So, is it an SPC?
450 * For this we require thatn all atoms have charge,
451 * the charges on atom 2 & 3 should be the same, and only
452 * atom 1 might have VdW.
454 if (has_vdw[1] == FALSE &&
455 has_vdw[2] == FALSE &&
456 tmp_charge[0] != 0 &&
457 tmp_charge[1] != 0 &&
458 tmp_charge[2] == tmp_charge[1])
460 srenew(solvent_parameters, *n_solvent_parameters+1);
461 solvent_parameters[*n_solvent_parameters].model = esolSPC;
462 solvent_parameters[*n_solvent_parameters].count = nmol;
463 for (k = 0; k < 3; k++)
465 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
466 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
469 *cg_sp = *n_solvent_parameters;
470 (*n_solvent_parameters)++;
473 else if (nj == 4)
475 /* Or could it be a TIP4P?
476 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
477 * Only atom 1 mght have VdW.
479 if (has_vdw[1] == FALSE &&
480 has_vdw[2] == FALSE &&
481 has_vdw[3] == FALSE &&
482 tmp_charge[0] == 0 &&
483 tmp_charge[1] != 0 &&
484 tmp_charge[2] == tmp_charge[1] &&
485 tmp_charge[3] != 0)
487 srenew(solvent_parameters, *n_solvent_parameters+1);
488 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
489 solvent_parameters[*n_solvent_parameters].count = nmol;
490 for (k = 0; k < 4; k++)
492 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
493 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
496 *cg_sp = *n_solvent_parameters;
497 (*n_solvent_parameters)++;
501 *solvent_parameters_p = solvent_parameters;
504 static void
505 check_solvent(FILE * fp,
506 const gmx_mtop_t * mtop,
507 t_forcerec * fr,
508 cginfo_mb_t *cginfo_mb)
510 const t_block * cgs;
511 const t_block * mols;
512 const gmx_moltype_t *molt;
513 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
514 int n_solvent_parameters;
515 solvent_parameters_t *solvent_parameters;
516 int **cg_sp;
517 int bestsp, bestsol;
519 if (debug)
521 fprintf(debug, "Going to determine what solvent types we have.\n");
524 mols = &mtop->mols;
526 n_solvent_parameters = 0;
527 solvent_parameters = NULL;
528 /* Allocate temporary array for solvent type */
529 snew(cg_sp, mtop->nmolblock);
531 cg_offset = 0;
532 at_offset = 0;
533 for (mb = 0; mb < mtop->nmolblock; mb++)
535 molt = &mtop->moltype[mtop->molblock[mb].type];
536 cgs = &molt->cgs;
537 /* Here we have to loop over all individual molecules
538 * because we need to check for QMMM particles.
540 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
541 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
542 nmol = mtop->molblock[mb].nmol/nmol_ch;
543 for (mol = 0; mol < nmol_ch; mol++)
545 cgm = mol*cgs->nr;
546 am = mol*cgs->index[cgs->nr];
547 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
549 check_solvent_cg(molt, cg_mol, nmol,
550 mtop->groups.grpnr[egcQMMM] ?
551 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
552 &mtop->groups.grps[egcQMMM],
554 &n_solvent_parameters, &solvent_parameters,
555 cginfo_mb[mb].cginfo[cgm+cg_mol],
556 &cg_sp[mb][cgm+cg_mol]);
559 cg_offset += cgs->nr;
560 at_offset += cgs->index[cgs->nr];
563 /* Puh! We finished going through all charge groups.
564 * Now find the most common solvent model.
567 /* Most common solvent this far */
568 bestsp = -2;
569 for (i = 0; i < n_solvent_parameters; i++)
571 if (bestsp == -2 ||
572 solvent_parameters[i].count > solvent_parameters[bestsp].count)
574 bestsp = i;
578 if (bestsp >= 0)
580 bestsol = solvent_parameters[bestsp].model;
582 else
584 bestsol = esolNO;
587 #ifdef DISABLE_WATER_NLIST
588 bestsol = esolNO;
589 #endif
591 fr->nWatMol = 0;
592 for (mb = 0; mb < mtop->nmolblock; mb++)
594 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
595 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
596 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
598 if (cg_sp[mb][i] == bestsp)
600 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
601 fr->nWatMol += nmol;
603 else
605 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
608 sfree(cg_sp[mb]);
610 sfree(cg_sp);
612 if (bestsol != esolNO && fp != NULL)
614 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
615 esol_names[bestsol],
616 solvent_parameters[bestsp].count);
619 sfree(solvent_parameters);
620 fr->solvent_opt = bestsol;
623 enum {
624 acNONE = 0, acCONSTRAINT, acSETTLE
627 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
628 t_forcerec *fr, gmx_bool bNoSolvOpt,
629 gmx_bool *bFEP_NonBonded,
630 gmx_bool *bExcl_IntraCGAll_InterCGNone)
632 const t_block *cgs;
633 const t_blocka *excl;
634 const gmx_moltype_t *molt;
635 const gmx_molblock_t *molb;
636 cginfo_mb_t *cginfo_mb;
637 gmx_bool *type_VDW;
638 int *cginfo;
639 int cg_offset, a_offset, cgm, am;
640 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
641 int *a_con;
642 int ftype;
643 int ia;
644 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
646 ncg_tot = ncg_mtop(mtop);
647 snew(cginfo_mb, mtop->nmolblock);
649 snew(type_VDW, fr->ntype);
650 for (ai = 0; ai < fr->ntype; ai++)
652 type_VDW[ai] = FALSE;
653 for (j = 0; j < fr->ntype; j++)
655 type_VDW[ai] = type_VDW[ai] ||
656 fr->bBHAM ||
657 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
658 C12(fr->nbfp, fr->ntype, ai, j) != 0;
662 *bFEP_NonBonded = FALSE;
663 *bExcl_IntraCGAll_InterCGNone = TRUE;
665 excl_nalloc = 10;
666 snew(bExcl, excl_nalloc);
667 cg_offset = 0;
668 a_offset = 0;
669 for (mb = 0; mb < mtop->nmolblock; mb++)
671 molb = &mtop->molblock[mb];
672 molt = &mtop->moltype[molb->type];
673 cgs = &molt->cgs;
674 excl = &molt->excls;
676 /* Check if the cginfo is identical for all molecules in this block.
677 * If so, we only need an array of the size of one molecule.
678 * Otherwise we make an array of #mol times #cgs per molecule.
680 bId = TRUE;
681 am = 0;
682 for (m = 0; m < molb->nmol; m++)
684 am = m*cgs->index[cgs->nr];
685 for (cg = 0; cg < cgs->nr; cg++)
687 a0 = cgs->index[cg];
688 a1 = cgs->index[cg+1];
689 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
690 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
692 bId = FALSE;
694 if (mtop->groups.grpnr[egcQMMM] != NULL)
696 for (ai = a0; ai < a1; ai++)
698 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
699 mtop->groups.grpnr[egcQMMM][a_offset +ai])
701 bId = FALSE;
708 cginfo_mb[mb].cg_start = cg_offset;
709 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
710 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
711 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
712 cginfo = cginfo_mb[mb].cginfo;
714 /* Set constraints flags for constrained atoms */
715 snew(a_con, molt->atoms.nr);
716 for (ftype = 0; ftype < F_NRE; ftype++)
718 if (interaction_function[ftype].flags & IF_CONSTRAINT)
720 int nral;
722 nral = NRAL(ftype);
723 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
725 int a;
727 for (a = 0; a < nral; a++)
729 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
730 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
736 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
738 cgm = m*cgs->nr;
739 am = m*cgs->index[cgs->nr];
740 for (cg = 0; cg < cgs->nr; cg++)
742 a0 = cgs->index[cg];
743 a1 = cgs->index[cg+1];
745 /* Store the energy group in cginfo */
746 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
747 SET_CGINFO_GID(cginfo[cgm+cg], gid);
749 /* Check the intra/inter charge group exclusions */
750 if (a1-a0 > excl_nalloc)
752 excl_nalloc = a1 - a0;
753 srenew(bExcl, excl_nalloc);
755 /* bExclIntraAll: all intra cg interactions excluded
756 * bExclInter: any inter cg interactions excluded
758 bExclIntraAll = TRUE;
759 bExclInter = FALSE;
760 bHaveVDW = FALSE;
761 bHaveQ = FALSE;
762 bHavePerturbedAtoms = FALSE;
763 for (ai = a0; ai < a1; ai++)
765 /* Check VDW and electrostatic interactions */
766 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
767 type_VDW[molt->atoms.atom[ai].typeB]);
768 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
769 molt->atoms.atom[ai].qB != 0);
771 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
773 /* Clear the exclusion list for atom ai */
774 for (aj = a0; aj < a1; aj++)
776 bExcl[aj-a0] = FALSE;
778 /* Loop over all the exclusions of atom ai */
779 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
781 aj = excl->a[j];
782 if (aj < a0 || aj >= a1)
784 bExclInter = TRUE;
786 else
788 bExcl[aj-a0] = TRUE;
791 /* Check if ai excludes a0 to a1 */
792 for (aj = a0; aj < a1; aj++)
794 if (!bExcl[aj-a0])
796 bExclIntraAll = FALSE;
800 switch (a_con[ai])
802 case acCONSTRAINT:
803 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
804 break;
805 case acSETTLE:
806 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
807 break;
808 default:
809 break;
812 if (bExclIntraAll)
814 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
816 if (bExclInter)
818 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
820 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
822 /* The size in cginfo is currently only read with DD */
823 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
825 if (bHaveVDW)
827 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
829 if (bHaveQ)
831 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
833 if (bHavePerturbedAtoms && fr->efep != efepNO)
835 SET_CGINFO_FEP(cginfo[cgm+cg]);
836 *bFEP_NonBonded = TRUE;
838 /* Store the charge group size */
839 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
841 if (!bExclIntraAll || bExclInter)
843 *bExcl_IntraCGAll_InterCGNone = FALSE;
848 sfree(a_con);
850 cg_offset += molb->nmol*cgs->nr;
851 a_offset += molb->nmol*cgs->index[cgs->nr];
853 sfree(bExcl);
855 /* the solvent optimizer is called after the QM is initialized,
856 * because we don't want to have the QM subsystemto become an
857 * optimized solvent
860 check_solvent(fplog, mtop, fr, cginfo_mb);
862 if (getenv("GMX_NO_SOLV_OPT"))
864 if (fplog)
866 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
867 "Disabling all solvent optimization\n");
869 fr->solvent_opt = esolNO;
871 if (bNoSolvOpt)
873 fr->solvent_opt = esolNO;
875 if (!fr->solvent_opt)
877 for (mb = 0; mb < mtop->nmolblock; mb++)
879 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
881 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
886 return cginfo_mb;
889 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
891 int ncg, mb, cg;
892 int *cginfo;
894 ncg = cgi_mb[nmb-1].cg_end;
895 snew(cginfo, ncg);
896 mb = 0;
897 for (cg = 0; cg < ncg; cg++)
899 while (cg >= cgi_mb[mb].cg_end)
901 mb++;
903 cginfo[cg] =
904 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
907 return cginfo;
910 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
912 /*This now calculates sum for q and c6*/
913 double qsum, q2sum, q, c6sum, c6;
914 int mb, nmol, i;
915 const t_atoms *atoms;
917 qsum = 0;
918 q2sum = 0;
919 c6sum = 0;
920 for (mb = 0; mb < mtop->nmolblock; mb++)
922 nmol = mtop->molblock[mb].nmol;
923 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
924 for (i = 0; i < atoms->nr; i++)
926 q = atoms->atom[i].q;
927 qsum += nmol*q;
928 q2sum += nmol*q*q;
929 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
930 c6sum += nmol*c6;
933 fr->qsum[0] = qsum;
934 fr->q2sum[0] = q2sum;
935 fr->c6sum[0] = c6sum;
937 if (fr->efep != efepNO)
939 qsum = 0;
940 q2sum = 0;
941 c6sum = 0;
942 for (mb = 0; mb < mtop->nmolblock; mb++)
944 nmol = mtop->molblock[mb].nmol;
945 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
946 for (i = 0; i < atoms->nr; i++)
948 q = atoms->atom[i].qB;
949 qsum += nmol*q;
950 q2sum += nmol*q*q;
951 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
952 c6sum += nmol*c6;
954 fr->qsum[1] = qsum;
955 fr->q2sum[1] = q2sum;
956 fr->c6sum[1] = c6sum;
959 else
961 fr->qsum[1] = fr->qsum[0];
962 fr->q2sum[1] = fr->q2sum[0];
963 fr->c6sum[1] = fr->c6sum[0];
965 if (log)
967 if (fr->efep == efepNO)
969 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
971 else
973 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
974 fr->qsum[0], fr->qsum[1]);
979 void update_forcerec(t_forcerec *fr, matrix box)
981 if (fr->eeltype == eelGRF)
983 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
984 fr->rcoulomb, fr->temp, fr->zsquare, box,
985 &fr->kappa, &fr->k_rf, &fr->c_rf);
989 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
991 const t_atoms *atoms, *atoms_tpi;
992 const t_blocka *excl;
993 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
994 gmx_int64_t npair, npair_ij, tmpi, tmpj;
995 double csix, ctwelve;
996 int ntp, *typecount;
997 gmx_bool bBHAM;
998 real *nbfp;
999 real *nbfp_comb = NULL;
1001 ntp = fr->ntype;
1002 bBHAM = fr->bBHAM;
1003 nbfp = fr->nbfp;
1005 /* For LJ-PME, we want to correct for the difference between the
1006 * actual C6 values and the C6 values used by the LJ-PME based on
1007 * combination rules. */
1009 if (EVDW_PME(fr->vdwtype))
1011 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1012 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1013 for (tpi = 0; tpi < ntp; ++tpi)
1015 for (tpj = 0; tpj < ntp; ++tpj)
1017 C6(nbfp_comb, ntp, tpi, tpj) =
1018 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1019 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1022 nbfp = nbfp_comb;
1024 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1026 csix = 0;
1027 ctwelve = 0;
1028 npair = 0;
1029 nexcl = 0;
1030 if (!fr->n_tpi)
1032 /* Count the types so we avoid natoms^2 operations */
1033 snew(typecount, ntp);
1034 gmx_mtop_count_atomtypes(mtop, q, typecount);
1036 for (tpi = 0; tpi < ntp; tpi++)
1038 for (tpj = tpi; tpj < ntp; tpj++)
1040 tmpi = typecount[tpi];
1041 tmpj = typecount[tpj];
1042 if (tpi != tpj)
1044 npair_ij = tmpi*tmpj;
1046 else
1048 npair_ij = tmpi*(tmpi - 1)/2;
1050 if (bBHAM)
1052 /* nbfp now includes the 6.0 derivative prefactor */
1053 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1055 else
1057 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1058 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1059 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1061 npair += npair_ij;
1064 sfree(typecount);
1065 /* Subtract the excluded pairs.
1066 * The main reason for substracting exclusions is that in some cases
1067 * some combinations might never occur and the parameters could have
1068 * any value. These unused values should not influence the dispersion
1069 * correction.
1071 for (mb = 0; mb < mtop->nmolblock; mb++)
1073 nmol = mtop->molblock[mb].nmol;
1074 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1075 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1076 for (i = 0; (i < atoms->nr); i++)
1078 if (q == 0)
1080 tpi = atoms->atom[i].type;
1082 else
1084 tpi = atoms->atom[i].typeB;
1086 j1 = excl->index[i];
1087 j2 = excl->index[i+1];
1088 for (j = j1; j < j2; j++)
1090 k = excl->a[j];
1091 if (k > i)
1093 if (q == 0)
1095 tpj = atoms->atom[k].type;
1097 else
1099 tpj = atoms->atom[k].typeB;
1101 if (bBHAM)
1103 /* nbfp now includes the 6.0 derivative prefactor */
1104 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1106 else
1108 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1109 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1110 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1112 nexcl += nmol;
1118 else
1120 /* Only correct for the interaction of the test particle
1121 * with the rest of the system.
1123 atoms_tpi =
1124 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1126 npair = 0;
1127 for (mb = 0; mb < mtop->nmolblock; mb++)
1129 nmol = mtop->molblock[mb].nmol;
1130 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1131 for (j = 0; j < atoms->nr; j++)
1133 nmolc = nmol;
1134 /* Remove the interaction of the test charge group
1135 * with itself.
1137 if (mb == mtop->nmolblock-1)
1139 nmolc--;
1141 if (mb == 0 && nmol == 1)
1143 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1146 if (q == 0)
1148 tpj = atoms->atom[j].type;
1150 else
1152 tpj = atoms->atom[j].typeB;
1154 for (i = 0; i < fr->n_tpi; i++)
1156 if (q == 0)
1158 tpi = atoms_tpi->atom[i].type;
1160 else
1162 tpi = atoms_tpi->atom[i].typeB;
1164 if (bBHAM)
1166 /* nbfp now includes the 6.0 derivative prefactor */
1167 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1169 else
1171 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1172 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1173 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1175 npair += nmolc;
1180 if (npair - nexcl <= 0 && fplog)
1182 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1183 csix = 0;
1184 ctwelve = 0;
1186 else
1188 csix /= npair - nexcl;
1189 ctwelve /= npair - nexcl;
1191 if (debug)
1193 fprintf(debug, "Counted %d exclusions\n", nexcl);
1194 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1195 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1197 fr->avcsix[q] = csix;
1198 fr->avctwelve[q] = ctwelve;
1201 if (EVDW_PME(fr->vdwtype))
1203 sfree(nbfp_comb);
1206 if (fplog != NULL)
1208 if (fr->eDispCorr == edispcAllEner ||
1209 fr->eDispCorr == edispcAllEnerPres)
1211 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1212 fr->avcsix[0], fr->avctwelve[0]);
1214 else
1216 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1222 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1223 const gmx_mtop_t *mtop)
1225 const t_atoms *at1, *at2;
1226 int mt1, mt2, i, j, tpi, tpj, ntypes;
1227 real b, bmin;
1228 real *nbfp;
1230 if (fplog)
1232 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1234 nbfp = fr->nbfp;
1235 ntypes = fr->ntype;
1237 bmin = -1;
1238 fr->bham_b_max = 0;
1239 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1241 at1 = &mtop->moltype[mt1].atoms;
1242 for (i = 0; (i < at1->nr); i++)
1244 tpi = at1->atom[i].type;
1245 if (tpi >= ntypes)
1247 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1250 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1252 at2 = &mtop->moltype[mt2].atoms;
1253 for (j = 0; (j < at2->nr); j++)
1255 tpj = at2->atom[j].type;
1256 if (tpj >= ntypes)
1258 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1260 b = BHAMB(nbfp, ntypes, tpi, tpj);
1261 if (b > fr->bham_b_max)
1263 fr->bham_b_max = b;
1265 if ((b < bmin) || (bmin == -1))
1267 bmin = b;
1273 if (fplog)
1275 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1276 bmin, fr->bham_b_max);
1280 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1281 t_forcerec *fr, real rtab,
1282 const t_commrec *cr,
1283 const char *tabfn, char *eg1, char *eg2,
1284 t_nblists *nbl)
1286 char buf[STRLEN];
1287 int i, j;
1289 if (tabfn == NULL)
1291 if (debug)
1293 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1295 return;
1298 sprintf(buf, "%s", tabfn);
1299 if (eg1 && eg2)
1301 /* Append the two energy group names */
1302 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1303 eg1, eg2, ftp2ext(efXVG));
1305 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1306 /* Copy the contents of the table to separate coulomb and LJ tables too,
1307 * to improve cache performance.
1309 /* For performance reasons we want
1310 * the table data to be aligned to 16-byte. The pointers could be freed
1311 * but currently aren't.
1313 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1314 nbl->table_elec.format = nbl->table_elec_vdw.format;
1315 nbl->table_elec.r = nbl->table_elec_vdw.r;
1316 nbl->table_elec.n = nbl->table_elec_vdw.n;
1317 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1318 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1319 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1320 nbl->table_elec.ninteractions = 1;
1321 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1322 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1324 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1325 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1326 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1327 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1328 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1329 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1330 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1331 nbl->table_vdw.ninteractions = 2;
1332 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1333 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1335 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1337 for (j = 0; j < 4; j++)
1339 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1341 for (j = 0; j < 8; j++)
1343 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1348 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1349 int *ncount, int **count)
1351 const gmx_moltype_t *molt;
1352 const t_ilist *il;
1353 int mt, ftype, stride, i, j, tabnr;
1355 for (mt = 0; mt < mtop->nmoltype; mt++)
1357 molt = &mtop->moltype[mt];
1358 for (ftype = 0; ftype < F_NRE; ftype++)
1360 if (ftype == ftype1 || ftype == ftype2)
1362 il = &molt->ilist[ftype];
1363 stride = 1 + NRAL(ftype);
1364 for (i = 0; i < il->nr; i += stride)
1366 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1367 if (tabnr < 0)
1369 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1371 if (tabnr >= *ncount)
1373 srenew(*count, tabnr+1);
1374 for (j = *ncount; j < tabnr+1; j++)
1376 (*count)[j] = 0;
1378 *ncount = tabnr+1;
1380 (*count)[tabnr]++;
1387 static bondedtable_t *make_bonded_tables(FILE *fplog,
1388 int ftype1, int ftype2,
1389 const gmx_mtop_t *mtop,
1390 const char *basefn, const char *tabext)
1392 int i, ncount, *count;
1393 char tabfn[STRLEN];
1394 bondedtable_t *tab;
1396 tab = NULL;
1398 ncount = 0;
1399 count = NULL;
1400 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1402 if (ncount > 0)
1404 snew(tab, ncount);
1405 for (i = 0; i < ncount; i++)
1407 if (count[i] > 0)
1409 sprintf(tabfn, "%s", basefn);
1410 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1411 tabext, i, ftp2ext(efXVG));
1412 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1415 sfree(count);
1418 return tab;
1421 void forcerec_set_ranges(t_forcerec *fr,
1422 int ncg_home, int ncg_force,
1423 int natoms_force,
1424 int natoms_force_constr, int natoms_f_novirsum)
1426 fr->cg0 = 0;
1427 fr->hcg = ncg_home;
1429 /* fr->ncg_force is unused in the standard code,
1430 * but it can be useful for modified code dealing with charge groups.
1432 fr->ncg_force = ncg_force;
1433 fr->natoms_force = natoms_force;
1434 fr->natoms_force_constr = natoms_force_constr;
1436 if (fr->natoms_force_constr > fr->nalloc_force)
1438 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1440 if (fr->bTwinRange)
1442 srenew(fr->f_twin, fr->nalloc_force);
1446 if (fr->bF_NoVirSum)
1448 fr->f_novirsum_n = natoms_f_novirsum;
1449 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1451 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1452 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1455 else
1457 fr->f_novirsum_n = 0;
1461 static real cutoff_inf(real cutoff)
1463 if (cutoff == 0)
1465 cutoff = GMX_CUTOFF_INF;
1468 return cutoff;
1471 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1472 t_forcerec *fr, const t_inputrec *ir,
1473 const char *tabfn, const gmx_mtop_t *mtop,
1474 matrix box)
1476 char buf[STRLEN];
1477 int i, j;
1479 if (tabfn == NULL)
1481 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1482 return;
1485 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1487 sprintf(buf, "%s", tabfn);
1488 for (i = 0; i < ir->adress->n_tf_grps; i++)
1490 j = ir->adress->tf_table_index[i]; /* get energy group index */
1491 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1492 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1493 if (fp)
1495 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1497 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1502 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1504 gmx_bool bAllvsAll;
1506 bAllvsAll =
1508 ir->rlist == 0 &&
1509 ir->rcoulomb == 0 &&
1510 ir->rvdw == 0 &&
1511 ir->ePBC == epbcNONE &&
1512 ir->vdwtype == evdwCUT &&
1513 ir->coulombtype == eelCUT &&
1514 ir->efep == efepNO &&
1515 (ir->implicit_solvent == eisNO ||
1516 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1517 ir->gb_algorithm == egbHCT ||
1518 ir->gb_algorithm == egbOBC))) &&
1519 getenv("GMX_NO_ALLVSALL") == NULL
1522 if (bAllvsAll && ir->opts.ngener > 1)
1524 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1526 if (bPrintNote)
1528 if (MASTER(cr))
1530 fprintf(stderr, "\n%s\n", note);
1532 if (fp != NULL)
1534 fprintf(fp, "\n%s\n", note);
1537 bAllvsAll = FALSE;
1540 if (bAllvsAll && fp && MASTER(cr))
1542 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1545 return bAllvsAll;
1549 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1551 int t, i;
1553 /* These thread local data structures are used for bondeds only */
1554 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1556 if (fr->nthreads > 1)
1558 snew(fr->f_t, fr->nthreads);
1559 /* Thread 0 uses the global force and energy arrays */
1560 for (t = 1; t < fr->nthreads; t++)
1562 fr->f_t[t].f = NULL;
1563 fr->f_t[t].f_nalloc = 0;
1564 snew(fr->f_t[t].fshift, SHIFTS);
1565 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1566 for (i = 0; i < egNR; i++)
1568 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1575 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1576 const t_commrec *cr,
1577 const t_inputrec *ir,
1578 gmx_bool bGPU)
1580 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1582 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1583 bGPU ? "GPUs" : "SIMD kernels",
1584 bGPU ? "CPU only" : "plain-C kernels");
1585 return FALSE;
1588 return TRUE;
1592 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1593 int *kernel_type,
1594 int *ewald_excl)
1596 *kernel_type = nbnxnk4x4_PlainC;
1597 *ewald_excl = ewaldexclTable;
1599 #ifdef GMX_NBNXN_SIMD
1601 #ifdef GMX_NBNXN_SIMD_4XN
1602 *kernel_type = nbnxnk4xN_SIMD_4xN;
1603 #endif
1604 #ifdef GMX_NBNXN_SIMD_2XNN
1605 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1606 #endif
1608 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1609 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1610 * Currently this is based on the SIMD acceleration choice,
1611 * but it might be better to decide this at runtime based on CPU.
1613 * 4xN calculates more (zero) interactions, but has less pair-search
1614 * work and much better kernel instruction scheduling.
1616 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1617 * which doesn't have FMA, both the analytical and tabulated Ewald
1618 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1619 * 2x(4+4) because it results in significantly fewer pairs.
1620 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1621 * 10% with HT, 50% without HT. As we currently don't detect the actual
1622 * use of HT, use 4x8 to avoid a potential performance hit.
1623 * On Intel Haswell 4x8 is always faster.
1625 *kernel_type = nbnxnk4xN_SIMD_4xN;
1627 #ifndef GMX_SIMD_HAVE_FMA
1628 if (EEL_PME_EWALD(ir->coulombtype) ||
1629 EVDW_PME(ir->vdwtype))
1631 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1632 * There are enough instructions to make 2x(4+4) efficient.
1634 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1636 #endif
1637 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1640 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1642 #ifdef GMX_NBNXN_SIMD_4XN
1643 *kernel_type = nbnxnk4xN_SIMD_4xN;
1644 #else
1645 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1646 #endif
1648 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1650 #ifdef GMX_NBNXN_SIMD_2XNN
1651 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1652 #else
1653 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1654 #endif
1657 /* Analytical Ewald exclusion correction is only an option in
1658 * the SIMD kernel.
1659 * Since table lookup's don't parallelize with SIMD, analytical
1660 * will probably always be faster for a SIMD width of 8 or more.
1661 * With FMA analytical is sometimes faster for a width if 4 as well.
1662 * On BlueGene/Q, this is faster regardless of precision.
1663 * In single precision, this is faster on Bulldozer.
1665 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1666 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1667 defined GMX_SIMD_IBM_QPX
1668 *ewald_excl = ewaldexclAnalytical;
1669 #endif
1670 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1672 *ewald_excl = ewaldexclTable;
1674 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1676 *ewald_excl = ewaldexclAnalytical;
1680 #endif /* GMX_NBNXN_SIMD */
1684 const char *lookup_nbnxn_kernel_name(int kernel_type)
1686 const char *returnvalue = NULL;
1687 switch (kernel_type)
1689 case nbnxnkNotSet:
1690 returnvalue = "not set";
1691 break;
1692 case nbnxnk4x4_PlainC:
1693 returnvalue = "plain C";
1694 break;
1695 case nbnxnk4xN_SIMD_4xN:
1696 case nbnxnk4xN_SIMD_2xNN:
1697 #ifdef GMX_NBNXN_SIMD
1698 #if defined GMX_SIMD_X86_SSE2
1699 returnvalue = "SSE2";
1700 #elif defined GMX_SIMD_X86_SSE4_1
1701 returnvalue = "SSE4.1";
1702 #elif defined GMX_SIMD_X86_AVX_128_FMA
1703 returnvalue = "AVX_128_FMA";
1704 #elif defined GMX_SIMD_X86_AVX_256
1705 returnvalue = "AVX_256";
1706 #elif defined GMX_SIMD_X86_AVX2_256
1707 returnvalue = "AVX2_256";
1708 #else
1709 returnvalue = "SIMD";
1710 #endif
1711 #else /* GMX_NBNXN_SIMD */
1712 returnvalue = "not available";
1713 #endif /* GMX_NBNXN_SIMD */
1714 break;
1715 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1716 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1718 case nbnxnkNR:
1719 default:
1720 gmx_fatal(FARGS, "Illegal kernel type selected");
1721 returnvalue = NULL;
1722 break;
1724 return returnvalue;
1727 static void pick_nbnxn_kernel(FILE *fp,
1728 const t_commrec *cr,
1729 gmx_bool use_simd_kernels,
1730 gmx_bool bUseGPU,
1731 gmx_bool bEmulateGPU,
1732 const t_inputrec *ir,
1733 int *kernel_type,
1734 int *ewald_excl,
1735 gmx_bool bDoNonbonded)
1737 assert(kernel_type);
1739 *kernel_type = nbnxnkNotSet;
1740 *ewald_excl = ewaldexclTable;
1742 if (bEmulateGPU)
1744 *kernel_type = nbnxnk8x8x8_PlainC;
1746 if (bDoNonbonded)
1748 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1751 else if (bUseGPU)
1753 *kernel_type = nbnxnk8x8x8_CUDA;
1756 if (*kernel_type == nbnxnkNotSet)
1758 /* LJ PME with LB combination rule does 7 mesh operations.
1759 * This so slow that we don't compile SIMD non-bonded kernels for that.
1761 if (use_simd_kernels &&
1762 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1764 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1766 else
1768 *kernel_type = nbnxnk4x4_PlainC;
1772 if (bDoNonbonded && fp != NULL)
1774 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1775 lookup_nbnxn_kernel_name(*kernel_type),
1776 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1777 nbnxn_kernel_to_cj_size(*kernel_type));
1781 static void pick_nbnxn_resources(const t_commrec *cr,
1782 const gmx_hw_info_t *hwinfo,
1783 gmx_bool bDoNonbonded,
1784 gmx_bool *bUseGPU,
1785 gmx_bool *bEmulateGPU,
1786 const gmx_gpu_opt_t *gpu_opt)
1788 gmx_bool bEmulateGPUEnvVarSet;
1789 char gpu_err_str[STRLEN];
1791 *bUseGPU = FALSE;
1793 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1795 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1796 * GPUs (currently) only handle non-bonded calculations, we will
1797 * automatically switch to emulation if non-bonded calculations are
1798 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1799 * way to turn off GPU initialization, data movement, and cleanup.
1801 * GPU emulation can be useful to assess the performance one can expect by
1802 * adding GPU(s) to the machine. The conditional below allows this even
1803 * if mdrun is compiled without GPU acceleration support.
1804 * Note that you should freezing the system as otherwise it will explode.
1806 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1807 (!bDoNonbonded &&
1808 gpu_opt->ncuda_dev_use > 0));
1810 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1812 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1814 /* Each PP node will use the intra-node id-th device from the
1815 * list of detected/selected GPUs. */
1816 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1817 &hwinfo->gpu_info, gpu_opt))
1819 /* At this point the init should never fail as we made sure that
1820 * we have all the GPUs we need. If it still does, we'll bail. */
1821 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1822 cr->nodeid,
1823 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1824 cr->rank_pp_intranode),
1825 gpu_err_str);
1828 /* Here we actually turn on hardware GPU acceleration */
1829 *bUseGPU = TRUE;
1833 gmx_bool uses_simple_tables(int cutoff_scheme,
1834 nonbonded_verlet_t *nbv,
1835 int group)
1837 gmx_bool bUsesSimpleTables = TRUE;
1838 int grp_index;
1840 switch (cutoff_scheme)
1842 case ecutsGROUP:
1843 bUsesSimpleTables = TRUE;
1844 break;
1845 case ecutsVERLET:
1846 assert(NULL != nbv && NULL != nbv->grp);
1847 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1848 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1849 break;
1850 default:
1851 gmx_incons("unimplemented");
1853 return bUsesSimpleTables;
1856 static void init_ewald_f_table(interaction_const_t *ic,
1857 gmx_bool bUsesSimpleTables,
1858 real rtab)
1860 real maxr;
1862 if (bUsesSimpleTables)
1864 /* With a spacing of 0.0005 we are at the force summation accuracy
1865 * for the SSE kernels for "normal" atomistic simulations.
1867 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1868 ic->rcoulomb);
1870 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1871 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1873 else
1875 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1876 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1877 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1880 sfree_aligned(ic->tabq_coul_FDV0);
1881 sfree_aligned(ic->tabq_coul_F);
1882 sfree_aligned(ic->tabq_coul_V);
1884 sfree_aligned(ic->tabq_vdw_FDV0);
1885 sfree_aligned(ic->tabq_vdw_F);
1886 sfree_aligned(ic->tabq_vdw_V);
1888 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1890 /* Create the original table data in FDV0 */
1891 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1892 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1893 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1894 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1895 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1898 if (EVDW_PME(ic->vdwtype))
1900 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1901 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1902 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1903 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1904 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1908 void init_interaction_const_tables(FILE *fp,
1909 interaction_const_t *ic,
1910 gmx_bool bUsesSimpleTables,
1911 real rtab)
1913 real spacing;
1915 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1917 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1919 if (fp != NULL)
1921 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1922 1/ic->tabq_scale, ic->tabq_size);
1927 static void clear_force_switch_constants(shift_consts_t *sc)
1929 sc->c2 = 0;
1930 sc->c3 = 0;
1931 sc->cpot = 0;
1934 static void force_switch_constants(real p,
1935 real rsw, real rc,
1936 shift_consts_t *sc)
1938 /* Here we determine the coefficient for shifting the force to zero
1939 * between distance rsw and the cut-off rc.
1940 * For a potential of r^-p, we have force p*r^-(p+1).
1941 * But to save flops we absorb p in the coefficient.
1942 * Thus we get:
1943 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1944 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1946 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1947 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1948 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1951 static void potential_switch_constants(real rsw, real rc,
1952 switch_consts_t *sc)
1954 /* The switch function is 1 at rsw and 0 at rc.
1955 * The derivative and second derivate are zero at both ends.
1956 * rsw = max(r - r_switch, 0)
1957 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1958 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1959 * force = force*dsw - potential*sw
1960 * potential *= sw
1962 sc->c3 = -10*pow(rc - rsw, -3);
1963 sc->c4 = 15*pow(rc - rsw, -4);
1964 sc->c5 = -6*pow(rc - rsw, -5);
1967 static void
1968 init_interaction_const(FILE *fp,
1969 const t_commrec gmx_unused *cr,
1970 interaction_const_t **interaction_const,
1971 const t_forcerec *fr,
1972 real rtab)
1974 interaction_const_t *ic;
1975 gmx_bool bUsesSimpleTables = TRUE;
1977 snew(ic, 1);
1979 /* Just allocate something so we can free it */
1980 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1981 snew_aligned(ic->tabq_coul_F, 16, 32);
1982 snew_aligned(ic->tabq_coul_V, 16, 32);
1984 ic->rlist = fr->rlist;
1985 ic->rlistlong = fr->rlistlong;
1987 /* Lennard-Jones */
1988 ic->vdwtype = fr->vdwtype;
1989 ic->vdw_modifier = fr->vdw_modifier;
1990 ic->rvdw = fr->rvdw;
1991 ic->rvdw_switch = fr->rvdw_switch;
1992 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1993 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1994 ic->sh_lj_ewald = 0;
1995 clear_force_switch_constants(&ic->dispersion_shift);
1996 clear_force_switch_constants(&ic->repulsion_shift);
1998 switch (ic->vdw_modifier)
2000 case eintmodPOTSHIFT:
2001 /* Only shift the potential, don't touch the force */
2002 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2003 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
2004 if (EVDW_PME(ic->vdwtype))
2006 real crc2;
2008 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2009 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2011 break;
2012 case eintmodFORCESWITCH:
2013 /* Switch the force, switch and shift the potential */
2014 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2015 &ic->dispersion_shift);
2016 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2017 &ic->repulsion_shift);
2018 break;
2019 case eintmodPOTSWITCH:
2020 /* Switch the potential and force */
2021 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2022 &ic->vdw_switch);
2023 break;
2024 case eintmodNONE:
2025 case eintmodEXACTCUTOFF:
2026 /* Nothing to do here */
2027 break;
2028 default:
2029 gmx_incons("unimplemented potential modifier");
2032 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2034 /* Electrostatics */
2035 ic->eeltype = fr->eeltype;
2036 ic->coulomb_modifier = fr->coulomb_modifier;
2037 ic->rcoulomb = fr->rcoulomb;
2038 ic->epsilon_r = fr->epsilon_r;
2039 ic->epsfac = fr->epsfac;
2040 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2042 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2044 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2046 else
2048 ic->sh_ewald = 0;
2051 /* Reaction-field */
2052 if (EEL_RF(ic->eeltype))
2054 ic->epsilon_rf = fr->epsilon_rf;
2055 ic->k_rf = fr->k_rf;
2056 ic->c_rf = fr->c_rf;
2058 else
2060 /* For plain cut-off we might use the reaction-field kernels */
2061 ic->epsilon_rf = ic->epsilon_r;
2062 ic->k_rf = 0;
2063 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2065 ic->c_rf = 1/ic->rcoulomb;
2067 else
2069 ic->c_rf = 0;
2073 if (fp != NULL)
2075 real dispersion_shift;
2077 dispersion_shift = ic->dispersion_shift.cpot;
2078 if (EVDW_PME(ic->vdwtype))
2080 dispersion_shift -= ic->sh_lj_ewald;
2082 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2083 ic->repulsion_shift.cpot, dispersion_shift);
2085 if (ic->eeltype == eelCUT)
2087 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2089 else if (EEL_PME(ic->eeltype))
2091 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2093 fprintf(fp, "\n");
2096 *interaction_const = ic;
2098 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2100 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2102 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2103 * also sharing texture references. To keep the code simple, we don't
2104 * treat texture references as shared resources, but this means that
2105 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2106 * Hence, to ensure that the non-bonded kernels don't start before all
2107 * texture binding operations are finished, we need to wait for all ranks
2108 * to arrive here before continuing.
2110 * Note that we could omit this barrier if GPUs are not shared (or
2111 * texture objects are used), but as this is initialization code, there
2112 * is not point in complicating things.
2114 #ifdef GMX_THREAD_MPI
2115 if (PAR(cr))
2117 gmx_barrier(cr);
2119 #endif /* GMX_THREAD_MPI */
2122 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2123 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2126 static void init_nb_verlet(FILE *fp,
2127 nonbonded_verlet_t **nb_verlet,
2128 gmx_bool bFEP_NonBonded,
2129 const t_inputrec *ir,
2130 const t_forcerec *fr,
2131 const t_commrec *cr,
2132 const char *nbpu_opt)
2134 nonbonded_verlet_t *nbv;
2135 int i;
2136 char *env;
2137 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2139 nbnxn_alloc_t *nb_alloc;
2140 nbnxn_free_t *nb_free;
2142 snew(nbv, 1);
2144 pick_nbnxn_resources(cr, fr->hwinfo,
2145 fr->bNonbonded,
2146 &nbv->bUseGPU,
2147 &bEmulateGPU,
2148 fr->gpu_opt);
2150 nbv->nbs = NULL;
2152 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2153 for (i = 0; i < nbv->ngrp; i++)
2155 nbv->grp[i].nbl_lists.nnbl = 0;
2156 nbv->grp[i].nbat = NULL;
2157 nbv->grp[i].kernel_type = nbnxnkNotSet;
2159 if (i == 0) /* local */
2161 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2162 nbv->bUseGPU, bEmulateGPU, ir,
2163 &nbv->grp[i].kernel_type,
2164 &nbv->grp[i].ewald_excl,
2165 fr->bNonbonded);
2167 else /* non-local */
2169 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2171 /* Use GPU for local, select a CPU kernel for non-local */
2172 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2173 FALSE, FALSE, ir,
2174 &nbv->grp[i].kernel_type,
2175 &nbv->grp[i].ewald_excl,
2176 fr->bNonbonded);
2178 bHybridGPURun = TRUE;
2180 else
2182 /* Use the same kernel for local and non-local interactions */
2183 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2184 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2189 if (nbv->bUseGPU)
2191 /* init the NxN GPU data; the last argument tells whether we'll have
2192 * both local and non-local NB calculation on GPU */
2193 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2194 &fr->hwinfo->gpu_info, fr->gpu_opt,
2195 cr->rank_pp_intranode,
2196 (nbv->ngrp > 1) && !bHybridGPURun);
2198 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2200 char *end;
2202 nbv->min_ci_balanced = strtol(env, &end, 10);
2203 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2205 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2208 if (debug)
2210 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2211 nbv->min_ci_balanced);
2214 else
2216 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2217 if (debug)
2219 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2220 nbv->min_ci_balanced);
2224 else
2226 nbv->min_ci_balanced = 0;
2229 *nb_verlet = nbv;
2231 nbnxn_init_search(&nbv->nbs,
2232 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2233 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2234 bFEP_NonBonded,
2235 gmx_omp_nthreads_get(emntNonbonded));
2237 for (i = 0; i < nbv->ngrp; i++)
2239 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2241 nb_alloc = &pmalloc;
2242 nb_free = &pfree;
2244 else
2246 nb_alloc = NULL;
2247 nb_free = NULL;
2250 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2251 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2252 /* 8x8x8 "non-simple" lists are ATM always combined */
2253 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2254 nb_alloc, nb_free);
2256 if (i == 0 ||
2257 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2259 gmx_bool bSimpleList;
2260 int enbnxninitcombrule;
2262 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2264 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2266 /* Plain LJ cut-off: we can optimize with combination rules */
2267 enbnxninitcombrule = enbnxninitcombruleDETECT;
2269 else if (fr->vdwtype == evdwPME)
2271 /* LJ-PME: we need to use a combination rule for the grid */
2272 if (fr->ljpme_combination_rule == eljpmeGEOM)
2274 enbnxninitcombrule = enbnxninitcombruleGEOM;
2276 else
2278 enbnxninitcombrule = enbnxninitcombruleLB;
2281 else
2283 /* We use a full combination matrix: no rule required */
2284 enbnxninitcombrule = enbnxninitcombruleNONE;
2288 snew(nbv->grp[i].nbat, 1);
2289 nbnxn_atomdata_init(fp,
2290 nbv->grp[i].nbat,
2291 nbv->grp[i].kernel_type,
2292 enbnxninitcombrule,
2293 fr->ntype, fr->nbfp,
2294 ir->opts.ngener,
2295 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2296 nb_alloc, nb_free);
2298 else
2300 nbv->grp[i].nbat = nbv->grp[0].nbat;
2305 void init_forcerec(FILE *fp,
2306 const output_env_t oenv,
2307 t_forcerec *fr,
2308 t_fcdata *fcd,
2309 const t_inputrec *ir,
2310 const gmx_mtop_t *mtop,
2311 const t_commrec *cr,
2312 matrix box,
2313 const char *tabfn,
2314 const char *tabafn,
2315 const char *tabpfn,
2316 const char *tabbfn,
2317 const char *nbpu_opt,
2318 gmx_bool bNoSolvOpt,
2319 real print_force)
2321 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2322 real rtab;
2323 char *env;
2324 double dbl;
2325 const t_block *cgs;
2326 gmx_bool bGenericKernelOnly;
2327 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2328 gmx_bool bFEP_NonBonded;
2329 t_nblists *nbl;
2330 int *nm_ind, egp_flags;
2332 if (fr->hwinfo == NULL)
2334 /* Detect hardware, gather information.
2335 * In mdrun, hwinfo has already been set before calling init_forcerec.
2336 * Here we ignore GPUs, as tools will not use them anyhow.
2338 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2341 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2342 fr->use_simd_kernels = TRUE;
2344 fr->bDomDec = DOMAINDECOMP(cr);
2346 natoms = mtop->natoms;
2348 if (check_box(ir->ePBC, box))
2350 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2353 /* Test particle insertion ? */
2354 if (EI_TPI(ir->eI))
2356 /* Set to the size of the molecule to be inserted (the last one) */
2357 /* Because of old style topologies, we have to use the last cg
2358 * instead of the last molecule type.
2360 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2361 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2362 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2364 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2367 else
2369 fr->n_tpi = 0;
2372 /* Copy AdResS parameters */
2373 if (ir->bAdress)
2375 fr->adress_type = ir->adress->type;
2376 fr->adress_const_wf = ir->adress->const_wf;
2377 fr->adress_ex_width = ir->adress->ex_width;
2378 fr->adress_hy_width = ir->adress->hy_width;
2379 fr->adress_icor = ir->adress->icor;
2380 fr->adress_site = ir->adress->site;
2381 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2382 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2385 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2386 for (i = 0; i < ir->adress->n_energy_grps; i++)
2388 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2391 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2392 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2393 for (i = 0; i < fr->n_adress_tf_grps; i++)
2395 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2397 copy_rvec(ir->adress->refs, fr->adress_refs);
2399 else
2401 fr->adress_type = eAdressOff;
2402 fr->adress_do_hybridpairs = FALSE;
2405 /* Copy the user determined parameters */
2406 fr->userint1 = ir->userint1;
2407 fr->userint2 = ir->userint2;
2408 fr->userint3 = ir->userint3;
2409 fr->userint4 = ir->userint4;
2410 fr->userreal1 = ir->userreal1;
2411 fr->userreal2 = ir->userreal2;
2412 fr->userreal3 = ir->userreal3;
2413 fr->userreal4 = ir->userreal4;
2415 /* Shell stuff */
2416 fr->fc_stepsize = ir->fc_stepsize;
2418 /* Free energy */
2419 fr->efep = ir->efep;
2420 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2421 if (ir->fepvals->bScCoul)
2423 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2424 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2426 else
2428 fr->sc_alphacoul = 0;
2429 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2431 fr->sc_power = ir->fepvals->sc_power;
2432 fr->sc_r_power = ir->fepvals->sc_r_power;
2433 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2435 env = getenv("GMX_SCSIGMA_MIN");
2436 if (env != NULL)
2438 dbl = 0;
2439 sscanf(env, "%lf", &dbl);
2440 fr->sc_sigma6_min = pow(dbl, 6);
2441 if (fp)
2443 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2447 fr->bNonbonded = TRUE;
2448 if (getenv("GMX_NO_NONBONDED") != NULL)
2450 /* turn off non-bonded calculations */
2451 fr->bNonbonded = FALSE;
2452 md_print_warn(cr, fp,
2453 "Found environment variable GMX_NO_NONBONDED.\n"
2454 "Disabling nonbonded calculations.\n");
2457 bGenericKernelOnly = FALSE;
2459 /* We now check in the NS code whether a particular combination of interactions
2460 * can be used with water optimization, and disable it if that is not the case.
2463 if (getenv("GMX_NB_GENERIC") != NULL)
2465 if (fp != NULL)
2467 fprintf(fp,
2468 "Found environment variable GMX_NB_GENERIC.\n"
2469 "Disabling all interaction-specific nonbonded kernels, will only\n"
2470 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2472 bGenericKernelOnly = TRUE;
2475 if (bGenericKernelOnly == TRUE)
2477 bNoSolvOpt = TRUE;
2480 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2482 fr->use_simd_kernels = FALSE;
2483 if (fp != NULL)
2485 fprintf(fp,
2486 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2487 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2491 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2493 /* Check if we can/should do all-vs-all kernels */
2494 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2495 fr->AllvsAll_work = NULL;
2496 fr->AllvsAll_workgb = NULL;
2498 /* All-vs-all kernels have not been implemented in 4.6, and
2499 * the SIMD group kernels are also buggy in this case. Non-SIMD
2500 * group kernels are OK. See Redmine #1249. */
2501 if (fr->bAllvsAll)
2503 fr->bAllvsAll = FALSE;
2504 fr->use_simd_kernels = FALSE;
2505 if (fp != NULL)
2507 fprintf(fp,
2508 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2509 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2510 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2511 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2512 "we are proceeding by disabling all CPU architecture-specific\n"
2513 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2514 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2518 /* Neighbour searching stuff */
2519 fr->cutoff_scheme = ir->cutoff_scheme;
2520 fr->bGrid = (ir->ns_type == ensGRID);
2521 fr->ePBC = ir->ePBC;
2523 if (fr->cutoff_scheme == ecutsGROUP)
2525 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2526 "removed in a future release when 'verlet' supports all interaction forms.\n";
2528 if (MASTER(cr))
2530 fprintf(stderr, "\n%s\n", note);
2532 if (fp != NULL)
2534 fprintf(fp, "\n%s\n", note);
2538 /* Determine if we will do PBC for distances in bonded interactions */
2539 if (fr->ePBC == epbcNONE)
2541 fr->bMolPBC = FALSE;
2543 else
2545 if (!DOMAINDECOMP(cr))
2547 /* The group cut-off scheme and SHAKE assume charge groups
2548 * are whole, but not using molpbc is faster in most cases.
2550 if (fr->cutoff_scheme == ecutsGROUP ||
2551 (ir->eConstrAlg == econtSHAKE &&
2552 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2553 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2555 fr->bMolPBC = ir->bPeriodicMols;
2557 else
2559 fr->bMolPBC = TRUE;
2560 if (getenv("GMX_USE_GRAPH") != NULL)
2562 fr->bMolPBC = FALSE;
2563 if (fp)
2565 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2570 else
2572 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2575 fr->bGB = (ir->implicit_solvent == eisGBSA);
2577 fr->rc_scaling = ir->refcoord_scaling;
2578 copy_rvec(ir->posres_com, fr->posres_com);
2579 copy_rvec(ir->posres_comB, fr->posres_comB);
2580 fr->rlist = cutoff_inf(ir->rlist);
2581 fr->rlistlong = cutoff_inf(ir->rlistlong);
2582 fr->eeltype = ir->coulombtype;
2583 fr->vdwtype = ir->vdwtype;
2584 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2586 fr->coulomb_modifier = ir->coulomb_modifier;
2587 fr->vdw_modifier = ir->vdw_modifier;
2589 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2590 switch (fr->eeltype)
2592 case eelCUT:
2593 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2594 break;
2596 case eelRF:
2597 case eelGRF:
2598 case eelRF_NEC:
2599 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2600 break;
2602 case eelRF_ZERO:
2603 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2604 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2605 break;
2607 case eelSWITCH:
2608 case eelSHIFT:
2609 case eelUSER:
2610 case eelENCADSHIFT:
2611 case eelPMESWITCH:
2612 case eelPMEUSER:
2613 case eelPMEUSERSWITCH:
2614 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2615 break;
2617 case eelPME:
2618 case eelEWALD:
2619 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2620 break;
2622 default:
2623 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2624 break;
2627 /* Vdw: Translate from mdp settings to kernel format */
2628 switch (fr->vdwtype)
2630 case evdwCUT:
2631 if (fr->bBHAM)
2633 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2635 else
2637 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2639 break;
2640 case evdwPME:
2641 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2642 break;
2644 case evdwSWITCH:
2645 case evdwSHIFT:
2646 case evdwUSER:
2647 case evdwENCADSHIFT:
2648 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2649 break;
2651 default:
2652 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2653 break;
2656 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2657 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2658 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2660 fr->bTwinRange = fr->rlistlong > fr->rlist;
2661 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2663 fr->reppow = mtop->ffparams.reppow;
2665 if (ir->cutoff_scheme == ecutsGROUP)
2667 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2668 && !EVDW_PME(fr->vdwtype));
2669 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2670 fr->bcoultab = !(fr->eeltype == eelCUT ||
2671 fr->eeltype == eelEWALD ||
2672 fr->eeltype == eelPME ||
2673 fr->eeltype == eelRF ||
2674 fr->eeltype == eelRF_ZERO);
2676 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2677 * going to be faster to tabulate the interaction than calling the generic kernel.
2679 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2681 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2683 fr->bcoultab = TRUE;
2686 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2687 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2688 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2689 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2691 if (fr->rcoulomb != fr->rvdw)
2693 fr->bcoultab = TRUE;
2697 if (getenv("GMX_REQUIRE_TABLES"))
2699 fr->bvdwtab = TRUE;
2700 fr->bcoultab = TRUE;
2703 if (fp)
2705 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2706 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2709 if (fr->bvdwtab == TRUE)
2711 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2712 fr->nbkernel_vdw_modifier = eintmodNONE;
2714 if (fr->bcoultab == TRUE)
2716 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2717 fr->nbkernel_elec_modifier = eintmodNONE;
2721 if (ir->cutoff_scheme == ecutsVERLET)
2723 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2725 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2727 fr->bvdwtab = FALSE;
2728 fr->bcoultab = FALSE;
2731 /* Tables are used for direct ewald sum */
2732 if (fr->bEwald)
2734 if (EEL_PME(ir->coulombtype))
2736 if (fp)
2738 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2740 if (ir->coulombtype == eelP3M_AD)
2742 please_cite(fp, "Hockney1988");
2743 please_cite(fp, "Ballenegger2012");
2745 else
2747 please_cite(fp, "Essmann95a");
2750 if (ir->ewald_geometry == eewg3DC)
2752 if (fp)
2754 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2756 please_cite(fp, "In-Chul99a");
2759 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2760 init_ewald_tab(&(fr->ewald_table), ir, fp);
2761 if (fp)
2763 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2764 1/fr->ewaldcoeff_q);
2768 if (EVDW_PME(ir->vdwtype))
2770 if (fp)
2772 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2774 please_cite(fp, "Essmann95a");
2775 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2776 if (fp)
2778 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2779 1/fr->ewaldcoeff_lj);
2783 /* Electrostatics */
2784 fr->epsilon_r = ir->epsilon_r;
2785 fr->epsilon_rf = ir->epsilon_rf;
2786 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2787 fr->rcoulomb_switch = ir->rcoulomb_switch;
2788 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2790 /* Parameters for generalized RF */
2791 fr->zsquare = 0.0;
2792 fr->temp = 0.0;
2794 if (fr->eeltype == eelGRF)
2796 init_generalized_rf(fp, mtop, ir, fr);
2799 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2800 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2801 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2802 IR_ELEC_FIELD(*ir) ||
2803 (fr->adress_icor != eAdressICOff)
2806 if (fr->cutoff_scheme == ecutsGROUP &&
2807 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2809 /* Count the total number of charge groups */
2810 fr->cg_nalloc = ncg_mtop(mtop);
2811 srenew(fr->cg_cm, fr->cg_nalloc);
2813 if (fr->shift_vec == NULL)
2815 snew(fr->shift_vec, SHIFTS);
2818 if (fr->fshift == NULL)
2820 snew(fr->fshift, SHIFTS);
2823 if (fr->nbfp == NULL)
2825 fr->ntype = mtop->ffparams.atnr;
2826 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2827 if (EVDW_PME(fr->vdwtype))
2829 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2833 /* Copy the energy group exclusions */
2834 fr->egp_flags = ir->opts.egp_flags;
2836 /* Van der Waals stuff */
2837 fr->rvdw = cutoff_inf(ir->rvdw);
2838 fr->rvdw_switch = ir->rvdw_switch;
2839 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2841 if (fr->rvdw_switch >= fr->rvdw)
2843 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2844 fr->rvdw_switch, fr->rvdw);
2846 if (fp)
2848 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2849 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2850 fr->rvdw_switch, fr->rvdw);
2854 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2856 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2859 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2861 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2864 if (fp)
2866 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2867 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2870 fr->eDispCorr = ir->eDispCorr;
2871 if (ir->eDispCorr != edispcNO)
2873 set_avcsixtwelve(fp, fr, mtop);
2876 if (fr->bBHAM)
2878 set_bham_b_max(fp, fr, mtop);
2881 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2883 /* Copy the GBSA data (radius, volume and surftens for each
2884 * atomtype) from the topology atomtype section to forcerec.
2886 snew(fr->atype_radius, fr->ntype);
2887 snew(fr->atype_vol, fr->ntype);
2888 snew(fr->atype_surftens, fr->ntype);
2889 snew(fr->atype_gb_radius, fr->ntype);
2890 snew(fr->atype_S_hct, fr->ntype);
2892 if (mtop->atomtypes.nr > 0)
2894 for (i = 0; i < fr->ntype; i++)
2896 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2898 for (i = 0; i < fr->ntype; i++)
2900 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2902 for (i = 0; i < fr->ntype; i++)
2904 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2906 for (i = 0; i < fr->ntype; i++)
2908 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2910 for (i = 0; i < fr->ntype; i++)
2912 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2916 /* Generate the GB table if needed */
2917 if (fr->bGB)
2919 #ifdef GMX_DOUBLE
2920 fr->gbtabscale = 2000;
2921 #else
2922 fr->gbtabscale = 500;
2923 #endif
2925 fr->gbtabr = 100;
2926 fr->gbtab = make_gb_table(oenv, fr);
2928 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2930 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2931 if (!DOMAINDECOMP(cr))
2933 make_local_gb(cr, fr->born, ir->gb_algorithm);
2937 /* Set the charge scaling */
2938 if (fr->epsilon_r != 0)
2940 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2942 else
2944 /* eps = 0 is infinite dieletric: no coulomb interactions */
2945 fr->epsfac = 0;
2948 /* Reaction field constants */
2949 if (EEL_RF(fr->eeltype))
2951 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2952 fr->rcoulomb, fr->temp, fr->zsquare, box,
2953 &fr->kappa, &fr->k_rf, &fr->c_rf);
2956 /*This now calculates sum for q and c6*/
2957 set_chargesum(fp, fr, mtop);
2959 /* if we are using LR electrostatics, and they are tabulated,
2960 * the tables will contain modified coulomb interactions.
2961 * Since we want to use the non-shifted ones for 1-4
2962 * coulombic interactions, we must have an extra set of tables.
2965 /* Construct tables.
2966 * A little unnecessary to make both vdw and coul tables sometimes,
2967 * but what the heck... */
2969 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2970 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2972 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2973 fr->bBHAM || fr->bEwald) &&
2974 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2975 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2976 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2978 negp_pp = ir->opts.ngener - ir->nwall;
2979 negptable = 0;
2980 if (!bMakeTables)
2982 bSomeNormalNbListsAreInUse = TRUE;
2983 fr->nnblists = 1;
2985 else
2987 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2988 for (egi = 0; egi < negp_pp; egi++)
2990 for (egj = egi; egj < negp_pp; egj++)
2992 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2993 if (!(egp_flags & EGP_EXCL))
2995 if (egp_flags & EGP_TABLE)
2997 negptable++;
2999 else
3001 bSomeNormalNbListsAreInUse = TRUE;
3006 if (bSomeNormalNbListsAreInUse)
3008 fr->nnblists = negptable + 1;
3010 else
3012 fr->nnblists = negptable;
3014 if (fr->nnblists > 1)
3016 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3020 if (ir->adress)
3022 fr->nnblists *= 2;
3025 snew(fr->nblists, fr->nnblists);
3027 /* This code automatically gives table length tabext without cut-off's,
3028 * in that case grompp should already have checked that we do not need
3029 * normal tables and we only generate tables for 1-4 interactions.
3031 rtab = ir->rlistlong + ir->tabext;
3033 if (bMakeTables)
3035 /* make tables for ordinary interactions */
3036 if (bSomeNormalNbListsAreInUse)
3038 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3039 if (ir->adress)
3041 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3043 if (!bMakeSeparate14Table)
3045 fr->tab14 = fr->nblists[0].table_elec_vdw;
3047 m = 1;
3049 else
3051 m = 0;
3053 if (negptable > 0)
3055 /* Read the special tables for certain energy group pairs */
3056 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3057 for (egi = 0; egi < negp_pp; egi++)
3059 for (egj = egi; egj < negp_pp; egj++)
3061 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3062 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3064 nbl = &(fr->nblists[m]);
3065 if (fr->nnblists > 1)
3067 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3069 /* Read the table file with the two energy groups names appended */
3070 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3071 *mtop->groups.grpname[nm_ind[egi]],
3072 *mtop->groups.grpname[nm_ind[egj]],
3073 &fr->nblists[m]);
3074 if (ir->adress)
3076 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3077 *mtop->groups.grpname[nm_ind[egi]],
3078 *mtop->groups.grpname[nm_ind[egj]],
3079 &fr->nblists[fr->nnblists/2+m]);
3081 m++;
3083 else if (fr->nnblists > 1)
3085 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3091 if (bMakeSeparate14Table)
3093 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3094 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3095 GMX_MAKETABLES_14ONLY);
3098 /* Read AdResS Thermo Force table if needed */
3099 if (fr->adress_icor == eAdressICThermoForce)
3101 /* old todo replace */
3103 if (ir->adress->n_tf_grps > 0)
3105 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3108 else
3110 /* load the default table */
3111 snew(fr->atf_tabs, 1);
3112 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3116 /* Wall stuff */
3117 fr->nwall = ir->nwall;
3118 if (ir->nwall && ir->wall_type == ewtTABLE)
3120 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3123 if (fcd && tabbfn)
3125 fcd->bondtab = make_bonded_tables(fp,
3126 F_TABBONDS, F_TABBONDSNC,
3127 mtop, tabbfn, "b");
3128 fcd->angletab = make_bonded_tables(fp,
3129 F_TABANGLES, -1,
3130 mtop, tabbfn, "a");
3131 fcd->dihtab = make_bonded_tables(fp,
3132 F_TABDIHS, -1,
3133 mtop, tabbfn, "d");
3135 else
3137 if (debug)
3139 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3143 /* QM/MM initialization if requested
3145 if (ir->bQMMM)
3147 fprintf(stderr, "QM/MM calculation requested.\n");
3150 fr->bQMMM = ir->bQMMM;
3151 fr->qr = mk_QMMMrec();
3153 /* Set all the static charge group info */
3154 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3155 &bFEP_NonBonded,
3156 &fr->bExcl_IntraCGAll_InterCGNone);
3157 if (DOMAINDECOMP(cr))
3159 fr->cginfo = NULL;
3161 else
3163 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3166 if (!DOMAINDECOMP(cr))
3168 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3169 mtop->natoms, mtop->natoms, mtop->natoms);
3172 fr->print_force = print_force;
3175 /* coarse load balancing vars */
3176 fr->t_fnbf = 0.;
3177 fr->t_wait = 0.;
3178 fr->timesteps = 0;
3180 /* Initialize neighbor search */
3181 init_ns(fp, cr, &fr->ns, fr, mtop);
3183 if (cr->duty & DUTY_PP)
3185 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3187 if (ir->bAdress)
3189 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3194 /* Initialize the thread working data for bonded interactions */
3195 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3197 snew(fr->excl_load, fr->nthreads+1);
3199 if (fr->cutoff_scheme == ecutsVERLET)
3201 if (ir->rcoulomb != ir->rvdw)
3203 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3206 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3209 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3210 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3212 if (ir->eDispCorr != edispcNO)
3214 calc_enervirdiff(fp, ir->eDispCorr, fr);
3218 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3219 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3220 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3222 void pr_forcerec(FILE *fp, t_forcerec *fr)
3224 int i;
3226 pr_real(fp, fr->rlist);
3227 pr_real(fp, fr->rcoulomb);
3228 pr_real(fp, fr->fudgeQQ);
3229 pr_bool(fp, fr->bGrid);
3230 pr_bool(fp, fr->bTwinRange);
3231 /*pr_int(fp,fr->cg0);
3232 pr_int(fp,fr->hcg);*/
3233 for (i = 0; i < fr->nnblists; i++)
3235 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3237 pr_real(fp, fr->rcoulomb_switch);
3238 pr_real(fp, fr->rcoulomb);
3240 fflush(fp);
3243 void forcerec_set_excl_load(t_forcerec *fr,
3244 const gmx_localtop_t *top)
3246 const int *ind, *a;
3247 int t, i, j, ntot, n, ntarget;
3249 ind = top->excls.index;
3250 a = top->excls.a;
3252 ntot = 0;
3253 for (i = 0; i < top->excls.nr; i++)
3255 for (j = ind[i]; j < ind[i+1]; j++)
3257 if (a[j] > i)
3259 ntot++;
3264 fr->excl_load[0] = 0;
3265 n = 0;
3266 i = 0;
3267 for (t = 1; t <= fr->nthreads; t++)
3269 ntarget = (ntot*t)/fr->nthreads;
3270 while (i < top->excls.nr && n < ntarget)
3272 for (j = ind[i]; j < ind[i+1]; j++)
3274 if (a[j] > i)
3276 n++;
3279 i++;
3281 fr->excl_load[t] = i;