Free-energy support for LJ-PME
[gromacs.git] / src / gromacs / mdlib / forcerec.c
blob019a6826f07ff41dbd3f364676dcccc172bd3b67
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
41 #include <math.h>
42 #include <string.h>
43 #include <assert.h>
44 #include "sysstuff.h"
45 #include "typedefs.h"
46 #include "types/commrec.h"
47 #include "vec.h"
48 #include "gromacs/math/utilities.h"
49 #include "macros.h"
50 #include "smalloc.h"
51 #include "macros.h"
52 #include "gmx_fatal.h"
53 #include "gmx_fatal_collective.h"
54 #include "physics.h"
55 #include "force.h"
56 #include "tables.h"
57 #include "nonbonded.h"
58 #include "invblock.h"
59 #include "names.h"
60 #include "network.h"
61 #include "pbc.h"
62 #include "ns.h"
63 #include "mshift.h"
64 #include "txtdump.h"
65 #include "coulomb.h"
66 #include "md_support.h"
67 #include "md_logging.h"
68 #include "domdec.h"
69 #include "qmmm.h"
70 #include "copyrite.h"
71 #include "mtop_util.h"
72 #include "nbnxn_simd.h"
73 #include "nbnxn_search.h"
74 #include "nbnxn_atomdata.h"
75 #include "nbnxn_consts.h"
76 #include "gmx_omp_nthreads.h"
77 #include "gmx_detect_hardware.h"
78 #include "inputrec.h"
80 #ifdef _MSC_VER
81 /* MSVC definition for __cpuid() */
82 #include <intrin.h>
83 #endif
85 #include "types/nbnxn_cuda_types_ext.h"
86 #include "gpu_utils.h"
87 #include "nbnxn_cuda_data_mgmt.h"
88 #include "pmalloc_cuda.h"
90 t_forcerec *mk_forcerec(void)
92 t_forcerec *fr;
94 snew(fr, 1);
96 return fr;
99 #ifdef DEBUG
100 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
102 int i, j;
104 for (i = 0; (i < atnr); i++)
106 for (j = 0; (j < atnr); j++)
108 fprintf(fp, "%2d - %2d", i, j);
109 if (bBHAM)
111 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
112 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
114 else
116 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
117 C12(nbfp, atnr, i, j)/12.0);
122 #endif
124 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
126 real *nbfp;
127 int i, j, k, atnr;
129 atnr = idef->atnr;
130 if (bBHAM)
132 snew(nbfp, 3*atnr*atnr);
133 for (i = k = 0; (i < atnr); i++)
135 for (j = 0; (j < atnr); j++, k++)
137 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
138 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
139 /* nbfp now includes the 6.0 derivative prefactor */
140 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
144 else
146 snew(nbfp, 2*atnr*atnr);
147 for (i = k = 0; (i < atnr); i++)
149 for (j = 0; (j < atnr); j++, k++)
151 /* nbfp now includes the 6.0/12.0 derivative prefactors */
152 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
153 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
158 return nbfp;
161 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
163 int i, j, k, atnr;
164 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
165 real *grid;
167 /* For LJ-PME simulations, we correct the energies with the reciprocal space
168 * inside of the cut-off. To do this the non-bonded kernels needs to have
169 * access to the C6-values used on the reciprocal grid in pme.c
172 atnr = idef->atnr;
173 snew(grid, 2*atnr*atnr);
174 for (i = k = 0; (i < atnr); i++)
176 for (j = 0; (j < atnr); j++, k++)
178 c6i = idef->iparams[i*(atnr+1)].lj.c6;
179 c12i = idef->iparams[i*(atnr+1)].lj.c12;
180 c6j = idef->iparams[j*(atnr+1)].lj.c6;
181 c12j = idef->iparams[j*(atnr+1)].lj.c12;
182 c6 = sqrt(c6i * c6j);
183 if (fr->ljpme_combination_rule == eljpmeLB
184 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
186 sigmai = pow(c12i / c6i, 1.0/6.0);
187 sigmaj = pow(c12j / c6j, 1.0/6.0);
188 epsi = c6i * c6i / c12i;
189 epsj = c6j * c6j / c12j;
190 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
192 /* Store the elements at the same relative positions as C6 in nbfp in order
193 * to simplify access in the kernels
195 grid[2*(atnr*i+j)] = c6*6.0;
198 return grid;
201 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
203 real *nbfp;
204 int i, j, k, atnr;
205 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
206 real c6, c12;
208 atnr = idef->atnr;
209 snew(nbfp, 2*atnr*atnr);
210 for (i = 0; i < atnr; ++i)
212 for (j = 0; j < atnr; ++j)
214 c6i = idef->iparams[i*(atnr+1)].lj.c6;
215 c12i = idef->iparams[i*(atnr+1)].lj.c12;
216 c6j = idef->iparams[j*(atnr+1)].lj.c6;
217 c12j = idef->iparams[j*(atnr+1)].lj.c12;
218 c6 = sqrt(c6i * c6j);
219 c12 = sqrt(c12i * c12j);
220 if (comb_rule == eCOMB_ARITHMETIC
221 && !gmx_numzero(c6) && !gmx_numzero(c12))
223 sigmai = pow(c12i / c6i, 1.0/6.0);
224 sigmaj = pow(c12j / c6j, 1.0/6.0);
225 epsi = c6i * c6i / c12i;
226 epsj = c6j * c6j / c12j;
227 c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
228 c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
230 C6(nbfp, atnr, i, j) = c6*6.0;
231 C12(nbfp, atnr, i, j) = c12*12.0;
234 return nbfp;
237 /* This routine sets fr->solvent_opt to the most common solvent in the
238 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
239 * the fr->solvent_type array with the correct type (or esolNO).
241 * Charge groups that fulfill the conditions but are not identical to the
242 * most common one will be marked as esolNO in the solvent_type array.
244 * TIP3p is identical to SPC for these purposes, so we call it
245 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
247 * NOTE: QM particle should not
248 * become an optimized solvent. Not even if there is only one charge
249 * group in the Qm
252 typedef struct
254 int model;
255 int count;
256 int vdwtype[4];
257 real charge[4];
258 } solvent_parameters_t;
260 static void
261 check_solvent_cg(const gmx_moltype_t *molt,
262 int cg0,
263 int nmol,
264 const unsigned char *qm_grpnr,
265 const t_grps *qm_grps,
266 t_forcerec * fr,
267 int *n_solvent_parameters,
268 solvent_parameters_t **solvent_parameters_p,
269 int cginfo,
270 int *cg_sp)
272 const t_blocka *excl;
273 t_atom *atom;
274 int j, k;
275 int j0, j1, nj;
276 gmx_bool perturbed;
277 gmx_bool has_vdw[4];
278 gmx_bool match;
279 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
280 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
281 int tjA;
282 gmx_bool qm;
283 solvent_parameters_t *solvent_parameters;
285 /* We use a list with parameters for each solvent type.
286 * Every time we discover a new molecule that fulfills the basic
287 * conditions for a solvent we compare with the previous entries
288 * in these lists. If the parameters are the same we just increment
289 * the counter for that type, and otherwise we create a new type
290 * based on the current molecule.
292 * Once we've finished going through all molecules we check which
293 * solvent is most common, and mark all those molecules while we
294 * clear the flag on all others.
297 solvent_parameters = *solvent_parameters_p;
299 /* Mark the cg first as non optimized */
300 *cg_sp = -1;
302 /* Check if this cg has no exclusions with atoms in other charge groups
303 * and all atoms inside the charge group excluded.
304 * We only have 3 or 4 atom solvent loops.
306 if (GET_CGINFO_EXCL_INTER(cginfo) ||
307 !GET_CGINFO_EXCL_INTRA(cginfo))
309 return;
312 /* Get the indices of the first atom in this charge group */
313 j0 = molt->cgs.index[cg0];
314 j1 = molt->cgs.index[cg0+1];
316 /* Number of atoms in our molecule */
317 nj = j1 - j0;
319 if (debug)
321 fprintf(debug,
322 "Moltype '%s': there are %d atoms in this charge group\n",
323 *molt->name, nj);
326 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
327 * otherwise skip it.
329 if (nj < 3 || nj > 4)
331 return;
334 /* Check if we are doing QM on this group */
335 qm = FALSE;
336 if (qm_grpnr != NULL)
338 for (j = j0; j < j1 && !qm; j++)
340 qm = (qm_grpnr[j] < qm_grps->nr - 1);
343 /* Cannot use solvent optimization with QM */
344 if (qm)
346 return;
349 atom = molt->atoms.atom;
351 /* Still looks like a solvent, time to check parameters */
353 /* If it is perturbed (free energy) we can't use the solvent loops,
354 * so then we just skip to the next molecule.
356 perturbed = FALSE;
358 for (j = j0; j < j1 && !perturbed; j++)
360 perturbed = PERTURBED(atom[j]);
363 if (perturbed)
365 return;
368 /* Now it's only a question if the VdW and charge parameters
369 * are OK. Before doing the check we compare and see if they are
370 * identical to a possible previous solvent type.
371 * First we assign the current types and charges.
373 for (j = 0; j < nj; j++)
375 tmp_vdwtype[j] = atom[j0+j].type;
376 tmp_charge[j] = atom[j0+j].q;
379 /* Does it match any previous solvent type? */
380 for (k = 0; k < *n_solvent_parameters; k++)
382 match = TRUE;
385 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
386 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
387 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
389 match = FALSE;
392 /* Check that types & charges match for all atoms in molecule */
393 for (j = 0; j < nj && match == TRUE; j++)
395 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
397 match = FALSE;
399 if (tmp_charge[j] != solvent_parameters[k].charge[j])
401 match = FALSE;
404 if (match == TRUE)
406 /* Congratulations! We have a matched solvent.
407 * Flag it with this type for later processing.
409 *cg_sp = k;
410 solvent_parameters[k].count += nmol;
412 /* We are done with this charge group */
413 return;
417 /* If we get here, we have a tentative new solvent type.
418 * Before we add it we must check that it fulfills the requirements
419 * of the solvent optimized loops. First determine which atoms have
420 * VdW interactions.
422 for (j = 0; j < nj; j++)
424 has_vdw[j] = FALSE;
425 tjA = tmp_vdwtype[j];
427 /* Go through all other tpes and see if any have non-zero
428 * VdW parameters when combined with this one.
430 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
432 /* We already checked that the atoms weren't perturbed,
433 * so we only need to check state A now.
435 if (fr->bBHAM)
437 has_vdw[j] = (has_vdw[j] ||
438 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
439 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
440 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
442 else
444 /* Standard LJ */
445 has_vdw[j] = (has_vdw[j] ||
446 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
447 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
452 /* Now we know all we need to make the final check and assignment. */
453 if (nj == 3)
455 /* So, is it an SPC?
456 * For this we require thatn all atoms have charge,
457 * the charges on atom 2 & 3 should be the same, and only
458 * atom 1 might have VdW.
460 if (has_vdw[1] == FALSE &&
461 has_vdw[2] == FALSE &&
462 tmp_charge[0] != 0 &&
463 tmp_charge[1] != 0 &&
464 tmp_charge[2] == tmp_charge[1])
466 srenew(solvent_parameters, *n_solvent_parameters+1);
467 solvent_parameters[*n_solvent_parameters].model = esolSPC;
468 solvent_parameters[*n_solvent_parameters].count = nmol;
469 for (k = 0; k < 3; k++)
471 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
472 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
475 *cg_sp = *n_solvent_parameters;
476 (*n_solvent_parameters)++;
479 else if (nj == 4)
481 /* Or could it be a TIP4P?
482 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
483 * Only atom 1 mght have VdW.
485 if (has_vdw[1] == FALSE &&
486 has_vdw[2] == FALSE &&
487 has_vdw[3] == FALSE &&
488 tmp_charge[0] == 0 &&
489 tmp_charge[1] != 0 &&
490 tmp_charge[2] == tmp_charge[1] &&
491 tmp_charge[3] != 0)
493 srenew(solvent_parameters, *n_solvent_parameters+1);
494 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
495 solvent_parameters[*n_solvent_parameters].count = nmol;
496 for (k = 0; k < 4; k++)
498 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
499 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
502 *cg_sp = *n_solvent_parameters;
503 (*n_solvent_parameters)++;
507 *solvent_parameters_p = solvent_parameters;
510 static void
511 check_solvent(FILE * fp,
512 const gmx_mtop_t * mtop,
513 t_forcerec * fr,
514 cginfo_mb_t *cginfo_mb)
516 const t_block * cgs;
517 const t_block * mols;
518 const gmx_moltype_t *molt;
519 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
520 int n_solvent_parameters;
521 solvent_parameters_t *solvent_parameters;
522 int **cg_sp;
523 int bestsp, bestsol;
525 if (debug)
527 fprintf(debug, "Going to determine what solvent types we have.\n");
530 mols = &mtop->mols;
532 n_solvent_parameters = 0;
533 solvent_parameters = NULL;
534 /* Allocate temporary array for solvent type */
535 snew(cg_sp, mtop->nmolblock);
537 cg_offset = 0;
538 at_offset = 0;
539 for (mb = 0; mb < mtop->nmolblock; mb++)
541 molt = &mtop->moltype[mtop->molblock[mb].type];
542 cgs = &molt->cgs;
543 /* Here we have to loop over all individual molecules
544 * because we need to check for QMMM particles.
546 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
547 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
548 nmol = mtop->molblock[mb].nmol/nmol_ch;
549 for (mol = 0; mol < nmol_ch; mol++)
551 cgm = mol*cgs->nr;
552 am = mol*cgs->index[cgs->nr];
553 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
555 check_solvent_cg(molt, cg_mol, nmol,
556 mtop->groups.grpnr[egcQMMM] ?
557 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
558 &mtop->groups.grps[egcQMMM],
560 &n_solvent_parameters, &solvent_parameters,
561 cginfo_mb[mb].cginfo[cgm+cg_mol],
562 &cg_sp[mb][cgm+cg_mol]);
565 cg_offset += cgs->nr;
566 at_offset += cgs->index[cgs->nr];
569 /* Puh! We finished going through all charge groups.
570 * Now find the most common solvent model.
573 /* Most common solvent this far */
574 bestsp = -2;
575 for (i = 0; i < n_solvent_parameters; i++)
577 if (bestsp == -2 ||
578 solvent_parameters[i].count > solvent_parameters[bestsp].count)
580 bestsp = i;
584 if (bestsp >= 0)
586 bestsol = solvent_parameters[bestsp].model;
588 else
590 bestsol = esolNO;
593 #ifdef DISABLE_WATER_NLIST
594 bestsol = esolNO;
595 #endif
597 fr->nWatMol = 0;
598 for (mb = 0; mb < mtop->nmolblock; mb++)
600 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
601 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
602 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
604 if (cg_sp[mb][i] == bestsp)
606 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
607 fr->nWatMol += nmol;
609 else
611 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
614 sfree(cg_sp[mb]);
616 sfree(cg_sp);
618 if (bestsol != esolNO && fp != NULL)
620 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
621 esol_names[bestsol],
622 solvent_parameters[bestsp].count);
625 sfree(solvent_parameters);
626 fr->solvent_opt = bestsol;
629 enum {
630 acNONE = 0, acCONSTRAINT, acSETTLE
633 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
634 t_forcerec *fr, gmx_bool bNoSolvOpt,
635 gmx_bool *bFEP_NonBonded,
636 gmx_bool *bExcl_IntraCGAll_InterCGNone)
638 const t_block *cgs;
639 const t_blocka *excl;
640 const gmx_moltype_t *molt;
641 const gmx_molblock_t *molb;
642 cginfo_mb_t *cginfo_mb;
643 gmx_bool *type_VDW;
644 int *cginfo;
645 int cg_offset, a_offset, cgm, am;
646 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
647 int *a_con;
648 int ftype;
649 int ia;
650 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
652 ncg_tot = ncg_mtop(mtop);
653 snew(cginfo_mb, mtop->nmolblock);
655 snew(type_VDW, fr->ntype);
656 for (ai = 0; ai < fr->ntype; ai++)
658 type_VDW[ai] = FALSE;
659 for (j = 0; j < fr->ntype; j++)
661 type_VDW[ai] = type_VDW[ai] ||
662 fr->bBHAM ||
663 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
664 C12(fr->nbfp, fr->ntype, ai, j) != 0;
668 *bFEP_NonBonded = FALSE;
669 *bExcl_IntraCGAll_InterCGNone = TRUE;
671 excl_nalloc = 10;
672 snew(bExcl, excl_nalloc);
673 cg_offset = 0;
674 a_offset = 0;
675 for (mb = 0; mb < mtop->nmolblock; mb++)
677 molb = &mtop->molblock[mb];
678 molt = &mtop->moltype[molb->type];
679 cgs = &molt->cgs;
680 excl = &molt->excls;
682 /* Check if the cginfo is identical for all molecules in this block.
683 * If so, we only need an array of the size of one molecule.
684 * Otherwise we make an array of #mol times #cgs per molecule.
686 bId = TRUE;
687 am = 0;
688 for (m = 0; m < molb->nmol; m++)
690 am = m*cgs->index[cgs->nr];
691 for (cg = 0; cg < cgs->nr; cg++)
693 a0 = cgs->index[cg];
694 a1 = cgs->index[cg+1];
695 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
696 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
698 bId = FALSE;
700 if (mtop->groups.grpnr[egcQMMM] != NULL)
702 for (ai = a0; ai < a1; ai++)
704 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
705 mtop->groups.grpnr[egcQMMM][a_offset +ai])
707 bId = FALSE;
714 cginfo_mb[mb].cg_start = cg_offset;
715 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
716 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
717 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
718 cginfo = cginfo_mb[mb].cginfo;
720 /* Set constraints flags for constrained atoms */
721 snew(a_con, molt->atoms.nr);
722 for (ftype = 0; ftype < F_NRE; ftype++)
724 if (interaction_function[ftype].flags & IF_CONSTRAINT)
726 int nral;
728 nral = NRAL(ftype);
729 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
731 int a;
733 for (a = 0; a < nral; a++)
735 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
736 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
742 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
744 cgm = m*cgs->nr;
745 am = m*cgs->index[cgs->nr];
746 for (cg = 0; cg < cgs->nr; cg++)
748 a0 = cgs->index[cg];
749 a1 = cgs->index[cg+1];
751 /* Store the energy group in cginfo */
752 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
753 SET_CGINFO_GID(cginfo[cgm+cg], gid);
755 /* Check the intra/inter charge group exclusions */
756 if (a1-a0 > excl_nalloc)
758 excl_nalloc = a1 - a0;
759 srenew(bExcl, excl_nalloc);
761 /* bExclIntraAll: all intra cg interactions excluded
762 * bExclInter: any inter cg interactions excluded
764 bExclIntraAll = TRUE;
765 bExclInter = FALSE;
766 bHaveVDW = FALSE;
767 bHaveQ = FALSE;
768 bHavePerturbedAtoms = FALSE;
769 for (ai = a0; ai < a1; ai++)
771 /* Check VDW and electrostatic interactions */
772 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
773 type_VDW[molt->atoms.atom[ai].typeB]);
774 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
775 molt->atoms.atom[ai].qB != 0);
777 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
779 /* Clear the exclusion list for atom ai */
780 for (aj = a0; aj < a1; aj++)
782 bExcl[aj-a0] = FALSE;
784 /* Loop over all the exclusions of atom ai */
785 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
787 aj = excl->a[j];
788 if (aj < a0 || aj >= a1)
790 bExclInter = TRUE;
792 else
794 bExcl[aj-a0] = TRUE;
797 /* Check if ai excludes a0 to a1 */
798 for (aj = a0; aj < a1; aj++)
800 if (!bExcl[aj-a0])
802 bExclIntraAll = FALSE;
806 switch (a_con[ai])
808 case acCONSTRAINT:
809 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
810 break;
811 case acSETTLE:
812 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
813 break;
814 default:
815 break;
818 if (bExclIntraAll)
820 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
822 if (bExclInter)
824 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
826 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
828 /* The size in cginfo is currently only read with DD */
829 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
831 if (bHaveVDW)
833 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
835 if (bHaveQ)
837 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
839 if (bHavePerturbedAtoms && fr->efep != efepNO)
841 SET_CGINFO_FEP(cginfo[cgm+cg]);
842 *bFEP_NonBonded = TRUE;
844 /* Store the charge group size */
845 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
847 if (!bExclIntraAll || bExclInter)
849 *bExcl_IntraCGAll_InterCGNone = FALSE;
854 sfree(a_con);
856 cg_offset += molb->nmol*cgs->nr;
857 a_offset += molb->nmol*cgs->index[cgs->nr];
859 sfree(bExcl);
861 /* the solvent optimizer is called after the QM is initialized,
862 * because we don't want to have the QM subsystemto become an
863 * optimized solvent
866 check_solvent(fplog, mtop, fr, cginfo_mb);
868 if (getenv("GMX_NO_SOLV_OPT"))
870 if (fplog)
872 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
873 "Disabling all solvent optimization\n");
875 fr->solvent_opt = esolNO;
877 if (bNoSolvOpt)
879 fr->solvent_opt = esolNO;
881 if (!fr->solvent_opt)
883 for (mb = 0; mb < mtop->nmolblock; mb++)
885 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
887 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
892 return cginfo_mb;
895 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
897 int ncg, mb, cg;
898 int *cginfo;
900 ncg = cgi_mb[nmb-1].cg_end;
901 snew(cginfo, ncg);
902 mb = 0;
903 for (cg = 0; cg < ncg; cg++)
905 while (cg >= cgi_mb[mb].cg_end)
907 mb++;
909 cginfo[cg] =
910 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
913 return cginfo;
916 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
918 /*This now calculates sum for q and c6*/
919 double qsum, q2sum, q, c6sum, c6;
920 int mb, nmol, i;
921 const t_atoms *atoms;
923 qsum = 0;
924 q2sum = 0;
925 c6sum = 0;
926 for (mb = 0; mb < mtop->nmolblock; mb++)
928 nmol = mtop->molblock[mb].nmol;
929 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
930 for (i = 0; i < atoms->nr; i++)
932 q = atoms->atom[i].q;
933 qsum += nmol*q;
934 q2sum += nmol*q*q;
935 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
936 c6sum += nmol*c6;
939 fr->qsum[0] = qsum;
940 fr->q2sum[0] = q2sum;
941 fr->c6sum[0] = c6sum;
943 if (fr->efep != efepNO)
945 qsum = 0;
946 q2sum = 0;
947 c6sum = 0;
948 for (mb = 0; mb < mtop->nmolblock; mb++)
950 nmol = mtop->molblock[mb].nmol;
951 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
952 for (i = 0; i < atoms->nr; i++)
954 q = atoms->atom[i].qB;
955 qsum += nmol*q;
956 q2sum += nmol*q*q;
957 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
958 c6sum += nmol*c6;
960 fr->qsum[1] = qsum;
961 fr->q2sum[1] = q2sum;
962 fr->c6sum[1] = c6sum;
965 else
967 fr->qsum[1] = fr->qsum[0];
968 fr->q2sum[1] = fr->q2sum[0];
969 fr->c6sum[1] = fr->c6sum[0];
971 if (log)
973 if (fr->efep == efepNO)
975 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
977 else
979 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
980 fr->qsum[0], fr->qsum[1]);
985 void update_forcerec(t_forcerec *fr, matrix box)
987 if (fr->eeltype == eelGRF)
989 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
990 fr->rcoulomb, fr->temp, fr->zsquare, box,
991 &fr->kappa, &fr->k_rf, &fr->c_rf);
995 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
997 const t_atoms *atoms, *atoms_tpi;
998 const t_blocka *excl;
999 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
1000 gmx_int64_t npair, npair_ij, tmpi, tmpj;
1001 double csix, ctwelve;
1002 int ntp, *typecount;
1003 gmx_bool bBHAM;
1004 real *nbfp;
1005 real *nbfp_comb = NULL;
1007 ntp = fr->ntype;
1008 bBHAM = fr->bBHAM;
1009 nbfp = fr->nbfp;
1011 /* For LJ-PME, we want to correct for the difference between the
1012 * actual C6 values and the C6 values used by the LJ-PME based on
1013 * combination rules. */
1015 if (EVDW_PME(fr->vdwtype))
1017 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1018 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1019 for (tpi = 0; tpi < ntp; ++tpi)
1021 for (tpj = 0; tpj < ntp; ++tpj)
1023 C6(nbfp_comb, ntp, tpi, tpj) =
1024 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1025 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1028 nbfp = nbfp_comb;
1030 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1032 csix = 0;
1033 ctwelve = 0;
1034 npair = 0;
1035 nexcl = 0;
1036 if (!fr->n_tpi)
1038 /* Count the types so we avoid natoms^2 operations */
1039 snew(typecount, ntp);
1040 gmx_mtop_count_atomtypes(mtop, q, typecount);
1042 for (tpi = 0; tpi < ntp; tpi++)
1044 for (tpj = tpi; tpj < ntp; tpj++)
1046 tmpi = typecount[tpi];
1047 tmpj = typecount[tpj];
1048 if (tpi != tpj)
1050 npair_ij = tmpi*tmpj;
1052 else
1054 npair_ij = tmpi*(tmpi - 1)/2;
1056 if (bBHAM)
1058 /* nbfp now includes the 6.0 derivative prefactor */
1059 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1061 else
1063 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1064 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1065 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1067 npair += npair_ij;
1070 sfree(typecount);
1071 /* Subtract the excluded pairs.
1072 * The main reason for substracting exclusions is that in some cases
1073 * some combinations might never occur and the parameters could have
1074 * any value. These unused values should not influence the dispersion
1075 * correction.
1077 for (mb = 0; mb < mtop->nmolblock; mb++)
1079 nmol = mtop->molblock[mb].nmol;
1080 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1081 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1082 for (i = 0; (i < atoms->nr); i++)
1084 if (q == 0)
1086 tpi = atoms->atom[i].type;
1088 else
1090 tpi = atoms->atom[i].typeB;
1092 j1 = excl->index[i];
1093 j2 = excl->index[i+1];
1094 for (j = j1; j < j2; j++)
1096 k = excl->a[j];
1097 if (k > i)
1099 if (q == 0)
1101 tpj = atoms->atom[k].type;
1103 else
1105 tpj = atoms->atom[k].typeB;
1107 if (bBHAM)
1109 /* nbfp now includes the 6.0 derivative prefactor */
1110 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1112 else
1114 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1115 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1116 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1118 nexcl += nmol;
1124 else
1126 /* Only correct for the interaction of the test particle
1127 * with the rest of the system.
1129 atoms_tpi =
1130 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1132 npair = 0;
1133 for (mb = 0; mb < mtop->nmolblock; mb++)
1135 nmol = mtop->molblock[mb].nmol;
1136 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1137 for (j = 0; j < atoms->nr; j++)
1139 nmolc = nmol;
1140 /* Remove the interaction of the test charge group
1141 * with itself.
1143 if (mb == mtop->nmolblock-1)
1145 nmolc--;
1147 if (mb == 0 && nmol == 1)
1149 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1152 if (q == 0)
1154 tpj = atoms->atom[j].type;
1156 else
1158 tpj = atoms->atom[j].typeB;
1160 for (i = 0; i < fr->n_tpi; i++)
1162 if (q == 0)
1164 tpi = atoms_tpi->atom[i].type;
1166 else
1168 tpi = atoms_tpi->atom[i].typeB;
1170 if (bBHAM)
1172 /* nbfp now includes the 6.0 derivative prefactor */
1173 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1175 else
1177 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1178 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1179 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1181 npair += nmolc;
1186 if (npair - nexcl <= 0 && fplog)
1188 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1189 csix = 0;
1190 ctwelve = 0;
1192 else
1194 csix /= npair - nexcl;
1195 ctwelve /= npair - nexcl;
1197 if (debug)
1199 fprintf(debug, "Counted %d exclusions\n", nexcl);
1200 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1201 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1203 fr->avcsix[q] = csix;
1204 fr->avctwelve[q] = ctwelve;
1207 if (EVDW_PME(fr->vdwtype))
1209 sfree(nbfp_comb);
1212 if (fplog != NULL)
1214 if (fr->eDispCorr == edispcAllEner ||
1215 fr->eDispCorr == edispcAllEnerPres)
1217 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1218 fr->avcsix[0], fr->avctwelve[0]);
1220 else
1222 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1228 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1229 const gmx_mtop_t *mtop)
1231 const t_atoms *at1, *at2;
1232 int mt1, mt2, i, j, tpi, tpj, ntypes;
1233 real b, bmin;
1234 real *nbfp;
1236 if (fplog)
1238 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1240 nbfp = fr->nbfp;
1241 ntypes = fr->ntype;
1243 bmin = -1;
1244 fr->bham_b_max = 0;
1245 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1247 at1 = &mtop->moltype[mt1].atoms;
1248 for (i = 0; (i < at1->nr); i++)
1250 tpi = at1->atom[i].type;
1251 if (tpi >= ntypes)
1253 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1256 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1258 at2 = &mtop->moltype[mt2].atoms;
1259 for (j = 0; (j < at2->nr); j++)
1261 tpj = at2->atom[j].type;
1262 if (tpj >= ntypes)
1264 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1266 b = BHAMB(nbfp, ntypes, tpi, tpj);
1267 if (b > fr->bham_b_max)
1269 fr->bham_b_max = b;
1271 if ((b < bmin) || (bmin == -1))
1273 bmin = b;
1279 if (fplog)
1281 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1282 bmin, fr->bham_b_max);
1286 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1287 t_forcerec *fr, real rtab,
1288 const t_commrec *cr,
1289 const char *tabfn, char *eg1, char *eg2,
1290 t_nblists *nbl)
1292 char buf[STRLEN];
1293 int i, j;
1295 if (tabfn == NULL)
1297 if (debug)
1299 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1301 return;
1304 sprintf(buf, "%s", tabfn);
1305 if (eg1 && eg2)
1307 /* Append the two energy group names */
1308 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1309 eg1, eg2, ftp2ext(efXVG));
1311 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1312 /* Copy the contents of the table to separate coulomb and LJ tables too,
1313 * to improve cache performance.
1315 /* For performance reasons we want
1316 * the table data to be aligned to 16-byte. The pointers could be freed
1317 * but currently aren't.
1319 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1320 nbl->table_elec.format = nbl->table_elec_vdw.format;
1321 nbl->table_elec.r = nbl->table_elec_vdw.r;
1322 nbl->table_elec.n = nbl->table_elec_vdw.n;
1323 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1324 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1325 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1326 nbl->table_elec.ninteractions = 1;
1327 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1328 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1330 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1331 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1332 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1333 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1334 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1335 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1336 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1337 nbl->table_vdw.ninteractions = 2;
1338 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1339 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1341 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1343 for (j = 0; j < 4; j++)
1345 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1347 for (j = 0; j < 8; j++)
1349 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1354 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1355 int *ncount, int **count)
1357 const gmx_moltype_t *molt;
1358 const t_ilist *il;
1359 int mt, ftype, stride, i, j, tabnr;
1361 for (mt = 0; mt < mtop->nmoltype; mt++)
1363 molt = &mtop->moltype[mt];
1364 for (ftype = 0; ftype < F_NRE; ftype++)
1366 if (ftype == ftype1 || ftype == ftype2)
1368 il = &molt->ilist[ftype];
1369 stride = 1 + NRAL(ftype);
1370 for (i = 0; i < il->nr; i += stride)
1372 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1373 if (tabnr < 0)
1375 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1377 if (tabnr >= *ncount)
1379 srenew(*count, tabnr+1);
1380 for (j = *ncount; j < tabnr+1; j++)
1382 (*count)[j] = 0;
1384 *ncount = tabnr+1;
1386 (*count)[tabnr]++;
1393 static bondedtable_t *make_bonded_tables(FILE *fplog,
1394 int ftype1, int ftype2,
1395 const gmx_mtop_t *mtop,
1396 const char *basefn, const char *tabext)
1398 int i, ncount, *count;
1399 char tabfn[STRLEN];
1400 bondedtable_t *tab;
1402 tab = NULL;
1404 ncount = 0;
1405 count = NULL;
1406 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1408 if (ncount > 0)
1410 snew(tab, ncount);
1411 for (i = 0; i < ncount; i++)
1413 if (count[i] > 0)
1415 sprintf(tabfn, "%s", basefn);
1416 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1417 tabext, i, ftp2ext(efXVG));
1418 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1421 sfree(count);
1424 return tab;
1427 void forcerec_set_ranges(t_forcerec *fr,
1428 int ncg_home, int ncg_force,
1429 int natoms_force,
1430 int natoms_force_constr, int natoms_f_novirsum)
1432 fr->cg0 = 0;
1433 fr->hcg = ncg_home;
1435 /* fr->ncg_force is unused in the standard code,
1436 * but it can be useful for modified code dealing with charge groups.
1438 fr->ncg_force = ncg_force;
1439 fr->natoms_force = natoms_force;
1440 fr->natoms_force_constr = natoms_force_constr;
1442 if (fr->natoms_force_constr > fr->nalloc_force)
1444 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1446 if (fr->bTwinRange)
1448 srenew(fr->f_twin, fr->nalloc_force);
1452 if (fr->bF_NoVirSum)
1454 fr->f_novirsum_n = natoms_f_novirsum;
1455 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1457 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1458 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1461 else
1463 fr->f_novirsum_n = 0;
1467 static real cutoff_inf(real cutoff)
1469 if (cutoff == 0)
1471 cutoff = GMX_CUTOFF_INF;
1474 return cutoff;
1477 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1478 t_forcerec *fr, const t_inputrec *ir,
1479 const char *tabfn, const gmx_mtop_t *mtop,
1480 matrix box)
1482 char buf[STRLEN];
1483 int i, j;
1485 if (tabfn == NULL)
1487 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1488 return;
1491 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1493 sprintf(buf, "%s", tabfn);
1494 for (i = 0; i < ir->adress->n_tf_grps; i++)
1496 j = ir->adress->tf_table_index[i]; /* get energy group index */
1497 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1498 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1499 if (fp)
1501 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1503 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1508 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1510 gmx_bool bAllvsAll;
1512 bAllvsAll =
1514 ir->rlist == 0 &&
1515 ir->rcoulomb == 0 &&
1516 ir->rvdw == 0 &&
1517 ir->ePBC == epbcNONE &&
1518 ir->vdwtype == evdwCUT &&
1519 ir->coulombtype == eelCUT &&
1520 ir->efep == efepNO &&
1521 (ir->implicit_solvent == eisNO ||
1522 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1523 ir->gb_algorithm == egbHCT ||
1524 ir->gb_algorithm == egbOBC))) &&
1525 getenv("GMX_NO_ALLVSALL") == NULL
1528 if (bAllvsAll && ir->opts.ngener > 1)
1530 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1532 if (bPrintNote)
1534 if (MASTER(cr))
1536 fprintf(stderr, "\n%s\n", note);
1538 if (fp != NULL)
1540 fprintf(fp, "\n%s\n", note);
1543 bAllvsAll = FALSE;
1546 if (bAllvsAll && fp && MASTER(cr))
1548 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1551 return bAllvsAll;
1555 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1557 int t, i;
1559 /* These thread local data structures are used for bondeds only */
1560 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1562 if (fr->nthreads > 1)
1564 snew(fr->f_t, fr->nthreads);
1565 /* Thread 0 uses the global force and energy arrays */
1566 for (t = 1; t < fr->nthreads; t++)
1568 fr->f_t[t].f = NULL;
1569 fr->f_t[t].f_nalloc = 0;
1570 snew(fr->f_t[t].fshift, SHIFTS);
1571 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1572 for (i = 0; i < egNR; i++)
1574 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1581 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1582 const t_commrec *cr,
1583 const t_inputrec *ir,
1584 gmx_bool bGPU)
1586 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1588 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1589 bGPU ? "GPUs" : "SIMD kernels",
1590 bGPU ? "CPU only" : "plain-C kernels");
1591 return FALSE;
1594 return TRUE;
1598 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1599 int *kernel_type,
1600 int *ewald_excl)
1602 *kernel_type = nbnxnk4x4_PlainC;
1603 *ewald_excl = ewaldexclTable;
1605 #ifdef GMX_NBNXN_SIMD
1607 #ifdef GMX_NBNXN_SIMD_4XN
1608 *kernel_type = nbnxnk4xN_SIMD_4xN;
1609 #endif
1610 #ifdef GMX_NBNXN_SIMD_2XNN
1611 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1612 #endif
1614 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1615 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1616 * Currently this is based on the SIMD acceleration choice,
1617 * but it might be better to decide this at runtime based on CPU.
1619 * 4xN calculates more (zero) interactions, but has less pair-search
1620 * work and much better kernel instruction scheduling.
1622 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1623 * which doesn't have FMA, both the analytical and tabulated Ewald
1624 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1625 * 2x(4+4) because it results in significantly fewer pairs.
1626 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1627 * 10% with HT, 50% without HT. As we currently don't detect the actual
1628 * use of HT, use 4x8 to avoid a potential performance hit.
1629 * On Intel Haswell 4x8 is always faster.
1631 *kernel_type = nbnxnk4xN_SIMD_4xN;
1633 #ifndef GMX_SIMD_HAVE_FMA
1634 if (EEL_PME_EWALD(ir->coulombtype) ||
1635 EVDW_PME(ir->vdwtype))
1637 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1638 * There are enough instructions to make 2x(4+4) efficient.
1640 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1642 #endif
1643 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1646 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1648 #ifdef GMX_NBNXN_SIMD_4XN
1649 *kernel_type = nbnxnk4xN_SIMD_4xN;
1650 #else
1651 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1652 #endif
1654 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1656 #ifdef GMX_NBNXN_SIMD_2XNN
1657 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1658 #else
1659 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1660 #endif
1663 /* Analytical Ewald exclusion correction is only an option in
1664 * the SIMD kernel.
1665 * Since table lookup's don't parallelize with SIMD, analytical
1666 * will probably always be faster for a SIMD width of 8 or more.
1667 * With FMA analytical is sometimes faster for a width if 4 as well.
1668 * On BlueGene/Q, this is faster regardless of precision.
1669 * In single precision, this is faster on Bulldozer.
1671 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1672 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1673 defined GMX_SIMD_IBM_QPX
1674 *ewald_excl = ewaldexclAnalytical;
1675 #endif
1676 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1678 *ewald_excl = ewaldexclTable;
1680 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1682 *ewald_excl = ewaldexclAnalytical;
1686 #endif /* GMX_NBNXN_SIMD */
1690 const char *lookup_nbnxn_kernel_name(int kernel_type)
1692 const char *returnvalue = NULL;
1693 switch (kernel_type)
1695 case nbnxnkNotSet:
1696 returnvalue = "not set";
1697 break;
1698 case nbnxnk4x4_PlainC:
1699 returnvalue = "plain C";
1700 break;
1701 case nbnxnk4xN_SIMD_4xN:
1702 case nbnxnk4xN_SIMD_2xNN:
1703 #ifdef GMX_NBNXN_SIMD
1704 #if defined GMX_SIMD_X86_SSE2
1705 returnvalue = "SSE2";
1706 #elif defined GMX_SIMD_X86_SSE4_1
1707 returnvalue = "SSE4.1";
1708 #elif defined GMX_SIMD_X86_AVX_128_FMA
1709 returnvalue = "AVX_128_FMA";
1710 #elif defined GMX_SIMD_X86_AVX_256
1711 returnvalue = "AVX_256";
1712 #elif defined GMX_SIMD_X86_AVX2_256
1713 returnvalue = "AVX2_256";
1714 #else
1715 returnvalue = "SIMD";
1716 #endif
1717 #else /* GMX_NBNXN_SIMD */
1718 returnvalue = "not available";
1719 #endif /* GMX_NBNXN_SIMD */
1720 break;
1721 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1722 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1724 case nbnxnkNR:
1725 default:
1726 gmx_fatal(FARGS, "Illegal kernel type selected");
1727 returnvalue = NULL;
1728 break;
1730 return returnvalue;
1733 static void pick_nbnxn_kernel(FILE *fp,
1734 const t_commrec *cr,
1735 gmx_bool use_simd_kernels,
1736 gmx_bool bUseGPU,
1737 gmx_bool bEmulateGPU,
1738 const t_inputrec *ir,
1739 int *kernel_type,
1740 int *ewald_excl,
1741 gmx_bool bDoNonbonded)
1743 assert(kernel_type);
1745 *kernel_type = nbnxnkNotSet;
1746 *ewald_excl = ewaldexclTable;
1748 if (bEmulateGPU)
1750 *kernel_type = nbnxnk8x8x8_PlainC;
1752 if (bDoNonbonded)
1754 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1757 else if (bUseGPU)
1759 *kernel_type = nbnxnk8x8x8_CUDA;
1762 if (*kernel_type == nbnxnkNotSet)
1764 /* LJ PME with LB combination rule does 7 mesh operations.
1765 * This so slow that we don't compile SIMD non-bonded kernels for that.
1767 if (use_simd_kernels &&
1768 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1770 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1772 else
1774 *kernel_type = nbnxnk4x4_PlainC;
1778 if (bDoNonbonded && fp != NULL)
1780 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1781 lookup_nbnxn_kernel_name(*kernel_type),
1782 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1783 nbnxn_kernel_to_cj_size(*kernel_type));
1787 static void pick_nbnxn_resources(const t_commrec *cr,
1788 const gmx_hw_info_t *hwinfo,
1789 gmx_bool bDoNonbonded,
1790 gmx_bool *bUseGPU,
1791 gmx_bool *bEmulateGPU,
1792 const gmx_gpu_opt_t *gpu_opt)
1794 gmx_bool bEmulateGPUEnvVarSet;
1795 char gpu_err_str[STRLEN];
1797 *bUseGPU = FALSE;
1799 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1801 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1802 * GPUs (currently) only handle non-bonded calculations, we will
1803 * automatically switch to emulation if non-bonded calculations are
1804 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1805 * way to turn off GPU initialization, data movement, and cleanup.
1807 * GPU emulation can be useful to assess the performance one can expect by
1808 * adding GPU(s) to the machine. The conditional below allows this even
1809 * if mdrun is compiled without GPU acceleration support.
1810 * Note that you should freezing the system as otherwise it will explode.
1812 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1813 (!bDoNonbonded &&
1814 gpu_opt->ncuda_dev_use > 0));
1816 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1818 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1820 /* Each PP node will use the intra-node id-th device from the
1821 * list of detected/selected GPUs. */
1822 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1823 &hwinfo->gpu_info, gpu_opt))
1825 /* At this point the init should never fail as we made sure that
1826 * we have all the GPUs we need. If it still does, we'll bail. */
1827 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1828 cr->nodeid,
1829 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1830 cr->rank_pp_intranode),
1831 gpu_err_str);
1834 /* Here we actually turn on hardware GPU acceleration */
1835 *bUseGPU = TRUE;
1839 gmx_bool uses_simple_tables(int cutoff_scheme,
1840 nonbonded_verlet_t *nbv,
1841 int group)
1843 gmx_bool bUsesSimpleTables = TRUE;
1844 int grp_index;
1846 switch (cutoff_scheme)
1848 case ecutsGROUP:
1849 bUsesSimpleTables = TRUE;
1850 break;
1851 case ecutsVERLET:
1852 assert(NULL != nbv && NULL != nbv->grp);
1853 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1854 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1855 break;
1856 default:
1857 gmx_incons("unimplemented");
1859 return bUsesSimpleTables;
1862 static void init_ewald_f_table(interaction_const_t *ic,
1863 gmx_bool bUsesSimpleTables,
1864 real rtab)
1866 real maxr;
1868 if (bUsesSimpleTables)
1870 /* With a spacing of 0.0005 we are at the force summation accuracy
1871 * for the SSE kernels for "normal" atomistic simulations.
1873 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1874 ic->rcoulomb);
1876 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1877 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1879 else
1881 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1882 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1883 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1886 sfree_aligned(ic->tabq_coul_FDV0);
1887 sfree_aligned(ic->tabq_coul_F);
1888 sfree_aligned(ic->tabq_coul_V);
1890 sfree_aligned(ic->tabq_vdw_FDV0);
1891 sfree_aligned(ic->tabq_vdw_F);
1892 sfree_aligned(ic->tabq_vdw_V);
1894 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1896 /* Create the original table data in FDV0 */
1897 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1898 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1899 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1900 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1901 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1904 if (EVDW_PME(ic->vdwtype))
1906 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1907 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1908 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1909 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1910 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1914 void init_interaction_const_tables(FILE *fp,
1915 interaction_const_t *ic,
1916 gmx_bool bUsesSimpleTables,
1917 real rtab)
1919 real spacing;
1921 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1923 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1925 if (fp != NULL)
1927 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1928 1/ic->tabq_scale, ic->tabq_size);
1933 static void clear_force_switch_constants(shift_consts_t *sc)
1935 sc->c2 = 0;
1936 sc->c3 = 0;
1937 sc->cpot = 0;
1940 static void force_switch_constants(real p,
1941 real rsw, real rc,
1942 shift_consts_t *sc)
1944 /* Here we determine the coefficient for shifting the force to zero
1945 * between distance rsw and the cut-off rc.
1946 * For a potential of r^-p, we have force p*r^-(p+1).
1947 * But to save flops we absorb p in the coefficient.
1948 * Thus we get:
1949 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1950 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1952 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1953 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1954 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1957 static void potential_switch_constants(real rsw, real rc,
1958 switch_consts_t *sc)
1960 /* The switch function is 1 at rsw and 0 at rc.
1961 * The derivative and second derivate are zero at both ends.
1962 * rsw = max(r - r_switch, 0)
1963 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1964 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1965 * force = force*dsw - potential*sw
1966 * potential *= sw
1968 sc->c3 = -10*pow(rc - rsw, -3);
1969 sc->c4 = 15*pow(rc - rsw, -4);
1970 sc->c5 = -6*pow(rc - rsw, -5);
1973 static void
1974 init_interaction_const(FILE *fp,
1975 const t_commrec gmx_unused *cr,
1976 interaction_const_t **interaction_const,
1977 const t_forcerec *fr,
1978 real rtab)
1980 interaction_const_t *ic;
1981 gmx_bool bUsesSimpleTables = TRUE;
1983 snew(ic, 1);
1985 /* Just allocate something so we can free it */
1986 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1987 snew_aligned(ic->tabq_coul_F, 16, 32);
1988 snew_aligned(ic->tabq_coul_V, 16, 32);
1990 ic->rlist = fr->rlist;
1991 ic->rlistlong = fr->rlistlong;
1993 /* Lennard-Jones */
1994 ic->vdwtype = fr->vdwtype;
1995 ic->vdw_modifier = fr->vdw_modifier;
1996 ic->rvdw = fr->rvdw;
1997 ic->rvdw_switch = fr->rvdw_switch;
1998 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1999 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
2000 ic->sh_lj_ewald = 0;
2001 clear_force_switch_constants(&ic->dispersion_shift);
2002 clear_force_switch_constants(&ic->repulsion_shift);
2004 switch (ic->vdw_modifier)
2006 case eintmodPOTSHIFT:
2007 /* Only shift the potential, don't touch the force */
2008 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2009 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
2010 if (EVDW_PME(ic->vdwtype))
2012 real crc2;
2014 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2015 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2017 break;
2018 case eintmodFORCESWITCH:
2019 /* Switch the force, switch and shift the potential */
2020 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2021 &ic->dispersion_shift);
2022 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2023 &ic->repulsion_shift);
2024 break;
2025 case eintmodPOTSWITCH:
2026 /* Switch the potential and force */
2027 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2028 &ic->vdw_switch);
2029 break;
2030 case eintmodNONE:
2031 case eintmodEXACTCUTOFF:
2032 /* Nothing to do here */
2033 break;
2034 default:
2035 gmx_incons("unimplemented potential modifier");
2038 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2040 /* Electrostatics */
2041 ic->eeltype = fr->eeltype;
2042 ic->coulomb_modifier = fr->coulomb_modifier;
2043 ic->rcoulomb = fr->rcoulomb;
2044 ic->epsilon_r = fr->epsilon_r;
2045 ic->epsfac = fr->epsfac;
2046 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2048 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2050 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2052 else
2054 ic->sh_ewald = 0;
2057 /* Reaction-field */
2058 if (EEL_RF(ic->eeltype))
2060 ic->epsilon_rf = fr->epsilon_rf;
2061 ic->k_rf = fr->k_rf;
2062 ic->c_rf = fr->c_rf;
2064 else
2066 /* For plain cut-off we might use the reaction-field kernels */
2067 ic->epsilon_rf = ic->epsilon_r;
2068 ic->k_rf = 0;
2069 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2071 ic->c_rf = 1/ic->rcoulomb;
2073 else
2075 ic->c_rf = 0;
2079 if (fp != NULL)
2081 real dispersion_shift;
2083 dispersion_shift = ic->dispersion_shift.cpot;
2084 if (EVDW_PME(ic->vdwtype))
2086 dispersion_shift -= ic->sh_lj_ewald;
2088 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2089 ic->repulsion_shift.cpot, dispersion_shift);
2091 if (ic->eeltype == eelCUT)
2093 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2095 else if (EEL_PME(ic->eeltype))
2097 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2099 fprintf(fp, "\n");
2102 *interaction_const = ic;
2104 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2106 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2108 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2109 * also sharing texture references. To keep the code simple, we don't
2110 * treat texture references as shared resources, but this means that
2111 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2112 * Hence, to ensure that the non-bonded kernels don't start before all
2113 * texture binding operations are finished, we need to wait for all ranks
2114 * to arrive here before continuing.
2116 * Note that we could omit this barrier if GPUs are not shared (or
2117 * texture objects are used), but as this is initialization code, there
2118 * is not point in complicating things.
2120 #ifdef GMX_THREAD_MPI
2121 if (PAR(cr))
2123 gmx_barrier(cr);
2125 #endif /* GMX_THREAD_MPI */
2128 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2129 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2132 static void init_nb_verlet(FILE *fp,
2133 nonbonded_verlet_t **nb_verlet,
2134 gmx_bool bFEP_NonBonded,
2135 const t_inputrec *ir,
2136 const t_forcerec *fr,
2137 const t_commrec *cr,
2138 const char *nbpu_opt)
2140 nonbonded_verlet_t *nbv;
2141 int i;
2142 char *env;
2143 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2145 nbnxn_alloc_t *nb_alloc;
2146 nbnxn_free_t *nb_free;
2148 snew(nbv, 1);
2150 pick_nbnxn_resources(cr, fr->hwinfo,
2151 fr->bNonbonded,
2152 &nbv->bUseGPU,
2153 &bEmulateGPU,
2154 fr->gpu_opt);
2156 nbv->nbs = NULL;
2158 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2159 for (i = 0; i < nbv->ngrp; i++)
2161 nbv->grp[i].nbl_lists.nnbl = 0;
2162 nbv->grp[i].nbat = NULL;
2163 nbv->grp[i].kernel_type = nbnxnkNotSet;
2165 if (i == 0) /* local */
2167 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2168 nbv->bUseGPU, bEmulateGPU, ir,
2169 &nbv->grp[i].kernel_type,
2170 &nbv->grp[i].ewald_excl,
2171 fr->bNonbonded);
2173 else /* non-local */
2175 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2177 /* Use GPU for local, select a CPU kernel for non-local */
2178 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2179 FALSE, FALSE, ir,
2180 &nbv->grp[i].kernel_type,
2181 &nbv->grp[i].ewald_excl,
2182 fr->bNonbonded);
2184 bHybridGPURun = TRUE;
2186 else
2188 /* Use the same kernel for local and non-local interactions */
2189 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2190 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2195 if (nbv->bUseGPU)
2197 /* init the NxN GPU data; the last argument tells whether we'll have
2198 * both local and non-local NB calculation on GPU */
2199 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2200 &fr->hwinfo->gpu_info, fr->gpu_opt,
2201 cr->rank_pp_intranode,
2202 (nbv->ngrp > 1) && !bHybridGPURun);
2204 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2206 char *end;
2208 nbv->min_ci_balanced = strtol(env, &end, 10);
2209 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2211 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2214 if (debug)
2216 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2217 nbv->min_ci_balanced);
2220 else
2222 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2223 if (debug)
2225 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2226 nbv->min_ci_balanced);
2230 else
2232 nbv->min_ci_balanced = 0;
2235 *nb_verlet = nbv;
2237 nbnxn_init_search(&nbv->nbs,
2238 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2239 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2240 bFEP_NonBonded,
2241 gmx_omp_nthreads_get(emntNonbonded));
2243 for (i = 0; i < nbv->ngrp; i++)
2245 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2247 nb_alloc = &pmalloc;
2248 nb_free = &pfree;
2250 else
2252 nb_alloc = NULL;
2253 nb_free = NULL;
2256 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2257 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2258 /* 8x8x8 "non-simple" lists are ATM always combined */
2259 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2260 nb_alloc, nb_free);
2262 if (i == 0 ||
2263 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2265 gmx_bool bSimpleList;
2266 int enbnxninitcombrule;
2268 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2270 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2272 /* Plain LJ cut-off: we can optimize with combination rules */
2273 enbnxninitcombrule = enbnxninitcombruleDETECT;
2275 else if (fr->vdwtype == evdwPME)
2277 /* LJ-PME: we need to use a combination rule for the grid */
2278 if (fr->ljpme_combination_rule == eljpmeGEOM)
2280 enbnxninitcombrule = enbnxninitcombruleGEOM;
2282 else
2284 enbnxninitcombrule = enbnxninitcombruleLB;
2287 else
2289 /* We use a full combination matrix: no rule required */
2290 enbnxninitcombrule = enbnxninitcombruleNONE;
2294 snew(nbv->grp[i].nbat, 1);
2295 nbnxn_atomdata_init(fp,
2296 nbv->grp[i].nbat,
2297 nbv->grp[i].kernel_type,
2298 enbnxninitcombrule,
2299 fr->ntype, fr->nbfp,
2300 ir->opts.ngener,
2301 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2302 nb_alloc, nb_free);
2304 else
2306 nbv->grp[i].nbat = nbv->grp[0].nbat;
2311 void init_forcerec(FILE *fp,
2312 const output_env_t oenv,
2313 t_forcerec *fr,
2314 t_fcdata *fcd,
2315 const t_inputrec *ir,
2316 const gmx_mtop_t *mtop,
2317 const t_commrec *cr,
2318 matrix box,
2319 const char *tabfn,
2320 const char *tabafn,
2321 const char *tabpfn,
2322 const char *tabbfn,
2323 const char *nbpu_opt,
2324 gmx_bool bNoSolvOpt,
2325 real print_force)
2327 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2328 real rtab;
2329 char *env;
2330 double dbl;
2331 const t_block *cgs;
2332 gmx_bool bGenericKernelOnly;
2333 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2334 gmx_bool bFEP_NonBonded;
2335 t_nblists *nbl;
2336 int *nm_ind, egp_flags;
2338 if (fr->hwinfo == NULL)
2340 /* Detect hardware, gather information.
2341 * In mdrun, hwinfo has already been set before calling init_forcerec.
2342 * Here we ignore GPUs, as tools will not use them anyhow.
2344 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2347 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2348 fr->use_simd_kernels = TRUE;
2350 fr->bDomDec = DOMAINDECOMP(cr);
2352 natoms = mtop->natoms;
2354 if (check_box(ir->ePBC, box))
2356 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2359 /* Test particle insertion ? */
2360 if (EI_TPI(ir->eI))
2362 /* Set to the size of the molecule to be inserted (the last one) */
2363 /* Because of old style topologies, we have to use the last cg
2364 * instead of the last molecule type.
2366 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2367 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2368 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2370 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2373 else
2375 fr->n_tpi = 0;
2378 /* Copy AdResS parameters */
2379 if (ir->bAdress)
2381 fr->adress_type = ir->adress->type;
2382 fr->adress_const_wf = ir->adress->const_wf;
2383 fr->adress_ex_width = ir->adress->ex_width;
2384 fr->adress_hy_width = ir->adress->hy_width;
2385 fr->adress_icor = ir->adress->icor;
2386 fr->adress_site = ir->adress->site;
2387 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2388 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2391 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2392 for (i = 0; i < ir->adress->n_energy_grps; i++)
2394 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2397 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2398 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2399 for (i = 0; i < fr->n_adress_tf_grps; i++)
2401 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2403 copy_rvec(ir->adress->refs, fr->adress_refs);
2405 else
2407 fr->adress_type = eAdressOff;
2408 fr->adress_do_hybridpairs = FALSE;
2411 /* Copy the user determined parameters */
2412 fr->userint1 = ir->userint1;
2413 fr->userint2 = ir->userint2;
2414 fr->userint3 = ir->userint3;
2415 fr->userint4 = ir->userint4;
2416 fr->userreal1 = ir->userreal1;
2417 fr->userreal2 = ir->userreal2;
2418 fr->userreal3 = ir->userreal3;
2419 fr->userreal4 = ir->userreal4;
2421 /* Shell stuff */
2422 fr->fc_stepsize = ir->fc_stepsize;
2424 /* Free energy */
2425 fr->efep = ir->efep;
2426 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2427 if (ir->fepvals->bScCoul)
2429 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2430 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2432 else
2434 fr->sc_alphacoul = 0;
2435 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2437 fr->sc_power = ir->fepvals->sc_power;
2438 fr->sc_r_power = ir->fepvals->sc_r_power;
2439 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2441 env = getenv("GMX_SCSIGMA_MIN");
2442 if (env != NULL)
2444 dbl = 0;
2445 sscanf(env, "%lf", &dbl);
2446 fr->sc_sigma6_min = pow(dbl, 6);
2447 if (fp)
2449 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2453 fr->bNonbonded = TRUE;
2454 if (getenv("GMX_NO_NONBONDED") != NULL)
2456 /* turn off non-bonded calculations */
2457 fr->bNonbonded = FALSE;
2458 md_print_warn(cr, fp,
2459 "Found environment variable GMX_NO_NONBONDED.\n"
2460 "Disabling nonbonded calculations.\n");
2463 bGenericKernelOnly = FALSE;
2465 /* We now check in the NS code whether a particular combination of interactions
2466 * can be used with water optimization, and disable it if that is not the case.
2469 if (getenv("GMX_NB_GENERIC") != NULL)
2471 if (fp != NULL)
2473 fprintf(fp,
2474 "Found environment variable GMX_NB_GENERIC.\n"
2475 "Disabling all interaction-specific nonbonded kernels, will only\n"
2476 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2478 bGenericKernelOnly = TRUE;
2481 if (bGenericKernelOnly == TRUE)
2483 bNoSolvOpt = TRUE;
2486 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2488 fr->use_simd_kernels = FALSE;
2489 if (fp != NULL)
2491 fprintf(fp,
2492 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2493 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2497 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2499 /* Check if we can/should do all-vs-all kernels */
2500 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2501 fr->AllvsAll_work = NULL;
2502 fr->AllvsAll_workgb = NULL;
2504 /* All-vs-all kernels have not been implemented in 4.6, and
2505 * the SIMD group kernels are also buggy in this case. Non-SIMD
2506 * group kernels are OK. See Redmine #1249. */
2507 if (fr->bAllvsAll)
2509 fr->bAllvsAll = FALSE;
2510 fr->use_simd_kernels = FALSE;
2511 if (fp != NULL)
2513 fprintf(fp,
2514 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2515 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2516 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2517 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2518 "we are proceeding by disabling all CPU architecture-specific\n"
2519 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2520 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2524 /* Neighbour searching stuff */
2525 fr->cutoff_scheme = ir->cutoff_scheme;
2526 fr->bGrid = (ir->ns_type == ensGRID);
2527 fr->ePBC = ir->ePBC;
2529 if (fr->cutoff_scheme == ecutsGROUP)
2531 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2532 "removed in a future release when 'verlet' supports all interaction forms.\n";
2534 if (MASTER(cr))
2536 fprintf(stderr, "\n%s\n", note);
2538 if (fp != NULL)
2540 fprintf(fp, "\n%s\n", note);
2544 /* Determine if we will do PBC for distances in bonded interactions */
2545 if (fr->ePBC == epbcNONE)
2547 fr->bMolPBC = FALSE;
2549 else
2551 if (!DOMAINDECOMP(cr))
2553 /* The group cut-off scheme and SHAKE assume charge groups
2554 * are whole, but not using molpbc is faster in most cases.
2556 if (fr->cutoff_scheme == ecutsGROUP ||
2557 (ir->eConstrAlg == econtSHAKE &&
2558 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2559 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2561 fr->bMolPBC = ir->bPeriodicMols;
2563 else
2565 fr->bMolPBC = TRUE;
2566 if (getenv("GMX_USE_GRAPH") != NULL)
2568 fr->bMolPBC = FALSE;
2569 if (fp)
2571 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2576 else
2578 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2581 fr->bGB = (ir->implicit_solvent == eisGBSA);
2583 fr->rc_scaling = ir->refcoord_scaling;
2584 copy_rvec(ir->posres_com, fr->posres_com);
2585 copy_rvec(ir->posres_comB, fr->posres_comB);
2586 fr->rlist = cutoff_inf(ir->rlist);
2587 fr->rlistlong = cutoff_inf(ir->rlistlong);
2588 fr->eeltype = ir->coulombtype;
2589 fr->vdwtype = ir->vdwtype;
2590 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2592 fr->coulomb_modifier = ir->coulomb_modifier;
2593 fr->vdw_modifier = ir->vdw_modifier;
2595 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2596 switch (fr->eeltype)
2598 case eelCUT:
2599 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2600 break;
2602 case eelRF:
2603 case eelGRF:
2604 case eelRF_NEC:
2605 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2606 break;
2608 case eelRF_ZERO:
2609 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2610 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2611 break;
2613 case eelSWITCH:
2614 case eelSHIFT:
2615 case eelUSER:
2616 case eelENCADSHIFT:
2617 case eelPMESWITCH:
2618 case eelPMEUSER:
2619 case eelPMEUSERSWITCH:
2620 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2621 break;
2623 case eelPME:
2624 case eelEWALD:
2625 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2626 break;
2628 default:
2629 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2630 break;
2633 /* Vdw: Translate from mdp settings to kernel format */
2634 switch (fr->vdwtype)
2636 case evdwCUT:
2637 if (fr->bBHAM)
2639 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2641 else
2643 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2645 break;
2646 case evdwPME:
2647 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2648 break;
2650 case evdwSWITCH:
2651 case evdwSHIFT:
2652 case evdwUSER:
2653 case evdwENCADSHIFT:
2654 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2655 break;
2657 default:
2658 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2659 break;
2662 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2663 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2664 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2666 fr->bTwinRange = fr->rlistlong > fr->rlist;
2667 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2669 fr->reppow = mtop->ffparams.reppow;
2671 if (ir->cutoff_scheme == ecutsGROUP)
2673 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2674 && !EVDW_PME(fr->vdwtype));
2675 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2676 fr->bcoultab = !(fr->eeltype == eelCUT ||
2677 fr->eeltype == eelEWALD ||
2678 fr->eeltype == eelPME ||
2679 fr->eeltype == eelRF ||
2680 fr->eeltype == eelRF_ZERO);
2682 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2683 * going to be faster to tabulate the interaction than calling the generic kernel.
2685 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2687 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2689 fr->bcoultab = TRUE;
2692 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2693 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2694 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2695 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2697 if (fr->rcoulomb != fr->rvdw)
2699 fr->bcoultab = TRUE;
2703 if (getenv("GMX_REQUIRE_TABLES"))
2705 fr->bvdwtab = TRUE;
2706 fr->bcoultab = TRUE;
2709 if (fp)
2711 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2712 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2715 if (fr->bvdwtab == TRUE)
2717 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2718 fr->nbkernel_vdw_modifier = eintmodNONE;
2720 if (fr->bcoultab == TRUE)
2722 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2723 fr->nbkernel_elec_modifier = eintmodNONE;
2727 if (ir->cutoff_scheme == ecutsVERLET)
2729 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2731 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2733 fr->bvdwtab = FALSE;
2734 fr->bcoultab = FALSE;
2737 /* Tables are used for direct ewald sum */
2738 if (fr->bEwald)
2740 if (EEL_PME(ir->coulombtype))
2742 if (fp)
2744 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2746 if (ir->coulombtype == eelP3M_AD)
2748 please_cite(fp, "Hockney1988");
2749 please_cite(fp, "Ballenegger2012");
2751 else
2753 please_cite(fp, "Essmann95a");
2756 if (ir->ewald_geometry == eewg3DC)
2758 if (fp)
2760 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2762 please_cite(fp, "In-Chul99a");
2765 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2766 init_ewald_tab(&(fr->ewald_table), ir, fp);
2767 if (fp)
2769 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2770 1/fr->ewaldcoeff_q);
2774 if (EVDW_PME(ir->vdwtype))
2776 if (fp)
2778 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2780 please_cite(fp, "Essmann95a");
2781 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2782 if (fp)
2784 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2785 1/fr->ewaldcoeff_lj);
2789 /* Electrostatics */
2790 fr->epsilon_r = ir->epsilon_r;
2791 fr->epsilon_rf = ir->epsilon_rf;
2792 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2793 fr->rcoulomb_switch = ir->rcoulomb_switch;
2794 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2796 /* Parameters for generalized RF */
2797 fr->zsquare = 0.0;
2798 fr->temp = 0.0;
2800 if (fr->eeltype == eelGRF)
2802 init_generalized_rf(fp, mtop, ir, fr);
2805 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2806 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2807 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2808 IR_ELEC_FIELD(*ir) ||
2809 (fr->adress_icor != eAdressICOff)
2812 if (fr->cutoff_scheme == ecutsGROUP &&
2813 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2815 /* Count the total number of charge groups */
2816 fr->cg_nalloc = ncg_mtop(mtop);
2817 srenew(fr->cg_cm, fr->cg_nalloc);
2819 if (fr->shift_vec == NULL)
2821 snew(fr->shift_vec, SHIFTS);
2824 if (fr->fshift == NULL)
2826 snew(fr->fshift, SHIFTS);
2829 if (fr->nbfp == NULL)
2831 fr->ntype = mtop->ffparams.atnr;
2832 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2833 if (EVDW_PME(fr->vdwtype))
2835 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2839 /* Copy the energy group exclusions */
2840 fr->egp_flags = ir->opts.egp_flags;
2842 /* Van der Waals stuff */
2843 fr->rvdw = cutoff_inf(ir->rvdw);
2844 fr->rvdw_switch = ir->rvdw_switch;
2845 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2847 if (fr->rvdw_switch >= fr->rvdw)
2849 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2850 fr->rvdw_switch, fr->rvdw);
2852 if (fp)
2854 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2855 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2856 fr->rvdw_switch, fr->rvdw);
2860 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2862 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2865 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2867 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2870 if (fp)
2872 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2873 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2876 fr->eDispCorr = ir->eDispCorr;
2877 if (ir->eDispCorr != edispcNO)
2879 set_avcsixtwelve(fp, fr, mtop);
2882 if (fr->bBHAM)
2884 set_bham_b_max(fp, fr, mtop);
2887 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2889 /* Copy the GBSA data (radius, volume and surftens for each
2890 * atomtype) from the topology atomtype section to forcerec.
2892 snew(fr->atype_radius, fr->ntype);
2893 snew(fr->atype_vol, fr->ntype);
2894 snew(fr->atype_surftens, fr->ntype);
2895 snew(fr->atype_gb_radius, fr->ntype);
2896 snew(fr->atype_S_hct, fr->ntype);
2898 if (mtop->atomtypes.nr > 0)
2900 for (i = 0; i < fr->ntype; i++)
2902 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2904 for (i = 0; i < fr->ntype; i++)
2906 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2908 for (i = 0; i < fr->ntype; i++)
2910 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2912 for (i = 0; i < fr->ntype; i++)
2914 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2916 for (i = 0; i < fr->ntype; i++)
2918 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2922 /* Generate the GB table if needed */
2923 if (fr->bGB)
2925 #ifdef GMX_DOUBLE
2926 fr->gbtabscale = 2000;
2927 #else
2928 fr->gbtabscale = 500;
2929 #endif
2931 fr->gbtabr = 100;
2932 fr->gbtab = make_gb_table(oenv, fr);
2934 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2936 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2937 if (!DOMAINDECOMP(cr))
2939 make_local_gb(cr, fr->born, ir->gb_algorithm);
2943 /* Set the charge scaling */
2944 if (fr->epsilon_r != 0)
2946 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2948 else
2950 /* eps = 0 is infinite dieletric: no coulomb interactions */
2951 fr->epsfac = 0;
2954 /* Reaction field constants */
2955 if (EEL_RF(fr->eeltype))
2957 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2958 fr->rcoulomb, fr->temp, fr->zsquare, box,
2959 &fr->kappa, &fr->k_rf, &fr->c_rf);
2962 /*This now calculates sum for q and c6*/
2963 set_chargesum(fp, fr, mtop);
2965 /* if we are using LR electrostatics, and they are tabulated,
2966 * the tables will contain modified coulomb interactions.
2967 * Since we want to use the non-shifted ones for 1-4
2968 * coulombic interactions, we must have an extra set of tables.
2971 /* Construct tables.
2972 * A little unnecessary to make both vdw and coul tables sometimes,
2973 * but what the heck... */
2975 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2976 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2978 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2979 fr->bBHAM || fr->bEwald) &&
2980 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2981 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2982 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2984 negp_pp = ir->opts.ngener - ir->nwall;
2985 negptable = 0;
2986 if (!bMakeTables)
2988 bSomeNormalNbListsAreInUse = TRUE;
2989 fr->nnblists = 1;
2991 else
2993 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2994 for (egi = 0; egi < negp_pp; egi++)
2996 for (egj = egi; egj < negp_pp; egj++)
2998 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2999 if (!(egp_flags & EGP_EXCL))
3001 if (egp_flags & EGP_TABLE)
3003 negptable++;
3005 else
3007 bSomeNormalNbListsAreInUse = TRUE;
3012 if (bSomeNormalNbListsAreInUse)
3014 fr->nnblists = negptable + 1;
3016 else
3018 fr->nnblists = negptable;
3020 if (fr->nnblists > 1)
3022 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3026 if (ir->adress)
3028 fr->nnblists *= 2;
3031 snew(fr->nblists, fr->nnblists);
3033 /* This code automatically gives table length tabext without cut-off's,
3034 * in that case grompp should already have checked that we do not need
3035 * normal tables and we only generate tables for 1-4 interactions.
3037 rtab = ir->rlistlong + ir->tabext;
3039 if (bMakeTables)
3041 /* make tables for ordinary interactions */
3042 if (bSomeNormalNbListsAreInUse)
3044 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3045 if (ir->adress)
3047 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3049 if (!bMakeSeparate14Table)
3051 fr->tab14 = fr->nblists[0].table_elec_vdw;
3053 m = 1;
3055 else
3057 m = 0;
3059 if (negptable > 0)
3061 /* Read the special tables for certain energy group pairs */
3062 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3063 for (egi = 0; egi < negp_pp; egi++)
3065 for (egj = egi; egj < negp_pp; egj++)
3067 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3068 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3070 nbl = &(fr->nblists[m]);
3071 if (fr->nnblists > 1)
3073 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3075 /* Read the table file with the two energy groups names appended */
3076 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3077 *mtop->groups.grpname[nm_ind[egi]],
3078 *mtop->groups.grpname[nm_ind[egj]],
3079 &fr->nblists[m]);
3080 if (ir->adress)
3082 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3083 *mtop->groups.grpname[nm_ind[egi]],
3084 *mtop->groups.grpname[nm_ind[egj]],
3085 &fr->nblists[fr->nnblists/2+m]);
3087 m++;
3089 else if (fr->nnblists > 1)
3091 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3097 if (bMakeSeparate14Table)
3099 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3100 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3101 GMX_MAKETABLES_14ONLY);
3104 /* Read AdResS Thermo Force table if needed */
3105 if (fr->adress_icor == eAdressICThermoForce)
3107 /* old todo replace */
3109 if (ir->adress->n_tf_grps > 0)
3111 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3114 else
3116 /* load the default table */
3117 snew(fr->atf_tabs, 1);
3118 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3122 /* Wall stuff */
3123 fr->nwall = ir->nwall;
3124 if (ir->nwall && ir->wall_type == ewtTABLE)
3126 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3129 if (fcd && tabbfn)
3131 fcd->bondtab = make_bonded_tables(fp,
3132 F_TABBONDS, F_TABBONDSNC,
3133 mtop, tabbfn, "b");
3134 fcd->angletab = make_bonded_tables(fp,
3135 F_TABANGLES, -1,
3136 mtop, tabbfn, "a");
3137 fcd->dihtab = make_bonded_tables(fp,
3138 F_TABDIHS, -1,
3139 mtop, tabbfn, "d");
3141 else
3143 if (debug)
3145 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3149 /* QM/MM initialization if requested
3151 if (ir->bQMMM)
3153 fprintf(stderr, "QM/MM calculation requested.\n");
3156 fr->bQMMM = ir->bQMMM;
3157 fr->qr = mk_QMMMrec();
3159 /* Set all the static charge group info */
3160 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3161 &bFEP_NonBonded,
3162 &fr->bExcl_IntraCGAll_InterCGNone);
3163 if (DOMAINDECOMP(cr))
3165 fr->cginfo = NULL;
3167 else
3169 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3172 if (!DOMAINDECOMP(cr))
3174 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3175 mtop->natoms, mtop->natoms, mtop->natoms);
3178 fr->print_force = print_force;
3181 /* coarse load balancing vars */
3182 fr->t_fnbf = 0.;
3183 fr->t_wait = 0.;
3184 fr->timesteps = 0;
3186 /* Initialize neighbor search */
3187 init_ns(fp, cr, &fr->ns, fr, mtop);
3189 if (cr->duty & DUTY_PP)
3191 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3193 if (ir->bAdress)
3195 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3200 /* Initialize the thread working data for bonded interactions */
3201 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3203 snew(fr->excl_load, fr->nthreads+1);
3205 if (fr->cutoff_scheme == ecutsVERLET)
3207 if (ir->rcoulomb != ir->rvdw)
3209 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3212 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3215 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3216 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3218 if (ir->eDispCorr != edispcNO)
3220 calc_enervirdiff(fp, ir->eDispCorr, fr);
3224 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3225 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3226 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3228 void pr_forcerec(FILE *fp, t_forcerec *fr)
3230 int i;
3232 pr_real(fp, fr->rlist);
3233 pr_real(fp, fr->rcoulomb);
3234 pr_real(fp, fr->fudgeQQ);
3235 pr_bool(fp, fr->bGrid);
3236 pr_bool(fp, fr->bTwinRange);
3237 /*pr_int(fp,fr->cg0);
3238 pr_int(fp,fr->hcg);*/
3239 for (i = 0; i < fr->nnblists; i++)
3241 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3243 pr_real(fp, fr->rcoulomb_switch);
3244 pr_real(fp, fr->rcoulomb);
3246 fflush(fp);
3249 void forcerec_set_excl_load(t_forcerec *fr,
3250 const gmx_localtop_t *top)
3252 const int *ind, *a;
3253 int t, i, j, ntot, n, ntarget;
3255 ind = top->excls.index;
3256 a = top->excls.a;
3258 ntot = 0;
3259 for (i = 0; i < top->excls.nr; i++)
3261 for (j = ind[i]; j < ind[i+1]; j++)
3263 if (a[j] > i)
3265 ntot++;
3270 fr->excl_load[0] = 0;
3271 n = 0;
3272 i = 0;
3273 for (t = 1; t <= fr->nthreads; t++)
3275 ntarget = (ntot*t)/fr->nthreads;
3276 while (i < top->excls.nr && n < ntarget)
3278 for (j = ind[i]; j < ind[i+1]; j++)
3280 if (a[j] > i)
3282 n++;
3285 i++;
3287 fr->excl_load[t] = i;