Implemented LJ-PME nbnxn kernels
[gromacs.git] / src / gromacs / mdlib / forcerec.c
blob64fef1f839d830bb614b3b15b17a32d03b9b68fb
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
41 #include <math.h>
42 #include <string.h>
43 #include <assert.h>
44 #include "sysstuff.h"
45 #include "typedefs.h"
46 #include "vec.h"
47 #include "gromacs/math/utilities.h"
48 #include "macros.h"
49 #include "smalloc.h"
50 #include "macros.h"
51 #include "gmx_fatal.h"
52 #include "gmx_fatal_collective.h"
53 #include "physics.h"
54 #include "force.h"
55 #include "tables.h"
56 #include "nonbonded.h"
57 #include "invblock.h"
58 #include "names.h"
59 #include "network.h"
60 #include "pbc.h"
61 #include "ns.h"
62 #include "mshift.h"
63 #include "txtdump.h"
64 #include "coulomb.h"
65 #include "md_support.h"
66 #include "md_logging.h"
67 #include "domdec.h"
68 #include "qmmm.h"
69 #include "copyrite.h"
70 #include "mtop_util.h"
71 #include "nbnxn_search.h"
72 #include "nbnxn_atomdata.h"
73 #include "nbnxn_consts.h"
74 #include "gmx_omp_nthreads.h"
75 #include "gmx_detect_hardware.h"
76 #include "inputrec.h"
78 #ifdef _MSC_VER
79 /* MSVC definition for __cpuid() */
80 #include <intrin.h>
81 #endif
83 #include "types/nbnxn_cuda_types_ext.h"
84 #include "gpu_utils.h"
85 #include "nbnxn_cuda_data_mgmt.h"
86 #include "pmalloc_cuda.h"
88 t_forcerec *mk_forcerec(void)
90 t_forcerec *fr;
92 snew(fr, 1);
94 return fr;
97 #ifdef DEBUG
98 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
100 int i, j;
102 for (i = 0; (i < atnr); i++)
104 for (j = 0; (j < atnr); j++)
106 fprintf(fp, "%2d - %2d", i, j);
107 if (bBHAM)
109 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
110 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
112 else
114 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
115 C12(nbfp, atnr, i, j)/12.0);
120 #endif
122 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
124 real *nbfp;
125 int i, j, k, atnr;
127 atnr = idef->atnr;
128 if (bBHAM)
130 snew(nbfp, 3*atnr*atnr);
131 for (i = k = 0; (i < atnr); i++)
133 for (j = 0; (j < atnr); j++, k++)
135 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
136 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
137 /* nbfp now includes the 6.0 derivative prefactor */
138 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
142 else
144 snew(nbfp, 2*atnr*atnr);
145 for (i = k = 0; (i < atnr); i++)
147 for (j = 0; (j < atnr); j++, k++)
149 /* nbfp now includes the 6.0/12.0 derivative prefactors */
150 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
151 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
156 return nbfp;
159 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
161 real *nbfp;
162 int i, j, k, atnr;
163 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
164 real c6, c12;
166 atnr = idef->atnr;
167 snew(nbfp, 2*atnr*atnr);
168 for (i = 0; i < atnr; ++i)
170 for (j = 0; j < atnr; ++j)
172 c6i = idef->iparams[i*(atnr+1)].lj.c6;
173 c12i = idef->iparams[i*(atnr+1)].lj.c12;
174 c6j = idef->iparams[j*(atnr+1)].lj.c6;
175 c12j = idef->iparams[j*(atnr+1)].lj.c12;
176 c6 = sqrt(c6i * c6j);
177 c12 = sqrt(c12i * c12j);
178 if (comb_rule == eCOMB_ARITHMETIC
179 && !gmx_numzero(c6) && !gmx_numzero(c12))
181 sigmai = pow(c12i / c6i, 1.0/6.0);
182 sigmaj = pow(c12j / c6j, 1.0/6.0);
183 epsi = c6i * c6i / c12i;
184 epsj = c6j * c6j / c12j;
185 c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
186 c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
188 C6(nbfp, atnr, i, j) = c6*6.0;
189 C12(nbfp, atnr, i, j) = c12*12.0;
192 return nbfp;
195 /* This routine sets fr->solvent_opt to the most common solvent in the
196 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
197 * the fr->solvent_type array with the correct type (or esolNO).
199 * Charge groups that fulfill the conditions but are not identical to the
200 * most common one will be marked as esolNO in the solvent_type array.
202 * TIP3p is identical to SPC for these purposes, so we call it
203 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
205 * NOTE: QM particle should not
206 * become an optimized solvent. Not even if there is only one charge
207 * group in the Qm
210 typedef struct
212 int model;
213 int count;
214 int vdwtype[4];
215 real charge[4];
216 } solvent_parameters_t;
218 static void
219 check_solvent_cg(const gmx_moltype_t *molt,
220 int cg0,
221 int nmol,
222 const unsigned char *qm_grpnr,
223 const t_grps *qm_grps,
224 t_forcerec * fr,
225 int *n_solvent_parameters,
226 solvent_parameters_t **solvent_parameters_p,
227 int cginfo,
228 int *cg_sp)
230 const t_blocka * excl;
231 t_atom *atom;
232 int j, k;
233 int j0, j1, nj;
234 gmx_bool perturbed;
235 gmx_bool has_vdw[4];
236 gmx_bool match;
237 real tmp_charge[4];
238 int tmp_vdwtype[4];
239 int tjA;
240 gmx_bool qm;
241 solvent_parameters_t *solvent_parameters;
243 /* We use a list with parameters for each solvent type.
244 * Every time we discover a new molecule that fulfills the basic
245 * conditions for a solvent we compare with the previous entries
246 * in these lists. If the parameters are the same we just increment
247 * the counter for that type, and otherwise we create a new type
248 * based on the current molecule.
250 * Once we've finished going through all molecules we check which
251 * solvent is most common, and mark all those molecules while we
252 * clear the flag on all others.
255 solvent_parameters = *solvent_parameters_p;
257 /* Mark the cg first as non optimized */
258 *cg_sp = -1;
260 /* Check if this cg has no exclusions with atoms in other charge groups
261 * and all atoms inside the charge group excluded.
262 * We only have 3 or 4 atom solvent loops.
264 if (GET_CGINFO_EXCL_INTER(cginfo) ||
265 !GET_CGINFO_EXCL_INTRA(cginfo))
267 return;
270 /* Get the indices of the first atom in this charge group */
271 j0 = molt->cgs.index[cg0];
272 j1 = molt->cgs.index[cg0+1];
274 /* Number of atoms in our molecule */
275 nj = j1 - j0;
277 if (debug)
279 fprintf(debug,
280 "Moltype '%s': there are %d atoms in this charge group\n",
281 *molt->name, nj);
284 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
285 * otherwise skip it.
287 if (nj < 3 || nj > 4)
289 return;
292 /* Check if we are doing QM on this group */
293 qm = FALSE;
294 if (qm_grpnr != NULL)
296 for (j = j0; j < j1 && !qm; j++)
298 qm = (qm_grpnr[j] < qm_grps->nr - 1);
301 /* Cannot use solvent optimization with QM */
302 if (qm)
304 return;
307 atom = molt->atoms.atom;
309 /* Still looks like a solvent, time to check parameters */
311 /* If it is perturbed (free energy) we can't use the solvent loops,
312 * so then we just skip to the next molecule.
314 perturbed = FALSE;
316 for (j = j0; j < j1 && !perturbed; j++)
318 perturbed = PERTURBED(atom[j]);
321 if (perturbed)
323 return;
326 /* Now it's only a question if the VdW and charge parameters
327 * are OK. Before doing the check we compare and see if they are
328 * identical to a possible previous solvent type.
329 * First we assign the current types and charges.
331 for (j = 0; j < nj; j++)
333 tmp_vdwtype[j] = atom[j0+j].type;
334 tmp_charge[j] = atom[j0+j].q;
337 /* Does it match any previous solvent type? */
338 for (k = 0; k < *n_solvent_parameters; k++)
340 match = TRUE;
343 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
344 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
345 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
347 match = FALSE;
350 /* Check that types & charges match for all atoms in molecule */
351 for (j = 0; j < nj && match == TRUE; j++)
353 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
355 match = FALSE;
357 if (tmp_charge[j] != solvent_parameters[k].charge[j])
359 match = FALSE;
362 if (match == TRUE)
364 /* Congratulations! We have a matched solvent.
365 * Flag it with this type for later processing.
367 *cg_sp = k;
368 solvent_parameters[k].count += nmol;
370 /* We are done with this charge group */
371 return;
375 /* If we get here, we have a tentative new solvent type.
376 * Before we add it we must check that it fulfills the requirements
377 * of the solvent optimized loops. First determine which atoms have
378 * VdW interactions.
380 for (j = 0; j < nj; j++)
382 has_vdw[j] = FALSE;
383 tjA = tmp_vdwtype[j];
385 /* Go through all other tpes and see if any have non-zero
386 * VdW parameters when combined with this one.
388 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
390 /* We already checked that the atoms weren't perturbed,
391 * so we only need to check state A now.
393 if (fr->bBHAM)
395 has_vdw[j] = (has_vdw[j] ||
396 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
397 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
398 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
400 else
402 /* Standard LJ */
403 has_vdw[j] = (has_vdw[j] ||
404 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
405 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
410 /* Now we know all we need to make the final check and assignment. */
411 if (nj == 3)
413 /* So, is it an SPC?
414 * For this we require thatn all atoms have charge,
415 * the charges on atom 2 & 3 should be the same, and only
416 * atom 1 might have VdW.
418 if (has_vdw[1] == FALSE &&
419 has_vdw[2] == FALSE &&
420 tmp_charge[0] != 0 &&
421 tmp_charge[1] != 0 &&
422 tmp_charge[2] == tmp_charge[1])
424 srenew(solvent_parameters, *n_solvent_parameters+1);
425 solvent_parameters[*n_solvent_parameters].model = esolSPC;
426 solvent_parameters[*n_solvent_parameters].count = nmol;
427 for (k = 0; k < 3; k++)
429 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
430 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
433 *cg_sp = *n_solvent_parameters;
434 (*n_solvent_parameters)++;
437 else if (nj == 4)
439 /* Or could it be a TIP4P?
440 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
441 * Only atom 1 mght have VdW.
443 if (has_vdw[1] == FALSE &&
444 has_vdw[2] == FALSE &&
445 has_vdw[3] == FALSE &&
446 tmp_charge[0] == 0 &&
447 tmp_charge[1] != 0 &&
448 tmp_charge[2] == tmp_charge[1] &&
449 tmp_charge[3] != 0)
451 srenew(solvent_parameters, *n_solvent_parameters+1);
452 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
453 solvent_parameters[*n_solvent_parameters].count = nmol;
454 for (k = 0; k < 4; k++)
456 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
457 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
460 *cg_sp = *n_solvent_parameters;
461 (*n_solvent_parameters)++;
465 *solvent_parameters_p = solvent_parameters;
468 static void
469 check_solvent(FILE * fp,
470 const gmx_mtop_t * mtop,
471 t_forcerec * fr,
472 cginfo_mb_t *cginfo_mb)
474 const t_block * cgs;
475 const t_block * mols;
476 const gmx_moltype_t *molt;
477 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
478 int n_solvent_parameters;
479 solvent_parameters_t *solvent_parameters;
480 int **cg_sp;
481 int bestsp, bestsol;
483 if (debug)
485 fprintf(debug, "Going to determine what solvent types we have.\n");
488 mols = &mtop->mols;
490 n_solvent_parameters = 0;
491 solvent_parameters = NULL;
492 /* Allocate temporary array for solvent type */
493 snew(cg_sp, mtop->nmolblock);
495 cg_offset = 0;
496 at_offset = 0;
497 for (mb = 0; mb < mtop->nmolblock; mb++)
499 molt = &mtop->moltype[mtop->molblock[mb].type];
500 cgs = &molt->cgs;
501 /* Here we have to loop over all individual molecules
502 * because we need to check for QMMM particles.
504 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
505 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
506 nmol = mtop->molblock[mb].nmol/nmol_ch;
507 for (mol = 0; mol < nmol_ch; mol++)
509 cgm = mol*cgs->nr;
510 am = mol*cgs->index[cgs->nr];
511 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
513 check_solvent_cg(molt, cg_mol, nmol,
514 mtop->groups.grpnr[egcQMMM] ?
515 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
516 &mtop->groups.grps[egcQMMM],
518 &n_solvent_parameters, &solvent_parameters,
519 cginfo_mb[mb].cginfo[cgm+cg_mol],
520 &cg_sp[mb][cgm+cg_mol]);
523 cg_offset += cgs->nr;
524 at_offset += cgs->index[cgs->nr];
527 /* Puh! We finished going through all charge groups.
528 * Now find the most common solvent model.
531 /* Most common solvent this far */
532 bestsp = -2;
533 for (i = 0; i < n_solvent_parameters; i++)
535 if (bestsp == -2 ||
536 solvent_parameters[i].count > solvent_parameters[bestsp].count)
538 bestsp = i;
542 if (bestsp >= 0)
544 bestsol = solvent_parameters[bestsp].model;
546 else
548 bestsol = esolNO;
551 #ifdef DISABLE_WATER_NLIST
552 bestsol = esolNO;
553 #endif
555 fr->nWatMol = 0;
556 for (mb = 0; mb < mtop->nmolblock; mb++)
558 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
559 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
560 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
562 if (cg_sp[mb][i] == bestsp)
564 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
565 fr->nWatMol += nmol;
567 else
569 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
572 sfree(cg_sp[mb]);
574 sfree(cg_sp);
576 if (bestsol != esolNO && fp != NULL)
578 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
579 esol_names[bestsol],
580 solvent_parameters[bestsp].count);
583 sfree(solvent_parameters);
584 fr->solvent_opt = bestsol;
587 enum {
588 acNONE = 0, acCONSTRAINT, acSETTLE
591 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
592 t_forcerec *fr, gmx_bool bNoSolvOpt,
593 gmx_bool *bExcl_IntraCGAll_InterCGNone)
595 const t_block *cgs;
596 const t_blocka *excl;
597 const gmx_moltype_t *molt;
598 const gmx_molblock_t *molb;
599 cginfo_mb_t *cginfo_mb;
600 gmx_bool *type_VDW;
601 int *cginfo;
602 int cg_offset, a_offset, cgm, am;
603 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
604 int *a_con;
605 int ftype;
606 int ia;
607 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
609 ncg_tot = ncg_mtop(mtop);
610 snew(cginfo_mb, mtop->nmolblock);
612 snew(type_VDW, fr->ntype);
613 for (ai = 0; ai < fr->ntype; ai++)
615 type_VDW[ai] = FALSE;
616 for (j = 0; j < fr->ntype; j++)
618 type_VDW[ai] = type_VDW[ai] ||
619 fr->bBHAM ||
620 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
621 C12(fr->nbfp, fr->ntype, ai, j) != 0;
625 *bExcl_IntraCGAll_InterCGNone = TRUE;
627 excl_nalloc = 10;
628 snew(bExcl, excl_nalloc);
629 cg_offset = 0;
630 a_offset = 0;
631 for (mb = 0; mb < mtop->nmolblock; mb++)
633 molb = &mtop->molblock[mb];
634 molt = &mtop->moltype[molb->type];
635 cgs = &molt->cgs;
636 excl = &molt->excls;
638 /* Check if the cginfo is identical for all molecules in this block.
639 * If so, we only need an array of the size of one molecule.
640 * Otherwise we make an array of #mol times #cgs per molecule.
642 bId = TRUE;
643 am = 0;
644 for (m = 0; m < molb->nmol; m++)
646 am = m*cgs->index[cgs->nr];
647 for (cg = 0; cg < cgs->nr; cg++)
649 a0 = cgs->index[cg];
650 a1 = cgs->index[cg+1];
651 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
652 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
654 bId = FALSE;
656 if (mtop->groups.grpnr[egcQMMM] != NULL)
658 for (ai = a0; ai < a1; ai++)
660 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
661 mtop->groups.grpnr[egcQMMM][a_offset +ai])
663 bId = FALSE;
670 cginfo_mb[mb].cg_start = cg_offset;
671 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
672 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
673 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
674 cginfo = cginfo_mb[mb].cginfo;
676 /* Set constraints flags for constrained atoms */
677 snew(a_con, molt->atoms.nr);
678 for (ftype = 0; ftype < F_NRE; ftype++)
680 if (interaction_function[ftype].flags & IF_CONSTRAINT)
682 int nral;
684 nral = NRAL(ftype);
685 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
687 int a;
689 for (a = 0; a < nral; a++)
691 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
692 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
698 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
700 cgm = m*cgs->nr;
701 am = m*cgs->index[cgs->nr];
702 for (cg = 0; cg < cgs->nr; cg++)
704 a0 = cgs->index[cg];
705 a1 = cgs->index[cg+1];
707 /* Store the energy group in cginfo */
708 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
709 SET_CGINFO_GID(cginfo[cgm+cg], gid);
711 /* Check the intra/inter charge group exclusions */
712 if (a1-a0 > excl_nalloc)
714 excl_nalloc = a1 - a0;
715 srenew(bExcl, excl_nalloc);
717 /* bExclIntraAll: all intra cg interactions excluded
718 * bExclInter: any inter cg interactions excluded
720 bExclIntraAll = TRUE;
721 bExclInter = FALSE;
722 bHaveVDW = FALSE;
723 bHaveQ = FALSE;
724 for (ai = a0; ai < a1; ai++)
726 /* Check VDW and electrostatic interactions */
727 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
728 type_VDW[molt->atoms.atom[ai].typeB]);
729 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
730 molt->atoms.atom[ai].qB != 0);
732 /* Clear the exclusion list for atom ai */
733 for (aj = a0; aj < a1; aj++)
735 bExcl[aj-a0] = FALSE;
737 /* Loop over all the exclusions of atom ai */
738 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
740 aj = excl->a[j];
741 if (aj < a0 || aj >= a1)
743 bExclInter = TRUE;
745 else
747 bExcl[aj-a0] = TRUE;
750 /* Check if ai excludes a0 to a1 */
751 for (aj = a0; aj < a1; aj++)
753 if (!bExcl[aj-a0])
755 bExclIntraAll = FALSE;
759 switch (a_con[ai])
761 case acCONSTRAINT:
762 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
763 break;
764 case acSETTLE:
765 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
766 break;
767 default:
768 break;
771 if (bExclIntraAll)
773 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
775 if (bExclInter)
777 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
779 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
781 /* The size in cginfo is currently only read with DD */
782 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
784 if (bHaveVDW)
786 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
788 if (bHaveQ)
790 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
792 /* Store the charge group size */
793 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
795 if (!bExclIntraAll || bExclInter)
797 *bExcl_IntraCGAll_InterCGNone = FALSE;
802 sfree(a_con);
804 cg_offset += molb->nmol*cgs->nr;
805 a_offset += molb->nmol*cgs->index[cgs->nr];
807 sfree(bExcl);
809 /* the solvent optimizer is called after the QM is initialized,
810 * because we don't want to have the QM subsystemto become an
811 * optimized solvent
814 check_solvent(fplog, mtop, fr, cginfo_mb);
816 if (getenv("GMX_NO_SOLV_OPT"))
818 if (fplog)
820 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
821 "Disabling all solvent optimization\n");
823 fr->solvent_opt = esolNO;
825 if (bNoSolvOpt)
827 fr->solvent_opt = esolNO;
829 if (!fr->solvent_opt)
831 for (mb = 0; mb < mtop->nmolblock; mb++)
833 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
835 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
840 return cginfo_mb;
843 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
845 int ncg, mb, cg;
846 int *cginfo;
848 ncg = cgi_mb[nmb-1].cg_end;
849 snew(cginfo, ncg);
850 mb = 0;
851 for (cg = 0; cg < ncg; cg++)
853 while (cg >= cgi_mb[mb].cg_end)
855 mb++;
857 cginfo[cg] =
858 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
861 return cginfo;
864 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
866 /*This now calculates sum for q and c6*/
867 double qsum, q2sum, q, c6sum, c6;
868 int mb, nmol, i;
869 const t_atoms *atoms;
871 qsum = 0;
872 q2sum = 0;
873 c6sum = 0;
874 for (mb = 0; mb < mtop->nmolblock; mb++)
876 nmol = mtop->molblock[mb].nmol;
877 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
878 for (i = 0; i < atoms->nr; i++)
880 q = atoms->atom[i].q;
881 qsum += nmol*q;
882 q2sum += nmol*q*q;
883 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
884 c6sum += nmol*c6;
887 fr->qsum[0] = qsum;
888 fr->q2sum[0] = q2sum;
889 fr->c6sum[0] = c6sum;
891 if (fr->efep != efepNO)
893 qsum = 0;
894 q2sum = 0;
895 c6sum = 0;
896 for (mb = 0; mb < mtop->nmolblock; mb++)
898 nmol = mtop->molblock[mb].nmol;
899 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
900 for (i = 0; i < atoms->nr; i++)
902 q = atoms->atom[i].qB;
903 qsum += nmol*q;
904 q2sum += nmol*q*q;
905 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
906 c6sum += nmol*c6;
908 fr->qsum[1] = qsum;
909 fr->q2sum[1] = q2sum;
910 fr->c6sum[1] = c6sum;
913 else
915 fr->qsum[1] = fr->qsum[0];
916 fr->q2sum[1] = fr->q2sum[0];
917 fr->c6sum[1] = fr->c6sum[0];
919 if (log)
921 if (fr->efep == efepNO)
923 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
925 else
927 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
928 fr->qsum[0], fr->qsum[1]);
933 void update_forcerec(t_forcerec *fr, matrix box)
935 if (fr->eeltype == eelGRF)
937 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
938 fr->rcoulomb, fr->temp, fr->zsquare, box,
939 &fr->kappa, &fr->k_rf, &fr->c_rf);
943 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
945 const t_atoms *atoms, *atoms_tpi;
946 const t_blocka *excl;
947 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
948 gmx_int64_t npair, npair_ij, tmpi, tmpj;
949 double csix, ctwelve;
950 int ntp, *typecount;
951 gmx_bool bBHAM;
952 real *nbfp;
953 real *nbfp_comb = NULL;
955 ntp = fr->ntype;
956 bBHAM = fr->bBHAM;
957 nbfp = fr->nbfp;
959 /* For LJ-PME, we want to correct for the difference between the
960 * actual C6 values and the C6 values used by the LJ-PME based on
961 * combination rules. */
963 if (EVDW_PME(fr->vdwtype))
965 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
966 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
967 for (tpi = 0; tpi < ntp; ++tpi)
969 for (tpj = 0; tpj < ntp; ++tpj)
971 C6(nbfp_comb, ntp, tpi, tpj) =
972 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
973 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
976 nbfp = nbfp_comb;
978 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
980 csix = 0;
981 ctwelve = 0;
982 npair = 0;
983 nexcl = 0;
984 if (!fr->n_tpi)
986 /* Count the types so we avoid natoms^2 operations */
987 snew(typecount, ntp);
988 gmx_mtop_count_atomtypes(mtop, q, typecount);
990 for (tpi = 0; tpi < ntp; tpi++)
992 for (tpj = tpi; tpj < ntp; tpj++)
994 tmpi = typecount[tpi];
995 tmpj = typecount[tpj];
996 if (tpi != tpj)
998 npair_ij = tmpi*tmpj;
1000 else
1002 npair_ij = tmpi*(tmpi - 1)/2;
1004 if (bBHAM)
1006 /* nbfp now includes the 6.0 derivative prefactor */
1007 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1009 else
1011 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1012 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1013 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1015 npair += npair_ij;
1018 sfree(typecount);
1019 /* Subtract the excluded pairs.
1020 * The main reason for substracting exclusions is that in some cases
1021 * some combinations might never occur and the parameters could have
1022 * any value. These unused values should not influence the dispersion
1023 * correction.
1025 for (mb = 0; mb < mtop->nmolblock; mb++)
1027 nmol = mtop->molblock[mb].nmol;
1028 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1029 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1030 for (i = 0; (i < atoms->nr); i++)
1032 if (q == 0)
1034 tpi = atoms->atom[i].type;
1036 else
1038 tpi = atoms->atom[i].typeB;
1040 j1 = excl->index[i];
1041 j2 = excl->index[i+1];
1042 for (j = j1; j < j2; j++)
1044 k = excl->a[j];
1045 if (k > i)
1047 if (q == 0)
1049 tpj = atoms->atom[k].type;
1051 else
1053 tpj = atoms->atom[k].typeB;
1055 if (bBHAM)
1057 /* nbfp now includes the 6.0 derivative prefactor */
1058 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1060 else
1062 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1063 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1064 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1066 nexcl += nmol;
1072 else
1074 /* Only correct for the interaction of the test particle
1075 * with the rest of the system.
1077 atoms_tpi =
1078 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1080 npair = 0;
1081 for (mb = 0; mb < mtop->nmolblock; mb++)
1083 nmol = mtop->molblock[mb].nmol;
1084 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1085 for (j = 0; j < atoms->nr; j++)
1087 nmolc = nmol;
1088 /* Remove the interaction of the test charge group
1089 * with itself.
1091 if (mb == mtop->nmolblock-1)
1093 nmolc--;
1095 if (mb == 0 && nmol == 1)
1097 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1100 if (q == 0)
1102 tpj = atoms->atom[j].type;
1104 else
1106 tpj = atoms->atom[j].typeB;
1108 for (i = 0; i < fr->n_tpi; i++)
1110 if (q == 0)
1112 tpi = atoms_tpi->atom[i].type;
1114 else
1116 tpi = atoms_tpi->atom[i].typeB;
1118 if (bBHAM)
1120 /* nbfp now includes the 6.0 derivative prefactor */
1121 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1123 else
1125 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1126 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1127 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1129 npair += nmolc;
1134 if (npair - nexcl <= 0 && fplog)
1136 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1137 csix = 0;
1138 ctwelve = 0;
1140 else
1142 csix /= npair - nexcl;
1143 ctwelve /= npair - nexcl;
1145 if (debug)
1147 fprintf(debug, "Counted %d exclusions\n", nexcl);
1148 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1149 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1151 fr->avcsix[q] = csix;
1152 fr->avctwelve[q] = ctwelve;
1155 if (EVDW_PME(fr->vdwtype))
1157 sfree(nbfp_comb);
1160 if (fplog != NULL)
1162 if (fr->eDispCorr == edispcAllEner ||
1163 fr->eDispCorr == edispcAllEnerPres)
1165 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1166 fr->avcsix[0], fr->avctwelve[0]);
1168 else
1170 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1176 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1177 const gmx_mtop_t *mtop)
1179 const t_atoms *at1, *at2;
1180 int mt1, mt2, i, j, tpi, tpj, ntypes;
1181 real b, bmin;
1182 real *nbfp;
1184 if (fplog)
1186 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1188 nbfp = fr->nbfp;
1189 ntypes = fr->ntype;
1191 bmin = -1;
1192 fr->bham_b_max = 0;
1193 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1195 at1 = &mtop->moltype[mt1].atoms;
1196 for (i = 0; (i < at1->nr); i++)
1198 tpi = at1->atom[i].type;
1199 if (tpi >= ntypes)
1201 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1204 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1206 at2 = &mtop->moltype[mt2].atoms;
1207 for (j = 0; (j < at2->nr); j++)
1209 tpj = at2->atom[j].type;
1210 if (tpj >= ntypes)
1212 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1214 b = BHAMB(nbfp, ntypes, tpi, tpj);
1215 if (b > fr->bham_b_max)
1217 fr->bham_b_max = b;
1219 if ((b < bmin) || (bmin == -1))
1221 bmin = b;
1227 if (fplog)
1229 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1230 bmin, fr->bham_b_max);
1234 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1235 t_forcerec *fr, real rtab,
1236 const t_commrec *cr,
1237 const char *tabfn, char *eg1, char *eg2,
1238 t_nblists *nbl)
1240 char buf[STRLEN];
1241 int i, j;
1243 if (tabfn == NULL)
1245 if (debug)
1247 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1249 return;
1252 sprintf(buf, "%s", tabfn);
1253 if (eg1 && eg2)
1255 /* Append the two energy group names */
1256 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1257 eg1, eg2, ftp2ext(efXVG));
1259 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1260 /* Copy the contents of the table to separate coulomb and LJ tables too,
1261 * to improve cache performance.
1263 /* For performance reasons we want
1264 * the table data to be aligned to 16-byte. The pointers could be freed
1265 * but currently aren't.
1267 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1268 nbl->table_elec.format = nbl->table_elec_vdw.format;
1269 nbl->table_elec.r = nbl->table_elec_vdw.r;
1270 nbl->table_elec.n = nbl->table_elec_vdw.n;
1271 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1272 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1273 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1274 nbl->table_elec.ninteractions = 1;
1275 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1276 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1278 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1279 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1280 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1281 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1282 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1283 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1284 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1285 nbl->table_vdw.ninteractions = 2;
1286 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1287 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1289 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1291 for (j = 0; j < 4; j++)
1293 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1295 for (j = 0; j < 8; j++)
1297 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1302 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1303 int *ncount, int **count)
1305 const gmx_moltype_t *molt;
1306 const t_ilist *il;
1307 int mt, ftype, stride, i, j, tabnr;
1309 for (mt = 0; mt < mtop->nmoltype; mt++)
1311 molt = &mtop->moltype[mt];
1312 for (ftype = 0; ftype < F_NRE; ftype++)
1314 if (ftype == ftype1 || ftype == ftype2)
1316 il = &molt->ilist[ftype];
1317 stride = 1 + NRAL(ftype);
1318 for (i = 0; i < il->nr; i += stride)
1320 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1321 if (tabnr < 0)
1323 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1325 if (tabnr >= *ncount)
1327 srenew(*count, tabnr+1);
1328 for (j = *ncount; j < tabnr+1; j++)
1330 (*count)[j] = 0;
1332 *ncount = tabnr+1;
1334 (*count)[tabnr]++;
1341 static bondedtable_t *make_bonded_tables(FILE *fplog,
1342 int ftype1, int ftype2,
1343 const gmx_mtop_t *mtop,
1344 const char *basefn, const char *tabext)
1346 int i, ncount, *count;
1347 char tabfn[STRLEN];
1348 bondedtable_t *tab;
1350 tab = NULL;
1352 ncount = 0;
1353 count = NULL;
1354 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1356 if (ncount > 0)
1358 snew(tab, ncount);
1359 for (i = 0; i < ncount; i++)
1361 if (count[i] > 0)
1363 sprintf(tabfn, "%s", basefn);
1364 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1365 tabext, i, ftp2ext(efXVG));
1366 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1369 sfree(count);
1372 return tab;
1375 void forcerec_set_ranges(t_forcerec *fr,
1376 int ncg_home, int ncg_force,
1377 int natoms_force,
1378 int natoms_force_constr, int natoms_f_novirsum)
1380 fr->cg0 = 0;
1381 fr->hcg = ncg_home;
1383 /* fr->ncg_force is unused in the standard code,
1384 * but it can be useful for modified code dealing with charge groups.
1386 fr->ncg_force = ncg_force;
1387 fr->natoms_force = natoms_force;
1388 fr->natoms_force_constr = natoms_force_constr;
1390 if (fr->natoms_force_constr > fr->nalloc_force)
1392 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1394 if (fr->bTwinRange)
1396 srenew(fr->f_twin, fr->nalloc_force);
1400 if (fr->bF_NoVirSum)
1402 fr->f_novirsum_n = natoms_f_novirsum;
1403 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1405 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1406 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1409 else
1411 fr->f_novirsum_n = 0;
1415 static real cutoff_inf(real cutoff)
1417 if (cutoff == 0)
1419 cutoff = GMX_CUTOFF_INF;
1422 return cutoff;
1425 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1426 t_forcerec *fr, const t_inputrec *ir,
1427 const char *tabfn, const gmx_mtop_t *mtop,
1428 matrix box)
1430 char buf[STRLEN];
1431 int i, j;
1433 if (tabfn == NULL)
1435 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1436 return;
1439 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1441 sprintf(buf, "%s", tabfn);
1442 for (i = 0; i < ir->adress->n_tf_grps; i++)
1444 j = ir->adress->tf_table_index[i]; /* get energy group index */
1445 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1446 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1447 if (fp)
1449 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1451 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1456 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1458 gmx_bool bAllvsAll;
1460 bAllvsAll =
1462 ir->rlist == 0 &&
1463 ir->rcoulomb == 0 &&
1464 ir->rvdw == 0 &&
1465 ir->ePBC == epbcNONE &&
1466 ir->vdwtype == evdwCUT &&
1467 ir->coulombtype == eelCUT &&
1468 ir->efep == efepNO &&
1469 (ir->implicit_solvent == eisNO ||
1470 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1471 ir->gb_algorithm == egbHCT ||
1472 ir->gb_algorithm == egbOBC))) &&
1473 getenv("GMX_NO_ALLVSALL") == NULL
1476 if (bAllvsAll && ir->opts.ngener > 1)
1478 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1480 if (bPrintNote)
1482 if (MASTER(cr))
1484 fprintf(stderr, "\n%s\n", note);
1486 if (fp != NULL)
1488 fprintf(fp, "\n%s\n", note);
1491 bAllvsAll = FALSE;
1494 if (bAllvsAll && fp && MASTER(cr))
1496 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1499 return bAllvsAll;
1503 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1505 int t, i;
1507 /* These thread local data structures are used for bondeds only */
1508 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1510 if (fr->nthreads > 1)
1512 snew(fr->f_t, fr->nthreads);
1513 /* Thread 0 uses the global force and energy arrays */
1514 for (t = 1; t < fr->nthreads; t++)
1516 fr->f_t[t].f = NULL;
1517 fr->f_t[t].f_nalloc = 0;
1518 snew(fr->f_t[t].fshift, SHIFTS);
1519 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1520 for (i = 0; i < egNR; i++)
1522 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1529 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1530 const t_commrec *cr,
1531 const t_inputrec *ir,
1532 gmx_bool bGPU)
1534 /* TODO: remove these GPU specific restrictions by implementing CUDA kernels */
1535 if (bGPU)
1537 if (ir->vdw_modifier == eintmodFORCESWITCH ||
1538 ir->vdw_modifier == eintmodPOTSWITCH ||
1539 ir->vdwtype == evdwPME)
1541 md_print_warn(cr, fplog, "LJ switch functions and LJ-PME are not yet supported on the GPU, falling back to CPU only\n");
1542 return FALSE;
1546 if (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB)
1548 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1549 bGPU ? "GPUs" : "SIMD kernels",
1550 bGPU ? "CPU only" : "plain-C kernels");
1551 return FALSE;
1554 return TRUE;
1558 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1559 int *kernel_type,
1560 int *ewald_excl)
1562 *kernel_type = nbnxnk4x4_PlainC;
1563 *ewald_excl = ewaldexclTable;
1565 #ifdef GMX_NBNXN_SIMD
1567 #ifdef GMX_NBNXN_SIMD_4XN
1568 *kernel_type = nbnxnk4xN_SIMD_4xN;
1569 #endif
1570 #ifdef GMX_NBNXN_SIMD_2XNN
1571 /* We expect the 2xNN kernels to be faster in most cases */
1572 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1573 #endif
1575 #if defined GMX_NBNXN_SIMD_4XN && defined GMX_SIMD_X86_AVX_256_OR_HIGHER
1576 if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
1578 /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1579 * 10% with HT, 50% without HT, but extra zeros interactions
1580 * can compensate. As we currently don't detect the actual use
1581 * of HT, switch to 4x8 to avoid a potential performance hit.
1583 *kernel_type = nbnxnk4xN_SIMD_4xN;
1585 #endif
1586 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1588 #ifdef GMX_NBNXN_SIMD_4XN
1589 *kernel_type = nbnxnk4xN_SIMD_4xN;
1590 #else
1591 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1592 #endif
1594 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1596 #ifdef GMX_NBNXN_SIMD_2XNN
1597 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1598 #else
1599 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1600 #endif
1603 /* Analytical Ewald exclusion correction is only an option in
1604 * the SIMD kernel. On BlueGene/Q, this is faster regardless
1605 * of precision. In single precision, this is faster on
1606 * Bulldozer, and slightly faster on Sandy Bridge.
1608 #if ((defined GMX_SIMD_X86_AVX_128_FMA_OR_HIGHER || defined GMX_SIMD_X86_AVX_256_OR_HIGHER || defined __MIC__) && !defined GMX_DOUBLE) || (defined GMX_SIMD_IBM_QPX)
1609 *ewald_excl = ewaldexclAnalytical;
1610 #endif
1611 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1613 *ewald_excl = ewaldexclTable;
1615 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1617 *ewald_excl = ewaldexclAnalytical;
1621 #endif /* GMX_NBNXN_SIMD */
1625 const char *lookup_nbnxn_kernel_name(int kernel_type)
1627 const char *returnvalue = NULL;
1628 switch (kernel_type)
1630 case nbnxnkNotSet:
1631 returnvalue = "not set";
1632 break;
1633 case nbnxnk4x4_PlainC:
1634 returnvalue = "plain C";
1635 break;
1636 case nbnxnk4xN_SIMD_4xN:
1637 case nbnxnk4xN_SIMD_2xNN:
1638 #ifdef GMX_NBNXN_SIMD
1639 #ifdef GMX_SIMD_X86_SSE2_OR_HIGHER
1640 /* We have x86 SSE2 compatible SIMD */
1641 #ifdef GMX_SIMD_X86_AVX_128_FMA_OR_HIGHER
1642 returnvalue = "AVX-128-FMA";
1643 #else
1644 #if defined GMX_SIMD_X86_AVX_256_OR_HIGHER || defined __AVX__
1645 /* x86 SIMD intrinsics can be converted to SSE or AVX depending
1646 * on compiler flags. As we use nearly identical intrinsics,
1647 * compiling for AVX without an AVX macros effectively results
1648 * in AVX kernels.
1649 * For gcc we check for __AVX__
1650 * At least a check for icc should be added (if there is a macro)
1652 #if defined GMX_SIMD_X86_AVX_256_OR_HIGHER && !defined GMX_NBNXN_HALF_WIDTH_SIMD
1653 returnvalue = "AVX-256";
1654 #else
1655 returnvalue = "AVX-128";
1656 #endif
1657 #else
1658 #ifdef GMX_SIMD_X86_SSE4_1_OR_HIGHER
1659 returnvalue = "SSE4.1";
1660 #else
1661 returnvalue = "SSE2";
1662 #endif
1663 #endif
1664 #endif
1665 #else /* GMX_SIMD_X86_SSE2_OR_HIGHER */
1666 /* not GMX_SIMD_X86_SSE2_OR_HIGHER, but other SIMD */
1667 returnvalue = "SIMD";
1668 #endif /* GMX_SIMD_X86_SSE2_OR_HIGHER */
1669 #else /* GMX_NBNXN_SIMD */
1670 returnvalue = "not available";
1671 #endif /* GMX_NBNXN_SIMD */
1672 break;
1673 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1674 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1676 case nbnxnkNR:
1677 default:
1678 gmx_fatal(FARGS, "Illegal kernel type selected");
1679 returnvalue = NULL;
1680 break;
1682 return returnvalue;
1685 static void pick_nbnxn_kernel(FILE *fp,
1686 const t_commrec *cr,
1687 gmx_bool use_simd_kernels,
1688 gmx_bool bUseGPU,
1689 gmx_bool bEmulateGPU,
1690 const t_inputrec *ir,
1691 int *kernel_type,
1692 int *ewald_excl,
1693 gmx_bool bDoNonbonded)
1695 assert(kernel_type);
1697 *kernel_type = nbnxnkNotSet;
1698 *ewald_excl = ewaldexclTable;
1700 if (bEmulateGPU)
1702 *kernel_type = nbnxnk8x8x8_PlainC;
1704 if (bDoNonbonded)
1706 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1709 else if (bUseGPU)
1711 *kernel_type = nbnxnk8x8x8_CUDA;
1714 if (*kernel_type == nbnxnkNotSet)
1716 /* LJ PME with LB combination rule does 7 mesh operations.
1717 * This so slow that we don't compile SIMD non-bonded kernels for that.
1719 if (use_simd_kernels &&
1720 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1722 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1724 else
1726 *kernel_type = nbnxnk4x4_PlainC;
1730 if (bDoNonbonded && fp != NULL)
1732 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1733 lookup_nbnxn_kernel_name(*kernel_type),
1734 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1735 nbnxn_kernel_to_cj_size(*kernel_type));
1739 static void pick_nbnxn_resources(const t_commrec *cr,
1740 const gmx_hw_info_t *hwinfo,
1741 gmx_bool bDoNonbonded,
1742 gmx_bool *bUseGPU,
1743 gmx_bool *bEmulateGPU,
1744 const gmx_gpu_opt_t *gpu_opt)
1746 gmx_bool bEmulateGPUEnvVarSet;
1747 char gpu_err_str[STRLEN];
1749 *bUseGPU = FALSE;
1751 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1753 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1754 * GPUs (currently) only handle non-bonded calculations, we will
1755 * automatically switch to emulation if non-bonded calculations are
1756 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1757 * way to turn off GPU initialization, data movement, and cleanup.
1759 * GPU emulation can be useful to assess the performance one can expect by
1760 * adding GPU(s) to the machine. The conditional below allows this even
1761 * if mdrun is compiled without GPU acceleration support.
1762 * Note that you should freezing the system as otherwise it will explode.
1764 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1765 (!bDoNonbonded &&
1766 gpu_opt->ncuda_dev_use > 0));
1768 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1770 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1772 /* Each PP node will use the intra-node id-th device from the
1773 * list of detected/selected GPUs. */
1774 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1775 &hwinfo->gpu_info, gpu_opt))
1777 /* At this point the init should never fail as we made sure that
1778 * we have all the GPUs we need. If it still does, we'll bail. */
1779 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1780 cr->nodeid,
1781 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1782 cr->rank_pp_intranode),
1783 gpu_err_str);
1786 /* Here we actually turn on hardware GPU acceleration */
1787 *bUseGPU = TRUE;
1791 gmx_bool uses_simple_tables(int cutoff_scheme,
1792 nonbonded_verlet_t *nbv,
1793 int group)
1795 gmx_bool bUsesSimpleTables = TRUE;
1796 int grp_index;
1798 switch (cutoff_scheme)
1800 case ecutsGROUP:
1801 bUsesSimpleTables = TRUE;
1802 break;
1803 case ecutsVERLET:
1804 assert(NULL != nbv && NULL != nbv->grp);
1805 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1806 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1807 break;
1808 default:
1809 gmx_incons("unimplemented");
1811 return bUsesSimpleTables;
1814 static void init_ewald_f_table(interaction_const_t *ic,
1815 gmx_bool bUsesSimpleTables,
1816 real rtab)
1818 real maxr;
1820 if (bUsesSimpleTables)
1822 /* With a spacing of 0.0005 we are at the force summation accuracy
1823 * for the SSE kernels for "normal" atomistic simulations.
1825 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1826 ic->rcoulomb);
1828 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1829 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1831 else
1833 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1834 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1835 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1838 sfree_aligned(ic->tabq_coul_FDV0);
1839 sfree_aligned(ic->tabq_coul_F);
1840 sfree_aligned(ic->tabq_coul_V);
1842 /* Create the original table data in FDV0 */
1843 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1844 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1845 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1846 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1847 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
1850 void init_interaction_const_tables(FILE *fp,
1851 interaction_const_t *ic,
1852 gmx_bool bUsesSimpleTables,
1853 real rtab)
1855 real spacing;
1857 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1859 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1861 if (fp != NULL)
1863 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1864 1/ic->tabq_scale, ic->tabq_size);
1869 static void clear_force_switch_constants(shift_consts_t *sc)
1871 sc->c2 = 0;
1872 sc->c3 = 0;
1873 sc->cpot = 0;
1876 static void force_switch_constants(real p,
1877 real rsw, real rc,
1878 shift_consts_t *sc)
1880 /* Here we determine the coefficient for shifting the force to zero
1881 * between distance rsw and the cut-off rc.
1882 * For a potential of r^-p, we have force p*r^-(p+1).
1883 * But to save flops we absorb p in the coefficient.
1884 * Thus we get:
1885 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1886 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1888 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1889 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1890 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1893 static void potential_switch_constants(real rsw, real rc,
1894 switch_consts_t *sc)
1896 /* The switch function is 1 at rsw and 0 at rc.
1897 * The derivative and second derivate are zero at both ends.
1898 * rsw = max(r - r_switch, 0)
1899 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1900 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1901 * force = force*dsw - potential*sw
1902 * potential *= sw
1904 sc->c3 = -10*pow(rc - rsw, -3);
1905 sc->c4 = 15*pow(rc - rsw, -4);
1906 sc->c5 = -6*pow(rc - rsw, -5);
1909 static void
1910 init_interaction_const(FILE *fp,
1911 const t_commrec gmx_unused *cr,
1912 interaction_const_t **interaction_const,
1913 const t_forcerec *fr,
1914 real rtab)
1916 interaction_const_t *ic;
1917 gmx_bool bUsesSimpleTables = TRUE;
1919 snew(ic, 1);
1921 /* Just allocate something so we can free it */
1922 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1923 snew_aligned(ic->tabq_coul_F, 16, 32);
1924 snew_aligned(ic->tabq_coul_V, 16, 32);
1926 ic->rlist = fr->rlist;
1927 ic->rlistlong = fr->rlistlong;
1929 /* Lennard-Jones */
1930 ic->vdwtype = fr->vdwtype;
1931 ic->vdw_modifier = fr->vdw_modifier;
1932 ic->rvdw = fr->rvdw;
1933 ic->rvdw_switch = fr->rvdw_switch;
1934 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1935 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1936 ic->sh_lj_ewald = 0;
1937 clear_force_switch_constants(&ic->dispersion_shift);
1938 clear_force_switch_constants(&ic->repulsion_shift);
1940 switch (ic->vdw_modifier)
1942 case eintmodPOTSHIFT:
1943 /* Only shift the potential, don't touch the force */
1944 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
1945 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
1946 if (EVDW_PME(ic->vdwtype))
1948 real crc2;
1950 if (fr->cutoff_scheme == ecutsGROUP)
1952 gmx_fatal(FARGS, "Potential-shift is not (yet) implemented for LJ-PME with cutoff-scheme=group");
1954 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
1955 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
1957 break;
1958 case eintmodFORCESWITCH:
1959 /* Switch the force, switch and shift the potential */
1960 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
1961 &ic->dispersion_shift);
1962 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
1963 &ic->repulsion_shift);
1964 break;
1965 case eintmodPOTSWITCH:
1966 /* Switch the potential and force */
1967 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
1968 &ic->vdw_switch);
1969 break;
1970 case eintmodNONE:
1971 case eintmodEXACTCUTOFF:
1972 /* Nothing to do here */
1973 break;
1974 default:
1975 gmx_incons("unimplemented potential modifier");
1978 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
1980 /* Electrostatics */
1981 ic->eeltype = fr->eeltype;
1982 ic->coulomb_modifier = fr->coulomb_modifier;
1983 ic->rcoulomb = fr->rcoulomb;
1984 ic->epsilon_r = fr->epsilon_r;
1985 ic->epsfac = fr->epsfac;
1986 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
1988 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1990 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
1992 else
1994 ic->sh_ewald = 0;
1997 /* Reaction-field */
1998 if (EEL_RF(ic->eeltype))
2000 ic->epsilon_rf = fr->epsilon_rf;
2001 ic->k_rf = fr->k_rf;
2002 ic->c_rf = fr->c_rf;
2004 else
2006 /* For plain cut-off we might use the reaction-field kernels */
2007 ic->epsilon_rf = ic->epsilon_r;
2008 ic->k_rf = 0;
2009 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2011 ic->c_rf = 1/ic->rcoulomb;
2013 else
2015 ic->c_rf = 0;
2019 if (fp != NULL)
2021 real dispersion_shift;
2023 dispersion_shift = ic->dispersion_shift.cpot;
2024 if (EVDW_PME(ic->vdwtype))
2026 dispersion_shift -= ic->sh_lj_ewald;
2028 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2029 ic->repulsion_shift.cpot, dispersion_shift);
2031 if (ic->eeltype == eelCUT)
2033 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2035 else if (EEL_PME(ic->eeltype))
2037 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2039 fprintf(fp, "\n");
2042 *interaction_const = ic;
2044 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2046 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2048 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2049 * also sharing texture references. To keep the code simple, we don't
2050 * treat texture references as shared resources, but this means that
2051 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2052 * Hence, to ensure that the non-bonded kernels don't start before all
2053 * texture binding operations are finished, we need to wait for all ranks
2054 * to arrive here before continuing.
2056 * Note that we could omit this barrier if GPUs are not shared (or
2057 * texture objects are used), but as this is initialization code, there
2058 * is not point in complicating things.
2060 #ifdef GMX_THREAD_MPI
2061 if (PAR(cr))
2063 gmx_barrier(cr);
2065 #endif /* GMX_THREAD_MPI */
2068 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2069 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2072 static void init_nb_verlet(FILE *fp,
2073 nonbonded_verlet_t **nb_verlet,
2074 const t_inputrec *ir,
2075 const t_forcerec *fr,
2076 const t_commrec *cr,
2077 const char *nbpu_opt)
2079 nonbonded_verlet_t *nbv;
2080 int i;
2081 char *env;
2082 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2084 nbnxn_alloc_t *nb_alloc;
2085 nbnxn_free_t *nb_free;
2087 snew(nbv, 1);
2089 pick_nbnxn_resources(cr, fr->hwinfo,
2090 fr->bNonbonded,
2091 &nbv->bUseGPU,
2092 &bEmulateGPU,
2093 fr->gpu_opt);
2095 nbv->nbs = NULL;
2097 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2098 for (i = 0; i < nbv->ngrp; i++)
2100 nbv->grp[i].nbl_lists.nnbl = 0;
2101 nbv->grp[i].nbat = NULL;
2102 nbv->grp[i].kernel_type = nbnxnkNotSet;
2104 if (i == 0) /* local */
2106 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2107 nbv->bUseGPU, bEmulateGPU, ir,
2108 &nbv->grp[i].kernel_type,
2109 &nbv->grp[i].ewald_excl,
2110 fr->bNonbonded);
2112 else /* non-local */
2114 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2116 /* Use GPU for local, select a CPU kernel for non-local */
2117 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2118 FALSE, FALSE, ir,
2119 &nbv->grp[i].kernel_type,
2120 &nbv->grp[i].ewald_excl,
2121 fr->bNonbonded);
2123 bHybridGPURun = TRUE;
2125 else
2127 /* Use the same kernel for local and non-local interactions */
2128 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2129 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2134 if (nbv->bUseGPU)
2136 /* init the NxN GPU data; the last argument tells whether we'll have
2137 * both local and non-local NB calculation on GPU */
2138 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2139 &fr->hwinfo->gpu_info, fr->gpu_opt,
2140 cr->rank_pp_intranode,
2141 (nbv->ngrp > 1) && !bHybridGPURun);
2143 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2145 char *end;
2147 nbv->min_ci_balanced = strtol(env, &end, 10);
2148 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2150 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2153 if (debug)
2155 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2156 nbv->min_ci_balanced);
2159 else
2161 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2162 if (debug)
2164 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2165 nbv->min_ci_balanced);
2169 else
2171 nbv->min_ci_balanced = 0;
2174 *nb_verlet = nbv;
2176 nbnxn_init_search(&nbv->nbs,
2177 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2178 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2179 gmx_omp_nthreads_get(emntNonbonded));
2181 for (i = 0; i < nbv->ngrp; i++)
2183 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2185 nb_alloc = &pmalloc;
2186 nb_free = &pfree;
2188 else
2190 nb_alloc = NULL;
2191 nb_free = NULL;
2194 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2195 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2196 /* 8x8x8 "non-simple" lists are ATM always combined */
2197 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2198 nb_alloc, nb_free);
2200 if (i == 0 ||
2201 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2203 gmx_bool bSimpleList;
2204 int enbnxninitcombrule;
2206 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2208 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2210 /* Plain LJ cut-off: we can optimize with combination rules */
2211 enbnxninitcombrule = enbnxninitcombruleDETECT;
2213 else if (fr->vdwtype == evdwPME)
2215 /* LJ-PME: we need to use a combination rule for the grid */
2216 if (fr->ljpme_combination_rule == eljpmeGEOM)
2218 enbnxninitcombrule = enbnxninitcombruleGEOM;
2220 else
2222 enbnxninitcombrule = enbnxninitcombruleLB;
2225 else
2227 /* We use a full combination matrix: no rule required */
2228 enbnxninitcombrule = enbnxninitcombruleNONE;
2232 snew(nbv->grp[i].nbat, 1);
2233 nbnxn_atomdata_init(fp,
2234 nbv->grp[i].nbat,
2235 nbv->grp[i].kernel_type,
2236 enbnxninitcombrule,
2237 fr->ntype, fr->nbfp,
2238 ir->opts.ngener,
2239 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2240 nb_alloc, nb_free);
2242 else
2244 nbv->grp[i].nbat = nbv->grp[0].nbat;
2249 void init_forcerec(FILE *fp,
2250 const output_env_t oenv,
2251 t_forcerec *fr,
2252 t_fcdata *fcd,
2253 const t_inputrec *ir,
2254 const gmx_mtop_t *mtop,
2255 const t_commrec *cr,
2256 matrix box,
2257 const char *tabfn,
2258 const char *tabafn,
2259 const char *tabpfn,
2260 const char *tabbfn,
2261 const char *nbpu_opt,
2262 gmx_bool bNoSolvOpt,
2263 real print_force)
2265 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2266 real rtab;
2267 char *env;
2268 double dbl;
2269 const t_block *cgs;
2270 gmx_bool bGenericKernelOnly;
2271 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2272 t_nblists *nbl;
2273 int *nm_ind, egp_flags;
2275 if (fr->hwinfo == NULL)
2277 /* Detect hardware, gather information.
2278 * In mdrun, hwinfo has already been set before calling init_forcerec.
2279 * Here we ignore GPUs, as tools will not use them anyhow.
2281 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2284 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2285 fr->use_simd_kernels = TRUE;
2287 fr->bDomDec = DOMAINDECOMP(cr);
2289 natoms = mtop->natoms;
2291 if (check_box(ir->ePBC, box))
2293 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2296 /* Test particle insertion ? */
2297 if (EI_TPI(ir->eI))
2299 /* Set to the size of the molecule to be inserted (the last one) */
2300 /* Because of old style topologies, we have to use the last cg
2301 * instead of the last molecule type.
2303 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2304 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2305 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2307 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2310 else
2312 fr->n_tpi = 0;
2315 /* Copy AdResS parameters */
2316 if (ir->bAdress)
2318 fr->adress_type = ir->adress->type;
2319 fr->adress_const_wf = ir->adress->const_wf;
2320 fr->adress_ex_width = ir->adress->ex_width;
2321 fr->adress_hy_width = ir->adress->hy_width;
2322 fr->adress_icor = ir->adress->icor;
2323 fr->adress_site = ir->adress->site;
2324 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2325 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2328 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2329 for (i = 0; i < ir->adress->n_energy_grps; i++)
2331 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2334 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2335 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2336 for (i = 0; i < fr->n_adress_tf_grps; i++)
2338 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2340 copy_rvec(ir->adress->refs, fr->adress_refs);
2342 else
2344 fr->adress_type = eAdressOff;
2345 fr->adress_do_hybridpairs = FALSE;
2348 /* Copy the user determined parameters */
2349 fr->userint1 = ir->userint1;
2350 fr->userint2 = ir->userint2;
2351 fr->userint3 = ir->userint3;
2352 fr->userint4 = ir->userint4;
2353 fr->userreal1 = ir->userreal1;
2354 fr->userreal2 = ir->userreal2;
2355 fr->userreal3 = ir->userreal3;
2356 fr->userreal4 = ir->userreal4;
2358 /* Shell stuff */
2359 fr->fc_stepsize = ir->fc_stepsize;
2361 /* Free energy */
2362 fr->efep = ir->efep;
2363 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2364 if (ir->fepvals->bScCoul)
2366 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2367 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2369 else
2371 fr->sc_alphacoul = 0;
2372 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2374 fr->sc_power = ir->fepvals->sc_power;
2375 fr->sc_r_power = ir->fepvals->sc_r_power;
2376 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2378 env = getenv("GMX_SCSIGMA_MIN");
2379 if (env != NULL)
2381 dbl = 0;
2382 sscanf(env, "%lf", &dbl);
2383 fr->sc_sigma6_min = pow(dbl, 6);
2384 if (fp)
2386 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2390 fr->bNonbonded = TRUE;
2391 if (getenv("GMX_NO_NONBONDED") != NULL)
2393 /* turn off non-bonded calculations */
2394 fr->bNonbonded = FALSE;
2395 md_print_warn(cr, fp,
2396 "Found environment variable GMX_NO_NONBONDED.\n"
2397 "Disabling nonbonded calculations.\n");
2400 bGenericKernelOnly = FALSE;
2402 /* We now check in the NS code whether a particular combination of interactions
2403 * can be used with water optimization, and disable it if that is not the case.
2406 if (getenv("GMX_NB_GENERIC") != NULL)
2408 if (fp != NULL)
2410 fprintf(fp,
2411 "Found environment variable GMX_NB_GENERIC.\n"
2412 "Disabling all interaction-specific nonbonded kernels, will only\n"
2413 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2415 bGenericKernelOnly = TRUE;
2418 if (bGenericKernelOnly == TRUE)
2420 bNoSolvOpt = TRUE;
2423 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2425 fr->use_simd_kernels = FALSE;
2426 if (fp != NULL)
2428 fprintf(fp,
2429 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2430 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2434 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2436 /* Check if we can/should do all-vs-all kernels */
2437 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2438 fr->AllvsAll_work = NULL;
2439 fr->AllvsAll_workgb = NULL;
2441 /* All-vs-all kernels have not been implemented in 4.6, and
2442 * the SIMD group kernels are also buggy in this case. Non-SIMD
2443 * group kernels are OK. See Redmine #1249. */
2444 if (fr->bAllvsAll)
2446 fr->bAllvsAll = FALSE;
2447 fr->use_simd_kernels = FALSE;
2448 if (fp != NULL)
2450 fprintf(fp,
2451 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2452 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2453 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2454 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2455 "we are proceeding by disabling all CPU architecture-specific\n"
2456 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2457 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2461 /* Neighbour searching stuff */
2462 fr->cutoff_scheme = ir->cutoff_scheme;
2463 fr->bGrid = (ir->ns_type == ensGRID);
2464 fr->ePBC = ir->ePBC;
2466 /* Determine if we will do PBC for distances in bonded interactions */
2467 if (fr->ePBC == epbcNONE)
2469 fr->bMolPBC = FALSE;
2471 else
2473 if (!DOMAINDECOMP(cr))
2475 /* The group cut-off scheme and SHAKE assume charge groups
2476 * are whole, but not using molpbc is faster in most cases.
2478 if (fr->cutoff_scheme == ecutsGROUP ||
2479 (ir->eConstrAlg == econtSHAKE &&
2480 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2481 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2483 fr->bMolPBC = ir->bPeriodicMols;
2485 else
2487 fr->bMolPBC = TRUE;
2488 if (getenv("GMX_USE_GRAPH") != NULL)
2490 fr->bMolPBC = FALSE;
2491 if (fp)
2493 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2498 else
2500 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2503 fr->bGB = (ir->implicit_solvent == eisGBSA);
2505 fr->rc_scaling = ir->refcoord_scaling;
2506 copy_rvec(ir->posres_com, fr->posres_com);
2507 copy_rvec(ir->posres_comB, fr->posres_comB);
2508 fr->rlist = cutoff_inf(ir->rlist);
2509 fr->rlistlong = cutoff_inf(ir->rlistlong);
2510 fr->eeltype = ir->coulombtype;
2511 fr->vdwtype = ir->vdwtype;
2512 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2514 fr->coulomb_modifier = ir->coulomb_modifier;
2515 fr->vdw_modifier = ir->vdw_modifier;
2517 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2518 switch (fr->eeltype)
2520 case eelCUT:
2521 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2522 break;
2524 case eelRF:
2525 case eelGRF:
2526 case eelRF_NEC:
2527 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2528 break;
2530 case eelRF_ZERO:
2531 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2532 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2533 break;
2535 case eelSWITCH:
2536 case eelSHIFT:
2537 case eelUSER:
2538 case eelENCADSHIFT:
2539 case eelPMESWITCH:
2540 case eelPMEUSER:
2541 case eelPMEUSERSWITCH:
2542 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2543 break;
2545 case eelPME:
2546 case eelEWALD:
2547 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2548 break;
2550 default:
2551 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2552 break;
2555 /* Vdw: Translate from mdp settings to kernel format */
2556 switch (fr->vdwtype)
2558 case evdwCUT:
2559 case evdwPME:
2560 if (fr->bBHAM)
2562 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2564 else
2566 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2568 break;
2570 case evdwSWITCH:
2571 case evdwSHIFT:
2572 case evdwUSER:
2573 case evdwENCADSHIFT:
2574 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2575 break;
2577 default:
2578 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2579 break;
2582 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2583 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2584 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2586 fr->bTwinRange = fr->rlistlong > fr->rlist;
2587 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2589 fr->reppow = mtop->ffparams.reppow;
2591 if (ir->cutoff_scheme == ecutsGROUP)
2593 fr->bvdwtab = (fr->vdwtype != evdwCUT ||
2594 !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2595 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2596 fr->bcoultab = !(fr->eeltype == eelCUT ||
2597 fr->eeltype == eelEWALD ||
2598 fr->eeltype == eelPME ||
2599 fr->eeltype == eelRF ||
2600 fr->eeltype == eelRF_ZERO);
2602 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2603 * going to be faster to tabulate the interaction than calling the generic kernel.
2605 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2607 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2609 fr->bcoultab = TRUE;
2612 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2613 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2614 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2615 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2617 if (fr->rcoulomb != fr->rvdw)
2619 fr->bcoultab = TRUE;
2623 if (getenv("GMX_REQUIRE_TABLES"))
2625 fr->bvdwtab = TRUE;
2626 fr->bcoultab = TRUE;
2629 if (fp)
2631 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2632 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2635 if (fr->bvdwtab == TRUE)
2637 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2638 fr->nbkernel_vdw_modifier = eintmodNONE;
2640 if (fr->bcoultab == TRUE)
2642 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2643 fr->nbkernel_elec_modifier = eintmodNONE;
2647 if (ir->cutoff_scheme == ecutsVERLET)
2649 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2651 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2653 fr->bvdwtab = FALSE;
2654 fr->bcoultab = FALSE;
2657 /* Tables are used for direct ewald sum */
2658 if (fr->bEwald)
2660 if (EEL_PME(ir->coulombtype))
2662 if (fp)
2664 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2666 if (ir->coulombtype == eelP3M_AD)
2668 please_cite(fp, "Hockney1988");
2669 please_cite(fp, "Ballenegger2012");
2671 else
2673 please_cite(fp, "Essmann95a");
2676 if (ir->ewald_geometry == eewg3DC)
2678 if (fp)
2680 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2682 please_cite(fp, "In-Chul99a");
2685 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2686 init_ewald_tab(&(fr->ewald_table), ir, fp);
2687 if (fp)
2689 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2690 1/fr->ewaldcoeff_q);
2694 if (EVDW_PME(ir->vdwtype))
2696 if (fp)
2698 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2700 please_cite(fp, "Essmann95a");
2701 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2702 if (fp)
2704 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2705 1/fr->ewaldcoeff_lj);
2709 /* Electrostatics */
2710 fr->epsilon_r = ir->epsilon_r;
2711 fr->epsilon_rf = ir->epsilon_rf;
2712 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2713 fr->rcoulomb_switch = ir->rcoulomb_switch;
2714 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2716 /* Parameters for generalized RF */
2717 fr->zsquare = 0.0;
2718 fr->temp = 0.0;
2720 if (fr->eeltype == eelGRF)
2722 init_generalized_rf(fp, mtop, ir, fr);
2725 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2726 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2727 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2728 IR_ELEC_FIELD(*ir) ||
2729 (fr->adress_icor != eAdressICOff)
2732 if (fr->cutoff_scheme == ecutsGROUP &&
2733 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2735 /* Count the total number of charge groups */
2736 fr->cg_nalloc = ncg_mtop(mtop);
2737 srenew(fr->cg_cm, fr->cg_nalloc);
2739 if (fr->shift_vec == NULL)
2741 snew(fr->shift_vec, SHIFTS);
2744 if (fr->fshift == NULL)
2746 snew(fr->fshift, SHIFTS);
2749 if (fr->nbfp == NULL)
2751 fr->ntype = mtop->ffparams.atnr;
2752 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2755 /* Copy the energy group exclusions */
2756 fr->egp_flags = ir->opts.egp_flags;
2758 /* Van der Waals stuff */
2759 fr->rvdw = cutoff_inf(ir->rvdw);
2760 fr->rvdw_switch = ir->rvdw_switch;
2761 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2763 if (fr->rvdw_switch >= fr->rvdw)
2765 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2766 fr->rvdw_switch, fr->rvdw);
2768 if (fp)
2770 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2771 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2772 fr->rvdw_switch, fr->rvdw);
2776 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2778 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2781 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2783 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2786 if (fp)
2788 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2789 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2792 fr->eDispCorr = ir->eDispCorr;
2793 if (ir->eDispCorr != edispcNO)
2795 set_avcsixtwelve(fp, fr, mtop);
2798 if (fr->bBHAM)
2800 set_bham_b_max(fp, fr, mtop);
2803 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2805 /* Copy the GBSA data (radius, volume and surftens for each
2806 * atomtype) from the topology atomtype section to forcerec.
2808 snew(fr->atype_radius, fr->ntype);
2809 snew(fr->atype_vol, fr->ntype);
2810 snew(fr->atype_surftens, fr->ntype);
2811 snew(fr->atype_gb_radius, fr->ntype);
2812 snew(fr->atype_S_hct, fr->ntype);
2814 if (mtop->atomtypes.nr > 0)
2816 for (i = 0; i < fr->ntype; i++)
2818 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2820 for (i = 0; i < fr->ntype; i++)
2822 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2824 for (i = 0; i < fr->ntype; i++)
2826 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2828 for (i = 0; i < fr->ntype; i++)
2830 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2832 for (i = 0; i < fr->ntype; i++)
2834 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2838 /* Generate the GB table if needed */
2839 if (fr->bGB)
2841 #ifdef GMX_DOUBLE
2842 fr->gbtabscale = 2000;
2843 #else
2844 fr->gbtabscale = 500;
2845 #endif
2847 fr->gbtabr = 100;
2848 fr->gbtab = make_gb_table(oenv, fr);
2850 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2852 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2853 if (!DOMAINDECOMP(cr))
2855 make_local_gb(cr, fr->born, ir->gb_algorithm);
2859 /* Set the charge scaling */
2860 if (fr->epsilon_r != 0)
2862 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2864 else
2866 /* eps = 0 is infinite dieletric: no coulomb interactions */
2867 fr->epsfac = 0;
2870 /* Reaction field constants */
2871 if (EEL_RF(fr->eeltype))
2873 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2874 fr->rcoulomb, fr->temp, fr->zsquare, box,
2875 &fr->kappa, &fr->k_rf, &fr->c_rf);
2878 /*This now calculates sum for q and c6*/
2879 set_chargesum(fp, fr, mtop);
2881 /* if we are using LR electrostatics, and they are tabulated,
2882 * the tables will contain modified coulomb interactions.
2883 * Since we want to use the non-shifted ones for 1-4
2884 * coulombic interactions, we must have an extra set of tables.
2887 /* Construct tables.
2888 * A little unnecessary to make both vdw and coul tables sometimes,
2889 * but what the heck... */
2891 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2892 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2894 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2895 fr->bBHAM || fr->bEwald) &&
2896 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2897 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2898 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2900 negp_pp = ir->opts.ngener - ir->nwall;
2901 negptable = 0;
2902 if (!bMakeTables)
2904 bSomeNormalNbListsAreInUse = TRUE;
2905 fr->nnblists = 1;
2907 else
2909 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2910 for (egi = 0; egi < negp_pp; egi++)
2912 for (egj = egi; egj < negp_pp; egj++)
2914 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2915 if (!(egp_flags & EGP_EXCL))
2917 if (egp_flags & EGP_TABLE)
2919 negptable++;
2921 else
2923 bSomeNormalNbListsAreInUse = TRUE;
2928 if (bSomeNormalNbListsAreInUse)
2930 fr->nnblists = negptable + 1;
2932 else
2934 fr->nnblists = negptable;
2936 if (fr->nnblists > 1)
2938 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2942 if (ir->adress)
2944 fr->nnblists *= 2;
2947 snew(fr->nblists, fr->nnblists);
2949 /* This code automatically gives table length tabext without cut-off's,
2950 * in that case grompp should already have checked that we do not need
2951 * normal tables and we only generate tables for 1-4 interactions.
2953 rtab = ir->rlistlong + ir->tabext;
2955 if (bMakeTables)
2957 /* make tables for ordinary interactions */
2958 if (bSomeNormalNbListsAreInUse)
2960 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2961 if (ir->adress)
2963 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2965 if (!bMakeSeparate14Table)
2967 fr->tab14 = fr->nblists[0].table_elec_vdw;
2969 m = 1;
2971 else
2973 m = 0;
2975 if (negptable > 0)
2977 /* Read the special tables for certain energy group pairs */
2978 nm_ind = mtop->groups.grps[egcENER].nm_ind;
2979 for (egi = 0; egi < negp_pp; egi++)
2981 for (egj = egi; egj < negp_pp; egj++)
2983 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2984 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2986 nbl = &(fr->nblists[m]);
2987 if (fr->nnblists > 1)
2989 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2991 /* Read the table file with the two energy groups names appended */
2992 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2993 *mtop->groups.grpname[nm_ind[egi]],
2994 *mtop->groups.grpname[nm_ind[egj]],
2995 &fr->nblists[m]);
2996 if (ir->adress)
2998 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2999 *mtop->groups.grpname[nm_ind[egi]],
3000 *mtop->groups.grpname[nm_ind[egj]],
3001 &fr->nblists[fr->nnblists/2+m]);
3003 m++;
3005 else if (fr->nnblists > 1)
3007 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3013 if (bMakeSeparate14Table)
3015 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3016 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3017 GMX_MAKETABLES_14ONLY);
3020 /* Read AdResS Thermo Force table if needed */
3021 if (fr->adress_icor == eAdressICThermoForce)
3023 /* old todo replace */
3025 if (ir->adress->n_tf_grps > 0)
3027 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3030 else
3032 /* load the default table */
3033 snew(fr->atf_tabs, 1);
3034 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3038 /* Wall stuff */
3039 fr->nwall = ir->nwall;
3040 if (ir->nwall && ir->wall_type == ewtTABLE)
3042 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3045 if (fcd && tabbfn)
3047 fcd->bondtab = make_bonded_tables(fp,
3048 F_TABBONDS, F_TABBONDSNC,
3049 mtop, tabbfn, "b");
3050 fcd->angletab = make_bonded_tables(fp,
3051 F_TABANGLES, -1,
3052 mtop, tabbfn, "a");
3053 fcd->dihtab = make_bonded_tables(fp,
3054 F_TABDIHS, -1,
3055 mtop, tabbfn, "d");
3057 else
3059 if (debug)
3061 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3065 /* QM/MM initialization if requested
3067 if (ir->bQMMM)
3069 fprintf(stderr, "QM/MM calculation requested.\n");
3072 fr->bQMMM = ir->bQMMM;
3073 fr->qr = mk_QMMMrec();
3075 /* Set all the static charge group info */
3076 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3077 &fr->bExcl_IntraCGAll_InterCGNone);
3078 if (DOMAINDECOMP(cr))
3080 fr->cginfo = NULL;
3082 else
3084 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3087 if (!DOMAINDECOMP(cr))
3089 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3090 mtop->natoms, mtop->natoms, mtop->natoms);
3093 fr->print_force = print_force;
3096 /* coarse load balancing vars */
3097 fr->t_fnbf = 0.;
3098 fr->t_wait = 0.;
3099 fr->timesteps = 0;
3101 /* Initialize neighbor search */
3102 init_ns(fp, cr, &fr->ns, fr, mtop);
3104 if (cr->duty & DUTY_PP)
3106 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3108 if (ir->bAdress)
3110 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3115 /* Initialize the thread working data for bonded interactions */
3116 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3118 snew(fr->excl_load, fr->nthreads+1);
3120 if (fr->cutoff_scheme == ecutsVERLET)
3122 if (ir->rcoulomb != ir->rvdw)
3124 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3127 init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
3130 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3131 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3133 if (ir->eDispCorr != edispcNO)
3135 calc_enervirdiff(fp, ir->eDispCorr, fr);
3139 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3140 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3141 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3143 void pr_forcerec(FILE *fp, t_forcerec *fr)
3145 int i;
3147 pr_real(fp, fr->rlist);
3148 pr_real(fp, fr->rcoulomb);
3149 pr_real(fp, fr->fudgeQQ);
3150 pr_bool(fp, fr->bGrid);
3151 pr_bool(fp, fr->bTwinRange);
3152 /*pr_int(fp,fr->cg0);
3153 pr_int(fp,fr->hcg);*/
3154 for (i = 0; i < fr->nnblists; i++)
3156 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3158 pr_real(fp, fr->rcoulomb_switch);
3159 pr_real(fp, fr->rcoulomb);
3161 fflush(fp);
3164 void forcerec_set_excl_load(t_forcerec *fr,
3165 const gmx_localtop_t *top)
3167 const int *ind, *a;
3168 int t, i, j, ntot, n, ntarget;
3170 ind = top->excls.index;
3171 a = top->excls.a;
3173 ntot = 0;
3174 for (i = 0; i < top->excls.nr; i++)
3176 for (j = ind[i]; j < ind[i+1]; j++)
3178 if (a[j] > i)
3180 ntot++;
3185 fr->excl_load[0] = 0;
3186 n = 0;
3187 i = 0;
3188 for (t = 1; t <= fr->nthreads; t++)
3190 ntarget = (ntot*t)/fr->nthreads;
3191 while (i < top->excls.nr && n < ntarget)
3193 for (j = ind[i]; j < ind[i+1]; j++)
3195 if (a[j] > i)
3197 n++;
3200 i++;
3202 fr->excl_load[t] = i;