added Verlet scheme and NxN non-bonded functionality
src/kernel/runner.c

/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 *
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 *          GROningen MAchine for Chemical Simulations
 *
 *                        VERSION 3.2.0
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef __linux
#define _GNU_SOURCE
#include <sched.h>
#include <sys/syscall.h>
#endif
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "typedefs.h"
#include "smalloc.h"
#include "sysstuff.h"
#include "statutil.h"
#include "mdrun.h"
#include "md_logging.h"
#include "md_support.h"
#include "network.h"
#include "pull.h"
#include "names.h"
#include "disre.h"
#include "orires.h"
#include "pme.h"
#include "mdatoms.h"
#include "repl_ex.h"
#include "qmmm.h"
#include "mpelogging.h"
#include "domdec.h"
#include "partdec.h"
#include "coulomb.h"
#include "constr.h"
#include "mvdata.h"
#include "checkpoint.h"
#include "mtop_util.h"
#include "sighandler.h"
#include "tpxio.h"
#include "txtdump.h"
#include "gmx_detect_hardware.h"
#include "gmx_omp_nthreads.h"
#include "pull_rotation.h"
#include "calc_verletbuf.h"
#include "nbnxn_search.h"
#include "../mdlib/nbnxn_consts.h"
#include "gmx_fatal_collective.h"
#include "membed.h"
#include "gmx_omp.h"

#ifdef GMX_LIB_MPI
#include <mpi.h>
#endif
#ifdef GMX_THREAD_MPI
#include "tmpi.h"
#endif

#ifdef GMX_FAHCORE
#include "corewrap.h"
#endif

#ifdef GMX_OPENMM
#include "md_openmm.h"
#endif

#include "gpu_utils.h"
#include "nbnxn_cuda_data_mgmt.h"

typedef struct {
    gmx_integrator_t *func;
} gmx_intp_t;

/* The array should match the eI array in include/types/enums.h */
#ifdef GMX_OPENMM  /* FIXME do_md_openmm needs fixing */
const gmx_intp_t integrator[eiNR] = { {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm} };
#else
const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md}, {do_md} };
#endif
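
/* For reference, the eI order in enums.h is: eiMD, eiSteep, eiCG, eiBD,
 * eiSD2, eiNM, eiLBFGS, eiTPI, eiTPIC, eiSD1, eiVV, eiVVAK (assumed here
 * from the 4.6-era headers), which is why do_md appears for the whole
 * MD/BD/SD/VV family and do_tpi covers both TPI variants.
 */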

gmx_large_int_t     deform_init_init_step_tpx;
matrix              deform_init_box_tpx;
#ifdef GMX_THREAD_MPI
tMPI_Thread_mutex_t deform_init_box_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
#endif

#ifdef GMX_THREAD_MPI
struct mdrunner_arglist
{
    gmx_hw_opt_t *hw_opt;
    FILE *fplog;
    t_commrec *cr;
    int nfile;
    const t_filenm *fnm;
    output_env_t oenv;
    gmx_bool bVerbose;
    gmx_bool bCompact;
    int nstglobalcomm;
    ivec ddxyz;
    int dd_node_order;
    real rdd;
    real rconstr;
    const char *dddlb_opt;
    real dlb_scale;
    const char *ddcsx;
    const char *ddcsy;
    const char *ddcsz;
    const char *nbpu_opt;
    int nsteps_cmdline;
    int nstepout;
    int resetstep;
    int nmultisim;
    int repl_ex_nst;
    int repl_ex_nex;
    int repl_ex_seed;
    real pforce;
    real cpt_period;
    real max_hours;
    const char *deviceOptions;
    unsigned long Flags;
    int ret; /* return value */
};

/* The function used for spawning threads. Extracts the mdrunner()
   arguments from its one argument and calls mdrunner(), after making
   a commrec. */
static void mdrunner_start_fn(void *arg)
{
    struct mdrunner_arglist *mda=(struct mdrunner_arglist*)arg;
    struct mdrunner_arglist mc=*mda; /* copy the arg list to make sure
                                        that it's thread-local. This doesn't
                                        copy pointed-to items, of course,
                                        but those are all const. */
    t_commrec *cr;  /* we need a local version of this */
    FILE *fplog=NULL;
    t_filenm *fnm;

    fnm = dup_tfn(mc.nfile, mc.fnm);

    cr = init_par_threads(mc.cr);

    if (MASTER(cr))
    {
        fplog = mc.fplog;
    }

    mda->ret = mdrunner(mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,
                        mc.bVerbose, mc.bCompact, mc.nstglobalcomm,
                        mc.ddxyz, mc.dd_node_order, mc.rdd,
                        mc.rconstr, mc.dddlb_opt, mc.dlb_scale,
                        mc.ddcsx, mc.ddcsy, mc.ddcsz,
                        mc.nbpu_opt,
                        mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
                        mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
                        mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
}

/* called by mdrunner() to start a specific number of threads (including
   the main thread) for thread-parallel runs. This in turn calls mdrunner()
   for each thread.
   All options besides nthreads are the same as for mdrunner(). */
static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt,
                                         FILE *fplog,t_commrec *cr,int nfile,
                                         const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
                                         gmx_bool bCompact, int nstglobalcomm,
                                         ivec ddxyz,int dd_node_order,real rdd,real rconstr,
                                         const char *dddlb_opt,real dlb_scale,
                                         const char *ddcsx,const char *ddcsy,const char *ddcsz,
                                         const char *nbpu_opt,
                                         int nsteps_cmdline, int nstepout,int resetstep,
                                         int nmultisim,int repl_ex_nst,int repl_ex_nex, int repl_ex_seed,
                                         real pforce,real cpt_period, real max_hours,
                                         const char *deviceOptions, unsigned long Flags)
{
    int ret;
    struct mdrunner_arglist *mda;
    t_commrec *crn; /* the new commrec */
    t_filenm *fnmn;

    /* first check whether we even need to start tMPI */
    if (hw_opt->nthreads_tmpi < 2)
    {
        return cr;
    }

    /* a few small, one-time, almost unavoidable memory leaks: */
    snew(mda,1);
    fnmn = dup_tfn(nfile, fnm);

    /* fill the data structure to pass as void pointer to thread start fn */
    mda->hw_opt         = hw_opt;
    mda->fplog          = fplog;
    mda->cr             = cr;
    mda->nfile          = nfile;
    mda->fnm            = fnmn;
    mda->oenv           = oenv;
    mda->bVerbose       = bVerbose;
    mda->bCompact       = bCompact;
    mda->nstglobalcomm  = nstglobalcomm;
    mda->ddxyz[XX]      = ddxyz[XX];
    mda->ddxyz[YY]      = ddxyz[YY];
    mda->ddxyz[ZZ]      = ddxyz[ZZ];
    mda->dd_node_order  = dd_node_order;
    mda->rdd            = rdd;
    mda->rconstr        = rconstr;
    mda->dddlb_opt      = dddlb_opt;
    mda->dlb_scale      = dlb_scale;
    mda->ddcsx          = ddcsx;
    mda->ddcsy          = ddcsy;
    mda->ddcsz          = ddcsz;
    mda->nbpu_opt       = nbpu_opt;
    mda->nsteps_cmdline = nsteps_cmdline;
    mda->nstepout       = nstepout;
    mda->resetstep      = resetstep;
    mda->nmultisim      = nmultisim;
    mda->repl_ex_nst    = repl_ex_nst;
    mda->repl_ex_nex    = repl_ex_nex;
    mda->repl_ex_seed   = repl_ex_seed;
    mda->pforce         = pforce;
    mda->cpt_period     = cpt_period;
    mda->max_hours      = max_hours;
    mda->deviceOptions  = deviceOptions;
    mda->Flags          = Flags;

    fprintf(stderr, "Starting %d tMPI threads\n",hw_opt->nthreads_tmpi);
    fflush(stderr);
    /* now spawn new threads that start mdrunner_start_fn(), while
       the main thread returns */
    ret = tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi,
                       mdrunner_start_fn, (void*)(mda));
    if (ret != TMPI_SUCCESS)
    {
        return NULL;
    }

    /* make a new comm_rec to reflect the new situation */
    crn = init_par_threads(cr);
    return crn;
}
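
/* Distribute the total thread count over thread-MPI ranks and OpenMP
 * threads per rank: with GPUs we start one thread-MPI rank per GPU,
 * with an explicit OpenMP thread count we divide the total by it, and
 * otherwise we fall back to pure thread-MPI parallelization.
 */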
static int get_tmpi_omp_thread_distribution(const gmx_hw_opt_t *hw_opt,
                                            int nthreads_tot,
                                            int ngpu)
{
    int nthreads_tmpi;

    /* There are no separate PME nodes here, as we ensured in
     * check_and_update_hw_opt that nthreads_tmpi>0 with PME nodes
     * and a conditional ensures we would not have ended up here.
     * Note that separate PME nodes might be switched on later.
     */
    if (ngpu > 0)
    {
        nthreads_tmpi = ngpu;
        if (nthreads_tot > 0 && nthreads_tot < nthreads_tmpi)
        {
            nthreads_tmpi = nthreads_tot;
        }
    }
    else if (hw_opt->nthreads_omp > 0)
    {
        if (hw_opt->nthreads_omp > nthreads_tot)
        {
            gmx_fatal(FARGS,"More OpenMP threads requested (%d) than the total number of threads requested (%d)",hw_opt->nthreads_omp,nthreads_tot);
        }
        nthreads_tmpi = nthreads_tot/hw_opt->nthreads_omp;
    }
    else
    {
        /* TODO choose nthreads_omp based on hardware topology
           when we have a hardware topology detection library */
        /* Don't use OpenMP parallelization */
        nthreads_tmpi = nthreads_tot;
    }

    return nthreads_tmpi;
}

/* Lower bounds used when automatically reducing the thread count;
 * the exact values (90 atoms per thread-MPI thread, 900 per GPU) are
 * assumed 4.6-era defaults.
 */
#define MIN_ATOMS_PER_MPI_THREAD   90
#define MIN_ATOMS_PER_GPU         900

/* Get the number of threads to use for thread-MPI based on how many
 * were requested, which algorithms we're using,
 * and how many particles there are.
 * At this point we have already called check_and_update_hw_opt.
 * Thus all options should be internally consistent and consistent
 * with the hardware, except that ntmpi could be larger than #GPU.
 */
static int get_nthreads_mpi(gmx_hw_info_t *hwinfo,
                            gmx_hw_opt_t *hw_opt,
                            t_inputrec *inputrec, gmx_mtop_t *mtop,
                            const t_commrec *cr,
                            FILE *fplog)
{
    int nthreads_tot_max,nthreads_tmpi,nthreads_new,ngpu;
    int min_atoms_per_mpi_thread;
    char *env;
    char sbuf[STRLEN];
    gmx_bool bCanUseGPU;

    if (hw_opt->nthreads_tmpi > 0)
    {
        /* Trivial, return right away */
        return hw_opt->nthreads_tmpi;
    }

    /* How many total (#tMPI*#OpenMP) threads can we start? */
    if (hw_opt->nthreads_tot > 0)
    {
        nthreads_tot_max = hw_opt->nthreads_tot;
    }
    else
    {
        nthreads_tot_max = tMPI_Thread_get_hw_number();
    }

    bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET && hwinfo->bCanUseGPU);
    if (bCanUseGPU)
    {
        ngpu = hwinfo->gpu_info.ncuda_dev_use;
    }
    else
    {
        ngpu = 0;
    }

    nthreads_tmpi =
        get_tmpi_omp_thread_distribution(hw_opt,nthreads_tot_max,ngpu);

    if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
    {
        /* Steps are divided over the nodes instead of splitting the atoms */
        min_atoms_per_mpi_thread = 0;
    }
    else
    {
        if (bCanUseGPU)
        {
            min_atoms_per_mpi_thread = MIN_ATOMS_PER_GPU;
        }
        else
        {
            min_atoms_per_mpi_thread = MIN_ATOMS_PER_MPI_THREAD;
        }
    }

    /* Check if an algorithm does not support parallel simulation. */
    if (nthreads_tmpi != 1 &&
        ( inputrec->eI == eiLBFGS ||
          inputrec->coulombtype == eelEWALD ) )
    {
        nthreads_tmpi = 1;

        md_print_warn(cr,fplog,"The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI thread.\n");
        if (hw_opt->nthreads_tmpi > nthreads_tmpi)
        {
            gmx_fatal(FARGS,"You asked for more than 1 thread-MPI thread, but an algorithm doesn't support that");
        }
    }
    else if (mtop->natoms/nthreads_tmpi < min_atoms_per_mpi_thread)
    {
        /* the thread number was chosen automatically, but there are too many
           threads (too few atoms per thread) */
        nthreads_new = max(1,mtop->natoms/min_atoms_per_mpi_thread);

        if (nthreads_new > 8 || (nthreads_tmpi == 8 && nthreads_new > 4))
        {
            /* TODO replace this once we have proper HT detection
             * Use only multiples of 4 above 8 threads
             * or with an 8-core processor
             * (to avoid 6 threads on 8 core processors with 4 real cores).
             */
            nthreads_new = (nthreads_new/4)*4;
        }
        else if (nthreads_new > 4)
        {
            /* Avoid 5 or 7 threads */
            nthreads_new = (nthreads_new/2)*2;
        }

        nthreads_tmpi = nthreads_new;

        fprintf(stderr,"\n");
        fprintf(stderr,"NOTE: Parallelization is limited by the small number of atoms,\n");
        fprintf(stderr,"      only starting %d thread-MPI threads.\n",nthreads_tmpi);
        fprintf(stderr,"      You can use the -nt and/or -ntmpi option to optimize the number of threads.\n\n");
    }

    return nthreads_tmpi;
}
#endif /* GMX_THREAD_MPI */

/* Environment variable for setting nstlist */
static const char*  NSTLIST_ENVVAR         = "GMX_NSTLIST";
/* Try to increase nstlist when using a GPU with nstlist less than this */
static const int    NSTLIST_GPU_ENOUGH     = 20;
/* Increase nstlist until the non-bonded cost increases more than this factor */
static const float  NBNXN_GPU_LIST_OK_FAC  = 1.25;
/* Don't increase nstlist beyond a non-bonded cost increase of this factor */
static const float  NBNXN_GPU_LIST_MAX_FAC = 1.40;
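
/* The two factors above bound the pair-list cost: the non-bonded cost is
 * roughly proportional to the pair-list volume, so a cost factor f allows
 * the effective list radius to grow by about f^(1/3). That is why
 * increase_nstlist() below computes rlist_ok and rlist_max as
 * (max(rvdw,rcoulomb) + rlist_inc)*pow(fac,1.0/3.0) - rlist_inc.
 */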

/* Try to increase nstlist when running on a GPU */
static void increase_nstlist(FILE *fp,t_commrec *cr,
                             t_inputrec *ir,const gmx_mtop_t *mtop,matrix box)
{
    char *env;
    int  nstlist_orig,nstlist_prev;
    verletbuf_list_setup_t ls;
    real rlist_inc,rlist_ok,rlist_max,rlist_new,rlist_prev;
    int  i;
    t_state state_tmp;
    gmx_bool bBox,bDD,bCont;
    const char *nstl_fmt="\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
    const char *vbd_err="Can not increase nstlist for GPU run because verlet-buffer-drift is not set or used";
    const char *box_err="Can not increase nstlist for GPU run because the box is too small";
    const char *dd_err ="Can not increase nstlist for GPU run because of domain decomposition limitations";
    char buf[STRLEN];

    /* Number of nstlist alternative values to try when switching */
    const int nstl[]={ 20, 25, 40, 50 };
#define NNSTL  sizeof(nstl)/sizeof(nstl[0])

    env = getenv(NSTLIST_ENVVAR);
    if (env == NULL)
    {
        if (fp != NULL)
        {
            fprintf(fp,nstl_fmt,ir->nstlist);
        }
    }

    if (ir->verletbuf_drift == 0)
    {
        gmx_fatal(FARGS,"You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp");
    }

    if (ir->verletbuf_drift < 0)
    {
        if (MASTER(cr))
        {
            fprintf(stderr,"%s\n",vbd_err);
        }
        if (fp != NULL)
        {
            fprintf(fp,"%s\n",vbd_err);
        }

        return;
    }

    nstlist_orig = ir->nstlist;
    if (env != NULL)
    {
        sprintf(buf,"Getting nstlist from environment variable GMX_NSTLIST=%s",env);
        if (MASTER(cr))
        {
            fprintf(stderr,"%s\n",buf);
        }
        if (fp != NULL)
        {
            fprintf(fp,"%s\n",buf);
        }
        sscanf(env,"%d",&ir->nstlist);
    }

    verletbuf_get_list_setup(TRUE,&ls);

    /* Allow rlist to make the list double the size of the cut-off sphere */
    rlist_inc = nbnxn_get_rlist_effective_inc(NBNXN_GPU_CLUSTER_SIZE,mtop->natoms/det(box));
    rlist_ok  = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_OK_FAC,1.0/3.0) - rlist_inc;
    rlist_max = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_MAX_FAC,1.0/3.0) - rlist_inc;
    if (debug)
    {
        fprintf(debug,"GPU nstlist tuning: rlist_inc %.3f rlist_max %.3f\n",
                rlist_inc,rlist_max);
    }

    i = 0;
    nstlist_prev = nstlist_orig;
    rlist_prev   = ir->rlist;
    do
    {
        if (env == NULL)
        {
            ir->nstlist = nstl[i];
        }

        /* Set the pair-list buffer size in ir */
        calc_verlet_buffer_size(mtop,det(box),ir,ir->verletbuf_drift,&ls,
                                NULL,&rlist_new);

        /* Does rlist fit in the box? */
        bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC,box));
        bDD  = TRUE;
        if (bBox && DOMAINDECOMP(cr))
        {
            /* Check if rlist fits in the domain decomposition */
            if (inputrec2nboundeddim(ir) < DIM)
            {
                gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet");
            }
            copy_mat(box,state_tmp.box);
            bDD = change_dd_cutoff(cr,&state_tmp,ir,rlist_new);
        }

        bCont = FALSE;

        if (env == NULL)
        {
            if (bBox && bDD && rlist_new <= rlist_max)
            {
                /* Increase nstlist */
                nstlist_prev = ir->nstlist;
                rlist_prev   = rlist_new;
                bCont = (i+1 < NNSTL && rlist_new < rlist_ok);
            }
            else
            {
                /* Stick with the previous nstlist */
                ir->nstlist = nstlist_prev;
                rlist_new   = rlist_prev;
                bBox = TRUE;
                bDD  = TRUE;
            }
        }

        i++;
    }
    while (bCont);

    if (!bBox || !bDD)
    {
        gmx_warning(!bBox ? box_err : dd_err);
        if (fp != NULL)
        {
            fprintf(fp,"\n%s\n",bBox ? box_err : dd_err);
        }
        ir->nstlist = nstlist_orig;
    }
    else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist)
    {
        sprintf(buf,"Changing nstlist from %d to %d, rlist from %g to %g",
                nstlist_orig,ir->nstlist,
                ir->rlist,rlist_new);
        if (MASTER(cr))
        {
            fprintf(stderr,"%s\n\n",buf);
        }
        if (fp != NULL)
        {
            fprintf(fp,"%s\n\n",buf);
        }
        ir->rlist     = rlist_new;
        ir->rlistlong = rlist_new;
    }
}
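
/* Check and adjust the Verlet pair-list setup (buffer size and nstlist)
 * for the current run configuration, and determine whether a GPU (or GPU
 * emulation) will be used.
 */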
static void prepare_verlet_scheme(FILE *fplog,
                                  gmx_hw_info_t *hwinfo,
                                  t_commrec *cr,
                                  gmx_hw_opt_t *hw_opt,
                                  const char *nbpu_opt,
                                  t_inputrec *ir,
                                  const gmx_mtop_t *mtop,
                                  matrix box,
                                  gmx_bool *bUseGPU)
{
    /* Here we only check for GPU usage on the MPI master process,
     * as here we don't know how many GPUs we will use yet.
     * We check for a GPU on all processes later.
     */
    *bUseGPU = hwinfo->bCanUseGPU || (getenv("GMX_EMULATE_GPU") != NULL);

    if (ir->verletbuf_drift > 0)
    {
        /* Update the Verlet buffer size for the current run setup */
        verletbuf_list_setup_t ls;
        real rlist_new;

        /* Here we assume CPU acceleration is on. But as currently
         * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
         * and 4x2 gives a larger buffer than 4x4, this is ok.
         */
        verletbuf_get_list_setup(*bUseGPU,&ls);

        calc_verlet_buffer_size(mtop,det(box),ir,
                                ir->verletbuf_drift,&ls,
                                NULL,&rlist_new);
        if (rlist_new != ir->rlist)
        {
            if (fplog != NULL)
            {
                fprintf(fplog,"\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
                        ir->rlist,rlist_new,
                        ls.cluster_size_i,ls.cluster_size_j);
            }
            ir->rlist     = rlist_new;
            ir->rlistlong = rlist_new;
        }
    }

    /* With GPU or emulation we should check nstlist for performance */
    if ((EI_DYNAMICS(ir->eI) &&
         *bUseGPU &&
         ir->nstlist < NSTLIST_GPU_ENOUGH) ||
        getenv(NSTLIST_ENVVAR) != NULL)
    {
        /* Choose a better nstlist */
        increase_nstlist(fplog,cr,ir,mtop,box);
    }
}
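
/* Convert a group-scheme input record to the Verlet scheme, replacing
 * switched/shifted interactions by their plain cut-off or reaction-field
 * equivalents, or refusing with a fatal error where no equivalent exists.
 */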
static void convert_to_verlet_scheme(FILE *fplog,
                                     t_inputrec *ir,
                                     gmx_mtop_t *mtop,real box_vol)
{
    char *conv_mesg="Converting input file with group cut-off scheme to the Verlet cut-off scheme";

    md_print_warn(NULL,fplog,"%s\n",conv_mesg);

    ir->cutoff_scheme   = ecutsVERLET;
    ir->verletbuf_drift = 0.005;

    if (ir->rcoulomb != ir->rvdw)
    {
        gmx_fatal(FARGS,"The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs");
    }

    if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype))
    {
        gmx_fatal(FARGS,"User non-bonded potentials are not (yet) supported with the Verlet scheme");
    }
    else if (EVDW_SWITCHED(ir->vdwtype) || EEL_SWITCHED(ir->coulombtype))
    {
        md_print_warn(NULL,fplog,"Converting switched or shifted interactions to a shifted potential (without force shift), this will lead to slightly different interaction potentials");

        if (EVDW_SWITCHED(ir->vdwtype))
        {
            ir->vdwtype = evdwCUT;
        }
        if (EEL_SWITCHED(ir->coulombtype))
        {
            if (EEL_FULL(ir->coulombtype))
            {
                /* With full electrostatic only PME can be switched */
                ir->coulombtype = eelPME;
            }
            else
            {
                md_print_warn(NULL,fplog,"NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n",eel_names[ir->coulombtype]);
                ir->coulombtype = eelRF;
                ir->epsilon_rf  = 0.0;
            }
        }

        /* We set the target energy drift to a small number.
         * Note that this is only for testing. For production the user
         * should think about this and set the mdp options.
         */
        ir->verletbuf_drift = 1e-4;
    }

    if (inputrec2nboundeddim(ir) != 3)
    {
        gmx_fatal(FARGS,"Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc");
    }

    if (ir->efep != efepNO || ir->implicit_solvent != eisNO)
    {
        gmx_fatal(FARGS,"Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent");
    }

    if (EI_DYNAMICS(ir->eI) && !(EI_MD(ir->eI) && ir->etc == etcNO))
    {
        verletbuf_list_setup_t ls;

        verletbuf_get_list_setup(FALSE,&ls);
        calc_verlet_buffer_size(mtop,box_vol,ir,ir->verletbuf_drift,&ls,
                                NULL,&ir->rlist);
    }
    else
    {
        ir->verletbuf_drift = -1;
        ir->rlist           = 1.05*max(ir->rvdw,ir->rcoulomb);
    }

    gmx_mtop_remove_chargegroups(mtop);
}

/* Set CPU affinity. Can be important for performance.
   On some systems (e.g. Cray) CPU Affinity is set by default.
   But default assigning doesn't work (well) with only some ranks
   having threads. This causes very low performance.
   External tools have cumbersome syntax for setting affinity
   in the case that only some ranks have threads.
   Thus it is important that GROMACS sets the affinity internally
   if only PME is using threads.
 */
static void set_cpu_affinity(FILE *fplog,
                             const t_commrec *cr,
                             const gmx_hw_opt_t *hw_opt,
                             int nthreads_pme,
                             const gmx_hw_info_t *hwinfo,
                             const t_inputrec *inputrec)
{
#ifdef GMX_OPENMP /* TODO: actually we could do this even without OpenMP?! */
#ifdef __linux /* TODO: only linux? why not everywhere if sched_setaffinity is available */
    if (hw_opt->bThreadPinning)
    {
        int thread, nthread_local, nthread_node, nthread_hw_max, nphyscore;
        int offset;
        char *env;

        /* threads on this MPI process or TMPI thread */
        if (cr->duty & DUTY_PP)
        {
            nthread_local = gmx_omp_nthreads_get(emntNonbonded);
        }
        else
        {
            nthread_local = gmx_omp_nthreads_get(emntPME);
        }

        /* map the current process to cores */
        thread       = 0;
        nthread_node = nthread_local;
#ifdef GMX_MPI
        if (PAR(cr) || MULTISIM(cr))
        {
            /* We need to determine a scan of the thread counts in this
             * compute node.
             */
            MPI_Comm comm_intra;

            MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),cr->nodeid_intra,
                           &comm_intra);
            MPI_Scan(&nthread_local,&thread,1,MPI_INT,MPI_SUM,comm_intra);
            /* MPI_Scan is inclusive, but here we need exclusive */
            thread -= nthread_local;
            /* Get the total number of threads on this physical node */
            MPI_Allreduce(&nthread_local,&nthread_node,1,MPI_INT,MPI_SUM,comm_intra);
            MPI_Comm_free(&comm_intra);
        }
#endif

        offset = 0;
        if (hw_opt->core_pinning_offset > 0)
        {
            offset = hw_opt->core_pinning_offset;
            if (SIMMASTER(cr))
            {
                fprintf(stderr, "Applying core pinning offset %d\n", offset);
            }
            if (fplog)
            {
                fprintf(fplog, "Applying core pinning offset %d\n", offset);
            }
        }

        /* With Intel Hyper-Threading enabled, we want to pin consecutive
         * threads to physical cores when using more threads than physical
         * cores or when the user requests so.
         */
        nthread_hw_max = hwinfo->nthreads_hw_avail;
        nphyscore = -1;
        if (hw_opt->bPinHyperthreading ||
            (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
             nthread_node > nthread_hw_max/2 && getenv("GMX_DISABLE_PINHT") == NULL))
        {
            if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) != GMX_CPUID_X86_SMT_ENABLED)
            {
                /* We print to stderr on all processes, as we might have
                 * different settings on different physical nodes.
                 */
                if (gmx_cpuid_vendor(hwinfo->cpuid_info) != GMX_CPUID_VENDOR_INTEL)
                {
                    md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
                                  "but non-Intel CPU detected (vendor: %s)\n",
                                  gmx_cpuid_vendor_string[gmx_cpuid_vendor(hwinfo->cpuid_info)]);
                }
                else
                {
                    md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
                                  "but the CPU detected does not have Intel Hyper-Threading support "
                                  "(or it is turned off)\n");
                }
            }
            nphyscore = nthread_hw_max/2;

            if (SIMMASTER(cr))
            {
                fprintf(stderr, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
                        nphyscore);
            }
            if (fplog)
            {
                fprintf(fplog, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
                        nphyscore);
            }
        }

        /* set the per-thread affinity */
#pragma omp parallel firstprivate(thread) num_threads(nthread_local)
        {
            cpu_set_t mask;
            int core;

            CPU_ZERO(&mask);
            thread += gmx_omp_get_thread_num();
            if (nphyscore <= 0)
            {
                core = offset + thread;
            }
            else
            {
                /* Lock pairs of threads to the same hyperthreaded core */
                core = offset + thread/2 + (thread % 2)*nphyscore;
            }
            CPU_SET(core, &mask);
            sched_setaffinity((pid_t) syscall (SYS_gettid), sizeof(cpu_set_t), &mask);
        }
    }
#endif /* __linux */
#endif /* GMX_OPENMP */
}
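
/* Check the user-supplied thread counts for consistency, fill in values
 * that follow from the others (e.g. nthreads_omp from nthreads_tot and
 * nthreads_tmpi), and fail with a fatal error on impossible combinations.
 */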
static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt,
                                    int cutoff_scheme)
{
    gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp);

#ifndef GMX_THREAD_MPI
    if (hw_opt->nthreads_tot > 0)
    {
        gmx_fatal(FARGS,"Setting the total number of threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
    }
    if (hw_opt->nthreads_tmpi > 0)
    {
        gmx_fatal(FARGS,"Setting the number of thread-MPI threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
    }
#endif

    if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)
    {
        /* We have the same number of OpenMP threads for PP and PME processes,
         * thus we can perform several consistency checks.
         */
        if (hw_opt->nthreads_tmpi > 0 &&
            hw_opt->nthreads_omp > 0 &&
            hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
        {
            gmx_fatal(FARGS,"The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested",
                      hw_opt->nthreads_tot,hw_opt->nthreads_tmpi,hw_opt->nthreads_omp);
        }

        if (hw_opt->nthreads_tmpi > 0 &&
            hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
        {
            gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)",
                      hw_opt->nthreads_tot,hw_opt->nthreads_tmpi);
        }

        if (hw_opt->nthreads_omp > 0 &&
            hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0)
        {
            gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)",
                      hw_opt->nthreads_tot,hw_opt->nthreads_omp);
        }

        if (hw_opt->nthreads_tmpi > 0 &&
            hw_opt->nthreads_omp <= 0)
        {
            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
        }
    }

#ifndef GMX_OPENMP
    if (hw_opt->nthreads_omp > 1)
    {
        gmx_fatal(FARGS,"OpenMP threads are requested, but Gromacs was compiled without OpenMP support");
    }
#endif

    if (cutoff_scheme == ecutsGROUP)
    {
        /* We only have OpenMP support for PME only nodes */
        if (hw_opt->nthreads_omp > 1)
        {
            gmx_fatal(FARGS,"OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",
                      ecutscheme_names[cutoff_scheme],
                      ecutscheme_names[ecutsVERLET]);
        }
        hw_opt->nthreads_omp = 1;
    }

    if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
    {
        gmx_fatal(FARGS,"You need to specify -ntomp in addition to -ntomp_pme");
    }

    if (hw_opt->nthreads_tot == 1)
    {
        hw_opt->nthreads_tmpi = 1;

        if (hw_opt->nthreads_omp > 1)
        {
            gmx_fatal(FARGS,"You requested %d OpenMP threads with %d total threads",
                      hw_opt->nthreads_omp,hw_opt->nthreads_tot);
        }
        hw_opt->nthreads_omp = 1;
    }

    if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)
    {
        hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;
    }

    if (debug)
    {
        fprintf(debug,"hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
                hw_opt->nthreads_tot,
                hw_opt->nthreads_tmpi,
                hw_opt->nthreads_omp,
                hw_opt->nthreads_omp_pme,
                hw_opt->gpu_id!=NULL ? hw_opt->gpu_id : "");
    }
}

/* Override the value in inputrec with value passed on the command line (if any) */
static void override_nsteps_cmdline(FILE *fplog,
                                    int nsteps_cmdline,
                                    t_inputrec *ir,
                                    const t_commrec *cr)
{
    assert(ir);
    assert(cr);

    /* override with anything else than the default -2 */
    if (nsteps_cmdline > -2)
    {
        char stmp[STRLEN];

        ir->nsteps = nsteps_cmdline;
        if (EI_DYNAMICS(ir->eI))
        {
            sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps, %.3f ps",
                    nsteps_cmdline, nsteps_cmdline*ir->delta_t);
        }
        else
        {
            sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps",
                    nsteps_cmdline);
        }

        md_print_warn(cr, fplog, "%s\n", stmp);
    }
}

/* Data structure set by SIMMASTER which needs to be passed to all nodes
 * before the other nodes have read the tpx file and called gmx_detect_hardware.
 */
typedef struct {
    int      cutoff_scheme; /* The cutoff-scheme from inputrec_t */
    gmx_bool bUseGPU;       /* Use GPU or GPU emulation          */
} master_inf_t;
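
/* mdrunner() is the common entry point for all integrators; the real
 * caller lives in src/kernel/mdrun.c. A minimal serial invocation might
 * look like the sketch below (illustrative values only; cr, fnm, oenv
 * etc. are assumed to come from the usual mdrun.c setup):
 *
 *     ivec ddxyz = { 1, 1, 1 };                    // no DD grid requested
 *     rc = mdrunner(&hw_opt, NULL, cr, nfile, fnm, oenv,
 *                   FALSE, FALSE, -1,              // bVerbose, bCompact, nstglobalcomm
 *                   ddxyz, 0, 0.0, 0.0,            // dd_node_order, rdd, rconstr
 *                   "auto", 0.8, NULL, NULL, NULL, // DD dynamic load balancing
 *                   "auto",                        // nbpu_opt: auto-select CPU/GPU
 *                   -2, 100, -1,                   // nsteps_cmdline, nstepout, resetstep
 *                   0, 0, 0, 0, -1.0,              // multisim/replica exchange off, pforce
 *                   15.0, -1.0, NULL, 0);          // cpt_period, max_hours, deviceOptions, Flags
 */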
int mdrunner(gmx_hw_opt_t *hw_opt,
             FILE *fplog,t_commrec *cr,int nfile,
             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
             gmx_bool bCompact, int nstglobalcomm,
             ivec ddxyz,int dd_node_order,real rdd,real rconstr,
             const char *dddlb_opt,real dlb_scale,
             const char *ddcsx,const char *ddcsy,const char *ddcsz,
             const char *nbpu_opt,
             int nsteps_cmdline, int nstepout,int resetstep,
             int nmultisim,int repl_ex_nst,int repl_ex_nex,
             int repl_ex_seed, real pforce,real cpt_period,real max_hours,
             const char *deviceOptions, unsigned long Flags)
{
    gmx_bool   bForceUseGPU,bTryUseGPU;
    double     nodetime=0,realtime;
    t_inputrec *inputrec;
    t_state    *state=NULL;
    matrix     box;
    gmx_ddbox_t ddbox={0};
    int        npme_major,npme_minor;
    real       tmpr1,tmpr2;
    t_nrnb     *nrnb;
    gmx_mtop_t *mtop=NULL;
    t_mdatoms  *mdatoms=NULL;
    t_forcerec *fr=NULL;
    t_fcdata   *fcd=NULL;
    real       ewaldcoeff=0;
    gmx_pme_t  *pmedata=NULL;
    gmx_vsite_t *vsite=NULL;
    gmx_constr_t constr;
    int        i,m,nChargePerturbed=-1,status,nalloc;
    char       *gro;
    gmx_wallcycle_t wcycle;
    gmx_bool   bReadRNG,bReadEkin;
    int        list;
    gmx_runtime_t runtime;
    int        rc;
    gmx_large_int_t reset_counters;
    gmx_edsam_t ed=NULL;
    t_commrec  *cr_old=cr;
    int        nthreads_pme=1;
    int        nthreads_pp=1;
    gmx_membed_t membed=NULL;
    gmx_hw_info_t *hwinfo=NULL;
    master_inf_t minf={-1,FALSE};

    /* CAUTION: threads may be started later on in this function, so
       cr doesn't reflect the final parallel state right now */
    snew(inputrec,1);
    snew(mtop,1);

    if (Flags & MD_APPENDFILES)
    {
        fplog = NULL;
    }

    bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
    bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;

    snew(state,1);
    if (SIMMASTER(cr))
    {
        /* Read (nearly) all data required for the simulation */
        read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);

        if (inputrec->cutoff_scheme != ecutsVERLET &&
            ((Flags & MD_TESTVERLET) || getenv("GMX_VERLET_SCHEME") != NULL))
        {
            convert_to_verlet_scheme(fplog,inputrec,mtop,det(state->box));
        }

        /* Detect hardware, gather information. With tMPI only thread 0 does it
         * and after threads are started broadcasts hwinfo around. */
        snew(hwinfo, 1);
        gmx_detect_hardware(fplog, hwinfo, cr,
                            bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);

        minf.cutoff_scheme = inputrec->cutoff_scheme;
        minf.bUseGPU       = FALSE;

        if (inputrec->cutoff_scheme == ecutsVERLET)
        {
            prepare_verlet_scheme(fplog,hwinfo,cr,hw_opt,nbpu_opt,
                                  inputrec,mtop,state->box,
                                  &minf.bUseGPU);
        }
        else if (hwinfo->bCanUseGPU)
        {
            md_print_warn(cr,fplog,
                          "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
                          "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"
                          "      (for quick performance testing you can use the -testverlet option)\n");

            if (bForceUseGPU)
            {
                gmx_fatal(FARGS,"GPU requested, but can't be used without cutoff-scheme=Verlet");
            }
        }
    }
#ifndef GMX_THREAD_MPI
    if (PAR(cr))
    {
        gmx_bcast_sim(sizeof(minf),&minf,cr);
    }
#endif
    if (minf.bUseGPU && cr->npmenodes == -1)
    {
        /* Don't automatically use PME-only nodes with GPUs */
        cr->npmenodes = 0;
    }

#ifdef GMX_THREAD_MPI
    /* With thread-MPI inputrec is only set here on the master thread */
    if (SIMMASTER(cr))
#endif
    {
        check_and_update_hw_opt(hw_opt,minf.cutoff_scheme);

#ifdef GMX_THREAD_MPI
        if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
        {
            gmx_fatal(FARGS,"You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME nodes");
        }
#endif

        if (hw_opt->nthreads_omp_pme != hw_opt->nthreads_omp &&
            cr->npmenodes <= 0)
        {
            gmx_fatal(FARGS,"You need to explicitly specify the number of PME nodes (-npme) when using different number of OpenMP threads for PP and PME nodes");
        }
    }

#ifdef GMX_THREAD_MPI
    if (SIMMASTER(cr))
    {
        /* NOW the threads will be started: */
        hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
                                                 hw_opt,
                                                 inputrec, mtop,
                                                 cr, fplog);
        if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
        {
            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
        }

        if (hw_opt->nthreads_tmpi > 1)
        {
            /* now start the threads. */
            cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
                                        oenv, bVerbose, bCompact, nstglobalcomm,
                                        ddxyz, dd_node_order, rdd, rconstr,
                                        dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
                                        nbpu_opt,
                                        nsteps_cmdline, nstepout, resetstep, nmultisim,
                                        repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
                                        cpt_period, max_hours, deviceOptions,
                                        Flags);
            /* the main thread continues here with a new cr. We don't deallocate
               the old cr because other threads may still be reading it. */
            if (cr == NULL)
            {
                gmx_comm("Failed to spawn threads");
            }
        }
    }
#endif
    /* END OF CAUTION: cr is now reliable */

    /* g_membed initialisation *
     * Because we change the mtop, init_membed is called before the init_parallel *
     * (in case we ever want to make it run in parallel) */
    if (opt2bSet("-membed",nfile,fnm))
    {
        if (MASTER(cr))
        {
            fprintf(stderr,"Initializing membed");
        }
        membed = init_membed(fplog,nfile,fnm,mtop,inputrec,state,cr,&cpt_period);
    }

    if (PAR(cr))
    {
        /* now broadcast everything to the non-master nodes/threads: */
        init_parallel(fplog, cr, inputrec, mtop);

        /* This check needs to happen after get_nthreads_mpi() */
        if (inputrec->cutoff_scheme == ecutsVERLET && (Flags & MD_PARTDEC))
        {
            gmx_fatal_collective(FARGS,cr,NULL,
                                 "The Verlet cut-off scheme is not supported with particle decomposition.\n"
                                 "You can achieve the same effect as particle decomposition by running in parallel using only OpenMP threads.");
        }
    }
    if (fplog != NULL)
    {
        pr_inputrec(fplog,0,"Input Parameters",inputrec,FALSE);
    }

#if defined GMX_THREAD_MPI
    /* With tMPI we detected on thread 0 and we'll just pass the hwinfo pointer
     * to the other threads -- slightly uncool, but works fine, just need to
     * make sure that the data doesn't get freed twice. */
    if (cr->nnodes > 1)
    {
        if (!SIMMASTER(cr))
        {
            snew(hwinfo, 1);
        }
        gmx_bcast(sizeof(&hwinfo), &hwinfo, cr);
    }
#else
    if (PAR(cr) && !SIMMASTER(cr))
    {
        /* now we have inputrec on all nodes, can run the detection */
        /* TODO: perhaps it's better to propagate within a node instead? */
        snew(hwinfo, 1);
        gmx_detect_hardware(fplog, hwinfo, cr,
                            bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
    }
#endif

    /* now make sure the state is initialized and propagated */
    set_state_entries(state,inputrec,cr->nnodes);

    /* remove when vv and rerun works correctly! */
    if (PAR(cr) && EI_VV(inputrec->eI) && ((Flags & MD_RERUN) || (Flags & MD_RERUN_VSITE)))
    {
        gmx_fatal(FARGS,
                  "Currently can't do velocity verlet with rerun in parallel.");
    }

    /* A parallel command line option consistency check that we can
       only do after any threads have started. */
    if (!PAR(cr) &&
        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
    {
        gmx_fatal(FARGS,
                  "The -dd or -npme option requests a parallel simulation, "
#ifndef GMX_MPI
                  "but %s was compiled without threads or MPI enabled"
#else
#ifdef GMX_THREAD_MPI
                  "but the number of threads (option -nt) is 1"
#else
                  "but %s was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec"
#endif
#endif
                  , ShortProgram()
            );
    }

    if ((Flags & MD_RERUN) &&
        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
    {
        gmx_fatal(FARGS, "The .mdp file specified an energy minimization or normal mode algorithm, and these are not compatible with mdrun -rerun");
    }

    if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog) && PAR(cr))
    {
        /* All-vs-all loops do not work with domain decomposition */
        Flags |= MD_PARTDEC;
    }

    if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
    {
        if (cr->npmenodes > 0)
        {
            if (!EEL_PME(inputrec->coulombtype))
            {
                gmx_fatal_collective(FARGS,cr,NULL,
                                     "PME nodes are requested, but the system does not use PME electrostatics");
            }
            if (Flags & MD_PARTDEC)
            {
                gmx_fatal_collective(FARGS,cr,NULL,
                                     "PME nodes are requested, but particle decomposition does not support separate PME nodes");
            }
        }

        cr->npmenodes = 0;
    }

#ifdef GMX_FAHCORE
    fcRegisterSteps(inputrec->nsteps,inputrec->init_step);
#endif

    /* NMR restraints must be initialized before load_checkpoint,
     * since with time averaging the history is added to t_state.
     * For proper consistency check we therefore need to extend
     * t_state here.
     * So the PME-only nodes (if present) will also initialize
     * the distance restraints.
     */
    snew(fcd,1);

    /* This needs to be called before read_checkpoint to extend the state */
    init_disres(fplog,mtop,inputrec,cr,Flags & MD_PARTDEC,fcd,state);

    if (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0)
    {
        if (PAR(cr) && !(Flags & MD_PARTDEC))
        {
            gmx_fatal(FARGS,"Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
        }
        /* Orientation restraints */
        if (MASTER(cr))
        {
            init_orires(fplog,mtop,state->x,inputrec,cr->ms,&(fcd->orires),
                        state);
        }
    }

    if (DEFORM(*inputrec))
    {
        /* Store the deform reference box before reading the checkpoint */
        if (SIMMASTER(cr))
        {
            copy_mat(state->box,box);
        }
        if (PAR(cr))
        {
            gmx_bcast(sizeof(box),box,cr);
        }
        /* Because we do not have the update struct available yet
         * in which the reference values should be stored,
         * we store them temporarily in static variables.
         * This should be thread safe, since they are only written once
         * and with identical values.
         */
#ifdef GMX_THREAD_MPI
        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
#endif
        deform_init_init_step_tpx = inputrec->init_step;
        copy_mat(box,deform_init_box_tpx);
#ifdef GMX_THREAD_MPI
        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
#endif
    }

    if (opt2bSet("-cpi",nfile,fnm))
    {
        /* Check if checkpoint file exists before doing continuation.
         * This way we can use identical input options for the first and subsequent runs...
         */
        if (gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr))
        {
            load_checkpoint(opt2fn_master("-cpi",nfile,fnm,cr),&fplog,
                            cr,Flags & MD_PARTDEC,ddxyz,
                            inputrec,state,&bReadRNG,&bReadEkin,
                            (Flags & MD_APPENDFILES),
                            (Flags & MD_APPENDFILESSET));

            if (bReadRNG)
            {
                Flags |= MD_READ_RNG;
            }
            if (bReadEkin)
            {
                Flags |= MD_READ_EKIN;
            }
        }
    }

    if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
#ifdef GMX_THREAD_MPI
        /* With thread MPI only the master node/thread exists in mdrun.c,
         * therefore non-master nodes need to open the "seppot" log file here.
         */
        || (!MASTER(cr) && (Flags & MD_SEPPOT))
#endif
        )
    {
        gmx_log_open(ftp2fn(efLOG,nfile,fnm),cr,!(Flags & MD_SEPPOT),
                     Flags,&fplog);
    }

    /* override nsteps with value from cmdline */
    override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);

    if (SIMMASTER(cr))
    {
        copy_mat(state->box,box);
    }

    if (PAR(cr))
    {
        gmx_bcast(sizeof(box),box,cr);
    }

    /* Essential dynamics */
    if (opt2bSet("-ei",nfile,fnm))
    {
        /* Open input and output files, allocate space for ED data structure */
        ed = ed_open(nfile,fnm,Flags,cr);
    }

    if (PAR(cr) && !((Flags & MD_PARTDEC) ||
                     EI_TPI(inputrec->eI) ||
                     inputrec->eI == eiNM))
    {
        cr->dd = init_domain_decomposition(fplog,cr,Flags,ddxyz,rdd,rconstr,
                                           dddlb_opt,dlb_scale,
                                           ddcsx,ddcsy,ddcsz,
                                           mtop,inputrec,
                                           box,state->x,
                                           &ddbox,&npme_major,&npme_minor);

        make_dd_communicators(fplog,cr,dd_node_order);

        /* Set overallocation to avoid frequent reallocation of arrays */
        set_over_alloc_dd(TRUE);
    }
    else
    {
        /* PME, if used, is done on all nodes with 1D decomposition */
        cr->npmenodes = 0;
        cr->duty = (DUTY_PP | DUTY_PME);
        npme_major = 1;
        npme_minor = 1;
        if (!EI_TPI(inputrec->eI))
        {
            npme_major = cr->nnodes;
        }

        if (inputrec->ePBC == epbcSCREW)
        {
            gmx_fatal(FARGS,
                      "pbc=%s is only implemented with domain decomposition",
                      epbc_names[inputrec->ePBC]);
        }
    }

    if (PAR(cr))
    {
        /* After possible communicator splitting in make_dd_communicators.
         * we can set up the intra/inter node communication.
         */
        gmx_setup_nodecomm(fplog,cr);
    }

    /* Initialize per-node process ID and counters. */
    gmx_init_intra_counters(cr);

#ifdef GMX_MPI
    md_print_info(cr,fplog,"Using %d MPI %s\n",
                  cr->nnodes,
#ifdef GMX_THREAD_MPI
                  cr->nnodes==1 ? "thread" : "threads"
#else
                  cr->nnodes==1 ? "process" : "processes"
#endif
        );
#endif

    gmx_omp_nthreads_init(fplog, cr,
                          hwinfo->nthreads_hw_avail,
                          hw_opt->nthreads_omp,
                          hw_opt->nthreads_omp_pme,
                          (cr->duty & DUTY_PP) == 0,
                          inputrec->cutoff_scheme == ecutsVERLET);

    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt->nthreads_tmpi, minf.bUseGPU);

    /* getting number of PP/PME threads
       PME: env variable should be read only on one node to make sure it is
       identical everywhere;
     */
    /* TODO nthreads_pp is only used for pinning threads.
     * This is a temporary solution until we have a hw topology library.
     */
    nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
    nthreads_pme = gmx_omp_nthreads_get(emntPME);

    wcycle = wallcycle_init(fplog,resetstep,cr,nthreads_pp,nthreads_pme);

    if (PAR(cr))
    {
        /* Master synchronizes its value of reset_counters with all nodes
         * including PME only nodes */
        reset_counters = wcycle_get_reset_counters(wcycle);
        gmx_bcast_sim(sizeof(reset_counters),&reset_counters,cr);
        wcycle_set_reset_counters(wcycle, reset_counters);
    }

    snew(nrnb,1);
    if (cr->duty & DUTY_PP)
    {
        /* For domain decomposition we allocate dynamically
         * in dd_partition_system.
         */
        if (DOMAINDECOMP(cr))
        {
            bcast_state_setup(cr,state);
        }
        else
        {
            if (PAR(cr))
            {
                bcast_state(cr,state,TRUE);
            }
        }

        /* Initiate forcerecord */
        fr = mk_forcerec();
        fr->hwinfo = hwinfo;
        init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
                      opt2fn("-table",nfile,fnm),
                      opt2fn("-tabletf",nfile,fnm),
                      opt2fn("-tablep",nfile,fnm),
                      opt2fn("-tableb",nfile,fnm),
                      nbpu_opt,
                      FALSE,pforce);

        /* version for PCA_NOT_READ_NODE (see md.c) */
        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
          "nofile","nofile","nofile","nofile",FALSE,pforce);
        */
        fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);

        /* Initialize QM-MM */
        if (fr->bQMMM)
        {
            init_QMMMrec(cr,box,mtop,inputrec,fr);
        }

        /* Initialize the mdatoms structure.
         * mdatoms is not filled with atom data,
         * as this can not be done now with domain decomposition.
         */
        mdatoms = init_mdatoms(fplog,mtop,inputrec->efep!=efepNO);

        /* Initialize the virtual site communication */
        vsite = init_vsite(mtop,cr);

        calc_shifts(box,fr->shift_vec);

        /* With periodic molecules the charge groups should be whole at start up
         * and the virtual sites should not be far from their proper positions.
         */
        if (!inputrec->bContinuation && MASTER(cr) &&
            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
        {
            /* Make molecules whole at start of run */
            if (fr->ePBC != epbcNONE)
            {
                do_pbc_first_mtop(fplog,inputrec->ePBC,box,mtop,state->x);
            }
            if (vsite)
            {
                /* Correct initial vsite positions are required
                 * for the initial distribution in the domain decomposition
                 * and for the initial shell prediction.
                 */
                construct_vsites_mtop(fplog,vsite,mtop,state->x);
            }
        }

        if (EEL_PME(fr->eeltype))
        {
            ewaldcoeff = fr->ewaldcoeff;
            pmedata    = &fr->pmedata;
        }
        else
        {
            pmedata = NULL;
        }
    }
    else
    {
        /* This is a PME only node */

        /* We don't need the state */
        done_state(state);

        ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
        snew(pmedata,1);
    }

#if defined GMX_THREAD_MPI
    /* With the number of TMPI threads equal to the number of cores
     * we already pinned in thread-MPI, so don't pin again here.
     */
    if (hw_opt->nthreads_tmpi != tMPI_Thread_get_hw_number())
#endif
    {
        /* Set the CPU affinity */
        set_cpu_affinity(fplog,cr,hw_opt,nthreads_pme,hwinfo,inputrec);
    }

    /* Initiate PME if necessary,
     * either on all nodes or on dedicated PME nodes only. */
    if (EEL_PME(inputrec->coulombtype))
    {
        if (mdatoms)
        {
            nChargePerturbed = mdatoms->nChargePerturbed;
        }
        if (cr->npmenodes > 0)
        {
            /* The PME only nodes need to know nChargePerturbed */
            gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
        }

        if (cr->duty & DUTY_PME)
        {
            status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
                                  mtop ? mtop->natoms : 0,nChargePerturbed,
                                  (Flags & MD_REPRODUCIBLE),nthreads_pme);
            if (status != 0)
            {
                gmx_fatal(FARGS,"Error %d initializing PME",status);
            }
        }
    }

    if (integrator[inputrec->eI].func == do_md
#ifdef GMX_OPENMM
        ||
        integrator[inputrec->eI].func == do_md_openmm
#endif
        )
    {
        /* Turn on signal handling on all nodes */
        /*
         * (A user signal from the PME nodes (if any)
         * is communicated to the PP nodes.
         */
        signal_handler_install();
    }

    if (cr->duty & DUTY_PP)
    {
        if (inputrec->ePull != epullNO)
        {
            /* Initialize pull code */
            init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv, inputrec->fepvals->init_lambda,
                      EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
        }

        if (inputrec->bRot)
        {
            /* Initialize enforced rotation code */
            init_rot(fplog,inputrec,nfile,fnm,cr,state->x,box,mtop,oenv,
                     bVerbose,Flags);
        }

        constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);

        if (DOMAINDECOMP(cr))
        {
            dd_init_bondeds(fplog,cr->dd,mtop,vsite,constr,inputrec,
                            Flags & MD_DDBONDCHECK,fr->cginfo_mb);

            set_dd_parameters(fplog,cr->dd,dlb_scale,inputrec,fr,&ddbox);

            setup_dd_grid(fplog,cr->dd);
        }

        /* Now do whatever the user wants us to do (how flexible...) */
        integrator[inputrec->eI].func(fplog,cr,nfile,fnm,
                                      oenv,bVerbose,bCompact,
                                      nstglobalcomm,
                                      vsite,constr,
                                      nstepout,inputrec,mtop,
                                      fcd,state,
                                      mdatoms,nrnb,wcycle,ed,fr,
                                      repl_ex_nst,repl_ex_nex,repl_ex_seed,
                                      membed,
                                      cpt_period,max_hours,
                                      deviceOptions,
                                      Flags,
                                      &runtime);

        if (inputrec->ePull != epullNO)
        {
            finish_pull(fplog,inputrec->pull);
        }

        if (inputrec->bRot)
        {
            finish_rot(fplog,inputrec->rot);
        }
    }
    else
    {
        /* do PME only */
        gmx_pmeonly(*pmedata,cr,nrnb,wcycle,ewaldcoeff,FALSE,inputrec);
    }

    if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
    {
        /* Some timing stats */
        if (SIMMASTER(cr))
        {
            if (runtime.proc == 0)
            {
                runtime.proc = runtime.real;
            }
        }
        else
        {
            runtime.real = 0;
        }
    }

    wallcycle_stop(wcycle,ewcRUN);

    /* Finish up, write some stuff
     * if rerunMD, don't write last frame again
     */
    finish_run(fplog,cr,ftp2fn(efSTO,nfile,fnm),
               inputrec,nrnb,wcycle,&runtime,
               fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ?
                 nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL,
               nthreads_pp,
               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));

    if ((cr->duty & DUTY_PP) && fr->nbv != NULL && fr->nbv->bUseGPU)
    {
        char gpu_err_str[STRLEN];

        /* free GPU memory and uninitialize GPU (by destroying the context) */
        nbnxn_cuda_free(fplog, fr->nbv->cu_nbv);

        if (!free_gpu(gpu_err_str))
        {
            gmx_warning("On node %d failed to free GPU #%d: %s",
                        cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
        }
    }

    if (opt2bSet("-membed",nfile,fnm))
    {
        sfree(membed);
    }

#ifdef GMX_THREAD_MPI
    if (PAR(cr) && SIMMASTER(cr))
#endif
    {
        gmx_hardware_info_free(hwinfo);
    }

    /* Does what it says */
    print_date_and_time(fplog,cr->nodeid,"Finished mdrun",&runtime);

    /* Close logfile already here if we were appending to it */
    if (MASTER(cr) && (Flags & MD_APPENDFILES))
    {
        gmx_log_close(fplog);
    }

    rc = (int)gmx_get_stop_condition();

#ifdef GMX_THREAD_MPI
    /* we need to join all threads. The sub-threads join when they
       exit this function, but the master thread needs to be told to
       wait for that. */
    if (PAR(cr) && MASTER(cr))
    {
        tMPI_Finalize();
    }
#endif

    return rc;
}