moved error-check after logfile parsing
[gromacs.git] / src / tools / gmx_tune_pme.c
blobc8e3b40ea51bba9f3607e0413e3472fdd1db0aa5
1 /*
2 *
3 * This source code is part of
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
10 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
11 * Copyright (c) 2001-2008, The GROMACS development team,
12 * check out http://www.gromacs.org for more information.
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * If you want to redistribute modifications, please consider that
20 * scientific software is very special. Version control is crucial -
21 * bugs must be traceable. We will be happy to consider code for
22 * inclusion in the official distribution, but derived work must not
23 * be called official GROMACS. Details are found in the README & COPYING
24 * files - if they are missing, get the official version at www.gromacs.org.
26 * To help us fund GROMACS development, we humbly ask that you cite
27 * the papers on the package - you can find them in the top README file.
29 * For more info, check our website at http://www.gromacs.org
31 * And Hey:
32 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
34 #include "statutil.h"
35 #include "typedefs.h"
36 #include "smalloc.h"
37 #include "vec.h"
38 #include "copyrite.h"
39 #include "statutil.h"
40 #include "tpxio.h"
41 #include "string2.h"
42 #include "readinp.h"
43 #include "calcgrid.h"
44 #include "checkpoint.h"
45 #include "gmx_ana.h"
46 #include "names.h"
50 enum {
51 ddnoSEL, ddnoINTERLEAVE, ddnoPP_PME, ddnoCARTESIAN, ddnoNR
54 /* Enum for situations that can occur during log file parsing, the
55 * corresponding string entries can be found in do_the_tests() in
56 * const char* ParseLog[] */
57 enum {
58 eParselogOK,
59 eParselogNotFound,
60 eParselogNoPerfData,
61 eParselogTerm,
62 eParselogResetProblem,
63 eParselogNoDDGrid,
64 eParselogTPXVersion,
65 eParselogNotParallel,
66 eParselogFatal,
67 eParselogNr
71 typedef struct
73 int nPMEnodes; /* number of PME only nodes used in this test */
74 int nx, ny, nz; /* DD grid */
75 int guessPME; /* if nPMEnodes == -1, this is the guessed number of PME nodes */
76 double *Gcycles; /* This can contain more than one value if doing multiple tests */
77 double Gcycles_Av;
78 float *ns_per_day;
79 float ns_per_day_Av;
80 float *PME_f_load; /* PME mesh/force load average*/
81 float PME_f_load_Av; /* Average average ;) ... */
82 char *mdrun_cmd_line; /* Mdrun command line used for this test */
83 } t_perf;
86 typedef struct
88 int nr_inputfiles; /* The number of tpr and mdp input files */
89 gmx_large_int_t orig_sim_steps; /* Number of steps to be done in the real simulation */
90 real *r_coulomb; /* The coulomb radii [0...nr_inputfiles] */
91 real *r_vdw; /* The vdW radii */
92 real *rlist; /* Neighbourlist cutoff radius */
93 real *rlistlong;
94 int *fourier_nx, *fourier_ny, *fourier_nz;
95 real *fourier_sp; /* Fourierspacing */
97 /* Original values as in inputfile: */
98 real orig_rcoulomb;
99 real orig_rvdw;
100 real orig_rlist, orig_rlistlong;
101 int orig_nk[DIM];
102 real orig_fs[DIM];
103 } t_inputinfo;
106 static void sep_line(FILE *fp)
108 fprintf(fp, "\n------------------------------------------------------------\n");
112 /* Wrapper for system calls */
113 static int gmx_system_call(char *command)
115 #ifdef GMX_NO_SYSTEM
116 gmx_fatal(FARGS,"No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n",command);
117 #else
118 return ( system(command) );
119 #endif
123 /* Check if string starts with substring */
124 static bool str_starts(const char *string, const char *substring)
126 return ( strncmp(string, substring, strlen(substring)) == 0);
130 static void cleandata(t_perf *perfdata, int test_nr)
132 perfdata->Gcycles[test_nr] = 0.0;
133 perfdata->ns_per_day[test_nr] = 0.0;
134 perfdata->PME_f_load[test_nr] = 0.0;
136 return;
140 static bool is_equal(real a, real b)
142 real diff, eps=1.0e-6;
145 diff = a - b;
147 if (diff < 0.0) diff = -diff;
149 if (diff < eps)
150 return TRUE;
151 else
152 return FALSE;
156 static void finalize(const char *fn_out)
158 char buf[STRLEN];
159 FILE *fp;
162 fp = fopen(fn_out,"r");
163 fprintf(stdout,"\n\n");
165 while( fgets(buf,STRLEN-1,fp) != NULL )
167 fprintf(stdout,"%s",buf);
169 fclose(fp);
170 fprintf(stdout,"\n\n");
171 thanx(stderr);
175 enum {eFoundNothing, eFoundDDStr, eFoundAccountingStr, eFoundCycleStr};
177 static int parse_logfile(const char *logfile, const char *errfile,
178 t_perf *perfdata, int test_nr, int presteps, gmx_large_int_t cpt_steps,
179 int nnodes)
181 FILE *fp;
182 char line[STRLEN], dumstring[STRLEN], dumstring2[STRLEN];
183 const char matchstrdd[]="Domain decomposition grid";
184 const char matchstrcr[]="resetting all time and cycle counters";
185 const char matchstrbal[]="Average PME mesh/force load:";
186 const char matchstring[]="R E A L C Y C L E A N D T I M E A C C O U N T I N G";
187 const char errSIG[]="signal, stopping at the next";
188 int iFound;
189 int procs;
190 float dum1,dum2,dum3;
191 int npme;
192 gmx_large_int_t resetsteps=-1;
193 bool bFoundResetStr = FALSE;
194 bool bResetChecked = FALSE;
197 if (!gmx_fexist(logfile))
199 fprintf(stderr, "WARNING: Could not find logfile %s.\n", logfile);
200 cleandata(perfdata, test_nr);
201 return eParselogNotFound;
204 fp = fopen(logfile, "r");
205 perfdata->PME_f_load[test_nr] = -1.0;
206 perfdata->guessPME = -1;
208 iFound = eFoundNothing;
209 if (1 == nnodes)
210 iFound = eFoundDDStr; /* Skip some case statements */
212 while (fgets(line, STRLEN, fp) != NULL)
214 /* Remove leading spaces */
215 ltrim(line);
217 /* Check for TERM and INT signals from user: */
218 if ( strstr(line, errSIG) != NULL )
220 fclose(fp);
221 cleandata(perfdata, test_nr);
222 return eParselogTerm;
225 /* Check whether cycle resetting worked */
226 if (presteps > 0 && !bFoundResetStr)
228 if (strstr(line, matchstrcr) != NULL)
230 sprintf(dumstring, "Step %s", gmx_large_int_pfmt);
231 sscanf(line, dumstring, &resetsteps);
232 bFoundResetStr = TRUE;
233 if (resetsteps == presteps+cpt_steps)
235 bResetChecked = TRUE;
237 else
239 sprintf(dumstring , gmx_large_int_pfmt, resetsteps);
240 sprintf(dumstring2, gmx_large_int_pfmt, presteps+cpt_steps);
241 fprintf(stderr, "WARNING: Time step counters were reset at step %s,\n"
242 " though they were supposed to be reset at step %s!\n",
243 dumstring, dumstring2);
248 /* Look for strings that appear in a certain order in the log file: */
249 switch(iFound)
251 case eFoundNothing:
252 /* Look for domain decomp grid and separate PME nodes: */
253 if (str_starts(line, matchstrdd))
255 sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
256 &(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme);
257 if (perfdata->nPMEnodes == -1)
258 perfdata->guessPME = npme;
259 else if (perfdata->nPMEnodes != npme)
260 gmx_fatal(FARGS, "PME nodes from command line and output file are not identical");
261 iFound = eFoundDDStr;
263 /* Catch a few errors that might have occured: */
264 else if (str_starts(line, "There is no domain decomposition for"))
266 return eParselogNoDDGrid;
268 else if (str_starts(line, "reading tpx file"))
270 return eParselogTPXVersion;
272 else if (str_starts(line, "The -dd or -npme option request a parallel simulation"))
274 return eParselogNotParallel;
276 break;
277 case eFoundDDStr:
278 /* Look for PME mesh/force balance (not necessarily present, though) */
279 if (str_starts(line, matchstrbal))
280 sscanf(&line[strlen(matchstrbal)], "%f", &(perfdata->PME_f_load[test_nr]));
281 /* Look for matchstring */
282 if (str_starts(line, matchstring))
283 iFound = eFoundAccountingStr;
284 break;
285 case eFoundAccountingStr:
286 /* Already found matchstring - look for cycle data */
287 if (str_starts(line, "Total "))
289 sscanf(line,"Total %d %lf",&procs,&(perfdata->Gcycles[test_nr]));
290 iFound = eFoundCycleStr;
292 break;
293 case eFoundCycleStr:
294 /* Already found cycle data - look for remaining performance info and return */
295 if (str_starts(line, "Performance:"))
297 sscanf(line,"%s %f %f %f %f", dumstring, &dum1, &dum2, &(perfdata->ns_per_day[test_nr]), &dum3);
298 fclose(fp);
299 if (bResetChecked || presteps == 0)
300 return eParselogOK;
301 else
302 return eParselogResetProblem;
304 break;
306 } /* while */
308 /* Check why there is no performance data in the log file.
309 * Did a fatal errors occur? */
310 if (gmx_fexist(errfile))
312 fp = fopen(errfile, "r");
313 while (fgets(line, STRLEN, fp) != NULL)
315 if ( str_starts(line, "Fatal error:") )
317 if (fgets(line, STRLEN, fp) != NULL)
318 fprintf(stderr, "\nWARNING: A fatal error has occured during this benchmark:\n"
319 "%s\n", line);
320 fclose(fp);
321 cleandata(perfdata, test_nr);
322 return eParselogFatal;
325 fclose(fp);
327 else
329 fprintf(stderr, "WARNING: Could not find stderr file %s.\n", errfile);
332 /* Giving up ... we could not find out why there is no performance data in
333 * the log file. */
334 fprintf(stdout, "No performance data in log file.\n");
335 fclose(fp);
336 cleandata(perfdata, test_nr);
338 return eParselogNoPerfData;
342 static bool analyze_data(
343 FILE *fp,
344 const char *fn,
345 t_perf **perfdata,
346 int nnodes,
347 int ntprs,
348 int ntests,
349 int nrepeats,
350 t_inputinfo *info,
351 int *index_tpr, /* OUT: Nr of mdp file with best settings */
352 int *npme_optimal) /* OUT: Optimal number of PME nodes */
354 int i,j,k;
355 int line=0, line_win=-1;
356 int k_win=-1, i_win=-1, winPME;
357 double s=0.0; /* standard deviation */
358 t_perf *pd;
359 char strbuf[STRLEN];
360 char str_PME_f_load[13];
361 bool bCanUseOrigTPR;
364 if (nrepeats > 1)
366 sep_line(fp);
367 fprintf(fp, "Summary of successful runs:\n");
368 fprintf(fp, "Line tpr PME nodes Gcycles Av. Std.dev. ns/day PME/f");
369 if (nnodes > 1)
370 fprintf(fp, " DD grid");
371 fprintf(fp, "\n");
375 for (k=0; k<ntprs; k++)
377 for (i=0; i<ntests; i++)
379 /* Select the right dataset: */
380 pd = &(perfdata[k][i]);
382 pd->Gcycles_Av = 0.0;
383 pd->PME_f_load_Av = 0.0;
384 pd->ns_per_day_Av = 0.0;
386 if (pd->nPMEnodes == -1)
387 sprintf(strbuf, "(%3d)", pd->guessPME);
388 else
389 sprintf(strbuf, " ");
391 /* Get the average run time of a setting */
392 for (j=0; j<nrepeats; j++)
394 pd->Gcycles_Av += pd->Gcycles[j];
395 pd->PME_f_load_Av += pd->PME_f_load[j];
397 pd->Gcycles_Av /= nrepeats;
398 pd->PME_f_load_Av /= nrepeats;
400 for (j=0; j<nrepeats; j++)
402 if (pd->ns_per_day[j] > 0.0)
403 pd->ns_per_day_Av += pd->ns_per_day[j];
404 else
406 /* Somehow the performance number was not aquired for this run,
407 * therefor set the average to some negative value: */
408 pd->ns_per_day_Av = -1.0f*nrepeats;
409 break;
412 pd->ns_per_day_Av /= nrepeats;
414 /* Nicer output: */
415 if (pd->PME_f_load_Av > 0.0)
416 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load_Av);
417 else
418 sprintf(str_PME_f_load, "%s", " - ");
421 /* We assume we had a successful run if both averages are positive */
422 if (pd->Gcycles_Av > 0.0 && pd->ns_per_day_Av > 0.0)
424 /* Output statistics if repeats were done */
425 if (nrepeats > 1)
427 /* Calculate the standard deviation */
428 s = 0.0;
429 for (j=0; j<nrepeats; j++)
430 s += pow( pd->Gcycles[j] - pd->Gcycles_Av, 2 );
431 s /= (nrepeats - 1);
432 s = sqrt(s);
434 fprintf(fp, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s",
435 line, k, pd->nPMEnodes, strbuf, pd->Gcycles_Av, s,
436 pd->ns_per_day_Av, str_PME_f_load);
437 if (nnodes > 1)
438 fprintf(fp, " %3d %3d %3d", pd->nx, pd->ny, pd->nz);
439 fprintf(fp, "\n");
441 /* Store the index of the best run found so far in 'winner': */
442 if ( (k_win == -1) || (pd->Gcycles_Av < perfdata[k_win][i_win].Gcycles_Av) )
444 k_win = k;
445 i_win = i;
446 line_win = line;
448 line++;
453 if (k_win == -1)
454 gmx_fatal(FARGS, "None of the runs was successful! Check %s for problems.", fn);
456 sep_line(fp);
458 winPME = perfdata[k_win][i_win].nPMEnodes;
459 if (winPME == -1)
460 sprintf(strbuf, "%s", "the automatic number of");
461 else
462 sprintf(strbuf, "%d", winPME);
463 fprintf(fp, "Best performance was achieved with %s PME nodes", strbuf);
464 if (nrepeats > 1)
465 fprintf(fp, " (see line %d)", line_win);
466 fprintf(fp, "\n");
468 /* Only mention settings if they were modified: */
469 bCanUseOrigTPR = TRUE;
470 if ( !is_equal(info->r_coulomb[k_win], info->orig_rcoulomb) )
472 fprintf(fp, "Optimized PME settings:\n"
473 " New Coulomb radius: %f nm (was %f nm)\n",
474 info->r_coulomb[k_win], info->orig_rcoulomb);
475 bCanUseOrigTPR = FALSE;
478 if ( !is_equal(info->r_vdw[k_win], info->orig_rvdw) )
480 fprintf(fp, " New Van der Waals radius: %f nm (was %f nm)\n",
481 info->r_vdw[k_win], info->orig_rvdw);
482 bCanUseOrigTPR = FALSE;
485 if ( ! (info->fourier_nx[k_win]==info->orig_nk[XX] &&
486 info->fourier_ny[k_win]==info->orig_nk[YY] &&
487 info->fourier_nz[k_win]==info->orig_nk[ZZ] ) )
489 fprintf(fp, " New Fourier grid xyz: %d %d %d (was %d %d %d)\n",
490 info->fourier_nx[k_win], info->fourier_ny[k_win], info->fourier_nz[k_win],
491 info->orig_nk[XX], info->orig_nk[YY], info->orig_nk[ZZ]);
492 bCanUseOrigTPR = FALSE;
494 if (bCanUseOrigTPR && ntprs > 1)
495 fprintf(fp, "and original PME settings.\n");
497 fflush(fp);
499 /* Return the index of the mdp file that showed the highest performance
500 * and the optimal number of PME nodes */
501 *index_tpr = k_win;
502 *npme_optimal = winPME;
504 return bCanUseOrigTPR;
508 /* Get the commands we need to set up the runs from environment variables */
509 static void get_program_paths(bool bThreads, char *cmd_mpirun[], char cmd_np[],
510 char *cmd_mdrun[], int repeats)
512 char *command=NULL;
513 char *cp;
514 char *cp2;
515 char line[STRLEN];
516 FILE *fp;
517 const char def_mpirun[] = "mpirun";
518 const char def_mdrun[] = "mdrun";
519 const char filename[] = "benchtest.log";
520 const char match_mpi[] = "NNODES=";
521 const char match_mdrun[]= "Program: ";
522 const char empty_mpirun[] = "";
523 bool bMdrun = FALSE;
524 bool bMPI = FALSE;
527 /* Get the commands we need to set up the runs from environment variables */
528 if (!bThreads)
530 if ( (cp = getenv("MPIRUN")) != NULL)
531 *cmd_mpirun = strdup(cp);
532 else
533 *cmd_mpirun = strdup(def_mpirun);
535 else
537 *cmd_mpirun = strdup(empty_mpirun);
540 if ( (cp = getenv("MDRUN" )) != NULL )
541 *cmd_mdrun = strdup(cp);
542 else
543 *cmd_mdrun = strdup(def_mdrun);
546 /* If no simulations have to be performed, we are done here */
547 if (repeats <= 0)
548 return;
550 /* Run a small test to see whether mpirun + mdrun work */
551 fprintf(stdout, "Making sure that mdrun can be executed. ");
552 if (bThreads)
554 snew(command, strlen(*cmd_mdrun) + strlen(cmd_np) + strlen(filename) + 50);
555 sprintf(command, "%s%s-version -maxh 0.001 1> %s 2>&1", *cmd_mdrun, cmd_np, filename);
557 else
559 snew(command, strlen(*cmd_mpirun) + strlen(cmd_np) + strlen(*cmd_mdrun) + strlen(filename) + 50);
560 sprintf(command, "%s%s%s -version -maxh 0.001 1> %s 2>&1", *cmd_mpirun, cmd_np, *cmd_mdrun, filename);
562 fprintf(stdout, "Trying '%s' ... ", command);
563 make_backup(filename);
564 gmx_system_call(command);
566 /* Check if we find the characteristic string in the output: */
567 if (!gmx_fexist(filename))
568 gmx_fatal(FARGS, "Output from test run could not be found.");
570 fp = fopen(filename, "r");
571 /* We need to scan the whole output file, since sometimes the queuing system
572 * also writes stuff to stdout/err */
573 while ( !feof(fp) )
575 cp2=fgets(line, STRLEN, fp);
576 if (cp2!=NULL)
578 if ( str_starts(line, match_mdrun) )
579 bMdrun = TRUE;
580 if ( str_starts(line, match_mpi) )
581 bMPI = TRUE;
584 fclose(fp);
586 if (bThreads)
588 if (bMPI)
590 gmx_fatal(FARGS, "Need a threaded version of mdrun. This one\n"
591 "(%s)\n"
592 "seems to have been compiled with MPI instead.",
593 *cmd_mdrun);
596 else
598 if (bMdrun && !bMPI)
600 gmx_fatal(FARGS, "Need an MPI-enabled version of mdrun. This one\n"
601 "(%s)\n"
602 "seems to have been compiled without MPI support.",
603 *cmd_mdrun);
607 if (!bMdrun)
609 gmx_fatal(FARGS, "Cannot execute mdrun. Please check %s for problems!",
610 filename);
613 fprintf(stdout, "passed.\n");
615 /* Clean up ... */
616 remove(filename);
617 sfree(command);
621 static void launch_simulation(
622 bool bLaunch, /* Should the simulation be launched? */
623 FILE *fp, /* General log file */
624 bool bThreads, /* whether to use threads */
625 char *cmd_mpirun, /* Command for mpirun */
626 char *cmd_np, /* Switch for -np or -nt or empty */
627 char *cmd_mdrun, /* Command for mdrun */
628 char *args_for_mdrun, /* Arguments for mdrun */
629 const char *simulation_tpr, /* This tpr will be simulated */
630 int nnodes, /* Number of nodes to run on */
631 int nPMEnodes) /* Number of PME nodes to use */
633 char *command;
636 /* Make enough space for the system call command,
637 * (100 extra chars for -npme ... etc. options should suffice): */
638 snew(command, strlen(cmd_mpirun)+strlen(cmd_mdrun)+strlen(cmd_np)+strlen(args_for_mdrun)+strlen(simulation_tpr)+100);
640 /* Note that the -passall options requires args_for_mdrun to be at the end
641 * of the command line string */
642 if (bThreads)
644 sprintf(command, "%s%s-npme %d -s %s %s",
645 cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun);
647 else
649 sprintf(command, "%s%s%s -npme %d -s %s %s",
650 cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun);
653 fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch? "Using":"Please use", command);
654 sep_line(fp);
655 fflush(fp);
657 /* Now the real thing! */
658 if (bLaunch)
660 fprintf(stdout, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command);
661 sep_line(stdout);
662 fflush(stdout);
663 gmx_system_call(command);
664 thanx(fp);
669 static void modify_PMEsettings(
670 gmx_large_int_t simsteps, /* Set this value as number of time steps */
671 const char *fn_best_tpr, /* tpr file with the best performance */
672 const char *fn_sim_tpr) /* name of tpr file to be launched */
674 t_inputrec *ir;
675 t_state state;
676 gmx_mtop_t mtop;
677 char buf[200];
679 snew(ir,1);
680 read_tpx_state(fn_best_tpr,ir,&state,NULL,&mtop);
682 /* Set nsteps to the right value */
683 ir->nsteps = simsteps;
685 /* Write the tpr file which will be launched */
686 sprintf(buf, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr, gmx_large_int_pfmt);
687 fprintf(stdout,buf,ir->nsteps);
688 fflush(stdout);
689 write_tpx_state(fn_sim_tpr,ir,&state,&mtop);
691 sfree(ir);
695 #define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
697 /* Make additional TPR files with more computational load for the
698 * direct space processors: */
699 static void make_benchmark_tprs(
700 const char *fn_sim_tpr, /* READ : User-provided tpr file */
701 char *fn_bench_tprs[], /* WRITE: Names of benchmark tpr files */
702 gmx_large_int_t benchsteps, /* Number of time steps for benchmark runs */
703 gmx_large_int_t statesteps, /* Step counter in checkpoint file */
704 real upfac, /* Scale rcoulomb inbetween downfac and upfac */
705 real downfac,
706 int ntprs, /* No. of TPRs to write, each with a different rcoulomb and fourierspacing */
707 real fourierspacing, /* Basic fourierspacing from tpr input file */
708 t_inputinfo *info, /* Contains information about mdp file options */
709 FILE *fp) /* Write the output here */
711 int i,j,d;
712 t_inputrec *ir;
713 t_state state;
714 gmx_mtop_t mtop;
715 real fac;
716 real nlist_buffer; /* Thickness of the buffer regions for PME-switch potentials: */
717 char buf[200];
718 rvec box_size;
719 bool bNote = FALSE;
722 sprintf(buf, "Making benchmark tpr file%s with %s time steps", ntprs>1? "s":"", gmx_large_int_pfmt);
723 fprintf(stdout, buf, benchsteps);
724 if (statesteps > 0)
726 sprintf(buf, " (adding %s steps from checkpoint file)", gmx_large_int_pfmt);
727 fprintf(stdout, buf, statesteps);
728 benchsteps += statesteps;
730 fprintf(stdout, ".\n");
733 snew(ir,1);
734 read_tpx_state(fn_sim_tpr,ir,&state,NULL,&mtop);
736 /* Check if some kind of PME was chosen */
737 if (EEL_PME(ir->coulombtype) == FALSE)
738 gmx_fatal(FARGS, "Can only do optimizations for simulations with %s electrostatics.",
739 EELTYPE(eelPME));
741 /* Check if rcoulomb == rlist, which is necessary for plain PME. */
742 if ( (eelPME == ir->coulombtype) && !(ir->rcoulomb == ir->rlist) )
744 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to rlist (%f).",
745 EELTYPE(eelPME), ir->rcoulomb, ir->rlist);
747 /* For other PME types, rcoulomb is allowed to be smaller than rlist */
748 else if (ir->rcoulomb > ir->rlist)
750 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)",
751 EELTYPE(ir->coulombtype), ir->rcoulomb, ir->rlist);
754 /* Reduce the number of steps for the benchmarks */
755 info->orig_sim_steps = ir->nsteps;
756 ir->nsteps = benchsteps;
758 /* Determine length of triclinic box vectors */
759 for(d=0; d<DIM; d++)
761 box_size[d] = 0;
762 for(i=0;i<DIM;i++)
763 box_size[d] += state.box[d][i]*state.box[d][i];
764 box_size[d] = sqrt(box_size[d]);
767 /* Remember the original values: */
768 info->orig_rvdw = ir->rvdw;
769 info->orig_rcoulomb = ir->rcoulomb;
770 info->orig_rlist = ir->rlist;
771 info->orig_rlistlong = ir->rlistlong;
772 info->orig_nk[XX] = ir->nkx;
773 info->orig_nk[YY] = ir->nky;
774 info->orig_nk[ZZ] = ir->nkz;
775 info->orig_fs[XX] = box_size[XX]/ir->nkx; /* fourierspacing in x direction */
776 info->orig_fs[YY] = box_size[YY]/ir->nky;
777 info->orig_fs[ZZ] = box_size[ZZ]/ir->nkz;
779 /* For PME-switch potentials, keep the radial distance of the buffer region */
780 nlist_buffer = info->orig_rlist - info->orig_rcoulomb;
782 /* Print information about settings of which some are potentially modified: */
783 fprintf(fp, " Coulomb type : %s\n", EELTYPE(ir->coulombtype));
784 fprintf(fp, " Fourier nkx nky nkz : %d %d %d\n",
785 info->orig_nk[XX], info->orig_nk[YY], info->orig_nk[ZZ]);
786 fprintf(fp, " rcoulomb : %f nm\n", info->orig_rcoulomb);
787 fprintf(fp, " Van der Waals type : %s\n", EVDWTYPE(ir->vdwtype));
788 fprintf(fp, " rvdw : %f nm\n", info->orig_rvdw);
789 if (EVDW_SWITCHED(ir->vdwtype))
790 fprintf(fp, " rvdw_switch : %f nm\n", ir->rvdw_switch);
791 if (EPME_SWITCHED(ir->coulombtype))
792 fprintf(fp, " rlist : %f nm\n", info->orig_rlist);
793 if (info->orig_rlistlong != max_cutoff(ir->rvdw,ir->rcoulomb))
794 fprintf(fp, " rlistlong : %f nm\n", info->orig_rlistlong);
796 /* Print a descriptive line about the tpr settings tested */
797 fprintf(fp, "\nWill try these real/reciprocal workload settings:\n");
798 fprintf(fp, " No. scaling rcoulomb");
799 fprintf(fp, " nkx nky nkz");
800 if (fourierspacing > 0)
801 fprintf(fp, " spacing");
802 if (evdwCUT == ir->vdwtype)
803 fprintf(fp, " rvdw");
804 if (EPME_SWITCHED(ir->coulombtype))
805 fprintf(fp, " rlist");
806 if ( info->orig_rlistlong != max_cutoff(info->orig_rlist,max_cutoff(info->orig_rvdw,info->orig_rcoulomb)) )
807 fprintf(fp, " rlistlong");
808 fprintf(fp, " tpr file\n");
810 if (ntprs > 1)
812 fprintf(stdout, "Calculating PME grid points on the basis of ");
813 if (fourierspacing > 0)
814 fprintf(stdout, "a fourierspacing of %f nm\n", fourierspacing);
815 else
816 fprintf(stdout, "original nkx/nky/nkz settings from tpr file\n");
819 /* Loop to create the requested number of tpr input files */
820 for (j = 0; j < ntprs; j++)
822 /* Rcoulomb scaling factor for this file: */
823 if (ntprs == 1)
824 fac = downfac;
825 else
826 fac = (upfac-downfac)/(ntprs-1) * j + downfac;
827 fprintf(stdout, "--- Scaling factor %f ---\n", fac);
829 /* Scale the Coulomb radius */
830 ir->rcoulomb = info->orig_rcoulomb*fac;
832 /* Adjust other radii since various conditions neet to be fulfilled */
833 if (eelPME == ir->coulombtype)
835 /* plain PME, rcoulomb must be equal to rlist */
836 ir->rlist = ir->rcoulomb;
838 else
840 /* rlist must be >= rcoulomb, we keep the size of the buffer region */
841 ir->rlist = ir->rcoulomb + nlist_buffer;
844 if (evdwCUT == ir->vdwtype)
846 /* For vdw cutoff, rvdw >= rlist */
847 ir->rvdw = max(info->orig_rvdw, ir->rlist);
850 ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
852 /* Try to reduce the number of reciprocal grid points in a smart way */
853 /* Did the user supply a value for fourierspacing on the command line? */
854 if (fourierspacing > 0)
856 info->fourier_sp[j] = fourierspacing*fac;
857 /* Calculate the optimal grid dimensions */
858 ir->nkx = 0;
859 ir->nky = 0;
860 ir->nkz = 0;
861 calc_grid(stdout,state.box,info->fourier_sp[j],&(ir->nkx),&(ir->nky),&(ir->nkz),1);
862 /* Check consistency */
863 if (0 == j)
864 if ((ir->nkx != info->orig_nk[XX]) || (ir->nky != info->orig_nk[YY]) || (ir->nkz != info->orig_nk[ZZ]))
866 fprintf(stderr, "WARNING: Original grid was %dx%dx%d. The fourierspacing of %f nm does not reproduce the grid\n"
867 " found in the tpr input file! Will use the new settings.\n",
868 info->orig_nk[XX],info->orig_nk[YY],info->orig_nk[ZZ],fourierspacing);
869 bNote = TRUE;
872 else
874 if (0 == j)
876 /* Print out fourierspacing from input tpr */
877 fprintf(stdout, "Input file fourier grid is %dx%dx%d\n",
878 info->orig_nk[XX], info->orig_nk[YY], info->orig_nk[ZZ]);
880 /* Reconstruct fourierspacing for each dimension from the input file */
881 ir->nkx=0;
882 calc_grid(stdout,state.box,info->orig_fs[XX]*fac,&(ir->nkx),&(ir->nky),&(ir->nkz),1);
883 ir->nky=0;
884 calc_grid(stdout,state.box,info->orig_fs[YY]*fac,&(ir->nkx),&(ir->nky),&(ir->nkz),1);
885 ir->nkz=0;
886 calc_grid(stdout,state.box,info->orig_fs[ZZ]*fac,&(ir->nkx),&(ir->nky),&(ir->nkz),1);
889 /* Save modified radii and fourier grid components for later output: */
890 info->r_coulomb[j] = ir->rcoulomb;
891 info->r_vdw[j] = ir->rvdw;
892 info->fourier_nx[j] = ir->nkx;
893 info->fourier_ny[j] = ir->nky;
894 info->fourier_nz[j] = ir->nkz;
895 info->rlist[j] = ir->rlist;
896 info->rlistlong[j] = ir->rlistlong;
898 /* Write the benchmark tpr file */
899 strncpy(fn_bench_tprs[j],fn_sim_tpr,strlen(fn_sim_tpr)-strlen(".tpr"));
900 sprintf(buf, "_bench%.2d.tpr", j);
901 strcat(fn_bench_tprs[j], buf);
902 fprintf(stdout,"Writing benchmark tpr %s with nsteps=", fn_bench_tprs[j]);
903 fprintf(stdout, gmx_large_int_pfmt, ir->nsteps);
904 fprintf(stdout,", scaling factor %f\n", fac);
905 write_tpx_state(fn_bench_tprs[j],ir,&state,&mtop);
907 /* Write information about modified tpr settings to log file */
908 fprintf(fp, "%4d%10f%10f", j, fac, ir->rcoulomb);
909 fprintf(fp, "%5d%5d%5d", ir->nkx, ir->nky, ir->nkz);
910 if (fourierspacing > 0)
911 fprintf(fp, "%9f ", info->fourier_sp[j]);
912 if (evdwCUT == ir->vdwtype)
913 fprintf(fp, "%10f", ir->rvdw);
914 if (EPME_SWITCHED(ir->coulombtype))
915 fprintf(fp, "%10f", ir->rlist);
916 if ( info->orig_rlistlong != max_cutoff(info->orig_rlist,max_cutoff(info->orig_rvdw,info->orig_rcoulomb)) )
917 fprintf(fp, "%10f", ir->rlistlong);
918 fprintf(fp, " %-14s\n",fn_bench_tprs[j]);
920 /* Make it clear to the user that some additional settings were modified */
921 if ( !is_equal(ir->rvdw , info->orig_rvdw)
922 || !is_equal(ir->rlistlong , info->orig_rlistlong) )
924 bNote = TRUE;
927 if (bNote)
928 fprintf(fp, "\nNote that in addition to rcoulomb and the fourier grid\n"
929 "also other input settings were changed (see table above).\n"
930 "Please check if the modified settings are appropriate.\n");
931 fflush(stdout);
932 fflush(fp);
933 sfree(ir);
937 /* Whether these files are written depends on tpr (or mdp) settings,
938 * not on mdrun command line options! */
939 static bool tpr_triggers_file(const char *opt)
941 if ( (0 == strcmp(opt, "-pf"))
942 || (0 == strcmp(opt, "-px")) )
943 return TRUE;
944 else
945 return FALSE;
949 /* Rename the files we want to keep to some meaningful filename and
950 * delete the rest */
951 static void cleanup(const t_filenm *fnm, int nfile, int k, int nnodes,
952 int nPMEnodes, int nr, bool bKeepStderr)
954 char numstring[STRLEN];
955 char newfilename[STRLEN];
956 const char *fn=NULL;
957 int i;
958 const char *opt;
961 fprintf(stdout, "Cleaning up, deleting benchmark temp files ...\n");
963 for (i=0; i<nfile; i++)
965 opt = (char *)fnm[i].opt;
966 if ( strcmp(opt, "-p") == 0 )
968 /* do nothing; keep this file */
971 else if (strcmp(opt, "-bg") == 0)
973 /* Give the log file a nice name so one can later see which parameters were used */
974 numstring[0] = '\0';
975 if (nr > 0)
976 sprintf(numstring, "_%d", nr);
977 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", opt2fn("-bg",nfile,fnm), k, nnodes, nPMEnodes, numstring);
978 if (gmx_fexist(opt2fn("-bg",nfile,fnm)))
980 fprintf(stdout, "renaming log file to %s\n", newfilename);
981 make_backup(newfilename);
982 rename(opt2fn("-bg",nfile,fnm), newfilename);
985 else if (strcmp(opt, "-err") == 0)
987 /* This file contains the output of stderr. We want to keep it in
988 * cases where there have been problems. */
989 fn = opt2fn(opt, nfile, fnm);
990 numstring[0] = '\0';
991 if (nr > 0)
992 sprintf(numstring, "_%d", nr);
993 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", fn, k, nnodes, nPMEnodes, numstring);
994 if (gmx_fexist(fn))
996 if (bKeepStderr)
998 fprintf(stdout, "Saving stderr output in %s\n", newfilename);
999 make_backup(newfilename);
1000 rename(fn, newfilename);
1002 else
1004 fprintf(stdout, "Deleting %s\n", fn);
1005 remove(fn);
1009 /* Delete the files which are created for each benchmark run: (options -b*) */
1010 else if ( ( (0 == strncmp(opt, "-b", 2)) && (opt2bSet(opt,nfile,fnm) || !is_optional(&fnm[i])) )
1011 || tpr_triggers_file(opt) )
1013 fn = opt2fn(opt, nfile, fnm);
1014 if (gmx_fexist(fn))
1016 fprintf(stdout, "Deleting %s\n", fn);
1017 remove(fn);
1024 /* Returns the largest common factor of n1 and n2 */
1025 static int largest_common_factor(int n1, int n2)
1027 int factor, nmax;
1029 nmax = min(n1, n2);
1030 for (factor=nmax; factor > 0; factor--)
1032 if ( 0==(n1 % factor) && 0==(n2 % factor) )
1034 return(factor);
1037 return 0; /* one for the compiler */
1040 enum {eNpmeAuto, eNpmeAll, eNpmeReduced, eNpmeSubset, eNpmeNr};
1042 /* Create a list of numbers of PME nodes to test */
1043 static void make_npme_list(
1044 const char *npmevalues_opt, /* Make a complete list with all
1045 * possibilities or a short list that keeps only
1046 * reasonable numbers of PME nodes */
1047 int *nentries, /* Number of entries we put in the nPMEnodes list */
1048 int *nPMEnodes[], /* Each entry contains the value for -npme */
1049 int nnodes, /* Total number of nodes to do the tests on */
1050 int minPMEnodes, /* Minimum number of PME nodes */
1051 int maxPMEnodes) /* Maximum number of PME nodes */
1053 int i,npme,npp;
1054 int min_factor=1; /* We request that npp and npme have this minimal
1055 * largest common factor (depends on npp) */
1056 int nlistmax; /* Max. list size */
1057 int nlist; /* Actual number of entries in list */
1058 int eNPME;
1061 /* Do we need to check all possible values for -npme or is a reduced list enough? */
1062 if ( 0 == strcmp(npmevalues_opt, "all") )
1064 eNPME = eNpmeAll;
1066 else if ( 0 == strcmp(npmevalues_opt, "subset") )
1068 eNPME = eNpmeSubset;
1070 else if ( 0 == strcmp(npmevalues_opt, "auto") )
1072 if (nnodes <= 64)
1073 eNPME = eNpmeAll;
1074 else if (nnodes < 128)
1075 eNPME = eNpmeReduced;
1076 else
1077 eNPME = eNpmeSubset;
1079 else
1081 gmx_fatal(FARGS, "Unknown option for -npme in make_npme_list");
1084 /* Calculate how many entries we could possibly have (in case of -npme all) */
1085 if (nnodes > 2)
1087 nlistmax = maxPMEnodes - minPMEnodes + 3;
1088 if (0 == minPMEnodes)
1089 nlistmax--;
1091 else
1092 nlistmax = 1;
1094 /* Now make the actual list which is at most of size nlist */
1095 snew(*nPMEnodes, nlistmax);
1096 nlist = 0; /* start counting again, now the real entries in the list */
1097 for (i = 0; i < nlistmax - 2; i++)
1099 npme = maxPMEnodes - i;
1100 npp = nnodes-npme;
1101 switch (eNPME)
1103 case eNpmeAll:
1104 min_factor = 1;
1105 break;
1106 case eNpmeReduced:
1107 min_factor = 2;
1108 break;
1109 case eNpmeSubset:
1110 /* For 2d PME we want a common largest factor of at least the cube
1111 * root of the number of PP nodes */
1112 min_factor = (int) pow(npp, 1.0/3.0);
1113 break;
1114 default:
1115 gmx_fatal(FARGS, "Unknown option for eNPME in make_npme_list");
1116 break;
1118 if (largest_common_factor(npp, npme) >= min_factor)
1120 (*nPMEnodes)[nlist] = npme;
1121 nlist++;
1124 /* We always test 0 PME nodes and the automatic number */
1125 *nentries = nlist + 2;
1126 (*nPMEnodes)[nlist ] = 0;
1127 (*nPMEnodes)[nlist+1] = -1;
1129 fprintf(stderr, "Will try the following %d different values for -npme:\n", *nentries);
1130 for (i=0; i<*nentries-1; i++)
1131 fprintf(stderr, "%d, ", (*nPMEnodes)[i]);
1132 fprintf(stderr, "and %d (auto).\n", (*nPMEnodes)[*nentries-1]);
1136 /* Allocate memory to store the performance data */
1137 static void init_perfdata(t_perf *perfdata[], int ntprs, int datasets, int repeats)
1139 int i, j, k;
1142 for (k=0; k<ntprs; k++)
1144 snew(perfdata[k], datasets);
1145 for (i=0; i<datasets; i++)
1147 for (j=0; j<repeats; j++)
1149 snew(perfdata[k][i].Gcycles , repeats);
1150 snew(perfdata[k][i].ns_per_day, repeats);
1151 snew(perfdata[k][i].PME_f_load, repeats);
1158 static void do_the_tests(
1159 FILE *fp, /* General g_tune_pme output file */
1160 char **tpr_names, /* Filenames of the input files to test */
1161 int maxPMEnodes, /* Max fraction of nodes to use for PME */
1162 int minPMEnodes, /* Min fraction of nodes to use for PME */
1163 const char *npmevalues_opt, /* Which -npme values should be tested */
1164 t_perf **perfdata, /* Here the performace data is stored */
1165 int *pmeentries, /* Entries in the nPMEnodes list */
1166 int repeats, /* Repeat each test this often */
1167 int nnodes, /* Total number of nodes = nPP + nPME */
1168 int nr_tprs, /* Total number of tpr files to test */
1169 bool bThreads, /* Threads or MPI? */
1170 char *cmd_mpirun, /* mpirun command string */
1171 char *cmd_np, /* "-np", "-n", whatever mpirun needs */
1172 char *cmd_mdrun, /* mdrun command string */
1173 char *cmd_args_bench, /* arguments for mdrun in a string */
1174 const t_filenm *fnm, /* List of filenames from command line */
1175 int nfile, /* Number of files specified on the cmdl. */
1176 int sim_part, /* For checkpointing */
1177 int presteps, /* DLB equilibration steps, is checked */
1178 gmx_large_int_t cpt_steps) /* Time step counter in the checkpoint */
1180 int i,nr,k,ret,count=0,totaltests;
1181 int *nPMEnodes=NULL;
1182 t_perf *pd=NULL;
1183 int cmdline_length;
1184 char *command, *cmd_stub;
1185 char buf[STRLEN];
1186 bool bResetProblem=FALSE;
1189 /* This string array corresponds to the eParselog enum type at the start
1190 * of this file */
1191 const char* ParseLog[] = {"OK.",
1192 "Logfile not found!",
1193 "No timings, logfile truncated?",
1194 "Run was terminated.",
1195 "Counters were not reset properly.",
1196 "No DD grid found for these settings.",
1197 "TPX version conflict!",
1198 "mdrun was not started in parallel!",
1199 "A fatal error occured!" };
1200 char str_PME_f_load[13];
1203 /* Allocate space for the mdrun command line. 100 extra characters should
1204 be more than enough for the -npme etcetera arguments */
1205 cmdline_length = strlen(cmd_mpirun)
1206 + strlen(cmd_np)
1207 + strlen(cmd_mdrun)
1208 + strlen(cmd_args_bench)
1209 + strlen(tpr_names[0]) + 100;
1210 snew(command , cmdline_length);
1211 snew(cmd_stub, cmdline_length);
1213 /* Construct the part of the command line that stays the same for all tests: */
1214 if (bThreads)
1216 sprintf(cmd_stub, "%s%s", cmd_mdrun, cmd_np);
1218 else
1220 sprintf(cmd_stub, "%s%s%s ", cmd_mpirun, cmd_np, cmd_mdrun);
1223 /* Create a list of numbers of PME nodes to test */
1224 make_npme_list(npmevalues_opt, pmeentries, &nPMEnodes,
1225 nnodes, minPMEnodes, maxPMEnodes);
1227 if (0 == repeats)
1229 fprintf(fp, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1230 fclose(fp);
1231 finalize(opt2fn("-p", nfile, fnm));
1232 exit(0);
1235 /* Allocate one dataset for each tpr input file: */
1236 init_perfdata(perfdata, nr_tprs, *pmeentries, repeats);
1238 /*****************************************/
1239 /* Main loop over all tpr files to test: */
1240 /*****************************************/
1241 totaltests = nr_tprs*(*pmeentries)*repeats;
1242 for (k=0; k<nr_tprs;k++)
1244 fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
1245 fprintf(fp, "PME nodes Gcycles ns/day PME/f Remark\n");
1246 /* Loop over various numbers of PME nodes: */
1247 for (i = 0; i < *pmeentries; i++)
1249 pd = &perfdata[k][i];
1251 /* Loop over the repeats for each scenario: */
1252 for (nr = 0; nr < repeats; nr++)
1254 pd->nPMEnodes = nPMEnodes[i];
1256 /* Add -npme and -s to the command line and save it. Note that
1257 * the -passall (if set) options requires cmd_args_bench to be
1258 * at the end of the command line string */
1259 snew(pd->mdrun_cmd_line, cmdline_length);
1260 sprintf(pd->mdrun_cmd_line, "%s-npme %d -s %s %s",
1261 cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);
1263 /* Do a benchmark simulation: */
1264 if (repeats > 1)
1265 sprintf(buf, ", pass %d/%d", nr+1, repeats);
1266 else
1267 buf[0]='\0';
1268 fprintf(stdout, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
1269 (100.0*count)/totaltests,
1270 k+1, nr_tprs, i+1, *pmeentries, buf);
1271 make_backup(opt2fn("-err",nfile,fnm));
1272 sprintf(command, "%s 1> /dev/null 2>%s", pd->mdrun_cmd_line, opt2fn("-err",nfile,fnm));
1273 fprintf(stdout, "%s\n", pd->mdrun_cmd_line);
1274 gmx_system_call(command);
1276 /* Collect the performance data from the log file; also check stderr
1277 * for fatal errors */
1278 ret = parse_logfile(opt2fn("-bg",nfile,fnm), opt2fn("-err",nfile,fnm),
1279 pd, nr, presteps, cpt_steps, nnodes);
1280 if ((presteps > 0) && (ret == eParselogResetProblem))
1281 bResetProblem = TRUE;
1283 if (-1 == pd->nPMEnodes)
1284 sprintf(buf, "(%3d)", pd->guessPME);
1285 else
1286 sprintf(buf, " ");
1288 /* Nicer output */
1289 if (pd->PME_f_load[nr] > 0.0)
1290 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load[nr]);
1291 else
1292 sprintf(str_PME_f_load, "%s", " - ");
1294 /* Write the data we got to disk */
1295 fprintf(fp, "%4d%s %12.3f %12.3f %s %s", pd->nPMEnodes,
1296 buf, pd->Gcycles[nr], pd->ns_per_day[nr], str_PME_f_load, ParseLog[ret]);
1297 if (! (ret==eParselogOK || ret==eParselogNoDDGrid || ret==eParselogNotFound) )
1298 fprintf(fp, " Check %s file for problems.", ret==eParselogFatal? "err":"log");
1299 fprintf(fp, "\n");
1300 fflush(fp);
1301 count++;
1303 /* Do some cleaning up and delete the files we do not need any more */
1304 cleanup(fnm, nfile, k, nnodes, pd->nPMEnodes, nr, ret==eParselogFatal);
1306 /* If the first run with this number of processors already failed, do not try again: */
1307 if (pd->Gcycles[0] <= 0.0 && repeats > 1)
1309 fprintf(stdout, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
1310 count += repeats-(nr+1);
1311 break;
1313 } /* end of repeats loop */
1314 } /* end of -npme loop */
1315 } /* end of tpr file loop */
1316 if (bResetProblem)
1318 sep_line(fp);
1319 fprintf(fp, "WARNING: The cycle and time step counters could not be reset\n"
1320 "properly. The reason could be that mpirun did not manage to\n"
1321 "export the environment variable GMX_RESET_COUNTER. You might\n"
1322 "have to give a special switch to mpirun for that.\n"
1323 "Alternatively, you can manually set GMX_RESET_COUNTER to the\n"
1324 "value normally provided by -presteps.");
1325 sep_line(fp);
1327 sfree(command);
1328 sfree(cmd_stub);
1332 static void check_input(
1333 int nnodes,
1334 int repeats,
1335 int *ntprs,
1336 real *upfac,
1337 real *downfac,
1338 real maxPMEfraction,
1339 real minPMEfraction,
1340 real fourierspacing,
1341 gmx_large_int_t bench_nsteps,
1342 const t_filenm *fnm,
1343 int nfile,
1344 int sim_part,
1345 int presteps,
1346 int npargs,
1347 t_pargs *pa)
1349 /* Make sure the input file exists */
1350 if (!gmx_fexist(opt2fn("-s",nfile,fnm)))
1351 gmx_fatal(FARGS, "File %s not found.", opt2fn("-s",nfile,fnm));
1353 /* Make sure that the checkpoint file is not overwritten by the benchmark runs */
1354 if ( (0 == strcmp(opt2fn("-cpi",nfile,fnm), opt2fn("-cpo",nfile,fnm)) ) && (sim_part > 1) )
1355 gmx_fatal(FARGS, "Checkpoint input and output file must not be identical,\nbecause then the input file might change during the benchmarks.");
1357 /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
1358 if (repeats < 0)
1359 gmx_fatal(FARGS, "Number of repeats < 0!");
1361 /* Check number of nodes */
1362 if (nnodes < 1)
1363 gmx_fatal(FARGS, "Number of nodes/threads must be a positive integer.");
1365 /* Automatically choose -ntpr if not set */
1366 if (*ntprs < 1)
1368 if (nnodes < 16)
1369 *ntprs = 1;
1370 else
1371 *ntprs = 3;
1372 fprintf(stderr, "Will test %d tpr file%s.\n", *ntprs, *ntprs==1?"":"s");
1374 else
1376 if (1 == *ntprs)
1377 fprintf(stderr, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n");
1380 if ( is_equal(*downfac,*upfac) && (*ntprs > 1) )
1382 fprintf(stderr, "WARNING: Resetting -ntpr to 1 since both scaling factors are the same.\n"
1383 "Please choose upfac unequal to downfac to test various PME grid settings\n");
1384 *ntprs = 1;
1387 /* Check whether max and min fraction are within required values */
1388 if (maxPMEfraction > 0.5 || maxPMEfraction < 0)
1389 gmx_fatal(FARGS, "-max must be between 0 and 0.5");
1390 if (minPMEfraction > 0.5 || minPMEfraction < 0)
1391 gmx_fatal(FARGS, "-min must be between 0 and 0.5");
1392 if (maxPMEfraction < minPMEfraction)
1393 gmx_fatal(FARGS, "-max must be larger or equal to -min");
1395 /* Check whether the number of steps - if it was set - has a reasonable value */
1396 if (bench_nsteps < 0)
1397 gmx_fatal(FARGS, "Number of steps must be positive.");
1399 if (bench_nsteps > 10000 || bench_nsteps < 100)
1401 fprintf(stderr, "WARNING: steps=");
1402 fprintf(stderr, gmx_large_int_pfmt, bench_nsteps);
1403 fprintf(stderr, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps < 100)? "few" : "many");
1406 if (presteps < 0)
1408 gmx_fatal(FARGS, "Cannot have a negative number of presteps.\n");
1411 if (*upfac <= 0.0 || *downfac <= 0.0 || *downfac > *upfac)
1412 gmx_fatal(FARGS, "Both scaling factors must be larger than zero and upper\n"
1413 "scaling limit (%f) must be larger than lower limit (%f).",
1414 *upfac, *downfac);
1416 if (*downfac < 0.75 || *upfac > 1.5)
1417 fprintf(stderr, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1419 if (fourierspacing < 0)
1420 gmx_fatal(FARGS, "Please choose a positive value for fourierspacing.");
1422 /* Make shure that the scaling factor options are compatible with the number of tprs */
1423 if ( (1 == *ntprs) && ( opt2parg_bSet("-upfac",npargs,pa) || opt2parg_bSet("-downfac",npargs,pa) ) )
1425 if (opt2parg_bSet("-upfac",npargs,pa) && opt2parg_bSet("-downfac",npargs,pa) && !is_equal(*upfac,*downfac))
1427 gmx_fatal(FARGS, "Please specify -ntpr > 1 for both scaling factors to take effect.\n"
1428 "(upfac=%f, downfac=%f)\n", *upfac, *downfac);
1430 if (opt2parg_bSet("-upfac",npargs,pa))
1431 *downfac = *upfac;
1432 if (opt2parg_bSet("-downfac",npargs,pa))
1433 *upfac = *downfac;
1434 if (!is_equal(*upfac, 1.0))
1436 fprintf(stderr, "WARNING: Using a scaling factor of %f with -ntpr 1, thus not testing the original tpr settings.\n",
1437 *upfac);
1443 /* Returns TRUE when "opt" is a switch for g_tune_pme itself */
1444 static bool is_main_switch(char *opt)
1446 if ( (0 == strcmp(opt,"-s" ))
1447 || (0 == strcmp(opt,"-p" ))
1448 || (0 == strcmp(opt,"-launch" ))
1449 || (0 == strcmp(opt,"-r" ))
1450 || (0 == strcmp(opt,"-ntpr" ))
1451 || (0 == strcmp(opt,"-max" ))
1452 || (0 == strcmp(opt,"-min" ))
1453 || (0 == strcmp(opt,"-upfac" ))
1454 || (0 == strcmp(opt,"-downfac" ))
1455 || (0 == strcmp(opt,"-four" ))
1456 || (0 == strcmp(opt,"-steps" ))
1457 || (0 == strcmp(opt,"-simsteps" ))
1458 || (0 == strcmp(opt,"-resetstep"))
1459 || (0 == strcmp(opt,"-so" ))
1460 || (0 == strcmp(opt,"-npstring" ))
1461 || (0 == strcmp(opt,"-npme" ))
1462 || (0 == strcmp(opt,"-passall" )) )
1463 return TRUE;
1465 return FALSE;
1469 /* Returns TRUE when "opt" is needed at launch time */
1470 static bool is_launch_option(char *opt, bool bSet)
1472 if (bSet)
1473 return TRUE;
1474 else
1475 return FALSE;
1479 /* Returns TRUE when "opt" is needed at launch time */
1480 static bool is_launch_file(char *opt, bool bSet)
1482 /* We need all options that were set on the command line
1483 * and that do not start with -b */
1484 if (0 == strncmp(opt,"-b", 2))
1485 return FALSE;
1487 if (bSet)
1488 return TRUE;
1489 else
1490 return FALSE;
1494 /* Returns TRUE when "opt" gives an option needed for the benchmarks runs */
1495 static bool is_bench_option(char *opt, bool bSet)
1497 /* If option is set, we might need it for the benchmarks.
1498 * This includes -cpi */
1499 if (bSet)
1501 if ( (0 == strcmp(opt, "-append" ))
1502 || (0 == strcmp(opt, "-maxh" ))
1503 || (0 == strcmp(opt, "-deffnm" ))
1504 || (0 == strcmp(opt, "-resethway")) )
1505 return FALSE;
1506 else
1507 return TRUE;
1509 else
1510 return FALSE;
1514 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1515 static bool is_bench_file(char *opt, bool bSet, bool bOptional, bool bIsOutput)
1517 /* All options starting with "-b" are for _b_enchmark files exclusively */
1518 if (0 == strncmp(opt,"-b", 2))
1520 if (!bOptional || bSet)
1521 return TRUE;
1522 else
1523 return FALSE;
1525 else
1527 if (bIsOutput)
1528 return FALSE;
1529 else
1530 if (bSet) /* These are additional input files like -cpi -ei */
1531 return TRUE;
1532 else
1533 return FALSE;
1538 /* Adds 'buf' to 'str' */
1539 static void add_to_string(char **str, char *buf)
1541 int len;
1544 len = strlen(*str) + strlen(buf) + 1;
1545 srenew(*str, len);
1546 strcat(*str, buf);
1550 /* Create the command line for the benchmark as well as for the real run */
1551 static void create_command_line_snippets(
1552 bool bThreads,
1553 int presteps,
1554 int nfile,
1555 t_filenm fnm[],
1556 int npargs,
1557 t_pargs *pa,
1558 const char *procstring, /* How to pass the number of processors to $MPIRUN */
1559 char *cmd_np[], /* Actual command line snippet, e.g. '-np <N>' */
1560 char *cmd_args_bench[], /* command line arguments for benchmark runs */
1561 char *cmd_args_launch[], /* command line arguments for simulation run */
1562 char extra_args[]) /* Add this to the end of the command line */
1564 int i;
1565 char *opt;
1566 const char *name;
1567 char *np_or_nt;
1568 #define BUFLENGTH 255
1569 char buf[BUFLENGTH];
1570 char strbuf[BUFLENGTH];
1571 char strbuf2[BUFLENGTH];
1574 if (bThreads)
1575 np_or_nt=strdup("-nt");
1576 else
1577 np_or_nt=strdup("-np");
1579 /* strlen needs at least '\0' as a string: */
1580 snew(*cmd_args_bench ,1);
1581 snew(*cmd_args_launch,1);
1582 *cmd_args_launch[0]='\0';
1583 *cmd_args_bench[0] ='\0';
1586 /*******************************************/
1587 /* 1. Process other command line arguments */
1588 /*******************************************/
1589 for (i=0; i<npargs; i++)
1591 /* What command line switch are we currently processing: */
1592 opt = (char *)pa[i].option;
1593 /* Skip options not meant for mdrun */
1594 if (!is_main_switch(opt))
1596 /* Print it to a string buffer, strip away trailing whitespaces that pa_val also returns: */
1597 sprintf(strbuf2, "%s", pa_val(&pa[i],buf,BUFLENGTH));
1598 rtrim(strbuf2);
1599 sprintf(strbuf, "%s %s ", opt, strbuf2);
1600 /* We need the -np (or -nt) switch in a separate buffer - whether or not it was set! */
1601 if (0 == strcmp(opt,np_or_nt))
1603 if (strcmp(procstring, "none")==0 && !bThreads)
1605 /* Omit -np <N> entirely */
1606 snew(*cmd_np, 2);
1607 sprintf(*cmd_np, " ");
1609 else
1611 /* This is the normal case with -np <N> */
1612 snew(*cmd_np, strlen(procstring)+strlen(strbuf2)+4);
1613 sprintf(*cmd_np, " %s %s ", bThreads? "-nt" : procstring, strbuf2);
1616 else
1618 if (is_bench_option(opt,pa[i].bSet))
1619 add_to_string(cmd_args_bench, strbuf);
1621 if (is_launch_option(opt,pa[i].bSet))
1622 add_to_string(cmd_args_launch, strbuf);
1626 if (presteps > 0)
1628 /* Add equilibration steps to benchmark options */
1629 sprintf(strbuf, "-resetstep %d ", presteps);
1630 add_to_string(cmd_args_bench, strbuf);
1633 /********************/
1634 /* 2. Process files */
1635 /********************/
1636 for (i=0; i<nfile; i++)
1638 opt = (char *)fnm[i].opt;
1639 name = opt2fn(opt,nfile,fnm);
1641 /* Strbuf contains the options, now let's sort out where we need that */
1642 sprintf(strbuf, "%s %s ", opt, name);
1644 /* Skip options not meant for mdrun */
1645 if (!is_main_switch(opt))
1648 if ( is_bench_file(opt, opt2bSet(opt,nfile,fnm), is_optional(&fnm[i]), is_output(&fnm[i])) )
1650 /* All options starting with -b* need the 'b' removed,
1651 * therefore overwrite strbuf */
1652 if (0 == strncmp(opt, "-b", 2))
1653 sprintf(strbuf, "-%s %s ", &opt[2], name);
1655 add_to_string(cmd_args_bench, strbuf);
1658 if ( is_launch_file(opt,opt2bSet(opt,nfile,fnm)) )
1659 add_to_string(cmd_args_launch, strbuf);
1663 add_to_string(cmd_args_bench , extra_args);
1664 add_to_string(cmd_args_launch, extra_args);
1665 #undef BUFLENGTH
1669 /* Set option opt */
1670 static void setopt(const char *opt,int nfile,t_filenm fnm[])
1672 int i;
1674 for(i=0; (i<nfile); i++)
1675 if (strcmp(opt,fnm[i].opt)==0)
1676 fnm[i].flag |= ffSET;
1680 static void couple_files_options(int nfile, t_filenm fnm[])
1682 int i;
1683 bool bSet,bBench;
1684 char *opt;
1685 char buf[20];
1688 for (i=0; i<nfile; i++)
1690 opt = (char *)fnm[i].opt;
1691 bSet = ((fnm[i].flag & ffSET) != 0);
1692 bBench = (0 == strncmp(opt,"-b", 2));
1694 /* Check optional files */
1695 /* If e.g. -eo is set, then -beo also needs to be set */
1696 if (is_optional(&fnm[i]) && bSet && !bBench)
1698 sprintf(buf, "-b%s", &opt[1]);
1699 setopt(buf,nfile,fnm);
1701 /* If -beo is set, then -eo also needs to be! */
1702 if (is_optional(&fnm[i]) && bSet && bBench)
1704 sprintf(buf, "-%s", &opt[2]);
1705 setopt(buf,nfile,fnm);
1711 static double gettime()
1713 #ifdef HAVE_GETTIMEOFDAY
1714 struct timeval t;
1715 struct timezone tz = { 0,0 };
1716 double seconds;
1718 gettimeofday(&t,&tz);
1720 seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
1722 return seconds;
1723 #else
1724 double seconds;
1726 seconds = time(NULL);
1728 return seconds;
1729 #endif
1733 #define BENCHSTEPS (1000)
1735 int gmx_tune_pme(int argc,char *argv[])
1737 const char *desc[] = {
1738 "For a given number [TT]-np[tt] or [TT]-nt[tt] of processors/threads, this program systematically",
1739 "times mdrun with various numbers of PME-only nodes and determines",
1740 "which setting is fastest. It will also test whether performance can",
1741 "be enhanced by shifting load from the reciprocal to the real space",
1742 "part of the Ewald sum. ",
1743 "Simply pass your [TT].tpr[tt] file to g_tune_pme together with other options",
1744 "for mdrun as needed.[PAR]",
1745 "Which executables are used can be set in the environment variables",
1746 "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
1747 "will be used as defaults. Note that for certain MPI frameworks you",
1748 "need to provide a machine- or hostfile. This can also be passed",
1749 "via the MPIRUN variable, e.g.",
1750 "'export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"'[PAR]",
1751 "Please call g_tune_pme with the normal options you would pass to",
1752 "mdrun and add [TT]-np[tt] for the number of processors to perform the",
1753 "tests on, or [TT]-nt[tt] for the number of threads. You can also add [TT]-r[tt]",
1754 "to repeat each test several times to get better statistics. [PAR]",
1755 "g_tune_pme can test various real space / reciprocal space workloads",
1756 "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
1757 "written with enlarged cutoffs and smaller fourier grids respectively.",
1758 "Typically, the first test (no. 0) will be with the settings from the input",
1759 "[TT].tpr[tt] file; the last test (no. [TT]ntpr[tt]) will have cutoffs multiplied",
1760 "by (and at the same time fourier grid dimensions divided by) the scaling",
1761 "factor [TT]-fac[tt] (default 1.2). The remaining [TT].tpr[tt] files will have equally",
1762 "spaced values inbetween these extremes. Note that you can set [TT]-ntpr[tt] to 1",
1763 "if you just want to find the optimal number of PME-only nodes; in that case",
1764 "your input [TT].tpr[tt] file will remain unchanged.[PAR]",
1765 "For the benchmark runs, the default of 1000 time steps should suffice for most",
1766 "MD systems. The dynamic load balancing needs about 100 time steps",
1767 "to adapt to local load imbalances, therefore the time step counters",
1768 "are by default reset after 100 steps. For large systems",
1769 "(>1M atoms) you may have to set [TT]-resetstep[tt] to a higher value.",
1770 "From the 'DD' load imbalance entries in the md.log output file you",
1771 "can tell after how many steps the load is sufficiently balanced.[PAR]"
1772 "Example call: [TT]g_tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
1773 "After calling mdrun several times, detailed performance information",
1774 "is available in the output file perf.out. ",
1775 "Note that during the benchmarks a couple of temporary files are written",
1776 "(options -b*), these will be automatically deleted after each test.[PAR]",
1777 "If you want the simulation to be started automatically with the",
1778 "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
1781 int nnodes =1;
1782 int repeats=2;
1783 int pmeentries=0; /* How many values for -npme do we actually test for each tpr file */
1784 real maxPMEfraction=0.50;
1785 real minPMEfraction=0.25;
1786 int maxPMEnodes, minPMEnodes;
1787 real downfac=1.0,upfac=1.2;
1788 int ntprs=0;
1789 real fs=0.0; /* 0 indicates: not set by the user */
1790 gmx_large_int_t bench_nsteps=BENCHSTEPS;
1791 gmx_large_int_t new_sim_nsteps=-1; /* -1 indicates: not set by the user */
1792 gmx_large_int_t cpt_steps=0; /* Step counter in .cpt input file */
1793 int presteps=100; /* Do a full cycle reset after presteps steps */
1794 bool bOverwrite=FALSE, bKeepTPR;
1795 bool bLaunch=FALSE;
1796 bool bPassAll=FALSE;
1797 char *ExtraArgs=NULL;
1798 char **tpr_names=NULL;
1799 const char *simulation_tpr=NULL;
1800 int best_npme, best_tpr;
1801 int sim_part = 1; /* For benchmarks with checkpoint files */
1803 /* Default program names if nothing else is found */
1804 char *cmd_mpirun=NULL, *cmd_mdrun=NULL;
1805 char *cmd_args_bench, *cmd_args_launch;
1806 char *cmd_np=NULL;
1808 t_perf **perfdata=NULL;
1809 t_inputinfo *info;
1810 int i;
1811 FILE *fp;
1812 t_commrec *cr;
1814 /* Print out how long the tuning took */
1815 double seconds;
1817 static t_filenm fnm[] = {
1818 /* g_tune_pme */
1819 { efOUT, "-p", "perf", ffWRITE },
1820 { efLOG, "-err", "errors", ffWRITE },
1821 { efTPX, "-so", "tuned", ffWRITE },
1822 /* mdrun: */
1823 { efTPX, NULL, NULL, ffREAD },
1824 { efTRN, "-o", NULL, ffWRITE },
1825 { efXTC, "-x", NULL, ffOPTWR },
1826 { efCPT, "-cpi", NULL, ffOPTRD },
1827 { efCPT, "-cpo", NULL, ffOPTWR },
1828 { efSTO, "-c", "confout", ffWRITE },
1829 { efEDR, "-e", "ener", ffWRITE },
1830 { efLOG, "-g", "md", ffWRITE },
1831 { efXVG, "-dhdl", "dhdl", ffOPTWR },
1832 { efXVG, "-field", "field", ffOPTWR },
1833 { efXVG, "-table", "table", ffOPTRD },
1834 { efXVG, "-tablep", "tablep", ffOPTRD },
1835 { efXVG, "-tableb", "table", ffOPTRD },
1836 { efTRX, "-rerun", "rerun", ffOPTRD },
1837 { efXVG, "-tpi", "tpi", ffOPTWR },
1838 { efXVG, "-tpid", "tpidist", ffOPTWR },
1839 { efEDI, "-ei", "sam", ffOPTRD },
1840 { efEDO, "-eo", "sam", ffOPTWR },
1841 { efGCT, "-j", "wham", ffOPTRD },
1842 { efGCT, "-jo", "bam", ffOPTWR },
1843 { efXVG, "-ffout", "gct", ffOPTWR },
1844 { efXVG, "-devout", "deviatie", ffOPTWR },
1845 { efXVG, "-runav", "runaver", ffOPTWR },
1846 { efXVG, "-px", "pullx", ffOPTWR },
1847 { efXVG, "-pf", "pullf", ffOPTWR },
1848 { efMTX, "-mtx", "nm", ffOPTWR },
1849 { efNDX, "-dn", "dipole", ffOPTWR },
1850 /* Output files that are deleted after each benchmark run */
1851 { efTRN, "-bo", "bench", ffWRITE },
1852 { efXTC, "-bx", "bench", ffWRITE },
1853 { efCPT, "-bcpo", "bench", ffWRITE },
1854 { efSTO, "-bc", "bench", ffWRITE },
1855 { efEDR, "-be", "bench", ffWRITE },
1856 { efLOG, "-bg", "bench", ffWRITE },
1857 { efEDO, "-beo", "bench", ffOPTWR },
1858 { efXVG, "-bdhdl", "benchdhdl",ffOPTWR },
1859 { efXVG, "-bfield", "benchfld" ,ffOPTWR },
1860 { efXVG, "-btpi", "benchtpi", ffOPTWR },
1861 { efXVG, "-btpid", "benchtpid",ffOPTWR },
1862 { efGCT, "-bjo", "bench", ffOPTWR },
1863 { efXVG, "-bffout", "benchgct", ffOPTWR },
1864 { efXVG, "-bdevout","benchdev", ffOPTWR },
1865 { efXVG, "-brunav", "benchrnav",ffOPTWR },
1866 { efXVG, "-bpx", "benchpx", ffOPTWR },
1867 { efXVG, "-bpf", "benchpf", ffOPTWR },
1868 { efMTX, "-bmtx", "benchn", ffOPTWR },
1869 { efNDX, "-bdn", "bench", ffOPTWR }
1872 /* Command line options of mdrun */
1873 bool bDDBondCheck = TRUE;
1874 bool bDDBondComm = TRUE;
1875 bool bVerbose = FALSE;
1876 bool bCompact = TRUE;
1877 bool bSepPot = FALSE;
1878 bool bRerunVSite = FALSE;
1879 bool bIonize = FALSE;
1880 bool bConfout = TRUE;
1881 bool bReproducible = FALSE;
1882 bool bThreads = FALSE;
1884 int nmultisim=0;
1885 int nstglobalcomm=-1;
1886 int repl_ex_nst=0;
1887 int repl_ex_seed=-1;
1888 int nstepout=100;
1889 int nthreads=1;
1891 const char *ddno_opt[ddnoNR+1] =
1892 { NULL, "interleave", "pp_pme", "cartesian", NULL };
1893 const char *dddlb_opt[] =
1894 { NULL, "auto", "no", "yes", NULL };
1895 const char *procstring[] =
1896 { NULL, "-np", "-n", "none", NULL };
1897 const char *npmevalues_opt[] =
1898 { NULL, "auto", "all", "subset", NULL };
1899 real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
1900 char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
1901 char *deffnm=NULL;
1902 #define STD_CPT_PERIOD (15.0)
1903 real cpt_period=STD_CPT_PERIOD,max_hours=-1;
1904 bool bAppendFiles=TRUE;
1905 bool bResetCountersHalfWay=FALSE;
1906 output_env_t oenv=NULL;
1908 t_pargs pa[] = {
1909 /***********************/
1910 /* g_tune_pme options: */
1911 /***********************/
1912 { "-np", FALSE, etINT, {&nnodes},
1913 "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" },
1914 { "-npstring", FALSE, etENUM, {procstring},
1915 "Specify the number of processors to $MPIRUN using this string"},
1916 { "-passall", FALSE, etBOOL, {&bPassAll},
1917 "HIDDENPut arguments unknown to mdrun at the end of the command line. Can e.g. be used for debugging purposes. "},
1918 { "-nt", FALSE, etINT, {&nthreads},
1919 "Number of threads to run the tests on (turns MPI & mpirun off)"},
1920 { "-r", FALSE, etINT, {&repeats},
1921 "Repeat each test this often" },
1922 { "-max", FALSE, etREAL, {&maxPMEfraction},
1923 "Max fraction of PME nodes to test with" },
1924 { "-min", FALSE, etREAL, {&minPMEfraction},
1925 "Min fraction of PME nodes to test with" },
1926 { "-npme", FALSE, etENUM, {npmevalues_opt},
1927 "Benchmark all possible values for -npme or just the subset that is expected to perform well"},
1928 { "-upfac", FALSE, etREAL, {&upfac},
1929 "Upper limit for rcoulomb scaling factor (Note that rcoulomb upscaling results in fourier grid downscaling)" },
1930 { "-downfac", FALSE, etREAL, {&downfac},
1931 "Lower limit for rcoulomb scaling factor" },
1932 { "-ntpr", FALSE, etINT, {&ntprs},
1933 "Number of tpr files to benchmark. Create these many files with scaling factors ranging from 1.0 to fac. If < 1, automatically choose the number of tpr files to test" },
1934 { "-four", FALSE, etREAL, {&fs},
1935 "Use this fourierspacing value instead of the grid found in the tpr input file. (Spacing applies to a scaling factor of 1.0 if multiple tpr files are written)" },
1936 { "-steps", FALSE, etGMX_LARGE_INT, {&bench_nsteps},
1937 "Take timings for these many steps in the benchmark runs" },
1938 { "-resetstep",FALSE, etINT, {&presteps},
1939 "Let dlb equilibrate these many steps before timings are taken (reset cycle counters after these many steps)" },
1940 { "-simsteps", FALSE, etGMX_LARGE_INT, {&new_sim_nsteps},
1941 "If non-negative, perform these many steps in the real run (overwrite nsteps from tpr, add cpt steps)" },
1942 { "-launch", FALSE, etBOOL, {&bLaunch},
1943 "Lauch the real simulation after optimization" },
1944 /******************/
1945 /* mdrun options: */
1946 /******************/
1947 { "-deffnm", FALSE, etSTR, {&deffnm},
1948 "Set the default filename for all file options at launch time" },
1949 { "-ddorder", FALSE, etENUM, {ddno_opt},
1950 "DD node order" },
1951 { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
1952 "Check for all bonded interactions with DD" },
1953 { "-ddbondcomm",FALSE, etBOOL, {&bDDBondComm},
1954 "HIDDENUse special bonded atom communication when -rdd > cut-off" },
1955 { "-rdd", FALSE, etREAL, {&rdd},
1956 "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
1957 { "-rcon", FALSE, etREAL, {&rconstr},
1958 "Maximum distance for P-LINCS (nm), 0 is estimate" },
1959 { "-dlb", FALSE, etENUM, {dddlb_opt},
1960 "Dynamic load balancing (with DD)" },
1961 { "-dds", FALSE, etREAL, {&dlb_scale},
1962 "Minimum allowed dlb scaling of the DD cell size" },
1963 { "-ddcsx", FALSE, etSTR, {&ddcsx},
1964 "HIDDENThe DD cell sizes in x" },
1965 { "-ddcsy", FALSE, etSTR, {&ddcsy},
1966 "HIDDENThe DD cell sizes in y" },
1967 { "-ddcsz", FALSE, etSTR, {&ddcsz},
1968 "HIDDENThe DD cell sizes in z" },
1969 { "-gcom", FALSE, etINT, {&nstglobalcomm},
1970 "Global communication frequency" },
1971 { "-v", FALSE, etBOOL, {&bVerbose},
1972 "Be loud and noisy" },
1973 { "-compact", FALSE, etBOOL, {&bCompact},
1974 "Write a compact log file" },
1975 { "-seppot", FALSE, etBOOL, {&bSepPot},
1976 "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
1977 { "-pforce", FALSE, etREAL, {&pforce},
1978 "Print all forces larger than this (kJ/mol nm)" },
1979 { "-reprod", FALSE, etBOOL, {&bReproducible},
1980 "Try to avoid optimizations that affect binary reproducibility" },
1981 { "-cpt", FALSE, etREAL, {&cpt_period},
1982 "Checkpoint interval (minutes)" },
1983 { "-append", FALSE, etBOOL, {&bAppendFiles},
1984 "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
1985 { "-maxh", FALSE, etREAL, {&max_hours},
1986 "Terminate after 0.99 times this time (hours)" },
1987 { "-multi", FALSE, etINT, {&nmultisim},
1988 "Do multiple simulations in parallel" },
1989 { "-replex", FALSE, etINT, {&repl_ex_nst},
1990 "Attempt replica exchange every # steps" },
1991 { "-reseed", FALSE, etINT, {&repl_ex_seed},
1992 "Seed for replica exchange, -1 is generate a seed" },
1993 { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
1994 "HIDDENRecalculate virtual site coordinates with -rerun" },
1995 { "-ionize", FALSE, etBOOL, {&bIonize},
1996 "Do a simulation including the effect of an X-Ray bombardment on your system" },
1997 { "-confout", FALSE, etBOOL, {&bConfout},
1998 "HIDDENWrite the last configuration with -c and force checkpointing at the last step" },
1999 { "-stepout", FALSE, etINT, {&nstepout},
2000 "HIDDENFrequency of writing the remaining runtime" },
2001 { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
2002 "HIDDENReset the cycle counters after half the number of steps or halfway -maxh (launch only)" }
2006 #define NFILE asize(fnm)
2008 CopyRight(stderr,argv[0]);
2010 seconds = gettime();
2012 parse_common_args(&argc,argv,PCA_NOEXIT_ON_ARGS,
2013 NFILE,fnm,asize(pa),pa,asize(desc),desc,
2014 0,NULL,&oenv);
2016 /* Store the remaining unparsed command line entries in a string */
2017 snew(ExtraArgs, 1);
2018 ExtraArgs[0] = '\0';
2019 for (i=1; i<argc; i++) /* argc will now be 1 if everything was understood */
2021 add_to_string(&ExtraArgs, argv[i]);
2022 add_to_string(&ExtraArgs, " ");
2024 if ( !bPassAll && (ExtraArgs[0] != '\0') )
2026 fprintf(stderr, "\nWARNING: The following arguments you provided have no effect:\n"
2027 "%s\n"
2028 "Use the -passall option to force them to appear on the command lines\n"
2029 "for the benchmark simulations%s.\n\n",
2030 ExtraArgs, bLaunch? " and at launch time" : "");
2033 if (opt2parg_bSet("-nt",asize(pa),pa))
2035 bThreads=TRUE;
2036 if (opt2parg_bSet("-npstring",asize(pa),pa))
2037 fprintf(stderr, "WARNING: -npstring has no effect when using threads.\n");
2039 if (nnodes > 1)
2040 gmx_fatal(FARGS, "Can't run multi-threaded MPI simulation yet!");
2041 /* and now we just set this; a bit of an ugly hack*/
2042 nnodes=nthreads;
2044 /* Automatically set -beo options if -eo is set etc. */
2045 couple_files_options(NFILE,fnm);
2047 /* Construct the command line arguments for benchmark runs
2048 * as well as for the simulation run
2050 create_command_line_snippets(bThreads,presteps,NFILE,fnm,asize(pa),pa,procstring[0],
2051 &cmd_np, &cmd_args_bench, &cmd_args_launch,
2052 bPassAll? ExtraArgs : (char *)"");
2054 /* Read in checkpoint file if requested */
2055 sim_part = 1;
2056 if(opt2bSet("-cpi",NFILE,fnm))
2058 snew(cr,1);
2059 cr->duty=DUTY_PP; /* makes the following routine happy */
2060 read_checkpoint_simulation_part(opt2fn("-cpi",NFILE,fnm),
2061 &sim_part,&cpt_steps,cr,
2062 FALSE,NULL,NULL);
2063 sfree(cr);
2064 sim_part++;
2065 /* sim_part will now be 1 if no checkpoint file was found */
2066 if (sim_part<=1)
2067 gmx_fatal(FARGS, "Checkpoint file %s not found!", opt2fn("-cpi",
2068 NFILE,
2069 fnm));
2072 /* Open performance output file and write header info */
2073 fp = ffopen(opt2fn("-p",NFILE,fnm),"w");
2075 /* Make a quick consistency check of command line parameters */
2076 check_input(nnodes, repeats, &ntprs, &upfac, &downfac, maxPMEfraction,
2077 minPMEfraction, fs, bench_nsteps, fnm, NFILE, sim_part, presteps,
2078 asize(pa),pa);
2080 /* Determine max and min number of PME nodes to test: */
2081 if (nnodes > 2)
2083 maxPMEnodes = floor(maxPMEfraction*nnodes);
2084 minPMEnodes = max(floor(minPMEfraction*nnodes), 0);
2085 fprintf(stdout, "Will try runs with %d ", minPMEnodes);
2086 if (maxPMEnodes != minPMEnodes)
2087 fprintf(stdout, "- %d ", maxPMEnodes);
2088 fprintf(stdout, "PME-only nodes.\n Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
2090 else
2092 maxPMEnodes = 0;
2093 minPMEnodes = 0;
2096 /* Get the commands we need to set up the runs from environment variables */
2097 get_program_paths(bThreads, &cmd_mpirun, cmd_np, &cmd_mdrun, repeats);
2099 /* Print some header info to file */
2100 sep_line(fp);
2101 fprintf(fp, "\n P E R F O R M A N C E R E S U L T S\n");
2102 sep_line(fp);
2103 fprintf(fp, "%s for Gromacs %s\n", ShortProgram(),GromacsVersion());
2104 if (!bThreads)
2106 fprintf(fp, "Number of nodes : %d\n", nnodes);
2107 fprintf(fp, "The mpirun command is : %s\n", cmd_mpirun);
2108 if ( strcmp(procstring[0], "none") != 0)
2109 fprintf(fp, "Passing # of nodes via : %s\n", procstring[0]);
2110 else
2111 fprintf(fp, "Not setting number of nodes in system call\n");
2113 else
2114 fprintf(fp, "Number of threads : %d\n", nnodes);
2116 fprintf(fp, "The mdrun command is : %s\n", cmd_mdrun);
2117 fprintf(fp, "mdrun args benchmarks : %s\n", cmd_args_bench);
2118 fprintf(fp, "Benchmark steps : ");
2119 fprintf(fp, gmx_large_int_pfmt, bench_nsteps);
2120 fprintf(fp, "\n");
2121 fprintf(fp, "dlb equilibration steps : %d\n", presteps);
2122 if (sim_part > 1)
2124 fprintf(fp, "Checkpoint time step : ");
2125 fprintf(fp, gmx_large_int_pfmt, cpt_steps);
2126 fprintf(fp, "\n");
2128 if (bLaunch)
2129 fprintf(fp, "mdrun args at launchtime: %s\n", cmd_args_launch);
2130 if (!bPassAll && ExtraArgs[0] != '\0')
2131 fprintf(fp, "Unused arguments : %s\n", ExtraArgs);
2132 if (new_sim_nsteps >= 0)
2134 bOverwrite = TRUE;
2135 fprintf(stderr, "Note: Simulation input file %s will have ", opt2fn("-so",NFILE,fnm));
2136 fprintf(stderr, gmx_large_int_pfmt, new_sim_nsteps+cpt_steps);
2137 fprintf(stderr, " steps.\n");
2138 fprintf(fp, "Simulation steps : ");
2139 fprintf(fp, gmx_large_int_pfmt, new_sim_nsteps);
2140 fprintf(fp, "\n");
2142 if (repeats > 1)
2143 fprintf(fp, "Repeats for each test : %d\n", repeats);
2145 if (fs > 0.0)
2147 fprintf(fp, "Requested grid spacing : %f (tpr file will be changed accordingly)\n", fs);
2148 fprintf(fp, " This will be the grid spacing at a scaling factor of 1.0\n");
2151 fprintf(fp, "Input file : %s\n", opt2fn("-s",NFILE,fnm));
2153 /* Allocate memory for the inputinfo struct: */
2154 snew(info, 1);
2155 info->nr_inputfiles = ntprs;
2156 for (i=0; i<ntprs; i++)
2158 snew(info->r_coulomb , ntprs);
2159 snew(info->r_vdw , ntprs);
2160 snew(info->rlist , ntprs);
2161 snew(info->rlistlong , ntprs);
2162 snew(info->fourier_nx, ntprs);
2163 snew(info->fourier_ny, ntprs);
2164 snew(info->fourier_nz, ntprs);
2165 snew(info->fourier_sp, ntprs);
2167 /* Make alternative tpr files to test: */
2168 snew(tpr_names, ntprs);
2169 for (i=0; i<ntprs; i++)
2170 snew(tpr_names[i], STRLEN);
2172 make_benchmark_tprs(opt2fn("-s",NFILE,fnm), tpr_names, bench_nsteps+presteps,
2173 cpt_steps, upfac, downfac, ntprs, fs, info, fp);
2176 /********************************************************************************/
2177 /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats */
2178 /********************************************************************************/
2179 snew(perfdata, ntprs);
2180 do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npmevalues_opt[0], perfdata, &pmeentries,
2181 repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2182 cmd_args_bench, fnm, NFILE, sim_part, presteps, cpt_steps);
2184 fprintf(fp, "\nTuning took%8.1f minutes.\n", (gettime()-seconds)/60.0);
2186 /* Analyse the results and give a suggestion for optimal settings: */
2187 bKeepTPR = analyze_data(fp, opt2fn("-p", NFILE, fnm), perfdata, nnodes, ntprs, pmeentries,
2188 repeats, info, &best_tpr, &best_npme);
2190 /* Take the best-performing tpr file and enlarge nsteps to original value */
2191 if ( bKeepTPR && !bOverwrite && !(fs > 0.0) )
2193 simulation_tpr = opt2fn("-s",NFILE,fnm);
2195 else
2197 simulation_tpr = opt2fn("-so",NFILE,fnm);
2198 modify_PMEsettings(bOverwrite? (new_sim_nsteps+cpt_steps) :
2199 info->orig_sim_steps, tpr_names[best_tpr],
2200 simulation_tpr);
2203 /* Now start the real simulation if the user requested it ... */
2204 launch_simulation(bLaunch, fp, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2205 cmd_args_launch, simulation_tpr, nnodes, best_npme);
2206 ffclose(fp);
2208 /* ... or simply print the performance results to screen: */
2209 if (!bLaunch)
2210 finalize(opt2fn("-p", NFILE, fnm));
2212 return 0;