Add TNG writing and reading support
[gromacs.git] / src / gromacs / gmxana / gmx_tune_pme.c
blobbda2ae89df34a73acb8e0d9f490283720ce4e7c8
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
40 #include <time.h>
41 #ifdef HAVE_SYS_TIME_H
42 #include <sys/time.h>
43 #endif
47 #include "gromacs/commandline/pargs.h"
48 #include "typedefs.h"
49 #include "smalloc.h"
50 #include "vec.h"
51 #include "copyrite.h"
52 #include "gromacs/fileio/tpxio.h"
53 #include "string2.h"
54 #include "readinp.h"
55 #include "calcgrid.h"
56 #include "checkpoint.h"
57 #include "macros.h"
58 #include "gmx_ana.h"
59 #include "names.h"
60 #include "perf_est.h"
61 #include "gromacs/timing/walltime_accounting.h"
64 /* Enum for situations that can occur during log file parsing, the
65 * corresponding string entries can be found in do_the_tests() in
66 * const char* ParseLog[] */
67 enum {
68 eParselogOK,
69 eParselogNotFound,
70 eParselogNoPerfData,
71 eParselogTerm,
72 eParselogResetProblem,
73 eParselogNoDDGrid,
74 eParselogTPXVersion,
75 eParselogNotParallel,
76 eParselogLargePrimeFactor,
77 eParselogFatal,
78 eParselogNr
82 typedef struct
84 int nPMEnodes; /* number of PME-only nodes used in this test */
85 int nx, ny, nz; /* DD grid */
86 int guessPME; /* if nPMEnodes == -1, this is the guessed number of PME nodes */
87 double *Gcycles; /* This can contain more than one value if doing multiple tests */
88 double Gcycles_Av;
89 float *ns_per_day;
90 float ns_per_day_Av;
91 float *PME_f_load; /* PME mesh/force load average*/
92 float PME_f_load_Av; /* Average average ;) ... */
93 char *mdrun_cmd_line; /* Mdrun command line used for this test */
94 } t_perf;
97 typedef struct
99 int nr_inputfiles; /* The number of tpr and mdp input files */
100 gmx_int64_t orig_sim_steps; /* Number of steps to be done in the real simulation */
101 gmx_int64_t orig_init_step; /* Init step for the real simulation */
102 real *rcoulomb; /* The coulomb radii [0...nr_inputfiles] */
103 real *rvdw; /* The vdW radii */
104 real *rlist; /* Neighbourlist cutoff radius */
105 real *rlistlong;
106 int *nkx, *nky, *nkz;
107 real *fsx, *fsy, *fsz; /* Fourierspacing in x,y,z dimension */
108 } t_inputinfo;
111 static void sep_line(FILE *fp)
113 fprintf(fp, "\n------------------------------------------------------------\n");
117 /* Wrapper for system calls */
118 static int gmx_system_call(char *command)
120 #ifdef GMX_NO_SYSTEM
121 gmx_fatal(FARGS, "No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n", command);
122 #else
123 return ( system(command) );
124 #endif
128 /* Check if string starts with substring */
129 static gmx_bool str_starts(const char *string, const char *substring)
131 return ( strncmp(string, substring, strlen(substring)) == 0);
135 static void cleandata(t_perf *perfdata, int test_nr)
137 perfdata->Gcycles[test_nr] = 0.0;
138 perfdata->ns_per_day[test_nr] = 0.0;
139 perfdata->PME_f_load[test_nr] = 0.0;
141 return;
145 static gmx_bool is_equal(real a, real b)
147 real diff, eps = 1.0e-7;
150 diff = a - b;
152 if (diff < 0.0)
154 diff = -diff;
157 if (diff < eps)
159 return TRUE;
161 else
163 return FALSE;
168 static void finalize(const char *fn_out)
170 char buf[STRLEN];
171 FILE *fp;
174 fp = fopen(fn_out, "r");
175 fprintf(stdout, "\n\n");
177 while (fgets(buf, STRLEN-1, fp) != NULL)
179 fprintf(stdout, "%s", buf);
181 fclose(fp);
182 fprintf(stdout, "\n\n");
186 enum {
187 eFoundNothing, eFoundDDStr, eFoundAccountingStr, eFoundCycleStr
190 static int parse_logfile(const char *logfile, const char *errfile,
191 t_perf *perfdata, int test_nr, int presteps, gmx_int64_t cpt_steps,
192 int nnodes)
194 FILE *fp;
195 char line[STRLEN], dumstring[STRLEN], dumstring2[STRLEN];
196 const char matchstrdd[] = "Domain decomposition grid";
197 const char matchstrcr[] = "resetting all time and cycle counters";
198 const char matchstrbal[] = "Average PME mesh/force load:";
199 const char matchstring[] = "R E A L C Y C L E A N D T I M E A C C O U N T I N G";
200 const char errSIG[] = "signal, stopping at the next";
201 int iFound;
202 int procs;
203 float dum1, dum2, dum3, dum4;
204 int ndum;
205 int npme;
206 gmx_int64_t resetsteps = -1;
207 gmx_bool bFoundResetStr = FALSE;
208 gmx_bool bResetChecked = FALSE;
211 if (!gmx_fexist(logfile))
213 fprintf(stderr, "WARNING: Could not find logfile %s.\n", logfile);
214 cleandata(perfdata, test_nr);
215 return eParselogNotFound;
218 fp = fopen(logfile, "r");
219 perfdata->PME_f_load[test_nr] = -1.0;
220 perfdata->guessPME = -1;
222 iFound = eFoundNothing;
223 if (1 == nnodes)
225 iFound = eFoundDDStr; /* Skip some case statements */
228 while (fgets(line, STRLEN, fp) != NULL)
230 /* Remove leading spaces */
231 ltrim(line);
233 /* Check for TERM and INT signals from user: */
234 if (strstr(line, errSIG) != NULL)
236 fclose(fp);
237 cleandata(perfdata, test_nr);
238 return eParselogTerm;
241 /* Check whether cycle resetting worked */
242 if (presteps > 0 && !bFoundResetStr)
244 if (strstr(line, matchstrcr) != NULL)
246 sprintf(dumstring, "step %s", "%"GMX_SCNd64);
247 sscanf(line, dumstring, &resetsteps);
248 bFoundResetStr = TRUE;
249 if (resetsteps == presteps+cpt_steps)
251 bResetChecked = TRUE;
253 else
255 sprintf(dumstring, "%"GMX_PRId64, resetsteps);
256 sprintf(dumstring2, "%"GMX_PRId64, presteps+cpt_steps);
257 fprintf(stderr, "WARNING: Time step counters were reset at step %s,\n"
258 " though they were supposed to be reset at step %s!\n",
259 dumstring, dumstring2);
264 /* Look for strings that appear in a certain order in the log file: */
265 switch (iFound)
267 case eFoundNothing:
268 /* Look for domain decomp grid and separate PME nodes: */
269 if (str_starts(line, matchstrdd))
271 sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
272 &(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme);
273 if (perfdata->nPMEnodes == -1)
275 perfdata->guessPME = npme;
277 else if (perfdata->nPMEnodes != npme)
279 gmx_fatal(FARGS, "PME nodes from command line and output file are not identical");
281 iFound = eFoundDDStr;
283 /* Catch a few errors that might have occured: */
284 else if (str_starts(line, "There is no domain decomposition for"))
286 fclose(fp);
287 return eParselogNoDDGrid;
289 else if (str_starts(line, "The number of nodes you selected"))
291 fclose(fp);
292 return eParselogLargePrimeFactor;
294 else if (str_starts(line, "reading tpx file"))
296 fclose(fp);
297 return eParselogTPXVersion;
299 else if (str_starts(line, "The -dd or -npme option request a parallel simulation"))
301 fclose(fp);
302 return eParselogNotParallel;
304 break;
305 case eFoundDDStr:
306 /* Look for PME mesh/force balance (not necessarily present, though) */
307 if (str_starts(line, matchstrbal))
309 sscanf(&line[strlen(matchstrbal)], "%f", &(perfdata->PME_f_load[test_nr]));
311 /* Look for matchstring */
312 if (str_starts(line, matchstring))
314 iFound = eFoundAccountingStr;
316 break;
317 case eFoundAccountingStr:
318 /* Already found matchstring - look for cycle data */
319 if (str_starts(line, "Total "))
321 sscanf(line, "Total %d %lf", &procs, &(perfdata->Gcycles[test_nr]));
322 iFound = eFoundCycleStr;
324 break;
325 case eFoundCycleStr:
326 /* Already found cycle data - look for remaining performance info and return */
327 if (str_starts(line, "Performance:"))
329 ndum = sscanf(line, "%s %f %f %f %f", dumstring, &dum1, &dum2, &dum3, &dum4);
330 /* (ns/day) is the second last entry, depending on whether GMX_DETAILED_PERF_STATS was set in print_perf(), nrnb.c */
331 perfdata->ns_per_day[test_nr] = (ndum == 5) ? dum3 : dum1;
332 fclose(fp);
333 if (bResetChecked || presteps == 0)
335 return eParselogOK;
337 else
339 return eParselogResetProblem;
342 break;
344 } /* while */
346 /* Close the log file */
347 fclose(fp);
349 /* Check why there is no performance data in the log file.
350 * Did a fatal errors occur? */
351 if (gmx_fexist(errfile))
353 fp = fopen(errfile, "r");
354 while (fgets(line, STRLEN, fp) != NULL)
356 if (str_starts(line, "Fatal error:") )
358 if (fgets(line, STRLEN, fp) != NULL)
360 fprintf(stderr, "\nWARNING: An error occured during this benchmark:\n"
361 "%s\n", line);
363 fclose(fp);
364 cleandata(perfdata, test_nr);
365 return eParselogFatal;
368 fclose(fp);
370 else
372 fprintf(stderr, "WARNING: Could not find stderr file %s.\n", errfile);
375 /* Giving up ... we could not find out why there is no performance data in
376 * the log file. */
377 fprintf(stdout, "No performance data in log file.\n");
378 cleandata(perfdata, test_nr);
380 return eParselogNoPerfData;
384 static gmx_bool analyze_data(
385 FILE *fp,
386 const char *fn,
387 t_perf **perfdata,
388 int nnodes,
389 int ntprs,
390 int ntests,
391 int nrepeats,
392 t_inputinfo *info,
393 int *index_tpr, /* OUT: Nr of mdp file with best settings */
394 int *npme_optimal) /* OUT: Optimal number of PME nodes */
396 int i, j, k;
397 int line = 0, line_win = -1;
398 int k_win = -1, i_win = -1, winPME;
399 double s = 0.0; /* standard deviation */
400 t_perf *pd;
401 char strbuf[STRLEN];
402 char str_PME_f_load[13];
403 gmx_bool bCanUseOrigTPR;
404 gmx_bool bRefinedCoul, bRefinedVdW, bRefinedGrid;
407 if (nrepeats > 1)
409 sep_line(fp);
410 fprintf(fp, "Summary of successful runs:\n");
411 fprintf(fp, "Line tpr PME nodes Gcycles Av. Std.dev. ns/day PME/f");
412 if (nnodes > 1)
414 fprintf(fp, " DD grid");
416 fprintf(fp, "\n");
420 for (k = 0; k < ntprs; k++)
422 for (i = 0; i < ntests; i++)
424 /* Select the right dataset: */
425 pd = &(perfdata[k][i]);
427 pd->Gcycles_Av = 0.0;
428 pd->PME_f_load_Av = 0.0;
429 pd->ns_per_day_Av = 0.0;
431 if (pd->nPMEnodes == -1)
433 sprintf(strbuf, "(%3d)", pd->guessPME);
435 else
437 sprintf(strbuf, " ");
440 /* Get the average run time of a setting */
441 for (j = 0; j < nrepeats; j++)
443 pd->Gcycles_Av += pd->Gcycles[j];
444 pd->PME_f_load_Av += pd->PME_f_load[j];
446 pd->Gcycles_Av /= nrepeats;
447 pd->PME_f_load_Av /= nrepeats;
449 for (j = 0; j < nrepeats; j++)
451 if (pd->ns_per_day[j] > 0.0)
453 pd->ns_per_day_Av += pd->ns_per_day[j];
455 else
457 /* Somehow the performance number was not aquired for this run,
458 * therefor set the average to some negative value: */
459 pd->ns_per_day_Av = -1.0f*nrepeats;
460 break;
463 pd->ns_per_day_Av /= nrepeats;
465 /* Nicer output: */
466 if (pd->PME_f_load_Av > 0.0)
468 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load_Av);
470 else
472 sprintf(str_PME_f_load, "%s", " - ");
476 /* We assume we had a successful run if both averages are positive */
477 if (pd->Gcycles_Av > 0.0 && pd->ns_per_day_Av > 0.0)
479 /* Output statistics if repeats were done */
480 if (nrepeats > 1)
482 /* Calculate the standard deviation */
483 s = 0.0;
484 for (j = 0; j < nrepeats; j++)
486 s += pow( pd->Gcycles[j] - pd->Gcycles_Av, 2 );
488 s /= (nrepeats - 1);
489 s = sqrt(s);
491 fprintf(fp, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s",
492 line, k, pd->nPMEnodes, strbuf, pd->Gcycles_Av, s,
493 pd->ns_per_day_Av, str_PME_f_load);
494 if (nnodes > 1)
496 fprintf(fp, " %3d %3d %3d", pd->nx, pd->ny, pd->nz);
498 fprintf(fp, "\n");
500 /* Store the index of the best run found so far in 'winner': */
501 if ( (k_win == -1) || (pd->Gcycles_Av < perfdata[k_win][i_win].Gcycles_Av) )
503 k_win = k;
504 i_win = i;
505 line_win = line;
507 line++;
512 if (k_win == -1)
514 gmx_fatal(FARGS, "None of the runs was successful! Check %s for problems.", fn);
517 sep_line(fp);
519 winPME = perfdata[k_win][i_win].nPMEnodes;
521 if (1 == ntests)
523 /* We stuck to a fixed number of PME-only nodes */
524 sprintf(strbuf, "settings No. %d", k_win);
526 else
528 /* We have optimized the number of PME-only nodes */
529 if (winPME == -1)
531 sprintf(strbuf, "%s", "the automatic number of PME nodes");
533 else
535 sprintf(strbuf, "%d PME nodes", winPME);
538 fprintf(fp, "Best performance was achieved with %s", strbuf);
539 if ((nrepeats > 1) && (ntests > 1))
541 fprintf(fp, " (see line %d)", line_win);
543 fprintf(fp, "\n");
545 /* Only mention settings if they were modified: */
546 bRefinedCoul = !is_equal(info->rcoulomb[k_win], info->rcoulomb[0]);
547 bRefinedVdW = !is_equal(info->rvdw[k_win], info->rvdw[0] );
548 bRefinedGrid = !(info->nkx[k_win] == info->nkx[0] &&
549 info->nky[k_win] == info->nky[0] &&
550 info->nkz[k_win] == info->nkz[0]);
552 if (bRefinedCoul || bRefinedVdW || bRefinedGrid)
554 fprintf(fp, "Optimized PME settings:\n");
555 bCanUseOrigTPR = FALSE;
557 else
559 bCanUseOrigTPR = TRUE;
562 if (bRefinedCoul)
564 fprintf(fp, " New Coulomb radius: %f nm (was %f nm)\n", info->rcoulomb[k_win], info->rcoulomb[0]);
567 if (bRefinedVdW)
569 fprintf(fp, " New Van der Waals radius: %f nm (was %f nm)\n", info->rvdw[k_win], info->rvdw[0]);
572 if (bRefinedGrid)
574 fprintf(fp, " New Fourier grid xyz: %d %d %d (was %d %d %d)\n", info->nkx[k_win], info->nky[k_win], info->nkz[k_win],
575 info->nkx[0], info->nky[0], info->nkz[0]);
578 if (bCanUseOrigTPR && ntprs > 1)
580 fprintf(fp, "and original PME settings.\n");
583 fflush(fp);
585 /* Return the index of the mdp file that showed the highest performance
586 * and the optimal number of PME nodes */
587 *index_tpr = k_win;
588 *npme_optimal = winPME;
590 return bCanUseOrigTPR;
594 /* Get the commands we need to set up the runs from environment variables */
595 static void get_program_paths(gmx_bool bThreads, char *cmd_mpirun[], char *cmd_mdrun[])
597 char *cp;
598 FILE *fp;
599 const char def_mpirun[] = "mpirun";
600 const char def_mdrun[] = "mdrun";
602 const char empty_mpirun[] = "";
604 /* Get the commands we need to set up the runs from environment variables */
605 if (!bThreads)
607 if ( (cp = getenv("MPIRUN")) != NULL)
609 *cmd_mpirun = strdup(cp);
611 else
613 *cmd_mpirun = strdup(def_mpirun);
616 else
618 *cmd_mpirun = strdup(empty_mpirun);
621 if ( (cp = getenv("MDRUN" )) != NULL)
623 *cmd_mdrun = strdup(cp);
625 else
627 *cmd_mdrun = strdup(def_mdrun);
631 /* Check that the commands will run mdrun (perhaps via mpirun) by
632 * running a very quick test simulation. Requires MPI environment to
633 * be available if applicable. */
634 static void check_mdrun_works(gmx_bool bThreads,
635 const char *cmd_mpirun,
636 const char *cmd_np,
637 const char *cmd_mdrun)
639 char *command = NULL;
640 char *cp;
641 char line[STRLEN];
642 FILE *fp;
643 const char filename[] = "benchtest.log";
645 /* This string should always be identical to the one in copyrite.c,
646 * gmx_print_version_info() in the defined(GMX_MPI) section */
647 const char match_mpi[] = "MPI library: MPI";
648 const char match_mdrun[] = "Program: ";
649 gmx_bool bMdrun = FALSE;
650 gmx_bool bMPI = FALSE;
652 /* Run a small test to see whether mpirun + mdrun work */
653 fprintf(stdout, "Making sure that mdrun can be executed. ");
654 if (bThreads)
656 snew(command, strlen(cmd_mdrun) + strlen(cmd_np) + strlen(filename) + 50);
657 sprintf(command, "%s%s-version -maxh 0.001 1> %s 2>&1", cmd_mdrun, cmd_np, filename);
659 else
661 snew(command, strlen(cmd_mpirun) + strlen(cmd_np) + strlen(cmd_mdrun) + strlen(filename) + 50);
662 sprintf(command, "%s%s%s -version -maxh 0.001 1> %s 2>&1", cmd_mpirun, cmd_np, cmd_mdrun, filename);
664 fprintf(stdout, "Trying '%s' ... ", command);
665 make_backup(filename);
666 gmx_system_call(command);
668 /* Check if we find the characteristic string in the output: */
669 if (!gmx_fexist(filename))
671 gmx_fatal(FARGS, "Output from test run could not be found.");
674 fp = fopen(filename, "r");
675 /* We need to scan the whole output file, since sometimes the queuing system
676 * also writes stuff to stdout/err */
677 while (!feof(fp) )
679 cp = fgets(line, STRLEN, fp);
680 if (cp != NULL)
682 if (str_starts(line, match_mdrun) )
684 bMdrun = TRUE;
686 if (str_starts(line, match_mpi) )
688 bMPI = TRUE;
692 fclose(fp);
694 if (bThreads)
696 if (bMPI)
698 gmx_fatal(FARGS, "Need a threaded version of mdrun. This one\n"
699 "(%s)\n"
700 "seems to have been compiled with MPI instead.",
701 *cmd_mdrun);
704 else
706 if (bMdrun && !bMPI)
708 gmx_fatal(FARGS, "Need an MPI-enabled version of mdrun. This one\n"
709 "(%s)\n"
710 "seems to have been compiled without MPI support.",
711 *cmd_mdrun);
715 if (!bMdrun)
717 gmx_fatal(FARGS, "Cannot execute mdrun. Please check %s for problems!",
718 filename);
721 fprintf(stdout, "passed.\n");
723 /* Clean up ... */
724 remove(filename);
725 sfree(command);
729 static void launch_simulation(
730 gmx_bool bLaunch, /* Should the simulation be launched? */
731 FILE *fp, /* General log file */
732 gmx_bool bThreads, /* whether to use threads */
733 char *cmd_mpirun, /* Command for mpirun */
734 char *cmd_np, /* Switch for -np or -ntmpi or empty */
735 char *cmd_mdrun, /* Command for mdrun */
736 char *args_for_mdrun, /* Arguments for mdrun */
737 const char *simulation_tpr, /* This tpr will be simulated */
738 int nPMEnodes) /* Number of PME nodes to use */
740 char *command;
743 /* Make enough space for the system call command,
744 * (100 extra chars for -npme ... etc. options should suffice): */
745 snew(command, strlen(cmd_mpirun)+strlen(cmd_mdrun)+strlen(cmd_np)+strlen(args_for_mdrun)+strlen(simulation_tpr)+100);
747 /* Note that the -passall options requires args_for_mdrun to be at the end
748 * of the command line string */
749 if (bThreads)
751 sprintf(command, "%s%s-npme %d -s %s %s",
752 cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun);
754 else
756 sprintf(command, "%s%s%s -npme %d -s %s %s",
757 cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun);
760 fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch ? "Using" : "Please use", command);
761 sep_line(fp);
762 fflush(fp);
764 /* Now the real thing! */
765 if (bLaunch)
767 fprintf(stdout, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command);
768 sep_line(stdout);
769 fflush(stdout);
770 gmx_system_call(command);
775 static void modify_PMEsettings(
776 gmx_int64_t simsteps, /* Set this value as number of time steps */
777 gmx_int64_t init_step, /* Set this value as init_step */
778 const char *fn_best_tpr, /* tpr file with the best performance */
779 const char *fn_sim_tpr) /* name of tpr file to be launched */
781 t_inputrec *ir;
782 t_state state;
783 gmx_mtop_t mtop;
784 char buf[200];
786 snew(ir, 1);
787 read_tpx_state(fn_best_tpr, ir, &state, NULL, &mtop);
789 /* Reset nsteps and init_step to the value of the input .tpr file */
790 ir->nsteps = simsteps;
791 ir->init_step = init_step;
793 /* Write the tpr file which will be launched */
794 sprintf(buf, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr, "%"GMX_PRId64);
795 fprintf(stdout, buf, ir->nsteps);
796 fflush(stdout);
797 write_tpx_state(fn_sim_tpr, ir, &state, &mtop);
799 sfree(ir);
803 #define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
805 /* Make additional TPR files with more computational load for the
806 * direct space processors: */
807 static void make_benchmark_tprs(
808 const char *fn_sim_tpr, /* READ : User-provided tpr file */
809 char *fn_bench_tprs[], /* WRITE: Names of benchmark tpr files */
810 gmx_int64_t benchsteps, /* Number of time steps for benchmark runs */
811 gmx_int64_t statesteps, /* Step counter in checkpoint file */
812 real rmin, /* Minimal Coulomb radius */
813 real rmax, /* Maximal Coulomb radius */
814 real bScaleRvdw, /* Scale rvdw along with rcoulomb */
815 int *ntprs, /* No. of TPRs to write, each with a different
816 rcoulomb and fourierspacing */
817 t_inputinfo *info, /* Contains information about mdp file options */
818 FILE *fp) /* Write the output here */
820 int i, j, d;
821 t_inputrec *ir;
822 t_state state;
823 gmx_mtop_t mtop;
824 real nlist_buffer; /* Thickness of the buffer regions for PME-switch potentials */
825 char buf[200];
826 rvec box_size;
827 gmx_bool bNote = FALSE;
828 real add; /* Add this to rcoul for the next test */
829 real fac = 1.0; /* Scaling factor for Coulomb radius */
830 real fourierspacing; /* Basic fourierspacing from tpr */
833 sprintf(buf, "Making benchmark tpr file%s with %s time step%s",
834 *ntprs > 1 ? "s" : "", "%"GMX_PRId64, benchsteps > 1 ? "s" : "");
835 fprintf(stdout, buf, benchsteps);
836 if (statesteps > 0)
838 sprintf(buf, " (adding %s steps from checkpoint file)", "%"GMX_PRId64);
839 fprintf(stdout, buf, statesteps);
840 benchsteps += statesteps;
842 fprintf(stdout, ".\n");
845 snew(ir, 1);
846 read_tpx_state(fn_sim_tpr, ir, &state, NULL, &mtop);
848 /* Check if some kind of PME was chosen */
849 if (EEL_PME(ir->coulombtype) == FALSE)
851 gmx_fatal(FARGS, "Can only do optimizations for simulations with %s electrostatics.",
852 EELTYPE(eelPME));
855 /* Check if rcoulomb == rlist, which is necessary for plain PME. */
856 if ( (ir->cutoff_scheme != ecutsVERLET) &&
857 (eelPME == ir->coulombtype) && !(ir->rcoulomb == ir->rlist))
859 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to rlist (%f).",
860 EELTYPE(eelPME), ir->rcoulomb, ir->rlist);
862 /* For other PME types, rcoulomb is allowed to be smaller than rlist */
863 else if (ir->rcoulomb > ir->rlist)
865 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)",
866 EELTYPE(ir->coulombtype), ir->rcoulomb, ir->rlist);
869 if (bScaleRvdw && ir->rvdw != ir->rcoulomb)
871 fprintf(stdout, "NOTE: input rvdw != rcoulomb, will not scale rvdw\n");
872 bScaleRvdw = FALSE;
875 /* Reduce the number of steps for the benchmarks */
876 info->orig_sim_steps = ir->nsteps;
877 ir->nsteps = benchsteps;
878 /* We must not use init_step from the input tpr file for the benchmarks */
879 info->orig_init_step = ir->init_step;
880 ir->init_step = 0;
882 /* For PME-switch potentials, keep the radial distance of the buffer region */
883 nlist_buffer = ir->rlist - ir->rcoulomb;
885 /* Determine length of triclinic box vectors */
886 for (d = 0; d < DIM; d++)
888 box_size[d] = 0;
889 for (i = 0; i < DIM; i++)
891 box_size[d] += state.box[d][i]*state.box[d][i];
893 box_size[d] = sqrt(box_size[d]);
896 if (ir->fourier_spacing > 0)
898 info->fsx[0] = ir->fourier_spacing;
899 info->fsy[0] = ir->fourier_spacing;
900 info->fsz[0] = ir->fourier_spacing;
902 else
904 /* Reconstruct fourierspacing per dimension from the number of grid points and box size */
905 info->fsx[0] = box_size[XX]/ir->nkx;
906 info->fsy[0] = box_size[YY]/ir->nky;
907 info->fsz[0] = box_size[ZZ]/ir->nkz;
910 /* If no value for the fourierspacing was provided on the command line, we
911 * use the reconstruction from the tpr file */
912 if (ir->fourier_spacing > 0)
914 /* Use the spacing from the tpr */
915 fourierspacing = ir->fourier_spacing;
917 else
919 /* Use the maximum observed spacing */
920 fourierspacing = max(max(info->fsx[0], info->fsy[0]), info->fsz[0]);
923 fprintf(stdout, "Calculating PME grid points on the basis of a fourierspacing of %f nm\n", fourierspacing);
925 /* For performance comparisons the number of particles is useful to have */
926 fprintf(fp, " Number of particles : %d\n", mtop.natoms);
928 /* Print information about settings of which some are potentially modified: */
929 fprintf(fp, " Coulomb type : %s\n", EELTYPE(ir->coulombtype));
930 fprintf(fp, " Grid spacing x y z : %f %f %f\n",
931 box_size[XX]/ir->nkx, box_size[YY]/ir->nky, box_size[ZZ]/ir->nkz);
932 fprintf(fp, " Van der Waals type : %s\n", EVDWTYPE(ir->vdwtype));
933 if (EVDW_SWITCHED(ir->vdwtype))
935 fprintf(fp, " rvdw_switch : %f nm\n", ir->rvdw_switch);
937 if (EPME_SWITCHED(ir->coulombtype))
939 fprintf(fp, " rlist : %f nm\n", ir->rlist);
941 if (ir->rlistlong != max_cutoff(ir->rvdw, ir->rcoulomb))
943 fprintf(fp, " rlistlong : %f nm\n", ir->rlistlong);
946 /* Print a descriptive line about the tpr settings tested */
947 fprintf(fp, "\nWill try these real/reciprocal workload settings:\n");
948 fprintf(fp, " No. scaling rcoulomb");
949 fprintf(fp, " nkx nky nkz");
950 fprintf(fp, " spacing");
951 if (evdwCUT == ir->vdwtype)
953 fprintf(fp, " rvdw");
955 if (EPME_SWITCHED(ir->coulombtype))
957 fprintf(fp, " rlist");
959 if (ir->rlistlong != max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb)) )
961 fprintf(fp, " rlistlong");
963 fprintf(fp, " tpr file\n");
965 /* Loop to create the requested number of tpr input files */
966 for (j = 0; j < *ntprs; j++)
968 /* The first .tpr is the provided one, just need to modify nsteps,
969 * so skip the following block */
970 if (j != 0)
972 /* Determine which Coulomb radii rc to use in the benchmarks */
973 add = (rmax-rmin)/(*ntprs-1);
974 if (is_equal(rmin, info->rcoulomb[0]))
976 ir->rcoulomb = rmin + j*add;
978 else if (is_equal(rmax, info->rcoulomb[0]))
980 ir->rcoulomb = rmin + (j-1)*add;
982 else
984 /* rmin != rcoul != rmax, ergo test between rmin and rmax */
985 add = (rmax-rmin)/(*ntprs-2);
986 ir->rcoulomb = rmin + (j-1)*add;
989 /* Determine the scaling factor fac */
990 fac = ir->rcoulomb/info->rcoulomb[0];
992 /* Scale the Fourier grid spacing */
993 ir->nkx = ir->nky = ir->nkz = 0;
994 calc_grid(NULL, state.box, fourierspacing*fac, &ir->nkx, &ir->nky, &ir->nkz);
996 /* Adjust other radii since various conditions neet to be fulfilled */
997 if (eelPME == ir->coulombtype)
999 /* plain PME, rcoulomb must be equal to rlist */
1000 ir->rlist = ir->rcoulomb;
1002 else
1004 /* rlist must be >= rcoulomb, we keep the size of the buffer region */
1005 ir->rlist = ir->rcoulomb + nlist_buffer;
1008 if (bScaleRvdw && evdwCUT == ir->vdwtype)
1010 /* For vdw cutoff, rvdw >= rlist */
1011 ir->rvdw = max(info->rvdw[0], ir->rlist);
1014 ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb));
1016 } /* end of "if (j != 0)" */
1018 /* for j==0: Save the original settings
1019 * for j >0: Save modified radii and Fourier grids */
1020 info->rcoulomb[j] = ir->rcoulomb;
1021 info->rvdw[j] = ir->rvdw;
1022 info->nkx[j] = ir->nkx;
1023 info->nky[j] = ir->nky;
1024 info->nkz[j] = ir->nkz;
1025 info->rlist[j] = ir->rlist;
1026 info->rlistlong[j] = ir->rlistlong;
1027 info->fsx[j] = fac*fourierspacing;
1028 info->fsy[j] = fac*fourierspacing;
1029 info->fsz[j] = fac*fourierspacing;
1031 /* Write the benchmark tpr file */
1032 strncpy(fn_bench_tprs[j], fn_sim_tpr, strlen(fn_sim_tpr)-strlen(".tpr"));
1033 sprintf(buf, "_bench%.2d.tpr", j);
1034 strcat(fn_bench_tprs[j], buf);
1035 fprintf(stdout, "Writing benchmark tpr %s with nsteps=", fn_bench_tprs[j]);
1036 fprintf(stdout, "%"GMX_PRId64, ir->nsteps);
1037 if (j > 0)
1039 fprintf(stdout, ", scaling factor %f\n", fac);
1041 else
1043 fprintf(stdout, ", unmodified settings\n");
1046 write_tpx_state(fn_bench_tprs[j], ir, &state, &mtop);
1048 /* Write information about modified tpr settings to log file */
1049 fprintf(fp, "%4d%10f%10f", j, fac, ir->rcoulomb);
1050 fprintf(fp, "%5d%5d%5d", ir->nkx, ir->nky, ir->nkz);
1051 fprintf(fp, " %9f ", info->fsx[j]);
1052 if (evdwCUT == ir->vdwtype)
1054 fprintf(fp, "%10f", ir->rvdw);
1056 if (EPME_SWITCHED(ir->coulombtype))
1058 fprintf(fp, "%10f", ir->rlist);
1060 if (info->rlistlong[0] != max_cutoff(info->rlist[0], max_cutoff(info->rvdw[0], info->rcoulomb[0])) )
1062 fprintf(fp, "%10f", ir->rlistlong);
1064 fprintf(fp, " %-14s\n", fn_bench_tprs[j]);
1066 /* Make it clear to the user that some additional settings were modified */
1067 if (!is_equal(ir->rvdw, info->rvdw[0])
1068 || !is_equal(ir->rlistlong, info->rlistlong[0]) )
1070 bNote = TRUE;
1073 if (bNote)
1075 fprintf(fp, "\nNote that in addition to the Coulomb radius and the Fourier grid\n"
1076 "other input settings were also changed (see table above).\n"
1077 "Please check if the modified settings are appropriate.\n");
1079 fflush(stdout);
1080 fflush(fp);
1081 sfree(ir);
1085 /* Rename the files we want to keep to some meaningful filename and
1086 * delete the rest */
1087 static void cleanup(const t_filenm *fnm, int nfile, int k, int nnodes,
1088 int nPMEnodes, int nr, gmx_bool bKeepStderr)
1090 char numstring[STRLEN];
1091 char newfilename[STRLEN];
1092 const char *fn = NULL;
1093 int i;
1094 const char *opt;
1097 fprintf(stdout, "Cleaning up, deleting benchmark temp files ...\n");
1099 for (i = 0; i < nfile; i++)
1101 opt = (char *)fnm[i].opt;
1102 if (strcmp(opt, "-p") == 0)
1104 /* do nothing; keep this file */
1107 else if (strcmp(opt, "-bg") == 0)
1109 /* Give the log file a nice name so one can later see which parameters were used */
1110 numstring[0] = '\0';
1111 if (nr > 0)
1113 sprintf(numstring, "_%d", nr);
1115 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", opt2fn("-bg", nfile, fnm), k, nnodes, nPMEnodes, numstring);
1116 if (gmx_fexist(opt2fn("-bg", nfile, fnm)))
1118 fprintf(stdout, "renaming log file to %s\n", newfilename);
1119 make_backup(newfilename);
1120 rename(opt2fn("-bg", nfile, fnm), newfilename);
1123 else if (strcmp(opt, "-err") == 0)
1125 /* This file contains the output of stderr. We want to keep it in
1126 * cases where there have been problems. */
1127 fn = opt2fn(opt, nfile, fnm);
1128 numstring[0] = '\0';
1129 if (nr > 0)
1131 sprintf(numstring, "_%d", nr);
1133 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", fn, k, nnodes, nPMEnodes, numstring);
1134 if (gmx_fexist(fn))
1136 if (bKeepStderr)
1138 fprintf(stdout, "Saving stderr output in %s\n", newfilename);
1139 make_backup(newfilename);
1140 rename(fn, newfilename);
1142 else
1144 fprintf(stdout, "Deleting %s\n", fn);
1145 remove(fn);
1149 /* Delete the files which are created for each benchmark run: (options -b*) */
1150 else if ( (0 == strncmp(opt, "-b", 2)) && (opt2bSet(opt, nfile, fnm) || !is_optional(&fnm[i])) )
1152 fn = opt2fn(opt, nfile, fnm);
1153 if (gmx_fexist(fn))
1155 fprintf(stdout, "Deleting %s\n", fn);
1156 remove(fn);
1163 /* Returns the largest common factor of n1 and n2 */
1164 static int largest_common_factor(int n1, int n2)
1166 int factor, nmax;
1168 nmax = min(n1, n2);
1169 for (factor = nmax; factor > 0; factor--)
1171 if (0 == (n1 % factor) && 0 == (n2 % factor) )
1173 return(factor);
1176 return 0; /* one for the compiler */
1179 enum {
1180 eNpmeAuto, eNpmeAll, eNpmeReduced, eNpmeSubset, eNpmeNr
1183 /* Create a list of numbers of PME nodes to test */
1184 static void make_npme_list(
1185 const char *npmevalues_opt, /* Make a complete list with all
1186 * possibilities or a short list that keeps only
1187 * reasonable numbers of PME nodes */
1188 int *nentries, /* Number of entries we put in the nPMEnodes list */
1189 int *nPMEnodes[], /* Each entry contains the value for -npme */
1190 int nnodes, /* Total number of nodes to do the tests on */
1191 int minPMEnodes, /* Minimum number of PME nodes */
1192 int maxPMEnodes) /* Maximum number of PME nodes */
1194 int i, npme, npp;
1195 int min_factor = 1; /* We request that npp and npme have this minimal
1196 * largest common factor (depends on npp) */
1197 int nlistmax; /* Max. list size */
1198 int nlist; /* Actual number of entries in list */
1199 int eNPME = 0;
1202 /* Do we need to check all possible values for -npme or is a reduced list enough? */
1203 if (0 == strcmp(npmevalues_opt, "all") )
1205 eNPME = eNpmeAll;
1207 else if (0 == strcmp(npmevalues_opt, "subset") )
1209 eNPME = eNpmeSubset;
1211 else /* "auto" or "range" */
1213 if (nnodes <= 64)
1215 eNPME = eNpmeAll;
1217 else if (nnodes < 128)
1219 eNPME = eNpmeReduced;
1221 else
1223 eNPME = eNpmeSubset;
1227 /* Calculate how many entries we could possibly have (in case of -npme all) */
1228 if (nnodes > 2)
1230 nlistmax = maxPMEnodes - minPMEnodes + 3;
1231 if (0 == minPMEnodes)
1233 nlistmax--;
1236 else
1238 nlistmax = 1;
1241 /* Now make the actual list which is at most of size nlist */
1242 snew(*nPMEnodes, nlistmax);
1243 nlist = 0; /* start counting again, now the real entries in the list */
1244 for (i = 0; i < nlistmax - 2; i++)
1246 npme = maxPMEnodes - i;
1247 npp = nnodes-npme;
1248 switch (eNPME)
1250 case eNpmeAll:
1251 min_factor = 1;
1252 break;
1253 case eNpmeReduced:
1254 min_factor = 2;
1255 break;
1256 case eNpmeSubset:
1257 /* For 2d PME we want a common largest factor of at least the cube
1258 * root of the number of PP nodes */
1259 min_factor = (int) pow(npp, 1.0/3.0);
1260 break;
1261 default:
1262 gmx_fatal(FARGS, "Unknown option for eNPME in make_npme_list");
1263 break;
1265 if (largest_common_factor(npp, npme) >= min_factor)
1267 (*nPMEnodes)[nlist] = npme;
1268 nlist++;
1271 /* We always test 0 PME nodes and the automatic number */
1272 *nentries = nlist + 2;
1273 (*nPMEnodes)[nlist ] = 0;
1274 (*nPMEnodes)[nlist+1] = -1;
1276 fprintf(stderr, "Will try the following %d different values for -npme:\n", *nentries);
1277 for (i = 0; i < *nentries-1; i++)
1279 fprintf(stderr, "%d, ", (*nPMEnodes)[i]);
1281 fprintf(stderr, "and %d (auto).\n", (*nPMEnodes)[*nentries-1]);
1285 /* Allocate memory to store the performance data */
1286 static void init_perfdata(t_perf *perfdata[], int ntprs, int datasets, int repeats)
1288 int i, j, k;
1291 for (k = 0; k < ntprs; k++)
1293 snew(perfdata[k], datasets);
1294 for (i = 0; i < datasets; i++)
1296 for (j = 0; j < repeats; j++)
1298 snew(perfdata[k][i].Gcycles, repeats);
1299 snew(perfdata[k][i].ns_per_day, repeats);
1300 snew(perfdata[k][i].PME_f_load, repeats);
1307 /* Check for errors on mdrun -h */
1308 static void make_sure_it_runs(char *mdrun_cmd_line, int length, FILE *fp)
1310 char *command, *msg;
1311 int ret;
1314 snew(command, length + 15);
1315 snew(msg, length + 500);
1317 fprintf(stdout, "Making sure the benchmarks can be executed ...\n");
1318 /* FIXME: mdrun -h no longer actually does anything useful.
1319 * It unconditionally prints the help, ignoring all other options. */
1320 sprintf(command, "%s-h -quiet", mdrun_cmd_line);
1321 ret = gmx_system_call(command);
1323 if (0 != ret)
1325 /* To prevent confusion, do not again issue a gmx_fatal here since we already
1326 * get the error message from mdrun itself */
1327 sprintf(msg, "Cannot run the benchmark simulations! Please check the error message of\n"
1328 "mdrun for the source of the problem. Did you provide a command line\n"
1329 "argument that neither g_tune_pme nor mdrun understands? Offending command:\n"
1330 "\n%s\n\n", command);
1332 fprintf(stderr, "%s", msg);
1333 sep_line(fp);
1334 fprintf(fp, "%s", msg);
1336 exit(ret);
1339 sfree(command);
1340 sfree(msg );
1344 static void do_the_tests(
1345 FILE *fp, /* General g_tune_pme output file */
1346 char **tpr_names, /* Filenames of the input files to test */
1347 int maxPMEnodes, /* Max fraction of nodes to use for PME */
1348 int minPMEnodes, /* Min fraction of nodes to use for PME */
1349 int npme_fixed, /* If >= -1, test fixed number of PME
1350 * nodes only */
1351 const char *npmevalues_opt, /* Which -npme values should be tested */
1352 t_perf **perfdata, /* Here the performace data is stored */
1353 int *pmeentries, /* Entries in the nPMEnodes list */
1354 int repeats, /* Repeat each test this often */
1355 int nnodes, /* Total number of nodes = nPP + nPME */
1356 int nr_tprs, /* Total number of tpr files to test */
1357 gmx_bool bThreads, /* Threads or MPI? */
1358 char *cmd_mpirun, /* mpirun command string */
1359 char *cmd_np, /* "-np", "-n", whatever mpirun needs */
1360 char *cmd_mdrun, /* mdrun command string */
1361 char *cmd_args_bench, /* arguments for mdrun in a string */
1362 const t_filenm *fnm, /* List of filenames from command line */
1363 int nfile, /* Number of files specified on the cmdl. */
1364 int presteps, /* DLB equilibration steps, is checked */
1365 gmx_int64_t cpt_steps) /* Time step counter in the checkpoint */
1367 int i, nr, k, ret, count = 0, totaltests;
1368 int *nPMEnodes = NULL;
1369 t_perf *pd = NULL;
1370 int cmdline_length;
1371 char *command, *cmd_stub;
1372 char buf[STRLEN];
1373 gmx_bool bResetProblem = FALSE;
1374 gmx_bool bFirst = TRUE;
1377 /* This string array corresponds to the eParselog enum type at the start
1378 * of this file */
1379 const char* ParseLog[] = {
1380 "OK.",
1381 "Logfile not found!",
1382 "No timings, logfile truncated?",
1383 "Run was terminated.",
1384 "Counters were not reset properly.",
1385 "No DD grid found for these settings.",
1386 "TPX version conflict!",
1387 "mdrun was not started in parallel!",
1388 "Number of PP nodes has a prime factor that is too large.",
1389 "An error occured."
1391 char str_PME_f_load[13];
1394 /* Allocate space for the mdrun command line. 100 extra characters should
1395 be more than enough for the -npme etcetera arguments */
1396 cmdline_length = strlen(cmd_mpirun)
1397 + strlen(cmd_np)
1398 + strlen(cmd_mdrun)
1399 + strlen(cmd_args_bench)
1400 + strlen(tpr_names[0]) + 100;
1401 snew(command, cmdline_length);
1402 snew(cmd_stub, cmdline_length);
1404 /* Construct the part of the command line that stays the same for all tests: */
1405 if (bThreads)
1407 sprintf(cmd_stub, "%s%s", cmd_mdrun, cmd_np);
1409 else
1411 sprintf(cmd_stub, "%s%s%s ", cmd_mpirun, cmd_np, cmd_mdrun);
1414 /* Create a list of numbers of PME nodes to test */
1415 if (npme_fixed < -1)
1417 make_npme_list(npmevalues_opt, pmeentries, &nPMEnodes,
1418 nnodes, minPMEnodes, maxPMEnodes);
1420 else
1422 *pmeentries = 1;
1423 snew(nPMEnodes, 1);
1424 nPMEnodes[0] = npme_fixed;
1425 fprintf(stderr, "Will use a fixed number of %d PME-only nodes.\n", nPMEnodes[0]);
1428 if (0 == repeats)
1430 fprintf(fp, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1431 ffclose(fp);
1432 finalize(opt2fn("-p", nfile, fnm));
1433 exit(0);
1436 /* Allocate one dataset for each tpr input file: */
1437 init_perfdata(perfdata, nr_tprs, *pmeentries, repeats);
1439 /*****************************************/
1440 /* Main loop over all tpr files to test: */
1441 /*****************************************/
1442 totaltests = nr_tprs*(*pmeentries)*repeats;
1443 for (k = 0; k < nr_tprs; k++)
1445 fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
1446 fprintf(fp, "PME nodes Gcycles ns/day PME/f Remark\n");
1447 /* Loop over various numbers of PME nodes: */
1448 for (i = 0; i < *pmeentries; i++)
1450 pd = &perfdata[k][i];
1452 /* Loop over the repeats for each scenario: */
1453 for (nr = 0; nr < repeats; nr++)
1455 pd->nPMEnodes = nPMEnodes[i];
1457 /* Add -npme and -s to the command line and save it. Note that
1458 * the -passall (if set) options requires cmd_args_bench to be
1459 * at the end of the command line string */
1460 snew(pd->mdrun_cmd_line, cmdline_length);
1461 sprintf(pd->mdrun_cmd_line, "%s-npme %d -s %s %s",
1462 cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);
1464 /* To prevent that all benchmarks fail due to a show-stopper argument
1465 * on the mdrun command line, we make a quick check with mdrun -h first */
1466 if (bFirst)
1468 make_sure_it_runs(pd->mdrun_cmd_line, cmdline_length, fp);
1470 bFirst = FALSE;
1472 /* Do a benchmark simulation: */
1473 if (repeats > 1)
1475 sprintf(buf, ", pass %d/%d", nr+1, repeats);
1477 else
1479 buf[0] = '\0';
1481 fprintf(stdout, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
1482 (100.0*count)/totaltests,
1483 k+1, nr_tprs, i+1, *pmeentries, buf);
1484 make_backup(opt2fn("-err", nfile, fnm));
1485 sprintf(command, "%s 1> /dev/null 2>%s", pd->mdrun_cmd_line, opt2fn("-err", nfile, fnm));
1486 fprintf(stdout, "%s\n", pd->mdrun_cmd_line);
1487 gmx_system_call(command);
1489 /* Collect the performance data from the log file; also check stderr
1490 * for fatal errors */
1491 ret = parse_logfile(opt2fn("-bg", nfile, fnm), opt2fn("-err", nfile, fnm),
1492 pd, nr, presteps, cpt_steps, nnodes);
1493 if ((presteps > 0) && (ret == eParselogResetProblem))
1495 bResetProblem = TRUE;
1498 if (-1 == pd->nPMEnodes)
1500 sprintf(buf, "(%3d)", pd->guessPME);
1502 else
1504 sprintf(buf, " ");
1507 /* Nicer output */
1508 if (pd->PME_f_load[nr] > 0.0)
1510 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load[nr]);
1512 else
1514 sprintf(str_PME_f_load, "%s", " - ");
1517 /* Write the data we got to disk */
1518 fprintf(fp, "%4d%s %12.3f %12.3f %s %s", pd->nPMEnodes,
1519 buf, pd->Gcycles[nr], pd->ns_per_day[nr], str_PME_f_load, ParseLog[ret]);
1520 if (!(ret == eParselogOK || ret == eParselogNoDDGrid || ret == eParselogNotFound) )
1522 fprintf(fp, " Check %s file for problems.", ret == eParselogFatal ? "err" : "log");
1524 fprintf(fp, "\n");
1525 fflush(fp);
1526 count++;
1528 /* Do some cleaning up and delete the files we do not need any more */
1529 cleanup(fnm, nfile, k, nnodes, pd->nPMEnodes, nr, ret == eParselogFatal);
1531 /* If the first run with this number of processors already failed, do not try again: */
1532 if (pd->Gcycles[0] <= 0.0 && repeats > 1)
1534 fprintf(stdout, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
1535 count += repeats-(nr+1);
1536 break;
1538 } /* end of repeats loop */
1539 } /* end of -npme loop */
1540 } /* end of tpr file loop */
1542 if (bResetProblem)
1544 sep_line(fp);
1545 fprintf(fp, "WARNING: The cycle and time step counters could not be reset properly. ");
1546 sep_line(fp);
1548 sfree(command);
1549 sfree(cmd_stub);
1553 static void check_input(
1554 int nnodes,
1555 int repeats,
1556 int *ntprs,
1557 real *rmin,
1558 real rcoulomb,
1559 real *rmax,
1560 real maxPMEfraction,
1561 real minPMEfraction,
1562 int npme_fixed,
1563 gmx_int64_t bench_nsteps,
1564 const t_filenm *fnm,
1565 int nfile,
1566 int sim_part,
1567 int presteps,
1568 int npargs,
1569 t_pargs *pa)
1571 int old;
1574 /* Make sure the input file exists */
1575 if (!gmx_fexist(opt2fn("-s", nfile, fnm)))
1577 gmx_fatal(FARGS, "File %s not found.", opt2fn("-s", nfile, fnm));
1580 /* Make sure that the checkpoint file is not overwritten during benchmarking */
1581 if ( (0 == strcmp(opt2fn("-cpi", nfile, fnm), opt2fn("-bcpo", nfile, fnm)) ) && (sim_part > 1) )
1583 gmx_fatal(FARGS, "Checkpoint input (-cpi) and benchmark checkpoint output (-bcpo) files must not be identical.\n"
1584 "The checkpoint input file must not be overwritten during the benchmarks.\n");
1587 /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
1588 if (repeats < 0)
1590 gmx_fatal(FARGS, "Number of repeats < 0!");
1593 /* Check number of nodes */
1594 if (nnodes < 1)
1596 gmx_fatal(FARGS, "Number of nodes/threads must be a positive integer.");
1599 /* Automatically choose -ntpr if not set */
1600 if (*ntprs < 1)
1602 if (nnodes < 16)
1604 *ntprs = 1;
1606 else
1608 *ntprs = 3;
1609 /* Set a reasonable scaling factor for rcoulomb */
1610 if (*rmax <= 0)
1612 *rmax = rcoulomb * 1.2;
1615 fprintf(stderr, "Will test %d tpr file%s.\n", *ntprs, *ntprs == 1 ? "" : "s");
1617 else
1619 if (1 == *ntprs)
1621 fprintf(stderr, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n");
1625 /* Make shure that rmin <= rcoulomb <= rmax */
1626 if (*rmin <= 0)
1628 *rmin = rcoulomb;
1630 if (*rmax <= 0)
1632 *rmax = rcoulomb;
1634 if (!(*rmin <= *rmax) )
1636 gmx_fatal(FARGS, "Please choose the Coulomb radii such that rmin <= rmax.\n"
1637 "rmin = %g, rmax = %g, actual rcoul from .tpr file = %g\n", *rmin, *rmax, rcoulomb);
1639 /* Add test scenarios if rmin or rmax were set */
1640 if (*ntprs <= 2)
1642 if (!is_equal(*rmin, rcoulomb) && (*ntprs == 1) )
1644 (*ntprs)++;
1645 fprintf(stderr, "NOTE: Setting -rmin to %g changed -ntpr to %d\n",
1646 *rmin, *ntprs);
1648 if (!is_equal(*rmax, rcoulomb) && (*ntprs == 1) )
1650 (*ntprs)++;
1651 fprintf(stderr, "NOTE: Setting -rmax to %g changed -ntpr to %d\n",
1652 *rmax, *ntprs);
1655 old = *ntprs;
1656 /* If one of rmin, rmax is set, we need 2 tpr files at minimum */
1657 if (!is_equal(*rmax, rcoulomb) || !is_equal(*rmin, rcoulomb) )
1659 *ntprs = max(*ntprs, 2);
1662 /* If both rmin, rmax are set, we need 3 tpr files at minimum */
1663 if (!is_equal(*rmax, rcoulomb) && !is_equal(*rmin, rcoulomb) )
1665 *ntprs = max(*ntprs, 3);
1668 if (old != *ntprs)
1670 fprintf(stderr, "NOTE: Your rmin, rmax setting changed -ntpr to %d\n", *ntprs);
1673 if (*ntprs > 1)
1675 if (is_equal(*rmin, rcoulomb) && is_equal(rcoulomb, *rmax)) /* We have just a single rc */
1677 fprintf(stderr, "WARNING: Resetting -ntpr to 1 since no Coulomb radius scaling is requested.\n"
1678 "Please set rmin < rmax to test Coulomb radii in the [rmin, rmax] interval\n"
1679 "with correspondingly adjusted PME grid settings\n");
1680 *ntprs = 1;
1684 /* Check whether max and min fraction are within required values */
1685 if (maxPMEfraction > 0.5 || maxPMEfraction < 0)
1687 gmx_fatal(FARGS, "-max must be between 0 and 0.5");
1689 if (minPMEfraction > 0.5 || minPMEfraction < 0)
1691 gmx_fatal(FARGS, "-min must be between 0 and 0.5");
1693 if (maxPMEfraction < minPMEfraction)
1695 gmx_fatal(FARGS, "-max must be larger or equal to -min");
1698 /* Check whether the number of steps - if it was set - has a reasonable value */
1699 if (bench_nsteps < 0)
1701 gmx_fatal(FARGS, "Number of steps must be positive.");
1704 if (bench_nsteps > 10000 || bench_nsteps < 100)
1706 fprintf(stderr, "WARNING: steps=");
1707 fprintf(stderr, "%"GMX_PRId64, bench_nsteps);
1708 fprintf(stderr, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps < 100) ? "few" : "many");
1711 if (presteps < 0)
1713 gmx_fatal(FARGS, "Cannot have a negative number of presteps.\n");
1716 /* Check for rcoulomb scaling if more than one .tpr file is tested */
1717 if (*ntprs > 1)
1719 if (*rmin/rcoulomb < 0.75 || *rmax/rcoulomb > 1.25)
1721 fprintf(stderr, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1725 /* If a fixed number of PME nodes is set we do rcoulomb and PME gird tuning
1726 * only. We need to check whether the requested number of PME-only nodes
1727 * makes sense. */
1728 if (npme_fixed > -1)
1730 /* No more than 50% of all nodes can be assigned as PME-only nodes. */
1731 if (2*npme_fixed > nnodes)
1733 gmx_fatal(FARGS, "Cannot have more than %d PME-only nodes for a total of %d nodes (you chose %d).\n",
1734 nnodes/2, nnodes, npme_fixed);
1736 if ((npme_fixed > 0) && (5*npme_fixed < nnodes))
1738 fprintf(stderr, "WARNING: Only %g percent of the nodes are assigned as PME-only nodes.\n",
1739 100.0*((real)npme_fixed / (real)nnodes));
1741 if (opt2parg_bSet("-min", npargs, pa) || opt2parg_bSet("-max", npargs, pa))
1743 fprintf(stderr, "NOTE: The -min, -max, and -npme options have no effect when a\n"
1744 " fixed number of PME-only nodes is requested with -fix.\n");
1750 /* Returns TRUE when "opt" is needed at launch time */
1751 static gmx_bool is_launch_file(char *opt, gmx_bool bSet)
1753 /* Apart from the input .tpr and the output log files we need all options that
1754 * were set on the command line and that do not start with -b */
1755 if (0 == strncmp(opt, "-b", 2) || 0 == strncmp(opt, "-s", 2)
1756 || 0 == strncmp(opt, "-err", 4) || 0 == strncmp(opt, "-p", 2) )
1758 return FALSE;
1761 return bSet;
1765 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1766 static gmx_bool is_bench_file(char *opt, gmx_bool bSet, gmx_bool bOptional, gmx_bool bIsOutput)
1768 /* Apart from the input .tpr, all files starting with "-b" are for
1769 * _b_enchmark files exclusively */
1770 if (0 == strncmp(opt, "-s", 2))
1772 return FALSE;
1775 if (0 == strncmp(opt, "-b", 2) || 0 == strncmp(opt, "-s", 2))
1777 if (!bOptional || bSet)
1779 return TRUE;
1781 else
1783 return FALSE;
1786 else
1788 if (bIsOutput)
1790 return FALSE;
1792 else
1794 if (bSet) /* These are additional input files like -cpi -ei */
1796 return TRUE;
1798 else
1800 return FALSE;
1807 /* Adds 'buf' to 'str' */
1808 static void add_to_string(char **str, char *buf)
1810 int len;
1813 len = strlen(*str) + strlen(buf) + 1;
1814 srenew(*str, len);
1815 strcat(*str, buf);
1819 /* Create the command line for the benchmark as well as for the real run */
1820 static void create_command_line_snippets(
1821 gmx_bool bAppendFiles,
1822 gmx_bool bKeepAndNumCPT,
1823 gmx_bool bResetHWay,
1824 int presteps,
1825 int nfile,
1826 t_filenm fnm[],
1827 char *cmd_args_bench[], /* command line arguments for benchmark runs */
1828 char *cmd_args_launch[], /* command line arguments for simulation run */
1829 char extra_args[]) /* Add this to the end of the command line */
1831 int i;
1832 char *opt;
1833 const char *name;
1834 char strbuf[STRLEN];
1837 /* strlen needs at least '\0' as a string: */
1838 snew(*cmd_args_bench, 1);
1839 snew(*cmd_args_launch, 1);
1840 *cmd_args_launch[0] = '\0';
1841 *cmd_args_bench[0] = '\0';
1844 /*******************************************/
1845 /* 1. Process other command line arguments */
1846 /*******************************************/
1847 if (presteps > 0)
1849 /* Add equilibration steps to benchmark options */
1850 sprintf(strbuf, "-resetstep %d ", presteps);
1851 add_to_string(cmd_args_bench, strbuf);
1853 /* These switches take effect only at launch time */
1854 if (FALSE == bAppendFiles)
1856 add_to_string(cmd_args_launch, "-noappend ");
1858 if (bKeepAndNumCPT)
1860 add_to_string(cmd_args_launch, "-cpnum ");
1862 if (bResetHWay)
1864 add_to_string(cmd_args_launch, "-resethway ");
1867 /********************/
1868 /* 2. Process files */
1869 /********************/
1870 for (i = 0; i < nfile; i++)
1872 opt = (char *)fnm[i].opt;
1873 name = opt2fn(opt, nfile, fnm);
1875 /* Strbuf contains the options, now let's sort out where we need that */
1876 sprintf(strbuf, "%s %s ", opt, name);
1878 if (is_bench_file(opt, opt2bSet(opt, nfile, fnm), is_optional(&fnm[i]), is_output(&fnm[i])) )
1880 /* All options starting with -b* need the 'b' removed,
1881 * therefore overwrite strbuf */
1882 if (0 == strncmp(opt, "-b", 2))
1884 sprintf(strbuf, "-%s %s ", &opt[2], name);
1887 add_to_string(cmd_args_bench, strbuf);
1890 if (is_launch_file(opt, opt2bSet(opt, nfile, fnm)) )
1892 add_to_string(cmd_args_launch, strbuf);
1896 add_to_string(cmd_args_bench, extra_args);
1897 add_to_string(cmd_args_launch, extra_args);
1901 /* Set option opt */
1902 static void setopt(const char *opt, int nfile, t_filenm fnm[])
1904 int i;
1906 for (i = 0; (i < nfile); i++)
1908 if (strcmp(opt, fnm[i].opt) == 0)
1910 fnm[i].flag |= ffSET;
1916 /* This routine inspects the tpr file and ...
1917 * 1. checks for output files that get triggered by a tpr option. These output
1918 * files are marked as 'set' to allow for a proper cleanup after each
1919 * tuning run.
1920 * 2. returns the PME:PP load ratio
1921 * 3. returns rcoulomb from the tpr */
1922 static float inspect_tpr(int nfile, t_filenm fnm[], real *rcoulomb)
1924 gmx_bool bPull; /* Is pulling requested in .tpr file? */
1925 gmx_bool bTpi; /* Is test particle insertion requested? */
1926 gmx_bool bFree; /* Is a free energy simulation requested? */
1927 gmx_bool bNM; /* Is a normal mode analysis requested? */
1928 t_inputrec ir;
1929 t_state state;
1930 gmx_mtop_t mtop;
1933 /* Check tpr file for options that trigger extra output files */
1934 read_tpx_state(opt2fn("-s", nfile, fnm), &ir, &state, NULL, &mtop);
1935 bPull = (epullNO != ir.ePull);
1936 bFree = (efepNO != ir.efep );
1937 bNM = (eiNM == ir.eI );
1938 bTpi = EI_TPI(ir.eI);
1940 /* Set these output files on the tuning command-line */
1941 if (bPull)
1943 setopt("-pf", nfile, fnm);
1944 setopt("-px", nfile, fnm);
1946 if (bFree)
1948 setopt("-dhdl", nfile, fnm);
1950 if (bTpi)
1952 setopt("-tpi", nfile, fnm);
1953 setopt("-tpid", nfile, fnm);
1955 if (bNM)
1957 setopt("-mtx", nfile, fnm);
1960 *rcoulomb = ir.rcoulomb;
1962 /* Return the estimate for the number of PME nodes */
1963 return pme_load_estimate(&mtop, &ir, state.box);
1967 static void couple_files_options(int nfile, t_filenm fnm[])
1969 int i;
1970 gmx_bool bSet, bBench;
1971 char *opt;
1972 char buf[20];
1975 for (i = 0; i < nfile; i++)
1977 opt = (char *)fnm[i].opt;
1978 bSet = ((fnm[i].flag & ffSET) != 0);
1979 bBench = (0 == strncmp(opt, "-b", 2));
1981 /* Check optional files */
1982 /* If e.g. -eo is set, then -beo also needs to be set */
1983 if (is_optional(&fnm[i]) && bSet && !bBench)
1985 sprintf(buf, "-b%s", &opt[1]);
1986 setopt(buf, nfile, fnm);
1988 /* If -beo is set, then -eo also needs to be! */
1989 if (is_optional(&fnm[i]) && bSet && bBench)
1991 sprintf(buf, "-%s", &opt[2]);
1992 setopt(buf, nfile, fnm);
1998 #define BENCHSTEPS (1000)
2000 int gmx_tune_pme(int argc, char *argv[])
2002 const char *desc[] = {
2003 "For a given number [TT]-np[tt] or [TT]-ntmpi[tt] of processors/threads, [THISMODULE] systematically",
2004 "times [gmx-mdrun] with various numbers of PME-only nodes and determines",
2005 "which setting is fastest. It will also test whether performance can",
2006 "be enhanced by shifting load from the reciprocal to the real space",
2007 "part of the Ewald sum. ",
2008 "Simply pass your [TT].tpr[tt] file to [THISMODULE] together with other options",
2009 "for [gmx-mdrun] as needed.[PAR]",
2010 "Which executables are used can be set in the environment variables",
2011 "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
2012 "will be used as defaults. Note that for certain MPI frameworks you",
2013 "need to provide a machine- or hostfile. This can also be passed",
2014 "via the MPIRUN variable, e.g.[PAR]",
2015 "[TT]export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"[tt][PAR]",
2016 "Please call [THISMODULE] with the normal options you would pass to",
2017 "[gmx-mdrun] and add [TT]-np[tt] for the number of processors to perform the",
2018 "tests on, or [TT]-ntmpi[tt] for the number of threads. You can also add [TT]-r[tt]",
2019 "to repeat each test several times to get better statistics. [PAR]",
2020 "[THISMODULE] can test various real space / reciprocal space workloads",
2021 "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
2022 "written with enlarged cutoffs and smaller Fourier grids respectively.",
2023 "Typically, the first test (number 0) will be with the settings from the input",
2024 "[TT].tpr[tt] file; the last test (number [TT]ntpr[tt]) will have the Coulomb cutoff",
2025 "specified by [TT]-rmax[tt] with a somwhat smaller PME grid at the same time. ",
2026 "In this last test, the Fourier spacing is multiplied with [TT]rmax[tt]/rcoulomb. ",
2027 "The remaining [TT].tpr[tt] files will have equally-spaced Coulomb radii (and Fourier "
2028 "spacings) between these extremes. [BB]Note[bb] that you can set [TT]-ntpr[tt] to 1",
2029 "if you just seek the optimal number of PME-only nodes; in that case",
2030 "your input [TT].tpr[tt] file will remain unchanged.[PAR]",
2031 "For the benchmark runs, the default of 1000 time steps should suffice for most",
2032 "MD systems. The dynamic load balancing needs about 100 time steps",
2033 "to adapt to local load imbalances, therefore the time step counters",
2034 "are by default reset after 100 steps. For large systems (>1M atoms), as well as ",
2035 "for a higher accuarcy of the measurements, you should set [TT]-resetstep[tt] to a higher value.",
2036 "From the 'DD' load imbalance entries in the md.log output file you",
2037 "can tell after how many steps the load is sufficiently balanced. Example call:[PAR]"
2038 "[TT]gmx tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
2039 "After calling [gmx-mdrun] several times, detailed performance information",
2040 "is available in the output file [TT]perf.out[tt].",
2041 "[BB]Note[bb] that during the benchmarks, a couple of temporary files are written",
2042 "(options [TT]-b[tt]*), these will be automatically deleted after each test.[PAR]",
2043 "If you want the simulation to be started automatically with the",
2044 "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
2047 int nnodes = 1;
2048 int repeats = 2;
2049 int pmeentries = 0; /* How many values for -npme do we actually test for each tpr file */
2050 real maxPMEfraction = 0.50;
2051 real minPMEfraction = 0.25;
2052 int maxPMEnodes, minPMEnodes;
2053 float guessPMEratio; /* guessed PME:PP ratio based on the tpr file */
2054 float guessPMEnodes;
2055 int npme_fixed = -2; /* If >= -1, use only this number
2056 * of PME-only nodes */
2057 int ntprs = 0;
2058 real rmin = 0.0, rmax = 0.0; /* min and max value for rcoulomb if scaling is requested */
2059 real rcoulomb = -1.0; /* Coulomb radius as set in .tpr file */
2060 gmx_bool bScaleRvdw = TRUE;
2061 gmx_int64_t bench_nsteps = BENCHSTEPS;
2062 gmx_int64_t new_sim_nsteps = -1; /* -1 indicates: not set by the user */
2063 gmx_int64_t cpt_steps = 0; /* Step counter in .cpt input file */
2064 int presteps = 100; /* Do a full cycle reset after presteps steps */
2065 gmx_bool bOverwrite = FALSE, bKeepTPR;
2066 gmx_bool bLaunch = FALSE;
2067 char *ExtraArgs = NULL;
2068 char **tpr_names = NULL;
2069 const char *simulation_tpr = NULL;
2070 int best_npme, best_tpr;
2071 int sim_part = 1; /* For benchmarks with checkpoint files */
2072 char bbuf[STRLEN];
2074 /* Default program names if nothing else is found */
2075 char *cmd_mpirun = NULL, *cmd_mdrun = NULL;
2076 char *cmd_args_bench, *cmd_args_launch;
2077 char *cmd_np = NULL;
2079 t_perf **perfdata = NULL;
2080 t_inputinfo *info;
2081 int i;
2082 FILE *fp;
2083 t_commrec *cr;
2085 /* Print out how long the tuning took */
2086 double seconds;
2088 static t_filenm fnm[] = {
2089 /* g_tune_pme */
2090 { efOUT, "-p", "perf", ffWRITE },
2091 { efLOG, "-err", "bencherr", ffWRITE },
2092 { efTPX, "-so", "tuned", ffWRITE },
2093 /* mdrun: */
2094 { efTPX, NULL, NULL, ffREAD },
2095 { efTRN, "-o", NULL, ffWRITE },
2096 { efCOMPRESSED, "-x", NULL, ffOPTWR },
2097 { efCPT, "-cpi", NULL, ffOPTRD },
2098 { efCPT, "-cpo", NULL, ffOPTWR },
2099 { efSTO, "-c", "confout", ffWRITE },
2100 { efEDR, "-e", "ener", ffWRITE },
2101 { efLOG, "-g", "md", ffWRITE },
2102 { efXVG, "-dhdl", "dhdl", ffOPTWR },
2103 { efXVG, "-field", "field", ffOPTWR },
2104 { efXVG, "-table", "table", ffOPTRD },
2105 { efXVG, "-tabletf", "tabletf", ffOPTRD },
2106 { efXVG, "-tablep", "tablep", ffOPTRD },
2107 { efXVG, "-tableb", "table", ffOPTRD },
2108 { efTRX, "-rerun", "rerun", ffOPTRD },
2109 { efXVG, "-tpi", "tpi", ffOPTWR },
2110 { efXVG, "-tpid", "tpidist", ffOPTWR },
2111 { efEDI, "-ei", "sam", ffOPTRD },
2112 { efXVG, "-eo", "edsam", ffOPTWR },
2113 { efXVG, "-devout", "deviatie", ffOPTWR },
2114 { efXVG, "-runav", "runaver", ffOPTWR },
2115 { efXVG, "-px", "pullx", ffOPTWR },
2116 { efXVG, "-pf", "pullf", ffOPTWR },
2117 { efXVG, "-ro", "rotation", ffOPTWR },
2118 { efLOG, "-ra", "rotangles", ffOPTWR },
2119 { efLOG, "-rs", "rotslabs", ffOPTWR },
2120 { efLOG, "-rt", "rottorque", ffOPTWR },
2121 { efMTX, "-mtx", "nm", ffOPTWR },
2122 { efNDX, "-dn", "dipole", ffOPTWR },
2123 /* Output files that are deleted after each benchmark run */
2124 { efTRN, "-bo", "bench", ffWRITE },
2125 { efXTC, "-bx", "bench", ffWRITE },
2126 { efCPT, "-bcpo", "bench", ffWRITE },
2127 { efSTO, "-bc", "bench", ffWRITE },
2128 { efEDR, "-be", "bench", ffWRITE },
2129 { efLOG, "-bg", "bench", ffWRITE },
2130 { efXVG, "-beo", "benchedo", ffOPTWR },
2131 { efXVG, "-bdhdl", "benchdhdl", ffOPTWR },
2132 { efXVG, "-bfield", "benchfld", ffOPTWR },
2133 { efXVG, "-btpi", "benchtpi", ffOPTWR },
2134 { efXVG, "-btpid", "benchtpid", ffOPTWR },
2135 { efXVG, "-bdevout", "benchdev", ffOPTWR },
2136 { efXVG, "-brunav", "benchrnav", ffOPTWR },
2137 { efXVG, "-bpx", "benchpx", ffOPTWR },
2138 { efXVG, "-bpf", "benchpf", ffOPTWR },
2139 { efXVG, "-bro", "benchrot", ffOPTWR },
2140 { efLOG, "-bra", "benchrota", ffOPTWR },
2141 { efLOG, "-brs", "benchrots", ffOPTWR },
2142 { efLOG, "-brt", "benchrott", ffOPTWR },
2143 { efMTX, "-bmtx", "benchn", ffOPTWR },
2144 { efNDX, "-bdn", "bench", ffOPTWR }
2147 gmx_bool bThreads = FALSE;
2149 int nthreads = 1;
2151 const char *procstring[] =
2152 { NULL, "-np", "-n", "none", NULL };
2153 const char *npmevalues_opt[] =
2154 { NULL, "auto", "all", "subset", NULL };
2156 gmx_bool bAppendFiles = TRUE;
2157 gmx_bool bKeepAndNumCPT = FALSE;
2158 gmx_bool bResetCountersHalfWay = FALSE;
2159 gmx_bool bBenchmark = TRUE;
2161 output_env_t oenv = NULL;
2163 t_pargs pa[] = {
2164 /***********************/
2165 /* g_tune_pme options: */
2166 /***********************/
2167 { "-np", FALSE, etINT, {&nnodes},
2168 "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" },
2169 { "-npstring", FALSE, etENUM, {procstring},
2170 "Specify the number of processors to [TT]$MPIRUN[tt] using this string"},
2171 { "-ntmpi", FALSE, etINT, {&nthreads},
2172 "Number of MPI-threads to run the tests on (turns MPI & mpirun off)"},
2173 { "-r", FALSE, etINT, {&repeats},
2174 "Repeat each test this often" },
2175 { "-max", FALSE, etREAL, {&maxPMEfraction},
2176 "Max fraction of PME nodes to test with" },
2177 { "-min", FALSE, etREAL, {&minPMEfraction},
2178 "Min fraction of PME nodes to test with" },
2179 { "-npme", FALSE, etENUM, {npmevalues_opt},
2180 "Within -min and -max, benchmark all possible values for [TT]-npme[tt], or just a reasonable subset. "
2181 "Auto neglects -min and -max and chooses reasonable values around a guess for npme derived from the .tpr"},
2182 { "-fix", FALSE, etINT, {&npme_fixed},
2183 "If >= -1, do not vary the number of PME-only nodes, instead use this fixed value and only vary rcoulomb and the PME grid spacing."},
2184 { "-rmax", FALSE, etREAL, {&rmax},
2185 "If >0, maximal rcoulomb for -ntpr>1 (rcoulomb upscaling results in fourier grid downscaling)" },
2186 { "-rmin", FALSE, etREAL, {&rmin},
2187 "If >0, minimal rcoulomb for -ntpr>1" },
2188 { "-scalevdw", FALSE, etBOOL, {&bScaleRvdw},
2189 "Scale rvdw along with rcoulomb"},
2190 { "-ntpr", FALSE, etINT, {&ntprs},
2191 "Number of [TT].tpr[tt] files to benchmark. Create this many files with different rcoulomb scaling factors depending on -rmin and -rmax. "
2192 "If < 1, automatically choose the number of [TT].tpr[tt] files to test" },
2193 { "-steps", FALSE, etINT64, {&bench_nsteps},
2194 "Take timings for this many steps in the benchmark runs" },
2195 { "-resetstep", FALSE, etINT, {&presteps},
2196 "Let dlb equilibrate this many steps before timings are taken (reset cycle counters after this many steps)" },
2197 { "-simsteps", FALSE, etINT64, {&new_sim_nsteps},
2198 "If non-negative, perform this many steps in the real run (overwrites nsteps from [TT].tpr[tt], add [TT].cpt[tt] steps)" },
2199 { "-launch", FALSE, etBOOL, {&bLaunch},
2200 "Launch the real simulation after optimization" },
2201 { "-bench", FALSE, etBOOL, {&bBenchmark},
2202 "Run the benchmarks or just create the input [TT].tpr[tt] files?" },
2203 /******************/
2204 /* mdrun options: */
2205 /******************/
2206 /* We let g_tune_pme parse and understand these options, because we need to
2207 * prevent that they appear on the mdrun command line for the benchmarks */
2208 { "-append", FALSE, etBOOL, {&bAppendFiles},
2209 "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
2210 { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT},
2211 "Keep and number checkpoint files (launch only)" },
2212 { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
2213 "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt] (launch only)" }
2216 #define NFILE asize(fnm)
2218 seconds = gmx_gettime();
2220 if (!parse_common_args(&argc, argv, PCA_NOEXIT_ON_ARGS,
2221 NFILE, fnm, asize(pa), pa, asize(desc), desc,
2222 0, NULL, &oenv))
2224 return 0;
2227 /* Store the remaining unparsed command line entries in a string which
2228 * is then attached to the mdrun command line */
2229 snew(ExtraArgs, 1);
2230 ExtraArgs[0] = '\0';
2231 for (i = 1; i < argc; i++) /* argc will now be 1 if everything was understood */
2233 add_to_string(&ExtraArgs, argv[i]);
2234 add_to_string(&ExtraArgs, " ");
2237 if (opt2parg_bSet("-ntmpi", asize(pa), pa))
2239 bThreads = TRUE;
2240 if (opt2parg_bSet("-npstring", asize(pa), pa))
2242 fprintf(stderr, "WARNING: -npstring has no effect when using threads.\n");
2245 if (nnodes > 1)
2247 gmx_fatal(FARGS, "Can't run multi-threaded MPI simulation yet!");
2249 /* and now we just set this; a bit of an ugly hack*/
2250 nnodes = nthreads;
2252 /* Check for PME:PP ratio and whether tpr triggers additional output files */
2253 guessPMEratio = inspect_tpr(NFILE, fnm, &rcoulomb);
2255 /* Automatically set -beo options if -eo is set etc. */
2256 couple_files_options(NFILE, fnm);
2258 /* Construct the command line arguments for benchmark runs
2259 * as well as for the simulation run */
2260 if (bThreads)
2262 sprintf(bbuf, " -ntmpi %d ", nthreads);
2264 else
2266 /* This string will be used for MPI runs and will appear after the
2267 * mpirun command. */
2268 if (strcmp(procstring[0], "none") != 0)
2270 sprintf(bbuf, " %s %d ", procstring[0], nnodes);
2272 else
2274 sprintf(bbuf, " ");
2278 cmd_np = bbuf;
2280 create_command_line_snippets(bAppendFiles, bKeepAndNumCPT, bResetCountersHalfWay, presteps,
2281 NFILE, fnm, &cmd_args_bench, &cmd_args_launch, ExtraArgs);
2283 /* Read in checkpoint file if requested */
2284 sim_part = 1;
2285 if (opt2bSet("-cpi", NFILE, fnm))
2287 snew(cr, 1);
2288 cr->duty = DUTY_PP; /* makes the following routine happy */
2289 read_checkpoint_simulation_part(opt2fn("-cpi", NFILE, fnm),
2290 &sim_part, &cpt_steps, cr,
2291 FALSE, NFILE, fnm, NULL, NULL);
2292 sfree(cr);
2293 sim_part++;
2294 /* sim_part will now be 1 if no checkpoint file was found */
2295 if (sim_part <= 1)
2297 gmx_fatal(FARGS, "Checkpoint file %s not found!", opt2fn("-cpi", NFILE, fnm));
2301 /* Open performance output file and write header info */
2302 fp = ffopen(opt2fn("-p", NFILE, fnm), "w");
2304 /* Make a quick consistency check of command line parameters */
2305 check_input(nnodes, repeats, &ntprs, &rmin, rcoulomb, &rmax,
2306 maxPMEfraction, minPMEfraction, npme_fixed,
2307 bench_nsteps, fnm, NFILE, sim_part, presteps,
2308 asize(pa), pa);
2310 /* Determine the maximum and minimum number of PME nodes to test,
2311 * the actual list of settings is build in do_the_tests(). */
2312 if ((nnodes > 2) && (npme_fixed < -1))
2314 if (0 == strcmp(npmevalues_opt[0], "auto"))
2316 /* Determine the npme range automatically based on the PME:PP load guess */
2317 if (guessPMEratio > 1.0)
2319 /* More PME than PP work, probably we do not need separate PME nodes at all! */
2320 maxPMEnodes = nnodes/2;
2321 minPMEnodes = nnodes/2;
2323 else
2325 /* PME : PP load is in the range 0..1, let's test around the guess */
2326 guessPMEnodes = nnodes/(1.0 + 1.0/guessPMEratio);
2327 minPMEnodes = floor(0.7*guessPMEnodes);
2328 maxPMEnodes = ceil(1.6*guessPMEnodes);
2329 maxPMEnodes = min(maxPMEnodes, nnodes/2);
2332 else
2334 /* Determine the npme range based on user input */
2335 maxPMEnodes = floor(maxPMEfraction*nnodes);
2336 minPMEnodes = max(floor(minPMEfraction*nnodes), 0);
2337 fprintf(stdout, "Will try runs with %d ", minPMEnodes);
2338 if (maxPMEnodes != minPMEnodes)
2340 fprintf(stdout, "- %d ", maxPMEnodes);
2342 fprintf(stdout, "PME-only nodes.\n Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
2345 else
2347 maxPMEnodes = 0;
2348 minPMEnodes = 0;
2351 /* Get the commands we need to set up the runs from environment variables */
2352 get_program_paths(bThreads, &cmd_mpirun, &cmd_mdrun);
2353 if (bBenchmark && repeats > 0)
2355 check_mdrun_works(bThreads, cmd_mpirun, cmd_np, cmd_mdrun);
2358 /* Print some header info to file */
2359 sep_line(fp);
2360 fprintf(fp, "\n P E R F O R M A N C E R E S U L T S\n");
2361 sep_line(fp);
2362 fprintf(fp, "%s for Gromacs %s\n", ShortProgram(), GromacsVersion());
2363 if (!bThreads)
2365 fprintf(fp, "Number of nodes : %d\n", nnodes);
2366 fprintf(fp, "The mpirun command is : %s\n", cmd_mpirun);
2367 if (strcmp(procstring[0], "none") != 0)
2369 fprintf(fp, "Passing # of nodes via : %s\n", procstring[0]);
2371 else
2373 fprintf(fp, "Not setting number of nodes in system call\n");
2376 else
2378 fprintf(fp, "Number of threads : %d\n", nnodes);
2381 fprintf(fp, "The mdrun command is : %s\n", cmd_mdrun);
2382 fprintf(fp, "mdrun args benchmarks : %s\n", cmd_args_bench);
2383 fprintf(fp, "Benchmark steps : ");
2384 fprintf(fp, "%"GMX_PRId64, bench_nsteps);
2385 fprintf(fp, "\n");
2386 fprintf(fp, "dlb equilibration steps : %d\n", presteps);
2387 if (sim_part > 1)
2389 fprintf(fp, "Checkpoint time step : ");
2390 fprintf(fp, "%"GMX_PRId64, cpt_steps);
2391 fprintf(fp, "\n");
2393 fprintf(fp, "mdrun args at launchtime: %s\n", cmd_args_launch);
2395 if (new_sim_nsteps >= 0)
2397 bOverwrite = TRUE;
2398 fprintf(stderr, "Note: Simulation input file %s will have ", opt2fn("-so", NFILE, fnm));
2399 fprintf(stderr, "%"GMX_PRId64, new_sim_nsteps+cpt_steps);
2400 fprintf(stderr, " steps.\n");
2401 fprintf(fp, "Simulation steps : ");
2402 fprintf(fp, "%"GMX_PRId64, new_sim_nsteps);
2403 fprintf(fp, "\n");
2405 if (repeats > 1)
2407 fprintf(fp, "Repeats for each test : %d\n", repeats);
2410 if (npme_fixed >= -1)
2412 fprintf(fp, "Fixing -npme at : %d\n", npme_fixed);
2415 fprintf(fp, "Input file : %s\n", opt2fn("-s", NFILE, fnm));
2416 fprintf(fp, " PME/PP load estimate : %g\n", guessPMEratio);
2418 /* Allocate memory for the inputinfo struct: */
2419 snew(info, 1);
2420 info->nr_inputfiles = ntprs;
2421 for (i = 0; i < ntprs; i++)
2423 snew(info->rcoulomb, ntprs);
2424 snew(info->rvdw, ntprs);
2425 snew(info->rlist, ntprs);
2426 snew(info->rlistlong, ntprs);
2427 snew(info->nkx, ntprs);
2428 snew(info->nky, ntprs);
2429 snew(info->nkz, ntprs);
2430 snew(info->fsx, ntprs);
2431 snew(info->fsy, ntprs);
2432 snew(info->fsz, ntprs);
2434 /* Make alternative tpr files to test: */
2435 snew(tpr_names, ntprs);
2436 for (i = 0; i < ntprs; i++)
2438 snew(tpr_names[i], STRLEN);
2441 /* It can be that ntprs is reduced by make_benchmark_tprs if not enough
2442 * different grids could be found. */
2443 make_benchmark_tprs(opt2fn("-s", NFILE, fnm), tpr_names, bench_nsteps+presteps,
2444 cpt_steps, rmin, rmax, bScaleRvdw, &ntprs, info, fp);
2446 /********************************************************************************/
2447 /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats */
2448 /********************************************************************************/
2449 snew(perfdata, ntprs);
2450 if (bBenchmark)
2452 do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npme_fixed, npmevalues_opt[0], perfdata, &pmeentries,
2453 repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2454 cmd_args_bench, fnm, NFILE, presteps, cpt_steps);
2456 fprintf(fp, "\nTuning took%8.1f minutes.\n", (gmx_gettime()-seconds)/60.0);
2458 /* Analyse the results and give a suggestion for optimal settings: */
2459 bKeepTPR = analyze_data(fp, opt2fn("-p", NFILE, fnm), perfdata, nnodes, ntprs, pmeentries,
2460 repeats, info, &best_tpr, &best_npme);
2462 /* Take the best-performing tpr file and enlarge nsteps to original value */
2463 if (bKeepTPR && !bOverwrite)
2465 simulation_tpr = opt2fn("-s", NFILE, fnm);
2467 else
2469 simulation_tpr = opt2fn("-so", NFILE, fnm);
2470 modify_PMEsettings(bOverwrite ? (new_sim_nsteps+cpt_steps) : info->orig_sim_steps,
2471 info->orig_init_step, tpr_names[best_tpr], simulation_tpr);
2474 /* Let's get rid of the temporary benchmark input files */
2475 for (i = 0; i < ntprs; i++)
2477 fprintf(stdout, "Deleting temporary benchmark input file %s\n", tpr_names[i]);
2478 remove(tpr_names[i]);
2481 /* Now start the real simulation if the user requested it ... */
2482 launch_simulation(bLaunch, fp, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2483 cmd_args_launch, simulation_tpr, best_npme);
2485 ffclose(fp);
2487 /* ... or simply print the performance results to screen: */
2488 if (!bLaunch)
2490 finalize(opt2fn("-p", NFILE, fnm));
2493 return 0;