2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
41 #ifdef HAVE_SYS_TIME_H
47 #include "gromacs/commandline/pargs.h"
52 #include "gromacs/fileio/tpxio.h"
56 #include "checkpoint.h"
61 #include "gromacs/timing/walltime_accounting.h"
64 /* Enum for situations that can occur during log file parsing, the
65 * corresponding string entries can be found in do_the_tests() in
66 * const char* ParseLog[] */
72 eParselogResetProblem
,
76 eParselogLargePrimeFactor
,
84 int nPMEnodes
; /* number of PME-only nodes used in this test */
85 int nx
, ny
, nz
; /* DD grid */
86 int guessPME
; /* if nPMEnodes == -1, this is the guessed number of PME nodes */
87 double *Gcycles
; /* This can contain more than one value if doing multiple tests */
91 float *PME_f_load
; /* PME mesh/force load average*/
92 float PME_f_load_Av
; /* Average average ;) ... */
93 char *mdrun_cmd_line
; /* Mdrun command line used for this test */
99 int nr_inputfiles
; /* The number of tpr and mdp input files */
100 gmx_int64_t orig_sim_steps
; /* Number of steps to be done in the real simulation */
101 gmx_int64_t orig_init_step
; /* Init step for the real simulation */
102 real
*rcoulomb
; /* The coulomb radii [0...nr_inputfiles] */
103 real
*rvdw
; /* The vdW radii */
104 real
*rlist
; /* Neighbourlist cutoff radius */
106 int *nkx
, *nky
, *nkz
;
107 real
*fsx
, *fsy
, *fsz
; /* Fourierspacing in x,y,z dimension */
111 static void sep_line(FILE *fp
)
113 fprintf(fp
, "\n------------------------------------------------------------\n");
117 /* Wrapper for system calls */
118 static int gmx_system_call(char *command
)
121 gmx_fatal(FARGS
, "No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n", command
);
123 return ( system(command
) );
128 /* Check if string starts with substring */
129 static gmx_bool
str_starts(const char *string
, const char *substring
)
131 return ( strncmp(string
, substring
, strlen(substring
)) == 0);
135 static void cleandata(t_perf
*perfdata
, int test_nr
)
137 perfdata
->Gcycles
[test_nr
] = 0.0;
138 perfdata
->ns_per_day
[test_nr
] = 0.0;
139 perfdata
->PME_f_load
[test_nr
] = 0.0;
145 static gmx_bool
is_equal(real a
, real b
)
147 real diff
, eps
= 1.0e-7;
168 static void finalize(const char *fn_out
)
174 fp
= fopen(fn_out
, "r");
175 fprintf(stdout
, "\n\n");
177 while (fgets(buf
, STRLEN
-1, fp
) != NULL
)
179 fprintf(stdout
, "%s", buf
);
182 fprintf(stdout
, "\n\n");
187 eFoundNothing
, eFoundDDStr
, eFoundAccountingStr
, eFoundCycleStr
190 static int parse_logfile(const char *logfile
, const char *errfile
,
191 t_perf
*perfdata
, int test_nr
, int presteps
, gmx_int64_t cpt_steps
,
195 char line
[STRLEN
], dumstring
[STRLEN
], dumstring2
[STRLEN
];
196 const char matchstrdd
[] = "Domain decomposition grid";
197 const char matchstrcr
[] = "resetting all time and cycle counters";
198 const char matchstrbal
[] = "Average PME mesh/force load:";
199 const char matchstring
[] = "R E A L C Y C L E A N D T I M E A C C O U N T I N G";
200 const char errSIG
[] = "signal, stopping at the next";
203 float dum1
, dum2
, dum3
, dum4
;
206 gmx_int64_t resetsteps
= -1;
207 gmx_bool bFoundResetStr
= FALSE
;
208 gmx_bool bResetChecked
= FALSE
;
211 if (!gmx_fexist(logfile
))
213 fprintf(stderr
, "WARNING: Could not find logfile %s.\n", logfile
);
214 cleandata(perfdata
, test_nr
);
215 return eParselogNotFound
;
218 fp
= fopen(logfile
, "r");
219 perfdata
->PME_f_load
[test_nr
] = -1.0;
220 perfdata
->guessPME
= -1;
222 iFound
= eFoundNothing
;
225 iFound
= eFoundDDStr
; /* Skip some case statements */
228 while (fgets(line
, STRLEN
, fp
) != NULL
)
230 /* Remove leading spaces */
233 /* Check for TERM and INT signals from user: */
234 if (strstr(line
, errSIG
) != NULL
)
237 cleandata(perfdata
, test_nr
);
238 return eParselogTerm
;
241 /* Check whether cycle resetting worked */
242 if (presteps
> 0 && !bFoundResetStr
)
244 if (strstr(line
, matchstrcr
) != NULL
)
246 sprintf(dumstring
, "step %s", "%"GMX_SCNd64
);
247 sscanf(line
, dumstring
, &resetsteps
);
248 bFoundResetStr
= TRUE
;
249 if (resetsteps
== presteps
+cpt_steps
)
251 bResetChecked
= TRUE
;
255 sprintf(dumstring
, "%"GMX_PRId64
, resetsteps
);
256 sprintf(dumstring2
, "%"GMX_PRId64
, presteps
+cpt_steps
);
257 fprintf(stderr
, "WARNING: Time step counters were reset at step %s,\n"
258 " though they were supposed to be reset at step %s!\n",
259 dumstring
, dumstring2
);
264 /* Look for strings that appear in a certain order in the log file: */
268 /* Look for domain decomp grid and separate PME nodes: */
269 if (str_starts(line
, matchstrdd
))
271 sscanf(line
, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
272 &(perfdata
->nx
), &(perfdata
->ny
), &(perfdata
->nz
), &npme
);
273 if (perfdata
->nPMEnodes
== -1)
275 perfdata
->guessPME
= npme
;
277 else if (perfdata
->nPMEnodes
!= npme
)
279 gmx_fatal(FARGS
, "PME nodes from command line and output file are not identical");
281 iFound
= eFoundDDStr
;
283 /* Catch a few errors that might have occured: */
284 else if (str_starts(line
, "There is no domain decomposition for"))
287 return eParselogNoDDGrid
;
289 else if (str_starts(line
, "The number of nodes you selected"))
292 return eParselogLargePrimeFactor
;
294 else if (str_starts(line
, "reading tpx file"))
297 return eParselogTPXVersion
;
299 else if (str_starts(line
, "The -dd or -npme option request a parallel simulation"))
302 return eParselogNotParallel
;
306 /* Look for PME mesh/force balance (not necessarily present, though) */
307 if (str_starts(line
, matchstrbal
))
309 sscanf(&line
[strlen(matchstrbal
)], "%f", &(perfdata
->PME_f_load
[test_nr
]));
311 /* Look for matchstring */
312 if (str_starts(line
, matchstring
))
314 iFound
= eFoundAccountingStr
;
317 case eFoundAccountingStr
:
318 /* Already found matchstring - look for cycle data */
319 if (str_starts(line
, "Total "))
321 sscanf(line
, "Total %d %lf", &procs
, &(perfdata
->Gcycles
[test_nr
]));
322 iFound
= eFoundCycleStr
;
326 /* Already found cycle data - look for remaining performance info and return */
327 if (str_starts(line
, "Performance:"))
329 ndum
= sscanf(line
, "%s %f %f %f %f", dumstring
, &dum1
, &dum2
, &dum3
, &dum4
);
330 /* (ns/day) is the second last entry, depending on whether GMX_DETAILED_PERF_STATS was set in print_perf(), nrnb.c */
331 perfdata
->ns_per_day
[test_nr
] = (ndum
== 5) ? dum3
: dum1
;
333 if (bResetChecked
|| presteps
== 0)
339 return eParselogResetProblem
;
346 /* Close the log file */
349 /* Check why there is no performance data in the log file.
350 * Did a fatal errors occur? */
351 if (gmx_fexist(errfile
))
353 fp
= fopen(errfile
, "r");
354 while (fgets(line
, STRLEN
, fp
) != NULL
)
356 if (str_starts(line
, "Fatal error:") )
358 if (fgets(line
, STRLEN
, fp
) != NULL
)
360 fprintf(stderr
, "\nWARNING: An error occured during this benchmark:\n"
364 cleandata(perfdata
, test_nr
);
365 return eParselogFatal
;
372 fprintf(stderr
, "WARNING: Could not find stderr file %s.\n", errfile
);
375 /* Giving up ... we could not find out why there is no performance data in
377 fprintf(stdout
, "No performance data in log file.\n");
378 cleandata(perfdata
, test_nr
);
380 return eParselogNoPerfData
;
384 static gmx_bool
analyze_data(
393 int *index_tpr
, /* OUT: Nr of mdp file with best settings */
394 int *npme_optimal
) /* OUT: Optimal number of PME nodes */
397 int line
= 0, line_win
= -1;
398 int k_win
= -1, i_win
= -1, winPME
;
399 double s
= 0.0; /* standard deviation */
402 char str_PME_f_load
[13];
403 gmx_bool bCanUseOrigTPR
;
404 gmx_bool bRefinedCoul
, bRefinedVdW
, bRefinedGrid
;
410 fprintf(fp
, "Summary of successful runs:\n");
411 fprintf(fp
, "Line tpr PME nodes Gcycles Av. Std.dev. ns/day PME/f");
414 fprintf(fp
, " DD grid");
420 for (k
= 0; k
< ntprs
; k
++)
422 for (i
= 0; i
< ntests
; i
++)
424 /* Select the right dataset: */
425 pd
= &(perfdata
[k
][i
]);
427 pd
->Gcycles_Av
= 0.0;
428 pd
->PME_f_load_Av
= 0.0;
429 pd
->ns_per_day_Av
= 0.0;
431 if (pd
->nPMEnodes
== -1)
433 sprintf(strbuf
, "(%3d)", pd
->guessPME
);
437 sprintf(strbuf
, " ");
440 /* Get the average run time of a setting */
441 for (j
= 0; j
< nrepeats
; j
++)
443 pd
->Gcycles_Av
+= pd
->Gcycles
[j
];
444 pd
->PME_f_load_Av
+= pd
->PME_f_load
[j
];
446 pd
->Gcycles_Av
/= nrepeats
;
447 pd
->PME_f_load_Av
/= nrepeats
;
449 for (j
= 0; j
< nrepeats
; j
++)
451 if (pd
->ns_per_day
[j
] > 0.0)
453 pd
->ns_per_day_Av
+= pd
->ns_per_day
[j
];
457 /* Somehow the performance number was not aquired for this run,
458 * therefor set the average to some negative value: */
459 pd
->ns_per_day_Av
= -1.0f
*nrepeats
;
463 pd
->ns_per_day_Av
/= nrepeats
;
466 if (pd
->PME_f_load_Av
> 0.0)
468 sprintf(str_PME_f_load
, "%12.3f", pd
->PME_f_load_Av
);
472 sprintf(str_PME_f_load
, "%s", " - ");
476 /* We assume we had a successful run if both averages are positive */
477 if (pd
->Gcycles_Av
> 0.0 && pd
->ns_per_day_Av
> 0.0)
479 /* Output statistics if repeats were done */
482 /* Calculate the standard deviation */
484 for (j
= 0; j
< nrepeats
; j
++)
486 s
+= pow( pd
->Gcycles
[j
] - pd
->Gcycles_Av
, 2 );
491 fprintf(fp
, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s",
492 line
, k
, pd
->nPMEnodes
, strbuf
, pd
->Gcycles_Av
, s
,
493 pd
->ns_per_day_Av
, str_PME_f_load
);
496 fprintf(fp
, " %3d %3d %3d", pd
->nx
, pd
->ny
, pd
->nz
);
500 /* Store the index of the best run found so far in 'winner': */
501 if ( (k_win
== -1) || (pd
->Gcycles_Av
< perfdata
[k_win
][i_win
].Gcycles_Av
) )
514 gmx_fatal(FARGS
, "None of the runs was successful! Check %s for problems.", fn
);
519 winPME
= perfdata
[k_win
][i_win
].nPMEnodes
;
523 /* We stuck to a fixed number of PME-only nodes */
524 sprintf(strbuf
, "settings No. %d", k_win
);
528 /* We have optimized the number of PME-only nodes */
531 sprintf(strbuf
, "%s", "the automatic number of PME nodes");
535 sprintf(strbuf
, "%d PME nodes", winPME
);
538 fprintf(fp
, "Best performance was achieved with %s", strbuf
);
539 if ((nrepeats
> 1) && (ntests
> 1))
541 fprintf(fp
, " (see line %d)", line_win
);
545 /* Only mention settings if they were modified: */
546 bRefinedCoul
= !is_equal(info
->rcoulomb
[k_win
], info
->rcoulomb
[0]);
547 bRefinedVdW
= !is_equal(info
->rvdw
[k_win
], info
->rvdw
[0] );
548 bRefinedGrid
= !(info
->nkx
[k_win
] == info
->nkx
[0] &&
549 info
->nky
[k_win
] == info
->nky
[0] &&
550 info
->nkz
[k_win
] == info
->nkz
[0]);
552 if (bRefinedCoul
|| bRefinedVdW
|| bRefinedGrid
)
554 fprintf(fp
, "Optimized PME settings:\n");
555 bCanUseOrigTPR
= FALSE
;
559 bCanUseOrigTPR
= TRUE
;
564 fprintf(fp
, " New Coulomb radius: %f nm (was %f nm)\n", info
->rcoulomb
[k_win
], info
->rcoulomb
[0]);
569 fprintf(fp
, " New Van der Waals radius: %f nm (was %f nm)\n", info
->rvdw
[k_win
], info
->rvdw
[0]);
574 fprintf(fp
, " New Fourier grid xyz: %d %d %d (was %d %d %d)\n", info
->nkx
[k_win
], info
->nky
[k_win
], info
->nkz
[k_win
],
575 info
->nkx
[0], info
->nky
[0], info
->nkz
[0]);
578 if (bCanUseOrigTPR
&& ntprs
> 1)
580 fprintf(fp
, "and original PME settings.\n");
585 /* Return the index of the mdp file that showed the highest performance
586 * and the optimal number of PME nodes */
588 *npme_optimal
= winPME
;
590 return bCanUseOrigTPR
;
594 /* Get the commands we need to set up the runs from environment variables */
595 static void get_program_paths(gmx_bool bThreads
, char *cmd_mpirun
[], char *cmd_mdrun
[])
599 const char def_mpirun
[] = "mpirun";
600 const char def_mdrun
[] = "mdrun";
602 const char empty_mpirun
[] = "";
604 /* Get the commands we need to set up the runs from environment variables */
607 if ( (cp
= getenv("MPIRUN")) != NULL
)
609 *cmd_mpirun
= strdup(cp
);
613 *cmd_mpirun
= strdup(def_mpirun
);
618 *cmd_mpirun
= strdup(empty_mpirun
);
621 if ( (cp
= getenv("MDRUN" )) != NULL
)
623 *cmd_mdrun
= strdup(cp
);
627 *cmd_mdrun
= strdup(def_mdrun
);
631 /* Check that the commands will run mdrun (perhaps via mpirun) by
632 * running a very quick test simulation. Requires MPI environment to
633 * be available if applicable. */
634 static void check_mdrun_works(gmx_bool bThreads
,
635 const char *cmd_mpirun
,
637 const char *cmd_mdrun
)
639 char *command
= NULL
;
643 const char filename
[] = "benchtest.log";
645 /* This string should always be identical to the one in copyrite.c,
646 * gmx_print_version_info() in the defined(GMX_MPI) section */
647 const char match_mpi
[] = "MPI library: MPI";
648 const char match_mdrun
[] = "Program: ";
649 gmx_bool bMdrun
= FALSE
;
650 gmx_bool bMPI
= FALSE
;
652 /* Run a small test to see whether mpirun + mdrun work */
653 fprintf(stdout
, "Making sure that mdrun can be executed. ");
656 snew(command
, strlen(cmd_mdrun
) + strlen(cmd_np
) + strlen(filename
) + 50);
657 sprintf(command
, "%s%s-version -maxh 0.001 1> %s 2>&1", cmd_mdrun
, cmd_np
, filename
);
661 snew(command
, strlen(cmd_mpirun
) + strlen(cmd_np
) + strlen(cmd_mdrun
) + strlen(filename
) + 50);
662 sprintf(command
, "%s%s%s -version -maxh 0.001 1> %s 2>&1", cmd_mpirun
, cmd_np
, cmd_mdrun
, filename
);
664 fprintf(stdout
, "Trying '%s' ... ", command
);
665 make_backup(filename
);
666 gmx_system_call(command
);
668 /* Check if we find the characteristic string in the output: */
669 if (!gmx_fexist(filename
))
671 gmx_fatal(FARGS
, "Output from test run could not be found.");
674 fp
= fopen(filename
, "r");
675 /* We need to scan the whole output file, since sometimes the queuing system
676 * also writes stuff to stdout/err */
679 cp
= fgets(line
, STRLEN
, fp
);
682 if (str_starts(line
, match_mdrun
) )
686 if (str_starts(line
, match_mpi
) )
698 gmx_fatal(FARGS
, "Need a threaded version of mdrun. This one\n"
700 "seems to have been compiled with MPI instead.",
708 gmx_fatal(FARGS
, "Need an MPI-enabled version of mdrun. This one\n"
710 "seems to have been compiled without MPI support.",
717 gmx_fatal(FARGS
, "Cannot execute mdrun. Please check %s for problems!",
721 fprintf(stdout
, "passed.\n");
729 static void launch_simulation(
730 gmx_bool bLaunch
, /* Should the simulation be launched? */
731 FILE *fp
, /* General log file */
732 gmx_bool bThreads
, /* whether to use threads */
733 char *cmd_mpirun
, /* Command for mpirun */
734 char *cmd_np
, /* Switch for -np or -ntmpi or empty */
735 char *cmd_mdrun
, /* Command for mdrun */
736 char *args_for_mdrun
, /* Arguments for mdrun */
737 const char *simulation_tpr
, /* This tpr will be simulated */
738 int nPMEnodes
) /* Number of PME nodes to use */
743 /* Make enough space for the system call command,
744 * (100 extra chars for -npme ... etc. options should suffice): */
745 snew(command
, strlen(cmd_mpirun
)+strlen(cmd_mdrun
)+strlen(cmd_np
)+strlen(args_for_mdrun
)+strlen(simulation_tpr
)+100);
747 /* Note that the -passall options requires args_for_mdrun to be at the end
748 * of the command line string */
751 sprintf(command
, "%s%s-npme %d -s %s %s",
752 cmd_mdrun
, cmd_np
, nPMEnodes
, simulation_tpr
, args_for_mdrun
);
756 sprintf(command
, "%s%s%s -npme %d -s %s %s",
757 cmd_mpirun
, cmd_np
, cmd_mdrun
, nPMEnodes
, simulation_tpr
, args_for_mdrun
);
760 fprintf(fp
, "%s this command line to launch the simulation:\n\n%s", bLaunch
? "Using" : "Please use", command
);
764 /* Now the real thing! */
767 fprintf(stdout
, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command
);
770 gmx_system_call(command
);
775 static void modify_PMEsettings(
776 gmx_int64_t simsteps
, /* Set this value as number of time steps */
777 gmx_int64_t init_step
, /* Set this value as init_step */
778 const char *fn_best_tpr
, /* tpr file with the best performance */
779 const char *fn_sim_tpr
) /* name of tpr file to be launched */
787 read_tpx_state(fn_best_tpr
, ir
, &state
, NULL
, &mtop
);
789 /* Reset nsteps and init_step to the value of the input .tpr file */
790 ir
->nsteps
= simsteps
;
791 ir
->init_step
= init_step
;
793 /* Write the tpr file which will be launched */
794 sprintf(buf
, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr
, "%"GMX_PRId64
);
795 fprintf(stdout
, buf
, ir
->nsteps
);
797 write_tpx_state(fn_sim_tpr
, ir
, &state
, &mtop
);
803 #define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
805 /* Make additional TPR files with more computational load for the
806 * direct space processors: */
807 static void make_benchmark_tprs(
808 const char *fn_sim_tpr
, /* READ : User-provided tpr file */
809 char *fn_bench_tprs
[], /* WRITE: Names of benchmark tpr files */
810 gmx_int64_t benchsteps
, /* Number of time steps for benchmark runs */
811 gmx_int64_t statesteps
, /* Step counter in checkpoint file */
812 real rmin
, /* Minimal Coulomb radius */
813 real rmax
, /* Maximal Coulomb radius */
814 real bScaleRvdw
, /* Scale rvdw along with rcoulomb */
815 int *ntprs
, /* No. of TPRs to write, each with a different
816 rcoulomb and fourierspacing */
817 t_inputinfo
*info
, /* Contains information about mdp file options */
818 FILE *fp
) /* Write the output here */
824 real nlist_buffer
; /* Thickness of the buffer regions for PME-switch potentials */
827 gmx_bool bNote
= FALSE
;
828 real add
; /* Add this to rcoul for the next test */
829 real fac
= 1.0; /* Scaling factor for Coulomb radius */
830 real fourierspacing
; /* Basic fourierspacing from tpr */
833 sprintf(buf
, "Making benchmark tpr file%s with %s time step%s",
834 *ntprs
> 1 ? "s" : "", "%"GMX_PRId64
, benchsteps
> 1 ? "s" : "");
835 fprintf(stdout
, buf
, benchsteps
);
838 sprintf(buf
, " (adding %s steps from checkpoint file)", "%"GMX_PRId64
);
839 fprintf(stdout
, buf
, statesteps
);
840 benchsteps
+= statesteps
;
842 fprintf(stdout
, ".\n");
846 read_tpx_state(fn_sim_tpr
, ir
, &state
, NULL
, &mtop
);
848 /* Check if some kind of PME was chosen */
849 if (EEL_PME(ir
->coulombtype
) == FALSE
)
851 gmx_fatal(FARGS
, "Can only do optimizations for simulations with %s electrostatics.",
855 /* Check if rcoulomb == rlist, which is necessary for plain PME. */
856 if ( (ir
->cutoff_scheme
!= ecutsVERLET
) &&
857 (eelPME
== ir
->coulombtype
) && !(ir
->rcoulomb
== ir
->rlist
))
859 gmx_fatal(FARGS
, "%s requires rcoulomb (%f) to be equal to rlist (%f).",
860 EELTYPE(eelPME
), ir
->rcoulomb
, ir
->rlist
);
862 /* For other PME types, rcoulomb is allowed to be smaller than rlist */
863 else if (ir
->rcoulomb
> ir
->rlist
)
865 gmx_fatal(FARGS
, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)",
866 EELTYPE(ir
->coulombtype
), ir
->rcoulomb
, ir
->rlist
);
869 if (bScaleRvdw
&& ir
->rvdw
!= ir
->rcoulomb
)
871 fprintf(stdout
, "NOTE: input rvdw != rcoulomb, will not scale rvdw\n");
875 /* Reduce the number of steps for the benchmarks */
876 info
->orig_sim_steps
= ir
->nsteps
;
877 ir
->nsteps
= benchsteps
;
878 /* We must not use init_step from the input tpr file for the benchmarks */
879 info
->orig_init_step
= ir
->init_step
;
882 /* For PME-switch potentials, keep the radial distance of the buffer region */
883 nlist_buffer
= ir
->rlist
- ir
->rcoulomb
;
885 /* Determine length of triclinic box vectors */
886 for (d
= 0; d
< DIM
; d
++)
889 for (i
= 0; i
< DIM
; i
++)
891 box_size
[d
] += state
.box
[d
][i
]*state
.box
[d
][i
];
893 box_size
[d
] = sqrt(box_size
[d
]);
896 if (ir
->fourier_spacing
> 0)
898 info
->fsx
[0] = ir
->fourier_spacing
;
899 info
->fsy
[0] = ir
->fourier_spacing
;
900 info
->fsz
[0] = ir
->fourier_spacing
;
904 /* Reconstruct fourierspacing per dimension from the number of grid points and box size */
905 info
->fsx
[0] = box_size
[XX
]/ir
->nkx
;
906 info
->fsy
[0] = box_size
[YY
]/ir
->nky
;
907 info
->fsz
[0] = box_size
[ZZ
]/ir
->nkz
;
910 /* If no value for the fourierspacing was provided on the command line, we
911 * use the reconstruction from the tpr file */
912 if (ir
->fourier_spacing
> 0)
914 /* Use the spacing from the tpr */
915 fourierspacing
= ir
->fourier_spacing
;
919 /* Use the maximum observed spacing */
920 fourierspacing
= max(max(info
->fsx
[0], info
->fsy
[0]), info
->fsz
[0]);
923 fprintf(stdout
, "Calculating PME grid points on the basis of a fourierspacing of %f nm\n", fourierspacing
);
925 /* For performance comparisons the number of particles is useful to have */
926 fprintf(fp
, " Number of particles : %d\n", mtop
.natoms
);
928 /* Print information about settings of which some are potentially modified: */
929 fprintf(fp
, " Coulomb type : %s\n", EELTYPE(ir
->coulombtype
));
930 fprintf(fp
, " Grid spacing x y z : %f %f %f\n",
931 box_size
[XX
]/ir
->nkx
, box_size
[YY
]/ir
->nky
, box_size
[ZZ
]/ir
->nkz
);
932 fprintf(fp
, " Van der Waals type : %s\n", EVDWTYPE(ir
->vdwtype
));
933 if (EVDW_SWITCHED(ir
->vdwtype
))
935 fprintf(fp
, " rvdw_switch : %f nm\n", ir
->rvdw_switch
);
937 if (EPME_SWITCHED(ir
->coulombtype
))
939 fprintf(fp
, " rlist : %f nm\n", ir
->rlist
);
941 if (ir
->rlistlong
!= max_cutoff(ir
->rvdw
, ir
->rcoulomb
))
943 fprintf(fp
, " rlistlong : %f nm\n", ir
->rlistlong
);
946 /* Print a descriptive line about the tpr settings tested */
947 fprintf(fp
, "\nWill try these real/reciprocal workload settings:\n");
948 fprintf(fp
, " No. scaling rcoulomb");
949 fprintf(fp
, " nkx nky nkz");
950 fprintf(fp
, " spacing");
951 if (evdwCUT
== ir
->vdwtype
)
953 fprintf(fp
, " rvdw");
955 if (EPME_SWITCHED(ir
->coulombtype
))
957 fprintf(fp
, " rlist");
959 if (ir
->rlistlong
!= max_cutoff(ir
->rlist
, max_cutoff(ir
->rvdw
, ir
->rcoulomb
)) )
961 fprintf(fp
, " rlistlong");
963 fprintf(fp
, " tpr file\n");
965 /* Loop to create the requested number of tpr input files */
966 for (j
= 0; j
< *ntprs
; j
++)
968 /* The first .tpr is the provided one, just need to modify nsteps,
969 * so skip the following block */
972 /* Determine which Coulomb radii rc to use in the benchmarks */
973 add
= (rmax
-rmin
)/(*ntprs
-1);
974 if (is_equal(rmin
, info
->rcoulomb
[0]))
976 ir
->rcoulomb
= rmin
+ j
*add
;
978 else if (is_equal(rmax
, info
->rcoulomb
[0]))
980 ir
->rcoulomb
= rmin
+ (j
-1)*add
;
984 /* rmin != rcoul != rmax, ergo test between rmin and rmax */
985 add
= (rmax
-rmin
)/(*ntprs
-2);
986 ir
->rcoulomb
= rmin
+ (j
-1)*add
;
989 /* Determine the scaling factor fac */
990 fac
= ir
->rcoulomb
/info
->rcoulomb
[0];
992 /* Scale the Fourier grid spacing */
993 ir
->nkx
= ir
->nky
= ir
->nkz
= 0;
994 calc_grid(NULL
, state
.box
, fourierspacing
*fac
, &ir
->nkx
, &ir
->nky
, &ir
->nkz
);
996 /* Adjust other radii since various conditions neet to be fulfilled */
997 if (eelPME
== ir
->coulombtype
)
999 /* plain PME, rcoulomb must be equal to rlist */
1000 ir
->rlist
= ir
->rcoulomb
;
1004 /* rlist must be >= rcoulomb, we keep the size of the buffer region */
1005 ir
->rlist
= ir
->rcoulomb
+ nlist_buffer
;
1008 if (bScaleRvdw
&& evdwCUT
== ir
->vdwtype
)
1010 /* For vdw cutoff, rvdw >= rlist */
1011 ir
->rvdw
= max(info
->rvdw
[0], ir
->rlist
);
1014 ir
->rlistlong
= max_cutoff(ir
->rlist
, max_cutoff(ir
->rvdw
, ir
->rcoulomb
));
1016 } /* end of "if (j != 0)" */
1018 /* for j==0: Save the original settings
1019 * for j >0: Save modified radii and Fourier grids */
1020 info
->rcoulomb
[j
] = ir
->rcoulomb
;
1021 info
->rvdw
[j
] = ir
->rvdw
;
1022 info
->nkx
[j
] = ir
->nkx
;
1023 info
->nky
[j
] = ir
->nky
;
1024 info
->nkz
[j
] = ir
->nkz
;
1025 info
->rlist
[j
] = ir
->rlist
;
1026 info
->rlistlong
[j
] = ir
->rlistlong
;
1027 info
->fsx
[j
] = fac
*fourierspacing
;
1028 info
->fsy
[j
] = fac
*fourierspacing
;
1029 info
->fsz
[j
] = fac
*fourierspacing
;
1031 /* Write the benchmark tpr file */
1032 strncpy(fn_bench_tprs
[j
], fn_sim_tpr
, strlen(fn_sim_tpr
)-strlen(".tpr"));
1033 sprintf(buf
, "_bench%.2d.tpr", j
);
1034 strcat(fn_bench_tprs
[j
], buf
);
1035 fprintf(stdout
, "Writing benchmark tpr %s with nsteps=", fn_bench_tprs
[j
]);
1036 fprintf(stdout
, "%"GMX_PRId64
, ir
->nsteps
);
1039 fprintf(stdout
, ", scaling factor %f\n", fac
);
1043 fprintf(stdout
, ", unmodified settings\n");
1046 write_tpx_state(fn_bench_tprs
[j
], ir
, &state
, &mtop
);
1048 /* Write information about modified tpr settings to log file */
1049 fprintf(fp
, "%4d%10f%10f", j
, fac
, ir
->rcoulomb
);
1050 fprintf(fp
, "%5d%5d%5d", ir
->nkx
, ir
->nky
, ir
->nkz
);
1051 fprintf(fp
, " %9f ", info
->fsx
[j
]);
1052 if (evdwCUT
== ir
->vdwtype
)
1054 fprintf(fp
, "%10f", ir
->rvdw
);
1056 if (EPME_SWITCHED(ir
->coulombtype
))
1058 fprintf(fp
, "%10f", ir
->rlist
);
1060 if (info
->rlistlong
[0] != max_cutoff(info
->rlist
[0], max_cutoff(info
->rvdw
[0], info
->rcoulomb
[0])) )
1062 fprintf(fp
, "%10f", ir
->rlistlong
);
1064 fprintf(fp
, " %-14s\n", fn_bench_tprs
[j
]);
1066 /* Make it clear to the user that some additional settings were modified */
1067 if (!is_equal(ir
->rvdw
, info
->rvdw
[0])
1068 || !is_equal(ir
->rlistlong
, info
->rlistlong
[0]) )
1075 fprintf(fp
, "\nNote that in addition to the Coulomb radius and the Fourier grid\n"
1076 "other input settings were also changed (see table above).\n"
1077 "Please check if the modified settings are appropriate.\n");
1085 /* Rename the files we want to keep to some meaningful filename and
1086 * delete the rest */
1087 static void cleanup(const t_filenm
*fnm
, int nfile
, int k
, int nnodes
,
1088 int nPMEnodes
, int nr
, gmx_bool bKeepStderr
)
1090 char numstring
[STRLEN
];
1091 char newfilename
[STRLEN
];
1092 const char *fn
= NULL
;
1097 fprintf(stdout
, "Cleaning up, deleting benchmark temp files ...\n");
1099 for (i
= 0; i
< nfile
; i
++)
1101 opt
= (char *)fnm
[i
].opt
;
1102 if (strcmp(opt
, "-p") == 0)
1104 /* do nothing; keep this file */
1107 else if (strcmp(opt
, "-bg") == 0)
1109 /* Give the log file a nice name so one can later see which parameters were used */
1110 numstring
[0] = '\0';
1113 sprintf(numstring
, "_%d", nr
);
1115 sprintf(newfilename
, "%s_no%d_np%d_npme%d%s", opt2fn("-bg", nfile
, fnm
), k
, nnodes
, nPMEnodes
, numstring
);
1116 if (gmx_fexist(opt2fn("-bg", nfile
, fnm
)))
1118 fprintf(stdout
, "renaming log file to %s\n", newfilename
);
1119 make_backup(newfilename
);
1120 rename(opt2fn("-bg", nfile
, fnm
), newfilename
);
1123 else if (strcmp(opt
, "-err") == 0)
1125 /* This file contains the output of stderr. We want to keep it in
1126 * cases where there have been problems. */
1127 fn
= opt2fn(opt
, nfile
, fnm
);
1128 numstring
[0] = '\0';
1131 sprintf(numstring
, "_%d", nr
);
1133 sprintf(newfilename
, "%s_no%d_np%d_npme%d%s", fn
, k
, nnodes
, nPMEnodes
, numstring
);
1138 fprintf(stdout
, "Saving stderr output in %s\n", newfilename
);
1139 make_backup(newfilename
);
1140 rename(fn
, newfilename
);
1144 fprintf(stdout
, "Deleting %s\n", fn
);
1149 /* Delete the files which are created for each benchmark run: (options -b*) */
1150 else if ( (0 == strncmp(opt
, "-b", 2)) && (opt2bSet(opt
, nfile
, fnm
) || !is_optional(&fnm
[i
])) )
1152 fn
= opt2fn(opt
, nfile
, fnm
);
1155 fprintf(stdout
, "Deleting %s\n", fn
);
1163 /* Returns the largest common factor of n1 and n2 */
1164 static int largest_common_factor(int n1
, int n2
)
1169 for (factor
= nmax
; factor
> 0; factor
--)
1171 if (0 == (n1
% factor
) && 0 == (n2
% factor
) )
1176 return 0; /* one for the compiler */
1180 eNpmeAuto
, eNpmeAll
, eNpmeReduced
, eNpmeSubset
, eNpmeNr
1183 /* Create a list of numbers of PME nodes to test */
1184 static void make_npme_list(
1185 const char *npmevalues_opt
, /* Make a complete list with all
1186 * possibilities or a short list that keeps only
1187 * reasonable numbers of PME nodes */
1188 int *nentries
, /* Number of entries we put in the nPMEnodes list */
1189 int *nPMEnodes
[], /* Each entry contains the value for -npme */
1190 int nnodes
, /* Total number of nodes to do the tests on */
1191 int minPMEnodes
, /* Minimum number of PME nodes */
1192 int maxPMEnodes
) /* Maximum number of PME nodes */
1195 int min_factor
= 1; /* We request that npp and npme have this minimal
1196 * largest common factor (depends on npp) */
1197 int nlistmax
; /* Max. list size */
1198 int nlist
; /* Actual number of entries in list */
1202 /* Do we need to check all possible values for -npme or is a reduced list enough? */
1203 if (0 == strcmp(npmevalues_opt
, "all") )
1207 else if (0 == strcmp(npmevalues_opt
, "subset") )
1209 eNPME
= eNpmeSubset
;
1211 else /* "auto" or "range" */
1217 else if (nnodes
< 128)
1219 eNPME
= eNpmeReduced
;
1223 eNPME
= eNpmeSubset
;
1227 /* Calculate how many entries we could possibly have (in case of -npme all) */
1230 nlistmax
= maxPMEnodes
- minPMEnodes
+ 3;
1231 if (0 == minPMEnodes
)
1241 /* Now make the actual list which is at most of size nlist */
1242 snew(*nPMEnodes
, nlistmax
);
1243 nlist
= 0; /* start counting again, now the real entries in the list */
1244 for (i
= 0; i
< nlistmax
- 2; i
++)
1246 npme
= maxPMEnodes
- i
;
1257 /* For 2d PME we want a common largest factor of at least the cube
1258 * root of the number of PP nodes */
1259 min_factor
= (int) pow(npp
, 1.0/3.0);
1262 gmx_fatal(FARGS
, "Unknown option for eNPME in make_npme_list");
1265 if (largest_common_factor(npp
, npme
) >= min_factor
)
1267 (*nPMEnodes
)[nlist
] = npme
;
1271 /* We always test 0 PME nodes and the automatic number */
1272 *nentries
= nlist
+ 2;
1273 (*nPMEnodes
)[nlist
] = 0;
1274 (*nPMEnodes
)[nlist
+1] = -1;
1276 fprintf(stderr
, "Will try the following %d different values for -npme:\n", *nentries
);
1277 for (i
= 0; i
< *nentries
-1; i
++)
1279 fprintf(stderr
, "%d, ", (*nPMEnodes
)[i
]);
1281 fprintf(stderr
, "and %d (auto).\n", (*nPMEnodes
)[*nentries
-1]);
1285 /* Allocate memory to store the performance data */
1286 static void init_perfdata(t_perf
*perfdata
[], int ntprs
, int datasets
, int repeats
)
1291 for (k
= 0; k
< ntprs
; k
++)
1293 snew(perfdata
[k
], datasets
);
1294 for (i
= 0; i
< datasets
; i
++)
1296 for (j
= 0; j
< repeats
; j
++)
1298 snew(perfdata
[k
][i
].Gcycles
, repeats
);
1299 snew(perfdata
[k
][i
].ns_per_day
, repeats
);
1300 snew(perfdata
[k
][i
].PME_f_load
, repeats
);
1307 /* Check for errors on mdrun -h */
1308 static void make_sure_it_runs(char *mdrun_cmd_line
, int length
, FILE *fp
)
1310 char *command
, *msg
;
1314 snew(command
, length
+ 15);
1315 snew(msg
, length
+ 500);
1317 fprintf(stdout
, "Making sure the benchmarks can be executed ...\n");
1318 /* FIXME: mdrun -h no longer actually does anything useful.
1319 * It unconditionally prints the help, ignoring all other options. */
1320 sprintf(command
, "%s-h -quiet", mdrun_cmd_line
);
1321 ret
= gmx_system_call(command
);
1325 /* To prevent confusion, do not again issue a gmx_fatal here since we already
1326 * get the error message from mdrun itself */
1327 sprintf(msg
, "Cannot run the benchmark simulations! Please check the error message of\n"
1328 "mdrun for the source of the problem. Did you provide a command line\n"
1329 "argument that neither g_tune_pme nor mdrun understands? Offending command:\n"
1330 "\n%s\n\n", command
);
1332 fprintf(stderr
, "%s", msg
);
1334 fprintf(fp
, "%s", msg
);
1344 static void do_the_tests(
1345 FILE *fp
, /* General g_tune_pme output file */
1346 char **tpr_names
, /* Filenames of the input files to test */
1347 int maxPMEnodes
, /* Max fraction of nodes to use for PME */
1348 int minPMEnodes
, /* Min fraction of nodes to use for PME */
1349 int npme_fixed
, /* If >= -1, test fixed number of PME
1351 const char *npmevalues_opt
, /* Which -npme values should be tested */
1352 t_perf
**perfdata
, /* Here the performace data is stored */
1353 int *pmeentries
, /* Entries in the nPMEnodes list */
1354 int repeats
, /* Repeat each test this often */
1355 int nnodes
, /* Total number of nodes = nPP + nPME */
1356 int nr_tprs
, /* Total number of tpr files to test */
1357 gmx_bool bThreads
, /* Threads or MPI? */
1358 char *cmd_mpirun
, /* mpirun command string */
1359 char *cmd_np
, /* "-np", "-n", whatever mpirun needs */
1360 char *cmd_mdrun
, /* mdrun command string */
1361 char *cmd_args_bench
, /* arguments for mdrun in a string */
1362 const t_filenm
*fnm
, /* List of filenames from command line */
1363 int nfile
, /* Number of files specified on the cmdl. */
1364 int presteps
, /* DLB equilibration steps, is checked */
1365 gmx_int64_t cpt_steps
) /* Time step counter in the checkpoint */
1367 int i
, nr
, k
, ret
, count
= 0, totaltests
;
1368 int *nPMEnodes
= NULL
;
1371 char *command
, *cmd_stub
;
1373 gmx_bool bResetProblem
= FALSE
;
1374 gmx_bool bFirst
= TRUE
;
1377 /* This string array corresponds to the eParselog enum type at the start
1379 const char* ParseLog
[] = {
1381 "Logfile not found!",
1382 "No timings, logfile truncated?",
1383 "Run was terminated.",
1384 "Counters were not reset properly.",
1385 "No DD grid found for these settings.",
1386 "TPX version conflict!",
1387 "mdrun was not started in parallel!",
1388 "Number of PP nodes has a prime factor that is too large.",
1391 char str_PME_f_load
[13];
1394 /* Allocate space for the mdrun command line. 100 extra characters should
1395 be more than enough for the -npme etcetera arguments */
1396 cmdline_length
= strlen(cmd_mpirun
)
1399 + strlen(cmd_args_bench
)
1400 + strlen(tpr_names
[0]) + 100;
1401 snew(command
, cmdline_length
);
1402 snew(cmd_stub
, cmdline_length
);
1404 /* Construct the part of the command line that stays the same for all tests: */
1407 sprintf(cmd_stub
, "%s%s", cmd_mdrun
, cmd_np
);
1411 sprintf(cmd_stub
, "%s%s%s ", cmd_mpirun
, cmd_np
, cmd_mdrun
);
1414 /* Create a list of numbers of PME nodes to test */
1415 if (npme_fixed
< -1)
1417 make_npme_list(npmevalues_opt
, pmeentries
, &nPMEnodes
,
1418 nnodes
, minPMEnodes
, maxPMEnodes
);
1424 nPMEnodes
[0] = npme_fixed
;
1425 fprintf(stderr
, "Will use a fixed number of %d PME-only nodes.\n", nPMEnodes
[0]);
1430 fprintf(fp
, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1432 finalize(opt2fn("-p", nfile
, fnm
));
1436 /* Allocate one dataset for each tpr input file: */
1437 init_perfdata(perfdata
, nr_tprs
, *pmeentries
, repeats
);
1439 /*****************************************/
1440 /* Main loop over all tpr files to test: */
1441 /*****************************************/
1442 totaltests
= nr_tprs
*(*pmeentries
)*repeats
;
1443 for (k
= 0; k
< nr_tprs
; k
++)
1445 fprintf(fp
, "\nIndividual timings for input file %d (%s):\n", k
, tpr_names
[k
]);
1446 fprintf(fp
, "PME nodes Gcycles ns/day PME/f Remark\n");
1447 /* Loop over various numbers of PME nodes: */
1448 for (i
= 0; i
< *pmeentries
; i
++)
1450 pd
= &perfdata
[k
][i
];
1452 /* Loop over the repeats for each scenario: */
1453 for (nr
= 0; nr
< repeats
; nr
++)
1455 pd
->nPMEnodes
= nPMEnodes
[i
];
1457 /* Add -npme and -s to the command line and save it. Note that
1458 * the -passall (if set) options requires cmd_args_bench to be
1459 * at the end of the command line string */
1460 snew(pd
->mdrun_cmd_line
, cmdline_length
);
1461 sprintf(pd
->mdrun_cmd_line
, "%s-npme %d -s %s %s",
1462 cmd_stub
, pd
->nPMEnodes
, tpr_names
[k
], cmd_args_bench
);
1464 /* To prevent that all benchmarks fail due to a show-stopper argument
1465 * on the mdrun command line, we make a quick check with mdrun -h first */
1468 make_sure_it_runs(pd
->mdrun_cmd_line
, cmdline_length
, fp
);
1472 /* Do a benchmark simulation: */
1475 sprintf(buf
, ", pass %d/%d", nr
+1, repeats
);
1481 fprintf(stdout
, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
1482 (100.0*count
)/totaltests
,
1483 k
+1, nr_tprs
, i
+1, *pmeentries
, buf
);
1484 make_backup(opt2fn("-err", nfile
, fnm
));
1485 sprintf(command
, "%s 1> /dev/null 2>%s", pd
->mdrun_cmd_line
, opt2fn("-err", nfile
, fnm
));
1486 fprintf(stdout
, "%s\n", pd
->mdrun_cmd_line
);
1487 gmx_system_call(command
);
1489 /* Collect the performance data from the log file; also check stderr
1490 * for fatal errors */
1491 ret
= parse_logfile(opt2fn("-bg", nfile
, fnm
), opt2fn("-err", nfile
, fnm
),
1492 pd
, nr
, presteps
, cpt_steps
, nnodes
);
1493 if ((presteps
> 0) && (ret
== eParselogResetProblem
))
1495 bResetProblem
= TRUE
;
1498 if (-1 == pd
->nPMEnodes
)
1500 sprintf(buf
, "(%3d)", pd
->guessPME
);
1508 if (pd
->PME_f_load
[nr
] > 0.0)
1510 sprintf(str_PME_f_load
, "%12.3f", pd
->PME_f_load
[nr
]);
1514 sprintf(str_PME_f_load
, "%s", " - ");
1517 /* Write the data we got to disk */
1518 fprintf(fp
, "%4d%s %12.3f %12.3f %s %s", pd
->nPMEnodes
,
1519 buf
, pd
->Gcycles
[nr
], pd
->ns_per_day
[nr
], str_PME_f_load
, ParseLog
[ret
]);
1520 if (!(ret
== eParselogOK
|| ret
== eParselogNoDDGrid
|| ret
== eParselogNotFound
) )
1522 fprintf(fp
, " Check %s file for problems.", ret
== eParselogFatal
? "err" : "log");
1528 /* Do some cleaning up and delete the files we do not need any more */
1529 cleanup(fnm
, nfile
, k
, nnodes
, pd
->nPMEnodes
, nr
, ret
== eParselogFatal
);
1531 /* If the first run with this number of processors already failed, do not try again: */
1532 if (pd
->Gcycles
[0] <= 0.0 && repeats
> 1)
1534 fprintf(stdout
, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
1535 count
+= repeats
-(nr
+1);
1538 } /* end of repeats loop */
1539 } /* end of -npme loop */
1540 } /* end of tpr file loop */
1545 fprintf(fp
, "WARNING: The cycle and time step counters could not be reset properly. ");
1553 static void check_input(
1560 real maxPMEfraction
,
1561 real minPMEfraction
,
1563 gmx_int64_t bench_nsteps
,
1564 const t_filenm
*fnm
,
1574 /* Make sure the input file exists */
1575 if (!gmx_fexist(opt2fn("-s", nfile
, fnm
)))
1577 gmx_fatal(FARGS
, "File %s not found.", opt2fn("-s", nfile
, fnm
));
1580 /* Make sure that the checkpoint file is not overwritten during benchmarking */
1581 if ( (0 == strcmp(opt2fn("-cpi", nfile
, fnm
), opt2fn("-bcpo", nfile
, fnm
)) ) && (sim_part
> 1) )
1583 gmx_fatal(FARGS
, "Checkpoint input (-cpi) and benchmark checkpoint output (-bcpo) files must not be identical.\n"
1584 "The checkpoint input file must not be overwritten during the benchmarks.\n");
1587 /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
1590 gmx_fatal(FARGS
, "Number of repeats < 0!");
1593 /* Check number of nodes */
1596 gmx_fatal(FARGS
, "Number of nodes/threads must be a positive integer.");
1599 /* Automatically choose -ntpr if not set */
1609 /* Set a reasonable scaling factor for rcoulomb */
1612 *rmax
= rcoulomb
* 1.2;
1615 fprintf(stderr
, "Will test %d tpr file%s.\n", *ntprs
, *ntprs
== 1 ? "" : "s");
1621 fprintf(stderr
, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n");
1625 /* Make shure that rmin <= rcoulomb <= rmax */
1634 if (!(*rmin
<= *rmax
) )
1636 gmx_fatal(FARGS
, "Please choose the Coulomb radii such that rmin <= rmax.\n"
1637 "rmin = %g, rmax = %g, actual rcoul from .tpr file = %g\n", *rmin
, *rmax
, rcoulomb
);
1639 /* Add test scenarios if rmin or rmax were set */
1642 if (!is_equal(*rmin
, rcoulomb
) && (*ntprs
== 1) )
1645 fprintf(stderr
, "NOTE: Setting -rmin to %g changed -ntpr to %d\n",
1648 if (!is_equal(*rmax
, rcoulomb
) && (*ntprs
== 1) )
1651 fprintf(stderr
, "NOTE: Setting -rmax to %g changed -ntpr to %d\n",
1656 /* If one of rmin, rmax is set, we need 2 tpr files at minimum */
1657 if (!is_equal(*rmax
, rcoulomb
) || !is_equal(*rmin
, rcoulomb
) )
1659 *ntprs
= max(*ntprs
, 2);
1662 /* If both rmin, rmax are set, we need 3 tpr files at minimum */
1663 if (!is_equal(*rmax
, rcoulomb
) && !is_equal(*rmin
, rcoulomb
) )
1665 *ntprs
= max(*ntprs
, 3);
1670 fprintf(stderr
, "NOTE: Your rmin, rmax setting changed -ntpr to %d\n", *ntprs
);
1675 if (is_equal(*rmin
, rcoulomb
) && is_equal(rcoulomb
, *rmax
)) /* We have just a single rc */
1677 fprintf(stderr
, "WARNING: Resetting -ntpr to 1 since no Coulomb radius scaling is requested.\n"
1678 "Please set rmin < rmax to test Coulomb radii in the [rmin, rmax] interval\n"
1679 "with correspondingly adjusted PME grid settings\n");
1684 /* Check whether max and min fraction are within required values */
1685 if (maxPMEfraction
> 0.5 || maxPMEfraction
< 0)
1687 gmx_fatal(FARGS
, "-max must be between 0 and 0.5");
1689 if (minPMEfraction
> 0.5 || minPMEfraction
< 0)
1691 gmx_fatal(FARGS
, "-min must be between 0 and 0.5");
1693 if (maxPMEfraction
< minPMEfraction
)
1695 gmx_fatal(FARGS
, "-max must be larger or equal to -min");
1698 /* Check whether the number of steps - if it was set - has a reasonable value */
1699 if (bench_nsteps
< 0)
1701 gmx_fatal(FARGS
, "Number of steps must be positive.");
1704 if (bench_nsteps
> 10000 || bench_nsteps
< 100)
1706 fprintf(stderr
, "WARNING: steps=");
1707 fprintf(stderr
, "%"GMX_PRId64
, bench_nsteps
);
1708 fprintf(stderr
, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps
< 100) ? "few" : "many");
1713 gmx_fatal(FARGS
, "Cannot have a negative number of presteps.\n");
1716 /* Check for rcoulomb scaling if more than one .tpr file is tested */
1719 if (*rmin
/rcoulomb
< 0.75 || *rmax
/rcoulomb
> 1.25)
1721 fprintf(stderr
, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1725 /* If a fixed number of PME nodes is set we do rcoulomb and PME gird tuning
1726 * only. We need to check whether the requested number of PME-only nodes
1728 if (npme_fixed
> -1)
1730 /* No more than 50% of all nodes can be assigned as PME-only nodes. */
1731 if (2*npme_fixed
> nnodes
)
1733 gmx_fatal(FARGS
, "Cannot have more than %d PME-only nodes for a total of %d nodes (you chose %d).\n",
1734 nnodes
/2, nnodes
, npme_fixed
);
1736 if ((npme_fixed
> 0) && (5*npme_fixed
< nnodes
))
1738 fprintf(stderr
, "WARNING: Only %g percent of the nodes are assigned as PME-only nodes.\n",
1739 100.0*((real
)npme_fixed
/ (real
)nnodes
));
1741 if (opt2parg_bSet("-min", npargs
, pa
) || opt2parg_bSet("-max", npargs
, pa
))
1743 fprintf(stderr
, "NOTE: The -min, -max, and -npme options have no effect when a\n"
1744 " fixed number of PME-only nodes is requested with -fix.\n");
1750 /* Returns TRUE when "opt" is needed at launch time */
1751 static gmx_bool
is_launch_file(char *opt
, gmx_bool bSet
)
1753 /* Apart from the input .tpr and the output log files we need all options that
1754 * were set on the command line and that do not start with -b */
1755 if (0 == strncmp(opt
, "-b", 2) || 0 == strncmp(opt
, "-s", 2)
1756 || 0 == strncmp(opt
, "-err", 4) || 0 == strncmp(opt
, "-p", 2) )
1765 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1766 static gmx_bool
is_bench_file(char *opt
, gmx_bool bSet
, gmx_bool bOptional
, gmx_bool bIsOutput
)
1768 /* Apart from the input .tpr, all files starting with "-b" are for
1769 * _b_enchmark files exclusively */
1770 if (0 == strncmp(opt
, "-s", 2))
1775 if (0 == strncmp(opt
, "-b", 2) || 0 == strncmp(opt
, "-s", 2))
1777 if (!bOptional
|| bSet
)
1794 if (bSet
) /* These are additional input files like -cpi -ei */
1807 /* Adds 'buf' to 'str' */
1808 static void add_to_string(char **str
, char *buf
)
1813 len
= strlen(*str
) + strlen(buf
) + 1;
1819 /* Create the command line for the benchmark as well as for the real run */
1820 static void create_command_line_snippets(
1821 gmx_bool bAppendFiles
,
1822 gmx_bool bKeepAndNumCPT
,
1823 gmx_bool bResetHWay
,
1827 char *cmd_args_bench
[], /* command line arguments for benchmark runs */
1828 char *cmd_args_launch
[], /* command line arguments for simulation run */
1829 char extra_args
[]) /* Add this to the end of the command line */
1834 char strbuf
[STRLEN
];
1837 /* strlen needs at least '\0' as a string: */
1838 snew(*cmd_args_bench
, 1);
1839 snew(*cmd_args_launch
, 1);
1840 *cmd_args_launch
[0] = '\0';
1841 *cmd_args_bench
[0] = '\0';
1844 /*******************************************/
1845 /* 1. Process other command line arguments */
1846 /*******************************************/
1849 /* Add equilibration steps to benchmark options */
1850 sprintf(strbuf
, "-resetstep %d ", presteps
);
1851 add_to_string(cmd_args_bench
, strbuf
);
1853 /* These switches take effect only at launch time */
1854 if (FALSE
== bAppendFiles
)
1856 add_to_string(cmd_args_launch
, "-noappend ");
1860 add_to_string(cmd_args_launch
, "-cpnum ");
1864 add_to_string(cmd_args_launch
, "-resethway ");
1867 /********************/
1868 /* 2. Process files */
1869 /********************/
1870 for (i
= 0; i
< nfile
; i
++)
1872 opt
= (char *)fnm
[i
].opt
;
1873 name
= opt2fn(opt
, nfile
, fnm
);
1875 /* Strbuf contains the options, now let's sort out where we need that */
1876 sprintf(strbuf
, "%s %s ", opt
, name
);
1878 if (is_bench_file(opt
, opt2bSet(opt
, nfile
, fnm
), is_optional(&fnm
[i
]), is_output(&fnm
[i
])) )
1880 /* All options starting with -b* need the 'b' removed,
1881 * therefore overwrite strbuf */
1882 if (0 == strncmp(opt
, "-b", 2))
1884 sprintf(strbuf
, "-%s %s ", &opt
[2], name
);
1887 add_to_string(cmd_args_bench
, strbuf
);
1890 if (is_launch_file(opt
, opt2bSet(opt
, nfile
, fnm
)) )
1892 add_to_string(cmd_args_launch
, strbuf
);
1896 add_to_string(cmd_args_bench
, extra_args
);
1897 add_to_string(cmd_args_launch
, extra_args
);
1901 /* Set option opt */
1902 static void setopt(const char *opt
, int nfile
, t_filenm fnm
[])
1906 for (i
= 0; (i
< nfile
); i
++)
1908 if (strcmp(opt
, fnm
[i
].opt
) == 0)
1910 fnm
[i
].flag
|= ffSET
;
1916 /* This routine inspects the tpr file and ...
1917 * 1. checks for output files that get triggered by a tpr option. These output
1918 * files are marked as 'set' to allow for a proper cleanup after each
1920 * 2. returns the PME:PP load ratio
1921 * 3. returns rcoulomb from the tpr */
1922 static float inspect_tpr(int nfile
, t_filenm fnm
[], real
*rcoulomb
)
1924 gmx_bool bPull
; /* Is pulling requested in .tpr file? */
1925 gmx_bool bTpi
; /* Is test particle insertion requested? */
1926 gmx_bool bFree
; /* Is a free energy simulation requested? */
1927 gmx_bool bNM
; /* Is a normal mode analysis requested? */
1933 /* Check tpr file for options that trigger extra output files */
1934 read_tpx_state(opt2fn("-s", nfile
, fnm
), &ir
, &state
, NULL
, &mtop
);
1935 bPull
= (epullNO
!= ir
.ePull
);
1936 bFree
= (efepNO
!= ir
.efep
);
1937 bNM
= (eiNM
== ir
.eI
);
1938 bTpi
= EI_TPI(ir
.eI
);
1940 /* Set these output files on the tuning command-line */
1943 setopt("-pf", nfile
, fnm
);
1944 setopt("-px", nfile
, fnm
);
1948 setopt("-dhdl", nfile
, fnm
);
1952 setopt("-tpi", nfile
, fnm
);
1953 setopt("-tpid", nfile
, fnm
);
1957 setopt("-mtx", nfile
, fnm
);
1960 *rcoulomb
= ir
.rcoulomb
;
1962 /* Return the estimate for the number of PME nodes */
1963 return pme_load_estimate(&mtop
, &ir
, state
.box
);
1967 static void couple_files_options(int nfile
, t_filenm fnm
[])
1970 gmx_bool bSet
, bBench
;
1975 for (i
= 0; i
< nfile
; i
++)
1977 opt
= (char *)fnm
[i
].opt
;
1978 bSet
= ((fnm
[i
].flag
& ffSET
) != 0);
1979 bBench
= (0 == strncmp(opt
, "-b", 2));
1981 /* Check optional files */
1982 /* If e.g. -eo is set, then -beo also needs to be set */
1983 if (is_optional(&fnm
[i
]) && bSet
&& !bBench
)
1985 sprintf(buf
, "-b%s", &opt
[1]);
1986 setopt(buf
, nfile
, fnm
);
1988 /* If -beo is set, then -eo also needs to be! */
1989 if (is_optional(&fnm
[i
]) && bSet
&& bBench
)
1991 sprintf(buf
, "-%s", &opt
[2]);
1992 setopt(buf
, nfile
, fnm
);
1998 #define BENCHSTEPS (1000)
2000 int gmx_tune_pme(int argc
, char *argv
[])
2002 const char *desc
[] = {
2003 "For a given number [TT]-np[tt] or [TT]-ntmpi[tt] of processors/threads, [THISMODULE] systematically",
2004 "times [gmx-mdrun] with various numbers of PME-only nodes and determines",
2005 "which setting is fastest. It will also test whether performance can",
2006 "be enhanced by shifting load from the reciprocal to the real space",
2007 "part of the Ewald sum. ",
2008 "Simply pass your [TT].tpr[tt] file to [THISMODULE] together with other options",
2009 "for [gmx-mdrun] as needed.[PAR]",
2010 "Which executables are used can be set in the environment variables",
2011 "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
2012 "will be used as defaults. Note that for certain MPI frameworks you",
2013 "need to provide a machine- or hostfile. This can also be passed",
2014 "via the MPIRUN variable, e.g.[PAR]",
2015 "[TT]export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"[tt][PAR]",
2016 "Please call [THISMODULE] with the normal options you would pass to",
2017 "[gmx-mdrun] and add [TT]-np[tt] for the number of processors to perform the",
2018 "tests on, or [TT]-ntmpi[tt] for the number of threads. You can also add [TT]-r[tt]",
2019 "to repeat each test several times to get better statistics. [PAR]",
2020 "[THISMODULE] can test various real space / reciprocal space workloads",
2021 "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
2022 "written with enlarged cutoffs and smaller Fourier grids respectively.",
2023 "Typically, the first test (number 0) will be with the settings from the input",
2024 "[TT].tpr[tt] file; the last test (number [TT]ntpr[tt]) will have the Coulomb cutoff",
2025 "specified by [TT]-rmax[tt] with a somwhat smaller PME grid at the same time. ",
2026 "In this last test, the Fourier spacing is multiplied with [TT]rmax[tt]/rcoulomb. ",
2027 "The remaining [TT].tpr[tt] files will have equally-spaced Coulomb radii (and Fourier "
2028 "spacings) between these extremes. [BB]Note[bb] that you can set [TT]-ntpr[tt] to 1",
2029 "if you just seek the optimal number of PME-only nodes; in that case",
2030 "your input [TT].tpr[tt] file will remain unchanged.[PAR]",
2031 "For the benchmark runs, the default of 1000 time steps should suffice for most",
2032 "MD systems. The dynamic load balancing needs about 100 time steps",
2033 "to adapt to local load imbalances, therefore the time step counters",
2034 "are by default reset after 100 steps. For large systems (>1M atoms), as well as ",
2035 "for a higher accuarcy of the measurements, you should set [TT]-resetstep[tt] to a higher value.",
2036 "From the 'DD' load imbalance entries in the md.log output file you",
2037 "can tell after how many steps the load is sufficiently balanced. Example call:[PAR]"
2038 "[TT]gmx tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
2039 "After calling [gmx-mdrun] several times, detailed performance information",
2040 "is available in the output file [TT]perf.out[tt].",
2041 "[BB]Note[bb] that during the benchmarks, a couple of temporary files are written",
2042 "(options [TT]-b[tt]*), these will be automatically deleted after each test.[PAR]",
2043 "If you want the simulation to be started automatically with the",
2044 "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
2049 int pmeentries
= 0; /* How many values for -npme do we actually test for each tpr file */
2050 real maxPMEfraction
= 0.50;
2051 real minPMEfraction
= 0.25;
2052 int maxPMEnodes
, minPMEnodes
;
2053 float guessPMEratio
; /* guessed PME:PP ratio based on the tpr file */
2054 float guessPMEnodes
;
2055 int npme_fixed
= -2; /* If >= -1, use only this number
2056 * of PME-only nodes */
2058 real rmin
= 0.0, rmax
= 0.0; /* min and max value for rcoulomb if scaling is requested */
2059 real rcoulomb
= -1.0; /* Coulomb radius as set in .tpr file */
2060 gmx_bool bScaleRvdw
= TRUE
;
2061 gmx_int64_t bench_nsteps
= BENCHSTEPS
;
2062 gmx_int64_t new_sim_nsteps
= -1; /* -1 indicates: not set by the user */
2063 gmx_int64_t cpt_steps
= 0; /* Step counter in .cpt input file */
2064 int presteps
= 100; /* Do a full cycle reset after presteps steps */
2065 gmx_bool bOverwrite
= FALSE
, bKeepTPR
;
2066 gmx_bool bLaunch
= FALSE
;
2067 char *ExtraArgs
= NULL
;
2068 char **tpr_names
= NULL
;
2069 const char *simulation_tpr
= NULL
;
2070 int best_npme
, best_tpr
;
2071 int sim_part
= 1; /* For benchmarks with checkpoint files */
2074 /* Default program names if nothing else is found */
2075 char *cmd_mpirun
= NULL
, *cmd_mdrun
= NULL
;
2076 char *cmd_args_bench
, *cmd_args_launch
;
2077 char *cmd_np
= NULL
;
2079 t_perf
**perfdata
= NULL
;
2085 /* Print out how long the tuning took */
2088 static t_filenm fnm
[] = {
2090 { efOUT
, "-p", "perf", ffWRITE
},
2091 { efLOG
, "-err", "bencherr", ffWRITE
},
2092 { efTPX
, "-so", "tuned", ffWRITE
},
2094 { efTPX
, NULL
, NULL
, ffREAD
},
2095 { efTRN
, "-o", NULL
, ffWRITE
},
2096 { efCOMPRESSED
, "-x", NULL
, ffOPTWR
},
2097 { efCPT
, "-cpi", NULL
, ffOPTRD
},
2098 { efCPT
, "-cpo", NULL
, ffOPTWR
},
2099 { efSTO
, "-c", "confout", ffWRITE
},
2100 { efEDR
, "-e", "ener", ffWRITE
},
2101 { efLOG
, "-g", "md", ffWRITE
},
2102 { efXVG
, "-dhdl", "dhdl", ffOPTWR
},
2103 { efXVG
, "-field", "field", ffOPTWR
},
2104 { efXVG
, "-table", "table", ffOPTRD
},
2105 { efXVG
, "-tabletf", "tabletf", ffOPTRD
},
2106 { efXVG
, "-tablep", "tablep", ffOPTRD
},
2107 { efXVG
, "-tableb", "table", ffOPTRD
},
2108 { efTRX
, "-rerun", "rerun", ffOPTRD
},
2109 { efXVG
, "-tpi", "tpi", ffOPTWR
},
2110 { efXVG
, "-tpid", "tpidist", ffOPTWR
},
2111 { efEDI
, "-ei", "sam", ffOPTRD
},
2112 { efXVG
, "-eo", "edsam", ffOPTWR
},
2113 { efXVG
, "-devout", "deviatie", ffOPTWR
},
2114 { efXVG
, "-runav", "runaver", ffOPTWR
},
2115 { efXVG
, "-px", "pullx", ffOPTWR
},
2116 { efXVG
, "-pf", "pullf", ffOPTWR
},
2117 { efXVG
, "-ro", "rotation", ffOPTWR
},
2118 { efLOG
, "-ra", "rotangles", ffOPTWR
},
2119 { efLOG
, "-rs", "rotslabs", ffOPTWR
},
2120 { efLOG
, "-rt", "rottorque", ffOPTWR
},
2121 { efMTX
, "-mtx", "nm", ffOPTWR
},
2122 { efNDX
, "-dn", "dipole", ffOPTWR
},
2123 /* Output files that are deleted after each benchmark run */
2124 { efTRN
, "-bo", "bench", ffWRITE
},
2125 { efXTC
, "-bx", "bench", ffWRITE
},
2126 { efCPT
, "-bcpo", "bench", ffWRITE
},
2127 { efSTO
, "-bc", "bench", ffWRITE
},
2128 { efEDR
, "-be", "bench", ffWRITE
},
2129 { efLOG
, "-bg", "bench", ffWRITE
},
2130 { efXVG
, "-beo", "benchedo", ffOPTWR
},
2131 { efXVG
, "-bdhdl", "benchdhdl", ffOPTWR
},
2132 { efXVG
, "-bfield", "benchfld", ffOPTWR
},
2133 { efXVG
, "-btpi", "benchtpi", ffOPTWR
},
2134 { efXVG
, "-btpid", "benchtpid", ffOPTWR
},
2135 { efXVG
, "-bdevout", "benchdev", ffOPTWR
},
2136 { efXVG
, "-brunav", "benchrnav", ffOPTWR
},
2137 { efXVG
, "-bpx", "benchpx", ffOPTWR
},
2138 { efXVG
, "-bpf", "benchpf", ffOPTWR
},
2139 { efXVG
, "-bro", "benchrot", ffOPTWR
},
2140 { efLOG
, "-bra", "benchrota", ffOPTWR
},
2141 { efLOG
, "-brs", "benchrots", ffOPTWR
},
2142 { efLOG
, "-brt", "benchrott", ffOPTWR
},
2143 { efMTX
, "-bmtx", "benchn", ffOPTWR
},
2144 { efNDX
, "-bdn", "bench", ffOPTWR
}
2147 gmx_bool bThreads
= FALSE
;
2151 const char *procstring
[] =
2152 { NULL
, "-np", "-n", "none", NULL
};
2153 const char *npmevalues_opt
[] =
2154 { NULL
, "auto", "all", "subset", NULL
};
2156 gmx_bool bAppendFiles
= TRUE
;
2157 gmx_bool bKeepAndNumCPT
= FALSE
;
2158 gmx_bool bResetCountersHalfWay
= FALSE
;
2159 gmx_bool bBenchmark
= TRUE
;
2161 output_env_t oenv
= NULL
;
2164 /***********************/
2165 /* g_tune_pme options: */
2166 /***********************/
2167 { "-np", FALSE
, etINT
, {&nnodes
},
2168 "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" },
2169 { "-npstring", FALSE
, etENUM
, {procstring
},
2170 "Specify the number of processors to [TT]$MPIRUN[tt] using this string"},
2171 { "-ntmpi", FALSE
, etINT
, {&nthreads
},
2172 "Number of MPI-threads to run the tests on (turns MPI & mpirun off)"},
2173 { "-r", FALSE
, etINT
, {&repeats
},
2174 "Repeat each test this often" },
2175 { "-max", FALSE
, etREAL
, {&maxPMEfraction
},
2176 "Max fraction of PME nodes to test with" },
2177 { "-min", FALSE
, etREAL
, {&minPMEfraction
},
2178 "Min fraction of PME nodes to test with" },
2179 { "-npme", FALSE
, etENUM
, {npmevalues_opt
},
2180 "Within -min and -max, benchmark all possible values for [TT]-npme[tt], or just a reasonable subset. "
2181 "Auto neglects -min and -max and chooses reasonable values around a guess for npme derived from the .tpr"},
2182 { "-fix", FALSE
, etINT
, {&npme_fixed
},
2183 "If >= -1, do not vary the number of PME-only nodes, instead use this fixed value and only vary rcoulomb and the PME grid spacing."},
2184 { "-rmax", FALSE
, etREAL
, {&rmax
},
2185 "If >0, maximal rcoulomb for -ntpr>1 (rcoulomb upscaling results in fourier grid downscaling)" },
2186 { "-rmin", FALSE
, etREAL
, {&rmin
},
2187 "If >0, minimal rcoulomb for -ntpr>1" },
2188 { "-scalevdw", FALSE
, etBOOL
, {&bScaleRvdw
},
2189 "Scale rvdw along with rcoulomb"},
2190 { "-ntpr", FALSE
, etINT
, {&ntprs
},
2191 "Number of [TT].tpr[tt] files to benchmark. Create this many files with different rcoulomb scaling factors depending on -rmin and -rmax. "
2192 "If < 1, automatically choose the number of [TT].tpr[tt] files to test" },
2193 { "-steps", FALSE
, etINT64
, {&bench_nsteps
},
2194 "Take timings for this many steps in the benchmark runs" },
2195 { "-resetstep", FALSE
, etINT
, {&presteps
},
2196 "Let dlb equilibrate this many steps before timings are taken (reset cycle counters after this many steps)" },
2197 { "-simsteps", FALSE
, etINT64
, {&new_sim_nsteps
},
2198 "If non-negative, perform this many steps in the real run (overwrites nsteps from [TT].tpr[tt], add [TT].cpt[tt] steps)" },
2199 { "-launch", FALSE
, etBOOL
, {&bLaunch
},
2200 "Launch the real simulation after optimization" },
2201 { "-bench", FALSE
, etBOOL
, {&bBenchmark
},
2202 "Run the benchmarks or just create the input [TT].tpr[tt] files?" },
2203 /******************/
2204 /* mdrun options: */
2205 /******************/
2206 /* We let g_tune_pme parse and understand these options, because we need to
2207 * prevent that they appear on the mdrun command line for the benchmarks */
2208 { "-append", FALSE
, etBOOL
, {&bAppendFiles
},
2209 "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
2210 { "-cpnum", FALSE
, etBOOL
, {&bKeepAndNumCPT
},
2211 "Keep and number checkpoint files (launch only)" },
2212 { "-resethway", FALSE
, etBOOL
, {&bResetCountersHalfWay
},
2213 "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt] (launch only)" }
2216 #define NFILE asize(fnm)
2218 seconds
= gmx_gettime();
2220 if (!parse_common_args(&argc
, argv
, PCA_NOEXIT_ON_ARGS
,
2221 NFILE
, fnm
, asize(pa
), pa
, asize(desc
), desc
,
2227 /* Store the remaining unparsed command line entries in a string which
2228 * is then attached to the mdrun command line */
2230 ExtraArgs
[0] = '\0';
2231 for (i
= 1; i
< argc
; i
++) /* argc will now be 1 if everything was understood */
2233 add_to_string(&ExtraArgs
, argv
[i
]);
2234 add_to_string(&ExtraArgs
, " ");
2237 if (opt2parg_bSet("-ntmpi", asize(pa
), pa
))
2240 if (opt2parg_bSet("-npstring", asize(pa
), pa
))
2242 fprintf(stderr
, "WARNING: -npstring has no effect when using threads.\n");
2247 gmx_fatal(FARGS
, "Can't run multi-threaded MPI simulation yet!");
2249 /* and now we just set this; a bit of an ugly hack*/
2252 /* Check for PME:PP ratio and whether tpr triggers additional output files */
2253 guessPMEratio
= inspect_tpr(NFILE
, fnm
, &rcoulomb
);
2255 /* Automatically set -beo options if -eo is set etc. */
2256 couple_files_options(NFILE
, fnm
);
2258 /* Construct the command line arguments for benchmark runs
2259 * as well as for the simulation run */
2262 sprintf(bbuf
, " -ntmpi %d ", nthreads
);
2266 /* This string will be used for MPI runs and will appear after the
2267 * mpirun command. */
2268 if (strcmp(procstring
[0], "none") != 0)
2270 sprintf(bbuf
, " %s %d ", procstring
[0], nnodes
);
2280 create_command_line_snippets(bAppendFiles
, bKeepAndNumCPT
, bResetCountersHalfWay
, presteps
,
2281 NFILE
, fnm
, &cmd_args_bench
, &cmd_args_launch
, ExtraArgs
);
2283 /* Read in checkpoint file if requested */
2285 if (opt2bSet("-cpi", NFILE
, fnm
))
2288 cr
->duty
= DUTY_PP
; /* makes the following routine happy */
2289 read_checkpoint_simulation_part(opt2fn("-cpi", NFILE
, fnm
),
2290 &sim_part
, &cpt_steps
, cr
,
2291 FALSE
, NFILE
, fnm
, NULL
, NULL
);
2294 /* sim_part will now be 1 if no checkpoint file was found */
2297 gmx_fatal(FARGS
, "Checkpoint file %s not found!", opt2fn("-cpi", NFILE
, fnm
));
2301 /* Open performance output file and write header info */
2302 fp
= ffopen(opt2fn("-p", NFILE
, fnm
), "w");
2304 /* Make a quick consistency check of command line parameters */
2305 check_input(nnodes
, repeats
, &ntprs
, &rmin
, rcoulomb
, &rmax
,
2306 maxPMEfraction
, minPMEfraction
, npme_fixed
,
2307 bench_nsteps
, fnm
, NFILE
, sim_part
, presteps
,
2310 /* Determine the maximum and minimum number of PME nodes to test,
2311 * the actual list of settings is build in do_the_tests(). */
2312 if ((nnodes
> 2) && (npme_fixed
< -1))
2314 if (0 == strcmp(npmevalues_opt
[0], "auto"))
2316 /* Determine the npme range automatically based on the PME:PP load guess */
2317 if (guessPMEratio
> 1.0)
2319 /* More PME than PP work, probably we do not need separate PME nodes at all! */
2320 maxPMEnodes
= nnodes
/2;
2321 minPMEnodes
= nnodes
/2;
2325 /* PME : PP load is in the range 0..1, let's test around the guess */
2326 guessPMEnodes
= nnodes
/(1.0 + 1.0/guessPMEratio
);
2327 minPMEnodes
= floor(0.7*guessPMEnodes
);
2328 maxPMEnodes
= ceil(1.6*guessPMEnodes
);
2329 maxPMEnodes
= min(maxPMEnodes
, nnodes
/2);
2334 /* Determine the npme range based on user input */
2335 maxPMEnodes
= floor(maxPMEfraction
*nnodes
);
2336 minPMEnodes
= max(floor(minPMEfraction
*nnodes
), 0);
2337 fprintf(stdout
, "Will try runs with %d ", minPMEnodes
);
2338 if (maxPMEnodes
!= minPMEnodes
)
2340 fprintf(stdout
, "- %d ", maxPMEnodes
);
2342 fprintf(stdout
, "PME-only nodes.\n Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
2351 /* Get the commands we need to set up the runs from environment variables */
2352 get_program_paths(bThreads
, &cmd_mpirun
, &cmd_mdrun
);
2353 if (bBenchmark
&& repeats
> 0)
2355 check_mdrun_works(bThreads
, cmd_mpirun
, cmd_np
, cmd_mdrun
);
2358 /* Print some header info to file */
2360 fprintf(fp
, "\n P E R F O R M A N C E R E S U L T S\n");
2362 fprintf(fp
, "%s for Gromacs %s\n", ShortProgram(), GromacsVersion());
2365 fprintf(fp
, "Number of nodes : %d\n", nnodes
);
2366 fprintf(fp
, "The mpirun command is : %s\n", cmd_mpirun
);
2367 if (strcmp(procstring
[0], "none") != 0)
2369 fprintf(fp
, "Passing # of nodes via : %s\n", procstring
[0]);
2373 fprintf(fp
, "Not setting number of nodes in system call\n");
2378 fprintf(fp
, "Number of threads : %d\n", nnodes
);
2381 fprintf(fp
, "The mdrun command is : %s\n", cmd_mdrun
);
2382 fprintf(fp
, "mdrun args benchmarks : %s\n", cmd_args_bench
);
2383 fprintf(fp
, "Benchmark steps : ");
2384 fprintf(fp
, "%"GMX_PRId64
, bench_nsteps
);
2386 fprintf(fp
, "dlb equilibration steps : %d\n", presteps
);
2389 fprintf(fp
, "Checkpoint time step : ");
2390 fprintf(fp
, "%"GMX_PRId64
, cpt_steps
);
2393 fprintf(fp
, "mdrun args at launchtime: %s\n", cmd_args_launch
);
2395 if (new_sim_nsteps
>= 0)
2398 fprintf(stderr
, "Note: Simulation input file %s will have ", opt2fn("-so", NFILE
, fnm
));
2399 fprintf(stderr
, "%"GMX_PRId64
, new_sim_nsteps
+cpt_steps
);
2400 fprintf(stderr
, " steps.\n");
2401 fprintf(fp
, "Simulation steps : ");
2402 fprintf(fp
, "%"GMX_PRId64
, new_sim_nsteps
);
2407 fprintf(fp
, "Repeats for each test : %d\n", repeats
);
2410 if (npme_fixed
>= -1)
2412 fprintf(fp
, "Fixing -npme at : %d\n", npme_fixed
);
2415 fprintf(fp
, "Input file : %s\n", opt2fn("-s", NFILE
, fnm
));
2416 fprintf(fp
, " PME/PP load estimate : %g\n", guessPMEratio
);
2418 /* Allocate memory for the inputinfo struct: */
2420 info
->nr_inputfiles
= ntprs
;
2421 for (i
= 0; i
< ntprs
; i
++)
2423 snew(info
->rcoulomb
, ntprs
);
2424 snew(info
->rvdw
, ntprs
);
2425 snew(info
->rlist
, ntprs
);
2426 snew(info
->rlistlong
, ntprs
);
2427 snew(info
->nkx
, ntprs
);
2428 snew(info
->nky
, ntprs
);
2429 snew(info
->nkz
, ntprs
);
2430 snew(info
->fsx
, ntprs
);
2431 snew(info
->fsy
, ntprs
);
2432 snew(info
->fsz
, ntprs
);
2434 /* Make alternative tpr files to test: */
2435 snew(tpr_names
, ntprs
);
2436 for (i
= 0; i
< ntprs
; i
++)
2438 snew(tpr_names
[i
], STRLEN
);
2441 /* It can be that ntprs is reduced by make_benchmark_tprs if not enough
2442 * different grids could be found. */
2443 make_benchmark_tprs(opt2fn("-s", NFILE
, fnm
), tpr_names
, bench_nsteps
+presteps
,
2444 cpt_steps
, rmin
, rmax
, bScaleRvdw
, &ntprs
, info
, fp
);
2446 /********************************************************************************/
2447 /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats */
2448 /********************************************************************************/
2449 snew(perfdata
, ntprs
);
2452 do_the_tests(fp
, tpr_names
, maxPMEnodes
, minPMEnodes
, npme_fixed
, npmevalues_opt
[0], perfdata
, &pmeentries
,
2453 repeats
, nnodes
, ntprs
, bThreads
, cmd_mpirun
, cmd_np
, cmd_mdrun
,
2454 cmd_args_bench
, fnm
, NFILE
, presteps
, cpt_steps
);
2456 fprintf(fp
, "\nTuning took%8.1f minutes.\n", (gmx_gettime()-seconds
)/60.0);
2458 /* Analyse the results and give a suggestion for optimal settings: */
2459 bKeepTPR
= analyze_data(fp
, opt2fn("-p", NFILE
, fnm
), perfdata
, nnodes
, ntprs
, pmeentries
,
2460 repeats
, info
, &best_tpr
, &best_npme
);
2462 /* Take the best-performing tpr file and enlarge nsteps to original value */
2463 if (bKeepTPR
&& !bOverwrite
)
2465 simulation_tpr
= opt2fn("-s", NFILE
, fnm
);
2469 simulation_tpr
= opt2fn("-so", NFILE
, fnm
);
2470 modify_PMEsettings(bOverwrite
? (new_sim_nsteps
+cpt_steps
) : info
->orig_sim_steps
,
2471 info
->orig_init_step
, tpr_names
[best_tpr
], simulation_tpr
);
2474 /* Let's get rid of the temporary benchmark input files */
2475 for (i
= 0; i
< ntprs
; i
++)
2477 fprintf(stdout
, "Deleting temporary benchmark input file %s\n", tpr_names
[i
]);
2478 remove(tpr_names
[i
]);
2481 /* Now start the real simulation if the user requested it ... */
2482 launch_simulation(bLaunch
, fp
, bThreads
, cmd_mpirun
, cmd_np
, cmd_mdrun
,
2483 cmd_args_launch
, simulation_tpr
, best_npme
);
2487 /* ... or simply print the performance results to screen: */
2490 finalize(opt2fn("-p", NFILE
, fnm
));