3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
9 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
10 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
11 * Copyright (c) 2001-2008, The GROMACS development team,
12 * check out http://www.gromacs.org for more information.
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * If you want to redistribute modifications, please consider that
20 * scientific software is very special. Version control is crucial -
21 * bugs must be traceable. We will be happy to consider code for
22 * inclusion in the official distribution, but derived work must not
23 * be called official GROMACS. Details are found in the README & COPYING
24 * files - if they are missing, get the official version at www.gromacs.org.
26 * To help us fund GROMACS development, we humbly ask that you cite
27 * the papers on the package - you can find them in the top README file.
29 * For more info, check our website at http://www.gromacs.org
32 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
44 #include "checkpoint.h"
51 ddnoSEL
, ddnoINTERLEAVE
, ddnoPP_PME
, ddnoCARTESIAN
, ddnoNR
54 /* Enum for situations that can occur during log file parsing, the
55 * corresponding string entries can be found in do_the_tests() in
56 * const char* ParseLog[] */
62 eParselogResetProblem
,
73 int nPMEnodes
; /* number of PME only nodes used in this test */
74 int nx
, ny
, nz
; /* DD grid */
75 int guessPME
; /* if nPMEnodes == -1, this is the guessed number of PME nodes */
76 double *Gcycles
; /* This can contain more than one value if doing multiple tests */
80 float *PME_f_load
; /* PME mesh/force load average*/
81 float PME_f_load_Av
; /* Average average ;) ... */
82 char *mdrun_cmd_line
; /* Mdrun command line used for this test */
88 int nr_inputfiles
; /* The number of tpr and mdp input files */
89 gmx_large_int_t orig_sim_steps
; /* Number of steps to be done in the real simulation */
90 real
*r_coulomb
; /* The coulomb radii [0...nr_inputfiles] */
91 real
*r_vdw
; /* The vdW radii */
92 real
*rlist
; /* Neighbourlist cutoff radius */
94 int *fourier_nx
, *fourier_ny
, *fourier_nz
;
95 real
*fourier_sp
; /* Fourierspacing */
97 /* Original values as in inputfile: */
100 real orig_rlist
, orig_rlistlong
;
106 static void sep_line(FILE *fp
)
108 fprintf(fp
, "\n------------------------------------------------------------\n");
112 /* Wrapper for system calls */
113 static int gmx_system_call(char *command
)
116 gmx_fatal(FARGS
,"No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n",command
);
118 return ( system(command
) );
123 /* Check if string starts with substring */
124 static bool str_starts(const char *string
, const char *substring
)
126 return ( strncmp(string
, substring
, strlen(substring
)) == 0);
130 static void cleandata(t_perf
*perfdata
, int test_nr
)
132 perfdata
->Gcycles
[test_nr
] = 0.0;
133 perfdata
->ns_per_day
[test_nr
] = 0.0;
134 perfdata
->PME_f_load
[test_nr
] = 0.0;
140 static bool is_equal(real a
, real b
)
142 real diff
, eps
=1.0e-6;
147 if (diff
< 0.0) diff
= -diff
;
156 static void finalize(const char *fn_out
)
162 fp
= fopen(fn_out
,"r");
163 fprintf(stdout
,"\n\n");
165 while( fgets(buf
,STRLEN
-1,fp
) != NULL
)
167 fprintf(stdout
,"%s",buf
);
170 fprintf(stdout
,"\n\n");
175 enum {eFoundNothing
, eFoundDDStr
, eFoundAccountingStr
, eFoundCycleStr
};
177 static int parse_logfile(const char *logfile
, const char *errfile
,
178 t_perf
*perfdata
, int test_nr
, int presteps
, gmx_large_int_t cpt_steps
,
182 char line
[STRLEN
], dumstring
[STRLEN
], dumstring2
[STRLEN
];
183 const char matchstrdd
[]="Domain decomposition grid";
184 const char matchstrcr
[]="resetting all time and cycle counters";
185 const char matchstrbal
[]="Average PME mesh/force load:";
186 const char matchstring
[]="R E A L C Y C L E A N D T I M E A C C O U N T I N G";
187 const char errSIG
[]="signal, stopping at the next";
190 float dum1
,dum2
,dum3
;
192 gmx_large_int_t resetsteps
=-1;
193 bool bFoundResetStr
= FALSE
;
194 bool bResetChecked
= FALSE
;
197 if (!gmx_fexist(logfile
))
199 fprintf(stderr
, "WARNING: Could not find logfile %s.\n", logfile
);
200 cleandata(perfdata
, test_nr
);
201 return eParselogNotFound
;
204 fp
= fopen(logfile
, "r");
205 perfdata
->PME_f_load
[test_nr
] = -1.0;
206 perfdata
->guessPME
= -1;
208 iFound
= eFoundNothing
;
210 iFound
= eFoundDDStr
; /* Skip some case statements */
212 while (fgets(line
, STRLEN
, fp
) != NULL
)
214 /* Remove leading spaces */
217 /* Check for TERM and INT signals from user: */
218 if ( strstr(line
, errSIG
) != NULL
)
221 cleandata(perfdata
, test_nr
);
222 return eParselogTerm
;
225 /* Check whether cycle resetting worked */
226 if (presteps
> 0 && !bFoundResetStr
)
228 if (strstr(line
, matchstrcr
) != NULL
)
230 sprintf(dumstring
, "Step %s", gmx_large_int_pfmt
);
231 sscanf(line
, dumstring
, &resetsteps
);
232 bFoundResetStr
= TRUE
;
233 if (resetsteps
== presteps
+cpt_steps
)
235 bResetChecked
= TRUE
;
239 sprintf(dumstring
, gmx_large_int_pfmt
, resetsteps
);
240 sprintf(dumstring2
, gmx_large_int_pfmt
, presteps
+cpt_steps
);
241 fprintf(stderr
, "WARNING: Time step counters were reset at step %s,\n"
242 " though they were supposed to be reset at step %s!\n",
243 dumstring
, dumstring2
);
248 /* Look for strings that appear in a certain order in the log file: */
252 /* Look for domain decomp grid and separate PME nodes: */
253 if (str_starts(line
, matchstrdd
))
255 sscanf(line
, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
256 &(perfdata
->nx
), &(perfdata
->ny
), &(perfdata
->nz
), &npme
);
257 if (perfdata
->nPMEnodes
== -1)
258 perfdata
->guessPME
= npme
;
259 else if (perfdata
->nPMEnodes
!= npme
)
260 gmx_fatal(FARGS
, "PME nodes from command line and output file are not identical");
261 iFound
= eFoundDDStr
;
263 /* Catch a few errors that might have occured: */
264 else if (str_starts(line
, "There is no domain decomposition for"))
266 return eParselogNoDDGrid
;
268 else if (str_starts(line
, "reading tpx file"))
270 return eParselogTPXVersion
;
272 else if (str_starts(line
, "The -dd or -npme option request a parallel simulation"))
274 return eParselogNotParallel
;
278 /* Look for PME mesh/force balance (not necessarily present, though) */
279 if (str_starts(line
, matchstrbal
))
280 sscanf(&line
[strlen(matchstrbal
)], "%f", &(perfdata
->PME_f_load
[test_nr
]));
281 /* Look for matchstring */
282 if (str_starts(line
, matchstring
))
283 iFound
= eFoundAccountingStr
;
285 case eFoundAccountingStr
:
286 /* Already found matchstring - look for cycle data */
287 if (str_starts(line
, "Total "))
289 sscanf(line
,"Total %d %lf",&procs
,&(perfdata
->Gcycles
[test_nr
]));
290 iFound
= eFoundCycleStr
;
294 /* Already found cycle data - look for remaining performance info and return */
295 if (str_starts(line
, "Performance:"))
297 sscanf(line
,"%s %f %f %f %f", dumstring
, &dum1
, &dum2
, &(perfdata
->ns_per_day
[test_nr
]), &dum3
);
299 if (bResetChecked
|| presteps
== 0)
302 return eParselogResetProblem
;
308 /* Check why there is no performance data in the log file.
309 * Did a fatal errors occur? */
310 if (gmx_fexist(errfile
))
312 fp
= fopen(errfile
, "r");
313 while (fgets(line
, STRLEN
, fp
) != NULL
)
315 if ( str_starts(line
, "Fatal error:") )
317 if (fgets(line
, STRLEN
, fp
) != NULL
)
318 fprintf(stderr
, "\nWARNING: A fatal error has occured during this benchmark:\n"
321 cleandata(perfdata
, test_nr
);
322 return eParselogFatal
;
329 fprintf(stderr
, "WARNING: Could not find stderr file %s.\n", errfile
);
332 /* Giving up ... we could not find out why there is no performance data in
334 fprintf(stdout
, "No performance data in log file.\n");
336 cleandata(perfdata
, test_nr
);
338 return eParselogNoPerfData
;
342 static bool analyze_data(
351 int *index_tpr
, /* OUT: Nr of mdp file with best settings */
352 int *npme_optimal
) /* OUT: Optimal number of PME nodes */
355 int line
=0, line_win
=-1;
356 int k_win
=-1, i_win
=-1, winPME
;
357 double s
=0.0; /* standard deviation */
360 char str_PME_f_load
[13];
367 fprintf(fp
, "Summary of successful runs:\n");
368 fprintf(fp
, "Line tpr PME nodes Gcycles Av. Std.dev. ns/day PME/f");
370 fprintf(fp
, " DD grid");
375 for (k
=0; k
<ntprs
; k
++)
377 for (i
=0; i
<ntests
; i
++)
379 /* Select the right dataset: */
380 pd
= &(perfdata
[k
][i
]);
382 pd
->Gcycles_Av
= 0.0;
383 pd
->PME_f_load_Av
= 0.0;
384 pd
->ns_per_day_Av
= 0.0;
386 if (pd
->nPMEnodes
== -1)
387 sprintf(strbuf
, "(%3d)", pd
->guessPME
);
389 sprintf(strbuf
, " ");
391 /* Get the average run time of a setting */
392 for (j
=0; j
<nrepeats
; j
++)
394 pd
->Gcycles_Av
+= pd
->Gcycles
[j
];
395 pd
->PME_f_load_Av
+= pd
->PME_f_load
[j
];
397 pd
->Gcycles_Av
/= nrepeats
;
398 pd
->PME_f_load_Av
/= nrepeats
;
400 for (j
=0; j
<nrepeats
; j
++)
402 if (pd
->ns_per_day
[j
] > 0.0)
403 pd
->ns_per_day_Av
+= pd
->ns_per_day
[j
];
406 /* Somehow the performance number was not aquired for this run,
407 * therefor set the average to some negative value: */
408 pd
->ns_per_day_Av
= -1.0f
*nrepeats
;
412 pd
->ns_per_day_Av
/= nrepeats
;
415 if (pd
->PME_f_load_Av
> 0.0)
416 sprintf(str_PME_f_load
, "%12.3f", pd
->PME_f_load_Av
);
418 sprintf(str_PME_f_load
, "%s", " - ");
421 /* We assume we had a successful run if both averages are positive */
422 if (pd
->Gcycles_Av
> 0.0 && pd
->ns_per_day_Av
> 0.0)
424 /* Output statistics if repeats were done */
427 /* Calculate the standard deviation */
429 for (j
=0; j
<nrepeats
; j
++)
430 s
+= pow( pd
->Gcycles
[j
] - pd
->Gcycles_Av
, 2 );
434 fprintf(fp
, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s",
435 line
, k
, pd
->nPMEnodes
, strbuf
, pd
->Gcycles_Av
, s
,
436 pd
->ns_per_day_Av
, str_PME_f_load
);
438 fprintf(fp
, " %3d %3d %3d", pd
->nx
, pd
->ny
, pd
->nz
);
441 /* Store the index of the best run found so far in 'winner': */
442 if ( (k_win
== -1) || (pd
->Gcycles_Av
< perfdata
[k_win
][i_win
].Gcycles_Av
) )
454 gmx_fatal(FARGS
, "None of the runs was successful! Check %s for problems.", fn
);
458 winPME
= perfdata
[k_win
][i_win
].nPMEnodes
;
460 sprintf(strbuf
, "%s", "the automatic number of");
462 sprintf(strbuf
, "%d", winPME
);
463 fprintf(fp
, "Best performance was achieved with %s PME nodes", strbuf
);
465 fprintf(fp
, " (see line %d)", line_win
);
468 /* Only mention settings if they were modified: */
469 bCanUseOrigTPR
= TRUE
;
470 if ( !is_equal(info
->r_coulomb
[k_win
], info
->orig_rcoulomb
) )
472 fprintf(fp
, "Optimized PME settings:\n"
473 " New Coulomb radius: %f nm (was %f nm)\n",
474 info
->r_coulomb
[k_win
], info
->orig_rcoulomb
);
475 bCanUseOrigTPR
= FALSE
;
478 if ( !is_equal(info
->r_vdw
[k_win
], info
->orig_rvdw
) )
480 fprintf(fp
, " New Van der Waals radius: %f nm (was %f nm)\n",
481 info
->r_vdw
[k_win
], info
->orig_rvdw
);
482 bCanUseOrigTPR
= FALSE
;
485 if ( ! (info
->fourier_nx
[k_win
]==info
->orig_nk
[XX
] &&
486 info
->fourier_ny
[k_win
]==info
->orig_nk
[YY
] &&
487 info
->fourier_nz
[k_win
]==info
->orig_nk
[ZZ
] ) )
489 fprintf(fp
, " New Fourier grid xyz: %d %d %d (was %d %d %d)\n",
490 info
->fourier_nx
[k_win
], info
->fourier_ny
[k_win
], info
->fourier_nz
[k_win
],
491 info
->orig_nk
[XX
], info
->orig_nk
[YY
], info
->orig_nk
[ZZ
]);
492 bCanUseOrigTPR
= FALSE
;
494 if (bCanUseOrigTPR
&& ntprs
> 1)
495 fprintf(fp
, "and original PME settings.\n");
499 /* Return the index of the mdp file that showed the highest performance
500 * and the optimal number of PME nodes */
502 *npme_optimal
= winPME
;
504 return bCanUseOrigTPR
;
508 /* Get the commands we need to set up the runs from environment variables */
509 static void get_program_paths(bool bThreads
, char *cmd_mpirun
[], char cmd_np
[],
510 char *cmd_mdrun
[], int repeats
)
517 const char def_mpirun
[] = "mpirun";
518 const char def_mdrun
[] = "mdrun";
519 const char filename
[] = "benchtest.log";
520 const char match_mpi
[] = "NNODES=";
521 const char match_mdrun
[]= "Program: ";
522 const char empty_mpirun
[] = "";
527 /* Get the commands we need to set up the runs from environment variables */
530 if ( (cp
= getenv("MPIRUN")) != NULL
)
531 *cmd_mpirun
= strdup(cp
);
533 *cmd_mpirun
= strdup(def_mpirun
);
537 *cmd_mpirun
= strdup(empty_mpirun
);
540 if ( (cp
= getenv("MDRUN" )) != NULL
)
541 *cmd_mdrun
= strdup(cp
);
543 *cmd_mdrun
= strdup(def_mdrun
);
546 /* If no simulations have to be performed, we are done here */
550 /* Run a small test to see whether mpirun + mdrun work */
551 fprintf(stdout
, "Making sure that mdrun can be executed. ");
554 snew(command
, strlen(*cmd_mdrun
) + strlen(cmd_np
) + strlen(filename
) + 50);
555 sprintf(command
, "%s%s-version -maxh 0.001 1> %s 2>&1", *cmd_mdrun
, cmd_np
, filename
);
559 snew(command
, strlen(*cmd_mpirun
) + strlen(cmd_np
) + strlen(*cmd_mdrun
) + strlen(filename
) + 50);
560 sprintf(command
, "%s%s%s -version -maxh 0.001 1> %s 2>&1", *cmd_mpirun
, cmd_np
, *cmd_mdrun
, filename
);
562 fprintf(stdout
, "Trying '%s' ... ", command
);
563 make_backup(filename
);
564 gmx_system_call(command
);
566 /* Check if we find the characteristic string in the output: */
567 if (!gmx_fexist(filename
))
568 gmx_fatal(FARGS
, "Output from test run could not be found.");
570 fp
= fopen(filename
, "r");
571 /* We need to scan the whole output file, since sometimes the queuing system
572 * also writes stuff to stdout/err */
575 cp2
=fgets(line
, STRLEN
, fp
);
578 if ( str_starts(line
, match_mdrun
) )
580 if ( str_starts(line
, match_mpi
) )
590 gmx_fatal(FARGS
, "Need a threaded version of mdrun. This one\n"
592 "seems to have been compiled with MPI instead.",
600 gmx_fatal(FARGS
, "Need an MPI-enabled version of mdrun. This one\n"
602 "seems to have been compiled without MPI support.",
609 gmx_fatal(FARGS
, "Cannot execute mdrun. Please check %s for problems!",
613 fprintf(stdout
, "passed.\n");
621 static void launch_simulation(
622 bool bLaunch
, /* Should the simulation be launched? */
623 FILE *fp
, /* General log file */
624 bool bThreads
, /* whether to use threads */
625 char *cmd_mpirun
, /* Command for mpirun */
626 char *cmd_np
, /* Switch for -np or -nt or empty */
627 char *cmd_mdrun
, /* Command for mdrun */
628 char *args_for_mdrun
, /* Arguments for mdrun */
629 const char *simulation_tpr
, /* This tpr will be simulated */
630 int nnodes
, /* Number of nodes to run on */
631 int nPMEnodes
) /* Number of PME nodes to use */
636 /* Make enough space for the system call command,
637 * (100 extra chars for -npme ... etc. options should suffice): */
638 snew(command
, strlen(cmd_mpirun
)+strlen(cmd_mdrun
)+strlen(cmd_np
)+strlen(args_for_mdrun
)+strlen(simulation_tpr
)+100);
640 /* Note that the -passall options requires args_for_mdrun to be at the end
641 * of the command line string */
644 sprintf(command
, "%s%s-npme %d -s %s %s",
645 cmd_mdrun
, cmd_np
, nPMEnodes
, simulation_tpr
, args_for_mdrun
);
649 sprintf(command
, "%s%s%s -npme %d -s %s %s",
650 cmd_mpirun
, cmd_np
, cmd_mdrun
, nPMEnodes
, simulation_tpr
, args_for_mdrun
);
653 fprintf(fp
, "%s this command line to launch the simulation:\n\n%s", bLaunch
? "Using":"Please use", command
);
657 /* Now the real thing! */
660 fprintf(stdout
, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command
);
663 gmx_system_call(command
);
669 static void modify_PMEsettings(
670 gmx_large_int_t simsteps
, /* Set this value as number of time steps */
671 const char *fn_best_tpr
, /* tpr file with the best performance */
672 const char *fn_sim_tpr
) /* name of tpr file to be launched */
680 read_tpx_state(fn_best_tpr
,ir
,&state
,NULL
,&mtop
);
682 /* Set nsteps to the right value */
683 ir
->nsteps
= simsteps
;
685 /* Write the tpr file which will be launched */
686 sprintf(buf
, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr
, gmx_large_int_pfmt
);
687 fprintf(stdout
,buf
,ir
->nsteps
);
689 write_tpx_state(fn_sim_tpr
,ir
,&state
,&mtop
);
695 #define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
697 /* Make additional TPR files with more computational load for the
698 * direct space processors: */
699 static void make_benchmark_tprs(
700 const char *fn_sim_tpr
, /* READ : User-provided tpr file */
701 char *fn_bench_tprs
[], /* WRITE: Names of benchmark tpr files */
702 gmx_large_int_t benchsteps
, /* Number of time steps for benchmark runs */
703 gmx_large_int_t statesteps
, /* Step counter in checkpoint file */
704 real upfac
, /* Scale rcoulomb inbetween downfac and upfac */
706 int ntprs
, /* No. of TPRs to write, each with a different rcoulomb and fourierspacing */
707 real fourierspacing
, /* Basic fourierspacing from tpr input file */
708 t_inputinfo
*info
, /* Contains information about mdp file options */
709 FILE *fp
) /* Write the output here */
716 real nlist_buffer
; /* Thickness of the buffer regions for PME-switch potentials: */
722 sprintf(buf
, "Making benchmark tpr file%s with %s time steps", ntprs
>1? "s":"", gmx_large_int_pfmt
);
723 fprintf(stdout
, buf
, benchsteps
);
726 sprintf(buf
, " (adding %s steps from checkpoint file)", gmx_large_int_pfmt
);
727 fprintf(stdout
, buf
, statesteps
);
728 benchsteps
+= statesteps
;
730 fprintf(stdout
, ".\n");
734 read_tpx_state(fn_sim_tpr
,ir
,&state
,NULL
,&mtop
);
736 /* Check if some kind of PME was chosen */
737 if (EEL_PME(ir
->coulombtype
) == FALSE
)
738 gmx_fatal(FARGS
, "Can only do optimizations for simulations with %s electrostatics.",
741 /* Check if rcoulomb == rlist, which is necessary for plain PME. */
742 if ( (eelPME
== ir
->coulombtype
) && !(ir
->rcoulomb
== ir
->rlist
) )
744 gmx_fatal(FARGS
, "%s requires rcoulomb (%f) to be equal to rlist (%f).",
745 EELTYPE(eelPME
), ir
->rcoulomb
, ir
->rlist
);
747 /* For other PME types, rcoulomb is allowed to be smaller than rlist */
748 else if (ir
->rcoulomb
> ir
->rlist
)
750 gmx_fatal(FARGS
, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)",
751 EELTYPE(ir
->coulombtype
), ir
->rcoulomb
, ir
->rlist
);
754 /* Reduce the number of steps for the benchmarks */
755 info
->orig_sim_steps
= ir
->nsteps
;
756 ir
->nsteps
= benchsteps
;
758 /* Determine length of triclinic box vectors */
763 box_size
[d
] += state
.box
[d
][i
]*state
.box
[d
][i
];
764 box_size
[d
] = sqrt(box_size
[d
]);
767 /* Remember the original values: */
768 info
->orig_rvdw
= ir
->rvdw
;
769 info
->orig_rcoulomb
= ir
->rcoulomb
;
770 info
->orig_rlist
= ir
->rlist
;
771 info
->orig_rlistlong
= ir
->rlistlong
;
772 info
->orig_nk
[XX
] = ir
->nkx
;
773 info
->orig_nk
[YY
] = ir
->nky
;
774 info
->orig_nk
[ZZ
] = ir
->nkz
;
775 info
->orig_fs
[XX
] = box_size
[XX
]/ir
->nkx
; /* fourierspacing in x direction */
776 info
->orig_fs
[YY
] = box_size
[YY
]/ir
->nky
;
777 info
->orig_fs
[ZZ
] = box_size
[ZZ
]/ir
->nkz
;
779 /* For PME-switch potentials, keep the radial distance of the buffer region */
780 nlist_buffer
= info
->orig_rlist
- info
->orig_rcoulomb
;
782 /* Print information about settings of which some are potentially modified: */
783 fprintf(fp
, " Coulomb type : %s\n", EELTYPE(ir
->coulombtype
));
784 fprintf(fp
, " Fourier nkx nky nkz : %d %d %d\n",
785 info
->orig_nk
[XX
], info
->orig_nk
[YY
], info
->orig_nk
[ZZ
]);
786 fprintf(fp
, " rcoulomb : %f nm\n", info
->orig_rcoulomb
);
787 fprintf(fp
, " Van der Waals type : %s\n", EVDWTYPE(ir
->vdwtype
));
788 fprintf(fp
, " rvdw : %f nm\n", info
->orig_rvdw
);
789 if (EVDW_SWITCHED(ir
->vdwtype
))
790 fprintf(fp
, " rvdw_switch : %f nm\n", ir
->rvdw_switch
);
791 if (EPME_SWITCHED(ir
->coulombtype
))
792 fprintf(fp
, " rlist : %f nm\n", info
->orig_rlist
);
793 if (info
->orig_rlistlong
!= max_cutoff(ir
->rvdw
,ir
->rcoulomb
))
794 fprintf(fp
, " rlistlong : %f nm\n", info
->orig_rlistlong
);
796 /* Print a descriptive line about the tpr settings tested */
797 fprintf(fp
, "\nWill try these real/reciprocal workload settings:\n");
798 fprintf(fp
, " No. scaling rcoulomb");
799 fprintf(fp
, " nkx nky nkz");
800 if (fourierspacing
> 0)
801 fprintf(fp
, " spacing");
802 if (evdwCUT
== ir
->vdwtype
)
803 fprintf(fp
, " rvdw");
804 if (EPME_SWITCHED(ir
->coulombtype
))
805 fprintf(fp
, " rlist");
806 if ( info
->orig_rlistlong
!= max_cutoff(info
->orig_rlist
,max_cutoff(info
->orig_rvdw
,info
->orig_rcoulomb
)) )
807 fprintf(fp
, " rlistlong");
808 fprintf(fp
, " tpr file\n");
812 fprintf(stdout
, "Calculating PME grid points on the basis of ");
813 if (fourierspacing
> 0)
814 fprintf(stdout
, "a fourierspacing of %f nm\n", fourierspacing
);
816 fprintf(stdout
, "original nkx/nky/nkz settings from tpr file\n");
819 /* Loop to create the requested number of tpr input files */
820 for (j
= 0; j
< ntprs
; j
++)
822 /* Rcoulomb scaling factor for this file: */
826 fac
= (upfac
-downfac
)/(ntprs
-1) * j
+ downfac
;
827 fprintf(stdout
, "--- Scaling factor %f ---\n", fac
);
829 /* Scale the Coulomb radius */
830 ir
->rcoulomb
= info
->orig_rcoulomb
*fac
;
832 /* Adjust other radii since various conditions neet to be fulfilled */
833 if (eelPME
== ir
->coulombtype
)
835 /* plain PME, rcoulomb must be equal to rlist */
836 ir
->rlist
= ir
->rcoulomb
;
840 /* rlist must be >= rcoulomb, we keep the size of the buffer region */
841 ir
->rlist
= ir
->rcoulomb
+ nlist_buffer
;
844 if (evdwCUT
== ir
->vdwtype
)
846 /* For vdw cutoff, rvdw >= rlist */
847 ir
->rvdw
= max(info
->orig_rvdw
, ir
->rlist
);
850 ir
->rlistlong
= max_cutoff(ir
->rlist
,max_cutoff(ir
->rvdw
,ir
->rcoulomb
));
852 /* Try to reduce the number of reciprocal grid points in a smart way */
853 /* Did the user supply a value for fourierspacing on the command line? */
854 if (fourierspacing
> 0)
856 info
->fourier_sp
[j
] = fourierspacing
*fac
;
857 /* Calculate the optimal grid dimensions */
861 calc_grid(stdout
,state
.box
,info
->fourier_sp
[j
],&(ir
->nkx
),&(ir
->nky
),&(ir
->nkz
),1);
862 /* Check consistency */
864 if ((ir
->nkx
!= info
->orig_nk
[XX
]) || (ir
->nky
!= info
->orig_nk
[YY
]) || (ir
->nkz
!= info
->orig_nk
[ZZ
]))
866 fprintf(stderr
, "WARNING: Original grid was %dx%dx%d. The fourierspacing of %f nm does not reproduce the grid\n"
867 " found in the tpr input file! Will use the new settings.\n",
868 info
->orig_nk
[XX
],info
->orig_nk
[YY
],info
->orig_nk
[ZZ
],fourierspacing
);
876 /* Print out fourierspacing from input tpr */
877 fprintf(stdout
, "Input file fourier grid is %dx%dx%d\n",
878 info
->orig_nk
[XX
], info
->orig_nk
[YY
], info
->orig_nk
[ZZ
]);
880 /* Reconstruct fourierspacing for each dimension from the input file */
882 calc_grid(stdout
,state
.box
,info
->orig_fs
[XX
]*fac
,&(ir
->nkx
),&(ir
->nky
),&(ir
->nkz
),1);
884 calc_grid(stdout
,state
.box
,info
->orig_fs
[YY
]*fac
,&(ir
->nkx
),&(ir
->nky
),&(ir
->nkz
),1);
886 calc_grid(stdout
,state
.box
,info
->orig_fs
[ZZ
]*fac
,&(ir
->nkx
),&(ir
->nky
),&(ir
->nkz
),1);
889 /* Save modified radii and fourier grid components for later output: */
890 info
->r_coulomb
[j
] = ir
->rcoulomb
;
891 info
->r_vdw
[j
] = ir
->rvdw
;
892 info
->fourier_nx
[j
] = ir
->nkx
;
893 info
->fourier_ny
[j
] = ir
->nky
;
894 info
->fourier_nz
[j
] = ir
->nkz
;
895 info
->rlist
[j
] = ir
->rlist
;
896 info
->rlistlong
[j
] = ir
->rlistlong
;
898 /* Write the benchmark tpr file */
899 strncpy(fn_bench_tprs
[j
],fn_sim_tpr
,strlen(fn_sim_tpr
)-strlen(".tpr"));
900 sprintf(buf
, "_bench%.2d.tpr", j
);
901 strcat(fn_bench_tprs
[j
], buf
);
902 fprintf(stdout
,"Writing benchmark tpr %s with nsteps=", fn_bench_tprs
[j
]);
903 fprintf(stdout
, gmx_large_int_pfmt
, ir
->nsteps
);
904 fprintf(stdout
,", scaling factor %f\n", fac
);
905 write_tpx_state(fn_bench_tprs
[j
],ir
,&state
,&mtop
);
907 /* Write information about modified tpr settings to log file */
908 fprintf(fp
, "%4d%10f%10f", j
, fac
, ir
->rcoulomb
);
909 fprintf(fp
, "%5d%5d%5d", ir
->nkx
, ir
->nky
, ir
->nkz
);
910 if (fourierspacing
> 0)
911 fprintf(fp
, "%9f ", info
->fourier_sp
[j
]);
912 if (evdwCUT
== ir
->vdwtype
)
913 fprintf(fp
, "%10f", ir
->rvdw
);
914 if (EPME_SWITCHED(ir
->coulombtype
))
915 fprintf(fp
, "%10f", ir
->rlist
);
916 if ( info
->orig_rlistlong
!= max_cutoff(info
->orig_rlist
,max_cutoff(info
->orig_rvdw
,info
->orig_rcoulomb
)) )
917 fprintf(fp
, "%10f", ir
->rlistlong
);
918 fprintf(fp
, " %-14s\n",fn_bench_tprs
[j
]);
920 /* Make it clear to the user that some additional settings were modified */
921 if ( !is_equal(ir
->rvdw
, info
->orig_rvdw
)
922 || !is_equal(ir
->rlistlong
, info
->orig_rlistlong
) )
928 fprintf(fp
, "\nNote that in addition to rcoulomb and the fourier grid\n"
929 "also other input settings were changed (see table above).\n"
930 "Please check if the modified settings are appropriate.\n");
937 /* Whether these files are written depends on tpr (or mdp) settings,
938 * not on mdrun command line options! */
939 static bool tpr_triggers_file(const char *opt
)
941 if ( (0 == strcmp(opt
, "-pf"))
942 || (0 == strcmp(opt
, "-px")) )
949 /* Rename the files we want to keep to some meaningful filename and
951 static void cleanup(const t_filenm
*fnm
, int nfile
, int k
, int nnodes
,
952 int nPMEnodes
, int nr
, bool bKeepStderr
)
954 char numstring
[STRLEN
];
955 char newfilename
[STRLEN
];
961 fprintf(stdout
, "Cleaning up, deleting benchmark temp files ...\n");
963 for (i
=0; i
<nfile
; i
++)
965 opt
= (char *)fnm
[i
].opt
;
966 if ( strcmp(opt
, "-p") == 0 )
968 /* do nothing; keep this file */
971 else if (strcmp(opt
, "-bg") == 0)
973 /* Give the log file a nice name so one can later see which parameters were used */
976 sprintf(numstring
, "_%d", nr
);
977 sprintf(newfilename
, "%s_no%d_np%d_npme%d%s", opt2fn("-bg",nfile
,fnm
), k
, nnodes
, nPMEnodes
, numstring
);
978 if (gmx_fexist(opt2fn("-bg",nfile
,fnm
)))
980 fprintf(stdout
, "renaming log file to %s\n", newfilename
);
981 make_backup(newfilename
);
982 rename(opt2fn("-bg",nfile
,fnm
), newfilename
);
985 else if (strcmp(opt
, "-err") == 0)
987 /* This file contains the output of stderr. We want to keep it in
988 * cases where there have been problems. */
989 fn
= opt2fn(opt
, nfile
, fnm
);
992 sprintf(numstring
, "_%d", nr
);
993 sprintf(newfilename
, "%s_no%d_np%d_npme%d%s", fn
, k
, nnodes
, nPMEnodes
, numstring
);
998 fprintf(stdout
, "Saving stderr output in %s\n", newfilename
);
999 make_backup(newfilename
);
1000 rename(fn
, newfilename
);
1004 fprintf(stdout
, "Deleting %s\n", fn
);
1009 /* Delete the files which are created for each benchmark run: (options -b*) */
1010 else if ( ( (0 == strncmp(opt
, "-b", 2)) && (opt2bSet(opt
,nfile
,fnm
) || !is_optional(&fnm
[i
])) )
1011 || tpr_triggers_file(opt
) )
1013 fn
= opt2fn(opt
, nfile
, fnm
);
1016 fprintf(stdout
, "Deleting %s\n", fn
);
1024 /* Returns the largest common factor of n1 and n2 */
1025 static int largest_common_factor(int n1
, int n2
)
1030 for (factor
=nmax
; factor
> 0; factor
--)
1032 if ( 0==(n1
% factor
) && 0==(n2
% factor
) )
1037 return 0; /* one for the compiler */
1040 enum {eNpmeAuto
, eNpmeAll
, eNpmeReduced
, eNpmeSubset
, eNpmeNr
};
1042 /* Create a list of numbers of PME nodes to test */
1043 static void make_npme_list(
1044 const char *npmevalues_opt
, /* Make a complete list with all
1045 * possibilities or a short list that keeps only
1046 * reasonable numbers of PME nodes */
1047 int *nentries
, /* Number of entries we put in the nPMEnodes list */
1048 int *nPMEnodes
[], /* Each entry contains the value for -npme */
1049 int nnodes
, /* Total number of nodes to do the tests on */
1050 int minPMEnodes
, /* Minimum number of PME nodes */
1051 int maxPMEnodes
) /* Maximum number of PME nodes */
1054 int min_factor
=1; /* We request that npp and npme have this minimal
1055 * largest common factor (depends on npp) */
1056 int nlistmax
; /* Max. list size */
1057 int nlist
; /* Actual number of entries in list */
1061 /* Do we need to check all possible values for -npme or is a reduced list enough? */
1062 if ( 0 == strcmp(npmevalues_opt
, "all") )
1066 else if ( 0 == strcmp(npmevalues_opt
, "subset") )
1068 eNPME
= eNpmeSubset
;
1070 else if ( 0 == strcmp(npmevalues_opt
, "auto") )
1074 else if (nnodes
< 128)
1075 eNPME
= eNpmeReduced
;
1077 eNPME
= eNpmeSubset
;
1081 gmx_fatal(FARGS
, "Unknown option for -npme in make_npme_list");
1084 /* Calculate how many entries we could possibly have (in case of -npme all) */
1087 nlistmax
= maxPMEnodes
- minPMEnodes
+ 3;
1088 if (0 == minPMEnodes
)
1094 /* Now make the actual list which is at most of size nlist */
1095 snew(*nPMEnodes
, nlistmax
);
1096 nlist
= 0; /* start counting again, now the real entries in the list */
1097 for (i
= 0; i
< nlistmax
- 2; i
++)
1099 npme
= maxPMEnodes
- i
;
1110 /* For 2d PME we want a common largest factor of at least the cube
1111 * root of the number of PP nodes */
1112 min_factor
= (int) pow(npp
, 1.0/3.0);
1115 gmx_fatal(FARGS
, "Unknown option for eNPME in make_npme_list");
1118 if (largest_common_factor(npp
, npme
) >= min_factor
)
1120 (*nPMEnodes
)[nlist
] = npme
;
1124 /* We always test 0 PME nodes and the automatic number */
1125 *nentries
= nlist
+ 2;
1126 (*nPMEnodes
)[nlist
] = 0;
1127 (*nPMEnodes
)[nlist
+1] = -1;
1129 fprintf(stderr
, "Will try the following %d different values for -npme:\n", *nentries
);
1130 for (i
=0; i
<*nentries
-1; i
++)
1131 fprintf(stderr
, "%d, ", (*nPMEnodes
)[i
]);
1132 fprintf(stderr
, "and %d (auto).\n", (*nPMEnodes
)[*nentries
-1]);
1136 /* Allocate memory to store the performance data */
1137 static void init_perfdata(t_perf
*perfdata
[], int ntprs
, int datasets
, int repeats
)
1142 for (k
=0; k
<ntprs
; k
++)
1144 snew(perfdata
[k
], datasets
);
1145 for (i
=0; i
<datasets
; i
++)
1147 for (j
=0; j
<repeats
; j
++)
1149 snew(perfdata
[k
][i
].Gcycles
, repeats
);
1150 snew(perfdata
[k
][i
].ns_per_day
, repeats
);
1151 snew(perfdata
[k
][i
].PME_f_load
, repeats
);
1158 static void do_the_tests(
1159 FILE *fp
, /* General g_tune_pme output file */
1160 char **tpr_names
, /* Filenames of the input files to test */
1161 int maxPMEnodes
, /* Max fraction of nodes to use for PME */
1162 int minPMEnodes
, /* Min fraction of nodes to use for PME */
1163 const char *npmevalues_opt
, /* Which -npme values should be tested */
1164 t_perf
**perfdata
, /* Here the performace data is stored */
1165 int *pmeentries
, /* Entries in the nPMEnodes list */
1166 int repeats
, /* Repeat each test this often */
1167 int nnodes
, /* Total number of nodes = nPP + nPME */
1168 int nr_tprs
, /* Total number of tpr files to test */
1169 bool bThreads
, /* Threads or MPI? */
1170 char *cmd_mpirun
, /* mpirun command string */
1171 char *cmd_np
, /* "-np", "-n", whatever mpirun needs */
1172 char *cmd_mdrun
, /* mdrun command string */
1173 char *cmd_args_bench
, /* arguments for mdrun in a string */
1174 const t_filenm
*fnm
, /* List of filenames from command line */
1175 int nfile
, /* Number of files specified on the cmdl. */
1176 int sim_part
, /* For checkpointing */
1177 int presteps
, /* DLB equilibration steps, is checked */
1178 gmx_large_int_t cpt_steps
) /* Time step counter in the checkpoint */
1180 int i
,nr
,k
,ret
,count
=0,totaltests
;
1181 int *nPMEnodes
=NULL
;
1184 char *command
, *cmd_stub
;
1186 bool bResetProblem
=FALSE
;
1189 /* This string array corresponds to the eParselog enum type at the start
1191 const char* ParseLog
[] = {"OK.",
1192 "Logfile not found!",
1193 "No timings, logfile truncated?",
1194 "Run was terminated.",
1195 "Counters were not reset properly.",
1196 "No DD grid found for these settings.",
1197 "TPX version conflict!",
1198 "mdrun was not started in parallel!",
1199 "A fatal error occured!" };
1200 char str_PME_f_load
[13];
1203 /* Allocate space for the mdrun command line. 100 extra characters should
1204 be more than enough for the -npme etcetera arguments */
1205 cmdline_length
= strlen(cmd_mpirun
)
1208 + strlen(cmd_args_bench
)
1209 + strlen(tpr_names
[0]) + 100;
1210 snew(command
, cmdline_length
);
1211 snew(cmd_stub
, cmdline_length
);
1213 /* Construct the part of the command line that stays the same for all tests: */
1216 sprintf(cmd_stub
, "%s%s", cmd_mdrun
, cmd_np
);
1220 sprintf(cmd_stub
, "%s%s%s ", cmd_mpirun
, cmd_np
, cmd_mdrun
);
1223 /* Create a list of numbers of PME nodes to test */
1224 make_npme_list(npmevalues_opt
, pmeentries
, &nPMEnodes
,
1225 nnodes
, minPMEnodes
, maxPMEnodes
);
1229 fprintf(fp
, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1231 finalize(opt2fn("-p", nfile
, fnm
));
1235 /* Allocate one dataset for each tpr input file: */
1236 init_perfdata(perfdata
, nr_tprs
, *pmeentries
, repeats
);
1238 /*****************************************/
1239 /* Main loop over all tpr files to test: */
1240 /*****************************************/
1241 totaltests
= nr_tprs
*(*pmeentries
)*repeats
;
1242 for (k
=0; k
<nr_tprs
;k
++)
1244 fprintf(fp
, "\nIndividual timings for input file %d (%s):\n", k
, tpr_names
[k
]);
1245 fprintf(fp
, "PME nodes Gcycles ns/day PME/f Remark\n");
1246 /* Loop over various numbers of PME nodes: */
1247 for (i
= 0; i
< *pmeentries
; i
++)
1249 pd
= &perfdata
[k
][i
];
1251 /* Loop over the repeats for each scenario: */
1252 for (nr
= 0; nr
< repeats
; nr
++)
1254 pd
->nPMEnodes
= nPMEnodes
[i
];
1256 /* Add -npme and -s to the command line and save it. Note that
1257 * the -passall (if set) options requires cmd_args_bench to be
1258 * at the end of the command line string */
1259 snew(pd
->mdrun_cmd_line
, cmdline_length
);
1260 sprintf(pd
->mdrun_cmd_line
, "%s-npme %d -s %s %s",
1261 cmd_stub
, pd
->nPMEnodes
, tpr_names
[k
], cmd_args_bench
);
1263 /* Do a benchmark simulation: */
1265 sprintf(buf
, ", pass %d/%d", nr
+1, repeats
);
1268 fprintf(stdout
, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
1269 (100.0*count
)/totaltests
,
1270 k
+1, nr_tprs
, i
+1, *pmeentries
, buf
);
1271 make_backup(opt2fn("-err",nfile
,fnm
));
1272 sprintf(command
, "%s 1> /dev/null 2>%s", pd
->mdrun_cmd_line
, opt2fn("-err",nfile
,fnm
));
1273 fprintf(stdout
, "%s\n", pd
->mdrun_cmd_line
);
1274 gmx_system_call(command
);
1276 /* Collect the performance data from the log file; also check stderr
1277 * for fatal errors */
1278 ret
= parse_logfile(opt2fn("-bg",nfile
,fnm
), opt2fn("-err",nfile
,fnm
),
1279 pd
, nr
, presteps
, cpt_steps
, nnodes
);
1280 if ((presteps
> 0) && (ret
== eParselogResetProblem
))
1281 bResetProblem
= TRUE
;
1283 if (-1 == pd
->nPMEnodes
)
1284 sprintf(buf
, "(%3d)", pd
->guessPME
);
1289 if (pd
->PME_f_load
[nr
] > 0.0)
1290 sprintf(str_PME_f_load
, "%12.3f", pd
->PME_f_load
[nr
]);
1292 sprintf(str_PME_f_load
, "%s", " - ");
1294 /* Write the data we got to disk */
1295 fprintf(fp
, "%4d%s %12.3f %12.3f %s %s", pd
->nPMEnodes
,
1296 buf
, pd
->Gcycles
[nr
], pd
->ns_per_day
[nr
], str_PME_f_load
, ParseLog
[ret
]);
1297 if (! (ret
==eParselogOK
|| ret
==eParselogNoDDGrid
|| ret
==eParselogNotFound
) )
1298 fprintf(fp
, " Check %s file for problems.", ret
==eParselogFatal
? "err":"log");
1303 /* Do some cleaning up and delete the files we do not need any more */
1304 cleanup(fnm
, nfile
, k
, nnodes
, pd
->nPMEnodes
, nr
, ret
==eParselogFatal
);
1306 /* If the first run with this number of processors already failed, do not try again: */
1307 if (pd
->Gcycles
[0] <= 0.0 && repeats
> 1)
1309 fprintf(stdout
, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
1310 count
+= repeats
-(nr
+1);
1313 } /* end of repeats loop */
1314 } /* end of -npme loop */
1315 } /* end of tpr file loop */
1319 fprintf(fp
, "WARNING: The cycle and time step counters could not be reset\n"
1320 "properly. The reason could be that mpirun did not manage to\n"
1321 "export the environment variable GMX_RESET_COUNTER. You might\n"
1322 "have to give a special switch to mpirun for that.\n"
1323 "Alternatively, you can manually set GMX_RESET_COUNTER to the\n"
1324 "value normally provided by -presteps.");
1332 static void check_input(
1338 real maxPMEfraction
,
1339 real minPMEfraction
,
1340 real fourierspacing
,
1341 gmx_large_int_t bench_nsteps
,
1342 const t_filenm
*fnm
,
1349 /* Make sure the input file exists */
1350 if (!gmx_fexist(opt2fn("-s",nfile
,fnm
)))
1351 gmx_fatal(FARGS
, "File %s not found.", opt2fn("-s",nfile
,fnm
));
1353 /* Make sure that the checkpoint file is not overwritten by the benchmark runs */
1354 if ( (0 == strcmp(opt2fn("-cpi",nfile
,fnm
), opt2fn("-cpo",nfile
,fnm
)) ) && (sim_part
> 1) )
1355 gmx_fatal(FARGS
, "Checkpoint input and output file must not be identical,\nbecause then the input file might change during the benchmarks.");
1357 /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
1359 gmx_fatal(FARGS
, "Number of repeats < 0!");
1361 /* Check number of nodes */
1363 gmx_fatal(FARGS
, "Number of nodes/threads must be a positive integer.");
1365 /* Automatically choose -ntpr if not set */
1372 fprintf(stderr
, "Will test %d tpr file%s.\n", *ntprs
, *ntprs
==1?"":"s");
1377 fprintf(stderr
, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n");
1380 if ( is_equal(*downfac
,*upfac
) && (*ntprs
> 1) )
1382 fprintf(stderr
, "WARNING: Resetting -ntpr to 1 since both scaling factors are the same.\n"
1383 "Please choose upfac unequal to downfac to test various PME grid settings\n");
1387 /* Check whether max and min fraction are within required values */
1388 if (maxPMEfraction
> 0.5 || maxPMEfraction
< 0)
1389 gmx_fatal(FARGS
, "-max must be between 0 and 0.5");
1390 if (minPMEfraction
> 0.5 || minPMEfraction
< 0)
1391 gmx_fatal(FARGS
, "-min must be between 0 and 0.5");
1392 if (maxPMEfraction
< minPMEfraction
)
1393 gmx_fatal(FARGS
, "-max must be larger or equal to -min");
1395 /* Check whether the number of steps - if it was set - has a reasonable value */
1396 if (bench_nsteps
< 0)
1397 gmx_fatal(FARGS
, "Number of steps must be positive.");
1399 if (bench_nsteps
> 10000 || bench_nsteps
< 100)
1401 fprintf(stderr
, "WARNING: steps=");
1402 fprintf(stderr
, gmx_large_int_pfmt
, bench_nsteps
);
1403 fprintf(stderr
, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps
< 100)? "few" : "many");
1408 gmx_fatal(FARGS
, "Cannot have a negative number of presteps.\n");
1411 if (*upfac
<= 0.0 || *downfac
<= 0.0 || *downfac
> *upfac
)
1412 gmx_fatal(FARGS
, "Both scaling factors must be larger than zero and upper\n"
1413 "scaling limit (%f) must be larger than lower limit (%f).",
1416 if (*downfac
< 0.75 || *upfac
> 1.5)
1417 fprintf(stderr
, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1419 if (fourierspacing
< 0)
1420 gmx_fatal(FARGS
, "Please choose a positive value for fourierspacing.");
1422 /* Make shure that the scaling factor options are compatible with the number of tprs */
1423 if ( (1 == *ntprs
) && ( opt2parg_bSet("-upfac",npargs
,pa
) || opt2parg_bSet("-downfac",npargs
,pa
) ) )
1425 if (opt2parg_bSet("-upfac",npargs
,pa
) && opt2parg_bSet("-downfac",npargs
,pa
) && !is_equal(*upfac
,*downfac
))
1427 gmx_fatal(FARGS
, "Please specify -ntpr > 1 for both scaling factors to take effect.\n"
1428 "(upfac=%f, downfac=%f)\n", *upfac
, *downfac
);
1430 if (opt2parg_bSet("-upfac",npargs
,pa
))
1432 if (opt2parg_bSet("-downfac",npargs
,pa
))
1434 if (!is_equal(*upfac
, 1.0))
1436 fprintf(stderr
, "WARNING: Using a scaling factor of %f with -ntpr 1, thus not testing the original tpr settings.\n",
1443 /* Returns TRUE when "opt" is a switch for g_tune_pme itself */
1444 static bool is_main_switch(char *opt
)
1446 if ( (0 == strcmp(opt
,"-s" ))
1447 || (0 == strcmp(opt
,"-p" ))
1448 || (0 == strcmp(opt
,"-launch" ))
1449 || (0 == strcmp(opt
,"-r" ))
1450 || (0 == strcmp(opt
,"-ntpr" ))
1451 || (0 == strcmp(opt
,"-max" ))
1452 || (0 == strcmp(opt
,"-min" ))
1453 || (0 == strcmp(opt
,"-upfac" ))
1454 || (0 == strcmp(opt
,"-downfac" ))
1455 || (0 == strcmp(opt
,"-four" ))
1456 || (0 == strcmp(opt
,"-steps" ))
1457 || (0 == strcmp(opt
,"-simsteps" ))
1458 || (0 == strcmp(opt
,"-resetstep"))
1459 || (0 == strcmp(opt
,"-so" ))
1460 || (0 == strcmp(opt
,"-npstring" ))
1461 || (0 == strcmp(opt
,"-npme" ))
1462 || (0 == strcmp(opt
,"-passall" )) )
1469 /* Returns TRUE when "opt" is needed at launch time */
1470 static bool is_launch_option(char *opt
, bool bSet
)
1479 /* Returns TRUE when "opt" is needed at launch time */
1480 static bool is_launch_file(char *opt
, bool bSet
)
1482 /* We need all options that were set on the command line
1483 * and that do not start with -b */
1484 if (0 == strncmp(opt
,"-b", 2))
1494 /* Returns TRUE when "opt" gives an option needed for the benchmarks runs */
1495 static bool is_bench_option(char *opt
, bool bSet
)
1497 /* If option is set, we might need it for the benchmarks.
1498 * This includes -cpi */
1501 if ( (0 == strcmp(opt
, "-append" ))
1502 || (0 == strcmp(opt
, "-maxh" ))
1503 || (0 == strcmp(opt
, "-deffnm" ))
1504 || (0 == strcmp(opt
, "-resethway")) )
1514 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1515 static bool is_bench_file(char *opt
, bool bSet
, bool bOptional
, bool bIsOutput
)
1517 /* All options starting with "-b" are for _b_enchmark files exclusively */
1518 if (0 == strncmp(opt
,"-b", 2))
1520 if (!bOptional
|| bSet
)
1530 if (bSet
) /* These are additional input files like -cpi -ei */
1538 /* Adds 'buf' to 'str' */
1539 static void add_to_string(char **str
, char *buf
)
1544 len
= strlen(*str
) + strlen(buf
) + 1;
1550 /* Create the command line for the benchmark as well as for the real run */
1551 static void create_command_line_snippets(
1558 const char *procstring
, /* How to pass the number of processors to $MPIRUN */
1559 char *cmd_np
[], /* Actual command line snippet, e.g. '-np <N>' */
1560 char *cmd_args_bench
[], /* command line arguments for benchmark runs */
1561 char *cmd_args_launch
[], /* command line arguments for simulation run */
1562 char extra_args
[]) /* Add this to the end of the command line */
1568 #define BUFLENGTH 255
1569 char buf
[BUFLENGTH
];
1570 char strbuf
[BUFLENGTH
];
1571 char strbuf2
[BUFLENGTH
];
1575 np_or_nt
=strdup("-nt");
1577 np_or_nt
=strdup("-np");
1579 /* strlen needs at least '\0' as a string: */
1580 snew(*cmd_args_bench
,1);
1581 snew(*cmd_args_launch
,1);
1582 *cmd_args_launch
[0]='\0';
1583 *cmd_args_bench
[0] ='\0';
1586 /*******************************************/
1587 /* 1. Process other command line arguments */
1588 /*******************************************/
1589 for (i
=0; i
<npargs
; i
++)
1591 /* What command line switch are we currently processing: */
1592 opt
= (char *)pa
[i
].option
;
1593 /* Skip options not meant for mdrun */
1594 if (!is_main_switch(opt
))
1596 /* Print it to a string buffer, strip away trailing whitespaces that pa_val also returns: */
1597 sprintf(strbuf2
, "%s", pa_val(&pa
[i
],buf
,BUFLENGTH
));
1599 sprintf(strbuf
, "%s %s ", opt
, strbuf2
);
1600 /* We need the -np (or -nt) switch in a separate buffer - whether or not it was set! */
1601 if (0 == strcmp(opt
,np_or_nt
))
1603 if (strcmp(procstring
, "none")==0 && !bThreads
)
1605 /* Omit -np <N> entirely */
1607 sprintf(*cmd_np
, " ");
1611 /* This is the normal case with -np <N> */
1612 snew(*cmd_np
, strlen(procstring
)+strlen(strbuf2
)+4);
1613 sprintf(*cmd_np
, " %s %s ", bThreads
? "-nt" : procstring
, strbuf2
);
1618 if (is_bench_option(opt
,pa
[i
].bSet
))
1619 add_to_string(cmd_args_bench
, strbuf
);
1621 if (is_launch_option(opt
,pa
[i
].bSet
))
1622 add_to_string(cmd_args_launch
, strbuf
);
1628 /* Add equilibration steps to benchmark options */
1629 sprintf(strbuf
, "-resetstep %d ", presteps
);
1630 add_to_string(cmd_args_bench
, strbuf
);
1633 /********************/
1634 /* 2. Process files */
1635 /********************/
1636 for (i
=0; i
<nfile
; i
++)
1638 opt
= (char *)fnm
[i
].opt
;
1639 name
= opt2fn(opt
,nfile
,fnm
);
1641 /* Strbuf contains the options, now let's sort out where we need that */
1642 sprintf(strbuf
, "%s %s ", opt
, name
);
1644 /* Skip options not meant for mdrun */
1645 if (!is_main_switch(opt
))
1648 if ( is_bench_file(opt
, opt2bSet(opt
,nfile
,fnm
), is_optional(&fnm
[i
]), is_output(&fnm
[i
])) )
1650 /* All options starting with -b* need the 'b' removed,
1651 * therefore overwrite strbuf */
1652 if (0 == strncmp(opt
, "-b", 2))
1653 sprintf(strbuf
, "-%s %s ", &opt
[2], name
);
1655 add_to_string(cmd_args_bench
, strbuf
);
1658 if ( is_launch_file(opt
,opt2bSet(opt
,nfile
,fnm
)) )
1659 add_to_string(cmd_args_launch
, strbuf
);
1663 add_to_string(cmd_args_bench
, extra_args
);
1664 add_to_string(cmd_args_launch
, extra_args
);
1669 /* Set option opt */
1670 static void setopt(const char *opt
,int nfile
,t_filenm fnm
[])
1674 for(i
=0; (i
<nfile
); i
++)
1675 if (strcmp(opt
,fnm
[i
].opt
)==0)
1676 fnm
[i
].flag
|= ffSET
;
1680 static void couple_files_options(int nfile
, t_filenm fnm
[])
1688 for (i
=0; i
<nfile
; i
++)
1690 opt
= (char *)fnm
[i
].opt
;
1691 bSet
= ((fnm
[i
].flag
& ffSET
) != 0);
1692 bBench
= (0 == strncmp(opt
,"-b", 2));
1694 /* Check optional files */
1695 /* If e.g. -eo is set, then -beo also needs to be set */
1696 if (is_optional(&fnm
[i
]) && bSet
&& !bBench
)
1698 sprintf(buf
, "-b%s", &opt
[1]);
1699 setopt(buf
,nfile
,fnm
);
1701 /* If -beo is set, then -eo also needs to be! */
1702 if (is_optional(&fnm
[i
]) && bSet
&& bBench
)
1704 sprintf(buf
, "-%s", &opt
[2]);
1705 setopt(buf
,nfile
,fnm
);
1711 static double gettime()
1713 #ifdef HAVE_GETTIMEOFDAY
1715 struct timezone tz
= { 0,0 };
1718 gettimeofday(&t
,&tz
);
1720 seconds
= (double) t
.tv_sec
+ 1e-6*(double)t
.tv_usec
;
1726 seconds
= time(NULL
);
1733 #define BENCHSTEPS (1000)
1735 int gmx_tune_pme(int argc
,char *argv
[])
1737 const char *desc
[] = {
1738 "For a given number [TT]-np[tt] or [TT]-nt[tt] of processors/threads, this program systematically",
1739 "times mdrun with various numbers of PME-only nodes and determines",
1740 "which setting is fastest. It will also test whether performance can",
1741 "be enhanced by shifting load from the reciprocal to the real space",
1742 "part of the Ewald sum. ",
1743 "Simply pass your [TT].tpr[tt] file to g_tune_pme together with other options",
1744 "for mdrun as needed.[PAR]",
1745 "Which executables are used can be set in the environment variables",
1746 "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
1747 "will be used as defaults. Note that for certain MPI frameworks you",
1748 "need to provide a machine- or hostfile. This can also be passed",
1749 "via the MPIRUN variable, e.g.",
1750 "'export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"'[PAR]",
1751 "Please call g_tune_pme with the normal options you would pass to",
1752 "mdrun and add [TT]-np[tt] for the number of processors to perform the",
1753 "tests on, or [TT]-nt[tt] for the number of threads. You can also add [TT]-r[tt]",
1754 "to repeat each test several times to get better statistics. [PAR]",
1755 "g_tune_pme can test various real space / reciprocal space workloads",
1756 "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
1757 "written with enlarged cutoffs and smaller fourier grids respectively.",
1758 "Typically, the first test (no. 0) will be with the settings from the input",
1759 "[TT].tpr[tt] file; the last test (no. [TT]ntpr[tt]) will have cutoffs multiplied",
1760 "by (and at the same time fourier grid dimensions divided by) the scaling",
1761 "factor [TT]-fac[tt] (default 1.2). The remaining [TT].tpr[tt] files will have equally",
1762 "spaced values inbetween these extremes. Note that you can set [TT]-ntpr[tt] to 1",
1763 "if you just want to find the optimal number of PME-only nodes; in that case",
1764 "your input [TT].tpr[tt] file will remain unchanged.[PAR]",
1765 "For the benchmark runs, the default of 1000 time steps should suffice for most",
1766 "MD systems. The dynamic load balancing needs about 100 time steps",
1767 "to adapt to local load imbalances, therefore the time step counters",
1768 "are by default reset after 100 steps. For large systems",
1769 "(>1M atoms) you may have to set [TT]-resetstep[tt] to a higher value.",
1770 "From the 'DD' load imbalance entries in the md.log output file you",
1771 "can tell after how many steps the load is sufficiently balanced.[PAR]"
1772 "Example call: [TT]g_tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
1773 "After calling mdrun several times, detailed performance information",
1774 "is available in the output file perf.out. ",
1775 "Note that during the benchmarks a couple of temporary files are written",
1776 "(options -b*), these will be automatically deleted after each test.[PAR]",
1777 "If you want the simulation to be started automatically with the",
1778 "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
1783 int pmeentries
=0; /* How many values for -npme do we actually test for each tpr file */
1784 real maxPMEfraction
=0.50;
1785 real minPMEfraction
=0.25;
1786 int maxPMEnodes
, minPMEnodes
;
1787 real downfac
=1.0,upfac
=1.2;
1789 real fs
=0.0; /* 0 indicates: not set by the user */
1790 gmx_large_int_t bench_nsteps
=BENCHSTEPS
;
1791 gmx_large_int_t new_sim_nsteps
=-1; /* -1 indicates: not set by the user */
1792 gmx_large_int_t cpt_steps
=0; /* Step counter in .cpt input file */
1793 int presteps
=100; /* Do a full cycle reset after presteps steps */
1794 bool bOverwrite
=FALSE
, bKeepTPR
;
1796 bool bPassAll
=FALSE
;
1797 char *ExtraArgs
=NULL
;
1798 char **tpr_names
=NULL
;
1799 const char *simulation_tpr
=NULL
;
1800 int best_npme
, best_tpr
;
1801 int sim_part
= 1; /* For benchmarks with checkpoint files */
1803 /* Default program names if nothing else is found */
1804 char *cmd_mpirun
=NULL
, *cmd_mdrun
=NULL
;
1805 char *cmd_args_bench
, *cmd_args_launch
;
1808 t_perf
**perfdata
=NULL
;
1814 /* Print out how long the tuning took */
1817 static t_filenm fnm
[] = {
1819 { efOUT
, "-p", "perf", ffWRITE
},
1820 { efLOG
, "-err", "errors", ffWRITE
},
1821 { efTPX
, "-so", "tuned", ffWRITE
},
1823 { efTPX
, NULL
, NULL
, ffREAD
},
1824 { efTRN
, "-o", NULL
, ffWRITE
},
1825 { efXTC
, "-x", NULL
, ffOPTWR
},
1826 { efCPT
, "-cpi", NULL
, ffOPTRD
},
1827 { efCPT
, "-cpo", NULL
, ffOPTWR
},
1828 { efSTO
, "-c", "confout", ffWRITE
},
1829 { efEDR
, "-e", "ener", ffWRITE
},
1830 { efLOG
, "-g", "md", ffWRITE
},
1831 { efXVG
, "-dhdl", "dhdl", ffOPTWR
},
1832 { efXVG
, "-field", "field", ffOPTWR
},
1833 { efXVG
, "-table", "table", ffOPTRD
},
1834 { efXVG
, "-tablep", "tablep", ffOPTRD
},
1835 { efXVG
, "-tableb", "table", ffOPTRD
},
1836 { efTRX
, "-rerun", "rerun", ffOPTRD
},
1837 { efXVG
, "-tpi", "tpi", ffOPTWR
},
1838 { efXVG
, "-tpid", "tpidist", ffOPTWR
},
1839 { efEDI
, "-ei", "sam", ffOPTRD
},
1840 { efEDO
, "-eo", "sam", ffOPTWR
},
1841 { efGCT
, "-j", "wham", ffOPTRD
},
1842 { efGCT
, "-jo", "bam", ffOPTWR
},
1843 { efXVG
, "-ffout", "gct", ffOPTWR
},
1844 { efXVG
, "-devout", "deviatie", ffOPTWR
},
1845 { efXVG
, "-runav", "runaver", ffOPTWR
},
1846 { efXVG
, "-px", "pullx", ffOPTWR
},
1847 { efXVG
, "-pf", "pullf", ffOPTWR
},
1848 { efMTX
, "-mtx", "nm", ffOPTWR
},
1849 { efNDX
, "-dn", "dipole", ffOPTWR
},
1850 /* Output files that are deleted after each benchmark run */
1851 { efTRN
, "-bo", "bench", ffWRITE
},
1852 { efXTC
, "-bx", "bench", ffWRITE
},
1853 { efCPT
, "-bcpo", "bench", ffWRITE
},
1854 { efSTO
, "-bc", "bench", ffWRITE
},
1855 { efEDR
, "-be", "bench", ffWRITE
},
1856 { efLOG
, "-bg", "bench", ffWRITE
},
1857 { efEDO
, "-beo", "bench", ffOPTWR
},
1858 { efXVG
, "-bdhdl", "benchdhdl",ffOPTWR
},
1859 { efXVG
, "-bfield", "benchfld" ,ffOPTWR
},
1860 { efXVG
, "-btpi", "benchtpi", ffOPTWR
},
1861 { efXVG
, "-btpid", "benchtpid",ffOPTWR
},
1862 { efGCT
, "-bjo", "bench", ffOPTWR
},
1863 { efXVG
, "-bffout", "benchgct", ffOPTWR
},
1864 { efXVG
, "-bdevout","benchdev", ffOPTWR
},
1865 { efXVG
, "-brunav", "benchrnav",ffOPTWR
},
1866 { efXVG
, "-bpx", "benchpx", ffOPTWR
},
1867 { efXVG
, "-bpf", "benchpf", ffOPTWR
},
1868 { efMTX
, "-bmtx", "benchn", ffOPTWR
},
1869 { efNDX
, "-bdn", "bench", ffOPTWR
}
1872 /* Command line options of mdrun */
1873 bool bDDBondCheck
= TRUE
;
1874 bool bDDBondComm
= TRUE
;
1875 bool bVerbose
= FALSE
;
1876 bool bCompact
= TRUE
;
1877 bool bSepPot
= FALSE
;
1878 bool bRerunVSite
= FALSE
;
1879 bool bIonize
= FALSE
;
1880 bool bConfout
= TRUE
;
1881 bool bReproducible
= FALSE
;
1882 bool bThreads
= FALSE
;
1885 int nstglobalcomm
=-1;
1887 int repl_ex_seed
=-1;
1891 const char *ddno_opt
[ddnoNR
+1] =
1892 { NULL
, "interleave", "pp_pme", "cartesian", NULL
};
1893 const char *dddlb_opt
[] =
1894 { NULL
, "auto", "no", "yes", NULL
};
1895 const char *procstring
[] =
1896 { NULL
, "-np", "-n", "none", NULL
};
1897 const char *npmevalues_opt
[] =
1898 { NULL
, "auto", "all", "subset", NULL
};
1899 real rdd
=0.0,rconstr
=0.0,dlb_scale
=0.8,pforce
=-1;
1900 char *ddcsx
=NULL
,*ddcsy
=NULL
,*ddcsz
=NULL
;
1902 #define STD_CPT_PERIOD (15.0)
1903 real cpt_period
=STD_CPT_PERIOD
,max_hours
=-1;
1904 bool bAppendFiles
=TRUE
;
1905 bool bResetCountersHalfWay
=FALSE
;
1906 output_env_t oenv
=NULL
;
1909 /***********************/
1910 /* g_tune_pme options: */
1911 /***********************/
1912 { "-np", FALSE
, etINT
, {&nnodes
},
1913 "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" },
1914 { "-npstring", FALSE
, etENUM
, {procstring
},
1915 "Specify the number of processors to $MPIRUN using this string"},
1916 { "-passall", FALSE
, etBOOL
, {&bPassAll
},
1917 "HIDDENPut arguments unknown to mdrun at the end of the command line. Can e.g. be used for debugging purposes. "},
1918 { "-nt", FALSE
, etINT
, {&nthreads
},
1919 "Number of threads to run the tests on (turns MPI & mpirun off)"},
1920 { "-r", FALSE
, etINT
, {&repeats
},
1921 "Repeat each test this often" },
1922 { "-max", FALSE
, etREAL
, {&maxPMEfraction
},
1923 "Max fraction of PME nodes to test with" },
1924 { "-min", FALSE
, etREAL
, {&minPMEfraction
},
1925 "Min fraction of PME nodes to test with" },
1926 { "-npme", FALSE
, etENUM
, {npmevalues_opt
},
1927 "Benchmark all possible values for -npme or just the subset that is expected to perform well"},
1928 { "-upfac", FALSE
, etREAL
, {&upfac
},
1929 "Upper limit for rcoulomb scaling factor (Note that rcoulomb upscaling results in fourier grid downscaling)" },
1930 { "-downfac", FALSE
, etREAL
, {&downfac
},
1931 "Lower limit for rcoulomb scaling factor" },
1932 { "-ntpr", FALSE
, etINT
, {&ntprs
},
1933 "Number of tpr files to benchmark. Create these many files with scaling factors ranging from 1.0 to fac. If < 1, automatically choose the number of tpr files to test" },
1934 { "-four", FALSE
, etREAL
, {&fs
},
1935 "Use this fourierspacing value instead of the grid found in the tpr input file. (Spacing applies to a scaling factor of 1.0 if multiple tpr files are written)" },
1936 { "-steps", FALSE
, etGMX_LARGE_INT
, {&bench_nsteps
},
1937 "Take timings for these many steps in the benchmark runs" },
1938 { "-resetstep",FALSE
, etINT
, {&presteps
},
1939 "Let dlb equilibrate these many steps before timings are taken (reset cycle counters after these many steps)" },
1940 { "-simsteps", FALSE
, etGMX_LARGE_INT
, {&new_sim_nsteps
},
1941 "If non-negative, perform these many steps in the real run (overwrite nsteps from tpr, add cpt steps)" },
1942 { "-launch", FALSE
, etBOOL
, {&bLaunch
},
1943 "Lauch the real simulation after optimization" },
1944 /******************/
1945 /* mdrun options: */
1946 /******************/
1947 { "-deffnm", FALSE
, etSTR
, {&deffnm
},
1948 "Set the default filename for all file options at launch time" },
1949 { "-ddorder", FALSE
, etENUM
, {ddno_opt
},
1951 { "-ddcheck", FALSE
, etBOOL
, {&bDDBondCheck
},
1952 "Check for all bonded interactions with DD" },
1953 { "-ddbondcomm",FALSE
, etBOOL
, {&bDDBondComm
},
1954 "HIDDENUse special bonded atom communication when -rdd > cut-off" },
1955 { "-rdd", FALSE
, etREAL
, {&rdd
},
1956 "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
1957 { "-rcon", FALSE
, etREAL
, {&rconstr
},
1958 "Maximum distance for P-LINCS (nm), 0 is estimate" },
1959 { "-dlb", FALSE
, etENUM
, {dddlb_opt
},
1960 "Dynamic load balancing (with DD)" },
1961 { "-dds", FALSE
, etREAL
, {&dlb_scale
},
1962 "Minimum allowed dlb scaling of the DD cell size" },
1963 { "-ddcsx", FALSE
, etSTR
, {&ddcsx
},
1964 "HIDDENThe DD cell sizes in x" },
1965 { "-ddcsy", FALSE
, etSTR
, {&ddcsy
},
1966 "HIDDENThe DD cell sizes in y" },
1967 { "-ddcsz", FALSE
, etSTR
, {&ddcsz
},
1968 "HIDDENThe DD cell sizes in z" },
1969 { "-gcom", FALSE
, etINT
, {&nstglobalcomm
},
1970 "Global communication frequency" },
1971 { "-v", FALSE
, etBOOL
, {&bVerbose
},
1972 "Be loud and noisy" },
1973 { "-compact", FALSE
, etBOOL
, {&bCompact
},
1974 "Write a compact log file" },
1975 { "-seppot", FALSE
, etBOOL
, {&bSepPot
},
1976 "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
1977 { "-pforce", FALSE
, etREAL
, {&pforce
},
1978 "Print all forces larger than this (kJ/mol nm)" },
1979 { "-reprod", FALSE
, etBOOL
, {&bReproducible
},
1980 "Try to avoid optimizations that affect binary reproducibility" },
1981 { "-cpt", FALSE
, etREAL
, {&cpt_period
},
1982 "Checkpoint interval (minutes)" },
1983 { "-append", FALSE
, etBOOL
, {&bAppendFiles
},
1984 "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
1985 { "-maxh", FALSE
, etREAL
, {&max_hours
},
1986 "Terminate after 0.99 times this time (hours)" },
1987 { "-multi", FALSE
, etINT
, {&nmultisim
},
1988 "Do multiple simulations in parallel" },
1989 { "-replex", FALSE
, etINT
, {&repl_ex_nst
},
1990 "Attempt replica exchange every # steps" },
1991 { "-reseed", FALSE
, etINT
, {&repl_ex_seed
},
1992 "Seed for replica exchange, -1 is generate a seed" },
1993 { "-rerunvsite", FALSE
, etBOOL
, {&bRerunVSite
},
1994 "HIDDENRecalculate virtual site coordinates with -rerun" },
1995 { "-ionize", FALSE
, etBOOL
, {&bIonize
},
1996 "Do a simulation including the effect of an X-Ray bombardment on your system" },
1997 { "-confout", FALSE
, etBOOL
, {&bConfout
},
1998 "HIDDENWrite the last configuration with -c and force checkpointing at the last step" },
1999 { "-stepout", FALSE
, etINT
, {&nstepout
},
2000 "HIDDENFrequency of writing the remaining runtime" },
2001 { "-resethway", FALSE
, etBOOL
, {&bResetCountersHalfWay
},
2002 "HIDDENReset the cycle counters after half the number of steps or halfway -maxh (launch only)" }
2006 #define NFILE asize(fnm)
2008 CopyRight(stderr
,argv
[0]);
2010 seconds
= gettime();
2012 parse_common_args(&argc
,argv
,PCA_NOEXIT_ON_ARGS
,
2013 NFILE
,fnm
,asize(pa
),pa
,asize(desc
),desc
,
2016 /* Store the remaining unparsed command line entries in a string */
2018 ExtraArgs
[0] = '\0';
2019 for (i
=1; i
<argc
; i
++) /* argc will now be 1 if everything was understood */
2021 add_to_string(&ExtraArgs
, argv
[i
]);
2022 add_to_string(&ExtraArgs
, " ");
2024 if ( !bPassAll
&& (ExtraArgs
[0] != '\0') )
2026 fprintf(stderr
, "\nWARNING: The following arguments you provided have no effect:\n"
2028 "Use the -passall option to force them to appear on the command lines\n"
2029 "for the benchmark simulations%s.\n\n",
2030 ExtraArgs
, bLaunch
? " and at launch time" : "");
2033 if (opt2parg_bSet("-nt",asize(pa
),pa
))
2036 if (opt2parg_bSet("-npstring",asize(pa
),pa
))
2037 fprintf(stderr
, "WARNING: -npstring has no effect when using threads.\n");
2040 gmx_fatal(FARGS
, "Can't run multi-threaded MPI simulation yet!");
2041 /* and now we just set this; a bit of an ugly hack*/
2044 /* Automatically set -beo options if -eo is set etc. */
2045 couple_files_options(NFILE
,fnm
);
2047 /* Construct the command line arguments for benchmark runs
2048 * as well as for the simulation run
2050 create_command_line_snippets(bThreads
,presteps
,NFILE
,fnm
,asize(pa
),pa
,procstring
[0],
2051 &cmd_np
, &cmd_args_bench
, &cmd_args_launch
,
2052 bPassAll
? ExtraArgs
: (char *)"");
2054 /* Read in checkpoint file if requested */
2056 if(opt2bSet("-cpi",NFILE
,fnm
))
2059 cr
->duty
=DUTY_PP
; /* makes the following routine happy */
2060 read_checkpoint_simulation_part(opt2fn("-cpi",NFILE
,fnm
),
2061 &sim_part
,&cpt_steps
,cr
,
2065 /* sim_part will now be 1 if no checkpoint file was found */
2067 gmx_fatal(FARGS
, "Checkpoint file %s not found!", opt2fn("-cpi",
2072 /* Open performance output file and write header info */
2073 fp
= ffopen(opt2fn("-p",NFILE
,fnm
),"w");
2075 /* Make a quick consistency check of command line parameters */
2076 check_input(nnodes
, repeats
, &ntprs
, &upfac
, &downfac
, maxPMEfraction
,
2077 minPMEfraction
, fs
, bench_nsteps
, fnm
, NFILE
, sim_part
, presteps
,
2080 /* Determine max and min number of PME nodes to test: */
2083 maxPMEnodes
= floor(maxPMEfraction
*nnodes
);
2084 minPMEnodes
= max(floor(minPMEfraction
*nnodes
), 0);
2085 fprintf(stdout
, "Will try runs with %d ", minPMEnodes
);
2086 if (maxPMEnodes
!= minPMEnodes
)
2087 fprintf(stdout
, "- %d ", maxPMEnodes
);
2088 fprintf(stdout
, "PME-only nodes.\n Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
2096 /* Get the commands we need to set up the runs from environment variables */
2097 get_program_paths(bThreads
, &cmd_mpirun
, cmd_np
, &cmd_mdrun
, repeats
);
2099 /* Print some header info to file */
2101 fprintf(fp
, "\n P E R F O R M A N C E R E S U L T S\n");
2103 fprintf(fp
, "%s for Gromacs %s\n", ShortProgram(),GromacsVersion());
2106 fprintf(fp
, "Number of nodes : %d\n", nnodes
);
2107 fprintf(fp
, "The mpirun command is : %s\n", cmd_mpirun
);
2108 if ( strcmp(procstring
[0], "none") != 0)
2109 fprintf(fp
, "Passing # of nodes via : %s\n", procstring
[0]);
2111 fprintf(fp
, "Not setting number of nodes in system call\n");
2114 fprintf(fp
, "Number of threads : %d\n", nnodes
);
2116 fprintf(fp
, "The mdrun command is : %s\n", cmd_mdrun
);
2117 fprintf(fp
, "mdrun args benchmarks : %s\n", cmd_args_bench
);
2118 fprintf(fp
, "Benchmark steps : ");
2119 fprintf(fp
, gmx_large_int_pfmt
, bench_nsteps
);
2121 fprintf(fp
, "dlb equilibration steps : %d\n", presteps
);
2124 fprintf(fp
, "Checkpoint time step : ");
2125 fprintf(fp
, gmx_large_int_pfmt
, cpt_steps
);
2129 fprintf(fp
, "mdrun args at launchtime: %s\n", cmd_args_launch
);
2130 if (!bPassAll
&& ExtraArgs
[0] != '\0')
2131 fprintf(fp
, "Unused arguments : %s\n", ExtraArgs
);
2132 if (new_sim_nsteps
>= 0)
2135 fprintf(stderr
, "Note: Simulation input file %s will have ", opt2fn("-so",NFILE
,fnm
));
2136 fprintf(stderr
, gmx_large_int_pfmt
, new_sim_nsteps
+cpt_steps
);
2137 fprintf(stderr
, " steps.\n");
2138 fprintf(fp
, "Simulation steps : ");
2139 fprintf(fp
, gmx_large_int_pfmt
, new_sim_nsteps
);
2143 fprintf(fp
, "Repeats for each test : %d\n", repeats
);
2147 fprintf(fp
, "Requested grid spacing : %f (tpr file will be changed accordingly)\n", fs
);
2148 fprintf(fp
, " This will be the grid spacing at a scaling factor of 1.0\n");
2151 fprintf(fp
, "Input file : %s\n", opt2fn("-s",NFILE
,fnm
));
2153 /* Allocate memory for the inputinfo struct: */
2155 info
->nr_inputfiles
= ntprs
;
2156 for (i
=0; i
<ntprs
; i
++)
2158 snew(info
->r_coulomb
, ntprs
);
2159 snew(info
->r_vdw
, ntprs
);
2160 snew(info
->rlist
, ntprs
);
2161 snew(info
->rlistlong
, ntprs
);
2162 snew(info
->fourier_nx
, ntprs
);
2163 snew(info
->fourier_ny
, ntprs
);
2164 snew(info
->fourier_nz
, ntprs
);
2165 snew(info
->fourier_sp
, ntprs
);
2167 /* Make alternative tpr files to test: */
2168 snew(tpr_names
, ntprs
);
2169 for (i
=0; i
<ntprs
; i
++)
2170 snew(tpr_names
[i
], STRLEN
);
2172 make_benchmark_tprs(opt2fn("-s",NFILE
,fnm
), tpr_names
, bench_nsteps
+presteps
,
2173 cpt_steps
, upfac
, downfac
, ntprs
, fs
, info
, fp
);
2176 /********************************************************************************/
2177 /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats */
2178 /********************************************************************************/
2179 snew(perfdata
, ntprs
);
2180 do_the_tests(fp
, tpr_names
, maxPMEnodes
, minPMEnodes
, npmevalues_opt
[0], perfdata
, &pmeentries
,
2181 repeats
, nnodes
, ntprs
, bThreads
, cmd_mpirun
, cmd_np
, cmd_mdrun
,
2182 cmd_args_bench
, fnm
, NFILE
, sim_part
, presteps
, cpt_steps
);
2184 fprintf(fp
, "\nTuning took%8.1f minutes.\n", (gettime()-seconds
)/60.0);
2186 /* Analyse the results and give a suggestion for optimal settings: */
2187 bKeepTPR
= analyze_data(fp
, opt2fn("-p", NFILE
, fnm
), perfdata
, nnodes
, ntprs
, pmeentries
,
2188 repeats
, info
, &best_tpr
, &best_npme
);
2190 /* Take the best-performing tpr file and enlarge nsteps to original value */
2191 if ( bKeepTPR
&& !bOverwrite
&& !(fs
> 0.0) )
2193 simulation_tpr
= opt2fn("-s",NFILE
,fnm
);
2197 simulation_tpr
= opt2fn("-so",NFILE
,fnm
);
2198 modify_PMEsettings(bOverwrite
? (new_sim_nsteps
+cpt_steps
) :
2199 info
->orig_sim_steps
, tpr_names
[best_tpr
],
2203 /* Now start the real simulation if the user requested it ... */
2204 launch_simulation(bLaunch
, fp
, bThreads
, cmd_mpirun
, cmd_np
, cmd_mdrun
,
2205 cmd_args_launch
, simulation_tpr
, nnodes
, best_npme
);
2208 /* ... or simply print the performance results to screen: */
2210 finalize(opt2fn("-p", NFILE
, fnm
));