fixed g_tune_pme to pass correctly boolean arguments to mdrun
[gromacs/rigid-bodies.git] / src / tools / gmx_tune_pme.c
blobf22d3080c3461ed0af5c49cbf9bdc5710c20a0ce
1 /*
2 *
3 * This source code is part of
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
10 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
11 * Copyright (c) 2001-2008, The GROMACS development team,
12 * check out http://www.gromacs.org for more information.
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * If you want to redistribute modifications, please consider that
20 * scientific software is very special. Version control is crucial -
21 * bugs must be traceable. We will be happy to consider code for
22 * inclusion in the official distribution, but derived work must not
23 * be called official GROMACS. Details are found in the README & COPYING
24 * files - if they are missing, get the official version at www.gromacs.org.
26 * To help us fund GROMACS development, we humbly ask that you cite
27 * the papers on the package - you can find them in the top README file.
29 * For more info, check our website at http://www.gromacs.org
31 * And Hey:
32 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
34 #ifdef HAVE_CONFIG_H
35 #include <config.h>
36 #endif
39 #include <time.h>
40 #ifdef HAVE_SYS_TIME_H
41 #include <sys/time.h>
42 #endif
46 #include "statutil.h"
47 #include "typedefs.h"
48 #include "smalloc.h"
49 #include "vec.h"
50 #include "copyrite.h"
51 #include "statutil.h"
52 #include "tpxio.h"
53 #include "string2.h"
54 #include "readinp.h"
55 #include "calcgrid.h"
56 #include "checkpoint.h"
57 #include "gmx_ana.h"
58 #include "names.h"
62 enum {
63 ddnoSEL, ddnoINTERLEAVE, ddnoPP_PME, ddnoCARTESIAN, ddnoNR
66 /* Enum for situations that can occur during log file parsing, the
67 * corresponding string entries can be found in do_the_tests() in
68 * const char* ParseLog[] */
69 enum {
70 eParselogOK,
71 eParselogNotFound,
72 eParselogNoPerfData,
73 eParselogTerm,
74 eParselogResetProblem,
75 eParselogNoDDGrid,
76 eParselogTPXVersion,
77 eParselogNotParallel,
78 eParselogFatal,
79 eParselogNr
83 typedef struct
85 int nPMEnodes; /* number of PME-only nodes used in this test */
86 int nx, ny, nz; /* DD grid */
87 int guessPME; /* if nPMEnodes == -1, this is the guessed number of PME nodes */
88 double *Gcycles; /* This can contain more than one value if doing multiple tests */
89 double Gcycles_Av;
90 float *ns_per_day;
91 float ns_per_day_Av;
92 float *PME_f_load; /* PME mesh/force load average*/
93 float PME_f_load_Av; /* Average average ;) ... */
94 char *mdrun_cmd_line; /* Mdrun command line used for this test */
95 } t_perf;
98 typedef struct
100 int nr_inputfiles; /* The number of tpr and mdp input files */
101 gmx_large_int_t orig_sim_steps; /* Number of steps to be done in the real simulation */
102 real *rcoulomb; /* The coulomb radii [0...nr_inputfiles] */
103 real *rvdw; /* The vdW radii */
104 real *rlist; /* Neighbourlist cutoff radius */
105 real *rlistlong;
106 int *nkx, *nky, *nkz;
107 real *fsx, *fsy, *fsz; /* Fourierspacing in x,y,z dimension */
108 } t_inputinfo;
111 static void sep_line(FILE *fp)
113 fprintf(fp, "\n------------------------------------------------------------\n");
117 /* Wrapper for system calls */
118 static int gmx_system_call(char *command)
120 #ifdef GMX_NO_SYSTEM
121 gmx_fatal(FARGS,"No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n",command);
122 #else
123 return ( system(command) );
124 #endif
128 /* Check if string starts with substring */
129 static gmx_bool str_starts(const char *string, const char *substring)
131 return ( strncmp(string, substring, strlen(substring)) == 0);
135 static void cleandata(t_perf *perfdata, int test_nr)
137 perfdata->Gcycles[test_nr] = 0.0;
138 perfdata->ns_per_day[test_nr] = 0.0;
139 perfdata->PME_f_load[test_nr] = 0.0;
141 return;
145 static gmx_bool is_equal(real a, real b)
147 real diff, eps=1.0e-7;
150 diff = a - b;
152 if (diff < 0.0) diff = -diff;
154 if (diff < eps)
155 return TRUE;
156 else
157 return FALSE;
161 static void finalize(const char *fn_out)
163 char buf[STRLEN];
164 FILE *fp;
167 fp = fopen(fn_out,"r");
168 fprintf(stdout,"\n\n");
170 while( fgets(buf,STRLEN-1,fp) != NULL )
172 fprintf(stdout,"%s",buf);
174 fclose(fp);
175 fprintf(stdout,"\n\n");
176 thanx(stderr);
180 enum {eFoundNothing, eFoundDDStr, eFoundAccountingStr, eFoundCycleStr};
182 static int parse_logfile(const char *logfile, const char *errfile,
183 t_perf *perfdata, int test_nr, int presteps, gmx_large_int_t cpt_steps,
184 int nnodes)
186 FILE *fp;
187 char line[STRLEN], dumstring[STRLEN], dumstring2[STRLEN];
188 const char matchstrdd[]="Domain decomposition grid";
189 const char matchstrcr[]="resetting all time and cycle counters";
190 const char matchstrbal[]="Average PME mesh/force load:";
191 const char matchstring[]="R E A L C Y C L E A N D T I M E A C C O U N T I N G";
192 const char errSIG[]="signal, stopping at the next";
193 int iFound;
194 int procs;
195 float dum1,dum2,dum3;
196 int npme;
197 gmx_large_int_t resetsteps=-1;
198 gmx_bool bFoundResetStr = FALSE;
199 gmx_bool bResetChecked = FALSE;
202 if (!gmx_fexist(logfile))
204 fprintf(stderr, "WARNING: Could not find logfile %s.\n", logfile);
205 cleandata(perfdata, test_nr);
206 return eParselogNotFound;
209 fp = fopen(logfile, "r");
210 perfdata->PME_f_load[test_nr] = -1.0;
211 perfdata->guessPME = -1;
213 iFound = eFoundNothing;
214 if (1 == nnodes)
215 iFound = eFoundDDStr; /* Skip some case statements */
217 while (fgets(line, STRLEN, fp) != NULL)
219 /* Remove leading spaces */
220 ltrim(line);
222 /* Check for TERM and INT signals from user: */
223 if ( strstr(line, errSIG) != NULL )
225 fclose(fp);
226 cleandata(perfdata, test_nr);
227 return eParselogTerm;
230 /* Check whether cycle resetting worked */
231 if (presteps > 0 && !bFoundResetStr)
233 if (strstr(line, matchstrcr) != NULL)
235 sprintf(dumstring, "Step %s", gmx_large_int_pfmt);
236 sscanf(line, dumstring, &resetsteps);
237 bFoundResetStr = TRUE;
238 if (resetsteps == presteps+cpt_steps)
240 bResetChecked = TRUE;
242 else
244 sprintf(dumstring , gmx_large_int_pfmt, resetsteps);
245 sprintf(dumstring2, gmx_large_int_pfmt, presteps+cpt_steps);
246 fprintf(stderr, "WARNING: Time step counters were reset at step %s,\n"
247 " though they were supposed to be reset at step %s!\n",
248 dumstring, dumstring2);
253 /* Look for strings that appear in a certain order in the log file: */
254 switch(iFound)
256 case eFoundNothing:
257 /* Look for domain decomp grid and separate PME nodes: */
258 if (str_starts(line, matchstrdd))
260 sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
261 &(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme);
262 if (perfdata->nPMEnodes == -1)
263 perfdata->guessPME = npme;
264 else if (perfdata->nPMEnodes != npme)
265 gmx_fatal(FARGS, "PME nodes from command line and output file are not identical");
266 iFound = eFoundDDStr;
268 /* Catch a few errors that might have occured: */
269 else if (str_starts(line, "There is no domain decomposition for"))
271 fclose(fp);
272 return eParselogNoDDGrid;
274 else if (str_starts(line, "reading tpx file"))
276 fclose(fp);
277 return eParselogTPXVersion;
279 else if (str_starts(line, "The -dd or -npme option request a parallel simulation"))
281 fclose(fp);
282 return eParselogNotParallel;
284 break;
285 case eFoundDDStr:
286 /* Look for PME mesh/force balance (not necessarily present, though) */
287 if (str_starts(line, matchstrbal))
288 sscanf(&line[strlen(matchstrbal)], "%f", &(perfdata->PME_f_load[test_nr]));
289 /* Look for matchstring */
290 if (str_starts(line, matchstring))
291 iFound = eFoundAccountingStr;
292 break;
293 case eFoundAccountingStr:
294 /* Already found matchstring - look for cycle data */
295 if (str_starts(line, "Total "))
297 sscanf(line,"Total %d %lf",&procs,&(perfdata->Gcycles[test_nr]));
298 iFound = eFoundCycleStr;
300 break;
301 case eFoundCycleStr:
302 /* Already found cycle data - look for remaining performance info and return */
303 if (str_starts(line, "Performance:"))
305 sscanf(line,"%s %f %f %f %f", dumstring, &dum1, &dum2, &(perfdata->ns_per_day[test_nr]), &dum3);
306 fclose(fp);
307 if (bResetChecked || presteps == 0)
308 return eParselogOK;
309 else
310 return eParselogResetProblem;
312 break;
314 } /* while */
316 /* Close the log file */
317 fclose(fp);
319 /* Check why there is no performance data in the log file.
320 * Did a fatal errors occur? */
321 if (gmx_fexist(errfile))
323 fp = fopen(errfile, "r");
324 while (fgets(line, STRLEN, fp) != NULL)
326 if ( str_starts(line, "Fatal error:") )
328 if (fgets(line, STRLEN, fp) != NULL)
329 fprintf(stderr, "\nWARNING: A fatal error has occured during this benchmark:\n"
330 "%s\n", line);
331 fclose(fp);
332 cleandata(perfdata, test_nr);
333 return eParselogFatal;
336 fclose(fp);
338 else
340 fprintf(stderr, "WARNING: Could not find stderr file %s.\n", errfile);
343 /* Giving up ... we could not find out why there is no performance data in
344 * the log file. */
345 fprintf(stdout, "No performance data in log file.\n");
346 cleandata(perfdata, test_nr);
348 return eParselogNoPerfData;
352 static gmx_bool analyze_data(
353 FILE *fp,
354 const char *fn,
355 t_perf **perfdata,
356 int nnodes,
357 int ntprs,
358 int ntests,
359 int nrepeats,
360 t_inputinfo *info,
361 int *index_tpr, /* OUT: Nr of mdp file with best settings */
362 int *npme_optimal) /* OUT: Optimal number of PME nodes */
364 int i,j,k;
365 int line=0, line_win=-1;
366 int k_win=-1, i_win=-1, winPME;
367 double s=0.0; /* standard deviation */
368 t_perf *pd;
369 char strbuf[STRLEN];
370 char str_PME_f_load[13];
371 gmx_bool bCanUseOrigTPR;
372 gmx_bool bRefinedCoul, bRefinedVdW, bRefinedGrid;
375 if (nrepeats > 1)
377 sep_line(fp);
378 fprintf(fp, "Summary of successful runs:\n");
379 fprintf(fp, "Line tpr PME nodes Gcycles Av. Std.dev. ns/day PME/f");
380 if (nnodes > 1)
381 fprintf(fp, " DD grid");
382 fprintf(fp, "\n");
386 for (k=0; k<ntprs; k++)
388 for (i=0; i<ntests; i++)
390 /* Select the right dataset: */
391 pd = &(perfdata[k][i]);
393 pd->Gcycles_Av = 0.0;
394 pd->PME_f_load_Av = 0.0;
395 pd->ns_per_day_Av = 0.0;
397 if (pd->nPMEnodes == -1)
398 sprintf(strbuf, "(%3d)", pd->guessPME);
399 else
400 sprintf(strbuf, " ");
402 /* Get the average run time of a setting */
403 for (j=0; j<nrepeats; j++)
405 pd->Gcycles_Av += pd->Gcycles[j];
406 pd->PME_f_load_Av += pd->PME_f_load[j];
408 pd->Gcycles_Av /= nrepeats;
409 pd->PME_f_load_Av /= nrepeats;
411 for (j=0; j<nrepeats; j++)
413 if (pd->ns_per_day[j] > 0.0)
414 pd->ns_per_day_Av += pd->ns_per_day[j];
415 else
417 /* Somehow the performance number was not aquired for this run,
418 * therefor set the average to some negative value: */
419 pd->ns_per_day_Av = -1.0f*nrepeats;
420 break;
423 pd->ns_per_day_Av /= nrepeats;
425 /* Nicer output: */
426 if (pd->PME_f_load_Av > 0.0)
427 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load_Av);
428 else
429 sprintf(str_PME_f_load, "%s", " - ");
432 /* We assume we had a successful run if both averages are positive */
433 if (pd->Gcycles_Av > 0.0 && pd->ns_per_day_Av > 0.0)
435 /* Output statistics if repeats were done */
436 if (nrepeats > 1)
438 /* Calculate the standard deviation */
439 s = 0.0;
440 for (j=0; j<nrepeats; j++)
441 s += pow( pd->Gcycles[j] - pd->Gcycles_Av, 2 );
442 s /= (nrepeats - 1);
443 s = sqrt(s);
445 fprintf(fp, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s",
446 line, k, pd->nPMEnodes, strbuf, pd->Gcycles_Av, s,
447 pd->ns_per_day_Av, str_PME_f_load);
448 if (nnodes > 1)
449 fprintf(fp, " %3d %3d %3d", pd->nx, pd->ny, pd->nz);
450 fprintf(fp, "\n");
452 /* Store the index of the best run found so far in 'winner': */
453 if ( (k_win == -1) || (pd->Gcycles_Av < perfdata[k_win][i_win].Gcycles_Av) )
455 k_win = k;
456 i_win = i;
457 line_win = line;
459 line++;
464 if (k_win == -1)
465 gmx_fatal(FARGS, "None of the runs was successful! Check %s for problems.", fn);
467 sep_line(fp);
469 winPME = perfdata[k_win][i_win].nPMEnodes;
471 if (1 == ntests)
473 /* We stuck to a fixed number of PME-only nodes */
474 sprintf(strbuf, "settings No. %d", k_win);
476 else
478 /* We have optimized the number of PME-only nodes */
479 if (winPME == -1)
480 sprintf(strbuf, "%s", "the automatic number of PME nodes");
481 else
482 sprintf(strbuf, "%d PME nodes", winPME);
484 fprintf(fp, "Best performance was achieved with %s", strbuf);
485 if ((nrepeats > 1) && (ntests > 1))
486 fprintf(fp, " (see line %d)", line_win);
487 fprintf(fp, "\n");
489 /* Only mention settings if they were modified: */
490 bRefinedCoul = !is_equal(info->rcoulomb[k_win], info->rcoulomb[0]);
491 bRefinedVdW = !is_equal(info->rvdw[k_win] , info->rvdw[0] );
492 bRefinedGrid = !(info->nkx[k_win] == info->nkx[0] &&
493 info->nky[k_win] == info->nky[0] &&
494 info->nkz[k_win] == info->nkz[0]);
496 if (bRefinedCoul || bRefinedVdW || bRefinedGrid)
498 fprintf(fp, "Optimized PME settings:\n");
499 bCanUseOrigTPR = FALSE;
501 else
503 bCanUseOrigTPR = TRUE;
506 if (bRefinedCoul)
507 fprintf(fp, " New Coulomb radius: %f nm (was %f nm)\n", info->rcoulomb[k_win], info->rcoulomb[0]);
509 if (bRefinedVdW)
510 fprintf(fp, " New Van der Waals radius: %f nm (was %f nm)\n", info->rvdw[k_win], info->rvdw[0]);
512 if (bRefinedGrid)
513 fprintf(fp, " New Fourier grid xyz: %d %d %d (was %d %d %d)\n", info->nkx[k_win], info->nky[k_win], info->nkz[k_win],
514 info->nkx[0], info->nky[0], info->nkz[0]);
516 if (bCanUseOrigTPR && ntprs > 1)
517 fprintf(fp, "and original PME settings.\n");
519 fflush(fp);
521 /* Return the index of the mdp file that showed the highest performance
522 * and the optimal number of PME nodes */
523 *index_tpr = k_win;
524 *npme_optimal = winPME;
526 return bCanUseOrigTPR;
530 /* Get the commands we need to set up the runs from environment variables */
531 static void get_program_paths(gmx_bool bThreads, char *cmd_mpirun[], char cmd_np[],
532 char *cmd_mdrun[], int repeats)
534 char *command=NULL;
535 char *cp;
536 char *cp2;
537 char line[STRLEN];
538 FILE *fp;
539 const char def_mpirun[] = "mpirun";
540 const char def_mdrun[] = "mdrun";
541 const char filename[] = "benchtest.log";
542 const char match_mpi[] = "NNODES=";
543 const char match_mdrun[]= "Program: ";
544 const char empty_mpirun[] = "";
545 gmx_bool bMdrun = FALSE;
546 gmx_bool bMPI = FALSE;
549 /* Get the commands we need to set up the runs from environment variables */
550 if (!bThreads)
552 if ( (cp = getenv("MPIRUN")) != NULL)
553 *cmd_mpirun = strdup(cp);
554 else
555 *cmd_mpirun = strdup(def_mpirun);
557 else
559 *cmd_mpirun = strdup(empty_mpirun);
562 if ( (cp = getenv("MDRUN" )) != NULL )
563 *cmd_mdrun = strdup(cp);
564 else
565 *cmd_mdrun = strdup(def_mdrun);
568 /* If no simulations have to be performed, we are done here */
569 if (repeats <= 0)
570 return;
572 /* Run a small test to see whether mpirun + mdrun work */
573 fprintf(stdout, "Making sure that mdrun can be executed. ");
574 if (bThreads)
576 snew(command, strlen(*cmd_mdrun) + strlen(cmd_np) + strlen(filename) + 50);
577 sprintf(command, "%s%s-version -maxh 0.001 1> %s 2>&1", *cmd_mdrun, cmd_np, filename);
579 else
581 snew(command, strlen(*cmd_mpirun) + strlen(cmd_np) + strlen(*cmd_mdrun) + strlen(filename) + 50);
582 sprintf(command, "%s%s%s -version -maxh 0.001 1> %s 2>&1", *cmd_mpirun, cmd_np, *cmd_mdrun, filename);
584 fprintf(stdout, "Trying '%s' ... ", command);
585 make_backup(filename);
586 gmx_system_call(command);
588 /* Check if we find the characteristic string in the output: */
589 if (!gmx_fexist(filename))
590 gmx_fatal(FARGS, "Output from test run could not be found.");
592 fp = fopen(filename, "r");
593 /* We need to scan the whole output file, since sometimes the queuing system
594 * also writes stuff to stdout/err */
595 while ( !feof(fp) )
597 cp2=fgets(line, STRLEN, fp);
598 if (cp2!=NULL)
600 if ( str_starts(line, match_mdrun) )
601 bMdrun = TRUE;
602 if ( str_starts(line, match_mpi) )
603 bMPI = TRUE;
606 fclose(fp);
608 if (bThreads)
610 if (bMPI)
612 gmx_fatal(FARGS, "Need a threaded version of mdrun. This one\n"
613 "(%s)\n"
614 "seems to have been compiled with MPI instead.",
615 *cmd_mdrun);
618 else
620 if (bMdrun && !bMPI)
622 gmx_fatal(FARGS, "Need an MPI-enabled version of mdrun. This one\n"
623 "(%s)\n"
624 "seems to have been compiled without MPI support.",
625 *cmd_mdrun);
629 if (!bMdrun)
631 gmx_fatal(FARGS, "Cannot execute mdrun. Please check %s for problems!",
632 filename);
635 fprintf(stdout, "passed.\n");
637 /* Clean up ... */
638 remove(filename);
639 sfree(command);
643 static void launch_simulation(
644 gmx_bool bLaunch, /* Should the simulation be launched? */
645 FILE *fp, /* General log file */
646 gmx_bool bThreads, /* whether to use threads */
647 char *cmd_mpirun, /* Command for mpirun */
648 char *cmd_np, /* Switch for -np or -nt or empty */
649 char *cmd_mdrun, /* Command for mdrun */
650 char *args_for_mdrun, /* Arguments for mdrun */
651 const char *simulation_tpr, /* This tpr will be simulated */
652 int nnodes, /* Number of nodes to run on */
653 int nPMEnodes) /* Number of PME nodes to use */
655 char *command;
658 /* Make enough space for the system call command,
659 * (100 extra chars for -npme ... etc. options should suffice): */
660 snew(command, strlen(cmd_mpirun)+strlen(cmd_mdrun)+strlen(cmd_np)+strlen(args_for_mdrun)+strlen(simulation_tpr)+100);
662 /* Note that the -passall options requires args_for_mdrun to be at the end
663 * of the command line string */
664 if (bThreads)
666 sprintf(command, "%s%s-npme %d -s %s %s",
667 cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun);
669 else
671 sprintf(command, "%s%s%s -npme %d -s %s %s",
672 cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun);
675 fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch? "Using":"Please use", command);
676 sep_line(fp);
677 fflush(fp);
679 /* Now the real thing! */
680 if (bLaunch)
682 fprintf(stdout, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command);
683 sep_line(stdout);
684 fflush(stdout);
685 gmx_system_call(command);
686 thanx(fp);
691 static void modify_PMEsettings(
692 gmx_large_int_t simsteps, /* Set this value as number of time steps */
693 const char *fn_best_tpr, /* tpr file with the best performance */
694 const char *fn_sim_tpr) /* name of tpr file to be launched */
696 t_inputrec *ir;
697 t_state state;
698 gmx_mtop_t mtop;
699 char buf[200];
701 snew(ir,1);
702 read_tpx_state(fn_best_tpr,ir,&state,NULL,&mtop);
704 /* Set nsteps to the right value */
705 ir->nsteps = simsteps;
707 /* Write the tpr file which will be launched */
708 sprintf(buf, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr, gmx_large_int_pfmt);
709 fprintf(stdout,buf,ir->nsteps);
710 fflush(stdout);
711 write_tpx_state(fn_sim_tpr,ir,&state,&mtop);
713 sfree(ir);
717 typedef struct
719 int nkx, nky, nkz; /* Fourier grid */
720 real fac; /* actual scaling factor */
721 real fs; /* Fourierspacing */
722 } t_pmegrid;
725 static void copy_grid(t_pmegrid *ingrid, t_pmegrid *outgrid)
727 outgrid->nkx = ingrid->nkx;
728 outgrid->nky = ingrid->nky;
729 outgrid->nkz = ingrid->nkz;
730 outgrid->fac = ingrid->fac;
731 outgrid->fs = ingrid->fs;
734 /* Removes entry 'index' from the t_pmegrid list */
735 static void remove_from_list(
736 t_pmegrid gridlist[],
737 int *nlist, /* Length of the list */
738 int index) /* Index to remove from the list */
740 int j;
743 for (j = index; j < (*nlist - 1); j++)
745 copy_grid(&gridlist[j+1], &gridlist[j]);
747 *nlist = *nlist - 1;
751 /* Returns the index of the least necessary grid in the list.
753 * This is the one where the following conditions hold for the scaling factor:
754 * 1. this grid has the smallest distance to its neighboring grid (distance
755 * measured by fac) -> this condition is true for two grids at the same time
756 * 2. this grid (of the two) has the smaller distance to the other neighboring
757 * grid
759 * The extreme grids (the ones with the smallest and largest
760 * scaling factor) are never thrown away.
762 static int get_throwaway_index(
763 t_pmegrid gridlist[],
764 int nlist) /* Length of the list */
766 int i,index = -1;
767 real dist,mindist,d_left,d_right;
770 /* Find the two factors with the smallest mutual distance */
771 mindist = GMX_FLOAT_MAX;
772 for (i = 1; i < nlist; i++)
774 dist = fabs(gridlist[i].fac - gridlist[i-1].fac);
775 if (dist < mindist)
777 index = i;
778 mindist = dist;
781 /* index and index-1 have the smallest mutual distance */
782 if (index == 1)
784 /* Never return the first index, i.e. index == 0 */
787 else
789 d_left = fabs(gridlist[index-1].fac - gridlist[index-2].fac);
790 d_right = fabs(gridlist[index+1].fac - gridlist[index ].fac);
792 /* Return the left index if its distance to its left neighbor is shorter
793 * than the distance of the right index to its right neighbor */
794 if (d_left < d_right)
795 index--;
797 /* Never return the last index */
798 if (index == nlist-1)
799 index--;
801 return index;
805 static void make_grid_list(
806 real fmin, /* minimum scaling factor (downscaling fac) */
807 real fmax, /* maximum scaling factor (upscaling fac) */
808 int *ntprs, /* Number of tpr files to test */
809 matrix box, /* The box */
810 t_pmegrid *griduse[], /* List of grids that have to be tested */
811 real fs) /* Requested fourierspacing at a scaling factor
812 of unity */
814 real req_fac,act_fac=0,act_fs,eps;
815 rvec box_size;
816 int i,jmin=0,d,ngridall=0;
817 int nkx=0,nky=0,nkz=0;
818 int nkx_old=0,nky_old=0,nkz_old=0;
819 t_pmegrid *gridall;
820 int gridalloc,excess;
823 /* Determine length of triclinic box vectors */
824 for(d=0; d<DIM; d++)
826 box_size[d] = 0;
827 for(i=0;i<DIM;i++)
828 box_size[d] += box[d][i]*box[d][i];
829 box_size[d] = sqrt(box_size[d]);
832 gridalloc = 25;
833 snew(gridall, gridalloc);
835 fprintf(stdout, "Possible PME grid settings (apart from input file settings):\n");
836 fprintf(stdout, " nkx nky nkz max spacing scaling factor\n");
838 /* eps should provide a fine enough spacing not to miss any grid */
839 if (fmax != fmin)
840 eps = (fmax-fmin)/(100.0*(*ntprs - 1));
841 else
842 eps = 1.0/max( (*griduse)[0].nkz, max( (*griduse)[0].nkx, (*griduse)[0].nky ) );
844 for (req_fac = fmin; act_fac < fmax; req_fac += eps)
846 nkx=0;
847 nky=0;
848 nkz=0;
849 calc_grid(NULL,box,fs*req_fac,&nkx,&nky,&nkz);
850 act_fs = max(box_size[XX]/nkx,max(box_size[YY]/nky,box_size[ZZ]/nkz));
851 act_fac = act_fs/fs;
852 if ( ! ( nkx==nkx_old && nky==nky_old && nkz==nkz_old ) /* Exclude if grid is already in list */
853 && ! ( nkx==(*griduse)[0].nkx && nky==(*griduse)[0].nky && nkz==(*griduse)[0].nkz ) ) /* Exclude input file grid */
855 /* We found a new grid that will do */
856 nkx_old = nkx;
857 nky_old = nky;
858 nkz_old = nkz;
859 gridall[ngridall].nkx = nkx;
860 gridall[ngridall].nky = nky;
861 gridall[ngridall].nkz = nkz;
862 gridall[ngridall].fac = act_fac;
863 gridall[ngridall].fs = act_fs;
864 fprintf(stdout, "%5d%5d%5d %12f %12f\n",nkx,nky,nkz,act_fs,act_fac);
865 ngridall++;
866 if (ngridall >= gridalloc)
868 gridalloc += 25;
869 srenew(gridall, gridalloc);
874 /* excess is the number of grids we have to get rid of */
875 excess = ngridall+1 - *ntprs;
877 /* If we found less grids than tpr files were requested, simply test all
878 * the grid there are ... */
879 if (excess < 0)
881 fprintf(stdout, "NOTE: You requested %d tpr files, but apart from the input file grid only the\n"
882 " above listed %d suitable PME grids were found. Will test all suitable settings.\n",
883 *ntprs, ngridall);
884 *ntprs = ngridall+1;
886 else
888 if (2 == *ntprs)
890 /* We can only keep the input tpr file plus one extra tpr file.
891 * We make that choice depending on the values of -upfac and -downfac */
892 if (fmin < 1.0)
894 /* Keep the one with the -downfac as scaling factor. This is already
895 * stored in gridall[0] */
898 else
900 /* Keep the one with -upfac as scaling factor */
901 copy_grid(&(gridall[ngridall-1]), &(gridall[0]));
905 else
907 /* From the grid list throw away the unnecessary ones (keep the extremes) */
908 for (i = 0; i < excess; i++)
910 /* Get the index of the least necessary grid from the list ... */
911 jmin = get_throwaway_index(gridall, ngridall);
912 /* ... and remove the grid from the list */
913 remove_from_list(gridall, &ngridall, jmin);
918 /* The remaining list contains the grids we want to test */
919 for (i=1; i < *ntprs; i++)
920 copy_grid(&(gridall[i-1]), &((*griduse)[i]));
922 sfree(gridall);
926 #define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
928 /* Make additional TPR files with more computational load for the
929 * direct space processors: */
930 static void make_benchmark_tprs(
931 const char *fn_sim_tpr, /* READ : User-provided tpr file */
932 char *fn_bench_tprs[], /* WRITE: Names of benchmark tpr files */
933 gmx_large_int_t benchsteps, /* Number of time steps for benchmark runs */
934 gmx_large_int_t statesteps, /* Step counter in checkpoint file */
935 real upfac, /* Scale rcoulomb inbetween downfac and upfac */
936 real downfac,
937 int *ntprs, /* No. of TPRs to write, each with a different
938 rcoulomb and fourierspacing. If not enough
939 grids are found, ntprs is reduced accordingly */
940 real fourierspacing, /* Basic fourierspacing from tpr input file */
941 t_inputinfo *info, /* Contains information about mdp file options */
942 FILE *fp) /* Write the output here */
944 int i,j,d;
945 t_inputrec *ir;
946 t_state state;
947 gmx_mtop_t mtop;
948 real nlist_buffer; /* Thickness of the buffer regions for PME-switch potentials: */
949 char buf[200];
950 rvec box_size;
951 gmx_bool bNote = FALSE;
952 t_pmegrid *pmegrid=NULL; /* Grid settings for the PME grids to test */
955 sprintf(buf, "Making benchmark tpr file%s with %s time step%s",
956 *ntprs > 1? "s":"", gmx_large_int_pfmt, benchsteps>1?"s":"");
957 fprintf(stdout, buf, benchsteps);
958 if (statesteps > 0)
960 sprintf(buf, " (adding %s steps from checkpoint file)", gmx_large_int_pfmt);
961 fprintf(stdout, buf, statesteps);
962 benchsteps += statesteps;
964 fprintf(stdout, ".\n");
967 snew(ir,1);
968 read_tpx_state(fn_sim_tpr,ir,&state,NULL,&mtop);
970 /* Check if some kind of PME was chosen */
971 if (EEL_PME(ir->coulombtype) == FALSE)
972 gmx_fatal(FARGS, "Can only do optimizations for simulations with %s electrostatics.",
973 EELTYPE(eelPME));
975 /* Check if rcoulomb == rlist, which is necessary for plain PME. */
976 if ( (eelPME == ir->coulombtype) && !(ir->rcoulomb == ir->rlist) )
978 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to rlist (%f).",
979 EELTYPE(eelPME), ir->rcoulomb, ir->rlist);
981 /* For other PME types, rcoulomb is allowed to be smaller than rlist */
982 else if (ir->rcoulomb > ir->rlist)
984 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)",
985 EELTYPE(ir->coulombtype), ir->rcoulomb, ir->rlist);
988 /* Reduce the number of steps for the benchmarks */
989 info->orig_sim_steps = ir->nsteps;
990 ir->nsteps = benchsteps;
992 /* For PME-switch potentials, keep the radial distance of the buffer region */
993 nlist_buffer = ir->rlist - ir->rcoulomb;
995 /* Determine length of triclinic box vectors */
996 for(d=0; d<DIM; d++)
998 box_size[d] = 0;
999 for(i=0;i<DIM;i++)
1000 box_size[d] += state.box[d][i]*state.box[d][i];
1001 box_size[d] = sqrt(box_size[d]);
1004 /* Reconstruct fourierspacing per dimension from the number of grid points and box size */
1005 info->fsx[0] = box_size[XX]/ir->nkx;
1006 info->fsy[0] = box_size[YY]/ir->nky;
1007 info->fsz[0] = box_size[ZZ]/ir->nkz;
1009 /* Put the input grid as first entry into the grid list */
1010 snew(pmegrid, *ntprs);
1011 pmegrid[0].fac = 1.00;
1012 pmegrid[0].nkx = ir->nkx;
1013 pmegrid[0].nky = ir->nky;
1014 pmegrid[0].nkz = ir->nkz;
1015 pmegrid[0].fs = max(box_size[ZZ]/ir->nkz, max(box_size[XX]/ir->nkx, box_size[YY]/ir->nky));
1017 /* If no value for the fourierspacing was provided on the command line, we
1018 * use the reconstruction from the tpr file */
1019 if (fourierspacing <= 0)
1021 fourierspacing = pmegrid[0].fs;
1024 fprintf(stdout, "Calculating PME grid points on the basis of a fourierspacing of %f nm\n", fourierspacing);
1026 /* Print information about settings of which some are potentially modified: */
1027 fprintf(fp, " Coulomb type : %s\n", EELTYPE(ir->coulombtype));
1028 fprintf(fp, " Grid spacing x y z : %f %f %f\n",
1029 box_size[XX]/ir->nkx, box_size[YY]/ir->nky, box_size[ZZ]/ir->nkz);
1030 fprintf(fp, " Van der Waals type : %s\n", EVDWTYPE(ir->vdwtype));
1031 if (EVDW_SWITCHED(ir->vdwtype))
1032 fprintf(fp, " rvdw_switch : %f nm\n", ir->rvdw_switch);
1033 if (EPME_SWITCHED(ir->coulombtype))
1034 fprintf(fp, " rlist : %f nm\n", ir->rlist);
1035 if (ir->rlistlong != max_cutoff(ir->rvdw,ir->rcoulomb))
1036 fprintf(fp, " rlistlong : %f nm\n", ir->rlistlong);
1038 /* Print a descriptive line about the tpr settings tested */
1039 fprintf(fp, "\nWill try these real/reciprocal workload settings:\n");
1040 fprintf(fp, " No. scaling rcoulomb");
1041 fprintf(fp, " nkx nky nkz");
1042 fprintf(fp, " spacing");
1043 if (evdwCUT == ir->vdwtype)
1044 fprintf(fp, " rvdw");
1045 if (EPME_SWITCHED(ir->coulombtype))
1046 fprintf(fp, " rlist");
1047 if ( ir->rlistlong != max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb)) )
1048 fprintf(fp, " rlistlong");
1049 fprintf(fp, " tpr file\n");
1052 /* Get useful PME grids if requested, the actual number of entries is
1053 * returned in npmegrid */
1054 if (*ntprs > 1)
1055 make_grid_list(downfac, upfac, ntprs, state.box, &pmegrid, fourierspacing);
1057 /* Loop to create the requested number of tpr input files */
1058 for (j = 0; j < *ntprs; j++)
1060 /* The first one is the provided tpr file, just need to modify the number
1061 * of steps, so skip the following block */
1062 if (j > 0)
1064 /* Scale the Coulomb radius */
1065 ir->rcoulomb = info->rcoulomb[0]*pmegrid[j].fac;
1067 /* Adjust other radii since various conditions neet to be fulfilled */
1068 if (eelPME == ir->coulombtype)
1070 /* plain PME, rcoulomb must be equal to rlist */
1071 ir->rlist = ir->rcoulomb;
1073 else
1075 /* rlist must be >= rcoulomb, we keep the size of the buffer region */
1076 ir->rlist = ir->rcoulomb + nlist_buffer;
1079 if (evdwCUT == ir->vdwtype)
1081 /* For vdw cutoff, rvdw >= rlist */
1082 ir->rvdw = max(info->rvdw[0], ir->rlist);
1085 ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
1087 /* Copy the optimal grid dimensions to ir */
1088 ir->nkx = pmegrid[j].nkx;
1089 ir->nky = pmegrid[j].nky;
1090 ir->nkz = pmegrid[j].nkz;
1092 } /* end of "if (j != 0)" */
1094 /* for j==0: Save the original settings
1095 * for j >0: Save modified radii and fourier grids */
1096 info->rcoulomb[j] = ir->rcoulomb;
1097 info->rvdw[j] = ir->rvdw;
1098 info->nkx[j] = ir->nkx;
1099 info->nky[j] = ir->nky;
1100 info->nkz[j] = ir->nkz;
1101 info->rlist[j] = ir->rlist;
1102 info->rlistlong[j] = ir->rlistlong;
1103 info->fsx[j] = pmegrid[j].fs;
1104 info->fsy[j] = pmegrid[j].fs;
1105 info->fsz[j] = pmegrid[j].fs;
1107 /* Write the benchmark tpr file */
1108 strncpy(fn_bench_tprs[j],fn_sim_tpr,strlen(fn_sim_tpr)-strlen(".tpr"));
1109 sprintf(buf, "_bench%.2d.tpr", j);
1110 strcat(fn_bench_tprs[j], buf);
1111 fprintf(stdout,"Writing benchmark tpr %s with nsteps=", fn_bench_tprs[j]);
1112 fprintf(stdout, gmx_large_int_pfmt, ir->nsteps);
1113 if (j > 0)
1114 fprintf(stdout,", scaling factor %f\n", pmegrid[j].fac);
1115 else
1116 fprintf(stdout,", unmodified settings\n");
1118 write_tpx_state(fn_bench_tprs[j],ir,&state,&mtop);
1120 /* Write information about modified tpr settings to log file */
1121 if (j==0)
1122 fprintf(fp, "%4d%10s%10f", j, "-input-", ir->rcoulomb);
1123 else
1124 fprintf(fp, "%4d%10f%10f", j, pmegrid[j].fac, ir->rcoulomb);
1125 fprintf(fp, "%5d%5d%5d", ir->nkx, ir->nky, ir->nkz);
1126 fprintf(fp, " %9f ", info->fsx[j]);
1127 if (evdwCUT == ir->vdwtype)
1128 fprintf(fp, "%10f", ir->rvdw);
1129 if (EPME_SWITCHED(ir->coulombtype))
1130 fprintf(fp, "%10f", ir->rlist);
1131 if ( info->rlistlong[0] != max_cutoff(info->rlist[0],max_cutoff(info->rvdw[0],info->rcoulomb[0])) )
1132 fprintf(fp, "%10f", ir->rlistlong);
1133 fprintf(fp, " %-14s\n",fn_bench_tprs[j]);
1135 /* Make it clear to the user that some additional settings were modified */
1136 if ( !is_equal(ir->rvdw , info->rvdw[0])
1137 || !is_equal(ir->rlistlong, info->rlistlong[0]) )
1139 bNote = TRUE;
1142 if (bNote)
1143 fprintf(fp, "\nNote that in addition to rcoulomb and the fourier grid\n"
1144 "also other input settings were changed (see table above).\n"
1145 "Please check if the modified settings are appropriate.\n");
1146 fflush(stdout);
1147 fflush(fp);
1148 sfree(ir);
1152 /* Whether these files are written depends on tpr (or mdp) settings,
1153 * not on mdrun command line options! */
1154 static gmx_bool tpr_triggers_file(const char *opt)
1156 if ( (0 == strcmp(opt, "-pf"))
1157 || (0 == strcmp(opt, "-px")) )
1158 return TRUE;
1159 else
1160 return FALSE;
1164 /* Rename the files we want to keep to some meaningful filename and
1165 * delete the rest */
1166 static void cleanup(const t_filenm *fnm, int nfile, int k, int nnodes,
1167 int nPMEnodes, int nr, gmx_bool bKeepStderr)
1169 char numstring[STRLEN];
1170 char newfilename[STRLEN];
1171 const char *fn=NULL;
1172 int i;
1173 const char *opt;
1176 fprintf(stdout, "Cleaning up, deleting benchmark temp files ...\n");
1178 for (i=0; i<nfile; i++)
1180 opt = (char *)fnm[i].opt;
1181 if ( strcmp(opt, "-p") == 0 )
1183 /* do nothing; keep this file */
1186 else if (strcmp(opt, "-bg") == 0)
1188 /* Give the log file a nice name so one can later see which parameters were used */
1189 numstring[0] = '\0';
1190 if (nr > 0)
1191 sprintf(numstring, "_%d", nr);
1192 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", opt2fn("-bg",nfile,fnm), k, nnodes, nPMEnodes, numstring);
1193 if (gmx_fexist(opt2fn("-bg",nfile,fnm)))
1195 fprintf(stdout, "renaming log file to %s\n", newfilename);
1196 make_backup(newfilename);
1197 rename(opt2fn("-bg",nfile,fnm), newfilename);
1200 else if (strcmp(opt, "-err") == 0)
1202 /* This file contains the output of stderr. We want to keep it in
1203 * cases where there have been problems. */
1204 fn = opt2fn(opt, nfile, fnm);
1205 numstring[0] = '\0';
1206 if (nr > 0)
1207 sprintf(numstring, "_%d", nr);
1208 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", fn, k, nnodes, nPMEnodes, numstring);
1209 if (gmx_fexist(fn))
1211 if (bKeepStderr)
1213 fprintf(stdout, "Saving stderr output in %s\n", newfilename);
1214 make_backup(newfilename);
1215 rename(fn, newfilename);
1217 else
1219 fprintf(stdout, "Deleting %s\n", fn);
1220 remove(fn);
1224 /* Delete the files which are created for each benchmark run: (options -b*) */
1225 else if ( ( (0 == strncmp(opt, "-b", 2)) && (opt2bSet(opt,nfile,fnm) || !is_optional(&fnm[i])) )
1226 || tpr_triggers_file(opt) )
1228 fn = opt2fn(opt, nfile, fnm);
1229 if (gmx_fexist(fn))
1231 fprintf(stdout, "Deleting %s\n", fn);
1232 remove(fn);
1239 /* Returns the largest common factor of n1 and n2 */
1240 static int largest_common_factor(int n1, int n2)
1242 int factor, nmax;
1244 nmax = min(n1, n2);
1245 for (factor=nmax; factor > 0; factor--)
1247 if ( 0==(n1 % factor) && 0==(n2 % factor) )
1249 return(factor);
1252 return 0; /* one for the compiler */
1255 enum {eNpmeAuto, eNpmeAll, eNpmeReduced, eNpmeSubset, eNpmeNr};
1257 /* Create a list of numbers of PME nodes to test */
1258 static void make_npme_list(
1259 const char *npmevalues_opt, /* Make a complete list with all
1260 * possibilities or a short list that keeps only
1261 * reasonable numbers of PME nodes */
1262 int *nentries, /* Number of entries we put in the nPMEnodes list */
1263 int *nPMEnodes[], /* Each entry contains the value for -npme */
1264 int nnodes, /* Total number of nodes to do the tests on */
1265 int minPMEnodes, /* Minimum number of PME nodes */
1266 int maxPMEnodes) /* Maximum number of PME nodes */
1268 int i,npme,npp;
1269 int min_factor=1; /* We request that npp and npme have this minimal
1270 * largest common factor (depends on npp) */
1271 int nlistmax; /* Max. list size */
1272 int nlist; /* Actual number of entries in list */
1273 int eNPME=0;
1276 /* Do we need to check all possible values for -npme or is a reduced list enough? */
1277 if ( 0 == strcmp(npmevalues_opt, "all") )
1279 eNPME = eNpmeAll;
1281 else if ( 0 == strcmp(npmevalues_opt, "subset") )
1283 eNPME = eNpmeSubset;
1285 else if ( 0 == strcmp(npmevalues_opt, "auto") )
1287 if (nnodes <= 64)
1288 eNPME = eNpmeAll;
1289 else if (nnodes < 128)
1290 eNPME = eNpmeReduced;
1291 else
1292 eNPME = eNpmeSubset;
1294 else
1296 gmx_fatal(FARGS, "Unknown option for -npme in make_npme_list");
1299 /* Calculate how many entries we could possibly have (in case of -npme all) */
1300 if (nnodes > 2)
1302 nlistmax = maxPMEnodes - minPMEnodes + 3;
1303 if (0 == minPMEnodes)
1304 nlistmax--;
1306 else
1307 nlistmax = 1;
1309 /* Now make the actual list which is at most of size nlist */
1310 snew(*nPMEnodes, nlistmax);
1311 nlist = 0; /* start counting again, now the real entries in the list */
1312 for (i = 0; i < nlistmax - 2; i++)
1314 npme = maxPMEnodes - i;
1315 npp = nnodes-npme;
1316 switch (eNPME)
1318 case eNpmeAll:
1319 min_factor = 1;
1320 break;
1321 case eNpmeReduced:
1322 min_factor = 2;
1323 break;
1324 case eNpmeSubset:
1325 /* For 2d PME we want a common largest factor of at least the cube
1326 * root of the number of PP nodes */
1327 min_factor = (int) pow(npp, 1.0/3.0);
1328 break;
1329 default:
1330 gmx_fatal(FARGS, "Unknown option for eNPME in make_npme_list");
1331 break;
1333 if (largest_common_factor(npp, npme) >= min_factor)
1335 (*nPMEnodes)[nlist] = npme;
1336 nlist++;
1339 /* We always test 0 PME nodes and the automatic number */
1340 *nentries = nlist + 2;
1341 (*nPMEnodes)[nlist ] = 0;
1342 (*nPMEnodes)[nlist+1] = -1;
1344 fprintf(stderr, "Will try the following %d different values for -npme:\n", *nentries);
1345 for (i=0; i<*nentries-1; i++)
1346 fprintf(stderr, "%d, ", (*nPMEnodes)[i]);
1347 fprintf(stderr, "and %d (auto).\n", (*nPMEnodes)[*nentries-1]);
1351 /* Allocate memory to store the performance data */
1352 static void init_perfdata(t_perf *perfdata[], int ntprs, int datasets, int repeats)
1354 int i, j, k;
1357 for (k=0; k<ntprs; k++)
1359 snew(perfdata[k], datasets);
1360 for (i=0; i<datasets; i++)
1362 for (j=0; j<repeats; j++)
1364 snew(perfdata[k][i].Gcycles , repeats);
1365 snew(perfdata[k][i].ns_per_day, repeats);
1366 snew(perfdata[k][i].PME_f_load, repeats);
1373 static void do_the_tests(
1374 FILE *fp, /* General g_tune_pme output file */
1375 char **tpr_names, /* Filenames of the input files to test */
1376 int maxPMEnodes, /* Max fraction of nodes to use for PME */
1377 int minPMEnodes, /* Min fraction of nodes to use for PME */
1378 int npme_fixed, /* If >= -1, test fixed number of PME
1379 * nodes only */
1380 const char *npmevalues_opt, /* Which -npme values should be tested */
1381 t_perf **perfdata, /* Here the performace data is stored */
1382 int *pmeentries, /* Entries in the nPMEnodes list */
1383 int repeats, /* Repeat each test this often */
1384 int nnodes, /* Total number of nodes = nPP + nPME */
1385 int nr_tprs, /* Total number of tpr files to test */
1386 gmx_bool bThreads, /* Threads or MPI? */
1387 char *cmd_mpirun, /* mpirun command string */
1388 char *cmd_np, /* "-np", "-n", whatever mpirun needs */
1389 char *cmd_mdrun, /* mdrun command string */
1390 char *cmd_args_bench, /* arguments for mdrun in a string */
1391 const t_filenm *fnm, /* List of filenames from command line */
1392 int nfile, /* Number of files specified on the cmdl. */
1393 int sim_part, /* For checkpointing */
1394 int presteps, /* DLB equilibration steps, is checked */
1395 gmx_large_int_t cpt_steps) /* Time step counter in the checkpoint */
1397 int i,nr,k,ret,count=0,totaltests;
1398 int *nPMEnodes=NULL;
1399 t_perf *pd=NULL;
1400 int cmdline_length;
1401 char *command, *cmd_stub;
1402 char buf[STRLEN];
1403 gmx_bool bResetProblem=FALSE;
1406 /* This string array corresponds to the eParselog enum type at the start
1407 * of this file */
1408 const char* ParseLog[] = {"OK.",
1409 "Logfile not found!",
1410 "No timings, logfile truncated?",
1411 "Run was terminated.",
1412 "Counters were not reset properly.",
1413 "No DD grid found for these settings.",
1414 "TPX version conflict!",
1415 "mdrun was not started in parallel!",
1416 "A fatal error occured!" };
1417 char str_PME_f_load[13];
1420 /* Allocate space for the mdrun command line. 100 extra characters should
1421 be more than enough for the -npme etcetera arguments */
1422 cmdline_length = strlen(cmd_mpirun)
1423 + strlen(cmd_np)
1424 + strlen(cmd_mdrun)
1425 + strlen(cmd_args_bench)
1426 + strlen(tpr_names[0]) + 100;
1427 snew(command , cmdline_length);
1428 snew(cmd_stub, cmdline_length);
1430 /* Construct the part of the command line that stays the same for all tests: */
1431 if (bThreads)
1433 sprintf(cmd_stub, "%s%s", cmd_mdrun, cmd_np);
1435 else
1437 sprintf(cmd_stub, "%s%s%s ", cmd_mpirun, cmd_np, cmd_mdrun);
1440 /* Create a list of numbers of PME nodes to test */
1441 if (npme_fixed < -1)
1443 make_npme_list(npmevalues_opt, pmeentries, &nPMEnodes,
1444 nnodes, minPMEnodes, maxPMEnodes);
1446 else
1448 *pmeentries = 1;
1449 snew(nPMEnodes, 1);
1450 nPMEnodes[0] = npme_fixed;
1451 fprintf(stderr, "Will use a fixed number of %d PME-only nodes.\n", nPMEnodes[0]);
1454 if (0 == repeats)
1456 fprintf(fp, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1457 fclose(fp);
1458 finalize(opt2fn("-p", nfile, fnm));
1459 exit(0);
1462 /* Allocate one dataset for each tpr input file: */
1463 init_perfdata(perfdata, nr_tprs, *pmeentries, repeats);
1465 /*****************************************/
1466 /* Main loop over all tpr files to test: */
1467 /*****************************************/
1468 totaltests = nr_tprs*(*pmeentries)*repeats;
1469 for (k=0; k<nr_tprs;k++)
1471 fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
1472 fprintf(fp, "PME nodes Gcycles ns/day PME/f Remark\n");
1473 /* Loop over various numbers of PME nodes: */
1474 for (i = 0; i < *pmeentries; i++)
1476 pd = &perfdata[k][i];
1478 /* Loop over the repeats for each scenario: */
1479 for (nr = 0; nr < repeats; nr++)
1481 pd->nPMEnodes = nPMEnodes[i];
1483 /* Add -npme and -s to the command line and save it. Note that
1484 * the -passall (if set) options requires cmd_args_bench to be
1485 * at the end of the command line string */
1486 snew(pd->mdrun_cmd_line, cmdline_length);
1487 sprintf(pd->mdrun_cmd_line, "%s-npme %d -s %s %s",
1488 cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);
1490 /* Do a benchmark simulation: */
1491 if (repeats > 1)
1492 sprintf(buf, ", pass %d/%d", nr+1, repeats);
1493 else
1494 buf[0]='\0';
1495 fprintf(stdout, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
1496 (100.0*count)/totaltests,
1497 k+1, nr_tprs, i+1, *pmeentries, buf);
1498 make_backup(opt2fn("-err",nfile,fnm));
1499 sprintf(command, "%s 1> /dev/null 2>%s", pd->mdrun_cmd_line, opt2fn("-err",nfile,fnm));
1500 fprintf(stdout, "%s\n", pd->mdrun_cmd_line);
1501 gmx_system_call(command);
1503 /* Collect the performance data from the log file; also check stderr
1504 * for fatal errors */
1505 ret = parse_logfile(opt2fn("-bg",nfile,fnm), opt2fn("-err",nfile,fnm),
1506 pd, nr, presteps, cpt_steps, nnodes);
1507 if ((presteps > 0) && (ret == eParselogResetProblem))
1508 bResetProblem = TRUE;
1510 if (-1 == pd->nPMEnodes)
1511 sprintf(buf, "(%3d)", pd->guessPME);
1512 else
1513 sprintf(buf, " ");
1515 /* Nicer output */
1516 if (pd->PME_f_load[nr] > 0.0)
1517 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load[nr]);
1518 else
1519 sprintf(str_PME_f_load, "%s", " - ");
1521 /* Write the data we got to disk */
1522 fprintf(fp, "%4d%s %12.3f %12.3f %s %s", pd->nPMEnodes,
1523 buf, pd->Gcycles[nr], pd->ns_per_day[nr], str_PME_f_load, ParseLog[ret]);
1524 if (! (ret==eParselogOK || ret==eParselogNoDDGrid || ret==eParselogNotFound) )
1525 fprintf(fp, " Check %s file for problems.", ret==eParselogFatal? "err":"log");
1526 fprintf(fp, "\n");
1527 fflush(fp);
1528 count++;
1530 /* Do some cleaning up and delete the files we do not need any more */
1531 cleanup(fnm, nfile, k, nnodes, pd->nPMEnodes, nr, ret==eParselogFatal);
1533 /* If the first run with this number of processors already failed, do not try again: */
1534 if (pd->Gcycles[0] <= 0.0 && repeats > 1)
1536 fprintf(stdout, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
1537 count += repeats-(nr+1);
1538 break;
1540 } /* end of repeats loop */
1541 } /* end of -npme loop */
1542 } /* end of tpr file loop */
1543 if (bResetProblem)
1545 sep_line(fp);
1546 fprintf(fp, "WARNING: The cycle and time step counters could not be reset\n"
1547 "properly. The reason could be that mpirun did not manage to\n"
1548 "export the environment variable GMX_RESET_COUNTER. You might\n"
1549 "have to give a special switch to mpirun for that.\n"
1550 "Alternatively, you can manually set GMX_RESET_COUNTER to the\n"
1551 "value normally provided by -presteps.");
1552 sep_line(fp);
1554 sfree(command);
1555 sfree(cmd_stub);
1559 static void check_input(
1560 int nnodes,
1561 int repeats,
1562 int *ntprs,
1563 real *upfac,
1564 real *downfac,
1565 real maxPMEfraction,
1566 real minPMEfraction,
1567 int npme_fixed,
1568 real fourierspacing,
1569 gmx_large_int_t bench_nsteps,
1570 const t_filenm *fnm,
1571 int nfile,
1572 int sim_part,
1573 int presteps,
1574 int npargs,
1575 t_pargs *pa)
1577 /* Make sure the input file exists */
1578 if (!gmx_fexist(opt2fn("-s",nfile,fnm)))
1579 gmx_fatal(FARGS, "File %s not found.", opt2fn("-s",nfile,fnm));
1581 /* Make sure that the checkpoint file is not overwritten by the benchmark runs */
1582 if ( (0 == strcmp(opt2fn("-cpi",nfile,fnm), opt2fn("-cpo",nfile,fnm)) ) && (sim_part > 1) )
1583 gmx_fatal(FARGS, "Checkpoint input and output file must not be identical,\nbecause then the input file might change during the benchmarks.");
1585 /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
1586 if (repeats < 0)
1587 gmx_fatal(FARGS, "Number of repeats < 0!");
1589 /* Check number of nodes */
1590 if (nnodes < 1)
1591 gmx_fatal(FARGS, "Number of nodes/threads must be a positive integer.");
1593 /* Automatically choose -ntpr if not set */
1594 if (*ntprs < 1)
1596 if (nnodes < 16)
1597 *ntprs = 1;
1598 else
1599 *ntprs = 3;
1600 fprintf(stderr, "Will test %d tpr file%s.\n", *ntprs, *ntprs==1?"":"s");
1602 else
1604 if (1 == *ntprs)
1605 fprintf(stderr, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n");
1608 if ( is_equal(*downfac,*upfac) && (*ntprs > 2) && !(fourierspacing > 0.0))
1610 fprintf(stderr, "WARNING: Resetting -ntpr to 2 since both scaling factors are the same.\n"
1611 "Please choose upfac unequal to downfac to test various PME grid settings\n");
1612 *ntprs = 2;
1615 /* Check whether max and min fraction are within required values */
1616 if (maxPMEfraction > 0.5 || maxPMEfraction < 0)
1617 gmx_fatal(FARGS, "-max must be between 0 and 0.5");
1618 if (minPMEfraction > 0.5 || minPMEfraction < 0)
1619 gmx_fatal(FARGS, "-min must be between 0 and 0.5");
1620 if (maxPMEfraction < minPMEfraction)
1621 gmx_fatal(FARGS, "-max must be larger or equal to -min");
1623 /* Check whether the number of steps - if it was set - has a reasonable value */
1624 if (bench_nsteps < 0)
1625 gmx_fatal(FARGS, "Number of steps must be positive.");
1627 if (bench_nsteps > 10000 || bench_nsteps < 100)
1629 fprintf(stderr, "WARNING: steps=");
1630 fprintf(stderr, gmx_large_int_pfmt, bench_nsteps);
1631 fprintf(stderr, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps < 100)? "few" : "many");
1634 if (presteps < 0)
1636 gmx_fatal(FARGS, "Cannot have a negative number of presteps.\n");
1639 if (*upfac <= 0.0 || *downfac <= 0.0 || *downfac > *upfac)
1640 gmx_fatal(FARGS, "Both scaling factors must be larger than zero and upper\n"
1641 "scaling limit (%f) must be larger than lower limit (%f).",
1642 *upfac, *downfac);
1644 if (*downfac < 0.75 || *upfac > 1.25)
1645 fprintf(stderr, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1647 if (fourierspacing < 0)
1648 gmx_fatal(FARGS, "Please choose a positive value for fourierspacing.");
1650 /* Make shure that the scaling factor options are compatible with the number of tprs */
1651 if ( (*ntprs < 3) && ( opt2parg_bSet("-upfac",npargs,pa) || opt2parg_bSet("-downfac",npargs,pa) ) )
1653 if (opt2parg_bSet("-upfac",npargs,pa) && opt2parg_bSet("-downfac",npargs,pa) && !is_equal(*upfac,*downfac))
1655 fprintf(stderr, "NOTE: Specify -ntpr > 2 for both scaling factors to take effect.\n"
1656 "(upfac=%f, downfac=%f)\n", *upfac, *downfac);
1658 if (opt2parg_bSet("-upfac",npargs,pa))
1659 *downfac = *upfac;
1660 if (opt2parg_bSet("-downfac",npargs,pa))
1661 *upfac = *downfac;
1663 if ( (2 == *ntprs) && (opt2parg_bSet("-downfac",npargs,pa)) && !is_equal(*downfac, 1.0))
1665 *upfac = 1.0;
1668 /* If a fixed number of PME nodes is set we do rcoulomb and PME gird tuning
1669 * only. We need to check whether the requested number of PME-only nodes
1670 * makes sense. */
1671 if (npme_fixed > -1)
1673 /* No more than 50% of all nodes can be assigned as PME-only nodes. */
1674 if (2*npme_fixed > nnodes)
1676 gmx_fatal(FARGS, "Cannot have more than %d PME-only nodes for a total of %d nodes (you chose %d).\n",
1677 nnodes/2, nnodes, npme_fixed);
1679 if ((npme_fixed > 0) && (5*npme_fixed < nnodes))
1681 fprintf(stderr, "WARNING: Only %g percent of the nodes are assigned as PME-only nodes.\n",
1682 100.0*((real)npme_fixed / (real)nnodes));
1684 if (opt2parg_bSet("-min",npargs,pa) || opt2parg_bSet("-max",npargs,pa))
1686 fprintf(stderr, "NOTE: The -min, -max, and -npme options have no effect when a\n"
1687 " fixed number of PME-only nodes is requested with -fix.\n");
1693 /* Returns TRUE when "opt" is a switch for g_tune_pme itself */
1694 static gmx_bool is_main_switch(char *opt)
1696 if ( (0 == strcmp(opt,"-s" ))
1697 || (0 == strcmp(opt,"-p" ))
1698 || (0 == strcmp(opt,"-launch" ))
1699 || (0 == strcmp(opt,"-r" ))
1700 || (0 == strcmp(opt,"-ntpr" ))
1701 || (0 == strcmp(opt,"-max" ))
1702 || (0 == strcmp(opt,"-min" ))
1703 || (0 == strcmp(opt,"-upfac" ))
1704 || (0 == strcmp(opt,"-downfac" ))
1705 || (0 == strcmp(opt,"-fix" ))
1706 || (0 == strcmp(opt,"-four" ))
1707 || (0 == strcmp(opt,"-steps" ))
1708 || (0 == strcmp(opt,"-simsteps" ))
1709 || (0 == strcmp(opt,"-resetstep"))
1710 || (0 == strcmp(opt,"-so" ))
1711 || (0 == strcmp(opt,"-npstring" ))
1712 || (0 == strcmp(opt,"-npme" ))
1713 || (0 == strcmp(opt,"-passall" )) )
1714 return TRUE;
1716 return FALSE;
1720 /* Returns TRUE when "opt" is needed at launch time */
1721 static gmx_bool is_launch_option(char *opt, gmx_bool bSet)
1723 if (bSet)
1724 return TRUE;
1725 else
1726 return FALSE;
1730 /* Returns TRUE when "opt" is needed at launch time */
1731 static gmx_bool is_launch_file(char *opt, gmx_bool bSet)
1733 /* We need all options that were set on the command line
1734 * and that do not start with -b */
1735 if (0 == strncmp(opt,"-b", 2))
1736 return FALSE;
1738 if (bSet)
1739 return TRUE;
1740 else
1741 return FALSE;
1745 /* Returns TRUE when "opt" gives an option needed for the benchmarks runs */
1746 static gmx_bool is_bench_option(char *opt, gmx_bool bSet)
1748 /* If option is set, we might need it for the benchmarks.
1749 * This includes -cpi */
1750 if (bSet)
1752 if ( (0 == strcmp(opt, "-append" ))
1753 || (0 == strcmp(opt, "-maxh" ))
1754 || (0 == strcmp(opt, "-deffnm" ))
1755 || (0 == strcmp(opt, "-resethway"))
1756 || (0 == strcmp(opt, "-cpnum" )) )
1757 return FALSE;
1758 else
1759 return TRUE;
1761 else
1762 return FALSE;
1766 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1767 static gmx_bool is_bench_file(char *opt, gmx_bool bSet, gmx_bool bOptional, gmx_bool bIsOutput)
1769 /* All options starting with "-b" are for _b_enchmark files exclusively */
1770 if (0 == strncmp(opt,"-b", 2))
1772 if (!bOptional || bSet)
1773 return TRUE;
1774 else
1775 return FALSE;
1777 else
1779 if (bIsOutput)
1780 return FALSE;
1781 else
1782 if (bSet) /* These are additional input files like -cpi -ei */
1783 return TRUE;
1784 else
1785 return FALSE;
1790 /* Adds 'buf' to 'str' */
1791 static void add_to_string(char **str, char *buf)
1793 int len;
1796 len = strlen(*str) + strlen(buf) + 1;
1797 srenew(*str, len);
1798 strcat(*str, buf);
1802 /* Create the command line for the benchmark as well as for the real run */
1803 static void create_command_line_snippets(
1804 gmx_bool bThreads,
1805 int presteps,
1806 int nfile,
1807 t_filenm fnm[],
1808 int npargs,
1809 t_pargs *pa,
1810 const char *procstring, /* How to pass the number of processors to $MPIRUN */
1811 char *cmd_np[], /* Actual command line snippet, e.g. '-np <N>' */
1812 char *cmd_args_bench[], /* command line arguments for benchmark runs */
1813 char *cmd_args_launch[], /* command line arguments for simulation run */
1814 char extra_args[]) /* Add this to the end of the command line */
1816 int i;
1817 char *opt;
1818 const char *name;
1819 char *np_or_nt;
1820 #define BUFLENGTH 255
1821 char buf[BUFLENGTH];
1822 char strbuf[BUFLENGTH];
1823 char strbuf2[BUFLENGTH];
1826 if (bThreads)
1827 np_or_nt=strdup("-nt");
1828 else
1829 np_or_nt=strdup("-np");
1831 /* strlen needs at least '\0' as a string: */
1832 snew(*cmd_args_bench ,1);
1833 snew(*cmd_args_launch,1);
1834 *cmd_args_launch[0]='\0';
1835 *cmd_args_bench[0] ='\0';
1838 /*******************************************/
1839 /* 1. Process other command line arguments */
1840 /*******************************************/
1841 for (i=0; i<npargs; i++)
1843 /* What command line switch are we currently processing: */
1844 opt = (char *)pa[i].option;
1845 /* Skip options not meant for mdrun */
1846 if (!is_main_switch(opt))
1848 /* Boolean arguments need to be generated in the -[no]argname format */
1849 if (pa[i].type == etBOOL)
1851 sprintf(strbuf,"-%s%s ",*pa[i].u.b ? "" : "no",opt+1);
1853 else
1855 /* Print it to a string buffer, strip away trailing whitespaces that pa_val also returns: */
1856 sprintf(strbuf2, "%s", pa_val(&pa[i],buf,BUFLENGTH));
1857 rtrim(strbuf2);
1858 sprintf(strbuf, "%s %s ", opt, strbuf2);
1861 /* We need the -np (or -nt) switch in a separate buffer - whether or not it was set! */
1862 if (0 == strcmp(opt,np_or_nt))
1864 if (strcmp(procstring, "none")==0 && !bThreads)
1866 /* Omit -np <N> entirely */
1867 snew(*cmd_np, 2);
1868 sprintf(*cmd_np, " ");
1870 else
1872 /* This is the normal case with -np <N> */
1873 snew(*cmd_np, strlen(procstring)+strlen(strbuf2)+4);
1874 sprintf(*cmd_np, " %s %s ", bThreads? "-nt" : procstring, strbuf2);
1877 else
1879 if (is_bench_option(opt,pa[i].bSet))
1880 add_to_string(cmd_args_bench, strbuf);
1882 if (is_launch_option(opt,pa[i].bSet))
1883 add_to_string(cmd_args_launch, strbuf);
1887 if (presteps > 0)
1889 /* Add equilibration steps to benchmark options */
1890 sprintf(strbuf, "-resetstep %d ", presteps);
1891 add_to_string(cmd_args_bench, strbuf);
1894 /********************/
1895 /* 2. Process files */
1896 /********************/
1897 for (i=0; i<nfile; i++)
1899 opt = (char *)fnm[i].opt;
1900 name = opt2fn(opt,nfile,fnm);
1902 /* Strbuf contains the options, now let's sort out where we need that */
1903 sprintf(strbuf, "%s %s ", opt, name);
1905 /* Skip options not meant for mdrun */
1906 if (!is_main_switch(opt))
1909 if ( is_bench_file(opt, opt2bSet(opt,nfile,fnm), is_optional(&fnm[i]), is_output(&fnm[i])) )
1911 /* All options starting with -b* need the 'b' removed,
1912 * therefore overwrite strbuf */
1913 if (0 == strncmp(opt, "-b", 2))
1914 sprintf(strbuf, "-%s %s ", &opt[2], name);
1916 add_to_string(cmd_args_bench, strbuf);
1919 if ( is_launch_file(opt,opt2bSet(opt,nfile,fnm)) )
1920 add_to_string(cmd_args_launch, strbuf);
1924 add_to_string(cmd_args_bench , extra_args);
1925 add_to_string(cmd_args_launch, extra_args);
1926 #undef BUFLENGTH
1930 /* Set option opt */
1931 static void setopt(const char *opt,int nfile,t_filenm fnm[])
1933 int i;
1935 for(i=0; (i<nfile); i++)
1936 if (strcmp(opt,fnm[i].opt)==0)
1937 fnm[i].flag |= ffSET;
1941 static void couple_files_options(int nfile, t_filenm fnm[])
1943 int i;
1944 gmx_bool bSet,bBench;
1945 char *opt;
1946 char buf[20];
1949 for (i=0; i<nfile; i++)
1951 opt = (char *)fnm[i].opt;
1952 bSet = ((fnm[i].flag & ffSET) != 0);
1953 bBench = (0 == strncmp(opt,"-b", 2));
1955 /* Check optional files */
1956 /* If e.g. -eo is set, then -beo also needs to be set */
1957 if (is_optional(&fnm[i]) && bSet && !bBench)
1959 sprintf(buf, "-b%s", &opt[1]);
1960 setopt(buf,nfile,fnm);
1962 /* If -beo is set, then -eo also needs to be! */
1963 if (is_optional(&fnm[i]) && bSet && bBench)
1965 sprintf(buf, "-%s", &opt[2]);
1966 setopt(buf,nfile,fnm);
1972 static double gettime()
1974 #ifdef HAVE_GETTIMEOFDAY
1975 struct timeval t;
1976 double seconds;
1978 gettimeofday(&t,NULL);
1980 seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
1982 return seconds;
1983 #else
1984 double seconds;
1986 seconds = time(NULL);
1988 return seconds;
1989 #endif
1993 #define BENCHSTEPS (1000)
1995 int gmx_tune_pme(int argc,char *argv[])
1997 const char *desc[] = {
1998 "For a given number [TT]-np[tt] or [TT]-nt[tt] of processors/threads, this program systematically",
1999 "times [TT]mdrun[tt] with various numbers of PME-only nodes and determines",
2000 "which setting is fastest. It will also test whether performance can",
2001 "be enhanced by shifting load from the reciprocal to the real space",
2002 "part of the Ewald sum. ",
2003 "Simply pass your [TT].tpr[tt] file to [TT]g_tune_pme[tt] together with other options",
2004 "for [TT]mdrun[tt] as needed.[PAR]",
2005 "Which executables are used can be set in the environment variables",
2006 "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
2007 "will be used as defaults. Note that for certain MPI frameworks you",
2008 "need to provide a machine- or hostfile. This can also be passed",
2009 "via the MPIRUN variable, e.g.[PAR]",
2010 "[TT]export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"[tt][PAR]",
2011 "Please call [TT]g_tune_pme[tt] with the normal options you would pass to",
2012 "[TT]mdrun[tt] and add [TT]-np[tt] for the number of processors to perform the",
2013 "tests on, or [TT]-nt[tt] for the number of threads. You can also add [TT]-r[tt]",
2014 "to repeat each test several times to get better statistics. [PAR]",
2015 "[TT]g_tune_pme[tt] can test various real space / reciprocal space workloads",
2016 "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
2017 "written with enlarged cutoffs and smaller fourier grids respectively.",
2018 "Typically, the first test (number 0) will be with the settings from the input",
2019 "[TT].tpr[tt] file; the last test (number [TT]ntpr[tt]) will have cutoffs multiplied",
2020 "by (and at the same time fourier grid dimensions divided by) the scaling",
2021 "factor [TT]-fac[tt] (default 1.2). The remaining [TT].tpr[tt] files will have about ",
2022 "equally-spaced values in between these extremes. [BB]Note[bb] that you can set [TT]-ntpr[tt] to 1",
2023 "if you just want to find the optimal number of PME-only nodes; in that case",
2024 "your input [TT].tpr[tt] file will remain unchanged.[PAR]",
2025 "For the benchmark runs, the default of 1000 time steps should suffice for most",
2026 "MD systems. The dynamic load balancing needs about 100 time steps",
2027 "to adapt to local load imbalances, therefore the time step counters",
2028 "are by default reset after 100 steps. For large systems",
2029 "(>1M atoms) you may have to set [TT]-resetstep[tt] to a higher value.",
2030 "From the 'DD' load imbalance entries in the md.log output file you",
2031 "can tell after how many steps the load is sufficiently balanced. Example call:[PAR]"
2032 "[TT]g_tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
2033 "After calling [TT]mdrun[tt] several times, detailed performance information",
2034 "is available in the output file [TT]perf.out.[tt] ",
2035 "[BB]Note[bb] that during the benchmarks, a couple of temporary files are written",
2036 "(options [TT]-b[tt]*), these will be automatically deleted after each test.[PAR]",
2037 "If you want the simulation to be started automatically with the",
2038 "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
2041 int nnodes =1;
2042 int repeats=2;
2043 int pmeentries=0; /* How many values for -npme do we actually test for each tpr file */
2044 real maxPMEfraction=0.50;
2045 real minPMEfraction=0.25;
2046 int maxPMEnodes, minPMEnodes;
2047 int npme_fixed=-2; /* If >= -1, use only this number
2048 * of PME-only nodes */
2049 real downfac=1.0,upfac=1.2;
2050 int ntprs=0;
2051 real fs=0.0; /* 0 indicates: not set by the user */
2052 gmx_large_int_t bench_nsteps=BENCHSTEPS;
2053 gmx_large_int_t new_sim_nsteps=-1; /* -1 indicates: not set by the user */
2054 gmx_large_int_t cpt_steps=0; /* Step counter in .cpt input file */
2055 int presteps=100; /* Do a full cycle reset after presteps steps */
2056 gmx_bool bOverwrite=FALSE, bKeepTPR;
2057 gmx_bool bLaunch=FALSE;
2058 gmx_bool bPassAll=FALSE;
2059 char *ExtraArgs=NULL;
2060 char **tpr_names=NULL;
2061 const char *simulation_tpr=NULL;
2062 int best_npme, best_tpr;
2063 int sim_part = 1; /* For benchmarks with checkpoint files */
2065 /* Default program names if nothing else is found */
2066 char *cmd_mpirun=NULL, *cmd_mdrun=NULL;
2067 char *cmd_args_bench, *cmd_args_launch;
2068 char *cmd_np=NULL;
2070 t_perf **perfdata=NULL;
2071 t_inputinfo *info;
2072 int i;
2073 FILE *fp;
2074 t_commrec *cr;
2076 /* Print out how long the tuning took */
2077 double seconds;
2079 static t_filenm fnm[] = {
2080 /* g_tune_pme */
2081 { efOUT, "-p", "perf", ffWRITE },
2082 { efLOG, "-err", "errors", ffWRITE },
2083 { efTPX, "-so", "tuned", ffWRITE },
2084 /* mdrun: */
2085 { efTPX, NULL, NULL, ffREAD },
2086 { efTRN, "-o", NULL, ffWRITE },
2087 { efXTC, "-x", NULL, ffOPTWR },
2088 { efCPT, "-cpi", NULL, ffOPTRD },
2089 { efCPT, "-cpo", NULL, ffOPTWR },
2090 { efSTO, "-c", "confout", ffWRITE },
2091 { efEDR, "-e", "ener", ffWRITE },
2092 { efLOG, "-g", "md", ffWRITE },
2093 { efXVG, "-dhdl", "dhdl", ffOPTWR },
2094 { efXVG, "-field", "field", ffOPTWR },
2095 { efXVG, "-table", "table", ffOPTRD },
2096 { efXVG, "-tablep", "tablep", ffOPTRD },
2097 { efXVG, "-tableb", "table", ffOPTRD },
2098 { efTRX, "-rerun", "rerun", ffOPTRD },
2099 { efXVG, "-tpi", "tpi", ffOPTWR },
2100 { efXVG, "-tpid", "tpidist", ffOPTWR },
2101 { efEDI, "-ei", "sam", ffOPTRD },
2102 { efEDO, "-eo", "sam", ffOPTWR },
2103 { efGCT, "-j", "wham", ffOPTRD },
2104 { efGCT, "-jo", "bam", ffOPTWR },
2105 { efXVG, "-ffout", "gct", ffOPTWR },
2106 { efXVG, "-devout", "deviatie", ffOPTWR },
2107 { efXVG, "-runav", "runaver", ffOPTWR },
2108 { efXVG, "-px", "pullx", ffOPTWR },
2109 { efXVG, "-pf", "pullf", ffOPTWR },
2110 { efMTX, "-mtx", "nm", ffOPTWR },
2111 { efNDX, "-dn", "dipole", ffOPTWR },
2112 /* Output files that are deleted after each benchmark run */
2113 { efTRN, "-bo", "bench", ffWRITE },
2114 { efXTC, "-bx", "bench", ffWRITE },
2115 { efCPT, "-bcpo", "bench", ffWRITE },
2116 { efSTO, "-bc", "bench", ffWRITE },
2117 { efEDR, "-be", "bench", ffWRITE },
2118 { efLOG, "-bg", "bench", ffWRITE },
2119 { efEDO, "-beo", "bench", ffOPTWR },
2120 { efXVG, "-bdhdl", "benchdhdl",ffOPTWR },
2121 { efXVG, "-bfield", "benchfld" ,ffOPTWR },
2122 { efXVG, "-btpi", "benchtpi", ffOPTWR },
2123 { efXVG, "-btpid", "benchtpid",ffOPTWR },
2124 { efGCT, "-bjo", "bench", ffOPTWR },
2125 { efXVG, "-bffout", "benchgct", ffOPTWR },
2126 { efXVG, "-bdevout","benchdev", ffOPTWR },
2127 { efXVG, "-brunav", "benchrnav",ffOPTWR },
2128 { efXVG, "-bpx", "benchpx", ffOPTWR },
2129 { efXVG, "-bpf", "benchpf", ffOPTWR },
2130 { efMTX, "-bmtx", "benchn", ffOPTWR },
2131 { efNDX, "-bdn", "bench", ffOPTWR }
2134 /* Command line options of mdrun */
2135 gmx_bool bDDBondCheck = TRUE;
2136 gmx_bool bDDBondComm = TRUE;
2137 gmx_bool bVerbose = FALSE;
2138 gmx_bool bCompact = TRUE;
2139 gmx_bool bSepPot = FALSE;
2140 gmx_bool bRerunVSite = FALSE;
2141 gmx_bool bIonize = FALSE;
2142 gmx_bool bConfout = TRUE;
2143 gmx_bool bReproducible = FALSE;
2144 gmx_bool bThreads = FALSE;
2146 int nmultisim=0;
2147 int nstglobalcomm=-1;
2148 int repl_ex_nst=0;
2149 int repl_ex_seed=-1;
2150 int nstepout=100;
2151 int nthreads=1;
2153 const char *ddno_opt[ddnoNR+1] =
2154 { NULL, "interleave", "pp_pme", "cartesian", NULL };
2155 const char *dddlb_opt[] =
2156 { NULL, "auto", "no", "yes", NULL };
2157 const char *procstring[] =
2158 { NULL, "-np", "-n", "none", NULL };
2159 const char *npmevalues_opt[] =
2160 { NULL, "auto", "all", "subset", NULL };
2161 real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
2162 char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
2163 char *deffnm=NULL;
2164 #define STD_CPT_PERIOD (15.0)
2165 real cpt_period=STD_CPT_PERIOD,max_hours=-1;
2166 gmx_bool bAppendFiles=TRUE;
2167 gmx_bool bKeepAndNumCPT=FALSE;
2168 gmx_bool bResetCountersHalfWay=FALSE;
2169 output_env_t oenv=NULL;
2171 t_pargs pa[] = {
2172 /***********************/
2173 /* g_tune_pme options: */
2174 /***********************/
2175 { "-np", FALSE, etINT, {&nnodes},
2176 "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" },
2177 { "-npstring", FALSE, etENUM, {procstring},
2178 "Specify the number of processors to [TT]$MPIRUN[tt] using this string"},
2179 { "-passall", FALSE, etBOOL, {&bPassAll},
2180 "HIDDENPut arguments unknown to [TT]mdrun[tt] at the end of the command line. Can be used for debugging purposes. "},
2181 { "-nt", FALSE, etINT, {&nthreads},
2182 "Number of threads to run the tests on (turns MPI & mpirun off)"},
2183 { "-r", FALSE, etINT, {&repeats},
2184 "Repeat each test this often" },
2185 { "-max", FALSE, etREAL, {&maxPMEfraction},
2186 "Max fraction of PME nodes to test with" },
2187 { "-min", FALSE, etREAL, {&minPMEfraction},
2188 "Min fraction of PME nodes to test with" },
2189 { "-npme", FALSE, etENUM, {npmevalues_opt},
2190 "Benchmark all possible values for [TT]-npme[tt] or just the subset that is expected to perform well"},
2191 { "-fix", FALSE, etINT, {&npme_fixed},
2192 "If >= -1, do not vary the number of PME-only nodes, instead use this fixed value and only vary rcoulomb and the PME grid spacing."},
2193 { "-upfac", FALSE, etREAL, {&upfac},
2194 "Upper limit for rcoulomb scaling factor (Note that rcoulomb upscaling results in fourier grid downscaling)" },
2195 { "-downfac", FALSE, etREAL, {&downfac},
2196 "Lower limit for rcoulomb scaling factor" },
2197 { "-ntpr", FALSE, etINT, {&ntprs},
2198 "Number of [TT].tpr[tt] files to benchmark. Create this many files with scaling factors ranging from 1.0 to fac. If < 1, automatically choose the number of [TT].tpr[tt] files to test" },
2199 { "-four", FALSE, etREAL, {&fs},
2200 "Use this fourierspacing value instead of the grid found in the [TT].tpr[tt] input file. (Spacing applies to a scaling factor of 1.0 if multiple [TT].tpr[tt] files are written)" },
2201 { "-steps", FALSE, etGMX_LARGE_INT, {&bench_nsteps},
2202 "Take timings for this many steps in the benchmark runs" },
2203 { "-resetstep",FALSE, etINT, {&presteps},
2204 "Let dlb equilibrate this many steps before timings are taken (reset cycle counters after this many steps)" },
2205 { "-simsteps", FALSE, etGMX_LARGE_INT, {&new_sim_nsteps},
2206 "If non-negative, perform this many steps in the real run (overwrites nsteps from [TT].tpr[tt], add [TT].cpt[tt] steps)" },
2207 { "-launch", FALSE, etBOOL, {&bLaunch},
2208 "Lauch the real simulation after optimization" },
2209 /******************/
2210 /* mdrun options: */
2211 /******************/
2212 { "-deffnm", FALSE, etSTR, {&deffnm},
2213 "Set the default filename for all file options at launch time" },
2214 { "-ddorder", FALSE, etENUM, {ddno_opt},
2215 "DD node order" },
2216 { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
2217 "Check for all bonded interactions with DD" },
2218 { "-ddbondcomm",FALSE, etBOOL, {&bDDBondComm},
2219 "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
2220 { "-rdd", FALSE, etREAL, {&rdd},
2221 "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
2222 { "-rcon", FALSE, etREAL, {&rconstr},
2223 "Maximum distance for P-LINCS (nm), 0 is estimate" },
2224 { "-dlb", FALSE, etENUM, {dddlb_opt},
2225 "Dynamic load balancing (with DD)" },
2226 { "-dds", FALSE, etREAL, {&dlb_scale},
2227 "Minimum allowed dlb scaling of the DD cell size" },
2228 { "-ddcsx", FALSE, etSTR, {&ddcsx},
2229 "HIDDENThe DD cell sizes in x" },
2230 { "-ddcsy", FALSE, etSTR, {&ddcsy},
2231 "HIDDENThe DD cell sizes in y" },
2232 { "-ddcsz", FALSE, etSTR, {&ddcsz},
2233 "HIDDENThe DD cell sizes in z" },
2234 { "-gcom", FALSE, etINT, {&nstglobalcomm},
2235 "Global communication frequency" },
2236 { "-v", FALSE, etBOOL, {&bVerbose},
2237 "Be loud and noisy" },
2238 { "-compact", FALSE, etBOOL, {&bCompact},
2239 "Write a compact log file" },
2240 { "-seppot", FALSE, etBOOL, {&bSepPot},
2241 "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
2242 { "-pforce", FALSE, etREAL, {&pforce},
2243 "Print all forces larger than this (kJ/mol nm)" },
2244 { "-reprod", FALSE, etBOOL, {&bReproducible},
2245 "Try to avoid optimizations that affect binary reproducibility" },
2246 { "-cpt", FALSE, etREAL, {&cpt_period},
2247 "Checkpoint interval (minutes)" },
2248 { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT},
2249 "Keep and number checkpoint files" },
2250 { "-append", FALSE, etBOOL, {&bAppendFiles},
2251 "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
2252 { "-maxh", FALSE, etREAL, {&max_hours},
2253 "Terminate after 0.99 times this time (hours)" },
2254 { "-multi", FALSE, etINT, {&nmultisim},
2255 "Do multiple simulations in parallel" },
2256 { "-replex", FALSE, etINT, {&repl_ex_nst},
2257 "Attempt replica exchange every # steps" },
2258 { "-reseed", FALSE, etINT, {&repl_ex_seed},
2259 "Seed for replica exchange, -1 is generate a seed" },
2260 { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
2261 "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
2262 { "-ionize", FALSE, etBOOL, {&bIonize},
2263 "Do a simulation including the effect of an X-ray bombardment on your system" },
2264 { "-confout", FALSE, etBOOL, {&bConfout},
2265 "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
2266 { "-stepout", FALSE, etINT, {&nstepout},
2267 "HIDDENFrequency of writing the remaining runtime" },
2268 { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
2269 "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt] (launch only)" }
2273 #define NFILE asize(fnm)
2275 CopyRight(stderr,argv[0]);
2277 seconds = gettime();
2279 parse_common_args(&argc,argv,PCA_NOEXIT_ON_ARGS,
2280 NFILE,fnm,asize(pa),pa,asize(desc),desc,
2281 0,NULL,&oenv);
2283 /* Store the remaining unparsed command line entries in a string */
2284 snew(ExtraArgs, 1);
2285 ExtraArgs[0] = '\0';
2286 for (i=1; i<argc; i++) /* argc will now be 1 if everything was understood */
2288 add_to_string(&ExtraArgs, argv[i]);
2289 add_to_string(&ExtraArgs, " ");
2291 if ( !bPassAll && (ExtraArgs[0] != '\0') )
2293 fprintf(stderr, "\nWARNING: The following arguments you provided have no effect:\n"
2294 "%s\n"
2295 "Use the -passall option to force them to appear on the command lines\n"
2296 "for the benchmark simulations%s.\n\n",
2297 ExtraArgs, bLaunch? " and at launch time" : "");
2300 if (opt2parg_bSet("-nt",asize(pa),pa))
2302 bThreads=TRUE;
2303 if (opt2parg_bSet("-npstring",asize(pa),pa))
2304 fprintf(stderr, "WARNING: -npstring has no effect when using threads.\n");
2306 if (nnodes > 1)
2307 gmx_fatal(FARGS, "Can't run multi-threaded MPI simulation yet!");
2308 /* and now we just set this; a bit of an ugly hack*/
2309 nnodes=nthreads;
2311 /* Automatically set -beo options if -eo is set etc. */
2312 couple_files_options(NFILE,fnm);
2314 /* Construct the command line arguments for benchmark runs
2315 * as well as for the simulation run
2317 create_command_line_snippets(bThreads,presteps,NFILE,fnm,asize(pa),pa,procstring[0],
2318 &cmd_np, &cmd_args_bench, &cmd_args_launch,
2319 bPassAll? ExtraArgs : (char *)"");
2321 /* Read in checkpoint file if requested */
2322 sim_part = 1;
2323 if(opt2bSet("-cpi",NFILE,fnm))
2325 snew(cr,1);
2326 cr->duty=DUTY_PP; /* makes the following routine happy */
2327 read_checkpoint_simulation_part(opt2fn("-cpi",NFILE,fnm),
2328 &sim_part,&cpt_steps,cr,
2329 FALSE,NFILE,fnm,NULL,NULL);
2330 sfree(cr);
2331 sim_part++;
2332 /* sim_part will now be 1 if no checkpoint file was found */
2333 if (sim_part<=1)
2334 gmx_fatal(FARGS, "Checkpoint file %s not found!", opt2fn("-cpi",
2335 NFILE,
2336 fnm));
2339 /* Open performance output file and write header info */
2340 fp = ffopen(opt2fn("-p",NFILE,fnm),"w");
2342 /* Make a quick consistency check of command line parameters */
2343 check_input(nnodes, repeats, &ntprs, &upfac, &downfac,
2344 maxPMEfraction, minPMEfraction, npme_fixed,
2345 fs, bench_nsteps, fnm, NFILE, sim_part, presteps,
2346 asize(pa),pa);
2348 /* Determine max and min number of PME nodes to test: */
2349 if ((nnodes > 2) && (npme_fixed >= -1))
2351 maxPMEnodes = floor(maxPMEfraction*nnodes);
2352 minPMEnodes = max(floor(minPMEfraction*nnodes), 0);
2353 fprintf(stdout, "Will try runs with %d ", minPMEnodes);
2354 if (maxPMEnodes != minPMEnodes)
2355 fprintf(stdout, "- %d ", maxPMEnodes);
2356 fprintf(stdout, "PME-only nodes.\n Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
2358 else
2360 maxPMEnodes = 0;
2361 minPMEnodes = 0;
2364 /* Get the commands we need to set up the runs from environment variables */
2365 get_program_paths(bThreads, &cmd_mpirun, cmd_np, &cmd_mdrun, repeats);
2367 /* Print some header info to file */
2368 sep_line(fp);
2369 fprintf(fp, "\n P E R F O R M A N C E R E S U L T S\n");
2370 sep_line(fp);
2371 fprintf(fp, "%s for Gromacs %s\n", ShortProgram(),GromacsVersion());
2372 if (!bThreads)
2374 fprintf(fp, "Number of nodes : %d\n", nnodes);
2375 fprintf(fp, "The mpirun command is : %s\n", cmd_mpirun);
2376 if ( strcmp(procstring[0], "none") != 0)
2377 fprintf(fp, "Passing # of nodes via : %s\n", procstring[0]);
2378 else
2379 fprintf(fp, "Not setting number of nodes in system call\n");
2381 else
2382 fprintf(fp, "Number of threads : %d\n", nnodes);
2384 fprintf(fp, "The mdrun command is : %s\n", cmd_mdrun);
2385 fprintf(fp, "mdrun args benchmarks : %s\n", cmd_args_bench);
2386 fprintf(fp, "Benchmark steps : ");
2387 fprintf(fp, gmx_large_int_pfmt, bench_nsteps);
2388 fprintf(fp, "\n");
2389 fprintf(fp, "dlb equilibration steps : %d\n", presteps);
2390 if (sim_part > 1)
2392 fprintf(fp, "Checkpoint time step : ");
2393 fprintf(fp, gmx_large_int_pfmt, cpt_steps);
2394 fprintf(fp, "\n");
2396 if (bLaunch)
2397 fprintf(fp, "mdrun args at launchtime: %s\n", cmd_args_launch);
2398 if (!bPassAll && ExtraArgs[0] != '\0')
2399 fprintf(fp, "Unused arguments : %s\n", ExtraArgs);
2400 if (new_sim_nsteps >= 0)
2402 bOverwrite = TRUE;
2403 fprintf(stderr, "Note: Simulation input file %s will have ", opt2fn("-so",NFILE,fnm));
2404 fprintf(stderr, gmx_large_int_pfmt, new_sim_nsteps+cpt_steps);
2405 fprintf(stderr, " steps.\n");
2406 fprintf(fp, "Simulation steps : ");
2407 fprintf(fp, gmx_large_int_pfmt, new_sim_nsteps);
2408 fprintf(fp, "\n");
2410 if (repeats > 1)
2411 fprintf(fp, "Repeats for each test : %d\n", repeats);
2413 if (fs > 0.0)
2415 fprintf(fp, "Requested grid spacing : %f (This will be the grid spacing at a scaling factor of 1.0)\n", fs);
2418 if (npme_fixed >= -1)
2420 fprintf(fp, "Fixing -npme at : %d\n", npme_fixed);
2423 fprintf(fp, "Input file : %s\n", opt2fn("-s",NFILE,fnm));
2425 /* Allocate memory for the inputinfo struct: */
2426 snew(info, 1);
2427 info->nr_inputfiles = ntprs;
2428 for (i=0; i<ntprs; i++)
2430 snew(info->rcoulomb , ntprs);
2431 snew(info->rvdw , ntprs);
2432 snew(info->rlist , ntprs);
2433 snew(info->rlistlong, ntprs);
2434 snew(info->nkx , ntprs);
2435 snew(info->nky , ntprs);
2436 snew(info->nkz , ntprs);
2437 snew(info->fsx , ntprs);
2438 snew(info->fsy , ntprs);
2439 snew(info->fsz , ntprs);
2441 /* Make alternative tpr files to test: */
2442 snew(tpr_names, ntprs);
2443 for (i=0; i<ntprs; i++)
2444 snew(tpr_names[i], STRLEN);
2446 /* It can be that ntprs is reduced by make_benchmark_tprs if not enough
2447 * different grids could be found. */
2448 make_benchmark_tprs(opt2fn("-s",NFILE,fnm), tpr_names, bench_nsteps+presteps,
2449 cpt_steps, upfac, downfac, &ntprs, fs, info, fp);
2452 /********************************************************************************/
2453 /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats */
2454 /********************************************************************************/
2455 snew(perfdata, ntprs);
2456 do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npme_fixed, npmevalues_opt[0], perfdata, &pmeentries,
2457 repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2458 cmd_args_bench, fnm, NFILE, sim_part, presteps, cpt_steps);
2460 fprintf(fp, "\nTuning took%8.1f minutes.\n", (gettime()-seconds)/60.0);
2462 /* Analyse the results and give a suggestion for optimal settings: */
2463 bKeepTPR = analyze_data(fp, opt2fn("-p", NFILE, fnm), perfdata, nnodes, ntprs, pmeentries,
2464 repeats, info, &best_tpr, &best_npme);
2466 /* Take the best-performing tpr file and enlarge nsteps to original value */
2467 if ( bKeepTPR && !bOverwrite && !(fs > 0.0) )
2469 simulation_tpr = opt2fn("-s",NFILE,fnm);
2471 else
2473 simulation_tpr = opt2fn("-so",NFILE,fnm);
2474 modify_PMEsettings(bOverwrite? (new_sim_nsteps+cpt_steps) :
2475 info->orig_sim_steps, tpr_names[best_tpr],
2476 simulation_tpr);
2479 /* Now start the real simulation if the user requested it ... */
2480 launch_simulation(bLaunch, fp, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2481 cmd_args_launch, simulation_tpr, nnodes, best_npme);
2482 ffclose(fp);
2484 /* ... or simply print the performance results to screen: */
2485 if (!bLaunch)
2486 finalize(opt2fn("-p", NFILE, fnm));
2488 return 0;