Check md.log to ensure GMX_RESET_COUNTERS was passed to mdrun
[gromacs/adressmacs.git] / src / tools / gmx_tune_pme.c
blob0da272811ba165a8463cbe779ee8429be04f3a59
1 /*
2 *
3 * This source code is part of
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
10 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
11 * Copyright (c) 2001-2008, The GROMACS development team,
12 * check out http://www.gromacs.org for more information.
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * If you want to redistribute modifications, please consider that
20 * scientific software is very special. Version control is crucial -
21 * bugs must be traceable. We will be happy to consider code for
22 * inclusion in the official distribution, but derived work must not
23 * be called official GROMACS. Details are found in the README & COPYING
24 * files - if they are missing, get the official version at www.gromacs.org.
26 * To help us fund GROMACS development, we humbly ask that you cite
27 * the papers on the package - you can find them in the top README file.
29 * For more info, check our website at http://www.gromacs.org
31 * And Hey:
32 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
34 #include "statutil.h"
35 #include "typedefs.h"
36 #include "smalloc.h"
37 #include "vec.h"
38 #include "copyrite.h"
39 #include "statutil.h"
40 #include "tpxio.h"
41 #include "string2.h"
42 #include "readinp.h"
43 #include "calcgrid.h"
44 #include "checkpoint.h"
45 #include "gmx_ana.h"
/* Selector values named ddno...; the trailing ddnoNR equals the number
 * of real entries before it. */
enum {
    ddnoSEL,
    ddnoINTERLEAVE,
    ddnoPP_PME,
    ddnoCARTESIAN,
    ddnoNR
};
/* Result codes of the log file parser. The order must stay in sync with
 * the message table that is indexed by these values; eParselogNr is the
 * number of real codes. */
enum {
    eParselogOK,            /* performance data was read            */
    eParselogNotFound,      /* log file does not exist              */
    eParselogNoPerfData,    /* log file contains no timing data     */
    eParselogTerm,          /* run received a TERM or USR1 signal   */
    eParselogResetProblem,  /* counter reset could not be confirmed */
    eParselogNr
};
64 typedef struct
66 int nPMEnodes; /* number of PME only nodes used in this test */
67 int nx, ny, nz; /* DD grid */
68 int guessPME; /* if nPMEnodes == -1, this is the guessed number of PME nodes */
69 real *Gcycles; /* This can contain more than one value if doing multiple tests */
70 real Gcycles_Av;
71 real *ns_per_day;
72 real ns_per_day_Av;
73 real *PME_f_load; /* PME mesh/force load average*/
74 real PME_f_load_Av; /* Average average ;) ... */
75 char *mdrun_cmd_line; /* Mdrun command line used for this test */
76 } t_perf;
79 typedef struct
81 int nr_inputfiles; /* The number of tpr and mdp input files */
82 gmx_step_t orig_sim_steps; /* Number of steps to be done in the real simulation */
83 real *r_coulomb; /* The coulomb radii [0...nr_inputfiles] */
84 real *r_vdW; /* The vdW radii */
85 int *fourier_nx, *fourier_ny, *fourier_nz;
86 real *fourier_sp; /* Fourierspacing */
87 } t_inputinfo;
/* Write a dashed separator line, surrounded by newlines, to fp */
static void sep_line(FILE *fp)
{
    static const char separator[] =
        "\n------------------------------------------------------------\n";

    fprintf(fp, "%s", separator);
}
/* Wrapper for system calls.
 *
 * Passes 'command' to system(3) and returns its result. When the code is
 * built with GMX_NO_SYSTEM, gmx_fatal() is called with the command that
 * was attempted instead. */
static int gmx_system_call(char *command)
{
#ifdef GMX_NO_SYSTEM
    gmx_fatal(FARGS,"No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n",command);
    /* Keep every path of this non-void function returning a value, also
     * for compilers that do not know whether gmx_fatal() returns */
    return -1;
#else
    return ( system(command) );
#endif
}
/* Return whether 'string' begins with 'substring' */
static bool str_starts(const char *string, const char *substring)
{
    size_t prefix_len = strlen(substring);

    return (0 == strncmp(string, substring, prefix_len));
}
114 static void cleandata(t_perf *perfdata, int test_nr)
116 perfdata->Gcycles[test_nr] = 0.0;
117 perfdata->ns_per_day[test_nr] = 0.0;
118 perfdata->PME_f_load[test_nr] = 0.0;
120 return;
/* Stages of the log file scan: each value names the last marker string
 * that has been found so far */
enum {
    eFoundNothing,
    eFoundDDStr,
    eFoundPMEfStr,
    eFoundCycleStr
};
126 static int parse_logfile(char *logfile, t_perf *perfdata, int test_nr, int presteps, gmx_step_t cpt_steps)
128 FILE *fp;
129 char line[STRLEN], dumstring[STRLEN], dumstring2[STRLEN];
130 const char matchstrdd[]="Domain decomposition grid";
131 const char matchstrcr[]="resetting all time and cycle counters";
132 const char matchstrbal[]="Average PME mesh/force load:";
133 const char matchstring[]="R E A L C Y C L E A N D T I M E A C C O U N T I N G";
134 const char errTERM[]="Received the TERM signal, stopping at the next step";
135 const char errUSR1[]="Received the USR1 signal, stopping at the next NS step";
136 int iFound;
137 int procs;
138 real dum1,dum2,dum3;
139 int npme;
140 gmx_step_t resetsteps=-1;
141 bool bFoundResetStr = FALSE;
142 bool bResetChecked = FALSE;
145 if (!gmx_fexist(logfile))
147 fprintf(stderr, "WARNING: Could not find logfile %s.\n", logfile);
148 cleandata(perfdata, test_nr);
149 return eParselogNotFound;
152 fp = fopen(logfile, "r");
153 perfdata->PME_f_load[test_nr] = -1.0;
154 perfdata->guessPME = -1;
155 iFound = eFoundNothing;
157 while (fgets(line, STRLEN, fp) != NULL)
159 /* Remove leading spaces */
160 ltrim(line);
162 /* Check for TERM and USR1 signals from user: */
163 if ( str_starts(line, errTERM) || str_starts(line, errUSR1) )
165 fclose(fp);
166 cleandata(perfdata, test_nr);
167 return eParselogTerm;
170 /* Check whether cycle resetting worked */
171 if (presteps > 0 && !bFoundResetStr)
173 if (strstr(line, matchstrcr) != NULL)
175 sprintf(dumstring, "Step %s", gmx_step_pfmt);
176 sscanf(line, dumstring, &resetsteps);
177 bFoundResetStr = TRUE;
178 if (resetsteps == presteps+cpt_steps)
180 bResetChecked = TRUE;
182 else
184 sprintf(dumstring , gmx_step_pfmt, resetsteps);
185 sprintf(dumstring2, gmx_step_pfmt, presteps+cpt_steps);
186 fprintf(stderr, "WARNING: Time step counters were reset at step %s,\n"
187 " though they were supposed to be reset at step %s!\n",
188 dumstring, dumstring2);
193 /* Look for strings that appear in a certain order in the log file: */
194 switch(iFound)
196 case eFoundNothing:
197 /* Look for domain decomp grid and separate PME nodes: */
198 if (str_starts(line, matchstrdd))
200 sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
201 &(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme);
202 if (perfdata->nPMEnodes == -1)
203 perfdata->guessPME = npme;
204 else if (perfdata->nPMEnodes != npme)
205 gmx_fatal(FARGS, "PME nodes from command line and output file are not identical");
206 iFound = eFoundDDStr;
208 break;
209 case eFoundDDStr:
210 /* Look for PME mesh/force balance (not necessarily present, though) */
211 if (str_starts(line, matchstrbal))
212 sscanf(&line[strlen(matchstrbal)], "%f", &(perfdata->PME_f_load[test_nr]));
213 /* Look for matchstring */
214 if (str_starts(line, matchstring))
215 iFound = eFoundPMEfStr;
216 break;
217 case eFoundPMEfStr:
218 /* Already found matchstring - look for cycle data */
219 if (str_starts(line, "Total "))
221 sscanf(line,"Total %d %f",&procs,&(perfdata->Gcycles[test_nr]));
222 iFound = eFoundCycleStr;
224 break;
225 case eFoundCycleStr:
226 /* Already found cycle data - look for remaining performance info and return */
227 if (str_starts(line, "Performance:"))
229 sscanf(line,"%s %f %f %f %f", dumstring, &dum1, &dum2, &(perfdata->ns_per_day[test_nr]), &dum3);
230 fclose(fp);
231 if (bResetChecked || presteps == 0)
232 return eParselogOK;
233 else
234 return eParselogResetProblem;
236 break;
238 } /* while */
239 fprintf(stdout, "No performance data in log file.\n");
240 fclose(fp);
241 cleandata(perfdata, test_nr);
243 return eParselogNoPerfData;
247 static int analyze_data(
248 FILE *fp,
249 t_perf **perfdata,
250 int ntprs,
251 int ntests,
252 int nrepeats,
253 t_inputinfo *info,
254 int *index_tpr, /* OUT: Nr of mdp file with best settings */
255 int *npme_optimal) /* OUT: Optimal number of PME nodes */
257 int i,j,k;
258 int line=0, line_win=-1;
259 int k_win=-1, i_win=-1, winPME;
260 real s=0.0; /* standard deviation */
261 t_perf *pd;
262 char strbuf[STRLEN];
263 char str_PME_f_load[13];
266 if (nrepeats > 1)
268 sep_line(fp);
269 fprintf(fp, "Summary of successful runs:\n");
270 fprintf(fp, "Line tpr PME nodes Gcycles Av. Std.dev. ns/day PME/f DD grid\n");
274 for (k=0; k<ntprs; k++)
276 for (i=0; i<ntests; i++)
278 /* Select the right dataset: */
279 pd = &(perfdata[k][i]);
281 pd->Gcycles_Av = 0.0;
282 pd->PME_f_load_Av = 0.0;
283 pd->ns_per_day_Av = 0.0;
285 if (pd->nPMEnodes == -1)
286 sprintf(strbuf, "(%3d)", pd->guessPME);
287 else
288 sprintf(strbuf, " ");
290 /* Get the average run time of a setting */
291 for (j=0; j<nrepeats; j++)
293 pd->Gcycles_Av += pd->Gcycles[j];
294 pd->PME_f_load_Av += pd->PME_f_load[j];
296 pd->Gcycles_Av /= nrepeats;
297 pd->PME_f_load_Av /= nrepeats;
299 for (j=0; j<nrepeats; j++)
301 if (pd->ns_per_day[j] > 0.0)
302 pd->ns_per_day_Av += pd->ns_per_day[j];
303 else
305 /* Somehow the performance number was not aquired for this run,
306 * therefor set the average to some negative value: */
307 pd->ns_per_day_Av = -1.0f*nrepeats;
308 break;
311 pd->ns_per_day_Av /= nrepeats;
313 /* Nicer output: */
314 if (pd->PME_f_load_Av > 0.0)
315 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load_Av);
316 else
317 sprintf(str_PME_f_load, "%s", " - ");
320 /* We assume we had a successful run if both averages are positive */
321 if (pd->Gcycles_Av > 0.0 && pd->ns_per_day_Av > 0.0)
323 /* Output statistics if repeats were done */
324 if (nrepeats > 1)
326 /* Calculate the standard deviation */
327 s = 0.0;
328 for (j=0; j<nrepeats; j++)
329 s += pow( pd->Gcycles[j] - pd->Gcycles_Av, 2 );
330 s /= (nrepeats - 1);
331 s = sqrt(s);
333 fprintf(fp, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s %3d %3d %3d\n",
334 line, k, pd->nPMEnodes, strbuf, pd->Gcycles_Av, s,
335 pd->ns_per_day_Av, str_PME_f_load, pd->nx, pd->ny, pd->nz);
337 /* Store the index of the best run found so far in 'winner': */
338 if ( (k_win == -1) || (pd->Gcycles_Av < perfdata[k_win][i_win].Gcycles_Av) )
340 k_win = k;
341 i_win = i;
342 line_win = line;
344 line++;
349 if (k_win == -1)
350 gmx_fatal(FARGS, "None of the runs was successful! Exiting.");
352 sep_line(fp);
354 winPME = perfdata[k_win][i_win].nPMEnodes;
355 if (winPME == -1)
356 sprintf(strbuf, "%s", "the automatic number of");
357 else
358 sprintf(strbuf, "%d", winPME);
359 fprintf(fp, "Best performance was achieved with %s PME nodes", strbuf);
360 if (nrepeats > 1)
361 fprintf(fp, " (see line %d) ", line_win);
362 if (ntprs > 1)
363 fprintf(fp, "\nand %s PME settings. ", (k_win ? "optimized" : "original"));
364 fprintf(fp, "\n");
366 /* Only mention settings if rcoulomb, rvdv, nkx, nky, or nkz was modified: */
367 if (k_win)
369 fprintf(fp, "Optimized PME settings:\n");
370 fprintf(fp, "r_coulomb = %f, r_vdW = %f, nx,ny,nz = %d %d %d\n",
371 info->r_coulomb[k_win], info->r_vdW[k_win],
372 info->fourier_nx[k_win], info->fourier_ny[k_win], info->fourier_nz[k_win]);
374 fflush(fp);
376 /* Return the index of the mdp file that showed the highest performance
377 * and the optimal number of PME nodes */
378 *index_tpr = k_win;
379 *npme_optimal = winPME;
381 return 0;
385 static void counters_restore_env(int resetcount_orig, bool bHaveResetCounter)
387 char *env_ptr;
390 if (TRUE == bHaveResetCounter)
392 /* Restore the old value */
393 snew(env_ptr, 20);
394 sprintf(env_ptr, "%d", resetcount_orig);
395 setenv("GMX_RESET_COUNTERS",env_ptr,TRUE);
396 fprintf(stdout, "\nSetting GMX_RESET_COUNTERS back to %s.\n", env_ptr);
398 else
400 /* Remove the environment variable again */
401 unsetenv("GMX_RESET_COUNTERS");
402 fprintf(stdout, "\nRemoving GMX_RESET_COUNTERS from environment again.\n");
407 static void counters_set_env(int presteps, int *resetcount_orig, bool *bHaveResetCounter)
409 char *env_ptr;
410 char *cp;
413 /* If the GMX_RESET_COUNTERS environment is present we save it
414 * so that we can set it to the original value later again */
415 if ( (cp = getenv("GMX_RESET_COUNTERS")) != NULL)
417 sscanf(cp,"%d",resetcount_orig);
418 *bHaveResetCounter = TRUE;
420 else
421 *bHaveResetCounter = FALSE;
423 /* Set GMX_RESET_COUNTERS to the value requested in g_tune_pme */
424 snew(env_ptr, 20);
425 sprintf(env_ptr, "%d", presteps);
426 fprintf(stdout, "Setting environment variable GMX_RESET_COUNTERS to %s.\n", env_ptr);
427 setenv("GMX_RESET_COUNTERS",env_ptr,TRUE);
431 /* Get the commands we need to set up the runs from environment variables */
432 static void get_program_paths(char *cmd_mpirun[], char *cmd_mdrun[], char *cmd_export[], int repeats)
434 char *command=NULL;
435 char *cp;
436 char *cp2;
437 char line[STRLEN];
438 FILE *fp;
439 const char def_mpirun[] = "mpirun";
440 const char def_mdrun[] = "mdrun";
441 const char def_export[] = "-x GMX_RESET_COUNTERS ";
442 const char filename[] = "testrun.log";
443 const char match_mdrun[]= "NNODES=";
444 bool bFound = FALSE;
445 int i;
448 /* Get the commands we need to set up the runs from environment variables */
449 if ( (cp = getenv("MPIRUN")) != NULL)
450 *cmd_mpirun = strdup(cp);
451 else
452 *cmd_mpirun = strdup(def_mpirun);
454 if ( (cp = getenv("MDRUN" )) != NULL )
455 *cmd_mdrun = strdup(cp);
456 else
457 *cmd_mdrun = strdup(def_mdrun);
459 *cmd_export = strdup(def_export);
461 /* If no simulations have to be performed, we are done here */
462 if (repeats <= 0)
463 return;
465 /* Run a small test to see if mpirun and mdrun work if we intend to execute mdrun! */
466 fprintf(stdout, "Making shure that mdrun can be executed. ");
467 for (i=0; i<2; i++)
469 snew(command, strlen(*cmd_mpirun) +strlen(*cmd_export) + strlen(*cmd_mdrun) + strlen(filename) + 30);
470 sprintf(command, "%s %s-np 1 %s -h -quiet >& %s", *cmd_mpirun, *cmd_export, *cmd_mdrun, filename);
471 fprintf(stdout, "Trying '%s' ... ", command);
473 gmx_system_call(command);
475 /* Check if we find the gromacs header in the log file: */
476 fp = fopen(filename, "r");
477 while ( (!feof(fp)) && (bFound==FALSE) )
479 cp2=fgets(line, STRLEN, fp);
480 if (cp2!=NULL && str_starts(line, match_mdrun))
481 bFound = TRUE;
483 /* 2nd try ... */
484 if (!bFound)
486 fprintf(stdout, "No success.\n");
487 *cmd_export = strdup("");
489 else
490 break;
492 if (!bFound)
493 gmx_fatal(FARGS, "Cannot execute mdrun. Please check %s for problems!", filename);
495 fclose(fp);
496 fprintf(stdout, "passed.\n");
498 /* Clean up ... */
499 remove(filename);
/* Write the command line for the production run to fp and, if bLaunch is
 * set, execute it and call thanx(fp) afterwards. */
static void launch_simulation(
        bool bLaunch,           /* Should the simulation be launched? */
        FILE *fp,               /* General log file                   */
        char *cmd_mpirun,       /* Command for mpirun                 */
        char *cmd_mdrun,        /* Command for mdrun                  */
        char *args_for_mdrun,   /* Arguments for mdrun                */
        char *simulation_tpr,   /* This tpr will be simulated         */
        int  nnodes,            /* Number of nodes to run on          */
        int  nPMEnodes)         /* Number of PME nodes to use         */
{
    char  *command;


    /* Make enough space for the system call command,
     * (100 extra chars for -np ... etc. options should suffice): */
    snew(command, strlen(cmd_mpirun)+strlen(cmd_mdrun)+strlen(args_for_mdrun)+strlen(simulation_tpr)+100);

    sprintf(command, "%s -np %d %s %s-npme %d -s %s",
            cmd_mpirun, nnodes, cmd_mdrun, args_for_mdrun, nPMEnodes, simulation_tpr);

    fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch? "Using":"Please use", command);
    sep_line(fp);
    fflush(fp);

    /* Now the real thing! */
    if (bLaunch)
    {
        fprintf(stdout, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command);
        sep_line(stdout);
        fflush(stdout);
        gmx_system_call(command);
        thanx(fp);
    }

    /* The command string is not needed any more */
    sfree(command);
}
539 static void modify_PMEsettings(
540 gmx_step_t simsteps, /* Set this value as number of time steps */
541 char *fn_best_tpr, /* tpr file with the best performance */
542 char *fn_sim_tpr) /* name of tpr file to be launched */
544 t_inputrec *ir;
545 t_state state;
546 gmx_mtop_t mtop;
547 char buf[200];
549 snew(ir,1);
550 read_tpx_state(fn_best_tpr,ir,&state,NULL,&mtop);
552 /* Set nsteps to the right value */
553 ir->nsteps = simsteps;
555 /* Write the tpr file which will be launched */
556 sprintf(buf, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr, gmx_step_pfmt);
557 fprintf(stdout,buf,ir->nsteps);
558 fflush(stdout);
559 write_tpx_state(fn_sim_tpr,ir,&state,&mtop);
561 sfree(ir);
565 /* Make additional TPR files with more computational load for the
566 * direct space processors: */
567 static void make_benchmark_tprs(
568 char *fn_sim_tpr, /* READ : User-provided tpr file */
569 char *fn_bench_tprs[], /* WRITE: Names of benchmark tpr files */
570 gmx_step_t benchsteps, /* Number of time steps for benchmark runs */
571 gmx_step_t statesteps, /* Step counter in checkpoint file */
572 real maxfac, /* Max scaling factor for rcoulomb and fourierspacing */
573 int ntprs, /* No. of TPRs to write, each with a different rcoulomb and fourierspacing */
574 real fourierspacing, /* Basic fourierspacing from tpr input file */
575 t_inputinfo *info, /* Contains information about mdp file options */
576 FILE *fp) /* Write the output here */
578 int i,j,d;
579 t_inputrec *ir;
580 t_state state;
581 gmx_mtop_t mtop;
582 real fac;
583 real orig_rcoulomb, orig_rvdw, orig_rlist;
584 rvec orig_fs; /* original fourierspacing per dimension */
585 ivec orig_nk; /* original number of grid points per dimension */
586 char buf[200];
587 real max_spacing;
588 rvec box_size;
591 sprintf(buf, "Making benchmark tpr files with %s time steps", gmx_step_pfmt);
592 fprintf(stdout, buf, benchsteps);
593 if (statesteps > 0)
595 sprintf(buf, " (adding %s steps from checkpoint file)", gmx_step_pfmt);
596 fprintf(stdout, buf, statesteps);
597 benchsteps += statesteps;
599 fprintf(stdout, ".\n");
602 snew(ir,1);
603 read_tpx_state(fn_sim_tpr,ir,&state,NULL,&mtop);
605 /* Check if PME was chosen */
606 if (EEL_PME(ir->coulombtype) == FALSE)
607 gmx_fatal(FARGS, "Can only do optimizations for simulations with PME");
609 /* Check if rcoulomb == rlist, which is necessary for PME */
610 if (!(ir->rcoulomb == ir->rlist))
611 gmx_fatal(FARGS, "PME requires rcoulomb (%f) to be equal to rlist (%f).", ir->rcoulomb, ir->rlist);
613 /* Reduce the number of steps for the benchmarks */
614 info->orig_sim_steps = ir->nsteps;
615 ir->nsteps = benchsteps;
617 /* Determine lenght of triclinic box vectors */
618 for(d=0; d<DIM; d++)
620 box_size[d] = 0;
621 for(i=0;i<DIM;i++)
622 box_size[d] += state.box[d][i]*state.box[d][i];
623 box_size[d] = sqrt(box_size[d]);
626 /* Remember the original values: */
627 orig_rvdw = ir->rvdw;
628 orig_rcoulomb = ir->rcoulomb;
629 orig_rlist = ir->rlist;
630 orig_nk[XX] = ir->nkx;
631 orig_nk[YY] = ir->nky;
632 orig_nk[ZZ] = ir->nkz;
633 orig_fs[XX] = box_size[XX]/ir->nkx; /* fourierspacing in x direction */
634 orig_fs[YY] = box_size[YY]/ir->nky;
635 orig_fs[ZZ] = box_size[ZZ]/ir->nkz;
637 fprintf(fp, "\nWill try these real/reciprocal workload settings:\n");
638 fprintf(fp, " No. scaling r_coul (r_vdW) nkx nky nkz (spacing) tpr file\n");
640 if (ntprs > 1)
642 fprintf(stdout, "Calculating PME grid points on the basis of ");
643 if (fourierspacing > 0)
644 fprintf(stdout, "a fourierspacing of %f nm\n", fourierspacing);
645 else
646 fprintf(stdout, "original nkx/nky/nkz settings from tpr file\n");
649 /* Loop to create the requested number of tpr input files */
650 for (j = 0; j < ntprs; j++)
652 /* Rcoulomb scaling factor for this file: */
653 if (ntprs == 1)
654 fac = 1.0;
655 else
656 fac = (maxfac-1.0f)/(ntprs-1) * j +1;
657 fprintf(stdout, "--- Scaling factor %f ---\n", fac);
659 ir->rcoulomb = orig_rcoulomb*fac;
660 ir->rlist = orig_rlist *fac;
661 ir->rvdw = orig_rvdw *fac;
663 /* Try to reduce the number of reciprocal grid points in a smart way */
664 /* Did the user supply a value for fourierspacing on the command line? */
665 if (fourierspacing > 0)
667 info->fourier_sp[j] = fourierspacing*fac;
668 /* Calculate the optimal grid dimensions */
669 ir->nkx = 0;
670 ir->nky = 0;
671 ir->nkz = 0;
672 max_spacing = calc_grid(stdout,state.box,info->fourier_sp[j],&(ir->nkx),&(ir->nky),&(ir->nkz),1);
673 /* Check consistency */
674 if (0 == j)
675 if ((ir->nkx != orig_nk[XX]) || (ir->nky != orig_nk[YY]) || (ir->nkz != orig_nk[ZZ]))
676 gmx_fatal(FARGS, "Wrong fourierspacing %f, actual grid = %dx%dx%d, original grid = %dx%dx%d",
677 fourierspacing,ir->nkx,ir->nky,ir->nkz,orig_nk[XX],orig_nk[YY],orig_nk[ZZ]);
679 else
681 if (0 == j)
683 /* Print out fourierspacing from input tpr */
684 fprintf(stdout, "Input file fourier grid is %dx%dx%d\n", orig_nk[XX], orig_nk[YY], orig_nk[ZZ]);
686 else
688 /* Reconstruct fourierspacing for each dimension from the input file */
689 ir->nkx=0;
690 max_spacing = calc_grid(stdout,state.box,orig_fs[XX]*fac,&(ir->nkx),&(ir->nky),&(ir->nkz),1);
691 ir->nky=0;
692 max_spacing = calc_grid(stdout,state.box,orig_fs[XX]*fac,&(ir->nkx),&(ir->nky),&(ir->nkz),1);
693 ir->nkz=0;
694 max_spacing = calc_grid(stdout,state.box,orig_fs[XX]*fac,&(ir->nkx),&(ir->nky),&(ir->nkz),1);
697 /* r_vdw should only grow if necessary! */
698 if (j > 0)
700 ir->rvdw = min(ir->rvdw, orig_rcoulomb*fac);
701 ir->rvdw = max(ir->rvdw, orig_rvdw);
703 /* Save modified radii and fourier grid components for later output: */
704 info->r_coulomb[j] = ir->rcoulomb;
705 info->r_vdW[j] = ir->rvdw;
706 info->fourier_nx[j]= ir->nkx;
707 info->fourier_ny[j]= ir->nky;
708 info->fourier_nz[j]= ir->nkz;
710 /* Write the benchmark tpr file */
711 strncpy(fn_bench_tprs[j],fn_sim_tpr,strlen(fn_sim_tpr)-strlen(".tpr"));
712 sprintf(buf, "_bench%.2d.tpr", j);
713 strcat(fn_bench_tprs[j], buf);
714 fprintf(stdout,"Writing benchmark tpr %s with nsteps=", fn_bench_tprs[j]);
715 fprintf(stdout, gmx_step_pfmt, ir->nsteps);
716 fprintf(stdout,", scaling factor %f\n", fac);
717 write_tpx_state(fn_bench_tprs[j],ir,&state,&mtop);
719 /* Write some info to log file */
720 fprintf(fp, "%3d %9f %9f (%7f) %4d %4d %4d %9f %-14s\n",
721 j, fac, ir->rcoulomb, ir->rvdw, ir->nkx, ir->nky, ir->nkz, info->fourier_sp[j],fn_bench_tprs[j]);
723 fflush(stdout);
724 fflush(fp);
726 sfree(ir);
730 /* Rename the files we want to keep to some meaningful filename and
731 * delete the rest */
732 static void cleanup(t_filenm *fnm, int nfile, int k, int nnodes, int nPMEnodes, int nr)
734 char numstring[STRLEN];
735 char newfilename[STRLEN];
736 char *fn=NULL;
737 int i;
738 const char *opt;
741 fprintf(stdout, "Cleaning up, deleting benchmark temp files ...\n");
743 for (i=0; i<nfile; i++)
745 opt = (char *)fnm[i].opt;
746 if ( strcmp(opt, "-p") == 0 )
748 /* do nothing; keep this file */
751 else if (strcmp(opt, "-bg") == 0)
753 /* Give the log file a nice name so one can later see which parameters were used */
754 numstring[0] = '\0';
755 if (nr > 0)
756 sprintf(numstring, "_%d", nr);
757 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", opt2fn("-bg",nfile,fnm), k, nnodes, nPMEnodes, numstring);
758 if (gmx_fexist(opt2fn("-bg",nfile,fnm)))
760 fprintf(stdout, "renaming log file to %s\n", newfilename);
761 make_backup(newfilename);
762 rename(opt2fn("-bg",nfile,fnm), newfilename);
765 /* Delete the files which are created for each benchmark run: (options -b*) */
766 else if ( (0 == strncmp(opt, "-b", 2)) && (opt2bSet(opt,nfile,fnm) || !is_optional(&fnm[i])) )
768 fn = opt2fn(opt, nfile, fnm);
769 if (gmx_fexist(fn))
771 fprintf(stdout, "Deleting %s\n", fn);
772 remove(fn);
779 static void do_the_tests(FILE *fp, char **tpr_names, int maxPMEnodes, int minPMEnodes,
780 int datasets, t_perf **perfdata, int repeats, int nnodes, int nr_tprs,
781 char *cmd_mpirun, char *cmd_export, char *cmd_mdrun, char *args_for_mdrun,
782 t_filenm *fnm, int nfile, int sim_part, int presteps, gmx_step_t cpt_steps)
784 int i,nr,k,ret;
785 int nPMEnodes;
786 t_perf *pd=NULL;
787 int cmdline_length;
788 char *command;
789 char buf[STRLEN];
790 char *opt_noaddpart;
791 bool bResetProblem=FALSE;
794 /* This string array corresponds to the eParselog enum type from above */
795 const char* ParseLog[] = {"OK",
796 "Logfile not found",
797 "No timings in log file",
798 "Run was terminated",
799 "Counters were not reset properly"};
800 char str_PME_f_load[13];
802 /* The -noaddpart option is needed so that the md.log files do not
803 * get renamed if checkpoints are used!
805 if (sim_part > 1)
806 opt_noaddpart=" -noaddpart";
807 else
808 opt_noaddpart="";
810 /* Allocate space for the mdrun command line. 100 extra characters should be more than enough
811 * for the -npme etcetera arguments */
812 cmdline_length = strlen(cmd_mpirun)
813 + strlen(cmd_export)
814 + strlen(cmd_mdrun)
815 + strlen(args_for_mdrun)
816 + strlen(tpr_names[0]) + 100;
817 snew(command, cmdline_length);
819 /* Loop over all tpr files to test: */
820 for (k=0; k<nr_tprs;k++)
822 fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
823 fprintf(fp, "PME nodes Gcycles ns/day PME/f Remark\n");
824 i=0;
825 /* Start with the maximum number of PME only nodes: */
826 nPMEnodes = maxPMEnodes;
828 /* Loop over various numbers of PME nodes: */
829 for (i = 0; i<datasets; i++)
831 pd = &perfdata[k][i];
833 /* Loop over the repeats for each scenario: */
834 for (nr = 0; nr < repeats; nr++)
836 pd->nPMEnodes = nPMEnodes;
838 /* Construct the command line to call mdrun (and save it): */
839 snew(pd->mdrun_cmd_line, cmdline_length);
840 sprintf(pd->mdrun_cmd_line, "%s %s-np %d %s %s-npme %d -s %s%s",
841 cmd_mpirun, cmd_export, nnodes, cmd_mdrun, args_for_mdrun, nPMEnodes, tpr_names[k], opt_noaddpart);
843 /* Do a benchmark simulation: */
844 if (repeats > 1)
845 sprintf(buf, ", pass %d/%d", nr+1, repeats);
846 else
847 buf[0]='\0';
848 fprintf(stdout, "\n=== tpr %d/%d, run %d/%d%s:\n", k+1, nr_tprs, i+1, datasets, buf);
849 sprintf(command, "%s -noaddpart >& /dev/null", pd->mdrun_cmd_line);
850 fprintf(stdout, "%s\n", pd->mdrun_cmd_line);
851 gmx_system_call(command);
853 /* Collect the performance data from the log file */
854 ret = parse_logfile(opt2fn("-bg",nfile,fnm), pd, nr, presteps, cpt_steps);
855 if ((presteps > 0) && (ret == eParselogResetProblem))
856 bResetProblem = TRUE;
858 if (nPMEnodes == -1)
859 sprintf(buf, "(%3d)", pd->guessPME);
860 else
861 sprintf(buf, " ");
863 /* Nicer output */
864 if (pd->PME_f_load[nr] > 0.0)
865 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load[nr]);
866 else
867 sprintf(str_PME_f_load, "%s", " - ");
869 /* Write the data we got to disk */
870 fprintf(fp, "%4d%s %12.3f %12.3f %s %s\n", pd->nPMEnodes, buf, pd->Gcycles[nr], pd->ns_per_day[nr], str_PME_f_load, ParseLog[ret]);
871 fflush(fp);
873 /* Do some cleaning up and delete the files we do not need any more */
874 cleanup(fnm, nfile, k, nnodes, nPMEnodes, nr);
876 /* If the first run with this number of processors already failed, do not try again: */
877 if (pd->Gcycles[0] <= 0.0 && repeats > 1)
879 fprintf(stdout, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
880 break;
883 /* Prepare for the next number of PME only nodes */
884 /* The last but one check is always without MPMD PME ... */
885 if ((nPMEnodes == minPMEnodes) && (0 != minPMEnodes))
886 nPMEnodes = 0;
887 /* ... and the last check with the guessed settings */
888 else if (nPMEnodes == 0)
889 nPMEnodes = -1;
890 else
891 nPMEnodes--;
894 if (bResetProblem)
896 sep_line(fp);
897 fprintf(fp, "WARNING: The cycle and time step counters could not be reset\n"
898 "properly. The reason could be that mpirun did not manage to\n"
899 "export the environment variable GMX_RESET_COUNTER. You might\n"
900 "have to give a special switch to mpirun for that.\n"
901 "Alternatively, you can manually set GMX_RESET_COUNTER to the\n"
902 "value normally provided by -presteps.");
903 sep_line(fp);
908 static bool is_equal(real a, real b)
910 real diff, eps=1.0e-6;
913 diff = a - b;
915 if (diff < 0.0) diff = -diff;
917 if (diff < eps)
918 return TRUE;
919 else
920 return FALSE;
924 static void check_input(
925 int nnodes,
926 int repeats,
927 int *ntprs,
928 real maxfac,
929 real maxPMEfraction,
930 real minPMEfraction,
931 real fourierspacing,
932 gmx_step_t bench_nsteps,
933 t_filenm *fnm,
934 int nfile,
935 int sim_part,
936 int presteps)
938 /* Make shure the input file exists */
939 if (!gmx_fexist(opt2fn("-s",nfile,fnm)))
940 gmx_fatal(FARGS, "File %s not found.", opt2fn("-s",nfile,fnm));
942 /* Make shure that the checkpoint file is not overwritten by the benchmark runs */
943 if ( (0 == strcmp(opt2fn("-cpi",nfile,fnm), opt2fn("-cpo",nfile,fnm)) ) && (sim_part > 1) )
944 gmx_fatal(FARGS, "Checkpoint input and output file must not be identical,\nbecause then the input file might change during the benchmarks.");
946 /* Make shure that repeats is >= 0 (if == 0, only write tpr files) */
947 if (repeats < 0)
948 gmx_fatal(FARGS, "Number of repeats < 0!");
950 /* Check whether we have enough nodes */
951 if (nnodes < 3)
952 gmx_fatal(FARGS, "Can not have separate PME nodes with 2 or less nodes, so there is nothing to optimize here.");
954 /* Automatically choose -ntpr if not set */
955 if (*ntprs < 1)
957 if (nnodes < 16)
958 *ntprs = 1;
959 else
960 *ntprs = 3;
961 fprintf(stderr, "Will test %d tpr file%s.\n", *ntprs, *ntprs==1?"":"s");
963 else
965 if ( (1 == *ntprs) && !is_equal(maxfac,1.0) )
966 fprintf(stderr, "Note: Choose ntpr>1 to shift PME load to real space.\n");
969 if ( is_equal(1.0,maxfac) && (*ntprs > 1) )
971 fprintf(stderr, "WARNING: Resetting -ntpr to 1 since upscaling factor equals unity.\n Please select -fac>1 if you want to test various PME grid settings\n");
972 *ntprs = 1;
975 /* Check whether max and min fraction are within required values */
976 if (maxPMEfraction > 0.5 || maxPMEfraction < 0)
977 gmx_fatal(FARGS, "-max must be between 0 and 0.5");
978 if (minPMEfraction > 0.5 || minPMEfraction < 0)
979 gmx_fatal(FARGS, "-min must be between 0 and 0.5");
980 if (maxPMEfraction < minPMEfraction)
981 gmx_fatal(FARGS, "-max must be larger or equal to -min");
983 /* Check whether the number of steps - if it was set - has a reasonable value */
984 if (bench_nsteps < 0)
985 gmx_fatal(FARGS, "Number of steps must be positive.");
987 if (bench_nsteps > 10000 || bench_nsteps < 100)
989 fprintf(stderr, "WARNING: steps=");
990 fprintf(stderr, gmx_step_pfmt, bench_nsteps);
991 fprintf(stderr, ". Are you shure you want to perform so %s steps for each benchmark?\n", (bench_nsteps < 100)? "few" : "many");
994 if (presteps < 0)
996 gmx_fatal(FARGS, "Cannot have a negative number of presteps.\n");
999 if (maxfac <= 0.0)
1000 gmx_fatal(FARGS, "Scaling factor must be larger than zero.");
1002 if (maxfac < 1.0)
1003 fprintf(stderr, "WARNING: A scaling factor smaller than one means that load will be shifted to reciprocal space. Are you shure you want that?\n");
1005 if (maxfac < 0.75 || maxfac > 1.5)
1006 fprintf(stderr, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1008 if (fourierspacing < 0)
1009 gmx_fatal(FARGS, "Please choose a positive value for fourierspacing.");
1013 /* Returns TRUE when "opt" is a switch for g_tune_pme itself */
1014 static bool is_main_switch(char *opt)
1016 if ( (0 == strcmp(opt,"-s" ))
1017 || (0 == strcmp(opt,"-p" ))
1018 || (0 == strcmp(opt,"-launch" ))
1019 || (0 == strcmp(opt,"-r" ))
1020 || (0 == strcmp(opt,"-ntpr" ))
1021 || (0 == strcmp(opt,"-max" ))
1022 || (0 == strcmp(opt,"-min" ))
1023 || (0 == strcmp(opt,"-fac" ))
1024 || (0 == strcmp(opt,"-four" ))
1025 || (0 == strcmp(opt,"-steps" ))
1026 || (0 == strcmp(opt,"-simsteps"))
1027 || (0 == strcmp(opt,"-presteps"))
1028 || (0 == strcmp(opt,"-so" )) )
1029 return TRUE;
1031 return FALSE;
1035 /* Returns TRUE when "opt" is needed at launch time */
1036 static bool is_launch_option(char *opt, bool bSet)
1038 if (bSet)
1039 return TRUE;
1040 else
1041 return FALSE;
1045 /* Returns TRUE when "opt" is needed at launch time */
1046 static bool is_launch_file(char *opt, bool bSet, bool bOptional)
1048 /* We need all options that were set on the command line
1049 * and that do not start with -b */
1050 if (0 == strncmp(opt,"-b", 2))
1051 return FALSE;
1053 if (bSet)
1054 return TRUE;
1055 else
1056 return FALSE;
1060 /* Returns TRUE when "opt" gives an option needed for the benchmarks runs */
1061 static bool is_bench_option(char *opt, bool bSet)
1063 /* If option is set, we might need it for the benchmarks.
1064 * This includes -cpi */
1065 if (bSet)
1067 if ( (0 == strcmp(opt, "-append" ))
1068 || (0 == strcmp(opt, "-addpart"))
1069 || (0 == strcmp(opt, "-maxh" )) )
1070 return FALSE;
1071 else
1072 return TRUE;
1074 else
1075 return FALSE;
1079 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1080 static bool is_bench_file(char *opt, bool bSet, bool bOptional, bool bIsOutput)
1082 /* All options starting with "-b" are for _b_enchmark files exclusively */
1083 if (0 == strncmp(opt,"-b", 2))
1085 if (!bOptional || bSet)
1086 return TRUE;
1087 else
1088 return FALSE;
1090 else
1092 if (bIsOutput)
1093 return FALSE;
1094 else
1095 if (bSet) /* These are additional input files like -cpi -ei */
1096 return TRUE;
1097 else
1098 return FALSE;
/* Appends the string 'buf' to the string '*cmd_args'.
 * '*cmd_args' is enlarged with srenew() before the append, so it has to
 * point to a '\0'-terminated string that srenew() may reallocate. */
static void add_to_command_line(char **cmd_args, char *buf)
{
    int needed;


    /* Both strings plus the terminating '\0' */
    needed = strlen(buf) + strlen(*cmd_args) + 1;
    srenew(*cmd_args, needed);
    strcat(*cmd_args, buf);
}
1115 /* Create the command line for the benchmark as well as for the real run */
1116 static void create_command_line_snippets(
1117 int nfile,
1118 t_filenm fnm[],
1119 int npargs,
1120 t_pargs *pa,
1121 char *cmd_np[], /* -np string */
1122 char *cmd_args_bench[], /* command line arguments for benchmark runs */
1123 char *cmd_args_launch[]) /* command line arguments for simulation run */
1125 int i;
1126 char *opt;
1127 char *name;
1128 #define BUFLENGTH 255
1129 char buf[BUFLENGTH];
1130 char strbuf[BUFLENGTH];
1131 char strbuf2[BUFLENGTH];
1134 /* strlen needs at least '\0' as a string: */
1135 snew(*cmd_args_bench ,1);
1136 snew(*cmd_args_launch,1);
1137 *cmd_args_launch[0]='\0';
1138 *cmd_args_bench[0] ='\0';
1141 /*******************************************/
1142 /* 1. Process other command line arguments */
1143 /*******************************************/
1144 for (i=0; i<npargs; i++)
1146 /* What command line switch are we currently processing: */
1147 opt = (char *)pa[i].option;
1149 /* Skip options not meant for mdrun */
1150 if (!is_main_switch(opt))
1152 /* Print it to a string buffer, strip away trailing whitespaces that pa_val also returns: */
1153 sprintf(strbuf2, "%s", pa_val(&pa[i],buf,BUFLENGTH));
1154 rtrim(strbuf2);
1155 sprintf(strbuf, "%s %s ", opt, strbuf2);
1156 /* We need the -np switch in an extra buffer - whether or not it was set! */
1157 if (0 == strcmp(opt,"-np"))
1159 snew(*cmd_np, strlen(strbuf)+1);
1160 sprintf(*cmd_np, " %s", strbuf);
1162 else
1164 if (is_bench_option(opt,pa[i].bSet))
1165 add_to_command_line(cmd_args_bench, strbuf);
1167 if (is_launch_option(opt,pa[i].bSet))
1168 add_to_command_line(cmd_args_launch, strbuf);
1173 /********************/
1174 /* 2. Process files */
1175 /********************/
1176 for (i=0; i<nfile; i++)
1178 opt = (char *)fnm[i].opt;
1179 name = opt2fn(opt,nfile,fnm);
1181 /* Strbuf contains the options, now let's sort out where we need that */
1182 sprintf(strbuf, "%s %s ", opt, name);
1184 /* Skip options not meant for mdrun */
1185 if (!is_main_switch(opt))
1188 if ( is_bench_file(opt, opt2bSet(opt,nfile,fnm), is_optional(&fnm[i]), is_output(&fnm[i])) )
1190 /* All options starting with -b* need th 'b' removed,
1191 * therefore overwrite strbuf */
1192 if (0 == strncmp(opt, "-b", 2))
1193 sprintf(strbuf, "-%s %s ", &opt[2], name);
1195 add_to_command_line(cmd_args_bench, strbuf);
1198 if ( is_launch_file(opt,opt2bSet(opt,nfile,fnm),is_optional(&fnm[i])) )
1199 add_to_command_line(cmd_args_launch, strbuf);
1202 #undef BUFLENGTH
1206 /* Set option opt */
1207 void setopt(const char *opt,int nfile,t_filenm fnm[])
1209 int i;
1211 for(i=0; (i<nfile); i++)
1212 if (strcmp(opt,fnm[i].opt)==0)
1213 fnm[i].flag |= ffSET;
1217 static void couple_files_options(int nfile, t_filenm fnm[])
1219 int i;
1220 bool bSet,bBench;
1221 char *opt;
1222 char buf[20];
1225 for (i=0; i<nfile; i++)
1227 opt = (char *)fnm[i].opt;
1228 bSet = ((fnm[i].flag & ffSET) != 0);
1229 bBench = (0 == strncmp(opt,"-b", 2));
1231 /* Check optional files */
1232 /* If e.g. -eo is set, then -beo also needs to be set */
1233 if (is_optional(&fnm[i]) && bSet && !bBench)
1235 sprintf(buf, "-b%s", &opt[1]);
1236 setopt(buf,nfile,fnm);
1238 /* If -beo is set, then -eo also needs to be! */
1239 if (is_optional(&fnm[i]) && bSet && bBench)
1241 sprintf(buf, "-%s", &opt[2]);
1242 setopt(buf,nfile,fnm);
1248 #define BENCHSTEPS (1000)
1250 int gmx_tune_pme(int argc,char *argv[])
1252 const char *desc[] = {
1253 "For a given number [TT]-np[tt] of processors this program systematically",
1254 "times mdrun with various numbers of PME-only nodes and determines",
1255 "which setting is fastest. It will also test whether performance can",
1256 "be enhanced by shifting load from the reciprocal to the real space",
1257 "part of the Ewald sum. "
1258 "Simply pass your [TT].tpr[tt] file to g_tune_pme together with other options",
1259 "for mdrun as needed.[PAR]",
1260 "Which executables are used can be set in the environment variables",
1261 "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
1262 "will be used as defaults. Note that for certain MPI frameworks you",
1263 "need to provide a machine- or hostfile. This can also be passed",
1264 "via the MPIRUN variable, e.g.",
1265 "'export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"'[PAR]",
1266 "Please call g_tune_pme with the normal options you would pass to",
1267 "mdrun and add [TT]-np[tt] for the number of processors to perform the",
1268 "tests on. You can also add [TT]-r[tt] to repeat each test several times",
1269 "to get better statistics. [PAR]",
1270 "g_tune_pme can test various real space / reciprocal space workloads",
1271 "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
1272 "written with enlarged cutoffs and smaller fourier grids respectively.",
1273 "The first test (no. 0) will be with the settings from the input",
1274 "[TT].tpr[tt] file; the last test (no. [TT]ntpr[tt]) will have cutoffs multiplied",
1275 "by (and at the same time fourier grid dimensions divided by) the scaling",
1276 "factor [TT]-fac[tt] (default 1.2). The remaining [TT].tpr[tt] files will have equally",
1277 "spaced values inbetween these extremes. Note that you can set [TT]-ntpr[tt] to 1",
1278 "if you just want to find the optimal number of PME-only nodes; in that case",
1279 "your input [TT].tpr[tt] file will remain unchanged[PAR]",
1280 "For the benchmark runs, 2500 time steps should suffice for most MD",
1281 "systems. Note that dynamic load balancing needs about 100 time steps",
1282 "to adapt to local load imbalances. To get clean benchmark numbers,",
1283 "[TT]-steps[tt] should therefore always be much larger than 100![PAR]",
1284 "Example call: [TT]g_tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
1285 "After calling mdrun several times, detailed performance information",
1286 "is available in the output file perf.out. "
1287 "Note that during the benchmarks a couple of temporary files are written",
1288 "(options -b*), these will be automatically deleted after each test.[PAR]"
1289 "If you want the simulation to be started automatically with the",
1290 "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
1293 int nnodes =3;
1294 int repeats=2;
1295 real maxPMEfraction=0.50;
1296 real minPMEfraction=0.25;
1297 int maxPMEnodes, minPMEnodes;
1298 real maxfac=1.2;
1299 int ntprs=0;
1300 real fs=0.0; /* 0 indicates: not set by the user */
1301 gmx_step_t bench_nsteps=BENCHSTEPS;
1302 gmx_step_t new_sim_nsteps=-1; /* -1 indicates: not set by the user */
1303 gmx_step_t cpt_steps=0; /* Step counter in .cpt input file */
1304 int presteps=100; /* Do a full cycle reset after presteps steps */
1305 bool bHaveResetCounter; /* Was the GMX_RESET_COUNTER env set by user? */
1306 int resetcount_orig; /* The value of GMX_RESET_COUNTER if set */
1308 bool bOverwrite=FALSE;
1309 bool bLaunch=FALSE;
1310 char **tpr_names=NULL;
1311 char *simulation_tpr=NULL;
1312 int best_npme, best_tpr;
1313 int sim_part = 1; /* For benchmarks with checkpoint files */
1315 /* Default program names if nothing else is found */
1316 char *cmd_mpirun=NULL, *cmd_mdrun=NULL, *cmd_export=NULL;
1317 char *cmd_args_bench, *cmd_args_launch, *cmd_np;
1320 t_perf **perfdata;
1321 t_inputinfo *info;
1322 int datasets;
1323 int i,j,k;
1324 FILE *fp;
1325 t_commrec *cr;
1327 static t_filenm fnm[] = {
1328 /* g_tune_pme */
1329 { efOUT, "-p", "perf", ffWRITE },
1330 { efTPX, "-so", "tuned", ffWRITE },
1331 /* mdrun: */
1332 { efTPX, NULL, NULL, ffREAD },
1333 { efTRN, "-o", NULL, ffWRITE },
1334 { efXTC, "-x", NULL, ffOPTWR },
1335 { efCPT, "-cpi", NULL, ffOPTRD },
1336 { efCPT, "-cpo", NULL, ffOPTWR },
1337 { efSTO, "-c", "confout", ffWRITE },
1338 { efEDR, "-e", "ener", ffWRITE },
1339 { efLOG, "-g", "md", ffWRITE },
1340 { efXVG, "-dhdl", "dhdl", ffOPTWR },
1341 { efXVG, "-field", "field", ffOPTWR },
1342 { efXVG, "-table", "table", ffOPTRD },
1343 { efXVG, "-tablep", "tablep", ffOPTRD },
1344 { efXVG, "-tableb", "table", ffOPTRD },
1345 { efTRX, "-rerun", "rerun", ffOPTRD },
1346 { efXVG, "-tpi", "tpi", ffOPTWR },
1347 { efXVG, "-tpid", "tpidist", ffOPTWR },
1348 { efEDI, "-ei", "sam", ffOPTRD },
1349 { efEDO, "-eo", "sam", ffOPTWR },
1350 { efGCT, "-j", "wham", ffOPTRD },
1351 { efGCT, "-jo", "bam", ffOPTWR },
1352 { efXVG, "-ffout", "gct", ffOPTWR },
1353 { efXVG, "-devout", "deviatie", ffOPTWR },
1354 { efXVG, "-runav", "runaver", ffOPTWR },
1355 { efXVG, "-px", "pullx", ffOPTWR },
1356 { efXVG, "-pf", "pullf", ffOPTWR },
1357 { efMTX, "-mtx", "nm", ffOPTWR },
1358 { efNDX, "-dn", "dipole", ffOPTWR },
1359 /* Output files that are deleted after each benchmark run */
1360 { efTRN, "-bo", "bench", ffWRITE },
1361 { efXTC, "-bx", "bench", ffWRITE },
1362 { efCPT, "-bcpo", "bench", ffWRITE },
1363 { efSTO, "-bc", "bench", ffWRITE },
1364 { efEDR, "-be", "bench", ffWRITE },
1365 { efLOG, "-bg", "bench", ffWRITE },
1366 { efEDO, "-beo", "bench", ffOPTWR },
1367 { efXVG, "-bdhdl", "benchdhdl",ffOPTWR },
1368 { efXVG, "-bfield", "benchfld" ,ffOPTWR },
1369 { efXVG, "-btpi", "benchtpi", ffOPTWR },
1370 { efXVG, "-btpid", "benchtpid",ffOPTWR },
1371 { efGCT, "-bjo", "bench", ffOPTWR },
1372 { efXVG, "-bffout", "benchgct", ffOPTWR },
1373 { efXVG, "-bdevout","benchdev", ffOPTWR },
1374 { efXVG, "-brunav", "benchrnav",ffOPTWR },
1375 { efXVG, "-bpx", "benchpx", ffOPTWR },
1376 { efXVG, "-bpf", "benchpf", ffOPTWR },
1377 { efMTX, "-bmtx", "benchn", ffOPTWR },
1378 { efNDX, "-bdn", "bench", ffOPTWR }
1381 /* Command line options of mdrun */
1382 bool bDDBondCheck = TRUE;
1383 bool bDDBondComm = TRUE;
1384 bool bSumEner = TRUE;
1385 bool bVerbose = FALSE;
1386 bool bCompact = TRUE;
1387 bool bSepPot = FALSE;
1388 bool bRerunVSite = FALSE;
1389 bool bIonize = FALSE;
1390 bool bConfout = TRUE;
1391 bool bReproducible = FALSE;
1393 int nmultisim=0;
1394 int repl_ex_nst=0;
1395 int repl_ex_seed=-1;
1396 int nstepout=100;
1397 int nthreads=1;
1400 const char *ddno_opt[ddnoNR+1] =
1401 { NULL, "interleave", "pp_pme", "cartesian", NULL };
1402 const char *dddlb_opt[] =
1403 { NULL, "auto", "no", "yes", NULL };
1404 real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
1405 char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
1406 #define STD_CPT_PERIOD (15.0)
1407 real cpt_period=STD_CPT_PERIOD,max_hours=-1;
1408 bool bAppendFiles=FALSE,bAddPart=TRUE;
1411 t_pargs pa[] = {
1412 /***********************/
1413 /* g_tune_pme options: */
1414 /***********************/
1415 { "-np", FALSE, etINT, {&nnodes},
1416 "Number of nodes to run the tests on (at least 3)" },
1417 { "-r", FALSE, etINT, {&repeats},
1418 "Repeat each test this often" },
1419 { "-max", FALSE, etREAL, {&maxPMEfraction},
1420 "Max fraction of PME nodes to test with" },
1421 { "-min", FALSE, etREAL, {&minPMEfraction},
1422 "Min fraction of PME nodes to test with" },
1423 { "-fac", FALSE, etREAL, {&maxfac},
1424 "Max upscaling factor for rcoulomb (= downscaling factor for the fourier grid)" },
1425 { "-ntpr", FALSE, etINT, {&ntprs},
1426 "Number of tpr files to benchmark. Create these many files with scaling factors ranging from 1.0 to fac. If < 1, automatically choose the number of tpr files to test" },
1427 { "-four", FALSE, etREAL, {&fs},
1428 "Fourierspacing that was chosen to create the input tpr file" },
1429 { "-steps", FALSE, etGMX_STEP_T, {&bench_nsteps},
1430 "Use these many steps for the benchmarks" },
1431 { "-presteps", FALSE, etINT, {&presteps},
1432 "Let dlb equilibrate these many steps before timings are taken" },
1433 { "-simsteps", FALSE, etGMX_STEP_T, {&new_sim_nsteps},
1434 "If non-negative, perform these many steps in the real run (overwrite nsteps from tpr, add cpt steps)" },
1435 { "-launch", FALSE, etBOOL, {&bLaunch},
1436 "Lauch the real simulation after optimization" },
1437 /******************/
1438 /* mdrun options: */
1439 /******************/
1440 { "-nt", FALSE, etINT, {&nthreads},
1441 "HIDDENNumber of threads to start on each node" },
1442 { "-ddorder", FALSE, etENUM, {ddno_opt},
1443 "DD node order" },
1444 { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
1445 "Check for all bonded interactions with DD" },
1446 { "-ddbondcomm",FALSE, etBOOL, {&bDDBondComm},
1447 "HIDDENUse special bonded atom communication when -rdd > cut-off" },
1448 { "-rdd", FALSE, etREAL, {&rdd},
1449 "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
1450 { "-rcon", FALSE, etREAL, {&rconstr},
1451 "Maximum distance for P-LINCS (nm), 0 is estimate" },
1452 { "-dlb", FALSE, etENUM, {dddlb_opt},
1453 "Dynamic load balancing (with DD)" },
1454 { "-dds", FALSE, etREAL, {&dlb_scale},
1455 "Minimum allowed dlb scaling of the DD cell size" },
1456 { "-ddcsx", FALSE, etSTR, {&ddcsx},
1457 "HIDDENThe DD cell sizes in x" },
1458 { "-ddcsy", FALSE, etSTR, {&ddcsy},
1459 "HIDDENThe DD cell sizes in y" },
1460 { "-ddcsz", FALSE, etSTR, {&ddcsz},
1461 "HIDDENThe DD cell sizes in z" },
1462 { "-sum", FALSE, etBOOL, {&bSumEner},
1463 "Sum the energies at every step" },
1464 { "-v", FALSE, etBOOL, {&bVerbose},
1465 "Be loud and noisy" },
1466 { "-compact", FALSE, etBOOL, {&bCompact},
1467 "Write a compact log file" },
1468 { "-seppot", FALSE, etBOOL, {&bSepPot},
1469 "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
1470 { "-pforce", FALSE, etREAL, {&pforce},
1471 "Print all forces larger than this (kJ/mol nm)" },
1472 { "-reprod", FALSE, etBOOL, {&bReproducible},
1473 "Try to avoid optimizations that affect binary reproducibility" },
1474 { "-cpt", FALSE, etREAL, {&cpt_period},
1475 "Checkpoint interval (minutes)" },
1476 { "-append", FALSE, etBOOL, {&bAppendFiles},
1477 "Append to previous output files when continuing from checkpoint" },
1478 { "-addpart", FALSE, etBOOL, {&bAddPart},
1479 "Add the simulation part number to all output files when continuing from checkpoint" },
1480 { "-maxh", FALSE, etREAL, {&max_hours},
1481 "Terminate after 0.99 times this time (hours)" },
1482 { "-multi", FALSE, etINT, {&nmultisim},
1483 "Do multiple simulations in parallel" },
1484 { "-replex", FALSE, etINT, {&repl_ex_nst},
1485 "Attempt replica exchange every # steps" },
1486 { "-reseed", FALSE, etINT, {&repl_ex_seed},
1487 "Seed for replica exchange, -1 is generate a seed" },
1488 { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
1489 "HIDDENRecalculate virtual site coordinates with -rerun" },
1490 { "-ionize", FALSE, etBOOL, {&bIonize},
1491 "Do a simulation including the effect of an X-Ray bombardment on your system" },
1492 { "-confout", FALSE, etBOOL, {&bConfout},
1493 "HIDDENWrite the last configuration with -c and force checkpointing at the last step" },
1494 { "-stepout", FALSE, etINT, {&nstepout},
1495 "HIDDENFrequency of writing the remaining runtime" },
1499 #define NFILE asize(fnm)
1501 CopyRight(stderr,argv[0]);
1503 parse_common_args(&argc,argv,PCA_NOEXIT_ON_ARGS,
1504 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL);
1506 /* Automatically set -beo options if -eo is set etc. */
1507 couple_files_options(NFILE,fnm);
1509 /* Construct the command line arguments for benchmark runs
1510 * as well as for the simulation run
1512 create_command_line_snippets(NFILE,fnm,asize(pa),pa,
1513 &cmd_np, &cmd_args_bench, &cmd_args_launch);
1515 /* Read in checkpoint file if requested */
1516 sim_part = 1;
1517 if(opt2bSet("-cpi",NFILE,fnm))
1519 snew(cr,1);
1520 cr->duty=DUTY_PP; /* makes the following routine happy */
1521 read_checkpoint_simulation_part(opt2fn("-cpi",NFILE,fnm),&sim_part,&cpt_steps,cr);
1522 sfree(cr);
1523 sim_part++;
1524 /* sim_part will now be 1 if no checkpoint file was found */
1525 if (sim_part<=1)
1526 gmx_fatal(FARGS, "Checkpoint file %s not found!", opt2fn("-cpi",NFILE,fnm));
1529 /* Open performance output file and write header info */
1530 fp = ffopen(opt2fn("-p",NFILE,fnm),"w");
1532 /* Make a quick consistency check of command line parameters */
1533 check_input(nnodes, repeats, &ntprs, maxfac, maxPMEfraction, minPMEfraction,
1534 fs, bench_nsteps, fnm, NFILE, sim_part, presteps);
1536 /* Determine max and min number of PME nodes to test: */
1537 maxPMEnodes = floor(maxPMEfraction*nnodes);
1538 minPMEnodes = max(floor(minPMEfraction*nnodes), 0);
1539 fprintf(stdout, "Will try runs with %d ", minPMEnodes);
1540 if (maxPMEnodes != minPMEnodes)
1541 fprintf(stdout, "- %d ", maxPMEnodes);
1542 fprintf(stdout, "PME-only nodes.\n Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
1544 /* Get the commands we need to set up the runs from environment variables */
1545 get_program_paths(&cmd_mpirun, &cmd_mdrun, &cmd_export, repeats);
1547 /* Set the GMX_RESET_COUNTERS environment variable */
1548 if (presteps > 0)
1549 counters_set_env(presteps, &resetcount_orig, &bHaveResetCounter);
1551 /* Print some header info to file */
1552 sep_line(fp);
1553 fprintf(fp, "\n P E R F O R M A N C E R E S U L T S\n");
1554 sep_line(fp);
1555 fprintf(fp, "%s for Gromacs %s\n", ShortProgram(),GromacsVersion());
1556 fprintf(fp, "Number of nodes : %d\n", nnodes);
1557 fprintf(fp, "The mpirun command is : %s\n", cmd_mpirun);
1558 fprintf(fp, "Exporting env with : %s\n", cmd_export);
1559 fprintf(fp, "The mdrun command is : %s\n", cmd_mdrun);
1560 fprintf(fp, "Input file is : %s\n", opt2fn("-s",NFILE,fnm));
1561 if (fs > 0.0)
1562 fprintf(fp, "Basic fourierspacing : %f\n", fs);
1563 fprintf(fp, "mdrun args benchmarks : %s\n", cmd_args_bench);
1564 fprintf(fp, "Benchmark steps : ");
1565 fprintf(fp, gmx_step_pfmt, bench_nsteps);
1566 fprintf(fp, "\n");
1567 fprintf(fp, " + presteps : %d\n", presteps);
1568 if (sim_part > 1)
1570 fprintf(fp, "Checkpoint time step : ");
1571 fprintf(fp, gmx_step_pfmt, cpt_steps);
1572 fprintf(fp, "\n");
1574 if (bLaunch)
1575 fprintf(fp, "mdrun args at launchtime: %s\n", cmd_args_launch);
1576 if (new_sim_nsteps >= 0)
1578 bOverwrite = TRUE;
1579 fprintf(stderr, "Note: Simulation input file %s will have ", opt2fn("-so",NFILE,fnm));
1580 fprintf(stderr, gmx_step_pfmt, new_sim_nsteps+cpt_steps);
1581 fprintf(stderr, " steps.\n");
1582 fprintf(fp, "Simulation steps : ");
1583 fprintf(fp, gmx_step_pfmt, new_sim_nsteps);
1584 fprintf(fp, "\n");
1586 if (repeats > 1)
1587 fprintf(fp, "Doing %d repeats for each test.\n", repeats);
1589 /* Allocate memory for the inputinfo struct: */
1590 snew(info, 1);
1591 info->nr_inputfiles = ntprs;
1592 for (i=0; i<ntprs; i++)
1594 snew(info->r_coulomb , ntprs);
1595 snew(info->r_vdW , ntprs);
1596 snew(info->fourier_nx, ntprs);
1597 snew(info->fourier_ny, ntprs);
1598 snew(info->fourier_nz, ntprs);
1599 snew(info->fourier_sp, ntprs);
1601 /* Make alternative tpr files to test: */
1602 snew(tpr_names, ntprs);
1603 for (i=0; i<ntprs; i++)
1604 snew(tpr_names[i], STRLEN);
1606 make_benchmark_tprs(opt2fn("-s",NFILE,fnm), tpr_names, bench_nsteps+presteps, cpt_steps, maxfac, ntprs, fs, info, fp);
1608 if (repeats == 0)
1610 fprintf(stderr, "Nothing more to do.\n");
1611 fprintf(fp, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1612 thanx(stderr);
1613 return 0;
1616 /* Memory allocation for performance data */
1617 datasets = maxPMEnodes - minPMEnodes + 3;
1618 if (0 == minPMEnodes)
1619 datasets--;
1621 /* Allocate one dataset for each tpr input file: */
1622 snew(perfdata, ntprs);
1624 /* Allocate a subset for each test with a given number of PME nodes */
1625 for (k=0; k<ntprs; k++)
1627 snew(perfdata[k], datasets);
1628 for (i=0; i<datasets; i++)
1630 for (j=0; j<repeats; j++)
1632 snew(perfdata[k][i].Gcycles , repeats);
1633 snew(perfdata[k][i].ns_per_day, repeats);
1634 snew(perfdata[k][i].PME_f_load, repeats);
1639 /********************************************************************************/
1640 /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats */
1641 /********************************************************************************/
1642 do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, datasets, perfdata, repeats, nnodes, ntprs,
1643 cmd_mpirun, cmd_export, cmd_mdrun, cmd_args_bench, fnm, NFILE, sim_part, presteps, cpt_steps);
1645 /* Restore original environment */
1646 if (presteps > 0)
1647 counters_restore_env(resetcount_orig, bHaveResetCounter);
1649 /* Analyse the results and give a suggestion for optimal settings: */
1650 analyze_data(fp, perfdata, ntprs, datasets, repeats, info, &best_tpr, &best_npme);
1652 /* Take the best-performing tpr file and enlarge nsteps to original value */
1653 if ((best_tpr > 0) || bOverwrite)
1655 simulation_tpr = opt2fn("-so",NFILE,fnm);
1656 modify_PMEsettings(bOverwrite? (new_sim_nsteps+cpt_steps):info->orig_sim_steps, tpr_names[best_tpr], simulation_tpr);
1658 else
1659 simulation_tpr = opt2fn("-s",NFILE,fnm);
1661 /* Now start the real simulation if the user requested it ... */
1662 launch_simulation(bLaunch, fp, cmd_mpirun, cmd_mdrun, cmd_args_launch, simulation_tpr, nnodes, best_npme);
1663 fclose(fp);
1665 /* ... or simply print the performance results to screen: */
1666 if (!bLaunch)
1668 FILE *fp = fopen(opt2fn("-p", NFILE, fnm),"r");
1669 char buf[STRLEN];
1670 fprintf(stdout,"\n\n");
1672 while( fgets(buf,STRLEN-1,fp) != NULL )
1674 fprintf(stdout,"%s",buf);
1676 fclose(fp);
1677 fprintf(stdout,"\n\n");
1678 thanx(stderr);
1681 return 0;