src/tools/gmx_tune_pme.c

   1 /*
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  10  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  11  * Copyright (c) 2001-2008, The GROMACS development team,
  12  * check out http://www.gromacs.org for more information.
  13
  14  * This program is free software; you can redistribute it and/or
  15  * modify it under the terms of the GNU General Public License
  16  * as published by the Free Software Foundation; either version 2
  17  * of the License, or (at your option) any later version.
  18  *
  19  * If you want to redistribute modifications, please consider that
  20  * scientific software is very special. Version control is crucial -
  21  * bugs must be traceable. We will be happy to consider code for
  22  * inclusion in the official distribution, but derived work must not
  23  * be called official GROMACS. Details are found in the README & COPYING
  24  * files - if they are missing, get the official version at www.gromacs.org.
  25  *
  26  * To help us fund GROMACS development, we humbly ask that you cite
  27  * the papers on the package - you can find them in the top README file.
  28  *
  29  * For more info, check our website at http://www.gromacs.org
  30  *
  31  * And Hey:
  32  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  33  */
  34 #ifdef HAVE_CONFIG_H
  35 #include <config.h>
  36 #endif
  37
  38
  39 #include <time.h>
  40 #ifdef HAVE_SYS_TIME_H
  41 #include <sys/time.h>
  42 #endif
  43
  44
  45
  46 #include "statutil.h"
  47 #include "typedefs.h"
  48 #include "smalloc.h"
  49 #include "vec.h"
  50 #include "copyrite.h"
  51 #include "statutil.h"
  52 #include "tpxio.h"
  53 #include "string2.h"
  54 #include "readinp.h"
  55 #include "calcgrid.h"
  56 #include "checkpoint.h"
  57 #include "gmx_ana.h"
  58 #include "names.h"
  59
  60
  61
  62 enum {
  63   ddnoSEL, ddnoINTERLEAVE, ddnoPP_PME, ddnoCARTESIAN, ddnoNR
  64 };
  65
  66 /* Enum for situations that can occur during log file parsing, the
  67  * corresponding string entries can be found in do_the_tests() in
  68  * const char* ParseLog[] */
  69 enum {
  70     eParselogOK,
  71     eParselogNotFound,
  72     eParselogNoPerfData,
  73     eParselogTerm,
  74     eParselogResetProblem,
  75     eParselogNoDDGrid,
  76     eParselogTPXVersion,
  77     eParselogNotParallel,
  78     eParselogFatal,
  79     eParselogNr
  80 };
  81
  82
  83 typedef struct
  84 {
  85     int  nPMEnodes;       /* number of PME-only nodes used in this test */
  86     int  nx, ny, nz;      /* DD grid */
  87     int  guessPME;        /* if nPMEnodes == -1, this is the guessed number of PME nodes */
  88     double *Gcycles;      /* This can contain more than one value if doing multiple tests */
  89     double Gcycles_Av;
  90     float *ns_per_day;
  91     float ns_per_day_Av;
  92     float *PME_f_load;    /* PME mesh/force load average*/
  93     float PME_f_load_Av;  /* Average average ;) ... */
  94     char *mdrun_cmd_line; /* Mdrun command line used for this test */
  95 } t_perf;
  96
  97
  98 typedef struct
  99 {
 100     int  nr_inputfiles;         /* The number of tpr and mdp input files */
 101     gmx_large_int_t orig_sim_steps;  /* Number of steps to be done in the real simulation */
 102     real *rcoulomb;             /* The coulomb radii [0...nr_inputfiles] */
 103     real *rvdw;                 /* The vdW radii */
 104     real *rlist;                /* Neighbourlist cutoff radius */
 105     real *rlistlong;
 106     int  *nkx, *nky, *nkz;
 107     real *fsx, *fsy, *fsz;      /* Fourierspacing in x,y,z dimension */
 108 } t_inputinfo;
 109
 110
 111 static void sep_line(FILE *fp)
 112 {
 113     fprintf(fp, "\n------------------------------------------------------------\n");
 114 }
 115
 116
 117 /* Wrapper for system calls */
 118 static int gmx_system_call(char *command)
 119 {
 120 #ifdef GMX_NO_SYSTEM
 121     gmx_fatal(FARGS,"No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n",command);
 122 #else
 123     return ( system(command) );
 124 #endif
 125 }
 126
 127
 128 /* Check if string starts with substring */
 129 static gmx_bool str_starts(const char *string, const char *substring)
 130 {
 131     return ( strncmp(string, substring, strlen(substring)) == 0);
 132 }
 133
 134
 135 static void cleandata(t_perf *perfdata, int test_nr)
 136 {
 137     perfdata->Gcycles[test_nr]    = 0.0;
 138     perfdata->ns_per_day[test_nr] = 0.0;
 139     perfdata->PME_f_load[test_nr] = 0.0;
 140
 141     return;
 142 }
 143
 144
 145 static gmx_bool is_equal(real a, real b)
 146 {
 147     real diff, eps=1.0e-7;
 148
 149
 150     diff = a - b;
 151
 152     if (diff < 0.0) diff = -diff;
 153
 154     if (diff < eps)
 155         return TRUE;
 156     else
 157         return FALSE;
 158 }
 159
 160
 161 static void finalize(const char *fn_out)
 162 {
 163     char buf[STRLEN];
 164     FILE *fp;
 165
 166
 167     fp = fopen(fn_out,"r");
 168     fprintf(stdout,"\n\n");
 169
 170     while( fgets(buf,STRLEN-1,fp) != NULL )
 171     {
 172         fprintf(stdout,"%s",buf);
 173     }
 174     fclose(fp);
 175     fprintf(stdout,"\n\n");
 176     thanx(stderr);
 177 }
 178
 179
 180 enum {eFoundNothing, eFoundDDStr, eFoundAccountingStr, eFoundCycleStr};
 181
 182 static int parse_logfile(const char *logfile, const char *errfile,
 183         t_perf *perfdata, int test_nr, int presteps, gmx_large_int_t cpt_steps,
 184         int nnodes)
 185 {
 186     FILE  *fp;
 187     char  line[STRLEN], dumstring[STRLEN], dumstring2[STRLEN];
 188     const char matchstrdd[]="Domain decomposition grid";
 189     const char matchstrcr[]="resetting all time and cycle counters";
 190     const char matchstrbal[]="Average PME mesh/force load:";
 191     const char matchstring[]="R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G";
 192     const char errSIG[]="signal, stopping at the next";
 193     int   iFound;
 194     int   procs;
 195     float  dum1,dum2,dum3;
 196     int   npme;
 197     gmx_large_int_t resetsteps=-1;
 198     gmx_bool  bFoundResetStr = FALSE;
 199     gmx_bool  bResetChecked  = FALSE;
 200
 201
 202     if (!gmx_fexist(logfile))
 203     {
 204         fprintf(stderr, "WARNING: Could not find logfile %s.\n", logfile);
 205         cleandata(perfdata, test_nr);
 206         return eParselogNotFound;
 207     }
 208
 209     fp = fopen(logfile, "r");
 210     perfdata->PME_f_load[test_nr] = -1.0;
 211     perfdata->guessPME            = -1;
 212
 213     iFound = eFoundNothing;
 214     if (1 == nnodes)
 215         iFound = eFoundDDStr; /* Skip some case statements */
 216
 217     while (fgets(line, STRLEN, fp) != NULL)
 218     {
 219         /* Remove leading spaces */
 220         ltrim(line);
 221
 222         /* Check for TERM and INT signals from user: */
 223         if ( strstr(line, errSIG) != NULL )
 224         {
 225             fclose(fp);
 226             cleandata(perfdata, test_nr);
 227             return eParselogTerm;
 228         }
 229
 230         /* Check whether cycle resetting  worked */
 231         if (presteps > 0 && !bFoundResetStr)
 232         {
 233             if (strstr(line, matchstrcr) != NULL)
 234             {
 235                 sprintf(dumstring, "Step %s", gmx_large_int_pfmt);
 236                 sscanf(line, dumstring, &resetsteps);
 237                 bFoundResetStr = TRUE;
 238                 if (resetsteps == presteps+cpt_steps)
 239                 {
 240                     bResetChecked = TRUE;
 241                 }
 242                 else
 243                 {
 244                     sprintf(dumstring , gmx_large_int_pfmt, resetsteps);
 245                     sprintf(dumstring2, gmx_large_int_pfmt, presteps+cpt_steps);
 246                     fprintf(stderr, "WARNING: Time step counters were reset at step %s,\n"
 247                                     "         though they were supposed to be reset at step %s!\n",
 248                             dumstring, dumstring2);
 249                 }
 250             }
 251         }
 252
 253         /* Look for strings that appear in a certain order in the log file: */
 254         switch(iFound)
 255         {
 256             case eFoundNothing:
 257                 /* Look for domain decomp grid and separate PME nodes: */
 258                 if (str_starts(line, matchstrdd))
 259                 {
 260                     sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
 261                             &(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme);
 262                     if (perfdata->nPMEnodes == -1)
 263                         perfdata->guessPME = npme;
 264                     else if (perfdata->nPMEnodes != npme)
 265                         gmx_fatal(FARGS, "PME nodes from command line and output file are not identical");
 266                     iFound = eFoundDDStr;
 267                 }
 268                 /* Catch a few errors that might have occured: */
 269                 else if (str_starts(line, "There is no domain decomposition for"))
 270                 {
 271                     fclose(fp);
 272                     return eParselogNoDDGrid;
 273                 }
 274                 else if (str_starts(line, "reading tpx file"))
 275                 {
 276                     fclose(fp);
 277                     return eParselogTPXVersion;
 278                 }
 279                 else if (str_starts(line, "The -dd or -npme option request a parallel simulation"))
 280                 {
 281                     fclose(fp);
 282                     return eParselogNotParallel;
 283                 }
 284                 break;
 285             case eFoundDDStr:
 286                 /* Look for PME mesh/force balance (not necessarily present, though) */
 287                 if (str_starts(line, matchstrbal))
 288                     sscanf(&line[strlen(matchstrbal)], "%f", &(perfdata->PME_f_load[test_nr]));
 289                 /* Look for matchstring */
 290                 if (str_starts(line, matchstring))
 291                     iFound = eFoundAccountingStr;
 292                 break;
 293             case eFoundAccountingStr:
 294                 /* Already found matchstring - look for cycle data */
 295                 if (str_starts(line, "Total  "))
 296                 {
 297                     sscanf(line,"Total %d %lf",&procs,&(perfdata->Gcycles[test_nr]));
 298                     iFound = eFoundCycleStr;
 299                 }
 300                 break;
 301             case eFoundCycleStr:
 302                 /* Already found cycle data - look for remaining performance info and return */
 303                 if (str_starts(line, "Performance:"))
 304                 {
 305                     sscanf(line,"%s %f %f %f %f", dumstring, &dum1, &dum2, &(perfdata->ns_per_day[test_nr]), &dum3);
 306                     fclose(fp);
 307                     if (bResetChecked || presteps == 0)
 308                         return eParselogOK;
 309                     else
 310                         return eParselogResetProblem;
 311                 }
 312                 break;
 313         }
 314     } /* while */
 315
 316     /* Close the log file */
 317     fclose(fp);
 318
 319     /* Check why there is no performance data in the log file.
 320      * Did a fatal errors occur? */
 321     if (gmx_fexist(errfile))
 322     {
 323         fp = fopen(errfile, "r");
 324         while (fgets(line, STRLEN, fp) != NULL)
 325         {
 326             if ( str_starts(line, "Fatal error:") )
 327             {
 328                 if (fgets(line, STRLEN, fp) != NULL)
 329                     fprintf(stderr, "\nWARNING: A fatal error has occured during this benchmark:\n"
 330                                     "%s\n", line);
 331                 fclose(fp);
 332                 cleandata(perfdata, test_nr);
 333                 return eParselogFatal;
 334             }
 335         }
 336         fclose(fp);
 337     }
 338     else
 339     {
 340         fprintf(stderr, "WARNING: Could not find stderr file %s.\n", errfile);
 341     }
 342
 343     /* Giving up ... we could not find out why there is no performance data in
 344      * the log file. */
 345     fprintf(stdout, "No performance data in log file.\n");
 346     cleandata(perfdata, test_nr);
 347
 348     return eParselogNoPerfData;
 349 }
 350
 351
 352 static gmx_bool analyze_data(
 353         FILE        *fp,
 354         const char  *fn,
 355         t_perf      **perfdata,
 356         int         nnodes,
 357         int         ntprs,
 358         int         ntests,
 359         int         nrepeats,
 360         t_inputinfo *info,
 361         int         *index_tpr,    /* OUT: Nr of mdp file with best settings */
 362         int         *npme_optimal) /* OUT: Optimal number of PME nodes */
 363 {
 364     int  i,j,k;
 365     int line=0, line_win=-1;
 366     int  k_win=-1, i_win=-1, winPME;
 367     double s=0.0;  /* standard deviation */
 368     t_perf *pd;
 369     char strbuf[STRLEN];
 370     char str_PME_f_load[13];
 371     gmx_bool bCanUseOrigTPR;
 372     gmx_bool bRefinedCoul, bRefinedVdW, bRefinedGrid;
 373
 374
 375     if (nrepeats > 1)
 376     {
 377         sep_line(fp);
 378         fprintf(fp, "Summary of successful runs:\n");
 379         fprintf(fp, "Line tpr PME nodes  Gcycles Av.     Std.dev.       ns/day        PME/f");
 380         if (nnodes > 1)
 381             fprintf(fp, "    DD grid");
 382         fprintf(fp, "\n");
 383     }
 384
 385
 386     for (k=0; k<ntprs; k++)
 387     {
 388         for (i=0; i<ntests; i++)
 389         {
 390             /* Select the right dataset: */
 391             pd = &(perfdata[k][i]);
 392
 393             pd->Gcycles_Av    = 0.0;
 394             pd->PME_f_load_Av = 0.0;
 395             pd->ns_per_day_Av = 0.0;
 396
 397             if (pd->nPMEnodes == -1)
 398                 sprintf(strbuf, "(%3d)", pd->guessPME);
 399             else
 400                 sprintf(strbuf, "     ");
 401
 402             /* Get the average run time of a setting */
 403             for (j=0; j<nrepeats; j++)
 404             {
 405                 pd->Gcycles_Av    += pd->Gcycles[j];
 406                 pd->PME_f_load_Av += pd->PME_f_load[j];
 407             }
 408             pd->Gcycles_Av    /= nrepeats;
 409             pd->PME_f_load_Av /= nrepeats;
 410
 411             for (j=0; j<nrepeats; j++)
 412             {
 413                 if (pd->ns_per_day[j] > 0.0)
 414                     pd->ns_per_day_Av += pd->ns_per_day[j];
 415                 else
 416                 {
 417                     /* Somehow the performance number was not aquired for this run,
 418                      * therefor set the average to some negative value: */
 419                     pd->ns_per_day_Av = -1.0f*nrepeats;
 420                     break;
 421                 }
 422             }
 423             pd->ns_per_day_Av /= nrepeats;
 424
 425             /* Nicer output: */
 426             if (pd->PME_f_load_Av > 0.0)
 427                 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load_Av);
 428             else
 429                 sprintf(str_PME_f_load, "%s", "         -  ");
 430
 431
 432             /* We assume we had a successful run if both averages are positive */
 433             if (pd->Gcycles_Av > 0.0 && pd->ns_per_day_Av > 0.0)
 434             {
 435                 /* Output statistics if repeats were done */
 436                 if (nrepeats > 1)
 437                 {
 438                     /* Calculate the standard deviation */
 439                     s = 0.0;
 440                     for (j=0; j<nrepeats; j++)
 441                         s += pow( pd->Gcycles[j] - pd->Gcycles_Av, 2 );
 442                     s /= (nrepeats - 1);
 443                     s = sqrt(s);
 444
 445                     fprintf(fp, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s",
 446                             line, k, pd->nPMEnodes, strbuf, pd->Gcycles_Av, s,
 447                             pd->ns_per_day_Av, str_PME_f_load);
 448                     if (nnodes > 1)
 449                         fprintf(fp, "  %3d %3d %3d", pd->nx, pd->ny, pd->nz);
 450                     fprintf(fp, "\n");
 451                 }
 452                 /* Store the index of the best run found so far in 'winner': */
 453                 if ( (k_win == -1) || (pd->Gcycles_Av < perfdata[k_win][i_win].Gcycles_Av) )
 454                 {
 455                     k_win = k;
 456                     i_win = i;
 457                     line_win = line;
 458                 }
 459                 line++;
 460             }
 461         }
 462     }
 463
 464     if (k_win == -1)
 465         gmx_fatal(FARGS, "None of the runs was successful! Check %s for problems.", fn);
 466
 467     sep_line(fp);
 468
 469     winPME = perfdata[k_win][i_win].nPMEnodes;
 470
 471     if (1 == ntests)
 472     {
 473         /* We stuck to a fixed number of PME-only nodes */
 474         sprintf(strbuf, "settings No. %d", k_win);
 475     }
 476     else
 477     {
 478         /* We have optimized the number of PME-only nodes */
 479         if (winPME == -1)
 480             sprintf(strbuf, "%s", "the automatic number of PME nodes");
 481         else
 482             sprintf(strbuf, "%d PME nodes", winPME);
 483     }
 484     fprintf(fp, "Best performance was achieved with %s", strbuf);
 485     if ((nrepeats > 1) && (ntests > 1))
 486         fprintf(fp, " (see line %d)", line_win);
 487     fprintf(fp, "\n");
 488
 489     /* Only mention settings if they were modified: */
 490     bRefinedCoul = !is_equal(info->rcoulomb[k_win], info->rcoulomb[0]);
 491     bRefinedVdW  = !is_equal(info->rvdw[k_win]    , info->rvdw[0]    );
 492     bRefinedGrid = !(info->nkx[k_win] == info->nkx[0] &&
 493                      info->nky[k_win] == info->nky[0] &&
 494                      info->nkz[k_win] == info->nkz[0]);
 495
 496     if (bRefinedCoul || bRefinedVdW || bRefinedGrid)
 497     {
 498         fprintf(fp, "Optimized PME settings:\n");
 499         bCanUseOrigTPR = FALSE;
 500     }
 501     else
 502     {
 503         bCanUseOrigTPR = TRUE;
 504     }
 505
 506     if (bRefinedCoul)
 507         fprintf(fp, "   New Coulomb radius: %f nm (was %f nm)\n", info->rcoulomb[k_win], info->rcoulomb[0]);
 508
 509     if (bRefinedVdW)
 510         fprintf(fp, "   New Van der Waals radius: %f nm (was %f nm)\n", info->rvdw[k_win], info->rvdw[0]);
 511
 512     if (bRefinedGrid)
 513         fprintf(fp, "   New Fourier grid xyz: %d %d %d (was %d %d %d)\n", info->nkx[k_win], info->nky[k_win], info->nkz[k_win],
 514                                                                           info->nkx[0], info->nky[0], info->nkz[0]);
 515
 516     if (bCanUseOrigTPR && ntprs > 1)
 517         fprintf(fp, "and original PME settings.\n");
 518
 519     fflush(fp);
 520
 521     /* Return the index of the mdp file that showed the highest performance
 522      * and the optimal number of PME nodes */
 523     *index_tpr    = k_win;
 524     *npme_optimal = winPME;
 525
 526     return bCanUseOrigTPR;
 527 }
 528
 529
 530 /* Get the commands we need to set up the runs from environment variables */
 531 static void get_program_paths(gmx_bool bThreads, char *cmd_mpirun[], char cmd_np[],
 532                               char *cmd_mdrun[], int repeats)
 533 {
 534     char *command=NULL;
 535     char *cp;
 536     char *cp2;
 537     char line[STRLEN];
 538     FILE *fp;
 539     const char def_mpirun[] = "mpirun";
 540     const char def_mdrun[]  = "mdrun";
 541     const char filename[]   = "benchtest.log";
 542     const char match_mpi[]  = "NNODES=";
 543     const char match_mdrun[]= "Program: ";
 544     const char empty_mpirun[] = "";
 545     gmx_bool  bMdrun = FALSE;
 546     gmx_bool  bMPI   = FALSE;
 547
 548
 549     /* Get the commands we need to set up the runs from environment variables */
 550     if (!bThreads)
 551     {
 552         if ( (cp = getenv("MPIRUN")) != NULL)
 553             *cmd_mpirun = strdup(cp);
 554         else
 555             *cmd_mpirun = strdup(def_mpirun);
 556     }
 557     else
 558     {
 559         *cmd_mpirun = strdup(empty_mpirun);
 560     }
 561
 562     if ( (cp = getenv("MDRUN" )) != NULL )
 563         *cmd_mdrun  = strdup(cp);
 564     else
 565         *cmd_mdrun  = strdup(def_mdrun);
 566
 567
 568     /* If no simulations have to be performed, we are done here */
 569     if (repeats <= 0)
 570         return;
 571
 572     /* Run a small test to see whether mpirun + mdrun work  */
 573     fprintf(stdout, "Making sure that mdrun can be executed. ");
 574     if (bThreads)
 575     {
 576         snew(command, strlen(*cmd_mdrun) + strlen(cmd_np) + strlen(filename) + 50);
 577         sprintf(command, "%s%s-version -maxh 0.001 1> %s 2>&1", *cmd_mdrun, cmd_np, filename);
 578     }
 579     else
 580     {
 581         snew(command, strlen(*cmd_mpirun) + strlen(cmd_np) + strlen(*cmd_mdrun) + strlen(filename) + 50);
 582         sprintf(command, "%s%s%s -version -maxh 0.001 1> %s 2>&1", *cmd_mpirun, cmd_np, *cmd_mdrun, filename);
 583     }
 584     fprintf(stdout, "Trying '%s' ... ", command);
 585     make_backup(filename);
 586     gmx_system_call(command);
 587
 588     /* Check if we find the characteristic string in the output: */
 589     if (!gmx_fexist(filename))
 590         gmx_fatal(FARGS, "Output from test run could not be found.");
 591
 592     fp = fopen(filename, "r");
 593     /* We need to scan the whole output file, since sometimes the queuing system
 594      * also writes stuff to stdout/err */
 595     while ( !feof(fp) )
 596     {
 597         cp2=fgets(line, STRLEN, fp);
 598         if (cp2!=NULL)
 599         {
 600             if ( str_starts(line, match_mdrun) )
 601                 bMdrun = TRUE;
 602             if ( str_starts(line, match_mpi) )
 603                 bMPI = TRUE;
 604         }
 605     }
 606     fclose(fp);
 607
 608     if (bThreads)
 609     {
 610         if (bMPI)
 611         {
 612             gmx_fatal(FARGS, "Need a threaded version of mdrun. This one\n"
 613                     "(%s)\n"
 614                     "seems to have been compiled with MPI instead.",
 615                     *cmd_mdrun);
 616         }
 617     }
 618     else
 619     {
 620         if (bMdrun && !bMPI)
 621         {
 622             gmx_fatal(FARGS, "Need an MPI-enabled version of mdrun. This one\n"
 623                     "(%s)\n"
 624                     "seems to have been compiled without MPI support.",
 625                     *cmd_mdrun);
 626         }
 627     }
 628
 629     if (!bMdrun)
 630     {
 631         gmx_fatal(FARGS, "Cannot execute mdrun. Please check %s for problems!",
 632                 filename);
 633     }
 634
 635     fprintf(stdout, "passed.\n");
 636
 637     /* Clean up ... */
 638     remove(filename);
 639     sfree(command);
 640 }
 641
 642
 643 static void launch_simulation(
 644         gmx_bool bLaunch,           /* Should the simulation be launched? */
 645         FILE *fp,               /* General log file */
 646         gmx_bool bThreads,          /* whether to use threads */
 647         char *cmd_mpirun,       /* Command for mpirun */
 648         char *cmd_np,           /* Switch for -np or -nt or empty */
 649         char *cmd_mdrun,        /* Command for mdrun */
 650         char *args_for_mdrun,   /* Arguments for mdrun */
 651         const char *simulation_tpr,   /* This tpr will be simulated */
 652         int  nnodes,            /* Number of nodes to run on */
 653         int  nPMEnodes)         /* Number of PME nodes to use */
 654 {
 655     char  *command;
 656
 657
 658     /* Make enough space for the system call command,
 659      * (100 extra chars for -npme ... etc. options should suffice): */
 660     snew(command, strlen(cmd_mpirun)+strlen(cmd_mdrun)+strlen(cmd_np)+strlen(args_for_mdrun)+strlen(simulation_tpr)+100);
 661
 662     /* Note that the -passall options requires args_for_mdrun to be at the end
 663      * of the command line string */
 664     if (bThreads)
 665     {
 666         sprintf(command, "%s%s-npme %d -s %s %s",
 667                 cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun);
 668     }
 669     else
 670     {
 671         sprintf(command, "%s%s%s -npme %d -s %s %s",
 672                 cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun);
 673     }
 674
 675     fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch? "Using":"Please use", command);
 676     sep_line(fp);
 677     fflush(fp);
 678
 679     /* Now the real thing! */
 680     if (bLaunch)
 681     {
 682         fprintf(stdout, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command);
 683         sep_line(stdout);
 684         fflush(stdout);
 685         gmx_system_call(command);
 686         thanx(fp);
 687     }
 688 }
 689
 690
 691 static void modify_PMEsettings(
 692         gmx_large_int_t simsteps,  /* Set this value as number of time steps */
 693         const char *fn_best_tpr,   /* tpr file with the best performance */
 694         const char *fn_sim_tpr)    /* name of tpr file to be launched */
 695 {
 696     t_inputrec   *ir;
 697     t_state      state;
 698     gmx_mtop_t   mtop;
 699     char         buf[200];
 700
 701     snew(ir,1);
 702     read_tpx_state(fn_best_tpr,ir,&state,NULL,&mtop);
 703
 704     /* Set nsteps to the right value */
 705     ir->nsteps = simsteps;
 706
 707     /* Write the tpr file which will be launched */
 708     sprintf(buf, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr, gmx_large_int_pfmt);
 709     fprintf(stdout,buf,ir->nsteps);
 710     fflush(stdout);
 711     write_tpx_state(fn_sim_tpr,ir,&state,&mtop);
 712
 713     sfree(ir);
 714 }
 715
 716
 717 typedef struct
 718 {
 719     int  nkx, nky, nkz;      /* Fourier grid                                  */
 720     real fac;                /* actual scaling factor                         */
 721     real fs;                 /* Fourierspacing                                */
 722 } t_pmegrid;
 723
 724
 725 static void copy_grid(t_pmegrid *ingrid, t_pmegrid *outgrid)
 726 {
 727     outgrid->nkx = ingrid->nkx;
 728     outgrid->nky = ingrid->nky;
 729     outgrid->nkz = ingrid->nkz;
 730     outgrid->fac = ingrid->fac;
 731     outgrid->fs  = ingrid->fs;
 732 }
 733
 734 /* Removes entry 'index' from the t_pmegrid list */
 735 static void remove_from_list(
 736         t_pmegrid gridlist[],
 737         int *nlist,           /* Length of the list                           */
 738         int index)            /* Index to remove from the list                */
 739 {
 740     int j;
 741
 742
 743     for (j = index; j < (*nlist - 1); j++)
 744     {
 745         copy_grid(&gridlist[j+1], &gridlist[j]);
 746     }
 747     *nlist = *nlist - 1;
 748 }
 749
 750
 751 /* Returns the index of the least necessary grid in the list.
 752  *
 753  * This is the one where the following conditions hold for the scaling factor:
 754  * 1. this grid has the smallest distance to its neighboring grid (distance
 755  *    measured by fac) -> this condition is true for two grids at the same time
 756  * 2. this grid (of the two) has the smaller distance to the other neighboring
 757  *    grid
 758  *
 759  * The extreme grids (the ones with the smallest and largest
 760  * scaling factor) are never thrown away.
 761  */
 762 static int get_throwaway_index(
 763         t_pmegrid gridlist[],
 764         int nlist)           /* Length of the list                           */
 765 {
 766     int i,index = -1;
 767     real dist,mindist,d_left,d_right;
 768
 769
 770     /* Find the two factors with the smallest mutual distance */
 771     mindist = GMX_FLOAT_MAX;
 772     for (i = 1; i < nlist; i++)
 773     {
 774         dist = fabs(gridlist[i].fac - gridlist[i-1].fac);
 775         if (dist < mindist)
 776         {
 777             index = i;
 778             mindist = dist;
 779         }
 780     }
 781     /* index and index-1 have the smallest mutual distance */
 782     if (index == 1)
 783     {
 784         /* Never return the first index, i.e. index == 0 */
 785         ;
 786     }
 787     else
 788     {
 789         d_left  = fabs(gridlist[index-1].fac - gridlist[index-2].fac);
 790         d_right = fabs(gridlist[index+1].fac - gridlist[index  ].fac);
 791
 792         /* Return the left index if its distance to its left neighbor is shorter
 793          * than the distance of the right index to its right neighbor */
 794         if (d_left < d_right)
 795             index--;
 796     }
 797     /* Never return the last index */
 798     if (index == nlist-1)
 799         index--;
 800
 801     return index;
 802 }
 803
 804
 805 static void make_grid_list(
 806         real fmin,            /* minimum scaling factor (downscaling fac)     */
 807         real fmax,            /* maximum scaling factor (upscaling fac)       */
 808         int *ntprs,           /* Number of tpr files to test                  */
 809         matrix box,           /* The box                                      */
 810         t_pmegrid *griduse[], /* List of grids that have to be tested         */
 811         real fs)              /* Requested fourierspacing at a scaling factor
 812                                  of unity                                     */
 813 {
 814     real req_fac,act_fac=0,act_fs,eps;
 815     rvec box_size;
 816     int i,jmin=0,d,ngridall=0;
 817     int nkx=0,nky=0,nkz=0;
 818     int nkx_old=0,nky_old=0,nkz_old=0;
 819     t_pmegrid *gridall;
 820     int gridalloc,excess;
 821
 822
 823     /* Determine length of triclinic box vectors */
 824     for(d=0; d<DIM; d++)
 825     {
 826         box_size[d] = 0;
 827         for(i=0;i<DIM;i++)
 828             box_size[d] += box[d][i]*box[d][i];
 829         box_size[d] = sqrt(box_size[d]);
 830     }
 831
 832     gridalloc = 25;
 833     snew(gridall, gridalloc);
 834
 835     fprintf(stdout, "Possible PME grid settings (apart from input file settings):\n");
 836     fprintf(stdout, "  nkx  nky  nkz  max spacing  scaling factor\n");
 837
 838     /* eps should provide a fine enough spacing not to miss any grid */
 839     if (fmax != fmin)
 840         eps = (fmax-fmin)/(100.0*(*ntprs - 1));
 841     else
 842         eps = 1.0/max( (*griduse)[0].nkz, max( (*griduse)[0].nkx, (*griduse)[0].nky ) );
 843
 844     for (req_fac = fmin; act_fac < fmax; req_fac += eps)
 845     {
 846         nkx=0;
 847         nky=0;
 848         nkz=0;
 849         calc_grid(NULL,box,fs*req_fac,&nkx,&nky,&nkz);
 850         act_fs = max(box_size[XX]/nkx,max(box_size[YY]/nky,box_size[ZZ]/nkz));
 851         act_fac = act_fs/fs;
 852         if (    ! ( nkx==nkx_old           && nky==nky_old           && nkz==nkz_old           )    /* Exclude if grid is already in list */
 853              && ! ( nkx==(*griduse)[0].nkx && nky==(*griduse)[0].nky && nkz==(*griduse)[0].nkz ) )  /* Exclude input file grid */
 854         {
 855             /* We found a new grid that will do */
 856             nkx_old = nkx;
 857             nky_old = nky;
 858             nkz_old = nkz;
 859             gridall[ngridall].nkx = nkx;
 860             gridall[ngridall].nky = nky;
 861             gridall[ngridall].nkz = nkz;
 862             gridall[ngridall].fac = act_fac;
 863             gridall[ngridall].fs  = act_fs;
 864             fprintf(stdout, "%5d%5d%5d %12f %12f\n",nkx,nky,nkz,act_fs,act_fac);
 865             ngridall++;
 866             if (ngridall >= gridalloc)
 867             {
 868                 gridalloc += 25;
 869                 srenew(gridall, gridalloc);
 870             }
 871         }
 872     }
 873
 874     /* excess is the number of grids we have to get rid of */
 875     excess = ngridall+1 - *ntprs;
 876
 877     /* If we found less grids than tpr files were requested, simply test all
 878      * the grid there are ... */
 879     if (excess < 0)
 880     {
 881         fprintf(stdout, "NOTE: You requested %d tpr files, but apart from the input file grid only the\n"
 882                         "      above listed %d suitable PME grids were found. Will test all suitable settings.\n",
 883                         *ntprs, ngridall);
 884         *ntprs = ngridall+1;
 885     }
 886     else
 887     {
 888         if (2 == *ntprs)
 889         {
 890             /* We can only keep the input tpr file plus one extra tpr file.
 891              * We make that choice depending on the values of -upfac and -downfac */
 892             if (fmin < 1.0)
 893             {
 894                 /* Keep the one with the -downfac as scaling factor. This is already
 895                  * stored in gridall[0] */
 896                 ;
 897             }
 898             else
 899             {
 900                 /* Keep the one with -upfac as scaling factor */
 901                 copy_grid(&(gridall[ngridall-1]), &(gridall[0]));
 902             }
 903
 904         }
 905         else
 906         {
 907             /* From the grid list throw away the unnecessary ones (keep the extremes) */
 908             for (i = 0; i < excess; i++)
 909             {
 910                 /* Get the index of the least necessary grid from the list ... */
 911                 jmin = get_throwaway_index(gridall, ngridall);
 912                 /* ... and remove the grid from the list */
 913                 remove_from_list(gridall, &ngridall, jmin);
 914             }
 915         }
 916     }
 917
 918     /* The remaining list contains the grids we want to test */
 919     for (i=1; i < *ntprs; i++)
 920         copy_grid(&(gridall[i-1]), &((*griduse)[i]));
 921
 922     sfree(gridall);
 923 }
 924
 925
 926 #define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
 927
 928 /* Make additional TPR files with more computational load for the
 929  * direct space processors: */
 930 static void make_benchmark_tprs(
 931         const char *fn_sim_tpr,     /* READ : User-provided tpr file                 */
 932         char *fn_bench_tprs[],      /* WRITE: Names of benchmark tpr files           */
 933         gmx_large_int_t benchsteps, /* Number of time steps for benchmark runs       */
 934         gmx_large_int_t statesteps, /* Step counter in checkpoint file               */
 935         real upfac,                 /* Scale rcoulomb inbetween downfac and upfac    */
 936         real downfac,
 937         int *ntprs,                 /* No. of TPRs to write, each with a different
 938                                        rcoulomb and fourierspacing. If not enough
 939                                        grids are found, ntprs is reduced accordingly */
 940         real fourierspacing,        /* Basic fourierspacing from tpr input file      */
 941         t_inputinfo *info,          /* Contains information about mdp file options   */
 942         FILE *fp)                   /* Write the output here                         */
 943 {
 944     int          i,j,d;
 945     t_inputrec   *ir;
 946     t_state      state;
 947     gmx_mtop_t   mtop;
 948     real         nlist_buffer; /* Thickness of the buffer regions for PME-switch potentials: */
 949     char         buf[200];
 950     rvec         box_size;
 951     gmx_bool         bNote = FALSE;
 952     t_pmegrid    *pmegrid=NULL; /* Grid settings for the PME grids to test    */
 953
 954
 955     sprintf(buf, "Making benchmark tpr file%s with %s time step%s",
 956             *ntprs > 1? "s":"", gmx_large_int_pfmt, benchsteps>1?"s":"");
 957     fprintf(stdout, buf, benchsteps);
 958     if (statesteps > 0)
 959     {
 960         sprintf(buf, " (adding %s steps from checkpoint file)", gmx_large_int_pfmt);
 961         fprintf(stdout, buf, statesteps);
 962         benchsteps += statesteps;
 963     }
 964     fprintf(stdout, ".\n");
 965
 966
 967     snew(ir,1);
 968     read_tpx_state(fn_sim_tpr,ir,&state,NULL,&mtop);
 969
 970     /* Check if some kind of PME was chosen */
 971     if (EEL_PME(ir->coulombtype) == FALSE)
 972         gmx_fatal(FARGS, "Can only do optimizations for simulations with %s electrostatics.",
 973                 EELTYPE(eelPME));
 974
 975     /* Check if rcoulomb == rlist, which is necessary for plain PME. */
 976     if (  (eelPME == ir->coulombtype) && !(ir->rcoulomb == ir->rlist) )
 977     {
 978         gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to rlist (%f).",
 979                 EELTYPE(eelPME), ir->rcoulomb, ir->rlist);
 980     }
 981     /* For other PME types, rcoulomb is allowed to be smaller than rlist */
 982     else if (ir->rcoulomb > ir->rlist)
 983     {
 984         gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)",
 985                 EELTYPE(ir->coulombtype), ir->rcoulomb, ir->rlist);
 986     }
 987
 988     /* Reduce the number of steps for the benchmarks */
 989     info->orig_sim_steps = ir->nsteps;
 990     ir->nsteps           = benchsteps;
 991
 992     /* For PME-switch potentials, keep the radial distance of the buffer region */
 993     nlist_buffer   = ir->rlist - ir->rcoulomb;
 994
 995     /* Determine length of triclinic box vectors */
 996     for(d=0; d<DIM; d++)
 997     {
 998         box_size[d] = 0;
 999         for(i=0;i<DIM;i++)
1000             box_size[d] += state.box[d][i]*state.box[d][i];
1001         box_size[d] = sqrt(box_size[d]);
1002     }
1003
1004     /* Reconstruct fourierspacing per dimension from the number of grid points and box size */
1005     info->fsx[0] = box_size[XX]/ir->nkx;
1006     info->fsy[0] = box_size[YY]/ir->nky;
1007     info->fsz[0] = box_size[ZZ]/ir->nkz;
1008
1009     /* Put the input grid as first entry into the grid list */
1010     snew(pmegrid, *ntprs);
1011     pmegrid[0].fac = 1.00;
1012     pmegrid[0].nkx = ir->nkx;
1013     pmegrid[0].nky = ir->nky;
1014     pmegrid[0].nkz = ir->nkz;
1015     pmegrid[0].fs  = max(box_size[ZZ]/ir->nkz, max(box_size[XX]/ir->nkx, box_size[YY]/ir->nky));
1016
1017     /* If no value for the fourierspacing was provided on the command line, we
1018      * use the reconstruction from the tpr file */
1019     if (fourierspacing <= 0)
1020     {
1021         fourierspacing = pmegrid[0].fs;
1022     }
1023
1024     fprintf(stdout, "Calculating PME grid points on the basis of a fourierspacing of %f nm\n", fourierspacing);
1025
1026     /* Print information about settings of which some are potentially modified: */
1027     fprintf(fp, "   Coulomb type         : %s\n", EELTYPE(ir->coulombtype));
1028     fprintf(fp, "   Grid spacing x y z   : %f %f %f\n",
1029             box_size[XX]/ir->nkx, box_size[YY]/ir->nky, box_size[ZZ]/ir->nkz);
1030     fprintf(fp, "   Van der Waals type   : %s\n", EVDWTYPE(ir->vdwtype));
1031     if (EVDW_SWITCHED(ir->vdwtype))
1032         fprintf(fp, "   rvdw_switch          : %f nm\n", ir->rvdw_switch);
1033     if (EPME_SWITCHED(ir->coulombtype))
1034         fprintf(fp, "   rlist                : %f nm\n", ir->rlist);
1035     if (ir->rlistlong != max_cutoff(ir->rvdw,ir->rcoulomb))
1036         fprintf(fp, "   rlistlong            : %f nm\n", ir->rlistlong);
1037
1038     /* Print a descriptive line about the tpr settings tested */
1039     fprintf(fp, "\nWill try these real/reciprocal workload settings:\n");
1040     fprintf(fp, " No.   scaling  rcoulomb");
1041     fprintf(fp, "  nkx  nky  nkz");
1042     fprintf(fp, "   spacing");
1043     if (evdwCUT == ir->vdwtype)
1044         fprintf(fp, "      rvdw");
1045     if (EPME_SWITCHED(ir->coulombtype))
1046         fprintf(fp, "     rlist");
1047     if ( ir->rlistlong != max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb)) )
1048         fprintf(fp, " rlistlong");
1049     fprintf(fp, "  tpr file\n");
1050
1051
1052     /* Get useful PME grids if requested, the actual number of entries is
1053      * returned in npmegrid */
1054     if (*ntprs > 1)
1055         make_grid_list(downfac, upfac, ntprs, state.box, &pmegrid, fourierspacing);
1056
1057     /* Loop to create the requested number of tpr input files */
1058     for (j = 0; j < *ntprs; j++)
1059     {
1060         /* The first one is the provided tpr file, just need to modify the number
1061          * of steps, so skip the following block */
1062         if (j > 0)
1063         {
1064             /* Scale the Coulomb radius */
1065             ir->rcoulomb = info->rcoulomb[0]*pmegrid[j].fac;
1066
1067             /* Adjust other radii since various conditions neet to be fulfilled */
1068             if (eelPME == ir->coulombtype)
1069             {
1070                 /* plain PME, rcoulomb must be equal to rlist */
1071                 ir->rlist = ir->rcoulomb;
1072             }
1073             else
1074             {
1075                 /* rlist must be >= rcoulomb, we keep the size of the buffer region */
1076                 ir->rlist = ir->rcoulomb + nlist_buffer;
1077             }
1078
1079             if (evdwCUT == ir->vdwtype)
1080             {
1081                 /* For vdw cutoff, rvdw >= rlist */
1082                 ir->rvdw = max(info->rvdw[0], ir->rlist);
1083             }
1084
1085             ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
1086
1087             /* Copy the optimal grid dimensions to ir */
1088             ir->nkx = pmegrid[j].nkx;
1089             ir->nky = pmegrid[j].nky;
1090             ir->nkz = pmegrid[j].nkz;
1091
1092         } /* end of "if (j != 0)" */
1093
1094         /* for j==0: Save the original settings
1095          * for j >0: Save modified radii and fourier grids */
1096         info->rcoulomb[j]  = ir->rcoulomb;
1097         info->rvdw[j]      = ir->rvdw;
1098         info->nkx[j]       = ir->nkx;
1099         info->nky[j]       = ir->nky;
1100         info->nkz[j]       = ir->nkz;
1101         info->rlist[j]     = ir->rlist;
1102         info->rlistlong[j] = ir->rlistlong;
1103         info->fsx[j]       = pmegrid[j].fs;
1104         info->fsy[j]       = pmegrid[j].fs;
1105         info->fsz[j]       = pmegrid[j].fs;
1106
1107         /* Write the benchmark tpr file */
1108         strncpy(fn_bench_tprs[j],fn_sim_tpr,strlen(fn_sim_tpr)-strlen(".tpr"));
1109         sprintf(buf, "_bench%.2d.tpr", j);
1110         strcat(fn_bench_tprs[j], buf);
1111         fprintf(stdout,"Writing benchmark tpr %s with nsteps=", fn_bench_tprs[j]);
1112         fprintf(stdout, gmx_large_int_pfmt, ir->nsteps);
1113         if (j > 0)
1114             fprintf(stdout,", scaling factor %f\n", pmegrid[j].fac);
1115         else
1116             fprintf(stdout,", unmodified settings\n");
1117
1118         write_tpx_state(fn_bench_tprs[j],ir,&state,&mtop);
1119
1120         /* Write information about modified tpr settings to log file */
1121         if (j==0)
1122             fprintf(fp, "%4d%10s%10f", j, "-input-", ir->rcoulomb);
1123         else
1124             fprintf(fp, "%4d%10f%10f", j, pmegrid[j].fac, ir->rcoulomb);
1125         fprintf(fp, "%5d%5d%5d", ir->nkx, ir->nky, ir->nkz);
1126         fprintf(fp, " %9f ", info->fsx[j]);
1127         if (evdwCUT == ir->vdwtype)
1128             fprintf(fp, "%10f", ir->rvdw);
1129         if (EPME_SWITCHED(ir->coulombtype))
1130             fprintf(fp, "%10f", ir->rlist);
1131         if ( info->rlistlong[0] != max_cutoff(info->rlist[0],max_cutoff(info->rvdw[0],info->rcoulomb[0])) )
1132             fprintf(fp, "%10f", ir->rlistlong);
1133         fprintf(fp, "  %-14s\n",fn_bench_tprs[j]);
1134
1135         /* Make it clear to the user that some additional settings were modified */
1136         if (   !is_equal(ir->rvdw     , info->rvdw[0])
1137             || !is_equal(ir->rlistlong, info->rlistlong[0]) )
1138         {
1139             bNote = TRUE;
1140         }
1141     }
1142     if (bNote)
1143         fprintf(fp, "\nNote that in addition to rcoulomb and the fourier grid\n"
1144                     "also other input settings were changed (see table above).\n"
1145                     "Please check if the modified settings are appropriate.\n");
1146     fflush(stdout);
1147     fflush(fp);
1148     sfree(ir);
1149 }
1150
1151
1152 /* Whether these files are written depends on tpr (or mdp) settings,
1153  * not on mdrun command line options! */
1154 static gmx_bool tpr_triggers_file(const char *opt)
1155 {
1156     if ( (0 == strcmp(opt, "-pf"))
1157       || (0 == strcmp(opt, "-px")) )
1158         return TRUE;
1159     else
1160         return FALSE;
1161 }
1162
1163
1164 /* Rename the files we want to keep to some meaningful filename and
1165  * delete the rest */
1166 static void cleanup(const t_filenm *fnm, int nfile, int k, int nnodes,
1167                     int nPMEnodes, int nr, gmx_bool bKeepStderr)
1168 {
1169     char numstring[STRLEN];
1170     char newfilename[STRLEN];
1171     const char *fn=NULL;
1172     int i;
1173     const char *opt;
1174
1175
1176     fprintf(stdout, "Cleaning up, deleting benchmark temp files ...\n");
1177
1178     for (i=0; i<nfile; i++)
1179     {
1180         opt = (char *)fnm[i].opt;
1181         if ( strcmp(opt, "-p") == 0 )
1182         {
1183             /* do nothing; keep this file */
1184             ;
1185         }
1186         else if (strcmp(opt, "-bg") == 0)
1187         {
1188             /* Give the log file a nice name so one can later see which parameters were used */
1189             numstring[0] = '\0';
1190             if (nr > 0)
1191                 sprintf(numstring, "_%d", nr);
1192             sprintf(newfilename, "%s_no%d_np%d_npme%d%s", opt2fn("-bg",nfile,fnm), k, nnodes, nPMEnodes, numstring);
1193             if (gmx_fexist(opt2fn("-bg",nfile,fnm)))
1194             {
1195                 fprintf(stdout, "renaming log file to %s\n", newfilename);
1196                 make_backup(newfilename);
1197                 rename(opt2fn("-bg",nfile,fnm), newfilename);
1198             }
1199         }
1200         else if (strcmp(opt, "-err") == 0)
1201         {
1202             /* This file contains the output of stderr. We want to keep it in
1203              * cases where there have been problems. */
1204             fn = opt2fn(opt, nfile, fnm);
1205             numstring[0] = '\0';
1206             if (nr > 0)
1207                 sprintf(numstring, "_%d", nr);
1208             sprintf(newfilename, "%s_no%d_np%d_npme%d%s", fn, k, nnodes, nPMEnodes, numstring);
1209             if (gmx_fexist(fn))
1210             {
1211                 if (bKeepStderr)
1212                 {
1213                     fprintf(stdout, "Saving stderr output in %s\n", newfilename);
1214                     make_backup(newfilename);
1215                     rename(fn, newfilename);
1216                 }
1217                 else
1218                 {
1219                     fprintf(stdout, "Deleting %s\n", fn);
1220                     remove(fn);
1221                 }
1222             }
1223         }
1224         /* Delete the files which are created for each benchmark run: (options -b*) */
1225         else if ( ( (0 == strncmp(opt, "-b", 2)) && (opt2bSet(opt,nfile,fnm) || !is_optional(&fnm[i])) )
1226                   || tpr_triggers_file(opt) )
1227         {
1228             fn = opt2fn(opt, nfile, fnm);
1229             if (gmx_fexist(fn))
1230             {
1231                 fprintf(stdout, "Deleting %s\n", fn);
1232                 remove(fn);
1233             }
1234         }
1235     }
1236 }
1237
1238
1239 /* Returns the largest common factor of n1 and n2 */
1240 static int largest_common_factor(int n1, int n2)
1241 {
1242     int factor, nmax;
1243
1244     nmax = min(n1, n2);
1245     for (factor=nmax; factor > 0; factor--)
1246     {
1247         if ( 0==(n1 % factor) && 0==(n2 % factor) )
1248         {
1249             return(factor);
1250         }
1251     }
1252     return 0; /* one for the compiler */
1253 }
1254
1255 enum {eNpmeAuto, eNpmeAll, eNpmeReduced, eNpmeSubset, eNpmeNr};
1256
1257 /* Create a list of numbers of PME nodes to test */
1258 static void make_npme_list(
1259         const char *npmevalues_opt,  /* Make a complete list with all
1260                            * possibilities or a short list that keeps only
1261                            * reasonable numbers of PME nodes                  */
1262         int *nentries,    /* Number of entries we put in the nPMEnodes list   */
1263         int *nPMEnodes[], /* Each entry contains the value for -npme          */
1264         int nnodes,       /* Total number of nodes to do the tests on         */
1265         int minPMEnodes,  /* Minimum number of PME nodes                      */
1266         int maxPMEnodes)  /* Maximum number of PME nodes                      */
1267 {
1268     int i,npme,npp;
1269     int min_factor=1;     /* We request that npp and npme have this minimal
1270                            * largest common factor (depends on npp)           */
1271     int nlistmax;         /* Max. list size                                   */
1272     int nlist;            /* Actual number of entries in list                 */
1273     int eNPME=0;
1274
1275
1276     /* Do we need to check all possible values for -npme or is a reduced list enough? */
1277     if ( 0 == strcmp(npmevalues_opt, "all") )
1278     {
1279         eNPME = eNpmeAll;
1280     }
1281     else if ( 0 == strcmp(npmevalues_opt, "subset") )
1282     {
1283         eNPME = eNpmeSubset;
1284     }
1285     else if ( 0 == strcmp(npmevalues_opt, "auto") )
1286     {
1287         if (nnodes <= 64)
1288             eNPME = eNpmeAll;
1289         else if (nnodes < 128)
1290             eNPME = eNpmeReduced;
1291         else
1292             eNPME = eNpmeSubset;
1293     }
1294     else
1295     {
1296         gmx_fatal(FARGS, "Unknown option for -npme in make_npme_list");
1297     }
1298
1299     /* Calculate how many entries we could possibly have (in case of -npme all) */
1300     if (nnodes > 2)
1301     {
1302         nlistmax = maxPMEnodes - minPMEnodes + 3;
1303         if (0 == minPMEnodes)
1304             nlistmax--;
1305     }
1306     else
1307         nlistmax = 1;
1308
1309     /* Now make the actual list which is at most of size nlist */
1310     snew(*nPMEnodes, nlistmax);
1311     nlist = 0; /* start counting again, now the real entries in the list */
1312     for (i = 0; i < nlistmax - 2; i++)
1313     {
1314         npme = maxPMEnodes - i;
1315         npp  = nnodes-npme;
1316         switch (eNPME)
1317         {
1318             case eNpmeAll:
1319                 min_factor = 1;
1320                 break;
1321             case eNpmeReduced:
1322                 min_factor = 2;
1323                 break;
1324             case eNpmeSubset:
1325                 /* For 2d PME we want a common largest factor of at least the cube
1326                  * root of the number of PP nodes */
1327                 min_factor = (int) pow(npp, 1.0/3.0);
1328                 break;
1329             default:
1330                 gmx_fatal(FARGS, "Unknown option for eNPME in make_npme_list");
1331                 break;
1332         }
1333         if (largest_common_factor(npp, npme) >= min_factor)
1334         {
1335             (*nPMEnodes)[nlist] = npme;
1336             nlist++;
1337         }
1338     }
1339     /* We always test 0 PME nodes and the automatic number */
1340     *nentries = nlist + 2;
1341     (*nPMEnodes)[nlist  ] =  0;
1342     (*nPMEnodes)[nlist+1] = -1;
1343
1344     fprintf(stderr, "Will try the following %d different values for -npme:\n", *nentries);
1345     for (i=0; i<*nentries-1; i++)
1346         fprintf(stderr, "%d, ", (*nPMEnodes)[i]);
1347     fprintf(stderr, "and %d (auto).\n", (*nPMEnodes)[*nentries-1]);
1348 }
1349
1350
1351 /* Allocate memory to store the performance data */
1352 static void init_perfdata(t_perf *perfdata[], int ntprs, int datasets, int repeats)
1353 {
1354     int i, j, k;
1355
1356
1357     for (k=0; k<ntprs; k++)
1358     {
1359         snew(perfdata[k], datasets);
1360         for (i=0; i<datasets; i++)
1361         {
1362             for (j=0; j<repeats; j++)
1363             {
1364                 snew(perfdata[k][i].Gcycles   , repeats);
1365                 snew(perfdata[k][i].ns_per_day, repeats);
1366                 snew(perfdata[k][i].PME_f_load, repeats);
1367             }
1368         }
1369     }
1370 }
1371
1372
1373 static void do_the_tests(
1374         FILE *fp,                   /* General g_tune_pme output file         */
1375         char **tpr_names,           /* Filenames of the input files to test   */
1376         int maxPMEnodes,            /* Max fraction of nodes to use for PME   */
1377         int minPMEnodes,            /* Min fraction of nodes to use for PME   */
1378         int npme_fixed,             /* If >= -1, test fixed number of PME
1379                                      * nodes only                             */
1380         const char *npmevalues_opt, /* Which -npme values should be tested    */
1381         t_perf **perfdata,          /* Here the performace data is stored     */
1382         int *pmeentries,            /* Entries in the nPMEnodes list          */
1383         int repeats,                /* Repeat each test this often            */
1384         int nnodes,                 /* Total number of nodes = nPP + nPME     */
1385         int nr_tprs,                /* Total number of tpr files to test      */
1386         gmx_bool bThreads,          /* Threads or MPI?                        */
1387         char *cmd_mpirun,           /* mpirun command string                  */
1388         char *cmd_np,               /* "-np", "-n", whatever mpirun needs     */
1389         char *cmd_mdrun,            /* mdrun command string                   */
1390         char *cmd_args_bench,       /* arguments for mdrun in a string        */
1391         const t_filenm *fnm,        /* List of filenames from command line    */
1392         int nfile,                  /* Number of files specified on the cmdl. */
1393         int sim_part,               /* For checkpointing                      */
1394         int presteps,               /* DLB equilibration steps, is checked    */
1395         gmx_large_int_t cpt_steps)  /* Time step counter in the checkpoint    */
1396 {
1397     int     i,nr,k,ret,count=0,totaltests;
1398     int     *nPMEnodes=NULL;
1399     t_perf  *pd=NULL;
1400     int     cmdline_length;
1401     char    *command, *cmd_stub;
1402     char    buf[STRLEN];
1403     gmx_bool    bResetProblem=FALSE;
1404
1405
1406     /* This string array corresponds to the eParselog enum type at the start
1407      * of this file */
1408     const char* ParseLog[] = {"OK.",
1409                               "Logfile not found!",
1410                               "No timings, logfile truncated?",
1411                               "Run was terminated.",
1412                               "Counters were not reset properly.",
1413                               "No DD grid found for these settings.",
1414                               "TPX version conflict!",
1415                               "mdrun was not started in parallel!",
1416                               "A fatal error occured!" };
1417     char    str_PME_f_load[13];
1418
1419
1420     /* Allocate space for the mdrun command line. 100 extra characters should
1421        be more than enough for the -npme etcetera arguments */
1422     cmdline_length =  strlen(cmd_mpirun)
1423                     + strlen(cmd_np)
1424                     + strlen(cmd_mdrun)
1425                     + strlen(cmd_args_bench)
1426                     + strlen(tpr_names[0]) + 100;
1427     snew(command , cmdline_length);
1428     snew(cmd_stub, cmdline_length);
1429
1430     /* Construct the part of the command line that stays the same for all tests: */
1431     if (bThreads)
1432     {
1433         sprintf(cmd_stub, "%s%s", cmd_mdrun, cmd_np);
1434     }
1435     else
1436     {
1437         sprintf(cmd_stub, "%s%s%s ", cmd_mpirun, cmd_np, cmd_mdrun);
1438     }
1439
1440     /* Create a list of numbers of PME nodes to test */
1441     if (npme_fixed < -1)
1442     {
1443         make_npme_list(npmevalues_opt, pmeentries, &nPMEnodes,
1444                 nnodes, minPMEnodes, maxPMEnodes);
1445     }
1446     else
1447     {
1448         *pmeentries  = 1;
1449         snew(nPMEnodes, 1);
1450         nPMEnodes[0] = npme_fixed;
1451         fprintf(stderr, "Will use a fixed number of %d PME-only nodes.\n", nPMEnodes[0]);
1452     }
1453
1454     if (0 == repeats)
1455     {
1456         fprintf(fp, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1457         fclose(fp);
1458         finalize(opt2fn("-p", nfile, fnm));
1459         exit(0);
1460     }
1461
1462     /* Allocate one dataset for each tpr input file: */
1463     init_perfdata(perfdata, nr_tprs, *pmeentries, repeats);
1464
1465     /*****************************************/
1466     /* Main loop over all tpr files to test: */
1467     /*****************************************/
1468     totaltests = nr_tprs*(*pmeentries)*repeats;
1469     for (k=0; k<nr_tprs;k++)
1470     {
1471         fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
1472         fprintf(fp, "PME nodes      Gcycles       ns/day        PME/f    Remark\n");
1473         /* Loop over various numbers of PME nodes: */
1474         for (i = 0; i < *pmeentries; i++)
1475         {
1476             pd = &perfdata[k][i];
1477
1478             /* Loop over the repeats for each scenario: */
1479             for (nr = 0; nr < repeats; nr++)
1480             {
1481                 pd->nPMEnodes = nPMEnodes[i];
1482
1483                 /* Add -npme and -s to the command line and save it. Note that
1484                  * the -passall (if set) options requires cmd_args_bench to be
1485                  * at the end of the command line string */
1486                 snew(pd->mdrun_cmd_line, cmdline_length);
1487                 sprintf(pd->mdrun_cmd_line, "%s-npme %d -s %s %s",
1488                         cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);
1489
1490                 /* Do a benchmark simulation: */
1491                 if (repeats > 1)
1492                     sprintf(buf, ", pass %d/%d", nr+1, repeats);
1493                 else
1494                     buf[0]='\0';
1495                 fprintf(stdout, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
1496                         (100.0*count)/totaltests,
1497                         k+1, nr_tprs, i+1, *pmeentries, buf);
1498                 make_backup(opt2fn("-err",nfile,fnm));
1499                 sprintf(command, "%s 1> /dev/null 2>%s", pd->mdrun_cmd_line, opt2fn("-err",nfile,fnm));
1500                 fprintf(stdout, "%s\n", pd->mdrun_cmd_line);
1501                 gmx_system_call(command);
1502
1503                 /* Collect the performance data from the log file; also check stderr
1504                  * for fatal errors */
1505                 ret = parse_logfile(opt2fn("-bg",nfile,fnm), opt2fn("-err",nfile,fnm),
1506                         pd, nr, presteps, cpt_steps, nnodes);
1507                 if ((presteps > 0) && (ret == eParselogResetProblem))
1508                     bResetProblem = TRUE;
1509
1510                 if (-1 == pd->nPMEnodes)
1511                     sprintf(buf, "(%3d)", pd->guessPME);
1512                 else
1513                     sprintf(buf, "     ");
1514
1515                 /* Nicer output */
1516                 if (pd->PME_f_load[nr] > 0.0)
1517                     sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load[nr]);
1518                 else
1519                     sprintf(str_PME_f_load, "%s", "         -  ");
1520
1521                 /* Write the data we got to disk */
1522                 fprintf(fp, "%4d%s %12.3f %12.3f %s    %s", pd->nPMEnodes,
1523                         buf, pd->Gcycles[nr], pd->ns_per_day[nr], str_PME_f_load, ParseLog[ret]);
1524                 if (! (ret==eParselogOK || ret==eParselogNoDDGrid || ret==eParselogNotFound) )
1525                     fprintf(fp, " Check %s file for problems.", ret==eParselogFatal? "err":"log");
1526                 fprintf(fp, "\n");
1527                 fflush(fp);
1528                 count++;
1529
1530                 /* Do some cleaning up and delete the files we do not need any more */
1531                 cleanup(fnm, nfile, k, nnodes, pd->nPMEnodes, nr, ret==eParselogFatal);
1532
1533                 /* If the first run with this number of processors already failed, do not try again: */
1534                 if (pd->Gcycles[0] <= 0.0 && repeats > 1)
1535                 {
1536                     fprintf(stdout, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
1537                     count += repeats-(nr+1);
1538                     break;
1539                 }
1540             } /* end of repeats loop */
1541         } /* end of -npme loop */
1542     } /* end of tpr file loop */
1543     if (bResetProblem)
1544     {
1545         sep_line(fp);
1546         fprintf(fp, "WARNING: The cycle and time step counters could not be reset\n"
1547                     "properly. The reason could be that mpirun did not manage to\n"
1548                     "export the environment variable GMX_RESET_COUNTER. You might\n"
1549                     "have to give a special switch to mpirun for that.\n"
1550                     "Alternatively, you can manually set GMX_RESET_COUNTER to the\n"
1551                     "value normally provided by -presteps.");
1552         sep_line(fp);
1553     }
1554     sfree(command);
1555     sfree(cmd_stub);
1556 }
1557
1558
1559 static void check_input(
1560         int nnodes,
1561         int repeats,
1562         int *ntprs,
1563         real *upfac,
1564         real *downfac,
1565         real maxPMEfraction,
1566         real minPMEfraction,
1567         int  npme_fixed,
1568         real fourierspacing,
1569         gmx_large_int_t bench_nsteps,
1570         const t_filenm *fnm,
1571         int nfile,
1572         int sim_part,
1573         int presteps,
1574         int npargs,
1575         t_pargs *pa)
1576 {
1577     /* Make sure the input file exists */
1578     if (!gmx_fexist(opt2fn("-s",nfile,fnm)))
1579         gmx_fatal(FARGS, "File %s not found.", opt2fn("-s",nfile,fnm));
1580
1581     /* Make sure that the checkpoint file is not overwritten by the benchmark runs */
1582     if ( (0 == strcmp(opt2fn("-cpi",nfile,fnm), opt2fn("-cpo",nfile,fnm)) ) && (sim_part > 1) )
1583         gmx_fatal(FARGS, "Checkpoint input and output file must not be identical,\nbecause then the input file might change during the benchmarks.");
1584
1585     /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
1586     if (repeats < 0)
1587         gmx_fatal(FARGS, "Number of repeats < 0!");
1588
1589     /* Check number of nodes */
1590     if (nnodes < 1)
1591         gmx_fatal(FARGS, "Number of nodes/threads must be a positive integer.");
1592
1593     /* Automatically choose -ntpr if not set */
1594     if (*ntprs < 1)
1595     {
1596         if (nnodes < 16)
1597             *ntprs = 1;
1598         else
1599             *ntprs = 3;
1600         fprintf(stderr, "Will test %d tpr file%s.\n", *ntprs, *ntprs==1?"":"s");
1601     }
1602     else
1603     {
1604         if (1 == *ntprs)
1605             fprintf(stderr, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n");
1606     }
1607
1608     if ( is_equal(*downfac,*upfac) && (*ntprs > 2) && !(fourierspacing > 0.0))
1609     {
1610         fprintf(stderr, "WARNING: Resetting -ntpr to 2 since both scaling factors are the same.\n"
1611                         "Please choose upfac unequal to downfac to test various PME grid settings\n");
1612         *ntprs = 2;
1613     }
1614
1615     /* Check whether max and min fraction are within required values */
1616     if (maxPMEfraction > 0.5 || maxPMEfraction < 0)
1617         gmx_fatal(FARGS, "-max must be between 0 and 0.5");
1618     if (minPMEfraction > 0.5 || minPMEfraction < 0)
1619         gmx_fatal(FARGS, "-min must be between 0 and 0.5");
1620     if (maxPMEfraction < minPMEfraction)
1621         gmx_fatal(FARGS, "-max must be larger or equal to -min");
1622
1623     /* Check whether the number of steps - if it was set - has a reasonable value */
1624     if (bench_nsteps < 0)
1625         gmx_fatal(FARGS, "Number of steps must be positive.");
1626
1627     if (bench_nsteps > 10000 || bench_nsteps < 100)
1628     {
1629         fprintf(stderr, "WARNING: steps=");
1630         fprintf(stderr, gmx_large_int_pfmt, bench_nsteps);
1631         fprintf(stderr, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps < 100)? "few" : "many");
1632     }
1633
1634     if (presteps < 0)
1635     {
1636         gmx_fatal(FARGS, "Cannot have a negative number of presteps.\n");
1637     }
1638
1639     if (*upfac <= 0.0 || *downfac <= 0.0 || *downfac > *upfac)
1640         gmx_fatal(FARGS, "Both scaling factors must be larger than zero and upper\n"
1641                          "scaling limit (%f) must be larger than lower limit (%f).",
1642                          *upfac, *downfac);
1643
1644     if (*downfac < 0.75 || *upfac > 1.25)
1645         fprintf(stderr, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1646
1647     if (fourierspacing < 0)
1648         gmx_fatal(FARGS, "Please choose a positive value for fourierspacing.");
1649
1650     /* Make shure that the scaling factor options are compatible with the number of tprs */
1651     if ( (*ntprs < 3) && ( opt2parg_bSet("-upfac",npargs,pa) || opt2parg_bSet("-downfac",npargs,pa) ) )
1652     {
1653         if (opt2parg_bSet("-upfac",npargs,pa) && opt2parg_bSet("-downfac",npargs,pa) && !is_equal(*upfac,*downfac))
1654         {
1655             fprintf(stderr, "NOTE: Specify -ntpr > 2 for both scaling factors to take effect.\n"
1656                              "(upfac=%f, downfac=%f)\n", *upfac, *downfac);
1657         }
1658         if (opt2parg_bSet("-upfac",npargs,pa))
1659             *downfac = *upfac;
1660         if (opt2parg_bSet("-downfac",npargs,pa))
1661             *upfac = *downfac;
1662     }
1663     if ( (2 == *ntprs) && (opt2parg_bSet("-downfac",npargs,pa)) && !is_equal(*downfac, 1.0))
1664     {
1665         *upfac = 1.0;
1666     }
1667
1668     /* If a fixed number of PME nodes is set we do rcoulomb and PME gird tuning
1669      * only. We need to check whether the requested number of PME-only nodes
1670      * makes sense. */
1671     if (npme_fixed > -1)
1672     {
1673         /* No more than 50% of all nodes can be assigned as PME-only nodes. */
1674         if (2*npme_fixed > nnodes)
1675         {
1676             gmx_fatal(FARGS, "Cannot have more than %d PME-only nodes for a total of %d nodes (you chose %d).\n",
1677                              nnodes/2, nnodes, npme_fixed);
1678         }
1679         if ((npme_fixed > 0) && (5*npme_fixed < nnodes))
1680         {
1681             fprintf(stderr, "WARNING: Only %g percent of the nodes are assigned as PME-only nodes.\n",
1682                              100.0*((real)npme_fixed / (real)nnodes));
1683         }
1684         if (opt2parg_bSet("-min",npargs,pa) || opt2parg_bSet("-max",npargs,pa))
1685         {
1686             fprintf(stderr, "NOTE: The -min, -max, and -npme options have no effect when a\n"
1687                             "      fixed number of PME-only nodes is requested with -fix.\n");
1688         }
1689     }
1690 }
1691
1692
1693 /* Returns TRUE when "opt" is a switch for g_tune_pme itself */
1694 static gmx_bool is_main_switch(char *opt)
1695 {
1696     if ( (0 == strcmp(opt,"-s"        ))
1697       || (0 == strcmp(opt,"-p"        ))
1698       || (0 == strcmp(opt,"-launch"   ))
1699       || (0 == strcmp(opt,"-r"        ))
1700       || (0 == strcmp(opt,"-ntpr"     ))
1701       || (0 == strcmp(opt,"-max"      ))
1702       || (0 == strcmp(opt,"-min"      ))
1703       || (0 == strcmp(opt,"-upfac"    ))
1704       || (0 == strcmp(opt,"-downfac"  ))
1705       || (0 == strcmp(opt,"-fix"      ))
1706       || (0 == strcmp(opt,"-four"     ))
1707       || (0 == strcmp(opt,"-steps"    ))
1708       || (0 == strcmp(opt,"-simsteps" ))
1709       || (0 == strcmp(opt,"-resetstep"))
1710       || (0 == strcmp(opt,"-so"       ))
1711       || (0 == strcmp(opt,"-npstring" ))
1712       || (0 == strcmp(opt,"-npme"     ))
1713       || (0 == strcmp(opt,"-passall"  )) )
1714     return TRUE;
1715
1716     return FALSE;
1717 }
1718
1719
1720 /* Returns TRUE when "opt" is needed at launch time */
1721 static gmx_bool is_launch_option(char *opt, gmx_bool bSet)
1722 {
1723     if (bSet)
1724         return TRUE;
1725     else
1726         return FALSE;
1727 }
1728
1729
1730 /* Returns TRUE when "opt" is needed at launch time */
1731 static gmx_bool is_launch_file(char *opt, gmx_bool bSet)
1732 {
1733     /* We need all options that were set on the command line
1734      * and that do not start with -b */
1735     if (0 == strncmp(opt,"-b", 2))
1736         return FALSE;
1737
1738     if (bSet)
1739         return TRUE;
1740     else
1741         return FALSE;
1742 }
1743
1744
1745 /* Returns TRUE when "opt" gives an option needed for the benchmarks runs */
1746 static gmx_bool is_bench_option(char *opt, gmx_bool bSet)
1747 {
1748     /* If option is set, we might need it for the benchmarks.
1749      * This includes -cpi */
1750     if (bSet)
1751     {
1752         if ( (0 == strcmp(opt, "-append"   ))
1753           || (0 == strcmp(opt, "-maxh"     ))
1754           || (0 == strcmp(opt, "-deffnm"   ))
1755           || (0 == strcmp(opt, "-resethway"))
1756           || (0 == strcmp(opt, "-cpnum"    )) )
1757             return FALSE;
1758         else
1759             return TRUE;
1760     }
1761     else
1762         return FALSE;
1763 }
1764
1765
1766 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1767 static gmx_bool is_bench_file(char *opt, gmx_bool bSet, gmx_bool bOptional, gmx_bool bIsOutput)
1768 {
1769     /* All options starting with "-b" are for _b_enchmark files exclusively */
1770     if (0 == strncmp(opt,"-b", 2))
1771     {
1772         if (!bOptional || bSet)
1773             return TRUE;
1774         else
1775             return FALSE;
1776     }
1777     else
1778     {
1779         if (bIsOutput)
1780             return FALSE;
1781         else
1782             if (bSet) /* These are additional input files like -cpi -ei */
1783                 return TRUE;
1784             else
1785                 return FALSE;
1786     }
1787 }
1788
1789
1790 /* Adds 'buf' to 'str' */
1791 static void add_to_string(char **str, char *buf)
1792 {
1793     int len;
1794
1795
1796     len = strlen(*str) + strlen(buf) + 1;
1797     srenew(*str, len);
1798     strcat(*str, buf);
1799 }
1800
1801
1802 /* Create the command line for the benchmark as well as for the real run */
1803 static void create_command_line_snippets(
1804         gmx_bool     bThreads,
1805         int      presteps,
1806         int      nfile,
1807         t_filenm fnm[],
1808         int      npargs,
1809         t_pargs  *pa,
1810         const char *procstring,      /* How to pass the number of processors to $MPIRUN */
1811         char     *cmd_np[],          /* Actual command line snippet, e.g. '-np <N>' */
1812         char     *cmd_args_bench[],  /* command line arguments for benchmark runs */
1813         char     *cmd_args_launch[], /* command line arguments for simulation run */
1814         char     extra_args[])       /* Add this to the end of the command line */
1815 {
1816     int        i;
1817     char       *opt;
1818     const char *name;
1819     char       *np_or_nt;
1820 #define BUFLENGTH 255
1821     char       buf[BUFLENGTH];
1822     char       strbuf[BUFLENGTH];
1823     char       strbuf2[BUFLENGTH];
1824
1825
1826     if (bThreads)
1827         np_or_nt=strdup("-nt");
1828     else
1829         np_or_nt=strdup("-np");
1830
1831     /* strlen needs at least '\0' as a string: */
1832     snew(*cmd_args_bench ,1);
1833     snew(*cmd_args_launch,1);
1834     *cmd_args_launch[0]='\0';
1835     *cmd_args_bench[0] ='\0';
1836
1837
1838     /*******************************************/
1839     /* 1. Process other command line arguments */
1840     /*******************************************/
1841     for (i=0; i<npargs; i++)
1842     {
1843         /* What command line switch are we currently processing: */
1844         opt = (char *)pa[i].option;
1845         /* Skip options not meant for mdrun */
1846         if (!is_main_switch(opt))
1847         {
1848             /* Boolean arguments need to be generated in the -[no]argname format */
1849             if (pa[i].type == etBOOL)
1850             {
1851                 sprintf(strbuf,"-%s%s ",*pa[i].u.b ? "" : "no",opt+1);
1852             }
1853             else
1854             {
1855                 /* Print it to a string buffer, strip away trailing whitespaces that pa_val also returns: */
1856                 sprintf(strbuf2, "%s", pa_val(&pa[i],buf,BUFLENGTH));
1857                 rtrim(strbuf2);
1858                 sprintf(strbuf, "%s %s ", opt, strbuf2);
1859             }
1860
1861             /* We need the -np (or -nt) switch in a separate buffer - whether or not it was set! */
1862             if (0 == strcmp(opt,np_or_nt))
1863             {
1864                 if (strcmp(procstring, "none")==0 && !bThreads)
1865                 {
1866                     /* Omit -np <N> entirely */
1867                     snew(*cmd_np, 2);
1868                     sprintf(*cmd_np, " ");
1869                 }
1870                 else
1871                 {
1872                     /* This is the normal case with -np <N> */
1873                     snew(*cmd_np, strlen(procstring)+strlen(strbuf2)+4);
1874                     sprintf(*cmd_np, " %s %s ", bThreads? "-nt" : procstring, strbuf2);
1875                 }
1876             }
1877             else
1878             {
1879                 if (is_bench_option(opt,pa[i].bSet))
1880                     add_to_string(cmd_args_bench, strbuf);
1881
1882                 if (is_launch_option(opt,pa[i].bSet))
1883                     add_to_string(cmd_args_launch, strbuf);
1884             }
1885         }
1886     }
1887     if (presteps > 0)
1888     {
1889         /* Add equilibration steps to benchmark options */
1890         sprintf(strbuf, "-resetstep %d ", presteps);
1891         add_to_string(cmd_args_bench, strbuf);
1892     }
1893
1894     /********************/
1895     /* 2. Process files */
1896     /********************/
1897     for (i=0; i<nfile; i++)
1898     {
1899         opt  = (char *)fnm[i].opt;
1900         name = opt2fn(opt,nfile,fnm);
1901
1902         /* Strbuf contains the options, now let's sort out where we need that */
1903         sprintf(strbuf, "%s %s ", opt, name);
1904
1905         /* Skip options not meant for mdrun */
1906         if (!is_main_switch(opt))
1907         {
1908
1909             if ( is_bench_file(opt, opt2bSet(opt,nfile,fnm), is_optional(&fnm[i]), is_output(&fnm[i])) )
1910             {
1911                 /* All options starting with -b* need the 'b' removed,
1912                  * therefore overwrite strbuf */
1913                 if (0 == strncmp(opt, "-b", 2))
1914                     sprintf(strbuf, "-%s %s ", &opt[2], name);
1915
1916                 add_to_string(cmd_args_bench, strbuf);
1917             }
1918
1919             if ( is_launch_file(opt,opt2bSet(opt,nfile,fnm)) )
1920                 add_to_string(cmd_args_launch, strbuf);
1921         }
1922     }
1923
1924     add_to_string(cmd_args_bench , extra_args);
1925     add_to_string(cmd_args_launch, extra_args);
1926 #undef BUFLENGTH
1927 }
1928
1929
1930 /* Set option opt */
1931 static void setopt(const char *opt,int nfile,t_filenm fnm[])
1932 {
1933   int i;
1934
1935   for(i=0; (i<nfile); i++)
1936     if (strcmp(opt,fnm[i].opt)==0)
1937       fnm[i].flag |= ffSET;
1938 }
1939
1940
1941 static void couple_files_options(int nfile, t_filenm fnm[])
1942 {
1943     int i;
1944     gmx_bool bSet,bBench;
1945     char *opt;
1946     char buf[20];
1947
1948
1949     for (i=0; i<nfile; i++)
1950     {
1951         opt  = (char *)fnm[i].opt;
1952         bSet = ((fnm[i].flag & ffSET) != 0);
1953         bBench = (0 == strncmp(opt,"-b", 2));
1954
1955         /* Check optional files */
1956         /* If e.g. -eo is set, then -beo also needs to be set */
1957         if (is_optional(&fnm[i]) && bSet && !bBench)
1958         {
1959             sprintf(buf, "-b%s", &opt[1]);
1960             setopt(buf,nfile,fnm);
1961         }
1962         /* If -beo is set, then -eo also needs to be! */
1963         if (is_optional(&fnm[i]) && bSet && bBench)
1964         {
1965             sprintf(buf, "-%s", &opt[2]);
1966             setopt(buf,nfile,fnm);
1967         }
1968     }
1969 }
1970
1971
1972 static double gettime()
1973 {
1974 #ifdef HAVE_GETTIMEOFDAY
1975     struct timeval t;
1976     double seconds;
1977
1978     gettimeofday(&t,NULL);
1979
1980     seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
1981
1982     return seconds;
1983 #else
1984     double  seconds;
1985
1986     seconds = time(NULL);
1987
1988     return seconds;
1989 #endif
1990 }
1991
1992
1993 #define BENCHSTEPS (1000)
1994
1995 int gmx_tune_pme(int argc,char *argv[])
1996 {
1997     const char *desc[] = {
1998             "For a given number [TT]-np[tt] or [TT]-nt[tt] of processors/threads, this program systematically",
1999             "times [TT]mdrun[tt] with various numbers of PME-only nodes and determines",
2000             "which setting is fastest. It will also test whether performance can",
2001             "be enhanced by shifting load from the reciprocal to the real space",
2002             "part of the Ewald sum. ",
2003             "Simply pass your [TT].tpr[tt] file to [TT]g_tune_pme[tt] together with other options",
2004             "for [TT]mdrun[tt] as needed.[PAR]",
2005             "Which executables are used can be set in the environment variables",
2006             "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
2007             "will be used as defaults. Note that for certain MPI frameworks you",
2008             "need to provide a machine- or hostfile. This can also be passed",
2009             "via the MPIRUN variable, e.g.[PAR]",
2010             "[TT]export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"[tt][PAR]",
2011             "Please call [TT]g_tune_pme[tt] with the normal options you would pass to",
2012             "[TT]mdrun[tt] and add [TT]-np[tt] for the number of processors to perform the",
2013             "tests on, or [TT]-nt[tt] for the number of threads. You can also add [TT]-r[tt]",
2014             "to repeat each test several times to get better statistics. [PAR]",
2015             "[TT]g_tune_pme[tt] can test various real space / reciprocal space workloads",
2016             "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
2017             "written with enlarged cutoffs and smaller fourier grids respectively.",
2018             "Typically, the first test (number 0) will be with the settings from the input",
2019             "[TT].tpr[tt] file; the last test (number [TT]ntpr[tt]) will have cutoffs multiplied",
2020             "by (and at the same time fourier grid dimensions divided by) the scaling",
2021             "factor [TT]-fac[tt] (default 1.2). The remaining [TT].tpr[tt] files will have about ",
2022             "equally-spaced values in between these extremes. [BB]Note[bb] that you can set [TT]-ntpr[tt] to 1",
2023             "if you just want to find the optimal number of PME-only nodes; in that case",
2024             "your input [TT].tpr[tt] file will remain unchanged.[PAR]",
2025             "For the benchmark runs, the default of 1000 time steps should suffice for most",
2026             "MD systems. The dynamic load balancing needs about 100 time steps",
2027             "to adapt to local load imbalances, therefore the time step counters",
2028             "are by default reset after 100 steps. For large systems",
2029             "(>1M atoms) you may have to set [TT]-resetstep[tt] to a higher value.",
2030             "From the 'DD' load imbalance entries in the md.log output file you",
2031             "can tell after how many steps the load is sufficiently balanced. Example call:[PAR]"
2032             "[TT]g_tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
2033             "After calling [TT]mdrun[tt] several times, detailed performance information",
2034             "is available in the output file [TT]perf.out.[tt] ",
2035             "[BB]Note[bb] that during the benchmarks, a couple of temporary files are written",
2036             "(options [TT]-b[tt]*), these will be automatically deleted after each test.[PAR]",
2037             "If you want the simulation to be started automatically with the",
2038             "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
2039     };
2040
2041     int        nnodes =1;
2042     int        repeats=2;
2043     int        pmeentries=0; /* How many values for -npme do we actually test for each tpr file */
2044     real       maxPMEfraction=0.50;
2045     real       minPMEfraction=0.25;
2046     int        maxPMEnodes, minPMEnodes;
2047     int        npme_fixed=-2;             /* If >= -1, use only this number
2048                                            * of PME-only nodes                */
2049     real       downfac=1.0,upfac=1.2;
2050     int        ntprs=0;
2051     real       fs=0.0;                    /* 0 indicates: not set by the user */
2052     gmx_large_int_t bench_nsteps=BENCHSTEPS;
2053     gmx_large_int_t new_sim_nsteps=-1;   /* -1 indicates: not set by the user */
2054     gmx_large_int_t cpt_steps=0;         /* Step counter in .cpt input file   */
2055     int        presteps=100;    /* Do a full cycle reset after presteps steps */
2056     gmx_bool       bOverwrite=FALSE, bKeepTPR;
2057     gmx_bool       bLaunch=FALSE;
2058     gmx_bool       bPassAll=FALSE;
2059     char       *ExtraArgs=NULL;
2060     char       **tpr_names=NULL;
2061     const char *simulation_tpr=NULL;
2062     int        best_npme, best_tpr;
2063     int        sim_part = 1;     /* For benchmarks with checkpoint files */
2064
2065     /* Default program names if nothing else is found */
2066     char        *cmd_mpirun=NULL, *cmd_mdrun=NULL;
2067     char        *cmd_args_bench, *cmd_args_launch;
2068     char        *cmd_np=NULL;
2069
2070     t_perf      **perfdata=NULL;
2071     t_inputinfo *info;
2072     int         i;
2073     FILE        *fp;
2074     t_commrec   *cr;
2075
2076     /* Print out how long the tuning took */
2077     double      seconds;
2078
2079     static t_filenm fnm[] = {
2080       /* g_tune_pme */
2081       { efOUT, "-p",      "perf",     ffWRITE },
2082       { efLOG, "-err",    "errors",   ffWRITE },
2083       { efTPX, "-so",     "tuned",    ffWRITE },
2084       /* mdrun: */
2085       { efTPX, NULL,      NULL,       ffREAD },
2086       { efTRN, "-o",      NULL,       ffWRITE },
2087       { efXTC, "-x",      NULL,       ffOPTWR },
2088       { efCPT, "-cpi",    NULL,       ffOPTRD },
2089       { efCPT, "-cpo",    NULL,       ffOPTWR },
2090       { efSTO, "-c",      "confout",  ffWRITE },
2091       { efEDR, "-e",      "ener",     ffWRITE },
2092       { efLOG, "-g",      "md",       ffWRITE },
2093       { efXVG, "-dhdl",   "dhdl",     ffOPTWR },
2094       { efXVG, "-field",  "field",    ffOPTWR },
2095       { efXVG, "-table",  "table",    ffOPTRD },
2096       { efXVG, "-tablep", "tablep",   ffOPTRD },
2097       { efXVG, "-tableb", "table",    ffOPTRD },
2098       { efTRX, "-rerun",  "rerun",    ffOPTRD },
2099       { efXVG, "-tpi",    "tpi",      ffOPTWR },
2100       { efXVG, "-tpid",   "tpidist",  ffOPTWR },
2101       { efEDI, "-ei",     "sam",      ffOPTRD },
2102       { efEDO, "-eo",     "sam",      ffOPTWR },
2103       { efGCT, "-j",      "wham",     ffOPTRD },
2104       { efGCT, "-jo",     "bam",      ffOPTWR },
2105       { efXVG, "-ffout",  "gct",      ffOPTWR },
2106       { efXVG, "-devout", "deviatie", ffOPTWR },
2107       { efXVG, "-runav",  "runaver",  ffOPTWR },
2108       { efXVG, "-px",     "pullx",    ffOPTWR },
2109       { efXVG, "-pf",     "pullf",    ffOPTWR },
2110       { efMTX, "-mtx",    "nm",       ffOPTWR },
2111       { efNDX, "-dn",     "dipole",   ffOPTWR },
2112       /* Output files that are deleted after each benchmark run */
2113       { efTRN, "-bo",     "bench",    ffWRITE },
2114       { efXTC, "-bx",     "bench",    ffWRITE },
2115       { efCPT, "-bcpo",   "bench",    ffWRITE },
2116       { efSTO, "-bc",     "bench",    ffWRITE },
2117       { efEDR, "-be",     "bench",    ffWRITE },
2118       { efLOG, "-bg",     "bench",    ffWRITE },
2119       { efEDO, "-beo",    "bench",    ffOPTWR },
2120       { efXVG, "-bdhdl",  "benchdhdl",ffOPTWR },
2121       { efXVG, "-bfield", "benchfld" ,ffOPTWR },
2122       { efXVG, "-btpi",   "benchtpi", ffOPTWR },
2123       { efXVG, "-btpid",  "benchtpid",ffOPTWR },
2124       { efGCT, "-bjo",    "bench",    ffOPTWR },
2125       { efXVG, "-bffout", "benchgct", ffOPTWR },
2126       { efXVG, "-bdevout","benchdev", ffOPTWR },
2127       { efXVG, "-brunav", "benchrnav",ffOPTWR },
2128       { efXVG, "-bpx",    "benchpx",  ffOPTWR },
2129       { efXVG, "-bpf",    "benchpf",  ffOPTWR },
2130       { efMTX, "-bmtx",   "benchn",   ffOPTWR },
2131       { efNDX, "-bdn",    "bench",    ffOPTWR }
2132     };
2133
2134     /* Command line options of mdrun */
2135     gmx_bool bDDBondCheck = TRUE;
2136     gmx_bool bDDBondComm  = TRUE;
2137     gmx_bool bVerbose     = FALSE;
2138     gmx_bool bCompact     = TRUE;
2139     gmx_bool bSepPot      = FALSE;
2140     gmx_bool bRerunVSite  = FALSE;
2141     gmx_bool bIonize      = FALSE;
2142     gmx_bool bConfout     = TRUE;
2143     gmx_bool bReproducible = FALSE;
2144     gmx_bool bThreads     = FALSE;
2145
2146     int  nmultisim=0;
2147     int  nstglobalcomm=-1;
2148     int  repl_ex_nst=0;
2149     int  repl_ex_seed=-1;
2150     int  nstepout=100;
2151     int  nthreads=1;
2152
2153     const char *ddno_opt[ddnoNR+1] =
2154       { NULL, "interleave", "pp_pme", "cartesian", NULL };
2155     const char *dddlb_opt[] =
2156       { NULL, "auto", "no", "yes", NULL };
2157     const char *procstring[] =
2158       { NULL, "-np", "-n", "none", NULL };
2159     const char *npmevalues_opt[] =
2160       { NULL, "auto", "all", "subset", NULL };
2161     real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
2162     char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
2163     char *deffnm=NULL;
2164 #define STD_CPT_PERIOD (15.0)
2165     real cpt_period=STD_CPT_PERIOD,max_hours=-1;
2166     gmx_bool bAppendFiles=TRUE;
2167     gmx_bool bKeepAndNumCPT=FALSE;
2168     gmx_bool bResetCountersHalfWay=FALSE;
2169     output_env_t oenv=NULL;
2170
2171     t_pargs pa[] = {
2172       /***********************/
2173       /* g_tune_pme options: */
2174       /***********************/
2175       { "-np",       FALSE, etINT,  {&nnodes},
2176         "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" },
2177       { "-npstring", FALSE, etENUM, {procstring},
2178         "Specify the number of processors to [TT]$MPIRUN[tt] using this string"},
2179       { "-passall",  FALSE, etBOOL, {&bPassAll},
2180         "HIDDENPut arguments unknown to [TT]mdrun[tt] at the end of the command line. Can be used for debugging purposes. "},
2181       { "-nt",       FALSE, etINT,  {&nthreads},
2182         "Number of threads to run the tests on (turns MPI & mpirun off)"},
2183       { "-r",        FALSE, etINT,  {&repeats},
2184         "Repeat each test this often" },
2185       { "-max",      FALSE, etREAL, {&maxPMEfraction},
2186         "Max fraction of PME nodes to test with" },
2187       { "-min",      FALSE, etREAL, {&minPMEfraction},
2188         "Min fraction of PME nodes to test with" },
2189       { "-npme",     FALSE, etENUM, {npmevalues_opt},
2190         "Benchmark all possible values for [TT]-npme[tt] or just the subset that is expected to perform well"},
2191       { "-fix",      FALSE, etINT, {&npme_fixed},
2192         "If >= -1, do not vary the number of PME-only nodes, instead use this fixed value and only vary rcoulomb and the PME grid spacing."},
2193       { "-upfac",    FALSE, etREAL, {&upfac},
2194         "Upper limit for rcoulomb scaling factor (Note that rcoulomb upscaling results in fourier grid downscaling)" },
2195       { "-downfac",  FALSE, etREAL, {&downfac},
2196         "Lower limit for rcoulomb scaling factor" },
2197       { "-ntpr",     FALSE, etINT,  {&ntprs},
2198         "Number of [TT].tpr[tt] files to benchmark. Create this many files with scaling factors ranging from 1.0 to fac. If < 1, automatically choose the number of [TT].tpr[tt] files to test" },
2199       { "-four",     FALSE, etREAL, {&fs},
2200         "Use this fourierspacing value instead of the grid found in the [TT].tpr[tt] input file. (Spacing applies to a scaling factor of 1.0 if multiple [TT].tpr[tt] files are written)" },
2201       { "-steps",    FALSE, etGMX_LARGE_INT, {&bench_nsteps},
2202         "Take timings for this many steps in the benchmark runs" },
2203       { "-resetstep",FALSE, etINT,  {&presteps},
2204         "Let dlb equilibrate this many steps before timings are taken (reset cycle counters after this many steps)" },
2205       { "-simsteps", FALSE, etGMX_LARGE_INT, {&new_sim_nsteps},
2206         "If non-negative, perform this many steps in the real run (overwrites nsteps from [TT].tpr[tt], add [TT].cpt[tt] steps)" },
2207       { "-launch",   FALSE, etBOOL, {&bLaunch},
2208         "Lauch the real simulation after optimization" },
2209       /******************/
2210       /* mdrun options: */
2211       /******************/
2212       { "-deffnm",    FALSE, etSTR, {&deffnm},
2213           "Set the default filename for all file options at launch time" },
2214       { "-ddorder",   FALSE, etENUM, {ddno_opt},
2215         "DD node order" },
2216       { "-ddcheck",   FALSE, etBOOL, {&bDDBondCheck},
2217         "Check for all bonded interactions with DD" },
2218       { "-ddbondcomm",FALSE, etBOOL, {&bDDBondComm},
2219         "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
2220       { "-rdd",       FALSE, etREAL, {&rdd},
2221         "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
2222       { "-rcon",      FALSE, etREAL, {&rconstr},
2223         "Maximum distance for P-LINCS (nm), 0 is estimate" },
2224       { "-dlb",       FALSE, etENUM, {dddlb_opt},
2225         "Dynamic load balancing (with DD)" },
2226       { "-dds",       FALSE, etREAL, {&dlb_scale},
2227         "Minimum allowed dlb scaling of the DD cell size" },
2228       { "-ddcsx",     FALSE, etSTR,  {&ddcsx},
2229         "HIDDENThe DD cell sizes in x" },
2230       { "-ddcsy",     FALSE, etSTR,  {&ddcsy},
2231         "HIDDENThe DD cell sizes in y" },
2232       { "-ddcsz",     FALSE, etSTR,  {&ddcsz},
2233         "HIDDENThe DD cell sizes in z" },
2234       { "-gcom",      FALSE, etINT,  {&nstglobalcomm},
2235         "Global communication frequency" },
2236       { "-v",         FALSE, etBOOL, {&bVerbose},
2237         "Be loud and noisy" },
2238       { "-compact",   FALSE, etBOOL, {&bCompact},
2239         "Write a compact log file" },
2240       { "-seppot",    FALSE, etBOOL, {&bSepPot},
2241         "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
2242       { "-pforce",    FALSE, etREAL, {&pforce},
2243         "Print all forces larger than this (kJ/mol nm)" },
2244       { "-reprod",    FALSE, etBOOL, {&bReproducible},
2245         "Try to avoid optimizations that affect binary reproducibility" },
2246       { "-cpt",       FALSE, etREAL, {&cpt_period},
2247         "Checkpoint interval (minutes)" },
2248       { "-cpnum",   FALSE, etBOOL, {&bKeepAndNumCPT},
2249         "Keep and number checkpoint files" },
2250       { "-append",    FALSE, etBOOL, {&bAppendFiles},
2251         "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
2252       { "-maxh",      FALSE, etREAL, {&max_hours},
2253         "Terminate after 0.99 times this time (hours)" },
2254       { "-multi",     FALSE, etINT,  {&nmultisim},
2255         "Do multiple simulations in parallel" },
2256       { "-replex",    FALSE, etINT,  {&repl_ex_nst},
2257         "Attempt replica exchange every # steps" },
2258       { "-reseed",    FALSE, etINT,  {&repl_ex_seed},
2259         "Seed for replica exchange, -1 is generate a seed" },
2260       { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
2261         "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
2262       { "-ionize",    FALSE, etBOOL, {&bIonize},
2263         "Do a simulation including the effect of an X-ray bombardment on your system" },
2264       { "-confout",   FALSE, etBOOL, {&bConfout},
2265         "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
2266       { "-stepout",   FALSE, etINT,  {&nstepout},
2267         "HIDDENFrequency of writing the remaining runtime" },
2268       { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
2269         "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt] (launch only)" }
2270     };
2271
2272
2273 #define NFILE asize(fnm)
2274
2275     CopyRight(stderr,argv[0]);
2276
2277     seconds = gettime();
2278
2279     parse_common_args(&argc,argv,PCA_NOEXIT_ON_ARGS,
2280                       NFILE,fnm,asize(pa),pa,asize(desc),desc,
2281                       0,NULL,&oenv);
2282
2283     /* Store the remaining unparsed command line entries in a string */
2284     snew(ExtraArgs, 1);
2285     ExtraArgs[0] = '\0';
2286     for (i=1; i<argc; i++) /* argc will now be 1 if everything was understood */
2287     {
2288         add_to_string(&ExtraArgs, argv[i]);
2289         add_to_string(&ExtraArgs, " ");
2290     }
2291     if ( !bPassAll && (ExtraArgs[0] != '\0') )
2292     {
2293         fprintf(stderr, "\nWARNING: The following arguments you provided have no effect:\n"
2294                         "%s\n"
2295                         "Use the -passall option to force them to appear on the command lines\n"
2296                         "for the benchmark simulations%s.\n\n",
2297                         ExtraArgs, bLaunch? " and at launch time" : "");
2298     }
2299
2300     if (opt2parg_bSet("-nt",asize(pa),pa))
2301     {
2302         bThreads=TRUE;
2303         if (opt2parg_bSet("-npstring",asize(pa),pa))
2304             fprintf(stderr, "WARNING: -npstring has no effect when using threads.\n");
2305
2306         if (nnodes > 1)
2307             gmx_fatal(FARGS, "Can't run multi-threaded MPI simulation yet!");
2308         /* and now we just set this; a bit of an ugly hack*/
2309         nnodes=nthreads;
2310     }
2311     /* Automatically set -beo options if -eo is set etc. */
2312     couple_files_options(NFILE,fnm);
2313
2314     /* Construct the command line arguments for benchmark runs
2315      * as well as for the simulation run
2316      */
2317     create_command_line_snippets(bThreads,presteps,NFILE,fnm,asize(pa),pa,procstring[0],
2318                                  &cmd_np, &cmd_args_bench, &cmd_args_launch,
2319                                  bPassAll? ExtraArgs : (char *)"");
2320
2321     /* Read in checkpoint file if requested */
2322     sim_part = 1;
2323     if(opt2bSet("-cpi",NFILE,fnm))
2324     {
2325         snew(cr,1);
2326         cr->duty=DUTY_PP; /* makes the following routine happy */
2327         read_checkpoint_simulation_part(opt2fn("-cpi",NFILE,fnm),
2328                                         &sim_part,&cpt_steps,cr,
2329                                         FALSE,NFILE,fnm,NULL,NULL);
2330         sfree(cr);
2331         sim_part++;
2332         /* sim_part will now be 1 if no checkpoint file was found */
2333         if (sim_part<=1)
2334             gmx_fatal(FARGS, "Checkpoint file %s not found!", opt2fn("-cpi",
2335                                                                      NFILE,
2336                                                                      fnm));
2337     }
2338
2339     /* Open performance output file and write header info */
2340     fp = ffopen(opt2fn("-p",NFILE,fnm),"w");
2341
2342     /* Make a quick consistency check of command line parameters */
2343     check_input(nnodes, repeats, &ntprs, &upfac, &downfac,
2344                 maxPMEfraction, minPMEfraction, npme_fixed,
2345                 fs, bench_nsteps, fnm, NFILE, sim_part, presteps,
2346                 asize(pa),pa);
2347
2348     /* Determine max and min number of PME nodes to test: */
2349     if ((nnodes > 2) && (npme_fixed >= -1))
2350     {
2351         maxPMEnodes = floor(maxPMEfraction*nnodes);
2352         minPMEnodes = max(floor(minPMEfraction*nnodes), 0);
2353         fprintf(stdout, "Will try runs with %d ", minPMEnodes);
2354         if (maxPMEnodes != minPMEnodes)
2355             fprintf(stdout, "- %d ", maxPMEnodes);
2356         fprintf(stdout, "PME-only nodes.\n  Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
2357     }
2358     else
2359     {
2360         maxPMEnodes = 0;
2361         minPMEnodes = 0;
2362     }
2363
2364     /* Get the commands we need to set up the runs from environment variables */
2365     get_program_paths(bThreads, &cmd_mpirun, cmd_np, &cmd_mdrun, repeats);
2366
2367     /* Print some header info to file */
2368     sep_line(fp);
2369     fprintf(fp, "\n      P E R F O R M A N C E   R E S U L T S\n");
2370     sep_line(fp);
2371     fprintf(fp, "%s for Gromacs %s\n", ShortProgram(),GromacsVersion());
2372     if (!bThreads)
2373     {
2374         fprintf(fp, "Number of nodes         : %d\n", nnodes);
2375         fprintf(fp, "The mpirun command is   : %s\n", cmd_mpirun);
2376         if ( strcmp(procstring[0], "none") != 0)
2377             fprintf(fp, "Passing # of nodes via  : %s\n", procstring[0]);
2378         else
2379             fprintf(fp, "Not setting number of nodes in system call\n");
2380     }
2381     else
2382         fprintf(fp, "Number of threads       : %d\n", nnodes);
2383
2384     fprintf(fp, "The mdrun  command is   : %s\n", cmd_mdrun);
2385     fprintf(fp, "mdrun args benchmarks   : %s\n", cmd_args_bench);
2386     fprintf(fp, "Benchmark steps         : ");
2387     fprintf(fp, gmx_large_int_pfmt, bench_nsteps);
2388     fprintf(fp, "\n");
2389     fprintf(fp, "dlb equilibration steps : %d\n", presteps);
2390     if (sim_part > 1)
2391     {
2392         fprintf(fp, "Checkpoint time step    : ");
2393         fprintf(fp, gmx_large_int_pfmt, cpt_steps);
2394         fprintf(fp, "\n");
2395     }
2396     if (bLaunch)
2397         fprintf(fp, "mdrun args at launchtime: %s\n", cmd_args_launch);
2398     if (!bPassAll && ExtraArgs[0] != '\0')
2399         fprintf(fp, "Unused arguments        : %s\n", ExtraArgs);
2400     if (new_sim_nsteps >= 0)
2401     {
2402         bOverwrite = TRUE;
2403         fprintf(stderr, "Note: Simulation input file %s will have ", opt2fn("-so",NFILE,fnm));
2404         fprintf(stderr, gmx_large_int_pfmt, new_sim_nsteps+cpt_steps);
2405         fprintf(stderr, " steps.\n");
2406         fprintf(fp, "Simulation steps        : ");
2407         fprintf(fp, gmx_large_int_pfmt, new_sim_nsteps);
2408         fprintf(fp, "\n");
2409     }
2410     if (repeats > 1)
2411         fprintf(fp, "Repeats for each test   : %d\n", repeats);
2412
2413     if (fs > 0.0)
2414     {
2415         fprintf(fp, "Requested grid spacing  : %f (This will be the grid spacing at a scaling factor of 1.0)\n", fs);
2416     }
2417
2418     if (npme_fixed >= -1)
2419     {
2420         fprintf(fp, "Fixing -npme at         : %d\n", npme_fixed);
2421     }
2422
2423     fprintf(fp, "Input file              : %s\n", opt2fn("-s",NFILE,fnm));
2424
2425     /* Allocate memory for the inputinfo struct: */
2426     snew(info, 1);
2427     info->nr_inputfiles = ntprs;
2428     for (i=0; i<ntprs; i++)
2429     {
2430         snew(info->rcoulomb , ntprs);
2431         snew(info->rvdw     , ntprs);
2432         snew(info->rlist    , ntprs);
2433         snew(info->rlistlong, ntprs);
2434         snew(info->nkx      , ntprs);
2435         snew(info->nky      , ntprs);
2436         snew(info->nkz      , ntprs);
2437         snew(info->fsx      , ntprs);
2438         snew(info->fsy      , ntprs);
2439         snew(info->fsz      , ntprs);
2440     }
2441     /* Make alternative tpr files to test: */
2442     snew(tpr_names, ntprs);
2443     for (i=0; i<ntprs; i++)
2444         snew(tpr_names[i], STRLEN);
2445
2446     /* It can be that ntprs is reduced by make_benchmark_tprs if not enough
2447      * different grids could be found. */
2448     make_benchmark_tprs(opt2fn("-s",NFILE,fnm), tpr_names, bench_nsteps+presteps,
2449             cpt_steps, upfac, downfac, &ntprs, fs, info, fp);
2450
2451
2452     /********************************************************************************/
2453     /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats  */
2454     /********************************************************************************/
2455     snew(perfdata, ntprs);
2456     do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npme_fixed, npmevalues_opt[0], perfdata, &pmeentries,
2457                  repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2458                  cmd_args_bench, fnm, NFILE, sim_part, presteps, cpt_steps);
2459
2460     fprintf(fp, "\nTuning took%8.1f minutes.\n", (gettime()-seconds)/60.0);
2461
2462     /* Analyse the results and give a suggestion for optimal settings: */
2463     bKeepTPR = analyze_data(fp, opt2fn("-p", NFILE, fnm), perfdata, nnodes, ntprs, pmeentries,
2464                             repeats, info, &best_tpr, &best_npme);
2465
2466     /* Take the best-performing tpr file and enlarge nsteps to original value */
2467     if ( bKeepTPR && !bOverwrite && !(fs > 0.0) )
2468     {
2469         simulation_tpr = opt2fn("-s",NFILE,fnm);
2470     }
2471     else
2472     {
2473         simulation_tpr = opt2fn("-so",NFILE,fnm);
2474         modify_PMEsettings(bOverwrite? (new_sim_nsteps+cpt_steps) :
2475                            info->orig_sim_steps, tpr_names[best_tpr],
2476                            simulation_tpr);
2477     }
2478
2479     /* Now start the real simulation if the user requested it ... */
2480     launch_simulation(bLaunch, fp, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2481                       cmd_args_launch, simulation_tpr, nnodes, best_npme);
2482     ffclose(fp);
2483
2484     /* ... or simply print the performance results to screen: */
2485     if (!bLaunch)
2486         finalize(opt2fn("-p", NFILE, fnm));
2487
2488     return 0;
2489 }