src/tools/gmx_tune_pme.c

   1 /*
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  10  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  11  * Copyright (c) 2001-2008, The GROMACS development team,
  12  * check out http://www.gromacs.org for more information.
  13
  14  * This program is free software; you can redistribute it and/or
  15  * modify it under the terms of the GNU General Public License
  16  * as published by the Free Software Foundation; either version 2
  17  * of the License, or (at your option) any later version.
  18  *
  19  * If you want to redistribute modifications, please consider that
  20  * scientific software is very special. Version control is crucial -
  21  * bugs must be traceable. We will be happy to consider code for
  22  * inclusion in the official distribution, but derived work must not
  23  * be called official GROMACS. Details are found in the README & COPYING
  24  * files - if they are missing, get the official version at www.gromacs.org.
  25  *
  26  * To help us fund GROMACS development, we humbly ask that you cite
  27  * the papers on the package - you can find them in the top README file.
  28  *
  29  * For more info, check our website at http://www.gromacs.org
  30  *
  31  * And Hey:
  32  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  33  */
  34 #ifdef HAVE_CONFIG_H
  35 #include <config.h>
  36 #endif
  37
  38 #include "statutil.h"
  39 #include "typedefs.h"
  40 #include "smalloc.h"
  41 #include "vec.h"
  42 #include "copyrite.h"
  43 #include "statutil.h"
  44 #include "tpxio.h"
  45 #include "string2.h"
  46 #include "readinp.h"
  47 #include "calcgrid.h"
  48 #include "checkpoint.h"
  49 #include "gmx_ana.h"
  50 #include "names.h"
  51
  52
  53
  54 enum {
  55   ddnoSEL, ddnoINTERLEAVE, ddnoPP_PME, ddnoCARTESIAN, ddnoNR
  56 };
  57
  58 /* Enum for situations that can occur during log file parsing, the
  59  * corresponding string entries can be found in do_the_tests() in
  60  * const char* ParseLog[] */
  61 enum {
  62     eParselogOK,
  63     eParselogNotFound,
  64     eParselogNoPerfData,
  65     eParselogTerm,
  66     eParselogResetProblem,
  67     eParselogNoDDGrid,
  68     eParselogTPXVersion,
  69     eParselogNotParallel,
  70     eParselogFatal,
  71     eParselogNr
  72 };
  73
  74
  75 typedef struct
  76 {
  77     int  nPMEnodes;       /* number of PME only nodes used in this test */
  78     int  nx, ny, nz;      /* DD grid */
  79     int  guessPME;        /* if nPMEnodes == -1, this is the guessed number of PME nodes */
  80     double *Gcycles;      /* This can contain more than one value if doing multiple tests */
  81     double Gcycles_Av;
  82     float *ns_per_day;
  83     float ns_per_day_Av;
  84     float *PME_f_load;    /* PME mesh/force load average*/
  85     float PME_f_load_Av;  /* Average average ;) ... */
  86     char *mdrun_cmd_line; /* Mdrun command line used for this test */
  87 } t_perf;
  88
  89
  90 typedef struct
  91 {
  92     int  nr_inputfiles;         /* The number of tpr and mdp input files */
  93     gmx_large_int_t orig_sim_steps;  /* Number of steps to be done in the real simulation */
  94     real *rcoulomb;             /* The coulomb radii [0...nr_inputfiles] */
  95     real *rvdw;                 /* The vdW radii */
  96     real *rlist;                /* Neighbourlist cutoff radius */
  97     real *rlistlong;
  98     int  *nkx, *nky, *nkz;
  99     real *fsx, *fsy, *fsz;      /* Fourierspacing in x,y,z dimension */
 100 } t_inputinfo;
 101
 102
 103 static void sep_line(FILE *fp)
 104 {
 105     fprintf(fp, "\n------------------------------------------------------------\n");
 106 }
 107
 108
 109 /* Wrapper for system calls */
 110 static int gmx_system_call(char *command)
 111 {
 112 #ifdef GMX_NO_SYSTEM
 113     gmx_fatal(FARGS,"No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n",command);
 114 #else
 115     return ( system(command) );
 116 #endif
 117 }
 118
 119
 120 /* Check if string starts with substring */
 121 static gmx_bool str_starts(const char *string, const char *substring)
 122 {
 123     return ( strncmp(string, substring, strlen(substring)) == 0);
 124 }
 125
 126
 127 static void cleandata(t_perf *perfdata, int test_nr)
 128 {
 129     perfdata->Gcycles[test_nr]    = 0.0;
 130     perfdata->ns_per_day[test_nr] = 0.0;
 131     perfdata->PME_f_load[test_nr] = 0.0;
 132
 133     return;
 134 }
 135
 136
 137 static gmx_bool is_equal(real a, real b)
 138 {
 139     real diff, eps=1.0e-7;
 140
 141
 142     diff = a - b;
 143
 144     if (diff < 0.0) diff = -diff;
 145
 146     if (diff < eps)
 147         return TRUE;
 148     else
 149         return FALSE;
 150 }
 151
 152
 153 static void finalize(const char *fn_out)
 154 {
 155     char buf[STRLEN];
 156     FILE *fp;
 157
 158
 159     fp = fopen(fn_out,"r");
 160     fprintf(stdout,"\n\n");
 161
 162     while( fgets(buf,STRLEN-1,fp) != NULL )
 163     {
 164         fprintf(stdout,"%s",buf);
 165     }
 166     fclose(fp);
 167     fprintf(stdout,"\n\n");
 168     thanx(stderr);
 169 }
 170
 171
 172 enum {eFoundNothing, eFoundDDStr, eFoundAccountingStr, eFoundCycleStr};
 173
 174 static int parse_logfile(const char *logfile, const char *errfile,
 175         t_perf *perfdata, int test_nr, int presteps, gmx_large_int_t cpt_steps,
 176         int nnodes)
 177 {
 178     FILE  *fp;
 179     char  line[STRLEN], dumstring[STRLEN], dumstring2[STRLEN];
 180     const char matchstrdd[]="Domain decomposition grid";
 181     const char matchstrcr[]="resetting all time and cycle counters";
 182     const char matchstrbal[]="Average PME mesh/force load:";
 183     const char matchstring[]="R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G";
 184     const char errSIG[]="signal, stopping at the next";
 185     int   iFound;
 186     int   procs;
 187     float  dum1,dum2,dum3;
 188     int   npme;
 189     gmx_large_int_t resetsteps=-1;
 190     gmx_bool  bFoundResetStr = FALSE;
 191     gmx_bool  bResetChecked  = FALSE;
 192
 193
 194     if (!gmx_fexist(logfile))
 195     {
 196         fprintf(stderr, "WARNING: Could not find logfile %s.\n", logfile);
 197         cleandata(perfdata, test_nr);
 198         return eParselogNotFound;
 199     }
 200
 201     fp = fopen(logfile, "r");
 202     perfdata->PME_f_load[test_nr] = -1.0;
 203     perfdata->guessPME            = -1;
 204
 205     iFound = eFoundNothing;
 206     if (1 == nnodes)
 207         iFound = eFoundDDStr; /* Skip some case statements */
 208
 209     while (fgets(line, STRLEN, fp) != NULL)
 210     {
 211         /* Remove leading spaces */
 212         ltrim(line);
 213
 214         /* Check for TERM and INT signals from user: */
 215         if ( strstr(line, errSIG) != NULL )
 216         {
 217             fclose(fp);
 218             cleandata(perfdata, test_nr);
 219             return eParselogTerm;
 220         }
 221
 222         /* Check whether cycle resetting  worked */
 223         if (presteps > 0 && !bFoundResetStr)
 224         {
 225             if (strstr(line, matchstrcr) != NULL)
 226             {
 227                 sprintf(dumstring, "Step %s", gmx_large_int_pfmt);
 228                 sscanf(line, dumstring, &resetsteps);
 229                 bFoundResetStr = TRUE;
 230                 if (resetsteps == presteps+cpt_steps)
 231                 {
 232                     bResetChecked = TRUE;
 233                 }
 234                 else
 235                 {
 236                     sprintf(dumstring , gmx_large_int_pfmt, resetsteps);
 237                     sprintf(dumstring2, gmx_large_int_pfmt, presteps+cpt_steps);
 238                     fprintf(stderr, "WARNING: Time step counters were reset at step %s,\n"
 239                                     "         though they were supposed to be reset at step %s!\n",
 240                             dumstring, dumstring2);
 241                 }
 242             }
 243         }
 244
 245         /* Look for strings that appear in a certain order in the log file: */
 246         switch(iFound)
 247         {
 248             case eFoundNothing:
 249                 /* Look for domain decomp grid and separate PME nodes: */
 250                 if (str_starts(line, matchstrdd))
 251                 {
 252                     sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
 253                             &(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme);
 254                     if (perfdata->nPMEnodes == -1)
 255                         perfdata->guessPME = npme;
 256                     else if (perfdata->nPMEnodes != npme)
 257                         gmx_fatal(FARGS, "PME nodes from command line and output file are not identical");
 258                     iFound = eFoundDDStr;
 259                 }
 260                 /* Catch a few errors that might have occured: */
 261                 else if (str_starts(line, "There is no domain decomposition for"))
 262                 {
 263                     return eParselogNoDDGrid;
 264                 }
 265                 else if (str_starts(line, "reading tpx file"))
 266                 {
 267                     return eParselogTPXVersion;
 268                 }
 269                 else if (str_starts(line, "The -dd or -npme option request a parallel simulation"))
 270                 {
 271                     return eParselogNotParallel;
 272                 }
 273                 break;
 274             case eFoundDDStr:
 275                 /* Look for PME mesh/force balance (not necessarily present, though) */
 276                 if (str_starts(line, matchstrbal))
 277                     sscanf(&line[strlen(matchstrbal)], "%f", &(perfdata->PME_f_load[test_nr]));
 278                 /* Look for matchstring */
 279                 if (str_starts(line, matchstring))
 280                     iFound = eFoundAccountingStr;
 281                 break;
 282             case eFoundAccountingStr:
 283                 /* Already found matchstring - look for cycle data */
 284                 if (str_starts(line, "Total  "))
 285                 {
 286                     sscanf(line,"Total %d %lf",&procs,&(perfdata->Gcycles[test_nr]));
 287                     iFound = eFoundCycleStr;
 288                 }
 289                 break;
 290             case eFoundCycleStr:
 291                 /* Already found cycle data - look for remaining performance info and return */
 292                 if (str_starts(line, "Performance:"))
 293                 {
 294                     sscanf(line,"%s %f %f %f %f", dumstring, &dum1, &dum2, &(perfdata->ns_per_day[test_nr]), &dum3);
 295                     fclose(fp);
 296                     if (bResetChecked || presteps == 0)
 297                         return eParselogOK;
 298                     else
 299                         return eParselogResetProblem;
 300                 }
 301                 break;
 302         }
 303     } /* while */
 304
 305     /* Check why there is no performance data in the log file.
 306      * Did a fatal errors occur? */
 307     if (gmx_fexist(errfile))
 308     {
 309         fp = fopen(errfile, "r");
 310         while (fgets(line, STRLEN, fp) != NULL)
 311         {
 312             if ( str_starts(line, "Fatal error:") )
 313             {
 314                 if (fgets(line, STRLEN, fp) != NULL)
 315                     fprintf(stderr, "\nWARNING: A fatal error has occured during this benchmark:\n"
 316                                     "%s\n", line);
 317                 fclose(fp);
 318                 cleandata(perfdata, test_nr);
 319                 return eParselogFatal;
 320             }
 321         }
 322         fclose(fp);
 323     }
 324     else
 325     {
 326         fprintf(stderr, "WARNING: Could not find stderr file %s.\n", errfile);
 327     }
 328
 329     /* Giving up ... we could not find out why there is no performance data in
 330      * the log file. */
 331     fprintf(stdout, "No performance data in log file.\n");
 332     fclose(fp);
 333     cleandata(perfdata, test_nr);
 334
 335     return eParselogNoPerfData;
 336 }
 337
 338
 339 static gmx_bool analyze_data(
 340         FILE        *fp,
 341         const char  *fn,
 342         t_perf      **perfdata,
 343         int         nnodes,
 344         int         ntprs,
 345         int         ntests,
 346         int         nrepeats,
 347         t_inputinfo *info,
 348         int         *index_tpr,    /* OUT: Nr of mdp file with best settings */
 349         int         *npme_optimal) /* OUT: Optimal number of PME nodes */
 350 {
 351     int  i,j,k;
 352     int line=0, line_win=-1;
 353     int  k_win=-1, i_win=-1, winPME;
 354     double s=0.0;  /* standard deviation */
 355     t_perf *pd;
 356     char strbuf[STRLEN];
 357     char str_PME_f_load[13];
 358     gmx_bool bCanUseOrigTPR;
 359
 360
 361     if (nrepeats > 1)
 362     {
 363         sep_line(fp);
 364         fprintf(fp, "Summary of successful runs:\n");
 365         fprintf(fp, "Line tpr PME nodes  Gcycles Av.     Std.dev.       ns/day        PME/f");
 366         if (nnodes > 1)
 367             fprintf(fp, "    DD grid");
 368         fprintf(fp, "\n");
 369     }
 370
 371
 372     for (k=0; k<ntprs; k++)
 373     {
 374         for (i=0; i<ntests; i++)
 375         {
 376             /* Select the right dataset: */
 377             pd = &(perfdata[k][i]);
 378
 379             pd->Gcycles_Av    = 0.0;
 380             pd->PME_f_load_Av = 0.0;
 381             pd->ns_per_day_Av = 0.0;
 382
 383             if (pd->nPMEnodes == -1)
 384                 sprintf(strbuf, "(%3d)", pd->guessPME);
 385             else
 386                 sprintf(strbuf, "     ");
 387
 388             /* Get the average run time of a setting */
 389             for (j=0; j<nrepeats; j++)
 390             {
 391                 pd->Gcycles_Av    += pd->Gcycles[j];
 392                 pd->PME_f_load_Av += pd->PME_f_load[j];
 393             }
 394             pd->Gcycles_Av    /= nrepeats;
 395             pd->PME_f_load_Av /= nrepeats;
 396
 397             for (j=0; j<nrepeats; j++)
 398             {
 399                 if (pd->ns_per_day[j] > 0.0)
 400                     pd->ns_per_day_Av += pd->ns_per_day[j];
 401                 else
 402                 {
 403                     /* Somehow the performance number was not aquired for this run,
 404                      * therefor set the average to some negative value: */
 405                     pd->ns_per_day_Av = -1.0f*nrepeats;
 406                     break;
 407                 }
 408             }
 409             pd->ns_per_day_Av /= nrepeats;
 410
 411             /* Nicer output: */
 412             if (pd->PME_f_load_Av > 0.0)
 413                 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load_Av);
 414             else
 415                 sprintf(str_PME_f_load, "%s", "         -  ");
 416
 417
 418             /* We assume we had a successful run if both averages are positive */
 419             if (pd->Gcycles_Av > 0.0 && pd->ns_per_day_Av > 0.0)
 420             {
 421                 /* Output statistics if repeats were done */
 422                 if (nrepeats > 1)
 423                 {
 424                     /* Calculate the standard deviation */
 425                     s = 0.0;
 426                     for (j=0; j<nrepeats; j++)
 427                         s += pow( pd->Gcycles[j] - pd->Gcycles_Av, 2 );
 428                     s /= (nrepeats - 1);
 429                     s = sqrt(s);
 430
 431                     fprintf(fp, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s",
 432                             line, k, pd->nPMEnodes, strbuf, pd->Gcycles_Av, s,
 433                             pd->ns_per_day_Av, str_PME_f_load);
 434                     if (nnodes > 1)
 435                         fprintf(fp, "  %3d %3d %3d", pd->nx, pd->ny, pd->nz);
 436                     fprintf(fp, "\n");
 437                 }
 438                 /* Store the index of the best run found so far in 'winner': */
 439                 if ( (k_win == -1) || (pd->Gcycles_Av < perfdata[k_win][i_win].Gcycles_Av) )
 440                 {
 441                     k_win = k;
 442                     i_win = i;
 443                     line_win = line;
 444                 }
 445                 line++;
 446             }
 447         }
 448     }
 449
 450     if (k_win == -1)
 451         gmx_fatal(FARGS, "None of the runs was successful! Check %s for problems.", fn);
 452
 453     sep_line(fp);
 454
 455     winPME = perfdata[k_win][i_win].nPMEnodes;
 456     if (winPME == -1)
 457         sprintf(strbuf, "%s", "the automatic number of");
 458     else
 459         sprintf(strbuf, "%d", winPME);
 460     fprintf(fp, "Best performance was achieved with %s PME nodes", strbuf);
 461     if (nrepeats > 1)
 462         fprintf(fp, " (see line %d)", line_win);
 463     fprintf(fp, "\n");
 464
 465     /* Only mention settings if they were modified: */
 466     bCanUseOrigTPR = TRUE;
 467     if ( !is_equal(info->rcoulomb[k_win], info->rcoulomb[0]) )
 468     {
 469         fprintf(fp, "Optimized PME settings:\n"
 470                     "   New Coulomb radius: %f nm (was %f nm)\n",
 471                     info->rcoulomb[k_win], info->rcoulomb[0]);
 472         bCanUseOrigTPR = FALSE;
 473     }
 474
 475     if ( !is_equal(info->rvdw[k_win], info->rvdw[0]) )
 476     {
 477         fprintf(fp, "   New Van der Waals radius: %f nm (was %f nm)\n",
 478                 info->rvdw[k_win], info->rvdw[0]);
 479         bCanUseOrigTPR = FALSE;
 480     }
 481
 482     if ( ! (info->nkx[k_win] == info->nkx[0] &&
 483             info->nky[k_win] == info->nky[0] &&
 484             info->nkz[k_win] == info->nkz[0] ) )
 485     {
 486         fprintf(fp, "   New Fourier grid xyz: %d %d %d (was %d %d %d)\n",
 487                 info->nkx[k_win], info->nky[k_win], info->nkz[k_win],
 488                 info->nkx[0], info->nky[0], info->nkz[0]);
 489         bCanUseOrigTPR = FALSE;
 490     }
 491     if (bCanUseOrigTPR && ntprs > 1)
 492         fprintf(fp, "and original PME settings.\n");
 493
 494     fflush(fp);
 495
 496     /* Return the index of the mdp file that showed the highest performance
 497      * and the optimal number of PME nodes */
 498     *index_tpr    = k_win;
 499     *npme_optimal = winPME;
 500
 501     return bCanUseOrigTPR;
 502 }
 503
 504
 505 /* Get the commands we need to set up the runs from environment variables */
 506 static void get_program_paths(gmx_bool bThreads, char *cmd_mpirun[], char cmd_np[],
 507                               char *cmd_mdrun[], int repeats)
 508 {
 509     char *command=NULL;
 510     char *cp;
 511     char *cp2;
 512     char line[STRLEN];
 513     FILE *fp;
 514     const char def_mpirun[] = "mpirun";
 515     const char def_mdrun[]  = "mdrun";
 516     const char filename[]   = "benchtest.log";
 517     const char match_mpi[]  = "NNODES=";
 518     const char match_mdrun[]= "Program: ";
 519     const char empty_mpirun[] = "";
 520     gmx_bool  bMdrun = FALSE;
 521     gmx_bool  bMPI   = FALSE;
 522
 523
 524     /* Get the commands we need to set up the runs from environment variables */
 525     if (!bThreads)
 526     {
 527         if ( (cp = getenv("MPIRUN")) != NULL)
 528             *cmd_mpirun = strdup(cp);
 529         else
 530             *cmd_mpirun = strdup(def_mpirun);
 531     }
 532     else
 533     {
 534         *cmd_mpirun = strdup(empty_mpirun);
 535     }
 536
 537     if ( (cp = getenv("MDRUN" )) != NULL )
 538         *cmd_mdrun  = strdup(cp);
 539     else
 540         *cmd_mdrun  = strdup(def_mdrun);
 541
 542
 543     /* If no simulations have to be performed, we are done here */
 544     if (repeats <= 0)
 545         return;
 546
 547     /* Run a small test to see whether mpirun + mdrun work  */
 548     fprintf(stdout, "Making sure that mdrun can be executed. ");
 549     if (bThreads)
 550     {
 551         snew(command, strlen(*cmd_mdrun) + strlen(cmd_np) + strlen(filename) + 50);
 552         sprintf(command, "%s%s-version -maxh 0.001 1> %s 2>&1", *cmd_mdrun, cmd_np, filename);
 553     }
 554     else
 555     {
 556         snew(command, strlen(*cmd_mpirun) + strlen(cmd_np) + strlen(*cmd_mdrun) + strlen(filename) + 50);
 557         sprintf(command, "%s%s%s -version -maxh 0.001 1> %s 2>&1", *cmd_mpirun, cmd_np, *cmd_mdrun, filename);
 558     }
 559     fprintf(stdout, "Trying '%s' ... ", command);
 560     make_backup(filename);
 561     gmx_system_call(command);
 562
 563     /* Check if we find the characteristic string in the output: */
 564     if (!gmx_fexist(filename))
 565         gmx_fatal(FARGS, "Output from test run could not be found.");
 566
 567     fp = fopen(filename, "r");
 568     /* We need to scan the whole output file, since sometimes the queuing system
 569      * also writes stuff to stdout/err */
 570     while ( !feof(fp) )
 571     {
 572         cp2=fgets(line, STRLEN, fp);
 573         if (cp2!=NULL)
 574         {
 575             if ( str_starts(line, match_mdrun) )
 576                 bMdrun = TRUE;
 577             if ( str_starts(line, match_mpi) )
 578                 bMPI = TRUE;
 579         }
 580     }
 581     fclose(fp);
 582
 583     if (bThreads)
 584     {
 585         if (bMPI)
 586         {
 587             gmx_fatal(FARGS, "Need a threaded version of mdrun. This one\n"
 588                     "(%s)\n"
 589                     "seems to have been compiled with MPI instead.",
 590                     *cmd_mdrun);
 591         }
 592     }
 593     else
 594     {
 595         if (bMdrun && !bMPI)
 596         {
 597             gmx_fatal(FARGS, "Need an MPI-enabled version of mdrun. This one\n"
 598                     "(%s)\n"
 599                     "seems to have been compiled without MPI support.",
 600                     *cmd_mdrun);
 601         }
 602     }
 603
 604     if (!bMdrun)
 605     {
 606         gmx_fatal(FARGS, "Cannot execute mdrun. Please check %s for problems!",
 607                 filename);
 608     }
 609
 610     fprintf(stdout, "passed.\n");
 611
 612     /* Clean up ... */
 613     remove(filename);
 614     sfree(command);
 615 }
 616
 617
 618 static void launch_simulation(
 619         gmx_bool bLaunch,           /* Should the simulation be launched? */
 620         FILE *fp,               /* General log file */
 621         gmx_bool bThreads,          /* whether to use threads */
 622         char *cmd_mpirun,       /* Command for mpirun */
 623         char *cmd_np,           /* Switch for -np or -nt or empty */
 624         char *cmd_mdrun,        /* Command for mdrun */
 625         char *args_for_mdrun,   /* Arguments for mdrun */
 626         const char *simulation_tpr,   /* This tpr will be simulated */
 627         int  nnodes,            /* Number of nodes to run on */
 628         int  nPMEnodes)         /* Number of PME nodes to use */
 629 {
 630     char  *command;
 631
 632
 633     /* Make enough space for the system call command,
 634      * (100 extra chars for -npme ... etc. options should suffice): */
 635     snew(command, strlen(cmd_mpirun)+strlen(cmd_mdrun)+strlen(cmd_np)+strlen(args_for_mdrun)+strlen(simulation_tpr)+100);
 636
 637     /* Note that the -passall options requires args_for_mdrun to be at the end
 638      * of the command line string */
 639     if (bThreads)
 640     {
 641         sprintf(command, "%s%s-npme %d -s %s %s",
 642                 cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun);
 643     }
 644     else
 645     {
 646         sprintf(command, "%s%s%s -npme %d -s %s %s",
 647                 cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun);
 648     }
 649
 650     fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch? "Using":"Please use", command);
 651     sep_line(fp);
 652     fflush(fp);
 653
 654     /* Now the real thing! */
 655     if (bLaunch)
 656     {
 657         fprintf(stdout, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command);
 658         sep_line(stdout);
 659         fflush(stdout);
 660         gmx_system_call(command);
 661         thanx(fp);
 662     }
 663 }
 664
 665
 666 static void modify_PMEsettings(
 667         gmx_large_int_t simsteps,  /* Set this value as number of time steps */
 668         const char *fn_best_tpr,   /* tpr file with the best performance */
 669         const char *fn_sim_tpr)    /* name of tpr file to be launched */
 670 {
 671     t_inputrec   *ir;
 672     t_state      state;
 673     gmx_mtop_t   mtop;
 674     char         buf[200];
 675
 676     snew(ir,1);
 677     read_tpx_state(fn_best_tpr,ir,&state,NULL,&mtop);
 678
 679     /* Set nsteps to the right value */
 680     ir->nsteps = simsteps;
 681
 682     /* Write the tpr file which will be launched */
 683     sprintf(buf, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr, gmx_large_int_pfmt);
 684     fprintf(stdout,buf,ir->nsteps);
 685     fflush(stdout);
 686     write_tpx_state(fn_sim_tpr,ir,&state,&mtop);
 687
 688     sfree(ir);
 689 }
 690
 691
 692 typedef struct
 693 {
 694     int  nkx, nky, nkz;      /* Fourier grid                                  */
 695     real fac;                /* actual scaling factor                         */
 696     real fs;                 /* Fourierspacing                                */
 697 } t_pmegrid;
 698
 699
 700 static void copy_grid(t_pmegrid *ingrid, t_pmegrid *outgrid)
 701 {
 702     outgrid->nkx = ingrid->nkx;
 703     outgrid->nky = ingrid->nky;
 704     outgrid->nkz = ingrid->nkz;
 705     outgrid->fac = ingrid->fac;
 706     outgrid->fs  = ingrid->fs;
 707 }
 708
 709 /* Removes entry 'index' from the t_pmegrid list */
 710 static void remove_from_list(
 711         t_pmegrid gridlist[],
 712         int *nlist,           /* Length of the list                           */
 713         int index)            /* Index to remove from the list                */
 714 {
 715     int j;
 716
 717
 718     for (j = index; j < (*nlist - 1); j++)
 719     {
 720         copy_grid(&gridlist[j+1], &gridlist[j]);
 721     }
 722     *nlist = *nlist - 1;
 723 }
 724
 725
 726 /* Returns the index of the least necessary grid in the list.
 727  *
 728  * This is the one where the following conditions hold for the scaling factor:
 729  * 1. this grid has the smallest distance to its neighboring grid (distance
 730  *    measured by fac) -> this condition is true for two grids at the same time
 731  * 2. this grid (of the two) has the smaller distance to the other neighboring
 732  *    grid
 733  *
 734  * The extreme grids (the ones with the smallest and largest
 735  * scaling factor) are never thrown away.
 736  */
 737 static int get_throwaway_index(
 738         t_pmegrid gridlist[],
 739         int nlist)           /* Length of the list                           */
 740 {
 741     int i,index = -1;
 742     real dist,mindist,d_left,d_right;
 743
 744
 745     /* Find the two factors with the smallest mutual distance */
 746     mindist = GMX_FLOAT_MAX;
 747     for (i = 1; i < nlist; i++)
 748     {
 749         dist = fabs(gridlist[i].fac - gridlist[i-1].fac);
 750         if (dist < mindist)
 751         {
 752             index = i;
 753             mindist = dist;
 754         }
 755     }
 756     /* index and index-1 have the smallest mutual distance */
 757     if (index == 1)
 758     {
 759         /* Never return the first index, i.e. index == 0 */
 760         ;
 761     }
 762     else
 763     {
 764         d_left  = fabs(gridlist[index-1].fac - gridlist[index-2].fac);
 765         d_right = fabs(gridlist[index+1].fac - gridlist[index  ].fac);
 766
 767         /* Return the left index if its distance to its left neighbor is shorter
 768          * than the distance of the right index to its right neighbor */
 769         if (d_left < d_right)
 770             index--;
 771     }
 772     /* Never return the last index */
 773     if (index == nlist-1)
 774         index--;
 775
 776     return index;
 777 }
 778
 779
 780 static void make_grid_list(
 781         real fmin,            /* minimum scaling factor (downscaling fac)     */
 782         real fmax,            /* maximum scaling factor (upscaling fac)       */
 783         int ntprs,            /* Number of tpr files to test                  */
 784         matrix box,           /* The box                                      */
 785         t_pmegrid *griduse[], /* List of grids that have to be tested         */
 786         int *npmegrid,        /* Number of grids in the list                  */
 787         real fs)              /* Requested fourierspacing at a scaling factor
 788                                  of unity                                     */
 789 {
 790     real req_fac,act_fac=0,act_fs,eps;
 791     rvec box_size;
 792     int i,jmin=0,d,ngridall=0;
 793     int nkx=0,nky=0,nkz=0;
 794     int nkx_old=0,nky_old=0,nkz_old=0;
 795     t_pmegrid *gridall;
 796     int gridalloc,excess;
 797
 798
 799     /* Determine length of triclinic box vectors */
 800     for(d=0; d<DIM; d++)
 801     {
 802         box_size[d] = 0;
 803         for(i=0;i<DIM;i++)
 804             box_size[d] += box[d][i]*box[d][i];
 805         box_size[d] = sqrt(box_size[d]);
 806     }
 807
 808     gridalloc = 25;
 809     snew(gridall, gridalloc);
 810
 811     fprintf(stdout, "Possible PME grid settings (apart from input file settings):\n");
 812     fprintf(stdout, "  nkx  nky  nkz  max spacing  scaling factor\n");
 813
 814     /* eps should provide a fine enough spacing not to miss any grid */
 815     if (fmax != fmin)
 816         eps = (fmax-fmin)/(100.0*(ntprs-1));
 817     else
 818         eps = 1.0/max( (*griduse)[0].nkz, max( (*griduse)[0].nkx, (*griduse)[0].nky ) );
 819
 820     for (req_fac = fmin; act_fac < fmax; req_fac += eps)
 821     {
 822         nkx=0;
 823         nky=0;
 824         nkz=0;
 825         calc_grid(NULL,box,fs*req_fac,&nkx,&nky,&nkz);
 826         act_fs = max(box_size[XX]/nkx,max(box_size[YY]/nky,box_size[ZZ]/nkz));
 827         act_fac = act_fs/fs;
 828         if (    ! ( nkx==nkx_old           && nky==nky_old           && nkz==nkz_old           )    /* Exclude if grid is already in list */
 829              && ! ( nkx==(*griduse)[0].nkx && nky==(*griduse)[0].nky && nkz==(*griduse)[0].nkz ) )  /* Exclude input file grid */
 830         {
 831             /* We found a new grid that will do */
 832             nkx_old = nkx;
 833             nky_old = nky;
 834             nkz_old = nkz;
 835             gridall[ngridall].nkx = nkx;
 836             gridall[ngridall].nky = nky;
 837             gridall[ngridall].nkz = nkz;
 838             gridall[ngridall].fac = act_fac;
 839             gridall[ngridall].fs  = act_fs;
 840             fprintf(stdout, "%5d%5d%5d %12f %12f\n",nkx,nky,nkz,act_fs,act_fac);
 841             ngridall++;
 842             if (ngridall >= gridalloc)
 843             {
 844                 gridalloc += 25;
 845                 srenew(gridall, gridalloc);
 846             }
 847         }
 848     }
 849
 850     /* Return the actual number of grids that can be tested. We cannot test more
 851      * than the number of grids we found plus 1 (the input file) */
 852     *npmegrid = min(ngridall+1,ntprs);
 853
 854     /* excess is the number of grids we have to get rid of */
 855     excess = ngridall+1 - ntprs;
 856
 857     /* If we found less grids than tpr files were requested, simply test all
 858      * the grid there are ... */
 859     if (excess < 0)
 860     {
 861         fprintf(stdout, "NOTE: You requested %d tpr files, but apart from the input file grid only the\n"
 862                         "      above listed %d suitable PME grids were found. Will test all suitable settings.\n",
 863                         ntprs, ngridall);
 864     }
 865     else
 866     {
 867         if (2 == ntprs)
 868         {
 869             /* We can only keep the input tpr file plus one extra tpr file.
 870              * We make that choice depending on the values of -upfac and -downfac */
 871             if (fmin < 1.0)
 872             {
 873                 /* Keep the one with the -downfac as scaling factor. This is already
 874                  * stored in gridall[0] */
 875                 ;
 876             }
 877             else
 878             {
 879                 /* Keep the one with -upfac as scaling factor */
 880                 copy_grid(&(gridall[ngridall-1]), &(gridall[0]));
 881             }
 882
 883         }
 884         else
 885         {
 886             /* From the grid list throw away the unnecessary ones (keep the extremes) */
 887             for (i = 0; i < excess; i++)
 888             {
 889                 /* Get the index of the least necessary grid from the list ... */
 890                 jmin = get_throwaway_index(gridall, ngridall);
 891                 /* ... and remove the grid from the list */
 892                 remove_from_list(gridall, &ngridall, jmin);
 893             }
 894         }
 895     }
 896
 897     /* The remaining list contains the grids we want to test */
 898     for (i=1; i < *npmegrid; i++)
 899         copy_grid(&(gridall[i-1]), &((*griduse)[i]));
 900
 901     sfree(gridall);
 902 }
 903
 904
 905 #define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
 906
 907 /* Make additional TPR files with more computational load for the
 908  * direct space processors: */
 909 static void make_benchmark_tprs(
 910         const char *fn_sim_tpr,       /* READ : User-provided tpr file */
 911         char *fn_bench_tprs[],  /* WRITE: Names of benchmark tpr files */
 912         gmx_large_int_t benchsteps,  /* Number of time steps for benchmark runs */
 913         gmx_large_int_t statesteps,  /* Step counter in checkpoint file */
 914         real upfac,             /* Scale rcoulomb inbetween downfac and upfac */
 915         real downfac,
 916         int ntprs,              /* No. of TPRs to write, each with a different rcoulomb and fourierspacing */
 917         real fourierspacing,    /* Basic fourierspacing from tpr input file */
 918         t_inputinfo *info,      /* Contains information about mdp file options */
 919         FILE *fp)               /* Write the output here */
 920 {
 921     int          i,j,d;
 922     t_inputrec   *ir;
 923     t_state      state;
 924     gmx_mtop_t   mtop;
 925     real         nlist_buffer; /* Thickness of the buffer regions for PME-switch potentials: */
 926     char         buf[200];
 927     rvec         box_size;
 928     gmx_bool         bNote = FALSE;
 929     t_pmegrid    *pmegrid=NULL; /* Grid settings for the PME grids to test    */
 930     int          npmegrid=1;    /* Number of grids that can be tested,
 931                                  * normally = ntpr but could be less          */
 932
 933
 934     sprintf(buf, "Making benchmark tpr file%s with %s time step%s",
 935             ntprs>1? "s":"", gmx_large_int_pfmt, benchsteps>1?"s":"");
 936     fprintf(stdout, buf, benchsteps);
 937     if (statesteps > 0)
 938     {
 939         sprintf(buf, " (adding %s steps from checkpoint file)", gmx_large_int_pfmt);
 940         fprintf(stdout, buf, statesteps);
 941         benchsteps += statesteps;
 942     }
 943     fprintf(stdout, ".\n");
 944
 945
 946     snew(ir,1);
 947     read_tpx_state(fn_sim_tpr,ir,&state,NULL,&mtop);
 948
 949     /* Check if some kind of PME was chosen */
 950     if (EEL_PME(ir->coulombtype) == FALSE)
 951         gmx_fatal(FARGS, "Can only do optimizations for simulations with %s electrostatics.",
 952                 EELTYPE(eelPME));
 953
 954     /* Check if rcoulomb == rlist, which is necessary for plain PME. */
 955     if (  (eelPME == ir->coulombtype) && !(ir->rcoulomb == ir->rlist) )
 956     {
 957         gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to rlist (%f).",
 958                 EELTYPE(eelPME), ir->rcoulomb, ir->rlist);
 959     }
 960     /* For other PME types, rcoulomb is allowed to be smaller than rlist */
 961     else if (ir->rcoulomb > ir->rlist)
 962     {
 963         gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)",
 964                 EELTYPE(ir->coulombtype), ir->rcoulomb, ir->rlist);
 965     }
 966
 967     /* Reduce the number of steps for the benchmarks */
 968     info->orig_sim_steps = ir->nsteps;
 969     ir->nsteps           = benchsteps;
 970
 971     /* For PME-switch potentials, keep the radial distance of the buffer region */
 972     nlist_buffer   = ir->rlist - ir->rcoulomb;
 973
 974     /* Determine length of triclinic box vectors */
 975     for(d=0; d<DIM; d++)
 976     {
 977         box_size[d] = 0;
 978         for(i=0;i<DIM;i++)
 979             box_size[d] += state.box[d][i]*state.box[d][i];
 980         box_size[d] = sqrt(box_size[d]);
 981     }
 982
 983     /* Reconstruct fourierspacing per dimension from the number of grid points and box size */
 984     info->fsx[0] = box_size[XX]/ir->nkx;
 985     info->fsy[0] = box_size[YY]/ir->nky;
 986     info->fsz[0] = box_size[ZZ]/ir->nkz;
 987
 988     /* Put the input grid as first entry into the grid list */
 989     snew(pmegrid, ntprs);
 990     pmegrid[0].fac = 1.00;
 991     pmegrid[0].nkx = ir->nkx;
 992     pmegrid[0].nky = ir->nky;
 993     pmegrid[0].nkz = ir->nkz;
 994     pmegrid[0].fs  = max(box_size[ZZ]/ir->nkz, max(box_size[XX]/ir->nkx, box_size[YY]/ir->nky));
 995
 996     /* If no value for the fourierspacing was provided on the command line, we
 997      * use the reconstruction from the tpr file */
 998     if (fourierspacing <= 0)
 999     {
1000         fourierspacing = pmegrid[0].fs;
1001     }
1002
1003     fprintf(stdout, "Calculating PME grid points on the basis of a fourierspacing of %f nm\n", fourierspacing);
1004
1005     /* Print information about settings of which some are potentially modified: */
1006     fprintf(fp, "   Coulomb type         : %s\n", EELTYPE(ir->coulombtype));
1007     fprintf(fp, "   Grid spacing x y z   : %f %f %f\n",
1008             box_size[XX]/ir->nkx, box_size[YY]/ir->nky, box_size[ZZ]/ir->nkz);
1009     fprintf(fp, "   Van der Waals type   : %s\n", EVDWTYPE(ir->vdwtype));
1010     if (EVDW_SWITCHED(ir->vdwtype))
1011         fprintf(fp, "   rvdw_switch          : %f nm\n", ir->rvdw_switch);
1012     if (EPME_SWITCHED(ir->coulombtype))
1013         fprintf(fp, "   rlist                : %f nm\n", ir->rlist);
1014     if (ir->rlistlong != max_cutoff(ir->rvdw,ir->rcoulomb))
1015         fprintf(fp, "   rlistlong            : %f nm\n", ir->rlistlong);
1016
1017     /* Print a descriptive line about the tpr settings tested */
1018     fprintf(fp, "\nWill try these real/reciprocal workload settings:\n");
1019     fprintf(fp, " No.   scaling  rcoulomb");
1020     fprintf(fp, "  nkx  nky  nkz");
1021     fprintf(fp, "   spacing");
1022     if (evdwCUT == ir->vdwtype)
1023         fprintf(fp, "      rvdw");
1024     if (EPME_SWITCHED(ir->coulombtype))
1025         fprintf(fp, "     rlist");
1026     if ( ir->rlistlong != max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb)) )
1027         fprintf(fp, " rlistlong");
1028     fprintf(fp, "  tpr file\n");
1029
1030
1031     /* Get useful PME grids if requested, the actual number of entries is
1032      * returned in npmegrid */
1033     if (ntprs > 1)
1034         make_grid_list(downfac, upfac, ntprs, state.box, &pmegrid, &npmegrid, fourierspacing);
1035
1036     /* Loop to create the requested number of tpr input files */
1037     for (j = 0; j < npmegrid; j++)
1038     {
1039         /* The first one is the provided tpr file, just need to modify the number
1040          * of steps, so skip the following block */
1041         if (j > 0)
1042         {
1043             /* Scale the Coulomb radius */
1044             ir->rcoulomb = info->rcoulomb[0]*pmegrid[j].fac;
1045
1046             /* Adjust other radii since various conditions neet to be fulfilled */
1047             if (eelPME == ir->coulombtype)
1048             {
1049                 /* plain PME, rcoulomb must be equal to rlist */
1050                 ir->rlist = ir->rcoulomb;
1051             }
1052             else
1053             {
1054                 /* rlist must be >= rcoulomb, we keep the size of the buffer region */
1055                 ir->rlist = ir->rcoulomb + nlist_buffer;
1056             }
1057
1058             if (evdwCUT == ir->vdwtype)
1059             {
1060                 /* For vdw cutoff, rvdw >= rlist */
1061                 ir->rvdw = max(info->rvdw[0], ir->rlist);
1062             }
1063
1064             ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
1065
1066             /* Copy the optimal grid dimensions to ir */
1067             ir->nkx = pmegrid[j].nkx;
1068             ir->nky = pmegrid[j].nky;
1069             ir->nkz = pmegrid[j].nkz;
1070
1071         } /* end of "if (j != 0)" */
1072
1073         /* for j==0: Save the original settings
1074          * for j >0: Save modified radii and fourier grids */
1075         info->rcoulomb[j]  = ir->rcoulomb;
1076         info->rvdw[j]      = ir->rvdw;
1077         info->nkx[j]       = ir->nkx;
1078         info->nky[j]       = ir->nky;
1079         info->nkz[j]       = ir->nkz;
1080         info->rlist[j]     = ir->rlist;
1081         info->rlistlong[j] = ir->rlistlong;
1082         info->fsx[j]       = pmegrid[j].fs;
1083         info->fsy[j]       = pmegrid[j].fs;
1084         info->fsz[j]       = pmegrid[j].fs;
1085
1086         /* Write the benchmark tpr file */
1087         strncpy(fn_bench_tprs[j],fn_sim_tpr,strlen(fn_sim_tpr)-strlen(".tpr"));
1088         sprintf(buf, "_bench%.2d.tpr", j);
1089         strcat(fn_bench_tprs[j], buf);
1090         fprintf(stdout,"Writing benchmark tpr %s with nsteps=", fn_bench_tprs[j]);
1091         fprintf(stdout, gmx_large_int_pfmt, ir->nsteps);
1092         if (j > 0)
1093             fprintf(stdout,", scaling factor %f\n", pmegrid[j].fac);
1094         else
1095             fprintf(stdout,", unmodified settings\n");
1096
1097         write_tpx_state(fn_bench_tprs[j],ir,&state,&mtop);
1098
1099         /* Write information about modified tpr settings to log file */
1100         if (j==0)
1101             fprintf(fp, "%4d%10s%10f", j, "-input-", ir->rcoulomb);
1102         else
1103             fprintf(fp, "%4d%10f%10f", j, pmegrid[j].fac, ir->rcoulomb);
1104         fprintf(fp, "%5d%5d%5d", ir->nkx, ir->nky, ir->nkz);
1105         fprintf(fp, " %9f ", info->fsx[j]);
1106         if (evdwCUT == ir->vdwtype)
1107             fprintf(fp, "%10f", ir->rvdw);
1108         if (EPME_SWITCHED(ir->coulombtype))
1109             fprintf(fp, "%10f", ir->rlist);
1110         if ( info->rlistlong[0] != max_cutoff(info->rlist[0],max_cutoff(info->rvdw[0],info->rcoulomb[0])) )
1111             fprintf(fp, "%10f", ir->rlistlong);
1112         fprintf(fp, "  %-14s\n",fn_bench_tprs[j]);
1113
1114         /* Make it clear to the user that some additional settings were modified */
1115         if (   !is_equal(ir->rvdw     , info->rvdw[0])
1116             || !is_equal(ir->rlistlong, info->rlistlong[0]) )
1117         {
1118             bNote = TRUE;
1119         }
1120     }
1121     if (bNote)
1122         fprintf(fp, "\nNote that in addition to rcoulomb and the fourier grid\n"
1123                     "also other input settings were changed (see table above).\n"
1124                     "Please check if the modified settings are appropriate.\n");
1125     fflush(stdout);
1126     fflush(fp);
1127     sfree(ir);
1128 }
1129
1130
1131 /* Whether these files are written depends on tpr (or mdp) settings,
1132  * not on mdrun command line options! */
1133 static gmx_bool tpr_triggers_file(const char *opt)
1134 {
1135     if ( (0 == strcmp(opt, "-ro"))
1136       || (0 == strcmp(opt, "-ra"))
1137       || (0 == strcmp(opt, "-rt"))
1138       || (0 == strcmp(opt, "-rs"))
1139       || (0 == strcmp(opt, "-pf"))
1140       || (0 == strcmp(opt, "-px")) )
1141         return TRUE;
1142     else
1143         return FALSE;
1144 }
1145
1146
1147 /* Rename the files we want to keep to some meaningful filename and
1148  * delete the rest */
1149 static void cleanup(const t_filenm *fnm, int nfile, int k, int nnodes,
1150                     int nPMEnodes, int nr, gmx_bool bKeepStderr)
1151 {
1152     char numstring[STRLEN];
1153     char newfilename[STRLEN];
1154     const char *fn=NULL;
1155     int i;
1156     const char *opt;
1157
1158
1159     fprintf(stdout, "Cleaning up, deleting benchmark temp files ...\n");
1160
1161     for (i=0; i<nfile; i++)
1162     {
1163         opt = (char *)fnm[i].opt;
1164         if ( strcmp(opt, "-p") == 0 )
1165         {
1166             /* do nothing; keep this file */
1167             ;
1168         }
1169         else if (strcmp(opt, "-bg") == 0)
1170         {
1171             /* Give the log file a nice name so one can later see which parameters were used */
1172             numstring[0] = '\0';
1173             if (nr > 0)
1174                 sprintf(numstring, "_%d", nr);
1175             sprintf(newfilename, "%s_no%d_np%d_npme%d%s", opt2fn("-bg",nfile,fnm), k, nnodes, nPMEnodes, numstring);
1176             if (gmx_fexist(opt2fn("-bg",nfile,fnm)))
1177             {
1178                 fprintf(stdout, "renaming log file to %s\n", newfilename);
1179                 make_backup(newfilename);
1180                 rename(opt2fn("-bg",nfile,fnm), newfilename);
1181             }
1182         }
1183         else if (strcmp(opt, "-err") == 0)
1184         {
1185             /* This file contains the output of stderr. We want to keep it in
1186              * cases where there have been problems. */
1187             fn = opt2fn(opt, nfile, fnm);
1188             numstring[0] = '\0';
1189             if (nr > 0)
1190                 sprintf(numstring, "_%d", nr);
1191             sprintf(newfilename, "%s_no%d_np%d_npme%d%s", fn, k, nnodes, nPMEnodes, numstring);
1192             if (gmx_fexist(fn))
1193             {
1194                 if (bKeepStderr)
1195                 {
1196                     fprintf(stdout, "Saving stderr output in %s\n", newfilename);
1197                     make_backup(newfilename);
1198                     rename(fn, newfilename);
1199                 }
1200                 else
1201                 {
1202                     fprintf(stdout, "Deleting %s\n", fn);
1203                     remove(fn);
1204                 }
1205             }
1206         }
1207         /* Delete the files which are created for each benchmark run: (options -b*) */
1208         else if ( ( (0 == strncmp(opt, "-b", 2)) && (opt2bSet(opt,nfile,fnm) || !is_optional(&fnm[i])) )
1209                   || tpr_triggers_file(opt) )
1210         {
1211             fn = opt2fn(opt, nfile, fnm);
1212             if (gmx_fexist(fn))
1213             {
1214                 fprintf(stdout, "Deleting %s\n", fn);
1215                 remove(fn);
1216             }
1217         }
1218     }
1219 }
1220
1221
1222 /* Returns the largest common factor of n1 and n2 */
1223 static int largest_common_factor(int n1, int n2)
1224 {
1225     int factor, nmax;
1226
1227     nmax = min(n1, n2);
1228     for (factor=nmax; factor > 0; factor--)
1229     {
1230         if ( 0==(n1 % factor) && 0==(n2 % factor) )
1231         {
1232             return(factor);
1233         }
1234     }
1235     return 0; /* one for the compiler */
1236 }
1237
1238 enum {eNpmeAuto, eNpmeAll, eNpmeReduced, eNpmeSubset, eNpmeNr};
1239
1240 /* Create a list of numbers of PME nodes to test */
1241 static void make_npme_list(
1242         const char *npmevalues_opt,  /* Make a complete list with all
1243                            * possibilities or a short list that keeps only
1244                            * reasonable numbers of PME nodes                  */
1245         int *nentries,    /* Number of entries we put in the nPMEnodes list   */
1246         int *nPMEnodes[], /* Each entry contains the value for -npme          */
1247         int nnodes,       /* Total number of nodes to do the tests on         */
1248         int minPMEnodes,  /* Minimum number of PME nodes                      */
1249         int maxPMEnodes)  /* Maximum number of PME nodes                      */
1250 {
1251     int i,npme,npp;
1252     int min_factor=1;     /* We request that npp and npme have this minimal
1253                            * largest common factor (depends on npp)           */
1254     int nlistmax;         /* Max. list size                                   */
1255     int nlist;            /* Actual number of entries in list                 */
1256     int eNPME=0;
1257
1258
1259     /* Do we need to check all possible values for -npme or is a reduced list enough? */
1260     if ( 0 == strcmp(npmevalues_opt, "all") )
1261     {
1262         eNPME = eNpmeAll;
1263     }
1264     else if ( 0 == strcmp(npmevalues_opt, "subset") )
1265     {
1266         eNPME = eNpmeSubset;
1267     }
1268     else if ( 0 == strcmp(npmevalues_opt, "auto") )
1269     {
1270         if (nnodes <= 64)
1271             eNPME = eNpmeAll;
1272         else if (nnodes < 128)
1273             eNPME = eNpmeReduced;
1274         else
1275             eNPME = eNpmeSubset;
1276     }
1277     else
1278     {
1279         gmx_fatal(FARGS, "Unknown option for -npme in make_npme_list");
1280     }
1281
1282     /* Calculate how many entries we could possibly have (in case of -npme all) */
1283     if (nnodes > 2)
1284     {
1285         nlistmax = maxPMEnodes - minPMEnodes + 3;
1286         if (0 == minPMEnodes)
1287             nlistmax--;
1288     }
1289     else
1290         nlistmax = 1;
1291
1292     /* Now make the actual list which is at most of size nlist */
1293     snew(*nPMEnodes, nlistmax);
1294     nlist = 0; /* start counting again, now the real entries in the list */
1295     for (i = 0; i < nlistmax - 2; i++)
1296     {
1297         npme = maxPMEnodes - i;
1298         npp  = nnodes-npme;
1299         switch (eNPME)
1300         {
1301             case eNpmeAll:
1302                 min_factor = 1;
1303                 break;
1304             case eNpmeReduced:
1305                 min_factor = 2;
1306                 break;
1307             case eNpmeSubset:
1308                 /* For 2d PME we want a common largest factor of at least the cube
1309                  * root of the number of PP nodes */
1310                 min_factor = (int) pow(npp, 1.0/3.0);
1311                 break;
1312             default:
1313                 gmx_fatal(FARGS, "Unknown option for eNPME in make_npme_list");
1314                 break;
1315         }
1316         if (largest_common_factor(npp, npme) >= min_factor)
1317         {
1318             (*nPMEnodes)[nlist] = npme;
1319             nlist++;
1320         }
1321     }
1322     /* We always test 0 PME nodes and the automatic number */
1323     *nentries = nlist + 2;
1324     (*nPMEnodes)[nlist  ] =  0;
1325     (*nPMEnodes)[nlist+1] = -1;
1326
1327     fprintf(stderr, "Will try the following %d different values for -npme:\n", *nentries);
1328     for (i=0; i<*nentries-1; i++)
1329         fprintf(stderr, "%d, ", (*nPMEnodes)[i]);
1330     fprintf(stderr, "and %d (auto).\n", (*nPMEnodes)[*nentries-1]);
1331 }
1332
1333
1334 /* Allocate memory to store the performance data */
1335 static void init_perfdata(t_perf *perfdata[], int ntprs, int datasets, int repeats)
1336 {
1337     int i, j, k;
1338
1339
1340     for (k=0; k<ntprs; k++)
1341     {
1342         snew(perfdata[k], datasets);
1343         for (i=0; i<datasets; i++)
1344         {
1345             for (j=0; j<repeats; j++)
1346             {
1347                 snew(perfdata[k][i].Gcycles   , repeats);
1348                 snew(perfdata[k][i].ns_per_day, repeats);
1349                 snew(perfdata[k][i].PME_f_load, repeats);
1350             }
1351         }
1352     }
1353 }
1354
1355
1356 static void do_the_tests(
1357         FILE *fp,                   /* General g_tune_pme output file         */
1358         char **tpr_names,           /* Filenames of the input files to test   */
1359         int maxPMEnodes,            /* Max fraction of nodes to use for PME   */
1360         int minPMEnodes,            /* Min fraction of nodes to use for PME   */
1361         const char *npmevalues_opt, /* Which -npme values should be tested    */
1362         t_perf **perfdata,          /* Here the performace data is stored     */
1363         int *pmeentries,            /* Entries in the nPMEnodes list          */
1364         int repeats,                /* Repeat each test this often            */
1365         int nnodes,                 /* Total number of nodes = nPP + nPME     */
1366         int nr_tprs,                /* Total number of tpr files to test      */
1367         gmx_bool bThreads,              /* Threads or MPI?                        */
1368         char *cmd_mpirun,           /* mpirun command string                  */
1369         char *cmd_np,               /* "-np", "-n", whatever mpirun needs     */
1370         char *cmd_mdrun,            /* mdrun command string                   */
1371         char *cmd_args_bench,       /* arguments for mdrun in a string        */
1372         const t_filenm *fnm,        /* List of filenames from command line    */
1373         int nfile,                  /* Number of files specified on the cmdl. */
1374         int sim_part,               /* For checkpointing                      */
1375         int presteps,               /* DLB equilibration steps, is checked    */
1376         gmx_large_int_t cpt_steps)  /* Time step counter in the checkpoint    */
1377 {
1378     int     i,nr,k,ret,count=0,totaltests;
1379     int     *nPMEnodes=NULL;
1380     t_perf  *pd=NULL;
1381     int     cmdline_length;
1382     char    *command, *cmd_stub;
1383     char    buf[STRLEN];
1384     gmx_bool    bResetProblem=FALSE;
1385
1386
1387     /* This string array corresponds to the eParselog enum type at the start
1388      * of this file */
1389     const char* ParseLog[] = {"OK.",
1390                               "Logfile not found!",
1391                               "No timings, logfile truncated?",
1392                               "Run was terminated.",
1393                               "Counters were not reset properly.",
1394                               "No DD grid found for these settings.",
1395                               "TPX version conflict!",
1396                               "mdrun was not started in parallel!",
1397                               "A fatal error occured!" };
1398     char    str_PME_f_load[13];
1399
1400
1401     /* Allocate space for the mdrun command line. 100 extra characters should
1402        be more than enough for the -npme etcetera arguments */
1403     cmdline_length =  strlen(cmd_mpirun)
1404                     + strlen(cmd_np)
1405                     + strlen(cmd_mdrun)
1406                     + strlen(cmd_args_bench)
1407                     + strlen(tpr_names[0]) + 100;
1408     snew(command , cmdline_length);
1409     snew(cmd_stub, cmdline_length);
1410
1411     /* Construct the part of the command line that stays the same for all tests: */
1412     if (bThreads)
1413     {
1414         sprintf(cmd_stub, "%s%s", cmd_mdrun, cmd_np);
1415     }
1416     else
1417     {
1418         sprintf(cmd_stub, "%s%s%s ", cmd_mpirun, cmd_np, cmd_mdrun);
1419     }
1420
1421     /* Create a list of numbers of PME nodes to test */
1422     make_npme_list(npmevalues_opt, pmeentries, &nPMEnodes,
1423                    nnodes, minPMEnodes, maxPMEnodes);
1424
1425     if (0 == repeats)
1426     {
1427         fprintf(fp, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1428         fclose(fp);
1429         finalize(opt2fn("-p", nfile, fnm));
1430         exit(0);
1431     }
1432
1433     /* Allocate one dataset for each tpr input file: */
1434     init_perfdata(perfdata, nr_tprs, *pmeentries, repeats);
1435
1436     /*****************************************/
1437     /* Main loop over all tpr files to test: */
1438     /*****************************************/
1439     totaltests = nr_tprs*(*pmeentries)*repeats;
1440     for (k=0; k<nr_tprs;k++)
1441     {
1442         fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
1443         fprintf(fp, "PME nodes      Gcycles       ns/day        PME/f    Remark\n");
1444         /* Loop over various numbers of PME nodes: */
1445         for (i = 0; i < *pmeentries; i++)
1446         {
1447             pd = &perfdata[k][i];
1448
1449             /* Loop over the repeats for each scenario: */
1450             for (nr = 0; nr < repeats; nr++)
1451             {
1452                 pd->nPMEnodes = nPMEnodes[i];
1453
1454                 /* Add -npme and -s to the command line and save it. Note that
1455                  * the -passall (if set) options requires cmd_args_bench to be
1456                  * at the end of the command line string */
1457                 snew(pd->mdrun_cmd_line, cmdline_length);
1458                 sprintf(pd->mdrun_cmd_line, "%s-npme %d -s %s %s",
1459                         cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);
1460
1461                 /* Do a benchmark simulation: */
1462                 if (repeats > 1)
1463                     sprintf(buf, ", pass %d/%d", nr+1, repeats);
1464                 else
1465                     buf[0]='\0';
1466                 fprintf(stdout, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
1467                         (100.0*count)/totaltests,
1468                         k+1, nr_tprs, i+1, *pmeentries, buf);
1469                 make_backup(opt2fn("-err",nfile,fnm));
1470                 sprintf(command, "%s 1> /dev/null 2>%s", pd->mdrun_cmd_line, opt2fn("-err",nfile,fnm));
1471                 fprintf(stdout, "%s\n", pd->mdrun_cmd_line);
1472                 gmx_system_call(command);
1473
1474                 /* Collect the performance data from the log file; also check stderr
1475                  * for fatal errors */
1476                 ret = parse_logfile(opt2fn("-bg",nfile,fnm), opt2fn("-err",nfile,fnm),
1477                         pd, nr, presteps, cpt_steps, nnodes);
1478                 if ((presteps > 0) && (ret == eParselogResetProblem))
1479                     bResetProblem = TRUE;
1480
1481                 if (-1 == pd->nPMEnodes)
1482                     sprintf(buf, "(%3d)", pd->guessPME);
1483                 else
1484                     sprintf(buf, "     ");
1485
1486                 /* Nicer output */
1487                 if (pd->PME_f_load[nr] > 0.0)
1488                     sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load[nr]);
1489                 else
1490                     sprintf(str_PME_f_load, "%s", "         -  ");
1491
1492                 /* Write the data we got to disk */
1493                 fprintf(fp, "%4d%s %12.3f %12.3f %s    %s", pd->nPMEnodes,
1494                         buf, pd->Gcycles[nr], pd->ns_per_day[nr], str_PME_f_load, ParseLog[ret]);
1495                 if (! (ret==eParselogOK || ret==eParselogNoDDGrid || ret==eParselogNotFound) )
1496                     fprintf(fp, " Check %s file for problems.", ret==eParselogFatal? "err":"log");
1497                 fprintf(fp, "\n");
1498                 fflush(fp);
1499                 count++;
1500
1501                 /* Do some cleaning up and delete the files we do not need any more */
1502                 cleanup(fnm, nfile, k, nnodes, pd->nPMEnodes, nr, ret==eParselogFatal);
1503
1504                 /* If the first run with this number of processors already failed, do not try again: */
1505                 if (pd->Gcycles[0] <= 0.0 && repeats > 1)
1506                 {
1507                     fprintf(stdout, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
1508                     count += repeats-(nr+1);
1509                     break;
1510                 }
1511             } /* end of repeats loop */
1512         } /* end of -npme loop */
1513     } /* end of tpr file loop */
1514     if (bResetProblem)
1515     {
1516         sep_line(fp);
1517         fprintf(fp, "WARNING: The cycle and time step counters could not be reset\n"
1518                     "properly. The reason could be that mpirun did not manage to\n"
1519                     "export the environment variable GMX_RESET_COUNTER. You might\n"
1520                     "have to give a special switch to mpirun for that.\n"
1521                     "Alternatively, you can manually set GMX_RESET_COUNTER to the\n"
1522                     "value normally provided by -presteps.");
1523         sep_line(fp);
1524     }
1525     sfree(command);
1526     sfree(cmd_stub);
1527 }
1528
1529
1530 static void check_input(
1531         int nnodes,
1532         int repeats,
1533         int *ntprs,
1534         real *upfac,
1535         real *downfac,
1536         real maxPMEfraction,
1537         real minPMEfraction,
1538         real fourierspacing,
1539         gmx_large_int_t bench_nsteps,
1540         const t_filenm *fnm,
1541         int nfile,
1542         int sim_part,
1543         int presteps,
1544         int npargs,
1545         t_pargs *pa)
1546 {
1547     /* Make sure the input file exists */
1548     if (!gmx_fexist(opt2fn("-s",nfile,fnm)))
1549         gmx_fatal(FARGS, "File %s not found.", opt2fn("-s",nfile,fnm));
1550
1551     /* Make sure that the checkpoint file is not overwritten by the benchmark runs */
1552     if ( (0 == strcmp(opt2fn("-cpi",nfile,fnm), opt2fn("-cpo",nfile,fnm)) ) && (sim_part > 1) )
1553         gmx_fatal(FARGS, "Checkpoint input and output file must not be identical,\nbecause then the input file might change during the benchmarks.");
1554
1555     /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
1556     if (repeats < 0)
1557         gmx_fatal(FARGS, "Number of repeats < 0!");
1558
1559     /* Check number of nodes */
1560     if (nnodes < 1)
1561         gmx_fatal(FARGS, "Number of nodes/threads must be a positive integer.");
1562
1563     /* Automatically choose -ntpr if not set */
1564     if (*ntprs < 1)
1565     {
1566         if (nnodes < 16)
1567             *ntprs = 1;
1568         else
1569             *ntprs = 3;
1570         fprintf(stderr, "Will test %d tpr file%s.\n", *ntprs, *ntprs==1?"":"s");
1571     }
1572     else
1573     {
1574         if (1 == *ntprs)
1575             fprintf(stderr, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n");
1576     }
1577
1578     if ( is_equal(*downfac,*upfac) && (*ntprs > 2) && !(fourierspacing > 0.0))
1579     {
1580         fprintf(stderr, "WARNING: Resetting -ntpr to 2 since both scaling factors are the same.\n"
1581                         "Please choose upfac unequal to downfac to test various PME grid settings\n");
1582         *ntprs = 2;
1583     }
1584
1585     /* Check whether max and min fraction are within required values */
1586     if (maxPMEfraction > 0.5 || maxPMEfraction < 0)
1587         gmx_fatal(FARGS, "-max must be between 0 and 0.5");
1588     if (minPMEfraction > 0.5 || minPMEfraction < 0)
1589         gmx_fatal(FARGS, "-min must be between 0 and 0.5");
1590     if (maxPMEfraction < minPMEfraction)
1591         gmx_fatal(FARGS, "-max must be larger or equal to -min");
1592
1593     /* Check whether the number of steps - if it was set - has a reasonable value */
1594     if (bench_nsteps < 0)
1595         gmx_fatal(FARGS, "Number of steps must be positive.");
1596
1597     if (bench_nsteps > 10000 || bench_nsteps < 100)
1598     {
1599         fprintf(stderr, "WARNING: steps=");
1600         fprintf(stderr, gmx_large_int_pfmt, bench_nsteps);
1601         fprintf(stderr, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps < 100)? "few" : "many");
1602     }
1603
1604     if (presteps < 0)
1605     {
1606         gmx_fatal(FARGS, "Cannot have a negative number of presteps.\n");
1607     }
1608
1609     if (*upfac <= 0.0 || *downfac <= 0.0 || *downfac > *upfac)
1610         gmx_fatal(FARGS, "Both scaling factors must be larger than zero and upper\n"
1611                          "scaling limit (%f) must be larger than lower limit (%f).",
1612                          *upfac, *downfac);
1613
1614     if (*downfac < 0.75 || *upfac > 1.25)
1615         fprintf(stderr, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1616
1617     if (fourierspacing < 0)
1618         gmx_fatal(FARGS, "Please choose a positive value for fourierspacing.");
1619
1620     /* Make shure that the scaling factor options are compatible with the number of tprs */
1621     if ( (*ntprs < 3) && ( opt2parg_bSet("-upfac",npargs,pa) || opt2parg_bSet("-downfac",npargs,pa) ) )
1622     {
1623         if (opt2parg_bSet("-upfac",npargs,pa) && opt2parg_bSet("-downfac",npargs,pa) && !is_equal(*upfac,*downfac))
1624         {
1625             fprintf(stderr, "NOTE: Specify -ntpr > 2 for both scaling factors to take effect.\n"
1626                              "(upfac=%f, downfac=%f)\n", *upfac, *downfac);
1627         }
1628         if (opt2parg_bSet("-upfac",npargs,pa))
1629             *downfac = *upfac;
1630         if (opt2parg_bSet("-downfac",npargs,pa))
1631             *upfac = *downfac;
1632     }
1633     if ( (2 == *ntprs) && (opt2parg_bSet("-downfac",npargs,pa)) && !is_equal(*downfac, 1.0))
1634     {
1635         *upfac = 1.0;
1636     }
1637 }
1638
1639
1640 /* Returns TRUE when "opt" is a switch for g_tune_pme itself */
1641 static gmx_bool is_main_switch(char *opt)
1642 {
1643     if ( (0 == strcmp(opt,"-s"        ))
1644       || (0 == strcmp(opt,"-p"        ))
1645       || (0 == strcmp(opt,"-launch"   ))
1646       || (0 == strcmp(opt,"-r"        ))
1647       || (0 == strcmp(opt,"-ntpr"     ))
1648       || (0 == strcmp(opt,"-max"      ))
1649       || (0 == strcmp(opt,"-min"      ))
1650       || (0 == strcmp(opt,"-upfac"    ))
1651       || (0 == strcmp(opt,"-downfac"  ))
1652       || (0 == strcmp(opt,"-four"     ))
1653       || (0 == strcmp(opt,"-steps"    ))
1654       || (0 == strcmp(opt,"-simsteps" ))
1655       || (0 == strcmp(opt,"-resetstep"))
1656       || (0 == strcmp(opt,"-so"       ))
1657       || (0 == strcmp(opt,"-npstring" ))
1658       || (0 == strcmp(opt,"-npme"     ))
1659       || (0 == strcmp(opt,"-passall"  )) )
1660     return TRUE;
1661
1662     return FALSE;
1663 }
1664
1665
1666 /* Returns TRUE when "opt" is needed at launch time */
1667 static gmx_bool is_launch_option(char *opt, gmx_bool bSet)
1668 {
1669     if (bSet)
1670         return TRUE;
1671     else
1672         return FALSE;
1673 }
1674
1675
1676 /* Returns TRUE when "opt" is needed at launch time */
1677 static gmx_bool is_launch_file(char *opt, gmx_bool bSet)
1678 {
1679     /* We need all options that were set on the command line
1680      * and that do not start with -b */
1681     if (0 == strncmp(opt,"-b", 2))
1682         return FALSE;
1683
1684     if (bSet)
1685         return TRUE;
1686     else
1687         return FALSE;
1688 }
1689
1690
1691 /* Returns TRUE when "opt" gives an option needed for the benchmarks runs */
1692 static gmx_bool is_bench_option(char *opt, gmx_bool bSet)
1693 {
1694     /* If option is set, we might need it for the benchmarks.
1695      * This includes -cpi */
1696     if (bSet)
1697     {
1698         if ( (0 == strcmp(opt, "-append"   ))
1699           || (0 == strcmp(opt, "-maxh"     ))
1700           || (0 == strcmp(opt, "-deffnm"   ))
1701           || (0 == strcmp(opt, "-resethway"))
1702           || (0 == strcmp(opt, "-cpnum"    )) )
1703             return FALSE;
1704         else
1705             return TRUE;
1706     }
1707     else
1708         return FALSE;
1709 }
1710
1711
1712 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1713 static gmx_bool is_bench_file(char *opt, gmx_bool bSet, gmx_bool bOptional, gmx_bool bIsOutput)
1714 {
1715     /* All options starting with "-b" are for _b_enchmark files exclusively */
1716     if (0 == strncmp(opt,"-b", 2))
1717     {
1718         if (!bOptional || bSet)
1719             return TRUE;
1720         else
1721             return FALSE;
1722     }
1723     else
1724     {
1725         if (bIsOutput)
1726             return FALSE;
1727         else
1728             if (bSet) /* These are additional input files like -cpi -ei */
1729                 return TRUE;
1730             else
1731                 return FALSE;
1732     }
1733 }
1734
1735
1736 /* Adds 'buf' to 'str' */
1737 static void add_to_string(char **str, char *buf)
1738 {
1739     int len;
1740
1741
1742     len = strlen(*str) + strlen(buf) + 1;
1743     srenew(*str, len);
1744     strcat(*str, buf);
1745 }
1746
1747
1748 /* Create the command line for the benchmark as well as for the real run */
1749 static void create_command_line_snippets(
1750         gmx_bool     bThreads,
1751         int      presteps,
1752         int      nfile,
1753         t_filenm fnm[],
1754         int      npargs,
1755         t_pargs  *pa,
1756         const char *procstring,      /* How to pass the number of processors to $MPIRUN */
1757         char     *cmd_np[],          /* Actual command line snippet, e.g. '-np <N>' */
1758         char     *cmd_args_bench[],  /* command line arguments for benchmark runs */
1759         char     *cmd_args_launch[], /* command line arguments for simulation run */
1760         char     extra_args[])       /* Add this to the end of the command line */
1761 {
1762     int        i;
1763     char       *opt;
1764     const char *name;
1765     char       *np_or_nt;
1766 #define BUFLENGTH 255
1767     char       buf[BUFLENGTH];
1768     char       strbuf[BUFLENGTH];
1769     char       strbuf2[BUFLENGTH];
1770
1771
1772     if (bThreads)
1773         np_or_nt=strdup("-nt");
1774     else
1775         np_or_nt=strdup("-np");
1776
1777     /* strlen needs at least '\0' as a string: */
1778     snew(*cmd_args_bench ,1);
1779     snew(*cmd_args_launch,1);
1780     *cmd_args_launch[0]='\0';
1781     *cmd_args_bench[0] ='\0';
1782
1783
1784     /*******************************************/
1785     /* 1. Process other command line arguments */
1786     /*******************************************/
1787     for (i=0; i<npargs; i++)
1788     {
1789         /* What command line switch are we currently processing: */
1790         opt = (char *)pa[i].option;
1791         /* Skip options not meant for mdrun */
1792         if (!is_main_switch(opt))
1793         {
1794             /* Print it to a string buffer, strip away trailing whitespaces that pa_val also returns: */
1795             sprintf(strbuf2, "%s", pa_val(&pa[i],buf,BUFLENGTH));
1796             rtrim(strbuf2);
1797             sprintf(strbuf, "%s %s ", opt, strbuf2);
1798             /* We need the -np (or -nt) switch in a separate buffer - whether or not it was set! */
1799             if (0 == strcmp(opt,np_or_nt))
1800             {
1801                 if (strcmp(procstring, "none")==0 && !bThreads)
1802                 {
1803                     /* Omit -np <N> entirely */
1804                     snew(*cmd_np, 2);
1805                     sprintf(*cmd_np, " ");
1806                 }
1807                 else
1808                 {
1809                     /* This is the normal case with -np <N> */
1810                     snew(*cmd_np, strlen(procstring)+strlen(strbuf2)+4);
1811                     sprintf(*cmd_np, " %s %s ", bThreads? "-nt" : procstring, strbuf2);
1812                 }
1813             }
1814             else
1815             {
1816                 if (is_bench_option(opt,pa[i].bSet))
1817                     add_to_string(cmd_args_bench, strbuf);
1818
1819                 if (is_launch_option(opt,pa[i].bSet))
1820                     add_to_string(cmd_args_launch, strbuf);
1821             }
1822         }
1823     }
1824     if (presteps > 0)
1825     {
1826         /* Add equilibration steps to benchmark options */
1827         sprintf(strbuf, "-resetstep %d ", presteps);
1828         add_to_string(cmd_args_bench, strbuf);
1829     }
1830
1831     /********************/
1832     /* 2. Process files */
1833     /********************/
1834     for (i=0; i<nfile; i++)
1835     {
1836         opt  = (char *)fnm[i].opt;
1837         name = opt2fn(opt,nfile,fnm);
1838
1839         /* Strbuf contains the options, now let's sort out where we need that */
1840         sprintf(strbuf, "%s %s ", opt, name);
1841
1842         /* Skip options not meant for mdrun */
1843         if (!is_main_switch(opt))
1844         {
1845
1846             if ( is_bench_file(opt, opt2bSet(opt,nfile,fnm), is_optional(&fnm[i]), is_output(&fnm[i])) )
1847             {
1848                 /* All options starting with -b* need the 'b' removed,
1849                  * therefore overwrite strbuf */
1850                 if (0 == strncmp(opt, "-b", 2))
1851                     sprintf(strbuf, "-%s %s ", &opt[2], name);
1852
1853                 add_to_string(cmd_args_bench, strbuf);
1854             }
1855
1856             if ( is_launch_file(opt,opt2bSet(opt,nfile,fnm)) )
1857                 add_to_string(cmd_args_launch, strbuf);
1858         }
1859     }
1860
1861     add_to_string(cmd_args_bench , extra_args);
1862     add_to_string(cmd_args_launch, extra_args);
1863 #undef BUFLENGTH
1864 }
1865
1866
1867 /* Set option opt */
1868 static void setopt(const char *opt,int nfile,t_filenm fnm[])
1869 {
1870   int i;
1871
1872   for(i=0; (i<nfile); i++)
1873     if (strcmp(opt,fnm[i].opt)==0)
1874       fnm[i].flag |= ffSET;
1875 }
1876
1877
1878 static void couple_files_options(int nfile, t_filenm fnm[])
1879 {
1880     int i;
1881     gmx_bool bSet,bBench;
1882     char *opt;
1883     char buf[20];
1884
1885
1886     for (i=0; i<nfile; i++)
1887     {
1888         opt  = (char *)fnm[i].opt;
1889         bSet = ((fnm[i].flag & ffSET) != 0);
1890         bBench = (0 == strncmp(opt,"-b", 2));
1891
1892         /* Check optional files */
1893         /* If e.g. -eo is set, then -beo also needs to be set */
1894         if (is_optional(&fnm[i]) && bSet && !bBench)
1895         {
1896             sprintf(buf, "-b%s", &opt[1]);
1897             setopt(buf,nfile,fnm);
1898         }
1899         /* If -beo is set, then -eo also needs to be! */
1900         if (is_optional(&fnm[i]) && bSet && bBench)
1901         {
1902             sprintf(buf, "-%s", &opt[2]);
1903             setopt(buf,nfile,fnm);
1904         }
1905     }
1906 }
1907
1908
1909 static double gettime()
1910 {
1911 #ifdef HAVE_GETTIMEOFDAY
1912     struct timeval t;
1913     struct timezone tz = { 0,0 };
1914     double seconds;
1915
1916     gettimeofday(&t,&tz);
1917
1918     seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
1919
1920     return seconds;
1921 #else
1922     double  seconds;
1923
1924     seconds = time(NULL);
1925
1926     return seconds;
1927 #endif
1928 }
1929
1930
1931 #define BENCHSTEPS (1000)
1932
1933 int gmx_tune_pme(int argc,char *argv[])
1934 {
1935     const char *desc[] = {
1936             "For a given number [TT]-np[tt] or [TT]-nt[tt] of processors/threads, this program systematically",
1937             "times mdrun with various numbers of PME-only nodes and determines",
1938             "which setting is fastest. It will also test whether performance can",
1939             "be enhanced by shifting load from the reciprocal to the real space",
1940             "part of the Ewald sum. ",
1941             "Simply pass your [TT].tpr[tt] file to g_tune_pme together with other options",
1942             "for mdrun as needed.[PAR]",
1943             "Which executables are used can be set in the environment variables",
1944             "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
1945             "will be used as defaults. Note that for certain MPI frameworks you",
1946             "need to provide a machine- or hostfile. This can also be passed",
1947             "via the MPIRUN variable, e.g.",
1948             "'export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"'[PAR]",
1949             "Please call g_tune_pme with the normal options you would pass to",
1950             "mdrun and add [TT]-np[tt] for the number of processors to perform the",
1951             "tests on, or [TT]-nt[tt] for the number of threads. You can also add [TT]-r[tt]",
1952             "to repeat each test several times to get better statistics. [PAR]",
1953             "g_tune_pme can test various real space / reciprocal space workloads",
1954             "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
1955             "written with enlarged cutoffs and smaller fourier grids respectively.",
1956             "Typically, the first test (no. 0) will be with the settings from the input",
1957             "[TT].tpr[tt] file; the last test (no. [TT]ntpr[tt]) will have cutoffs multiplied",
1958             "by (and at the same time fourier grid dimensions divided by) the scaling",
1959             "factor [TT]-fac[tt] (default 1.2). The remaining [TT].tpr[tt] files will have equally",
1960             "spaced values inbetween these extremes. Note that you can set [TT]-ntpr[tt] to 1",
1961             "if you just want to find the optimal number of PME-only nodes; in that case",
1962             "your input [TT].tpr[tt] file will remain unchanged.[PAR]",
1963             "For the benchmark runs, the default of 1000 time steps should suffice for most",
1964             "MD systems. The dynamic load balancing needs about 100 time steps",
1965             "to adapt to local load imbalances, therefore the time step counters",
1966             "are by default reset after 100 steps. For large systems",
1967             "(>1M atoms) you may have to set [TT]-resetstep[tt] to a higher value.",
1968             "From the 'DD' load imbalance entries in the md.log output file you",
1969             "can tell after how many steps the load is sufficiently balanced.[PAR]"
1970             "Example call: [TT]g_tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
1971             "After calling mdrun several times, detailed performance information",
1972             "is available in the output file perf.out. ",
1973             "Note that during the benchmarks a couple of temporary files are written",
1974             "(options -b*), these will be automatically deleted after each test.[PAR]",
1975             "If you want the simulation to be started automatically with the",
1976             "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
1977     };
1978
1979     int        nnodes =1;
1980     int        repeats=2;
1981     int        pmeentries=0; /* How many values for -npme do we actually test for each tpr file */
1982     real       maxPMEfraction=0.50;
1983     real       minPMEfraction=0.25;
1984     int        maxPMEnodes, minPMEnodes;
1985     real       downfac=1.0,upfac=1.2;
1986     int        ntprs=0;
1987     real       fs=0.0;                    /* 0 indicates: not set by the user */
1988     gmx_large_int_t bench_nsteps=BENCHSTEPS;
1989     gmx_large_int_t new_sim_nsteps=-1;   /* -1 indicates: not set by the user */
1990     gmx_large_int_t cpt_steps=0;         /* Step counter in .cpt input file   */
1991     int        presteps=100;    /* Do a full cycle reset after presteps steps */
1992     gmx_bool       bOverwrite=FALSE, bKeepTPR;
1993     gmx_bool       bLaunch=FALSE;
1994     gmx_bool       bPassAll=FALSE;
1995     char       *ExtraArgs=NULL;
1996     char       **tpr_names=NULL;
1997     const char *simulation_tpr=NULL;
1998     int        best_npme, best_tpr;
1999     int        sim_part = 1;     /* For benchmarks with checkpoint files */
2000
2001     /* Default program names if nothing else is found */
2002     char        *cmd_mpirun=NULL, *cmd_mdrun=NULL;
2003     char        *cmd_args_bench, *cmd_args_launch;
2004     char        *cmd_np=NULL;
2005
2006     t_perf      **perfdata=NULL;
2007     t_inputinfo *info;
2008     int         i;
2009     FILE        *fp;
2010     t_commrec   *cr;
2011
2012     /* Print out how long the tuning took */
2013     double      seconds;
2014
2015     static t_filenm fnm[] = {
2016       /* g_tune_pme */
2017       { efOUT, "-p",      "perf",     ffWRITE },
2018       { efLOG, "-err",    "errors",   ffWRITE },
2019       { efTPX, "-so",     "tuned",    ffWRITE },
2020       /* mdrun: */
2021       { efTPX, NULL,      NULL,       ffREAD },
2022       { efTRN, "-o",      NULL,       ffWRITE },
2023       { efXTC, "-x",      NULL,       ffOPTWR },
2024       { efCPT, "-cpi",    NULL,       ffOPTRD },
2025       { efCPT, "-cpo",    NULL,       ffOPTWR },
2026       { efSTO, "-c",      "confout",  ffWRITE },
2027       { efEDR, "-e",      "ener",     ffWRITE },
2028       { efLOG, "-g",      "md",       ffWRITE },
2029       { efXVG, "-dhdl",   "dhdl",     ffOPTWR },
2030       { efXVG, "-field",  "field",    ffOPTWR },
2031       { efXVG, "-table",  "table",    ffOPTRD },
2032       { efXVG, "-tablep", "tablep",   ffOPTRD },
2033       { efXVG, "-tableb", "table",    ffOPTRD },
2034       { efTRX, "-rerun",  "rerun",    ffOPTRD },
2035       { efXVG, "-tpi",    "tpi",      ffOPTWR },
2036       { efXVG, "-tpid",   "tpidist",  ffOPTWR },
2037       { efEDI, "-ei",     "sam",      ffOPTRD },
2038       { efEDO, "-eo",     "sam",      ffOPTWR },
2039       { efGCT, "-j",      "wham",     ffOPTRD },
2040       { efGCT, "-jo",     "bam",      ffOPTWR },
2041       { efXVG, "-ffout",  "gct",      ffOPTWR },
2042       { efXVG, "-devout", "deviatie", ffOPTWR },
2043       { efXVG, "-runav",  "runaver",  ffOPTWR },
2044       { efXVG, "-px",     "pullx",    ffOPTWR },
2045       { efXVG, "-pf",     "pullf",    ffOPTWR },
2046       { efXVG, "-ro",     "rotation", ffOPTWR },
2047       { efLOG, "-ra",     "rotangles",ffOPTWR },
2048       { efLOG, "-rs",     "rotslabs", ffOPTWR },
2049       { efLOG, "-rt",     "rottorque",ffOPTWR },
2050       { efMTX, "-mtx",    "nm",       ffOPTWR },
2051       { efNDX, "-dn",     "dipole",   ffOPTWR },
2052       /* Output files that are deleted after each benchmark run */
2053       { efTRN, "-bo",     "bench",    ffWRITE },
2054       { efXTC, "-bx",     "bench",    ffWRITE },
2055       { efCPT, "-bcpo",   "bench",    ffWRITE },
2056       { efSTO, "-bc",     "bench",    ffWRITE },
2057       { efEDR, "-be",     "bench",    ffWRITE },
2058       { efLOG, "-bg",     "bench",    ffWRITE },
2059       { efEDO, "-beo",    "bench",    ffOPTWR },
2060       { efXVG, "-bdhdl",  "benchdhdl",ffOPTWR },
2061       { efXVG, "-bfield", "benchfld" ,ffOPTWR },
2062       { efXVG, "-btpi",   "benchtpi", ffOPTWR },
2063       { efXVG, "-btpid",  "benchtpid",ffOPTWR },
2064       { efGCT, "-bjo",    "bench",    ffOPTWR },
2065       { efXVG, "-bffout", "benchgct", ffOPTWR },
2066       { efXVG, "-bdevout","benchdev", ffOPTWR },
2067       { efXVG, "-brunav", "benchrnav",ffOPTWR },
2068       { efXVG, "-bpx",    "benchpx",  ffOPTWR },
2069       { efXVG, "-bpf",    "benchpf",  ffOPTWR },
2070       { efXVG, "-bro",    "benchrot", ffOPTWR },
2071       { efLOG, "-bra",    "benchrota",ffOPTWR },
2072       { efLOG, "-brs",    "benchrots",ffOPTWR },
2073       { efLOG, "-brt",    "benchrott",ffOPTWR },
2074       { efMTX, "-bmtx",   "benchn",   ffOPTWR },
2075       { efNDX, "-bdn",    "bench",    ffOPTWR }
2076     };
2077
2078     /* Command line options of mdrun */
2079     gmx_bool bDDBondCheck = TRUE;
2080     gmx_bool bDDBondComm  = TRUE;
2081     gmx_bool bVerbose     = FALSE;
2082     gmx_bool bCompact     = TRUE;
2083     gmx_bool bSepPot      = FALSE;
2084     gmx_bool bRerunVSite  = FALSE;
2085     gmx_bool bIonize      = FALSE;
2086     gmx_bool bConfout     = TRUE;
2087     gmx_bool bReproducible = FALSE;
2088     gmx_bool bThreads     = FALSE;
2089
2090     int  nmultisim=0;
2091     int  nstglobalcomm=-1;
2092     int  repl_ex_nst=0;
2093     int  repl_ex_seed=-1;
2094     int  nstepout=100;
2095     int  nthreads=1;
2096
2097     const char *ddno_opt[ddnoNR+1] =
2098       { NULL, "interleave", "pp_pme", "cartesian", NULL };
2099     const char *dddlb_opt[] =
2100       { NULL, "auto", "no", "yes", NULL };
2101     const char *procstring[] =
2102       { NULL, "-np", "-n", "none", NULL };
2103     const char *npmevalues_opt[] =
2104       { NULL, "auto", "all", "subset", NULL };
2105     real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
2106     char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
2107     char *deffnm=NULL;
2108 #define STD_CPT_PERIOD (15.0)
2109     real cpt_period=STD_CPT_PERIOD,max_hours=-1;
2110     gmx_bool bAppendFiles=TRUE;
2111     gmx_bool bKeepAndNumCPT=FALSE;
2112     gmx_bool bResetCountersHalfWay=FALSE;
2113     output_env_t oenv=NULL;
2114
2115     t_pargs pa[] = {
2116       /***********************/
2117       /* g_tune_pme options: */
2118       /***********************/
2119       { "-np",       FALSE, etINT,  {&nnodes},
2120         "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" },
2121       { "-npstring", FALSE, etENUM, {procstring},
2122         "Specify the number of processors to $MPIRUN using this string"},
2123       { "-passall",  FALSE, etBOOL, {&bPassAll},
2124         "HIDDENPut arguments unknown to mdrun at the end of the command line. Can e.g. be used for debugging purposes. "},
2125       { "-nt",       FALSE, etINT,  {&nthreads},
2126         "Number of threads to run the tests on (turns MPI & mpirun off)"},
2127       { "-r",        FALSE, etINT,  {&repeats},
2128         "Repeat each test this often" },
2129       { "-max",      FALSE, etREAL, {&maxPMEfraction},
2130         "Max fraction of PME nodes to test with" },
2131       { "-min",      FALSE, etREAL, {&minPMEfraction},
2132         "Min fraction of PME nodes to test with" },
2133       { "-npme",     FALSE, etENUM, {npmevalues_opt},
2134         "Benchmark all possible values for -npme or just the subset that is expected to perform well"},
2135       { "-upfac",    FALSE, etREAL, {&upfac},
2136         "Upper limit for rcoulomb scaling factor (Note that rcoulomb upscaling results in fourier grid downscaling)" },
2137       { "-downfac",  FALSE, etREAL, {&downfac},
2138         "Lower limit for rcoulomb scaling factor" },
2139       { "-ntpr",     FALSE, etINT,  {&ntprs},
2140         "Number of tpr files to benchmark. Create these many files with scaling factors ranging from 1.0 to fac. If < 1, automatically choose the number of tpr files to test" },
2141       { "-four",     FALSE, etREAL, {&fs},
2142         "Use this fourierspacing value instead of the grid found in the tpr input file. (Spacing applies to a scaling factor of 1.0 if multiple tpr files are written)" },
2143       { "-steps",    FALSE, etGMX_LARGE_INT, {&bench_nsteps},
2144         "Take timings for these many steps in the benchmark runs" },
2145       { "-resetstep",FALSE, etINT,  {&presteps},
2146         "Let dlb equilibrate these many steps before timings are taken (reset cycle counters after these many steps)" },
2147       { "-simsteps", FALSE, etGMX_LARGE_INT, {&new_sim_nsteps},
2148         "If non-negative, perform these many steps in the real run (overwrite nsteps from tpr, add cpt steps)" },
2149       { "-launch",   FALSE, etBOOL, {&bLaunch},
2150         "Lauch the real simulation after optimization" },
2151       /******************/
2152       /* mdrun options: */
2153       /******************/
2154       { "-deffnm",    FALSE, etSTR, {&deffnm},
2155           "Set the default filename for all file options at launch time" },
2156       { "-ddorder",   FALSE, etENUM, {ddno_opt},
2157         "DD node order" },
2158       { "-ddcheck",   FALSE, etBOOL, {&bDDBondCheck},
2159         "Check for all bonded interactions with DD" },
2160       { "-ddbondcomm",FALSE, etBOOL, {&bDDBondComm},
2161         "HIDDENUse special bonded atom communication when -rdd > cut-off" },
2162       { "-rdd",       FALSE, etREAL, {&rdd},
2163         "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
2164       { "-rcon",      FALSE, etREAL, {&rconstr},
2165         "Maximum distance for P-LINCS (nm), 0 is estimate" },
2166       { "-dlb",       FALSE, etENUM, {dddlb_opt},
2167         "Dynamic load balancing (with DD)" },
2168       { "-dds",       FALSE, etREAL, {&dlb_scale},
2169         "Minimum allowed dlb scaling of the DD cell size" },
2170       { "-ddcsx",     FALSE, etSTR,  {&ddcsx},
2171         "HIDDENThe DD cell sizes in x" },
2172       { "-ddcsy",     FALSE, etSTR,  {&ddcsy},
2173         "HIDDENThe DD cell sizes in y" },
2174       { "-ddcsz",     FALSE, etSTR,  {&ddcsz},
2175         "HIDDENThe DD cell sizes in z" },
2176       { "-gcom",      FALSE, etINT,  {&nstglobalcomm},
2177         "Global communication frequency" },
2178       { "-v",         FALSE, etBOOL, {&bVerbose},
2179         "Be loud and noisy" },
2180       { "-compact",   FALSE, etBOOL, {&bCompact},
2181         "Write a compact log file" },
2182       { "-seppot",    FALSE, etBOOL, {&bSepPot},
2183         "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
2184       { "-pforce",    FALSE, etREAL, {&pforce},
2185         "Print all forces larger than this (kJ/mol nm)" },
2186       { "-reprod",    FALSE, etBOOL, {&bReproducible},
2187         "Try to avoid optimizations that affect binary reproducibility" },
2188       { "-cpt",       FALSE, etREAL, {&cpt_period},
2189         "Checkpoint interval (minutes)" },
2190       { "-cpnum",   FALSE, etBOOL, {&bKeepAndNumCPT},
2191         "Keep and number checkpoint files" },
2192       { "-append",    FALSE, etBOOL, {&bAppendFiles},
2193         "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
2194       { "-maxh",      FALSE, etREAL, {&max_hours},
2195         "Terminate after 0.99 times this time (hours)" },
2196       { "-multi",     FALSE, etINT,  {&nmultisim},
2197         "Do multiple simulations in parallel" },
2198       { "-replex",    FALSE, etINT,  {&repl_ex_nst},
2199         "Attempt replica exchange every # steps" },
2200       { "-reseed",    FALSE, etINT,  {&repl_ex_seed},
2201         "Seed for replica exchange, -1 is generate a seed" },
2202       { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
2203         "HIDDENRecalculate virtual site coordinates with -rerun" },
2204       { "-ionize",    FALSE, etBOOL, {&bIonize},
2205         "Do a simulation including the effect of an X-Ray bombardment on your system" },
2206       { "-confout",   FALSE, etBOOL, {&bConfout},
2207         "HIDDENWrite the last configuration with -c and force checkpointing at the last step" },
2208       { "-stepout",   FALSE, etINT,  {&nstepout},
2209         "HIDDENFrequency of writing the remaining runtime" },
2210       { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
2211         "HIDDENReset the cycle counters after half the number of steps or halfway -maxh (launch only)" }
2212     };
2213
2214
2215 #define NFILE asize(fnm)
2216
2217     CopyRight(stderr,argv[0]);
2218
2219     seconds = gettime();
2220
2221     parse_common_args(&argc,argv,PCA_NOEXIT_ON_ARGS,
2222                       NFILE,fnm,asize(pa),pa,asize(desc),desc,
2223                       0,NULL,&oenv);
2224
2225     /* Store the remaining unparsed command line entries in a string */
2226     snew(ExtraArgs, 1);
2227     ExtraArgs[0] = '\0';
2228     for (i=1; i<argc; i++) /* argc will now be 1 if everything was understood */
2229     {
2230         add_to_string(&ExtraArgs, argv[i]);
2231         add_to_string(&ExtraArgs, " ");
2232     }
2233     if ( !bPassAll && (ExtraArgs[0] != '\0') )
2234     {
2235         fprintf(stderr, "\nWARNING: The following arguments you provided have no effect:\n"
2236                         "%s\n"
2237                         "Use the -passall option to force them to appear on the command lines\n"
2238                         "for the benchmark simulations%s.\n\n",
2239                         ExtraArgs, bLaunch? " and at launch time" : "");
2240     }
2241
2242     if (opt2parg_bSet("-nt",asize(pa),pa))
2243     {
2244         bThreads=TRUE;
2245         if (opt2parg_bSet("-npstring",asize(pa),pa))
2246             fprintf(stderr, "WARNING: -npstring has no effect when using threads.\n");
2247
2248         if (nnodes > 1)
2249             gmx_fatal(FARGS, "Can't run multi-threaded MPI simulation yet!");
2250         /* and now we just set this; a bit of an ugly hack*/
2251         nnodes=nthreads;
2252     }
2253     /* Automatically set -beo options if -eo is set etc. */
2254     couple_files_options(NFILE,fnm);
2255
2256     /* Construct the command line arguments for benchmark runs
2257      * as well as for the simulation run
2258      */
2259     create_command_line_snippets(bThreads,presteps,NFILE,fnm,asize(pa),pa,procstring[0],
2260                                  &cmd_np, &cmd_args_bench, &cmd_args_launch,
2261                                  bPassAll? ExtraArgs : (char *)"");
2262
2263     /* Read in checkpoint file if requested */
2264     sim_part = 1;
2265     if(opt2bSet("-cpi",NFILE,fnm))
2266     {
2267         snew(cr,1);
2268         cr->duty=DUTY_PP; /* makes the following routine happy */
2269         read_checkpoint_simulation_part(opt2fn("-cpi",NFILE,fnm),
2270                                         &sim_part,&cpt_steps,cr,
2271                                         FALSE,NFILE,fnm,NULL,NULL);
2272         sfree(cr);
2273         sim_part++;
2274         /* sim_part will now be 1 if no checkpoint file was found */
2275         if (sim_part<=1)
2276             gmx_fatal(FARGS, "Checkpoint file %s not found!", opt2fn("-cpi",
2277                                                                      NFILE,
2278                                                                      fnm));
2279     }
2280
2281     /* Open performance output file and write header info */
2282     fp = ffopen(opt2fn("-p",NFILE,fnm),"w");
2283
2284     /* Make a quick consistency check of command line parameters */
2285     check_input(nnodes, repeats, &ntprs, &upfac, &downfac, maxPMEfraction,
2286                 minPMEfraction, fs, bench_nsteps, fnm, NFILE, sim_part, presteps,
2287                 asize(pa),pa);
2288
2289     /* Determine max and min number of PME nodes to test: */
2290     if (nnodes > 2)
2291     {
2292         maxPMEnodes = floor(maxPMEfraction*nnodes);
2293         minPMEnodes = max(floor(minPMEfraction*nnodes), 0);
2294         fprintf(stdout, "Will try runs with %d ", minPMEnodes);
2295         if (maxPMEnodes != minPMEnodes)
2296             fprintf(stdout, "- %d ", maxPMEnodes);
2297         fprintf(stdout, "PME-only nodes.\n  Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
2298     }
2299     else
2300     {
2301         maxPMEnodes = 0;
2302         minPMEnodes = 0;
2303     }
2304
2305     /* Get the commands we need to set up the runs from environment variables */
2306     get_program_paths(bThreads, &cmd_mpirun, cmd_np, &cmd_mdrun, repeats);
2307
2308     /* Print some header info to file */
2309     sep_line(fp);
2310     fprintf(fp, "\n      P E R F O R M A N C E   R E S U L T S\n");
2311     sep_line(fp);
2312     fprintf(fp, "%s for Gromacs %s\n", ShortProgram(),GromacsVersion());
2313     if (!bThreads)
2314     {
2315         fprintf(fp, "Number of nodes         : %d\n", nnodes);
2316         fprintf(fp, "The mpirun command is   : %s\n", cmd_mpirun);
2317         if ( strcmp(procstring[0], "none") != 0)
2318             fprintf(fp, "Passing # of nodes via  : %s\n", procstring[0]);
2319         else
2320             fprintf(fp, "Not setting number of nodes in system call\n");
2321     }
2322     else
2323         fprintf(fp, "Number of threads       : %d\n", nnodes);
2324
2325     fprintf(fp, "The mdrun  command is   : %s\n", cmd_mdrun);
2326     fprintf(fp, "mdrun args benchmarks   : %s\n", cmd_args_bench);
2327     fprintf(fp, "Benchmark steps         : ");
2328     fprintf(fp, gmx_large_int_pfmt, bench_nsteps);
2329     fprintf(fp, "\n");
2330     fprintf(fp, "dlb equilibration steps : %d\n", presteps);
2331     if (sim_part > 1)
2332     {
2333         fprintf(fp, "Checkpoint time step    : ");
2334         fprintf(fp, gmx_large_int_pfmt, cpt_steps);
2335         fprintf(fp, "\n");
2336     }
2337     if (bLaunch)
2338         fprintf(fp, "mdrun args at launchtime: %s\n", cmd_args_launch);
2339     if (!bPassAll && ExtraArgs[0] != '\0')
2340         fprintf(fp, "Unused arguments        : %s\n", ExtraArgs);
2341     if (new_sim_nsteps >= 0)
2342     {
2343         bOverwrite = TRUE;
2344         fprintf(stderr, "Note: Simulation input file %s will have ", opt2fn("-so",NFILE,fnm));
2345         fprintf(stderr, gmx_large_int_pfmt, new_sim_nsteps+cpt_steps);
2346         fprintf(stderr, " steps.\n");
2347         fprintf(fp, "Simulation steps        : ");
2348         fprintf(fp, gmx_large_int_pfmt, new_sim_nsteps);
2349         fprintf(fp, "\n");
2350     }
2351     if (repeats > 1)
2352         fprintf(fp, "Repeats for each test   : %d\n", repeats);
2353
2354     if (fs > 0.0)
2355     {
2356         fprintf(fp, "Requested grid spacing  : %f (This will be the grid spacing at a scaling factor of 1.0)\n", fs);
2357     }
2358
2359     fprintf(fp, "Input file              : %s\n", opt2fn("-s",NFILE,fnm));
2360
2361     /* Allocate memory for the inputinfo struct: */
2362     snew(info, 1);
2363     info->nr_inputfiles = ntprs;
2364     for (i=0; i<ntprs; i++)
2365     {
2366         snew(info->rcoulomb , ntprs);
2367         snew(info->rvdw     , ntprs);
2368         snew(info->rlist    , ntprs);
2369         snew(info->rlistlong, ntprs);
2370         snew(info->nkx      , ntprs);
2371         snew(info->nky      , ntprs);
2372         snew(info->nkz      , ntprs);
2373         snew(info->fsx      , ntprs);
2374         snew(info->fsy      , ntprs);
2375         snew(info->fsz      , ntprs);
2376     }
2377     /* Make alternative tpr files to test: */
2378     snew(tpr_names, ntprs);
2379     for (i=0; i<ntprs; i++)
2380         snew(tpr_names[i], STRLEN);
2381
2382     make_benchmark_tprs(opt2fn("-s",NFILE,fnm), tpr_names, bench_nsteps+presteps,
2383             cpt_steps, upfac, downfac, ntprs, fs, info, fp);
2384
2385
2386     /********************************************************************************/
2387     /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats  */
2388     /********************************************************************************/
2389     snew(perfdata, ntprs);
2390     do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npmevalues_opt[0], perfdata, &pmeentries,
2391                  repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2392                  cmd_args_bench, fnm, NFILE, sim_part, presteps, cpt_steps);
2393
2394     fprintf(fp, "\nTuning took%8.1f minutes.\n", (gettime()-seconds)/60.0);
2395
2396     /* Analyse the results and give a suggestion for optimal settings: */
2397     bKeepTPR = analyze_data(fp, opt2fn("-p", NFILE, fnm), perfdata, nnodes, ntprs, pmeentries,
2398                             repeats, info, &best_tpr, &best_npme);
2399
2400     /* Take the best-performing tpr file and enlarge nsteps to original value */
2401     if ( bKeepTPR && !bOverwrite && !(fs > 0.0) )
2402     {
2403         simulation_tpr = opt2fn("-s",NFILE,fnm);
2404     }
2405     else
2406     {
2407         simulation_tpr = opt2fn("-so",NFILE,fnm);
2408         modify_PMEsettings(bOverwrite? (new_sim_nsteps+cpt_steps) :
2409                            info->orig_sim_steps, tpr_names[best_tpr],
2410                            simulation_tpr);
2411     }
2412
2413     /* Now start the real simulation if the user requested it ... */
2414     launch_simulation(bLaunch, fp, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2415                       cmd_args_launch, simulation_tpr, nnodes, best_npme);
2416     ffclose(fp);
2417
2418     /* ... or simply print the performance results to screen: */
2419     if (!bLaunch)
2420         finalize(opt2fn("-p", NFILE, fnm));
2421
2422     return 0;
2423 }