src/tools/gmx_pme_error.c

   1 /* $Id: gmx_tune_pme.c 9 2009-08-11 09:43:30Z dommert $
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  10  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  11  * Copyright (c) 2001-2008, The GROMACS development team,
  12  * check out http://www.gromacs.org for more information.
  13
  14  * This program is free software; you can redistribute it and/or
  15  * modify it under the terms of the GNU General Public License
  16  * as published by the Free Software Foundation; either version 2
  17  * of the License, or (at your option) any later version.
  18  *
  19  * If you want to redistribute modifications, please consider that
  20  * scientific software is very special. Version control is crucial -
  21  * bugs must be traceable. We will be happy to consider code for
  22  * inclusion in the official distribution, but derived work must not
  23  * be called official GROMACS. Details are found in the README & COPYING
  24  * files - if they are missing, get the official version at www.gromacs.org.
  25  *
  26  * To help us fund GROMACS development, we humbly ask that you cite
  27  * the papers on the package - you can find them in the top README file.
  28  *
  29  * For more info, check our website at http://www.gromacs.org
  30  *
  31  * And Hey:
  32  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  33  */
  34 #include "statutil.h"
  35 #include "typedefs.h"
  36 #include "smalloc.h"
  37 #include "vec.h"
  38 #include "copyrite.h"
  39 #include "tpxio.h"
  40 #include "string2.h"
  41 #include "readinp.h"
  42 #include "calcgrid.h"
  43 #include "checkpoint.h"
  44 #include "gmx_ana.h"
  45 #include "gmx_random.h"
  46 #include "physics.h"
  47 #include "mdatoms.h"
  48 #include "coulomb.h"
  49 #include "mtop_util.h"
  50 #include "network.h"
  51 #include "main.h"
  52
  53 /* We use the same defines as in mvdata.c here */
  54 #define  block_bc(cr,   d) gmx_bcast(     sizeof(d),     &(d),(cr))
  55 #define nblock_bc(cr,nr,d) gmx_bcast((nr)*sizeof((d)[0]), (d),(cr))
  56 #define   snew_bc(cr,d,nr) { if (!MASTER(cr)) snew((d),(nr)); }
  57 /* #define TAKETIME */
  58 /* #define DEBUG  */
  59 enum {
  60   ddnoSEL, ddnoINTERLEAVE, ddnoPP_PME, ddnoCARTESIAN, ddnoNR
  61 };
  62
  63 /* Enum for situations that can occur during log file parsing */
  64 enum {
  65     eParselogOK,
  66     eParselogNotFound,
  67     eParselogNoPerfData,
  68     eParselogTerm,
  69     eParselogResetProblem,
  70     eParselogNr
  71 };
  72
  73
  74 typedef struct
  75 {
  76     int  nPMEnodes;       /* number of PME only nodes used in this test */
  77     int  nx, ny, nz;      /* DD grid */
  78     int  guessPME;        /* if nPMEnodes == -1, this is the guessed number of PME nodes */
  79     float *Gcycles;        /* This can contain more than one value if doing multiple tests */
  80     float Gcycles_Av;
  81     float *ns_per_day;
  82     float ns_per_day_Av;
  83     float *PME_f_load;     /* PME mesh/force load average*/
  84     float PME_f_load_Av;   /* Average average ;) ... */
  85     char *mdrun_cmd_line; /* Mdrun command line used for this test */
  86 } t_perf;
  87
  88
  89 typedef struct
  90 {
  91     gmx_large_int_t orig_sim_steps;  /* Number of steps to be done in the real simulation  */
  92     int  n_entries;             /* Number of entries in arrays                        */
  93     real volume;                /* The volume of the box                              */
  94     matrix recipbox;            /* The reciprocal box                                 */
  95     int  natoms;                /* The number of atoms in the MD system               */
  96     real *fac;                  /* The scaling factor                                 */
  97     real *rcoulomb;             /* The coulomb radii [0...nr_inputfiles]              */
  98     real *rvdw;                 /* The vdW radii                                      */
  99     int  *nkx, *nky, *nkz;      /* Number of k vectors in each spatial dimension      */
 100     real *fourier_sp;           /* Fourierspacing                                     */
 101     real *ewald_rtol;           /* Real space tolerance for Ewald, determines         */
 102                                 /* the real/reciprocal space relative weight          */
 103     real *ewald_beta;           /* Splitting parameter [1/nm]                         */
 104     real fracself;              /* fraction of particles for SI error                 */
 105     real q2all;                 /* sum ( q ^2 )                                       */
 106     real q2allnr;               /* nr of charges                                      */
 107     int  *pme_order;            /* Interpolation order for PME (bsplines)             */
 108     char **fn_out;              /* Name of the output tpr file                        */
 109     real *e_dir;                /* Direct space part of PME error with these settings */
 110     real *e_rec;                /* Reciprocal space part of PME error                 */
 111     gmx_bool bTUNE;                 /* flag for tuning */
 112 } t_inputinfo;
 113
 114
 115 /* Returns TRUE when atom is charged */
 116 static gmx_bool is_charge(real charge)
 117 {
 118     if (charge*charge > GMX_REAL_EPS)
 119         return TRUE;
 120     else
 121         return FALSE;
 122 }
 123
 124
 125 /* calculate charge density */
 126 static void calc_q2all(
 127         gmx_mtop_t *mtop,   /* molecular topology */
 128         real *q2all, real *q2allnr)
 129 {
 130     int imol,iatom;  /* indices for loops */
 131     real q2_all=0;   /* Sum of squared charges */
 132     int  nrq_mol;    /* Number of charges in a single molecule */
 133     int  nrq_all;    /* Total number of charges in the MD system */
 134     real nrq_all_r;  /* No of charges in real format */
 135     real qi,q2_mol;
 136     gmx_moltype_t *molecule;
 137     gmx_molblock_t *molblock;
 138
 139 #ifdef DEBUG
 140         fprintf(stderr, "\nCharge density:\n");
 141 #endif
 142     q2_all = 0.0;  /* total q squared */
 143     nrq_all = 0;   /* total number of charges in the system */
 144     for (imol=0; imol<mtop->nmolblock; imol++) /* Loop over molecule types */
 145     {
 146         q2_mol=0.0; /* q squared value of this molecule */
 147         nrq_mol=0;  /* number of charges this molecule carries */
 148         molecule = &(mtop->moltype[imol]);
 149         molblock = &(mtop->molblock[imol]);
 150         for (iatom=0; iatom<molblock->natoms_mol; iatom++) /* Loop over atoms in this molecule */
 151         {
 152             qi = molecule->atoms.atom[iatom].q; /* Charge of this atom */
 153             /* Is this charge worth to be considered? */
 154             if (is_charge(qi))
 155             {
 156                 q2_mol += qi*qi;
 157                 nrq_mol++;
 158             }
 159         }
 160         /* Multiply with the number of molecules present of this type and add */
 161         q2_all  += q2_mol*molblock->nmol;
 162         nrq_all += nrq_mol*molblock->nmol;
 163 #ifdef DEBUG
 164         fprintf(stderr, "Molecule %2d (%5d atoms) q2_mol=%10.3e nr.mol.charges=%5d (%6dx)  q2_all=%10.3e  tot.charges=%d\n",
 165                 imol,molblock->natoms_mol,q2_mol,nrq_mol,molblock->nmol,q2_all,nrq_all);
 166 #endif
 167     }
 168     nrq_all_r = nrq_all;
 169
 170     *q2all=q2_all;
 171     *q2allnr=nrq_all;
 172
 173 }
 174
 175
 176 /* Estimate the direct space part error of the SPME Ewald sum */
 177 static real estimate_direct(
 178         t_inputinfo *info
 179         )
 180 {
 181     real e_dir=0;    /* Error estimate */
 182     real beta=0;     /* Splitting parameter (1/nm) */
 183     real r_coulomb=0;  /* Cut-off in direct space */
 184
 185
 186     beta      = info->ewald_beta[0];
 187     r_coulomb = info->rcoulomb[0];
 188
 189     e_dir  = 2.0 * info->q2all * gmx_invsqrt( info->q2allnr  *  r_coulomb * info->volume );
 190     e_dir *= exp (-beta*beta*r_coulomb*r_coulomb);
 191
 192     return ONE_4PI_EPS0*e_dir;
 193 }
 194
 195 #define SUMORDER 6
 196
 197 /* the following 4 functions determine polynomials required for the reciprocal error estimate */
 198
 199 static inline real eps_poly1(
 200         real m,          /* grid coordinate in certain direction */
 201         real K,          /* grid size in corresponding direction */
 202         real n)          /* spline interpolation order of the SPME */
 203 {
 204     int i;
 205     real nom=0;  /* nominator */
 206     real denom=0; /* denominator */
 207     real tmp=0;
 208
 209     if ( m == 0.0 )
 210         return 0.0 ;
 211
 212     for(i=-SUMORDER ; i<0 ; i++)
 213     {
 214         tmp=m / K + i;
 215         tmp*=2.0*M_PI;
 216         nom+=pow( tmp , -n );
 217     }
 218
 219     for(i=SUMORDER ; i>0 ; i--)
 220     {
 221         tmp=m / K + i;
 222         tmp*=2.0*M_PI;
 223         nom+=pow( tmp , -n );
 224     }
 225
 226     tmp=m / K;
 227     tmp*=2.0*M_PI;
 228     denom=pow( tmp , -n )+nom;
 229
 230     return -nom/denom;
 231
 232 }
 233
 234 static inline real eps_poly2(
 235         real m,          /* grid coordinate in certain direction */
 236         real K,          /* grid size in corresponding direction */
 237         real n)          /* spline interpolation order of the SPME */
 238 {
 239     int i;
 240     real nom=0;  /* nominator */
 241     real denom=0; /* denominator */
 242     real tmp=0;
 243
 244     if ( m == 0.0 )
 245         return 0.0 ;
 246
 247     for(i=-SUMORDER ; i<0 ; i++)
 248     {
 249         tmp=m / K + i;
 250         tmp*=2.0*M_PI;
 251         nom+=pow( tmp , -2.0*n );
 252     }
 253
 254     for(i=SUMORDER ; i>0 ; i--)
 255     {
 256         tmp=m / K + i;
 257         tmp*=2.0*M_PI;
 258         nom+=pow( tmp , -2.0*n );
 259     }
 260
 261     for(i=-SUMORDER ; i<SUMORDER+1 ; i++)
 262     {
 263         tmp=m / K + i;
 264         tmp*=2.0*M_PI;
 265         denom+=pow( tmp , -n );
 266     }
 267     tmp=eps_poly1(m,K,n);
 268     return nom / denom / denom + tmp*tmp ;
 269
 270 }
 271
 272 static inline real eps_poly3(
 273         real m,          /* grid coordinate in certain direction */
 274         real K,          /* grid size in corresponding direction */
 275         real n)          /* spline interpolation order of the SPME */
 276 {
 277     int i;
 278     real nom=0;  /* nominator */
 279     real denom=0; /* denominator */
 280     real tmp=0;
 281
 282     if ( m == 0.0 )
 283         return 0.0 ;
 284
 285     for(i=-SUMORDER ; i<0 ; i++)
 286     {
 287         tmp=m / K + i;
 288         tmp*=2.0*M_PI;
 289         nom+= i * pow( tmp , -2.0*n );
 290     }
 291
 292     for(i=SUMORDER ; i>0 ; i--)
 293     {
 294         tmp=m / K + i;
 295         tmp*=2.0*M_PI;
 296         nom+= i * pow( tmp , -2.0*n );
 297     }
 298
 299     for(i=-SUMORDER ; i<SUMORDER+1 ; i++)
 300     {
 301         tmp=m / K + i;
 302         tmp*=2.0*M_PI;
 303         denom+=pow( tmp , -n );
 304     }
 305
 306     return 2.0 * M_PI * nom / denom / denom;
 307
 308 }
 309
 310 static inline real eps_poly4(
 311         real m,          /* grid coordinate in certain direction */
 312         real K,          /* grid size in corresponding direction */
 313         real n)          /* spline interpolation order of the SPME */
 314 {
 315     int i;
 316     real nom=0;  /* nominator */
 317     real denom=0; /* denominator */
 318     real tmp=0;
 319
 320     if ( m == 0.0 )
 321         return 0.0 ;
 322
 323     for(i=-SUMORDER ; i<0 ; i++)
 324     {
 325         tmp=m / K + i;
 326         tmp*=2.0*M_PI;
 327         nom+= i * i * pow( tmp , -2.0*n );
 328     }
 329
 330     for(i=SUMORDER ; i>0 ; i--)
 331     {
 332         tmp=m / K + i;
 333         tmp*=2.0*M_PI;
 334         nom+= i * i * pow( tmp , -2.0*n );
 335     }
 336
 337     for(i=-SUMORDER ; i<SUMORDER+1 ; i++)
 338     {
 339         tmp=m / K + i;
 340         tmp*=2.0*M_PI;
 341         denom+=pow( tmp , -n );
 342     }
 343
 344     return 4.0 * M_PI * M_PI * nom / denom / denom;
 345
 346 }
 347
 348 static inline real eps_self(
 349         real m,        /* grid coordinate in certain direction */
 350         real K,        /* grid size in corresponding direction */
 351         rvec rboxv,   /* reciprocal box vector */
 352         real n,        /* spline interpolation order of the SPME */
 353         rvec x)       /* coordinate of charge */
 354 {
 355     int i;
 356     real tmp=0; /* temporary variables for computations */
 357     real tmp1=0; /* temporary variables for computations */
 358     real tmp2=0; /* temporary variables for computations */
 359     real rcoord=0; /* coordinate in certain reciprocal space direction */
 360     real nom=0; /* nominator */
 361     real denom=0; /* denominator */
 362
 363
 364     if ( m == 0.0 )
 365         return 0.0 ;
 366
 367     rcoord=iprod(rboxv,x);
 368
 369
 370     for(i=-SUMORDER;i<0;i++)
 371     {
 372         tmp=-sin(2.0 * M_PI * i * K * rcoord);
 373         tmp1=2.0 * M_PI * m / K + 2.0 * M_PI * i;
 374         tmp2=pow(tmp1,-1.0*n);
 375         nom+=tmp * tmp2 * i;
 376         denom+=tmp2;
 377     }
 378
 379     for(i=SUMORDER;i>0;i--)
 380     {
 381         tmp=-sin(2.0 * M_PI * i * K * rcoord);
 382         tmp1=2.0 * M_PI * m / K + 2.0 * M_PI * i;
 383         tmp2=pow(tmp1,-1.0*n);
 384         nom+=tmp * tmp2 * i;
 385         denom+=tmp2;
 386     }
 387
 388
 389     tmp=2.0 * M_PI * m / K;
 390     tmp1=pow(tmp,-1.0*n);
 391     denom+=tmp1;
 392
 393    return 2.0 * M_PI * nom / denom * K  ;
 394
 395 }
 396
 397 #undef SUMORDER
 398
 399 /* The following routine is just a copy from pme.c */
 400
 401 static void calc_recipbox(matrix box,matrix recipbox)
 402 {
 403   /* Save some time by assuming upper right part is zero */
 404
 405   real tmp=1.0/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
 406
 407   recipbox[XX][XX]=box[YY][YY]*box[ZZ][ZZ]*tmp;
 408   recipbox[XX][YY]=0;
 409   recipbox[XX][ZZ]=0;
 410   recipbox[YY][XX]=-box[YY][XX]*box[ZZ][ZZ]*tmp;
 411   recipbox[YY][YY]=box[XX][XX]*box[ZZ][ZZ]*tmp;
 412   recipbox[YY][ZZ]=0;
 413   recipbox[ZZ][XX]=(box[YY][XX]*box[ZZ][YY]-box[YY][YY]*box[ZZ][XX])*tmp;
 414   recipbox[ZZ][YY]=-box[ZZ][YY]*box[XX][XX]*tmp;
 415   recipbox[ZZ][ZZ]=box[XX][XX]*box[YY][YY]*tmp;
 416 }
 417
 418
 419 /* Estimate the reciprocal space part error of the SPME Ewald sum. */
 420 static real estimate_reciprocal(
 421         t_inputinfo *info,
 422         rvec x[],           /* array of particles */
 423         real q[],           /* array of charges */
 424         int nr,             /* number of charges = size of the charge array */
 425         FILE *fp_out,
 426         gmx_bool bVerbose,
 427         unsigned int seed,  /* The seed for the random number generator */
 428         int *nsamples,      /* Return the number of samples used if Monte Carlo
 429                              * algorithm is used for self energy error estimate */
 430         t_commrec *cr)
 431 {
 432     real e_rec=0;   /* reciprocal error estimate */
 433     real e_rec1=0;  /* Error estimate term 1*/
 434     real e_rec2=0;  /* Error estimate term 2*/
 435     real e_rec3=0;  /* Error estimate term 3 */
 436     real e_rec3x=0; /* part of Error estimate term 3 in x */
 437     real e_rec3y=0; /* part of Error estimate term 3 in y */
 438     real e_rec3z=0; /* part of Error estimate term 3 in z */
 439     int i,ci;
 440     int nx,ny,nz;   /* grid coordinates */
 441     real q2_all=0;  /* sum of squared charges */
 442     rvec gridpx;    /* reciprocal grid point in x direction*/
 443     rvec gridpxy;   /* reciprocal grid point in x and y direction*/
 444     rvec gridp;     /* complete reciprocal grid point in 3 directions*/
 445     rvec tmpvec;    /* template to create points from basis vectors */
 446     rvec tmpvec2;   /* template to create points from basis vectors */
 447     real coeff=0;   /* variable to compute coefficients of the error estimate */
 448     real coeff2=0;   /* variable to compute coefficients of the error estimate */
 449     real tmp=0;     /* variables to compute different factors from vectors */
 450     real tmp1=0;
 451     real tmp2=0;
 452     gmx_bool bFraction;
 453
 454     /* Random number generator */
 455     gmx_rng_t rng=NULL;
 456     int *numbers=NULL;
 457
 458     /* Index variables for parallel work distribution */
 459     int startglobal,stopglobal;
 460     int startlocal, stoplocal;
 461     int x_per_core;
 462     int xtot;
 463
 464 #ifdef TAKETIME
 465     double t0=0.0;
 466     double t1=0.0;
 467 #endif
 468
 469     rng=gmx_rng_init(seed);
 470
 471     clear_rvec(gridpx);
 472     clear_rvec(gridpxy);
 473     clear_rvec(gridp);
 474     clear_rvec(tmpvec);
 475     clear_rvec(tmpvec2);
 476
 477     for(i=0;i<nr;i++)
 478     {
 479         q2_all += q[i]*q[i];
 480     }
 481
 482     /* Calculate indices for work distribution */
 483     startglobal=-info->nkx[0]/2;
 484     stopglobal = info->nkx[0]/2;
 485     xtot = stopglobal*2+1;
 486     if (PAR(cr))
 487     {
 488         x_per_core = ceil((real)xtot / (real)cr->nnodes);
 489         startlocal = startglobal + x_per_core*cr->nodeid;
 490         stoplocal = startlocal + x_per_core -1;
 491         if (stoplocal > stopglobal)
 492              stoplocal = stopglobal;
 493     }
 494     else
 495     {
 496         startlocal = startglobal;
 497         stoplocal  = stopglobal;
 498         x_per_core = xtot;
 499     }
 500 /*
 501 #ifdef GMX_MPI
 502     MPI_Barrier(MPI_COMM_WORLD);
 503 #endif
 504 */
 505
 506 #ifdef TAKETIME
 507     if (MASTER(cr))
 508         t0 = MPI_Wtime();
 509 #endif
 510
 511     if (MASTER(cr)){
 512
 513         fprintf(stderr, "Calculating reciprocal error part 1 ...");
 514
 515     }
 516
 517     for(nx=startlocal; nx<=stoplocal; nx++)
 518     {
 519         svmul(nx,info->recipbox[XX],gridpx);
 520         for(ny=-info->nky[0]/2; ny<info->nky[0]/2+1; ny++)
 521         {
 522             svmul(ny,info->recipbox[YY],tmpvec);
 523             rvec_add(gridpx,tmpvec,gridpxy);
 524             for(nz=-info->nkz[0]/2; nz<info->nkz[0]/2+1; nz++)
 525             {
 526                 if (  0 == nx &&  0 == ny &&  0 == nz )
 527                     continue;
 528                 svmul(nz,info->recipbox[ZZ],tmpvec);
 529                 rvec_add(gridpxy,tmpvec,gridp);
 530                 tmp=norm2(gridp);
 531                 coeff=exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] ) ;
 532                 coeff/= 2.0 * M_PI * info->volume * tmp;
 533                 coeff2=tmp ;
 534
 535
 536                 tmp=eps_poly2(nx,info->nkx[0],info->pme_order[0]);
 537                 tmp+=eps_poly2(ny,info->nkx[0],info->pme_order[0]);
 538                 tmp+=eps_poly2(nz,info->nkx[0],info->pme_order[0]);
 539
 540                 tmp1=eps_poly1(nx,info->nkx[0],info->pme_order[0]);
 541                 tmp2=eps_poly1(ny,info->nky[0],info->pme_order[0]);
 542
 543                 tmp+=2.0 * tmp1 * tmp2;
 544
 545                 tmp1=eps_poly1(nz,info->nkz[0],info->pme_order[0]);
 546                 tmp2=eps_poly1(ny,info->nky[0],info->pme_order[0]);
 547
 548                 tmp+=2.0 * tmp1 * tmp2;
 549
 550                 tmp1=eps_poly1(nz,info->nkz[0],info->pme_order[0]);
 551                 tmp2=eps_poly1(nx,info->nkx[0],info->pme_order[0]);
 552
 553                 tmp+=2.0 * tmp1 * tmp2;
 554
 555                 tmp1=eps_poly1(nx,info->nkx[0],info->pme_order[0]);
 556                 tmp1+=eps_poly1(ny,info->nky[0],info->pme_order[0]);
 557                 tmp1+=eps_poly1(nz,info->nkz[0],info->pme_order[0]);
 558
 559                 tmp+= tmp1 * tmp1;
 560
 561                 e_rec1+= 32.0 * M_PI * M_PI * coeff * coeff * coeff2 * tmp  * q2_all * q2_all / nr ;
 562
 563                 tmp1=eps_poly3(nx,info->nkx[0],info->pme_order[0]);
 564                 tmp1*=info->nkx[0];
 565                 tmp2=iprod(gridp,info->recipbox[XX]);
 566
 567                 tmp=tmp1*tmp2;
 568
 569                 tmp1=eps_poly3(ny,info->nky[0],info->pme_order[0]);
 570                 tmp1*=info->nky[0];
 571                 tmp2=iprod(gridp,info->recipbox[YY]);
 572
 573                 tmp+=tmp1*tmp2;
 574
 575                 tmp1=eps_poly3(nz,info->nkz[0],info->pme_order[0]);
 576                 tmp1*=info->nkz[0];
 577                 tmp2=iprod(gridp,info->recipbox[ZZ]);
 578
 579                 tmp+=tmp1*tmp2;
 580
 581                 tmp*=4.0 * M_PI;
 582
 583                 tmp1=eps_poly4(nx,info->nkx[0],info->pme_order[0]);
 584                 tmp1*=norm2(info->recipbox[XX]);
 585                 tmp1*=info->nkx[0] * info->nkx[0];
 586
 587                 tmp+=tmp1;
 588
 589                 tmp1=eps_poly4(ny,info->nky[0],info->pme_order[0]);
 590                 tmp1*=norm2(info->recipbox[YY]);
 591                 tmp1*=info->nky[0] * info->nky[0];
 592
 593                 tmp+=tmp1;
 594
 595                 tmp1=eps_poly4(nz,info->nkz[0],info->pme_order[0]);
 596                 tmp1*=norm2(info->recipbox[ZZ]);
 597                 tmp1*=info->nkz[0] * info->nkz[0];
 598
 599                 tmp+=tmp1;
 600
 601                 e_rec2+= 4.0 * coeff * coeff * tmp * q2_all * q2_all / nr ;
 602
 603             }
 604         }
 605         if (MASTER(cr))
 606             fprintf(stderr, "\rCalculating reciprocal error part 1 ... %3.0f%%", 100.0*(nx-startlocal+1)/(x_per_core));
 607
 608     }
 609
 610     if (MASTER(cr))
 611         fprintf(stderr, "\n");
 612
 613     /* Use just a fraction of all charges to estimate the self energy error term? */
 614     bFraction =  (info->fracself > 0.0) && (info->fracself < 1.0);
 615
 616     if (bFraction)
 617     {
 618         /* Here xtot is the number of samples taken for the Monte Carlo calculation
 619          * of the average of term IV of equation 35 in Wang2010. Round up to a
 620          * number of samples that is divisible by the number of nodes */
 621         x_per_core  = ceil(info->fracself * nr / (real)cr->nnodes);
 622         xtot = x_per_core * cr->nnodes;
 623     }
 624     else
 625     {
 626         /* In this case we use all nr particle positions */
 627         xtot = nr;
 628         x_per_core = ceil( (real)xtot / (real)cr->nnodes );
 629     }
 630
 631     startlocal = x_per_core *  cr->nodeid;
 632     stoplocal  = min(startlocal + x_per_core, xtot);  /* min needed if xtot == nr */
 633
 634     if (bFraction)
 635     {
 636         /* Make shure we get identical results in serial and parallel. Therefore,
 637          * take the sample indices from a single, global random number array that
 638          * is constructed on the master node and that only depends on the seed */
 639         snew(numbers, xtot);
 640         if (MASTER(cr))
 641         {
 642             for (i=0; i<xtot; i++)
 643             {
 644                 numbers[i] = floor(gmx_rng_uniform_real(rng) * nr );
 645             }
 646         }
 647         /* Broadcast the random number array to the other nodes */
 648         if (PAR(cr))
 649         {
 650             nblock_bc(cr,xtot,numbers);
 651         }
 652
 653         if (bVerbose && MASTER(cr))
 654         {
 655             fprintf(stdout, "Using %d sample%s to approximate the self interaction error term",
 656                     xtot, xtot==1?"":"s");
 657             if (PAR(cr))
 658                 fprintf(stdout, " (%d sample%s per node)", x_per_core, x_per_core==1?"":"s");
 659             fprintf(stdout, ".\n");
 660         }
 661     }
 662
 663     /* Return the number of positions used for the Monte Carlo algorithm */
 664     *nsamples = xtot;
 665
 666     for(i=startlocal;i<stoplocal;i++)
 667     {
 668         e_rec3x=0;
 669         e_rec3y=0;
 670         e_rec3z=0;
 671
 672         if (bFraction)
 673         {
 674             /* Randomly pick a charge */
 675             ci = numbers[i];
 676         }
 677         else
 678         {
 679             /* Use all charges */
 680             ci = i;
 681         }
 682
 683         /* for(nx=startlocal; nx<=stoplocal; nx++)*/
 684         for(nx=-info->nkx[0]/2; nx<info->nkx[0]/2+1; nx++)
 685         {
 686             svmul(nx,info->recipbox[XX],gridpx);
 687             for(ny=-info->nky[0]/2; ny<info->nky[0]/2+1; ny++)
 688             {
 689                 svmul(ny,info->recipbox[YY],tmpvec);
 690                 rvec_add(gridpx,tmpvec,gridpxy);
 691                 for(nz=-info->nkz[0]/2; nz<info->nkz[0]/2+1; nz++)
 692                 {
 693
 694                     if (  0 == nx && 0 == ny && 0 == nz)
 695                         continue;
 696
 697                     svmul(nz,info->recipbox[ZZ],tmpvec);
 698                     rvec_add(gridpxy,tmpvec,gridp);
 699                     tmp=norm2(gridp);
 700                     coeff=exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] );
 701                     coeff/= tmp ;
 702                     e_rec3x+=coeff*eps_self(nx,info->nkx[0],info->recipbox[XX],info->pme_order[0],x[ci]);
 703                     e_rec3y+=coeff*eps_self(ny,info->nky[0],info->recipbox[YY],info->pme_order[0],x[ci]);
 704                     e_rec3z+=coeff*eps_self(nz,info->nkz[0],info->recipbox[ZZ],info->pme_order[0],x[ci]);
 705
 706                 }
 707             }
 708         }
 709
 710         clear_rvec(tmpvec2);
 711
 712         svmul(e_rec3x,info->recipbox[XX],tmpvec);
 713         rvec_inc(tmpvec2,tmpvec);
 714         svmul(e_rec3y,info->recipbox[YY],tmpvec);
 715         rvec_inc(tmpvec2,tmpvec);
 716         svmul(e_rec3z,info->recipbox[ZZ],tmpvec);
 717         rvec_inc(tmpvec2,tmpvec);
 718
 719         e_rec3 += q[ci]*q[ci]*q[ci]*q[ci]*norm2(tmpvec2) / ( xtot * M_PI * info->volume * M_PI * info->volume);
 720         if (MASTER(cr)){
 721             fprintf(stderr, "\rCalculating reciprocal error part 2 ... %3.0f%%",
 722                     100.0*(i+1)/stoplocal);
 723
 724         }
 725     }
 726
 727     if (MASTER(cr))
 728         fprintf(stderr, "\n");
 729
 730
 731 #ifdef TAKETIME
 732     if (MASTER(cr))
 733     {
 734         t1= MPI_Wtime() - t0;
 735         fprintf(fp_out, "Recip. err. est. took   : %lf s\n", t1);
 736     }
 737 #endif
 738
 739 #ifdef DEBUG
 740     if (PAR(cr))
 741     {
 742         fprintf(stderr, "Node %3d: nx=[%3d...%3d]  e_rec3=%e\n",
 743                 cr->nodeid, startlocal, stoplocal, e_rec3);
 744     }
 745 #endif
 746
 747     if (PAR(cr))
 748     {
 749         gmx_sum(1,&e_rec1,cr);
 750         gmx_sum(1,&e_rec2,cr);
 751         gmx_sum(1,&e_rec3,cr);
 752     }
 753
 754     /* e_rec1*=8.0 * q2_all / info->volume / info->volume / nr ;
 755        e_rec2*=  q2_all / M_PI / M_PI / info->volume / info->volume / nr ;
 756        e_rec3/= M_PI * M_PI * info->volume * info->volume * nr ;
 757      */
 758     e_rec=sqrt(e_rec1+e_rec2+e_rec3);
 759
 760
 761     return ONE_4PI_EPS0 * e_rec;
 762 }
 763
 764
 765 /* Allocate memory for the inputinfo struct: */
 766 static void create_info(t_inputinfo *info)
 767 {
 768     snew(info->fac       , info->n_entries);
 769     snew(info->rcoulomb  , info->n_entries);
 770     snew(info->rvdw      , info->n_entries);
 771     snew(info->nkx       , info->n_entries);
 772     snew(info->nky       , info->n_entries);
 773     snew(info->nkz       , info->n_entries);
 774     snew(info->fourier_sp, info->n_entries);
 775     snew(info->ewald_rtol, info->n_entries);
 776     snew(info->ewald_beta, info->n_entries);
 777     snew(info->pme_order , info->n_entries);
 778     snew(info->fn_out    , info->n_entries);
 779     snew(info->e_dir     , info->n_entries);
 780     snew(info->e_rec     , info->n_entries);
 781 }
 782
 783
 784 /* Allocate and fill an array with coordinates and charges,
 785  * returns the number of charges found
 786  */
 787 static int prepare_x_q(real *q[], rvec *x[], gmx_mtop_t *mtop, rvec x_orig[], t_commrec *cr)
 788 {
 789     int i,anr_global;
 790     int nq; /* number of charged particles */
 791     t_atom *atom;
 792
 793
 794     if (MASTER(cr))
 795     {
 796         snew(*q, mtop->natoms);
 797         snew(*x, mtop->natoms);
 798         nq=0;
 799         for (i=0; i<mtop->natoms; i++)
 800         {
 801             anr_global = i;
 802             gmx_mtop_atomnr_to_atom(mtop,anr_global,&atom);
 803             if (is_charge(atom->q))
 804             {
 805                 (*q)[nq] = atom->q;
 806                 (*x)[nq][XX] = x_orig[i][XX];
 807                 (*x)[nq][YY] = x_orig[i][YY];
 808                 (*x)[nq][ZZ] = x_orig[i][ZZ];
 809                 nq++;
 810             }
 811         }
 812         /* Give back some unneeded memory */
 813         srenew(*q, nq);
 814         srenew(*x, nq);
 815     }
 816     /* Broadcast x and q in the parallel case */
 817     if (PAR(cr))
 818     {
 819         /* Transfer the number of charges */
 820         block_bc(cr,nq);
 821         snew_bc(cr, *x, nq);
 822         snew_bc(cr, *q, nq);
 823         nblock_bc(cr,nq,*x);
 824         nblock_bc(cr,nq,*q);
 825     }
 826
 827     return nq;
 828 }
 829
 830
 831
 832 /* Read in the tpr file and save information we need later in info */
 833 static void read_tpr_file(const char *fn_sim_tpr, t_inputinfo *info, t_state *state, gmx_mtop_t *mtop, t_inputrec *ir, real user_beta, real fracself)
 834 {
 835     read_tpx_state(fn_sim_tpr,ir,state,NULL,mtop);
 836
 837     /* The values of the original tpr input file are save in the first
 838      * place [0] of the arrays */
 839     info->orig_sim_steps = ir->nsteps;
 840     info->pme_order[0]   = ir->pme_order;
 841     info->rcoulomb[0]    = ir->rcoulomb;
 842     info->rvdw[0]        = ir->rvdw;
 843     info->nkx[0]         = ir->nkx;
 844     info->nky[0]         = ir->nky;
 845     info->nkz[0]         = ir->nkz;
 846     info->ewald_rtol[0]  = ir->ewald_rtol;
 847     info->fracself       = fracself;
 848     if (user_beta > 0)
 849         info->ewald_beta[0] = user_beta;
 850     else
 851         info->ewald_beta[0]  = calc_ewaldcoeff(info->rcoulomb[0],info->ewald_rtol[0]);
 852
 853     /* Check if PME was chosen */
 854     if (EEL_PME(ir->coulombtype) == FALSE)
 855         gmx_fatal(FARGS, "Can only do optimizations for simulations with PME");
 856
 857     /* Check if rcoulomb == rlist, which is necessary for PME */
 858     if (!(ir->rcoulomb == ir->rlist))
 859         gmx_fatal(FARGS, "PME requires rcoulomb (%f) to be equal to rlist (%f).", ir->rcoulomb, ir->rlist);
 860 }
 861
 862
 863 /* Transfer what we need for parallelizing the reciprocal error estimate */
 864 static void bcast_info(t_inputinfo *info, t_commrec *cr)
 865 {
 866     nblock_bc(cr, info->n_entries, info->nkx);
 867     nblock_bc(cr, info->n_entries, info->nky);
 868     nblock_bc(cr, info->n_entries, info->nkz);
 869     nblock_bc(cr, info->n_entries, info->ewald_beta);
 870     nblock_bc(cr, info->n_entries, info->pme_order);
 871     nblock_bc(cr, info->n_entries, info->e_dir);
 872     nblock_bc(cr, info->n_entries, info->e_rec);
 873     block_bc(cr, info->volume);
 874     block_bc(cr, info->recipbox);
 875     block_bc(cr, info->natoms);
 876     block_bc(cr, info->fracself);
 877     block_bc(cr, info->bTUNE);
 878     block_bc(cr, info->q2all);
 879     block_bc(cr, info->q2allnr);
 880 }
 881
 882
 883 /* Estimate the error of the SPME Ewald sum. This estimate is based upon
 884  * a) a homogeneous distribution of the charges
 885  * b) a total charge of zero.
 886  */
 887 static void estimate_PME_error(t_inputinfo *info, t_state *state,
 888         gmx_mtop_t *mtop, FILE *fp_out, gmx_bool bVerbose, unsigned int seed,
 889         t_commrec *cr)
 890 {
 891     rvec *x=NULL; /* The coordinates */
 892     real *q=NULL; /* The charges     */
 893     real  edir=0.0; /* real space error */
 894     real  erec=0.0; /* reciprocal space error */
 895     real  derr=0.0; /* difference of real and reciprocal space error */
 896     real  derr0=0.0; /* difference of real and reciprocal space error */
 897     real  beta=0.0; /* splitting parameter beta */
 898     real  beta0=0.0; /* splitting parameter beta */
 899     int ncharges; /* The number of atoms with charges */
 900     int nsamples; /* The number of samples used for the calculation of the
 901                    * self-energy error term */
 902     int i=0;
 903
 904     if (MASTER(cr))
 905         fprintf(fp_out, "\n--- PME ERROR ESTIMATE ---\n");
 906
 907     /* Prepare an x and q array with only the charged atoms */
 908     ncharges = prepare_x_q(&q, &x, mtop, state->x, cr);
 909     if (MASTER(cr))
 910     {
 911         calc_q2all(mtop, &(info->q2all), &(info->q2allnr));
 912         info->ewald_rtol[0]=gmx_erfc(info->rcoulomb[0]*info->ewald_beta[0]);
 913         /* Write some info to log file */
 914         fprintf(fp_out, "Box volume              : %g nm^3\n", info->volume);
 915         fprintf(fp_out, "Number of charged atoms : %d (total atoms %d)\n",ncharges, info->natoms);
 916         fprintf(fp_out, "Coulomb radius          : %g nm\n", info->rcoulomb[0]);
 917         fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
 918         fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
 919         fprintf(fp_out, "Interpolation order     : %d\n", info->pme_order[0]);
 920         fprintf(fp_out, "Fourier grid (nx,ny,nz) : %d x %d x %d\n",
 921                 info->nkx[0],info->nky[0],info->nkz[0]);
 922         fflush(fp_out);
 923
 924     }
 925
 926     if (PAR(cr))
 927         bcast_info(info, cr);
 928
 929
 930     /* Calculate direct space error */
 931     info->e_dir[0] = estimate_direct(info);
 932
 933     /* Calculate reciprocal space error */
 934     info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
 935                                          seed, &nsamples, cr);
 936
 937     if (PAR(cr))
 938         bcast_info(info, cr);
 939
 940     if (MASTER(cr))
 941     {
 942         fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
 943         fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
 944         fprintf(fp_out, "Self-energy error term was estimated using %d samples\n", nsamples);
 945         fflush(fp_out);
 946         fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
 947         fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
 948     }
 949
 950     i=0;
 951
 952     if (info->bTUNE)
 953     {
 954         if(MASTER(cr))
 955             fprintf(stderr,"Starting tuning ...\n");
 956         edir=info->e_dir[0];
 957         erec=info->e_rec[0];
 958         derr0=edir-erec;
 959         beta0=info->ewald_beta[0];
 960         if (derr>0.0)
 961             info->ewald_beta[0]+=0.1;
 962         else
 963             info->ewald_beta[0]-=0.1;
 964         info->e_dir[0] = estimate_direct(info);
 965         info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
 966                                              seed, &nsamples, cr);
 967
 968         if (PAR(cr))
 969             bcast_info(info, cr);
 970
 971
 972         edir=info->e_dir[0];
 973         erec=info->e_rec[0];
 974         derr=edir-erec;
 975         while ( fabs(derr/min(erec,edir)) > 1e-4)
 976         {
 977
 978             beta=info->ewald_beta[0];
 979             beta-=derr*(info->ewald_beta[0]-beta0)/(derr-derr0);
 980             beta0=info->ewald_beta[0];
 981             info->ewald_beta[0]=beta;
 982             derr0=derr;
 983
 984             info->e_dir[0] = estimate_direct(info);
 985             info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
 986                                                  seed, &nsamples, cr);
 987
 988             if (PAR(cr))
 989                 bcast_info(info, cr);
 990
 991             edir=info->e_dir[0];
 992             erec=info->e_rec[0];
 993             derr=edir-erec;
 994
 995             if (MASTER(cr))
 996             {
 997                 i++;
 998                 fprintf(stderr,"difference between real and rec. space error (step %d): %g\n",i,fabs(derr));
 999                 fprintf(stderr,"old beta: %f\n",beta0);
1000                 fprintf(stderr,"new beta: %f\n",beta);
1001             }
1002         }
1003
1004         info->ewald_rtol[0]=gmx_erfc(info->rcoulomb[0]*info->ewald_beta[0]);
1005
1006         if (MASTER(cr))
1007         {
1008             /* Write some info to log file */
1009             fflush(fp_out);
1010             fprintf(fp_out, "=========  After tuning ========\n");
1011             fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
1012             fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
1013             fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
1014             fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
1015             fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
1016             fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
1017             fflush(fp_out);
1018
1019         }
1020
1021     }
1022
1023 }
1024
1025
1026 int gmx_pme_error(int argc,char *argv[])
1027 {
1028     const char *desc[] = {
1029             "g_pme_error estimates the error of the electrostatic forces",
1030             "if using the sPME algorithm. The flag [TT]-tune[tt] will determine",
1031             "the splitting parameter such that the error is equally",
1032             "distributed over the real and reciprocal space part.",
1033             "The part of the error that stems from self interaction of the particles "
1034             "is computationally demanding. However, a good a approximation is to",
1035             "just use a fraction of the particles for this term which can be",
1036             "indicated by the flag [TT]-self[tt].[PAR]",
1037     };
1038
1039     real        fs=0.0;             /* 0 indicates: not set by the user */
1040     real        user_beta=-1.0;
1041     real        fracself=1.0;
1042     t_inputinfo info;
1043     t_state     state;     /* The state from the tpr input file */
1044     gmx_mtop_t  mtop;      /* The topology from the tpr input file */
1045     t_inputrec  *ir=NULL;  /* The inputrec from the tpr file */
1046     FILE        *fp=NULL;
1047     t_commrec   *cr;
1048     unsigned long PCA_Flags;
1049     gmx_bool        bTUNE=FALSE;
1050     gmx_bool    bVerbose=FALSE;
1051     int         seed=0;
1052
1053
1054     static t_filenm fnm[] = {
1055       { efTPX, "-s",     NULL,    ffREAD },
1056       { efOUT, "-o",    "error",  ffWRITE },
1057       { efTPX, "-so",   "tuned",  ffOPTWR }
1058     };
1059
1060     output_env_t oenv=NULL;
1061
1062     t_pargs pa[] = {
1063         { "-beta",     FALSE, etREAL, {&user_beta},
1064             "If positive, overwrite ewald_beta from tpr file with this value" },
1065         { "-tune",     FALSE, etBOOL, {&bTUNE},
1066             "Tune the splitting parameter such that the error is equally distributed between real and reciprocal space" },
1067         { "-self",     FALSE, etREAL, {&fracself},
1068             "If between 0.0 and 1.0, determine self interaction error from just this fraction of the charged particles" },
1069         { "-seed",     FALSE, etINT,  {&seed},
1070           "Random number seed used for Monte Carlo algorithm when -self is set to a value between 0.0 and 1.0" },
1071         { "-v",        FALSE, etBOOL, {&bVerbose},
1072             "Be loud and noisy" }
1073     };
1074
1075
1076 #define NFILE asize(fnm)
1077
1078     cr = init_par(&argc,&argv);
1079
1080     MPI_Barrier(MPI_COMM_WORLD);
1081
1082     if (MASTER(cr))
1083       CopyRight(stderr,argv[0]);
1084
1085     PCA_Flags = PCA_NOEXIT_ON_ARGS;
1086     PCA_Flags |= (MASTER(cr) ? 0 : PCA_QUIET);
1087
1088     parse_common_args(&argc,argv,PCA_Flags,
1089                       NFILE,fnm,asize(pa),pa,asize(desc),desc,
1090                       0,NULL,&oenv);
1091
1092     if (!bTUNE)
1093         bTUNE = opt2bSet("-so",NFILE,fnm);
1094
1095     info.n_entries = 1;
1096
1097     /* Allocate memory for the inputinfo struct: */
1098     create_info(&info);
1099     info.fourier_sp[0] = fs;
1100
1101     /* Read in the tpr file and open logfile for reading */
1102     if (MASTER(cr))
1103     {
1104         snew(ir,1);
1105         read_tpr_file(opt2fn("-s",NFILE,fnm), &info, &state, &mtop, ir, user_beta,fracself);
1106
1107         fp=fopen(opt2fn("-o",NFILE,fnm),"w");
1108     }
1109
1110     /* Check consistency if the user provided fourierspacing */
1111     if (fs > 0 && MASTER(cr))
1112     {
1113         /* Recalculate the grid dimensions using fourierspacing from user input */
1114         info.nkx[0] = 0;
1115         info.nky[0] = 0;
1116         info.nkz[0] = 0;
1117         calc_grid(stdout,state.box,info.fourier_sp[0],&(info.nkx[0]),&(info.nky[0]),&(info.nkz[0]));
1118         if ( (ir->nkx != info.nkx[0]) || (ir->nky != info.nky[0]) || (ir->nkz != info.nkz[0]) )
1119             gmx_fatal(FARGS, "Wrong fourierspacing %f nm, input file grid = %d x %d x %d, computed grid = %d x %d x %d",
1120                       fs,ir->nkx,ir->nky,ir->nkz,info.nkx[0],info.nky[0],info.nkz[0]);
1121     }
1122
1123     /* Estimate (S)PME force error */
1124
1125     /* Determine the volume of the simulation box */
1126     if (MASTER(cr))
1127     {
1128         info.volume = det(state.box);
1129         calc_recipbox(state.box,info.recipbox);
1130         info.natoms = mtop.natoms;
1131         info.bTUNE  = bTUNE;
1132     }
1133
1134     if (PAR(cr))
1135         bcast_info(&info, cr);
1136
1137     /* Get an error estimate of the input tpr file and do some tuning if requested */
1138     estimate_PME_error(&info, &state, &mtop, fp, bVerbose, seed, cr);
1139
1140     if (MASTER(cr))
1141     {
1142         /* Write out optimized tpr file if requested */
1143         if ( opt2bSet("-so",NFILE,fnm) || bTUNE )
1144         {
1145             ir->ewald_rtol=info.ewald_rtol[0];
1146             write_tpx_state(opt2fn("-so",NFILE,fnm),ir,&state,&mtop);
1147         }
1148         please_cite(fp,"Wang2010");
1149         fclose(fp);
1150     }
1151
1152     if (gmx_parallel_env_initialized())
1153     {
1154         gmx_finalize();
1155     }
1156
1157     return 0;
1158 }