src/programs/alexandria/tune_eem.cpp

   1 /*
   2  * This source file is part of the Alexandria Chemistry Toolkit.
   3  *
   4  * Copyright (C) 2014-2020
   5  *
   6  * Developers:
   7  *             Mohammad Mehdi Ghahremanpour,
   8  *             Paul J. van Maaren,
   9  *             David van der Spoel (Project leader)
  10  *
  11  * This program is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU General Public License
  13  * as published by the Free Software Foundation; either version 2
  14  * of the License, or (at your option) any later version.
  15  *
  16  * This program is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License
  22  * along with this program; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor,
  24  * Boston, MA  02110-1301, USA.
  25  */
  26
  27 /*! \internal \brief
  28  * Implements part of the alexandria program.
  29  * \author Mohammad Mehdi Ghahremanpour <mohammad.ghahremanpour@icm.uu.se>
  30  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  31  */
  32
  33 #include <cctype>
  34 #include <cmath>
  35 #include <cstdio>
  36 #include <cstdlib>
  37
  38 #include <random>
  39
  40 #include "gromacs/commandline/pargs.h"
  41 #include "gromacs/commandline/viewit.h"
  42 #include "gromacs/fileio/xvgr.h"
  43 #include "gromacs/hardware/detecthardware.h"
  44 #include "gromacs/listed-forces/bonded.h"
  45 #include "gromacs/mdlib/force.h"
  46 #include "gromacs/mdlib/mdatoms.h"
  47 #include "gromacs/mdlib/shellfc.h"
  48 #include "gromacs/mdtypes/commrec.h"
  49 #include "gromacs/statistics/statistics.h"
  50 #include "gromacs/topology/mtop_util.h"
  51 #include "gromacs/utility/arraysize.h"
  52 #include "gromacs/utility/coolstuff.h"
  53 #include "gromacs/utility/cstringutil.h"
  54 #include "gromacs/utility/fatalerror.h"
  55 #include "gromacs/utility/futil.h"
  56 #include "gromacs/utility/smalloc.h"
  57
  58 #include "alex_modules.h"
  59 #include "gentop_core.h"
  60 #include "gmx_simple_comm.h"
  61 #include "molgen.h"
  62 #include "molprop_util.h"
  63 #include "mymol_low.h"
  64 #include "optparam.h"
  65 #include "poldata.h"
  66 #include "poldata_tables.h"
  67 #include "poldata_xml.h"
  68 #include "tuning_utility.h"
  69
  70 namespace alexandria
  71 {
  72
  73 class OptACM : public MolGen, Bayes
  74 {
  75     using param_type = std::vector<double>;
  76
  77     private:
  78         bool       bFullTensor_;
  79         bool       bFitAlpha_;
  80         bool       bFitZeta_;
  81         bool       bSameZeta_;
  82         bool       bFitChi_;
  83         bool       bUseCM5_;
  84         real       penalty_;
  85
  86     public:
  87
  88         OptACM()
  89             :
  90               bFullTensor_(false),
  91               bFitAlpha_(false),
  92               bFitZeta_(false),
  93               bSameZeta_(false),
  94               bFitChi_(true),
  95               bUseCM5_(false),
  96               penalty_(0)
  97         {}
  98
  99         ~OptACM() {}
 100
 101         bool bESP() const { return weight(ermsESP); }
 102
 103         bool dipole() const { return weight(ermsMU); }
 104
 105         bool quadrupole() const { return weight(ermsQUAD); }
 106
 107         bool fullTensor() const { return bFullTensor_; }
 108
 109         bool fitZeta() const { return bFitZeta_; }
 110
 111         bool sameZeta() const { return bSameZeta_; }
 112
 113         bool fitChi() const { return bFitChi_; }
 114
 115         bool useCM5() const {return bUseCM5_; }
 116
 117         bool penalize() const {return penalty_ > 0; }
 118
 119         void add_pargs(std::vector<t_pargs> *pargs)
 120         {
 121             t_pargs pa[] =
 122             {
 123                 { "-fullTensor", FALSE, etBOOL, {&bFullTensor_},
 124                   "Consider both diagonal and off-diagonal elements of the Q_Calc matrix for optimization" },
 125                 { "-fitalpha", FALSE, etBOOL, {&bFitAlpha_},
 126                   "Calibrate atomic polarizability." },
 127                 { "-fitzeta", FALSE, etBOOL, {&bFitZeta_},
 128                   "Calibrate orbital exponent." },
 129                 { "-samezeta", FALSE, etBOOL, {&bSameZeta_},
 130                   "Use the same zeta for both the core and the shell of the Drude model." },
 131                 { "-fitchi", FALSE, etBOOL, {&bFitChi_},
 132                   "Calibrate electronegativity and hardness." },
 133                 { "-penalty", FALSE, etREAL, {&penalty_},
 134                   "penalty to keep the Chi0 and J0 in order." },
 135                 { "-cm5", FALSE, etBOOL, {&bUseCM5_},
 136                   "Reproduce CM5 charges in fitting." },
 137             };
 138             for (size_t i = 0; i < asize(pa); i++)
 139             {
 140                 pargs->push_back(pa[i]);
 141             }
 142             addOptions(pargs, etuneEEM);
 143             Bayes::add_pargs(pargs);
 144         }
 145
 146         void optionsFinished()
 147         {
 148             MolGen::optionsFinished();
 149             setBoxConstraint(bConstrain());
 150         }
 151
 152         double l2_regularizer (double x,
 153                                double min,
 154                                double max)
 155         {
 156             if (x < min)
 157             {
 158                 return (0.5 * gmx::square(x-min));
 159             }
 160             else if (x > max)
 161             {
 162                 return (0.5 * gmx::square(x-max));
 163             }
 164             else
 165             {
 166                 return 0;
 167             }
 168         }
 169
 170         void initChargeGeneration();
 171
 172         /*! \brief
 173          *
 174          * Fill parameter vector based on Poldata.
 175          * \param[in] factor Scaling factor for parameters
 176          */
 177         void polData2TuneACM(real factor);
 178
 179         /*! \brief
 180          * Copy the optimization parameters to the poldata structure
 181          * \param[in] List over the parameters that have changed.
 182          */
 183         virtual void toPolData(const std::vector<bool> &changed);
 184
 185         void InitOpt(real factor);
 186
 187         virtual double calcDeviation();
 188
 189         double calcPenalty(AtomIndexIterator ai);
 190
 191         /*! \brief
 192          * Do the actual optimization.
 193          * \param[in] fp     FILE pointer for logging
 194          * \param[in] fplog  FILE pointer for logging
 195          * \param[in] oenv   Output environment for managing xvg files etc.
 196          * \param[in] xvgconv Output file monitoring parameters
 197          * \param[in] xvgepot Output file monitoring penalty function
 198          * \return true if better parameters were found.
 199          */
 200         bool optRun(FILE                   *fp,
 201                     FILE                   *fplog,
 202                     int                     nrun,
 203                     const gmx_output_env_t *oenv,
 204                     const char             *xvgconv,
 205                     const char             *xvgepot);
 206 };
 207
 208 void OptACM::initChargeGeneration()
 209 {
 210     std::string method, basis;
 211     splitLot(lot(), &method, &basis);
 212     for (auto &mymol : mymols())
 213     {
 214         if (mymol.eSupp_ != eSupportNo)
 215         {
 216             // If using ESP for fitting we need to be able to compute the
 217             // electrostatic potential, however we always want to report it
 218             // so have to initialize the data anyway.
 219             mymol.initQgresp(poldata(),
 220                              method, basis, nullptr,
 221                              watoms(),
 222                              maxPot());
 223             // ACM is needed always as well in this program
 224             mymol.Qgacm_.setInfo(poldata(),
 225                                  mymol.atoms_,
 226                                  hfac(),
 227                                  mymol.molProp()->getCharge());
 228         }
 229     }
 230 }
 231
 232 static void dumpQ(FILE *fp, const std::string &molname,
 233                   t_atoms *atoms)
 234 {
 235     if (fp)
 236     {
 237         fprintf(fp, "%s q:", molname.c_str());
 238         for (int i = 0; i < atoms->nr; i++)
 239         {
 240             fprintf(fp, " %.3f", atoms->atom[i].q);
 241         }
 242         fprintf(fp, "\n");
 243     }
 244 }
 245
 246 double OptACM::calcDeviation()
 247 {
 248     int                  n         = 0;
 249     double               EemRms    = 0;
 250     double               bound     = 0;
 251     double               penalty   = 0;
 252     std::vector<double>  qq;
 253     const param_type    &param     = Bayes::getParam();
 254
 255     if (MASTER(commrec()))
 256     {
 257         auto *ic = indexCount();
 258         for (auto ai = ic->beginIndex(); ai < ic->endIndex(); ++ai)
 259         {
 260             if (!ai->isConst())
 261             {
 262                 auto name = ai->name();
 263
 264                 if (bFitChi_)
 265                 {
 266                     auto J00  = param[n++];
 267                     bound    += l2_regularizer(J00, J0Min(), J0Max());
 268                     if (strcasecmp(name.c_str(), fixchi()) != 0)
 269                     {
 270                         auto Chi0 = param[n++];
 271                         bound    += l2_regularizer(Chi0, chi0Min(), chi0Max());
 272                     }
 273                     if (penalize())
 274                     {
 275                         penalty += calcPenalty(ai);
 276                     }
 277                 }
 278                 if (bFitZeta_)
 279                 {
 280                     auto nzeta = poldata()->getNzeta(ai->name());
 281                     if (nzeta == 2 && bSameZeta_)
 282                     {
 283                         nzeta = 1;
 284                     }
 285                     for (auto zz = 0; zz < nzeta; zz++)
 286                     {
 287                         auto zeta = param[n++];
 288                         bound += l2_regularizer(zeta, zetaMin(), zetaMax());
 289                     }
 290                 }
 291             }
 292         }
 293         if (optHfac())
 294         {
 295             setHfac(param[n++]);
 296             bound += 100*gmx::square(hfacDiff());
 297         }
 298     }
 299
 300     if (PAR(commrec()))
 301     {
 302         bool bFinal = final();
 303         gmx_bcast(sizeof(final()), &bFinal, commrec());
 304         if (bFinal)
 305         {
 306             setFinal();
 307         }
 308     }
 309
 310     if (PAR(commrec()) && !final())
 311     {
 312         poldata()->broadcast_eemprop(commrec());
 313         if (bFitAlpha_)
 314         {
 315             poldata()->broadcast_ptype(commrec());
 316         }
 317     }
 318     resetEnergies();
 319     for (auto &mymol : mymols())
 320     {
 321         if ((mymol.eSupp_ == eSupportLocal) ||
 322             (final() && (mymol.eSupp_ == eSupportRemote)))
 323         {
 324             auto q     = mymol.Qgacm_.q();
 325             auto natom = mymol.Qgacm_.natom();
 326
 327             qq.resize(natom + 1, 0);
 328             for (auto i = 0; i < natom + 1; i++)
 329             {
 330                 qq[i] = q[i][0];
 331             }
 332
 333             bool converged = false;
 334             int  iter      = 0;
 335             do
 336             {
 337                 // Update charges in mtop before doing
 338                 // shell optimization.
 339                 for (int i = 0; i < mymol.mtop_->natoms; i++)
 340                 {
 341                     mymol.mtop_->moltype[0].atoms.atom[i].q      =
 342                         mymol.mtop_->moltype[0].atoms.atom[i].qB =
 343                             mymol.atoms_->atom[i].q;
 344                 }
 345
 346                 if (nullptr != mymol.shellfc_)
 347                 {
 348                     if (bFitAlpha_)
 349                     {
 350                         mymol.UpdateIdef(poldata(), eitPOLARIZATION);
 351                     }
 352                     mymol.computeForces(nullptr, commrec());
 353                 }
 354
 355                 auto qgen =  mymol.Qgacm_.generateCharges(debug,
 356                                                           mymol.molProp()->getMolname().c_str(),
 357                                                           poldata(),
 358                                                           mymol.atoms_,
 359                                                           mymol.x());
 360                 if (qgen != eQGEN_OK)
 361                 {
 362                     gmx_fatal(FARGS, "Could not generate charges for %s: %s",
 363                               mymol.molProp()->getMolname().c_str(),
 364                               mymol.Qgacm_.message());
 365                 }
 366                 q       = mymol.Qgacm_.q();
 367                 EemRms  = 0;
 368                 for (int i = 0; i < natom + 1; i++)
 369                 {
 370                     EemRms   += gmx::square(qq[i] - q[i][0]);
 371                     qq[i]     = q[i][0];
 372                 }
 373                 EemRms   /= natom;
 374                 converged = (EemRms < qtol()) || (nullptr == mymol.shellfc_);
 375                 iter++;
 376             }
 377             while ((!converged) && (iter < qcycle()));
 378             for (int i = 0; i < mymol.mtop_->natoms; i++)
 379             {
 380                 mymol.mtop_->moltype[0].atoms.atom[i].q      =
 381                     mymol.mtop_->moltype[0].atoms.atom[i].qB =
 382                         mymol.atoms_->atom[i].q;
 383             }
 384             if (debug)
 385             {
 386                 dumpQ(debug, mymol.molProp()->getMolname(), mymol.atoms_);
 387             }
 388             if (weight(ermsCHARGE))
 389             {
 390                 int    nChargeResidual = 0; // number of charge residuals added per molecule
 391                 double ChargeResidual  = 0;
 392                 bool   isPolarizable   = (nullptr != mymol.shellfc_);
 393                 double qtot            = 0;
 394                 int    i, j;
 395                 for (j = i = 0; j < mymol.atoms_->nr; j++)
 396                 {
 397                     auto atomnr = mymol.atoms_->atom[j].atomnumber;
 398                     auto qq     = mymol.atoms_->atom[j].q;
 399                     qtot       += qq;
 400                     if (mymol.atoms_->atom[j].ptype == eptAtom ||
 401                         mymol.atoms_->atom[j].ptype == eptNucleus)
 402                     {
 403                         double qref = (isPolarizable ? mymol.atoms_->atom[j+1].q : 0);
 404                         double dq   = 0;
 405                         if (atomnr == 1)
 406                         {
 407                             // Penalty if qH < 0
 408                             dq = qq + qref;
 409                         }
 410                         else if ((atomnr == 8)  || (atomnr == 9) ||
 411                                  (atomnr == 16) || (atomnr == 17) ||
 412                                  (atomnr == 35) || (atomnr == 53))
 413                         {
 414                             // Penalty if qO > 0, therefore we reverse the sign
 415                             dq = -(qq + qref);
 416                         }
 417                         if (dq < 0)
 418                         {
 419                             ChargeResidual += gmx::square(dq);
 420                             nChargeResidual++;
 421                         }
 422                         if (useCM5())
 423                         {
 424                             ChargeResidual += gmx::square(qq + qref - mymol.chargeQM(qtCM5)[i++]);
 425                             nChargeResidual++;
 426                         }
 427                     }
 428                 }
 429                 ChargeResidual += gmx::square(qtot - mymol.molProp()->getCharge());
 430                 nChargeResidual++;
 431                 increaseEnergy(ermsCHARGE, (ChargeResidual/nChargeResidual));
 432             }
 433             if (weight(ermsESP))
 434             {
 435                 real rrms     = 0;
 436                 real wtot     = 0;
 437                 real cosangle = 0;
 438                 if (nullptr != mymol.shellfc_)
 439                 {
 440                     mymol.Qgresp_.updateAtomCoords(mymol.x());
 441                 }
 442                 if (bFitZeta_)
 443                 {
 444                     mymol.Qgresp_.updateZeta(mymol.atoms_, poldata());
 445                 }
 446                 mymol.Qgresp_.updateAtomCharges(mymol.atoms_);
 447                 mymol.Qgresp_.calcPot(poldata()->getEpsilonR());
 448                 auto myRms =
 449                     convert2gmx(mymol.Qgresp_.getRms(&wtot, &rrms, &cosangle),
 450                                 eg2cHartree_e);
 451                 increaseEnergy(ermsESP, gmx::square(myRms));
 452                 if (debug)
 453                 {
 454                     fprintf(debug, "%s ESPrms = %g cosangle = %g\n",
 455                             mymol.molProp()->getMolname().c_str(),
 456                             myRms, cosangle);
 457                 }
 458             }
 459             if (weight(ermsMU))
 460             {
 461                 mymol.CalcDipole();
 462                 mymol.rotateDipole(mymol.muQM(qtCalc), mymol.muQM(qtElec));
 463                 if (bQM())
 464                 {
 465                     rvec dmu;
 466                     rvec_sub(mymol.muQM(qtCalc), mymol.muQM(qtElec), dmu);
 467                     increaseEnergy(ermsMU, iprod(dmu, dmu));
 468                 }
 469                 else
 470                 {
 471                     increaseEnergy(ermsMU, gmx::square(mymol.dipQM(qtCalc) - mymol.dipExper()));
 472                 }
 473             }
 474             if (weight(ermsQUAD))
 475             {
 476                 mymol.CalcQuadrupole();
 477                 for (int mm = 0; mm < DIM; mm++)
 478                 {
 479                     for (int nn = 0; nn < DIM; nn++)
 480                     {
 481                         if (bFullTensor_ || mm == nn)
 482                         {
 483                             increaseEnergy(ermsQUAD, gmx::square(mymol.QQM(qtCalc)[mm][nn] - mymol.QQM(qtElec)[mm][nn]));
 484                         }
 485                     }
 486                 }
 487             }
 488         }
 489     }
 490     increaseEnergy(ermsBOUNDS, bound);
 491     if (penalize())
 492     {
 493         increaseEnergy(ermsPENALTY, penalty);
 494     }
 495     sumEnergies();
 496     printEnergies(debug);
 497     return energy(ermsTOT);
 498 }
 499
 500 void OptACM::polData2TuneACM(real factor)
 501 {
 502     auto *ic = indexCount();
 503     for (auto ai = ic->beginIndex(); ai < ic->endIndex(); ++ai)
 504     {
 505         if (!ai->isConst())
 506         {
 507             auto ei   = poldata()->ztype2Eem(ai->name());
 508             GMX_RELEASE_ASSERT(ei != poldata()->EndEemprops(),
 509                                gmx::formatString("Cannot find eemprops for %s",
 510                                                  ai->name().c_str()).c_str());
 511             ai->setEemProps(ei);
 512             if (bFitChi_)
 513             {
 514                 auto J00  = ei->getJ0();
 515                 Bayes::addParam(J00, factor);
 516                 Bayes::addParamName(gmx::formatString("%s-Eta", ai->name().c_str()));
 517
 518                 if (ai->name().compare(fixchi()) != 0)
 519                 {
 520                     auto Chi0 = ei->getChi0();
 521                     Bayes::addParam(Chi0, factor);
 522                     Bayes::addParamName(gmx::formatString("%s-Chi", ai->name().c_str()));
 523                 }
 524             }
 525             if (bFitZeta_)
 526             {
 527                 auto nzeta = ei->getNzeta();
 528                 if (nzeta == 2 && bSameZeta_)
 529                 {
 530                     nzeta = 1;
 531                 }
 532                 for(auto i = 0; i < nzeta; i++)
 533                 {
 534                     auto zeta  = ei->getZeta(i);
 535                     if (0 != zeta)
 536                     {
 537                         Bayes::addParam(zeta, factor);
 538                         Bayes::addParamName(gmx::formatString("%s-Zeta", ai->name().c_str()));
 539                     }
 540                     else
 541                     {
 542                         gmx_fatal(FARGS, "Zeta is zero for atom %s in model %s\n",
 543                                   ai->name().c_str(), getEemtypeName(poldata()->getChargeModel()));
 544                     }
 545                 }
 546             }
 547             if (bFitAlpha_)
 548             {
 549                 auto alpha     = 0.0;
 550                 auto sigma     = 0.0;
 551                 if (poldata()->getZtypePol(ai->name(), &alpha, &sigma))
 552                 {
 553                     if (0 != alpha)
 554                     {
 555                         Bayes::addParam(alpha, factor);
 556                         Bayes::addParamName(gmx::formatString("%s-alpha", ai->name().c_str()));
 557                     }
 558                     else
 559                     {
 560                         gmx_fatal(FARGS, "Polarizability is zero for atom %s\n", ai->name().c_str());
 561                     }
 562                 }
 563                 else
 564                 {
 565                     gmx_fatal(FARGS, "No Ptype for zeta type %s\n", ai->name().c_str());
 566                 }
 567             }
 568         }
 569     }
 570     if (optHfac())
 571     {
 572         Bayes::addParam(hfac(), factor);
 573         Bayes::addParamName(gmx::formatString("hfac"));
 574     }
 575
 576 }
 577
 578 void OptACM::toPolData(const std::vector<bool> &changed)
 579 {
 580     size_t   n           = 0;
 581     auto     pd          = poldata();
 582     bool     distributed = getEemtypeDistributed(pd->getChargeModel());
 583     auto    *ic          = indexCount();
 584     auto     param       = Bayes::getParam();
 585     auto     psigma      = Bayes::getPsigma();
 586     if (psigma.empty())
 587     {
 588         psigma.resize(param.size(), 0);
 589     }
 590     Bayes::dumpParam(debug);
 591     for (auto ai = ic->beginIndex(); ai < ic->endIndex(); ++ai)
 592     {
 593         if (!ai->isConst())
 594         {
 595             auto ei = ai->eemProps();
 596             if (bFitChi_)
 597             {
 598                 ei->setJ0(param[n]);
 599                 ei->setJ0_sigma(psigma[n++]);
 600                 if (ai->name().compare(fixchi()) != 0)
 601                 {
 602                     ei->setChi0(param[n]);
 603                     ei->setChi0_sigma(psigma[n++]);
 604                 }
 605             }
 606             if (bFitZeta_)
 607             {
 608                 std::string zstr, z_sig;
 609                 std::string qstr   = ei->getQstr();
 610                 std::string rowstr = ei->getRowstr();
 611                 auto        nZeta  = ei->getNzeta();
 612                 double      zeta   = 0;
 613                 double      sigma  = 0;
 614                 if (distributed)
 615                 {
 616                     bool readOne = bSameZeta_ && (nZeta == 2);
 617                     for (auto i = 0; i < nZeta; i++)
 618                     {
 619                         if (i == 0 || (i > 0 && !readOne))
 620                         {
 621                             zeta   = param[n];
 622                             sigma  = psigma[n++];
 623                         }
 624                         zstr.append(gmx::formatString("%g ", zeta));
 625                         z_sig.append(gmx::formatString("%g ", sigma));
 626                     }
 627                 }
 628                 ei->setRowZetaQ(rowstr, zstr, qstr);
 629                 ei->setZetastr(zstr);
 630                 ei->setZeta_sigma(z_sig);
 631             }
 632             if (bFitAlpha_)
 633             {
 634                 std::string ptype;
 635                 if (pd->ztypeToPtype(ai->name(), ptype))
 636                 {
 637                     pd->setPtypePolarizability(ptype, param[n], psigma[n]);
 638                     n++;
 639                 }
 640                 else
 641                 {
 642                     gmx_fatal(FARGS, "No Ptype for zeta type %s\n", ai->name().c_str());
 643                 }
 644             }
 645         }
 646     }
 647     if (optHfac())
 648     {
 649         setHfac(param[n++]);
 650     }
 651     GMX_RELEASE_ASSERT(n == changed.size(),
 652                        gmx::formatString("n = %zu changed.size() = %zu",
 653                                          n, changed.size()).c_str());
 654 }
 655
 656 void OptACM::InitOpt(real factor)
 657 {
 658     polData2TuneACM(factor);
 659 }
 660
 661 double OptACM::calcPenalty(AtomIndexIterator ai)
 662 {
 663     double         penalty = 0;
 664     const auto     pd      = poldata();
 665     auto           ei      = ai->eemProps();
 666     auto           ai_elem = pd->ztype2elem(ei->getName());
 667     auto           ai_chi  = ei->getChi0();
 668     auto           ai_J0   = ei->getJ0();
 669     auto           ai_atn  = gmx_atomprop_atomnumber(atomprop(), ai_elem.c_str());
 670
 671     if (penalty_ == 0.0)
 672     {
 673         return 0.0;
 674     }
 675     if (strlen(fixchi()) != 0)
 676     {
 677         const auto ref_eem  = pd->atype2Eem(fixchi());
 678         if (ai_chi < ref_eem->getChi0())
 679         {
 680             penalty += penalty_;
 681         }
 682     }
 683
 684     if (ai->name() == "z_c3" && (ai_chi < 5 or ai_chi > 8))
 685     {
 686         penalty += (ai_atn * penalty_);
 687     }
 688
 689     if (ai->name() == "z_h1" && ai_chi > 2.5)
 690     {
 691         penalty += (6 * penalty_);
 692     }
 693
 694     auto *ic = indexCount();
 695     for (auto aj = ic->beginIndex(); aj < ic->endIndex(); ++aj)
 696     {
 697         if (!aj->isConst())
 698         {
 699             const auto ej      = aj->eemProps();
 700             const auto aj_elem = pd->ztype2elem(ej->getName());
 701             auto       aj_atn  = gmx_atomprop_atomnumber(atomprop(), aj_elem.c_str());
 702
 703             if (ai_atn != aj_atn)
 704             {
 705                 //Penalize if HeavyAtoms_chi <= H_chi or HeavyAtoms_J0 <= H_J0
 706                 auto aj_chi = ej->getChi0();
 707                 auto aj_J0  = ej->getJ0();
 708                 if ((ai_atn == 1 && aj_atn > 1  && (aj_chi <= ai_chi || aj_J0 <= ai_J0)) ||
 709                     (ai_atn > 1  && aj_atn == 1 && (aj_chi <= ai_chi || aj_J0 <= ai_J0)))
 710                 {
 711                     penalty += (std::abs((aj_atn - ai_atn)) * penalty_);
 712                 }
 713             }
 714         }
 715     }
 716     return penalty;
 717 }
 718
 719 bool OptACM::optRun(FILE                   *fp,
 720                     FILE                   *fplog,
 721                     int                     nrun,
 722                     const gmx_output_env_t *oenv,
 723                     const char             *xvgconv,
 724                     const char             *xvgepot)
 725 {
 726     bool bMinimum = false;
 727
 728     auto func = [&] (const double v[]) {
 729             return Bayes::objFunction(v);
 730         };
 731
 732     if (MASTER(commrec()))
 733     {
 734         if (PAR(commrec()))
 735         {
 736             int niter = 3 + nrun*Bayes::maxIter()*Bayes::nParam();
 737             for (int dest = 1; dest < commrec()->nnodes; dest++)
 738             {
 739                 gmx_send_int(commrec(), dest, niter);
 740             }
 741         }
 742         double chi2;
 743         Bayes::setFunc(func, &chi2);
 744         Bayes::setOutputFiles(xvgconv, xvgepot, oenv);
 745         param_type param     = Bayes::getParam();
 746         double     chi2_min  = Bayes::objFunction(param.data());
 747         chi2             = chi2_min;
 748
 749         for (auto n = 0; n < nrun; n++)
 750         {
 751             if ((nullptr != fp) && (0 == n))
 752             {
 753                 fprintf(fp, "\nStarting run %d out of %d\n", n, nrun);
 754             }
 755             Bayes::MCMC();
 756             if (chi2 < chi2_min)
 757             {
 758                 bMinimum = true;
 759                 chi2_min = chi2;
 760             }
 761         }
 762         if (bMinimum)
 763         {
 764             auto best           = Bayes::getBestParam();
 765             auto pmean          = Bayes::getPmean();
 766             auto psigma         = Bayes::getPsigma();
 767             auto attemptedMoves = Bayes::getAttemptedMoves();
 768             auto acceptedMoves  = Bayes::getAcceptedMoves();
 769             auto paramNames     = Bayes::getParamNames();
 770             auto emin           = Bayes::objFunction(best.data());
 771             if (fplog)
 772             {
 773                 fprintf(fplog, "\nMinimum RMSD value during optimization: %.3f.\n", sqrt(emin));
 774                 fprintf(fplog, "Statistics of parameters after optimization\n");
 775                 for (size_t k = 0; k < Bayes::nParam(); k++)
 776                 {
 777                     double acceptance_ratio = 100*(double(acceptedMoves[k])/attemptedMoves[k]);
 778                     fprintf(fplog, "%-10s  Best value:%10g  Mean value:%10g  Sigma:%10g  Attempted moves:%3d  Acceptance ratio:%5g\n",
 779                             paramNames[k].c_str(), best[k], pmean[k], psigma[k], attemptedMoves[k], acceptance_ratio);
 780                 }
 781             }
 782         }
 783     }
 784     else
 785     {
 786         /* S L A V E   N O D E S */
 787         auto niter = gmx_recv_int(commrec(), 0);
 788         for (auto n = 0; n < niter; n++)
 789         {
 790             (void) calcDeviation();
 791         }
 792     }
 793     setFinal();
 794     if (MASTER(commrec()))
 795     {
 796         param_type best = Bayes::getBestParam();
 797         (void) Bayes::objFunction(best.data());
 798         printEnergies(fp);
 799         printEnergies(fplog);
 800     }
 801     return bMinimum;
 802 }
 803 }
 804
 805 int alex_tune_eem(int argc, char *argv[])
 806 {
 807     static const char          *desc[] = {
 808         "tune_eem read a series of molecules and corresponding experimental",
 809         "dipole moments from a file, and tunes parameters in an algorithm",
 810         "until the experimental dipole moments are reproduced by the",
 811         "charge generating algorithm AX as implemented in the gentop program.[PAR]",
 812         "Minima and maxima for the parameters can be set, these are however",
 813         "not strictly enforced, but rather they are penalized with a harmonic",
 814         "function, for which the force constant can be set explicitly.[PAR]",
 815         "At every reinit step parameters are changed by a random amount within",
 816         "the fraction set by step size, and within the boundaries given",
 817         "by the minima and maxima. If the [TT]-random[tt] flag is",
 818         "given a completely random set of parameters is generated at the start",
 819         "of each run. At reinit steps however, the parameters are only changed",
 820         "slightly, in order to speed-up local search but not global search."
 821         "In other words, complete random starts are done only at the beginning of each",
 822         "run, and only when explicitly requested.[PAR]",
 823         "The absolut dipole moment of a molecule remains unchanged if all the",
 824         "atoms swap the sign of the charge. To prevent this kind of mirror",
 825         "effects a penalty is added to the square deviation ",
 826         "if hydrogen atoms have a negative charge. Similarly a penalty is",
 827         "added if atoms from row VI or VII in the periodic table have a positive",
 828         "charge. The penalty is equal to the force constant given on the command line",
 829         "time the square of the charge.[PAR]",
 830         "One of the electronegativities (chi) is redundant in the optimization,",
 831         "only the relative values are meaningful.",
 832         "Therefore by default we fix the value for hydrogen to what is written",
 833         "in the eemprops.dat file (or whatever is given with the [tt]-d[TT] flag).",
 834         "A suitable value would be 2.3, the original, value due to Pauling,",
 835         "this can by overridden by setting the [tt]-fixchi[TT] flag to something else (e.g. a non-existing atom).[PAR]",
 836         "A selection of molecules into a training set and a test set (or ignore set)",
 837         "can be made using option [TT]-sel[tt]. The format of this file is:[BR]",
 838         "iupac|Train[BR]",
 839         "iupac|Test[BR]",
 840         "iupac|Ignore[BR]",
 841         "and you should ideally have a line for each molecule in the molecule database",
 842         "([TT]-f[tt] option). Missing molecules will be ignored."
 843     };
 844
 845     t_filenm                    fnm[] = {
 846         { efDAT, "-f",         "allmols",       ffREAD  },
 847         { efDAT, "-d",         "gentop",        ffOPTRD },
 848         { efDAT, "-o",         "tunedip",       ffWRITE },
 849         { efDAT, "-sel",       "molselect",     ffREAD  },
 850         { efXVG, "-table",     "table",         ffOPTRD },
 851         { efLOG, "-g",         "charges",       ffWRITE },
 852         { efXVG, "-qhisto",    "q_histo",       ffWRITE },
 853         { efXVG, "-dipcorr",   "dip_corr",      ffWRITE },
 854         { efXVG, "-mucorr",    "mu_corr",       ffWRITE },
 855         { efXVG, "-thetacorr", "theta_corr",    ffWRITE },
 856         { efXVG, "-espcorr",   "esp_corr",      ffWRITE },
 857         { efXVG, "-alphacorr", "alpha_corr",    ffWRITE },
 858         { efXVG, "-qcorr",     "q_corr",        ffWRITE },
 859         { efXVG, "-isopol",    "isopol_corr",   ffWRITE },
 860         { efXVG, "-anisopol",  "anisopol_corr", ffWRITE },
 861         { efXVG, "-conv",      "param-conv",    ffWRITE },
 862         { efXVG, "-epot",      "param-epot",    ffWRITE },
 863         { efTEX, "-latex",     "eemprop",       ffWRITE }
 864     };
 865
 866     const int                   NFILE         = asize(fnm);
 867
 868     int                         nrun          = 1;
 869     int                         reinit        = 0;
 870     real                        th_toler      = 170;
 871     real                        ph_toler      = 5;
 872     real                        dip_toler     = 0.5;
 873     real                        quad_toler    = 5;
 874     real                        alpha_toler   = 3;
 875     real                        isopol_toler  = 2;
 876     real                        factor        = 0.8;
 877     real                        efield        = 10;
 878     char                       *opt_elem      = nullptr;
 879     bool                        bRandom       = false;
 880     bool                        bcompress     = false;
 881     bool                        bPrintTable   = false;
 882     bool                        bZero         = true;
 883     bool                        bOptimize     = true;
 884     bool                        bForceOutput  = true;
 885
 886     t_pargs                     pa[]         = {
 887         { "-reinit", FALSE, etINT, {&reinit},
 888           "After this many iterations the search vectors are randomized again. A vlue of 0 means this is never done at all." },
 889         { "-nrun",   FALSE, etINT,  {&nrun},
 890           "This many runs will be done, before each run a complete randomization will be done" },
 891         { "-opt_elem",  FALSE, etSTR, {&opt_elem},
 892           "Space-separated list of atom types to optimize, e.g. \"H C Br\". The other available atom types in gentop.dat are left unmodified. If this variable is not set, all elements will be optimized." },
 893         { "-random", FALSE, etBOOL, {&bRandom},
 894           "Generate completely random starting parameters within the limits set by the options. This will be done at the very first step and before each subsequent run." },
 895         { "-zero", FALSE, etBOOL, {&bZero},
 896           "Use molecules with zero dipole in the fit as well" },
 897         { "-dip_toler", FALSE, etREAL, {&dip_toler},
 898           "Tolerance (Debye) for marking dipole as an outlier in the log file" },
 899         { "-quad_toler", FALSE, etREAL, {&quad_toler},
 900           "Tolerance (Buckingham) for marking quadrupole as an outlier in the log file" },
 901         { "-alpha_toler", FALSE, etREAL, {&alpha_toler},
 902           "Tolerance (A^3) for marking diagonal elements of the polarizability tensor as an outlier in the log file" },
 903         { "-isopol_toler", FALSE, etREAL, {&isopol_toler},
 904           "Tolerance (A^3) for marking isotropic polarizability as an outlier in the log file" },
 905         { "-th_toler", FALSE, etREAL, {&th_toler},
 906           "Minimum angle to be considered a linear A-B-C bond" },
 907         { "-ph_toler", FALSE, etREAL, {&ph_toler},
 908           "Maximum angle to be considered a planar A-B-C/B-C-D torsion" },
 909         { "-compress", FALSE, etBOOL, {&bcompress},
 910           "Compress output XML file" },
 911         { "-btex", FALSE, etBOOL, {&bPrintTable},
 912           "[HIDDEN]Print the latex table for the Gaussian and Slater exponents" },
 913         { "-factor", FALSE, etREAL, {&factor},
 914           "Factor for generating random parameters. Parameters will be taken within the limit factor*x - x/factor" },
 915         { "-efield",  FALSE, etREAL, {&efield},
 916           "The magnitude of the external electeric field to calculate polarizability tensor." },
 917         { "-optimize",     FALSE, etBOOL, {&bOptimize},
 918           "Do parameter optimization when true, or a single calculation otherwise." },
 919         { "-force_output", FALSE, etBOOL, {&bForceOutput},
 920           "Write output even if no new minimum is found" }
 921
 922     };
 923
 924     FILE                       *fp;
 925     gmx_output_env_t           *oenv;
 926     time_t                      my_t;
 927     MolSelect                   gms;
 928
 929     std::vector<t_pargs>        pargs;
 930     for (size_t i = 0; i < asize(pa); i++)
 931     {
 932         pargs.push_back(pa[i]);
 933     }
 934     alexandria::OptACM opt;
 935     opt.add_pargs(&pargs);
 936
 937     if (!parse_common_args(&argc, argv, PCA_CAN_VIEW, NFILE, fnm,
 938                            pargs.size(), pargs.data(),
 939                            asize(desc), desc, 0, nullptr, &oenv))
 940     {
 941         return 0;
 942     }
 943     opt.optionsFinished();
 944     if (MASTER(opt.commrec()))
 945     {
 946         fp = gmx_ffopen(opt2fn("-g", NFILE, fnm), "w");
 947
 948         time(&my_t);
 949         fprintf(fp, "# This file was created %s", ctime(&my_t));
 950         fprintf(fp, "# alexandria is part of GROMACS:\n#\n");
 951         fprintf(fp, "# %s\n#\n", gmx::bromacs().c_str());
 952     }
 953     else
 954     {
 955         fp = nullptr;
 956     }
 957     if (MASTER(opt.commrec()))
 958     {
 959         gms.read(opt2fn_null("-sel", NFILE, fnm));
 960     }
 961
 962     const char *tabfn = opt2fn_null("-table", NFILE, fnm);
 963
 964     opt.Read(fp ? fp : (debug ? debug : nullptr),
 965              opt2fn("-f", NFILE, fnm),
 966              opt2fn_null("-d", NFILE, fnm),
 967              bZero,
 968              opt_elem,
 969              gms,
 970              true,
 971              false,
 972              false,
 973              false,
 974              opt.fitZeta(),
 975              false,
 976              tabfn);
 977
 978     opt.initChargeGeneration();
 979
 980     bool bMinimum = false;
 981     if (bOptimize)
 982     {
 983         if (MASTER(opt.commrec()))
 984         {
 985             opt.InitOpt(factor);
 986         }
 987
 988         bMinimum = opt.optRun(MASTER(opt.commrec()) ? stderr : nullptr,
 989                               fp,
 990                               nrun,
 991                               oenv,
 992                               opt2fn("-conv", NFILE, fnm),
 993                               opt2fn("-epot", NFILE, fnm));
 994     }
 995
 996     if (MASTER(opt.commrec()))
 997     {
 998         if (bMinimum || bForceOutput)
 999         {
1000             auto  iModel = opt.poldata()->getChargeModel();
1001             bool  bPolar = getEemtypePolarizable(iModel);
1002
1003             auto *ic = opt.indexCount();
1004             print_electric_props(fp,
1005                                  opt.mymols(),
1006                                  opt.poldata(),
1007                                  opt.mdlog(),
1008                                  opt.atomprop(),
1009                                  opt.watoms(),
1010                                  opt.hfac(),
1011                                  opt.lot(),
1012                                  tabfn,
1013                                  opt.hwinfo(),
1014                                  opt.qcycle(),
1015                                  opt.maxPot(),
1016                                  opt.qtol(),
1017                                  opt2fn("-qhisto",    NFILE, fnm),
1018                                  opt2fn("-dipcorr",   NFILE, fnm),
1019                                  opt2fn("-mucorr",    NFILE, fnm),
1020                                  opt2fn("-thetacorr", NFILE, fnm),
1021                                  opt2fn("-espcorr",   NFILE, fnm),
1022                                  opt2fn("-alphacorr", NFILE, fnm),
1023                                  opt2fn("-isopol",    NFILE, fnm),
1024                                  opt2fn("-anisopol",  NFILE, fnm),
1025                                  opt2fn("-qcorr",     NFILE, fnm),
1026                                  dip_toler,
1027                                  quad_toler,
1028                                  alpha_toler,
1029                                  isopol_toler,
1030                                  oenv,
1031                                  bPolar,
1032                                  opt.dipole(),
1033                                  opt.quadrupole(),
1034                                  opt.fullTensor(),
1035                                  ic,
1036                                  opt.commrec(),
1037                                  efield);
1038             writePoldata(opt2fn("-o", NFILE, fnm), opt.poldata(), bcompress);
1039             if (bPrintTable)
1040             {
1041                 FILE        *tp;
1042                 tp = gmx_ffopen(opt2fn("-latex", NFILE, fnm), "w");
1043                 alexandria_poldata_eemprops_table(tp, opt.poldata());
1044                 gmx_ffclose(tp);
1045             }
1046         }
1047         else if (!bMinimum)
1048         {
1049             printf("No improved parameters found. Please try again with more iterations.\n");
1050         }
1051         gmx_ffclose(fp);
1052     }
1053     return 0;
1054 }