src/gromacs/awh/biasstate.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * Implements the BiasState class.
  39  *
  40  * \author Viveca Lindahl
  41  * \author Berk Hess <hess@kth.se>
  42  * \ingroup module_awh
  43  */
  44
  45 #include "gmxpre.h"
  46
  47 #include "biasstate.h"
  48
  49 #include <assert.h>
  50
  51 #include <cmath>
  52 #include <cstdio>
  53 #include <cstdlib>
  54 #include <cstring>
  55
  56 #include <algorithm>
  57
  58 #include "gromacs/fileio/gmxfio.h"
  59 #include "gromacs/fileio/xvgr.h"
  60 #include "gromacs/gmxlib/network.h"
  61 #include "gromacs/math/utilities.h"
  62 #include "gromacs/mdtypes/awh-history.h"
  63 #include "gromacs/mdtypes/awh-params.h"
  64 #include "gromacs/mdtypes/commrec.h"
  65 #include "gromacs/simd/simd.h"
  66 #include "gromacs/simd/simd_math.h"
  67 #include "gromacs/utility/arrayref.h"
  68 #include "gromacs/utility/exceptions.h"
  69 #include "gromacs/utility/gmxassert.h"
  70 #include "gromacs/utility/smalloc.h"
  71 #include "gromacs/utility/stringutil.h"
  72
  73 #include "grid.h"
  74 #include "pointstate.h"
  75
  76 namespace gmx
  77 {
  78
  79 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
  80 {
  81     GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
  82
  83     /* The PMF is just the negative of the log of the sampled PMF histogram.
  84      * Points with zero target weight are ignored, they will mostly contain noise.
  85      */
  86     for (size_t i = 0; i < points_.size(); i++)
  87     {
  88         pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
  89     }
  90 }
  91
  92 namespace
  93 {
  94
  95 /*! \brief
  96  * Sum PMF over multiple simulations, when requested.
  97  *
  98  * \param[in,out] pointState         The state of the points in the bias.
  99  * \param[in]     numSharedUpdate    The number of biases sharing the histogram.
 100  * \param[in]     multiSimComm       Struct for multi-simulation communication.
 101  */
 102 void sumPmf(gmx::ArrayRef<PointState>  pointState,
 103             int                        numSharedUpdate,
 104             const gmx_multisim_t      *multiSimComm)
 105 {
 106     if (numSharedUpdate == 1)
 107     {
 108         return;
 109     }
 110     GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->nsim == 0, "numSharedUpdate should be a multiple of multiSimComm->nsim");
 111     GMX_ASSERT(numSharedUpdate == multiSimComm->nsim, "Sharing within a simulation is not implemented (yet)");
 112
 113     std::vector<double> buffer(pointState.size());
 114
 115     /* Need to temporarily exponentiate the log weights to sum over simulations */
 116     for (size_t i = 0; i < buffer.size(); i++)
 117     {
 118         buffer[i] = pointState[i].inTargetRegion() ? std::exp(static_cast<float>(pointState[i].logPmfSum())) : 0;
 119     }
 120
 121     gmx_sumd_sim(buffer.size(), buffer.data(), multiSimComm);
 122
 123     /* Take log again to get (non-normalized) PMF */
 124     double normFac = 1.0/numSharedUpdate;
 125     for (size_t i = 0; i < pointState.size(); i++)
 126     {
 127         if (pointState[i].inTargetRegion())
 128         {
 129             pointState[i].setLogPmfSum(-std::log(buffer[i]*normFac));
 130         }
 131     }
 132 }
 133
 134 /*! \brief
 135  * Find the minimum free energy value.
 136  *
 137  * \param[in] pointState  The state of the points.
 138  * \returns the minimum free energy value.
 139  */
 140 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
 141 {
 142     double fMin = GMX_FLOAT_MAX;
 143
 144     for (auto const &ps : pointState)
 145     {
 146         if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
 147         {
 148             fMin = ps.freeEnergy();
 149         }
 150     }
 151
 152     return fMin;
 153 }
 154
 155 /*! \brief
 156  * Find and return the log of the probability weight of a point given a coordinate value.
 157  *
 158  * The unnormalized weight is given by
 159  * w(point|value) = exp(bias(point) - U(value,point)),
 160  * where U is a harmonic umbrella potential.
 161  *
 162  * \param[in] dimParams     The bias dimensions parameters
 163  * \param[in] points        The point state.
 164  * \param[in] grid          The grid.
 165  * \param[in] pointIndex    Point to evaluate probability weight for.
 166  * \param[in] pointBias     Bias for the point (as a log weight).
 167  * \param[in] value         Coordinate value.
 168  * \returns the log of the biased probability weight.
 169  */
 170 double biasedLogWeightFromPoint(const std::vector<DimParams>  &dimParams,
 171                                 const std::vector<PointState> &points,
 172                                 const Grid                    &grid,
 173                                 int                            pointIndex,
 174                                 double                         pointBias,
 175                                 const awh_dvec                 value)
 176 {
 177     double logWeight = c_largeNegativeExponent;
 178
 179     /* Only points in the target reigon have non-zero weight */
 180     if (points[pointIndex].inTargetRegion())
 181     {
 182         logWeight = pointBias;
 183
 184         /* Add potential for all parameter dimensions */
 185         for (size_t d = 0; d < dimParams.size(); d++)
 186         {
 187             double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
 188             logWeight -= 0.5*dimParams[d].betak*dev*dev;
 189         }
 190     }
 191
 192     return logWeight;
 193 }
 194
 195 }   // namespace
 196
 197 void BiasState::calcConvolvedPmf(const std::vector<DimParams>  &dimParams,
 198                                  const Grid                    &grid,
 199                                  std::vector<float>            *convolvedPmf) const
 200 {
 201     size_t numPoints = grid.numPoints();
 202
 203     convolvedPmf->resize(numPoints);
 204
 205     /* Get the PMF to convolve. */
 206     std::vector<float> pmf(numPoints);
 207     getPmf(pmf);
 208
 209     for (size_t m = 0; m < numPoints; m++)
 210     {
 211         double           freeEnergyWeights = 0;
 212         const GridPoint &point             = grid.point(m);
 213         for (auto &neighbor : point.neighbor)
 214         {
 215             /* The negative PMF is a positive bias. */
 216             double biasNeighbor = -pmf[neighbor];
 217
 218             /* Add the convolved PMF weights for the neighbors of this point.
 219                Note that this function only adds point within the target > 0 region.
 220                Sum weights, take the logarithm last to get the free energy. */
 221             double logWeight   = biasedLogWeightFromPoint(dimParams, points_, grid,
 222                                                           neighbor, biasNeighbor,
 223                                                           point.coordValue);
 224             freeEnergyWeights += std::exp(logWeight);
 225         }
 226
 227         GMX_RELEASE_ASSERT(freeEnergyWeights > 0, "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
 228         (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
 229     }
 230 }
 231
 232 namespace
 233 {
 234
 235 /*! \brief
 236  * Updates the target distribution for all points.
 237  *
 238  * The target distribution is always updated for all points
 239  * at the same time.
 240  *
 241  * \param[in,out] pointState  The state of all points.
 242  * \param[in]     params      The bias parameters.
 243  */
 244 void updateTargetDistribution(gmx::ArrayRef<PointState>  pointState,
 245                               const BiasParams          &params)
 246 {
 247     double freeEnergyCutoff = 0;
 248     if (params.eTarget == eawhtargetCUTOFF)
 249     {
 250         freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
 251     }
 252
 253     double sumTarget = 0;
 254     for (PointState &ps : pointState)
 255     {
 256         sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
 257     }
 258     GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
 259
 260     /* Normalize to 1 */
 261     double invSum = 1.0/sumTarget;
 262     for (PointState &ps : pointState)
 263     {
 264         ps.scaleTarget(invSum);
 265     }
 266 }
 267
 268 /*! \brief
 269  * Puts together a string describing a grid point.
 270  *
 271  * \param[in] grid         The grid.
 272  * \param[in] point        Grid point index.
 273  * \returns a string for the point.
 274  */
 275 std::string gridPointValueString(const Grid &grid, int point)
 276 {
 277     std::string pointString;
 278
 279     pointString += "(";
 280
 281     for (int d = 0; d < grid.numDimensions(); d++)
 282     {
 283         pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
 284         if (d < grid.numDimensions() - 1)
 285         {
 286             pointString += ",";
 287         }
 288         else
 289         {
 290             pointString += ")";
 291         }
 292     }
 293
 294     return pointString;
 295 }
 296
 297 }   // namespace
 298
 299 int BiasState::warnForHistogramAnomalies(const Grid  &grid,
 300                                          int          biasIndex,
 301                                          double       t,
 302                                          FILE        *fplog,
 303                                          int          maxNumWarnings) const
 304 {
 305     const double maxHistogramRatio = 0.5;  /* Tolerance for printing a warning about the histogram ratios */
 306
 307     /* Sum up the histograms and get their normalization */
 308     double sumVisits  = 0;
 309     double sumWeights = 0;
 310     for (auto &pointState : points_)
 311     {
 312         if (pointState.inTargetRegion())
 313         {
 314             sumVisits  += pointState.numVisitsTot();
 315             sumWeights += pointState.weightSumTot();
 316         }
 317     }
 318     GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
 319     GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
 320     double invNormVisits = 1.0/sumVisits;
 321     double invNormWeight = 1.0/sumWeights;
 322
 323     /* Check all points for warnings */
 324     int         numWarnings  = 0;
 325     size_t      numPoints    = grid.numPoints();
 326     for (size_t m = 0; m < numPoints; m++)
 327     {
 328         /* Skip points close to boundary or non-target region */
 329         const GridPoint &gridPoint = grid.point(m);
 330         bool             skipPoint = false;
 331         for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
 332         {
 333             int neighbor = gridPoint.neighbor[n];
 334             skipPoint    = !points_[neighbor].inTargetRegion();
 335             for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
 336             {
 337                 const GridPoint &neighborPoint = grid.point(neighbor);
 338                 skipPoint =
 339                     neighborPoint.index[d] == 0 ||
 340                     neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
 341             }
 342         }
 343
 344         /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
 345         const double relativeWeight = points_[m].weightSumTot()*invNormWeight;
 346         const double relativeVisits = points_[m].numVisitsTot()*invNormVisits;
 347         if (!skipPoint &&
 348             relativeVisits < relativeWeight*maxHistogramRatio)
 349         {
 350             std::string pointValueString = gridPointValueString(grid, m);
 351             std::string warningMessage   =
 352                 gmx::formatString("\nawh%d warning: "
 353                                   "at t = %g ps the obtained coordinate distribution at coordinate value %s "
 354                                   "is less than a fraction %g of the reference distribution at that point. "
 355                                   "If you are not certain about your settings you might want to increase your pull force constant or "
 356                                   "modify your sampling region.\n",
 357                                   biasIndex + 1, t, pointValueString.c_str(), maxHistogramRatio);
 358             gmx::TextLineWrapper wrapper;
 359             wrapper.settings().setLineLength(c_linewidth);
 360             fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
 361
 362             numWarnings++;
 363         }
 364         if (numWarnings >= maxNumWarnings)
 365         {
 366             break;
 367         }
 368     }
 369
 370     return numWarnings;
 371 }
 372
 373 double BiasState::calcUmbrellaForceAndPotential(const std::vector<DimParams> &dimParams,
 374                                                 const Grid                   &grid,
 375                                                 int                           point,
 376                                                 awh_dvec                      force) const
 377 {
 378     double potential = 0;
 379     for (size_t d = 0; d < dimParams.size(); d++)
 380     {
 381         double deviation = getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
 382
 383         double k         = dimParams[d].k;
 384
 385         /* Force from harmonic potential 0.5*k*dev^2 */
 386         force[d]   = -k*deviation;
 387         potential += 0.5*k*deviation*deviation;
 388     }
 389
 390     return potential;
 391 }
 392
 393 void BiasState::calcConvolvedForce(const std::vector<DimParams> &dimParams,
 394                                    const Grid                   &grid,
 395                                    gmx::ArrayRef<const double>   probWeightNeighbor,
 396                                    awh_dvec                      force) const
 397 {
 398     for (size_t d = 0; d < dimParams.size(); d++)
 399     {
 400         force[d] = 0;
 401     }
 402
 403     /* Only neighboring points have non-negligible contribution. */
 404     const std::vector<int> &neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
 405     for (size_t n = 0; n < neighbor.size(); n++)
 406     {
 407         double weightNeighbor = probWeightNeighbor[n];
 408         int    indexNeighbor  = neighbor[n];
 409
 410         /* Get the umbrella force from this point. The returned potential is ignored here. */
 411         awh_dvec forceFromNeighbor;
 412         calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor,
 413                                       forceFromNeighbor);
 414
 415         /* Add the weighted umbrella force to the convolved force. */
 416         for (size_t d = 0; d < dimParams.size(); d++)
 417         {
 418             force[d] += forceFromNeighbor[d]*weightNeighbor;
 419         }
 420     }
 421 }
 422
 423 double BiasState::moveUmbrella(const std::vector<DimParams> &dimParams,
 424                                const Grid                   &grid,
 425                                gmx::ArrayRef<const double>   probWeightNeighbor,
 426                                awh_dvec                      biasForce,
 427                                gmx_int64_t                   step,
 428                                gmx_int64_t                   seed,
 429                                int                           indexSeed)
 430 {
 431     /* Generate and set a new coordinate reference value */
 432     coordState_.sampleUmbrellaGridpoint(grid, coordState_.gridpointIndex(), probWeightNeighbor, step, seed, indexSeed);
 433
 434     awh_dvec newForce;
 435     double   newPotential =
 436         calcUmbrellaForceAndPotential(dimParams, grid,  coordState_.umbrellaGridpoint(), newForce);
 437
 438     /*  A modification of the reference value at time t will lead to a different
 439         force over t-dt/2 to t and over t to t+dt/2. For high switching rates
 440         this means the force and velocity will change signs roughly as often.
 441         To avoid any issues we take the average of the previous and new force
 442         at steps when the reference value has been moved. E.g. if the ref. value
 443         is set every step to (coord dvalue +/- delta) would give zero force.
 444      */
 445     for (size_t d = 0; d < dimParams.size(); d++)
 446     {
 447         /* clang thinks newForce[d] can be garbage */
 448  #ifndef __clang_analyzer__
 449         /* Average of the current and new force */
 450         biasForce[d] = 0.5*(biasForce[d] + newForce[d]);
 451  #endif
 452     }
 453
 454     return newPotential;
 455 }
 456
 457 namespace
 458 {
 459
 460 /*! \brief
 461  * Sets the histogram rescaling factors needed to control the histogram size.
 462  *
 463  * For sake of robustness, the reference weight histogram can grow at a rate
 464  * different from the actual sampling rate. Typically this happens for a limited
 465  * initial time, alternatively growth is scaled down by a constant factor for all
 466  * times. Since the size of the reference histogram sets the size of the free
 467  * energy update this should be reflected also in the PMF. Thus the PMF histogram
 468  * needs to be rescaled too.
 469  *
 470  * This function should only be called by the bias update function or wrapped by a function that
 471  * knows what scale factors should be applied when, e.g,
 472  * getSkippedUpdateHistogramScaleFactors().
 473  *
 474  * \param[in]  params             The bias parameters.
 475  * \param[in]  newHistogramSize   New reference weight histogram size.
 476  * \param[in]  oldHistogramSize   Previous reference weight histogram size (before adding new samples).
 477  * \param[out] weightHistScaling  Scaling factor for the reference weight histogram.
 478  * \param[out] logPmfSumScaling   Log of the scaling factor for the PMF histogram.
 479  */
 480 void setHistogramUpdateScaleFactors(const BiasParams &params,
 481                                     double            newHistogramSize,
 482                                     double            oldHistogramSize,
 483                                     double           *weightHistScaling,
 484                                     double           *logPmfSumScaling)
 485 {
 486
 487     /* The two scaling factors below are slightly different (ignoring the log factor) because the
 488        reference and the PMF histogram apply weight scaling differently. The weight histogram
 489        applies is  locally, i.e. each sample is scaled down meaning all samples get equal weight.
 490        It is done this way because that is what target type local Boltzmann (for which
 491        target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
 492        by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
 493        1) empirically this is necessary for converging the PMF; 2) since the extraction of
 494        the PMF is theoretically only valid for a constant bias, new samples should get more
 495        weight than old ones for which the bias is fluctuating more. */
 496     *weightHistScaling = newHistogramSize/(oldHistogramSize + params.updateWeight*params.localWeightScaling);
 497     *logPmfSumScaling  = std::log(newHistogramSize/(oldHistogramSize + params.updateWeight));
 498 }
 499
 500 }   // namespace
 501
 502 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams &params,
 503                                                       double           *weightHistScaling,
 504                                                       double           *logPmfSumScaling) const
 505 {
 506     GMX_ASSERT(params.skipUpdates(), "Calling function for skipped updates when skipping updates is not allowed");
 507
 508     if (inInitialStage())
 509     {
 510         /* In between global updates the reference histogram size is kept constant so we trivially know what the
 511             histogram size was at the time of the skipped update. */
 512         double histogramSize = histogramSize_.histogramSize();
 513         setHistogramUpdateScaleFactors(params, histogramSize, histogramSize,
 514                                        weightHistScaling, logPmfSumScaling);
 515     }
 516     else
 517     {
 518         /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
 519         *weightHistScaling = 1;
 520         *logPmfSumScaling  = 0;
 521     }
 522 }
 523
 524 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams &params)
 525 {
 526     double weightHistScaling;
 527     double logPmfsumScaling;
 528
 529     getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
 530
 531     for (auto &pointState : points_)
 532     {
 533         bool didUpdate = pointState.performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
 534
 535         /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
 536         if (didUpdate)
 537         {
 538             pointState.updateBias();
 539         }
 540     }
 541 }
 542
 543 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams &params,
 544                                                const Grid       &grid)
 545 {
 546     double weightHistScaling;
 547     double logPmfsumScaling;
 548
 549     getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
 550
 551     /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
 552     const std::vector<int> &neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
 553     for (auto &neighbor : neighbors)
 554     {
 555         bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
 556
 557         if (didUpdate)
 558         {
 559             points_[neighbor].updateBias();
 560         }
 561     }
 562 }
 563
 564 namespace
 565 {
 566
 567 /*! \brief
 568  * Merge update lists from multiple sharing simulations.
 569  *
 570  * \param[in,out] updateList    Update list for this simulation (assumed >= npoints long).
 571  * \param[in]     numPoints     Total number of points.
 572  * \param[in]     multiSimComm  Struct for multi-simulation communication.
 573  */
 574 void mergeSharedUpdateLists(std::vector<int>     *updateList,
 575                             int                   numPoints,
 576                             const gmx_multisim_t *multiSimComm)
 577 {
 578     std::vector<int> numUpdatesOfPoint;
 579
 580     /* Flag the update points of this sim.
 581        TODO: we can probably avoid allocating this array and just use the input array. */
 582     numUpdatesOfPoint.resize(numPoints, 0);
 583     for (auto &pointIndex : *updateList)
 584     {
 585         numUpdatesOfPoint[pointIndex] = 1;
 586     }
 587
 588     /* Sum over the sims to get all the flagged points */
 589     gmx_sumi_sim(numPoints, numUpdatesOfPoint.data(), multiSimComm);
 590
 591     /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
 592     updateList->clear();
 593     for (int m = 0; m < numPoints; m++)
 594     {
 595         if (numUpdatesOfPoint[m] > 0)
 596         {
 597             updateList->push_back(m);
 598         }
 599     }
 600 }
 601
 602 /*! \brief
 603  * Generate an update list of points sampled since the last update.
 604  *
 605  * \param[in] grid              The AWH bias.
 606  * \param[in] points            The point state.
 607  * \param[in] originUpdatelist  The origin of the rectangular region that has been sampled since last update.
 608  * \param[in] endUpdatelist     The end of the rectangular that has been sampled since last update.
 609  * \param[in,out] updateList    Local update list to set (assumed >= npoints long).
 610  */
 611 void makeLocalUpdateList(const Grid                    &grid,
 612                          const std::vector<PointState> &points,
 613                          const awh_ivec                 originUpdatelist,
 614                          const awh_ivec                 endUpdatelist,
 615                          std::vector<int>              *updateList)
 616 {
 617     awh_ivec origin;
 618     awh_ivec numPoints;
 619
 620     /* Define the update search grid */
 621     for (int d = 0; d < grid.numDimensions(); d++)
 622     {
 623         origin[d]    = originUpdatelist[d];
 624         numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
 625
 626         /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
 627            This helps for calculating the distance/number of points but should be removed and fixed when the way of
 628            updating origin/end updatelist is changed (see sampleProbabilityWeights). */
 629         numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
 630     }
 631
 632     /* Make the update list */
 633     updateList->clear();
 634     int  pointIndex  = -1;
 635     bool pointExists = true;
 636     while (pointExists)
 637     {
 638         pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
 639
 640         if (pointExists && points[pointIndex].inTargetRegion())
 641         {
 642             updateList->push_back(pointIndex);
 643         }
 644     }
 645 }
 646
 647 }   // namespace
 648
 649 void BiasState::resetLocalUpdateRange(const Grid &grid)
 650 {
 651     const int gridpointIndex = coordState_.gridpointIndex();
 652     for (int d = 0; d < grid.numDimensions(); d++)
 653     {
 654         /* This gives the  minimum range consisting only of the current closest point. */
 655         originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
 656         endUpdatelist_[d]    = grid.point(gridpointIndex).index[d];
 657     }
 658 }
 659
 660 namespace
 661 {
 662
 663 /*! \brief
 664  * Add partial histograms (accumulating between updates) to accumulating histograms.
 665  *
 666  * \param[in,out] pointState         The state of the points in the bias.
 667  * \param[in,out] weightSumCovering  The weights for checking covering.
 668  * \param[in]     numSharedUpdate    The number of biases sharing the histrogram.
 669  * \param[in]     multiSimComm       Struct for multi-simulation communication.
 670  * \param[in]     localUpdateList    List of points with data.
 671  */
 672 void sumHistograms(gmx::ArrayRef<PointState>  pointState,
 673                    gmx::ArrayRef<double>      weightSumCovering,
 674                    int                        numSharedUpdate,
 675                    const gmx_multisim_t      *multiSimComm,
 676                    const std::vector<int>    &localUpdateList)
 677 {
 678     /* The covering checking histograms are added before summing over simulations, so that the weights from different
 679        simulations are kept distinguishable. */
 680     for (int globalIndex : localUpdateList)
 681     {
 682         weightSumCovering[globalIndex] +=
 683             pointState[globalIndex].weightSumIteration();
 684     }
 685
 686     /* Sum histograms over multiple simulations if needed. */
 687     if (numSharedUpdate > 1)
 688     {
 689         GMX_ASSERT(numSharedUpdate == multiSimComm->nsim, "Sharing within a simulation is not implemented (yet)");
 690
 691         /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
 692         std::vector<double> weightSum;
 693         std::vector<double> coordVisits;
 694
 695         weightSum.resize(localUpdateList.size());
 696         coordVisits.resize(localUpdateList.size());
 697
 698         for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
 699         {
 700             const PointState &ps = pointState[localUpdateList[localIndex]];
 701
 702             weightSum[localIndex]   = ps.weightSumIteration();
 703             coordVisits[localIndex] = ps.numVisitsIteration();
 704         }
 705
 706         gmx_sumd_sim(weightSum.size(), weightSum.data(), multiSimComm);
 707         gmx_sumd_sim(coordVisits.size(), coordVisits.data(), multiSimComm);
 708
 709         /* Transfer back the result */
 710         for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
 711         {
 712             PointState &ps = pointState[localUpdateList[localIndex]];
 713
 714             ps.setPartialWeightAndCount(weightSum[localIndex],
 715                                         coordVisits[localIndex]);
 716         }
 717     }
 718
 719     /* Now add the partial counts and weights to the accumulating histograms.
 720        Note: we still need to use the weights for the update so we wait
 721        with resetting them until the end of the update. */
 722     for (int globalIndex : localUpdateList)
 723     {
 724         pointState[globalIndex].addPartialWeightAndCount();
 725     }
 726 }
 727
 728 /*! \brief
 729  * Label points along an axis as covered or not.
 730  *
 731  * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
 732  *
 733  * \param[in]     visited        Visited? For each point.
 734  * \param[in]     checkCovering  Check for covering? For each point.
 735  * \param[in]     numPoints      The number of grid points along this dimension.
 736  * \param[in]     period         Period in number of points.
 737  * \param[in]     coverRadius    Cover radius, in points, needed for defining a point as covered.
 738  * \param[in,out] covered        In this array elements are 1 for covered points and 0 for non-covered points, this routine assumes that \p covered has at least size \p numPoints.
 739  */
 740 void labelCoveredPoints(const std::vector<bool> &visited,
 741                         const std::vector<bool> &checkCovering,
 742                         int                      numPoints,
 743                         int                      period,
 744                         int                      coverRadius,
 745                         gmx::ArrayRef<int>       covered)
 746 {
 747     GMX_ASSERT(covered.size() >= static_cast<size_t>(numPoints), "covered should be at least as large as the grid");
 748
 749     bool haveFirstNotVisited = false;
 750     int  firstNotVisited     = -1;
 751     int  notVisitedLow       = -1;
 752     int  notVisitedHigh      = -1;
 753
 754     for (int n = 0; n < numPoints; n++)
 755     {
 756         if (checkCovering[n] && !visited[n])
 757         {
 758             if (!haveFirstNotVisited)
 759             {
 760                 notVisitedLow       = n;
 761                 firstNotVisited     = n;
 762                 haveFirstNotVisited = true;
 763             }
 764             else
 765             {
 766                 notVisitedHigh = n;
 767
 768                 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
 769                    by unvisited points. The unvisted end points affect the coveredness of the visited
 770                    with a reach equal to the cover radius. */
 771                 int notCoveredLow  = notVisitedLow + coverRadius;
 772                 int notCoveredHigh = notVisitedHigh - coverRadius;
 773                 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
 774                 {
 775                     covered[i] = (i > notCoveredLow) && (i < notCoveredHigh);
 776                 }
 777
 778                 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval the
 779                    notVisitedLow of the next. */
 780                 notVisitedLow = notVisitedHigh;
 781             }
 782         }
 783     }
 784
 785     /* Have labelled all the internal points. Now take care of the boundary regions. */
 786     if (!haveFirstNotVisited)
 787     {
 788         /* No non-visited points <=> all points visited => all points covered. */
 789
 790         for (int n = 0; n < numPoints; n++)
 791         {
 792             covered[n] = 1;
 793         }
 794     }
 795     else
 796     {
 797         int lastNotVisited = notVisitedLow;
 798
 799         /* For periodic boundaries, non-visited points can influence points
 800            on the other side of the boundary so we need to wrap around. */
 801
 802         /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
 803            For non-periodic boundaries there is no lower end point so a dummy value is used. */
 804         int notVisitedHigh = firstNotVisited;
 805         int notVisitedLow  = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
 806
 807         int notCoveredLow  =  notVisitedLow + coverRadius;
 808         int notCoveredHigh = notVisitedHigh - coverRadius;
 809
 810         for (int i = 0; i <= notVisitedHigh; i++)
 811         {
 812             /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
 813             covered[i] = (i > notCoveredLow) && (i < notCoveredHigh);
 814         }
 815
 816         /* Upper end. Same as for lower end but in the other direction. */
 817         notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
 818         notVisitedLow  = lastNotVisited;
 819
 820         notCoveredLow  = notVisitedLow + coverRadius;
 821         notCoveredHigh = notVisitedHigh - coverRadius;
 822
 823         for (int i = notVisitedLow; i <= numPoints - 1; i++)
 824         {
 825             /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
 826             covered[i] = (i > notCoveredLow) && (i < notCoveredHigh);
 827         }
 828     }
 829 }
 830
 831 }   // namespace
 832
 833 bool BiasState::isSamplingRegionCovered(const BiasParams             &params,
 834                                         const std::vector<DimParams> &dimParams,
 835                                         const Grid                   &grid,
 836                                         const gmx_multisim_t         *multiSimComm) const
 837 {
 838     /* Allocate and initialize arrays: one for checking visits along each dimension,
 839        one for keeping track of which points to check and one for the covered points.
 840        Possibly these could be kept as AWH variables to avoid these allocations. */
 841     struct CheckDim
 842     {
 843         std::vector<bool> visited;
 844         std::vector<bool> checkCovering;
 845         // We use int for the covering array since we might use gmx_sumi_sim.
 846         std::vector<int>  covered;
 847     };
 848
 849     std::vector<CheckDim> checkDim;
 850     checkDim.resize(grid.numDimensions());
 851
 852     for (int d = 0; d < grid.numDimensions(); d++)
 853     {
 854         const size_t numPoints = grid.axis(d).numPoints();
 855         checkDim[d].visited.resize(numPoints, false);
 856         checkDim[d].checkCovering.resize(numPoints, false);
 857         checkDim[d].covered.resize(numPoints, 0);
 858     }
 859
 860     /* Set visited points along each dimension and which points should be checked for covering.
 861        Specifically, points above the free energy cutoff (if there is one) or points outside
 862        of the target region are ignored. */
 863
 864     /* Set the free energy cutoff */
 865     double maxFreeEnergy = GMX_FLOAT_MAX;
 866
 867     if (params.eTarget == eawhtargetCUTOFF)
 868     {
 869         maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
 870     }
 871
 872     /* Set the threshold weight for a point to be considered visited. */
 873     double weightThreshold = 1;
 874     for (int d = 0; d < grid.numDimensions(); d++)
 875     {
 876         weightThreshold *= grid.axis(d).spacing()*std::sqrt(dimParams[d].betak*0.5*M_1_PI);
 877     }
 878
 879     /* Project the sampling weights onto each dimension */
 880     for (size_t m = 0; m < grid.numPoints(); m++)
 881     {
 882         const PointState &pointState = points_[m];
 883
 884         for (int d = 0; d < grid.numDimensions(); d++)
 885         {
 886             int n = grid.point(m).index[d];
 887
 888             /* Is visited if it was already visited or if there is enough weight at the current point */
 889             checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
 890
 891             /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
 892             checkDim[d].checkCovering[n] = checkDim[d].checkCovering[n] || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
 893         }
 894     }
 895
 896     /* Label each point along each dimension as covered or not. */
 897     for (int d = 0; d < grid.numDimensions(); d++)
 898     {
 899         labelCoveredPoints(checkDim[d].visited, checkDim[d].checkCovering, grid.axis(d).numPoints(), grid.axis(d).numPointsInPeriod(),
 900                            params.coverRadius()[d], checkDim[d].covered);
 901     }
 902
 903     /* Now check for global covering. Each dimension needs to be covered separately.
 904        A dimension is covered if each point is covered.  Multiple simulations collectively
 905        cover the points, i.e. a point is covered if any of the simulations covered it.
 906        However, visited points are not shared, i.e. if a point is covered or not is
 907        determined by the visits of a single simulation. In general the covering criterion is
 908        all points covered => all points are surrounded by visited points up to a radius = coverRadius.
 909        For 1 simulation, all points covered <=> all points visited. For multiple simulations
 910        however, all points visited collectively !=> all points covered, except for coverRadius = 0.
 911        In the limit of large coverRadius, all points covered => all points visited by at least one
 912        simulation (since no point will be covered until all points have been visited by a
 913        single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
 914        needs with surrounding points before sharing covering information with other simulations. */
 915
 916     /* Communicate the covered points between sharing simulations if needed. */
 917     if (params.numSharedUpdate > 1)
 918     {
 919         /* For multiple dimensions this may not be the best way to do it. */
 920         for (int d = 0; d < grid.numDimensions(); d++)
 921         {
 922             gmx_sumi_sim(grid.axis(d).numPoints(), checkDim[d].covered.data(), multiSimComm);
 923         }
 924     }
 925
 926     /* Now check if for each dimension all points are covered. Break if not true. */
 927     bool allPointsCovered = true;
 928     for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
 929     {
 930         for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
 931         {
 932             allPointsCovered = checkDim[d].covered[n];
 933         }
 934     }
 935
 936     return allPointsCovered;
 937 }
 938
 939 /*! \brief
 940  * Normalizes the free energy and PMF sum.
 941  *
 942  * \param[in] pointState  The state of the points.
 943  */
 944 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState> *pointState)
 945 {
 946     double minF = freeEnergyMinimumValue(*pointState);
 947
 948     for (PointState &ps : *pointState)
 949     {
 950         ps.normalizeFreeEnergyAndPmfSum(minF);
 951     }
 952 }
 953
 954 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams> &dimParams,
 955                                                          const Grid                   &grid,
 956                                                          const BiasParams             &params,
 957                                                          const gmx_multisim_t         *multiSimComm,
 958                                                          double                        t,
 959                                                          gmx_int64_t                   step,
 960                                                          FILE                         *fplog,
 961                                                          std::vector<int>             *updateList)
 962 {
 963     /* Note hat updateList is only used in this scope and is always
 964      * re-initialized. We do not use a local vector, because that would
 965      * cause reallocation every time this funtion is called and the vector
 966      * can be the size of the whole grid.
 967      */
 968
 969     /* Make a list of all local points, i.e. those that could have been touched since
 970        the last update. These are the points needed for summing histograms below
 971        (non-local points only add zeros). For local updates, this will also be the
 972        final update list. */
 973     makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_,
 974                         updateList);
 975     if (params.numSharedUpdate > 1)
 976     {
 977         mergeSharedUpdateLists(updateList, points_.size(), multiSimComm);
 978     }
 979
 980     /* Reset the range for the next update */
 981     resetLocalUpdateRange(grid);
 982
 983     /* Add samples to histograms for all local points and sync simulations if needed */
 984     sumHistograms(points_, weightSumCovering_,
 985                   params.numSharedUpdate, multiSimComm, *updateList);
 986
 987     sumPmf(points_, params.numSharedUpdate, multiSimComm);
 988
 989     /* Renormalize the free energy if values are too large. */
 990     bool needToNormalizeFreeEnergy = false;
 991     for (int &globalIndex : *updateList)
 992     {
 993         /* We want to keep the absolute value of the free energies to be less c_largePositiveExponent
 994            to be able to safely pass these values to exp(). The check below ensures this as long as
 995            the free energy values grow less than 0.5*c_largePositiveExponent in a return time to this
 996            neighborhood. For reasonable update sizes it's unlikely that this requirement would be
 997            broken. */
 998         if (std::abs(points_[globalIndex].freeEnergy()) > 0.5*c_largePositiveExponent)
 999         {
1000             needToNormalizeFreeEnergy = true;
1001             break;
1002         }
1003     }
1004
1005     /* Update target distribution? */
1006     bool needToUpdateTargetDistribution =
1007         (params.eTarget != eawhtargetCONSTANT &&
1008          params.isUpdateTargetStep(step));
1009
1010     /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1011     bool detectedCovering = false;
1012     if (inInitialStage())
1013     {
1014         detectedCovering = (params.isCheckStep(points_.size(), step) &&
1015                             isSamplingRegionCovered(params, dimParams, grid, multiSimComm));
1016     }
1017
1018     /* The weighthistogram size after this update. */
1019     double newHistogramSize = histogramSize_.newHistogramSize(params, t, detectedCovering, points_, weightSumCovering_, fplog);
1020
1021     /* Make the update list. Usually we try to only update local points,
1022      * but if the update has non-trivial or non-deterministic effects
1023      * on non-local points a global update is needed. This is the case when:
1024      * 1) a covering occurred in the initial stage, leading to non-trivial
1025      *    histogram rescaling factors; or
1026      * 2) the target distribution will be updated, since we don't make any
1027      *    assumption on its form; or
1028      * 3) the AWH parameters are such that we never attempt to skip non-local
1029      *    updates; or
1030      * 4) the free energy values have grown so large that a renormalization
1031      *    is needed.
1032      */
1033     if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1034     {
1035         /* Global update, just add all points. */
1036         updateList->clear();
1037         for (size_t m = 0; m < points_.size(); m++)
1038         {
1039             if (points_[m].inTargetRegion())
1040             {
1041                 updateList->push_back(m);
1042             }
1043         }
1044     }
1045
1046     /* Set histogram scale factors. */
1047     double weightHistScalingSkipped = 0;
1048     double logPmfsumScalingSkipped  = 0;
1049     if (params.skipUpdates())
1050     {
1051         getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1052     }
1053     double weightHistScalingNew;
1054     double logPmfsumScalingNew;
1055     setHistogramUpdateScaleFactors(params, newHistogramSize, histogramSize_.histogramSize(),
1056                                    &weightHistScalingNew, &logPmfsumScalingNew);
1057
1058     /* Update free energy and reference weight histogram for points in the update list. */
1059     for (int pointIndex : *updateList)
1060     {
1061         PointState *pointStateToUpdate = &points_[pointIndex];
1062
1063         /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1064         if (params.skipUpdates())
1065         {
1066             pointStateToUpdate->performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScalingSkipped, logPmfsumScalingSkipped);
1067         }
1068
1069         /* Now do an update with new sampling data. */
1070         pointStateToUpdate->updateWithNewSampling(params, histogramSize_.numUpdates(), weightHistScalingNew, logPmfsumScalingNew);
1071     }
1072
1073     /* Only update the histogram size after we are done with the local point updates */
1074     histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1075
1076     if (needToNormalizeFreeEnergy)
1077     {
1078         normalizeFreeEnergyAndPmfSum(&points_);
1079     }
1080
1081     if (needToUpdateTargetDistribution)
1082     {
1083         /* The target distribution is always updated for all points at once. */
1084         updateTargetDistribution(points_, params);
1085     }
1086
1087     /* Update the bias. The bias is updated separately and last since it simply a function of
1088        the free energy and the target distribution and we want to avoid doing extra work. */
1089     for (int pointIndex : *updateList)
1090     {
1091         points_[pointIndex].updateBias();
1092     }
1093
1094     /* Increase the update counter. */
1095     histogramSize_.incrementNumUpdates();
1096 }
1097
1098 double BiasState::updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams>                      &dimParams,
1099                                                            const Grid                                        &grid,
1100                                                            std::vector < double, AlignedAllocator < double>> *weight) const
1101 {
1102     /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1103     const std::vector<int> &neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1104
1105 #if GMX_SIMD_HAVE_DOUBLE
1106     typedef SimdDouble PackType;
1107     constexpr int packSize = GMX_SIMD_DOUBLE_WIDTH;
1108 #else
1109     typedef double PackType;
1110     constexpr int packSize = 1;
1111 #endif
1112     /* Round the size of the weight array up to packSize */
1113     const int     weightSize = ((neighbors.size() + packSize - 1)/packSize)*packSize;
1114     weight->resize(weightSize);
1115
1116     double * gmx_restrict weightData = weight->data();
1117     PackType              weightSumPack(0.0);
1118     for (size_t i = 0; i < neighbors.size(); i += packSize)
1119     {
1120         for (size_t n = i; n < i + packSize; n++)
1121         {
1122             if (n < neighbors.size())
1123             {
1124                 const int neighbor = neighbors[n];
1125                 (*weight)[n] = biasedLogWeightFromPoint(dimParams, points_, grid,
1126                                                         neighbor, points_[neighbor].bias(),
1127                                                         coordState_.coordValue());
1128             }
1129             else
1130             {
1131                 /* Pad with values that don't affect the result */
1132                 (*weight)[n] = c_largeNegativeExponent;
1133             }
1134         }
1135         PackType weightPack = load<PackType>(weightData + i);
1136         weightPack          = gmx::exp(weightPack);
1137         weightSumPack       = weightSumPack + weightPack;
1138         store(weightData + i, weightPack);
1139     }
1140     /* Sum of probability weights */
1141     double weightSum    = reduce(weightSumPack);
1142     GMX_RELEASE_ASSERT(weightSum > 0, "zero probability weight when updating AWH probability weights.");
1143
1144     /* Normalize probabilities to sum to 1 */
1145     double invWeightSum = 1/weightSum;
1146     for (double &w : *weight)
1147     {
1148         w *= invWeightSum;
1149     }
1150
1151     /* Return the convolved bias */
1152     return std::log(weightSum);
1153 }
1154
1155 double BiasState::calcConvolvedBias(const std::vector<DimParams>  &dimParams,
1156                                     const Grid                    &grid,
1157                                     const awh_dvec                &coordValue) const
1158 {
1159     int              point      = grid.nearestIndex(coordValue);
1160     const GridPoint &gridPoint  = grid.point(point);
1161
1162     /* Sum the probability weights from the neighborhood of the given point */
1163     double weightSum = 0;
1164     for (int neighbor : gridPoint.neighbor)
1165     {
1166         double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid,
1167                                                     neighbor, points_[neighbor].bias(),
1168                                                     coordValue);
1169         weightSum       += std::exp(logWeight);
1170     }
1171
1172     /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1173     return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1174 }
1175
1176 void BiasState::sampleProbabilityWeights(const Grid                  &grid,
1177                                          gmx::ArrayRef<const double>  probWeightNeighbor)
1178 {
1179     const std::vector<int> &neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1180
1181     /* Save weights for next update */
1182     for (size_t n = 0; n < neighbor.size(); n++)
1183     {
1184         points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1185     }
1186
1187     /* Update the local update range. Two corner points define this rectangular
1188      * domain. We need to choose two new corner points such that the new domain
1189      * contains both the old update range and the current neighborhood.
1190      * In the simplest case when an update is performed every sample,
1191      * the update range would simply equal the current neighborhood.
1192      */
1193     int neighborStart = neighbor[0];
1194     int neighborLast  = neighbor[neighbor.size() - 1];
1195     for (int d = 0; d < grid.numDimensions(); d++)
1196     {
1197         int origin  = grid.point(neighborStart).index[d];
1198         int last    = grid.point(neighborLast).index[d];
1199
1200         if (origin > last)
1201         {
1202             /* Unwrap if wrapped around the boundary (only happens for periodic
1203              * boundaries). This has been already for the stored index interval.
1204              */
1205             /* TODO: what we want to do is to find the smallest the update
1206              * interval that contains all points that need to be updated.
1207              * This amounts to combining two intervals, the current
1208              * [origin, end] update interval and the new touched neighborhood
1209              * into a new interval that contains all points from both the old
1210              * intervals.
1211              *
1212              * For periodic boundaries it becomes slightly more complicated
1213              * than for closed boundaries because then it needs not be
1214              * true that origin < end (so one can't simply relate the origin/end
1215              * in the min()/max() below). The strategy here is to choose the
1216              * origin closest to a reference point (index 0) and then unwrap
1217              * the end index if needed and choose the largest end index.
1218              * This ensures that both intervals are in the new interval
1219              * but it's not necessarily the smallest.
1220              * Currently we solve this by going through each possibility
1221              * and checking them.
1222              */
1223             last += grid.axis(d).numPointsInPeriod();
1224         }
1225
1226         originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1227         endUpdatelist_[d]    = std::max(endUpdatelist_[d], last);
1228     }
1229 }
1230
1231 void BiasState::sampleCoordAndPmf(const Grid                  &grid,
1232                                   gmx::ArrayRef<const double>  probWeightNeighbor,
1233                                   double                       convolvedBias)
1234 {
1235     /* Sampling-based deconvolution extracting the PMF.
1236      * Update the PMF histogram with the current coordinate value.
1237      *
1238      * Because of the finite width of the harmonic potential, the free energy
1239      * defined for each coordinate point does not exactly equal that of the
1240      * actual coordinate, the PMF. However, the PMF can be estimated by applying
1241      * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1242      * reweighted histogram of the coordinate value. Strictly, this relies on
1243      * the unknown normalization constant Z being either constant or known. Here,
1244      * neither is true except in the long simulation time limit. Empirically however,
1245      * it works (mainly because how the PMF histogram is rescaled).
1246      */
1247
1248     /* Only save coordinate data that is in range (the given index is always
1249      * in range even if the coordinate value is not).
1250      */
1251     if (grid.covers(coordState_.coordValue()))
1252     {
1253         /* Save PMF sum and keep a histogram of the sampled coordinate values */
1254         points_[coordState_.gridpointIndex()].samplePmf(convolvedBias);
1255     }
1256
1257     /* Save probability weights for the update */
1258     sampleProbabilityWeights(grid, probWeightNeighbor);
1259 }
1260
1261 void BiasState::initHistoryFromState(AwhBiasHistory *biasHistory) const
1262 {
1263     biasHistory->pointState.resize(points_.size());
1264 }
1265
1266 void BiasState::updateHistory(AwhBiasHistory *biasHistory,
1267                               const Grid     &grid) const
1268 {
1269     GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(), "The AWH history setup does not match the AWH state.");
1270
1271     AwhBiasStateHistory *stateHistory = &biasHistory->state;
1272     stateHistory->umbrellaGridpoint   = coordState_.umbrellaGridpoint();
1273
1274     for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1275     {
1276         AwhPointStateHistory *psh = &biasHistory->pointState[m];
1277
1278         points_[m].storeState(psh);
1279
1280         psh->weightsum_covering = weightSumCovering_[m];
1281     }
1282
1283     histogramSize_.storeState(stateHistory);
1284
1285     stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid,
1286                                                                       originUpdatelist_);
1287     stateHistory->end_index_updatelist    = multiDimGridIndexToLinear(grid,
1288                                                                       endUpdatelist_);
1289 }
1290
1291 void BiasState::restoreFromHistory(const AwhBiasHistory &biasHistory,
1292                                    const Grid           &grid)
1293 {
1294     const AwhBiasStateHistory &stateHistory = biasHistory.state;
1295
1296     coordState_.restoreFromHistory(stateHistory);
1297
1298     if (biasHistory.pointState.size() != points_.size())
1299     {
1300         GMX_THROW(InvalidInputError("Bias grid size in checkpoint and simulation do not match. Likely you provided a checkpoint from a different simulation."));
1301     }
1302     for (size_t m = 0; m < points_.size(); m++)
1303     {
1304         points_[m].setFromHistory(biasHistory.pointState[m]);
1305     }
1306
1307     for (size_t m = 0; m < weightSumCovering_.size(); m++)
1308     {
1309         weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1310     }
1311
1312     histogramSize_.restoreFromHistory(stateHistory);
1313
1314     linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1315     linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1316 }
1317
1318 void BiasState::broadcast(const t_commrec *commRecord)
1319 {
1320     gmx_bcast(sizeof(coordState_), &coordState_, commRecord);
1321
1322     gmx_bcast(points_.size()*sizeof(PointState), points_.data(), commRecord);
1323
1324     gmx_bcast(weightSumCovering_.size()*sizeof(double), weightSumCovering_.data(), commRecord);
1325
1326     gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord);
1327 }
1328
1329 void BiasState::setFreeEnergyToConvolvedPmf(const std::vector<DimParams>  &dimParams,
1330                                             const Grid                    &grid)
1331 {
1332     std::vector<float> convolvedPmf;
1333
1334     calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1335
1336     for (size_t m = 0; m < points_.size(); m++)
1337     {
1338         points_[m].setFreeEnergy(convolvedPmf[m]);
1339     }
1340 }
1341
1342 /*! \brief
1343  * Count trailing data rows containing only zeros.
1344  *
1345  * \param[in] data        2D data array.
1346  * \param[in] numRows     Number of rows in array.
1347  * \param[in] numColumns  Number of cols in array.
1348  * \returns the number of trailing zero rows.
1349  */
1350 static int countTrailingZeroRows(const double* const *data,
1351                                  int                  numRows,
1352                                  int                  numColumns)
1353 {
1354     int numZeroRows = 0;
1355     for (int m = numRows - 1; m >= 0; m--)
1356     {
1357         bool rowIsZero = true;
1358         for (int d = 0; d < numColumns; d++)
1359         {
1360             if (data[d][m] != 0)
1361             {
1362                 rowIsZero = false;
1363                 break;
1364             }
1365         }
1366
1367         if (!rowIsZero)
1368         {
1369             /* At a row with non-zero data */
1370             break;
1371         }
1372         else
1373         {
1374             /* Still at a zero data row, keep checking rows higher up. */
1375             numZeroRows++;
1376         }
1377     }
1378
1379     return numZeroRows;
1380 }
1381
1382 /*! \brief
1383  * Initializes the PMF and target with data read from an input table.
1384  *
1385  * \param[in]     dimParams   The dimension parameters.
1386  * \param[in]     grid        The grid.
1387  * \param[in]     filename    The filename to read PMF and target from.
1388  * \param[in]     numBias     Number of biases.
1389  * \param[in]     biasIndex   The index of the bias.
1390  * \param[in,out] pointState  The state of the points in this bias.
1391  */
1392 static void readUserPmfAndTargetDistribution(const std::vector<DimParams> &dimParams,
1393                                              const Grid                   &grid,
1394                                              const std::string            &filename,
1395                                              int                           numBias,
1396                                              int                           biasIndex,
1397                                              std::vector<PointState>      *pointState)
1398 {
1399     /* Read the PMF and target distribution.
1400        From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1401        base on the force constant. The free energy and target together determine the bias.
1402      */
1403     std::string filenameModified(filename);
1404     if (numBias > 1)
1405     {
1406         size_t n = filenameModified.rfind(".");
1407         GMX_RELEASE_ASSERT(n != std::string::npos, "The filename should contain an extension starting with .");
1408         filenameModified.insert(n, formatString("%d", biasIndex));
1409     }
1410
1411     std::string correctFormatMessage =
1412         formatString("%s is expected in the following format. "
1413                      "The first ndim column(s) should contain the coordinate values for each point, "
1414                      "each column containing values of one dimension (in ascending order). "
1415                      "For a multidimensional coordinate, points should be listed "
1416                      "in the order obtained by traversing lower dimensions first. "
1417                      "E.g. for two-dimensional grid of size nxn: "
1418                      "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1419                      "Column ndim +  1 should contain the PMF value for each coordinate value. "
1420                      "The target distribution values should be in column ndim + 2  or column ndim + 5. "
1421                      "Make sure the input file ends with a new line but has no trailing new lines.",
1422                      filename.c_str());
1423     gmx::TextLineWrapper wrapper;
1424     wrapper.settings().setLineLength(c_linewidth);
1425     correctFormatMessage = wrapper.wrapToString(correctFormatMessage).c_str();
1426
1427     double  **data;
1428     int       numColumns;
1429     int       numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1430
1431     /* Check basic data properties here. Grid takes care of more complicated things. */
1432
1433     if (numRows <= 0)
1434     {
1435         std::string mesg = gmx::formatString("%s is empty!.\n\n%s", filename.c_str(), correctFormatMessage.c_str());
1436         GMX_THROW(InvalidInputError(mesg));
1437     }
1438
1439     /* Less than 2 points is not useful for PMF or target. */
1440     if (numRows <  2)
1441     {
1442         std::string mesg =
1443             gmx::formatString("%s contains too few data points (%d)."
1444                               "The minimum number of points is 2.",
1445                               filename.c_str(), numRows);
1446         GMX_THROW(InvalidInputError(mesg));
1447     }
1448
1449     /* Make sure there are enough columns of data.
1450
1451        Two formats are allowed. Either with columns  {coords, PMF, target} or
1452        {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1453        is how AWH output is written (x, y, z being other AWH variables). For this format,
1454        trailing columns are ignored.
1455      */
1456     int columnIndexTarget;
1457     int numColumnsMin  = dimParams.size() + 2;
1458     int columnIndexPmf = dimParams.size();
1459     if (numColumns == numColumnsMin)
1460     {
1461         columnIndexTarget = columnIndexPmf + 1;
1462     }
1463     else
1464     {
1465         columnIndexTarget = columnIndexPmf + 4;
1466     }
1467
1468     if (numColumns < numColumnsMin)
1469     {
1470         std::string mesg =
1471             gmx::formatString("The number of columns in %s (%d) should be at least %d."
1472                               "\n\n%s",
1473                               filename.c_str(), correctFormatMessage.c_str());
1474         GMX_THROW(InvalidInputError(mesg));
1475     }
1476
1477     /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1478        since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1479     int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1480     if (numZeroRows > 1)
1481     {
1482         std::string mesg = gmx::formatString("Found %d trailing zero data rows in %s. Please remove trailing empty lines and try again.",
1483                                              numZeroRows, filename.c_str());
1484         GMX_THROW(InvalidInputError(mesg));
1485     }
1486
1487     /* Convert from user units to internal units before sending the data of to grid. */
1488     for (size_t d = 0; d < dimParams.size(); d++)
1489     {
1490         double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1491         if (scalingFactor == 1)
1492         {
1493             continue;
1494         }
1495         for (size_t m = 0; m < pointState->size(); m++)
1496         {
1497             data[d][m] *= scalingFactor;
1498         }
1499     }
1500
1501     /* Get a data point for each AWH grid point so that they all get data. */
1502     std::vector<int> gridIndexToDataIndex(grid.numPoints());
1503     mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1504
1505     /* Extract the data for each grid point.
1506      * We check if the target distribution is zero for all points.
1507      */
1508     bool targetDistributionIsZero = true;
1509     for (size_t m = 0; m < pointState->size(); m++)
1510     {
1511         (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1512         double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1513
1514         /* Check if the values are allowed. */
1515         if (target < 0)
1516         {
1517             std::string mesg = gmx::formatString("Target distribution weight at point %d (%g) in %s is negative.",
1518                                                  m, target, filename.c_str());
1519             GMX_THROW(InvalidInputError(mesg));
1520         }
1521         if (target > 0)
1522         {
1523             targetDistributionIsZero = false;
1524         }
1525         (*pointState)[m].setTargetConstantWeight(target);
1526     }
1527
1528     if (targetDistributionIsZero)
1529     {
1530         std::string mesg = gmx::formatString("The target weights given in column %d in %s are all 0",
1531                                              filename.c_str(), columnIndexTarget);
1532         GMX_THROW(InvalidInputError(mesg));
1533     }
1534
1535     /* Free the arrays. */
1536     for (int m = 0; m < numColumns; m++)
1537     {
1538         sfree(data[m]);
1539     }
1540     sfree(data);
1541 }
1542
1543 void BiasState::normalizePmf(int numSharingSims)
1544 {
1545     /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1546        Approximately (for large enough force constant) we should have:
1547        sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1548      */
1549
1550     /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1551     double expSumPmf = 0;
1552     double expSumF   = 0;
1553     for (const PointState &pointState : points_)
1554     {
1555         if (pointState.inTargetRegion())
1556         {
1557             expSumPmf += std::exp( pointState.logPmfSum());
1558             expSumF   += std::exp(-pointState.freeEnergy());
1559         }
1560     }
1561     double numSamples = histogramSize_.histogramSize()/numSharingSims;
1562
1563     /* Renormalize */
1564     double logRenorm = std::log(numSamples*expSumF/expSumPmf);
1565     for (PointState &pointState : points_)
1566     {
1567         if (pointState.inTargetRegion())
1568         {
1569             pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1570         }
1571     }
1572 }
1573
1574 void BiasState::initGridPointState(const AwhBiasParams           &awhBiasParams,
1575                                    const std::vector<DimParams>  &dimParams,
1576                                    const Grid                    &grid,
1577                                    const BiasParams              &params,
1578                                    const std::string             &filename,
1579                                    int                            numBias)
1580 {
1581     /* Modify PMF, free energy and the constant target distribution factor
1582      * to user input values if there is data given.
1583      */
1584     if (awhBiasParams.bUserData)
1585     {
1586         readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1587         setFreeEnergyToConvolvedPmf(dimParams, grid);
1588     }
1589
1590     /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1591     GMX_RELEASE_ASSERT(params.eTarget != eawhtargetLOCALBOLTZMANN ||
1592                        points_[0].weightSumRef() != 0,
1593                        "AWH reference weight histogram not initialized properly with local Boltzmann target distribution.");
1594
1595     updateTargetDistribution(points_, params);
1596
1597     for (PointState &pointState : points_)
1598     {
1599         if (pointState.inTargetRegion())
1600         {
1601             pointState.updateBias();
1602         }
1603         else
1604         {
1605             /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1606             pointState.setTargetToZero();
1607         }
1608     }
1609
1610     /* Set the initial reference weighthistogram. */
1611     const double histogramSize = histogramSize_.histogramSize();
1612     for (auto &pointState : points_)
1613     {
1614         pointState.setInitialReferenceWeightHistogram(histogramSize);
1615     }
1616
1617     /* Make sure the pmf is normalized consistently with the histogram size.
1618        Note: the target distribution and free energy need to be set here. */
1619     normalizePmf(params.numSharedUpdate);
1620 }
1621
1622 BiasState::BiasState(const AwhBiasParams          &awhBiasParams,
1623                      double                        histogramSizeInitial,
1624                      const std::vector<DimParams> &dimParams,
1625                      const Grid                   &grid) :
1626     coordState_(awhBiasParams, dimParams, grid),
1627     points_(grid.numPoints()),
1628     weightSumCovering_(grid.numPoints()),
1629     histogramSize_(awhBiasParams, histogramSizeInitial)
1630 {
1631     /* The minimum and maximum multidimensional point indices that are affected by the next update */
1632     for (size_t d = 0; d < dimParams.size(); d++)
1633     {
1634         int index            = grid.point(coordState_.gridpointIndex()).index[d];
1635         originUpdatelist_[d] = index;
1636         endUpdatelist_[d]    = index;
1637     }
1638 }
1639
1640 } // namespace gmx