Add reading and writing to AWH module
[gromacs.git] / src / gromacs / awh / biasstate.cpp
blobfea6c83490f9c4db8fc43c950179a96a50d43939
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \internal \file
37 * \brief
38 * Implements the BiasState class.
40 * \author Viveca Lindahl
41 * \author Berk Hess <hess@kth.se>
42 * \ingroup module_awh
45 #include "gmxpre.h"
47 #include "biasstate.h"
49 #include <assert.h>
51 #include <cmath>
52 #include <cstdio>
53 #include <cstdlib>
54 #include <cstring>
56 #include <algorithm>
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxlib/network.h"
61 #include "gromacs/math/utilities.h"
62 #include "gromacs/mdtypes/awh-history.h"
63 #include "gromacs/mdtypes/awh-params.h"
64 #include "gromacs/mdtypes/commrec.h"
65 #include "gromacs/simd/simd.h"
66 #include "gromacs/simd/simd_math.h"
67 #include "gromacs/utility/arrayref.h"
68 #include "gromacs/utility/exceptions.h"
69 #include "gromacs/utility/gmxassert.h"
70 #include "gromacs/utility/smalloc.h"
71 #include "gromacs/utility/stringutil.h"
73 #include "grid.h"
74 #include "pointstate.h"
76 namespace gmx
79 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
81 GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
83 /* The PMF is just the negative of the log of the sampled PMF histogram.
84 * Points with zero target weight are ignored, they will mostly contain noise.
86 for (size_t i = 0; i < points_.size(); i++)
88 pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
92 namespace
95 /*! \brief
96 * Sum PMF over multiple simulations, when requested.
98 * \param[in,out] pointState The state of the points in the bias.
99 * \param[in] numSharedUpdate The number of biases sharing the histogram.
100 * \param[in] multiSimComm Struct for multi-simulation communication.
102 void sumPmf(gmx::ArrayRef<PointState> pointState,
103 int numSharedUpdate,
104 const gmx_multisim_t *multiSimComm)
106 if (numSharedUpdate == 1)
108 return;
110 GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->nsim == 0, "numSharedUpdate should be a multiple of multiSimComm->nsim");
111 GMX_ASSERT(numSharedUpdate == multiSimComm->nsim, "Sharing within a simulation is not implemented (yet)");
113 std::vector<double> buffer(pointState.size());
115 /* Need to temporarily exponentiate the log weights to sum over simulations */
116 for (size_t i = 0; i < buffer.size(); i++)
118 buffer[i] = pointState[i].inTargetRegion() ? std::exp(static_cast<float>(pointState[i].logPmfSum())) : 0;
121 gmx_sumd_sim(buffer.size(), buffer.data(), multiSimComm);
123 /* Take log again to get (non-normalized) PMF */
124 double normFac = 1.0/numSharedUpdate;
125 for (size_t i = 0; i < pointState.size(); i++)
127 if (pointState[i].inTargetRegion())
129 pointState[i].setLogPmfSum(-std::log(buffer[i]*normFac));
134 /*! \brief
135 * Find the minimum free energy value.
137 * \param[in] pointState The state of the points.
138 * \returns the minimum free energy value.
140 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
142 double fMin = GMX_FLOAT_MAX;
144 for (auto const &ps : pointState)
146 if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
148 fMin = ps.freeEnergy();
152 return fMin;
155 /*! \brief
156 * Find and return the log of the probability weight of a point given a coordinate value.
158 * The unnormalized weight is given by
159 * w(point|value) = exp(bias(point) - U(value,point)),
160 * where U is a harmonic umbrella potential.
162 * \param[in] dimParams The bias dimensions parameters
163 * \param[in] points The point state.
164 * \param[in] grid The grid.
165 * \param[in] pointIndex Point to evaluate probability weight for.
166 * \param[in] pointBias Bias for the point (as a log weight).
167 * \param[in] value Coordinate value.
168 * \returns the log of the biased probability weight.
170 double biasedLogWeightFromPoint(const std::vector<DimParams> &dimParams,
171 const std::vector<PointState> &points,
172 const Grid &grid,
173 int pointIndex,
174 double pointBias,
175 const awh_dvec value)
177 double logWeight = c_largeNegativeExponent;
179 /* Only points in the target reigon have non-zero weight */
180 if (points[pointIndex].inTargetRegion())
182 logWeight = pointBias;
184 /* Add potential for all parameter dimensions */
185 for (size_t d = 0; d < dimParams.size(); d++)
187 double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
188 logWeight -= 0.5*dimParams[d].betak*dev*dev;
192 return logWeight;
195 } // namespace
197 void BiasState::calcConvolvedPmf(const std::vector<DimParams> &dimParams,
198 const Grid &grid,
199 std::vector<float> *convolvedPmf) const
201 size_t numPoints = grid.numPoints();
203 convolvedPmf->resize(numPoints);
205 /* Get the PMF to convolve. */
206 std::vector<float> pmf(numPoints);
207 getPmf(pmf);
209 for (size_t m = 0; m < numPoints; m++)
211 double freeEnergyWeights = 0;
212 const GridPoint &point = grid.point(m);
213 for (auto &neighbor : point.neighbor)
215 /* The negative PMF is a positive bias. */
216 double biasNeighbor = -pmf[neighbor];
218 /* Add the convolved PMF weights for the neighbors of this point.
219 Note that this function only adds point within the target > 0 region.
220 Sum weights, take the logarithm last to get the free energy. */
221 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid,
222 neighbor, biasNeighbor,
223 point.coordValue);
224 freeEnergyWeights += std::exp(logWeight);
227 GMX_RELEASE_ASSERT(freeEnergyWeights > 0, "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
228 (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
232 namespace
235 /*! \brief
236 * Updates the target distribution for all points.
238 * The target distribution is always updated for all points
239 * at the same time.
241 * \param[in,out] pointState The state of all points.
242 * \param[in] params The bias parameters.
244 void updateTargetDistribution(gmx::ArrayRef<PointState> pointState,
245 const BiasParams &params)
247 double freeEnergyCutoff = 0;
248 if (params.eTarget == eawhtargetCUTOFF)
250 freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
253 double sumTarget = 0;
254 for (PointState &ps : pointState)
256 sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
258 GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
260 /* Normalize to 1 */
261 double invSum = 1.0/sumTarget;
262 for (PointState &ps : pointState)
264 ps.scaleTarget(invSum);
268 /*! \brief
269 * Puts together a string describing a grid point.
271 * \param[in] grid The grid.
272 * \param[in] point Grid point index.
273 * \returns a string for the point.
275 std::string gridPointValueString(const Grid &grid, int point)
277 std::string pointString;
279 pointString += "(";
281 for (int d = 0; d < grid.numDimensions(); d++)
283 pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
284 if (d < grid.numDimensions() - 1)
286 pointString += ",";
288 else
290 pointString += ")";
294 return pointString;
297 } // namespace
299 int BiasState::warnForHistogramAnomalies(const Grid &grid,
300 int biasIndex,
301 double t,
302 FILE *fplog,
303 int maxNumWarnings) const
305 const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
307 /* Sum up the histograms and get their normalization */
308 double sumVisits = 0;
309 double sumWeights = 0;
310 for (auto &pointState : points_)
312 if (pointState.inTargetRegion())
314 sumVisits += pointState.numVisitsTot();
315 sumWeights += pointState.weightSumTot();
318 GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
319 GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
320 double invNormVisits = 1.0/sumVisits;
321 double invNormWeight = 1.0/sumWeights;
323 /* Check all points for warnings */
324 int numWarnings = 0;
325 size_t numPoints = grid.numPoints();
326 for (size_t m = 0; m < numPoints; m++)
328 /* Skip points close to boundary or non-target region */
329 const GridPoint &gridPoint = grid.point(m);
330 bool skipPoint = false;
331 for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
333 int neighbor = gridPoint.neighbor[n];
334 skipPoint = !points_[neighbor].inTargetRegion();
335 for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
337 const GridPoint &neighborPoint = grid.point(neighbor);
338 skipPoint =
339 neighborPoint.index[d] == 0 ||
340 neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
344 /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
345 const double relativeWeight = points_[m].weightSumTot()*invNormWeight;
346 const double relativeVisits = points_[m].numVisitsTot()*invNormVisits;
347 if (!skipPoint &&
348 relativeVisits < relativeWeight*maxHistogramRatio)
350 std::string pointValueString = gridPointValueString(grid, m);
351 std::string warningMessage =
352 gmx::formatString("\nawh%d warning: "
353 "at t = %g ps the obtained coordinate distribution at coordinate value %s "
354 "is less than a fraction %g of the reference distribution at that point. "
355 "If you are not certain about your settings you might want to increase your pull force constant or "
356 "modify your sampling region.\n",
357 biasIndex + 1, t, pointValueString.c_str(), maxHistogramRatio);
358 gmx::TextLineWrapper wrapper;
359 wrapper.settings().setLineLength(c_linewidth);
360 fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
362 numWarnings++;
364 if (numWarnings >= maxNumWarnings)
366 break;
370 return numWarnings;
373 double BiasState::calcUmbrellaForceAndPotential(const std::vector<DimParams> &dimParams,
374 const Grid &grid,
375 int point,
376 awh_dvec force) const
378 double potential = 0;
379 for (size_t d = 0; d < dimParams.size(); d++)
381 double deviation = getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
383 double k = dimParams[d].k;
385 /* Force from harmonic potential 0.5*k*dev^2 */
386 force[d] = -k*deviation;
387 potential += 0.5*k*deviation*deviation;
390 return potential;
393 void BiasState::calcConvolvedForce(const std::vector<DimParams> &dimParams,
394 const Grid &grid,
395 gmx::ArrayRef<const double> probWeightNeighbor,
396 awh_dvec force) const
398 for (size_t d = 0; d < dimParams.size(); d++)
400 force[d] = 0;
403 /* Only neighboring points have non-negligible contribution. */
404 const std::vector<int> &neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
405 for (size_t n = 0; n < neighbor.size(); n++)
407 double weightNeighbor = probWeightNeighbor[n];
408 int indexNeighbor = neighbor[n];
410 /* Get the umbrella force from this point. The returned potential is ignored here. */
411 awh_dvec forceFromNeighbor;
412 calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor,
413 forceFromNeighbor);
415 /* Add the weighted umbrella force to the convolved force. */
416 for (size_t d = 0; d < dimParams.size(); d++)
418 force[d] += forceFromNeighbor[d]*weightNeighbor;
423 double BiasState::moveUmbrella(const std::vector<DimParams> &dimParams,
424 const Grid &grid,
425 gmx::ArrayRef<const double> probWeightNeighbor,
426 awh_dvec biasForce,
427 gmx_int64_t step,
428 gmx_int64_t seed,
429 int indexSeed)
431 /* Generate and set a new coordinate reference value */
432 coordState_.sampleUmbrellaGridpoint(grid, coordState_.gridpointIndex(), probWeightNeighbor, step, seed, indexSeed);
434 awh_dvec newForce;
435 double newPotential =
436 calcUmbrellaForceAndPotential(dimParams, grid, coordState_.umbrellaGridpoint(), newForce);
438 /* A modification of the reference value at time t will lead to a different
439 force over t-dt/2 to t and over t to t+dt/2. For high switching rates
440 this means the force and velocity will change signs roughly as often.
441 To avoid any issues we take the average of the previous and new force
442 at steps when the reference value has been moved. E.g. if the ref. value
443 is set every step to (coord dvalue +/- delta) would give zero force.
445 for (size_t d = 0; d < dimParams.size(); d++)
447 /* clang thinks newForce[d] can be garbage */
448 #ifndef __clang_analyzer__
449 /* Average of the current and new force */
450 biasForce[d] = 0.5*(biasForce[d] + newForce[d]);
451 #endif
454 return newPotential;
457 namespace
460 /*! \brief
461 * Sets the histogram rescaling factors needed to control the histogram size.
463 * For sake of robustness, the reference weight histogram can grow at a rate
464 * different from the actual sampling rate. Typically this happens for a limited
465 * initial time, alternatively growth is scaled down by a constant factor for all
466 * times. Since the size of the reference histogram sets the size of the free
467 * energy update this should be reflected also in the PMF. Thus the PMF histogram
468 * needs to be rescaled too.
470 * This function should only be called by the bias update function or wrapped by a function that
471 * knows what scale factors should be applied when, e.g,
472 * getSkippedUpdateHistogramScaleFactors().
474 * \param[in] params The bias parameters.
475 * \param[in] newHistogramSize New reference weight histogram size.
476 * \param[in] oldHistogramSize Previous reference weight histogram size (before adding new samples).
477 * \param[out] weightHistScaling Scaling factor for the reference weight histogram.
478 * \param[out] logPmfSumScaling Log of the scaling factor for the PMF histogram.
480 void setHistogramUpdateScaleFactors(const BiasParams &params,
481 double newHistogramSize,
482 double oldHistogramSize,
483 double *weightHistScaling,
484 double *logPmfSumScaling)
487 /* The two scaling factors below are slightly different (ignoring the log factor) because the
488 reference and the PMF histogram apply weight scaling differently. The weight histogram
489 applies is locally, i.e. each sample is scaled down meaning all samples get equal weight.
490 It is done this way because that is what target type local Boltzmann (for which
491 target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
492 by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
493 1) empirically this is necessary for converging the PMF; 2) since the extraction of
494 the PMF is theoretically only valid for a constant bias, new samples should get more
495 weight than old ones for which the bias is fluctuating more. */
496 *weightHistScaling = newHistogramSize/(oldHistogramSize + params.updateWeight*params.localWeightScaling);
497 *logPmfSumScaling = std::log(newHistogramSize/(oldHistogramSize + params.updateWeight));
500 } // namespace
502 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams &params,
503 double *weightHistScaling,
504 double *logPmfSumScaling) const
506 GMX_ASSERT(params.skipUpdates(), "Calling function for skipped updates when skipping updates is not allowed");
508 if (inInitialStage())
510 /* In between global updates the reference histogram size is kept constant so we trivially know what the
511 histogram size was at the time of the skipped update. */
512 double histogramSize = histogramSize_.histogramSize();
513 setHistogramUpdateScaleFactors(params, histogramSize, histogramSize,
514 weightHistScaling, logPmfSumScaling);
516 else
518 /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
519 *weightHistScaling = 1;
520 *logPmfSumScaling = 0;
524 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams &params)
526 double weightHistScaling;
527 double logPmfsumScaling;
529 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
531 for (auto &pointState : points_)
533 bool didUpdate = pointState.performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
535 /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
536 if (didUpdate)
538 pointState.updateBias();
543 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams &params,
544 const Grid &grid)
546 double weightHistScaling;
547 double logPmfsumScaling;
549 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
551 /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
552 const std::vector<int> &neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
553 for (auto &neighbor : neighbors)
555 bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
557 if (didUpdate)
559 points_[neighbor].updateBias();
564 namespace
567 /*! \brief
568 * Merge update lists from multiple sharing simulations.
570 * \param[in,out] updateList Update list for this simulation (assumed >= npoints long).
571 * \param[in] numPoints Total number of points.
572 * \param[in] multiSimComm Struct for multi-simulation communication.
574 void mergeSharedUpdateLists(std::vector<int> *updateList,
575 int numPoints,
576 const gmx_multisim_t *multiSimComm)
578 std::vector<int> numUpdatesOfPoint;
580 /* Flag the update points of this sim.
581 TODO: we can probably avoid allocating this array and just use the input array. */
582 numUpdatesOfPoint.resize(numPoints, 0);
583 for (auto &pointIndex : *updateList)
585 numUpdatesOfPoint[pointIndex] = 1;
588 /* Sum over the sims to get all the flagged points */
589 gmx_sumi_sim(numPoints, numUpdatesOfPoint.data(), multiSimComm);
591 /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
592 updateList->clear();
593 for (int m = 0; m < numPoints; m++)
595 if (numUpdatesOfPoint[m] > 0)
597 updateList->push_back(m);
602 /*! \brief
603 * Generate an update list of points sampled since the last update.
605 * \param[in] grid The AWH bias.
606 * \param[in] points The point state.
607 * \param[in] originUpdatelist The origin of the rectangular region that has been sampled since last update.
608 * \param[in] endUpdatelist The end of the rectangular that has been sampled since last update.
609 * \param[in,out] updateList Local update list to set (assumed >= npoints long).
611 void makeLocalUpdateList(const Grid &grid,
612 const std::vector<PointState> &points,
613 const awh_ivec originUpdatelist,
614 const awh_ivec endUpdatelist,
615 std::vector<int> *updateList)
617 awh_ivec origin;
618 awh_ivec numPoints;
620 /* Define the update search grid */
621 for (int d = 0; d < grid.numDimensions(); d++)
623 origin[d] = originUpdatelist[d];
624 numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
626 /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
627 This helps for calculating the distance/number of points but should be removed and fixed when the way of
628 updating origin/end updatelist is changed (see sampleProbabilityWeights). */
629 numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
632 /* Make the update list */
633 updateList->clear();
634 int pointIndex = -1;
635 bool pointExists = true;
636 while (pointExists)
638 pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
640 if (pointExists && points[pointIndex].inTargetRegion())
642 updateList->push_back(pointIndex);
647 } // namespace
649 void BiasState::resetLocalUpdateRange(const Grid &grid)
651 const int gridpointIndex = coordState_.gridpointIndex();
652 for (int d = 0; d < grid.numDimensions(); d++)
654 /* This gives the minimum range consisting only of the current closest point. */
655 originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
656 endUpdatelist_[d] = grid.point(gridpointIndex).index[d];
660 namespace
663 /*! \brief
664 * Add partial histograms (accumulating between updates) to accumulating histograms.
666 * \param[in,out] pointState The state of the points in the bias.
667 * \param[in,out] weightSumCovering The weights for checking covering.
668 * \param[in] numSharedUpdate The number of biases sharing the histrogram.
669 * \param[in] multiSimComm Struct for multi-simulation communication.
670 * \param[in] localUpdateList List of points with data.
672 void sumHistograms(gmx::ArrayRef<PointState> pointState,
673 gmx::ArrayRef<double> weightSumCovering,
674 int numSharedUpdate,
675 const gmx_multisim_t *multiSimComm,
676 const std::vector<int> &localUpdateList)
678 /* The covering checking histograms are added before summing over simulations, so that the weights from different
679 simulations are kept distinguishable. */
680 for (int globalIndex : localUpdateList)
682 weightSumCovering[globalIndex] +=
683 pointState[globalIndex].weightSumIteration();
686 /* Sum histograms over multiple simulations if needed. */
687 if (numSharedUpdate > 1)
689 GMX_ASSERT(numSharedUpdate == multiSimComm->nsim, "Sharing within a simulation is not implemented (yet)");
691 /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
692 std::vector<double> weightSum;
693 std::vector<double> coordVisits;
695 weightSum.resize(localUpdateList.size());
696 coordVisits.resize(localUpdateList.size());
698 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
700 const PointState &ps = pointState[localUpdateList[localIndex]];
702 weightSum[localIndex] = ps.weightSumIteration();
703 coordVisits[localIndex] = ps.numVisitsIteration();
706 gmx_sumd_sim(weightSum.size(), weightSum.data(), multiSimComm);
707 gmx_sumd_sim(coordVisits.size(), coordVisits.data(), multiSimComm);
709 /* Transfer back the result */
710 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
712 PointState &ps = pointState[localUpdateList[localIndex]];
714 ps.setPartialWeightAndCount(weightSum[localIndex],
715 coordVisits[localIndex]);
719 /* Now add the partial counts and weights to the accumulating histograms.
720 Note: we still need to use the weights for the update so we wait
721 with resetting them until the end of the update. */
722 for (int globalIndex : localUpdateList)
724 pointState[globalIndex].addPartialWeightAndCount();
728 /*! \brief
729 * Label points along an axis as covered or not.
731 * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
733 * \param[in] visited Visited? For each point.
734 * \param[in] checkCovering Check for covering? For each point.
735 * \param[in] numPoints The number of grid points along this dimension.
736 * \param[in] period Period in number of points.
737 * \param[in] coverRadius Cover radius, in points, needed for defining a point as covered.
738 * \param[in,out] covered In this array elements are 1 for covered points and 0 for non-covered points, this routine assumes that \p covered has at least size \p numPoints.
740 void labelCoveredPoints(const std::vector<bool> &visited,
741 const std::vector<bool> &checkCovering,
742 int numPoints,
743 int period,
744 int coverRadius,
745 gmx::ArrayRef<int> covered)
747 GMX_ASSERT(covered.size() >= static_cast<size_t>(numPoints), "covered should be at least as large as the grid");
749 bool haveFirstNotVisited = false;
750 int firstNotVisited = -1;
751 int notVisitedLow = -1;
752 int notVisitedHigh = -1;
754 for (int n = 0; n < numPoints; n++)
756 if (checkCovering[n] && !visited[n])
758 if (!haveFirstNotVisited)
760 notVisitedLow = n;
761 firstNotVisited = n;
762 haveFirstNotVisited = true;
764 else
766 notVisitedHigh = n;
768 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
769 by unvisited points. The unvisted end points affect the coveredness of the visited
770 with a reach equal to the cover radius. */
771 int notCoveredLow = notVisitedLow + coverRadius;
772 int notCoveredHigh = notVisitedHigh - coverRadius;
773 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
775 covered[i] = (i > notCoveredLow) && (i < notCoveredHigh);
778 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval the
779 notVisitedLow of the next. */
780 notVisitedLow = notVisitedHigh;
785 /* Have labelled all the internal points. Now take care of the boundary regions. */
786 if (!haveFirstNotVisited)
788 /* No non-visited points <=> all points visited => all points covered. */
790 for (int n = 0; n < numPoints; n++)
792 covered[n] = 1;
795 else
797 int lastNotVisited = notVisitedLow;
799 /* For periodic boundaries, non-visited points can influence points
800 on the other side of the boundary so we need to wrap around. */
802 /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
803 For non-periodic boundaries there is no lower end point so a dummy value is used. */
804 int notVisitedHigh = firstNotVisited;
805 int notVisitedLow = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
807 int notCoveredLow = notVisitedLow + coverRadius;
808 int notCoveredHigh = notVisitedHigh - coverRadius;
810 for (int i = 0; i <= notVisitedHigh; i++)
812 /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
813 covered[i] = (i > notCoveredLow) && (i < notCoveredHigh);
816 /* Upper end. Same as for lower end but in the other direction. */
817 notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
818 notVisitedLow = lastNotVisited;
820 notCoveredLow = notVisitedLow + coverRadius;
821 notCoveredHigh = notVisitedHigh - coverRadius;
823 for (int i = notVisitedLow; i <= numPoints - 1; i++)
825 /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
826 covered[i] = (i > notCoveredLow) && (i < notCoveredHigh);
831 } // namespace
833 bool BiasState::isSamplingRegionCovered(const BiasParams &params,
834 const std::vector<DimParams> &dimParams,
835 const Grid &grid,
836 const gmx_multisim_t *multiSimComm) const
838 /* Allocate and initialize arrays: one for checking visits along each dimension,
839 one for keeping track of which points to check and one for the covered points.
840 Possibly these could be kept as AWH variables to avoid these allocations. */
841 struct CheckDim
843 std::vector<bool> visited;
844 std::vector<bool> checkCovering;
845 // We use int for the covering array since we might use gmx_sumi_sim.
846 std::vector<int> covered;
849 std::vector<CheckDim> checkDim;
850 checkDim.resize(grid.numDimensions());
852 for (int d = 0; d < grid.numDimensions(); d++)
854 const size_t numPoints = grid.axis(d).numPoints();
855 checkDim[d].visited.resize(numPoints, false);
856 checkDim[d].checkCovering.resize(numPoints, false);
857 checkDim[d].covered.resize(numPoints, 0);
860 /* Set visited points along each dimension and which points should be checked for covering.
861 Specifically, points above the free energy cutoff (if there is one) or points outside
862 of the target region are ignored. */
864 /* Set the free energy cutoff */
865 double maxFreeEnergy = GMX_FLOAT_MAX;
867 if (params.eTarget == eawhtargetCUTOFF)
869 maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
872 /* Set the threshold weight for a point to be considered visited. */
873 double weightThreshold = 1;
874 for (int d = 0; d < grid.numDimensions(); d++)
876 weightThreshold *= grid.axis(d).spacing()*std::sqrt(dimParams[d].betak*0.5*M_1_PI);
879 /* Project the sampling weights onto each dimension */
880 for (size_t m = 0; m < grid.numPoints(); m++)
882 const PointState &pointState = points_[m];
884 for (int d = 0; d < grid.numDimensions(); d++)
886 int n = grid.point(m).index[d];
888 /* Is visited if it was already visited or if there is enough weight at the current point */
889 checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
891 /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
892 checkDim[d].checkCovering[n] = checkDim[d].checkCovering[n] || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
896 /* Label each point along each dimension as covered or not. */
897 for (int d = 0; d < grid.numDimensions(); d++)
899 labelCoveredPoints(checkDim[d].visited, checkDim[d].checkCovering, grid.axis(d).numPoints(), grid.axis(d).numPointsInPeriod(),
900 params.coverRadius()[d], checkDim[d].covered);
903 /* Now check for global covering. Each dimension needs to be covered separately.
904 A dimension is covered if each point is covered. Multiple simulations collectively
905 cover the points, i.e. a point is covered if any of the simulations covered it.
906 However, visited points are not shared, i.e. if a point is covered or not is
907 determined by the visits of a single simulation. In general the covering criterion is
908 all points covered => all points are surrounded by visited points up to a radius = coverRadius.
909 For 1 simulation, all points covered <=> all points visited. For multiple simulations
910 however, all points visited collectively !=> all points covered, except for coverRadius = 0.
911 In the limit of large coverRadius, all points covered => all points visited by at least one
912 simulation (since no point will be covered until all points have been visited by a
913 single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
914 needs with surrounding points before sharing covering information with other simulations. */
916 /* Communicate the covered points between sharing simulations if needed. */
917 if (params.numSharedUpdate > 1)
919 /* For multiple dimensions this may not be the best way to do it. */
920 for (int d = 0; d < grid.numDimensions(); d++)
922 gmx_sumi_sim(grid.axis(d).numPoints(), checkDim[d].covered.data(), multiSimComm);
926 /* Now check if for each dimension all points are covered. Break if not true. */
927 bool allPointsCovered = true;
928 for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
930 for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
932 allPointsCovered = checkDim[d].covered[n];
936 return allPointsCovered;
939 /*! \brief
940 * Normalizes the free energy and PMF sum.
942 * \param[in] pointState The state of the points.
944 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState> *pointState)
946 double minF = freeEnergyMinimumValue(*pointState);
948 for (PointState &ps : *pointState)
950 ps.normalizeFreeEnergyAndPmfSum(minF);
954 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams> &dimParams,
955 const Grid &grid,
956 const BiasParams &params,
957 const gmx_multisim_t *multiSimComm,
958 double t,
959 gmx_int64_t step,
960 FILE *fplog,
961 std::vector<int> *updateList)
963 /* Note hat updateList is only used in this scope and is always
964 * re-initialized. We do not use a local vector, because that would
965 * cause reallocation every time this funtion is called and the vector
966 * can be the size of the whole grid.
969 /* Make a list of all local points, i.e. those that could have been touched since
970 the last update. These are the points needed for summing histograms below
971 (non-local points only add zeros). For local updates, this will also be the
972 final update list. */
973 makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_,
974 updateList);
975 if (params.numSharedUpdate > 1)
977 mergeSharedUpdateLists(updateList, points_.size(), multiSimComm);
980 /* Reset the range for the next update */
981 resetLocalUpdateRange(grid);
983 /* Add samples to histograms for all local points and sync simulations if needed */
984 sumHistograms(points_, weightSumCovering_,
985 params.numSharedUpdate, multiSimComm, *updateList);
987 sumPmf(points_, params.numSharedUpdate, multiSimComm);
989 /* Renormalize the free energy if values are too large. */
990 bool needToNormalizeFreeEnergy = false;
991 for (int &globalIndex : *updateList)
993 /* We want to keep the absolute value of the free energies to be less c_largePositiveExponent
994 to be able to safely pass these values to exp(). The check below ensures this as long as
995 the free energy values grow less than 0.5*c_largePositiveExponent in a return time to this
996 neighborhood. For reasonable update sizes it's unlikely that this requirement would be
997 broken. */
998 if (std::abs(points_[globalIndex].freeEnergy()) > 0.5*c_largePositiveExponent)
1000 needToNormalizeFreeEnergy = true;
1001 break;
1005 /* Update target distribution? */
1006 bool needToUpdateTargetDistribution =
1007 (params.eTarget != eawhtargetCONSTANT &&
1008 params.isUpdateTargetStep(step));
1010 /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1011 bool detectedCovering = false;
1012 if (inInitialStage())
1014 detectedCovering = (params.isCheckStep(points_.size(), step) &&
1015 isSamplingRegionCovered(params, dimParams, grid, multiSimComm));
1018 /* The weighthistogram size after this update. */
1019 double newHistogramSize = histogramSize_.newHistogramSize(params, t, detectedCovering, points_, weightSumCovering_, fplog);
1021 /* Make the update list. Usually we try to only update local points,
1022 * but if the update has non-trivial or non-deterministic effects
1023 * on non-local points a global update is needed. This is the case when:
1024 * 1) a covering occurred in the initial stage, leading to non-trivial
1025 * histogram rescaling factors; or
1026 * 2) the target distribution will be updated, since we don't make any
1027 * assumption on its form; or
1028 * 3) the AWH parameters are such that we never attempt to skip non-local
1029 * updates; or
1030 * 4) the free energy values have grown so large that a renormalization
1031 * is needed.
1033 if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1035 /* Global update, just add all points. */
1036 updateList->clear();
1037 for (size_t m = 0; m < points_.size(); m++)
1039 if (points_[m].inTargetRegion())
1041 updateList->push_back(m);
1046 /* Set histogram scale factors. */
1047 double weightHistScalingSkipped = 0;
1048 double logPmfsumScalingSkipped = 0;
1049 if (params.skipUpdates())
1051 getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1053 double weightHistScalingNew;
1054 double logPmfsumScalingNew;
1055 setHistogramUpdateScaleFactors(params, newHistogramSize, histogramSize_.histogramSize(),
1056 &weightHistScalingNew, &logPmfsumScalingNew);
1058 /* Update free energy and reference weight histogram for points in the update list. */
1059 for (int pointIndex : *updateList)
1061 PointState *pointStateToUpdate = &points_[pointIndex];
1063 /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1064 if (params.skipUpdates())
1066 pointStateToUpdate->performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScalingSkipped, logPmfsumScalingSkipped);
1069 /* Now do an update with new sampling data. */
1070 pointStateToUpdate->updateWithNewSampling(params, histogramSize_.numUpdates(), weightHistScalingNew, logPmfsumScalingNew);
1073 /* Only update the histogram size after we are done with the local point updates */
1074 histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1076 if (needToNormalizeFreeEnergy)
1078 normalizeFreeEnergyAndPmfSum(&points_);
1081 if (needToUpdateTargetDistribution)
1083 /* The target distribution is always updated for all points at once. */
1084 updateTargetDistribution(points_, params);
1087 /* Update the bias. The bias is updated separately and last since it simply a function of
1088 the free energy and the target distribution and we want to avoid doing extra work. */
1089 for (int pointIndex : *updateList)
1091 points_[pointIndex].updateBias();
1094 /* Increase the update counter. */
1095 histogramSize_.incrementNumUpdates();
1098 double BiasState::updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams> &dimParams,
1099 const Grid &grid,
1100 std::vector < double, AlignedAllocator < double>> *weight) const
1102 /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1103 const std::vector<int> &neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1105 #if GMX_SIMD_HAVE_DOUBLE
1106 typedef SimdDouble PackType;
1107 constexpr int packSize = GMX_SIMD_DOUBLE_WIDTH;
1108 #else
1109 typedef double PackType;
1110 constexpr int packSize = 1;
1111 #endif
1112 /* Round the size of the weight array up to packSize */
1113 const int weightSize = ((neighbors.size() + packSize - 1)/packSize)*packSize;
1114 weight->resize(weightSize);
1116 double * gmx_restrict weightData = weight->data();
1117 PackType weightSumPack(0.0);
1118 for (size_t i = 0; i < neighbors.size(); i += packSize)
1120 for (size_t n = i; n < i + packSize; n++)
1122 if (n < neighbors.size())
1124 const int neighbor = neighbors[n];
1125 (*weight)[n] = biasedLogWeightFromPoint(dimParams, points_, grid,
1126 neighbor, points_[neighbor].bias(),
1127 coordState_.coordValue());
1129 else
1131 /* Pad with values that don't affect the result */
1132 (*weight)[n] = c_largeNegativeExponent;
1135 PackType weightPack = load<PackType>(weightData + i);
1136 weightPack = gmx::exp(weightPack);
1137 weightSumPack = weightSumPack + weightPack;
1138 store(weightData + i, weightPack);
1140 /* Sum of probability weights */
1141 double weightSum = reduce(weightSumPack);
1142 GMX_RELEASE_ASSERT(weightSum > 0, "zero probability weight when updating AWH probability weights.");
1144 /* Normalize probabilities to sum to 1 */
1145 double invWeightSum = 1/weightSum;
1146 for (double &w : *weight)
1148 w *= invWeightSum;
1151 /* Return the convolved bias */
1152 return std::log(weightSum);
1155 double BiasState::calcConvolvedBias(const std::vector<DimParams> &dimParams,
1156 const Grid &grid,
1157 const awh_dvec &coordValue) const
1159 int point = grid.nearestIndex(coordValue);
1160 const GridPoint &gridPoint = grid.point(point);
1162 /* Sum the probability weights from the neighborhood of the given point */
1163 double weightSum = 0;
1164 for (int neighbor : gridPoint.neighbor)
1166 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid,
1167 neighbor, points_[neighbor].bias(),
1168 coordValue);
1169 weightSum += std::exp(logWeight);
1172 /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1173 return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1176 void BiasState::sampleProbabilityWeights(const Grid &grid,
1177 gmx::ArrayRef<const double> probWeightNeighbor)
1179 const std::vector<int> &neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1181 /* Save weights for next update */
1182 for (size_t n = 0; n < neighbor.size(); n++)
1184 points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1187 /* Update the local update range. Two corner points define this rectangular
1188 * domain. We need to choose two new corner points such that the new domain
1189 * contains both the old update range and the current neighborhood.
1190 * In the simplest case when an update is performed every sample,
1191 * the update range would simply equal the current neighborhood.
1193 int neighborStart = neighbor[0];
1194 int neighborLast = neighbor[neighbor.size() - 1];
1195 for (int d = 0; d < grid.numDimensions(); d++)
1197 int origin = grid.point(neighborStart).index[d];
1198 int last = grid.point(neighborLast).index[d];
1200 if (origin > last)
1202 /* Unwrap if wrapped around the boundary (only happens for periodic
1203 * boundaries). This has been already for the stored index interval.
1205 /* TODO: what we want to do is to find the smallest the update
1206 * interval that contains all points that need to be updated.
1207 * This amounts to combining two intervals, the current
1208 * [origin, end] update interval and the new touched neighborhood
1209 * into a new interval that contains all points from both the old
1210 * intervals.
1212 * For periodic boundaries it becomes slightly more complicated
1213 * than for closed boundaries because then it needs not be
1214 * true that origin < end (so one can't simply relate the origin/end
1215 * in the min()/max() below). The strategy here is to choose the
1216 * origin closest to a reference point (index 0) and then unwrap
1217 * the end index if needed and choose the largest end index.
1218 * This ensures that both intervals are in the new interval
1219 * but it's not necessarily the smallest.
1220 * Currently we solve this by going through each possibility
1221 * and checking them.
1223 last += grid.axis(d).numPointsInPeriod();
1226 originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1227 endUpdatelist_[d] = std::max(endUpdatelist_[d], last);
1231 void BiasState::sampleCoordAndPmf(const Grid &grid,
1232 gmx::ArrayRef<const double> probWeightNeighbor,
1233 double convolvedBias)
1235 /* Sampling-based deconvolution extracting the PMF.
1236 * Update the PMF histogram with the current coordinate value.
1238 * Because of the finite width of the harmonic potential, the free energy
1239 * defined for each coordinate point does not exactly equal that of the
1240 * actual coordinate, the PMF. However, the PMF can be estimated by applying
1241 * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1242 * reweighted histogram of the coordinate value. Strictly, this relies on
1243 * the unknown normalization constant Z being either constant or known. Here,
1244 * neither is true except in the long simulation time limit. Empirically however,
1245 * it works (mainly because how the PMF histogram is rescaled).
1248 /* Only save coordinate data that is in range (the given index is always
1249 * in range even if the coordinate value is not).
1251 if (grid.covers(coordState_.coordValue()))
1253 /* Save PMF sum and keep a histogram of the sampled coordinate values */
1254 points_[coordState_.gridpointIndex()].samplePmf(convolvedBias);
1257 /* Save probability weights for the update */
1258 sampleProbabilityWeights(grid, probWeightNeighbor);
1261 void BiasState::initHistoryFromState(AwhBiasHistory *biasHistory) const
1263 biasHistory->pointState.resize(points_.size());
1266 void BiasState::updateHistory(AwhBiasHistory *biasHistory,
1267 const Grid &grid) const
1269 GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(), "The AWH history setup does not match the AWH state.");
1271 AwhBiasStateHistory *stateHistory = &biasHistory->state;
1272 stateHistory->umbrellaGridpoint = coordState_.umbrellaGridpoint();
1274 for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1276 AwhPointStateHistory *psh = &biasHistory->pointState[m];
1278 points_[m].storeState(psh);
1280 psh->weightsum_covering = weightSumCovering_[m];
1283 histogramSize_.storeState(stateHistory);
1285 stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid,
1286 originUpdatelist_);
1287 stateHistory->end_index_updatelist = multiDimGridIndexToLinear(grid,
1288 endUpdatelist_);
1291 void BiasState::restoreFromHistory(const AwhBiasHistory &biasHistory,
1292 const Grid &grid)
1294 const AwhBiasStateHistory &stateHistory = biasHistory.state;
1296 coordState_.restoreFromHistory(stateHistory);
1298 if (biasHistory.pointState.size() != points_.size())
1300 GMX_THROW(InvalidInputError("Bias grid size in checkpoint and simulation do not match. Likely you provided a checkpoint from a different simulation."));
1302 for (size_t m = 0; m < points_.size(); m++)
1304 points_[m].setFromHistory(biasHistory.pointState[m]);
1307 for (size_t m = 0; m < weightSumCovering_.size(); m++)
1309 weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1312 histogramSize_.restoreFromHistory(stateHistory);
1314 linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1315 linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1318 void BiasState::broadcast(const t_commrec *commRecord)
1320 gmx_bcast(sizeof(coordState_), &coordState_, commRecord);
1322 gmx_bcast(points_.size()*sizeof(PointState), points_.data(), commRecord);
1324 gmx_bcast(weightSumCovering_.size()*sizeof(double), weightSumCovering_.data(), commRecord);
1326 gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord);
1329 void BiasState::setFreeEnergyToConvolvedPmf(const std::vector<DimParams> &dimParams,
1330 const Grid &grid)
1332 std::vector<float> convolvedPmf;
1334 calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1336 for (size_t m = 0; m < points_.size(); m++)
1338 points_[m].setFreeEnergy(convolvedPmf[m]);
1342 /*! \brief
1343 * Count trailing data rows containing only zeros.
1345 * \param[in] data 2D data array.
1346 * \param[in] numRows Number of rows in array.
1347 * \param[in] numColumns Number of cols in array.
1348 * \returns the number of trailing zero rows.
1350 static int countTrailingZeroRows(const double* const *data,
1351 int numRows,
1352 int numColumns)
1354 int numZeroRows = 0;
1355 for (int m = numRows - 1; m >= 0; m--)
1357 bool rowIsZero = true;
1358 for (int d = 0; d < numColumns; d++)
1360 if (data[d][m] != 0)
1362 rowIsZero = false;
1363 break;
1367 if (!rowIsZero)
1369 /* At a row with non-zero data */
1370 break;
1372 else
1374 /* Still at a zero data row, keep checking rows higher up. */
1375 numZeroRows++;
1379 return numZeroRows;
1382 /*! \brief
1383 * Initializes the PMF and target with data read from an input table.
1385 * \param[in] dimParams The dimension parameters.
1386 * \param[in] grid The grid.
1387 * \param[in] filename The filename to read PMF and target from.
1388 * \param[in] numBias Number of biases.
1389 * \param[in] biasIndex The index of the bias.
1390 * \param[in,out] pointState The state of the points in this bias.
1392 static void readUserPmfAndTargetDistribution(const std::vector<DimParams> &dimParams,
1393 const Grid &grid,
1394 const std::string &filename,
1395 int numBias,
1396 int biasIndex,
1397 std::vector<PointState> *pointState)
1399 /* Read the PMF and target distribution.
1400 From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1401 base on the force constant. The free energy and target together determine the bias.
1403 std::string filenameModified(filename);
1404 if (numBias > 1)
1406 size_t n = filenameModified.rfind(".");
1407 GMX_RELEASE_ASSERT(n != std::string::npos, "The filename should contain an extension starting with .");
1408 filenameModified.insert(n, formatString("%d", biasIndex));
1411 std::string correctFormatMessage =
1412 formatString("%s is expected in the following format. "
1413 "The first ndim column(s) should contain the coordinate values for each point, "
1414 "each column containing values of one dimension (in ascending order). "
1415 "For a multidimensional coordinate, points should be listed "
1416 "in the order obtained by traversing lower dimensions first. "
1417 "E.g. for two-dimensional grid of size nxn: "
1418 "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1419 "Column ndim + 1 should contain the PMF value for each coordinate value. "
1420 "The target distribution values should be in column ndim + 2 or column ndim + 5. "
1421 "Make sure the input file ends with a new line but has no trailing new lines.",
1422 filename.c_str());
1423 gmx::TextLineWrapper wrapper;
1424 wrapper.settings().setLineLength(c_linewidth);
1425 correctFormatMessage = wrapper.wrapToString(correctFormatMessage).c_str();
1427 double **data;
1428 int numColumns;
1429 int numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1431 /* Check basic data properties here. Grid takes care of more complicated things. */
1433 if (numRows <= 0)
1435 std::string mesg = gmx::formatString("%s is empty!.\n\n%s", filename.c_str(), correctFormatMessage.c_str());
1436 GMX_THROW(InvalidInputError(mesg));
1439 /* Less than 2 points is not useful for PMF or target. */
1440 if (numRows < 2)
1442 std::string mesg =
1443 gmx::formatString("%s contains too few data points (%d)."
1444 "The minimum number of points is 2.",
1445 filename.c_str(), numRows);
1446 GMX_THROW(InvalidInputError(mesg));
1449 /* Make sure there are enough columns of data.
1451 Two formats are allowed. Either with columns {coords, PMF, target} or
1452 {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1453 is how AWH output is written (x, y, z being other AWH variables). For this format,
1454 trailing columns are ignored.
1456 int columnIndexTarget;
1457 int numColumnsMin = dimParams.size() + 2;
1458 int columnIndexPmf = dimParams.size();
1459 if (numColumns == numColumnsMin)
1461 columnIndexTarget = columnIndexPmf + 1;
1463 else
1465 columnIndexTarget = columnIndexPmf + 4;
1468 if (numColumns < numColumnsMin)
1470 std::string mesg =
1471 gmx::formatString("The number of columns in %s (%d) should be at least %d."
1472 "\n\n%s",
1473 filename.c_str(), correctFormatMessage.c_str());
1474 GMX_THROW(InvalidInputError(mesg));
1477 /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1478 since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1479 int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1480 if (numZeroRows > 1)
1482 std::string mesg = gmx::formatString("Found %d trailing zero data rows in %s. Please remove trailing empty lines and try again.",
1483 numZeroRows, filename.c_str());
1484 GMX_THROW(InvalidInputError(mesg));
1487 /* Convert from user units to internal units before sending the data of to grid. */
1488 for (size_t d = 0; d < dimParams.size(); d++)
1490 double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1491 if (scalingFactor == 1)
1493 continue;
1495 for (size_t m = 0; m < pointState->size(); m++)
1497 data[d][m] *= scalingFactor;
1501 /* Get a data point for each AWH grid point so that they all get data. */
1502 std::vector<int> gridIndexToDataIndex(grid.numPoints());
1503 mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1505 /* Extract the data for each grid point.
1506 * We check if the target distribution is zero for all points.
1508 bool targetDistributionIsZero = true;
1509 for (size_t m = 0; m < pointState->size(); m++)
1511 (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1512 double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1514 /* Check if the values are allowed. */
1515 if (target < 0)
1517 std::string mesg = gmx::formatString("Target distribution weight at point %d (%g) in %s is negative.",
1518 m, target, filename.c_str());
1519 GMX_THROW(InvalidInputError(mesg));
1521 if (target > 0)
1523 targetDistributionIsZero = false;
1525 (*pointState)[m].setTargetConstantWeight(target);
1528 if (targetDistributionIsZero)
1530 std::string mesg = gmx::formatString("The target weights given in column %d in %s are all 0",
1531 filename.c_str(), columnIndexTarget);
1532 GMX_THROW(InvalidInputError(mesg));
1535 /* Free the arrays. */
1536 for (int m = 0; m < numColumns; m++)
1538 sfree(data[m]);
1540 sfree(data);
1543 void BiasState::normalizePmf(int numSharingSims)
1545 /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1546 Approximately (for large enough force constant) we should have:
1547 sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1550 /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1551 double expSumPmf = 0;
1552 double expSumF = 0;
1553 for (const PointState &pointState : points_)
1555 if (pointState.inTargetRegion())
1557 expSumPmf += std::exp( pointState.logPmfSum());
1558 expSumF += std::exp(-pointState.freeEnergy());
1561 double numSamples = histogramSize_.histogramSize()/numSharingSims;
1563 /* Renormalize */
1564 double logRenorm = std::log(numSamples*expSumF/expSumPmf);
1565 for (PointState &pointState : points_)
1567 if (pointState.inTargetRegion())
1569 pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1574 void BiasState::initGridPointState(const AwhBiasParams &awhBiasParams,
1575 const std::vector<DimParams> &dimParams,
1576 const Grid &grid,
1577 const BiasParams &params,
1578 const std::string &filename,
1579 int numBias)
1581 /* Modify PMF, free energy and the constant target distribution factor
1582 * to user input values if there is data given.
1584 if (awhBiasParams.bUserData)
1586 readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1587 setFreeEnergyToConvolvedPmf(dimParams, grid);
1590 /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1591 GMX_RELEASE_ASSERT(params.eTarget != eawhtargetLOCALBOLTZMANN ||
1592 points_[0].weightSumRef() != 0,
1593 "AWH reference weight histogram not initialized properly with local Boltzmann target distribution.");
1595 updateTargetDistribution(points_, params);
1597 for (PointState &pointState : points_)
1599 if (pointState.inTargetRegion())
1601 pointState.updateBias();
1603 else
1605 /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1606 pointState.setTargetToZero();
1610 /* Set the initial reference weighthistogram. */
1611 const double histogramSize = histogramSize_.histogramSize();
1612 for (auto &pointState : points_)
1614 pointState.setInitialReferenceWeightHistogram(histogramSize);
1617 /* Make sure the pmf is normalized consistently with the histogram size.
1618 Note: the target distribution and free energy need to be set here. */
1619 normalizePmf(params.numSharedUpdate);
1622 BiasState::BiasState(const AwhBiasParams &awhBiasParams,
1623 double histogramSizeInitial,
1624 const std::vector<DimParams> &dimParams,
1625 const Grid &grid) :
1626 coordState_(awhBiasParams, dimParams, grid),
1627 points_(grid.numPoints()),
1628 weightSumCovering_(grid.numPoints()),
1629 histogramSize_(awhBiasParams, histogramSizeInitial)
1631 /* The minimum and maximum multidimensional point indices that are affected by the next update */
1632 for (size_t d = 0; d < dimParams.size(); d++)
1634 int index = grid.point(coordState_.gridpointIndex()).index[d];
1635 originUpdatelist_[d] = index;
1636 endUpdatelist_[d] = index;
1640 } // namespace gmx