Spelling fixes
[gromacs.git] / src / gromacs / trajectoryanalysis / modules / rdf.cpp
blob4393d15cd96ee3552c3c0df58f73f9a6cc4dac32
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2016, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 /*! \internal \file
38 * \brief
39 * Implements gmx::analysismodules::Rdf.
41 * \author Teemu Murtola <teemu.murtola@gmail.com> (C++ conversion)
42 * \ingroup module_trajectoryanalysis
44 #include "gmxpre.h"
46 #include "rdf.h"
48 #include <cmath>
50 #include <algorithm>
51 #include <limits>
52 #include <string>
53 #include <vector>
55 #include "gromacs/analysisdata/analysisdata.h"
56 #include "gromacs/analysisdata/modules/average.h"
57 #include "gromacs/analysisdata/modules/histogram.h"
58 #include "gromacs/analysisdata/modules/plot.h"
59 #include "gromacs/fileio/trx.h"
60 #include "gromacs/math/utilities.h"
61 #include "gromacs/math/vec.h"
62 #include "gromacs/options/basicoptions.h"
63 #include "gromacs/options/filenameoption.h"
64 #include "gromacs/options/options.h"
65 #include "gromacs/pbcutil/pbc.h"
66 #include "gromacs/selection/nbsearch.h"
67 #include "gromacs/selection/selection.h"
68 #include "gromacs/selection/selectionoption.h"
69 #include "gromacs/topology/topology.h"
70 #include "gromacs/trajectoryanalysis/analysismodule.h"
71 #include "gromacs/trajectoryanalysis/analysissettings.h"
72 #include "gromacs/utility/exceptions.h"
73 #include "gromacs/utility/stringutil.h"
75 namespace gmx
78 namespace analysismodules
81 namespace
84 //! \addtogroup module_trajectoryanalysis
85 //! \{
87 /********************************************************************
88 * Actual analysis module
91 /*! \brief
92 * Implements `gmx rdf` trajectory analysis module.
94 class Rdf : public TrajectoryAnalysisModule
96 public:
97 Rdf();
99 virtual void initOptions(Options *options,
100 TrajectoryAnalysisSettings *settings);
101 virtual void optionsFinished(Options *options,
102 TrajectoryAnalysisSettings *settings);
103 virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
104 const TopologyInformation &top);
105 virtual void initAfterFirstFrame(const TrajectoryAnalysisSettings &settings,
106 const t_trxframe &fr);
108 virtual TrajectoryAnalysisModuleDataPointer startFrames(
109 const AnalysisDataParallelOptions &opt,
110 const SelectionCollection &selections);
111 virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
112 TrajectoryAnalysisModuleData *pdata);
114 virtual void finishAnalysis(int nframes);
115 virtual void writeOutput();
117 private:
118 std::string fnRdf_;
119 std::string fnCumulative_;
120 std::string surface_;
121 AnalysisDataPlotSettings plotSettings_;
123 /*! \brief
124 * Reference selection to compute RDFs around.
126 * With -surf, Selection::originalIds() and Selection::mappedIds()
127 * store the index of the surface group to which that position belongs.
128 * The RDF is computed by finding the nearest position from each
129 * surface group for each position, and then binning those distances.
131 Selection refSel_;
132 /*! \brief
133 * Selections to compute RDFs for.
135 SelectionList sel_;
137 /*! \brief
138 * Raw pairwise distance data from which the RDF is computed.
140 * There is a data set for each selection in `sel_`, with a single
141 * column. Each point set will contain a single pairwise distance
142 * that contributes to the RDF.
144 AnalysisData pairDist_;
145 /*! \brief
146 * Normalization factors for each frame.
148 * The first column contains the number of positions in `refSel_` for
149 * that frame (with surface RDF, the number of groups). There are
150 * `sel_.size()` more columns, each containing the number density of
151 * positions for one selection.
153 AnalysisData normFactors_;
154 /*! \brief
155 * Histogram module that computes the actual RDF from `pairDist_`.
157 * The per-frame histograms are raw pair counts in each bin;
158 * the averager is normalized by the average number of reference
159 * positions (average of the first column of `normFactors_`).
161 AnalysisDataSimpleHistogramModulePointer pairCounts_;
162 /*! \brief
163 * Average normalization factors.
165 AnalysisDataAverageModulePointer normAve_;
166 //! Neighborhood search with `refSel_` as the reference positions.
167 AnalysisNeighborhood nb_;
169 // User input options.
170 double binwidth_;
171 double cutoff_;
172 double rmax_;
173 bool bNormalize_;
174 bool bXY_;
175 bool bExclusions_;
177 // Pre-computed values for faster access during analysis.
178 real cut2_;
179 real rmax2_;
180 int surfaceGroupCount_;
182 // Copy and assign disallowed by base.
185 Rdf::Rdf()
186 : TrajectoryAnalysisModule(RdfInfo::name, RdfInfo::shortDescription),
187 pairCounts_(new AnalysisDataSimpleHistogramModule()),
188 normAve_(new AnalysisDataAverageModule()),
189 binwidth_(0.002), cutoff_(0.0), rmax_(0.0),
190 bNormalize_(true), bXY_(false), bExclusions_(false),
191 cut2_(0.0), rmax2_(0.0), surfaceGroupCount_(0)
193 pairDist_.setMultipoint(true);
194 pairDist_.addModule(pairCounts_);
195 registerAnalysisDataset(&pairDist_, "pairdist");
196 registerBasicDataset(pairCounts_.get(), "paircount");
198 normFactors_.addModule(normAve_);
199 registerAnalysisDataset(&normFactors_, "norm");
202 void
203 Rdf::initOptions(Options *options, TrajectoryAnalysisSettings * /*settings*/)
205 static const char *const desc[] = {
206 "[THISMODULE] calculates radial distribution functions from one",
207 "reference set of position (set with [TT]-ref[tt]) to one or more",
208 "sets of positions (set with [TT]-sel[tt]). To compute the RDF with",
209 "respect to the closest position in a set in [TT]-ref[tt] instead, use",
210 "[TT]-surf[tt]: if set, then [TT]-ref[tt] is partitioned into sets",
211 "based on the value of [TT]-surf[tt], and the closest position in each",
212 "set is used. To compute the RDF around axes parallel to the",
213 "[IT]z[it]-axis, i.e., only in the [IT]x[it]-[IT]y[it] plane, use",
214 "[TT]-xy[tt].[PAR]",
215 "To set the bin width and maximum distance to use in the RDF, use",
216 "[TT]-bin[tt] and [TT]-rmax[tt], respectively. The latter can be",
217 "used to limit the computational cost if the RDF is not of interest",
218 "up to the default (half of the box size with PBC, three times the",
219 "box size without PBC).[PAR]",
220 "To use exclusions from the topology ([TT]-s[tt]), set [TT]-excl[tt]",
221 "and ensure that both [TT]-ref[tt] and [TT]-sel[tt] only select atoms.",
222 "A rougher alternative to exclude intra-molecular peaks is to set",
223 "[TT]-cut[tt] to a non-zero value to clear the RDF at small",
224 "distances.[PAR]",
225 "The RDFs are normalized by 1) average number of positions in",
226 "[TT]-ref[tt] (the number of groups with [TT]-surf[tt]), 2) volume",
227 "of the bin, and 3) average particle density of [TT]-sel[tt] positions",
228 "for that selection. To only use the first factor for normalization,",
229 "set [TT]-nonorm[tt]. In this case, the RDF is only scaled with the",
230 "bin width to make the integral of the curve represent the number of",
231 "pairs within a range. Note that exclusions do not affect the",
232 "normalization: even if [TT]-excl[tt] is set, or [TT]-ref[tt] and",
233 "[TT]-sel[tt] contain the same selection, the normalization factor",
234 "is still N*M, not N*(M-excluded).[PAR]",
235 "For [TT]-surf[tt], the selection provided to [TT]-ref[tt] must",
236 "select atoms, i.e., centers of mass are not supported. Further,",
237 "[TT]-nonorm[tt] is implied, as the bins have irregular shapes and",
238 "the volume of a bin is not easily computable.[PAR]",
239 "Option [TT]-cn[tt] produces the cumulative number RDF,",
240 "i.e. the average number of particles within a distance r.[PAR]"
243 options->setDescription(desc);
245 options->addOption(FileNameOption("o").filetype(eftPlot).outputFile().required()
246 .store(&fnRdf_).defaultBasename("rdf")
247 .description("Computed RDFs"));
248 options->addOption(FileNameOption("cn").filetype(eftPlot).outputFile()
249 .store(&fnCumulative_).defaultBasename("rdf_cn")
250 .description("Cumulative RDFs"));
252 options->addOption(DoubleOption("bin").store(&binwidth_)
253 .description("Bin width (nm)"));
254 options->addOption(BooleanOption("norm").store(&bNormalize_)
255 .description("Normalize for bin volume and density"));
256 options->addOption(BooleanOption("xy").store(&bXY_)
257 .description("Use only the x and y components of the distance"));
258 options->addOption(BooleanOption("excl").store(&bExclusions_)
259 .description("Use exclusions from topology"));
260 options->addOption(DoubleOption("cut").store(&cutoff_)
261 .description("Shortest distance (nm) to be considered"));
262 options->addOption(DoubleOption("rmax").store(&rmax_)
263 .description("Largest distance (nm) to calculate"));
265 const char *const cSurfaceEnum[] = { "no", "mol", "res" };
266 options->addOption(StringOption("surf").enumValue(cSurfaceEnum)
267 .defaultEnumIndex(0).store(&surface_)
268 .description("RDF with respect to the surface of the reference"));
270 options->addOption(SelectionOption("ref").store(&refSel_).required()
271 .description("Reference selection for RDF computation"));
272 options->addOption(SelectionOption("sel").storeVector(&sel_)
273 .required().multiValue()
274 .description("Selections to compute RDFs for from the reference"));
277 void
278 Rdf::optionsFinished(Options *options, TrajectoryAnalysisSettings *settings)
280 if (surface_ != "no")
282 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
284 if (options->isSet("norm") && bNormalize_)
286 GMX_THROW(InconsistentInputError("-surf cannot be combined with -norm"));
288 bNormalize_ = false;
289 if (bExclusions_)
291 GMX_THROW(InconsistentInputError("-surf cannot be combined with -excl"));
294 if (bExclusions_)
296 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
298 if (cutoff_ < 0.0)
300 cutoff_ = 0.0;
304 void
305 Rdf::initAnalysis(const TrajectoryAnalysisSettings &settings,
306 const TopologyInformation &top)
308 pairDist_.setDataSetCount(sel_.size());
309 for (size_t i = 0; i < sel_.size(); ++i)
311 pairDist_.setColumnCount(i, 1);
313 plotSettings_ = settings.plotSettings();
314 nb_.setXYMode(bXY_);
316 normFactors_.setColumnCount(0, sel_.size() + 1);
318 const bool bSurface = (surface_ != "no");
319 if (bSurface)
321 if (!refSel_.hasOnlyAtoms())
323 GMX_THROW(InconsistentInputError("-surf only works with -refsel that consists of atoms"));
325 const e_index_t type = (surface_ == "mol" ? INDEX_MOL : INDEX_RES);
326 surfaceGroupCount_ = refSel_.initOriginalIdsToGroup(top.topology(), type);
329 if (bExclusions_)
331 if (!refSel_.hasOnlyAtoms())
333 GMX_THROW(InconsistentInputError("-excl only works with selections that consist of atoms"));
335 for (size_t i = 0; i < sel_.size(); ++i)
337 if (!sel_[i].hasOnlyAtoms())
339 GMX_THROW(InconsistentInputError("-excl only works with selections that consist of atoms"));
342 const t_topology *topology = top.topology();
343 if (topology->excls.nr == 0)
345 GMX_THROW(InconsistentInputError("-excl is set, but the file provided to -s does not define exclusions"));
347 nb_.setTopologyExclusions(&topology->excls);
351 void
352 Rdf::initAfterFirstFrame(const TrajectoryAnalysisSettings &settings,
353 const t_trxframe &fr)
355 // If -rmax is not provided, determine one from the box for the first frame.
356 if (rmax_ <= 0.0)
358 matrix box;
359 copy_mat(fr.box, box);
360 if (settings.hasPBC())
362 if (bXY_)
364 box[ZZ][ZZ] = 2*std::max(box[XX][XX], box[YY][YY]);
366 rmax_ = std::sqrt(0.99*0.99*max_cutoff2(bXY_ ? epbcXY : epbcXYZ, box));
368 else
370 if (bXY_)
372 clear_rvec(box[ZZ]);
374 rmax_ = 3*std::max(box[XX][XX], std::max(box[YY][YY], box[ZZ][ZZ]));
377 cut2_ = sqr(cutoff_);
378 rmax2_ = sqr(rmax_);
379 nb_.setCutoff(rmax_);
380 // We use the double amount of bins, so we can correctly
381 // write the rdf and rdf_cn output at i*binwidth values.
382 pairCounts_->init(histogramFromRange(0.0, rmax_).binWidth(binwidth_ / 2.0));
385 /*! \brief
386 * Temporary memory for use within a single-frame calculation.
388 class RdfModuleData : public TrajectoryAnalysisModuleData
390 public:
391 /*! \brief
392 * Reserves memory for the frame-local data.
394 * `surfaceGroupCount` will be zero if -surf is not specified.
396 RdfModuleData(TrajectoryAnalysisModule *module,
397 const AnalysisDataParallelOptions &opt,
398 const SelectionCollection &selections,
399 int surfaceGroupCount)
400 : TrajectoryAnalysisModuleData(module, opt, selections)
402 surfaceDist2_.resize(surfaceGroupCount);
405 virtual void finish() { finishDataHandles(); }
407 /*! \brief
408 * Minimum distance to each surface group.
410 * One entry for each group (residue/molecule, per -surf) in the
411 * reference selection.
412 * This is needed to support neighborhood searching, which may not
413 * return the reference positions in order: for each position, we need
414 * to search through all the reference positions and update this array
415 * to find the minimum distance to each surface group, and then compute
416 * the RDF from these numbers.
418 std::vector<real> surfaceDist2_;
421 TrajectoryAnalysisModuleDataPointer Rdf::startFrames(
422 const AnalysisDataParallelOptions &opt,
423 const SelectionCollection &selections)
425 return TrajectoryAnalysisModuleDataPointer(
426 new RdfModuleData(this, opt, selections, surfaceGroupCount_));
429 void
430 Rdf::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
431 TrajectoryAnalysisModuleData *pdata)
433 AnalysisDataHandle dh = pdata->dataHandle(pairDist_);
434 AnalysisDataHandle nh = pdata->dataHandle(normFactors_);
435 const Selection &refSel = pdata->parallelSelection(refSel_);
436 const SelectionList &sel = pdata->parallelSelections(sel_);
437 RdfModuleData &frameData = *static_cast<RdfModuleData *>(pdata);
438 const bool bSurface = !frameData.surfaceDist2_.empty();
440 matrix boxForVolume;
441 copy_mat(fr.box, boxForVolume);
442 if (bXY_)
444 // Set z-size to 1 so we get the surface are iso the volume
445 clear_rvec(boxForVolume[ZZ]);
446 boxForVolume[ZZ][ZZ] = 1;
448 const real inverseVolume = 1.0 / det(boxForVolume);
450 nh.startFrame(frnr, fr.time);
451 // Compute the normalization factor for the number of reference positions.
452 if (bSurface)
454 if (refSel.isDynamic())
456 // Count the number of distinct groups.
457 // This assumes that each group is continuous, which is currently
458 // the case.
459 int count = 0;
460 int prevId = -1;
461 for (int i = 0; i < refSel.posCount(); ++i)
463 const int id = refSel.position(i).mappedId();
464 if (id != prevId)
466 ++count;
467 prevId = id;
470 nh.setPoint(0, count);
472 else
474 nh.setPoint(0, surfaceGroupCount_);
477 else
479 nh.setPoint(0, refSel.posCount());
482 dh.startFrame(frnr, fr.time);
483 AnalysisNeighborhoodSearch nbsearch = nb_.initSearch(pbc, refSel);
484 for (size_t g = 0; g < sel.size(); ++g)
486 dh.selectDataSet(g);
488 if (bSurface)
490 // Special loop for surface calculation, where a separate neighbor
491 // search is done for each position in the selection, and the
492 // nearest position from each surface group is tracked.
493 std::vector<real> &surfaceDist2 = frameData.surfaceDist2_;
494 for (int i = 0; i < sel[g].posCount(); ++i)
496 std::fill(surfaceDist2.begin(), surfaceDist2.end(),
497 std::numeric_limits<real>::max());
498 AnalysisNeighborhoodPairSearch pairSearch =
499 nbsearch.startPairSearch(sel[g].position(i));
500 AnalysisNeighborhoodPair pair;
501 while (pairSearch.findNextPair(&pair))
503 const real r2 = pair.distance2();
504 const int refId = refSel.position(pair.refIndex()).mappedId();
505 if (r2 < surfaceDist2[refId])
507 surfaceDist2[refId] = r2;
510 // Accumulate the RDF from the distances to the surface.
511 for (size_t i = 0; i < surfaceDist2.size(); ++i)
513 const real r2 = surfaceDist2[i];
514 // Here, we need to check for rmax, since the value might
515 // be above the cutoff if no points were close to some
516 // surface positions.
517 if (r2 > cut2_ && r2 <= rmax2_)
519 dh.setPoint(0, std::sqrt(r2));
520 dh.finishPointSet();
525 else
527 // Standard neighborhood search over all pairs within the cutoff
528 // for the -surf no case.
529 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g]);
530 AnalysisNeighborhoodPair pair;
531 while (pairSearch.findNextPair(&pair))
533 const real r2 = pair.distance2();
534 if (r2 > cut2_)
536 // TODO: Consider whether the histogramming could be done with
537 // less overhead (after first measuring the overhead).
538 dh.setPoint(0, std::sqrt(r2));
539 dh.finishPointSet();
543 // Normalization factor for the number density (only used without
544 // -surf, but does not hurt to populate otherwise).
545 nh.setPoint(g + 1, sel[g].posCount() * inverseVolume);
547 dh.finishFrame();
548 nh.finishFrame();
551 void
552 Rdf::finishAnalysis(int /*nframes*/)
554 // Normalize the averager with the number of reference positions,
555 // from where the normalization propagates to all the output.
556 const real refPosCount = normAve_->average(0, 0);
557 pairCounts_->averager().scaleAll(1.0 / refPosCount);
558 pairCounts_->averager().done();
560 // TODO: Consider how these could be exposed to the testing framework
561 // through the dataset registration mechanism.
562 AverageHistogramPointer finalRdf
563 = pairCounts_->averager().resampleDoubleBinWidth(true);
565 if (bNormalize_)
567 // Normalize by the volume of the bins (volume of sphere segments or
568 // length of circle segments).
569 std::vector<real> invBinVolume;
570 const int nbin = finalRdf->settings().binCount();
571 invBinVolume.resize(nbin);
572 real prevSphereVolume = 0.0;
573 for (int i = 0; i < nbin; ++i)
575 const real r = (i + 0.5)*binwidth_;
576 real sphereVolume;
577 if (bXY_)
579 sphereVolume = M_PI*r*r;
581 else
583 sphereVolume = (4.0/3.0)*M_PI*r*r*r;
585 const real binVolume = sphereVolume - prevSphereVolume;
586 invBinVolume[i] = 1.0 / binVolume;
587 prevSphereVolume = sphereVolume;
589 finalRdf->scaleAllByVector(&invBinVolume[0]);
591 // Normalize by particle density.
592 for (size_t g = 0; g < sel_.size(); ++g)
594 finalRdf->scaleSingle(g, 1.0 / normAve_->average(0, g + 1));
597 else
599 // With -nonorm, just scale with bin width to make the integral of the
600 // curve (instead of raw bin sum) represent the pair count.
601 finalRdf->scaleAll(1.0 / binwidth_);
603 finalRdf->done();
605 // TODO: Consider if some of this should be done in writeOutput().
607 AnalysisDataPlotModulePointer plotm(
608 new AnalysisDataPlotModule(plotSettings_));
609 plotm->setFileName(fnRdf_);
610 plotm->setTitle("Radial distribution");
611 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
612 plotm->setXLabel("r (nm)");
613 plotm->setYLabel("g(r)");
614 for (size_t i = 0; i < sel_.size(); ++i)
616 plotm->appendLegend(sel_[i].name());
618 finalRdf->addModule(plotm);
621 if (!fnCumulative_.empty())
623 AverageHistogramPointer cumulativeRdf
624 = pairCounts_->averager().resampleDoubleBinWidth(false);
625 cumulativeRdf->makeCumulative();
626 cumulativeRdf->done();
628 AnalysisDataPlotModulePointer plotm(
629 new AnalysisDataPlotModule(plotSettings_));
630 plotm->setFileName(fnCumulative_);
631 plotm->setTitle("Cumulative Number RDF");
632 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
633 plotm->setXLabel("r (nm)");
634 plotm->setYLabel("number");
635 for (size_t i = 0; i < sel_.size(); ++i)
637 plotm->appendLegend(sel_[i].name());
639 cumulativeRdf->addModule(plotm);
643 void
644 Rdf::writeOutput()
648 //! \}
650 } // namespace
652 const char RdfInfo::name[] = "rdf";
653 const char RdfInfo::shortDescription[] =
654 "Calculate radial distribution functions";
656 TrajectoryAnalysisModulePointer RdfInfo::create()
658 return TrajectoryAnalysisModulePointer(new Rdf);
661 } // namespace analysismodules
663 } // namespace gmx