Split TopologyInformation into its own header
[gromacs.git] / src / gromacs / trajectoryanalysis / modules / pairdist.cpp
blob843e36655bc194955223092affae27203d664596
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \internal \file
36 * \brief
37 * Implements gmx::analysismodules::PairDistance.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_trajectoryanalysis
42 #include "gmxpre.h"
44 #include "pairdist.h"
46 #include <cmath>
48 #include <algorithm>
49 #include <limits>
50 #include <string>
51 #include <vector>
53 #include "gromacs/analysisdata/analysisdata.h"
54 #include "gromacs/analysisdata/modules/plot.h"
55 #include "gromacs/options/basicoptions.h"
56 #include "gromacs/options/filenameoption.h"
57 #include "gromacs/options/ioptionscontainer.h"
58 #include "gromacs/selection/nbsearch.h"
59 #include "gromacs/selection/selection.h"
60 #include "gromacs/selection/selectionoption.h"
61 #include "gromacs/trajectory/trajectoryframe.h"
62 #include "gromacs/trajectoryanalysis/analysissettings.h"
63 #include "gromacs/trajectoryanalysis/topologyinformation.h"
64 #include "gromacs/utility/arrayref.h"
65 #include "gromacs/utility/exceptions.h"
66 #include "gromacs/utility/stringutil.h"
68 namespace gmx
71 namespace analysismodules
74 namespace
77 //! \addtogroup module_trajectoryanalysis
78 //! \{
80 //! Enum value to store the selected value for `-type`.
81 enum DistanceType
83 eDistanceType_Min,
84 eDistanceType_Max
87 //! Enum value to store the selected value for `-refgrouping`/`-selgrouping`.
88 enum GroupType
90 eGroupType_All,
91 eGroupType_Residue,
92 eGroupType_Molecule,
93 eGroupType_None
96 //! Strings corresponding to DistanceType.
97 const char *const c_distanceTypes[] = { "min", "max" };
98 //! Strings corresponding to GroupType.
99 const char *const c_groupTypes[] = { "all", "res", "mol", "none" };
101 /*! \brief
102 * Implements `gmx pairdist` trajectory analysis module.
104 class PairDistance : public TrajectoryAnalysisModule
106 public:
107 PairDistance();
109 virtual void initOptions(IOptionsContainer *options,
110 TrajectoryAnalysisSettings *settings);
111 virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
112 const TopologyInformation &top);
114 virtual TrajectoryAnalysisModuleDataPointer startFrames(
115 const AnalysisDataParallelOptions &opt,
116 const SelectionCollection &selections);
117 virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
118 TrajectoryAnalysisModuleData *pdata);
120 virtual void finishAnalysis(int nframes);
121 virtual void writeOutput();
123 private:
124 /*! \brief
125 * Computed distances as a function of time.
127 * There is one data set for each selection in `sel_`.
128 * Within each data set, there is one column for each distance to be
129 * computed, as explained in the `-h` text.
131 AnalysisData distances_;
133 /*! \brief
134 * Reference selection to compute distances to.
136 * mappedId() identifies the group (of type `refGroupType_`) into which
137 * each position belogs.
139 Selection refSel_;
140 /*! \brief
141 * Selections to compute distances from.
143 * mappedId() identifies the group (of type `selGroupType_`) into which
144 * each position belogs.
146 SelectionList sel_;
148 std::string fnDist_;
150 double cutoff_;
151 DistanceType distanceType_;
152 GroupType refGroupType_;
153 GroupType selGroupType_;
155 //! Number of groups in `refSel_`.
156 int refGroupCount_;
157 //! Maximum number of pairs of groups for one selection.
158 int maxGroupCount_;
159 //! Initial squared distance for distance accumulation.
160 real initialDist2_;
161 //! Cutoff squared for use in the actual calculation.
162 real cutoff2_;
164 //! Neighborhood search object for the pair search.
165 AnalysisNeighborhood nb_;
167 // Copy and assign disallowed by base.
170 PairDistance::PairDistance()
171 : cutoff_(0.0), distanceType_(eDistanceType_Min),
172 refGroupType_(eGroupType_All), selGroupType_(eGroupType_All),
173 refGroupCount_(0), maxGroupCount_(0), initialDist2_(0.0), cutoff2_(0.0)
175 registerAnalysisDataset(&distances_, "dist");
179 void
180 PairDistance::initOptions(IOptionsContainer *options, TrajectoryAnalysisSettings *settings)
182 static const char *const desc[] = {
183 "[THISMODULE] calculates pairwise distances between one reference",
184 "selection (given with [TT]-ref[tt]) and one or more other selections",
185 "(given with [TT]-sel[tt]). It can calculate either the minimum",
186 "distance (the default), or the maximum distance (with",
187 "[TT]-type max[tt]). Distances to each selection provided with",
188 "[TT]-sel[tt] are computed independently.[PAR]",
189 "By default, the global minimum/maximum distance is computed.",
190 "To compute more distances (e.g., minimum distances to each residue",
191 "in [TT]-ref[tt]), use [TT]-refgrouping[tt] and/or [TT]-selgrouping[tt]",
192 "to specify how the positions within each selection should be",
193 "grouped.[PAR]",
194 "Computed distances are written to the file specified with [TT]-o[tt].",
195 "If there are N groups in [TT]-ref[tt] and M groups in the first",
196 "selection in [TT]-sel[tt], then the output contains N*M columns",
197 "for the first selection. The columns contain distances like this:",
198 "r1-s1, r2-s1, ..., r1-s2, r2-s2, ..., where rn is the n'th group",
199 "in [TT]-ref[tt] and sn is the n'th group in the other selection.",
200 "The distances for the second selection comes as separate columns",
201 "after the first selection, and so on. If some selections are",
202 "dynamic, only the selected positions are used in the computation",
203 "but the same number of columns is always written out. If there",
204 "are no positions contributing to some group pair, then the cutoff",
205 "value is written (see below).[PAR]",
206 "[TT]-cutoff[tt] sets a cutoff for the computed distances.",
207 "If the result would contain a distance over the cutoff, the cutoff",
208 "value is written to the output file instead. By default, no cutoff",
209 "is used, but if you are not interested in values beyond a cutoff,",
210 "or if you know that the minimum distance is smaller than a cutoff,",
211 "you should set this option to allow the tool to use grid-based",
212 "searching and be significantly faster.[PAR]",
213 "If you want to compute distances between fixed pairs,",
214 "[gmx-distance] may be a more suitable tool."
217 settings->setHelpText(desc);
219 options->addOption(FileNameOption("o").filetype(eftPlot).outputFile().required()
220 .store(&fnDist_).defaultBasename("dist")
221 .description("Distances as function of time"));
223 options->addOption(DoubleOption("cutoff").store(&cutoff_)
224 .description("Maximum distance to consider"));
225 options->addOption(EnumOption<DistanceType>("type").store(&distanceType_)
226 .enumValue(c_distanceTypes)
227 .description("Type of distances to calculate"));
228 options->addOption(EnumOption<GroupType>("refgrouping").store(&refGroupType_)
229 .enumValue(c_groupTypes)
230 .description("Grouping of -ref positions to compute the min/max over"));
231 options->addOption(EnumOption<GroupType>("selgrouping").store(&selGroupType_)
232 .enumValue(c_groupTypes)
233 .description("Grouping of -sel positions to compute the min/max over"));
235 options->addOption(SelectionOption("ref").store(&refSel_).required()
236 .description("Reference positions to calculate distances from"));
237 options->addOption(SelectionOption("sel").storeVector(&sel_).required().multiValue()
238 .description("Positions to calculate distances for"));
241 //! Helper function to initialize the grouping for a selection.
242 int initSelectionGroups(Selection *sel, const gmx_mtop_t *top, int type)
244 e_index_t indexType = INDEX_UNKNOWN;
245 switch (type)
247 case eGroupType_All: indexType = INDEX_ALL; break;
248 case eGroupType_Residue: indexType = INDEX_RES; break;
249 case eGroupType_Molecule: indexType = INDEX_MOL; break;
250 case eGroupType_None: indexType = INDEX_ATOM; break;
252 return sel->initOriginalIdsToGroup(top, indexType);
256 void
257 PairDistance::initAnalysis(const TrajectoryAnalysisSettings &settings,
258 const TopologyInformation &top)
260 refGroupCount_ = initSelectionGroups(&refSel_, top.mtop(), refGroupType_);
262 maxGroupCount_ = 0;
263 distances_.setDataSetCount(sel_.size());
264 for (size_t i = 0; i < sel_.size(); ++i)
266 const int selGroupCount
267 = initSelectionGroups(&sel_[i], top.mtop(), selGroupType_);
268 const int columnCount = refGroupCount_ * selGroupCount;
269 maxGroupCount_ = std::max(maxGroupCount_, columnCount);
270 distances_.setColumnCount(i, columnCount);
273 if (!fnDist_.empty())
275 AnalysisDataPlotModulePointer plotm(
276 new AnalysisDataPlotModule(settings.plotSettings()));
277 plotm->setFileName(fnDist_);
278 if (distanceType_ == eDistanceType_Max)
280 plotm->setTitle("Maximum distance");
282 else
284 plotm->setTitle("Minimum distance");
286 // TODO: Figure out and add a descriptive subtitle and/or a longer
287 // title and/or better legends based on the grouping and the reference
288 // selection.
289 plotm->setXAxisIsTime();
290 plotm->setYLabel("Distance (nm)");
291 for (size_t g = 0; g < sel_.size(); ++g)
293 plotm->appendLegend(sel_[g].name());
295 distances_.addModule(plotm);
298 nb_.setCutoff(cutoff_);
299 if (cutoff_ <= 0.0)
301 cutoff_ = 0.0;
302 initialDist2_ = std::numeric_limits<real>::max();
304 else
306 initialDist2_ = cutoff_ * cutoff_;
308 if (distanceType_ == eDistanceType_Max)
310 initialDist2_ = 0.0;
312 cutoff2_ = cutoff_ * cutoff_;
315 /*! \brief
316 * Temporary memory for use within a single-frame calculation.
318 class PairDistanceModuleData : public TrajectoryAnalysisModuleData
320 public:
321 /*! \brief
322 * Reserves memory for the frame-local data.
324 PairDistanceModuleData(TrajectoryAnalysisModule *module,
325 const AnalysisDataParallelOptions &opt,
326 const SelectionCollection &selections,
327 int refGroupCount,
328 const Selection &refSel,
329 int maxGroupCount)
330 : TrajectoryAnalysisModuleData(module, opt, selections)
332 distArray_.resize(maxGroupCount);
333 countArray_.resize(maxGroupCount);
334 refCountArray_.resize(refGroupCount);
335 if (!refSel.isDynamic())
337 initRefCountArray(refSel);
341 virtual void finish() { finishDataHandles(); }
343 /*! \brief
344 * Computes the number of positions in each group in \p refSel
345 * and stores them into `refCountArray_`.
347 void initRefCountArray(const Selection &refSel)
349 std::fill(refCountArray_.begin(), refCountArray_.end(), 0);
350 int refPos = 0;
351 while (refPos < refSel.posCount())
353 const int refIndex = refSel.position(refPos).mappedId();
354 const int startPos = refPos;
355 ++refPos;
356 while (refPos < refSel.posCount()
357 && refSel.position(refPos).mappedId() == refIndex)
359 ++refPos;
361 refCountArray_[refIndex] = refPos - startPos;
365 /*! \brief
366 * Squared distance between each group
368 * One entry for each group pair for the current selection.
369 * Enough memory is allocated to fit the largest calculation selection.
370 * This is needed to support neighborhood searching, which may not
371 * return the pairs in order: for each group pair, we need to search
372 * through all the position pairs and update this array to find the
373 * minimum/maximum distance between them.
375 std::vector<real> distArray_;
376 /*! \brief
377 * Number of pairs within the cutoff that have contributed to the value
378 * in `distArray_`.
380 * This is needed to identify whether there were any pairs inside the
381 * cutoff and whether there were additional pairs outside the cutoff
382 * that were not covered by the neihborhood search.
384 std::vector<int> countArray_;
385 /*! \brief
386 * Number of positions within each reference group.
388 * This is used to more efficiently compute the total number of pairs
389 * (for comparison with `countArray_`), as otherwise these numbers
390 * would need to be recomputed for each selection.
392 std::vector<int> refCountArray_;
395 TrajectoryAnalysisModuleDataPointer PairDistance::startFrames(
396 const AnalysisDataParallelOptions &opt,
397 const SelectionCollection &selections)
399 return TrajectoryAnalysisModuleDataPointer(
400 new PairDistanceModuleData(this, opt, selections, refGroupCount_,
401 refSel_, maxGroupCount_));
404 void
405 PairDistance::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
406 TrajectoryAnalysisModuleData *pdata)
408 AnalysisDataHandle dh = pdata->dataHandle(distances_);
409 const Selection &refSel = pdata->parallelSelection(refSel_);
410 const SelectionList &sel = pdata->parallelSelections(sel_);
411 PairDistanceModuleData &frameData = *static_cast<PairDistanceModuleData *>(pdata);
412 std::vector<real> &distArray = frameData.distArray_;
413 std::vector<int> &countArray = frameData.countArray_;
415 if (cutoff_ > 0.0 && refSel.isDynamic())
417 // Count the number of reference positions in each group, so that
418 // this does not need to be computed again for each selection.
419 // This is needed only if it is possible that the neighborhood search
420 // does not cover all the pairs, hence the cutoff > 0.0 check.
421 // If refSel is static, then the array contents are static as well,
422 // and it has been initialized in the constructor of the data object.
423 frameData.initRefCountArray(refSel);
425 const std::vector<int> &refCountArray = frameData.refCountArray_;
427 AnalysisNeighborhoodSearch nbsearch = nb_.initSearch(pbc, refSel);
428 dh.startFrame(frnr, fr.time);
429 for (size_t g = 0; g < sel.size(); ++g)
431 const int columnCount = distances_.columnCount(g);
432 std::fill(distArray.begin(), distArray.begin() + columnCount, initialDist2_);
433 std::fill(countArray.begin(), countArray.begin() + columnCount, 0);
435 // Accumulate the number of position pairs within the cutoff and the
436 // min/max distance for each group pair.
437 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g]);
438 AnalysisNeighborhoodPair pair;
439 while (pairSearch.findNextPair(&pair))
441 const SelectionPosition &refPos = refSel.position(pair.refIndex());
442 const SelectionPosition &selPos = sel[g].position(pair.testIndex());
443 const int refIndex = refPos.mappedId();
444 const int selIndex = selPos.mappedId();
445 const int index = selIndex * refGroupCount_ + refIndex;
446 const real r2 = pair.distance2();
447 if (distanceType_ == eDistanceType_Min)
449 if (distArray[index] > r2)
451 distArray[index] = r2;
454 else
456 if (distArray[index] < r2)
458 distArray[index] = r2;
461 ++countArray[index];
464 // If it is possible that positions outside the cutoff (or lack of
465 // them) affects the result, then we need to check whether there were
466 // any. This is necessary for two cases:
467 // - With max distances, if there are pairs outside the cutoff, then
468 // the computed distance should be equal to the cutoff instead of
469 // the largest distance that was found above.
470 // - With either distance type, if all pairs are outside the cutoff,
471 // then countArray must be updated so that the presence flag
472 // in the output data reflects the dynamic selection status, not
473 // whether something was inside the cutoff or not.
474 if (cutoff_ > 0.0)
476 int selPos = 0;
477 // Loop over groups in this selection (at start, selPos is always
478 // the first position in the next group).
479 while (selPos < sel[g].posCount())
481 // Count the number of positions in this group.
482 const int selIndex = sel[g].position(selPos).mappedId();
483 const int startPos = selPos;
484 ++selPos;
485 while (selPos < sel[g].posCount()
486 && sel[g].position(selPos).mappedId() == selIndex)
488 ++selPos;
490 const int count = selPos - startPos;
491 // Check all group pairs that contain this group.
492 for (int i = 0; i < refGroupCount_; ++i)
494 const int index = selIndex * refGroupCount_ + i;
495 const int totalCount = refCountArray[i] * count;
496 // If there were positions outside the cutoff,
497 // update the distance if necessary and the count.
498 if (countArray[index] < totalCount)
500 if (distanceType_ == eDistanceType_Max)
502 distArray[index] = cutoff2_;
504 countArray[index] = totalCount;
510 // Write the computed distances to the output data.
511 dh.selectDataSet(g);
512 for (int i = 0; i < columnCount; ++i)
514 if (countArray[i] > 0)
516 dh.setPoint(i, std::sqrt(distArray[i]));
518 else
520 // If there are no contributing positions, write out the cutoff
521 // value.
522 dh.setPoint(i, cutoff_, false);
526 dh.finishFrame();
529 void
530 PairDistance::finishAnalysis(int /*nframes*/)
534 void
535 PairDistance::writeOutput()
539 //! \}
541 } // namespace
543 const char PairDistanceInfo::name[] = "pairdist";
544 const char PairDistanceInfo::shortDescription[] =
545 "Calculate pairwise distances between groups of positions";
547 TrajectoryAnalysisModulePointer PairDistanceInfo::create()
549 return TrajectoryAnalysisModulePointer(new PairDistance);
552 } // namespace analysismodules
554 } // namespace gmx