Merge branch 'origin/release-2020' into master
[gromacs.git] / src / gromacs / trajectoryanalysis.h
blob2130ed9bd63f239b91d794cfffa196428ef7c289
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2011,2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \defgroup module_trajectoryanalysis Framework for Trajectory Analysis (trajectoryanalysis)
36 * \ingroup group_analysismodules
37 * \brief
38 * Provides functionality for implementing trajectory analysis modules.
40 * This module implements a framework for implementing flexible trajectory
41 * analysis routines. It provides a base class for implementing analysis as
42 * reusable modules that can be used from different contexts and can also
43 * support per-frame parallelization. It integrally uses functionality from the
44 * following modules:
45 * - \ref module_options
46 * - \ref module_analysisdata
47 * - \ref module_selection
49 * The main interface of this module is the gmx::TrajectoryAnalysisModule class.
50 * Analysis modules should derive from this class, and override the necessary
51 * virtual methods to provide the actual initialization and analysis routines.
52 * Classes gmx::TrajectoryAnalysisSettings and gmx::TopologyInformation (in
53 * addition to classes declared in the above-mentioned modules) are used to pass
54 * information to and from these methods. gmx::TrajectoryAnalysisModuleData can
55 * be used in advanced scenarios where the tool requires custom thread-local
56 * data for parallel analysis.
58 * The sequence charts below provide an overview of how the trajectory
59 * analysis modules typically interact with other components.
60 * The first chart provides an overview of the call sequence of the most
61 * important methods in gmx::TrajectoryAnalysisModule.
62 * There is a runner, which is responsible for doing the work that is shared
63 * between all trajectory analysis (such as reading the trajectory and
64 * processing selections). The runner then calls different methods in the
65 * analysis module at appropriate points to perform the module-specific tasks.
66 * The analysis module is responsible for creating and managing
67 * gmx::AnalysisData objects, and the chart shows the most important
68 * interactions with this module as well. However, the runner takes
69 * responsibility for calling gmx::AnalysisData::finishFrameSerial().
70 * Interactions with options (for command-line option processing) and
71 * selections are not shown for brevity: see \ref module_options for an overview
72 * of how options work, and the second chart for a more detailed view of how
73 * selections are accessed from an analysis module.
74 * \msc
75 * runner,
76 * module [ URL="\ref gmx::TrajectoryAnalysisModule" ],
77 * data [ label="analysis data", URL="\ref module_analysisdata" ];
79 * runner box module [ label="caller owns runner and module objects" ];
80 * module => data [ label="create (in constructor)" ];
81 * runner => module [ label="initOptions()",
82 * URL="\ref gmx::TrajectoryAnalysisModule::initOptions()" ];
83 * runner => runner [ label="parse user input" ];
84 * runner => module [ label="optionsFinished()",
85 * URL="\ref gmx::TrajectoryAnalysisModule::optionsFinished()" ];
86 * runner => runner [ label="initialize topology\nand selections" ];
87 * runner => module [ label="initAnalysis()",
88 * URL="\ref gmx::TrajectoryAnalysisModule::initAnalysis()" ];
89 * module => data [ label="initialize" ];
90 * runner => runner [ label="read frame 0" ];
91 * runner => module [ label="initAfterFirstFrame()",
92 * URL="\ref gmx::TrajectoryAnalysisModule::initAfterFirstFrame()" ];
93 * --- [ label="loop over frames starts" ];
94 * runner => runner [ label="initialize frame 0" ];
95 * runner => module [ label="analyzeFrame(0)",
96 * URL="\ref gmx::TrajectoryAnalysisModule::analyzeFrame()" ];
97 * module => data [ label="add data",
98 * URL="\ref gmx::AnalysisDataHandle" ];
99 * module => data [ label="finishFrame()",
100 * URL="\ref gmx::AnalysisDataHandle::finishFrame()" ];
101 * runner => data [ label="finishFrameSerial()",
102 * URL="\ref gmx::AnalysisData::finishFrameSerial()" ];
103 * runner => runner [ label="read and initialize frame 1" ];
104 * runner => module [ label="analyzeFrame(1)",
105 * URL="\ref gmx::TrajectoryAnalysisModule::analyzeFrame()" ];
106 * ...;
107 * --- [ label="loop over frames ends" ];
108 * runner => module [ label="finishAnalysis()",
109 * URL="\ref gmx::TrajectoryAnalysisModule::finishAnalysis()" ];
110 * module => data [ label="post-process data" ];
111 * runner => module [ label="writeOutput()",
112 * URL="\ref gmx::TrajectoryAnalysisModule::writeOutput()" ];
113 * \endmsc
115 * The second chart below shows the interaction with selections and options
116 * with focus on selection options. The gmx::TrajectoryAnalysisModule object
117 * creates one or more gmx::Selection variables, and uses gmx::SelectionOption
118 * to indicate them as the destination for selections. This happens in
119 * gmx::TrajectoryAnalysisModule::initOptions(). After the options have been
120 * parsed (includes parsing any options present on the command-line or read
121 * from files, but not those provided interactively),
122 * gmx::TrajectoryAnalysisModule::optionsFinished() can adjust the selections
123 * using gmx::SelectionOptionInfo. This is done in this order to allow the
124 * analysis module to influence the interactive prompt of selections based on
125 * what command-line options were given. After optionsFinished() returns, the
126 * interactive selection prompt is presented if necessary. After this point,
127 * all access to selections from the analysis module is through the
128 * gmx::Selection variables: the runner is responsible for calling methods in
129 * the selection library, and these methods update the content referenced by
130 * the gmx::Selection variables. See documentation of
131 * gmx::TrajectoryAnalysisModule for details of what the selections contain at
132 * each point.
133 * \msc
134 * runner,
135 * options [ label="Options", URL="\ref module_options" ],
136 * selection [ label="selections", URL="\ref module_selection" ],
137 * module [ label="module", URL="\ref gmx::TrajectoryAnalysisModule" ];
139 * runner box selection [ label="all these objects are owned by the framework" ];
140 * runner => module [ label="initOptions()",
141 * URL="\ref gmx::TrajectoryAnalysisModule::initOptions()" ];
142 * module => options [ label="addOption(SelectionOption)",
143 * URL="\ref gmx::SelectionOption" ];
144 * module => options [ label="addOption() (other options)",
145 * URL="\ref gmx::Options::addOption()" ];
146 * ...;
147 * runner << module;
148 * runner => options [ label="parse command-line parameters" ];
149 * options => selection [ label="parse selections" ];
150 * selection -> module [ label="initialize Selection variables",
151 * URL="\ref gmx::Selection" ];
152 * runner << options;
153 * runner => module [ label="optionsFinished()",
154 * URL="\ref gmx::TrajectoryAnalysisModule::optionsFinished()" ];
155 * module => selection [ label="adjust SelectionOptions",
156 * URL="\ref gmx::SelectionOptionInfo" ];
157 * runner << module;
158 * runner => selection [ label="prompt missing selections" ];
159 * selection -> module [ label="initialize Selection variables",
160 * URL="\ref gmx::Selection" ];
161 * runner => selection [ label="compile selections" ];
162 * selection -> module [ label="change content referenced\nby Selection variables" ];
163 * runner => module [ label="initAnalysis()",
164 * URL="\ref gmx::TrajectoryAnalysisModule::initAnalysis()" ];
165 * ...;
166 * --- [ label="loop over frames starts" ];
167 * runner => runner [ label="read and initialize frame 0" ];
168 * runner => selection [ label="evaluate selections for frame 0" ];
169 * selection -> module [ label="change content referenced\nby Selection variables" ];
170 * ...;
171 * \endmsc
173 * The final chart shows the flow within the frame loop in the case of parallel
174 * (threaded) execution and the interaction with the \ref module_analysisdata
175 * module in this case. Although parallelization has not yet been implemented,
176 * it has influenced the design and needs to be understood if one wants to
177 * write modules that can take advantage of the parallelization once it gets
178 * implemented. The parallelization takes place over frames: analyzing a single
179 * frame is one unit of work. When the frame loop is started,
180 * gmx::TrajectoryAnalysisModule::startFrames() is called for each thread, and
181 * initializes an object that contains thread-local data needed during the
182 * analysis. This includes selection information, gmx::AnalysisDataHandle
183 * objects, and possibly other module-specific variables. Then, the runner
184 * reads the frames in sequence and passes the work into the different threads,
185 * together with the appropriate thread-local data object.
186 * The gmx::TrajectoryAnalysisModule::analyzeFrame() calls are only allowed to modify
187 * the thread-local data object; everything else is read-only. For any output,
188 * they pass the information to gmx::AnalysisData, which together with the
189 * runner takes care of ordering the data from different frames such that it
190 * gets processed in the right order.
191 * When all frames are analyzed, gmx::TrajectoryAnalysisModule::finishFrames()
192 * is called for each thread-local data object to destroy them and to
193 * accumulate possible results from them into the main
194 * gmx::TrajectoryAnalysisModule object.
195 * Note that in the diagram, some part of the work attributed to the runner
196 * (e.g., evaluating selections) will actually be carried out in the analysis
197 * threads before gmx::TrajectoryAnalysisModule::analyzeFrame() gets called.
198 * \msc
199 * runner,
200 * module [ label="module object" ],
201 * thread1 [ label="analysis\nthread 1" ],
202 * thread2 [ label="analysis\nthread 2" ],
203 * data [ label="analysis data", URL="\ref module_analysisdata" ];
205 * module box thread2 [ label="single TrajectoryAnalysisModule object",
206 * URL="\ref gmx::TrajectoryAnalysisModule" ];
207 * ...;
208 * --- [ label="loop over frames starts" ];
209 * runner => thread1 [ label="startFrames()",
210 * URL="\ref gmx::TrajectoryAnalysisModule::startFrames()" ];
211 * thread1 => data [ label="startData()",
212 * URL="\ref gmx::AnalysisData::startData()" ];
213 * runner << thread1 [ label="pdata1" ];
214 * runner => thread2 [ label="startFrames()",
215 * URL="\ref gmx::TrajectoryAnalysisModule::startFrames()" ];
216 * thread2 => data [ label="startData()",
217 * URL="\ref gmx::AnalysisData::startData()" ];
218 * runner << thread2 [ label="pdata2" ];
219 * |||;
220 * runner => runner [ label="initialize frame 0" ];
221 * runner => thread1 [ label="analyzeFrame(0, pdata1)",
222 * URL="\ref gmx::TrajectoryAnalysisModule::analyzeFrame()" ];
223 * runner => runner [ label="read and initialize frame 1" ];
224 * runner => thread2 [ label="analyzeFrame(1, pdata2)",
225 * URL="\ref gmx::TrajectoryAnalysisModule::analyzeFrame()" ];
226 * thread1 => data [ label="add data",
227 * URL="\ref gmx::AnalysisDataHandle" ];
228 * thread2 => data [ label="add data",
229 * URL="\ref gmx::AnalysisDataHandle" ];
230 * thread2 => data [ label="finishFrame(1)",
231 * URL="\ref gmx::AnalysisDataHandle::finishFrame()" ];
232 * runner << thread2 [ label="analyzeFrame() (frame 1)" ];
233 * runner => runner [ label="read and initialize frame 2" ];
234 * runner => thread2 [ label="analyzeFrame(2, pdata2)",
235 * URL="\ref gmx::TrajectoryAnalysisModule::analyzeFrame()" ];
236 * thread1 => data [ label="finishFrame(0)",
237 * URL="\ref gmx::AnalysisDataHandle::finishFrame()" ];
238 * runner << thread1 [ label="analyzeFrame() (frame 0)" ];
239 * runner => data [ label="finishFrameSerial() (frame 0)",
240 * URL="\ref gmx::AnalysisData::finishFrameSerial()" ];
241 * runner => data [ label="finishFrameSerial() (frame 1)",
242 * URL="\ref gmx::AnalysisData::finishFrameSerial()" ];
243 * ...;
244 * runner => thread1 [ label="finishFrames(pdata1)",
245 * URL="\ref gmx::TrajectoryAnalysisModule::finishFrames()" ];
246 * thread1 => data [ label="finishData()",
247 * URL="\ref gmx::AnalysisData::finishData()" ];
248 * thread1 -> module [ label="accumulate results" ];
249 * runner << thread1;
250 * runner => thread2 [ label="finishFrames(pdata2)",
251 * URL="\ref gmx::TrajectoryAnalysisModule::finishFrames()" ];
252 * thread2 => data [ label="finishData()",
253 * URL="\ref gmx::AnalysisData::finishData()" ];
254 * thread2 -> module [ label="accumulate results" ];
255 * runner << thread2;
256 * --- [ label="loop over frames ends" ];
257 * ...;
258 * \endmsc
260 * In addition to the framework for defining analysis modules, this module also
261 * provides gmx::TrajectoryAnalysisCommandLineRunner, which implements a
262 * command-line program that runs a certain analysis module.
264 * Internally, the module also defines a set of trajectory analysis modules that
265 * can currently be accessed only through gmx::registerTrajectoryAnalysisModules.
267 * For an example of how to implement an analysis tool using the framework, see
268 * \ref template.cpp.
270 * \author Teemu Murtola <teemu.murtola@gmail.com>
272 /*! \file
273 * \brief
274 * Public API convenience header for trajectory analysis framework
276 * \author Teemu Murtola <teemu.murtola@gmail.com>
277 * \inpublicapi
278 * \ingroup module_trajectoryanalysis
280 #ifndef GMX_TRAJECTORYANALYSIS_H
281 #define GMX_TRAJECTORYANALYSIS_H
283 #include "gromacs/analysisdata.h"
284 #include "gromacs/options.h"
285 #include "gromacs/selection.h"
286 #include "gromacs/selection/nbsearch.h"
287 #include "gromacs/topology/topology.h"
288 #include "gromacs/trajectory/trajectoryframe.h"
289 #include "gromacs/trajectoryanalysis/analysismodule.h"
290 #include "gromacs/trajectoryanalysis/analysissettings.h"
291 #include "gromacs/trajectoryanalysis/cmdlinerunner.h"
292 #include "gromacs/utility/arrayref.h"
293 #include "gromacs/utility/exceptions.h"
295 #endif