Separate multisim from commrec
[gromacs.git] / src / gromacs / mdrunutility / handlerestart.cpp
blob91db3478f25e5439a178930c6ef228aa4d1e55d0
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \internal \file
37 * \brief This file declares functions for mdrun to call to manage the
38 * details of doing a restart (ie. reading checkpoints, appending
39 * output files).
41 * \todo Clean up the error-prone logic here. Add doxygen.
43 * \author Berk Hess <hess@kth.se>
44 * \author Erik Lindahl <erik@kth.se>
45 * \author Mark Abraham <mark.j.abraham@gmail.com>
47 * \ingroup module_mdrunutility
50 #include "gmxpre.h"
52 #include "handlerestart.h"
54 #include <string.h>
56 #include "gromacs/commandline/filenm.h"
57 #include "gromacs/fileio/checkpoint.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/gmxlib/network.h"
60 #include "gromacs/mdlib/main.h"
61 #include "gromacs/mdtypes/commrec.h"
62 #include "gromacs/utility/basedefinitions.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/smalloc.h"
66 /*! \brief Search for \p fnm_cp in fnm and return true iff found
68 * \todo This could be implemented sanely with a for loop. */
69 static gmx_bool exist_output_file(const char *fnm_cp, int nfile, const t_filenm fnm[])
71 int i;
73 /* Check if the output file name stored in the checkpoint file
74 * is one of the output file names of mdrun.
76 i = 0;
77 while (i < nfile &&
78 !(is_output(&fnm[i]) && strcmp(fnm_cp, fnm[i].fns[0]) == 0))
80 i++;
83 return (i < nfile && gmx_fexist(fnm_cp));
86 /*! \brief Support handling restarts
88 * \todo Clean this up (next patch)
90 * Read just the simulation 'generation' and with bTryToAppendFiles check files.
91 * This is is needed at the beginning of mdrun,
92 * to be able to rename the logfile correctly.
93 * When file appending is requested, checks which output files are present,
94 * and issue a fatal error if some are not.
95 * Upon return, bAddPart will tell whether the simulation part
96 * needs to be added to the output file name, i.e. when we are doing checkpoint
97 * continuation without appending.
99 * This routine cannot print tons of data, since it is called before
100 * the log file is opened. */
101 static void
102 read_checkpoint_data(const char *filename, int *simulation_part,
103 t_commrec *cr,
104 gmx_bool bTryToAppendFiles,
105 int nfile, const t_filenm fnm[],
106 const char *part_suffix,
107 gmx_bool *bAddPart,
108 bool *bDoAppendFiles)
110 t_fileio *fp;
111 int nfiles;
112 gmx_file_position_t *outputfiles;
113 int nexist, f;
114 char *fn, suf_up[STRLEN];
116 *bDoAppendFiles = FALSE;
118 if (SIMMASTER(cr))
120 if (!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename, "r")) ))
122 *simulation_part = 0;
123 /* We have already warned the user that no checkpoint file existed before, don't
124 * need to do it again
127 else
129 read_checkpoint_simulation_part_and_filenames(fp,
130 simulation_part,
131 &nfiles,
132 &outputfiles);
134 if (bTryToAppendFiles)
136 nexist = 0;
137 for (f = 0; f < nfiles; f++)
139 if (exist_output_file(outputfiles[f].filename, nfile, fnm))
141 nexist++;
144 if (nexist == nfiles)
146 *bDoAppendFiles = bTryToAppendFiles;
148 else
150 // If we get here, the user requested restarting from a checkpoint file, that checkpoint
151 // file was found (so it is not the first part of a new run), but we are still missing
152 // some or all checkpoint files. In this case we issue a fatal error since there are
153 // so many special cases we cannot keep track of, and better safe than sorry.
154 fprintf(stderr,
155 "Output file appending has been requested,\n"
156 "but some output files listed in the checkpoint file %s\n"
157 "are not present or not named as the output files by the current program:\n",
158 filename);
159 fprintf(stderr, "Expect output files present:\n");
160 for (f = 0; f < nfiles; f++)
162 if (exist_output_file(outputfiles[f].filename,
163 nfile, fnm))
165 fprintf(stderr, " %s\n", outputfiles[f].filename);
168 fprintf(stderr, "\n");
169 fprintf(stderr, "Expected output files not present or named differently:\n");
170 for (f = 0; f < nfiles; f++)
172 if (!exist_output_file(outputfiles[f].filename,
173 nfile, fnm))
175 fprintf(stderr, " %s\n", outputfiles[f].filename);
179 gmx_fatal(FARGS,
180 "File appending requested, but %d of the %d output files are not present or are named differently. "
181 "For safety reasons, GROMACS-2016 and later only allows file appending to be used when all files "
182 "have the same names as they had in the original run. "
183 "Checkpointing is merely intended for plain continuation of runs. "
184 "For safety reasons you must specify all file names (e.g. with -deffnm), "
185 "and all these files must match the names used in the run prior to checkpointing "
186 "since we will append to them by default. If you used -deffnm and the files listed above as not "
187 "present are in fact present, try explicitly specifying them in respective mdrun options. "
188 "If the files are not available, you "
189 "can add the -noappend flag to mdrun and write separate new parts. "
190 "For mere concatenation of files, you should use the gmx trjcat tool instead.",
191 nfiles-nexist, nfiles);
195 if (*bDoAppendFiles)
197 if (nfiles == 0)
199 gmx_fatal(FARGS, "File appending requested, but no output file information is stored in the checkpoint file");
201 fn = outputfiles[0].filename;
202 if (strlen(fn) < 4 ||
203 gmx_strcasecmp(fn+strlen(fn)-4, ftp2ext(efLOG)) == 0)
205 gmx_fatal(FARGS, "File appending requested, but the log file is not the first file listed in the checkpoint file");
207 /* Set bAddPart to whether the suffix string '.part' is present
208 * in the log file name.
210 strcpy(suf_up, part_suffix);
211 upstring(suf_up);
212 *bAddPart = (strstr(fn, part_suffix) != nullptr ||
213 strstr(fn, suf_up) != nullptr);
216 sfree(outputfiles);
219 if (PAR(cr))
221 // Make sure all settings are in sync
222 gmx_bcast(sizeof(*simulation_part), simulation_part, cr);
224 if (*simulation_part > 0 && bTryToAppendFiles)
226 gmx_bcast(sizeof(*bDoAppendFiles), bDoAppendFiles, cr);
227 gmx_bcast(sizeof(*bAddPart), bAddPart, cr);
232 /* This routine cannot print tons of data, since it is called before the log file is opened. */
233 void
234 handleRestart(t_commrec *cr,
235 const gmx_multisim_t *ms,
236 gmx_bool bTryToAppendFiles,
237 const int NFILE,
238 t_filenm fnm[],
239 bool *bDoAppendFiles,
240 bool *bStartFromCpt)
242 gmx_bool bAddPart;
243 int sim_part, sim_part_fn;
244 const char *part_suffix = ".part";
245 FILE *fpmulti;
248 /* Check if there is ANY checkpoint file available */
249 sim_part = 1;
250 sim_part_fn = sim_part;
251 if (opt2bSet("-cpi", NFILE, fnm))
253 bAddPart = !bTryToAppendFiles;
255 read_checkpoint_data(opt2fn_master("-cpi", NFILE, fnm, cr),
256 &sim_part_fn, cr,
257 bTryToAppendFiles, NFILE, fnm,
258 part_suffix, &bAddPart, bDoAppendFiles);
259 if (sim_part_fn == 0 && isMasterSimMasterRank(ms, cr))
261 fprintf(stdout, "No previous checkpoint file present with -cpi option, assuming this is a new run.\n");
263 else
265 sim_part = sim_part_fn + 1;
268 // Master ranks of multi simulations should check that the
269 // simulation part number is consistent across the
270 // simulations.
271 if (isMultiSim(ms) && MASTER(cr))
273 // Only the master simulation should report on problems.
274 if (isMasterSimMasterRank(ms, cr))
276 /* Log file is not yet available, so if there's a
277 * problem we can only write to stderr. */
278 fpmulti = stderr;
280 else
282 fpmulti = nullptr;
284 check_multi_int(fpmulti, ms, sim_part, "simulation part", TRUE);
287 else
289 *bDoAppendFiles = false;
290 bAddPart = false;
293 *bStartFromCpt = sim_part > 1;
295 if (!*bDoAppendFiles)
297 sim_part_fn = sim_part;
300 if (bAddPart)
302 char suffix[STRLEN];
304 /* Rename all output files (except checkpoint files) */
305 /* create new part name first (zero-filled) */
306 sprintf(suffix, "%s%04d", part_suffix, sim_part_fn);
308 add_suffix_to_output_names(fnm, NFILE, suffix);
309 if (isMasterSimMasterRank(ms, cr))
311 fprintf(stdout, "Checkpoint file is from part %d, new output files will be suffixed '%s'.\n", sim_part-1, suffix);