Fixes #882 - looping bug in trxio.c
[gromacs.git] / src / gmxlib / checkpoint.c
blob0f7c8c71f0993b18e673649359091aa8b98e5d7d
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
15 * And Hey:
16 * Gnomes, ROck Monsters And Chili Sauce
19 /* The source code in this file should be thread-safe.
20 Please keep it that way. */
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
27 #include <string.h>
28 #include <time.h>
30 #ifdef HAVE_SYS_TIME_H
31 #include <sys/time.h>
32 #endif
35 #if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
36 /* _chsize_s */
37 #include <io.h>
38 #include <sys/locking.h>
39 #endif
42 #include "filenm.h"
43 #include "names.h"
44 #include "typedefs.h"
45 #include "smalloc.h"
46 #include "gmxfio.h"
47 #include "xdrf.h"
48 #include "statutil.h"
49 #include "txtdump.h"
50 #include "vec.h"
51 #include "network.h"
52 #include "gmx_random.h"
53 #include "checkpoint.h"
54 #include "futil.h"
55 #include "string2.h"
56 #include <fcntl.h>
59 #ifdef GMX_FAHCORE
60 #include "corewrap.h"
61 #endif
64 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
65 char *
66 gmx_ctime_r(const time_t *clock,char *buf, int n);
69 #define CPT_MAGIC1 171817
70 #define CPT_MAGIC2 171819
72 /* cpt_version should normally only be changed
73 * when the header of footer format changes.
74 * The state data format itself is backward and forward compatible.
75 * But old code can not read a new entry that is present in the file
76 * (but can read a new format when new entries are not present).
78 static const int cpt_version = 12;
81 const char *est_names[estNR]=
83 "FE-lambda",
84 "box", "box-rel", "box-v", "pres_prev",
85 "nosehoover-xi", "thermostat-integral",
86 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
87 "disre_initf", "disre_rm3tav",
88 "orire_initf", "orire_Dtav",
89 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev",
92 enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
94 const char *eeks_names[eeksNR]=
96 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
97 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
100 enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
101 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
102 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
103 eenhENERGY_DELTA_H_NN,
104 eenhENERGY_DELTA_H_LIST,
105 eenhENERGY_DELTA_H_STARTTIME,
106 eenhENERGY_DELTA_H_STARTLAMBDA,
107 eenhNR };
109 const char *eenh_names[eenhNR]=
111 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
112 "energy_sum_sim", "energy_nsum_sim",
113 "energy_nsteps", "energy_nsteps_sim",
114 "energy_delta_h_nn",
115 "energy_delta_h_list",
116 "energy_delta_h_start_time",
117 "energy_delta_h_start_lambda"
122 #if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
123 static int
124 gmx_wintruncate(const char *filename, __int64 size)
126 #ifdef GMX_FAHCORE
127 /*we do this elsewhere*/
128 return 0;
129 #else
130 FILE *fp;
131 int rc;
133 fp=fopen(filename,"rb+");
135 if(fp==NULL)
137 return -1;
140 return _chsize_s( fileno(fp), size);
141 #endif
143 #endif
146 enum { ecprREAL, ecprRVEC, ecprMATRIX };
148 static const char *st_names(int cptp,int ecpt)
150 switch (cptp)
152 case 0: return est_names [ecpt]; break;
153 case 1: return eeks_names[ecpt]; break;
154 case 2: return eenh_names[ecpt]; break;
157 return NULL;
160 static void cp_warning(FILE *fp)
162 fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
165 static void cp_error()
167 gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
170 static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
172 #define CPTSTRLEN 1024
173 bool_t res=0;
175 if (bRead)
177 snew(*s,CPTSTRLEN);
179 res = xdr_string(xd,s,CPTSTRLEN);
180 if (res == 0)
182 cp_error();
184 if (list)
186 fprintf(list,"%s = %s\n",desc,*s);
187 sfree(*s);
191 static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
193 bool_t res=0;
195 res = xdr_int(xd,i);
196 if (res == 0)
198 return -1;
200 if (list)
202 fprintf(list,"%s = %d\n",desc,*i);
204 return 0;
207 static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
209 bool_t res=1;
210 int j;
211 if (list)
213 fprintf(list,"%s = ",desc);
215 for (j=0; j<n && res; j++)
217 res &= xdr_u_char(xd,&i[j]);
218 if (list)
220 fprintf(list,"%02x",i[j]);
223 if (list)
225 fprintf(list,"\n");
227 if (res == 0)
229 return -1;
232 return 0;
235 static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
237 if (do_cpt_int(xd,desc,i,list) < 0)
239 cp_error();
243 static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
245 bool_t res=0;
246 char buf[STEPSTRSIZE];
248 res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
249 if (res == 0)
251 cp_error();
253 if (list)
255 fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
259 static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
261 bool_t res=0;
263 res = xdr_double(xd,f);
264 if (res == 0)
266 cp_error();
268 if (list)
270 fprintf(list,"%s = %f\n",desc,*f);
274 /* If nval >= 0, nval is used; on read this should match the passed value.
275 * If nval n<0, *nptr is used; on read the value is stored in nptr
277 static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
278 int nval,int *nptr,real **v,
279 FILE *list,int erealtype)
281 bool_t res=0;
282 #ifndef GMX_DOUBLE
283 int dtc=xdr_datatype_float;
284 #else
285 int dtc=xdr_datatype_double;
286 #endif
287 real *vp,*va=NULL;
288 float *vf;
289 double *vd;
290 int nf,dt,i;
292 if (list == NULL)
294 if (nval >= 0)
296 nf = nval;
298 else
300 if (nptr == NULL)
302 gmx_incons("*ntpr=NULL in do_cpte_reals_low");
304 nf = *nptr;
307 res = xdr_int(xd,&nf);
308 if (res == 0)
310 return -1;
312 if (list == NULL)
314 if (nval >= 0)
316 if (nf != nval)
318 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
321 else
323 *nptr = nf;
326 dt = dtc;
327 res = xdr_int(xd,&dt);
328 if (res == 0)
330 return -1;
332 if (dt != dtc)
334 fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
335 st_names(cptp,ecpt),xdr_datatype_names[dtc],
336 xdr_datatype_names[dt]);
338 if (list || !(sflags & (1<<ecpt)))
340 snew(va,nf);
341 vp = va;
343 else
345 if (*v == NULL)
347 snew(*v,nf);
349 vp = *v;
351 if (dt == xdr_datatype_float)
353 if (dtc == xdr_datatype_float)
355 vf = (float *)vp;
357 else
359 snew(vf,nf);
361 res = xdr_vector(xd,(char *)vf,nf,
362 (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
363 if (res == 0)
365 return -1;
367 if (dtc != xdr_datatype_float)
369 for(i=0; i<nf; i++)
371 vp[i] = vf[i];
373 sfree(vf);
376 else
378 if (dtc == xdr_datatype_double)
380 vd = (double *)vp;
382 else
384 snew(vd,nf);
386 res = xdr_vector(xd,(char *)vd,nf,
387 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
388 if (res == 0)
390 return -1;
392 if (dtc != xdr_datatype_double)
394 for(i=0; i<nf; i++)
396 vp[i] = vd[i];
398 sfree(vd);
402 if (list)
404 switch (erealtype)
406 case ecprREAL:
407 pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
408 break;
409 case ecprRVEC:
410 pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
411 break;
412 default:
413 gmx_incons("Unknown checkpoint real type");
416 if (va)
418 sfree(va);
421 return 0;
425 /* This function stores n along with the reals for reading,
426 * but on reading it assumes that n matches the value in the checkpoint file,
427 * a fatal error is generated when this is not the case.
429 static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
430 int n,real **v,FILE *list)
432 return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
435 /* This function does the same as do_cpte_reals,
436 * except that on reading it ignores the passed value of *n
437 * and stored the value read from the checkpoint file in *n.
439 static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
440 int *n,real **v,FILE *list)
442 return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
445 static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
446 real *r,FILE *list)
448 int n;
450 return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
453 static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
454 int n,int **v,FILE *list)
456 bool_t res=0;
457 int dtc=xdr_datatype_int;
458 int *vp,*va=NULL;
459 int nf,dt,i;
461 nf = n;
462 res = xdr_int(xd,&nf);
463 if (res == 0)
465 return -1;
467 if (list == NULL && v != NULL && nf != n)
469 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
471 dt = dtc;
472 res = xdr_int(xd,&dt);
473 if (res == 0)
475 return -1;
477 if (dt != dtc)
479 gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
480 st_names(cptp,ecpt),xdr_datatype_names[dtc],
481 xdr_datatype_names[dt]);
483 if (list || !(sflags & (1<<ecpt)) || v == NULL)
485 snew(va,nf);
486 vp = va;
488 else
490 if (*v == NULL)
492 snew(*v,nf);
494 vp = *v;
496 res = xdr_vector(xd,(char *)vp,nf,
497 (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
498 if (res == 0)
500 return -1;
502 if (list)
504 pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
506 if (va)
508 sfree(va);
511 return 0;
514 static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
515 int *i,FILE *list)
517 return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
520 static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
521 int n,double **v,FILE *list)
523 bool_t res=0;
524 int dtc=xdr_datatype_double;
525 double *vp,*va=NULL;
526 int nf,dt,i;
528 nf = n;
529 res = xdr_int(xd,&nf);
530 if (res == 0)
532 return -1;
534 if (list == NULL && nf != n)
536 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
538 dt = dtc;
539 res = xdr_int(xd,&dt);
540 if (res == 0)
542 return -1;
544 if (dt != dtc)
546 gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
547 st_names(cptp,ecpt),xdr_datatype_names[dtc],
548 xdr_datatype_names[dt]);
550 if (list || !(sflags & (1<<ecpt)))
552 snew(va,nf);
553 vp = va;
555 else
557 if (*v == NULL)
559 snew(*v,nf);
561 vp = *v;
563 res = xdr_vector(xd,(char *)vp,nf,
564 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
565 if (res == 0)
567 return -1;
569 if (list)
571 pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
573 if (va)
575 sfree(va);
578 return 0;
581 static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
582 double *r,FILE *list)
584 return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
588 static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
589 int n,rvec **v,FILE *list)
591 int n3;
593 return do_cpte_reals_low(xd,cptp,ecpt,sflags,
594 n*DIM,NULL,(real **)v,list,ecprRVEC);
597 static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
598 matrix v,FILE *list)
600 real *vr;
601 real ret;
603 vr = (real *)&(v[0][0]);
604 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
605 DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
607 if (list && ret == 0)
609 pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
612 return ret;
615 static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
616 int n,matrix **v,FILE *list)
618 bool_t res=0;
619 matrix *vp,*va=NULL;
620 real *vr;
621 int nf,i,j,k;
622 int ret;
624 nf = n;
625 res = xdr_int(xd,&nf);
626 if (res == 0)
628 return -1;
630 if (list == NULL && nf != n)
632 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
634 if (list || !(sflags & (1<<ecpt)))
636 snew(va,nf);
637 vp = va;
639 else
641 if (*v == NULL)
643 snew(*v,nf);
645 vp = *v;
647 snew(vr,nf*DIM*DIM);
648 for(i=0; i<nf; i++)
650 for(j=0; j<DIM; j++)
652 for(k=0; k<DIM; k++)
654 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
658 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
659 nf*DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
660 for(i=0; i<nf; i++)
662 for(j=0; j<DIM; j++)
664 for(k=0; k<DIM; k++)
666 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
670 sfree(vr);
672 if (list && ret == 0)
674 for(i=0; i<nf; i++)
676 pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
679 if (va)
681 sfree(va);
684 return ret;
687 static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
688 char **version,char **btime,char **buser,char **bmach,
689 char **fprog,char **ftime,
690 int *eIntegrator,int *simulation_part,
691 gmx_large_int_t *step,double *t,
692 int *nnodes,int *dd_nc,int *npme,
693 int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
694 int *flags_state,int *flags_eks,int *flags_enh,
695 FILE *list)
697 bool_t res=0;
698 int magic;
699 int idum=0;
700 int i;
701 char *fhost;
703 if (bRead)
705 magic = -1;
707 else
709 magic = CPT_MAGIC1;
711 res = xdr_int(xd,&magic);
712 if (res == 0)
714 gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
716 if (magic != CPT_MAGIC1)
718 gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
719 "The checkpoint file is corrupted or not a checkpoint file",
720 magic,CPT_MAGIC1);
722 if (!bRead)
724 snew(fhost,255);
725 #ifdef HAVE_UNISTD_H
726 if (gethostname(fhost,255) != 0)
728 sprintf(fhost,"unknown");
730 #else
731 sprintf(fhost,"unknown");
732 #endif
734 do_cpt_string_err(xd,bRead,"GROMACS version" ,version,list);
735 do_cpt_string_err(xd,bRead,"GROMACS build time" ,btime,list);
736 do_cpt_string_err(xd,bRead,"GROMACS build user" ,buser,list);
737 do_cpt_string_err(xd,bRead,"GROMACS build machine" ,bmach,list);
738 do_cpt_string_err(xd,bRead,"generating program" ,fprog,list);
739 do_cpt_string_err(xd,bRead,"generation time" ,ftime,list);
740 *file_version = cpt_version;
741 do_cpt_int_err(xd,"checkpoint file version",file_version,list);
742 if (*file_version > cpt_version)
744 gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
746 if (*file_version >= 12)
748 do_cpt_string_err(xd,bRead,"generating host" ,&fhost,list);
749 if (list == NULL)
751 sfree(fhost);
754 do_cpt_int_err(xd,"#atoms" ,natoms ,list);
755 do_cpt_int_err(xd,"#T-coupling groups",ngtc ,list);
756 if (*file_version >= 10)
758 do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
760 else
762 *nhchainlength = 1;
764 if (*file_version >= 11)
766 do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
768 else
770 *nnhpres = 0;
772 do_cpt_int_err(xd,"integrator" ,eIntegrator,list);
773 if (*file_version >= 3)
775 do_cpt_int_err(xd,"simulation part #", simulation_part,list);
777 else
779 *simulation_part = 1;
781 if (*file_version >= 5)
783 do_cpt_step_err(xd,"step" ,step ,list);
785 else
787 do_cpt_int_err(xd,"step" ,&idum ,list);
788 *step = idum;
790 do_cpt_double_err(xd,"t" ,t ,list);
791 do_cpt_int_err(xd,"#PP-nodes" ,nnodes ,list);
792 idum = 1;
793 do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
794 do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
795 do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
796 do_cpt_int_err(xd,"#PME-only nodes",npme,list);
797 do_cpt_int_err(xd,"state flags",flags_state,list);
798 if (*file_version >= 4)
800 do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
801 do_cpt_int_err(xd,"energy history flags",flags_enh,list);
803 else
805 *flags_eks = 0;
806 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
807 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
808 (1<<(estORIRE_DTAV+2)) |
809 (1<<(estORIRE_DTAV+3))));
813 static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
815 bool_t res=0;
816 int magic;
818 if (file_version >= 2)
820 magic = CPT_MAGIC2;
821 res = xdr_int(xd,&magic);
822 if (res == 0)
824 cp_error();
826 if (magic != CPT_MAGIC2)
828 return -1;
832 return 0;
835 static int do_cpt_state(XDR *xd,gmx_bool bRead,
836 int fflags,t_state *state,
837 gmx_bool bReadRNG,FILE *list)
839 int sflags;
840 int **rng_p,**rngi_p;
841 int i;
842 int ret;
843 int nnht,nnhtp;
845 ret = 0;
847 nnht = state->nhchainlength*state->ngtc;
848 nnhtp = state->nhchainlength*state->nnhpres;
850 if (bReadRNG)
852 rng_p = (int **)&state->ld_rng;
853 rngi_p = &state->ld_rngi;
855 else
857 /* Do not read the RNG data */
858 rng_p = NULL;
859 rngi_p = NULL;
862 sflags = state->flags;
863 for(i=0; (i<estNR && ret == 0); i++)
865 if (fflags & (1<<i))
867 switch (i)
869 case estLAMBDA: ret = do_cpte_real(xd,0,i,sflags,&state->lambda,list); break;
870 case estBOX: ret = do_cpte_matrix(xd,0,i,sflags,state->box,list); break;
871 case estBOX_REL: ret = do_cpte_matrix(xd,0,i,sflags,state->box_rel,list); break;
872 case estBOXV: ret = do_cpte_matrix(xd,0,i,sflags,state->boxv,list); break;
873 case estPRES_PREV: ret = do_cpte_matrix(xd,0,i,sflags,state->pres_prev,list); break;
874 case estSVIR_PREV: ret = do_cpte_matrix(xd,0,i,sflags,state->svir_prev,list); break;
875 case estFVIR_PREV: ret = do_cpte_matrix(xd,0,i,sflags,state->fvir_prev,list); break;
876 case estNH_XI: ret = do_cpte_doubles(xd,0,i,sflags,nnht,&state->nosehoover_xi,list); break;
877 case estNH_VXI: ret = do_cpte_doubles(xd,0,i,sflags,nnht,&state->nosehoover_vxi,list); break;
878 case estNHPRES_XI: ret = do_cpte_doubles(xd,0,i,sflags,nnhtp,&state->nhpres_xi,list); break;
879 case estNHPRES_VXI: ret = do_cpte_doubles(xd,0,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
880 case estTC_INT: ret = do_cpte_doubles(xd,0,i,sflags,state->ngtc,&state->therm_integral,list); break;
881 case estVETA: ret = do_cpte_real(xd,0,i,sflags,&state->veta,list); break;
882 case estVOL0: ret = do_cpte_real(xd,0,i,sflags,&state->vol0,list); break;
883 case estX: ret = do_cpte_rvecs(xd,0,i,sflags,state->natoms,&state->x,list); break;
884 case estV: ret = do_cpte_rvecs(xd,0,i,sflags,state->natoms,&state->v,list); break;
885 case estSDX: ret = do_cpte_rvecs(xd,0,i,sflags,state->natoms,&state->sd_X,list); break;
886 case estLD_RNG: ret = do_cpte_ints(xd,0,i,sflags,state->nrng,rng_p,list); break;
887 case estLD_RNGI: ret = do_cpte_ints(xd,0,i,sflags,state->nrngi,rngi_p,list); break;
888 case estDISRE_INITF: ret = do_cpte_real (xd,0,i,sflags,&state->hist.disre_initf,list); break;
889 case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd,0,i,sflags,&state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
890 case estORIRE_INITF: ret = do_cpte_real (xd,0,i,sflags,&state->hist.orire_initf,list); break;
891 case estORIRE_DTAV: ret = do_cpte_n_reals(xd,0,i,sflags,&state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
892 default:
893 gmx_fatal(FARGS,"Unknown state entry %d\n"
894 "You are probably reading a new checkpoint file with old code",i);
899 return ret;
902 static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
903 int fflags,ekinstate_t *ekins,
904 FILE *list)
906 int i;
907 int ret;
909 ret = 0;
911 for(i=0; (i<eeksNR && ret == 0); i++)
913 if (fflags & (1<<i))
915 switch (i)
918 case eeksEKIN_N: ret = do_cpte_int(xd,1,i,fflags,&ekins->ekin_n,list); break;
919 case eeksEKINH : ret = do_cpte_matrices(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
920 case eeksEKINF: ret = do_cpte_matrices(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
921 case eeksEKINO: ret = do_cpte_matrices(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
922 case eeksEKINTOTAL: ret = do_cpte_matrix(xd,1,i,fflags,ekins->ekin_total,list); break;
923 case eeksEKINSCALEF: ret = do_cpte_doubles(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
924 case eeksVSCALE: ret = do_cpte_doubles(xd,1,i,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
925 case eeksEKINSCALEH: ret = do_cpte_doubles(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
926 case eeksDEKINDL : ret = do_cpte_real(xd,1,i,fflags,&ekins->dekindl,list); break;
927 case eeksMVCOS: ret = do_cpte_real(xd,1,i,fflags,&ekins->mvcos,list); break;
928 default:
929 gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
930 "You are probably reading a new checkpoint file with old code",i);
935 return ret;
939 static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
940 int fflags,energyhistory_t *enerhist,
941 FILE *list)
943 int i;
944 int j;
945 int ret;
947 ret = 0;
949 if (bRead)
951 enerhist->nsteps = 0;
952 enerhist->nsum = 0;
953 enerhist->nsteps_sim = 0;
954 enerhist->nsum_sim = 0;
955 enerhist->dht = NULL;
957 if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
959 snew(enerhist->dht,1);
960 enerhist->dht->ndh = NULL;
961 enerhist->dht->dh = NULL;
962 enerhist->dht->start_lambda_set=FALSE;
966 for(i=0; (i<eenhNR && ret == 0); i++)
968 if (fflags & (1<<i))
970 switch (i)
972 case eenhENERGY_N: ret = do_cpte_int(xd,2,i,fflags,&enerhist->nener,list); break;
973 case eenhENERGY_AVER: ret = do_cpte_doubles(xd,2,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
974 case eenhENERGY_SUM: ret = do_cpte_doubles(xd,2,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
975 case eenhENERGY_NSUM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
976 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,2,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
977 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
978 case eenhENERGY_NSTEPS: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
979 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
980 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
981 if (bRead) /* now allocate memory for it */
983 snew(enerhist->dht->dh, enerhist->dht->nndh);
984 snew(enerhist->dht->ndh, enerhist->dht->nndh);
985 for(j=0;j<enerhist->dht->nndh;j++)
987 enerhist->dht->ndh[j] = 0;
988 enerhist->dht->dh[j] = NULL;
991 break;
992 case eenhENERGY_DELTA_H_LIST:
993 for(j=0;j<enerhist->dht->nndh;j++)
995 ret=do_cpte_n_reals(xd, 2, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
997 break;
998 case eenhENERGY_DELTA_H_STARTTIME:
999 ret=do_cpte_double(xd, 2, i, fflags, &(enerhist->dht->start_time), list); break;
1000 case eenhENERGY_DELTA_H_STARTLAMBDA:
1001 enerhist->dht->start_lambda_set=TRUE;
1002 ret=do_cpte_double(xd, 2, i, fflags, &(enerhist->dht->start_lambda), list); break;
1003 default:
1004 gmx_fatal(FARGS,"Unknown energy history entry %d\n"
1005 "You are probably reading a new checkpoint file with old code",i);
1010 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
1012 /* Assume we have an old file format and copy sum to sum_sim */
1013 srenew(enerhist->ener_sum_sim,enerhist->nener);
1014 for(i=0; i<enerhist->nener; i++)
1016 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
1018 fflags |= (1<<eenhENERGY_SUM_SIM);
1021 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
1022 !(fflags & (1<<eenhENERGY_NSTEPS)))
1024 /* Assume we have an old file format and copy nsum to nsteps */
1025 enerhist->nsteps = enerhist->nsum;
1026 fflags |= (1<<eenhENERGY_NSTEPS);
1028 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
1029 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
1031 /* Assume we have an old file format and copy nsum to nsteps */
1032 enerhist->nsteps_sim = enerhist->nsum_sim;
1033 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
1036 return ret;
1039 static int do_cpt_files(XDR *xd, gmx_bool bRead,
1040 gmx_file_position_t **p_outputfiles, int *nfiles,
1041 FILE *list, int file_version)
1043 int i,j;
1044 gmx_off_t offset;
1045 gmx_off_t mask = 0xFFFFFFFFL;
1046 int offset_high,offset_low;
1047 char *buf;
1048 gmx_file_position_t *outputfiles;
1050 if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
1052 return -1;
1055 if(bRead)
1057 snew(*p_outputfiles,*nfiles);
1060 outputfiles = *p_outputfiles;
1062 for(i=0;i<*nfiles;i++)
1064 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
1065 if(bRead)
1067 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1068 strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
1069 if(list==NULL)
1071 sfree(buf);
1074 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1076 return -1;
1078 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1080 return -1;
1082 #if (SIZEOF_GMX_OFF_T > 4)
1083 outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
1084 #else
1085 outputfiles[i].offset = offset_low;
1086 #endif
1088 else
1090 buf = outputfiles[i].filename;
1091 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
1092 /* writing */
1093 offset = outputfiles[i].offset;
1094 if (offset == -1)
1096 offset_low = -1;
1097 offset_high = -1;
1099 else
1101 #if (SIZEOF_GMX_OFF_T > 4)
1102 offset_low = (int) (offset & mask);
1103 offset_high = (int) ((offset >> 32) & mask);
1104 #else
1105 offset_low = offset;
1106 offset_high = 0;
1107 #endif
1109 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1111 return -1;
1113 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1115 return -1;
1118 if (file_version >= 8)
1120 if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
1121 list) != 0)
1123 return -1;
1125 if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
1127 return -1;
1130 else
1132 outputfiles[i].chksum_size = -1;
1135 return 0;
1139 void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
1140 FILE *fplog,t_commrec *cr,
1141 int eIntegrator,int simulation_part,
1142 gmx_large_int_t step,double t,t_state *state)
1144 t_fileio *fp;
1145 int file_version;
1146 char *version;
1147 char *btime;
1148 char *buser;
1149 char *bmach;
1150 char *fprog;
1151 char *fntemp; /* the temporary checkpoint file name */
1152 time_t now;
1153 char timebuf[STRLEN];
1154 int nppnodes,npmenodes,flag_64bit;
1155 char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
1156 gmx_file_position_t *outputfiles;
1157 int noutputfiles;
1158 char *ftime;
1159 int flags_eks,flags_enh,i;
1160 t_fileio *ret;
1162 if (PAR(cr))
1164 if (DOMAINDECOMP(cr))
1166 nppnodes = cr->dd->nnodes;
1167 npmenodes = cr->npmenodes;
1169 else
1171 nppnodes = cr->nnodes;
1172 npmenodes = 0;
1175 else
1177 nppnodes = 1;
1178 npmenodes = 0;
1181 /* make the new temporary filename */
1182 snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
1183 strcpy(fntemp,fn);
1184 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1185 sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
1186 strcat(fntemp,suffix);
1187 strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1189 time(&now);
1190 gmx_ctime_r(&now,timebuf,STRLEN);
1192 if (fplog)
1194 fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
1195 gmx_step_str(step,buf),timebuf);
1198 /* Get offsets for open files */
1199 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1201 fp = gmx_fio_open(fntemp,"w");
1203 if (state->ekinstate.bUpToDate)
1205 flags_eks =
1206 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1207 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1208 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1210 else
1212 flags_eks = 0;
1215 flags_enh = 0;
1216 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1218 flags_enh |= (1<<eenhENERGY_N);
1219 if (state->enerhist.nsum > 0)
1221 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1222 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1224 if (state->enerhist.nsum_sim > 0)
1226 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1227 (1<<eenhENERGY_NSUM_SIM));
1229 if (state->enerhist.dht)
1231 flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
1232 (1<< eenhENERGY_DELTA_H_LIST) |
1233 (1<< eenhENERGY_DELTA_H_STARTTIME) |
1234 (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
1239 version = gmx_strdup(VERSION);
1240 btime = gmx_strdup(BUILD_TIME);
1241 buser = gmx_strdup(BUILD_USER);
1242 bmach = gmx_strdup(BUILD_MACHINE);
1243 fprog = gmx_strdup(Program());
1245 ftime = &(timebuf[0]);
1247 do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
1248 &version,&btime,&buser,&bmach,&fprog,&ftime,
1249 &eIntegrator,&simulation_part,&step,&t,&nppnodes,
1250 DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
1251 &state->natoms,&state->ngtc,&state->nnhpres,
1252 &state->nhchainlength, &state->flags,&flags_eks,&flags_enh,
1253 NULL);
1255 sfree(version);
1256 sfree(btime);
1257 sfree(buser);
1258 sfree(bmach);
1259 sfree(fprog);
1261 if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0) ||
1262 (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
1263 (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0) ||
1264 (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
1265 file_version) < 0))
1267 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1270 do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
1272 /* we really, REALLY, want to make sure to physically write the checkpoint,
1273 and all the files it depends on, out to disk. Because we've
1274 opened the checkpoint with gmx_fio_open(), it's in our list
1275 of open files. */
1276 ret=gmx_fio_all_output_fsync();
1278 if (ret)
1280 char buf[STRLEN];
1281 sprintf(buf,
1282 "Cannot fsync '%s'; maybe you are out of disk space?",
1283 gmx_fio_getname(ret));
1285 if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
1287 gmx_file(buf);
1289 else
1291 gmx_warning(buf);
1295 if( gmx_fio_close(fp) != 0)
1297 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1300 /* we don't move the checkpoint if the user specified they didn't want it,
1301 or if the fsyncs failed */
1302 if (!bNumberAndKeep && !ret)
1304 if (gmx_fexist(fn))
1306 /* Rename the previous checkpoint file */
1307 strcpy(buf,fn);
1308 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1309 strcat(buf,"_prev");
1310 strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1311 #ifndef GMX_FAHCORE
1312 /* we copy here so that if something goes wrong between now and
1313 * the rename below, there's always a state.cpt.
1314 * If renames are atomic (such as in POSIX systems),
1315 * this copying should be unneccesary.
1317 gmx_file_copy(fn, buf, FALSE);
1318 /* We don't really care if this fails:
1319 * there's already a new checkpoint.
1321 #else
1322 gmx_file_rename(fn, buf);
1323 #endif
1325 if (gmx_file_rename(fntemp, fn) != 0)
1327 gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
1331 sfree(outputfiles);
1332 sfree(fntemp);
1334 #ifdef GMX_FAHCORE
1335 /*code for alternate checkpointing scheme. moved from top of loop over
1336 steps */
1337 fcRequestCheckPoint();
1338 if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
1339 gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
1341 #endif /* end GMX_FAHCORE block */
1344 static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
1346 int i;
1348 fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
1349 fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
1350 fprintf(fplog," %24s %11s %11s\n","","simulation","checkpoint");
1351 for(i=0; i<estNR; i++)
1353 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1355 fprintf(fplog," %24s %11s %11s\n",
1356 est_names[i],
1357 (sflags & (1<<i)) ? " present " : "not present",
1358 (fflags & (1<<i)) ? " present " : "not present");
1363 static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
1365 FILE *fp = fplog ? fplog : stderr;
1367 if (p != f)
1369 fprintf(fp," %s mismatch,\n",type);
1370 fprintf(fp," current program: %d\n",p);
1371 fprintf(fp," checkpoint file: %d\n",f);
1372 fprintf(fp,"\n");
1373 *mm = TRUE;
1377 static void check_string(FILE *fplog,const char *type,const char *p,
1378 const char *f,gmx_bool *mm)
1380 FILE *fp = fplog ? fplog : stderr;
1382 if (strcmp(p,f) != 0)
1384 fprintf(fp," %s mismatch,\n",type);
1385 fprintf(fp," current program: %s\n",p);
1386 fprintf(fp," checkpoint file: %s\n",f);
1387 fprintf(fp,"\n");
1388 *mm = TRUE;
1392 static void check_match(FILE *fplog,
1393 char *version,
1394 char *btime,char *buser,char *bmach,char *fprog,
1395 t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
1396 ivec dd_nc,ivec dd_nc_f)
1398 int npp;
1399 gmx_bool mm;
1401 mm = FALSE;
1403 check_string(fplog,"Version" ,VERSION ,version,&mm);
1404 check_string(fplog,"Build time" ,BUILD_TIME ,btime ,&mm);
1405 check_string(fplog,"Build user" ,BUILD_USER ,buser ,&mm);
1406 check_string(fplog,"Build machine",BUILD_MACHINE,bmach ,&mm);
1407 check_string(fplog,"Program name" ,Program() ,fprog ,&mm);
1409 check_int (fplog,"#nodes" ,cr->nnodes ,npp_f+npme_f ,&mm);
1410 if (bPartDecomp)
1412 dd_nc[XX] = 1;
1413 dd_nc[YY] = 1;
1414 dd_nc[ZZ] = 1;
1416 if (cr->nnodes > 1)
1418 check_int (fplog,"#PME-nodes" ,cr->npmenodes,npme_f ,&mm);
1420 npp = cr->nnodes;
1421 if (cr->npmenodes >= 0)
1423 npp -= cr->npmenodes;
1425 if (npp == npp_f)
1427 check_int (fplog,"#DD-cells[x]",dd_nc[XX] ,dd_nc_f[XX],&mm);
1428 check_int (fplog,"#DD-cells[y]",dd_nc[YY] ,dd_nc_f[YY],&mm);
1429 check_int (fplog,"#DD-cells[z]",dd_nc[ZZ] ,dd_nc_f[ZZ],&mm);
1433 if (mm)
1435 fprintf(stderr,
1436 "Gromacs binary or parallel settings not identical to previous run.\n"
1437 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1438 fplog ? ",\n see the log file for details" : "");
1440 if (fplog)
1442 fprintf(fplog,
1443 "Gromacs binary or parallel settings not identical to previous run.\n"
1444 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
1449 static void read_checkpoint(const char *fn,FILE **pfplog,
1450 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
1451 int eIntegrator,gmx_large_int_t *step,double *t,
1452 t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
1453 int *simulation_part,
1454 gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
1456 t_fileio *fp;
1457 int i,j,rc;
1458 int file_version;
1459 char *version,*btime,*buser,*bmach,*fprog,*ftime;
1460 char filename[STRLEN],buf[STEPSTRSIZE];
1461 int nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
1462 ivec dd_nc_f;
1463 int natoms,ngtc,nnhpres,nhchainlength,fflags,flags_eks,flags_enh;
1464 int d;
1465 int ret;
1466 gmx_file_position_t *outputfiles;
1467 int nfiles;
1468 t_fileio *chksum_file;
1469 FILE* fplog = *pfplog;
1470 unsigned char digest[16];
1471 #if !((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
1472 struct flock fl; /* don't initialize here: the struct order is OS
1473 dependent! */
1474 #endif
1476 const char *int_warn=
1477 "WARNING: The checkpoint file was generated with integrator %s,\n"
1478 " while the simulation uses integrator %s\n\n";
1479 const char *sd_note=
1480 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1481 " while the simulation uses %d SD or BD nodes,\n"
1482 " continuation will be exact, except for the random state\n\n";
1484 #if !((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
1485 fl.l_type=F_WRLCK;
1486 fl.l_whence=SEEK_SET;
1487 fl.l_start=0;
1488 fl.l_len=0;
1489 fl.l_pid=0;
1490 #endif
1492 if (PARTDECOMP(cr))
1494 gmx_fatal(FARGS,
1495 "read_checkpoint not (yet) supported with particle decomposition");
1498 fp = gmx_fio_open(fn,"r");
1499 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1500 &version,&btime,&buser,&bmach,&fprog,&ftime,
1501 &eIntegrator_f,simulation_part,step,t,
1502 &nppnodes_f,dd_nc_f,&npmenodes_f,
1503 &natoms,&ngtc,&nnhpres,&nhchainlength,
1504 &fflags,&flags_eks,&flags_enh,NULL);
1506 if (cr == NULL || MASTER(cr))
1508 fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
1509 fn,ftime);
1512 /* This will not be written if we do appending, since fplog is still NULL then */
1513 if (fplog)
1515 fprintf(fplog,"\n");
1516 fprintf(fplog,"Reading checkpoint file %s\n",fn);
1517 fprintf(fplog," file generated by: %s\n",fprog);
1518 fprintf(fplog," file generated at: %s\n",ftime);
1519 fprintf(fplog," GROMACS build time: %s\n",btime);
1520 fprintf(fplog," GROMACS build user: %s\n",buser);
1521 fprintf(fplog," GROMACS build machine: %s\n",bmach);
1522 fprintf(fplog," simulation part #: %d\n",*simulation_part);
1523 fprintf(fplog," step: %s\n",gmx_step_str(*step,buf));
1524 fprintf(fplog," time: %f\n",*t);
1525 fprintf(fplog,"\n");
1528 if (natoms != state->natoms)
1530 gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
1532 if (ngtc != state->ngtc)
1534 gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
1536 if (nnhpres != state->nnhpres)
1538 gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
1541 init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
1542 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1544 if (eIntegrator_f != eIntegrator)
1546 if (MASTER(cr))
1548 fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1550 if(bAppendOutputFiles)
1552 gmx_fatal(FARGS,
1553 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1554 "Stopping the run to prevent you from ruining all your data...\n"
1555 "If you _really_ know what you are doing, try with the -noappend option.\n");
1557 if (fplog)
1559 fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1563 if (!PAR(cr))
1565 nppnodes = 1;
1566 cr->npmenodes = 0;
1568 else if (bPartDecomp)
1570 nppnodes = cr->nnodes;
1571 cr->npmenodes = 0;
1573 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1575 if (cr->npmenodes < 0)
1577 cr->npmenodes = npmenodes_f;
1579 nppnodes = cr->nnodes - cr->npmenodes;
1580 if (nppnodes == nppnodes_f)
1582 for(d=0; d<DIM; d++)
1584 if (dd_nc[d] == 0)
1586 dd_nc[d] = dd_nc_f[d];
1591 else
1593 /* The number of PP nodes has not been set yet */
1594 nppnodes = -1;
1597 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1599 /* Correct the RNG state size for the number of PP nodes.
1600 * Such assignments should all be moved to one central function.
1602 state->nrng = nppnodes*gmx_rng_n();
1603 state->nrngi = nppnodes;
1606 *bReadRNG = TRUE;
1607 if (fflags != state->flags)
1610 if (MASTER(cr))
1612 if(bAppendOutputFiles)
1614 gmx_fatal(FARGS,
1615 "Output file appending requested, but input and checkpoint states are not identical.\n"
1616 "Stopping the run to prevent you from ruining all your data...\n"
1617 "You can try with the -noappend option, and get more info in the log file.\n");
1620 if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
1622 gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
1624 else
1626 fprintf(stderr,
1627 "WARNING: The checkpoint state entries do not match the simulation,\n"
1628 " see the log file for details\n\n");
1632 if(fplog)
1634 print_flag_mismatch(fplog,state->flags,fflags);
1637 else
1639 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1640 nppnodes != nppnodes_f)
1642 *bReadRNG = FALSE;
1643 if (MASTER(cr))
1645 fprintf(stderr,sd_note,nppnodes_f,nppnodes);
1647 if (fplog)
1649 fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
1652 if (MASTER(cr))
1654 check_match(fplog,version,btime,buser,bmach,fprog,
1655 cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
1658 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
1659 if (ret)
1661 cp_error();
1663 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1664 flags_eks,&state->ekinstate,NULL);
1665 if (ret)
1667 cp_error();
1669 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1670 (flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE)));
1672 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1673 flags_enh,&state->enerhist,NULL);
1674 if (ret)
1676 cp_error();
1679 if (file_version < 6)
1681 const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1683 fprintf(stderr,"\nWARNING: %s\n\n",warn);
1684 if (fplog)
1686 fprintf(fplog,"\nWARNING: %s\n\n",warn);
1688 state->enerhist.nsum = *step;
1689 state->enerhist.nsum_sim = *step;
1692 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
1693 if (ret)
1695 cp_error();
1698 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
1699 if (ret)
1701 cp_error();
1703 if( gmx_fio_close(fp) != 0)
1705 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1708 sfree(fprog);
1709 sfree(ftime);
1710 sfree(btime);
1711 sfree(buser);
1712 sfree(bmach);
1714 /* If the user wants to append to output files,
1715 * we use the file pointer positions of the output files stored
1716 * in the checkpoint file and truncate the files such that any frames
1717 * written after the checkpoint time are removed.
1718 * All files are md5sum checked such that we can be sure that
1719 * we do not truncate other (maybe imprortant) files.
1721 if (bAppendOutputFiles)
1723 if (fn2ftp(outputfiles[0].filename)!=efLOG)
1725 /* make sure first file is log file so that it is OK to use it for
1726 * locking
1728 gmx_fatal(FARGS,"The first output file should always be the log "
1729 "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
1731 for(i=0;i<nfiles;i++)
1733 if (outputfiles[i].offset < 0)
1735 gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
1736 "is larger than 2 GB, but mdrun did not support large file"
1737 " offsets. Can not append. Run mdrun with -noappend",
1738 outputfiles[i].filename);
1740 #ifdef GMX_FAHCORE
1741 chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
1743 #else
1744 chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
1746 /* lock log file */
1747 if (i==0)
1749 /* Note that there are systems where the lock operation
1750 * will succeed, but a second process can also lock the file.
1751 * We should probably try to detect this.
1753 #if !((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
1754 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
1755 ==-1)
1756 #else
1757 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
1758 #endif
1760 if (errno == ENOSYS)
1762 if (!bForceAppend)
1764 gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
1766 else
1768 fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1769 if (fplog)
1771 fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
1775 else if (errno == EACCES || errno == EAGAIN)
1777 gmx_fatal(FARGS,"Failed to lock: %s. Already running "
1778 "simulation?", outputfiles[i].filename);
1780 else
1782 gmx_fatal(FARGS,"Failed to lock: %s. %s.",
1783 outputfiles[i].filename, strerror(errno));
1788 /* compute md5 chksum */
1789 if (outputfiles[i].chksum_size != -1)
1791 if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
1792 digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
1794 gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
1795 outputfiles[i].chksum_size,
1796 outputfiles[i].filename);
1799 if (i==0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
1801 if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
1803 gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
1806 #endif
1808 if (i==0) /*open log file here - so that lock is never lifted
1809 after chksum is calculated */
1811 *pfplog = gmx_fio_getfp(chksum_file);
1813 else
1815 gmx_fio_close(chksum_file);
1817 #ifndef GMX_FAHCORE
1818 /* compare md5 chksum */
1819 if (outputfiles[i].chksum_size != -1 &&
1820 memcmp(digest,outputfiles[i].chksum,16)!=0)
1822 if (debug)
1824 fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
1825 for (j=0; j<16; j++)
1827 fprintf(debug,"%02x",digest[j]);
1829 fprintf(debug,"\n");
1831 gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
1832 outputfiles[i].filename);
1834 #endif
1837 if (i!=0) /*log file is already seeked to correct position */
1839 #if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
1840 rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
1841 #else
1842 rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
1843 #endif
1844 if(rc!=0)
1846 gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
1852 sfree(outputfiles);
1856 void load_checkpoint(const char *fn,FILE **fplog,
1857 t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
1858 t_inputrec *ir,t_state *state,
1859 gmx_bool *bReadRNG,gmx_bool *bReadEkin,
1860 gmx_bool bAppend,gmx_bool bForceAppend)
1862 gmx_large_int_t step;
1863 double t;
1865 if (SIMMASTER(cr)) {
1866 /* Read the state from the checkpoint file */
1867 read_checkpoint(fn,fplog,
1868 cr,bPartDecomp,dd_nc,
1869 ir->eI,&step,&t,state,bReadRNG,bReadEkin,
1870 &ir->simulation_part,bAppend,bForceAppend);
1872 if (PAR(cr)) {
1873 gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
1874 gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
1875 gmx_bcast(sizeof(step),&step,cr);
1876 gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
1877 gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
1879 ir->bContinuation = TRUE;
1880 if (ir->nsteps >= 0)
1882 ir->nsteps += ir->init_step - step;
1884 ir->init_step = step;
1885 ir->simulation_part += 1;
1888 static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
1889 gmx_large_int_t *step,double *t,t_state *state,
1890 gmx_bool bReadRNG,
1891 int *nfiles,gmx_file_position_t **outputfiles)
1893 int file_version;
1894 char *version,*btime,*buser,*bmach,*fprog,*ftime;
1895 int eIntegrator;
1896 int nppnodes,npme;
1897 ivec dd_nc;
1898 int flags_eks,flags_enh;
1899 int nfiles_loc;
1900 gmx_file_position_t *files_loc=NULL;
1901 int ret;
1903 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1904 &version,&btime,&buser,&bmach,&fprog,&ftime,
1905 &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
1906 &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
1907 &state->flags,&flags_eks,&flags_enh,NULL);
1908 ret =
1909 do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
1910 if (ret)
1912 cp_error();
1914 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1915 flags_eks,&state->ekinstate,NULL);
1916 if (ret)
1918 cp_error();
1920 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1921 flags_enh,&state->enerhist,NULL);
1922 if (ret)
1924 cp_error();
1927 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
1928 outputfiles != NULL ? outputfiles : &files_loc,
1929 outputfiles != NULL ? nfiles : &nfiles_loc,
1930 NULL,file_version);
1931 if (files_loc != NULL)
1933 sfree(files_loc);
1936 if (ret)
1938 cp_error();
1941 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
1942 if (ret)
1944 cp_error();
1947 sfree(fprog);
1948 sfree(ftime);
1949 sfree(btime);
1950 sfree(buser);
1951 sfree(bmach);
1954 void
1955 read_checkpoint_state(const char *fn,int *simulation_part,
1956 gmx_large_int_t *step,double *t,t_state *state)
1958 t_fileio *fp;
1960 fp = gmx_fio_open(fn,"r");
1961 read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
1962 if( gmx_fio_close(fp) != 0)
1964 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
1968 void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
1970 t_state state;
1971 int simulation_part;
1972 gmx_large_int_t step;
1973 double t;
1975 init_state(&state,0,0,0,0);
1977 read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
1979 fr->natoms = state.natoms;
1980 fr->bTitle = FALSE;
1981 fr->bStep = TRUE;
1982 fr->step = gmx_large_int_to_int(step,
1983 "conversion of checkpoint to trajectory");
1984 fr->bTime = TRUE;
1985 fr->time = t;
1986 fr->bLambda = TRUE;
1987 fr->lambda = state.lambda;
1988 fr->bAtoms = FALSE;
1989 fr->bX = (state.flags & (1<<estX));
1990 if (fr->bX)
1992 fr->x = state.x;
1993 state.x = NULL;
1995 fr->bV = (state.flags & (1<<estV));
1996 if (fr->bV)
1998 fr->v = state.v;
1999 state.v = NULL;
2001 fr->bF = FALSE;
2002 fr->bBox = (state.flags & (1<<estBOX));
2003 if (fr->bBox)
2005 copy_mat(state.box,fr->box);
2007 done_state(&state);
2010 void list_checkpoint(const char *fn,FILE *out)
2012 t_fileio *fp;
2013 int file_version;
2014 char *version,*btime,*buser,*bmach,*fprog,*ftime;
2015 int eIntegrator,simulation_part,nppnodes,npme;
2016 gmx_large_int_t step;
2017 double t;
2018 ivec dd_nc;
2019 t_state state;
2020 int flags_eks,flags_enh;
2021 int indent;
2022 int i,j;
2023 int ret;
2024 gmx_file_position_t *outputfiles;
2025 int nfiles;
2027 init_state(&state,-1,-1,-1,-1);
2029 fp = gmx_fio_open(fn,"r");
2030 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
2031 &version,&btime,&buser,&bmach,&fprog,&ftime,
2032 &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
2033 &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
2034 &state.flags,&flags_eks,&flags_enh,out);
2035 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
2036 if (ret)
2038 cp_error();
2040 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
2041 flags_eks,&state.ekinstate,out);
2042 if (ret)
2044 cp_error();
2046 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
2047 flags_enh,&state.enerhist,out);
2049 if (ret == 0)
2051 do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
2054 if (ret == 0)
2056 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
2059 if (ret)
2061 cp_warning(out);
2063 if( gmx_fio_close(fp) != 0)
2065 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2068 done_state(&state);
2072 static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
2074 int i;
2076 /* Check if the output file name stored in the checkpoint file
2077 * is one of the output file names of mdrun.
2079 i = 0;
2080 while (i < nfile &&
2081 !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
2083 i++;
2086 return (i < nfile && gmx_fexist(fnm_cp));
2089 /* This routine cannot print tons of data, since it is called before the log file is opened. */
2090 gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
2091 gmx_large_int_t *cpt_step,t_commrec *cr,
2092 gmx_bool bAppendReq,
2093 int nfile,const t_filenm fnm[],
2094 const char *part_suffix,gmx_bool *bAddPart)
2096 t_fileio *fp;
2097 gmx_large_int_t step=0;
2098 double t;
2099 t_state state;
2100 int nfiles;
2101 gmx_file_position_t *outputfiles;
2102 int nexist,f;
2103 gmx_bool bAppend;
2104 char *fn,suf_up[STRLEN];
2106 bAppend = FALSE;
2108 if (SIMMASTER(cr)) {
2109 if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
2111 *simulation_part = 0;
2113 else
2115 init_state(&state,0,0,0,0);
2117 read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
2118 &nfiles,&outputfiles);
2119 if( gmx_fio_close(fp) != 0)
2121 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
2123 done_state(&state);
2125 if (bAppendReq)
2127 nexist = 0;
2128 for(f=0; f<nfiles; f++)
2130 if (exist_output_file(outputfiles[f].filename,nfile,fnm))
2132 nexist++;
2135 if (nexist == nfiles)
2137 bAppend = bAppendReq;
2139 else if (nexist > 0)
2141 fprintf(stderr,
2142 "Output file appending has been requested,\n"
2143 "but some output files listed in the checkpoint file %s\n"
2144 "are not present or are named differently by the current program:\n",
2145 filename);
2146 fprintf(stderr,"output files present:");
2147 for(f=0; f<nfiles; f++)
2149 if (exist_output_file(outputfiles[f].filename,
2150 nfile,fnm))
2152 fprintf(stderr," %s",outputfiles[f].filename);
2155 fprintf(stderr,"\n");
2156 fprintf(stderr,"output files not present or named differently:");
2157 for(f=0; f<nfiles; f++)
2159 if (!exist_output_file(outputfiles[f].filename,
2160 nfile,fnm))
2162 fprintf(stderr," %s",outputfiles[f].filename);
2165 fprintf(stderr,"\n");
2167 gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
2171 if (bAppend)
2173 if (nfiles == 0)
2175 gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
2177 fn = outputfiles[0].filename;
2178 if (strlen(fn) < 4 ||
2179 gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
2181 gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
2183 /* Set bAddPart to whether the suffix string '.part' is present
2184 * in the log file name.
2186 strcpy(suf_up,part_suffix);
2187 upstring(suf_up);
2188 *bAddPart = (strstr(fn,part_suffix) != NULL ||
2189 strstr(fn,suf_up) != NULL);
2192 sfree(outputfiles);
2195 if (PAR(cr))
2197 gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
2199 if (*simulation_part > 0 && bAppendReq)
2201 gmx_bcast(sizeof(bAppend),&bAppend,cr);
2202 gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
2205 if (NULL != cpt_step)
2207 *cpt_step = step;
2210 return bAppend;