added mdrun -cptnum option to keep and number checkpoints
[gromacs.git] / src / gmxlib / checkpoint.c
blob006252c12a7cf0ce2ad771657502e0eec5c4acef
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
15 * And Hey:
16 * Gnomes, ROck Monsters And Chili Sauce
19 #ifdef HAVE_CONFIG_H
20 #include <config.h>
21 #endif
23 #include <string.h>
24 #include <time.h>
26 #if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
27 /* _chsize_s */
28 #include <io.h>
29 #include <sys/locking.h>
30 #endif
33 #include "filenm.h"
34 #include "names.h"
35 #include "typedefs.h"
36 #include "smalloc.h"
37 #include "gmxfio.h"
38 #include "xdrf.h"
39 #include "statutil.h"
40 #include "txtdump.h"
41 #include "vec.h"
42 #include "network.h"
43 #include "gmx_random.h"
44 #include "checkpoint.h"
45 #include "futil.h"
46 #include "string2.h"
47 #include <fcntl.h>
50 #ifdef GMX_FAHCORE
51 #include "corewrap.h"
52 #endif
/* Magic numbers framing a checkpoint file: CPT_MAGIC1 is the first integer
 * of the header (see do_cpt_header) and CPT_MAGIC2 is the footer value
 * (see do_cpt_footer). A mismatch is treated as a corrupt or foreign file.
 */
54 #define CPT_MAGIC1 171817
55 #define CPT_MAGIC2 171819
57 /* The source code in this file should be thread-safe.
58 Please keep it that way. */
60 /* cpt_version should normally only be changed
61 * when the header or footer format changes.
62 * The state data format itself is backward and forward compatible.
63 * But old code can not read a new entry that is present in the file
64 * (but can read a new format when new entries are not present).
/* Checkpoint format version used by this code. do_cpt_header stores it in
 * the file and refuses to continue when the file reports a larger value.
 */
66 static const int cpt_version = 12;
/* Element-type tags stored in front of each state entry, and their
 * printable names (used in the type/precision mismatch messages).
 */
68 enum { ecpdtINT, ecpdtFLOAT, ecpdtDOUBLE, ecpdtNR };
70 const char *ecpdt_names[ecpdtNR] = { "int", "float", "double" };
/* Printable names of the t_state entries, indexed by the est* enum
 * (declared outside this file). Returned by st_names() for cptp == 0.
 */
72 const char *est_names[estNR]=
74 "FE-lambda",
75 "box", "box-rel", "box-v", "pres_prev",
76 "nosehoover-xi", "thermostat-integral",
77 "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
78 "disre_initf", "disre_rm3tav",
79 "orire_initf", "orire_Dtav",
80 "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev",
/* Bit positions of the kinetic-energy state entries (flags_eks) and their
 * printable names. Returned by st_names() for cptp == 1.
 */
83 enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
85 const char *eeks_names[eeksNR]=
87 "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
88 "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
/* Bit positions of the energy-history entries (flags_enh) and their
 * printable names. Returned by st_names() for cptp == 2.
 */
91 enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
92 eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
93 eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM, eenhNR };
95 const char *eenh_names[eenhNR]=
97 "energy_n", "energy_aver", "energy_sum", "energy_nsum",
98 "energy_sum_sim", "energy_nsum_sim",
99 "energy_nsteps", "energy_nsteps_sim"
#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
/* Resize the file named `filename` to exactly `size` bytes (native Windows
 * builds only).
 *
 * Returns 0 on success, -1 when the file cannot be opened or closed, and
 * otherwise the non-zero error code reported by _chsize_s().
 * With GMX_FAHCORE defined this is a no-op that returns 0.
 */
static int
gmx_wintruncate(const char *filename, __int64 size)
{
#ifdef GMX_FAHCORE
    /*we do this elsewhere*/
    return 0;
#else
    FILE *fp;
    int   rc;

    fp = fopen(filename,"rb+");
    if (fp == NULL)
    {
        return -1;
    }

    rc = _chsize_s(fileno(fp), size);

    /* Always release the stream: the previous code returned straight from
     * _chsize_s() and leaked one FILE (and its OS handle) per call.
     */
    if (fclose(fp) != 0 && rc == 0)
    {
        rc = -1;
    }

    return rc;
#endif
}
#endif
/* Layout hint passed to do_cpte_reals_low(): selects how the values are
 * printed when listing (plain reals or rvecs; ecprMATRIX is only used by
 * callers that pass a NULL list and print the matrix themselves).
 */
128 enum { ecprREAL, ecprRVEC, ecprMATRIX };
130 static const char *st_names(int cptp,int ecpt)
132 switch (cptp)
134 case 0: return est_names [ecpt]; break;
135 case 1: return eeks_names[ecpt]; break;
136 case 2: return eenh_names[ecpt]; break;
139 return NULL;
/* Write the standard "corrupted or truncated" warning to fp. */
static void cp_warning(FILE *fp)
{
    fputs("\nWARNING: Checkpoint file is corrupted or truncated\n\n",fp);
}
147 static void cp_error()
149 gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of quota?");
152 static void do_cpt_string_err(XDR *xd,bool bRead,const char *desc,char **s,FILE *list)
154 #define CPTSTRLEN 1024
155 bool_t res=0;
157 if (bRead)
159 snew(*s,CPTSTRLEN);
161 res = xdr_string(xd,s,CPTSTRLEN);
162 if (res == 0)
164 cp_error();
166 if (list)
168 fprintf(list,"%s = %s\n",desc,*s);
169 sfree(*s);
173 static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
175 bool_t res=0;
177 res = xdr_int(xd,i);
178 if (res == 0)
180 return -1;
182 if (list)
184 fprintf(list,"%s = %d\n",desc,*i);
186 return 0;
189 static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
191 bool_t res=1;
192 int j;
193 if (list)
195 fprintf(list,"%s = ",desc);
197 for (j=0; j<n && res; j++)
199 res &= xdr_u_char(xd,&i[j]);
200 if (list)
202 fprintf(list,"%02x",i[j]);
205 if (list)
207 fprintf(list,"\n");
209 if (res == 0)
211 return -1;
214 return 0;
217 static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
219 if (do_cpt_int(xd,desc,i,list) < 0)
221 cp_error();
225 static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
227 bool_t res=0;
228 char buf[STEPSTRSIZE];
230 res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
231 if (res == 0)
233 cp_error();
235 if (list)
237 fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
241 static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
243 bool_t res=0;
245 res = xdr_double(xd,f);
246 if (res == 0)
248 cp_error();
250 if (list)
252 fprintf(list,"%s = %f\n",desc,*f);
257 static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
258 int n,real **v,
259 FILE *list,int erealtype)
261 bool_t res=0;
262 #ifndef GMX_DOUBLE
263 int dtc=ecpdtFLOAT;
264 #else
265 int dtc=ecpdtDOUBLE;
266 #endif
267 real *vp,*va=NULL;
268 float *vf;
269 double *vd;
270 int nf,dt,i;
272 nf = n;
273 res = xdr_int(xd,&nf);
274 if (res == 0)
276 return -1;
278 if (list == NULL && nf != n)
280 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
282 dt = dtc;
283 res = xdr_int(xd,&dt);
284 if (res == 0)
286 return -1;
288 if (dt != dtc)
290 fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
291 st_names(cptp,ecpt),ecpdt_names[dtc],ecpdt_names[dt]);
293 if (list || !(sflags & (1<<ecpt)))
295 snew(va,nf);
296 vp = va;
298 else
300 if (*v == NULL)
302 snew(*v,nf);
304 vp = *v;
306 if (dt == ecpdtFLOAT)
308 if (dtc == ecpdtFLOAT)
310 vf = (float *)vp;
312 else
314 snew(vf,nf);
316 res = xdr_vector(xd,(char *)vf,nf,
317 (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
318 if (res == 0)
320 return -1;
322 if (dtc != ecpdtFLOAT)
324 for(i=0; i<nf; i++)
326 vp[i] = vf[i];
328 sfree(vf);
331 else
333 if (dtc == ecpdtDOUBLE)
335 vd = (double *)vp;
337 else
339 snew(vd,nf);
341 res = xdr_vector(xd,(char *)vd,nf,
342 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
343 if (res == 0)
345 return -1;
347 if (dtc != ecpdtDOUBLE)
349 for(i=0; i<nf; i++)
351 vp[i] = vd[i];
353 sfree(vd);
357 if (list)
359 switch (erealtype)
361 case ecprREAL:
362 pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
363 break;
364 case ecprRVEC:
365 pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
366 break;
367 default:
368 gmx_incons("Unknown checkpoint real type");
371 if (va)
373 sfree(va);
376 return 0;
381 static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
382 int n,real **v,FILE *list)
384 return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,v,list,ecprREAL);
387 static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
388 real *r,FILE *list)
390 return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,&r,list,ecprREAL);
393 static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
394 int n,int **v,FILE *list)
396 bool_t res=0;
397 int dtc=ecpdtINT;
398 int *vp,*va=NULL;
399 int nf,dt,i;
401 nf = n;
402 res = xdr_int(xd,&nf);
403 if (res == 0)
405 return -1;
407 if (list == NULL && v != NULL && nf != n)
409 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
411 dt = dtc;
412 res = xdr_int(xd,&dt);
413 if (res == 0)
415 return -1;
417 if (dt != dtc)
419 gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
420 st_names(cptp,ecpt),ecpdt_names[dtc],ecpdt_names[dt]);
422 if (list || !(sflags & (1<<ecpt)) || v == NULL)
424 snew(va,nf);
425 vp = va;
427 else
429 if (*v == NULL)
431 snew(*v,nf);
433 vp = *v;
435 res = xdr_vector(xd,(char *)vp,nf,
436 (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
437 if (res == 0)
439 return -1;
441 if (list)
443 pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
445 if (va)
447 sfree(va);
450 return 0;
453 static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
454 int *i,FILE *list)
456 return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
459 static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
460 int n,double **v,FILE *list)
462 bool_t res=0;
463 int dtc=ecpdtDOUBLE;
464 double *vp,*va=NULL;
465 int nf,dt,i;
467 nf = n;
468 res = xdr_int(xd,&nf);
469 if (res == 0)
471 return -1;
473 if (list == NULL && nf != n)
475 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
477 dt = dtc;
478 res = xdr_int(xd,&dt);
479 if (res == 0)
481 return -1;
483 if (dt != dtc)
485 gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
486 st_names(cptp,ecpt),ecpdt_names[dtc],ecpdt_names[dt]);
488 if (list || !(sflags & (1<<ecpt)))
490 snew(va,nf);
491 vp = va;
493 else
495 if (*v == NULL)
497 snew(*v,nf);
499 vp = *v;
501 res = xdr_vector(xd,(char *)vp,nf,
502 (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
503 if (res == 0)
505 return -1;
507 if (list)
509 pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
511 if (va)
513 sfree(va);
516 return 0;
520 static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
521 int n,rvec **v,FILE *list)
523 return do_cpte_reals_low(xd,cptp,ecpt,sflags,
524 n*DIM,(real **)v,list,ecprRVEC);
527 static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
528 matrix v,FILE *list)
530 real *vr;
531 real ret;
533 vr = (real *)&(v[0][0]);
534 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,DIM*DIM,&vr,NULL,ecprMATRIX);
536 if (list && ret == 0)
538 pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
541 return ret;
544 static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
545 int n,matrix **v,FILE *list)
547 bool_t res=0;
548 matrix *vp,*va=NULL;
549 real *vr;
550 int nf,i,j,k;
551 int ret;
553 nf = n;
554 res = xdr_int(xd,&nf);
555 if (res == 0)
557 return -1;
559 if (list == NULL && nf != n)
561 gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
563 if (list || !(sflags & (1<<ecpt)))
565 snew(va,nf);
566 vp = va;
568 else
570 if (*v == NULL)
572 snew(*v,nf);
574 vp = *v;
576 snew(vr,nf*DIM*DIM);
577 for(i=0; i<nf; i++)
579 for(j=0; j<DIM; j++)
581 for(k=0; k<DIM; k++)
583 vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
587 ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
588 nf*DIM*DIM,&vr,NULL,ecprMATRIX);
589 for(i=0; i<nf; i++)
591 for(j=0; j<DIM; j++)
593 for(k=0; k<DIM; k++)
595 vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
599 sfree(vr);
601 if (list && ret == 0)
603 for(i=0; i<nf; i++)
605 pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
608 if (va)
610 sfree(va);
613 return ret;
616 static void do_cpt_header(XDR *xd,bool bRead,int *file_version,
617 char **version,char **btime,char **buser,char **bmach,
618 char **fprog,char **ftime,
619 int *eIntegrator,int *simulation_part,
620 gmx_large_int_t *step,double *t,
621 int *nnodes,int *dd_nc,int *npme,
622 int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
623 int *flags_state,int *flags_eks,int *flags_enh,
624 FILE *list)
626 bool_t res=0;
627 int magic;
628 int idum=0;
629 int i;
630 char *fhost;
632 if (bRead)
634 magic = -1;
636 else
638 magic = CPT_MAGIC1;
640 res = xdr_int(xd,&magic);
641 if (res == 0)
643 gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of quota?");
645 if (magic != CPT_MAGIC1)
647 gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
648 "The checkpoint file is corrupted or not a checkpoint file",
649 magic,CPT_MAGIC1);
651 if (!bRead)
653 snew(fhost,255);
654 #ifdef HAVE_UNISTD_H
655 if (gethostname(fhost,255) != 0)
657 sprintf(fhost,"unknown");
659 #else
660 sprintf(fhost,"unknown");
661 #endif
663 do_cpt_string_err(xd,bRead,"GROMACS version" ,version,list);
664 do_cpt_string_err(xd,bRead,"GROMACS build time" ,btime,list);
665 do_cpt_string_err(xd,bRead,"GROMACS build user" ,buser,list);
666 do_cpt_string_err(xd,bRead,"GROMACS build machine" ,bmach,list);
667 do_cpt_string_err(xd,bRead,"generating program" ,fprog,list);
668 do_cpt_string_err(xd,bRead,"generation time" ,ftime,list);
669 *file_version = cpt_version;
670 do_cpt_int_err(xd,"checkpoint file version",file_version,list);
671 if (*file_version > cpt_version)
673 gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
675 if (*file_version >= 12)
677 do_cpt_string_err(xd,bRead,"generating host" ,&fhost,list);
678 if (list == NULL)
680 sfree(fhost);
683 do_cpt_int_err(xd,"#atoms" ,natoms ,list);
684 do_cpt_int_err(xd,"#T-coupling groups",ngtc ,list);
685 if (*file_version >= 10)
687 do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
689 else
691 *nhchainlength = 1;
693 if (*file_version >= 11)
695 do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
697 else
699 *nnhpres = 0;
701 do_cpt_int_err(xd,"integrator" ,eIntegrator,list);
702 if (*file_version >= 3)
704 do_cpt_int_err(xd,"simulation part #", simulation_part,list);
706 else
708 *simulation_part = 1;
710 if (*file_version >= 5)
712 do_cpt_step_err(xd,"step" ,step ,list);
714 else
716 do_cpt_int_err(xd,"step" ,&idum ,list);
717 *step = idum;
719 do_cpt_double_err(xd,"t" ,t ,list);
720 do_cpt_int_err(xd,"#PP-nodes" ,nnodes ,list);
721 idum = 1;
722 do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
723 do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
724 do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
725 do_cpt_int_err(xd,"#PME-only nodes",npme,list);
726 do_cpt_int_err(xd,"state flags",flags_state,list);
727 if (*file_version >= 4)
729 do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
730 do_cpt_int_err(xd,"energy history flags",flags_enh,list);
732 else
734 *flags_eks = 0;
735 *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
736 *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
737 (1<<(estORIRE_DTAV+2)) |
738 (1<<(estORIRE_DTAV+3))));
742 static int do_cpt_footer(XDR *xd,bool bRead,int file_version)
744 bool_t res=0;
745 int magic;
747 if (file_version >= 2)
749 magic = CPT_MAGIC2;
750 res = xdr_int(xd,&magic);
751 if (res == 0)
753 cp_error();
755 if (magic != CPT_MAGIC2)
757 return -1;
761 return 0;
764 static int do_cpt_state(XDR *xd,bool bRead,
765 int fflags,t_state *state,
766 bool bReadRNG,FILE *list)
768 int sflags;
769 int **rng_p,**rngi_p;
770 int i;
771 int ret;
772 int nnht,nnhtp;
774 ret = 0;
776 nnht = state->nhchainlength*state->ngtc;
777 nnhtp = state->nhchainlength*state->nnhpres;
779 if (bReadRNG)
781 rng_p = (int **)&state->ld_rng;
782 rngi_p = &state->ld_rngi;
784 else
786 /* Do not read the RNG data */
787 rng_p = NULL;
788 rngi_p = NULL;
791 sflags = state->flags;
792 for(i=0; (i<estNR && ret == 0); i++)
794 if (fflags & (1<<i))
796 switch (i)
798 case estLAMBDA: ret = do_cpte_real(xd,0,i,sflags,&state->lambda,list); break;
799 case estBOX: ret = do_cpte_matrix(xd,0,i,sflags,state->box,list); break;
800 case estBOX_REL: ret = do_cpte_matrix(xd,0,i,sflags,state->box_rel,list); break;
801 case estBOXV: ret = do_cpte_matrix(xd,0,i,sflags,state->boxv,list); break;
802 case estPRES_PREV: ret = do_cpte_matrix(xd,0,i,sflags,state->pres_prev,list); break;
803 case estSVIR_PREV: ret = do_cpte_matrix(xd,0,i,sflags,state->svir_prev,list); break;
804 case estFVIR_PREV: ret = do_cpte_matrix(xd,0,i,sflags,state->fvir_prev,list); break;
805 case estNH_XI: ret = do_cpte_doubles(xd,0,i,sflags,nnht,&state->nosehoover_xi,list); break;
806 case estNH_VXI: ret = do_cpte_doubles(xd,0,i,sflags,nnht,&state->nosehoover_vxi,list); break;
807 case estNHPRES_XI: ret = do_cpte_doubles(xd,0,i,sflags,nnhtp,&state->nhpres_xi,list); break;
808 case estNHPRES_VXI: ret = do_cpte_doubles(xd,0,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
809 case estTC_INT: ret = do_cpte_doubles(xd,0,i,sflags,state->ngtc,&state->therm_integral,list); break;
810 case estVETA: ret = do_cpte_real(xd,0,i,sflags,&state->veta,list); break;
811 case estVOL0: ret = do_cpte_real(xd,0,i,sflags,&state->vol0,list); break;
812 case estX: ret = do_cpte_rvecs(xd,0,i,sflags,state->natoms,&state->x,list); break;
813 case estV: ret = do_cpte_rvecs(xd,0,i,sflags,state->natoms,&state->v,list); break;
814 case estSDX: ret = do_cpte_rvecs(xd,0,i,sflags,state->natoms,&state->sd_X,list); break;
815 case estLD_RNG: ret = do_cpte_ints(xd,0,i,sflags,state->nrng,rng_p,list); break;
816 case estLD_RNGI: ret = do_cpte_ints(xd,0,i,sflags,state->nrngi,rngi_p,list); break;
817 case estDISRE_INITF: ret = do_cpte_real (xd,0,i,sflags,&state->hist.disre_initf,list); break;
818 case estDISRE_RM3TAV: ret = do_cpte_reals(xd,0,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
819 case estORIRE_INITF: ret = do_cpte_real (xd,0,i,sflags,&state->hist.orire_initf,list); break;
820 case estORIRE_DTAV: ret = do_cpte_reals(xd,0,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
821 default:
822 gmx_fatal(FARGS,"Unknown state entry %d\n"
823 "You are probably reading a new checkpoint file with old code",i);
828 return ret;
831 static int do_cpt_ekinstate(XDR *xd,bool bRead,
832 int fflags,ekinstate_t *ekins,
833 FILE *list)
835 int i;
836 int ret;
838 ret = 0;
840 for(i=0; (i<eeksNR && ret == 0); i++)
842 if (fflags & (1<<i))
844 switch (i)
847 case eeksEKIN_N: ret = do_cpte_int(xd,1,i,fflags,&ekins->ekin_n,list); break;
848 case eeksEKINH : ret = do_cpte_matrices(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
849 case eeksEKINF: ret = do_cpte_matrices(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
850 case eeksEKINO: ret = do_cpte_matrices(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
851 case eeksEKINTOTAL: ret = do_cpte_matrix(xd,1,i,fflags,ekins->ekin_total,list); break;
852 case eeksEKINSCALEF: ret = do_cpte_doubles(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
853 case eeksVSCALE: ret = do_cpte_doubles(xd,1,i,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
854 case eeksEKINSCALEH: ret = do_cpte_doubles(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
855 case eeksDEKINDL : ret = do_cpte_real(xd,1,i,fflags,&ekins->dekindl,list); break;
856 case eeksMVCOS: ret = do_cpte_real(xd,1,i,fflags,&ekins->mvcos,list); break;
857 default:
858 gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
859 "You are probably reading a new checkpoint file with old code",i);
864 return ret;
867 static int do_cpt_enerhist(XDR *xd,bool bRead,
868 int fflags,energyhistory_t *enerhist,
869 FILE *list)
871 int i;
872 int ret;
874 ret = 0;
876 if (bRead)
878 enerhist->nsteps = 0;
879 enerhist->nsum = 0;
880 enerhist->nsteps_sim = 0;
881 enerhist->nsum_sim = 0;
884 for(i=0; (i<eenhNR && ret == 0); i++)
886 if (fflags & (1<<i))
888 switch (i)
890 case eenhENERGY_N: ret = do_cpte_int(xd,2,i,fflags,&enerhist->nener,list); break;
891 case eenhENERGY_AVER: ret = do_cpte_doubles(xd,2,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
892 case eenhENERGY_SUM: ret = do_cpte_doubles(xd,2,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
893 case eenhENERGY_NSUM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
894 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,2,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
895 case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
896 case eenhENERGY_NSTEPS: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
897 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
898 default:
899 gmx_fatal(FARGS,"Unknown energy history entry %d\n"
900 "You are probably reading a new checkpoint file with old code",i);
905 if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
907 /* Assume we have an old file format and copy sum to sum_sim */
908 srenew(enerhist->ener_sum_sim,enerhist->nener);
909 for(i=0; i<enerhist->nener; i++)
911 enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
913 fflags |= (1<<eenhENERGY_SUM_SIM);
916 if ( (fflags & (1<<eenhENERGY_NSUM)) &&
917 !(fflags & (1<<eenhENERGY_NSTEPS)))
919 /* Assume we have an old file format and copy nsum to nsteps */
920 enerhist->nsteps = enerhist->nsum;
921 fflags |= (1<<eenhENERGY_NSTEPS);
923 if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
924 !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
926 /* Assume we have an old file format and copy nsum to nsteps */
927 enerhist->nsteps_sim = enerhist->nsum_sim;
928 fflags |= (1<<eenhENERGY_NSTEPS_SIM);
931 return ret;
934 static int do_cpt_files(XDR *xd, bool bRead,
935 gmx_file_position_t **p_outputfiles, int *nfiles,
936 FILE *list, int file_version)
938 int i,j;
939 off_t offset;
940 off_t mask = 0xFFFFFFFFL;
941 int offset_high,offset_low;
942 char *buf;
943 gmx_file_position_t *outputfiles;
945 if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
947 return -1;
950 if(bRead)
952 snew(*p_outputfiles,*nfiles);
955 outputfiles = *p_outputfiles;
957 for(i=0;i<*nfiles;i++)
959 /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
960 if(bRead)
962 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
963 strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
964 if(list==NULL)
966 sfree(buf);
969 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
971 return -1;
973 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
975 return -1;
977 #if (SIZEOF_OFF_T > 4)
978 outputfiles[i].offset = ( ((off_t) offset_high) << 32 ) | ( (off_t) offset_low & mask );
979 #else
980 outputfiles[i].offset = offset_low;
981 #endif
983 else
985 buf = outputfiles[i].filename;
986 do_cpt_string_err(xd,bRead,"output filename",&buf,list);
987 /* writing */
988 offset = outputfiles[i].offset;
989 if (offset == -1)
991 offset_low = -1;
992 offset_high = -1;
994 else
996 #if (SIZEOF_OFF_T > 4)
997 offset_low = (int) (offset & mask);
998 offset_high = (int) ((offset >> 32) & mask);
999 #else
1000 offset_low = offset;
1001 offset_high = 0;
1002 #endif
1004 if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
1006 return -1;
1008 if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
1010 return -1;
1013 if (file_version >= 8)
1015 if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
1016 list) != 0)
1018 return -1;
1020 if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
1022 return -1;
1025 else
1027 outputfiles[i].chksum_size = -1;
1030 return 0;
1034 void write_checkpoint(const char *fn,bool bNumberAndKeep,
1035 FILE *fplog,t_commrec *cr,
1036 int eIntegrator,int simulation_part,
1037 gmx_large_int_t step,double t,t_state *state)
1039 t_fileio *fp;
1040 int file_version;
1041 char *version;
1042 char *btime;
1043 char *buser;
1044 char *bmach;
1045 char *fprog;
1046 char *ftime;
1047 char *fntemp; /* the temporary checkpoint file name */
1048 time_t now;
1049 int nppnodes,npmenodes,flag_64bit;
1050 char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
1051 gmx_file_position_t *outputfiles;
1052 int noutputfiles;
1053 int flags_eks,flags_enh,i;
1055 if (PAR(cr))
1057 if (DOMAINDECOMP(cr))
1059 nppnodes = cr->dd->nnodes;
1060 npmenodes = cr->npmenodes;
1062 else
1064 nppnodes = cr->nnodes;
1065 npmenodes = 0;
1068 else
1070 nppnodes = 1;
1071 npmenodes = 0;
1074 /* make the new temporary filename */
1075 snew(fntemp, strlen(fn)+5);
1076 strcpy(fntemp,fn);
1077 fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1078 sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
1079 strcat(fntemp,suffix);
1080 strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1082 now = time(NULL);
1083 ftime = strdup(ctime(&now));
1084 ftime[strlen(ftime)-1] = '\0';
1086 /* No need to pollute stderr every time we write a checkpoint file */
1087 /* fprintf(stderr,"\nWriting checkpoint, step %d at %s\n",step,ftime); */
1088 if (fplog)
1090 fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
1091 gmx_step_str(step,buf),ftime);
1094 /* Get offsets for open files */
1095 gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
1097 fp = gmx_fio_open(fntemp,"w");
1099 if (state->ekinstate.bUpToDate)
1101 flags_eks =
1102 ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
1103 (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
1104 (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
1106 else
1108 flags_eks = 0;
1111 flags_enh = 0;
1112 if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
1114 flags_enh |= (1<<eenhENERGY_N);
1115 if (state->enerhist.nsum > 0)
1117 flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
1118 (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
1120 if (state->enerhist.nsum_sim > 0)
1122 flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
1123 (1<<eenhENERGY_NSUM_SIM));
1128 version = strdup(VERSION);
1129 btime = strdup(BUILD_TIME);
1130 buser = strdup(BUILD_USER);
1131 bmach = strdup(BUILD_MACHINE);
1132 fprog = strdup(Program());
1134 do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
1135 &version,&btime,&buser,&bmach,&fprog,&ftime,
1136 &eIntegrator,&simulation_part,&step,&t,&nppnodes,
1137 DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
1138 &state->natoms,&state->ngtc,&state->nnhpres,
1139 &state->nhchainlength, &state->flags,&flags_eks,&flags_enh,
1140 NULL);
1142 sfree(version);
1143 sfree(btime);
1144 sfree(buser);
1145 sfree(bmach);
1146 sfree(fprog);
1148 if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0) ||
1149 (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
1150 (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0) ||
1151 (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
1152 file_version) < 0))
1154 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of quota?");
1157 do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
1159 /* we really, REALLY, want the checkpoint file and all files it depends
1160 on to be physically written out do disk: */
1161 gmx_fio_all_output_fsync();
1163 if( gmx_fio_close(fp) != 0)
1165 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of quota?");
1168 if (!bNumberAndKeep)
1170 if (gmx_fexist(fn))
1172 /* Rename the previous checkpoint file */
1173 strcpy(buf,fn);
1174 buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
1175 strcat(buf,"_prev");
1176 strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
1177 #ifndef GMX_FAHCORE
1178 /* we copy here so that if something goes wrong between now and
1179 * the rename below, there's always a state.cpt.
1180 * If renames are atomic (such as in POSIX systems),
1181 * this copying should be unneccesary.
1183 gmx_file_copy(fn, buf, FALSE);
1184 /* We don't really care if this fails:
1185 * there's already a new checkpoint.
1187 #else
1188 gmx_file_rename(fn, buf);
1189 #endif
1191 if (gmx_file_rename(fntemp, fn) != 0)
1193 gmx_file("Cannot rename checkpoint file; maybe you are out of quota?");
1197 sfree(ftime);
1198 sfree(outputfiles);
1199 sfree(fntemp);
1201 #ifdef GMX_FAHCORE
1202 /*code for alternate checkpointing scheme. moved from top of loop over
1203 steps */
1204 fcRequestCheckPoint();
1205 if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
1206 gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
1208 #endif /* end GMX_FAHCORE block */
1211 static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
1213 int i;
1215 fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
1216 fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
1217 fprintf(fplog," %24s %11s %11s\n","","simulation","checkpoint");
1218 for(i=0; i<estNR; i++)
1220 if ((sflags & (1<<i)) || (fflags & (1<<i)))
1222 fprintf(fplog," %24s %11s %11s\n",
1223 est_names[i],
1224 (sflags & (1<<i)) ? " present " : "not present",
1225 (fflags & (1<<i)) ? " present " : "not present");
1230 static void check_int(FILE *fplog,const char *type,int p,int f,bool *mm)
1232 FILE *fp = fplog ? fplog : stderr;
1234 if (p != f)
1236 fprintf(fp," %s mismatch,\n",type);
1237 fprintf(fp," current program: %d\n",p);
1238 fprintf(fp," checkpoint file: %d\n",f);
1239 fprintf(fp,"\n");
1240 *mm = TRUE;
1244 static void check_string(FILE *fplog,const char *type,const char *p,
1245 const char *f,bool *mm)
1247 FILE *fp = fplog ? fplog : stderr;
1249 if (strcmp(p,f) != 0)
1251 fprintf(fp," %s mismatch,\n",type);
1252 fprintf(fp," current program: %s\n",p);
1253 fprintf(fp," checkpoint file: %s\n",f);
1254 fprintf(fp,"\n");
1255 *mm = TRUE;
1259 static void check_match(FILE *fplog,
1260 char *version,
1261 char *btime,char *buser,char *bmach,char *fprog,
1262 t_commrec *cr,bool bPartDecomp,int npp_f,int npme_f,
1263 ivec dd_nc,ivec dd_nc_f)
1265 int npp;
1266 bool mm;
1268 mm = FALSE;
1270 check_string(fplog,"Version" ,VERSION ,version,&mm);
1271 check_string(fplog,"Build time" ,BUILD_TIME ,btime ,&mm);
1272 check_string(fplog,"Build user" ,BUILD_USER ,buser ,&mm);
1273 check_string(fplog,"Build machine",BUILD_MACHINE,bmach ,&mm);
1274 check_string(fplog,"Program name" ,Program() ,fprog ,&mm);
1276 npp = cr->nnodes - cr->npmenodes;
1277 check_int (fplog,"#nodes" ,cr->nnodes ,npp_f+npme_f ,&mm);
1278 if (bPartDecomp)
1280 dd_nc[XX] = 1;
1281 dd_nc[YY] = 1;
1282 dd_nc[ZZ] = 1;
1284 if (npp > 1)
1286 check_int (fplog,"#PME-nodes" ,cr->npmenodes,npme_f ,&mm);
1287 if (npp == npp_f)
1289 check_int (fplog,"#DD-cells[x]",dd_nc[XX] ,dd_nc_f[XX],&mm);
1290 check_int (fplog,"#DD-cells[y]",dd_nc[YY] ,dd_nc_f[YY],&mm);
1291 check_int (fplog,"#DD-cells[z]",dd_nc[ZZ] ,dd_nc_f[ZZ],&mm);
1295 if (mm)
1297 fprintf(stderr,
1298 "Gromacs binary or parallel settings not identical to previous run.\n"
1299 "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
1300 fplog ? ",\n see the log file for details" : "");
1302 if (fplog)
1304 fprintf(fplog,
1305 "Gromacs binary or parallel settings not identical to previous run.\n"
1306 "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
1311 static void read_checkpoint(const char *fn,FILE **pfplog,
1312 t_commrec *cr,bool bPartDecomp,ivec dd_nc,
1313 int eIntegrator,gmx_large_int_t *step,double *t,
1314 t_state *state,bool *bReadRNG,bool *bReadEkin,
1315 int *simulation_part,bool bAppendOutputFiles)
1317 t_fileio *fp;
1318 int i,j,rc;
1319 int file_version;
1320 char *version,*btime,*buser,*bmach,*fprog,*ftime;
1321 char filename[STRLEN],buf[STEPSTRSIZE];
1322 int nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
1323 ivec dd_nc_f;
1324 int natoms,ngtc,nnhpres,nhchainlength,fflags,flags_eks,flags_enh;
1325 int d;
1326 int ret;
1327 gmx_file_position_t *outputfiles;
1328 int nfiles;
1329 t_fileio *chksum_file;
1330 FILE* fplog = *pfplog;
1331 unsigned char digest[16];
1332 #if !((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
1333 struct flock fl = { 0, SEEK_SET, 0, F_WRLCK, 0 };
1334 #endif
1336 const char *int_warn=
1337 "WARNING: The checkpoint file was generator with integrator %s,\n"
1338 " while the simulation uses integrator %s\n\n";
1339 const char *sd_note=
1340 "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
1341 " while the simulation uses %d SD or BD nodes,\n"
1342 " continuation will be exact, except for the random state\n\n";
1344 if (PARTDECOMP(cr))
1346 gmx_fatal(FARGS,
1347 "read_checkpoint not (yet) supported with particle decomposition");
1350 fp = gmx_fio_open(fn,"r");
1351 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1352 &version,&btime,&buser,&bmach,&fprog,&ftime,
1353 &eIntegrator_f,simulation_part,step,t,
1354 &nppnodes_f,dd_nc_f,&npmenodes_f,
1355 &natoms,&ngtc,&nnhpres,&nhchainlength,
1356 &fflags,&flags_eks,&flags_enh,NULL);
1358 if (cr == NULL || MASTER(cr))
1360 fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
1361 fn,ftime);
1364 /* This will not be written if we do appending, since fplog is still NULL then */
1365 if (fplog)
1367 fprintf(fplog,"\n");
1368 fprintf(fplog,"Reading checkpoint file %s\n",fn);
1369 fprintf(fplog," file generated by: %s\n",fprog);
1370 fprintf(fplog," file generated at: %s\n",ftime);
1371 fprintf(fplog," GROMACS build time: %s\n",btime);
1372 fprintf(fplog," GROMACS build user: %s\n",buser);
1373 fprintf(fplog," GROMACS build machine: %s\n",bmach);
1374 fprintf(fplog," simulation part #: %d\n",*simulation_part);
1375 fprintf(fplog," step: %s\n",gmx_step_str(*step,buf));
1376 fprintf(fplog," time: %f\n",*t);
1377 fprintf(fplog,"\n");
1380 if (natoms != state->natoms)
1382 gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
1384 if (ngtc != state->ngtc)
1386 gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
1388 if (nnhpres != state->nnhpres)
1390 gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
1393 init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
1394 /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
1396 if (eIntegrator_f != eIntegrator)
1398 if (MASTER(cr))
1400 fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1402 if(bAppendOutputFiles)
1404 gmx_fatal(FARGS,
1405 "Output file appending requested, but input/checkpoint integrators do not match.\n"
1406 "Stopping the run to prevent you from ruining all your data...\n"
1407 "If you _really_ know what you are doing, try without the -append option.\n");
1409 if (fplog)
1411 fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
1415 if (!PAR(cr))
1417 nppnodes = 1;
1419 else if (bPartDecomp)
1421 nppnodes = cr->nnodes;
1423 else if (cr->nnodes == nppnodes_f + npmenodes_f)
1425 if (cr->npmenodes < 0)
1427 cr->npmenodes = npmenodes_f;
1429 nppnodes = cr->nnodes - cr->npmenodes;
1430 if (nppnodes == nppnodes_f)
1432 for(d=0; d<DIM; d++)
1434 if (dd_nc[d] == 0)
1436 dd_nc[d] = dd_nc_f[d];
1441 else
1443 /* The number of PP nodes has not been set yet */
1444 nppnodes = -1;
1447 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
1449 /* Correct the RNG state size for the number of PP nodes.
1450 * Such assignments should all be moved to one central function.
1452 state->nrng = nppnodes*gmx_rng_n();
1453 state->nrngi = nppnodes;
1456 *bReadRNG = TRUE;
1457 if (fflags != state->flags)
1460 if (MASTER(cr))
1462 if(bAppendOutputFiles)
1464 gmx_fatal(FARGS,
1465 "Output file appending requested, but input and checkpoint states are not identical.\n"
1466 "Stopping the run to prevent you from ruining all your data...\n"
1467 "You can try without the -append option, and get more info in the log file.\n");
1470 fprintf(stderr,
1471 "WARNING: The checkpoint state entries do not match the simulation,\n"
1472 " see the log file for details\n\n");
1475 if(fplog)
1477 print_flag_mismatch(fplog,state->flags,fflags);
1480 else
1482 if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
1483 nppnodes != nppnodes_f)
1485 *bReadRNG = FALSE;
1486 if (MASTER(cr))
1488 fprintf(stderr,sd_note,nppnodes_f,nppnodes);
1490 if (fplog)
1492 fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
1495 if (MASTER(cr))
1497 check_match(fplog,version,btime,buser,bmach,fprog,
1498 cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
1501 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
1502 if (ret)
1504 cp_error();
1506 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1507 flags_eks,&state->ekinstate,NULL);
1508 if (ret)
1510 cp_error();
1512 *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
1513 (flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE)));
1515 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1516 flags_enh,&state->enerhist,NULL);
1517 if (ret)
1519 cp_error();
1522 if (file_version < 6)
1524 const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
1526 fprintf(stderr,"\nWARNING: %s\n\n",warn);
1527 if (fplog)
1529 fprintf(fplog,"\nWARNING: %s\n\n",warn);
1531 state->enerhist.nsum = *step;
1532 state->enerhist.nsum_sim = *step;
1535 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
1536 if (ret)
1538 cp_error();
1541 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
1542 if (ret)
1544 cp_error();
1546 if( gmx_fio_close(fp) != 0)
1548 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of quota?");
1551 sfree(fprog);
1552 sfree(ftime);
1553 sfree(btime);
1554 sfree(buser);
1555 sfree(bmach);
1557 /* If the user wants to append to output files (danger), we use the file pointer
1558 * positions of the output files stored in the checkpoint file and truncate the
1559 * files such that any frames written after the checkpoint time are removed.
1561 * You will get REALLY fun problems if you use the -append option by provide
1562 * mdrun with other input files (in-frame truncation in the wrong places). Suit yourself!
1564 if (bAppendOutputFiles)
1566 if (fn2ftp(outputfiles[0].filename)!=efLOG)
1568 /* make sure first file is log file so that it is OK to use it for
1569 * locking
1571 gmx_fatal(FARGS,"The first output file should always be the log "
1572 "file but instead is: %s", outputfiles[0].filename);
1574 for(i=0;i<nfiles;i++)
1576 if (outputfiles[i].filename,outputfiles[i].offset < 0)
1578 gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
1579 "is larger than 2 GB, but mdrun did not support large file"
1580 " offsets. Can not append. Run mdrun without -append",
1581 outputfiles[i].filename);
1583 #ifdef GMX_FAHCORE
1584 chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
1586 #else
1587 chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
1589 /* lock log file */
1590 if (i==0)
1592 #if !((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
1593 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
1594 ==-1)
1595 #else
1596 if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
1597 #endif
1599 gmx_fatal(FARGS,"Failed to lock: %s. Already running "
1600 "simulation?", outputfiles[i].filename);
1604 /* compute md5 chksum */
1605 if (outputfiles[i].chksum_size != -1)
1607 if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
1608 digest) != outputfiles[i].chksum_size)
1610 gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents has been modified.",
1611 outputfiles[i].chksum_size,
1612 outputfiles[i].filename);
1615 else if (i==0) /*log file need to be seeked even when not reading md5*/
1617 gmx_fio_seek(chksum_file,outputfiles[i].offset);
1619 #endif
1621 if (i==0) /*open log file here - so that lock is never lifted
1622 after chksum is calculated */
1624 *pfplog = gmx_fio_getfp(chksum_file);
1626 else
1628 gmx_fio_close(chksum_file);
1630 #ifndef GMX_FAHCORE
1631 /* compare md5 chksum */
1632 if (outputfiles[i].chksum_size != -1 &&
1633 memcmp(digest,outputfiles[i].chksum,16)!=0)
1635 if (debug)
1637 fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
1638 for (j=0; j<16; j++)
1640 fprintf(debug,"%02x",digest[j]);
1642 fprintf(debug,"\n");
1644 gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents has been modified.",
1645 outputfiles[i].filename);
1647 #endif
1650 if (i!=0) /*log file is already seeked to correct position */
1652 #if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
1653 rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
1654 #else
1655 rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
1656 #endif
1657 if(rc!=0)
1659 gmx_fatal(FARGS,"Truncation of file %s failed.",outputfiles[i].filename);
1665 sfree(outputfiles);
1669 void load_checkpoint(const char *fn,FILE **fplog,
1670 t_commrec *cr,bool bPartDecomp,ivec dd_nc,
1671 t_inputrec *ir,t_state *state,
1672 bool *bReadRNG,bool *bReadEkin,bool bAppend)
1674 gmx_large_int_t step;
1675 double t;
1677 if (SIMMASTER(cr)) {
1678 /* Read the state from the checkpoint file */
1679 read_checkpoint(fn,fplog,
1680 cr,bPartDecomp,dd_nc,
1681 ir->eI,&step,&t,state,bReadRNG,bReadEkin,
1682 &ir->simulation_part,bAppend);
1684 if (PAR(cr)) {
1685 gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
1686 gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
1687 gmx_bcast(sizeof(step),&step,cr);
1688 gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
1689 gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
1691 ir->bContinuation = TRUE;
1692 if (ir->nsteps >= 0)
1694 ir->nsteps += ir->init_step - step;
1696 ir->init_step = step;
1697 ir->simulation_part += 1;
1700 static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
1701 gmx_large_int_t *step,double *t,t_state *state,
1702 bool bReadRNG,
1703 int *nfiles,gmx_file_position_t **outputfiles)
1705 int file_version;
1706 char *version,*btime,*buser,*bmach,*fprog,*ftime;
1707 int eIntegrator;
1708 int nppnodes,npme;
1709 ivec dd_nc;
1710 int flags_eks,flags_enh;
1711 int nfiles_loc;
1712 gmx_file_position_t *files_loc=NULL;
1713 int ret;
1715 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1716 &version,&btime,&buser,&bmach,&fprog,&ftime,
1717 &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
1718 &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
1719 &state->flags,&flags_eks,&flags_enh,NULL);
1720 ret =
1721 do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
1722 if (ret)
1724 cp_error();
1726 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1727 flags_eks,&state->ekinstate,NULL);
1728 if (ret)
1730 cp_error();
1732 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1733 flags_enh,&state->enerhist,NULL);
1734 if (ret)
1736 cp_error();
1739 ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
1740 outputfiles != NULL ? outputfiles : &files_loc,
1741 outputfiles != NULL ? nfiles : &nfiles_loc,
1742 NULL,file_version);
1743 if (files_loc != NULL)
1745 sfree(files_loc);
1748 if (ret)
1750 cp_error();
1753 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
1754 if (ret)
1756 cp_error();
1759 sfree(fprog);
1760 sfree(ftime);
1761 sfree(btime);
1762 sfree(buser);
1763 sfree(bmach);
1766 void
1767 read_checkpoint_state(const char *fn,int *simulation_part,
1768 gmx_large_int_t *step,double *t,t_state *state)
1770 t_fileio *fp;
1772 fp = gmx_fio_open(fn,"r");
1773 read_checkpoint_data(fp,simulation_part,step,t,state,TRUE,NULL,NULL);
1774 if( gmx_fio_close(fp) != 0)
1776 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of quota?");
1780 void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
1782 t_state state;
1783 int simulation_part;
1784 gmx_large_int_t step;
1785 double t;
1787 init_state(&state,0,0,0,0);
1789 read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
1791 fr->natoms = state.natoms;
1792 fr->bTitle = FALSE;
1793 fr->bStep = TRUE;
1794 fr->step = gmx_large_int_to_int(step,
1795 "conversion of checkpoint to trajectory");
1796 fr->bTime = TRUE;
1797 fr->time = t;
1798 fr->bLambda = TRUE;
1799 fr->lambda = state.lambda;
1800 fr->bAtoms = FALSE;
1801 fr->bX = (state.flags & (1<<estX));
1802 if (fr->bX)
1804 fr->x = state.x;
1805 state.x = NULL;
1807 fr->bV = (state.flags & (1<<estV));
1808 if (fr->bV)
1810 fr->v = state.v;
1811 state.v = NULL;
1813 fr->bF = FALSE;
1814 fr->bBox = (state.flags & (1<<estBOX));
1815 if (fr->bBox)
1817 copy_mat(state.box,fr->box);
1819 done_state(&state);
1822 void list_checkpoint(const char *fn,FILE *out)
1824 t_fileio *fp;
1825 int file_version;
1826 char *version,*btime,*buser,*bmach,*fprog,*ftime;
1827 int eIntegrator,simulation_part,nppnodes,npme;
1828 gmx_large_int_t step;
1829 double t;
1830 ivec dd_nc;
1831 t_state state;
1832 int flags_eks,flags_enh;
1833 int indent;
1834 int i,j;
1835 int ret;
1836 gmx_file_position_t *outputfiles;
1837 int nfiles;
1839 init_state(&state,-1,-1,-1,-1);
1841 fp = gmx_fio_open(fn,"r");
1842 do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
1843 &version,&btime,&buser,&bmach,&fprog,&ftime,
1844 &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
1845 &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
1846 &state.flags,&flags_eks,&flags_enh,out);
1847 ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
1848 if (ret)
1850 cp_error();
1852 ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
1853 flags_eks,&state.ekinstate,out);
1854 if (ret)
1856 cp_error();
1858 ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
1859 flags_enh,&state.enerhist,out);
1861 if (ret == 0)
1863 do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
1866 if (ret == 0)
1868 ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
1871 if (ret)
1873 cp_warning(out);
1875 if( gmx_fio_close(fp) != 0)
1877 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of quota?");
1880 done_state(&state);
1884 /* This routine cannot print tons of data, since it is called before the log file is opened. */
1885 bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
1886 gmx_large_int_t *cpt_step,t_commrec *cr,
1887 bool bAppendReq,
1888 const char *part_suffix,bool *bAddPart)
1890 t_fileio *fp;
1891 gmx_large_int_t step=0;
1892 double t;
1893 t_state state;
1894 int nfiles;
1895 gmx_file_position_t *outputfiles;
1896 int nexist,f;
1897 bool bAppend;
1898 char *fn,suf_up[STRLEN];
1900 bAppend = FALSE;
1902 if (SIMMASTER(cr)) {
1903 if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
1905 *simulation_part = 0;
1907 else
1909 init_state(&state,0,0,0,0);
1911 read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
1912 &nfiles,&outputfiles);
1913 if( gmx_fio_close(fp) != 0)
1915 gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of quota?");
1917 done_state(&state);
1919 if (bAppendReq)
1921 nexist = 0;
1922 for(f=0; f<nfiles; f++)
1924 if (gmx_fexist(outputfiles[f].filename))
1926 nexist++;
1929 if (nexist == nfiles)
1931 bAppend = bAppendReq;
1933 else if (nexist > 0)
1935 fprintf(stderr,"Output files present:");
1936 for(f=0; f<nfiles; f++)
1938 if (gmx_fexist(outputfiles[f].filename))
1940 fprintf(stderr," %s",outputfiles[f].filename);
1943 fprintf(stderr,"\n");
1944 fprintf(stderr,"Output files not present:");
1945 for(f=0; f<nfiles; f++)
1947 if (!gmx_fexist(outputfiles[f].filename))
1949 fprintf(stderr," %s",outputfiles[f].filename);
1952 fprintf(stderr,"\n");
1954 gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
1958 if (bAppend)
1960 if (nfiles == 0)
1962 gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
1964 fn = outputfiles[0].filename;
1965 if (strlen(fn) < 4 ||
1966 strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
1968 gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
1970 /* Set bAddPart to whether the suffix string '.part' is present
1971 * in the log file name.
1973 strcpy(suf_up,part_suffix);
1974 upstring(suf_up);
1975 *bAddPart = (strstr(fn,part_suffix) != NULL ||
1976 strstr(fn,suf_up) != NULL);
1979 sfree(outputfiles);
1982 if (PAR(cr))
1984 gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
1986 if (*simulation_part > 0 && bAppendReq)
1988 gmx_bcast(sizeof(bAppend),&bAppend,cr);
1989 gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
1992 if (NULL != cpt_step)
1994 *cpt_step = step;
1997 return bAppend;