/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 *          GROningen MAchine for Chemical Simulations
 *
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2008, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <stdlib.h>

#include "gmx_wallcycle.h"
#include "gmx_cyclecounter.h"
#include "typedefs.h"   /* t_commrec */
#include "smalloc.h"    /* snew(), sfree() */
#include "gmx_fatal.h"  /* gmx_fatal() */
/* One cycle counter: number of start/stop calls and accumulated cycles */
typedef struct
{
    int          n;
    gmx_cycles_t c;
    gmx_cycles_t start;
} wallcc_t;

/* gmx_wallcycle_t, declared in gmx_wallcycle.h, is a pointer to this struct */
typedef struct gmx_wallcycle
{
    wallcc_t        *wcc;
    /* variables for testing/debugging */
    bool             wc_barrier;
    wallcc_t        *wcc_all;
    int              wc_depth;
    int              ewc_prev;
    gmx_cycles_t     cycle_prev;
    gmx_large_int_t  reset_counters;
#ifdef GMX_MPI
    MPI_Comm         mpi_comm_mygroup;
#endif
} gmx_wallcycle_t_t;
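/* wcc has one counter per ewc* region (enum in gmx_wallcycle.h).  wcc_all is
 * only allocated when the GMX_CYCLE_ALL environment variable is set; it is an
 * ewcNR x ewcNR matrix of counters resolved per (previous counter, current
 * counter) pair, and wc_depth, ewc_prev and cycle_prev are its bookkeeping
 * state. */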
/* Each name should not exceed 19 characters.  Note that "PME mesh"
 * intentionally appears twice: once for the mesh part timed on PP nodes
 * (ewcPMEMESH) and once for separate PME nodes (ewcPMEMESH_SEP); see
 * wallcycle_sum(). */
static const char *wcn[ewcNR] =
{
    "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load",
    "DD comm. bounds", "Vsite constr.", "Send X to PME", "Comm. coord.",
    "Neighbor search", "Force", "Wait + Comm. F", "PME mesh", "PME mesh",
    "Wait + Comm. X/F", "Wait + Recv. PME F", "Vsite spread", "Write traj.",
    "Update", "Constraints", "Comm. energies", "Test", "Born radii",
    "Enforced rotation"
};
bool wallcycle_have_counter(void)
{
    return gmx_cycles_have_counter();
}
gmx_wallcycle_t wallcycle_init(FILE *fplog,t_commrec *cr)
{
    gmx_wallcycle_t wc;
    char *env_ptr;

    if (!wallcycle_have_counter())
    {
        return NULL;
    }

    snew(wc,1);

    wc->wc_barrier = FALSE;
    wc->wcc_all    = NULL;
    wc->wc_depth   = 0;
    wc->ewc_prev   = -1;

#ifdef GMX_MPI
    if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
    {
        if (fplog)
        {
            fprintf(fplog,"\nWill call MPI_Barrier before each cycle start/stop call\n\n");
        }
        wc->wc_barrier       = TRUE;
        wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
    }
#endif

    snew(wc->wcc,ewcNR);
    if (getenv("GMX_CYCLE_ALL") != NULL)
    {
#ifndef GMX_THREADS
        if (fplog)
        {
            fprintf(fplog,"\nWill time all the code during the run\n\n");
        }
        snew(wc->wcc_all,ewcNR*ewcNR);
#else
        gmx_fatal(FARGS, "GMX_CYCLE_ALL is incompatible with threaded code");
#endif
    }

    /* Read variable GMX_RESET_COUNTERS from environment */
    wc->reset_counters = -1;
    if ((env_ptr=getenv("GMX_RESET_COUNTERS")) != NULL)
    {
        sscanf(env_ptr,gmx_large_int_pfmt,&wc->reset_counters);
    }

    return wc;
}
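/* For reference, the environment variables read by wallcycle_init() above:
 *   GMX_CYCLE_BARRIER  - call MPI_Barrier before each counter start/stop
 *                        (parallel runs only)
 *   GMX_CYCLE_ALL      - additionally time all (previous,current) counter
 *                        pairs in the wcc_all matrix
 *   GMX_RESET_COUNTERS - parsed into wc->reset_counters; this file only
 *                        stores the value, interpreting it is up to the
 *                        caller (see wcycle_get/set_reset_counters below)
 */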
void wallcycle_destroy(gmx_wallcycle_t wc)
{
    if (wc == NULL)
    {
        return;
    }

    if (wc->wcc != NULL)
    {
        sfree(wc->wcc);
    }
    if (wc->wcc_all != NULL)
    {
        sfree(wc->wcc_all);
    }
    sfree(wc);
}
static void wallcycle_all_start(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
{
    wc->ewc_prev   = ewc;
    wc->cycle_prev = cycle;
}
static void wallcycle_all_stop(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
{
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].n += 1;
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].c += cycle - wc->cycle_prev;
}
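/* wallcycle_all_start() and wallcycle_all_stop() together fill the wcc_all
 * matrix: all_start() records which counter is active and the current cycle
 * count, and all_stop() then charges the cycles elapsed since that point to
 * entry [ewc_prev*ewcNR + ewc], i.e. to the (previous,current) counter pair
 * that wallcycle_print() reports when GMX_CYCLE_ALL is set. */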
void wallcycle_start(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle;

    if (wc == NULL)
    {
        return;
    }

#ifdef GMX_MPI
    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }
#endif

    cycle = gmx_cycles_read();
    wc->wcc[ewc].start = cycle;
    if (wc->wcc_all != NULL)
    {
        wc->wc_depth++;
        if (ewc == ewcRUN)
        {
            wallcycle_all_start(wc,ewc,cycle);
        }
        else if (wc->wc_depth == 3)
        {
            wallcycle_all_stop(wc,ewc,cycle);
        }
    }
}
double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle,last;

    if (wc == NULL)
    {
        return 0;
    }

#ifdef GMX_MPI
    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }
#endif

    cycle = gmx_cycles_read();
    last = cycle - wc->wcc[ewc].start;
    wc->wcc[ewc].c += last;
    wc->wcc[ewc].n++;
    if (wc->wcc_all != NULL)
    {
        wc->wc_depth--;
        if (ewc == ewcRUN)
        {
            wallcycle_all_stop(wc,ewc,cycle);
        }
        else if (wc->wc_depth == 2)
        {
            wallcycle_all_start(wc,ewc,cycle);
        }
    }

    return last;
}
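/* A minimal usage sketch (illustrative only, not part of this file;
 * do_work() is a hypothetical stand-in for real MD code).  Counters are
 * stopped in the reverse order of starting, with ewcRUN outermost:
 *
 *     gmx_wallcycle_t wc = wallcycle_init(fplog,cr);
 *     double cycles[ewcNR];
 *
 *     wallcycle_start(wc,ewcRUN);
 *     wallcycle_start(wc,ewcFORCE);
 *     do_work();
 *     wallcycle_stop(wc,ewcFORCE);
 *     wallcycle_stop(wc,ewcRUN);
 *
 *     wallcycle_sum(cr,wc,cycles);
 *     wallcycle_print(fplog,nnodes,npme,realtime,wc,cycles);
 *     wallcycle_destroy(wc);
 */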
void wallcycle_reset_all(gmx_wallcycle_t wc)
{
    int i;

    if (wc == NULL)
    {
        return;
    }

    for(i=0; i<ewcNR; i++)
    {
        wc->wcc[i].n     = 0;
        wc->wcc[i].c     = 0;
        wc->wcc[i].start = 0;
    }
}
void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc,double cycles[])
{
    wallcc_t *wcc;
    double buf[ewcNR],*cyc_all,*buf_all;
    int    i;

    if (wc == NULL)
    {
        return;
    }

    wcc = wc->wcc;

    if (wcc[ewcDDCOMMLOAD].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMLOAD].c;
    }
    if (wcc[ewcDDCOMMBOUND].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMBOUND].c;
    }
    if (wcc[ewcPMEMESH].n > 0)
    {
        wcc[ewcFORCE].c -= wcc[ewcPMEMESH].c;
    }
    if (wcc[ewcPMEMESH_SEP].n > 0)
    {
        /* This must be a PME only node, calculate the Wait + Comm. time */
        wcc[ewcPMEWAITCOMM].c = wcc[ewcRUN].c - wcc[ewcPMEMESH_SEP].c;
    }
    else
    {
        /* Correct the PME mesh only call count */
        wcc[ewcPMEMESH_SEP].n = wcc[ewcFORCE].n;
        wcc[ewcPMEWAITCOMM].n = wcc[ewcFORCE].n;
    }

    /* Store the cycles in a double buffer for summing */
    for(i=0; i<ewcNR; i++)
    {
        cycles[i] = (double)wcc[i].c;
    }

    if (wcc[ewcUPDATE].n > 0)
    {
        /* Remove the constraint part from the update count */
        cycles[ewcUPDATE] -= cycles[ewcCONSTR];
    }

#ifdef GMX_MPI
    if (cr->nnodes > 1)
    {
        MPI_Allreduce(cycles,buf,ewcNR,MPI_DOUBLE,MPI_SUM,
                      cr->mpi_comm_mysim);
        for(i=0; i<ewcNR; i++)
        {
            cycles[i] = buf[i];
        }
        if (wc->wcc_all != NULL)
        {
            snew(cyc_all,ewcNR*ewcNR);
            snew(buf_all,ewcNR*ewcNR);
            for(i=0; i<ewcNR*ewcNR; i++)
            {
                cyc_all[i] = wc->wcc_all[i].c;
            }
            MPI_Allreduce(cyc_all,buf_all,ewcNR*ewcNR,MPI_DOUBLE,MPI_SUM,
                          cr->mpi_comm_mysim);
            for(i=0; i<ewcNR*ewcNR; i++)
            {
                wc->wcc_all[i].c = buf_all[i];
            }
            sfree(buf_all);
            sfree(cyc_all);
        }
    }
#endif
}
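/* Note that wallcycle_sum() modifies the counters in place (overlap
 * subtraction above and, with MPI, the node-summed wcc_all entries), so it
 * is meant to be called once, after the final wallcycle_stop(wc,ewcRUN). */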
static void print_cycles(FILE *fplog, double c2t, const char *name, int nnodes,
                         int n, gmx_cycles_t c, gmx_cycles_t tot)
{
    char num[11];

    if (c > 0)
    {
        if (n > 0)
        {
            sprintf(num,"%10d",n);
        }
        else
        {
            sprintf(num,"          ");
        }
        fprintf(fplog," %-19s %4d %10s %12.3f %10.1f %5.1f\n",
                name,nnodes,num,c*1e-9,c*c2t,100*(double)c/(double)tot);
    }
}
void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
                     gmx_wallcycle_t wc, double cycles[])
{
    double c2t,tot,sum;
    int    i,j,npp;
    char   buf[STRLEN];
    const char *myline = "-----------------------------------------------------------------------";

    if (wc == NULL)
    {
        return;
    }

    npp = nnodes - npme;
    tot = cycles[ewcRUN];
    /* Conversion factor from cycles to seconds */
    if (tot > 0)
    {
        c2t = nnodes*realtime/tot;
    }
    else
    {
        c2t = 0;
    }

    fprintf(fplog,"\n     R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G\n\n");

    fprintf(fplog," Computing:         Nodes     Number     G-Cycles    Seconds     %c\n",'%');
    fprintf(fplog,"%s\n",myline);
    sum = 0;
    for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
    {
        print_cycles(fplog,c2t,wcn[i],
                     (i==ewcPMEMESH_SEP || i==ewcPMEWAITCOMM) ? npme : npp,
                     wc->wcc[i].n,cycles[i],tot);
        sum += cycles[i];
    }
    if (wc->wcc_all != NULL)
    {
        for(i=0; i<ewcNR; i++)
        {
            for(j=0; j<ewcNR; j++)
            {
                sprintf(buf,"%-9s",wcn[i]);
                buf[9] = ' ';
                sprintf(buf+10,"%-9s",wcn[j]);
                buf[19] = '\0';
                print_cycles(fplog,c2t,buf,
                             (i==ewcPMEMESH_SEP || i==ewcPMEWAITCOMM) ? npme : npp,
                             wc->wcc_all[i*ewcNR+j].n,
                             wc->wcc_all[i*ewcNR+j].c,
                             tot);
                sum += wc->wcc_all[i*ewcNR+j].c;
            }
        }
    }
    print_cycles(fplog,c2t,"Rest",npp,0,tot-sum,tot);
    fprintf(fplog,"%s\n",myline);
    print_cycles(fplog,c2t,"Total",nnodes,0,tot,tot);
    fprintf(fplog,"%s\n",myline);

    if (cycles[ewcMoveE] > tot*0.05)
    {
        sprintf(buf,
                "NOTE: %d %% of the run time was spent communicating energies,\n"
                "      you might want to use the -gcom option of mdrun\n",
                (int)(100*cycles[ewcMoveE]/tot+0.5));
        if (fplog)
        {
            fprintf(fplog,"\n%s\n",buf);
        }
        /* Only the sim master calls this function, so always print to stderr */
        fprintf(stderr,"\n%s\n",buf);
    }
}
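/* Worked example of the cycle-to-second conversion in wallcycle_print(),
 * with made-up numbers: nnodes = 4 and realtime = 100 s at tot = 1.2e12
 * aggregate cycles give c2t = 4*100/1.2e12 = 3.33e-10 s/cycle, so a counter
 * with c = 3e11 cycles is printed as 300.000 G-Cycles, 100.0 seconds and
 * 25.0 %. */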
extern gmx_large_int_t wcycle_get_reset_counters(gmx_wallcycle_t wc)
{
    if (wc == NULL)
    {
        return -1;
    }

    return wc->reset_counters;
}
extern void wcycle_set_reset_counters(gmx_wallcycle_t wc, gmx_large_int_t reset_counters)
{
    if (wc == NULL)
    {
        return;
    }

    wc->reset_counters = reset_counters;
}