/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 * This source code is part of
 *
 * GROningen MAchine for Chemical Simulations
 *
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2008, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <stdlib.h>

#include "gmx_wallcycle.h"
#include "gmx_cyclecounter.h"
#include "smalloc.h"
#include "string2.h"
#include "thread_mpi.h"
/* One cycle counter: number of start/stop calls, accumulated cycles,
   and the cycle count at the most recent start */
typedef struct
{
    int          n;
    gmx_cycles_t c;
    gmx_cycles_t start;
} wallcc_t;

typedef struct gmx_wallcycle
{
    wallcc_t     *wcc;
    /* variables for testing/debugging */
    bool         wc_barrier;
    wallcc_t     *wcc_all;
    int          wc_depth;
    int          ewc_prev;
    gmx_cycles_t cycle_prev;
    gmx_step_t   reset_counters;
    MPI_Comm     mpi_comm_mygroup;
} gmx_wallcycle_t_t;
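/* Field sketch (inferred from the usage below, not a comment from the
 * original file): wcc holds one wallcc_t per ewc* counter enum from
 * gmx_wallcycle.h; wcc_all, allocated only when GMX_CYCLE_ALL is set,
 * is an ewcNR x ewcNR matrix of counter pairs; wc_depth, ewc_prev and
 * cycle_prev track the currently open counter nesting for that matrix.
 */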
/* Each name should not exceed 19 characters */
static const char *wcn[ewcNR] =
{
    "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load",
    "DD comm. bounds", "Vsite constr.", "Send X to PME", "Comm. coord.",
    "Neighbor search", "Force", "Wait + Comm. F",
    /* "PME mesh" is listed twice: once for the counter on PP nodes and
       once for the separate PME node counter (ewcPMEMESH_SEP) */
    "PME mesh", "PME mesh",
    "Wait + Comm. X/F", "Wait + Recv. PME F", "Vsite spread", "Write traj.",
    "Update", "Constraints", "Comm. energies", "Test", "Born radii"
};
bool wallcycle_have_counter(void)
{
    return gmx_cycles_have_counter();
}
gmx_wallcycle_t wallcycle_init(FILE *fplog,t_commrec *cr)
{
    gmx_wallcycle_t wc;
    char *env_ptr;

    if (!wallcycle_have_counter())
    {
        return NULL;
    }

    snew(wc,1);

    wc->wc_barrier = FALSE;
    wc->wcc_all    = NULL;
    wc->wc_depth   = 0;
    wc->ewc_prev   = -1;
    if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
    {
        if (fplog)
        {
            fprintf(fplog,"\nWill call MPI_Barrier before each cycle start/stop call\n\n");
        }
        wc->wc_barrier       = TRUE;
        wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
    }

    snew(wc->wcc,ewcNR);
    if (getenv("GMX_CYCLE_ALL") != NULL)
    {
        if (fplog)
        {
            fprintf(fplog,"\nWill time all the code during the run\n\n");
        }
        snew(wc->wcc_all,ewcNR*ewcNR);
    }

    /* Read variable GMX_RESET_COUNTERS from environment */
    wc->reset_counters = -1;
    if ((env_ptr=getenv("GMX_RESET_COUNTERS")) != NULL)
    {
        sscanf(env_ptr,gmx_step_pfmt,&wc->reset_counters);
    }

    return wc;
}
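/* Summary of the environment switches read above (restating the code,
 * not an original comment):
 *
 *     GMX_CYCLE_BARRIER   call MPI_Barrier before each start/stop
 *     GMX_CYCLE_ALL       additionally time all counter pairs
 *     GMX_RESET_COUNTERS  step at which mdrun resets the counters
 *
 * wallcycle_init() returns NULL when no cycle counter is available;
 * every entry point in this file therefore tolerates wc == NULL.
 */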
void wallcycle_destroy(gmx_wallcycle_t wc)
{
    if (wc == NULL)
    {
        return;
    }

    if (wc->wcc != NULL)
    {
        sfree(wc->wcc);
    }
    if (wc->wcc_all != NULL)
    {
        sfree(wc->wcc_all);
    }
    sfree(wc);
}
static void wallcycle_all_start(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
{
    wc->ewc_prev   = ewc;
    wc->cycle_prev = cycle;
}
static void wallcycle_all_stop(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
{
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].n += 1;
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].c += cycle - wc->cycle_prev;
}
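/* Note on the pair accounting above (a reading of the code, not an
 * original comment): wcc_all[ewc_prev*ewcNR+ewc] accumulates the cycles
 * elapsed since cycle_prev, i.e. time is attributed to the pair of the
 * previously open counter and the counter triggering the stop. The
 * wc_depth checks in wallcycle_start/stop below restrict this to one
 * level of nesting below the major counters.
 */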
void wallcycle_start(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle;

    if (wc == NULL)
    {
        return;
    }

    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }

    cycle = gmx_cycles_read();
    wc->wcc[ewc].start = cycle;
    if (wc->wcc_all != NULL)
    {
        wc->wc_depth++;
        if (ewc == ewcRUN)
        {
            wallcycle_all_start(wc,ewc,cycle);
        }
        else if (wc->wc_depth == 3)
        {
            wallcycle_all_stop(wc,ewc,cycle);
        }
    }
}
double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle,last;

    if (wc == NULL)
    {
        return 0;
    }

    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }

    cycle = gmx_cycles_read();
    last  = cycle - wc->wcc[ewc].start;
    wc->wcc[ewc].c += last;
    wc->wcc[ewc].n++;
    if (wc->wcc_all != NULL)
    {
        wc->wc_depth--;
        if (ewc == ewcRUN)
        {
            wallcycle_all_stop(wc,ewc,cycle);
        }
        else if (wc->wc_depth == 2)
        {
            wallcycle_all_start(wc,ewc,cycle);
        }
    }

    return last;
}
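/* Minimal calling sketch (hypothetical caller, not part of this file;
 * ewcFORCE is one of the counter enums from gmx_wallcycle.h):
 *
 *     wallcycle_start(wc,ewcFORCE);
 *     ... compute forces ...
 *     force_cycles = wallcycle_stop(wc,ewcFORCE);
 *
 * wallcycle_stop() returns the raw cycle count of this interval.
 */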
void wallcycle_reset_all(gmx_wallcycle_t wc)
{
    int i;

    if (wc == NULL)
    {
        return;
    }

    for(i=0; i<ewcNR; i++)
    {
        wc->wcc[i].n     = 0;
        wc->wcc[i].c     = 0;
        wc->wcc[i].start = 0;
    }
}
void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc,double cycles[])
{
    wallcc_t *wcc;
    double buf[ewcNR],*cyc_all,*buf_all;
    int    i;

    if (wc == NULL)
    {
        return;
    }

    wcc = wc->wcc;

    /* Subtract counters that are contained in other counters,
       so that each category in the final table is disjoint */
    if (wcc[ewcDDCOMMLOAD].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMLOAD].c;
    }
    if (wcc[ewcDDCOMMBOUND].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMBOUND].c;
    }
    if (wcc[ewcPMEMESH].n > 0)
    {
        wcc[ewcFORCE].c -= wcc[ewcPMEMESH].c;
    }

    if (wcc[ewcPMEMESH_SEP].n > 0)
    {
        /* This must be a PME only node, calculate the Wait + Comm. time */
        wcc[ewcPMEWAITCOMM].c = wcc[ewcRUN].c - wcc[ewcPMEMESH_SEP].c;

        /* Correct the PME mesh only call count */
        wcc[ewcPMEMESH_SEP].n = wcc[ewcFORCE].n;
        wcc[ewcPMEWAITCOMM].n = wcc[ewcFORCE].n;
    }

    /* Store the cycles in a double buffer for summing */
    for(i=0; i<ewcNR; i++)
    {
        cycles[i] = (double)wcc[i].c;
    }

    if (wcc[ewcUPDATE].n > 0)
    {
        /* Remove the constraint part from the update count */
        cycles[ewcUPDATE] -= cycles[ewcCONSTR];
    }

    /* Sum the cycle counts over all nodes */
    if (cr->nnodes > 1)
    {
        MPI_Allreduce(cycles,buf,ewcNR,MPI_DOUBLE,MPI_SUM,
                      cr->mpi_comm_mysim);
        for(i=0; i<ewcNR; i++)
        {
            cycles[i] = buf[i];
        }
        if (wc->wcc_all != NULL)
        {
            snew(cyc_all,ewcNR*ewcNR);
            snew(buf_all,ewcNR*ewcNR);
            for(i=0; i<ewcNR*ewcNR; i++)
            {
                cyc_all[i] = wc->wcc_all[i].c;
            }
            MPI_Allreduce(cyc_all,buf_all,ewcNR*ewcNR,MPI_DOUBLE,MPI_SUM,
                          cr->mpi_comm_mysim);
            for(i=0; i<ewcNR*ewcNR; i++)
            {
                wc->wcc_all[i].c = buf_all[i];
            }
            sfree(buf_all);
            sfree(cyc_all);
        }
    }
}
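/* Worked example of the corrections above (illustrative numbers only):
 * if a PP node spent 100 Gcycles in ewcFORCE, 40 of them inside
 * ewcPMEMESH, the subtraction leaves Force = 60 and PME mesh = 40, so
 * the rows of the printed table are disjoint and sum to the total.
 */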
static void print_cycles(FILE *fplog, double c2t, const char *name, int nnodes,
                         int n, double c, double tot)
{
    char num[11];

    if (c > 0)
    {
        if (n > 0)
        {
            sprintf(num,"%10d",n);
        }
        else
        {
            sprintf(num,"          ");
        }
        fprintf(fplog," %-19s %4d %10s %12.3f %10.1f %5.1f\n",
                name,nnodes,num,c*1e-9,c*c2t,100*c/tot);
    }
}
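/* Shape of a line produced above (illustrative values only, aligned to
 * the format string " %-19s %4d %10s %12.3f %10.1f %5.1f"):
 *
 *  Force                  8     100000     1234.567      456.7  78.9
 */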
void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
                     gmx_wallcycle_t wc, double cycles[])
{
    double c2t,tot,sum;
    int    i,j,npp;
    char   buf[STRLEN];
    const char *myline = "-----------------------------------------------------------------------";

    if (wc == NULL)
    {
        return;
    }

    npp = nnodes - npme;
    tot = cycles[ewcRUN];
    /* Conversion factor from cycles to seconds */
    if (tot > 0)
    {
        c2t = nnodes*realtime/tot;
    }
    else
    {
        c2t = 0;
    }

    fprintf(fplog,"\n     R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G\n\n");

    fprintf(fplog," Computing:         Nodes     Number     G-Cycles    Seconds     %c\n",'%');
    fprintf(fplog,"%s\n",myline);
    sum = 0;
    for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
    {
        print_cycles(fplog,c2t,wcn[i],
                     (i==ewcPMEMESH_SEP || i==ewcPMEWAITCOMM) ? npme : npp,
                     wc->wcc[i].n,cycles[i],tot);
        sum += cycles[i];
    }
    if (wc->wcc_all != NULL)
    {
        for(i=0; i<ewcNR; i++)
        {
            for(j=0; j<ewcNR; j++)
            {
                /* Print the counter pair "outer inner" as a 19 char name */
                sprintf(buf,"%-9s",wcn[i]);
                buf[9] = ' ';
                sprintf(buf+10,"%-9s",wcn[j]);
                buf[19] = '\0';
                print_cycles(fplog,c2t,buf,
                             (i==ewcPMEMESH_SEP || i==ewcPMEWAITCOMM) ? npme : npp,
                             wc->wcc_all[i*ewcNR+j].n,
                             wc->wcc_all[i*ewcNR+j].c,
                             tot);
                sum += wc->wcc_all[i*ewcNR+j].c;
            }
        }
    }
    print_cycles(fplog,c2t,"Rest",npp,0,tot-sum,tot);
    fprintf(fplog,"%s\n",myline);
    print_cycles(fplog,c2t,"Total",nnodes,0,tot,tot);
    fprintf(fplog,"%s\n",myline);

    if (cycles[ewcMoveE] > tot*0.05)
    {
        sprintf(buf,
                "NOTE: %d %% of the run time was spent communicating energies,\n"
                "      you might want to use the -gcom option of mdrun\n",
                (int)(100*cycles[ewcMoveE]/tot+0.5));
        if (fplog)
        {
            fprintf(fplog,"\n%s\n",buf);
        }
        /* Only the sim master calls this function, so always print to stderr */
        fprintf(stderr,"\n%s\n",buf);
    }
}
extern gmx_step_t wcycle_get_reset_counters(gmx_wallcycle_t wc)
{
    if (wc == NULL)
    {
        return -1;
    }

    return wc->reset_counters;
}

extern void wcycle_set_reset_counters(gmx_wallcycle_t wc, gmx_step_t reset_counters)
{
    if (wc == NULL)
    {
        return;
    }

    wc->reset_counters = reset_counters;
}