Counter resetting also on the PME only nodes
[gromacs/AngularHB.git] / src / mdlib / gmx_wallcycle.c
blob b77c85967368751ec19ae1d8b07fac6e5376ea5b

/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 *
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 *          GROningen MAchine for Chemical Simulations
 *
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2008, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>   /* FILE, fprintf, sscanf, sprintf */
#include <stdlib.h>  /* getenv */
#include <string.h>
#include "gmx_wallcycle.h"
#include "gmx_cyclecounter.h"
#include "smalloc.h"

#ifdef GMX_LIB_MPI
#include <mpi.h>
#endif
#ifdef GMX_THREAD_MPI
#include "thread_mpi.h"
#endif

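/* Typical use of this module, as a minimal sketch only: fplog, cr, npme and
 * realtime come from the caller's (mdrun's) context and are placeholders
 * here, not code from this file.
 *
 *     gmx_wallcycle_t wc = wallcycle_init(fplog,cr);  // NULL if no cycle counter
 *     double          cycles[ewcNR];
 *
 *     wallcycle_start(wc,ewcRUN);
 *     wallcycle_start(wc,ewcFORCE);
 *     // ... compute forces ...
 *     wallcycle_stop(wc,ewcFORCE);
 *     wallcycle_stop(wc,ewcRUN);
 *
 *     wallcycle_sum(cr,wc,cycles);       // reduce the counts over all nodes
 *     wallcycle_print(fplog,cr->nnodes,npme,realtime,wc,cycles);
 *     wallcycle_destroy(wc);
 *
 * The start/stop/sum/print/destroy calls are no-ops when wc is NULL, so the
 * caller does not need to check for a missing cycle counter.
 */
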
/* Cycle-count accumulator for a single counter */
typedef struct
{
    int          n;     /* number of start/stop pairs             */
    gmx_cycles_t c;     /* accumulated cycles                     */
    gmx_cycles_t start; /* cycle count at the last start call     */
    gmx_cycles_t last;  /* cycles of the most recent interval     */
} wallcc_t;

typedef struct gmx_wallcycle
{
    wallcc_t     *wcc;
    /* variables for testing/debugging */
    bool         wc_barrier;
    wallcc_t     *wcc_all;
    int          wc_depth;
    int          ewc_prev;
    gmx_cycles_t cycle_prev;
    gmx_step_t   reset_counters;
#ifdef GMX_MPI
    MPI_Comm     mpi_comm_mygroup;
#endif
} gmx_wallcycle_t_t;

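/* Printable names for the cycle counters; the order must match the ewc*
 * enum that is used as the index throughout this file (declared in
 * gmx_wallcycle.h, included above).  Note that "PME mesh" appears twice:
 * once for PME work done on the PP nodes (ewcPMEMESH) and once for the
 * separate PME nodes (ewcPMEMESH_SEP), as can be seen from how the two
 * counters are treated in wallcycle_sum() below.
 */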
/* Each name should not exceed 19 characters */
static const char *wcn[ewcNR] =
  { "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load",
    "DD comm. bounds", "Vsite constr.", "Send X to PME", "Comm. coord.",
    "Neighbor search", "Force", "Wait + Comm. F", "PME mesh", "PME mesh",
    "Wait + Comm. X/F", "Wait + Recv. PME F", "Vsite spread", "Write traj.",
    "Update", "Constraints", "Comm. energies", "Test", "Born radii" };

bool wallcycle_have_counter(void)
{
    return gmx_cycles_have_counter();
}

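/* Create the cycle counting data structure, or return NULL when the platform
 * has no usable cycle counter.  Behaviour is tuned through environment
 * variables read below: GMX_CYCLE_BARRIER makes every start/stop call do an
 * MPI_Barrier first, GMX_CYCLE_ALL additionally times all counter-to-counter
 * transitions, and GMX_RESET_COUNTERS stores the step at which the caller is
 * expected to reset the counters (see wcycle_get/set_reset_counters below).
 */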
gmx_wallcycle_t wallcycle_init(FILE *fplog,t_commrec *cr)
{
    gmx_wallcycle_t wc;
    char *env_ptr;

    if (!wallcycle_have_counter())
    {
        return NULL;
    }

    snew(wc,1);

    wc->wc_barrier = FALSE;
    wc->wcc_all    = NULL;
    wc->wc_depth   = 0;
    wc->ewc_prev   = -1;

#ifdef GMX_MPI
    if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
    {
        if (fplog)
        {
            fprintf(fplog,"\nWill call MPI_Barrier before each cycle start/stop call\n\n");
        }
        wc->wc_barrier = TRUE;
        wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
    }
#endif

    snew(wc->wcc,ewcNR);
    if (getenv("GMX_CYCLE_ALL") != NULL)
    {
        if (fplog)
        {
            fprintf(fplog,"\nWill time all the code during the run\n\n");
        }
        snew(wc->wcc_all,ewcNR*ewcNR);
    }

    /* Read variable GMX_RESET_COUNTERS from environment */
    wc->reset_counters = -1;
    if ((env_ptr=getenv("GMX_RESET_COUNTERS")) != NULL)
    {
        sscanf(env_ptr,gmx_step_pfmt,&wc->reset_counters);
    }

    return wc;
}

void wallcycle_destroy(gmx_wallcycle_t wc)
{
    if (wc == NULL)
    {
        return;
    }

    if (wc->wcc != NULL)
    {
        sfree(wc->wcc);
    }
    if (wc->wcc_all != NULL)
    {
        sfree(wc->wcc_all);
    }
    sfree(wc);
}

static void wallcycle_all_start(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
{
    wc->ewc_prev   = ewc;
    wc->cycle_prev = cycle;
}

static void wallcycle_all_stop(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
{
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].n += 1;
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].c += cycle - wc->cycle_prev;
}

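/* wallcycle_start/stop accumulate, per counter, the number of calls and the
 * CPU cycles elapsed between the matching start and stop.  When GMX_CYCLE_ALL
 * is set, wcc_all[prev*ewcNR + ewc] additionally collects the cycles between
 * consecutive counter events, which wallcycle_print() reports as a
 * counter-by-counter breakdown; the wc_depth bookkeeping below assumes the
 * counters are used in a nested fashion, with ewcRUN as the outermost level.
 */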
void wallcycle_start(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle;

    if (wc == NULL)
    {
        return;
    }

#ifdef GMX_MPI
    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }
#endif

    cycle = gmx_cycles_read();
    wc->wcc[ewc].start = cycle;
    if (wc->wcc_all != NULL)
    {
        wc->wc_depth++;
        if (ewc == ewcRUN)
        {
            wallcycle_all_start(wc,ewc,cycle);
        }
        else if (wc->wc_depth == 3)
        {
            wallcycle_all_stop(wc,ewc,cycle);
        }
    }
}

double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle,last;

    if (wc == NULL)
    {
        return 0;
    }

#ifdef GMX_MPI
    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }
#endif

    cycle = gmx_cycles_read();
    last = cycle - wc->wcc[ewc].start;
    wc->wcc[ewc].c += last;
    wc->wcc[ewc].n++;
    if (wc->wcc_all)
    {
        wc->wc_depth--;
        if (ewc == ewcRUN)
        {
            wallcycle_all_stop(wc,ewc,cycle);
        }
        else if (wc->wc_depth == 2)
        {
            wallcycle_all_start(wc,ewc,cycle);
        }
    }

    return last;
}

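/* Zero all accumulated counts and cycles.  This is intended to be called
 * (from mdrun) when the step given by GMX_RESET_COUNTERS is reached; per the
 * commit subject, the reset is now also performed on the PME-only nodes,
 * presumably so that the accounting printed at the end covers the same steps
 * on all nodes.
 */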
void wallcycle_reset_all(gmx_wallcycle_t wc)
{
    int i;

    if (wc == NULL)
    {
        return;
    }

    for(i=0; i<ewcNR; i++)
    {
        wc->wcc[i].n = 0;
        wc->wcc[i].c = 0;
        wc->wcc[i].start = 0;
        wc->wcc[i].last = 0;
    }
}

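/* Reduce the cycle counts over all nodes into cycles[] (length ewcNR).
 * Before summing, overlapping counters are disentangled locally:
 *  - DD comm. load/bounds cycles are subtracted from Domain decomp.,
 *  - PME mesh cycles measured on PP nodes are subtracted from Force,
 *  - on PME-only nodes (ewcPMEMESH_SEP count > 0) the Wait + Comm. time is
 *    derived as Run minus PME mesh,
 *  - the Constraints cycles are subtracted from Update.
 * The per-pair wcc_all matrix, when present, is reduced in the same way.
 */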
void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc,double cycles[])
{
    wallcc_t *wcc;
    double   buf[ewcNR],*cyc_all,*buf_all;
    int      i;

    if (wc == NULL)
    {
        return;
    }

    wcc = wc->wcc;

    if (wcc[ewcDDCOMMLOAD].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMLOAD].c;
    }
    if (wcc[ewcDDCOMMBOUND].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMBOUND].c;
    }
    if (wcc[ewcPMEMESH].n > 0)
    {
        wcc[ewcFORCE].c -= wcc[ewcPMEMESH].c;
    }

    if (wcc[ewcPMEMESH_SEP].n > 0)
    {
        /* This must be a PME only node, calculate the Wait + Comm. time */
        wcc[ewcPMEWAITCOMM].c = wcc[ewcRUN].c - wcc[ewcPMEMESH_SEP].c;
    }
    else
    {
        /* Correct the PME mesh only call count */
        wcc[ewcPMEMESH_SEP].n = wcc[ewcFORCE].n;
        wcc[ewcPMEWAITCOMM].n = wcc[ewcFORCE].n;
    }

    /* Store the cycles in a double buffer for summing */
    for(i=0; i<ewcNR; i++)
    {
        cycles[i] = (double)wcc[i].c;
    }

    if (wcc[ewcUPDATE].n > 0)
    {
        /* Remove the constraint part from the update count */
        cycles[ewcUPDATE] -= cycles[ewcCONSTR];
    }

#ifdef GMX_MPI
    if (cr->nnodes > 1)
    {
        MPI_Allreduce(cycles,buf,ewcNR,MPI_DOUBLE,MPI_SUM,
                      cr->mpi_comm_mysim);
        for(i=0; i<ewcNR; i++)
        {
            cycles[i] = buf[i];
        }
        if (wc->wcc_all != NULL)
        {
            snew(cyc_all,ewcNR*ewcNR);
            snew(buf_all,ewcNR*ewcNR);
            for(i=0; i<ewcNR*ewcNR; i++)
            {
                cyc_all[i] = wc->wcc_all[i].c;
            }
            MPI_Allreduce(cyc_all,buf_all,ewcNR*ewcNR,MPI_DOUBLE,MPI_SUM,
                          cr->mpi_comm_mysim);
            for(i=0; i<ewcNR*ewcNR; i++)
            {
                wc->wcc_all[i].c = buf_all[i];
            }
            sfree(buf_all);
            sfree(cyc_all);
        }
    }
#endif
}

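/* Print one line of the accounting table: counter name, number of nodes it
 * ran on, call count, cycles in units of 1e9 (the G-Cycles column), the
 * corresponding time in seconds (cycles * c2t) and the percentage of the
 * total run cycles.  Lines with zero cycles are suppressed.
 */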
static void print_cycles(FILE *fplog, double c2t, const char *name, int nnodes,
                         int n, gmx_cycles_t c, gmx_cycles_t tot)
{
    char num[11];

    if (c > 0) {
        if (n > 0)
            sprintf(num,"%10d",n);
        else
            sprintf(num," ");
        fprintf(fplog," %-19s %4d %10s %12.3f %10.1f %5.1f\n",
                name,nnodes,num,c*1e-9,c*c2t,100*(double)c/(double)tot);
    }
}

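/* Print the "R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G"
 * table.  cycles[] is the node-summed result of wallcycle_sum(), so
 * tot = cycles[ewcRUN] is roughly nnodes times the run cycles of a single
 * node, and c2t = nnodes*realtime/tot is the conversion from cycles to
 * seconds.  The Seconds column is therefore aggregated over nodes; in
 * particular, the Total row works out to nnodes*realtime.
 */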
void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
                     gmx_wallcycle_t wc, double cycles[])
{
    double c2t,tot,sum;
    int    i,j,npp;
    char   buf[STRLEN];
    const char *myline = "-----------------------------------------------------------------------";

    if (wc == NULL)
    {
        return;
    }

    if (npme > 0)
    {
        npp = nnodes - npme;
    }
    else
    {
        npp = nnodes;
    }
    tot = cycles[ewcRUN];
    /* Conversion factor from cycles to seconds */
    if (tot > 0)
    {
        c2t = nnodes*realtime/tot;
    }
    else
    {
        c2t = 0;
    }

    fprintf(fplog,"\n     R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G\n\n");

    fprintf(fplog," Computing:          Nodes     Number     G-Cycles    Seconds     %c\n",'%');
    fprintf(fplog,"%s\n",myline);
    sum = 0;
    for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
    {
        print_cycles(fplog,c2t,wcn[i],
                     (i==ewcPMEMESH_SEP || i==ewcPMEWAITCOMM) ? npme : npp,
                     wc->wcc[i].n,cycles[i],tot);
        sum += cycles[i];
    }
    if (wc->wcc_all != NULL)
    {
        for(i=0; i<ewcNR; i++)
        {
            for(j=0; j<ewcNR; j++)
            {
                sprintf(buf,"%-9s",wcn[i]);
                buf[9] = ' ';
                sprintf(buf+10,"%-9s",wcn[j]);
                buf[19] = '\0';
                print_cycles(fplog,c2t,buf,
                             (i==ewcPMEMESH_SEP || i==ewcPMEWAITCOMM) ? npme : npp,
                             wc->wcc_all[i*ewcNR+j].n,
                             wc->wcc_all[i*ewcNR+j].c,
                             tot);
                sum += wc->wcc_all[i*ewcNR+j].c;
            }
        }
    }
    print_cycles(fplog,c2t,"Rest",npp,0,tot-sum,tot);
    fprintf(fplog,"%s\n",myline);
    print_cycles(fplog,c2t,"Total",nnodes,0,tot,tot);
    fprintf(fplog,"%s\n",myline);

    if (cycles[ewcMoveE] > tot*0.05)
    {
        sprintf(buf,
                "NOTE: %d %% of the run time was spent communicating energies,\n"
                "      you might want to use the -gcom option of mdrun\n",
                (int)(100*cycles[ewcMoveE]/tot+0.5));
        if (fplog)
        {
            fprintf(fplog,"\n%s\n",buf);
        }
        /* Only the sim master calls this function, so always print to stderr */
        fprintf(stderr,"\n%s\n",buf);
    }
}

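/* Accessors for the GMX_RESET_COUNTERS step; presumably these exist so that
 * mdrun can query and adjust the reset step and, per the commit subject,
 * trigger the counter reset on the PME-only nodes at the same step as on the
 * PP nodes.
 */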
extern gmx_step_t wcycle_get_reset_counters(gmx_wallcycle_t wc)
{
    return wc->reset_counters;
}

extern void wcycle_set_reset_counters(gmx_wallcycle_t wc, gmx_step_t reset_counters)
{
    wc->reset_counters = reset_counters;
}