Revised wording in pdb2gmx.c, hopefully clearer now.
[gromacs/rigid-bodies.git] / src / gmxlib / nrnb.c
blobe99331eda7a85280be868ef0f6e13da1141103e1
1 /*
2 *
3 * This source code is part of
4 *
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * VERSION 3.2.0
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
32 * And Hey:
33 * GROningen Mixture of Alchemy and Childrens' Stories
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
39 #include <string.h>
40 #include "sysstuff.h"
41 #include "gmx_fatal.h"
42 #include "names.h"
43 #include "macros.h"
44 #include "nrnb.h"
45 #include "main.h"
46 #include "smalloc.h"
47 #include "copyrite.h"
49 typedef struct {
50 const char *name;
51 int flop;
52 } t_nrnb_data;
55 static const t_nrnb_data nbdata[eNRNB] = {
56 { "LJ", 33 }, /* nb_kernel010 */
57 { "Buckingham", 61 }, /* nb_kernel020 */
58 { "VdW(T)", 54 }, /* nb_kernel030 */
59 { "Coulomb", 27 }, /* nb_kernel100 */
60 { "Coulomb [W3]", 80 }, /* nb_kernel101 */
61 { "Coulomb [W3-W3]", 234 }, /* nb_kernel102 */
62 { "Coulomb [W4]", 80 }, /* nb_kernel103 */
63 { "Coulomb [W4-W4]", 234 }, /* nb_kernel104 */
64 { "Coulomb + LJ", 38 }, /* nb_kernel110 */
65 { "Coulomb + LJ [W3]", 91 }, /* nb_kernel111 */
66 { "Coulomb + LJ [W3-W3]", 245 }, /* nb_kernel112 */
67 { "Coulomb + LJ [W4]", 113 }, /* nb_kernel113 */
68 { "Coulomb + LJ [W4-W4]", 267 }, /* nb_kernel114 */
69 { "Coulomb + Bham ", 64 }, /* nb_kernel120 */
70 { "Coulomb + Bham [W3]", 117 }, /* nb_kernel121 */
71 { "Coulomb + Bham [W3-W3]", 271 }, /* nb_kernel122 */
72 { "Coulomb + Bham [W4]", 141 }, /* nb_kernel123 */
73 { "Coulomb + Bham [W4-W4]", 295 }, /* nb_kernel124 */
74 { "Coulomb + VdW(T) ", 59 }, /* nb_kernel130 */
75 { "Coulomb + VdW(T) [W3]", 112 }, /* nb_kernel131 */
76 { "Coulomb + VdW(T) [W3-W3]", 266 }, /* nb_kernel132 */
77 { "Coulomb + VdW(T) [W4]", 134 }, /* nb_kernel133 */
78 { "Coulomb + VdW(T) [W4-W4]", 288 }, /* nb_kernel134 */
79 { "RF Coul", 33 }, /* nb_kernel200 */
80 { "RF Coul [W3]", 98 }, /* nb_kernel201 */
81 { "RF Coul [W3-W3]", 288 }, /* nb_kernel202 */
82 { "RF Coul [W4]", 98 }, /* nb_kernel203 */
83 { "RF Coul [W4-W4]", 288 }, /* nb_kernel204 */
84 { "RF Coul + LJ", 44 }, /* nb_kernel210 */
85 { "RF Coul + LJ [W3]", 109 }, /* nb_kernel211 */
86 { "RF Coul + LJ [W3-W3]", 299 }, /* nb_kernel212 */
87 { "RF Coul + LJ [W4]", 131 }, /* nb_kernel213 */
88 { "RF Coul + LJ [W4-W4]", 321 }, /* nb_kernel214 */
89 { "RF Coul + Bham ", 70 }, /* nb_kernel220 */
90 { "RF Coul + Bham [W3]", 135 }, /* nb_kernel221 */
91 { "RF Coul + Bham [W3-W3]", 325 }, /* nb_kernel222 */
92 { "RF Coul + Bham [W4]", 159 }, /* nb_kernel223 */
93 { "RF Coul + Bham [W4-W4]", 349 }, /* nb_kernel224 */
94 { "RF Coul + VdW(T) ", 65 }, /* nb_kernel230 */
95 { "RF Coul + VdW(T) [W3]", 130 }, /* nb_kernel231 */
96 { "RF Coul + VdW(T) [W3-W3]", 320 }, /* nb_kernel232 */
97 { "RF Coul + VdW(T) [W4]", 152 }, /* nb_kernel233 */
98 { "RF Coul + VdW(T) [W4-W4]", 342 }, /* nb_kernel234 */
99 { "Coul(T)", 42 }, /* nb_kernel300 */
100 { "Coul(T) [W3]", 125 }, /* nb_kernel301 */
101 { "Coul(T) [W3-W3]", 369 }, /* nb_kernel302 */
102 { "Coul(T) [W4]", 125 }, /* nb_kernel303 */
103 { "Coul(T) [W4-W4]", 369 }, /* nb_kernel304 */
104 { "Coul(T) + LJ", 55 }, /* nb_kernel310 */
105 { "Coul(T) + LJ [W3]", 138 }, /* nb_kernel311 */
106 { "Coul(T) + LJ [W3-W3]", 382 }, /* nb_kernel312 */
107 { "Coul(T) + LJ [W4]", 158 }, /* nb_kernel313 */
108 { "Coul(T) + LJ [W4-W4]", 402 }, /* nb_kernel314 */
109 { "Coul(T) + Bham", 81 }, /* nb_kernel320 */
110 { "Coul(T) + Bham [W3]", 164 }, /* nb_kernel321 */
111 { "Coul(T) + Bham [W3-W3]", 408 }, /* nb_kernel322 */
112 { "Coul(T) + Bham [W4]", 186 }, /* nb_kernel323 */
113 { "Coul(T) + Bham [W4-W4]", 430 }, /* nb_kernel324 */
114 { "Coul(T) + VdW(T)", 68 }, /* nb_kernel330 */
115 { "Coul(T) + VdW(T) [W3]", 151 }, /* nb_kernel331 */
116 { "Coul(T) + VdW(T) [W3-W3]", 395 }, /* nb_kernel332 */
117 { "Coul(T) + VdW(T) [W4]", 179 }, /* nb_kernel333 */
118 { "Coul(T) + VdW(T) [W4-W4]", 423 }, /* nb_kernel334 */
119 { "Generalized Born Coulomb", 48 }, /* nb_kernel400 */
120 { "GB Coulomb + LJ", 61 }, /* nb_kernel410 */
121 { "GB Coulomb + VdW(T)", 79 }, /* nb_kernel430 */
122 { "LJ NF", 19 }, /* nb_kernel010nf */
123 { "Buckingham NF", 48 }, /* nb_kernel020nf */
124 { "VdW(T) NF", 33 }, /* nb_kernel030nf */
125 { "Coulomb NF", 16 }, /* nb_kernel100nf */
126 { "Coulomb [W3] NF", 47 }, /* nb_kernel101nf */
127 { "Coulomb [W3-W3] NF", 135 }, /* nb_kernel102nf */
128 { "Coulomb [W4] NF", 47 }, /* nb_kernel103nf */
129 { "Coulomb [W4-W4] NF", 135 }, /* nb_kernel104nf */
130 { "Coulomb + LJ NF", 24 }, /* nb_kernel110nf */
131 { "Coulomb + LJ [W3] NF", 55 }, /* nb_kernel111nf */
132 { "Coulomb + LJ [W3-W3] NF", 143 }, /* nb_kernel112nf */
133 { "Coulomb + LJ [W4] NF", 66 }, /* nb_kernel113nf */
134 { "Coulomb + LJ [W4-W4] NF", 154 }, /* nb_kernel114nf */
135 { "Coulomb + Bham NF", 51 }, /* nb_kernel120nf */
136 { "Coulomb + Bham [W3] NF", 82 }, /* nb_kernel121nf */
137 { "Coulomb + Bham [W3-W3] NF", 170 }, /* nb_kernel122nf */
138 { "Coulomb + Bham [W4] NF", 95 }, /* nb_kernel123nf */
139 { "Coulomb + Bham [W4-W4] NF", 183 }, /* nb_kernel124nf */
140 { "Coulomb + VdW(T) NF", 36 }, /* nb_kernel130nf */
141 { "Coulomb + VdW(T) [W3] NF", 67 }, /* nb_kernel131nf */
142 { "Coulomb + VdW(T) [W3-W3] NF", 155 }, /* nb_kernel132nf */
143 { "Coulomb + VdW(T) [W4] NF", 80 }, /* nb_kernel133nf */
144 { "Coulomb + VdW(T) [W4-W4] NF", 168 }, /* nb_kernel134nf */
145 { "RF Coul NF", 19 }, /* nb_kernel200nf */
146 { "RF Coul [W3] NF", 56 }, /* nb_kernel201nf */
147 { "RF Coul [W3-W3] NF", 162 }, /* nb_kernel202nf */
148 { "RF Coul [W4] NF", 56 }, /* nb_kernel203nf */
149 { "RF Coul [W4-W4] NF", 162 }, /* nb_kernel204nf */
150 { "RF Coul + LJ NF", 27 }, /* nb_kernel210nf */
151 { "RF Coul + LJ [W3] NF", 64 }, /* nb_kernel211nf */
152 { "RF Coul + LJ [W3-W3] NF", 170 }, /* nb_kernel212nf */
153 { "RF Coul + LJ [W4] NF", 75 }, /* nb_kernel213nf */
154 { "RF Coul + LJ [W4-W4] NF", 181 }, /* nb_kernel214nf */
155 { "RF Coul + Bham NF", 54 }, /* nb_kernel220nf */
156 { "RF Coul + Bham [W3] NF", 91 }, /* nb_kernel221nf */
157 { "RF Coul + Bham [W3-W3] NF", 197 }, /* nb_kernel222nf */
158 { "RF Coul + Bham [W4] NF", 104 }, /* nb_kernel223nf */
159 { "RF Coul + Bham [W4-W4] NF", 210 }, /* nb_kernel224nf */
160 { "RF Coul + VdW(T) NF", 39 }, /* nb_kernel230nf */
161 { "RF Coul + VdW(T) [W3] NF", 76 }, /* nb_kernel231nf */
162 { "RF Coul + VdW(T) [W3-W3] NF", 182 }, /* nb_kernel232nf */
163 { "RF Coul + VdW(T) [W4] NF", 89 }, /* nb_kernel233nf */
164 { "RF Coul + VdW(T) [W4-W4] NF", 195 }, /* nb_kernel234nf */
165 { "Coul(T) NF", 26 }, /* nb_kernel300nf */
166 { "Coul(T) [W3] NF", 77 }, /* nb_kernel301nf */
167 { "Coul(T) [W3-W3] NF", 225 }, /* nb_kernel302nf */
168 { "Coul(T) [W4] NF", 77 }, /* nb_kernel303nf */
169 { "Coul(T) [W4-W4] NF", 225 }, /* nb_kernel304nf */
170 { "Coul(T) + LJ NF", 34 }, /* nb_kernel310nf */
171 { "Coul(T) + LJ [W3] NF", 85 }, /* nb_kernel311nf */
172 { "Coul(T) + LJ [W3-W3] NF", 233 }, /* nb_kernel312nf */
173 { "Coul(T) + LJ [W4] NF", 96 }, /* nb_kernel313nf */
174 { "Coul(T) + LJ [W4-W4] NF", 244 }, /* nb_kernel314nf */
175 { "Coul(T) + Bham NF", 61 }, /* nb_kernel320nf */
176 { "Coul(T) + Bham [W3] NF", 112 }, /* nb_kernel321nf */
177 { "Coul(T) + Bham [W3-W3] NF", 260 }, /* nb_kernel322nf */
178 { "Coul(T) + Bham [W4] NF", 125 }, /* nb_kernel323nf */
179 { "Coul(T) + Bham [W4-W4] NF", 273 }, /* nb_kernel324nf */
180 { "Coul(T) + VdW(T) NF", 42 }, /* nb_kernel330nf */
181 { "Coul(T) + VdW(T) [W3] NF", 93 }, /* nb_kernel331nf */
182 { "Coul(T) + VdW(T) [W3-W3] NF", 241 }, /* nb_kernel332nf */
183 { "Coul(T) + VdW(T) [W4] NF", 110 }, /* nb_kernel333nf */
184 { "Coul(T) + VdW(T) [W4-W4] NF", 258 }, /* nb_kernel334nf */
185 { "Generalized Born Coulomb NF", 29 }, /* nb_kernel400nf */
186 { "GB Coulomb + LJ NF", 37 }, /* nb_kernel410nf */
187 { "GB Coulomb + VdW(T) NF", 49 }, /* nb_kernel430nf */
188 { "Free energy innerloop", 150 }, /* free energy, estimate */
189 { "All-vs-All, Coul + LJ", 38 },
190 { "All-vs-All, GB + LJ", 61 },
191 { "Outer nonbonded loop", 10 },
192 { "1,4 nonbonded interactions", 90 },
193 { "Born radii (Still)", 47 },
194 { "Born radii (HCT/OBC)", 183 },
195 { "Born force chain rule", 15 },
196 { "All-vs-All Still radii", 47 },
197 { "All-vs-All HCT/OBC radii", 183 },
198 { "All-vs-All Born chain rule", 15 },
199 { "Calc Weights", 36 },
200 { "Spread Q", 6 },
201 { "Spread Q Bspline", 2 },
202 { "Gather F", 23 },
203 { "Gather F Bspline", 6 },
204 { "3D-FFT", 8 },
205 { "Convolution", 4 },
206 { "Solve PME", 64 },
207 { "NS-Pairs", 21 },
208 { "Reset In Box", 3 },
209 { "Shift-X", 6 },
210 { "CG-CoM", 3 },
211 { "Sum Forces", 1 },
212 { "Bonds", 59 },
213 { "G96Bonds", 44 },
214 { "FENE Bonds", 58 },
215 { "Tab. Bonds", 62 },
216 { "Restraint Potential", 86 },
217 { "Angles", 168 },
218 { "G96Angles", 150 },
219 { "Quartic Angles", 160 },
220 { "Tab. Angles", 169 },
221 { "Propers", 229 },
222 { "Impropers", 208 },
223 { "RB-Dihedrals", 247 },
224 { "Four. Dihedrals", 247 },
225 { "Tab. Dihedrals", 227 },
226 { "Dist. Restr.", 200 },
227 { "Orient. Restr.", 200 },
228 { "Dihedral Restr.", 200 },
229 { "Pos. Restr.", 50 },
230 { "Angle Restr.", 191 },
231 { "Angle Restr. Z", 164 },
232 { "Morse Potent.", 58 },
233 { "Cubic Bonds", 54 },
234 { "Walls", 31 },
235 { "Water Pol.", 62 },
236 { "Thole Pol.", 296 },
237 { "Virial", 18 },
238 { "Update", 31 },
239 { "Ext.ens. Update", 54 },
240 { "Stop-CM", 10 },
241 { "P-Coupling", 6 },
242 { "Calc-Ekin", 27 },
243 { "Lincs", 60 },
244 { "Lincs-Mat", 4 },
245 { "Shake", 30 },
246 { "Constraint-V", 8 },
247 { "Shake-Init", 10 },
248 { "Constraint-Vir", 24 },
249 { "Settle", 323 },
250 { "Virtual Site 2", 23 },
251 { "Virtual Site 3", 37 },
252 { "Virtual Site 3fd", 95 },
253 { "Virtual Site 3fad", 176 },
254 { "Virtual Site 3out", 87 },
255 { "Virtual Site 4fd", 110 },
256 { "Virtual Site 4fdn", 254 },
257 { "Virtual Site N", 15 },
258 { "Mixed Generalized Born stuff", 10 }
262 void init_nrnb(t_nrnb *nrnb)
264 int i;
266 for(i=0; (i<eNRNB); i++)
267 nrnb->n[i]=0.0;
270 void cp_nrnb(t_nrnb *dest, t_nrnb *src)
272 int i;
274 for(i=0; (i<eNRNB); i++)
275 dest->n[i]=src->n[i];
278 void add_nrnb(t_nrnb *dest, t_nrnb *s1, t_nrnb *s2)
280 int i;
282 for(i=0; (i<eNRNB); i++)
283 dest->n[i]=s1->n[i]+s2->n[i];
286 void print_nrnb(FILE *out, t_nrnb *nrnb)
288 int i;
290 for(i=0; (i<eNRNB); i++)
291 if (nrnb->n[i] > 0)
292 fprintf(out," %-26s %10.0f.\n",nbdata[i].name,nrnb->n[i]);
295 void _inc_nrnb(t_nrnb *nrnb,int enr,int inc,char *file,int line)
297 nrnb->n[enr]+=inc;
298 #ifdef DEBUG_NRNB
299 printf("nrnb %15s(%2d) incremented with %8d from file %s line %d\n",
300 nbdata[enr].name,enr,inc,file,line);
301 #endif
304 void print_flop(FILE *out,t_nrnb *nrnb,double *nbfs,double *mflop)
306 int i;
307 double mni,frac,tfrac,tflop;
308 const char *myline = "-----------------------------------------------------------------------------";
310 *nbfs = 0.0;
311 for(i=0; (i<eNR_NBKERNEL_NR); i++) {
312 if (strstr(nbdata[i].name,"W3-W3") != NULL)
313 *nbfs += 9e-6*nrnb->n[i];
314 else if (strstr(nbdata[i].name,"W3") != NULL)
315 *nbfs += 3e-6*nrnb->n[i];
316 else if (strstr(nbdata[i].name,"W4-W4") != NULL)
317 *nbfs += 10e-6*nrnb->n[i];
318 else if (strstr(nbdata[i].name,"W4") != NULL)
319 *nbfs += 4e-6*nrnb->n[i];
320 else
321 *nbfs += 1e-6*nrnb->n[i];
323 tflop=0;
324 for(i=0; (i<eNRNB); i++)
325 tflop+=1e-6*nrnb->n[i]*nbdata[i].flop;
327 if (tflop == 0) {
328 fprintf(out,"No MEGA Flopsen this time\n");
329 return;
331 if (out) {
332 fprintf(out,"\n\tM E G A - F L O P S A C C O U N T I N G\n\n");
335 if (out) {
336 fprintf(out," RF=Reaction-Field FE=Free Energy SCFE=Soft-Core/Free Energy\n");
337 fprintf(out," T=Tabulated W3=SPC/TIP3p W4=TIP4p (single or pairs)\n");
338 fprintf(out," NF=No Forces\n\n");
340 fprintf(out," %-32s %16s %15s %7s\n",
341 "Computing:","M-Number","M-Flops","% Flops");
342 fprintf(out,"%s\n",myline);
344 *mflop=0.0;
345 tfrac=0.0;
346 for(i=0; (i<eNRNB); i++) {
347 mni = 1e-6*nrnb->n[i];
348 *mflop += mni*nbdata[i].flop;
349 frac = 100.0*mni*nbdata[i].flop/tflop;
350 tfrac += frac;
351 if (out && mni != 0)
352 fprintf(out," %-32s %16.6f %15.3f %6.1f\n",
353 nbdata[i].name,mni,mni*nbdata[i].flop,frac);
355 if (out) {
356 fprintf(out,"%s\n",myline);
357 fprintf(out," %-32s %16s %15.3f %6.1f\n",
358 "Total","",*mflop,tfrac);
359 fprintf(out,"%s\n\n",myline);
363 void print_perf(FILE *out,double nodetime,double realtime,int nprocs,
364 gmx_large_int_t nsteps,real delta_t,
365 double nbfs,double mflop)
367 real runtime;
369 fprintf(out,"\n");
371 if (nodetime == 0.0) {
372 fprintf(out,"nodetime = 0! Infinite Giga flopses!\n");
374 #ifdef GMX_OPENMM
375 nodetime = realtime;
376 fprintf(out,"\tOpenMM run - timing based on wallclock.\n\n");
377 #else
378 if (nprocs > 1)
380 nodetime = realtime;
381 fprintf(out,"\tParallel run - timing based on wallclock.\n\n");
383 #endif
385 if ((nodetime > 0) && (realtime > 0)) {
386 fprintf(out,"%12s %10s %10s %8s\n","","NODE (s)","Real (s)","(%)");
387 fprintf(out,"%12s %10.3f %10.3f %8.1f\n","Time:",
388 nodetime, realtime, 100.0*nodetime/realtime);
389 if (nodetime > 60) {
390 fprintf(out,"%12s %10s","","");
391 pr_difftime(out,nodetime);
393 if (delta_t > 0) {
394 mflop = mflop/nodetime;
395 runtime = nsteps*delta_t;
396 fprintf(out,"%12s %10s %10s %10s %10s\n",
397 "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
398 "(ns/day)","(hour/ns)");
399 fprintf(out,"%12s %10.3f %10.3f %10.3f %10.3f\n","Performance:",
400 nbfs/nodetime,(mflop > 1000) ? (mflop/1000) : mflop,
401 runtime*24*3.6/nodetime,1000*nodetime/(3600*runtime));
402 } else {
403 fprintf(out,"%12s %10s %10s %14s\n",
404 "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
405 "(steps/hour)");
406 fprintf(out,"%12s %10.3f %10.3f %14.1f\n","Performance:",
407 nbfs/nodetime,(mflop > 1000) ? (mflop/1000) : mflop,
408 nsteps*3600.0/nodetime);
413 int cost_nrnb(int enr)
415 return nbdata[enr].flop;
418 const char *nrnb_str(int enr)
420 return nbdata[enr].name;
423 static const int force_index[]={
424 eNR_BONDS, eNR_ANGLES, eNR_PROPER, eNR_IMPROPER,
425 eNR_RB, eNR_DISRES, eNR_ORIRES, eNR_POSRES,
426 eNR_NS, eNR_NBKERNEL_OUTER
428 #define NFORCE_INDEX asize(force_index)
430 static const int constr_index[]={
431 eNR_SHAKE, eNR_SHAKE_RIJ, eNR_SETTLE, eNR_UPDATE, eNR_PCOUPL,
432 eNR_CONSTR_VIR,eNR_CONSTR_V
434 #define NCONSTR_INDEX asize(constr_index)
436 static double pr_av(FILE *log,t_commrec *cr,
437 double fav,double ftot[],const char *title)
439 int i,perc;
440 double dperc,unb;
442 unb=0;
443 if (fav > 0) {
444 fav /= cr->nnodes - cr->npmenodes;
445 fprintf(log,"\n %-26s",title);
446 for(i=0; (i<cr->nnodes); i++) {
447 dperc=(100.0*ftot[i])/fav;
448 unb=max(unb,dperc);
449 perc=dperc;
450 fprintf(log,"%3d ",perc);
452 if (unb > 0) {
453 perc=10000.0/unb;
454 fprintf(log,"%6d%%\n\n",perc);
456 else
457 fprintf(log,"\n\n");
459 return unb;
462 void pr_load(FILE *log,t_commrec *cr,t_nrnb nrnb[])
464 int i,j,perc;
465 double dperc,unb,uf,us;
466 double *ftot,fav;
467 double *stot,sav;
468 t_nrnb *av;
470 snew(av,1);
471 snew(ftot,cr->nnodes);
472 snew(stot,cr->nnodes);
473 init_nrnb(av);
474 for(i=0; (i<cr->nnodes); i++) {
475 add_nrnb(av,av,&(nrnb[i]));
476 /* Cost due to forces */
477 for(j=0; (j<eNR_NBKERNEL_NR); j++)
478 ftot[i]+=nrnb[i].n[j]*cost_nrnb(j);
479 for(j=0; (j<NFORCE_INDEX); j++)
480 ftot[i]+=nrnb[i].n[force_index[j]]*cost_nrnb(force_index[j]);
481 /* Due to shake */
482 for(j=0; (j<NCONSTR_INDEX); j++) {
483 stot[i]+=nrnb[i].n[constr_index[j]]*cost_nrnb(constr_index[j]);
486 for(j=0; (j<eNRNB); j++)
487 av->n[j]=av->n[j]/(double)(cr->nnodes - cr->npmenodes);
489 fprintf(log,"\nDetailed load balancing info in percentage of average\n");
491 fprintf(log," Type NODE:");
492 for(i=0; (i<cr->nnodes); i++)
493 fprintf(log,"%3d ",i);
494 fprintf(log,"Scaling\n");
495 fprintf(log,"---------------------------");
496 for(i=0; (i<cr->nnodes); i++)
497 fprintf(log,"----");
498 fprintf(log,"-------\n");
500 for(j=0; (j<eNRNB); j++) {
501 unb=100.0;
502 if (av->n[j] > 0) {
503 fprintf(log," %-26s",nrnb_str(j));
504 for(i=0; (i<cr->nnodes); i++) {
505 dperc=(100.0*nrnb[i].n[j])/av->n[j];
506 unb=max(unb,dperc);
507 perc=dperc;
508 fprintf(log,"%3d ",perc);
510 if (unb > 0) {
511 perc=10000.0/unb;
512 fprintf(log,"%6d%%\n",perc);
514 else
515 fprintf(log,"\n");
518 fav=sav=0;
519 for(i=0; (i<cr->nnodes); i++) {
520 fav+=ftot[i];
521 sav+=stot[i];
523 uf=pr_av(log,cr,fav,ftot,"Total Force");
524 us=pr_av(log,cr,sav,stot,"Total Constr.");
526 unb=(uf*fav+us*sav)/(fav+sav);
527 if (unb > 0) {
528 unb=10000.0/unb;
529 fprintf(log,"\nTotal Scaling: %.0f%% of max performance\n\n",unb);