/* [gromacs.git] src/mdlib/domdec.c */
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 * This file is part of Gromacs        Copyright (c) 1991-2008
 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org
 *
 * And Hey:
 * Gnomes, ROck Monsters And Chili Sauce
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <time.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include "typedefs.h"
#include "smalloc.h"
#include "vec.h"
#include "domdec.h"
#include "domdec_network.h"
#include "nrnb.h"
#include "pbc.h"
#include "constr.h"
#include "mdatoms.h"
#include "names.h"
#include "pdbio.h"
#include "futil.h"
#include "force.h"
#include "pme.h"
#include "pull.h"
#include "gmx_wallcycle.h"
#include "mdrun.h"
#include "nsgrid.h"
#include "shellfc.h"
#include "mtop_util.h"
#include "gmxfio.h"
#include "gmx_ga2la.h"

#ifdef GMX_LIB_MPI
#include <mpi.h>
#endif
#ifdef GMX_THREADS
#include "tmpi.h"
#endif
#define DDRANK(dd,rank)    (rank)
#define DDMASTERRANK(dd)   (dd->masterrank)
typedef struct gmx_domdec_master
{
    /* The cell boundaries */
    real **cell_x;
    /* The global charge group division */
    int  *ncg;    /* Number of home charge groups for each node */
    int  *index;  /* Index of size nnodes+1 into cg             */
    int  *cg;     /* Global charge group index                  */
    int  *nat;    /* Number of home atoms for each node         */
    int  *ibuf;   /* Buffer for communication                   */
    rvec *vbuf;   /* Buffer for state scattering and gathering  */
} gmx_domdec_master_t;
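
/* Layout example: the home charge groups of node n are the entries
 * cg[index[n]] .. cg[index[n+1]-1], so ncg[n] == index[n+1] - index[n]
 * and index[nnodes] is the total number of charge groups.
 */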
typedef struct
{
    /* The numbers of charge groups to send and receive for each cell
     * that requires communication, the last entry contains the total
     * number of atoms that need to be communicated.
     */
    int nsend[DD_MAXIZONE+2];
    int nrecv[DD_MAXIZONE+2];
    /* The charge groups to send */
    int *index;
    int nalloc;
    /* The atom range for non-in-place communication */
    int cell2at0[DD_MAXIZONE];
    int cell2at1[DD_MAXIZONE];
} gmx_domdec_ind_t;
typedef struct
{
    int  np;               /* Number of grid pulses in this dimension */
    int  np_dlb;           /* For dlb, for use with edlbAUTO          */
    gmx_domdec_ind_t *ind; /* The indices to communicate, size np     */
    int  np_nalloc;
    bool bInPlace;         /* Can we communicate in place?            */
} gmx_domdec_comm_dim_t;
typedef struct
{
    bool *bCellMin;    /* Temp. var.: is this cell size at the limit      */
    real *cell_f;      /* State var.: cell boundaries, box relative       */
    real *old_cell_f;  /* Temp. var.: old cell size                       */
    real *cell_f_max0; /* State var.: max lower boundary, incl. neighbors */
    real *cell_f_min1; /* State var.: min upper boundary, incl. neighbors */
    real *bound_min;   /* Temp. var.: lower limit for cell boundary       */
    real *bound_max;   /* Temp. var.: upper limit for cell boundary       */
    bool bLimited;     /* State var.: is DLB limited in this dim and row  */
    real *buf_ncd;     /* Temp. var.                                      */
} gmx_domdec_root_t;
#define DD_NLOAD_MAX 9

/* Here floats are accurate enough, since these variables
 * only influence the load balancing, not the actual MD results.
 */
typedef struct
{
    int    nload;
    float *load;
    float  sum;
    float  max;
    float  sum_m;
    float  cvol_min;
    float  mdf;
    float  pme;
    int    flags;
} gmx_domdec_load_t;
typedef struct
{
    int nsc;
    int ind_gl;
    int ind;
} gmx_cgsort_t;

typedef struct
{
    gmx_cgsort_t *sort1,*sort2;
    int          sort_nalloc;
    gmx_cgsort_t *sort_new;
    int          sort_new_nalloc;
    int          *ibuf;
    int          ibuf_nalloc;
} gmx_domdec_sort_t;

typedef struct
{
    rvec *v;
    int  nalloc;
} vec_rvec_t;
/* This enum determines the order of the coordinates.
 * ddnatHOME and ddnatZONE should be first and second,
 * the others can be ordered as wanted.
 */
enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };

enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
typedef struct
{
    int  dimind;     /* The dimension index                                    */
    int  nslab;      /* The number of PME slabs in this dimension              */
    real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB      */
    int  *pp_min;    /* The minimum pp node location, size nslab               */
    int  *pp_max;    /* The maximum pp node location, size nslab               */
    int  maxshift;   /* The maximum shift for coordinate redistribution in PME */
} gmx_ddpme_t;

typedef struct
{
    real min0;    /* The minimum bottom of this zone                       */
    real max1;    /* The maximum top of this zone                          */
    real mch0;    /* The maximum bottom communication height for this zone */
    real mch1;    /* The maximum top communication height for this zone    */
    real p1_0;    /* The bottom value of the first cell in this zone       */
    real p1_1;    /* The top value of the first cell in this zone          */
} gmx_ddzone_t;
typedef struct gmx_domdec_comm
{
    /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
     * unless stated otherwise.
     */

    /* The number of decomposition dimensions for PME, 0: no PME */
    int  npmedecompdim;
    /* The number of nodes doing PME (PP/PME or only PME) */
    int  npmenodes;
    int  npmenodes_major;
    /* The communication setup including the PME only nodes */
    bool bCartesianPP_PME;
    ivec ntot;
    int  cartpmedim;
    int  *pmenodes;          /* size npmenodes                          */
    int  *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
                              * and not bCartesianPP_PME                */
    gmx_ddpme_t ddpme[2];

    /* The DD particle-particle nodes only */
    bool bCartesianPP;
    int  *ddindex2ddnodeid;  /* size npmenodes, only with bCartesianPP_PME */

    /* The global charge groups */
    t_block cgs_gl;

    /* Should we sort the cgs */
    int  nstSortCG;
    gmx_domdec_sort_t *sort;

    /* Are there bonded and multi-body interactions between charge groups? */
    bool bInterCGBondeds;
    bool bInterCGMultiBody;

    /* Data for the optional bonded interaction atom communication range */
    bool bBondComm;
    t_blocka *cglink;
    char *bLocalCG;

    /* The DLB option */
    int  eDLB;
    /* Are we actually using DLB? */
    bool bDynLoadBal;

    /* Cell sizes for static load balancing, first index cartesian */
    real **slb_frac;

    /* The width of the communicated boundaries */
    real cutoff_mbody;
    real cutoff;
    /* The minimum cell size (including triclinic correction) */
    rvec cellsize_min;
    /* For dlb, for use with edlbAUTO */
    rvec cellsize_min_dlb;
    /* The lower limit for the DD cell size with DLB */
    real cellsize_limit;
    /* Effectively no NB cut-off limit with DLB for systems without PBC? */
    bool bVacDLBNoLimit;

    /* tric_dir is only stored here because dd_get_ns_ranges needs it */
    ivec tric_dir;
    /* box0 and box_size are required with dim's without pbc and -gcom */
    rvec box0;
    rvec box_size;

    /* The cell boundaries */
    rvec cell_x0;
    rvec cell_x1;

    /* The old location of the cell boundaries, to check cg displacements */
    rvec old_cell_x0;
    rvec old_cell_x1;

    /* The communication setup and charge group boundaries for the zones */
    gmx_domdec_zones_t zones;

    /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
     * cell boundaries of neighboring cells for dynamic load balancing.
     */
    gmx_ddzone_t zone_d1[2];
    gmx_ddzone_t zone_d2[2][2];

    /* The coordinate/force communication setup and indices */
    gmx_domdec_comm_dim_t cd[DIM];
    /* The maximum number of cells to communicate with in one dimension */
    int  maxpulse;

    /* Which cg distribution is stored on the master node */
    int master_cg_ddp_count;

    /* The number of cg's received from the direct neighbors */
    int  zone_ncg1[DD_MAXZONE];

    /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
    int  nat[ddnatNR];

    /* Communication buffer for general use */
    int  *buf_int;
    int  nalloc_int;

    /* Communication buffer for general use */
    vec_rvec_t vbuf;

    /* Communication buffers only used with multiple grid pulses */
    int  *buf_int2;
    int  nalloc_int2;
    vec_rvec_t vbuf2;

    /* Communication buffers for local redistribution */
    int  **cggl_flag;
    int  cggl_flag_nalloc[DIM*2];
    rvec **cgcm_state;
    int  cgcm_state_nalloc[DIM*2];

    /* Cell sizes for dynamic load balancing */
    gmx_domdec_root_t **root;
    real *cell_f_row;
    real cell_f0[DIM];
    real cell_f1[DIM];
    real cell_f_max0[DIM];
    real cell_f_min1[DIM];

    /* Stuff for load communication */
    bool bRecordLoad;
    gmx_domdec_load_t *load;
#ifdef GMX_MPI
    MPI_Comm *mpi_comm_load;
#endif
    /* Cycle counters */
    float cycl[ddCyclNr];
    int   cycl_n[ddCyclNr];
    float cycl_max[ddCyclNr];
    /* Flop counter (0=no, 1=yes, 2=with (eFlop-1)*5% noise) */
    int  eFlop;
    double flop;
    int    flop_n;
    /* How often have we had load measurements */
    int    n_load_have;
    /* How often have we collected the load measurements */
    int    n_load_collect;

    /* Statistics */
    double sum_nat[ddnatNR-ddnatZONE];
    int    ndecomp;
    int    nload;
    double load_step;
    double load_sum;
    double load_max;
    ivec   load_lim;
    double load_mdf;
    double load_pme;

    /* The last partition step */
    gmx_step_t partition_step;

    /* Debugging */
    int  nstDDDump;
    int  nstDDDumpGrid;
    int  DD_debug;
} gmx_domdec_comm_t;
/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_CGIBS 2

/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_FLAG_NRCG  65535
#define DD_FLAG_FW(d) (1<<(16+(d)*2))
#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
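
/* Packing sketch for these flags, as the cg redistribution code consumes
 * them: the lower 16 bits of a flag word hold the atom count of the charge
 * group (masked out with DD_FLAG_NRCG), while bits 16+2*d and 16+2*d+1 mark
 * a move in the forward resp. backward direction along dimension d, e.g.
 *     flag = nrcg | DD_FLAG_FW(d);
 *     nrcg = flag & DD_FLAG_NRCG;
 *     if (flag & DD_FLAG_FW(d)) { ... }
 */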
/* Zone permutation required to obtain consecutive charge groups
 * for neighbor searching.
 */
static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };

/* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
 * components see only j zones with that component 0.
 */

/* The DD zone order */
static const ivec dd_zo[DD_MAXZONE] =
  {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};

/* The 3D setup */
#define dd_z3n  8
#define dd_zp3n 4
static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};

/* The 2D setup */
#define dd_z2n  4
#define dd_zp2n 2
static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};

/* The 1D setup */
#define dd_z1n  2
#define dd_zp1n 1
static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
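
/* Reading the dd_zp tables (a sketch of how the zone setup code consumes
 * them): each triplet {i,j0,j1} says that i-zone i has j-zones j0 up to
 * j1-1 as neighbor-search partners. E.g. in the 1D setup {0,0,2} means
 * zone 0 interacts with zones 0 and 1; in the 3D setup {1,3,6} means
 * i-zone 1 interacts with j-zones 3, 4 and 5.
 */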
/* Factors used to avoid problems due to rounding issues */
#define DD_CELL_MARGIN       1.0001
#define DD_CELL_MARGIN2      1.00005
/* Factor to account for pressure scaling during nstlist steps */
#define DD_PRES_SCALE_MARGIN 1.02

/* Allowed performance loss before we DLB or warn */
#define DD_PERF_LOSS 0.05

#define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))

/* Use separate MPI send and receive commands
 * when nnodes <= GMX_DD_NNODES_SENDRECV.
 * This saves memory (and some copying for small nnodes).
 * For high parallelization scatter and gather calls are used.
 */
#define GMX_DD_NNODES_SENDRECV 4
/*
#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])

static void index2xyz(ivec nc,int ind,ivec xyz)
{
    xyz[XX] = ind % nc[XX];
    xyz[YY] = (ind / nc[XX]) % nc[YY];
    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
}
*/

/* This order is required to minimize the coordinate communication in PME
 * which uses decomposition in the x direction.
 */
#define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])

static void ddindex2xyz(ivec nc,int ind,ivec xyz)
{
    xyz[XX] = ind / (nc[YY]*nc[ZZ]);
    xyz[YY] = (ind / nc[ZZ]) % nc[YY];
    xyz[ZZ] = ind % nc[ZZ];
}
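
/* Worked example of the x-major indexing above: with nc = {2,3,4} the node
 * with Cartesian coordinates i = {1,2,3} gets
 *     dd_index(nc,i) = (1*3 + 2)*4 + 3 = 23
 * and ddindex2xyz(nc,23,xyz) recovers xyz = {1,2,3}:
 *     23/(3*4) = 1,  (23/4) % 3 = 2,  23 % 4 = 3.
 */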
static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
{
    int ddindex;
    int ddnodeid=-1;

    ddindex = dd_index(dd->nc,c);
    if (dd->comm->bCartesianPP_PME)
    {
        ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
    }
    else if (dd->comm->bCartesianPP)
    {
#ifdef GMX_MPI
        MPI_Cart_rank(dd->mpi_comm_all,c,&ddnodeid);
#endif
    }
    else
    {
        ddnodeid = ddindex;
    }

    return ddnodeid;
}
static bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
{
    return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
}

int ddglatnr(gmx_domdec_t *dd,int i)
{
    int atnr;

    if (dd == NULL)
    {
        atnr = i + 1;
    }
    else
    {
        if (i >= dd->comm->nat[ddnatNR-1])
        {
            gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,dd->comm->nat[ddnatNR-1]);
        }
        atnr = dd->gatindex[i] + 1;
    }

    return atnr;
}

t_block *dd_charge_groups_global(gmx_domdec_t *dd)
{
    return &dd->comm->cgs_gl;
}
static void check_vec_rvec_alloc(vec_rvec_t *v,int n)
{
    if (n > v->nalloc)
    {
        v->nalloc = over_alloc_dd(n);
        srenew(v->v,v->nalloc);
    }
}

void dd_store_state(gmx_domdec_t *dd,t_state *state)
{
    int i;

    if (state->ddp_count != dd->ddp_count)
    {
        gmx_incons("The state does not match the domain decomposition state");
    }

    state->ncg_gl = dd->ncg_home;
    if (state->ncg_gl > state->cg_gl_nalloc)
    {
        state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
        srenew(state->cg_gl,state->cg_gl_nalloc);
    }
    for(i=0; i<state->ncg_gl; i++)
    {
        state->cg_gl[i] = dd->index_gl[i];
    }

    state->ddp_count_cg_gl = dd->ddp_count;
}
gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
{
    return &dd->comm->zones;
}

void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
                      int *jcg0,int *jcg1,ivec shift0,ivec shift1)
{
    gmx_domdec_zones_t *zones;
    int izone,d,dim;

    zones = &dd->comm->zones;

    izone = 0;
    while (icg >= zones->izone[izone].cg1)
    {
        izone++;
    }

    if (izone == 0)
    {
        *jcg0 = icg;
    }
    else if (izone < zones->nizone)
    {
        *jcg0 = zones->izone[izone].jcg0;
    }
    else
    {
        gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
                  icg,izone,zones->nizone);
    }

    *jcg1 = zones->izone[izone].jcg1;

    for(d=0; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        shift0[dim] = zones->izone[izone].shift0[dim];
        shift1[dim] = zones->izone[izone].shift1[dim];
        if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
        {
            /* A conservative approach, this can be optimized */
            shift0[dim] -= 1;
            shift1[dim] += 1;
        }
    }
}

int dd_natoms_vsite(gmx_domdec_t *dd)
{
    return dd->comm->nat[ddnatVSITE];
}

void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
{
    *at_start = dd->comm->nat[ddnatCON-1];
    *at_end   = dd->comm->nat[ddnatCON];
}
void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    rvec shift={0,0,0},*buf,*rbuf;
    bool bPBC,bScrew;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = comm->vbuf.v;

    nzone = 1;
    nat_tot = dd->nat_home;
    for(d=0; d<dd->ndim; d++)
    {
        bPBC   = (dd->ci[dd->dim[d]] == 0);
        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
        if (bPBC)
        {
            copy_rvec(box[dd->dim[d]],shift);
        }
        cd = &comm->cd[d];
        for(p=0; p<cd->np; p++)
        {
            ind = &cd->ind[p];
            index = ind->index;
            n = 0;
            if (!bPBC)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        copy_rvec(x[j],buf[n]);
                        n++;
                    }
                }
            }
            else if (!bScrew)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        /* We need to shift the coordinates */
                        rvec_add(x[j],shift,buf[n]);
                        n++;
                    }
                }
            }
            else
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        /* Shift x */
                        buf[n][XX] = x[j][XX] + shift[XX];
                        /* Rotate y and z.
                         * This operation requires a special shift force
                         * treatment, which is performed in calc_vir.
                         */
                        buf[n][YY] = box[YY][YY] - x[j][YY];
                        buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
                        n++;
                    }
                }
            }

            if (cd->bInPlace)
            {
                rbuf = x + nat_tot;
            }
            else
            {
                rbuf = comm->vbuf2.v;
            }
            /* Send and receive the coordinates */
            dd_sendrecv_rvec(dd, d, dddirBackward,
                             buf,  ind->nsend[nzone+1],
                             rbuf, ind->nrecv[nzone+1]);
            if (!cd->bInPlace)
            {
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        copy_rvec(rbuf[j],x[i]);
                        j++;
                    }
                }
            }
            nat_tot += ind->nrecv[nzone+1];
        }
        nzone += nzone;
    }
}
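
/* In the screw-PBC branch above a particle crossing the x boundary is
 * mapped as (x,y,z) -> (x+Lx, Ly-y, Lz-z) with L taken from the diagonal
 * box elements, i.e. a shift along x combined with a 180-degree rotation
 * around the x axis; dd_move_f below applies the matching transform to
 * the forces, (fx,fy,fz) -> (fx,-fy,-fz).
 */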
void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    rvec *buf,*sbuf;
    ivec vis;
    int  is;
    bool bPBC,bScrew;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = comm->vbuf.v;

    n = 0;
    nzone = comm->zones.n/2;
    nat_tot = dd->nat_tot;
    for(d=dd->ndim-1; d>=0; d--)
    {
        bPBC   = (dd->ci[dd->dim[d]] == 0);
        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
        if (fshift == NULL && !bScrew)
        {
            bPBC = FALSE;
        }
        /* Determine which shift vector we need */
        clear_ivec(vis);
        vis[dd->dim[d]] = 1;
        is = IVEC2IS(vis);

        cd = &comm->cd[d];
        for(p=cd->np-1; p>=0; p--)
        {
            ind = &cd->ind[p];
            nat_tot -= ind->nrecv[nzone+1];
            if (cd->bInPlace)
            {
                sbuf = f + nat_tot;
            }
            else
            {
                sbuf = comm->vbuf2.v;
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        copy_rvec(f[i],sbuf[j]);
                        j++;
                    }
                }
            }
            /* Communicate the forces */
            dd_sendrecv_rvec(dd, d, dddirForward,
                             sbuf, ind->nrecv[nzone+1],
                             buf,  ind->nsend[nzone+1]);
            index = ind->index;
            /* Add the received forces */
            n = 0;
            if (!bPBC)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        rvec_inc(f[j],buf[n]);
                        n++;
                    }
                }
            }
            else if (!bScrew)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        rvec_inc(f[j],buf[n]);
                        /* Add this force to the shift force */
                        rvec_inc(fshift[is],buf[n]);
                        n++;
                    }
                }
            }
            else
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        /* Rotate the force */
                        f[j][XX] += buf[n][XX];
                        f[j][YY] -= buf[n][YY];
                        f[j][ZZ] -= buf[n][ZZ];
                        if (fshift)
                        {
                            /* Add this force to the shift force */
                            rvec_inc(fshift[is],buf[n]);
                        }
                        n++;
                    }
                }
            }
        }
        nzone /= 2;
    }
}
void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    real *buf,*rbuf;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = &comm->vbuf.v[0][0];

    nzone = 1;
    nat_tot = dd->nat_home;
    for(d=0; d<dd->ndim; d++)
    {
        cd = &comm->cd[d];
        for(p=0; p<cd->np; p++)
        {
            ind = &cd->ind[p];
            index = ind->index;
            n = 0;
            for(i=0; i<ind->nsend[nzone]; i++)
            {
                at0 = cgindex[index[i]];
                at1 = cgindex[index[i]+1];
                for(j=at0; j<at1; j++)
                {
                    buf[n] = v[j];
                    n++;
                }
            }

            if (cd->bInPlace)
            {
                rbuf = v + nat_tot;
            }
            else
            {
                rbuf = &comm->vbuf2.v[0][0];
            }
            /* Send and receive the data, as for the coordinates */
            dd_sendrecv_real(dd, d, dddirBackward,
                             buf,  ind->nsend[nzone+1],
                             rbuf, ind->nrecv[nzone+1]);
            if (!cd->bInPlace)
            {
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        v[i] = rbuf[j];
                        j++;
                    }
                }
            }
            nat_tot += ind->nrecv[nzone+1];
        }
        nzone += nzone;
    }
}
void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    real *buf,*sbuf;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = &comm->vbuf.v[0][0];

    n = 0;
    nzone = comm->zones.n/2;
    nat_tot = dd->nat_tot;
    for(d=dd->ndim-1; d>=0; d--)
    {
        cd = &comm->cd[d];
        for(p=cd->np-1; p>=0; p--)
        {
            ind = &cd->ind[p];
            nat_tot -= ind->nrecv[nzone+1];
            if (cd->bInPlace)
            {
                sbuf = v + nat_tot;
            }
            else
            {
                sbuf = &comm->vbuf2.v[0][0];
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        sbuf[j] = v[i];
                        j++;
                    }
                }
            }
            /* Communicate the data, in the direction of the forces */
            dd_sendrecv_real(dd, d, dddirForward,
                             sbuf, ind->nrecv[nzone+1],
                             buf,  ind->nsend[nzone+1]);
            index = ind->index;
            /* Add the received data */
            n = 0;
            for(i=0; i<ind->nsend[nzone]; i++)
            {
                at0 = cgindex[index[i]];
                at1 = cgindex[index[i]+1];
                for(j=at0; j<at1; j++)
                {
                    v[j] += buf[n];
                    n++;
                }
            }
        }
        nzone /= 2;
    }
}
static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
{
    fprintf(fp,"zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
            d,i,j,
            zone->min0,zone->max1,
            zone->mch0,zone->mch1,
            zone->p1_0,zone->p1_1);
}
static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
                               int ddimind,int direction,
                               gmx_ddzone_t *buf_s,int n_s,
                               gmx_ddzone_t *buf_r,int n_r)
{
    rvec vbuf_s[5*2],vbuf_r[5*2];
    int i;

    /* Pack the 6 reals of each zone into 2 rvecs,
     * so we can reuse the rvec send/receive routine.
     */
    for(i=0; i<n_s; i++)
    {
        vbuf_s[i*2  ][0] = buf_s[i].min0;
        vbuf_s[i*2  ][1] = buf_s[i].max1;
        vbuf_s[i*2  ][2] = buf_s[i].mch0;
        vbuf_s[i*2+1][0] = buf_s[i].mch1;
        vbuf_s[i*2+1][1] = buf_s[i].p1_0;
        vbuf_s[i*2+1][2] = buf_s[i].p1_1;
    }

    dd_sendrecv_rvec(dd, ddimind, direction,
                     vbuf_s, n_s*2,
                     vbuf_r, n_r*2);

    for(i=0; i<n_r; i++)
    {
        buf_r[i].min0 = vbuf_r[i*2  ][0];
        buf_r[i].max1 = vbuf_r[i*2  ][1];
        buf_r[i].mch0 = vbuf_r[i*2  ][2];
        buf_r[i].mch1 = vbuf_r[i*2+1][0];
        buf_r[i].p1_0 = vbuf_r[i*2+1][1];
        buf_r[i].p1_1 = vbuf_r[i*2+1][2];
    }
}
static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
                          rvec cell_ns_x0,rvec cell_ns_x1)
{
    int  d,d1,dim,dim1,pos,buf_size,i,j,k,p,npulse,npulse_min;
    gmx_ddzone_t *zp,buf_s[5],buf_r[5],buf_e[5];
    rvec extr_s[2],extr_r[2];
    rvec dh;
    real dist_d,c=0,det;
    gmx_domdec_comm_t *comm;
    bool bPBC,bUse;

    comm = dd->comm;

    for(d=1; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
        zp->min0 = cell_ns_x0[dim];
        zp->max1 = cell_ns_x1[dim];
        zp->mch0 = cell_ns_x0[dim];
        zp->mch1 = cell_ns_x1[dim];
        zp->p1_0 = cell_ns_x0[dim];
        zp->p1_1 = cell_ns_x1[dim];
    }

    for(d=dd->ndim-2; d>=0; d--)
    {
        dim  = dd->dim[d];
        bPBC = (dim < ddbox->npbcdim);

        /* Use an rvec to store two reals */
        extr_s[d][0] = comm->cell_f0[d+1];
        extr_s[d][1] = comm->cell_f1[d+1];
        extr_s[d][2] = 0;

        pos = 0;
        /* Store the extremes in the backward sending buffer,
         * so they get updated separately from the forward communication.
         */
        for(d1=d; d1<dd->ndim-1; d1++)
        {
            /* We invert the order to be able to use the same loop for buf_e */
            buf_s[pos].min0 = extr_s[d1][1];
            buf_s[pos].max1 = extr_s[d1][0];
            buf_s[pos].mch0 = 0;
            buf_s[pos].mch1 = 0;
            /* Store the cell corner of the dimension we communicate along */
            buf_s[pos].p1_0 = comm->cell_x0[dim];
            buf_s[pos].p1_1 = 0;
            pos++;
        }

        buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
        pos++;

        if (dd->ndim == 3 && d == 0)
        {
            buf_s[pos] = comm->zone_d2[0][1];
            pos++;
            buf_s[pos] = comm->zone_d1[0];
            pos++;
        }

        /* We only need to communicate the extremes
         * in the forward direction
         */
        npulse = comm->cd[d].np;
        if (bPBC)
        {
            /* Take the minimum to avoid double communication */
            npulse_min = min(npulse,dd->nc[dim]-1-npulse);
        }
        else
        {
            /* Without PBC we should really not communicate over
             * the boundaries, but implementing that complicates
             * the communication setup and therefore we simply
             * do all communication, but ignore some data.
             */
            npulse_min = npulse;
        }
        for(p=0; p<npulse_min; p++)
        {
            /* Communicate the extremes forward */
            bUse = (bPBC || dd->ci[dim] > 0);

            dd_sendrecv_rvec(dd, d, dddirForward,
                             extr_s+d, dd->ndim-d-1,
                             extr_r+d, dd->ndim-d-1);

            if (bUse)
            {
                for(d1=d; d1<dd->ndim-1; d1++)
                {
                    extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
                    extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
                }
            }
        }

        buf_size = pos;
        for(p=0; p<npulse; p++)
        {
            /* Communicate all the zone information backward */
            bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);

            dd_sendrecv_ddzone(dd, d, dddirBackward,
                               buf_s, buf_size,
                               buf_r, buf_size);

            clear_rvec(dh);
            if (p > 0)
            {
                for(d1=d+1; d1<dd->ndim; d1++)
                {
                    /* Determine the decrease of maximum required
                     * communication height along d1 due to the distance along d,
                     * this avoids a lot of useless atom communication.
                     */
                    dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;

                    if (ddbox->tric_dir[dim])
                    {
                        /* c is the off-diagonal coupling between the cell planes
                         * along directions d and d1.
                         */
                        c = ddbox->v[dim][dd->dim[d1]][dim];
                    }
                    else
                    {
                        c = 0;
                    }
                    det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
                    if (det > 0)
                    {
                        dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
                    }
                    else
                    {
                        /* A negative value signals out of range */
                        dh[d1] = -1;
                    }
                }
            }

            /* Accumulate the extremes over all pulses */
            for(i=0; i<buf_size; i++)
            {
                if (p == 0)
                {
                    buf_e[i] = buf_r[i];
                }
                else
                {
                    if (bUse)
                    {
                        buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
                        buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
                    }

                    if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
                    {
                        d1 = 1;
                    }
                    else
                    {
                        d1 = d + 1;
                    }
                    if (bUse && dh[d1] >= 0)
                    {
                        buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
                        buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
                    }
                }
                /* Copy the received buffer to the send buffer,
                 * to pass the data through with the next pulse.
                 */
                buf_s[i] = buf_r[i];
            }
            if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
                (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
            {
                /* Store the extremes */
                pos = 0;

                for(d1=d; d1<dd->ndim-1; d1++)
                {
                    extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
                    extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
                    pos++;
                }

                if (d == 1 || (d == 0 && dd->ndim == 3))
                {
                    for(i=d; i<2; i++)
                    {
                        comm->zone_d2[1-d][i] = buf_e[pos];
                        pos++;
                    }
                }
                if (d == 0)
                {
                    comm->zone_d1[1] = buf_e[pos];
                    pos++;
                }
            }
        }
    }

    if (dd->ndim >= 2)
    {
        dim = dd->dim[1];
        for(i=0; i<2; i++)
        {
            if (debug)
            {
                print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
            }
            cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
            cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
        }
    }
    if (dd->ndim >= 3)
    {
        dim = dd->dim[2];
        for(i=0; i<2; i++)
        {
            for(j=0; j<2; j++)
            {
                if (debug)
                {
                    print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
                }
                cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
                cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
            }
        }
    }

    for(d=1; d<dd->ndim; d++)
    {
        comm->cell_f_max0[d] = extr_s[d-1][0];
        comm->cell_f_min1[d] = extr_s[d-1][1];
        if (debug)
        {
            fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
                    d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
        }
    }
}
static void dd_collect_cg(gmx_domdec_t *dd,
                          t_state *state_local)
{
    gmx_domdec_master_t *ma=NULL;
    int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;
    t_block *cgs_gl;

    if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
    {
        /* The master has the correct distribution */
        return;
    }

    if (state_local->ddp_count == dd->ddp_count)
    {
        ncg_home = dd->ncg_home;
        cg       = dd->index_gl;
        nat_home = dd->nat_home;
    }
    else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
    {
        cgs_gl = &dd->comm->cgs_gl;

        ncg_home = state_local->ncg_gl;
        cg       = state_local->cg_gl;
        nat_home = 0;
        for(i=0; i<ncg_home; i++)
        {
            nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
        }
    }
    else
    {
        gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
    }

    buf2[0] = dd->ncg_home;
    buf2[1] = dd->nat_home;
    if (DDMASTER(dd))
    {
        ma = dd->ma;
        ibuf = ma->ibuf;
    }
    else
    {
        ibuf = NULL;
    }
    /* Collect the charge group and atom counts on the master */
    dd_gather(dd,2*sizeof(int),buf2,ibuf);

    if (DDMASTER(dd))
    {
        ma->index[0] = 0;
        for(i=0; i<dd->nnodes; i++)
        {
            ma->ncg[i] = ma->ibuf[2*i];
            ma->nat[i] = ma->ibuf[2*i+1];
            ma->index[i+1] = ma->index[i] + ma->ncg[i];
        }
        /* Make byte counts and indices */
        for(i=0; i<dd->nnodes; i++)
        {
            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
        }
        if (debug)
        {
            fprintf(debug,"Initial charge group distribution: ");
            for(i=0; i<dd->nnodes; i++)
                fprintf(debug," %d",ma->ncg[i]);
            fprintf(debug,"\n");
        }
    }

    /* Collect the charge group indices on the master */
    dd_gatherv(dd,
               dd->ncg_home*sizeof(int),dd->index_gl,
               DDMASTER(dd) ? ma->ibuf : NULL,
               DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
               DDMASTER(dd) ? ma->cg : NULL);

    dd->comm->master_cg_ddp_count = state_local->ddp_count;
}
static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
                                    rvec *lv,rvec *v)
{
    gmx_domdec_master_t *ma;
    int  n,i,c,a,nalloc=0;
    rvec *buf=NULL;
    t_block *cgs_gl;

    ma = dd->ma;

    if (!DDMASTER(dd))
    {
#ifdef GMX_MPI
        MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
                 dd->rank,dd->mpi_comm_all);
#endif
    }
    else
    {
        /* Copy the master coordinates to the global array */
        cgs_gl = &dd->comm->cgs_gl;

        n = DDMASTERRANK(dd);
        a = 0;
        for(i=ma->index[n]; i<ma->index[n+1]; i++)
        {
            for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
            {
                copy_rvec(lv[a++],v[c]);
            }
        }

        for(n=0; n<dd->nnodes; n++)
        {
            if (n != dd->rank)
            {
                if (ma->nat[n] > nalloc)
                {
                    nalloc = over_alloc_dd(ma->nat[n]);
                    srenew(buf,nalloc);
                }
#ifdef GMX_MPI
                MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
                         n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
#endif
                a = 0;
                for(i=ma->index[n]; i<ma->index[n+1]; i++)
                {
                    for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
                    {
                        copy_rvec(buf[a++],v[c]);
                    }
                }
            }
        }
        sfree(buf);
    }
}
static void get_commbuffer_counts(gmx_domdec_t *dd,
                                  int **counts,int **disps)
{
    gmx_domdec_master_t *ma;
    int n;

    ma = dd->ma;

    /* Make the rvec count and displacement arrays */
    *counts = ma->ibuf;
    *disps  = ma->ibuf + dd->nnodes;
    for(n=0; n<dd->nnodes; n++)
    {
        (*counts)[n] = ma->nat[n]*sizeof(rvec);
        (*disps)[n]  = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
    }
}

static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
                                   rvec *lv,rvec *v)
{
    gmx_domdec_master_t *ma;
    int  *rcounts=NULL,*disps=NULL;
    int  n,i,c,a;
    rvec *buf=NULL;
    t_block *cgs_gl;

    ma = dd->ma;

    if (DDMASTER(dd))
    {
        get_commbuffer_counts(dd,&rcounts,&disps);

        buf = ma->vbuf;
    }

    dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);

    if (DDMASTER(dd))
    {
        cgs_gl = &dd->comm->cgs_gl;

        a = 0;
        for(n=0; n<dd->nnodes; n++)
        {
            for(i=ma->index[n]; i<ma->index[n+1]; i++)
            {
                for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
                {
                    copy_rvec(buf[a++],v[c]);
                }
            }
        }
    }
}
void dd_collect_vec(gmx_domdec_t *dd,
                    t_state *state_local,rvec *lv,rvec *v)
{
    dd_collect_cg(dd,state_local);

    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
    {
        dd_collect_vec_sendrecv(dd,lv,v);
    }
    else
    {
        dd_collect_vec_gatherv(dd,lv,v);
    }
}
void dd_collect_state(gmx_domdec_t *dd,
                      t_state *state_local,t_state *state)
{
    int est,i;

    if (DDMASTER(dd))
    {
        state->lambda = state_local->lambda;
        copy_mat(state_local->box,state->box);
        copy_mat(state_local->boxv,state->boxv);
        copy_mat(state_local->pres_prev,state->pres_prev);
        for(i=0; i<state_local->ngtc; i++)
        {
            state->nosehoover_xi[i]  = state_local->nosehoover_xi[i];
            state->therm_integral[i] = state_local->therm_integral[i];
        }
    }
    for(est=estX; est<estNR; est++)
    {
        if (state_local->flags & (1<<est))
        {
            switch (est)
            {
            case estX:
                dd_collect_vec(dd,state_local,state_local->x,state->x);
                break;
            case estV:
                dd_collect_vec(dd,state_local,state_local->v,state->v);
                break;
            case estSDX:
                dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
                break;
            case estCGP:
                dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
                break;
            case estLD_RNG:
                if (state->nrngi == 1)
                {
                    if (DDMASTER(dd))
                    {
                        for(i=0; i<state_local->nrng; i++)
                        {
                            state->ld_rng[i] = state_local->ld_rng[i];
                        }
                    }
                }
                else
                {
                    dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
                              state_local->ld_rng,state->ld_rng);
                }
                break;
            case estLD_RNGI:
                if (state->nrngi == 1)
                {
                    if (DDMASTER(dd))
                    {
                        state->ld_rngi[0] = state_local->ld_rngi[0];
                    }
                }
                else
                {
                    dd_gather(dd,sizeof(state->ld_rngi[0]),
                              state_local->ld_rngi,state->ld_rngi);
                }
                break;
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_collect_state");
            }
        }
    }
}
static void dd_realloc_fr_cg(t_forcerec *fr,int nalloc)
{
    if (debug)
    {
        fprintf(debug,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr->cg_nalloc,nalloc,over_alloc_dd(nalloc));
    }
    fr->cg_nalloc = over_alloc_dd(nalloc);
    srenew(fr->cg_cm,fr->cg_nalloc);
    srenew(fr->cginfo,fr->cg_nalloc);
}

static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
{
    int est;

    if (debug)
    {
        fprintf(debug,"Reallocating state: currently %d, required %d, allocating %d\n",state->nalloc,nalloc,over_alloc_dd(nalloc));
    }

    state->nalloc = over_alloc_dd(nalloc);

    for(est=estX; est<estNR; est++)
    {
        if (state->flags & (1<<est))
        {
            switch(est)
            {
            case estX:
                srenew(state->x,state->nalloc);
                break;
            case estV:
                srenew(state->v,state->nalloc);
                break;
            case estSDX:
                srenew(state->sd_X,state->nalloc);
                break;
            case estCGP:
                srenew(state->cg_p,state->nalloc);
                break;
            case estLD_RNG:
            case estLD_RNGI:
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* No reallocation required */
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_realloc_state");
            }
        }
    }

    if (f != NULL)
    {
        srenew(*f,state->nalloc);
    }
}
static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
                                       rvec *v,rvec *lv)
{
    gmx_domdec_master_t *ma;
    int  n,i,c,a,nalloc=0;
    rvec *buf=NULL;

    if (DDMASTER(dd))
    {
        ma  = dd->ma;

        for(n=0; n<dd->nnodes; n++)
        {
            if (n != dd->rank)
            {
                if (ma->nat[n] > nalloc)
                {
                    nalloc = over_alloc_dd(ma->nat[n]);
                    srenew(buf,nalloc);
                }
                /* Copy node n's part of v into the send buffer */
                a = 0;
                for(i=ma->index[n]; i<ma->index[n+1]; i++)
                {
                    for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
                    {
                        copy_rvec(v[c],buf[a++]);
                    }
                }
                if (a != ma->nat[n])
                {
                    gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
                              a,ma->nat[n]);
                }

#ifdef GMX_MPI
                MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
                         DDRANK(dd,n),n,dd->mpi_comm_all);
#endif
            }
        }
        sfree(buf);
        n = DDMASTERRANK(dd);
        a = 0;
        for(i=ma->index[n]; i<ma->index[n+1]; i++)
        {
            for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
            {
                copy_rvec(v[c],lv[a++]);
            }
        }
    }
    else
    {
#ifdef GMX_MPI
        MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
                 MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE);
#endif
    }
}
static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
                                       rvec *v,rvec *lv)
{
    gmx_domdec_master_t *ma;
    int  *scounts=NULL,*disps=NULL;
    int  n,i,c,a,nalloc=0;
    rvec *buf=NULL;

    if (DDMASTER(dd))
    {
        ma  = dd->ma;

        get_commbuffer_counts(dd,&scounts,&disps);

        buf = ma->vbuf;
        a = 0;
        for(n=0; n<dd->nnodes; n++)
        {
            for(i=ma->index[n]; i<ma->index[n+1]; i++)
            {
                for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
                {
                    copy_rvec(v[c],buf[a++]);
                }
            }
        }
    }

    dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
}

static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
{
    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
    {
        dd_distribute_vec_sendrecv(dd,cgs,v,lv);
    }
    else
    {
        dd_distribute_vec_scatterv(dd,cgs,v,lv);
    }
}
static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
                                t_state *state,t_state *state_local,
                                rvec **f)
{
    int i;

    if (DDMASTER(dd))
    {
        state_local->lambda = state->lambda;
        copy_mat(state->box,state_local->box);
        copy_mat(state->box_rel,state_local->box_rel);
        copy_mat(state->boxv,state_local->boxv);
        for(i=0; i<state_local->ngtc; i++)
        {
            state_local->nosehoover_xi[i]  = state->nosehoover_xi[i];
            state_local->therm_integral[i] = state->therm_integral[i];
        }
    }
    dd_bcast(dd,sizeof(real),&state_local->lambda);
    dd_bcast(dd,sizeof(state_local->box),state_local->box);
    dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
    dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
    dd_bcast(dd,state_local->ngtc*sizeof(real),state_local->nosehoover_xi);
    dd_bcast(dd,state_local->ngtc*sizeof(real),state_local->therm_integral);

    if (dd->nat_home > state_local->nalloc)
    {
        dd_realloc_state(state_local,f,dd->nat_home);
    }
    for(i=estX; i<estNR; i++)
    {
        if (state_local->flags & (1<<i))
        {
            switch (i)
            {
            case estX:
                dd_distribute_vec(dd,cgs,state->x,state_local->x);
                break;
            case estV:
                dd_distribute_vec(dd,cgs,state->v,state_local->v);
                break;
            case estSDX:
                dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
                break;
            case estCGP:
                dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
                break;
            case estLD_RNG:
                if (state->nrngi == 1)
                {
                    dd_bcastc(dd,
                              state_local->nrng*sizeof(state_local->ld_rng[0]),
                              state->ld_rng,state_local->ld_rng);
                }
                else
                {
                    dd_scatter(dd,
                               state_local->nrng*sizeof(state_local->ld_rng[0]),
                               state->ld_rng,state_local->ld_rng);
                }
                break;
            case estLD_RNGI:
                if (state->nrngi == 1)
                {
                    dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
                              state->ld_rngi,state_local->ld_rngi);
                }
                else
                {
                    dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
                               state->ld_rngi,state_local->ld_rngi);
                }
                break;
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* Not implemented yet */
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_distribute_state");
            }
        }
    }
}
static char dim2char(int dim)
{
    char c='?';

    switch (dim)
    {
    case XX: c = 'X'; break;
    case YY: c = 'Y'; break;
    case ZZ: c = 'Z'; break;
    default: gmx_fatal(FARGS,"Unknown dim %d",dim);
    }

    return c;
}
static void write_dd_grid_pdb(const char *fn,gmx_step_t step,
                              gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
{
    rvec grid_s[2],*grid_r=NULL,cx,r;
    char fname[STRLEN],format[STRLEN],buf[22];
    FILE *out;
    int  a,i,d,z,y,x;
    matrix tric;
    real vol;

    copy_rvec(dd->comm->cell_x0,grid_s[0]);
    copy_rvec(dd->comm->cell_x1,grid_s[1]);

    if (DDMASTER(dd))
    {
        snew(grid_r,2*dd->nnodes);
    }

    dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);

    if (DDMASTER(dd))
    {
        for(d=0; d<DIM; d++)
        {
            for(i=0; i<DIM; i++)
            {
                if (d == i)
                {
                    tric[d][i] = 1;
                }
                else
                {
                    if (dd->nc[d] > 1 && d < ddbox->npbcdim)
                    {
                        tric[d][i] = box[i][d]/box[i][i];
                    }
                    else
                    {
                        tric[d][i] = 0;
                    }
                }
            }
        }
        sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
        sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
        out = gmx_fio_fopen(fname,"w");
        gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
        a = 1;
        for(i=0; i<dd->nnodes; i++)
        {
            vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
            for(d=0; d<DIM; d++)
            {
                vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
            }
            for(z=0; z<2; z++)
            {
                for(y=0; y<2; y++)
                {
                    for(x=0; x<2; x++)
                    {
                        cx[XX] = grid_r[i*2+x][XX];
                        cx[YY] = grid_r[i*2+y][YY];
                        cx[ZZ] = grid_r[i*2+z][ZZ];
                        mvmul(tric,cx,r);
                        fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,
                                10*r[XX],10*r[YY],10*r[ZZ],1.0,vol);
                    }
                }
            }
            for(d=0; d<DIM; d++)
            {
                for(x=0; x<4; x++)
                {
                    switch(d)
                    {
                    case 0: y = 1 + i*8 + 2*x; break;
                    case 1: y = 1 + i*8 + 2*x - (x % 2); break;
                    case 2: y = 1 + i*8 + x; break;
                    }
                    fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
                }
            }
        }
        gmx_fio_fclose(out);
        sfree(grid_r);
    }
}
void write_dd_pdb(const char *fn,gmx_step_t step,const char *title,
                  gmx_mtop_t *mtop,t_commrec *cr,
                  int natoms,rvec x[],matrix box)
{
    char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
    FILE *out;
    int  i,ii,resnr,c;
    char *atomname,*resname;
    real b;
    gmx_domdec_t *dd;

    dd = cr->dd;
    if (natoms == -1)
    {
        natoms = dd->comm->nat[ddnatVSITE];
    }

    sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);

    sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
    sprintf(format4,"%s%s\n",pdbformat4,"%6.2f%6.2f");

    out = gmx_fio_fopen(fname,"w");

    fprintf(out,"TITLE     %s\n",title);
    gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
    for(i=0; i<natoms; i++)
    {
        ii = dd->gatindex[i];
        gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
        if (i < dd->comm->nat[ddnatZONE])
        {
            c = 0;
            while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
            {
                c++;
            }
            b = c;
        }
        else if (i < dd->comm->nat[ddnatVSITE])
        {
            b = dd->comm->zones.n;
        }
        else
        {
            b = dd->comm->zones.n + 1;
        }
        fprintf(out,strlen(atomname)<4 ? format : format4,
                "ATOM",(ii+1)%100000,
                atomname,resname,' ',(resnr+1)%10000,' ',
                10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b);
    }
    fprintf(out,"TER\n");

    gmx_fio_fclose(out);
}
real dd_cutoff_mbody(gmx_domdec_t *dd)
{
    gmx_domdec_comm_t *comm;
    int  di;
    real r;

    comm = dd->comm;

    r = -1;
    if (comm->bInterCGBondeds)
    {
        if (comm->cutoff_mbody > 0)
        {
            r = comm->cutoff_mbody;
        }
        else
        {
            /* cutoff_mbody=0 means we do not have DLB */
            r = comm->cellsize_min[dd->dim[0]];
            for(di=1; di<dd->ndim; di++)
            {
                r = min(r,comm->cellsize_min[dd->dim[di]]);
            }
            if (comm->bBondComm)
            {
                r = max(r,comm->cutoff_mbody);
            }
            else
            {
                r = min(r,comm->cutoff);
            }
        }
    }

    return r;
}

real dd_cutoff_twobody(gmx_domdec_t *dd)
{
    real r_mb;

    r_mb = dd_cutoff_mbody(dd);

    return max(dd->comm->cutoff,r_mb);
}
static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
{
    int nc,ntot;

    nc   = dd->nc[dd->comm->cartpmedim];
    ntot = dd->comm->ntot[dd->comm->cartpmedim];
    copy_ivec(coord,coord_pme);
    coord_pme[dd->comm->cartpmedim] =
        nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
}

static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
{
    /* Here we assign a PME node to communicate with this DD node
     * by assuming that the major index of both is x.
     * We add npme/2 to obtain an even distribution.
     */
    return (ddindex*npme + npme/2)/ndd;
}
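
/* Worked example of the mapping above: with ndd = 6 PP nodes and
 * npme = 2 PME nodes, (ddindex*2 + 1)/6 assigns DD nodes 0-2 to PME
 * node 0 and DD nodes 3-5 to PME node 1, i.e. an even split along x.
 */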
static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
{
    return low_ddindex2pmeindex(dd->nnodes,dd->comm->npmenodes,ddindex);
}

static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
{
    return low_ddindex2pmeindex(cr->dd->nnodes,cr->npmenodes,ddindex);
}

static int *dd_pmenodes(t_commrec *cr)
{
    int *pmenodes;
    int n,i,p0,p1;

    snew(pmenodes,cr->npmenodes);
    n = 0;
    for(i=0; i<cr->dd->nnodes; i++)
    {
        p0 = cr_ddindex2pmeindex(cr,i);
        p1 = cr_ddindex2pmeindex(cr,i+1);
        if (i+1 == cr->dd->nnodes || p1 > p0)
        {
            if (debug)
            {
                fprintf(debug,"pmenode[%d] = %d\n",n,i+1+n);
            }
            pmenodes[n] = i + 1 + n;
            n++;
        }
    }

    return pmenodes;
}
static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
{
    gmx_domdec_t *dd;
    ivec coords,coords_pme,nc;
    int  slab;

    dd = cr->dd;
    /*
      if (dd->comm->bCartesian) {
        gmx_ddindex2xyz(dd->nc,ddindex,coords);
        dd_coords2pmecoords(dd,coords,coords_pme);
        copy_ivec(dd->ntot,nc);
        nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];

        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
      } else {
        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
      }
    */
    coords[XX] = x;
    coords[YY] = y;
    coords[ZZ] = z;
    slab = ddindex2pmeindex(dd,dd_index(dd->nc,coords));

    return slab;
}
static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
{
    gmx_domdec_comm_t *comm;
    ivec coords;
    int  ddindex,nodeid=-1;

    comm = cr->dd->comm;

    coords[XX] = x;
    coords[YY] = y;
    coords[ZZ] = z;
    if (comm->bCartesianPP_PME)
    {
#ifdef GMX_MPI
        MPI_Cart_rank(cr->mpi_comm_mysim,coords,&nodeid);
#endif
    }
    else
    {
        ddindex = dd_index(cr->dd->nc,coords);
        if (comm->bCartesianPP)
        {
            nodeid = comm->ddindex2simnodeid[ddindex];
        }
        else
        {
            if (comm->pmenodes)
            {
                nodeid = ddindex + gmx_ddcoord2pmeindex(cr,x,y,z);
            }
            else
            {
                nodeid = ddindex;
            }
        }
    }

    return nodeid;
}
static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    ivec coord,coord_pme;
    int  i;
    int  pmenode=-1;

    dd = cr->dd;
    comm = dd->comm;

    /* This assumes a uniform x domain decomposition grid cell size */
    if (comm->bCartesianPP_PME)
    {
#ifdef GMX_MPI
        MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
        if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
        {
            /* This is a PP node */
            dd_cart_coord2pmecoord(dd,coord,coord_pme);
            MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
        }
#endif
    }
    else if (comm->bCartesianPP)
    {
        if (sim_nodeid < dd->nnodes)
        {
            pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
        }
    }
    else
    {
        /* This assumes DD cells with identical x coordinates
         * are numbered sequentially.
         */
        if (dd->comm->pmenodes == NULL)
        {
            if (sim_nodeid < dd->nnodes)
            {
                /* The DD index equals the nodeid */
                pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
            }
        }
        else
        {
            i = 0;
            while (sim_nodeid > dd->comm->pmenodes[i])
            {
                i++;
            }
            if (sim_nodeid < dd->comm->pmenodes[i])
            {
                pmenode = dd->comm->pmenodes[i];
            }
        }
    }

    return pmenode;
}
bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
{
    bool bPMEOnlyNode;

    if (DOMAINDECOMP(cr))
    {
        bPMEOnlyNode = (dd_simnode2pmenode(cr,sim_nodeid) == -1);
    }
    else
    {
        bPMEOnlyNode = FALSE;
    }

    return bPMEOnlyNode;
}
void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
                     int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
{
    gmx_domdec_t *dd;
    int x,y,z;
    ivec coord,coord_pme;

    dd = cr->dd;

    snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes);

    *nmy_ddnodes = 0;
    for(x=0; x<dd->nc[XX]; x++)
    {
        for(y=0; y<dd->nc[YY]; y++)
        {
            for(z=0; z<dd->nc[ZZ]; z++)
            {
                if (dd->comm->bCartesianPP_PME)
                {
                    coord[XX] = x;
                    coord[YY] = y;
                    coord[ZZ] = z;
                    dd_cart_coord2pmecoord(dd,coord,coord_pme);
                    if (dd->ci[XX] == coord_pme[XX] &&
                        dd->ci[YY] == coord_pme[YY] &&
                        dd->ci[ZZ] == coord_pme[ZZ])
                    {
                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
                    }
                }
                else
                {
                    /* The slab corresponds to the nodeid in the PME group */
                    if (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid)
                    {
                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
                    }
                }
            }
        }
    }

    /* The last PP-only node is the peer node */
    *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];

    if (debug)
    {
        fprintf(debug,"Receive coordinates from PP nodes:");
        for(x=0; x<*nmy_ddnodes; x++)
        {
            fprintf(debug," %d",(*my_ddnodes)[x]);
        }
        fprintf(debug,"\n");
    }
}
static bool receive_vir_ener(t_commrec *cr)
{
    gmx_domdec_comm_t *comm;
    int  pmenode,coords[DIM],rank;
    bool bReceive;

    bReceive = TRUE;
    if (cr->npmenodes < cr->dd->nnodes)
    {
        comm = cr->dd->comm;
        if (comm->bCartesianPP_PME)
        {
            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
#ifdef GMX_MPI
            MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
            coords[comm->cartpmedim]++;
            if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
            {
                MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
                if (dd_simnode2pmenode(cr,rank) == pmenode)
                {
                    /* This is not the last PP node for pmenode */
                    bReceive = FALSE;
                }
            }
#endif
        }
        else
        {
            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
            if (cr->sim_nodeid+1 < cr->nnodes &&
                dd_simnode2pmenode(cr,cr->sim_nodeid+1) == pmenode)
            {
                /* This is not the last PP node for pmenode */
                bReceive = FALSE;
            }
        }
    }

    return bReceive;
}
static void set_zones_ncg_home(gmx_domdec_t *dd)
{
    gmx_domdec_zones_t *zones;
    int i;

    zones = &dd->comm->zones;

    zones->cg_range[0] = 0;
    for(i=1; i<zones->n+1; i++)
    {
        zones->cg_range[i] = dd->ncg_home;
    }
}

static void rebuild_cgindex(gmx_domdec_t *dd,int *gcgs_index,t_state *state)
{
    int nat,i,*ind,*dd_cg_gl,*cgindex,cg_gl;

    ind = state->cg_gl;
    dd_cg_gl = dd->index_gl;
    cgindex  = dd->cgindex;
    nat = 0;
    cgindex[0] = nat;
    for(i=0; i<state->ncg_gl; i++)
    {
        cgindex[i] = nat;
        cg_gl = ind[i];
        dd_cg_gl[i] = cg_gl;
        nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
    }
    cgindex[i] = nat;

    dd->ncg_home = state->ncg_gl;
    dd->nat_home = nat;

    set_zones_ncg_home(dd);
}
static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
{
    while (cg >= cginfo_mb->cg_end)
    {
        cginfo_mb++;
    }

    return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
}
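
/* The modulo lookup above works because cginfo is stored per molecule
 * block: all molecules in a block share the same cginfo entries, so a
 * block only stores cg_mod entries and the lookup wraps around for the
 * second and later molecules of the block.
 */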
static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
                          t_forcerec *fr,char *bLocalCG)
{
    cginfo_mb_t *cginfo_mb;
    int *cginfo;
    int cg;

    if (fr != NULL)
    {
        cginfo_mb = fr->cginfo_mb;
        cginfo    = fr->cginfo;

        for(cg=cg0; cg<cg1; cg++)
        {
            cginfo[cg] = ddcginfo(cginfo_mb,index_gl[cg]);
        }
    }

    if (bLocalCG != NULL)
    {
        for(cg=cg0; cg<cg1; cg++)
        {
            bLocalCG[index_gl[cg]] = TRUE;
        }
    }
}
static void make_dd_indices(gmx_domdec_t *dd,int *gcgs_index,int cg_start)
{
    int nzone,zone,zone1,cg0,cg,cg_gl,a,a_gl;
    int *zone2cg,*zone_ncg1,*index_gl,*gatindex;
    gmx_ga2la_t *ga2la;
    char *bLocalCG;

    bLocalCG = dd->comm->bLocalCG;

    if (dd->nat_tot > dd->gatindex_nalloc)
    {
        dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
        srenew(dd->gatindex,dd->gatindex_nalloc);
    }

    nzone      = dd->comm->zones.n;
    zone2cg    = dd->comm->zones.cg_range;
    zone_ncg1  = dd->comm->zone_ncg1;
    index_gl   = dd->index_gl;
    gatindex   = dd->gatindex;

    if (zone2cg[1] != dd->ncg_home)
    {
        gmx_incons("dd->ncg_zone is not up to date");
    }

    /* Make the local to global and global to local atom index */
    a = dd->cgindex[cg_start];
    for(zone=0; zone<nzone; zone++)
    {
        if (zone == 0)
        {
            cg0 = cg_start;
        }
        else
        {
            cg0 = zone2cg[zone];
        }
        for(cg=cg0; cg<zone2cg[zone+1]; cg++)
        {
            zone1 = zone;
            if (cg - cg0 >= zone_ncg1[zone])
            {
                /* Signal that this cg is from more than one zone away */
                zone1 += nzone;
            }
            cg_gl = index_gl[cg];
            for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
            {
                gatindex[a] = a_gl;
                ga2la_set(dd->ga2la,a_gl,a,zone1);
                a++;
            }
        }
    }
}
static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
                          const char *where)
{
    int ncg,i,ngl,nerr;

    nerr = 0;
    if (bLocalCG == NULL)
    {
        return nerr;
    }
    for(i=0; i<dd->ncg_tot; i++)
    {
        if (!bLocalCG[dd->index_gl[i]])
        {
            fprintf(stderr,
                    "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,i+1,dd->index_gl[i]+1,dd->ncg_home);
            nerr++;
        }
    }
    ngl = 0;
    for(i=0; i<ncg_sys; i++)
    {
        if (bLocalCG[i])
        {
            ngl++;
        }
    }
    if (ngl != dd->ncg_tot)
    {
        fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,ngl,dd->ncg_tot);
        nerr++;
    }

    return nerr;
}
static void check_index_consistency(gmx_domdec_t *dd,
                                    int natoms_sys,int ncg_sys,
                                    const char *where)
{
    int  nerr,ngl,i,a,cell;
    int  *have;

    nerr = 0;

    if (dd->comm->DD_debug > 1)
    {
        snew(have,natoms_sys);
        for(a=0; a<dd->nat_tot; a++)
        {
            if (have[dd->gatindex[a]] > 0)
            {
                fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,dd->gatindex[a]+1,have[dd->gatindex[a]],a+1);
            }
            else
            {
                have[dd->gatindex[a]] = a + 1;
            }
        }
        sfree(have);
    }

    snew(have,dd->nat_tot);

    ngl = 0;
    for(i=0; i<natoms_sys; i++)
    {
        if (ga2la_get(dd->ga2la,i,&a,&cell))
        {
            if (a >= dd->nat_tot)
            {
                fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,i+1,a+1,dd->nat_tot);
                nerr++;
            }
            else
            {
                have[a] = 1;
                if (dd->gatindex[a] != i)
                {
                    fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,i+1,a+1,dd->gatindex[a]+1);
                    nerr++;
                }
            }
            ngl++;
        }
    }
    if (ngl != dd->nat_tot)
    {
        fprintf(stderr,
                "DD node %d, %s: %d global atom indices, %d local atoms\n",
                dd->rank,where,ngl,dd->nat_tot);
    }
    for(a=0; a<dd->nat_tot; a++)
    {
        if (have[a] == 0)
        {
            fprintf(stderr,
                    "DD node %d, %s: local atom %d, global %d has no global index\n",
                    dd->rank,where,a+1,dd->gatindex[a]+1);
        }
    }
    sfree(have);

    nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);

    if (nerr > 0)
    {
        gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
                  dd->rank,where,nerr);
    }
}
static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
{
    int  i;
    char *bLocalCG;

    if (a_start == 0)
    {
        /* Clear the whole list without searching */
        ga2la_clear(dd->ga2la);
    }
    else
    {
        for(i=a_start; i<dd->nat_tot; i++)
        {
            ga2la_del(dd->ga2la,dd->gatindex[i]);
        }
    }

    bLocalCG = dd->comm->bLocalCG;
    if (bLocalCG)
    {
        for(i=cg_start; i<dd->ncg_tot; i++)
        {
            bLocalCG[dd->index_gl[i]] = FALSE;
        }
    }

    dd_clear_local_vsite_indices(dd);

    if (dd->constraints)
    {
        dd_clear_local_constraint_indices(dd);
    }
}
static real grid_jump_limit(gmx_domdec_comm_t *comm,int dim_ind)
{
    real grid_jump_limit;

    /* The distance between the boundaries of cells at distance
     * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
     * and by the fact that cells should not be shifted by more than
     * half their size, such that cg's only shift by one cell
     * at redecomposition.
     */
    grid_jump_limit = comm->cellsize_limit;
    if (!comm->bVacDLBNoLimit)
    {
        grid_jump_limit = max(grid_jump_limit,
                              comm->cutoff/comm->cd[dim_ind].np);
    }

    return grid_jump_limit;
}
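
/* Numerical example: with a 1.0 nm cut-off communicated in np = 2 pulses
 * along this dimension and a cell size limit of 0.4 nm, the jump limit
 * becomes max(0.4, 1.0/2) = 0.5 nm.
 */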
static void check_grid_jump(gmx_step_t step,gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
{
    gmx_domdec_comm_t *comm;
    int  d,dim;
    real limit,bfac;

    comm = dd->comm;

    for(d=1; d<dd->ndim; d++)
    {
        dim   = dd->dim[d];
        limit = grid_jump_limit(comm,d);
        bfac  = ddbox->box_size[dim];
        if (ddbox->tric_dir[dim])
        {
            bfac *= ddbox->skew_fac[dim];
        }
        if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac <  limit ||
            (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
        {
            char buf[22];
            gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
                      gmx_step_str(step,buf),
                      dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
        }
    }
}

static int dd_load_count(gmx_domdec_comm_t *comm)
{
    return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
}
2590 static float dd_force_load(gmx_domdec_comm_t *comm)
2592 float load;
2594 if (comm->eFlop)
2596 load = comm->flop;
2597 if (comm->eFlop > 1)
2599 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2602 else
2604 load = comm->cycl[ddCyclF];
2605 if (comm->cycl_n[ddCyclF] > 1)
2607 /* Subtract the maximum of the last n cycle counts
2608 * to get rid of possible high counts due to other soures,
2609 * for instance system activity, that would otherwise
2610 * affect the dynamic load balancing.
2612 load -= comm->cycl_max[ddCyclF];
2616 return load;
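/* Example of the flop-based branch above, for the assumed setting
 * eFlop = 2: the flop count is multiplied by a random factor drawn
 * uniformly from [0.95,1.05], i.e. +-5% artificial noise per node,
 * which exercises the load balancer even when the true load is
 * perfectly uniform.
 */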
2619 static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dimind,real **dim_f)
2621 gmx_domdec_comm_t *comm;
2622 int i;
2624 comm = dd->comm;
2626 if (dd->dim[dimind] != dimind)
2628 *dim_f = NULL;
2629 return;
2632 snew(*dim_f,dd->nc[dimind]+1);
2633 (*dim_f)[0] = 0;
2634 for(i=1; i<dd->nc[dimind]; i++)
2636 if (comm->slb_frac[dimind])
2638 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dimind][i-1];
2640 else
2642 (*dim_f)[i] = (real)i/(real)dd->nc[dimind];
2645 (*dim_f)[dd->nc[dimind]] = 1;
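/* Example for a hypothetical dimension with dd->nc[dimind] = 4:
 * without static load balancing fractions (slb_frac == NULL) the
 * loop above yields the uniform boundaries
 *   (*dim_f) = { 0, 0.25, 0.5, 0.75, 1 }
 * while slb_frac = { 0.4, 0.2, 0.2, 0.2 } would yield
 *   (*dim_f) = { 0, 0.4, 0.6, 0.8, 1 }
 */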
2648 static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
2649 int dimind,int nslab)
2651 int pmeindex,slab,nso,i;
2652 ivec xyz;
2654 ddpme->dimind = dimind;
2655 ddpme->nslab = nslab;
2657 if (nslab <= 1)
2659 return;
2662 nso = dd->comm->npmenodes/nslab;
2663 /* Determine for each PME slab the PP location range for dimension dim */
2664 snew(ddpme->pp_min,nslab);
2665 snew(ddpme->pp_max,nslab);
2666 for(slab=0; slab<nslab; slab++) {
2667 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2668 ddpme->pp_max[slab] = 0;
2670 for(i=0; i<dd->nnodes; i++) {
2671 ddindex2xyz(dd->nc,i,xyz);
2672 /* For the y dimension only use our own y/z slab.
2673 * This assumes that the PME x grid size matches the DD grid size.
2675 if (dimind == 0 || xyz[YY] == dd->ci[YY]) {
2676 pmeindex = ddindex2pmeindex(dd,i);
2677 if (dimind == 0) {
2678 slab = pmeindex/nso;
2679 } else {
2680 slab = pmeindex % nslab;
2682 ddpme->pp_min[slab] = min(ddpme->pp_min[slab],xyz[dimind]);
2683 ddpme->pp_max[slab] = max(ddpme->pp_max[slab],xyz[dimind]);
2687 set_slb_pme_dim_f(dd,ddpme->dimind,&ddpme->slb_dim_f);
2690 int dd_pme_maxshift0(gmx_domdec_t *dd)
2692 return dd->comm->ddpme[0].maxshift;
2695 int dd_pme_maxshift1(gmx_domdec_t *dd)
2697 return dd->comm->ddpme[1].maxshift;
2700 static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
2701 bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
2703 gmx_domdec_comm_t *comm;
2704 int dim,nc,ns,s;
2705 int *xmin,*xmax;
2706 real range,pme_boundary;
2707 int sh;
2709 comm = dd->comm;
2710 dim = ddpme->dimind;
2711 nc = dd->nc[dim];
2712 ns = ddpme->nslab;
2714 if (dd->dim[dim] != dim)
2716 /* PP decomposition is not along dim: the worst situation */
2717 sh = ns/2;
2719 else if (ns <= 3 || (bUniform && ns == nc))
2721 /* The optimal situation */
2722 sh = 1;
2724 else
2726 /* For each PME node we need to determine which PP nodes
2727 * it could possibly need to communicate with.
2729 xmin = ddpme->pp_min;
2730 xmax = ddpme->pp_max;
2731 /* Allow atoms to be at most half of the smaller of the cell
2732 * size and the cut-off outside of their DD cell.
2734 range = 0.5*min(comm->cellsize_min[dim],comm->cutoff);
2735 range /= ddbox->skew_fac[dim]*ddbox->box_size[dim];
2736 /* Avoid unlucky rounding at exactly 0.5 */
2737 range *= 0.999;
2739 sh = 1;
2740 for(s=0; s<ns; s++)
2742 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2743 pme_boundary = (real)s/ns;
2744 while (sh+1 < ns &&
2745 ((s-(sh+1) >= 0 &&
2746 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
2747 (s-(sh+1) < 0 &&
2748 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
2750 sh++;
2752 pme_boundary = (real)(s+1)/ns;
2753 while (sh+1 < ns &&
2754 ((s+(sh+1) < ns &&
2755 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
2756 (s+(sh+1) >= ns &&
2757 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
2759 sh++;
2764 ddpme->maxshift = sh;
2766 if (debug)
2768 fprintf(debug,"PME slab communication range for dimind %d is %d\n",
2769 ddpme->dimind,ddpme->maxshift);
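/* A sketch of what maxshift means, with assumed numbers: for
 * ns = 4 slabs, slab s = 1 spreads charge between box fractions
 * 0.25 and 0.5. If PP cell boundaries, extended by the atom range
 * computed above, two slabs away can still cross these fractions,
 * the while loops push sh to 2 and the PME coordinate
 * communication then has to cover +-2 slabs.
 */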
2773 static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
2775 int d,dim;
2777 for(d=0; d<dd->ndim; d++)
2779 dim = dd->dim[d];
2780 if (dim < ddbox->nboundeddim &&
2781 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
2782 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
2784 gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2785 dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
2786 dd->nc[dim],dd->comm->cellsize_limit);
2791 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
2792 bool bMaster,ivec npulse)
2794 gmx_domdec_comm_t *comm;
2795 int d,j;
2796 rvec cellsize_min;
2797 real *cell_x,cell_dx,cellsize;
2799 comm = dd->comm;
2801 for(d=0; d<DIM; d++)
2803 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
2804 npulse[d] = 1;
2805 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
2807 /* Uniform grid */
2808 cell_dx = ddbox->box_size[d]/dd->nc[d];
2809 if (bMaster)
2811 for(j=0; j<dd->nc[d]+1; j++)
2813 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
2816 else
2818 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
2819 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
2821 cellsize = cell_dx*ddbox->skew_fac[d];
2822 while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
2824 npulse[d]++;
2826 cellsize_min[d] = cellsize;
2828 else
2830 /* Statically load balanced grid */
2831 /* Even when we are not doing a master distribution, we determine
2832 * all cell borders in a loop to obtain values identical
2833 * to the master distribution case and to determine npulse.
2835 if (bMaster)
2837 cell_x = dd->ma->cell_x[d];
2839 else
2841 snew(cell_x,dd->nc[d]+1);
2843 cell_x[0] = ddbox->box0[d];
2844 for(j=0; j<dd->nc[d]; j++)
2846 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
2847 cell_x[j+1] = cell_x[j] + cell_dx;
2848 cellsize = cell_dx*ddbox->skew_fac[d];
2849 while (cellsize*npulse[d] < comm->cutoff &&
2850 npulse[d] < dd->nc[d]-1)
2852 npulse[d]++;
2854 cellsize_min[d] = min(cellsize_min[d],cellsize);
2856 if (!bMaster)
2858 comm->cell_x0[d] = cell_x[dd->ci[d]];
2859 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
2860 sfree(cell_x);
2863 /* The following limitation avoids a cell receiving some of its
2864 * own home charge groups back over the periodic boundary.
2865 * Duplicate charge groups cause trouble with the global indices.
2867 if (d < ddbox->npbcdim &&
2868 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
2870 if (DDMASTER(dd))
2872 gmx_fatal(FARGS,"The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
2873 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
2874 comm->cutoff,
2875 dd->nc[d],dd->nc[d],
2876 dd->nnodes > dd->nc[d] ? "cells" : "processors");
2878 #ifdef GMX_MPI
2879 MPI_Abort(MPI_COMM_WORLD, 0);
2880 #else
2881 exit(0);
2882 #endif
2886 if (!comm->bDynLoadBal)
2888 copy_rvec(cellsize_min,comm->cellsize_min);
2891 for(d=0; d<comm->npmedecompdim; d++)
2893 set_pme_maxshift(dd,&comm->ddpme[d],
2894 comm->slb_frac[dd->dim[d]]==NULL,ddbox,
2895 comm->ddpme[d].slb_dim_f);
2900 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
2901 int d,int dim,gmx_domdec_root_t *root,
2902 gmx_ddbox_t *ddbox,
2903 bool bUniform,gmx_step_t step, real cellsize_limit_f, int range[])
2905 gmx_domdec_comm_t *comm;
2906 int ncd,i,j,nmin,nmin_old;
2907 bool bLimLo,bLimHi;
2908 real *cell_size;
2909 real fac,halfway,cellsize_limit_f_i,region_size;
2910 bool bPBC,bLastHi=FALSE;
2911 int nrange[]={range[0],range[1]};
2913 region_size= root->cell_f[range[1]]-root->cell_f[range[0]];
2915 comm = dd->comm;
2917 ncd = dd->nc[dim];
2919 bPBC = (dim < ddbox->npbcdim);
2921 cell_size = root->buf_ncd;
2923 if (debug)
2925 fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
2928 /* First we need to check that the scaling does not make cells
2929 * smaller than the smallest allowed size.
2930 * We need to do this iteratively, since if a cell is too small,
2931 * it needs to be enlarged, which makes all the other cells smaller,
2932 * which could in turn make another cell smaller than allowed.
2934 for(i=range[0]; i<range[1]; i++)
2936 root->bCellMin[i] = FALSE;
2938 nmin = 0;
2941 nmin_old = nmin;
2942 /* We need the total for normalization */
2943 fac = 0;
2944 for(i=range[0]; i<range[1]; i++)
2946 if (root->bCellMin[i] == FALSE)
2948 fac += cell_size[i];
2951 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
2952 /* Determine the cell boundaries */
2953 for(i=range[0]; i<range[1]; i++)
2955 if (root->bCellMin[i] == FALSE)
2957 cell_size[i] *= fac;
2958 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
2960 cellsize_limit_f_i = 0;
2962 else
2964 cellsize_limit_f_i = cellsize_limit_f;
2966 if (cell_size[i] < cellsize_limit_f_i)
2968 root->bCellMin[i] = TRUE;
2969 cell_size[i] = cellsize_limit_f_i;
2970 nmin++;
2973 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
2976 while (nmin > nmin_old);
2978 i=range[1]-1;
2979 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
2980 /* For this check we should not use DD_CELL_MARGIN,
2981 * but a slightly smaller factor,
2982 * since rounding could get us below the limit.
2984 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
2986 char buf[22];
2987 gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
2988 gmx_step_str(step,buf),
2989 dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
2990 ncd,comm->cellsize_min[dim]);
2993 root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);
2995 if (!bUniform)
2997 /* Check if the boundary did not displace more than halfway
2998 * each of the cells it bounds, as this could cause problems,
2999 * especially when the differences between cell sizes are large.
3000 * If changes are applied, they will not make cells smaller
3001 * than the cut-off, as we check all the boundaries which
3002 * might be affected by a change and if the old state was ok,
3003 * the cells will at most be shrunk back to their old size.
3005 for(i=range[0]+1; i<range[1]; i++)
3007 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3008 if (root->cell_f[i] < halfway)
3010 root->cell_f[i] = halfway;
3011 /* Check if the change also causes shifts of the next boundaries */
3012 for(j=i+1; j<range[1]; j++)
3014 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3015 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3018 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3019 if (root->cell_f[i] > halfway)
3021 root->cell_f[i] = halfway;
3022 /* Check if the change also causes shifts of the next boundaries */
3023 for(j=i-1; j>=range[0]+1; j--)
3025 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3026 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3032 /* nrange is the half-open [lower, upper) range passed to the recursive calls of enforce_limits */
3033 /* Find the highest LimLo violation (a) and the lowest LimHi violation above it (b),
3034 * then call enforce_limits for (oldb,a) and (a,b); the next step handles (b,nexta).
3035 * oldb and nexta can be the range boundaries; nrange is used to hold a and b. */
3036 if (d > 0)
3038 /* Take care of the staggering of the cell boundaries */
3039 if (bUniform)
3041 for(i=range[0]; i<range[1]; i++)
3043 root->cell_f_max0[i] = root->cell_f[i];
3044 root->cell_f_min1[i] = root->cell_f[i+1];
3047 else
3049 for(i=range[0]+1; i<range[1]; i++)
3051 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3052 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3053 if (bLimLo && bLimHi)
3055 /* Both limits violated, try the best we can */
3056 /* In this case we split the original range (range) in two parts and handle the other limitations in the next iteration. */
3057 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3058 nrange[0]=range[0];
3059 nrange[1]=i;
3060 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3062 nrange[0]=i;
3063 nrange[1]=range[1];
3064 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3066 return;
3068 else if (bLimLo)
3070 /* root->cell_f[i] = root->bound_min[i]; */
3071 nrange[1]=i; /* Only store the violation location; a LimLo violation with a higher index could still follow */
3072 bLastHi=FALSE;
3074 else if (bLimHi && !bLastHi)
3076 bLastHi=TRUE;
3077 if (nrange[1] < range[1]) /* found a LimLo before */
3079 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3080 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3081 nrange[0]=nrange[1];
3083 root->cell_f[i] = root->bound_max[i];
3084 nrange[1]=i;
3085 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3086 nrange[0]=i;
3087 nrange[1]=range[1];
3090 if (nrange[1] < range[1]) /* the last violation found was a LimLo */
3092 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3093 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3094 nrange[0]=nrange[1];
3095 nrange[1]=range[1];
3096 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3098 else if (nrange[0] > range[0]) /* found at least one LimHi */
3100 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3107 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3108 int d,int dim,gmx_domdec_root_t *root,
3109 gmx_ddbox_t *ddbox,bool bDynamicBox,
3110 bool bUniform,gmx_step_t step)
3112 gmx_domdec_comm_t *comm;
3113 int ncd,d1,i,j,pos;
3114 real *cell_size;
3115 real load_aver,load_i,imbalance,change,change_max,sc;
3116 real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
3117 real change_limit = 0.1;
3118 real relax = 0.5;
3119 bool bPBC;
3120 int range[] = { 0, 0 };
3122 comm = dd->comm;
3124 ncd = dd->nc[dim];
3126 bPBC = (dim < ddbox->npbcdim);
3128 cell_size = root->buf_ncd;
3130 /* Store the original boundaries */
3131 for(i=0; i<ncd+1; i++)
3133 root->old_cell_f[i] = root->cell_f[i];
3135 if (bUniform) {
3136 for(i=0; i<ncd; i++)
3138 cell_size[i] = 1.0/ncd;
3141 else if (dd_load_count(comm))
3143 load_aver = comm->load[d].sum_m/ncd;
3144 change_max = 0;
3145 for(i=0; i<ncd; i++)
3147 /* Determine the relative imbalance of cell i */
3148 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3149 imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
3150 /* Determine the change of the cell size using underrelaxation */
3151 change = -relax*imbalance;
3152 change_max = max(change_max,max(change,-change));
3154 /* Limit the amount of scaling.
3155 * We need to use the same rescaling for all cells in one row,
3156 * otherwise the load balancing might not converge.
3158 sc = relax;
3159 if (change_max > change_limit)
3161 sc *= change_limit/change_max;
3163 for(i=0; i<ncd; i++)
3165 /* Determine the relative imbalance of cell i */
3166 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3167 imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
3168 /* Determine the change of the cell size using underrelaxation */
3169 change = -sc*imbalance;
3170 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3174 cellsize_limit_f = comm->cellsize_min[dim]/ddbox->box_size[dim];
3175 cellsize_limit_f *= DD_CELL_MARGIN;
3176 dist_min_f_hard = grid_jump_limit(comm,d)/ddbox->box_size[dim];
3177 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3178 if (ddbox->tric_dir[dim])
3180 cellsize_limit_f /= ddbox->skew_fac[dim];
3181 dist_min_f /= ddbox->skew_fac[dim];
3183 if (bDynamicBox && d > 0)
3185 dist_min_f *= DD_PRES_SCALE_MARGIN;
3187 if (d > 0 && !bUniform)
3189 /* Make sure that the grid is not shifted too much */
3190 for(i=1; i<ncd; i++) {
3191 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3193 gmx_incons("Inconsistent DD boundary staggering limits!");
3195 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3196 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3197 if (space > 0) {
3198 root->bound_min[i] += 0.5*space;
3200 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3201 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3202 if (space < 0) {
3203 root->bound_max[i] += 0.5*space;
3205 if (debug)
3207 fprintf(debug,
3208 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3209 d,i,
3210 root->cell_f_max0[i-1] + dist_min_f,
3211 root->bound_min[i],root->cell_f[i],root->bound_max[i],
3212 root->cell_f_min1[i] - dist_min_f);
3216 range[1]=ncd;
3217 root->cell_f[0] = 0;
3218 root->cell_f[ncd] = 1;
3219 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3222 /* After the checks above, the cells should obey the cut-off
3223 * restrictions, but it does not hurt to check.
3225 for(i=0; i<ncd; i++)
3227 if (debug)
3229 fprintf(debug,"Relative bounds dim %d cell %d: %f %f\n",
3230 dim,i,root->cell_f[i],root->cell_f[i+1]);
3233 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3234 root->cell_f[i+1] - root->cell_f[i] <
3235 cellsize_limit_f/DD_CELL_MARGIN)
3237 char buf[22];
3238 fprintf(stderr,
3239 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3240 gmx_step_str(step,buf),dim2char(dim),i,
3241 (root->cell_f[i+1] - root->cell_f[i])
3242 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3246 pos = ncd + 1;
3247 /* Store the cell boundaries of the lower dimensions at the end */
3248 for(d1=0; d1<d; d1++)
3250 root->cell_f[pos++] = comm->cell_f0[d1];
3251 root->cell_f[pos++] = comm->cell_f1[d1];
3254 if (d < comm->npmedecompdim)
3256 /* The master determines the maximum shift for
3257 * the coordinate communication between separate PME nodes.
3259 set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
3261 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3262 if (d >= 1)
3264 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3268 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3269 gmx_ddbox_t *ddbox,int dimind)
3271 gmx_domdec_comm_t *comm;
3272 int dim;
3274 comm = dd->comm;
3276 /* Set the cell dimensions */
3277 dim = dd->dim[dimind];
3278 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3279 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3280 if (dim >= ddbox->nboundeddim)
3282 comm->cell_x0[dim] += ddbox->box0[dim];
3283 comm->cell_x1[dim] += ddbox->box0[dim];
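/* Example with made-up numbers: cell_f0 = 0.25, cell_f1 = 0.5 and
 * box_size = 6 nm give cell_x0 = 1.5 nm and cell_x1 = 3.0 nm;
 * for a dimension that is not bounded by the box the origin
 * offset box0[dim] is added on top, as above.
 */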
3287 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3288 int d,int dim,real *cell_f_row,
3289 gmx_ddbox_t *ddbox)
3291 gmx_domdec_comm_t *comm;
3292 int d1,dim1,pos;
3294 comm = dd->comm;
3296 #ifdef GMX_MPI
3297 /* Each node would only need to know two fractions,
3298 * but it is probably cheaper to broadcast the whole array.
3300 MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
3301 0,comm->mpi_comm_load[d]);
3302 #endif
3303 /* Copy the fractions for this dimension from the buffer */
3304 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3305 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3306 /* The whole array was communicated, so set the buffer position */
3307 pos = dd->nc[dim] + 1;
3308 for(d1=0; d1<=d; d1++)
3310 if (d1 < d)
3312 /* Copy the cell fractions of the lower dimensions */
3313 comm->cell_f0[d1] = cell_f_row[pos++];
3314 comm->cell_f1[d1] = cell_f_row[pos++];
3316 relative_to_absolute_cell_bounds(dd,ddbox,d1);
3318 /* Convert the communicated shift from float to int */
3319 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3320 if (d >= 1)
3322 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
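/* Illustration of the cell_f_row buffer layout read above, for a
 * row of nc cells in dimension d:
 *   entries 0 .. nc        the nc+1 relative cell boundaries
 *   following entries      cell_f0/cell_f1 pairs of dimensions < d
 *   last one or two        ddpme[0].maxshift and, for d >= 1,
 *                          ddpme[1].maxshift, stored as reals
 */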
3326 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3327 gmx_ddbox_t *ddbox,bool bDynamicBox,
3328 bool bUniform,gmx_step_t step)
3330 gmx_domdec_comm_t *comm;
3331 int d,dim,d1;
3332 bool bRowMember,bRowRoot;
3333 real *cell_f_row;
3335 comm = dd->comm;
3337 for(d=0; d<dd->ndim; d++)
3339 dim = dd->dim[d];
3340 bRowMember = TRUE;
3341 bRowRoot = TRUE;
3342 for(d1=d; d1<dd->ndim; d1++)
3344 if (dd->ci[dd->dim[d1]] > 0)
3346 if (d1 > d)
3348 bRowMember = FALSE;
3350 bRowRoot = FALSE;
3353 if (bRowMember)
3355 if (bRowRoot)
3357 set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
3358 ddbox,bDynamicBox,bUniform,step);
3359 cell_f_row = comm->root[d]->cell_f;
3361 else
3363 cell_f_row = comm->cell_f_row;
3365 distribute_dd_cell_sizes_dlb(dd,d,dim,cell_f_row,ddbox);
3370 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
3372 int d;
3374 /* This function assumes the box is static and should therefore
3375 * not be called when the box has changed since the last
3376 * call to dd_partition_system.
3378 for(d=0; d<dd->ndim; d++)
3380 relative_to_absolute_cell_bounds(dd,ddbox,d);
3386 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3387 gmx_ddbox_t *ddbox,bool bDynamicBox,
3388 bool bUniform,bool bDoDLB,gmx_step_t step,
3389 gmx_wallcycle_t wcycle)
3391 gmx_domdec_comm_t *comm;
3392 int dim;
3394 comm = dd->comm;
3396 if (bDoDLB)
3398 wallcycle_start(wcycle,ewcDDCOMMBOUND);
3399 set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
3400 wallcycle_stop(wcycle,ewcDDCOMMBOUND);
3402 else if (bDynamicBox)
3404 set_dd_cell_sizes_dlb_nochange(dd,ddbox);
3407 /* Set the dimensions for which no DD is used */
3408 for(dim=0; dim<DIM; dim++) {
3409 if (dd->nc[dim] == 1) {
3410 comm->cell_x0[dim] = 0;
3411 comm->cell_x1[dim] = ddbox->box_size[dim];
3412 if (dim >= ddbox->nboundeddim)
3414 comm->cell_x0[dim] += ddbox->box0[dim];
3415 comm->cell_x1[dim] += ddbox->box0[dim];
3421 static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
3423 int d,np,i;
3424 gmx_domdec_comm_dim_t *cd;
3426 for(d=0; d<dd->ndim; d++)
3428 cd = &dd->comm->cd[d];
3429 np = npulse[dd->dim[d]];
3430 if (np > cd->np_nalloc)
3432 if (debug)
3434 fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
3435 dim2char(dd->dim[d]),np);
3437 if (DDMASTER(dd) && cd->np_nalloc > 0)
3439 fprintf(stderr,"\nIncreasing the number of cells to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[d]),np);
3441 srenew(cd->ind,np);
3442 for(i=cd->np_nalloc; i<np; i++)
3444 cd->ind[i].index = NULL;
3445 cd->ind[i].nalloc = 0;
3447 cd->np_nalloc = np;
3449 cd->np = np;
3454 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3455 gmx_ddbox_t *ddbox,bool bDynamicBox,
3456 bool bUniform,bool bDoDLB,gmx_step_t step,
3457 gmx_wallcycle_t wcycle)
3459 gmx_domdec_comm_t *comm;
3460 int d;
3461 ivec npulse;
3463 comm = dd->comm;
3465 /* Copy the old cell boundaries for the cg displacement check */
3466 copy_rvec(comm->cell_x0,comm->old_cell_x0);
3467 copy_rvec(comm->cell_x1,comm->old_cell_x1);
3469 if (comm->bDynLoadBal)
3471 if (DDMASTER(dd))
3473 check_box_size(dd,ddbox);
3475 set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
3477 else
3479 set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
3480 realloc_comm_ind(dd,npulse);
3483 if (debug)
3485 for(d=0; d<DIM; d++)
3487 fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
3488 d,comm->cell_x0[d],comm->cell_x1[d],ddbox->skew_fac[d]);
3493 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3494 gmx_ddbox_t *ddbox,
3495 rvec cell_ns_x0,rvec cell_ns_x1,
3496 gmx_step_t step)
3498 gmx_domdec_comm_t *comm;
3499 int dim_ind,dim;
3501 comm = dd->comm;
3503 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
3505 dim = dd->dim[dim_ind];
3507 /* Without PBC we don't have restrictions on the outer cells */
3508 if (!(dim >= ddbox->npbcdim &&
3509 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3510 comm->bDynLoadBal &&
3511 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3512 comm->cellsize_min[dim])
3514 char buf[22];
3515 gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3516 gmx_step_str(step,buf),dim2char(dim),
3517 comm->cell_x1[dim] - comm->cell_x0[dim],
3518 ddbox->skew_fac[dim],
3519 dd->comm->cellsize_min[dim],
3520 dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
3524 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3526 /* Communicate the boundaries and update cell_ns_x0/1 */
3527 dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
3528 if (dd->bGridJump && dd->ndim > 1)
3530 check_grid_jump(step,dd,ddbox);
3535 static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
3537 if (YY < npbcdim)
3539 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3541 else
3543 tcm[YY][XX] = 0;
3545 if (ZZ < npbcdim)
3547 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3548 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3550 else
3552 tcm[ZZ][XX] = 0;
3553 tcm[ZZ][YY] = 0;
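/* What tcm is used for: converting Cartesian coordinates to
 * box-lattice coordinates, one dimension at a time. A minimal
 * usage sketch, matching the loops elsewhere in this file:
 *
 *   pos_d = x[d];
 *   for(j=d+1; j<DIM; j++)
 *   {
 *       pos_d += x[j]*tcm[j][d];
 *   }
 *
 * For a rectangular box all entries set above are zero and
 * pos_d reduces to x[d].
 */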
3557 static void check_screw_box(matrix box)
3559 /* Mathematical limitation */
3560 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3562 gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3565 /* Limitation due to the asymmetry of the eighth shell method */
3566 if (box[ZZ][YY] != 0)
3568 gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
3572 static void distribute_cg(FILE *fplog,gmx_step_t step,
3573 matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
3574 gmx_domdec_t *dd)
3576 gmx_domdec_master_t *ma;
3577 int **tmp_ind=NULL,*tmp_nalloc=NULL;
3578 int i,icg,j,k,k0,k1,d,npbcdim;
3579 matrix tcm;
3580 rvec box_size,cg_cm;
3581 ivec ind;
3582 real nrcg,inv_ncg,pos_d;
3583 atom_id *cgindex;
3584 bool bUnbounded,bScrew;
3586 ma = dd->ma;
3588 if (tmp_ind == NULL)
3590 snew(tmp_nalloc,dd->nnodes);
3591 snew(tmp_ind,dd->nnodes);
3592 for(i=0; i<dd->nnodes; i++)
3594 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3595 snew(tmp_ind[i],tmp_nalloc[i]);
3599 /* Clear the count */
3600 for(i=0; i<dd->nnodes; i++)
3602 ma->ncg[i] = 0;
3603 ma->nat[i] = 0;
3606 make_tric_corr_matrix(dd->npbcdim,box,tcm);
3608 cgindex = cgs->index;
3610 /* Compute the center of geometry for all charge groups */
3611 for(icg=0; icg<cgs->nr; icg++)
3613 k0 = cgindex[icg];
3614 k1 = cgindex[icg+1];
3615 nrcg = k1 - k0;
3616 if (nrcg == 1)
3618 copy_rvec(pos[k0],cg_cm);
3620 else
3622 inv_ncg = 1.0/nrcg;
3624 clear_rvec(cg_cm);
3625 for(k=k0; (k<k1); k++)
3627 rvec_inc(cg_cm,pos[k]);
3629 for(d=0; (d<DIM); d++)
3631 cg_cm[d] *= inv_ncg;
3634 /* Put the charge group in the box and determine the cell index */
3635 for(d=DIM-1; d>=0; d--) {
3636 pos_d = cg_cm[d];
3637 if (d < dd->npbcdim)
3639 bScrew = (dd->bScrewPBC && d == XX);
3640 if (tric_dir[d] && dd->nc[d] > 1)
3642 /* Use triclinic coordinates for this dimension */
3643 for(j=d+1; j<DIM; j++)
3645 pos_d += cg_cm[j]*tcm[j][d];
3648 while(pos_d >= box[d][d])
3650 pos_d -= box[d][d];
3651 rvec_dec(cg_cm,box[d]);
3652 if (bScrew)
3654 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3655 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3657 for(k=k0; (k<k1); k++)
3659 rvec_dec(pos[k],box[d]);
3660 if (bScrew)
3662 pos[k][YY] = box[YY][YY] - pos[k][YY];
3663 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3667 while(pos_d < 0)
3669 pos_d += box[d][d];
3670 rvec_inc(cg_cm,box[d]);
3671 if (bScrew)
3673 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3674 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3676 for(k=k0; (k<k1); k++)
3678 rvec_inc(pos[k],box[d]);
3679 if (bScrew) {
3680 pos[k][YY] = box[YY][YY] - pos[k][YY];
3681 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3686 /* This could be done more efficiently */
3687 ind[d] = 0;
3688 while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
3690 ind[d]++;
3693 i = dd_index(dd->nc,ind);
3694 if (ma->ncg[i] == tmp_nalloc[i])
3696 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
3697 srenew(tmp_ind[i],tmp_nalloc[i]);
3699 tmp_ind[i][ma->ncg[i]] = icg;
3700 ma->ncg[i]++;
3701 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
3704 k1 = 0;
3705 for(i=0; i<dd->nnodes; i++)
3707 ma->index[i] = k1;
3708 for(k=0; k<ma->ncg[i]; k++)
3710 ma->cg[k1++] = tmp_ind[i][k];
3713 ma->index[dd->nnodes] = k1;
3715 for(i=0; i<dd->nnodes; i++)
3717 sfree(tmp_ind[i]);
3719 sfree(tmp_ind);
3720 sfree(tmp_nalloc);
3722 if (fplog)
3724 char buf[22];
3725 fprintf(fplog,"Charge group distribution at step %s:",
3726 gmx_step_str(step,buf));
3727 for(i=0; i<dd->nnodes; i++)
3729 fprintf(fplog," %d",ma->ncg[i]);
3731 fprintf(fplog,"\n");
3735 static void get_cg_distribution(FILE *fplog,gmx_step_t step,gmx_domdec_t *dd,
3736 t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
3737 rvec pos[])
3739 gmx_domdec_master_t *ma=NULL;
3740 ivec npulse;
3741 int i,cg_gl;
3742 int *ibuf,buf2[2] = { 0, 0 };
3744 if (DDMASTER(dd))
3746 ma = dd->ma;
3748 if (dd->bScrewPBC)
3750 check_screw_box(box);
3753 set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);
3755 distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
3756 for(i=0; i<dd->nnodes; i++)
3758 ma->ibuf[2*i] = ma->ncg[i];
3759 ma->ibuf[2*i+1] = ma->nat[i];
3761 ibuf = ma->ibuf;
3763 else
3765 ibuf = NULL;
3767 dd_scatter(dd,2*sizeof(int),ibuf,buf2);
3769 dd->ncg_home = buf2[0];
3770 dd->nat_home = buf2[1];
3771 dd->ncg_tot = dd->ncg_home;
3772 dd->nat_tot = dd->nat_home;
3773 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
3775 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
3776 srenew(dd->index_gl,dd->cg_nalloc);
3777 srenew(dd->cgindex,dd->cg_nalloc+1);
3779 if (DDMASTER(dd))
3781 for(i=0; i<dd->nnodes; i++)
3783 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
3784 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
3788 dd_scatterv(dd,
3789 DDMASTER(dd) ? ma->ibuf : NULL,
3790 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
3791 DDMASTER(dd) ? ma->cg : NULL,
3792 dd->ncg_home*sizeof(int),dd->index_gl);
3794 /* Determine the home charge group sizes */
3795 dd->cgindex[0] = 0;
3796 for(i=0; i<dd->ncg_home; i++)
3798 cg_gl = dd->index_gl[i];
3799 dd->cgindex[i+1] =
3800 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
3803 if (debug)
3805 fprintf(debug,"Home charge groups:\n");
3806 for(i=0; i<dd->ncg_home; i++)
3808 fprintf(debug," %d",dd->index_gl[i]);
3809 if (i % 10 == 9)
3810 fprintf(debug,"\n");
3812 fprintf(debug,"\n");
3816 static int compact_and_copy_vec_at(int ncg,int *move,
3817 int *cgindex,
3818 int nvec,int vec,
3819 rvec *src,gmx_domdec_comm_t *comm,
3820 bool bCompact)
3822 int m,icg,i,i0,i1,nrcg;
3823 int home_pos;
3824 int pos_vec[DIM*2];
3826 home_pos = 0;
3828 for(m=0; m<DIM*2; m++)
3830 pos_vec[m] = 0;
3833 i0 = 0;
3834 for(icg=0; icg<ncg; icg++)
3836 i1 = cgindex[icg+1];
3837 m = move[icg];
3838 if (m == -1)
3840 if (bCompact)
3842 /* Compact the home array in place */
3843 for(i=i0; i<i1; i++)
3845 copy_rvec(src[i],src[home_pos++]);
3849 else
3851 /* Copy to the communication buffer */
3852 nrcg = i1 - i0;
3853 pos_vec[m] += 1 + vec*nrcg;
3854 for(i=i0; i<i1; i++)
3856 copy_rvec(src[i],comm->cgcm_state[m][pos_vec[m]++]);
3858 pos_vec[m] += (nvec - vec - 1)*nrcg;
3860 if (!bCompact)
3862 home_pos += i1 - i0;
3864 i0 = i1;
3867 return home_pos;
3870 static int compact_and_copy_vec_cg(int ncg,int *move,
3871 int *cgindex,
3872 int nvec,rvec *src,gmx_domdec_comm_t *comm,
3873 bool bCompact)
3875 int m,icg,i0,i1,nrcg;
3876 int home_pos;
3877 int pos_vec[DIM*2];
3879 home_pos = 0;
3881 for(m=0; m<DIM*2; m++)
3883 pos_vec[m] = 0;
3886 i0 = 0;
3887 for(icg=0; icg<ncg; icg++)
3889 i1 = cgindex[icg+1];
3890 m = move[icg];
3891 if (m == -1)
3893 if (bCompact)
3895 /* Compact the home array in place */
3896 copy_rvec(src[icg],src[home_pos++]);
3899 else
3901 nrcg = i1 - i0;
3902 /* Copy to the communication buffer */
3903 copy_rvec(src[icg],comm->cgcm_state[m][pos_vec[m]]);
3904 pos_vec[m] += 1 + nrcg*nvec;
3906 i0 = i1;
3908 if (!bCompact)
3910 home_pos = ncg;
3913 return home_pos;
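/* Layout of comm->cgcm_state[m] as filled by the two routines
 * above, for each charge group moving in direction m with nrcg
 * atoms and nvec state vectors:
 *   [cg_cm][x_0..x_nrcg-1][v_0..v_nrcg-1]...
 * one rvec for the center of geometry followed by nvec blocks of
 * nrcg rvecs, which gives the stride 1 + nrcg*nvec used in both.
 */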
3916 static int compact_ind(int ncg,int *move,
3917 int *index_gl,int *cgindex,
3918 int *gatindex,
3919 gmx_ga2la_t ga2la,char *bLocalCG,
3920 int *cginfo)
3922 int cg,nat,a0,a1,a,a_gl;
3923 int home_pos;
3925 home_pos = 0;
3926 nat = 0;
3927 for(cg=0; cg<ncg; cg++)
3929 a0 = cgindex[cg];
3930 a1 = cgindex[cg+1];
3931 if (move[cg] == -1)
3933 /* Compact the home arrays in place.
3934 * Anything that can be done here avoids access to global arrays.
3936 cgindex[home_pos] = nat;
3937 for(a=a0; a<a1; a++)
3939 a_gl = gatindex[a];
3940 gatindex[nat] = a_gl;
3941 /* The cell number stays 0, so we don't need to set it */
3942 ga2la_change_la(ga2la,a_gl,nat);
3943 nat++;
3945 index_gl[home_pos] = index_gl[cg];
3946 cginfo[home_pos] = cginfo[cg];
3947 /* The charge group remains local, so bLocalCG does not change */
3948 home_pos++;
3950 else
3952 /* Clear the global indices */
3953 for(a=a0; a<a1; a++)
3955 ga2la_del(ga2la,gatindex[a]);
3957 if (bLocalCG)
3959 bLocalCG[index_gl[cg]] = FALSE;
3963 cgindex[home_pos] = nat;
3965 return home_pos;
3968 static void clear_and_mark_ind(int ncg,int *move,
3969 int *index_gl,int *cgindex,int *gatindex,
3970 gmx_ga2la_t ga2la,char *bLocalCG,
3971 int *cell_index)
3973 int cg,a0,a1,a;
3975 for(cg=0; cg<ncg; cg++)
3977 if (move[cg] >= 0)
3979 a0 = cgindex[cg];
3980 a1 = cgindex[cg+1];
3981 /* Clear the global indices */
3982 for(a=a0; a<a1; a++)
3984 ga2la_del(ga2la,gatindex[a]);
3986 if (bLocalCG)
3988 bLocalCG[index_gl[cg]] = FALSE;
3990 /* Signal that this cg has moved using the ns cell index.
3991 * Here we set it to -1.
3992 * fill_grid will change it from -1 to 4*grid->ncells.
3994 cell_index[cg] = -1;
3999 static void print_cg_move(FILE *fplog,
4000 gmx_domdec_t *dd,
4001 gmx_step_t step,int cg,int dim,int dir,
4002 real limitd,
4003 rvec cm_old,rvec cm_new,real pos_d)
4005 gmx_domdec_comm_t *comm;
4006 char buf[22];
4008 comm = dd->comm;
4010 fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
4011 fprintf(fplog,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition (%f) in direction %c\n",
4012 ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
4013 fprintf(fplog,"distance out of cell %f\n",
4014 dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4015 fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
4016 cm_old[XX],cm_old[YY],cm_old[ZZ]);
4017 fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
4018 cm_new[XX],cm_new[YY],cm_new[ZZ]);
4019 fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
4020 dim2char(dim),
4021 comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
4022 fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
4023 dim2char(dim),
4024 comm->cell_x0[dim],comm->cell_x1[dim]);
4027 static void cg_move_error(FILE *fplog,
4028 gmx_domdec_t *dd,
4029 gmx_step_t step,int cg,int dim,int dir,
4030 real limitd,
4031 rvec cm_old,rvec cm_new,real pos_d)
4033 if (fplog)
4035 print_cg_move(fplog, dd,step,cg,dim,dir,limitd,cm_old,cm_new,pos_d);
4037 print_cg_move(stderr,dd,step,cg,dim,dir,limitd,cm_old,cm_new,pos_d);
4038 gmx_fatal(FARGS,
4039 "A charge group moved too far between two domain decomposition steps\n"
4040 "This usually means that your system is not well equilibrated");
4043 static void rotate_state_atom(t_state *state,int a)
4045 int est;
4047 for(est=estX; est<estNR; est++)
4049 if (state->flags & (1<<est)) {
4050 switch (est) {
4051 case estX:
4052 /* Rotate the complete state; for a rectangular box only */
4053 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4054 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4055 break;
4056 case estV:
4057 state->v[a][YY] = -state->v[a][YY];
4058 state->v[a][ZZ] = -state->v[a][ZZ];
4059 break;
4060 case estSDX:
4061 state->sd_X[a][YY] = -state->sd_X[a][YY];
4062 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4063 break;
4064 case estCGP:
4065 state->cg_p[a][YY] = -state->cg_p[a][YY];
4066 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4067 break;
4068 case estDISRE_INITF:
4069 case estDISRE_RM3TAV:
4070 case estORIRE_INITF:
4071 case estORIRE_DTAV:
4072 /* These are distances, so not affected by rotation */
4073 break;
4074 default:
4075 gmx_incons("Unknown state entry encountered in rotate_state_atom");
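/* Illustration of the screw pbc convention applied above: a shift
 * by the box vector along x is combined with a 180 degree rotation
 * around the x-axis, so positions map as
 *   (x, y, z) -> (x +- Lx, Ly - y, Lz - z)
 * while the y/z components of velocities and the other vector
 * state entries simply change sign.
 */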
4081 static int dd_redistribute_cg(FILE *fplog,gmx_step_t step,
4082 gmx_domdec_t *dd,ivec tric_dir,
4083 t_state *state,rvec **f,
4084 t_forcerec *fr,t_mdatoms *md,
4085 bool bCompact,
4086 t_nrnb *nrnb)
4088 int *move;
4089 int npbcdim;
4090 int ncg[DIM*2],nat[DIM*2];
4091 int c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
4092 int mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
4093 int sbuf[2],rbuf[2];
4094 int home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
4095 int flag;
4096 bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
4097 bool bScrew;
4098 ivec dev;
4099 real inv_ncg,pos_d;
4100 matrix tcm;
4101 rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
4102 atom_id *cgindex;
4103 cginfo_mb_t *cginfo_mb;
4104 gmx_domdec_comm_t *comm;
4106 if (dd->bScrewPBC)
4108 check_screw_box(state->box);
4111 comm = dd->comm;
4112 cg_cm = fr->cg_cm;
4114 for(i=estX; i<estNR; i++)
4116 switch (i)
4118 case estX: /* Always present */ break;
4119 case estV: bV = (state->flags & (1<<i)); break;
4120 case estSDX: bSDX = (state->flags & (1<<i)); break;
4121 case estCGP: bCGP = (state->flags & (1<<i)); break;
4122 case estLD_RNG:
4123 case estLD_RNGI:
4124 case estDISRE_INITF:
4125 case estDISRE_RM3TAV:
4126 case estORIRE_INITF:
4127 case estORIRE_DTAV:
4128 /* No processing required */
4129 break;
4130 default:
4131 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4135 if (dd->ncg_tot > comm->nalloc_int)
4137 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4138 srenew(comm->buf_int,comm->nalloc_int);
4140 move = comm->buf_int;
4142 /* Clear the count */
4143 for(c=0; c<dd->ndim*2; c++)
4145 ncg[c] = 0;
4146 nat[c] = 0;
4149 npbcdim = dd->npbcdim;
4151 for(d=0; (d<DIM); d++)
4153 limitd[d] = dd->comm->cellsize_min[d];
4154 if (d >= npbcdim && dd->ci[d] == 0)
4156 cell_x0[d] = -GMX_FLOAT_MAX;
4158 else
4160 cell_x0[d] = comm->cell_x0[d];
4162 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4164 cell_x1[d] = GMX_FLOAT_MAX;
4166 else
4168 cell_x1[d] = comm->cell_x1[d];
4170 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4171 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4174 make_tric_corr_matrix(npbcdim,state->box,tcm);
4176 cgindex = dd->cgindex;
4178 /* Compute the center of geometry for all home charge groups
4179 * and put them in the box and determine where they should go.
4181 for(cg=0; cg<dd->ncg_home; cg++)
4183 k0 = cgindex[cg];
4184 k1 = cgindex[cg+1];
4185 nrcg = k1 - k0;
4186 if (nrcg == 1)
4188 copy_rvec(state->x[k0],cm_new);
4190 else
4192 inv_ncg = 1.0/nrcg;
4194 clear_rvec(cm_new);
4195 for(k=k0; (k<k1); k++)
4197 rvec_inc(cm_new,state->x[k]);
4199 for(d=0; (d<DIM); d++)
4201 cm_new[d] = inv_ncg*cm_new[d];
4205 clear_ivec(dev);
4206 /* Do pbc and check DD cell boundary crossings */
4207 for(d=DIM-1; d>=0; d--)
4209 if (dd->nc[d] > 1)
4211 bScrew = (dd->bScrewPBC && d == XX);
4212 /* Determine the location of this cg in lattice coordinates */
4213 pos_d = cm_new[d];
4214 if (tric_dir[d])
4216 for(d2=d+1; d2<DIM; d2++)
4218 pos_d += cm_new[d2]*tcm[d2][d];
4221 /* Put the charge group in the triclinic unit-cell */
4222 if (pos_d >= cell_x1[d])
4224 if (pos_d >= limit1[d])
4226 cg_move_error(fplog,dd,step,cg,d,1,limitd[d],
4227 cg_cm[cg],cm_new,pos_d);
4229 dev[d] = 1;
4230 if (dd->ci[d] == dd->nc[d] - 1)
4232 rvec_dec(cm_new,state->box[d]);
4233 if (bScrew)
4235 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4236 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4238 for(k=k0; (k<k1); k++)
4240 rvec_dec(state->x[k],state->box[d]);
4241 if (bScrew)
4243 rotate_state_atom(state,k);
4248 else if (pos_d < cell_x0[d])
4250 if (pos_d < limit0[d])
4252 cg_move_error(fplog,dd,step,cg,d,-1,limitd[d],
4253 cg_cm[cg],cm_new,pos_d);
4255 dev[d] = -1;
4256 if (dd->ci[d] == 0)
4258 rvec_inc(cm_new,state->box[d]);
4259 if (bScrew)
4261 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4262 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4264 for(k=k0; (k<k1); k++)
4266 rvec_inc(state->x[k],state->box[d]);
4267 if (bScrew)
4269 rotate_state_atom(state,k);
4275 else if (d < npbcdim)
4277 /* Put the charge group in the rectangular unit-cell */
4278 while (cm_new[d] >= state->box[d][d])
4280 rvec_dec(cm_new,state->box[d]);
4281 for(k=k0; (k<k1); k++)
4283 rvec_dec(state->x[k],state->box[d]);
4286 while (cm_new[d] < 0)
4288 rvec_inc(cm_new,state->box[d]);
4289 for(k=k0; (k<k1); k++)
4291 rvec_inc(state->x[k],state->box[d]);
4297 copy_rvec(cm_new,cg_cm[cg]);
4299 /* Determine where this cg should go */
4300 flag = 0;
4301 mc = -1;
4302 for(d=0; d<dd->ndim; d++)
4304 dim = dd->dim[d];
4305 if (dev[dim] == 1)
4307 flag |= DD_FLAG_FW(d);
4308 if (mc == -1)
4310 mc = d*2;
4313 else if (dev[dim] == -1)
4315 flag |= DD_FLAG_BW(d);
4316 if (mc == -1) {
4317 if (dd->nc[dim] > 2)
4319 mc = d*2 + 1;
4321 else
4323 mc = d*2;
4328 move[cg] = mc;
4329 if (mc >= 0)
4331 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4333 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4334 srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4336 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4337 /* We store the cg size in the lower 16 bits
4338 * and the place where the charge group should go
4339 * in the next 6 bits. This saves some communication volume.
4341 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4342 ncg[mc] += 1;
4343 nat[mc] += nrcg;
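/* Unpacking sketch for the flag word stored above, assuming the
 * conventional masks (DD_FLAG_NRCG covering the lower bits and
 * DD_FLAG_FW/DD_FLAG_BW the direction bits above them):
 *
 *   nrcg = packed & DD_FLAG_NRCG;          the charge group size
 *   fw_d = (packed & DD_FLAG_FW(d)) != 0;  move forward in dim d?
 *
 * which is how the receiving code below recovers nrcg and the
 * direction flags.
 */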
4347 inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
4348 inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
4350 nvec = 1;
4351 if (bV)
4353 nvec++;
4355 if (bSDX)
4357 nvec++;
4359 if (bCGP)
4361 nvec++;
4364 /* Make sure the communication buffers are large enough */
4365 for(mc=0; mc<dd->ndim*2; mc++)
4367 nvr = ncg[mc] + nat[mc]*nvec;
4368 if (nvr > comm->cgcm_state_nalloc[mc])
4370 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4371 srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
4375 /* Recalculating cg_cm might be cheaper than communicating,
4376 * but that could give rise to rounding issues.
4378 home_pos_cg =
4379 compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
4380 nvec,cg_cm,comm,bCompact);
4382 vec = 0;
4383 home_pos_at =
4384 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4385 nvec,vec++,state->x,comm,bCompact);
4386 if (bV)
4388 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4389 nvec,vec++,state->v,comm,bCompact);
4391 if (bSDX)
4393 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4394 nvec,vec++,state->sd_X,comm,bCompact);
4396 if (bCGP)
4398 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4399 nvec,vec++,state->cg_p,comm,bCompact);
4402 if (bCompact)
4404 compact_ind(dd->ncg_home,move,
4405 dd->index_gl,dd->cgindex,dd->gatindex,
4406 dd->ga2la,comm->bLocalCG,
4407 fr->cginfo);
4409 else
4411 clear_and_mark_ind(dd->ncg_home,move,
4412 dd->index_gl,dd->cgindex,dd->gatindex,
4413 dd->ga2la,comm->bLocalCG,
4414 fr->ns.grid->cell_index);
4417 cginfo_mb = fr->cginfo_mb;
4419 ncg_stay_home = home_pos_cg;
4420 for(d=0; d<dd->ndim; d++)
4422 dim = dd->dim[d];
4423 ncg_recv = 0;
4424 nat_recv = 0;
4425 nvr = 0;
4426 for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
4428 cdd = d*2 + dir;
4429 /* Communicate the cg and atom counts */
4430 sbuf[0] = ncg[cdd];
4431 sbuf[1] = nat[cdd];
4432 if (debug)
4434 fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
4435 d,dir,sbuf[0],sbuf[1]);
4437 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4439 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4441 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4442 srenew(comm->buf_int,comm->nalloc_int);
4445 /* Communicate the charge group indices, sizes and flags */
4446 dd_sendrecv_int(dd, d, dir,
4447 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4448 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4450 nvs = ncg[cdd] + nat[cdd]*nvec;
4451 i = rbuf[0] + rbuf[1] *nvec;
4452 check_vec_rvec_alloc(&comm->vbuf,nvr+i);
4454 /* Communicate cgcm and state */
4455 dd_sendrecv_rvec(dd, d, dir,
4456 comm->cgcm_state[cdd], nvs,
4457 comm->vbuf.v+nvr, i);
4458 ncg_recv += rbuf[0];
4459 nat_recv += rbuf[1];
4460 nvr += i;
4463 /* Process the received charge groups */
4464 buf_pos = 0;
4465 for(cg=0; cg<ncg_recv; cg++)
4467 flag = comm->buf_int[cg*DD_CGIBS+1];
4468 mc = -1;
4469 if (d < dd->ndim-1)
4471 /* Check which direction this cg should go */
4472 for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
4474 if (dd->bGridJump)
4476 /* The cell boundaries for dimension d2 are not equal
4477 * for each cell row of the lower dimension(s),
4478 * therefore we might need to redetermine where
4479 * this cg should go.
4481 dim2 = dd->dim[d2];
4482 /* If this cg crosses the box boundary in dimension d2
4483 * we can use the communicated flag, so we do not
4484 * have to worry about pbc.
4486 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4487 (flag & DD_FLAG_FW(d2))) ||
4488 (dd->ci[dim2] == 0 &&
4489 (flag & DD_FLAG_BW(d2)))))
4491 /* Clear the two flags for this dimension */
4492 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4493 /* Determine the location of this cg
4494 * in lattice coordinates
4496 pos_d = comm->vbuf.v[buf_pos][dim2];
4497 if (tric_dir[dim2])
4499 for(d3=dim2+1; d3<DIM; d3++)
4501 pos_d +=
4502 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4505 /* Check that we are not at the box edge.
4506 * pbc is only handled in the first step above,
4507 * but this check could move a cg over pbc while
4508 * the first step did not, due to different rounding.
4510 if (pos_d >= cell_x1[dim2] &&
4511 dd->ci[dim2] != dd->nc[dim2]-1)
4513 flag |= DD_FLAG_FW(d2);
4515 else if (pos_d < cell_x0[dim2] &&
4516 dd->ci[dim2] != 0)
4518 flag |= DD_FLAG_BW(d2);
4520 comm->buf_int[cg*DD_CGIBS+1] = flag;
4523 /* Set to which neighboring cell this cg should go */
4524 if (flag & DD_FLAG_FW(d2))
4526 mc = d2*2;
4528 else if (flag & DD_FLAG_BW(d2))
4530 if (dd->nc[dd->dim[d2]] > 2)
4532 mc = d2*2+1;
4534 else
4536 mc = d2*2;
4542 nrcg = flag & DD_FLAG_NRCG;
4543 if (mc == -1)
4545 if (home_pos_cg+1 > dd->cg_nalloc)
4547 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
4548 srenew(dd->index_gl,dd->cg_nalloc);
4549 srenew(dd->cgindex,dd->cg_nalloc+1);
4551 /* Set the global charge group index and size */
4552 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
4553 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
4554 /* Copy the state from the buffer */
4555 if (home_pos_cg >= fr->cg_nalloc)
4557 dd_realloc_fr_cg(fr,home_pos_cg+1);
4558 cg_cm = fr->cg_cm;
4560 copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
4561 /* Set the cginfo */
4562 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
4563 dd->index_gl[home_pos_cg]);
4564 if (comm->bLocalCG)
4566 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
4569 if (home_pos_at+nrcg > state->nalloc)
4571 dd_realloc_state(state,f,home_pos_at+nrcg);
4573 for(i=0; i<nrcg; i++)
4575 copy_rvec(comm->vbuf.v[buf_pos++],
4576 state->x[home_pos_at+i]);
4578 if (bV)
4580 for(i=0; i<nrcg; i++)
4582 copy_rvec(comm->vbuf.v[buf_pos++],
4583 state->v[home_pos_at+i]);
4586 if (bSDX)
4588 for(i=0; i<nrcg; i++)
4590 copy_rvec(comm->vbuf.v[buf_pos++],
4591 state->sd_X[home_pos_at+i]);
4594 if (bCGP)
4596 for(i=0; i<nrcg; i++)
4598 copy_rvec(comm->vbuf.v[buf_pos++],
4599 state->cg_p[home_pos_at+i]);
4602 home_pos_cg += 1;
4603 home_pos_at += nrcg;
4605 else
4607 /* Reallocate the buffers if necessary */
4608 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4610 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4611 srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4613 nvr = ncg[mc] + nat[mc]*nvec;
4614 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
4616 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
4617 srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
4619 /* Copy from the receive to the send buffers */
4620 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
4621 comm->buf_int + cg*DD_CGIBS,
4622 DD_CGIBS*sizeof(int));
4623 memcpy(comm->cgcm_state[mc][nvr],
4624 comm->vbuf.v[buf_pos],
4625 (1+nrcg*nvec)*sizeof(rvec));
4626 buf_pos += 1 + nrcg*nvec;
4627 ncg[mc] += 1;
4628 nat[mc] += nrcg;
4633 /* With sorting (!bCompact) the indices are now only partially up to date
4634 * and ncg_home and nat_home are not the real count, since there are
4635 * "holes" in the arrays for the charge groups that moved to neighbors.
4637 dd->ncg_home = home_pos_cg;
4638 dd->nat_home = home_pos_at;
4640 if (debug)
4642 fprintf(debug,"Finished repartitioning\n");
4645 return ncg_stay_home;
4648 void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
4650 dd->comm->cycl[ddCycl] += cycles;
4651 dd->comm->cycl_n[ddCycl]++;
4652 if (cycles > dd->comm->cycl_max[ddCycl])
4654 dd->comm->cycl_max[ddCycl] = cycles;
4658 static double force_flop_count(t_nrnb *nrnb)
4660 int i;
4661 double sum;
4662 const char *name;
4664 sum = 0;
4665 for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
4667 /* To get closer to the real timings, we halve the count
4668 * for the normal loops and halve it again for the water loops.
4670 name = nrnb_str(i);
4671 if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
4673 sum += nrnb->n[i]*0.25*cost_nrnb(i);
4675 else
4677 sum += nrnb->n[i]*0.50*cost_nrnb(i);
4680 for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
4682 name = nrnb_str(i);
4683 if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
4684 sum += nrnb->n[i]*cost_nrnb(i);
4686 for(i=eNR_BONDS; i<=eNR_WALLS; i++)
4688 sum += nrnb->n[i]*cost_nrnb(i);
4691 return sum;
4694 void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
4696 if (dd->comm->eFlop)
4698 dd->comm->flop -= force_flop_count(nrnb);
4701 void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
4703 if (dd->comm->eFlop)
4705 dd->comm->flop += force_flop_count(nrnb);
4706 dd->comm->flop_n++;
4710 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
4712 int i;
4714 for(i=0; i<ddCyclNr; i++)
4716 dd->comm->cycl[i] = 0;
4717 dd->comm->cycl_n[i] = 0;
4718 dd->comm->cycl_max[i] = 0;
4720 dd->comm->flop = 0;
4721 dd->comm->flop_n = 0;
4724 static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
4726 gmx_domdec_comm_t *comm;
4727 gmx_domdec_load_t *load;
4728 gmx_domdec_root_t *root=NULL;
4729 int d,dim,cid,i,pos;
4730 float cell_frac=0,sbuf[DD_NLOAD_MAX];
4731 bool bSepPME;
4733 if (debug)
4735 fprintf(debug,"get_load_distribution start\n");
4738 wallcycle_start(wcycle,ewcDDCOMMLOAD);
4740 comm = dd->comm;
4742 bSepPME = (dd->pme_nodeid >= 0);
4744 for(d=dd->ndim-1; d>=0; d--)
4746 dim = dd->dim[d];
4747 /* Check if we participate in the communication in this dimension */
4748 if (d == dd->ndim-1 ||
4749 (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
4751 load = &comm->load[d];
4752 if (dd->bGridJump)
4754 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
4756 pos = 0;
4757 if (d == dd->ndim-1)
4759 sbuf[pos++] = dd_force_load(comm);
4760 sbuf[pos++] = sbuf[0];
4761 if (dd->bGridJump)
4763 sbuf[pos++] = sbuf[0];
4764 sbuf[pos++] = cell_frac;
4765 if (d > 0)
4767 sbuf[pos++] = comm->cell_f_max0[d];
4768 sbuf[pos++] = comm->cell_f_min1[d];
4771 if (bSepPME)
4773 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
4774 sbuf[pos++] = comm->cycl[ddCyclPME];
4777 else
4779 sbuf[pos++] = comm->load[d+1].sum;
4780 sbuf[pos++] = comm->load[d+1].max;
4781 if (dd->bGridJump)
4783 sbuf[pos++] = comm->load[d+1].sum_m;
4784 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
4785 sbuf[pos++] = comm->load[d+1].flags;
4786 if (d > 0)
4788 sbuf[pos++] = comm->cell_f_max0[d];
4789 sbuf[pos++] = comm->cell_f_min1[d];
4792 if (bSepPME)
4794 sbuf[pos++] = comm->load[d+1].mdf;
4795 sbuf[pos++] = comm->load[d+1].pme;
4798 load->nload = pos;
4799 /* Communicate a row in DD direction d.
4800 * The communicators are set up such that the root always has rank 0.
4802 #ifdef GMX_MPI
4803 MPI_Gather(sbuf ,load->nload*sizeof(float),MPI_BYTE,
4804 load->load,load->nload*sizeof(float),MPI_BYTE,
4805 0,comm->mpi_comm_load[d]);
4806 #endif
4807 if (dd->ci[dim] == dd->master_ci[dim])
4809 /* We are the root, process this row */
4810 if (comm->bDynLoadBal)
4812 root = comm->root[d];
4814 load->sum = 0;
4815 load->max = 0;
4816 load->sum_m = 0;
4817 load->cvol_min = 1;
4818 load->flags = 0;
4819 load->mdf = 0;
4820 load->pme = 0;
4821 pos = 0;
4822 for(i=0; i<dd->nc[dim]; i++)
4824 load->sum += load->load[pos++];
4825 load->max = max(load->max,load->load[pos]);
4826 pos++;
4827 if (dd->bGridJump)
4829 if (root->bLimited)
4831 /* This direction could not be load balanced properly,
4832 * therefore we need to use the maximum instead of the average load.
4834 load->sum_m = max(load->sum_m,load->load[pos]);
4836 else
4838 load->sum_m += load->load[pos];
4840 pos++;
4841 load->cvol_min = min(load->cvol_min,load->load[pos]);
4842 pos++;
4843 if (d < dd->ndim-1)
4845 load->flags = (int)(load->load[pos++] + 0.5);
4847 if (d > 0)
4849 root->cell_f_max0[i] = load->load[pos++];
4850 root->cell_f_min1[i] = load->load[pos++];
4853 if (bSepPME)
4855 load->mdf = max(load->mdf,load->load[pos]);
4856 pos++;
4857 load->pme = max(load->pme,load->load[pos]);
4858 pos++;
4861 if (comm->bDynLoadBal && root->bLimited)
4863 load->sum_m *= dd->nc[dim];
4864 load->flags |= (1<<d);
4870 if (DDMASTER(dd))
4872 comm->nload += dd_load_count(comm);
4873 comm->load_step += comm->cycl[ddCyclStep];
4874 comm->load_sum += comm->load[0].sum;
4875 comm->load_max += comm->load[0].max;
4876 if (comm->bDynLoadBal)
4878 for(d=0; d<dd->ndim; d++)
4880 if (comm->load[0].flags & (1<<d))
4882 comm->load_lim[d]++;
4886 if (bSepPME)
4888 comm->load_mdf += comm->load[0].mdf;
4889 comm->load_pme += comm->load[0].pme;
4893 wallcycle_stop(wcycle,ewcDDCOMMLOAD);
4895 if (debug)
4897 fprintf(debug,"get_load_distribution finished\n");
4901 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
4903 /* Return the relative performance loss on the total run time
4904 * due to the force calculation load imbalance.
4906 if (dd->comm->nload > 0)
4908 return
4909 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
4910 (dd->comm->load_step*dd->nnodes);
4912 else
4914 return 0;
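/* Worked example with assumed numbers: if the busiest node spends
 * load_max = 1.2 s per step, the per-node average is
 * load_sum/nnodes = 1.0 s and a full step takes load_step = 2.0 s,
 * the loss is (1.2 - 1.0)/2.0 = 10% of the total run time.
 */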
4918 static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
4920 char buf[STRLEN];
4921 int npp,npme,nnodes,d,limp;
4922 float imbal,pme_f_ratio,lossf,lossp=0;
4923 bool bLim;
4924 gmx_domdec_comm_t *comm;
4926 comm = dd->comm;
4927 if (DDMASTER(dd) && comm->nload > 0)
4929 npp = dd->nnodes;
4930 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
4931 nnodes = npp + npme;
4932 imbal = comm->load_max*npp/comm->load_sum - 1;
4933 lossf = dd_force_imb_perf_loss(dd);
4934 sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
4935 fprintf(fplog,"%s",buf);
4936 fprintf(stderr,"\n");
4937 fprintf(stderr,"%s",buf);
4938 sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
4939 fprintf(fplog,"%s",buf);
4940 fprintf(stderr,"%s",buf);
4941 bLim = FALSE;
4942 if (comm->bDynLoadBal)
4944 sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
4945 for(d=0; d<dd->ndim; d++)
4947 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
4948 sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
4949 if (limp >= 50)
4951 bLim = TRUE;
4954 sprintf(buf+strlen(buf),"\n");
4955 fprintf(fplog,"%s",buf);
4956 fprintf(stderr,"%s",buf);
4958 if (npme > 0)
4960 pme_f_ratio = comm->load_pme/comm->load_mdf;
4961 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
4962 if (lossp <= 0)
4964 lossp *= (float)npme/(float)nnodes;
4966 else
4968 lossp *= (float)npp/(float)nnodes;
4970 sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
4971 fprintf(fplog,"%s",buf);
4972 fprintf(stderr,"%s",buf);
4973 sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
4974 fprintf(fplog,"%s",buf);
4975 fprintf(stderr,"%s",buf);
4977 fprintf(fplog,"\n");
4978 fprintf(stderr,"\n");
4980 if (lossf >= DD_PERF_LOSS)
4982 sprintf(buf,
4983 "NOTE: %.1f %% performance was lost due to load imbalance\n"
4984 " in the domain decomposition.\n",lossf*100);
4985 if (!comm->bDynLoadBal)
4987 sprintf(buf+strlen(buf)," You might want to use dynamic load balancing (option -dlb).\n");
4989 else if (bLim)
4991 sprintf(buf+strlen(buf)," You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
4993 fprintf(fplog,"%s\n",buf);
4994 fprintf(stderr,"%s\n",buf);
4996 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
4998 sprintf(buf,
4999 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5000 " had %s work to do than the PP nodes.\n"
5001 " You might want to %s the number of PME nodes\n"
5002 " or %s the cut-off and the grid spacing.\n",
5003 fabs(lossp*100),
5004 (lossp < 0) ? "less" : "more",
5005 (lossp < 0) ? "decrease" : "increase",
5006 (lossp < 0) ? "decrease" : "increase");
5007 fprintf(fplog,"%s\n",buf);
5008 fprintf(stderr,"%s\n",buf);
5013 static float dd_vol_min(gmx_domdec_t *dd)
5015 return dd->comm->load[0].cvol_min*dd->nnodes;
5018 static bool dd_load_flags(gmx_domdec_t *dd)
5020 return dd->comm->load[0].flags;
5023 static float dd_f_imbal(gmx_domdec_t *dd)
5025 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5028 static float dd_pme_f_ratio(gmx_domdec_t *dd)
5030 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5033 static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_step_t step)
5035 int flags,d;
5036 char buf[22];
5038 flags = dd_load_flags(dd);
5039 if (flags)
5041 fprintf(fplog,
5042 "DD load balancing is limited by minimum cell size in dimension");
5043 for(d=0; d<dd->ndim; d++)
5045 if (flags & (1<<d))
5047 fprintf(fplog," %c",dim2char(dd->dim[d]));
5050 fprintf(fplog,"\n");
5052 fprintf(fplog,"DD step %s",gmx_step_str(step,buf));
5053 if (dd->comm->bDynLoadBal)
5055 fprintf(fplog," vol min/aver %5.3f%c",
5056 dd_vol_min(dd),flags ? '!' : ' ');
5058 fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
5059 if (dd->comm->cycl_n[ddCyclPME])
5061 fprintf(fplog," pme mesh/force %5.3f",dd_pme_f_ratio(dd));
5063 fprintf(fplog,"\n\n");
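/* An example of the resulting log line (the values are hypothetical):
 *
 * DD step 5000 vol min/aver 0.842! load imb.: force 4.3% pme mesh/force 1.08
 *
 * The '!' after the volume ratio indicates that the dynamic load balancing
 * was limited by the minimum cell size in at least one dimension.
 */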
5066 static void dd_print_load_verbose(gmx_domdec_t *dd)
5068 if (dd->comm->bDynLoadBal)
5070 fprintf(stderr,"vol %4.2f%c ",
5071 dd_vol_min(dd),dd_load_flags(dd) ? '!' : ' ');
5073 fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
5074 if (dd->comm->cycl_n[ddCyclPME])
5076 fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
5080 #ifdef GMX_MPI
5081 static void make_load_communicator(gmx_domdec_t *dd,MPI_Group g_all,
5082 int dim_ind,ivec loc)
5084 MPI_Group g_row;
5085 MPI_Comm c_row;
5086 int dim,i,*rank;
5087 ivec loc_c;
5088 gmx_domdec_root_t *root;
5090 dim = dd->dim[dim_ind];
5091 copy_ivec(loc,loc_c);
5092 snew(rank,dd->nc[dim]);
5093 for(i=0; i<dd->nc[dim]; i++)
5095 loc_c[dim] = i;
5096 rank[i] = dd_index(dd->nc,loc_c);
5098 /* Here we create a new group that does not necessarily
5099 * include our process. But MPI_Comm_create needs to be
5100 * called by all the processes in the original communicator.
5101 * Calling MPI_Group_free afterwards gives errors, so I assume
5102 * the group is also needed by all processes. (B. Hess)
5104 MPI_Group_incl(g_all,dd->nc[dim],rank,&g_row);
5105 MPI_Comm_create(dd->mpi_comm_all,g_row,&c_row);
5106 if (c_row != MPI_COMM_NULL)
5108 /* This process is part of the group */
5109 dd->comm->mpi_comm_load[dim_ind] = c_row;
5110 if (dd->comm->eDLB != edlbNO)
5112 if (dd->ci[dim] == dd->master_ci[dim])
5114 /* This is the root process of this row */
5115 snew(dd->comm->root[dim_ind],1);
5116 root = dd->comm->root[dim_ind];
5117 snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
5118 snew(root->old_cell_f,dd->nc[dim]+1);
5119 snew(root->bCellMin,dd->nc[dim]);
5120 if (dim_ind > 0)
5122 snew(root->cell_f_max0,dd->nc[dim]);
5123 snew(root->cell_f_min1,dd->nc[dim]);
5124 snew(root->bound_min,dd->nc[dim]);
5125 snew(root->bound_max,dd->nc[dim]);
5127 snew(root->buf_ncd,dd->nc[dim]);
5129 else
5131 /* This is not a root process, we only need to receive cell_f */
5132 snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
5135 if (dd->ci[dim] == dd->master_ci[dim])
5137 snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
5140 sfree(rank);
5142 #endif
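/* For reference, a minimal standalone sketch of the group/communicator
 * pattern used in make_load_communicator above (illustration only, not
 * GROMACS code; assumes an initialized MPI environment with >= 2 ranks):
 *
 * MPI_Group g_all,g_row;
 * MPI_Comm c_row;
 * int ranks[2] = { 0, 1 };
 *
 * MPI_Comm_group(MPI_COMM_WORLD,&g_all);
 * MPI_Group_incl(g_all,2,ranks,&g_row);
 * MPI_Comm_create(MPI_COMM_WORLD,g_row,&c_row);
 *
 * MPI_Comm_create is collective over the old communicator: every rank
 * must call it, but only ranks 0 and 1 receive c_row != MPI_COMM_NULL.
 */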
5144 static void make_load_communicators(gmx_domdec_t *dd)
5146 #ifdef GMX_MPI
5147 MPI_Group g_all;
5148 int dim0,dim1,i,j;
5149 ivec loc;
5151 if (debug)
5152 fprintf(debug,"Making load communicators\n");
5154 MPI_Comm_group(dd->mpi_comm_all,&g_all);
5156 snew(dd->comm->load,dd->ndim);
5157 snew(dd->comm->mpi_comm_load,dd->ndim);
5159 clear_ivec(loc);
5160 make_load_communicator(dd,g_all,0,loc);
5161 if (dd->ndim > 1) {
5162 dim0 = dd->dim[0];
5163 for(i=0; i<dd->nc[dim0]; i++) {
5164 loc[dim0] = i;
5165 make_load_communicator(dd,g_all,1,loc);
5168 if (dd->ndim > 2) {
5169 dim0 = dd->dim[0];
5170 for(i=0; i<dd->nc[dim0]; i++) {
5171 loc[dim0] = i;
5172 dim1 = dd->dim[1];
5173 for(j=0; j<dd->nc[dim1]; j++) {
5174 loc[dim1] = j;
5175 make_load_communicator(dd,g_all,2,loc);
5180 MPI_Group_free(&g_all);
5182 if (debug)
5183 fprintf(debug,"Finished making load communicators\n");
5184 #endif
5187 void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
5189 bool bZYX;
5190 int d,dim,i,j,m;
5191 ivec tmp,s;
5192 int nzone,nzonep;
5193 ivec dd_zp[DD_MAXIZONE];
5194 gmx_domdec_zones_t *zones;
5195 gmx_domdec_ns_ranges_t *izone;
5197 for(d=0; d<dd->ndim; d++)
5199 dim = dd->dim[d];
5200 copy_ivec(dd->ci,tmp);
5201 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5202 dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
5203 copy_ivec(dd->ci,tmp);
5204 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5205 dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
5206 if (debug)
5208 fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5209 dd->rank,dim,
5210 dd->neighbor[d][0],
5211 dd->neighbor[d][1]);
5215 if (DDMASTER(dd))
5217 fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
5218 dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
5220 if (fplog)
5222 fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5223 dd->ndim,
5224 dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
5225 dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5227 switch (dd->ndim)
5229 case 3:
5230 nzone = dd_z3n;
5231 nzonep = dd_zp3n;
5232 for(i=0; i<nzonep; i++)
5234 copy_ivec(dd_zp3[i],dd_zp[i]);
5236 break;
5237 case 2:
5238 nzone = dd_z2n;
5239 nzonep = dd_zp2n;
5240 for(i=0; i<nzonep; i++)
5242 copy_ivec(dd_zp2[i],dd_zp[i]);
5244 break;
5245 case 1:
5246 nzone = dd_z1n;
5247 nzonep = dd_zp1n;
5248 for(i=0; i<nzonep; i++)
5250 copy_ivec(dd_zp1[i],dd_zp[i]);
5252 break;
5253 default:
5254 gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
5255 nzone = 0;
5256 nzonep = 0;
5259 zones = &dd->comm->zones;
5261 for(i=0; i<nzone; i++)
5263 m = 0;
5264 clear_ivec(zones->shift[i]);
5265 for(d=0; d<dd->ndim; d++)
5267 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5271 zones->n = nzone;
5272 for(i=0; i<nzone; i++)
5274 for(d=0; d<DIM; d++)
5276 s[d] = dd->ci[d] - zones->shift[i][d];
5277 if (s[d] < 0)
5279 s[d] += dd->nc[d];
5281 else if (s[d] >= dd->nc[d])
5283 s[d] -= dd->nc[d];
5287 zones->nizone = nzonep;
5288 for(i=0; i<zones->nizone; i++)
5290 if (dd_zp[i][0] != i)
5292 gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
5294 izone = &zones->izone[i];
5295 izone->j0 = dd_zp[i][1];
5296 izone->j1 = dd_zp[i][2];
5297 for(dim=0; dim<DIM; dim++)
5299 if (dd->nc[dim] == 1)
5301 /* All shifts should be allowed */
5302 izone->shift0[dim] = -1;
5303 izone->shift1[dim] = 1;
5305 else
5307 /*
5308 izone->shift0[d] = 0;
5309 izone->shift1[d] = 0;
5310 for(j=izone->j0; j<izone->j1; j++) {
5311 if (dd->shift[j][d] > dd->shift[i][d])
5312 izone->shift0[d] = -1;
5313 if (dd->shift[j][d] < dd->shift[i][d])
5314 izone->shift1[d] = 1;
5316 */
5318 int shift_diff;
5320 /* Assume the shifts are not more than 1 cell */
5321 izone->shift0[dim] = 1;
5322 izone->shift1[dim] = -1;
5323 for(j=izone->j0; j<izone->j1; j++)
5325 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5326 if (shift_diff < izone->shift0[dim])
5328 izone->shift0[dim] = shift_diff;
5330 if (shift_diff > izone->shift1[dim])
5332 izone->shift1[dim] = shift_diff;
5339 if (dd->comm->eDLB != edlbNO)
5341 snew(dd->comm->root,dd->ndim);
5344 if (dd->comm->bRecordLoad)
5346 make_load_communicators(dd);
5350 static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
5352 gmx_domdec_t *dd;
5353 gmx_domdec_comm_t *comm;
5354 int i,rank,*buf;
5355 ivec periods;
5356 #ifdef GMX_MPI
5357 MPI_Comm comm_cart;
5358 #endif
5360 dd = cr->dd;
5361 comm = dd->comm;
5363 #ifdef GMX_MPI
5364 if (comm->bCartesianPP)
5366 /* Set up cartesian communication for the particle-particle part */
5367 if (fplog)
5369 fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
5370 dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
5373 for(i=0; i<DIM; i++)
5375 periods[i] = TRUE;
5377 MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
5378 &comm_cart);
5379 /* We overwrite the old communicator with the new cartesian one */
5380 cr->mpi_comm_mygroup = comm_cart;
5383 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5384 MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);
5386 if (comm->bCartesianPP_PME)
5388 /* Since we want to use the original Cartesian setup for the simulation,
5389 * and not the one after the split, we need to make an index.
5391 snew(comm->ddindex2ddnodeid,dd->nnodes);
5392 comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
5393 gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
5394 /* Get the rank of the DD master,
5395 * above we made sure that the master node is a PP node.
5397 if (MASTER(cr))
5399 rank = dd->rank;
5401 else
5403 rank = 0;
5405 MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
5407 else if (comm->bCartesianPP)
5409 if (cr->npmenodes == 0)
5411 /* The PP communicator is also
5412 * the communicator for this simulation
5414 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5416 cr->nodeid = dd->rank;
5418 MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);
5420 /* We need to make an index to go from the coordinates
5421 * to the nodeid of this simulation.
5423 snew(comm->ddindex2simnodeid,dd->nnodes);
5424 snew(buf,dd->nnodes);
5425 if (cr->duty & DUTY_PP)
5427 buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
5429 /* Communicate the ddindex to simulation nodeid index */
5430 MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
5431 cr->mpi_comm_mysim);
5432 sfree(buf);
5434 /* Determine the master coordinates and rank.
5435 * The DD master should be the same node as the master of this sim.
5437 for(i=0; i<dd->nnodes; i++)
5439 if (comm->ddindex2simnodeid[i] == 0)
5441 ddindex2xyz(dd->nc,i,dd->master_ci);
5442 MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
5445 if (debug)
5447 fprintf(debug,"The master rank is %d\n",dd->masterrank);
5450 else
5452 /* No Cartesian communicators */
5453 /* We use the rank in dd->mpi_comm_all as DD index */
5454 ddindex2xyz(dd->nc,dd->rank,dd->ci);
5455 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5456 dd->masterrank = 0;
5457 clear_ivec(dd->master_ci);
5459 #endif
5461 if (fplog)
5463 fprintf(fplog,
5464 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5465 dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5467 if (debug)
5469 fprintf(debug,
5470 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5471 dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5475 static void receive_ddindex2simnodeid(t_commrec *cr)
5477 gmx_domdec_t *dd;
5479 gmx_domdec_comm_t *comm;
5480 int *buf;
5482 dd = cr->dd;
5483 comm = dd->comm;
5485 #ifdef GMX_MPI
5486 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
5488 snew(comm->ddindex2simnodeid,dd->nnodes);
5489 snew(buf,dd->nnodes);
5490 if (cr->duty & DUTY_PP)
5492 buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
5494 #ifdef GMX_MPI
5495 /* Communicate the ddindex to simulation nodeid index */
5496 MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
5497 cr->mpi_comm_mysim);
5498 #endif
5499 sfree(buf);
5501 #endif
5504 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
5505 int ncg,int natoms)
5507 gmx_domdec_master_t *ma;
5508 int i;
5510 snew(ma,1);
5512 snew(ma->ncg,dd->nnodes);
5513 snew(ma->index,dd->nnodes+1);
5514 snew(ma->cg,ncg);
5515 snew(ma->nat,dd->nnodes);
5516 snew(ma->ibuf,dd->nnodes*2);
5517 snew(ma->cell_x,DIM);
5518 for(i=0; i<DIM; i++)
5520 snew(ma->cell_x[i],dd->nc[i]+1);
5523 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
5525 ma->vbuf = NULL;
5527 else
5529 snew(ma->vbuf,natoms);
5532 return ma;
5535 static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
5536 int reorder)
5538 gmx_domdec_t *dd;
5539 gmx_domdec_comm_t *comm;
5540 int i,rank;
5541 bool bDiv[DIM];
5542 ivec periods;
5543 #ifdef GMX_MPI
5544 MPI_Comm comm_cart;
5545 #endif
5547 dd = cr->dd;
5548 comm = dd->comm;
5550 if (comm->bCartesianPP)
5552 for(i=1; i<DIM; i++)
5554 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
5556 if (bDiv[YY] || bDiv[ZZ])
5558 comm->bCartesianPP_PME = TRUE;
5559 /* We choose the direction that provides the thinnest slab
5560 * of PME-only nodes, as this will have the least effect
5561 * on the PP communication.
5562 * But for the PME communication the opposite might be better.
5564 if (bDiv[YY] && (!bDiv[ZZ] || dd->nc[YY] <= dd->nc[ZZ]))
5566 comm->cartpmedim = YY;
5568 else
5570 comm->cartpmedim = ZZ;
5572 comm->ntot[comm->cartpmedim]
5573 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
5575 else if (fplog)
5577 fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
5578 fprintf(fplog,
5579 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5583 #ifdef GMX_MPI
5584 if (comm->bCartesianPP_PME)
5586 if (fplog)
5588 fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
5591 for(i=0; i<DIM; i++)
5593 periods[i] = TRUE;
5595 MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
5596 &comm_cart);
5598 MPI_Comm_rank(comm_cart,&rank);
5599 if (MASTERNODE(cr) && rank != 0)
5601 gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create; we do not allow this");
5604 /* With this assignment we lose the link to the original communicator,
5605 * which will usually be MPI_COMM_WORLD, unless we have multisim.
5607 cr->mpi_comm_mysim = comm_cart;
5608 cr->sim_nodeid = rank;
5610 MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);
5612 if (fplog)
5614 fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
5615 cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5618 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
5620 cr->duty = DUTY_PP;
5622 if (cr->npmenodes == 0 ||
5623 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
5625 cr->duty = DUTY_PME;
5628 /* Split the sim communicator into PP and PME only nodes */
5629 MPI_Comm_split(cr->mpi_comm_mysim,
5630 cr->duty,
5631 dd_index(comm->ntot,dd->ci),
5632 &cr->mpi_comm_mygroup);
5634 else
5636 switch (dd_node_order)
5638 case ddnoPP_PME:
5639 if (fplog)
5641 fprintf(fplog,"Order of the nodes: PP first, PME last\n");
5643 break;
5644 case ddnoINTERLEAVE:
5645 /* Interleave the PP-only and PME-only nodes,
5646 * as on clusters with dual-core machines this will double
5647 * the communication bandwidth of the PME processes
5648 * and thus speed up the PP <-> PME and inter-PME communication.
5650 if (fplog)
5652 fprintf(fplog,"Interleaving PP and PME nodes\n");
5654 comm->pmenodes = dd_pmenodes(cr);
5655 break;
5656 case ddnoCARTESIAN:
5657 break;
5658 default:
5659 gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
5662 if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1)
5664 cr->duty = DUTY_PME;
5666 else
5668 cr->duty = DUTY_PP;
5671 /* Split the sim communicator into PP and PME only nodes */
5672 MPI_Comm_split(cr->mpi_comm_mysim,
5673 cr->duty,
5674 cr->nodeid,
5675 &cr->mpi_comm_mygroup);
5676 MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid);
5678 #endif
5680 if (fplog)
5682 fprintf(fplog,"This is a %s only node\n\n",
5683 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
5687 void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
5689 gmx_domdec_t *dd;
5690 gmx_domdec_comm_t *comm;
5691 int CartReorder;
5693 dd = cr->dd;
5694 comm = dd->comm;
5696 copy_ivec(dd->nc,comm->ntot);
5698 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
5699 comm->bCartesianPP_PME = FALSE;
5701 /* Reorder the nodes by default. This might change the MPI ranks.
5702 * Real reordering is only supported on very few architectures;
5703 * Blue Gene is one of them.
5705 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
5707 if (cr->npmenodes > 0)
5709 /* Split the communicator into a PP and PME part */
5710 split_communicator(fplog,cr,dd_node_order,CartReorder);
5711 if (comm->bCartesianPP_PME)
5713 /* We (possibly) reordered the nodes in split_communicator,
5714 * so it is no longer required in make_pp_communicator.
5716 CartReorder = FALSE;
5719 else
5721 /* All nodes do PP and PME */
5722 #ifdef GMX_MPI
5723 /* We do not require separate communicators */
5724 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
5725 #endif
5728 if (cr->duty & DUTY_PP)
5730 /* Copy or make a new PP communicator */
5731 make_pp_communicator(fplog,cr,CartReorder);
5733 else
5735 receive_ddindex2simnodeid(cr);
5738 if (!(cr->duty & DUTY_PME))
5740 /* Set up the communication to our PME node */
5741 dd->pme_nodeid = dd_simnode2pmenode(cr,cr->sim_nodeid);
5742 dd->pme_receive_vir_ener = receive_vir_ener(cr);
5743 if (debug)
5745 fprintf(debug,"My pme_nodeid %d receive ener %d\n",
5746 dd->pme_nodeid,dd->pme_receive_vir_ener);
5749 else
5751 dd->pme_nodeid = -1;
5754 if (DDMASTER(dd))
5756 dd->ma = init_gmx_domdec_master_t(dd,
5757 comm->cgs_gl.nr,
5758 comm->cgs_gl.index[comm->cgs_gl.nr]);
5762 static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
5764 real *slb_frac,tot;
5765 int i,n;
5766 double dbl;
5768 slb_frac = NULL;
5769 if (nc > 1 && size_string != NULL)
5771 if (fplog)
5773 fprintf(fplog,"Using static load balancing for the %s direction\n",
5774 dir);
5776 snew(slb_frac,nc);
5777 tot = 0;
5778 for (i=0; i<nc; i++)
5780 dbl = 0;
5781 sscanf(size_string,"%lf%n",&dbl,&n);
5782 if (dbl == 0)
5784 gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
5786 slb_frac[i] = dbl;
5787 size_string += n;
5788 tot += slb_frac[i];
5790 /* Normalize */
5791 if (fplog)
5793 fprintf(fplog,"Relative cell sizes:");
5795 for (i=0; i<nc; i++)
5797 slb_frac[i] /= tot;
5798 if (fplog)
5800 fprintf(fplog," %5.3f",slb_frac[i]);
5803 if (fplog)
5805 fprintf(fplog,"\n");
5809 return slb_frac;
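/* Worked example (hypothetical input): for nc = 4 and the string
 * "1.5 1 1 0.5" (one of the sizex/sizey/sizez arguments passed down from
 * init_domain_decomposition), tot = 4.0 and the normalized fractions
 * become 0.375 0.250 0.250 0.125.
 */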
5812 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
5814 int n,nmol,ftype;
5815 gmx_mtop_ilistloop_t iloop;
5816 t_ilist *il;
5818 n = 0;
5819 iloop = gmx_mtop_ilistloop_init(mtop);
5820 while (gmx_mtop_ilistloop_next(iloop,&il,&nmol))
5822 for(ftype=0; ftype<F_NRE; ftype++)
5824 if ((interaction_function[ftype].flags & IF_BOND) &&
5825 NRAL(ftype) > 2)
5827 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
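/* Example: for angles NRAL(ftype) = 3, so each angle occupies
 * 1 + NRAL(ftype) = 4 entries in il[ftype].iatoms (the type index
 * plus three atom indices), and il[ftype].nr/4 is the number of
 * angles per molecule. */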
5832 return n;
5835 static int dd_nst_env(FILE *fplog,const char *env_var,int def)
5837 char *val;
5838 int nst;
5840 nst = def;
5841 val = getenv(env_var);
5842 if (val)
5844 if (sscanf(val,"%d",&nst) <= 0)
5846 nst = 1;
5848 if (fplog)
5850 fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
5851 env_var,val,nst);
5855 return nst;
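/* Example: setting GMX_DD_DEBUG=1 in the environment makes the call
 * dd_nst_env(fplog,"GMX_DD_DEBUG",0) below return 1, which enables the
 * DD consistency checks. Note that a set value that does not parse as
 * an integer falls back to 1, not to def.
 */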
5858 static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
5860 if (MASTER(cr))
5862 fprintf(stderr,"\n%s\n",warn_string);
5864 if (fplog)
5866 fprintf(fplog,"\n%s\n",warn_string);
5870 static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
5871 t_inputrec *ir,FILE *fplog)
5873 if (ir->ePBC == epbcSCREW &&
5874 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
5876 gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
5879 if (ir->ns_type == ensSIMPLE)
5881 gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or particle decomposition");
5884 if (ir->nstlist == 0)
5886 gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
5889 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
5891 dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
5895 static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
5897 int di,d;
5898 real r;
5900 r = ddbox->box_size[XX];
5901 for(di=0; di<dd->ndim; di++)
5903 d = dd->dim[di];
5904 /* Check using the initial average cell size */
5905 r = min(r,ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
5908 return r;
5911 static int check_dlb_support(FILE *fplog,t_commrec *cr,
5912 const char *dlb_opt,bool bRecordLoad,
5913 unsigned long Flags,t_inputrec *ir)
5915 gmx_domdec_t *dd;
5916 int eDLB=-1;
5917 char buf[STRLEN];
5919 switch (dlb_opt[0])
5921 case 'a': eDLB = edlbAUTO; break;
5922 case 'n': eDLB = edlbNO; break;
5923 case 'y': eDLB = edlbYES; break;
5924 default: gmx_incons("Unknown dlb_opt");
5927 if (Flags & MD_RERUN)
5929 return edlbNO;
5932 if (!EI_DYNAMICS(ir->eI))
5934 if (eDLB == edlbYES)
5936 sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
5937 dd_warning(cr,fplog,buf);
5940 return edlbNO;
5943 if (!bRecordLoad)
5945 dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
5947 return edlbNO;
5950 if (Flags & MD_REPRODUCIBLE)
5952 switch (eDLB)
5954 case edlbNO:
5955 break;
5956 case edlbAUTO:
5957 dd_warning(cr,fplog,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
5958 eDLB = edlbNO;
5959 break;
5960 case edlbYES:
5961 dd_warning(cr,fplog,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
5962 break;
5963 default:
5964 gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
5965 break;
5969 return eDLB;
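/* Summary of the mapping above: dlb_opt is the mdrun -dlb value and only
 * its first character is inspected ("auto" -> edlbAUTO, "no" -> edlbNO,
 * "yes" -> edlbYES); reruns, non-dynamical integrators and missing cycle
 * counters all force edlbNO.
 */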
5972 static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
5974 int dim;
5976 dd->ndim = 0;
5977 if (getenv("GMX_DD_ORDER_ZYX"))
5979 /* Decomposition order z,y,x */
5980 if (fplog)
5982 fprintf(fplog,"Using domain decomposition order z, y, x\n");
5984 for(dim=DIM-1; dim>=0; dim--)
5986 if (dd->nc[dim] > 1)
5988 dd->dim[dd->ndim++] = dim;
5992 else
5994 /* Decomposition order x,y,z */
5995 for(dim=0; dim<DIM; dim++)
5997 if (dd->nc[dim] > 1)
5999 dd->dim[dd->ndim++] = dim;
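/* Example: for nc = {4,1,2} the default x,y,z order gives ndim = 2 with
 * dim = {XX,ZZ}, while with GMX_DD_ORDER_ZYX set it gives dim = {ZZ,XX};
 * dimensions with nc[dim] = 1 never appear in dim[].
 */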
6005 gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
6006 unsigned long Flags,
6007 ivec nc,
6008 real comm_distance_min,real rconstr,
6009 const char *dlb_opt,real dlb_scale,
6010 const char *sizex,const char *sizey,const char *sizez,
6011 gmx_mtop_t *mtop,t_inputrec *ir,
6012 matrix box,rvec *x,
6013 gmx_ddbox_t *ddbox,
6014 int *npme_major)
6016 gmx_domdec_t *dd;
6017 gmx_domdec_comm_t *comm;
6018 int recload;
6019 int d,i,j;
6020 real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
6021 bool bC;
6022 char buf[STRLEN];
6024 if (fplog)
6026 fprintf(fplog,
6027 "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
6030 snew(dd,1);
6031 snew(dd->comm,1);
6032 comm = dd->comm;
6033 snew(comm->cggl_flag,DIM*2);
6034 snew(comm->cgcm_state,DIM*2);
6036 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6037 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6039 dd->bSendRecv2 = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
6040 comm->eFlop = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
6041 recload = dd_nst_env(fplog,"GMX_DD_LOAD",1);
6042 comm->nstSortCG = dd_nst_env(fplog,"GMX_DD_SORT",1);
6043 comm->nstDDDump = dd_nst_env(fplog,"GMX_DD_DUMP",0);
6044 comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
6045 comm->DD_debug = dd_nst_env(fplog,"GMX_DD_DEBUG",0);
6047 if (dd->bSendRecv2 && fplog)
6049 fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6051 if (comm->eFlop)
6053 if (fplog)
6055 fprintf(fplog,"Will load balance based on FLOP count\n");
6057 if (comm->eFlop > 1)
6059 srand(1+cr->nodeid);
6061 comm->bRecordLoad = TRUE;
6063 else
6065 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6069 comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);
6071 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6072 if (fplog)
6074 fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
6076 dd->bGridJump = comm->bDynLoadBal;
6078 if (comm->nstSortCG)
6080 if (fplog)
6082 if (comm->nstSortCG == 1)
6084 fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
6086 else
6088 fprintf(fplog,"Will sort the charge groups every %d steps\n",
6089 comm->nstSortCG);
6092 snew(comm->sort,1);
6094 else
6096 if (fplog)
6098 fprintf(fplog,"Will not sort the charge groups\n");
6102 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6103 if (comm->bInterCGBondeds)
6105 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6107 else
6109 comm->bInterCGMultiBody = FALSE;
6112 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6114 if (ir->rlistlong == 0)
6116 /* Set the cut-off to some very large value,
6117 * so we don't need if statements everywhere in the code.
6118 * We pick a value whose square still fits in a real, since the cut-off is squared in some places.
6120 comm->cutoff = GMX_CUTOFF_INF;
6122 else
6124 comm->cutoff = ir->rlistlong;
6126 comm->cutoff_mbody = 0;
6128 comm->cellsize_limit = 0;
6129 comm->bBondComm = FALSE;
6131 if (comm->bInterCGBondeds)
6133 if (comm_distance_min > 0)
6135 comm->cutoff_mbody = comm_distance_min;
6136 if (Flags & MD_DDBONDCOMM)
6138 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6140 else
6142 comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
6144 r_bonded_limit = comm->cutoff_mbody;
6146 else if (ir->bPeriodicMols)
6148 /* Can not easily determine the required cut-off */
6149 dd_warning(cr,fplog,"NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
6150 comm->cutoff_mbody = comm->cutoff/2;
6151 r_bonded_limit = comm->cutoff_mbody;
6153 else
6155 if (MASTER(cr))
6157 dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
6158 Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
6160 gmx_bcast(sizeof(r_2b),&r_2b,cr);
6161 gmx_bcast(sizeof(r_mb),&r_mb,cr);
6163 /* We use an initial margin of 10% for the minimum cell size,
6164 * except when we are just below the non-bonded cut-off.
6166 if (Flags & MD_DDBONDCOMM)
6168 if (max(r_2b,r_mb) > comm->cutoff)
6170 r_bonded = max(r_2b,r_mb);
6171 r_bonded_limit = 1.1*r_bonded;
6172 comm->bBondComm = TRUE;
6174 else
6176 r_bonded = r_mb;
6177 r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
6179 /* We determine cutoff_mbody later */
6181 else
6183 /* No special bonded communication,
6184 * simply increase the DD cut-off.
6186 r_bonded_limit = 1.1*max(r_2b,r_mb);
6187 comm->cutoff_mbody = r_bonded_limit;
6188 comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
6191 comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
6192 if (fplog)
6194 fprintf(fplog,
6195 "Minimum cell size due to bonded interactions: %.3f nm\n",
6196 comm->cellsize_limit);
6200 if (dd->bInterCGcons && rconstr <= 0)
6202 /* There is a cell size limit due to the constraints (P-LINCS) */
6203 rconstr = constr_r_max(fplog,mtop,ir);
6204 if (fplog)
6206 fprintf(fplog,
6207 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6208 rconstr);
6209 if (rconstr > comm->cellsize_limit)
6211 fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
6215 else if (rconstr > 0 && fplog)
6217 /* Here we do not check for dd->bInterCGcons,
6218 * because one can also set a cell size limit for virtual sites only
6219 * and at this point we don't know yet if there are intercg v-sites.
6221 fprintf(fplog,
6222 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6223 rconstr);
6225 comm->cellsize_limit = max(comm->cellsize_limit,rconstr);
6227 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6229 if (nc[XX] > 0)
6231 copy_ivec(nc,dd->nc);
6232 set_dd_dim(fplog,dd);
6233 set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);
6235 if (cr->npmenodes == -1)
6237 cr->npmenodes = 0;
6239 acs = average_cellsize_min(dd,ddbox);
6240 if (acs < comm->cellsize_limit)
6242 if (fplog)
6244 fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
6246 if (MASTER(cr))
6248 gmx_fatal(FARGS,"The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",acs,comm->cellsize_limit);
6250 #ifdef GMX_MPI
6251 MPI_Abort(MPI_COMM_WORLD, 0);
6252 #else
6253 exit(0);
6254 #endif
6257 else
6259 set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);
6261 /* We need to choose the optimal DD grid and possibly PME nodes */
6262 limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
6263 comm->eDLB!=edlbNO,dlb_scale,
6264 comm->cellsize_limit,comm->cutoff,
6265 comm->bInterCGBondeds,comm->bInterCGMultiBody);
6267 if (dd->nc[XX] == 0)
6269 if (MASTER(cr))
6271 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6272 sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
6273 !bC ? "-rdd" : "-rcon",
6274 comm->eDLB!=edlbNO ? " or -dds" : "",
6275 bC ? " or your LINCS settings" : "");
6276 gmx_fatal(FARGS,"There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6277 "%s\n"
6278 "Look in the log file for details on the domain decomposition",
6279 cr->nnodes-cr->npmenodes,limit,buf);
6281 #ifdef GMX_MPI
6282 MPI_Abort(MPI_COMM_WORLD, 0);
6283 #else
6284 exit(0);
6285 #endif
6287 set_dd_dim(fplog,dd);
6290 if (fplog)
6292 fprintf(fplog,
6293 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6294 dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
6297 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6298 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6300 gmx_fatal(FARGS,"The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6301 dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
6303 if (cr->npmenodes > dd->nnodes)
6305 gmx_fatal(FARGS,"The number of separate PME nodes (%d) is larger than the number of PP nodes (%d); this is not supported.",cr->npmenodes,dd->nnodes);
6307 if (cr->npmenodes > 0)
6309 comm->npmenodes = cr->npmenodes;
6311 else
6313 comm->npmenodes = dd->nnodes;
6316 if (EEL_PME(ir->coulombtype))
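/* The FALSE in the condition below disables the 2D PME decomposition
 * branch, so with PME electrostatics npmedecompdim is always 1 here.
 */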
6318 if (FALSE && dd->nc[XX] > cr->npmenodes && cr->npmenodes % dd->nc[XX] == 0)
6320 comm->npmedecompdim = 2;
6321 comm->npmenodes_major = dd->nc[XX];
6323 else
6325 comm->npmedecompdim = 1;
6326 comm->npmenodes_major = comm->npmenodes;
6329 else
6331 comm->npmedecompdim = 0;
6332 comm->npmenodes_major = 0;
6334 *npme_major = comm->npmenodes_major;
6336 snew(comm->slb_frac,DIM);
6337 if (comm->eDLB == edlbNO)
6339 comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
6340 comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
6341 comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
6344 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6346 if (comm->bBondComm || comm->eDLB != edlbNO)
6348 /* Set the bonded communication distance to halfway
6349 * the minimum and the maximum,
6350 * since the extra communication cost is nearly zero.
6352 acs = average_cellsize_min(dd,ddbox);
6353 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6354 if (comm->eDLB != edlbNO)
6356 /* Check if this does not limit the scaling */
6357 comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
6359 if (!comm->bBondComm)
6361 /* Without bBondComm do not go beyond the n.b. cut-off */
6362 comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
6363 if (comm->cellsize_limit >= comm->cutoff)
6365 /* We don't lose a lot of efficiency
6366 * when increasing it to the n.b. cut-off.
6367 * It can even be slightly faster, because we need
6368 * less checks for the communication setup.
6370 comm->cutoff_mbody = comm->cutoff;
6373 /* Check if we did not end up below our original limit */
6374 comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);
6376 if (comm->cutoff_mbody > comm->cellsize_limit)
6378 comm->cellsize_limit = comm->cutoff_mbody;
6381 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6384 if (debug)
6386 fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
6387 "cellsize limit %f\n",
6388 comm->bBondComm,comm->cellsize_limit);
6391 if (MASTER(cr))
6393 check_dd_restrictions(cr,dd,ir,fplog);
6396 comm->partition_step = INT_MIN;
6397 dd->ddp_count = 0;
6399 return dd;
6402 static void set_dlb_limits(gmx_domdec_t *dd)
6405 int d;
6407 for(d=0; d<dd->ndim; d++)
6409 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
6410 dd->comm->cellsize_min[dd->dim[d]] =
6411 dd->comm->cellsize_min_dlb[dd->dim[d]];
6416 static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_step_t step)
6418 gmx_domdec_t *dd;
6419 gmx_domdec_comm_t *comm;
6420 real cellsize_min;
6421 int d,nc,i;
6422 char buf[STRLEN];
6424 dd = cr->dd;
6425 comm = dd->comm;
6427 if (fplog)
6429 fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,buf),dd_force_imb_perf_loss(dd)*100);
6432 cellsize_min = comm->cellsize_min[dd->dim[0]];
6433 for(d=1; d<dd->ndim; d++)
6435 cellsize_min = min(cellsize_min,comm->cellsize_min[dd->dim[d]]);
6438 if (cellsize_min < comm->cellsize_limit*1.05)
6440 dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
6442 return;
6445 dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
6446 comm->bDynLoadBal = TRUE;
6447 dd->bGridJump = TRUE;
6449 set_dlb_limits(dd);
6451 /* We can set the required cell size info here,
6452 * so we do not need to communicate this.
6453 * The grid is completely uniform.
6455 for(d=0; d<dd->ndim; d++)
6457 if (comm->root[d])
6459 comm->load[d].sum_m = comm->load[d].sum;
6461 nc = dd->nc[dd->dim[d]];
6462 for(i=0; i<nc; i++)
6464 comm->root[d]->cell_f[i] = i/(real)nc;
6465 if (d > 0)
6467 comm->root[d]->cell_f_max0[i] = i /(real)nc;
6468 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
6471 comm->root[d]->cell_f[nc] = 1.0;
6476 static char *init_bLocalCG(gmx_mtop_t *mtop)
6478 int ncg,cg;
6479 char *bLocalCG;
6481 ncg = ncg_mtop(mtop);
6482 snew(bLocalCG,ncg);
6483 for(cg=0; cg<ncg; cg++)
6485 bLocalCG[cg] = FALSE;
6488 return bLocalCG;
6491 void dd_init_bondeds(FILE *fplog,
6492 gmx_domdec_t *dd,gmx_mtop_t *mtop,
6493 gmx_vsite_t *vsite,gmx_constr_t constr,
6494 t_inputrec *ir,bool bBCheck,cginfo_mb_t *cginfo_mb)
6496 gmx_domdec_comm_t *comm;
6497 bool bBondComm;
6498 int d;
6500 dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);
6502 comm = dd->comm;
6504 if (comm->bBondComm)
6506 /* Communicate atoms beyond the cut-off for bonded interactions */
6507 comm = dd->comm;
6509 comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);
6511 comm->bLocalCG = init_bLocalCG(mtop);
6513 else
6515 /* Only communicate atoms based on cut-off */
6516 comm->cglink = NULL;
6517 comm->bLocalCG = NULL;
6521 static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
6522 t_inputrec *ir,
6523 bool bDynLoadBal,real dlb_scale,
6524 gmx_ddbox_t *ddbox)
6526 gmx_domdec_comm_t *comm;
6527 int d;
6528 ivec np;
6529 real limit,shrink;
6530 char buf[64];
6532 if (fplog == NULL)
6534 return;
6537 comm = dd->comm;
6539 if (bDynLoadBal)
6541 fprintf(fplog,"The maximum number of communication pulses is:");
6542 for(d=0; d<dd->ndim; d++)
6544 fprintf(fplog," %c %d",dim2char(dd->dim[d]),comm->cd[d].np_dlb);
6546 fprintf(fplog,"\n");
6547 fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
6548 fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
6549 fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
6550 for(d=0; d<DIM; d++)
6552 if (dd->nc[d] > 1)
6554 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
6556 shrink = 0;
6558 else
6560 shrink =
6561 comm->cellsize_min_dlb[d]/
6562 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6564 fprintf(fplog," %c %.2f",dim2char(d),shrink);
6567 fprintf(fplog,"\n");
6569 else
6571 set_dd_cell_sizes_slb(dd,ddbox,FALSE,np);
6572 fprintf(fplog,"The initial number of communication pulses is:");
6573 for(d=0; d<dd->ndim; d++)
6575 fprintf(fplog," %c %d",dim2char(dd->dim[d]),np[dd->dim[d]]);
6577 fprintf(fplog,"\n");
6578 fprintf(fplog,"The initial domain decomposition cell size is:");
6579 for(d=0; d<DIM; d++) {
6580 if (dd->nc[d] > 1)
6582 fprintf(fplog," %c %.2f nm",
6583 dim2char(d),dd->comm->cellsize_min[d]);
6586 fprintf(fplog,"\n\n");
6589 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
6591 fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
6592 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6593 "non-bonded interactions","",comm->cutoff);
6595 if (bDynLoadBal)
6597 limit = dd->comm->cellsize_limit;
6599 else
6601 if (dynamic_dd_box(ddbox,ir))
6603 fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
6605 limit = dd->comm->cellsize_min[XX];
6606 for(d=1; d<DIM; d++)
6608 limit = min(limit,dd->comm->cellsize_min[d]);
6612 if (comm->bInterCGBondeds)
6614 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6615 "two-body bonded interactions","(-rdd)",
6616 max(comm->cutoff,comm->cutoff_mbody));
6617 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6618 "multi-body bonded interactions","(-rdd)",
6619 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff,limit));
6621 if (dd->vsite_comm)
6623 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6624 "virtual site constructions","(-rcon)",limit);
6626 if (dd->constraint_comm)
6628 sprintf(buf,"atoms separated by up to %d constraints",
6629 1+ir->nProjOrder);
6630 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6631 buf,"(-rcon)",limit);
6633 fprintf(fplog,"\n");
6636 fflush(fplog);
6639 void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
6640 t_inputrec *ir,t_forcerec *fr,
6641 gmx_ddbox_t *ddbox)
6643 gmx_domdec_comm_t *comm;
6644 int d,dim,npulse,npulse_d_max,npulse_d;
6645 bool bNoCutOff;
6646 int natoms_tot;
6647 real vol_frac;
6649 comm = dd->comm;
6651 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
6653 if (EEL_PME(ir->coulombtype))
6655 init_ddpme(dd,&comm->ddpme[0],0,comm->npmenodes_major);
6656 if (comm->npmedecompdim >= 2)
6658 init_ddpme(dd,&comm->ddpme[1],1,
6659 comm->npmenodes/comm->npmenodes_major);
6662 else
6664 comm->npmenodes = 0;
6665 if (dd->pme_nodeid >= 0)
6667 gmx_fatal(FARGS,"Can not have separate PME nodes without PME electrostatics");
6671 /* If each molecule is a single charge group
6672 * or we use domain decomposition for each periodic dimension,
6673 * we do not need to take pbc into account for the bonded interactions.
6675 if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
6676 (dd->nc[XX]>1 && dd->nc[YY]>1 && (dd->nc[ZZ]>1 || fr->ePBC==epbcXY)))
6678 fr->bMolPBC = FALSE;
6680 else
6682 fr->bMolPBC = TRUE;
6685 if (debug)
6687 fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
6689 if (comm->eDLB != edlbNO)
6691 /* Determine the maximum number of comm. pulses in one dimension */
6693 comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
6695 /* Determine the maximum required number of grid pulses */
6696 if (comm->cellsize_limit >= comm->cutoff)
6698 /* Only a single pulse is required */
6699 npulse = 1;
6701 else if (!bNoCutOff && comm->cellsize_limit > 0)
6703 /* We round down slightly here to avoid overhead due to the latency
6704 * of extra communication calls when the cut-off
6705 * would be only slightly longer than the cell size.
6706 * Later cellsize_limit is redetermined,
6707 * so we can not miss interactions due to this rounding.
6709 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
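/* Worked example (hypothetical numbers): with cutoff = 1.01 nm and
 * cellsize_limit = 0.5 nm the exact ratio is 2.02, so rounding up would
 * give 3 pulses, while (int)(0.96 + 2.02) = 2 uses one pulse less.
 * This is safe because cellsize_limit is raised below to
 * cutoff/maxpulse (here 0.505 nm), so no interactions are missed.
 */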
6711 else
6713 /* There is no cell size limit */
6714 npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
6717 if (!bNoCutOff && npulse > 1)
6719 /* See if we can do with less pulses, based on dlb_scale */
6720 npulse_d_max = 0;
6721 for(d=0; d<dd->ndim; d++)
6723 dim = dd->dim[d];
6724 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
6725 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
6726 npulse_d_max = max(npulse_d_max,npulse_d);
6728 npulse = min(npulse,npulse_d_max);
6731 /* This env var can override npulse */
6732 d = dd_nst_env(fplog,"GMX_DD_NPULSE",0);
6733 if (d > 0)
6735 npulse = d;
6738 comm->maxpulse = 1;
6739 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
6740 for(d=0; d<dd->ndim; d++)
6742 comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1);
6743 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
6744 snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
6745 comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
6746 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
6748 comm->bVacDLBNoLimit = FALSE;
6752 /* cellsize_limit is set for LINCS in init_domain_decomposition */
6753 if (!comm->bVacDLBNoLimit)
6755 comm->cellsize_limit = max(comm->cellsize_limit,
6756 comm->cutoff/comm->maxpulse);
6758 comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
6759 /* Set the minimum cell size for each DD dimension */
6760 for(d=0; d<dd->ndim; d++)
6762 if (comm->bVacDLBNoLimit ||
6763 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
6765 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
6767 else
6769 comm->cellsize_min_dlb[dd->dim[d]] =
6770 comm->cutoff/comm->cd[d].np_dlb;
6773 if (comm->cutoff_mbody <= 0)
6775 comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
6777 if (comm->bDynLoadBal)
6779 set_dlb_limits(dd);
6783 print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
6784 if (comm->eDLB == edlbAUTO)
6786 if (fplog)
6788 fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
6790 print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
6793 vol_frac = 1/(real)dd->nnodes + comm_box_frac(dd->nc,comm->cutoff,ddbox);
6794 if (debug)
6796 fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
6798 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
6800 dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot);
6803 static void merge_cg_buffers(int ncell,
6804 gmx_domdec_comm_dim_t *cd, int pulse,
6805 int *ncg_cell,
6806 int *index_gl, int *recv_i,
6807 rvec *cg_cm, rvec *recv_vr,
6808 int *cgindex,
6809 cginfo_mb_t *cginfo_mb,int *cginfo)
6811 gmx_domdec_ind_t *ind,*ind_p;
6812 int p,cell,c,cg,cg0,cg1,cg_gl,nat;
6813 int shift,shift_at;
6815 ind = &cd->ind[pulse];
6817 /* First correct the already stored data */
6818 shift = ind->nrecv[ncell];
6819 for(cell=ncell-1; cell>=0; cell--)
6821 shift -= ind->nrecv[cell];
6822 if (shift > 0)
6824 /* Move the cg's present from previous grid pulses */
6825 cg0 = ncg_cell[ncell+cell];
6826 cg1 = ncg_cell[ncell+cell+1];
6827 cgindex[cg1+shift] = cgindex[cg1];
6828 for(cg=cg1-1; cg>=cg0; cg--)
6830 index_gl[cg+shift] = index_gl[cg];
6831 copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
6832 cgindex[cg+shift] = cgindex[cg];
6833 cginfo[cg+shift] = cginfo[cg];
6835 /* Correct the already stored send indices for the shift */
6836 for(p=1; p<=pulse; p++)
6838 ind_p = &cd->ind[p];
6839 cg0 = 0;
6840 for(c=0; c<cell; c++)
6842 cg0 += ind_p->nsend[c];
6844 cg1 = cg0 + ind_p->nsend[cell];
6845 for(cg=cg0; cg<cg1; cg++)
6847 ind_p->index[cg] += shift;
6853 /* Merge in the communicated buffers */
6854 shift = 0;
6855 shift_at = 0;
6856 cg0 = 0;
6857 for(cell=0; cell<ncell; cell++)
6859 cg1 = ncg_cell[ncell+cell+1] + shift;
6860 if (shift_at > 0)
6862 /* Correct the old cg indices */
6863 for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++)
6865 cgindex[cg+1] += shift_at;
6868 for(cg=0; cg<ind->nrecv[cell]; cg++)
6870 /* Copy this charge group from the buffer */
6871 index_gl[cg1] = recv_i[cg0];
6872 copy_rvec(recv_vr[cg0],cg_cm[cg1]);
6873 /* Add it to the cgindex */
6874 cg_gl = index_gl[cg1];
6875 cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
6876 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
6877 cgindex[cg1+1] = cgindex[cg1] + nat;
6878 cg0++;
6879 cg1++;
6880 shift_at += nat;
6882 shift += ind->nrecv[cell];
6883 ncg_cell[ncell+cell+1] = cg1;
6887 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
6888 int nzone,int cg0,const int *cgindex)
6890 int cg,zone,p;
6892 /* Store the atom block boundaries for easy copying of communication buffers
6894 cg = cg0;
6895 for(zone=0; zone<nzone; zone++)
6897 for(p=0; p<cd->np; p++) {
6898 cd->ind[p].cell2at0[zone] = cgindex[cg];
6899 cg += cd->ind[p].nrecv[zone];
6900 cd->ind[p].cell2at1[zone] = cgindex[cg];
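/* missing_link returns TRUE when at least one charge group that is
 * linked to cg_gl through bonded interactions is not present locally;
 * such a charge group still needs to be communicated when bBondComm
 * is active (see its use in setup_dd_communication below).
 */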
6905 static bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
6907 int i;
6908 bool bMiss;
6910 bMiss = FALSE;
6911 for(i=link->index[cg_gl]; i<link->index[cg_gl+1]; i++)
6913 if (!bLocalCG[link->a[i]])
6915 bMiss = TRUE;
6919 return bMiss;
6922 static void setup_dd_communication(gmx_domdec_t *dd,
6923 matrix box,gmx_ddbox_t *ddbox,t_forcerec *fr)
6925 int dim_ind,dim,dim0,dim1=-1,dim2=-1,dimd,p,nat_tot;
6926 int nzone,nzone_send,zone,zonei,cg0,cg1;
6927 int c,i,j,cg,cg_gl,nrcg;
6928 int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
6929 gmx_domdec_comm_t *comm;
6930 gmx_domdec_zones_t *zones;
6931 gmx_domdec_comm_dim_t *cd;
6932 gmx_domdec_ind_t *ind;
6933 cginfo_mb_t *cginfo_mb;
6934 bool bBondComm,bDist2B,bDistMB,bDistMB_pulse,bDistBonded,bScrew;
6935 real r_mb,r_comm2,r_scomm2,r_bcomm2,r,r_0,r_1,r2,rb2,r2inc,inv_ncg,tric_sh;
6936 rvec rb,rn;
6937 real corner[DIM][4],corner_round_0=0,corner_round_1[4];
6938 real bcorner[DIM],bcorner_round_1=0;
6939 ivec tric_dist;
6940 rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
6941 real skew_fac2_d,skew_fac_01;
6942 rvec sf2_round;
6943 int nsend,nat;
6945 if (debug)
6947 fprintf(debug,"Setting up DD communication\n");
6950 comm = dd->comm;
6951 cg_cm = fr->cg_cm;
6953 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
6955 dim = dd->dim[dim_ind];
6957 /* Check if we need to use triclinic distances */
6958 tric_dist[dim_ind] = 0;
6959 for(i=0; i<=dim_ind; i++)
6961 if (ddbox->tric_dir[dd->dim[i]])
6963 tric_dist[dim_ind] = 1;
6968 bBondComm = comm->bBondComm;
6970 /* Do we need to determine extra distances for multi-body bondeds? */
6971 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
6973 /* Do we need to determine extra distances for only two-body bondeds? */
6974 bDist2B = (bBondComm && !bDistMB);
6976 r_comm2 = sqr(comm->cutoff);
6977 r_bcomm2 = sqr(comm->cutoff_mbody);
6979 if (debug)
6981 fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
6984 zones = &comm->zones;
6986 dim0 = dd->dim[0];
6987 /* The first dimension is equal for all cells */
6988 corner[0][0] = comm->cell_x0[dim0];
6989 if (bDistMB)
6991 bcorner[0] = corner[0][0];
6993 if (dd->ndim >= 2)
6995 dim1 = dd->dim[1];
6996 /* This cell row is only seen from the first row */
6997 corner[1][0] = comm->cell_x0[dim1];
6998 /* All rows can see this row */
6999 corner[1][1] = comm->cell_x0[dim1];
7000 if (dd->bGridJump)
7002 corner[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
7003 if (bDistMB)
7005 /* For the multi-body distance we need the maximum */
7006 bcorner[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
7009 /* Set the upper-right corner for rounding */
7010 corner_round_0 = comm->cell_x1[dim0];
7012 if (dd->ndim >= 3)
7014 dim2 = dd->dim[2];
7015 for(j=0; j<4; j++)
7017 corner[2][j] = comm->cell_x0[dim2];
7019 if (dd->bGridJump)
7021 /* Use the maximum of the i-cells that see a j-cell */
7022 for(i=0; i<zones->nizone; i++)
7024 for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
7026 if (j >= 4)
7028 corner[2][j-4] =
7029 max(corner[2][j-4],
7030 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7034 if (bDistMB)
7036 /* For the multi-body distance we need the maximum */
7037 bcorner[2] = comm->cell_x0[dim2];
7038 for(i=0; i<2; i++)
7040 for(j=0; j<2; j++)
7042 bcorner[2] = max(bcorner[2],
7043 comm->zone_d2[i][j].p1_0);
7049 /* Set the upper-right corner for rounding */
7050 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7051 * Only cell (0,0,0) can see cell 7 (1,1,1)
7053 corner_round_1[0] = comm->cell_x1[dim1];
7054 corner_round_1[3] = comm->cell_x1[dim1];
7055 if (dd->bGridJump)
7057 corner_round_1[0] = max(comm->cell_x1[dim1],
7058 comm->zone_d1[1].mch1);
7059 if (bDistMB)
7061 /* For the multi-body distance we need the maximum */
7062 bcorner_round_1 = max(comm->cell_x1[dim1],
7063 comm->zone_d1[1].p1_1);
7069 /* Triclinic stuff */
7070 normal = ddbox->normal;
7071 skew_fac_01 = 0;
7072 if (dd->ndim >= 2)
7074 v_0 = ddbox->v[dim0];
7075 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
7077 /* Determine the coupling coefficient for the distances
7078 * to the cell planes along dim0 and dim1 through dim2.
7079 * This is required for correct rounding.
7081 skew_fac_01 =
7082 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
7083 if (debug)
7085 fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
7089 if (dd->ndim >= 3)
7091 v_1 = ddbox->v[dim1];
7094 zone_cg_range = zones->cg_range;
7095 index_gl = dd->index_gl;
7096 cgindex = dd->cgindex;
7097 cginfo_mb = fr->cginfo_mb;
7099 zone_cg_range[0] = 0;
7100 zone_cg_range[1] = dd->ncg_home;
7101 comm->zone_ncg1[0] = dd->ncg_home;
7102 pos_cg = dd->ncg_home;
7104 nat_tot = dd->nat_home;
7105 nzone = 1;
7106 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
7108 dim = dd->dim[dim_ind];
7109 cd = &comm->cd[dim_ind];
7111 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
7113 /* No pbc in this dimension, the first node should not comm. */
7114 nzone_send = 0;
7116 else
7118 nzone_send = nzone;
7121 bScrew = (dd->bScrewPBC && dim == XX);
7123 v_d = ddbox->v[dim];
7124 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
7126 cd->bInPlace = TRUE;
7127 for(p=0; p<cd->np; p++)
7129 /* Only atoms communicated in the first pulse are used
7130 * for multi-body bonded interactions or for bBondComm.
7132 bDistBonded = ((bDistMB || bDist2B) && p == 0);
7133 bDistMB_pulse = (bDistMB && bDistBonded);
7135 ind = &cd->ind[p];
7136 nsend = 0;
7137 nat = 0;
7138 for(zone=0; zone<nzone_send; zone++)
7140 if (tric_dist[dim_ind] && dim_ind > 0)
7142 /* Determine slightly more optimized skew_fac's
7143 * for rounding.
7144 * This reduces the number of communicated atoms
7145 * by about 10% for 3D DD of rhombic dodecahedra.
7147 for(dimd=0; dimd<dim; dimd++)
7149 sf2_round[dimd] = 1;
7150 if (ddbox->tric_dir[dimd])
7152 for(i=dd->dim[dimd]+1; i<DIM; i++)
7154 /* If we are shifted in dimension i
7155 * and the cell plane is tilted forward
7156 * in dimension i, skip this coupling.
7158 if (!(zones->shift[nzone+zone][i] &&
7159 ddbox->v[dimd][i][dimd] >= 0))
7161 sf2_round[dimd] +=
7162 sqr(ddbox->v[dimd][i][dimd]);
7165 sf2_round[dimd] = 1/sf2_round[dimd];
7170 zonei = zone_perm[dim_ind][zone];
7171 if (p == 0)
7173 /* Here we permute the zones to obtain a convenient order
7174 * for neighbor searching
7176 cg0 = zone_cg_range[zonei];
7177 cg1 = zone_cg_range[zonei+1];
7179 else
7181 /* Look only at the cg's received in the previous grid pulse
7183 cg1 = zone_cg_range[nzone+zone+1];
7184 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
7186 ind->nsend[zone] = 0;
7187 for(cg=cg0; cg<cg1; cg++)
7189 r2 = 0;
7190 rb2 = 0;
7191 if (tric_dist[dim_ind] == 0)
7193 /* Rectangular direction, easy */
7194 r = cg_cm[cg][dim] - corner[dim_ind][zone];
7195 if (r > 0)
7197 r2 += r*r;
7199 if (bDistMB_pulse)
7201 r = cg_cm[cg][dim] - bcorner[dim_ind];
7202 if (r > 0)
7204 rb2 += r*r;
7207 /* Rounding gives at most a 16% reduction
7208 * in communicated atoms
7210 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7212 r = cg_cm[cg][dim0] - corner_round_0;
7213 /* This is the first dimension, so always r >= 0 */
7214 r2 += r*r;
7215 if (bDistMB_pulse)
7217 rb2 += r*r;
7220 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7222 r = cg_cm[cg][dim1] - corner_round_1[zone];
7223 if (r > 0)
7225 r2 += r*r;
7227 if (bDistMB_pulse)
7229 r = cg_cm[cg][dim1] - bcorner_round_1;
7230 if (r > 0)
7232 rb2 += r*r;
7237 else
7239 /* Triclinic direction, more complicated */
7240 clear_rvec(rn);
7241 clear_rvec(rb);
7242 /* Rounding, conservative as the skew_fac multiplication
7243 * will slightly underestimate the distance.
7245 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7247 rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
7248 for(i=dim0+1; i<DIM; i++)
7250 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7252 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7253 if (bDistMB_pulse)
7255 rb[dim0] = rn[dim0];
7256 rb2 = r2;
7258 /* Take care that the cell planes along dim0 might not
7259 * be orthogonal to those along dim1 and dim2.
7261 for(i=1; i<=dim_ind; i++)
7263 dimd = dd->dim[i];
7264 if (normal[dim0][dimd] > 0)
7266 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7267 if (bDistMB_pulse)
7269 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7274 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7276 rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
7277 tric_sh = 0;
7278 for(i=dim1+1; i<DIM; i++)
7280 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7282 rn[dim1] += tric_sh;
7283 if (rn[dim1] > 0)
7285 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7286 /* Take care of coupling of the distances
7287 * to the planes along dim0 and dim1 through dim2.
7289 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7290 /* Take care that the cell planes along dim1
7291 * might not be orthogonal to that along dim2.
7293 if (normal[dim1][dim2] > 0)
7295 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7298 if (bDistMB_pulse)
7300 rb[dim1] +=
7301 cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
7302 if (rb[dim1] > 0)
7304 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7305 /* Take care of coupling of the distances
7306 * to the planes along dim0 and dim1 through dim2.
7308 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7309 /* Take care that the cell planes along dim1
7310 * might not be orthogonal to that along dim2.
7312 if (normal[dim1][dim2] > 0)
7314 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7319 /* The distance along the communication direction */
7320 rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
7321 tric_sh = 0;
7322 for(i=dim+1; i<DIM; i++)
7324 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7326 rn[dim] += tric_sh;
7327 if (rn[dim] > 0)
7329 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7330 /* Take care of coupling of the distances
7331 * to the planes along dim0 and dim1 through dim2.
7333 if (dim_ind == 1 && zonei == 1)
7335 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7338 if (bDistMB_pulse)
7340 clear_rvec(rb);
7341 rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
7342 if (rb[dim] > 0)
7344 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
7345 /* Take care of coupling of the distances
7346 * to the planes along dim0 and dim1 through dim2.
7348 if (dim_ind == 1 && zonei == 1)
7350 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
7356 if (r2 < r_comm2 ||
7357 (bDistBonded &&
7358 ((bDistMB && rb2 < r_bcomm2) ||
7359 (bDist2B && r2 < r_bcomm2)) &&
7360 (!bBondComm ||
7361 (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
7362 missing_link(comm->cglink,index_gl[cg],
7363 comm->bLocalCG)))))
7365 /* Make an index to the local charge groups */
7366 if (nsend+1 > ind->nalloc)
7368 ind->nalloc = over_alloc_large(nsend+1);
7369 srenew(ind->index,ind->nalloc);
7371 if (nsend+1 > comm->nalloc_int)
7373 comm->nalloc_int = over_alloc_large(nsend+1);
7374 srenew(comm->buf_int,comm->nalloc_int);
7376 ind->index[nsend] = cg;
7377 comm->buf_int[nsend] = index_gl[cg];
7378 ind->nsend[zone]++;
7379 check_vec_rvec_alloc(&comm->vbuf,nsend+1);
7381 if (dd->ci[dim] == 0)
7383 /* Correct cg_cm for pbc */
7384 rvec_add(cg_cm[cg],box[dim],comm->vbuf.v[nsend]);
7385 if (bScrew)
7387 comm->vbuf.v[nsend][YY] =
7388 box[YY][YY]-comm->vbuf.v[nsend][YY];
7389 comm->vbuf.v[nsend][ZZ] =
7390 box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
7393 else
7395 copy_rvec(cg_cm[cg],comm->vbuf.v[nsend]);
7397 nsend++;
7398 nat += cgindex[cg+1] - cgindex[cg];
7402 /* Clear the counts in case we do not have pbc */
7403 for(zone=nzone_send; zone<nzone; zone++)
7405 ind->nsend[zone] = 0;
7407 ind->nsend[nzone] = nsend;
7408 ind->nsend[nzone+1] = nat;
7409 /* Communicate the number of cg's and atoms to receive */
7410 dd_sendrecv_int(dd, dim_ind, dddirBackward,
7411 ind->nsend, nzone+2,
7412 ind->nrecv, nzone+2);
7414 /* The rvec buffer is also required for atom buffers of size nsend
7415 * in dd_move_x and dd_move_f.
7417 check_vec_rvec_alloc(&comm->vbuf,ind->nsend[nzone+1]);
7419 if (p > 0)
7421 /* We can receive in place if only the last zone is not empty */
7422 for(zone=0; zone<nzone-1; zone++)
7424 if (ind->nrecv[zone] > 0)
7426 cd->bInPlace = FALSE;
7429 if (!cd->bInPlace)
7431 /* The int buffer is only required here for the cg indices */
7432 if (ind->nrecv[nzone] > comm->nalloc_int2)
7434 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
7435 srenew(comm->buf_int2,comm->nalloc_int2);
7437 /* The rvec buffer is also required for atom buffers
7438 * of size nrecv in dd_move_x and dd_move_f.
7440 i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
7441 check_vec_rvec_alloc(&comm->vbuf2,i);
            /* Make space for the global cg indices */
            if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
                || dd->cg_nalloc == 0)
            {
                dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
                srenew(index_gl,dd->cg_nalloc);
                srenew(cgindex,dd->cg_nalloc+1);
            }
            /* Communicate the global cg indices */
            if (cd->bInPlace)
            {
                recv_i = index_gl + pos_cg;
            }
            else
            {
                recv_i = comm->buf_int2;
            }
            dd_sendrecv_int(dd, dim_ind, dddirBackward,
                            comm->buf_int, nsend,
                            recv_i,        ind->nrecv[nzone]);

            /* Make space for cg_cm */
            if (pos_cg + ind->nrecv[nzone] > fr->cg_nalloc)
            {
                dd_realloc_fr_cg(fr,pos_cg + ind->nrecv[nzone]);
                cg_cm = fr->cg_cm;
            }
            /* Communicate cg_cm */
            if (cd->bInPlace)
            {
                recv_vr = cg_cm + pos_cg;
            }
            else
            {
                recv_vr = comm->vbuf2.v;
            }
            dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
                             comm->vbuf.v, nsend,
                             recv_vr,      ind->nrecv[nzone]);

            /* Make the charge group index */
            if (cd->bInPlace)
            {
                zone = (p == 0 ? 0 : nzone - 1);
                while (zone < nzone)
                {
                    for(cg=0; cg<ind->nrecv[zone]; cg++)
                    {
                        cg_gl = index_gl[pos_cg];
                        fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
                        nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
                        cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
                        if (bBondComm)
                        {
                            /* Update the charge group presence,
                             * so we can use it in the next pass of the loop.
                             */
                            comm->bLocalCG[cg_gl] = TRUE;
                        }
                        pos_cg++;
                    }
                    if (p == 0)
                    {
                        comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
                    }
                    zone++;
                    zone_cg_range[nzone+zone] = pos_cg;
                }
            }
            else
            {
                /* This part of the code is never executed with bBondComm. */
                merge_cg_buffers(nzone,cd,p,zone_cg_range,
                                 index_gl,recv_i,cg_cm,recv_vr,
                                 cgindex,fr->cginfo_mb,fr->cginfo);
                pos_cg += ind->nrecv[nzone];
            }
            nat_tot += ind->nrecv[nzone+1];
        }
        if (!cd->bInPlace)
        {
            /* Store the atom block for easy copying of communication buffers */
            make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
        }
        nzone += nzone;
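        /* Illustration: nzone doubles with each communicated dimension,
         * so for the usual 3D decomposition the zone count grows as
         * 1 -> 2 -> 4 -> 8, giving the 8 zones of the eighth-shell
         * communication scheme.
         */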
    }
    dd->index_gl = index_gl;
    dd->cgindex  = cgindex;

    dd->ncg_tot = zone_cg_range[zones->n];
    dd->nat_tot = nat_tot;
    comm->nat[ddnatHOME] = dd->nat_home;
    for(i=ddnatZONE; i<ddnatNR; i++)
    {
        comm->nat[i] = dd->nat_tot;
    }

    if (!bBondComm)
    {
        /* We don't need to update cginfo, since that was already done above.
         * So we pass NULL for the forcerec.
         */
        dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
                      NULL,comm->bLocalCG);
    }

    if (debug)
    {
        fprintf(debug,"Finished setting up DD communication, zones:");
        for(c=0; c<zones->n; c++)
        {
            fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
        }
        fprintf(debug,"\n");
    }
}
static void set_cg_boundaries(gmx_domdec_zones_t *zones)
{
    int c;

    for(c=0; c<zones->nizone; c++)
    {
        zones->izone[c].cg1  = zones->cg_range[c+1];
        zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
        zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
    }
}
static int comp_cgsort(const void *a,const void *b)
{
    int comp;

    gmx_cgsort_t *cga,*cgb;
    cga = (gmx_cgsort_t *)a;
    cgb = (gmx_cgsort_t *)b;

    comp = cga->nsc - cgb->nsc;
    if (comp == 0)
    {
        comp = cga->ind_gl - cgb->ind_gl;
    }

    return comp;
}
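/* Example of the resulting order (hypothetical nsc/ind_gl values):
 * {nsc=2,ind_gl=9} < {nsc=3,ind_gl=1} < {nsc=3,ind_gl=4},
 * i.e. the primary key is the ns grid cell and the tie-breaker is the
 * global charge group index. The comparator is used with qsort, e.g.:
 *   qsort(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
 */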
static void order_int_cg(int n,gmx_cgsort_t *sort,
                         int *a,int *buf)
{
    int i;

    /* Order the data */
    for(i=0; i<n; i++)
    {
        buf[i] = a[sort[i].ind];
    }

    /* Copy back to the original array */
    for(i=0; i<n; i++)
    {
        a[i] = buf[i];
    }
}

static void order_vec_cg(int n,gmx_cgsort_t *sort,
                         rvec *v,rvec *buf)
{
    int i;

    /* Order the data */
    for(i=0; i<n; i++)
    {
        copy_rvec(v[sort[i].ind],buf[i]);
    }

    /* Copy back to the original array */
    for(i=0; i<n; i++)
    {
        copy_rvec(buf[i],v[i]);
    }
}
static void order_vec_atom(int ncg,int *cgindex,gmx_cgsort_t *sort,
                           rvec *v,rvec *buf)
{
    int a,atot,cg,cg0,cg1,i;

    /* Order the data */
    a = 0;
    for(cg=0; cg<ncg; cg++)
    {
        cg0 = cgindex[sort[cg].ind];
        cg1 = cgindex[sort[cg].ind+1];
        for(i=cg0; i<cg1; i++)
        {
            copy_rvec(v[i],buf[a]);
            a++;
        }
    }
    atot = a;

    /* Copy back to the original array */
    for(a=0; a<atot; a++)
    {
        copy_rvec(buf[a],v[a]);
    }
}
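/* Worked example (hypothetical values): with cgindex = {0,3,5,9} and
 * sorted order sort[].ind = {2,0,1}, the atom blocks are copied as
 * [5,9), [0,3), [3,5) into buf, so buf holds all 9 atoms in the new
 * charge group order before being copied back into v.
 */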
static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
                         int nsort_new,gmx_cgsort_t *sort_new,
                         gmx_cgsort_t *sort1)
{
    int i1,i2,i_new;

    /* The new indices are not very ordered, so we qsort them */
    qsort(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);

    /* sort2 is already ordered, so now we can merge the two arrays */
    i1    = 0;
    i2    = 0;
    i_new = 0;
    while(i2 < nsort2 || i_new < nsort_new)
    {
        if (i2 == nsort2)
        {
            sort1[i1++] = sort_new[i_new++];
        }
        else if (i_new == nsort_new)
        {
            sort1[i1++] = sort2[i2++];
        }
        else if (sort2[i2].nsc < sort_new[i_new].nsc ||
                 (sort2[i2].nsc == sort_new[i_new].nsc &&
                  sort2[i2].ind_gl < sort_new[i_new].ind_gl))
        {
            sort1[i1++] = sort2[i2++];
        }
        else
        {
            sort1[i1++] = sort_new[i_new++];
        }
    }
}
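/* Worked example (hypothetical keys): sort2 = {1,4,7} (already ordered),
 * sort_new = {5,2}. qsort turns sort_new into {2,5}; the merge then
 * produces sort1 = {1,2,4,5,7}. The merge is linear in the total number
 * of charge groups and only the k moved ones pay the O(k*log(k)) qsort
 * cost, instead of O(n*log(n)) for sorting everything from scratch.
 */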
static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
                          rvec *cgcm,t_forcerec *fr,t_state *state,
                          int ncg_home_old)
{
    gmx_domdec_sort_t *sort;
    gmx_cgsort_t *cgsort,*sort_i;
    int  ncg_new,nsort2,nsort_new,i,cell_index,*ibuf,cgsize;
    rvec *vbuf;

    sort = dd->comm->sort;

    if (dd->ncg_home > sort->sort_nalloc)
    {
        sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
        srenew(sort->sort1,sort->sort_nalloc);
        srenew(sort->sort2,sort->sort_nalloc);
    }

    if (ncg_home_old >= 0)
    {
        /* The charge groups that remained in the same ns grid cell
         * are already completely ordered. So we can sort efficiently
         * by sorting only the charge groups that moved and merging
         * them with the stationary list.
         */
        ncg_new   = 0;
        nsort2    = 0;
        nsort_new = 0;
        for(i=0; i<dd->ncg_home; i++)
        {
            /* Check if this cg did not move to another node */
            cell_index = fr->ns.grid->cell_index[i];
            if (cell_index != 4*fr->ns.grid->ncells)
            {
                if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
                {
                    /* This cg is new on this node or moved ns grid cell */
                    if (nsort_new >= sort->sort_new_nalloc)
                    {
                        sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
                        srenew(sort->sort_new,sort->sort_new_nalloc);
                    }
                    sort_i = &(sort->sort_new[nsort_new++]);
                }
                else
                {
                    /* This cg did not move */
                    sort_i = &(sort->sort2[nsort2++]);
                }
                /* Sort on the ns grid cell indices
                 * and the global topology index
                 */
                sort_i->nsc    = cell_index;
                sort_i->ind_gl = dd->index_gl[i];
                sort_i->ind    = i;
                ncg_new++;
            }
        }
        if (debug)
        {
            fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
                    nsort2,nsort_new);
        }
        /* Sort efficiently */
        ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,sort->sort1);
    }
    else
    {
        cgsort  = sort->sort1;
        ncg_new = 0;
        for(i=0; i<dd->ncg_home; i++)
        {
            /* Sort on the ns grid cell indices
             * and the global topology index
             */
            cgsort[i].nsc    = fr->ns.grid->cell_index[i];
            cgsort[i].ind_gl = dd->index_gl[i];
            cgsort[i].ind    = i;
            if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
            {
                ncg_new++;
            }
        }
        if (debug)
        {
            fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
        }
        /* Determine the order of the charge groups using qsort */
        qsort(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
    }
    cgsort = sort->sort1;

    /* We alloc with the old size, since cgindex is still old */
    check_vec_rvec_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
    vbuf = dd->comm->vbuf.v;

    /* Remove the charge groups which are no longer at home here */
    dd->ncg_home = ncg_new;

    /* Reorder the state */
    for(i=estX; i<estNR; i++)
    {
        if (state->flags & (1<<i))
        {
            switch (i)
            {
            case estX:
                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->x,vbuf);
                break;
            case estV:
                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->v,vbuf);
                break;
            case estSDX:
                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->sd_X,vbuf);
                break;
            case estCGP:
                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->cg_p,vbuf);
                break;
            case estLD_RNG:
            case estLD_RNGI:
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* No ordering required */
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_sort_state");
                break;
            }
        }
    }

    /* Reorder cgcm */
    order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);

    if (dd->ncg_home+1 > sort->ibuf_nalloc)
    {
        sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
        srenew(sort->ibuf,sort->ibuf_nalloc);
    }
    ibuf = sort->ibuf;
    /* Reorder the global cg index */
    order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
    /* Reorder the cginfo */
    order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
    /* Rebuild the local cg index */
    ibuf[0] = 0;
    for(i=0; i<dd->ncg_home; i++)
    {
        cgsize    = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
        ibuf[i+1] = ibuf[i] + cgsize;
    }
    for(i=0; i<dd->ncg_home+1; i++)
    {
        dd->cgindex[i] = ibuf[i];
    }
    /* Set the home atom number */
    dd->nat_home = dd->cgindex[dd->ncg_home];

    /* Copy the sorted ns cell indices back to the ns grid struct */
    for(i=0; i<dd->ncg_home; i++)
    {
        fr->ns.grid->cell_index[i] = cgsort[i].nsc;
    }
    fr->ns.grid->nr = dd->ncg_home;
}
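/* Worked example for the cgindex rebuild above (hypothetical values):
 * old cgindex = {0,3,5,9} and sort order (2,0,1) give cg sizes {4,3,2},
 * so the rebuilt index is ibuf = {0,4,7,9} and dd->nat_home = 9.
 */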
static void add_dd_statistics(gmx_domdec_t *dd)
{
    gmx_domdec_comm_t *comm;
    int ddnat;

    comm = dd->comm;

    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
    {
        comm->sum_nat[ddnat-ddnatZONE] +=
            comm->nat[ddnat] - comm->nat[ddnat-1];
    }
    comm->ndecomp++;
}
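/* Note: comm->nat[] holds cumulative atom counts, so the difference
 * comm->nat[ddnat] - comm->nat[ddnat-1] is the number of atoms added by
 * communication category ddnat alone; the sums are averaged per
 * decomposition in print_dd_statistics() below.
 */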
void reset_dd_statistics_counters(gmx_domdec_t *dd)
{
    gmx_domdec_comm_t *comm;
    int ddnat;

    comm = dd->comm;

    /* Reset all the statistics and counters for total run counting */
    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
    {
        comm->sum_nat[ddnat-ddnatZONE] = 0;
    }
    comm->ndecomp   = 0;
    comm->nload     = 0;
    comm->load_step = 0;
    comm->load_sum  = 0;
    comm->load_max  = 0;
    clear_ivec(comm->load_lim);
    comm->load_mdf = 0;
    comm->load_pme = 0;
}
void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
{
    gmx_domdec_comm_t *comm;
    int ddnat;
    double av;

    comm = cr->dd->comm;

    gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);

    if (fplog == NULL)
    {
        return;
    }

    fprintf(fplog,"\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");

    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
    {
        av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
        switch(ddnat)
        {
        case ddnatZONE:
            fprintf(fplog,
                    " av. #atoms communicated per step for force:  %d x %.1f\n",
                    2,av);
            break;
        case ddnatVSITE:
            if (cr->dd->vsite_comm)
            {
                fprintf(fplog,
                        " av. #atoms communicated per step for vsites: %d x %.1f\n",
                        (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
                        av);
            }
            break;
        case ddnatCON:
            if (cr->dd->constraint_comm)
            {
                fprintf(fplog,
                        " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
                        1 + ir->nLincsIter,av);
            }
            break;
        default:
            gmx_incons(" Unknown type for DD statistics");
        }
    }
    fprintf(fplog,"\n");

    if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
    {
        print_dd_load_av(fplog,cr->dd);
    }
}
void dd_partition_system(FILE            *fplog,
                         gmx_step_t      step,
                         t_commrec       *cr,
                         bool            bMasterState,
                         int             nstglobalcomm,
                         t_state         *state_global,
                         gmx_mtop_t      *top_global,
                         t_inputrec      *ir,
                         t_state         *state_local,
                         rvec            **f,
                         t_mdatoms       *mdatoms,
                         gmx_localtop_t  *top_local,
                         t_forcerec      *fr,
                         gmx_vsite_t     *vsite,
                         gmx_shellfc_t   shellfc,
                         gmx_constr_t    constr,
                         t_nrnb          *nrnb,
                         gmx_wallcycle_t wcycle,
                         bool            bVerbose)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    gmx_ddbox_t ddbox;
    t_block *cgs_gl;
    gmx_step_t step_pcoupl;
    rvec cell_ns_x0,cell_ns_x1;
    int  i,j,n,cg0=0,ncg_home_old=-1,nat_f_novirsum;
    bool bBoxChanged,bNStGlobalComm,bDoDLB,bCheckDLB,bTurnOnDLB,bLogLoad;
    bool bRedist,bSortCG,bResortAll;
    ivec ncells_old,np;
    real grid_density;
    char sbuf[22];

    dd = cr->dd;
    comm = dd->comm;

    bBoxChanged = (bMasterState || DEFORM(*ir));
    if (ir->epc != epcNO)
    {
        /* With nstcalcenergy > 1 pressure coupling happens
         * one step after calculating the energies.
         * Box scaling happens at the end of the MD step,
         * after the DD partitioning.
         * We therefore have to do DLB in the first partitioning
         * after an MD step where P-coupling occurred.
         * We need to determine the last step in which p-coupling occurred.
         */
        n = ir->nstcalcenergy;
        if (n == 1)
        {
            step_pcoupl = step - 1;
        }
        else
        {
            step_pcoupl = ((step - 1)/n)*n + 1;
        }
        if (step_pcoupl >= comm->partition_step)
        {
            bBoxChanged = TRUE;
        }
    }
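    /* Worked example (hypothetical values): with n = nstcalcenergy = 10
     * and step = 25, step_pcoupl = ((25-1)/10)*10 + 1 = 21; if the last
     * partitioning was at step 20, then 21 >= 20 and the box is treated
     * as changed.
     */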
    bNStGlobalComm = (step >= comm->partition_step + nstglobalcomm);

    if (!comm->bDynLoadBal)
    {
        bDoDLB = FALSE;
    }
    else
    {
        /* Should we do dynamic load balancing this step?
         * Since it requires (possibly expensive) global communication,
         * we might want to do DLB less frequently.
         */
        if (bBoxChanged || ir->epc != epcNO)
        {
            bDoDLB = bBoxChanged;
        }
        else
        {
            bDoDLB = bNStGlobalComm;
        }
    }

    /* Check if we have recorded loads on the nodes */
    if (comm->bRecordLoad && dd_load_count(comm))
    {
        if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
        {
            /* Check if we should use DLB at the second partitioning
             * and every 100 partitionings,
             * so the extra communication cost is negligible.
             */
            n = max(100,nstglobalcomm);
            bCheckDLB = (comm->n_load_collect == 0 ||
                         comm->n_load_have % n == n-1);
        }
        else
        {
            bCheckDLB = FALSE;
        }
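        /* Illustration: with nstglobalcomm <= 100 we get n = 100, so the
         * DLB check runs at the first partitioning with load data
         * (n_load_collect == 0) and afterwards whenever
         * n_load_have % 100 == 99, i.e. roughly every 100 partitionings.
         */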
        /* Print load every nstlog, first and last step to the log file */
        bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
                    comm->n_load_collect == 0 ||
                    (step + ir->nstlist > ir->init_step + ir->nsteps));

        /* Avoid extra communication due to verbose screen output
         * when nstglobalcomm is set.
         */
        if (bDoDLB || bLogLoad || bCheckDLB ||
            (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
        {
            get_load_distribution(dd,wcycle);
            if (DDMASTER(dd))
            {
                if (bLogLoad)
                {
                    dd_print_load(fplog,dd,step-1);
                }
                if (bVerbose)
                {
                    dd_print_load_verbose(dd);
                }
            }
            comm->n_load_collect++;

            if (bCheckDLB)
            {
                /* Since the timings are node dependent, the master decides */
                if (DDMASTER(dd))
                {
                    bTurnOnDLB =
                        (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
                    if (debug)
                    {
                        fprintf(debug,"step %s, imb loss %f\n",
                                gmx_step_str(step,sbuf),
                                dd_force_imb_perf_loss(dd));
                    }
                }
                dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
                if (bTurnOnDLB)
                {
                    turn_on_dlb(fplog,cr,step);
                    bDoDLB = TRUE;
                }
            }
        }
        comm->n_load_have++;
    }

    cgs_gl = &comm->cgs_gl;

    bRedist = FALSE;
    if (bMasterState)
    {
        /* Clear the old state */
        clear_dd_indices(dd,0,0);

        set_ddbox(dd,bMasterState,cr,ir,state_global->box,
                  TRUE,cgs_gl,state_global->x,&ddbox);

        get_cg_distribution(fplog,step,dd,cgs_gl,
                            state_global->box,&ddbox,state_global->x);

        dd_distribute_state(dd,cgs_gl,
                            state_global,state_local,f);

        dd_make_local_cgs(dd,&top_local->cgs);

        if (dd->ncg_home > fr->cg_nalloc)
        {
            dd_realloc_fr_cg(fr,dd->ncg_home);
        }
        calc_cgcm(fplog,0,dd->ncg_home,
                  &top_local->cgs,state_local->x,fr->cg_cm);

        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);

        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);

        cg0 = 0;
    }
    else if (state_local->ddp_count != dd->ddp_count)
    {
        if (state_local->ddp_count > dd->ddp_count)
        {
            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
        }

        if (state_local->ddp_count_cg_gl != state_local->ddp_count)
        {
            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
        }

        /* Clear the old state */
        clear_dd_indices(dd,0,0);

        /* Build the new indices */
        rebuild_cgindex(dd,cgs_gl->index,state_local);
        make_dd_indices(dd,cgs_gl->index,0);

        /* Redetermine the cg COMs */
        calc_cgcm(fplog,0,dd->ncg_home,
                  &top_local->cgs,state_local->x,fr->cg_cm);

        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);

        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);

        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
                  TRUE,&top_local->cgs,state_local->x,&ddbox);

        bRedist = comm->bDynLoadBal;
    }
    else
    {
        /* We have the full state, only redistribute the cgs */

        /* Clear the non-home indices */
        clear_dd_indices(dd,dd->ncg_home,dd->nat_home);

        /* Avoid global communication for dim's without pbc and -gcom */
        if (!bNStGlobalComm)
        {
            copy_rvec(comm->box0    ,ddbox.box0    );
            copy_rvec(comm->box_size,ddbox.box_size);
        }
        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
                  bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);

        bBoxChanged = TRUE;
        bRedist     = TRUE;
    }
    /* For dim's without pbc and -gcom */
    copy_rvec(ddbox.box0    ,comm->box0    );
    copy_rvec(ddbox.box_size,comm->box_size);

    set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
                      step,wcycle);

    if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
    {
        write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
    }

    /* Check if we should sort the charge groups */
    if (comm->nstSortCG > 0)
    {
        bSortCG = (bMasterState ||
                   (bRedist && (step % comm->nstSortCG == 0)));
    }
    else
    {
        bSortCG = FALSE;
    }
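    /* Illustration: with, say, nstSortCG = 50 the home charge groups are
     * re-sorted at every partitioning from a master state and otherwise
     * only on redistribution steps whose step number is a multiple of 50.
     */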
    ncg_home_old = dd->ncg_home;

    if (bRedist)
    {
        cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
                                 state_local,f,fr,mdatoms,
                                 !bSortCG,nrnb);
    }

    get_nsgrid_boundaries(fr->ns.grid,dd,
                          state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
                          dd->ncg_home,fr->cg_cm,
                          cell_ns_x0,cell_ns_x1,&grid_density);

    if (bBoxChanged)
    {
        comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
    }

    copy_ivec(fr->ns.grid->n,ncells_old);
    grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
               state_local->box,cell_ns_x0,cell_ns_x1,
               fr->rlistlong,grid_density);
    /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
    copy_ivec(ddbox.tric_dir,comm->tric_dir);

    if (bSortCG)
    {
        /* Sort the state on charge group position.
         * This enables exact restarts from this step.
         * It also improves performance by about 15% with larger numbers
         * of atoms per node.
         */

        /* Fill the ns grid with the home cell,
         * so we can sort with the indices.
         */
        set_zones_ncg_home(dd);
        fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
                  0,dd->ncg_home,fr->cg_cm);

        /* Check if we can use the old order and ns grid cell indices
         * of the charge groups to sort the charge groups efficiently.
         */
        bResortAll = (bMasterState ||
                      fr->ns.grid->n[XX] != ncells_old[XX] ||
                      fr->ns.grid->n[YY] != ncells_old[YY] ||
                      fr->ns.grid->n[ZZ] != ncells_old[ZZ]);

        if (debug)
        {
            fprintf(debug,"Step %s, sorting the %d home charge groups\n",
                    gmx_step_str(step,sbuf),dd->ncg_home);
        }
        dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
                      bResortAll ? -1 : ncg_home_old);
        /* Rebuild all the indices */
        cg0 = 0;
        ga2la_clear(dd->ga2la);
    }

    /* Set up the communication and communicate the coordinates */
    setup_dd_communication(dd,state_local->box,&ddbox,fr);

    /* Set the indices */
    make_dd_indices(dd,cgs_gl->index,cg0);

    /* Set the charge group boundaries for neighbor searching */
    set_cg_boundaries(&comm->zones);

    /*
    write_dd_pdb("dd_home",step,"dump",top_global,cr,
                 -1,state_local->x,state_local->box);
    */

    /* Extract a local topology from the global topology */
    for(i=0; i<dd->ndim; i++)
    {
        np[dd->dim[i]] = comm->cd[i].np;
    }
    dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
                      comm->cellsize_min,np,
                      fr,vsite,top_global,top_local);

    /* Set up the special atom communication */
    n = comm->nat[ddnatZONE];
    for(i=ddnatZONE+1; i<ddnatNR; i++)
    {
        switch(i)
        {
        case ddnatVSITE:
            if (vsite && vsite->n_intercg_vsite)
            {
                n = dd_make_local_vsites(dd,n,top_local->idef.il);
            }
            break;
        case ddnatCON:
            if (dd->bInterCGcons)
            {
                /* Only for inter-cg constraints we need special code */
                n = dd_make_local_constraints(dd,n,top_global,
                                              constr,ir->nProjOrder,
                                              &top_local->idef.il[F_CONSTR]);
            }
            break;
        default:
            gmx_incons("Unknown special atom type setup");
        }
        comm->nat[i] = n;
    }

    /* Make space for the extra coordinates for virtual site
     * or constraint communication.
     */
    state_local->natoms = comm->nat[ddnatNR-1];
    if (state_local->natoms > state_local->nalloc)
    {
        dd_realloc_state(state_local,f,state_local->natoms);
    }

    if (fr->bF_NoVirSum)
    {
        if (vsite && vsite->n_intercg_vsite)
        {
            nat_f_novirsum = comm->nat[ddnatVSITE];
        }
        else
        {
            if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
            {
                nat_f_novirsum = dd->nat_tot;
            }
            else
            {
                nat_f_novirsum = dd->nat_home;
            }
        }
    }
    else
    {
        nat_f_novirsum = 0;
    }
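    /* Summary of the selection above: with forces that should not
     * contribute to the virial sum, the count is nat[ddnatVSITE] when
     * there are inter-cg vsites, nat_tot with full electrostatics plus
     * inter-cg exclusions, and nat_home otherwise; without bF_NoVirSum
     * no such atoms are needed, hence 0.
     */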
    /* Set the number of atoms required for the force calculation */
    forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,dd->nat_tot,nat_f_novirsum);

    /* We make all the mdatoms up to nat_tot_con.
     * We could save some work by only setting invmass
     * between nat_tot and nat_tot_con.
     */
    /* This call also sets the new number of home particles to dd->nat_home */
    atoms2md(top_global,ir,
             comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);

    if (shellfc)
    {
        /* Make the local shell stuff, currently no communication is done */
        make_local_shells(cr,mdatoms,shellfc);
    }

    if (ir->implicit_solvent)
    {
        make_local_gb(cr,fr->born,ir->gb_algorithm);
    }

    if (!(cr->duty & DUTY_PME))
    {
        /* Send the charges to our PME-only node */
        gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
                       mdatoms->chargeA,mdatoms->chargeB,
                       comm->ddpme[0].maxshift,comm->ddpme[1].maxshift);
    }

    if (constr)
    {
        set_constraints(constr,top_local,ir,mdatoms,cr);
    }

    if (ir->ePull != epullNO)
    {
        /* Update the local pull groups */
        dd_make_local_pull_groups(dd,ir->pull,mdatoms);
    }

    if (ir->bRot)
    {
        /* Update the local rotation groups */
        dd_make_local_rotation_groups(dd,ir->rot,mdatoms);
    }

    add_dd_statistics(dd);

    /* Make sure we only count the cycles for this DD partitioning */
    clear_dd_cycle_counts(dd);

    /* Because the order of the atoms might have changed since
     * the last vsite construction, we need to communicate the constructing
     * atom coordinates again (for spreading the forces this MD step).
     */
    dd_move_x_vsites(dd,state_local->box,state_local->x);

    if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
    {
        dd_move_x(dd,state_local->box,state_local->x);
        write_dd_pdb("dd_dump",step,"dump",top_global,cr,
                     -1,state_local->x,state_local->box);
    }

    /* Store the partitioning step */
    comm->partition_step = step;

    /* Increase the DD partitioning counter */
    dd->ddp_count++;
    /* The state currently matches this DD partitioning count, store it */
    state_local->ddp_count = dd->ddp_count;
    if (bMasterState)
    {
        /* The DD master node knows the complete cg distribution,
         * store the count so we can possibly skip the cg info communication.
         */
        comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
    }

    if (comm->DD_debug > 0)
    {
        /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
        check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
                                "after partitioning");