/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 * This file is part of Gromacs        Copyright (c) 1991-2008
 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org
 *
 * And Hey:
 * Gnomes, ROck Monsters And Chili Sauce
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <time.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include "typedefs.h"
#include "smalloc.h"
#include "vec.h"
#include "domdec.h"
#include "domdec_network.h"
#include "nrnb.h"
#include "pbc.h"
#include "chargegroup.h"
#include "constr.h"
#include "mdatoms.h"
#include "names.h"
#include "pdbio.h"
#include "futil.h"
#include "force.h"
#include "pme.h"
#include "pull.h"
#include "gmx_wallcycle.h"
#include "mdrun.h"
#include "nsgrid.h"
#include "shellfc.h"
#include "mtop_util.h"
#include "gmxfio.h"
#include "gmx_ga2la.h"
#include "gmx_sort.h"

#ifdef GMX_LIB_MPI
#include <mpi.h>
#endif
#ifdef GMX_THREADS
#include "tmpi.h"
#endif
#define DDRANK(dd,rank)    (rank)
#define DDMASTERRANK(dd)   (dd->masterrank)

typedef struct gmx_domdec_master
{
    /* The cell boundaries */
    real **cell_x;
    /* The global charge group division */
    int  *ncg;    /* Number of home charge groups for each node */
    int  *index;  /* Index (size nnodes+1) into cg               */
    int  *cg;     /* Global charge group index                   */
    int  *nat;    /* Number of home atoms for each node          */
    int  *ibuf;   /* Buffer for communication                    */
    rvec *vbuf;   /* Buffer for state scattering and gathering   */
} gmx_domdec_master_t;
typedef struct
{
    /* The numbers of charge groups to send and receive for each cell
     * that requires communication; the last entry contains the total
     * number of atoms that need to be communicated.
     */
    int nsend[DD_MAXIZONE+2];
    int nrecv[DD_MAXIZONE+2];
    /* The charge groups to send */
    int *index;
    int nalloc;
    /* The atom range for non-in-place communication */
    int cell2at0[DD_MAXIZONE];
    int cell2at1[DD_MAXIZONE];
} gmx_domdec_ind_t;
typedef struct
{
    int  np;               /* Number of grid pulses in this dimension */
    int  np_dlb;           /* For dlb, for use with edlbAUTO          */
    gmx_domdec_ind_t *ind; /* The indices to communicate, size np     */
    int  np_nalloc;
    bool bInPlace;         /* Can we communicate in place?            */
} gmx_domdec_comm_dim_t;

typedef struct
{
    bool *bCellMin;    /* Temp. var.: is this cell size at the limit     */
    real *cell_f;      /* State var.: cell boundaries, box relative      */
    real *old_cell_f;  /* Temp. var.: old cell size                      */
    real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
    real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
    real *bound_min;   /* Temp. var.: lower limit for cell boundary      */
    real *bound_max;   /* Temp. var.: upper limit for cell boundary      */
    bool bLimited;     /* State var.: is DLB limited in this dim and row */
    real *buf_ncd;     /* Temp. var.                                     */
} gmx_domdec_root_t;
#define DD_NLOAD_MAX 9

/* Here floats are accurate enough, since these variables
 * only influence the load balancing, not the actual MD results.
 */
typedef struct
{
    int   nload;
    float *load;
    float sum;
    float max;
    float sum_m;
    float cvol_min;
    float mdf;
    float pme;
    int   flags;
} gmx_domdec_load_t;

typedef struct
{
    int nsc;
    int ind_gl;
    int ind;
} gmx_cgsort_t;

typedef struct
{
    gmx_cgsort_t *sort1,*sort2;
    int  sort_nalloc;
    gmx_cgsort_t *sort_new;
    int  sort_new_nalloc;
    int  *ibuf;
    int  ibuf_nalloc;
} gmx_domdec_sort_t;

typedef struct
{
    rvec *v;
    int  nalloc;
} vec_rvec_t;
/* This enum determines the order of the coordinates.
 * ddnatHOME and ddnatZONE should be first and second,
 * the others can be ordered as wanted.
 */
enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };

enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
typedef struct
{
    int  dim;        /* The dimension                                     */
    bool dim_match;  /* Tells if DD and PME dims match                    */
    int  nslab;      /* The number of PME slabs in this dimension         */
    real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
    int  *pp_min;    /* The minimum pp node location, size nslab          */
    int  *pp_max;    /* The maximum pp node location, size nslab          */
    int  maxshift;   /* The maximum shift for coordinate redistribution in PME */
} gmx_ddpme_t;

typedef struct
{
    real min0;    /* The minimum bottom of this zone                       */
    real max1;    /* The maximum top of this zone                          */
    real mch0;    /* The maximum bottom communication height for this zone */
    real mch1;    /* The maximum top communication height for this zone    */
    real p1_0;    /* The bottom value of the first cell in this zone       */
    real p1_1;    /* The top value of the first cell in this zone          */
} gmx_ddzone_t;
typedef struct gmx_domdec_comm
{
    /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
     * unless stated otherwise.
     */

    /* The number of decomposition dimensions for PME, 0: no PME */
    int  npmedecompdim;
    /* The number of nodes doing PME (PP/PME or only PME) */
    int  npmenodes;
    int  npmenodes_major;
    int  npmenodes_minor;
    /* The communication setup including the PME only nodes */
    bool bCartesianPP_PME;
    ivec ntot;
    int  cartpmedim;
    int  *pmenodes;          /* size npmenodes                           */
    int  *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
                              * but not with bCartesianPP_PME            */
    gmx_ddpme_t ddpme[2];

    /* The DD particle-particle nodes only */
    bool bCartesianPP;
    int  *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */

    /* The global charge groups */
    t_block cgs_gl;

    /* Should we sort the cgs */
    int  nstSortCG;
    gmx_domdec_sort_t *sort;

    /* Are there bonded and multi-body interactions between charge groups? */
    bool bInterCGBondeds;
    bool bInterCGMultiBody;

    /* Data for the optional bonded interaction atom communication range */
    bool bBondComm;
    t_blocka *cglink;
    char *bLocalCG;

    /* The DLB option */
    int  eDLB;
    /* Are we actually using DLB? */
    bool bDynLoadBal;

    /* Cell sizes for static load balancing, first index cartesian */
    real **slb_frac;

    /* The width of the communicated boundaries */
    real cutoff_mbody;
    real cutoff;
    /* The minimum cell size (including triclinic correction) */
    rvec cellsize_min;
    /* For dlb, for use with edlbAUTO */
    rvec cellsize_min_dlb;
    /* The lower limit for the DD cell size with DLB */
    real cellsize_limit;
    /* Effectively no NB cut-off limit with DLB for systems without PBC? */
    bool bVacDLBNoLimit;

    /* tric_dir is only stored here because dd_get_ns_ranges needs it */
    ivec tric_dir;
    /* box0 and box_size are required with dim's without pbc and -gcom */
    rvec box0;
    rvec box_size;

    /* The cell boundaries */
    rvec cell_x0;
    rvec cell_x1;

    /* The old location of the cell boundaries, to check cg displacements */
    rvec old_cell_x0;
    rvec old_cell_x1;

    /* The communication setup and charge group boundaries for the zones */
    gmx_domdec_zones_t zones;

    /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
     * cell boundaries of neighboring cells for dynamic load balancing.
     */
    gmx_ddzone_t zone_d1[2];
    gmx_ddzone_t zone_d2[2][2];

    /* The coordinate/force communication setup and indices */
    gmx_domdec_comm_dim_t cd[DIM];
    /* The maximum number of cells to communicate with in one dimension */
    int  maxpulse;

    /* Which cg distribution is stored on the master node */
    int master_cg_ddp_count;

    /* The number of cg's received from the direct neighbors */
    int  zone_ncg1[DD_MAXZONE];

    /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
    int  nat[ddnatNR];

    /* Communication buffer for general use */
    int  *buf_int;
    int  nalloc_int;

    /* Communication buffer for general use */
    vec_rvec_t vbuf;

    /* Communication buffers only used with multiple grid pulses */
    int  *buf_int2;
    int  nalloc_int2;
    vec_rvec_t vbuf2;

    /* Communication buffers for local redistribution */
    int  **cggl_flag;
    int  cggl_flag_nalloc[DIM*2];
    rvec **cgcm_state;
    int  cgcm_state_nalloc[DIM*2];

    /* Cell sizes for dynamic load balancing */
    gmx_domdec_root_t **root;
    real *cell_f_row;
    real cell_f0[DIM];
    real cell_f1[DIM];
    real cell_f_max0[DIM];
    real cell_f_min1[DIM];

    /* Stuff for load communication */
    bool bRecordLoad;
    gmx_domdec_load_t *load;
#ifdef GMX_MPI
    MPI_Comm *mpi_comm_load;
#endif
    /* Cycle counters */
    float cycl[ddCyclNr];
    int   cycl_n[ddCyclNr];
    float cycl_max[ddCyclNr];
    /* Flop counter (0=no, 1=yes, 2=with (eFlop-1)*5% noise) */
    int eFlop;
    double flop;
    int    flop_n;
    /* How often have we had load measurements */
    int    n_load_have;
    /* How often have we collected the load measurements */
    int    n_load_collect;

    /* Statistics */
    double sum_nat[ddnatNR-ddnatZONE];
    int    ndecomp;
    int    nload;
    double load_step;
    double load_sum;
    double load_max;
    ivec   load_lim;
    double load_mdf;
    double load_pme;

    /* The last partition step */
    gmx_large_int_t partition_step;

    /* Debugging */
    int  nstDDDump;
    int  nstDDDumpGrid;
    int  DD_debug;
} gmx_domdec_comm_t;
/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_CGIBS 2

/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_FLAG_NRCG  65535
#define DD_FLAG_FW(d) (1<<(16+(d)*2))
#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
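
/* Illustration of the flag layout (an editor's sketch, not part of the
 * original code): the lower 16 bits of a cggl_flag word hold the
 * charge-group count, the bits above hold per-dimension forward/backward
 * move flags, e.g.
 *
 *     int flags = ncg | DD_FLAG_BW(1);          // count + backward move, dim 1
 *     int nrcg  = flags & DD_FLAG_NRCG;         // recover the count
 *     int bw1   = (flags & DD_FLAG_BW(1)) != 0; // test the move flag
 *
 * For d = 1, DD_FLAG_FW(1) is bit 18 and DD_FLAG_BW(1) is bit 19.
 */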
/* Zone permutation required to obtain consecutive charge groups
 * for neighbor searching.
 */
static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };

/* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
 * components see only j zones with that component 0.
 */

/* The DD zone order */
static const ivec dd_zo[DD_MAXZONE] =
  {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};

/* The 3D setup */
#define dd_z3n  8
#define dd_zp3n 4
static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};

/* The 2D setup */
#define dd_z2n  4
#define dd_zp2n 2
static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};

/* The 1D setup */
#define dd_z1n  2
#define dd_zp1n 1
static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
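
/* A reading of the triplets above (illustrative; inferred from how the
 * zone setup uses them, not stated in the original comments): each dd_zp
 * entry appears to be {i-zone, first j-zone, last j-zone + 1}. With the
 * 3D setup, {1,3,6} would mean that i zone 1 interacts with j zones 3, 4
 * and 5, all of which have x component 0 in dd_zo, consistent with the
 * rule stated above dd_zo.
 */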
/* Factors used to avoid problems due to rounding issues */
#define DD_CELL_MARGIN       1.0001
#define DD_CELL_MARGIN2      1.00005
/* Factor to account for pressure scaling during nstlist steps */
#define DD_PRES_SCALE_MARGIN 1.02

/* Allowed performance loss before we DLB or warn */
#define DD_PERF_LOSS 0.05

#define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))

/* Use separate MPI send and receive commands
 * when nnodes <= GMX_DD_NNODES_SENDRECV.
 * This saves memory (and some copying for small nnodes).
 * For high parallelization scatter and gather calls are used.
 */
#define GMX_DD_NNODES_SENDRECV 4
/*
#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])

static void index2xyz(ivec nc,int ind,ivec xyz)
{
    xyz[XX] = ind % nc[XX];
    xyz[YY] = (ind / nc[XX]) % nc[YY];
    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
}
*/

/* This order is required to minimize the coordinate communication in PME
 * which uses decomposition in the x direction.
 */
#define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])

static void ddindex2xyz(ivec nc,int ind,ivec xyz)
{
    xyz[XX] = ind / (nc[YY]*nc[ZZ]);
    xyz[YY] = (ind / nc[ZZ]) % nc[YY];
    xyz[ZZ] = ind % nc[ZZ];
}
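
/* Worked example (illustration only): with nc = {2,3,4} the cell with
 * Cartesian coordinates {1,2,3} gets
 *
 *     dd_index(nc,c) = ((1*3 + 2)*4) + 3 = 23
 *
 * and ddindex2xyz inverts this: 23/(3*4) = 1, (23/4)%3 = 2, 23%4 = 3.
 * z runs fastest, so x-slabs of cells stay consecutive, which serves the
 * x-decomposed PME communication mentioned above.
 */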
static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
{
    int ddindex;
    int ddnodeid=-1;

    ddindex = dd_index(dd->nc,c);
    if (dd->comm->bCartesianPP_PME)
    {
        ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
    }
    else if (dd->comm->bCartesianPP)
    {
#ifdef GMX_MPI
        MPI_Cart_rank(dd->mpi_comm_all,c,&ddnodeid);
#endif
    }
    else
    {
        ddnodeid = ddindex;
    }

    return ddnodeid;
}

static bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
{
    return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
}

int ddglatnr(gmx_domdec_t *dd,int i)
{
    int atnr;

    if (dd == NULL)
    {
        atnr = i + 1;
    }
    else
    {
        if (i >= dd->comm->nat[ddnatNR-1])
        {
            gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,dd->comm->nat[ddnatNR-1]);
        }
        atnr = dd->gatindex[i] + 1;
    }

    return atnr;
}
t_block *dd_charge_groups_global(gmx_domdec_t *dd)
{
    return &dd->comm->cgs_gl;
}

static void vec_rvec_init(vec_rvec_t *v)
{
    v->nalloc = 0;
    v->v      = NULL;
}

static void vec_rvec_check_alloc(vec_rvec_t *v,int n)
{
    if (n > v->nalloc)
    {
        v->nalloc = over_alloc_dd(n);
        srenew(v->v,v->nalloc);
    }
}

void dd_store_state(gmx_domdec_t *dd,t_state *state)
{
    int i;

    if (state->ddp_count != dd->ddp_count)
    {
        gmx_incons("The state does not match the domain decomposition state");
    }

    state->ncg_gl = dd->ncg_home;
    if (state->ncg_gl > state->cg_gl_nalloc)
    {
        state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
        srenew(state->cg_gl,state->cg_gl_nalloc);
    }
    for(i=0; i<state->ncg_gl; i++)
    {
        state->cg_gl[i] = dd->index_gl[i];
    }

    state->ddp_count_cg_gl = dd->ddp_count;
}
gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
{
    return &dd->comm->zones;
}

void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
                      int *jcg0,int *jcg1,ivec shift0,ivec shift1)
{
    gmx_domdec_zones_t *zones;
    int izone,d,dim;

    zones = &dd->comm->zones;

    izone = 0;
    while (icg >= zones->izone[izone].cg1)
    {
        izone++;
    }

    if (izone == 0)
    {
        *jcg0 = icg;
    }
    else if (izone < zones->nizone)
    {
        *jcg0 = zones->izone[izone].jcg0;
    }
    else
    {
        gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
                  icg,izone,zones->nizone);
    }

    *jcg1 = zones->izone[izone].jcg1;

    for(d=0; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        shift0[dim] = zones->izone[izone].shift0[dim];
        shift1[dim] = zones->izone[izone].shift1[dim];
        if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
        {
            /* A conservative approach, this can be optimized */
            shift0[dim] -= 1;
            shift1[dim] += 1;
        }
    }
}

int dd_natoms_vsite(gmx_domdec_t *dd)
{
    return dd->comm->nat[ddnatVSITE];
}

void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
{
    *at_start = dd->comm->nat[ddnatCON-1];
    *at_end   = dd->comm->nat[ddnatCON];
}
void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    rvec shift={0,0,0},*buf,*rbuf;
    bool bPBC,bScrew;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = comm->vbuf.v;

    nzone = 1;
    nat_tot = dd->nat_home;
    for(d=0; d<dd->ndim; d++)
    {
        bPBC   = (dd->ci[dd->dim[d]] == 0);
        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
        if (bPBC)
        {
            copy_rvec(box[dd->dim[d]],shift);
        }
        cd = &comm->cd[d];
        for(p=0; p<cd->np; p++)
        {
            ind = &cd->ind[p];
            index = ind->index;
            n = 0;
            if (!bPBC)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        copy_rvec(x[j],buf[n]);
                        n++;
                    }
                }
            }
            else if (!bScrew)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        /* We need to shift the coordinates */
                        rvec_add(x[j],shift,buf[n]);
                        n++;
                    }
                }
            }
            else
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        /* Shift x */
                        buf[n][XX] = x[j][XX] + shift[XX];
                        /* Rotate y and z.
                         * This operation requires a special shift force
                         * treatment, which is performed in calc_vir.
                         */
                        buf[n][YY] = box[YY][YY] - x[j][YY];
                        buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
                        n++;
                    }
                }
            }

            if (cd->bInPlace)
            {
                rbuf = x + nat_tot;
            }
            else
            {
                rbuf = comm->vbuf2.v;
            }
            /* Send and receive the coordinates */
            dd_sendrecv_rvec(dd, d, dddirBackward,
                             buf,  ind->nsend[nzone+1],
                             rbuf, ind->nrecv[nzone+1]);
            if (!cd->bInPlace)
            {
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        copy_rvec(rbuf[j],x[i]);
                        j++;
                    }
                }
            }
            nat_tot += ind->nrecv[nzone+1];
        }
        nzone += nzone;
    }
}
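
/* Note on the loop structure above (illustrative summary): nzone doubles
 * after each communicated dimension (1 -> 2 -> 4 -> 8), since every pulse
 * along a new dimension supplies halo atoms for all zones built so far.
 * With ndim = 3 this yields the 8 zones of dd_zo; dd_move_f below walks
 * the dimensions in the reverse order and halves nzone accordingly.
 */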
void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    rvec *buf,*sbuf;
    ivec vis;
    int  is;
    bool bPBC,bScrew;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = comm->vbuf.v;

    n = 0;
    nzone = comm->zones.n/2;
    nat_tot = dd->nat_tot;
    for(d=dd->ndim-1; d>=0; d--)
    {
        bPBC   = (dd->ci[dd->dim[d]] == 0);
        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
        if (fshift == NULL && !bScrew)
        {
            bPBC = FALSE;
        }
        /* Determine which shift vector we need */
        clear_ivec(vis);
        vis[dd->dim[d]] = 1;
        is = IVEC2IS(vis);

        cd = &comm->cd[d];
        for(p=cd->np-1; p>=0; p--) {
            ind = &cd->ind[p];
            nat_tot -= ind->nrecv[nzone+1];
            if (cd->bInPlace)
            {
                sbuf = f + nat_tot;
            }
            else
            {
                sbuf = comm->vbuf2.v;
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        copy_rvec(f[i],sbuf[j]);
                        j++;
                    }
                }
            }
            /* Communicate the forces */
            dd_sendrecv_rvec(dd, d, dddirForward,
                             sbuf, ind->nrecv[nzone+1],
                             buf,  ind->nsend[nzone+1]);
            index = ind->index;
            /* Add the received forces */
            n = 0;
            if (!bPBC)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        rvec_inc(f[j],buf[n]);
                        n++;
                    }
                }
            }
            else if (!bScrew)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        rvec_inc(f[j],buf[n]);
                        /* Add this force to the shift force */
                        rvec_inc(fshift[is],buf[n]);
                        n++;
                    }
                }
            }
            else
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        /* Rotate the force */
                        f[j][XX] += buf[n][XX];
                        f[j][YY] -= buf[n][YY];
                        f[j][ZZ] -= buf[n][ZZ];
                        if (fshift)
                        {
                            /* Add this force to the shift force */
                            rvec_inc(fshift[is],buf[n]);
                        }
                        n++;
                    }
                }
            }
        }
        nzone /= 2;
    }
}
void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    real *buf,*rbuf;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = &comm->vbuf.v[0][0];

    nzone = 1;
    nat_tot = dd->nat_home;
    for(d=0; d<dd->ndim; d++)
    {
        cd = &comm->cd[d];
        for(p=0; p<cd->np; p++)
        {
            ind = &cd->ind[p];
            index = ind->index;
            n = 0;
            for(i=0; i<ind->nsend[nzone]; i++)
            {
                at0 = cgindex[index[i]];
                at1 = cgindex[index[i]+1];
                for(j=at0; j<at1; j++)
                {
                    buf[n] = v[j];
                    n++;
                }
            }

            if (cd->bInPlace)
            {
                rbuf = v + nat_tot;
            }
            else
            {
                rbuf = &comm->vbuf2.v[0][0];
            }
            /* Send and receive the coordinates */
            dd_sendrecv_real(dd, d, dddirBackward,
                             buf,  ind->nsend[nzone+1],
                             rbuf, ind->nrecv[nzone+1]);
            if (!cd->bInPlace)
            {
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        v[i] = rbuf[j];
                        j++;
                    }
                }
            }
            nat_tot += ind->nrecv[nzone+1];
        }
        nzone += nzone;
    }
}
void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    real *buf,*sbuf;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = &comm->vbuf.v[0][0];

    n = 0;
    nzone = comm->zones.n/2;
    nat_tot = dd->nat_tot;
    for(d=dd->ndim-1; d>=0; d--)
    {
        cd = &comm->cd[d];
        for(p=cd->np-1; p>=0; p--) {
            ind = &cd->ind[p];
            nat_tot -= ind->nrecv[nzone+1];
            if (cd->bInPlace)
            {
                sbuf = v + nat_tot;
            }
            else
            {
                sbuf = &comm->vbuf2.v[0][0];
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        sbuf[j] = v[i];
                        j++;
                    }
                }
            }
            /* Communicate the data, force-style */
            dd_sendrecv_real(dd, d, dddirForward,
                             sbuf, ind->nrecv[nzone+1],
                             buf,  ind->nsend[nzone+1]);
            index = ind->index;
            /* Sum the received data */
            n = 0;
            for(i=0; i<ind->nsend[nzone]; i++)
            {
                at0 = cgindex[index[i]];
                at1 = cgindex[index[i]+1];
                for(j=at0; j<at1; j++)
                {
                    v[j] += buf[n];
                    n++;
                }
            }
        }
        nzone /= 2;
    }
}
static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
{
    fprintf(fp,"zone d0 %d d1 %d d2 %d  min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
            d,i,j,
            zone->min0,zone->max1,
            zone->mch0,zone->mch1,
            zone->p1_0,zone->p1_1);
}
static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
                               int ddimind,int direction,
                               gmx_ddzone_t *buf_s,int n_s,
                               gmx_ddzone_t *buf_r,int n_r)
{
    rvec vbuf_s[5*2],vbuf_r[5*2];
    int i;

    for(i=0; i<n_s; i++)
    {
        vbuf_s[i*2  ][0] = buf_s[i].min0;
        vbuf_s[i*2  ][1] = buf_s[i].max1;
        vbuf_s[i*2  ][2] = buf_s[i].mch0;
        vbuf_s[i*2+1][0] = buf_s[i].mch1;
        vbuf_s[i*2+1][1] = buf_s[i].p1_0;
        vbuf_s[i*2+1][2] = buf_s[i].p1_1;
    }

    dd_sendrecv_rvec(dd, ddimind, direction,
                     vbuf_s, n_s*2,
                     vbuf_r, n_r*2);

    for(i=0; i<n_r; i++)
    {
        buf_r[i].min0 = vbuf_r[i*2  ][0];
        buf_r[i].max1 = vbuf_r[i*2  ][1];
        buf_r[i].mch0 = vbuf_r[i*2  ][2];
        buf_r[i].mch1 = vbuf_r[i*2+1][0];
        buf_r[i].p1_0 = vbuf_r[i*2+1][1];
        buf_r[i].p1_1 = vbuf_r[i*2+1][2];
    }
}
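
/* The packing above squeezes the 6 reals of a gmx_ddzone_t into 2 rvecs
 * so the existing dd_sendrecv_rvec can be reused: buf_s[i].min0 travels
 * in vbuf_s[2*i][0], ..., buf_s[i].p1_1 in vbuf_s[2*i+1][2]. With the
 * fixed vbuf_s[5*2] this supports at most n_s = n_r = 5 zone entries,
 * matching the buf_s[5]/buf_r[5] arrays used by dd_move_cellx below.
 */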
static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
                          rvec cell_ns_x0,rvec cell_ns_x1)
{
    int  d,d1,dim,dim1,pos,buf_size,i,j,k,p,npulse,npulse_min;
    gmx_ddzone_t *zp,buf_s[5],buf_r[5],buf_e[5];
    rvec extr_s[2],extr_r[2];
    rvec dh;
    real dist_d,c=0,det;
    gmx_domdec_comm_t *comm;
    bool bPBC,bUse;

    comm = dd->comm;

    for(d=1; d<dd->ndim; d++)
    {
        dim      = dd->dim[d];
        zp       = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
        zp->min0 = cell_ns_x0[dim];
        zp->max1 = cell_ns_x1[dim];
        zp->mch0 = cell_ns_x0[dim];
        zp->mch1 = cell_ns_x1[dim];
        zp->p1_0 = cell_ns_x0[dim];
        zp->p1_1 = cell_ns_x1[dim];
    }

    for(d=dd->ndim-2; d>=0; d--)
    {
        dim  = dd->dim[d];
        bPBC = (dim < ddbox->npbcdim);

        /* Use an rvec to store two reals */
        extr_s[d][0] = comm->cell_f0[d+1];
        extr_s[d][1] = comm->cell_f1[d+1];
        extr_s[d][2] = 0;

        pos = 0;
        /* Store the extremes in the backward sending buffer,
         * so they get updated separately from the forward communication.
         */
        for(d1=d; d1<dd->ndim-1; d1++)
        {
            /* We invert the order to be able to use the same loop for buf_e */
            buf_s[pos].min0 = extr_s[d1][1];
            buf_s[pos].max1 = extr_s[d1][0];
            buf_s[pos].mch0 = 0;
            buf_s[pos].mch1 = 0;
            /* Store the cell corner of the dimension we communicate along */
            buf_s[pos].p1_0 = comm->cell_x0[dim];
            buf_s[pos].p1_1 = 0;
            pos++;
        }

        buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
        pos++;

        if (dd->ndim == 3 && d == 0)
        {
            buf_s[pos] = comm->zone_d2[0][1];
            pos++;
            buf_s[pos] = comm->zone_d1[0];
            pos++;
        }

        /* We only need to communicate the extremes
         * in the forward direction
         */
        npulse = comm->cd[d].np;
        if (bPBC)
        {
            /* Take the minimum to avoid double communication */
            npulse_min = min(npulse,dd->nc[dim]-1-npulse);
        }
        else
        {
            /* Without PBC we should really not communicate over
             * the boundaries, but implementing that complicates
             * the communication setup and therefore we simply
             * do all communication, but ignore some data.
             */
            npulse_min = npulse;
        }
        for(p=0; p<npulse_min; p++)
        {
            /* Communicate the extremes forward */
            bUse = (bPBC || dd->ci[dim] > 0);

            dd_sendrecv_rvec(dd, d, dddirForward,
                             extr_s+d, dd->ndim-d-1,
                             extr_r+d, dd->ndim-d-1);

            if (bUse)
            {
                for(d1=d; d1<dd->ndim-1; d1++)
                {
                    extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
                    extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
                }
            }
        }

        buf_size = pos;
        for(p=0; p<npulse; p++)
        {
            /* Communicate all the zone information backward */
            bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);

            dd_sendrecv_ddzone(dd, d, dddirBackward,
                               buf_s, buf_size,
                               buf_r, buf_size);

            clear_rvec(dh);
            if (p > 0)
            {
                for(d1=d+1; d1<dd->ndim; d1++)
                {
                    /* Determine the decrease of maximum required
                     * communication height along d1 due to the distance along d,
                     * this avoids a lot of useless atom communication.
                     */
                    dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;

                    if (ddbox->tric_dir[dim])
                    {
                        /* c is the off-diagonal coupling between the cell planes
                         * along directions d and d1.
                         */
                        c = ddbox->v[dim][dd->dim[d1]][dim];
                    }
                    else
                    {
                        c = 0;
                    }
                    det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
                    if (det > 0)
                    {
                        dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
                    }
                    else
                    {
                        /* A negative value signals out of range */
                        dh[d1] = -1;
                    }
                }
            }

            /* Accumulate the extremes over all pulses */
            for(i=0; i<buf_size; i++)
            {
                if (p == 0)
                {
                    buf_e[i] = buf_r[i];
                }
                else
                {
                    if (bUse)
                    {
                        buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
                        buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
                    }

                    if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
                    {
                        d1 = 1;
                    }
                    else
                    {
                        d1 = d + 1;
                    }
                    if (bUse && dh[d1] >= 0)
                    {
                        buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
                        buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
                    }
                }
                /* Copy the received buffer to the send buffer,
                 * to pass the data through with the next pulse.
                 */
                buf_s[i] = buf_r[i];
            }
            if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
                (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
            {
                /* Store the extremes */
                pos = 0;

                for(d1=d; d1<dd->ndim-1; d1++)
                {
                    extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
                    extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
                    pos++;
                }

                if (d == 1 || (d == 0 && dd->ndim == 3))
                {
                    for(i=d; i<2; i++)
                    {
                        comm->zone_d2[1-d][i] = buf_e[pos];
                        pos++;
                    }
                }
                if (d == 0)
                {
                    comm->zone_d1[1] = buf_e[pos];
                    pos++;
                }
            }
        }
    }

    if (dd->ndim >= 2)
    {
        dim = dd->dim[1];
        for(i=0; i<2; i++)
        {
            if (debug)
            {
                print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
            }
            cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
            cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
        }
    }
    if (dd->ndim >= 3)
    {
        dim = dd->dim[2];
        for(i=0; i<2; i++)
        {
            for(j=0; j<2; j++)
            {
                if (debug)
                {
                    print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
                }
                cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
                cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
            }
        }
    }
    for(d=1; d<dd->ndim; d++)
    {
        comm->cell_f_max0[d] = extr_s[d-1][0];
        comm->cell_f_min1[d] = extr_s[d-1][1];
        if (debug)
        {
            fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
                    d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
        }
    }
}
static void dd_collect_cg(gmx_domdec_t *dd,
                          t_state *state_local)
{
    gmx_domdec_master_t *ma=NULL;
    int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;
    t_block *cgs_gl;

    if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
    {
        /* The master has the correct distribution */
        return;
    }

    if (state_local->ddp_count == dd->ddp_count)
    {
        ncg_home = dd->ncg_home;
        cg       = dd->index_gl;
        nat_home = dd->nat_home;
    }
    else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
    {
        cgs_gl = &dd->comm->cgs_gl;

        ncg_home = state_local->ncg_gl;
        cg       = state_local->cg_gl;
        nat_home = 0;
        for(i=0; i<ncg_home; i++)
        {
            nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
        }
    }
    else
    {
        gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
    }

    buf2[0] = dd->ncg_home;
    buf2[1] = dd->nat_home;
    if (DDMASTER(dd))
    {
        ma = dd->ma;
        ibuf = ma->ibuf;
    }
    else
    {
        ibuf = NULL;
    }
    /* Collect the charge group and atom counts on the master */
    dd_gather(dd,2*sizeof(int),buf2,ibuf);

    if (DDMASTER(dd))
    {
        ma->index[0] = 0;
        for(i=0; i<dd->nnodes; i++)
        {
            ma->ncg[i] = ma->ibuf[2*i];
            ma->nat[i] = ma->ibuf[2*i+1];
            ma->index[i+1] = ma->index[i] + ma->ncg[i];
        }
        /* Make byte counts and indices */
        for(i=0; i<dd->nnodes; i++)
        {
            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
        }
        if (debug)
        {
            fprintf(debug,"Initial charge group distribution: ");
            for(i=0; i<dd->nnodes; i++)
                fprintf(debug," %d",ma->ncg[i]);
            fprintf(debug,"\n");
        }
    }

    /* Collect the charge group indices on the master */
    dd_gatherv(dd,
               dd->ncg_home*sizeof(int),dd->index_gl,
               DDMASTER(dd) ? ma->ibuf : NULL,
               DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
               DDMASTER(dd) ? ma->cg : NULL);

    dd->comm->master_cg_ddp_count = state_local->ddp_count;
}
static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
                                    rvec *lv,rvec *v)
{
    gmx_domdec_master_t *ma;
    int  n,i,c,a,nalloc=0;
    rvec *buf=NULL;
    t_block *cgs_gl;

    ma = dd->ma;

    if (!DDMASTER(dd))
    {
#ifdef GMX_MPI
        MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
                 dd->rank,dd->mpi_comm_all);
#endif
    } else {
        /* Copy the master coordinates to the global array */
        cgs_gl = &dd->comm->cgs_gl;

        n = DDMASTERRANK(dd);
        a = 0;
        for(i=ma->index[n]; i<ma->index[n+1]; i++)
        {
            for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
            {
                copy_rvec(lv[a++],v[c]);
            }
        }

        for(n=0; n<dd->nnodes; n++)
        {
            if (n != dd->rank)
            {
                if (ma->nat[n] > nalloc)
                {
                    nalloc = over_alloc_dd(ma->nat[n]);
                    srenew(buf,nalloc);
                }
#ifdef GMX_MPI
                MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
                         n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
#endif
                a = 0;
                for(i=ma->index[n]; i<ma->index[n+1]; i++)
                {
                    for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
                    {
                        copy_rvec(buf[a++],v[c]);
                    }
                }
            }
        }
        sfree(buf);
    }
}
static void get_commbuffer_counts(gmx_domdec_t *dd,
                                  int **counts,int **disps)
{
    gmx_domdec_master_t *ma;
    int n;

    ma = dd->ma;

    /* Make the rvec count and displacement arrays */
    *counts  = ma->ibuf;
    *disps   = ma->ibuf + dd->nnodes;
    for(n=0; n<dd->nnodes; n++)
    {
        (*counts)[n] = ma->nat[n]*sizeof(rvec);
        (*disps)[n]  = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
    }
}
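
/* Worked example (illustration only): for two nodes with home atom
 * counts nat = {3,2} and single precision reals, sizeof(rvec) = 12, so
 * the loop above produces counts = {36,24} and byte offsets
 * disps = {0,36}, i.e. the count/displacement pair consumed by the
 * dd_gatherv and dd_scatterv collectives used below.
 */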
static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
                                   rvec *lv,rvec *v)
{
    gmx_domdec_master_t *ma;
    int  *rcounts=NULL,*disps=NULL;
    int  n,i,c,a;
    rvec *buf=NULL;
    t_block *cgs_gl;

    ma = dd->ma;

    if (DDMASTER(dd))
    {
        get_commbuffer_counts(dd,&rcounts,&disps);

        buf = ma->vbuf;
    }

    dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);

    if (DDMASTER(dd))
    {
        cgs_gl = &dd->comm->cgs_gl;

        a = 0;
        for(n=0; n<dd->nnodes; n++)
        {
            for(i=ma->index[n]; i<ma->index[n+1]; i++)
            {
                for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
                {
                    copy_rvec(buf[a++],v[c]);
                }
            }
        }
    }
}

void dd_collect_vec(gmx_domdec_t *dd,
                    t_state *state_local,rvec *lv,rvec *v)
{
    gmx_domdec_master_t *ma;
    int n,i,c,a,nalloc=0;
    rvec *buf=NULL;

    dd_collect_cg(dd,state_local);

    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
    {
        dd_collect_vec_sendrecv(dd,lv,v);
    }
    else
    {
        dd_collect_vec_gatherv(dd,lv,v);
    }
}
void dd_collect_state(gmx_domdec_t *dd,
                      t_state *state_local,t_state *state)
{
    int est,i,j,nh;

    nh = state->nhchainlength;

    if (DDMASTER(dd))
    {
        state->lambda = state_local->lambda;
        state->veta   = state_local->veta;
        state->vol0   = state_local->vol0;
        copy_mat(state_local->box,state->box);
        copy_mat(state_local->boxv,state->boxv);
        copy_mat(state_local->svir_prev,state->svir_prev);
        copy_mat(state_local->fvir_prev,state->fvir_prev);
        copy_mat(state_local->pres_prev,state->pres_prev);

        for(i=0; i<state_local->ngtc; i++)
        {
            for(j=0; j<nh; j++) {
                state->nosehoover_xi[i*nh+j]  = state_local->nosehoover_xi[i*nh+j];
                state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
            }
            state->therm_integral[i] = state_local->therm_integral[i];
        }
        for(i=0; i<state_local->nnhpres; i++)
        {
            for(j=0; j<nh; j++) {
                state->nhpres_xi[i*nh+j]  = state_local->nhpres_xi[i*nh+j];
                state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
            }
        }
    }
    for(est=0; est<estNR; est++)
    {
        if (EST_DISTR(est) && state_local->flags & (1<<est))
        {
            switch (est) {
            case estX:
                dd_collect_vec(dd,state_local,state_local->x,state->x);
                break;
            case estV:
                dd_collect_vec(dd,state_local,state_local->v,state->v);
                break;
            case estSDX:
                dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
                break;
            case estCGP:
                dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
                break;
            case estLD_RNG:
                if (state->nrngi == 1)
                {
                    if (DDMASTER(dd))
                    {
                        for(i=0; i<state_local->nrng; i++)
                        {
                            state->ld_rng[i] = state_local->ld_rng[i];
                        }
                    }
                }
                else
                {
                    dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
                              state_local->ld_rng,state->ld_rng);
                }
                break;
            case estLD_RNGI:
                if (state->nrngi == 1)
                {
                    if (DDMASTER(dd))
                    {
                        state->ld_rngi[0] = state_local->ld_rngi[0];
                    }
                }
                else
                {
                    dd_gather(dd,sizeof(state->ld_rngi[0]),
                              state_local->ld_rngi,state->ld_rngi);
                }
                break;
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_collect_state");
            }
        }
    }
}
static void dd_realloc_fr_cg(t_forcerec *fr,int nalloc)
{
    if (debug)
    {
        fprintf(debug,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr->cg_nalloc,nalloc,over_alloc_dd(nalloc));
    }
    fr->cg_nalloc = over_alloc_dd(nalloc);
    srenew(fr->cg_cm,fr->cg_nalloc);
    srenew(fr->cginfo,fr->cg_nalloc);
}

static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
{
    int est;

    if (debug)
    {
        fprintf(debug,"Reallocating state: currently %d, required %d, allocating %d\n",state->nalloc,nalloc,over_alloc_dd(nalloc));
    }

    state->nalloc = over_alloc_dd(nalloc);

    for(est=0; est<estNR; est++)
    {
        if (EST_DISTR(est) && state->flags & (1<<est))
        {
            switch(est) {
            case estX:
                srenew(state->x,state->nalloc);
                break;
            case estV:
                srenew(state->v,state->nalloc);
                break;
            case estSDX:
                srenew(state->sd_X,state->nalloc);
                break;
            case estCGP:
                srenew(state->cg_p,state->nalloc);
                break;
            case estLD_RNG:
            case estLD_RNGI:
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* No reallocation required */
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_realloc_state");
            }
        }
    }

    if (f != NULL)
    {
        srenew(*f,state->nalloc);
    }
}
static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
                                       rvec *v,rvec *lv)
{
    gmx_domdec_master_t *ma;
    int  n,i,c,a,nalloc=0;
    rvec *buf=NULL;

    if (DDMASTER(dd))
    {
        ma = dd->ma;

        for(n=0; n<dd->nnodes; n++)
        {
            if (n != dd->rank)
            {
                if (ma->nat[n] > nalloc)
                {
                    nalloc = over_alloc_dd(ma->nat[n]);
                    srenew(buf,nalloc);
                }
                /* Use lv as a temporary buffer */
                a = 0;
                for(i=ma->index[n]; i<ma->index[n+1]; i++)
                {
                    for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
                    {
                        copy_rvec(v[c],buf[a++]);
                    }
                }
                if (a != ma->nat[n])
                {
                    gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
                              a,ma->nat[n]);
                }

#ifdef GMX_MPI
                MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
                         DDRANK(dd,n),n,dd->mpi_comm_all);
#endif
            }
        }
        sfree(buf);
        n = DDMASTERRANK(dd);
        a = 0;
        for(i=ma->index[n]; i<ma->index[n+1]; i++)
        {
            for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
            {
                copy_rvec(v[c],lv[a++]);
            }
        }
    }
    else
    {
#ifdef GMX_MPI
        MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
                 MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE);
#endif
    }
}
static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
                                       rvec *v,rvec *lv)
{
    gmx_domdec_master_t *ma;
    int  *scounts=NULL,*disps=NULL;
    int  n,i,c,a,nalloc=0;
    rvec *buf=NULL;

    if (DDMASTER(dd))
    {
        ma = dd->ma;

        get_commbuffer_counts(dd,&scounts,&disps);

        buf = ma->vbuf;
        a = 0;
        for(n=0; n<dd->nnodes; n++)
        {
            for(i=ma->index[n]; i<ma->index[n+1]; i++)
            {
                for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
                {
                    copy_rvec(v[c],buf[a++]);
                }
            }
        }
    }

    dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
}

static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
{
    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
    {
        dd_distribute_vec_sendrecv(dd,cgs,v,lv);
    }
    else
    {
        dd_distribute_vec_scatterv(dd,cgs,v,lv);
    }
}
static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
                                t_state *state,t_state *state_local,
                                rvec **f)
{
    int  i,j,ngtch,ngtcp,nh;

    nh = state->nhchainlength;

    if (DDMASTER(dd))
    {
        state_local->lambda = state->lambda;
        state_local->veta   = state->veta;
        state_local->vol0   = state->vol0;
        copy_mat(state->box,state_local->box);
        copy_mat(state->box_rel,state_local->box_rel);
        copy_mat(state->boxv,state_local->boxv);
        copy_mat(state->svir_prev,state_local->svir_prev);
        copy_mat(state->fvir_prev,state_local->fvir_prev);
        for(i=0; i<state_local->ngtc; i++)
        {
            for(j=0; j<nh; j++) {
                state_local->nosehoover_xi[i*nh+j]  = state->nosehoover_xi[i*nh+j];
                state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
            }
            state_local->therm_integral[i] = state->therm_integral[i];
        }
        for(i=0; i<state_local->nnhpres; i++)
        {
            for(j=0; j<nh; j++) {
                state_local->nhpres_xi[i*nh+j]  = state->nhpres_xi[i*nh+j];
                state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
            }
        }
    }
    dd_bcast(dd,sizeof(real),&state_local->lambda);
    dd_bcast(dd,sizeof(real),&state_local->veta);
    dd_bcast(dd,sizeof(real),&state_local->vol0);
    dd_bcast(dd,sizeof(state_local->box),state_local->box);
    dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
    dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
    dd_bcast(dd,sizeof(state_local->svir_prev),state_local->svir_prev);
    dd_bcast(dd,sizeof(state_local->fvir_prev),state_local->fvir_prev);
    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_xi);
    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_vxi);
    dd_bcast(dd,state_local->ngtc*sizeof(double),state_local->therm_integral);
    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_xi);
    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_vxi);

    if (dd->nat_home > state_local->nalloc)
    {
        dd_realloc_state(state_local,f,dd->nat_home);
    }
    for(i=0; i<estNR; i++)
    {
        if (EST_DISTR(i) && state_local->flags & (1<<i))
        {
            switch (i) {
            case estX:
                dd_distribute_vec(dd,cgs,state->x,state_local->x);
                break;
            case estV:
                dd_distribute_vec(dd,cgs,state->v,state_local->v);
                break;
            case estSDX:
                dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
                break;
            case estCGP:
                dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
                break;
            case estLD_RNG:
                if (state->nrngi == 1)
                {
                    dd_bcastc(dd,
                              state_local->nrng*sizeof(state_local->ld_rng[0]),
                              state->ld_rng,state_local->ld_rng);
                }
                else
                {
                    dd_scatter(dd,
                               state_local->nrng*sizeof(state_local->ld_rng[0]),
                               state->ld_rng,state_local->ld_rng);
                }
                break;
            case estLD_RNGI:
                if (state->nrngi == 1)
                {
                    dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
                              state->ld_rngi,state_local->ld_rngi);
                }
                else
                {
                    dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
                               state->ld_rngi,state_local->ld_rngi);
                }
                break;
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* Not implemented yet */
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_distribute_state");
            }
        }
    }
}
static char dim2char(int dim)
{
    char c='?';

    switch (dim)
    {
    case XX: c = 'X'; break;
    case YY: c = 'Y'; break;
    case ZZ: c = 'Z'; break;
    default: gmx_fatal(FARGS,"Unknown dim %d",dim);
    }

    return c;
}
static void write_dd_grid_pdb(const char *fn,gmx_large_int_t step,
                              gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
{
    rvec grid_s[2],*grid_r=NULL,cx,r;
    char fname[STRLEN],format[STRLEN],buf[22];
    FILE *out;
    int  a,i,d,z,y,x;
    matrix tric;
    real vol;

    copy_rvec(dd->comm->cell_x0,grid_s[0]);
    copy_rvec(dd->comm->cell_x1,grid_s[1]);

    if (DDMASTER(dd))
    {
        snew(grid_r,2*dd->nnodes);
    }

    dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);

    if (DDMASTER(dd))
    {
        for(d=0; d<DIM; d++)
        {
            for(i=0; i<DIM; i++)
            {
                if (d == i)
                {
                    tric[d][i] = 1;
                }
                else
                {
                    if (dd->nc[d] > 1 && d < ddbox->npbcdim)
                    {
                        tric[d][i] = box[i][d]/box[i][i];
                    }
                    else
                    {
                        tric[d][i] = 0;
                    }
                }
            }
        }
        sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
        sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
        out = gmx_fio_fopen(fname,"w");
        gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
        a = 1;
        for(i=0; i<dd->nnodes; i++)
        {
            vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
            for(d=0; d<DIM; d++)
            {
                vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
            }
            for(z=0; z<2; z++)
            {
                for(y=0; y<2; y++)
                {
                    for(x=0; x<2; x++)
                    {
                        cx[XX] = grid_r[i*2+x][XX];
                        cx[YY] = grid_r[i*2+y][YY];
                        cx[ZZ] = grid_r[i*2+z][ZZ];
                        mvmul(tric,cx,r);
                        fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,
                                10*r[XX],10*r[YY],10*r[ZZ],1.0,vol);
                    }
                }
            }
            for(d=0; d<DIM; d++)
            {
                for(x=0; x<4; x++)
                {
                    switch(d)
                    {
                    case 0: y = 1 + i*8 + 2*x; break;
                    case 1: y = 1 + i*8 + 2*x - (x % 2); break;
                    case 2: y = 1 + i*8 + x; break;
                    }
                    fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
                }
            }
        }
        gmx_fio_fclose(out);
        sfree(grid_r);
    }
}
void write_dd_pdb(const char *fn,gmx_large_int_t step,const char *title,
                  gmx_mtop_t *mtop,t_commrec *cr,
                  int natoms,rvec x[],matrix box)
{
    char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
    FILE *out;
    int  i,ii,resnr,c;
    char *atomname,*resname;
    real b;
    gmx_domdec_t *dd;

    dd = cr->dd;
    if (natoms == -1)
    {
        natoms = dd->comm->nat[ddnatVSITE];
    }

    sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);

    sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
    sprintf(format4,"%s%s\n",pdbformat4,"%6.2f%6.2f");

    out = gmx_fio_fopen(fname,"w");

    fprintf(out,"TITLE     %s\n",title);
    gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
    for(i=0; i<natoms; i++)
    {
        ii = dd->gatindex[i];
        gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
        if (i < dd->comm->nat[ddnatZONE])
        {
            c = 0;
            while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
            {
                c++;
            }
            b = c;
        }
        else if (i < dd->comm->nat[ddnatVSITE])
        {
            b = dd->comm->zones.n;
        }
        else
        {
            b = dd->comm->zones.n + 1;
        }
        fprintf(out,strlen(atomname)<4 ? format : format4,
                "ATOM",(ii+1)%100000,
                atomname,resname,' ',resnr%10000,' ',
                10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b);
    }
    fprintf(out,"TER\n");

    gmx_fio_fclose(out);
}
real dd_cutoff_mbody(gmx_domdec_t *dd)
{
    gmx_domdec_comm_t *comm;
    int  di;
    real r;

    comm = dd->comm;

    r = -1;
    if (comm->bInterCGBondeds)
    {
        if (comm->cutoff_mbody > 0)
        {
            r = comm->cutoff_mbody;
        }
        else
        {
            /* cutoff_mbody=0 means we do not have DLB */
            r = comm->cellsize_min[dd->dim[0]];
            for(di=1; di<dd->ndim; di++)
            {
                r = min(r,comm->cellsize_min[dd->dim[di]]);
            }
            if (comm->bBondComm)
            {
                r = max(r,comm->cutoff_mbody);
            }
            else
            {
                r = min(r,comm->cutoff);
            }
        }
    }

    return r;
}

real dd_cutoff_twobody(gmx_domdec_t *dd)
{
    real r_mb;

    r_mb = dd_cutoff_mbody(dd);

    return max(dd->comm->cutoff,r_mb);
}
static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
{
    int nc,ntot;

    nc   = dd->nc[dd->comm->cartpmedim];
    ntot = dd->comm->ntot[dd->comm->cartpmedim];
    copy_ivec(coord,coord_pme);
    coord_pme[dd->comm->cartpmedim] =
        nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
}
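
/* Worked example (illustration only): with nc = 4 PP cells and
 * ntot = 6 ranks along cartpmedim there are ntot - nc = 2 PME ranks,
 * and the formula maps PP coordinates {0,1} -> 4 and {2,3} -> 5:
 * 4 + (0*2+1)/4 = 4, 4 + (1*2+1)/4 = 4, 4 + (2*2+1)/4 = 5, 4 + (3*2+1)/4 = 5.
 */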
static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
{
    /* Here we assign a PME node to communicate with this DD node
     * by assuming that the major index of both is x.
     * We add cr->npmenodes/2 to obtain an even distribution.
     */
    return (ddindex*npme + npme/2)/ndd;
}
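
/* Worked example (illustration only): ndd = 6 PP nodes and npme = 2
 * PME nodes give (ddindex*2 + 1)/6, so DD indices 0-2 map to PME node 0
 * and 3-5 to PME node 1: an even split, with the +npme/2 term centering
 * each PME node on its group of PP nodes.
 */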
static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
{
    return low_ddindex2pmeindex(dd->nnodes,dd->comm->npmenodes,ddindex);
}

static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
{
    return low_ddindex2pmeindex(cr->dd->nnodes,cr->npmenodes,ddindex);
}

static int *dd_pmenodes(t_commrec *cr)
{
    int *pmenodes;
    int n,i,p0,p1;

    snew(pmenodes,cr->npmenodes);
    n = 0;
    for(i=0; i<cr->dd->nnodes; i++) {
        p0 = cr_ddindex2pmeindex(cr,i);
        p1 = cr_ddindex2pmeindex(cr,i+1);
        if (i+1 == cr->dd->nnodes || p1 > p0) {
            if (debug)
                fprintf(debug,"pmenode[%d] = %d\n",n,i+1+n);
            pmenodes[n] = i + 1 + n;
            n++;
        }
    }

    return pmenodes;
}
static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
{
    gmx_domdec_t *dd;
    ivec coords,coords_pme,nc;
    int  slab;

    dd = cr->dd;
    /*
      if (dd->comm->bCartesian) {
      gmx_ddindex2xyz(dd->nc,ddindex,coords);
      dd_coords2pmecoords(dd,coords,coords_pme);
      copy_ivec(dd->ntot,nc);
      nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
      coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];

      slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
      } else {
      slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
      }
    */
    coords[XX] = x;
    coords[YY] = y;
    coords[ZZ] = z;
    slab = ddindex2pmeindex(dd,dd_index(dd->nc,coords));

    return slab;
}
static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
{
    gmx_domdec_comm_t *comm;
    ivec coords;
    int  ddindex,nodeid=-1;

    comm = cr->dd->comm;

    coords[XX] = x;
    coords[YY] = y;
    coords[ZZ] = z;
    if (comm->bCartesianPP_PME)
    {
#ifdef GMX_MPI
        MPI_Cart_rank(cr->mpi_comm_mysim,coords,&nodeid);
#endif
    }
    else
    {
        ddindex = dd_index(cr->dd->nc,coords);
        if (comm->bCartesianPP)
        {
            nodeid = comm->ddindex2simnodeid[ddindex];
        }
        else
        {
            if (comm->pmenodes)
            {
                nodeid = ddindex + gmx_ddcoord2pmeindex(cr,x,y,z);
            }
            else
            {
                nodeid = ddindex;
            }
        }
    }

    return nodeid;
}
static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    ivec coord,coord_pme;
    int  i;
    int  pmenode=-1;

    dd = cr->dd;
    comm = dd->comm;

    /* This assumes a uniform x domain decomposition grid cell size */
    if (comm->bCartesianPP_PME)
    {
#ifdef GMX_MPI
        MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
        if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
        {
            /* This is a PP node */
            dd_cart_coord2pmecoord(dd,coord,coord_pme);
            MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
        }
#endif
    }
    else if (comm->bCartesianPP)
    {
        if (sim_nodeid < dd->nnodes)
        {
            pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
        }
    }
    else
    {
        /* This assumes DD cells with identical x coordinates
         * are numbered sequentially.
         */
        if (dd->comm->pmenodes == NULL)
        {
            if (sim_nodeid < dd->nnodes)
            {
                /* The DD index equals the nodeid */
                pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
            }
        }
        else
        {
            i = 0;
            while (sim_nodeid > dd->comm->pmenodes[i])
            {
                i++;
            }
            if (sim_nodeid < dd->comm->pmenodes[i])
            {
                pmenode = dd->comm->pmenodes[i];
            }
        }
    }

    return pmenode;
}
bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
{
    bool bPMEOnlyNode;

    if (DOMAINDECOMP(cr))
    {
        bPMEOnlyNode = (dd_simnode2pmenode(cr,sim_nodeid) == -1);
    }
    else
    {
        bPMEOnlyNode = FALSE;
    }

    return bPMEOnlyNode;
}
void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
                     int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
{
    gmx_domdec_t *dd;
    int x,y,z;
    ivec coord,coord_pme;

    dd = cr->dd;

    snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes);

    *nmy_ddnodes = 0;
    for(x=0; x<dd->nc[XX]; x++)
    {
        for(y=0; y<dd->nc[YY]; y++)
        {
            for(z=0; z<dd->nc[ZZ]; z++)
            {
                if (dd->comm->bCartesianPP_PME)
                {
                    coord[XX] = x;
                    coord[YY] = y;
                    coord[ZZ] = z;
                    dd_cart_coord2pmecoord(dd,coord,coord_pme);
                    if (dd->ci[XX] == coord_pme[XX] &&
                        dd->ci[YY] == coord_pme[YY] &&
                        dd->ci[ZZ] == coord_pme[ZZ])
                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
                }
                else
                {
                    /* The slab corresponds to the nodeid in the PME group */
                    if (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid)
                    {
                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
                    }
                }
            }
        }
    }

    /* The last PP-only node is the peer node */
    *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];

    if (debug)
    {
        fprintf(debug,"Receive coordinates from PP nodes:");
        for(x=0; x<*nmy_ddnodes; x++)
        {
            fprintf(debug," %d",(*my_ddnodes)[x]);
        }
        fprintf(debug,"\n");
    }
}
static bool receive_vir_ener(t_commrec *cr)
{
    gmx_domdec_comm_t *comm;
    int  pmenode,coords[DIM],rank;
    bool bReceive;

    bReceive = TRUE;
    if (cr->npmenodes < cr->dd->nnodes)
    {
        comm = cr->dd->comm;
        if (comm->bCartesianPP_PME)
        {
            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
#ifdef GMX_MPI
            MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
            coords[comm->cartpmedim]++;
            if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
            {
                MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
                if (dd_simnode2pmenode(cr,rank) == pmenode)
                {
                    /* This is not the last PP node for pmenode */
                    bReceive = FALSE;
                }
            }
#endif
        }
        else
        {
            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
            if (cr->sim_nodeid+1 < cr->nnodes &&
                dd_simnode2pmenode(cr,cr->sim_nodeid+1) == pmenode)
            {
                /* This is not the last PP node for pmenode */
                bReceive = FALSE;
            }
        }
    }

    return bReceive;
}
static void set_zones_ncg_home(gmx_domdec_t *dd)
{
    gmx_domdec_zones_t *zones;
    int i;

    zones = &dd->comm->zones;

    zones->cg_range[0] = 0;
    for(i=1; i<zones->n+1; i++)
    {
        zones->cg_range[i] = dd->ncg_home;
    }
}

static void rebuild_cgindex(gmx_domdec_t *dd,int *gcgs_index,t_state *state)
{
    int nat,i,*ind,*dd_cg_gl,*cgindex,cg_gl;

    ind = state->cg_gl;
    dd_cg_gl = dd->index_gl;
    cgindex  = dd->cgindex;
    nat = 0;
    cgindex[0] = nat;
    for(i=0; i<state->ncg_gl; i++)
    {
        cgindex[i] = nat;
        cg_gl = ind[i];
        dd_cg_gl[i] = cg_gl;
        nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
    }
    cgindex[i] = nat;

    dd->ncg_home = state->ncg_gl;
    dd->nat_home = nat;

    set_zones_ncg_home(dd);
}
static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
{
    while (cg >= cginfo_mb->cg_end)
    {
        cginfo_mb++;
    }

    return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
}

static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
                          t_forcerec *fr,char *bLocalCG)
{
    cginfo_mb_t *cginfo_mb;
    int *cginfo;
    int cg;

    if (fr != NULL)
    {
        cginfo_mb = fr->cginfo_mb;
        cginfo    = fr->cginfo;

        for(cg=cg0; cg<cg1; cg++)
        {
            cginfo[cg] = ddcginfo(cginfo_mb,index_gl[cg]);
        }
    }

    if (bLocalCG != NULL)
    {
        for(cg=cg0; cg<cg1; cg++)
        {
            bLocalCG[index_gl[cg]] = TRUE;
        }
    }
}
static void make_dd_indices(gmx_domdec_t *dd,int *gcgs_index,int cg_start)
{
    int nzone,zone,zone1,cg0,cg,cg_gl,a,a_gl;
    int *zone2cg,*zone_ncg1,*index_gl,*gatindex;
    gmx_ga2la_t *ga2la;
    char *bLocalCG;

    bLocalCG = dd->comm->bLocalCG;

    if (dd->nat_tot > dd->gatindex_nalloc)
    {
        dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
        srenew(dd->gatindex,dd->gatindex_nalloc);
    }

    nzone     = dd->comm->zones.n;
    zone2cg   = dd->comm->zones.cg_range;
    zone_ncg1 = dd->comm->zone_ncg1;
    index_gl  = dd->index_gl;
    gatindex  = dd->gatindex;

    if (zone2cg[1] != dd->ncg_home)
    {
        gmx_incons("dd->ncg_zone is not up to date");
    }

    /* Make the local to global and global to local atom index */
    a = dd->cgindex[cg_start];
    for(zone=0; zone<nzone; zone++)
    {
        if (zone == 0)
        {
            cg0 = cg_start;
        }
        else
        {
            cg0 = zone2cg[zone];
        }
        for(cg=cg0; cg<zone2cg[zone+1]; cg++)
        {
            zone1 = zone;
            if (cg - cg0 >= zone_ncg1[zone])
            {
                /* Signal that this cg is from more than one zone away */
                zone1 += nzone;
            }
            cg_gl = index_gl[cg];
            for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
            {
                gatindex[a] = a_gl;
                ga2la_set(dd->ga2la,a_gl,a,zone1);
                a++;
            }
        }
    }
}
static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
                          const char *where)
{
    int ncg,i,ngl,nerr;

    nerr = 0;
    if (bLocalCG == NULL)
    {
        return nerr;
    }
    for(i=0; i<dd->ncg_tot; i++)
    {
        if (!bLocalCG[dd->index_gl[i]])
        {
            fprintf(stderr,
                    "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,i+1,dd->index_gl[i]+1,dd->ncg_home);
            nerr++;
        }
    }
    ngl = 0;
    for(i=0; i<ncg_sys; i++)
    {
        if (bLocalCG[i])
        {
            ngl++;
        }
    }
    if (ngl != dd->ncg_tot)
    {
        fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,ngl,dd->ncg_tot);
        nerr++;
    }

    return nerr;
}
static void check_index_consistency(gmx_domdec_t *dd,
                                    int natoms_sys,int ncg_sys,
                                    const char *where)
{
    int  nerr,ngl,i,a,cell;
    int  *have;

    nerr = 0;

    if (dd->comm->DD_debug > 1)
    {
        snew(have,natoms_sys);
        for(a=0; a<dd->nat_tot; a++)
        {
            if (have[dd->gatindex[a]] > 0)
            {
                fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,dd->gatindex[a]+1,have[dd->gatindex[a]],a+1);
            }
            else
            {
                have[dd->gatindex[a]] = a + 1;
            }
        }
        sfree(have);
    }

    snew(have,dd->nat_tot);

    ngl = 0;
    for(i=0; i<natoms_sys; i++)
    {
        if (ga2la_get(dd->ga2la,i,&a,&cell))
        {
            if (a >= dd->nat_tot)
            {
                fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,i+1,a+1,dd->nat_tot);
                nerr++;
            }
            else
            {
                have[a] = 1;
                if (dd->gatindex[a] != i)
                {
                    fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,i+1,a+1,dd->gatindex[a]+1);
                    nerr++;
                }
            }
            ngl++;
        }
    }
    if (ngl != dd->nat_tot)
    {
        fprintf(stderr,
                "DD node %d, %s: %d global atom indices, %d local atoms\n",
                dd->rank,where,ngl,dd->nat_tot);
    }
    for(a=0; a<dd->nat_tot; a++)
    {
        if (have[a] == 0)
        {
            fprintf(stderr,
                    "DD node %d, %s: local atom %d, global %d has no global index\n",
                    dd->rank,where,a+1,dd->gatindex[a]+1);
        }
    }
    sfree(have);

    nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);

    if (nerr > 0) {
        gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
                  dd->rank,where,nerr);
    }
}
static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
{
    int  i;
    char *bLocalCG;

    if (a_start == 0)
    {
        /* Clear the whole list without searching */
        ga2la_clear(dd->ga2la);
    }
    else
    {
        for(i=a_start; i<dd->nat_tot; i++)
        {
            ga2la_del(dd->ga2la,dd->gatindex[i]);
        }
    }

    bLocalCG = dd->comm->bLocalCG;
    if (bLocalCG)
    {
        for(i=cg_start; i<dd->ncg_tot; i++)
        {
            bLocalCG[dd->index_gl[i]] = FALSE;
        }
    }

    dd_clear_local_vsite_indices(dd);

    if (dd->constraints)
    {
        dd_clear_local_constraint_indices(dd);
    }
}
2588 static real grid_jump_limit(gmx_domdec_comm_t *comm,int dim_ind)
2590 real grid_jump_limit;
2592 /* The distance between the boundaries of cells at distance
2593 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2594 * and by the fact that cells should not be shifted by more than
2595 * half their size, such that cg's only shift by one cell
2596 * at redecomposition.
2598 grid_jump_limit = comm->cellsize_limit;
2599 if (!comm->bVacDLBNoLimit)
2601 grid_jump_limit = max(grid_jump_limit,
2602 comm->cutoff/comm->cd[dim_ind].np);
2605 return grid_jump_limit;
2608 static void check_grid_jump(gmx_large_int_t step,gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
2610 gmx_domdec_comm_t *comm;
2611 int d,dim;
2612 real limit,bfac;
2614 comm = dd->comm;
2616 for(d=1; d<dd->ndim; d++)
2618 dim = dd->dim[d];
2619 limit = grid_jump_limit(comm,d);
2620 bfac = ddbox->box_size[dim];
2621 if (ddbox->tric_dir[dim])
2623 bfac *= ddbox->skew_fac[dim];
2625 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2626 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2628 char buf[22];
2629 gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
2630 gmx_step_str(step,buf),
2631 dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
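/* Editorial sketch, not part of the original source: the test above can be
 * read as requiring that the boundaries of staggered neighbor cells stay at
 * least grid_jump_limit() apart in absolute, skew-corrected coordinates.
 * A self-contained version of just that comparison (plain floats, names
 * hypothetical):
 */
#if 0
static int grid_jump_violated(float cell_f0, float cell_f1,
                              float cell_f_max0, float cell_f_min1,
                              float box_size, float skew_fac, float limit)
{
    float bfac = box_size*skew_fac;  /* box fractions -> distance */

    /* Too close when our upper boundary approaches the highest lower
     * neighbor boundary, or our lower boundary the lowest upper one.
     */
    return ((cell_f1 - cell_f_max0)*bfac <  limit ||
            (cell_f0 - cell_f_min1)*bfac > -limit);
}
#endif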
2636 static int dd_load_count(gmx_domdec_comm_t *comm)
2638 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2641 static float dd_force_load(gmx_domdec_comm_t *comm)
2643 float load;
2645 if (comm->eFlop)
2647 load = comm->flop;
2648 if (comm->eFlop > 1)
2650 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2653 else
2655 load = comm->cycl[ddCyclF];
2656 if (comm->cycl_n[ddCyclF] > 1)
2658 /* Subtract the maximum of the last n cycle counts
2659 * to get rid of possible high counts due to other sources,
2660 * for instance system activity, that would otherwise
2661 * affect the dynamic load balancing.
2663 load -= comm->cycl_max[ddCyclF];
2667 return load;
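/* Editorial note, illustrative only: with eFlop > 1 the flop-based load is
 * perturbed by a uniform random factor of roughly 1 +- 0.05*(eFlop - 1),
 * which deliberately injects imbalance so the dynamic load balancer can be
 * exercised even on perfectly homogeneous systems.
 */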
2670 static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dim,real **dim_f)
2672 gmx_domdec_comm_t *comm;
2673 int i;
2675 comm = dd->comm;
2677 snew(*dim_f,dd->nc[dim]+1);
2678 (*dim_f)[0] = 0;
2679 for(i=1; i<dd->nc[dim]; i++)
2681 if (comm->slb_frac[dim])
2683 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2685 else
2687 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2690 (*dim_f)[dd->nc[dim]] = 1;
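/* Editorial sketch, not in the original file: with dd->nc[dim] = 4 and no
 * static load balancing fractions, the loop above yields the cumulative
 * boundaries {0, 0.25, 0.5, 0.75, 1}. The uniform case in isolation:
 */
#if 0
#include <stdlib.h>

static float *make_uniform_dim_f(int nc)
{
    float *dim_f = malloc((nc + 1)*sizeof(*dim_f));
    int    i;

    for (i = 0; i <= nc; i++)
    {
        dim_f[i] = (float)i/(float)nc;  /* cumulative cell boundary fractions */
    }

    return dim_f;  /* caller frees */
}
#endif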
2693 static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
2694 int dimind,int nslab)
2696 int pmeindex,slab,nso,i;
2697 ivec xyz;
2699 if (dd->comm->npmedecompdim == 1 && dd->dim[0] == YY)
2701 ddpme->dim = YY;
2703 else
2705 ddpme->dim = dimind;
2707 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2709 ddpme->nslab = nslab;
2711 if (ddpme->nslab <= 1)
2713 return;
2716 nso = dd->comm->npmenodes/nslab;
2717 /* Determine for each PME slab the PP location range for dimension dim */
2718 snew(ddpme->pp_min,nslab);
2719 snew(ddpme->pp_max,nslab);
2720 for(slab=0; slab<nslab; slab++) {
2721 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2722 ddpme->pp_max[slab] = 0;
2724 for(i=0; i<dd->nnodes; i++) {
2725 ddindex2xyz(dd->nc,i,xyz);
2726 /* For y only use our y/z slab.
2727 * This assumes that the PME x grid size matches the DD grid size.
2729 if (dimind == 0 || xyz[XX] == dd->ci[XX]) {
2730 pmeindex = ddindex2pmeindex(dd,i);
2731 if (dimind == 0) {
2732 slab = pmeindex/nso;
2733 } else {
2734 slab = pmeindex % nslab;
2736 ddpme->pp_min[slab] = min(ddpme->pp_min[slab],xyz[dimind]);
2737 ddpme->pp_max[slab] = max(ddpme->pp_max[slab],xyz[dimind]);
2741 set_slb_pme_dim_f(dd,ddpme->dim,&ddpme->slb_dim_f);
2744 int dd_pme_maxshift0(gmx_domdec_t *dd)
2746 return dd->comm->ddpme[0].maxshift;
2749 int dd_pme_maxshift1(gmx_domdec_t *dd)
2751 /* This should return the maxshift for dim Y,
2752 * where comm indexes ddpme with dimind.
2754 if (dd->comm->npmedecompdim == 1 && dd->dim[0] == YY)
2756 return dd->comm->ddpme[0].maxshift;
2758 else
2760 return dd->comm->ddpme[1].maxshift;
2764 static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
2765 bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
2767 gmx_domdec_comm_t *comm;
2768 int nc,ns,s;
2769 int *xmin,*xmax;
2770 real range,pme_boundary;
2771 int sh;
2773 comm = dd->comm;
2774 nc = dd->nc[ddpme->dim];
2775 ns = ddpme->nslab;
2777 if (!ddpme->dim_match)
2779 /* PP decomposition is not along dim: the worst situation */
2780 sh = ns/2;
2782 else if (ns <= 3 || (bUniform && ns == nc))
2784 /* The optimal situation */
2785 sh = 1;
2787 else
2789 /* We need to check for all pme nodes which nodes they
2790 * could possibly need to communicate with.
2792 xmin = ddpme->pp_min;
2793 xmax = ddpme->pp_max;
2794 /* Allow for atoms to be maximally 2/3 times the cut-off
2795 * out of their DD cell. This is a reasonable balance between
2796 * performance and support for most charge-group/cut-off
2797 * combinations.
2799 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
2800 /* Avoid extra communication when we are exactly at a boundary */
2801 range *= 0.999;
2803 sh = 1;
2804 for(s=0; s<ns; s++)
2806 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2807 pme_boundary = (real)s/ns;
2808 while (sh+1 < ns &&
2809 ((s-(sh+1) >= 0 &&
2810 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
2811 (s-(sh+1) < 0 &&
2812 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
2814 sh++;
2816 pme_boundary = (real)(s+1)/ns;
2817 while (sh+1 < ns &&
2818 ((s+(sh+1) < ns &&
2819 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
2820 (s+(sh+1) >= ns &&
2821 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
2823 sh++;
2828 ddpme->maxshift = sh;
2830 if (debug)
2832 fprintf(debug,"PME slab communication range for dim %d is %d\n",
2833 ddpme->dim,ddpme->maxshift);
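/* Editorial sketch, not from the original source: maxshift bounds how many
 * PME slabs away from its home slab an atom's coordinate may land. The
 * while-loops above grow sh while some PP cell range, extended by `range`,
 * still reaches past the boundary of a slab sh+1 away. One side of that
 * test, without the pbc wrap-around branch (helper name hypothetical):
 */
#if 0
static int needs_larger_shift_low_side(const float *cell_f, const int *xmax,
                                       int s, int sh, int ns, float range)
{
    float pme_boundary = (float)s/(float)ns;  /* lower boundary of slab s */

    return (s - (sh + 1) >= 0 &&
            cell_f[xmax[s - (sh + 1)] + 1] + range > pme_boundary);
}
#endif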
2837 static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
2839 int d,dim;
2841 for(d=0; d<dd->ndim; d++)
2843 dim = dd->dim[d];
2844 if (dim < ddbox->nboundeddim &&
2845 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
2846 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
2848 gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2849 dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
2850 dd->nc[dim],dd->comm->cellsize_limit);
2855 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
2856 bool bMaster,ivec npulse)
2858 gmx_domdec_comm_t *comm;
2859 int d,j;
2860 rvec cellsize_min;
2861 real *cell_x,cell_dx,cellsize;
2863 comm = dd->comm;
2865 for(d=0; d<DIM; d++)
2867 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
2868 npulse[d] = 1;
2869 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
2871 /* Uniform grid */
2872 cell_dx = ddbox->box_size[d]/dd->nc[d];
2873 if (bMaster)
2875 for(j=0; j<dd->nc[d]+1; j++)
2877 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
2880 else
2882 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
2883 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
2885 cellsize = cell_dx*ddbox->skew_fac[d];
2886 while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
2888 npulse[d]++;
2890 cellsize_min[d] = cellsize;
2892 else
2894 /* Statically load balanced grid */
2895 /* Also when we are not doing a master distribution we determine
2896 * all cell borders in a loop to obtain identical values
2897 * to the master distribution case and to determine npulse.
2899 if (bMaster)
2901 cell_x = dd->ma->cell_x[d];
2903 else
2905 snew(cell_x,dd->nc[d]+1);
2907 cell_x[0] = ddbox->box0[d];
2908 for(j=0; j<dd->nc[d]; j++)
2910 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
2911 cell_x[j+1] = cell_x[j] + cell_dx;
2912 cellsize = cell_dx*ddbox->skew_fac[d];
2913 while (cellsize*npulse[d] < comm->cutoff &&
2914 npulse[d] < dd->nc[d]-1)
2916 npulse[d]++;
2918 cellsize_min[d] = min(cellsize_min[d],cellsize);
2920 if (!bMaster)
2922 comm->cell_x0[d] = cell_x[dd->ci[d]];
2923 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
2924 sfree(cell_x);
2927 /* The following limitation is to prevent a cell from receiving
2928 * some of its own home charge groups back over the periodic boundary.
2929 * Double charge groups cause trouble with the global indices.
2931 if (d < ddbox->npbcdim &&
2932 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
2934 gmx_fatal_collective(FARGS,NULL,dd,
2935 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
2936 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
2937 comm->cutoff,
2938 dd->nc[d],dd->nc[d],
2939 dd->nnodes > dd->nc[d] ? "cells" : "processors");
2943 if (!comm->bDynLoadBal)
2945 copy_rvec(cellsize_min,comm->cellsize_min);
2948 for(d=0; d<comm->npmedecompdim; d++)
2950 set_pme_maxshift(dd,&comm->ddpme[d],
2951 comm->slb_frac[dd->dim[d]]==NULL,ddbox,
2952 comm->ddpme[d].slb_dim_f);
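/* Editorial sketch, directly mirroring the loops above: npulse counts how
 * many cells of size `cellsize` are needed to cover the cut-off, i.e.
 * effectively ceil(cutoff/cellsize) clamped to nc-1. Standalone:
 */
#if 0
static int count_pulses(float cellsize, float cutoff, int nc)
{
    int npulse = 1;

    while (cellsize*npulse < cutoff && npulse < nc - 1)
    {
        npulse++;  /* one more communication pulse in this dimension */
    }

    return npulse;
}
#endif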
2957 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
2958 int d,int dim,gmx_domdec_root_t *root,
2959 gmx_ddbox_t *ddbox,
2960 bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
2962 gmx_domdec_comm_t *comm;
2963 int ncd,i,j,nmin,nmin_old;
2964 bool bLimLo,bLimHi;
2965 real *cell_size;
2966 real fac,halfway,cellsize_limit_f_i,region_size;
2967 bool bPBC,bLastHi=FALSE;
2968 int nrange[]={range[0],range[1]};
2970 region_size= root->cell_f[range[1]]-root->cell_f[range[0]];
2972 comm = dd->comm;
2974 ncd = dd->nc[dim];
2976 bPBC = (dim < ddbox->npbcdim);
2978 cell_size = root->buf_ncd;
2980 if (debug)
2982 fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
2985 /* First we need to check if the scaling does not make cells
2986 * smaller than the smallest allowed size.
2987 * We need to do this iteratively, since if a cell is too small,
2988 * it needs to be enlarged, which makes all the other cells smaller,
2989 * which could in turn make another cell smaller than allowed.
2991 for(i=range[0]; i<range[1]; i++)
2993 root->bCellMin[i] = FALSE;
2995 nmin = 0;
2998 nmin_old = nmin;
2999 /* We need the total for normalization */
3000 fac = 0;
3001 for(i=range[0]; i<range[1]; i++)
3003 if (root->bCellMin[i] == FALSE)
3005 fac += cell_size[i];
3008 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
3009 /* Determine the cell boundaries */
3010 for(i=range[0]; i<range[1]; i++)
3012 if (root->bCellMin[i] == FALSE)
3014 cell_size[i] *= fac;
3015 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3017 cellsize_limit_f_i = 0;
3019 else
3021 cellsize_limit_f_i = cellsize_limit_f;
3023 if (cell_size[i] < cellsize_limit_f_i)
3025 root->bCellMin[i] = TRUE;
3026 cell_size[i] = cellsize_limit_f_i;
3027 nmin++;
3030 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3033 while (nmin > nmin_old);
3035 i=range[1]-1;
3036 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3037 /* For this check we should not use DD_CELL_MARGIN,
3038 * but a slightly smaller factor,
3039 * since rounding could get us below the limit.
3041 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3043 char buf[22];
3044 gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3045 gmx_step_str(step,buf),
3046 dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
3047 ncd,comm->cellsize_min[dim]);
3050 root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);
3052 if (!bUniform)
3054 /* Check if the boundary did not displace more than halfway
3055 * each of the cells it bounds, as this could cause problems,
3056 * especially when the differences between cell sizes are large.
3057 * If changes are applied, they will not make cells smaller
3058 * than the cut-off, as we check all the boundaries which
3059 * might be affected by a change and if the old state was ok,
3060 * the cells will at most be shrunk back to their old size.
3062 for(i=range[0]+1; i<range[1]; i++)
3064 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3065 if (root->cell_f[i] < halfway)
3067 root->cell_f[i] = halfway;
3068 /* Check if the change also causes shifts of the next boundaries */
3069 for(j=i+1; j<range[1]; j++)
3071 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3072 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3075 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3076 if (root->cell_f[i] > halfway)
3078 root->cell_f[i] = halfway;
3079 /* Check if the change also causes shifts of the next boundaries */
3080 for(j=i-1; j>=range[0]+1; j--)
3082 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3083 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3089 /* nrange is defined as the [lower, upper) range for the new call to enforce_limits */
3090 /* Find the highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following one) (b),
3091 * then call enforce_limits for (oldb,a) and (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries;
3092 * for a and b, nrange is used. */
3093 if (d > 0)
3095 /* Take care of the staggering of the cell boundaries */
3096 if (bUniform)
3098 for(i=range[0]; i<range[1]; i++)
3100 root->cell_f_max0[i] = root->cell_f[i];
3101 root->cell_f_min1[i] = root->cell_f[i+1];
3104 else
3106 for(i=range[0]+1; i<range[1]; i++)
3108 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3109 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3110 if (bLimLo && bLimHi)
3112 /* Both limits violated, try the best we can */
3113 /* For this case we split the original range (range) in two parts and care about the other limitations in the next iteration. */
3114 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3115 nrange[0]=range[0];
3116 nrange[1]=i;
3117 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3119 nrange[0]=i;
3120 nrange[1]=range[1];
3121 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3123 return;
3125 else if (bLimLo)
3127 /* root->cell_f[i] = root->bound_min[i]; */
3128 nrange[1]=i; /* only store violation location. There could be a LimLo violation following with a higher index */
3129 bLastHi=FALSE;
3131 else if (bLimHi && !bLastHi)
3133 bLastHi=TRUE;
3134 if (nrange[1] < range[1]) /* found a LimLo before */
3136 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3137 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3138 nrange[0]=nrange[1];
3140 root->cell_f[i] = root->bound_max[i];
3141 nrange[1]=i;
3142 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3143 nrange[0]=i;
3144 nrange[1]=range[1];
3147 if (nrange[1] < range[1]) /* the last violation found was a LimLo */
3149 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3150 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3151 nrange[0]=nrange[1];
3152 nrange[1]=range[1];
3153 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3155 else if (nrange[0] > range[0]) /* found at least one LimHi */
3157 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
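/* Editorial sketch, not part of the original file: the do/while loop at the
 * top of this function clamps too-small cells to the limit and renormalizes
 * the remaining ones with
 *
 *   fac = (region_size - nmin*cellsize_limit_f)/sum_of_free_cell_sizes
 *
 * iterating because each clamp shrinks the free cells again. For example,
 * sizes {0.4, 0.3, 0.2, 0.1} with limit 0.15 converge in two passes to
 * roughly {0.378, 0.283, 0.189, 0.15}. In isolation:
 */
#if 0
static void enforce_min_sizes(float *sizes, int n, float region_size,
                              float limit)
{
    int clamped[64] = { 0 };  /* assumes n <= 64 for this sketch */
    int i, nmin = 0, nmin_old;

    do
    {
        float free_sum = 0, fac;

        nmin_old = nmin;
        for (i = 0; i < n; i++)
        {
            if (!clamped[i])
            {
                free_sum += sizes[i];
            }
        }
        fac = (region_size - nmin*limit)/free_sum;
        for (i = 0; i < n; i++)
        {
            if (!clamped[i])
            {
                sizes[i] *= fac;
                if (sizes[i] < limit)
                {
                    clamped[i] = 1;
                    sizes[i]   = limit;
                    nmin++;
                }
            }
        }
    }
    while (nmin > nmin_old);
}
#endif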
3164 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3165 int d,int dim,gmx_domdec_root_t *root,
3166 gmx_ddbox_t *ddbox,bool bDynamicBox,
3167 bool bUniform,gmx_large_int_t step)
3169 gmx_domdec_comm_t *comm;
3170 int ncd,d1,i,j,pos;
3171 real *cell_size;
3172 real load_aver,load_i,imbalance,change,change_max,sc;
3173 real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
3174 real change_limit = 0.1;
3175 real relax = 0.5;
3176 bool bPBC;
3177 int range[] = { 0, 0 };
3179 comm = dd->comm;
3181 ncd = dd->nc[dim];
3183 bPBC = (dim < ddbox->npbcdim);
3185 cell_size = root->buf_ncd;
3187 /* Store the original boundaries */
3188 for(i=0; i<ncd+1; i++)
3190 root->old_cell_f[i] = root->cell_f[i];
3192 if (bUniform) {
3193 for(i=0; i<ncd; i++)
3195 cell_size[i] = 1.0/ncd;
3198 else if (dd_load_count(comm))
3200 load_aver = comm->load[d].sum_m/ncd;
3201 change_max = 0;
3202 for(i=0; i<ncd; i++)
3204 /* Determine the relative imbalance of cell i */
3205 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3206 imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
3207 /* Determine the change of the cell size using underrelaxation */
3208 change = -relax*imbalance;
3209 change_max = max(change_max,max(change,-change));
3211 /* Limit the amount of scaling.
3212 * We need to use the same rescaling for all cells in one row,
3213 * otherwise the load balancing might not converge.
3215 sc = relax;
3216 if (change_max > change_limit)
3218 sc *= change_limit/change_max;
3220 for(i=0; i<ncd; i++)
3222 /* Determine the relative imbalance of cell i */
3223 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3224 imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
3225 /* Determine the change of the cell size using underrelaxation */
3226 change = -sc*imbalance;
3227 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3231 cellsize_limit_f = comm->cellsize_min[dim]/ddbox->box_size[dim];
3232 cellsize_limit_f *= DD_CELL_MARGIN;
3233 dist_min_f_hard = grid_jump_limit(comm,d)/ddbox->box_size[dim];
3234 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3235 if (ddbox->tric_dir[dim])
3237 cellsize_limit_f /= ddbox->skew_fac[dim];
3238 dist_min_f /= ddbox->skew_fac[dim];
3240 if (bDynamicBox && d > 0)
3242 dist_min_f *= DD_PRES_SCALE_MARGIN;
3244 if (d > 0 && !bUniform)
3246 /* Make sure that the grid is not shifted too much */
3247 for(i=1; i<ncd; i++) {
3248 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3250 gmx_incons("Inconsistent DD boundary staggering limits!");
3252 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3253 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3254 if (space > 0) {
3255 root->bound_min[i] += 0.5*space;
3257 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3258 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3259 if (space < 0) {
3260 root->bound_max[i] += 0.5*space;
3262 if (debug)
3264 fprintf(debug,
3265 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3266 d,i,
3267 root->cell_f_max0[i-1] + dist_min_f,
3268 root->bound_min[i],root->cell_f[i],root->bound_max[i],
3269 root->cell_f_min1[i] - dist_min_f);
3273 range[1]=ncd;
3274 root->cell_f[0] = 0;
3275 root->cell_f[ncd] = 1;
3276 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3279 /* After the checks above, the cells should obey the cut-off
3280 * restrictions, but it does not hurt to check.
3282 for(i=0; i<ncd; i++)
3284 if (debug)
3286 fprintf(debug,"Relative bounds dim %d cell %d: %f %f\n",
3287 dim,i,root->cell_f[i],root->cell_f[i+1]);
3290 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3291 root->cell_f[i+1] - root->cell_f[i] <
3292 cellsize_limit_f/DD_CELL_MARGIN)
3294 char buf[22];
3295 fprintf(stderr,
3296 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3297 gmx_step_str(step,buf),dim2char(dim),i,
3298 (root->cell_f[i+1] - root->cell_f[i])
3299 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3303 pos = ncd + 1;
3304 /* Store the cell boundaries of the lower dimensions at the end */
3305 for(d1=0; d1<d; d1++)
3307 root->cell_f[pos++] = comm->cell_f0[d1];
3308 root->cell_f[pos++] = comm->cell_f1[d1];
3311 if (d < comm->npmedecompdim)
3313 /* The master determines the maximum shift for
3314 * the coordinate communication between separate PME nodes.
3316 set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
3318 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3319 if (d >= 1)
3321 root->cell_f[pos++] = comm->ddpme[1].maxshift;
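/* Editorial note, illustrative only: with relax = 0.5, a cell carrying 20 %
 * more load than the row average gets imbalance = 0.2 and change = -0.1,
 * i.e. it is shrunk by 10 %; change_limit caps the largest per-step resize
 * by scaling `sc` down uniformly for the whole row, so convergence is not
 * broken by one outlier. That scaling decision in isolation:
 */
#if 0
static float row_scaling_factor(float change_max, float relax,
                                float change_limit)
{
    float sc = relax;

    if (change_max > change_limit)
    {
        sc *= change_limit/change_max;  /* same rescaling for the whole row */
    }

    return sc;
}
#endif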
3325 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3326 gmx_ddbox_t *ddbox,int dimind)
3328 gmx_domdec_comm_t *comm;
3329 int dim;
3331 comm = dd->comm;
3333 /* Set the cell dimensions */
3334 dim = dd->dim[dimind];
3335 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3336 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3337 if (dim >= ddbox->nboundeddim)
3339 comm->cell_x0[dim] += ddbox->box0[dim];
3340 comm->cell_x1[dim] += ddbox->box0[dim];
3344 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3345 int d,int dim,real *cell_f_row,
3346 gmx_ddbox_t *ddbox)
3348 gmx_domdec_comm_t *comm;
3349 int d1,dim1,pos;
3351 comm = dd->comm;
3353 #ifdef GMX_MPI
3354 /* Each node would only need to know two fractions,
3355 * but it is probably cheaper to broadcast the whole array.
3357 MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
3358 0,comm->mpi_comm_load[d]);
3359 #endif
3360 /* Copy the fractions for this dimension from the buffer */
3361 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3362 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3363 /* The whole array was communicated, so set the buffer position */
3364 pos = dd->nc[dim] + 1;
3365 for(d1=0; d1<=d; d1++)
3367 if (d1 < d)
3369 /* Copy the cell fractions of the lower dimensions */
3370 comm->cell_f0[d1] = cell_f_row[pos++];
3371 comm->cell_f1[d1] = cell_f_row[pos++];
3373 relative_to_absolute_cell_bounds(dd,ddbox,d1);
3375 /* Convert the communicated shift from float to int */
3376 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3377 if (d >= 1)
3379 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
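/* Editorial note, not from the original source: the broadcast row buffer
 * filled by set_dd_cell_sizes_dlb_root and consumed above is laid out as
 *
 *   row[0 .. nc[dim]]   boundary fractions of this dimension,
 *   then, per lower dimension d1 < d, the pair cell_f0[d1], cell_f1[d1],
 *   then ddpme[0].maxshift and, for d >= 1, ddpme[1].maxshift as reals,
 *
 * which is why each rank reads only row[ci] and row[ci+1] for its own
 * boundaries and the trailing entries for the PME shifts.
 */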
3383 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3384 gmx_ddbox_t *ddbox,bool bDynamicBox,
3385 bool bUniform,gmx_large_int_t step)
3387 gmx_domdec_comm_t *comm;
3388 int d,dim,d1;
3389 bool bRowMember,bRowRoot;
3390 real *cell_f_row;
3392 comm = dd->comm;
3394 for(d=0; d<dd->ndim; d++)
3396 dim = dd->dim[d];
3397 bRowMember = TRUE;
3398 bRowRoot = TRUE;
3399 for(d1=d; d1<dd->ndim; d1++)
3401 if (dd->ci[dd->dim[d1]] > 0)
3403 if (d1 > d)
3405 bRowMember = FALSE;
3407 bRowRoot = FALSE;
3410 if (bRowMember)
3412 if (bRowRoot)
3414 set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
3415 ddbox,bDynamicBox,bUniform,step);
3416 cell_f_row = comm->root[d]->cell_f;
3418 else
3420 cell_f_row = comm->cell_f_row;
3422 distribute_dd_cell_sizes_dlb(dd,d,dim,cell_f_row,ddbox);
3427 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
3429 int d;
3431 /* This function assumes the box is static and should therefore
3432 * not be called when the box has changed since the last
3433 * call to dd_partition_system.
3435 for(d=0; d<dd->ndim; d++)
3437 relative_to_absolute_cell_bounds(dd,ddbox,d);
3443 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3444 gmx_ddbox_t *ddbox,bool bDynamicBox,
3445 bool bUniform,bool bDoDLB,gmx_large_int_t step,
3446 gmx_wallcycle_t wcycle)
3448 gmx_domdec_comm_t *comm;
3449 int dim;
3451 comm = dd->comm;
3453 if (bDoDLB)
3455 wallcycle_start(wcycle,ewcDDCOMMBOUND);
3456 set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
3457 wallcycle_stop(wcycle,ewcDDCOMMBOUND);
3459 else if (bDynamicBox)
3461 set_dd_cell_sizes_dlb_nochange(dd,ddbox);
3464 /* Set the dimensions for which no DD is used */
3465 for(dim=0; dim<DIM; dim++) {
3466 if (dd->nc[dim] == 1) {
3467 comm->cell_x0[dim] = 0;
3468 comm->cell_x1[dim] = ddbox->box_size[dim];
3469 if (dim >= ddbox->nboundeddim)
3471 comm->cell_x0[dim] += ddbox->box0[dim];
3472 comm->cell_x1[dim] += ddbox->box0[dim];
3478 static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
3480 int d,np,i;
3481 gmx_domdec_comm_dim_t *cd;
3483 for(d=0; d<dd->ndim; d++)
3485 cd = &dd->comm->cd[d];
3486 np = npulse[dd->dim[d]];
3487 if (np > cd->np_nalloc)
3489 if (debug)
3491 fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
3492 dim2char(dd->dim[d]),np);
3494 if (DDMASTER(dd) && cd->np_nalloc > 0)
3496 fprintf(stderr,"\nIncreasing the number of cells to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[d]),np);
3498 srenew(cd->ind,np);
3499 for(i=cd->np_nalloc; i<np; i++)
3501 cd->ind[i].index = NULL;
3502 cd->ind[i].nalloc = 0;
3504 cd->np_nalloc = np;
3506 cd->np = np;
3511 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3512 gmx_ddbox_t *ddbox,bool bDynamicBox,
3513 bool bUniform,bool bDoDLB,gmx_large_int_t step,
3514 gmx_wallcycle_t wcycle)
3516 gmx_domdec_comm_t *comm;
3517 int d;
3518 ivec npulse;
3520 comm = dd->comm;
3522 /* Copy the old cell boundaries for the cg displacement check */
3523 copy_rvec(comm->cell_x0,comm->old_cell_x0);
3524 copy_rvec(comm->cell_x1,comm->old_cell_x1);
3526 if (comm->bDynLoadBal)
3528 if (DDMASTER(dd))
3530 check_box_size(dd,ddbox);
3532 set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
3534 else
3536 set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
3537 realloc_comm_ind(dd,npulse);
3540 if (debug)
3542 for(d=0; d<DIM; d++)
3544 fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
3545 d,comm->cell_x0[d],comm->cell_x1[d],ddbox->skew_fac[d]);
3550 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3551 gmx_ddbox_t *ddbox,
3552 rvec cell_ns_x0,rvec cell_ns_x1,
3553 gmx_large_int_t step)
3555 gmx_domdec_comm_t *comm;
3556 int dim_ind,dim;
3558 comm = dd->comm;
3560 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
3562 dim = dd->dim[dim_ind];
3564 /* Without PBC we don't have restrictions on the outer cells */
3565 if (!(dim >= ddbox->npbcdim &&
3566 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3567 comm->bDynLoadBal &&
3568 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3569 comm->cellsize_min[dim])
3571 char buf[22];
3572 gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3573 gmx_step_str(step,buf),dim2char(dim),
3574 comm->cell_x1[dim] - comm->cell_x0[dim],
3575 ddbox->skew_fac[dim],
3576 dd->comm->cellsize_min[dim],
3577 dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
3581 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3583 /* Communicate the boundaries and update cell_ns_x0/1 */
3584 dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
3585 if (dd->bGridJump && dd->ndim > 1)
3587 check_grid_jump(step,dd,ddbox);
3592 static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
3594 if (YY < npbcdim)
3596 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3598 else
3600 tcm[YY][XX] = 0;
3602 if (ZZ < npbcdim)
3604 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3605 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3607 else
3609 tcm[ZZ][XX] = 0;
3610 tcm[ZZ][YY] = 0;
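/* Editorial sketch, illustrative only: tcm is constructed so that
 *
 *   pos_d = x[d] + sum_{j>d} x[j]*tcm[j][d]
 *
 * removes the triclinic shear, making pos_d/box[d][d] the box fraction
 * along dimension d. Reduced to the x/y plane:
 */
#if 0
static float shear_corrected_x(float x, float y,
                               float box_yy, float box_yx)
{
    float tcm_yx = -box_yx/box_yy;  /* as built above for tcm[YY][XX] */

    return x + y*tcm_yx;            /* divide by box_xx for the fraction */
}
#endif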
3614 static void check_screw_box(matrix box)
3616 /* Mathematical limitation */
3617 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3619 gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3622 /* Limitation due to the asymmetry of the eighth shell method */
3623 if (box[ZZ][YY] != 0)
3625 gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
3629 static void distribute_cg(FILE *fplog,gmx_large_int_t step,
3630 matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
3631 gmx_domdec_t *dd)
3633 gmx_domdec_master_t *ma;
3634 int **tmp_ind=NULL,*tmp_nalloc=NULL;
3635 int i,icg,j,k,k0,k1,d,npbcdim;
3636 matrix tcm;
3637 rvec box_size,cg_cm;
3638 ivec ind;
3639 real nrcg,inv_ncg,pos_d;
3640 atom_id *cgindex;
3641 bool bUnbounded,bScrew;
3643 ma = dd->ma;
3645 if (tmp_ind == NULL)
3647 snew(tmp_nalloc,dd->nnodes);
3648 snew(tmp_ind,dd->nnodes);
3649 for(i=0; i<dd->nnodes; i++)
3651 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3652 snew(tmp_ind[i],tmp_nalloc[i]);
3656 /* Clear the count */
3657 for(i=0; i<dd->nnodes; i++)
3659 ma->ncg[i] = 0;
3660 ma->nat[i] = 0;
3663 make_tric_corr_matrix(dd->npbcdim,box,tcm);
3665 cgindex = cgs->index;
3667 /* Compute the center of geometry for all charge groups */
3668 for(icg=0; icg<cgs->nr; icg++)
3670 k0 = cgindex[icg];
3671 k1 = cgindex[icg+1];
3672 nrcg = k1 - k0;
3673 if (nrcg == 1)
3675 copy_rvec(pos[k0],cg_cm);
3677 else
3679 inv_ncg = 1.0/nrcg;
3681 clear_rvec(cg_cm);
3682 for(k=k0; (k<k1); k++)
3684 rvec_inc(cg_cm,pos[k]);
3686 for(d=0; (d<DIM); d++)
3688 cg_cm[d] *= inv_ncg;
3691 /* Put the charge group in the box and determine the cell index */
3692 for(d=DIM-1; d>=0; d--) {
3693 pos_d = cg_cm[d];
3694 if (d < dd->npbcdim)
3696 bScrew = (dd->bScrewPBC && d == XX);
3697 if (tric_dir[d] && dd->nc[d] > 1)
3699 /* Use triclinic coordinates for this dimension */
3700 for(j=d+1; j<DIM; j++)
3702 pos_d += cg_cm[j]*tcm[j][d];
3705 while(pos_d >= box[d][d])
3707 pos_d -= box[d][d];
3708 rvec_dec(cg_cm,box[d]);
3709 if (bScrew)
3711 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3712 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3714 for(k=k0; (k<k1); k++)
3716 rvec_dec(pos[k],box[d]);
3717 if (bScrew)
3719 pos[k][YY] = box[YY][YY] - pos[k][YY];
3720 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3724 while(pos_d < 0)
3726 pos_d += box[d][d];
3727 rvec_inc(cg_cm,box[d]);
3728 if (bScrew)
3730 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3731 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3733 for(k=k0; (k<k1); k++)
3735 rvec_inc(pos[k],box[d]);
3736 if (bScrew) {
3737 pos[k][YY] = box[YY][YY] - pos[k][YY];
3738 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3743 /* This could be done more efficiently */
3744 ind[d] = 0;
3745 while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
3747 ind[d]++;
3750 i = dd_index(dd->nc,ind);
3751 if (ma->ncg[i] == tmp_nalloc[i])
3753 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
3754 srenew(tmp_ind[i],tmp_nalloc[i]);
3756 tmp_ind[i][ma->ncg[i]] = icg;
3757 ma->ncg[i]++;
3758 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
3761 k1 = 0;
3762 for(i=0; i<dd->nnodes; i++)
3764 ma->index[i] = k1;
3765 for(k=0; k<ma->ncg[i]; k++)
3767 ma->cg[k1++] = tmp_ind[i][k];
3770 ma->index[dd->nnodes] = k1;
3772 for(i=0; i<dd->nnodes; i++)
3774 sfree(tmp_ind[i]);
3776 sfree(tmp_ind);
3777 sfree(tmp_nalloc);
3779 if (fplog)
3781 char buf[22];
3782 fprintf(fplog,"Charge group distribution at step %s:",
3783 gmx_step_str(step,buf));
3784 for(i=0; i<dd->nnodes; i++)
3786 fprintf(fplog," %d",ma->ncg[i]);
3788 fprintf(fplog,"\n");
3792 static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
3793 t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
3794 rvec pos[])
3796 gmx_domdec_master_t *ma=NULL;
3797 ivec npulse;
3798 int i,cg_gl;
3799 int *ibuf,buf2[2] = { 0, 0 };
3801 if (DDMASTER(dd))
3803 ma = dd->ma;
3805 if (dd->bScrewPBC)
3807 check_screw_box(box);
3810 set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);
3812 distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
3813 for(i=0; i<dd->nnodes; i++)
3815 ma->ibuf[2*i] = ma->ncg[i];
3816 ma->ibuf[2*i+1] = ma->nat[i];
3818 ibuf = ma->ibuf;
3820 else
3822 ibuf = NULL;
3824 dd_scatter(dd,2*sizeof(int),ibuf,buf2);
3826 dd->ncg_home = buf2[0];
3827 dd->nat_home = buf2[1];
3828 dd->ncg_tot = dd->ncg_home;
3829 dd->nat_tot = dd->nat_home;
3830 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
3832 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
3833 srenew(dd->index_gl,dd->cg_nalloc);
3834 srenew(dd->cgindex,dd->cg_nalloc+1);
3836 if (DDMASTER(dd))
3838 for(i=0; i<dd->nnodes; i++)
3840 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
3841 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
3845 dd_scatterv(dd,
3846 DDMASTER(dd) ? ma->ibuf : NULL,
3847 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
3848 DDMASTER(dd) ? ma->cg : NULL,
3849 dd->ncg_home*sizeof(int),dd->index_gl);
3851 /* Determine the home charge group sizes */
3852 dd->cgindex[0] = 0;
3853 for(i=0; i<dd->ncg_home; i++)
3855 cg_gl = dd->index_gl[i];
3856 dd->cgindex[i+1] =
3857 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
3860 if (debug)
3862 fprintf(debug,"Home charge groups:\n");
3863 for(i=0; i<dd->ncg_home; i++)
3865 fprintf(debug," %d",dd->index_gl[i]);
3866 if (i % 10 == 9)
3867 fprintf(debug,"\n");
3869 fprintf(debug,"\n");
3873 static int compact_and_copy_vec_at(int ncg,int *move,
3874 int *cgindex,
3875 int nvec,int vec,
3876 rvec *src,gmx_domdec_comm_t *comm,
3877 bool bCompact)
3879 int m,icg,i,i0,i1,nrcg;
3880 int home_pos;
3881 int pos_vec[DIM*2];
3883 home_pos = 0;
3885 for(m=0; m<DIM*2; m++)
3887 pos_vec[m] = 0;
3890 i0 = 0;
3891 for(icg=0; icg<ncg; icg++)
3893 i1 = cgindex[icg+1];
3894 m = move[icg];
3895 if (m == -1)
3897 if (bCompact)
3899 /* Compact the home array in place */
3900 for(i=i0; i<i1; i++)
3902 copy_rvec(src[i],src[home_pos++]);
3906 else
3908 /* Copy to the communication buffer */
3909 nrcg = i1 - i0;
3910 pos_vec[m] += 1 + vec*nrcg;
3911 for(i=i0; i<i1; i++)
3913 copy_rvec(src[i],comm->cgcm_state[m][pos_vec[m]++]);
3915 pos_vec[m] += (nvec - vec - 1)*nrcg;
3917 if (!bCompact)
3919 home_pos += i1 - i0;
3921 i0 = i1;
3924 return home_pos;
3927 static int compact_and_copy_vec_cg(int ncg,int *move,
3928 int *cgindex,
3929 int nvec,rvec *src,gmx_domdec_comm_t *comm,
3930 bool bCompact)
3932 int m,icg,i0,i1,nrcg;
3933 int home_pos;
3934 int pos_vec[DIM*2];
3936 home_pos = 0;
3938 for(m=0; m<DIM*2; m++)
3940 pos_vec[m] = 0;
3943 i0 = 0;
3944 for(icg=0; icg<ncg; icg++)
3946 i1 = cgindex[icg+1];
3947 m = move[icg];
3948 if (m == -1)
3950 if (bCompact)
3952 /* Compact the home array in place */
3953 copy_rvec(src[icg],src[home_pos++]);
3956 else
3958 nrcg = i1 - i0;
3959 /* Copy to the communication buffer */
3960 copy_rvec(src[icg],comm->cgcm_state[m][pos_vec[m]]);
3961 pos_vec[m] += 1 + nrcg*nvec;
3963 i0 = i1;
3965 if (!bCompact)
3967 home_pos = ncg;
3970 return home_pos;
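/* Editorial note, not in the original file: together these two routines fill
 * comm->cgcm_state[m] with one record per moving charge group, laid out as
 *
 *   [cg_cm] [x for nrcg atoms] [v ...] [sd_X ...] [cg_p ...]
 *
 * compact_and_copy_vec_cg writes the leading cg_cm and advances by the full
 * record (1 + nrcg*nvec), while compact_and_copy_vec_at is called once per
 * vector with `vec` selecting the sub-block, which is why it skips
 * 1 + vec*nrcg entries before writing and (nvec - vec - 1)*nrcg after.
 */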
3973 static int compact_ind(int ncg,int *move,
3974 int *index_gl,int *cgindex,
3975 int *gatindex,
3976 gmx_ga2la_t ga2la,char *bLocalCG,
3977 int *cginfo)
3979 int cg,nat,a0,a1,a,a_gl;
3980 int home_pos;
3982 home_pos = 0;
3983 nat = 0;
3984 for(cg=0; cg<ncg; cg++)
3986 a0 = cgindex[cg];
3987 a1 = cgindex[cg+1];
3988 if (move[cg] == -1)
3990 /* Compact the home arrays in place.
3991 * Anything that can be done here avoids access to global arrays.
3993 cgindex[home_pos] = nat;
3994 for(a=a0; a<a1; a++)
3996 a_gl = gatindex[a];
3997 gatindex[nat] = a_gl;
3998 /* The cell number stays 0, so we don't need to set it */
3999 ga2la_change_la(ga2la,a_gl,nat);
4000 nat++;
4002 index_gl[home_pos] = index_gl[cg];
4003 cginfo[home_pos] = cginfo[cg];
4004 /* The charge group remains local, so bLocalCG does not change */
4005 home_pos++;
4007 else
4009 /* Clear the global indices */
4010 for(a=a0; a<a1; a++)
4012 ga2la_del(ga2la,gatindex[a]);
4014 if (bLocalCG)
4016 bLocalCG[index_gl[cg]] = FALSE;
4020 cgindex[home_pos] = nat;
4022 return home_pos;
4025 static void clear_and_mark_ind(int ncg,int *move,
4026 int *index_gl,int *cgindex,int *gatindex,
4027 gmx_ga2la_t ga2la,char *bLocalCG,
4028 int *cell_index)
4030 int cg,a0,a1,a;
4032 for(cg=0; cg<ncg; cg++)
4034 if (move[cg] >= 0)
4036 a0 = cgindex[cg];
4037 a1 = cgindex[cg+1];
4038 /* Clear the global indices */
4039 for(a=a0; a<a1; a++)
4041 ga2la_del(ga2la,gatindex[a]);
4043 if (bLocalCG)
4045 bLocalCG[index_gl[cg]] = FALSE;
4047 /* Signal that this cg has moved using the ns cell index.
4048 * Here we set it to -1.
4049 * fill_grid will change it from -1 to 4*grid->ncells.
4051 cell_index[cg] = -1;
4056 static void print_cg_move(FILE *fplog,
4057 gmx_domdec_t *dd,
4058 gmx_large_int_t step,int cg,int dim,int dir,
4059 bool bHaveLimitdAndCMOld,real limitd,
4060 rvec cm_old,rvec cm_new,real pos_d)
4062 gmx_domdec_comm_t *comm;
4063 char buf[22];
4065 comm = dd->comm;
4067 fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
4068 if (bHaveLimitdAndCMOld)
4070 fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4071 ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
4073 else
4075 fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition in direction %c\n",
4076 ddglatnr(dd,dd->cgindex[cg]),dim2char(dim));
4078 fprintf(fplog,"distance out of cell %f\n",
4079 dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4080 if (bHaveLimitdAndCMOld)
4082 fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
4083 cm_old[XX],cm_old[YY],cm_old[ZZ]);
4085 fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
4086 cm_new[XX],cm_new[YY],cm_new[ZZ]);
4087 fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
4088 dim2char(dim),
4089 comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
4090 fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
4091 dim2char(dim),
4092 comm->cell_x0[dim],comm->cell_x1[dim]);
4095 static void cg_move_error(FILE *fplog,
4096 gmx_domdec_t *dd,
4097 gmx_large_int_t step,int cg,int dim,int dir,
4098 bool bHaveLimitdAndCMOld,real limitd,
4099 rvec cm_old,rvec cm_new,real pos_d)
4101 if (fplog)
4103 print_cg_move(fplog, dd,step,cg,dim,dir,
4104 bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
4106 print_cg_move(stderr,dd,step,cg,dim,dir,
4107 bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
4108 gmx_fatal(FARGS,
4109 "A charge group moved too far between two domain decomposition steps\n"
4110 "This usually means that your system is not well equilibrated");
4113 static void rotate_state_atom(t_state *state,int a)
4115 int est;
4117 for(est=0; est<estNR; est++)
4119 if (EST_DISTR(est) && state->flags & (1<<est)) {
4120 switch (est) {
4121 case estX:
4122 /* Rotate the complete state; for a rectangular box only */
4123 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4124 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4125 break;
4126 case estV:
4127 state->v[a][YY] = -state->v[a][YY];
4128 state->v[a][ZZ] = -state->v[a][ZZ];
4129 break;
4130 case estSDX:
4131 state->sd_X[a][YY] = -state->sd_X[a][YY];
4132 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4133 break;
4134 case estCGP:
4135 state->cg_p[a][YY] = -state->cg_p[a][YY];
4136 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4137 break;
4138 case estDISRE_INITF:
4139 case estDISRE_RM3TAV:
4140 case estORIRE_INITF:
4141 case estORIRE_DTAV:
4142 /* These are distances, so not affected by rotation */
4143 break;
4144 default:
4145 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4151 static int dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
4152 gmx_domdec_t *dd,ivec tric_dir,
4153 t_state *state,rvec **f,
4154 t_forcerec *fr,t_mdatoms *md,
4155 bool bCompact,
4156 t_nrnb *nrnb)
4158 int *move;
4159 int npbcdim;
4160 int ncg[DIM*2],nat[DIM*2];
4161 int c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
4162 int mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
4163 int sbuf[2],rbuf[2];
4164 int home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
4165 int flag;
4166 bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
4167 bool bScrew;
4168 ivec dev;
4169 real inv_ncg,pos_d;
4170 matrix tcm;
4171 rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
4172 atom_id *cgindex;
4173 cginfo_mb_t *cginfo_mb;
4174 gmx_domdec_comm_t *comm;
4176 if (dd->bScrewPBC)
4178 check_screw_box(state->box);
4181 comm = dd->comm;
4182 cg_cm = fr->cg_cm;
4184 for(i=0; i<estNR; i++)
4186 if (EST_DISTR(i))
4188 switch (i)
4190 case estX: /* Always present */ break;
4191 case estV: bV = (state->flags & (1<<i)); break;
4192 case estSDX: bSDX = (state->flags & (1<<i)); break;
4193 case estCGP: bCGP = (state->flags & (1<<i)); break;
4194 case estLD_RNG:
4195 case estLD_RNGI:
4196 case estDISRE_INITF:
4197 case estDISRE_RM3TAV:
4198 case estORIRE_INITF:
4199 case estORIRE_DTAV:
4200 /* No processing required */
4201 break;
4202 default:
4203 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4208 if (dd->ncg_tot > comm->nalloc_int)
4210 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4211 srenew(comm->buf_int,comm->nalloc_int);
4213 move = comm->buf_int;
4215 /* Clear the count */
4216 for(c=0; c<dd->ndim*2; c++)
4218 ncg[c] = 0;
4219 nat[c] = 0;
4222 npbcdim = dd->npbcdim;
4224 for(d=0; (d<DIM); d++)
4226 limitd[d] = dd->comm->cellsize_min[d];
4227 if (d >= npbcdim && dd->ci[d] == 0)
4229 cell_x0[d] = -GMX_FLOAT_MAX;
4231 else
4233 cell_x0[d] = comm->cell_x0[d];
4235 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4237 cell_x1[d] = GMX_FLOAT_MAX;
4239 else
4241 cell_x1[d] = comm->cell_x1[d];
4243 if (d < npbcdim)
4245 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4246 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4248 else
4250 /* We check after communication if a charge group moved
4251 * more than one cell. Set the pre-comm check limit to float_max.
4253 limit0[d] = -GMX_FLOAT_MAX;
4254 limit1[d] = GMX_FLOAT_MAX;
4258 make_tric_corr_matrix(npbcdim,state->box,tcm);
4260 cgindex = dd->cgindex;
4262 /* Compute the center of geometry for all home charge groups
4263 * and put them in the box and determine where they should go.
4265 for(cg=0; cg<dd->ncg_home; cg++)
4267 k0 = cgindex[cg];
4268 k1 = cgindex[cg+1];
4269 nrcg = k1 - k0;
4270 if (nrcg == 1)
4272 copy_rvec(state->x[k0],cm_new);
4274 else
4276 inv_ncg = 1.0/nrcg;
4278 clear_rvec(cm_new);
4279 for(k=k0; (k<k1); k++)
4281 rvec_inc(cm_new,state->x[k]);
4283 for(d=0; (d<DIM); d++)
4285 cm_new[d] = inv_ncg*cm_new[d];
4289 clear_ivec(dev);
4290 /* Do pbc and check DD cell boundary crossings */
4291 for(d=DIM-1; d>=0; d--)
4293 if (dd->nc[d] > 1)
4295 bScrew = (dd->bScrewPBC && d == XX);
4296 /* Determine the location of this cg in lattice coordinates */
4297 pos_d = cm_new[d];
4298 if (tric_dir[d])
4300 for(d2=d+1; d2<DIM; d2++)
4302 pos_d += cm_new[d2]*tcm[d2][d];
4305 /* Put the charge group in the triclinic unit-cell */
4306 if (pos_d >= cell_x1[d])
4308 if (pos_d >= limit1[d])
4310 cg_move_error(fplog,dd,step,cg,d,1,TRUE,limitd[d],
4311 cg_cm[cg],cm_new,pos_d);
4313 dev[d] = 1;
4314 if (dd->ci[d] == dd->nc[d] - 1)
4316 rvec_dec(cm_new,state->box[d]);
4317 if (bScrew)
4319 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4320 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4322 for(k=k0; (k<k1); k++)
4324 rvec_dec(state->x[k],state->box[d]);
4325 if (bScrew)
4327 rotate_state_atom(state,k);
4332 else if (pos_d < cell_x0[d])
4334 if (pos_d < limit0[d])
4336 cg_move_error(fplog,dd,step,cg,d,-1,TRUE,limitd[d],
4337 cg_cm[cg],cm_new,pos_d);
4339 dev[d] = -1;
4340 if (dd->ci[d] == 0)
4342 rvec_inc(cm_new,state->box[d]);
4343 if (bScrew)
4345 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4346 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4348 for(k=k0; (k<k1); k++)
4350 rvec_inc(state->x[k],state->box[d]);
4351 if (bScrew)
4353 rotate_state_atom(state,k);
4359 else if (d < npbcdim)
4361 /* Put the charge group in the rectangular unit-cell */
4362 while (cm_new[d] >= state->box[d][d])
4364 rvec_dec(cm_new,state->box[d]);
4365 for(k=k0; (k<k1); k++)
4367 rvec_dec(state->x[k],state->box[d]);
4370 while (cm_new[d] < 0)
4372 rvec_inc(cm_new,state->box[d]);
4373 for(k=k0; (k<k1); k++)
4375 rvec_inc(state->x[k],state->box[d]);
4381 copy_rvec(cm_new,cg_cm[cg]);
4383 /* Determine where this cg should go */
4384 flag = 0;
4385 mc = -1;
4386 for(d=0; d<dd->ndim; d++)
4388 dim = dd->dim[d];
4389 if (dev[dim] == 1)
4391 flag |= DD_FLAG_FW(d);
4392 if (mc == -1)
4394 mc = d*2;
4397 else if (dev[dim] == -1)
4399 flag |= DD_FLAG_BW(d);
4400 if (mc == -1) {
4401 if (dd->nc[dim] > 2)
4403 mc = d*2 + 1;
4405 else
4407 mc = d*2;
4412 move[cg] = mc;
4413 if (mc >= 0)
4415 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4417 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4418 srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4420 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4421 /* We store the cg size in the lower 16 bits
4422 * and the place where the charge group should go
4423 * in the next 6 bits. This saves some communication volume.
4425 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4426 ncg[mc] += 1;
4427 nat[mc] += nrcg;
4431 inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
4432 inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
4434 nvec = 1;
4435 if (bV)
4437 nvec++;
4439 if (bSDX)
4441 nvec++;
4443 if (bCGP)
4445 nvec++;
4448 /* Make sure the communication buffers are large enough */
4449 for(mc=0; mc<dd->ndim*2; mc++)
4451 nvr = ncg[mc] + nat[mc]*nvec;
4452 if (nvr > comm->cgcm_state_nalloc[mc])
4454 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4455 srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
4459 /* Recalculating cg_cm might be cheaper than communicating,
4460 * but that could give rise to rounding issues.
4462 home_pos_cg =
4463 compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
4464 nvec,cg_cm,comm,bCompact);
4466 vec = 0;
4467 home_pos_at =
4468 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4469 nvec,vec++,state->x,comm,bCompact);
4470 if (bV)
4472 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4473 nvec,vec++,state->v,comm,bCompact);
4475 if (bSDX)
4477 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4478 nvec,vec++,state->sd_X,comm,bCompact);
4480 if (bCGP)
4482 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4483 nvec,vec++,state->cg_p,comm,bCompact);
4486 if (bCompact)
4488 compact_ind(dd->ncg_home,move,
4489 dd->index_gl,dd->cgindex,dd->gatindex,
4490 dd->ga2la,comm->bLocalCG,
4491 fr->cginfo);
4493 else
4495 clear_and_mark_ind(dd->ncg_home,move,
4496 dd->index_gl,dd->cgindex,dd->gatindex,
4497 dd->ga2la,comm->bLocalCG,
4498 fr->ns.grid->cell_index);
4501 cginfo_mb = fr->cginfo_mb;
4503 ncg_stay_home = home_pos_cg;
4504 for(d=0; d<dd->ndim; d++)
4506 dim = dd->dim[d];
4507 ncg_recv = 0;
4508 nat_recv = 0;
4509 nvr = 0;
4510 for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
4512 cdd = d*2 + dir;
4513 /* Communicate the cg and atom counts */
4514 sbuf[0] = ncg[cdd];
4515 sbuf[1] = nat[cdd];
4516 if (debug)
4518 fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
4519 d,dir,sbuf[0],sbuf[1]);
4521 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4523 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4525 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4526 srenew(comm->buf_int,comm->nalloc_int);
4529 /* Communicate the charge group indices, sizes and flags */
4530 dd_sendrecv_int(dd, d, dir,
4531 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4532 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4534 nvs = ncg[cdd] + nat[cdd]*nvec;
4535 i = rbuf[0] + rbuf[1] *nvec;
4536 vec_rvec_check_alloc(&comm->vbuf,nvr+i);
4538 /* Communicate cgcm and state */
4539 dd_sendrecv_rvec(dd, d, dir,
4540 comm->cgcm_state[cdd], nvs,
4541 comm->vbuf.v+nvr, i);
4542 ncg_recv += rbuf[0];
4543 nat_recv += rbuf[1];
4544 nvr += i;
4547 /* Process the received charge groups */
4548 buf_pos = 0;
4549 for(cg=0; cg<ncg_recv; cg++)
4551 flag = comm->buf_int[cg*DD_CGIBS+1];
4553 if (dim >= npbcdim && dd->nc[dim] > 2)
4555 /* No pbc in this dim and more than one domain boundary.
4556 * We do a separate check here that a charge group did not move too far.
4558 if (((flag & DD_FLAG_FW(d)) &&
4559 comm->vbuf.v[buf_pos][d] > cell_x1[dim]) ||
4560 ((flag & DD_FLAG_BW(d)) &&
4561 comm->vbuf.v[buf_pos][d] < cell_x0[dim]))
4563 cg_move_error(fplog,dd,step,cg,d,
4564 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4565 FALSE,0,
4566 comm->vbuf.v[buf_pos],
4567 comm->vbuf.v[buf_pos],
4568 comm->vbuf.v[buf_pos][d]);
4572 mc = -1;
4573 if (d < dd->ndim-1)
4575 /* Check which direction this cg should go */
4576 for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
4578 if (dd->bGridJump)
4580 /* The cell boundaries for dimension d2 are not equal
4581 * for each cell row of the lower dimension(s),
4582 * therefore we might need to redetermine where
4583 * this cg should go.
4585 dim2 = dd->dim[d2];
4586 /* If this cg crosses the box boundary in dimension d2
4587 * we can use the communicated flag, so we do not
4588 * have to worry about pbc.
4590 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4591 (flag & DD_FLAG_FW(d2))) ||
4592 (dd->ci[dim2] == 0 &&
4593 (flag & DD_FLAG_BW(d2)))))
4595 /* Clear the two flags for this dimension */
4596 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4597 /* Determine the location of this cg
4598 * in lattice coordinates
4600 pos_d = comm->vbuf.v[buf_pos][dim2];
4601 if (tric_dir[dim2])
4603 for(d3=dim2+1; d3<DIM; d3++)
4605 pos_d +=
4606 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4609 /* Check if we are not at the box edge.
4610 * pbc is only handled in the first step above,
4611 * but this check could move a cg over pbc while
4612 * the first step did not, due to different rounding.
4614 if (pos_d >= cell_x1[dim2] &&
4615 dd->ci[dim2] != dd->nc[dim2]-1)
4617 flag |= DD_FLAG_FW(d2);
4619 else if (pos_d < cell_x0[dim2] &&
4620 dd->ci[dim2] != 0)
4622 flag |= DD_FLAG_BW(d2);
4624 comm->buf_int[cg*DD_CGIBS+1] = flag;
4627 /* Set to which neighboring cell this cg should go */
4628 if (flag & DD_FLAG_FW(d2))
4630 mc = d2*2;
4632 else if (flag & DD_FLAG_BW(d2))
4634 if (dd->nc[dd->dim[d2]] > 2)
4636 mc = d2*2+1;
4638 else
4640 mc = d2*2;
4646 nrcg = flag & DD_FLAG_NRCG;
4647 if (mc == -1)
4649 if (home_pos_cg+1 > dd->cg_nalloc)
4651 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
4652 srenew(dd->index_gl,dd->cg_nalloc);
4653 srenew(dd->cgindex,dd->cg_nalloc+1);
4655 /* Set the global charge group index and size */
4656 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
4657 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
4658 /* Copy the state from the buffer */
4659 if (home_pos_cg >= fr->cg_nalloc)
4661 dd_realloc_fr_cg(fr,home_pos_cg+1);
4662 cg_cm = fr->cg_cm;
4664 copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
4665 /* Set the cginfo */
4666 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
4667 dd->index_gl[home_pos_cg]);
4668 if (comm->bLocalCG)
4670 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
4673 if (home_pos_at+nrcg > state->nalloc)
4675 dd_realloc_state(state,f,home_pos_at+nrcg);
4677 for(i=0; i<nrcg; i++)
4679 copy_rvec(comm->vbuf.v[buf_pos++],
4680 state->x[home_pos_at+i]);
4682 if (bV)
4684 for(i=0; i<nrcg; i++)
4686 copy_rvec(comm->vbuf.v[buf_pos++],
4687 state->v[home_pos_at+i]);
4690 if (bSDX)
4692 for(i=0; i<nrcg; i++)
4694 copy_rvec(comm->vbuf.v[buf_pos++],
4695 state->sd_X[home_pos_at+i]);
4698 if (bCGP)
4700 for(i=0; i<nrcg; i++)
4702 copy_rvec(comm->vbuf.v[buf_pos++],
4703 state->cg_p[home_pos_at+i]);
4706 home_pos_cg += 1;
4707 home_pos_at += nrcg;
4709 else
4711 /* Reallocate the buffers if necessary */
4712 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4714 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4715 srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4717 nvr = ncg[mc] + nat[mc]*nvec;
4718 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
4720 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
4721 srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
4723 /* Copy from the receive to the send buffers */
4724 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
4725 comm->buf_int + cg*DD_CGIBS,
4726 DD_CGIBS*sizeof(int));
4727 memcpy(comm->cgcm_state[mc][nvr],
4728 comm->vbuf.v[buf_pos],
4729 (1+nrcg*nvec)*sizeof(rvec));
4730 buf_pos += 1 + nrcg*nvec;
4731 ncg[mc] += 1;
4732 nat[mc] += nrcg;
4737 /* With sorting (!bCompact) the indices are now only partially up to date
4738 * and ncg_home and nat_home are not the real count, since there are
4739 * "holes" in the arrays for the charge groups that moved to neighbors.
4741 dd->ncg_home = home_pos_cg;
4742 dd->nat_home = home_pos_at;
4744 if (debug)
4746 fprintf(debug,"Finished repartitioning\n");
4749 return ncg_stay_home;
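/* Editorial sketch, helper name hypothetical: the second int communicated
 * per charge group above packs the atom count and the routing flags as
 * nrcg | flag, with the size in the low bits (mask DD_FLAG_NRCG) and the
 * DD_FLAG_FW(d)/DD_FLAG_BW(d) routing bits above it:
 */
#if 0
static void unpack_cggl_flag(int packed, int *nrcg, int *flags)
{
    *nrcg  = packed & DD_FLAG_NRCG;   /* charge group size, lower bits */
    *flags = packed & ~DD_FLAG_NRCG;  /* forward/backward routing bits */
}
#endif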
4752 void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
4754 dd->comm->cycl[ddCycl] += cycles;
4755 dd->comm->cycl_n[ddCycl]++;
4756 if (cycles > dd->comm->cycl_max[ddCycl])
4758 dd->comm->cycl_max[ddCycl] = cycles;
4762 static double force_flop_count(t_nrnb *nrnb)
4764 int i;
4765 double sum;
4766 const char *name;
4768 sum = 0;
4769 for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
4771 /* To get closer to the real timings, we halve the count
4772 * for the normal loops and halve it again for water loops.
4774 name = nrnb_str(i);
4775 if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
4777 sum += nrnb->n[i]*0.25*cost_nrnb(i);
4779 else
4781 sum += nrnb->n[i]*0.50*cost_nrnb(i);
4784 for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
4786 name = nrnb_str(i);
4787 if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
4788 sum += nrnb->n[i]*cost_nrnb(i);
4790 for(i=eNR_BONDS; i<=eNR_WALLS; i++)
4792 sum += nrnb->n[i]*cost_nrnb(i);
4795 return sum;
4798 void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
4800 if (dd->comm->eFlop)
4802 dd->comm->flop -= force_flop_count(nrnb);
4805 void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
4807 if (dd->comm->eFlop)
4809 dd->comm->flop += force_flop_count(nrnb);
4810 dd->comm->flop_n++;
4814 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
4816 int i;
4818 for(i=0; i<ddCyclNr; i++)
4820 dd->comm->cycl[i] = 0;
4821 dd->comm->cycl_n[i] = 0;
4822 dd->comm->cycl_max[i] = 0;
4824 dd->comm->flop = 0;
4825 dd->comm->flop_n = 0;
4828 static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
4830 gmx_domdec_comm_t *comm;
4831 gmx_domdec_load_t *load;
4832 gmx_domdec_root_t *root=NULL;
4833 int d,dim,cid,i,pos;
4834 float cell_frac=0,sbuf[DD_NLOAD_MAX];
4835 bool bSepPME;
4837 if (debug)
4839 fprintf(debug,"get_load_distribution start\n");
4842 wallcycle_start(wcycle,ewcDDCOMMLOAD);
4844 comm = dd->comm;
4846 bSepPME = (dd->pme_nodeid >= 0);
4848 for(d=dd->ndim-1; d>=0; d--)
4850 dim = dd->dim[d];
4851 /* Check if we participate in the communication in this dimension */
4852 if (d == dd->ndim-1 ||
4853 (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
4855 load = &comm->load[d];
4856 if (dd->bGridJump)
4858 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
4860 pos = 0;
4861 if (d == dd->ndim-1)
4863 sbuf[pos++] = dd_force_load(comm);
4864 sbuf[pos++] = sbuf[0];
4865 if (dd->bGridJump)
4867 sbuf[pos++] = sbuf[0];
4868 sbuf[pos++] = cell_frac;
4869 if (d > 0)
4871 sbuf[pos++] = comm->cell_f_max0[d];
4872 sbuf[pos++] = comm->cell_f_min1[d];
4875 if (bSepPME)
4877 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
4878 sbuf[pos++] = comm->cycl[ddCyclPME];
4881 else
4883 sbuf[pos++] = comm->load[d+1].sum;
4884 sbuf[pos++] = comm->load[d+1].max;
4885 if (dd->bGridJump)
4887 sbuf[pos++] = comm->load[d+1].sum_m;
4888 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
4889 sbuf[pos++] = comm->load[d+1].flags;
4890 if (d > 0)
4892 sbuf[pos++] = comm->cell_f_max0[d];
4893 sbuf[pos++] = comm->cell_f_min1[d];
4896 if (bSepPME)
4898 sbuf[pos++] = comm->load[d+1].mdf;
4899 sbuf[pos++] = comm->load[d+1].pme;
4902 load->nload = pos;
4903 /* Communicate a row in DD direction d.
4904 * The communicators are set up such that the root always has rank 0.
4906 #ifdef GMX_MPI
4907 MPI_Gather(sbuf ,load->nload*sizeof(float),MPI_BYTE,
4908 load->load,load->nload*sizeof(float),MPI_BYTE,
4909 0,comm->mpi_comm_load[d]);
4910 #endif
4911 if (dd->ci[dim] == dd->master_ci[dim])
4913 /* We are the root, process this row */
4914 if (comm->bDynLoadBal)
4916 root = comm->root[d];
4918 load->sum = 0;
4919 load->max = 0;
4920 load->sum_m = 0;
4921 load->cvol_min = 1;
4922 load->flags = 0;
4923 load->mdf = 0;
4924 load->pme = 0;
4925 pos = 0;
4926 for(i=0; i<dd->nc[dim]; i++)
4928 load->sum += load->load[pos++];
4929 load->max = max(load->max,load->load[pos]);
4930 pos++;
4931 if (dd->bGridJump)
4933 if (root->bLimited)
4935 /* This direction could not be load balanced properly,
4936 * therefore we need to use the maximum instead of the average load.
4938 load->sum_m = max(load->sum_m,load->load[pos]);
4940 else
4942 load->sum_m += load->load[pos];
4944 pos++;
4945 load->cvol_min = min(load->cvol_min,load->load[pos]);
4946 pos++;
4947 if (d < dd->ndim-1)
4949 load->flags = (int)(load->load[pos++] + 0.5);
4951 if (d > 0)
4953 root->cell_f_max0[i] = load->load[pos++];
4954 root->cell_f_min1[i] = load->load[pos++];
4957 if (bSepPME)
4959 load->mdf = max(load->mdf,load->load[pos]);
4960 pos++;
4961 load->pme = max(load->pme,load->load[pos]);
4962 pos++;
4965 if (comm->bDynLoadBal && root->bLimited)
4967 load->sum_m *= dd->nc[dim];
4968 load->flags |= (1<<d);
4974 if (DDMASTER(dd))
4976 comm->nload += dd_load_count(comm);
4977 comm->load_step += comm->cycl[ddCyclStep];
4978 comm->load_sum += comm->load[0].sum;
4979 comm->load_max += comm->load[0].max;
4980 if (comm->bDynLoadBal)
4982 for(d=0; d<dd->ndim; d++)
4984 if (comm->load[0].flags & (1<<d))
4986 comm->load_lim[d]++;
4990 if (bSepPME)
4992 comm->load_mdf += comm->load[0].mdf;
4993 comm->load_pme += comm->load[0].pme;
4997 wallcycle_stop(wcycle,ewcDDCOMMLOAD);
4999 if (debug)
5001 fprintf(debug,"get_load_distribution finished\n");
5005 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5007 /* Return the relative performance loss on the total run time
5008 * due to the force calculation load imbalance.
5010 if (dd->comm->nload > 0)
5012 return
5013 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5014 (dd->comm->load_step*dd->nnodes);
5016 else
5018 return 0;
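/* Worked example with hypothetical numbers: on 4 PP nodes with per-step
 * force loads {0.8, 1.0, 1.0, 1.2} (load_sum = 4.0, load_max = 1.2) and a
 * total step load of 2.0 per node, the loss is
 * (1.2*4 - 4.0)/(2.0*4) = 0.10, i.e. 10% of the run time is spent waiting
 * for the most loaded node.
 */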
5022 static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
5024 char buf[STRLEN];
5025 int npp,npme,nnodes,d,limp;
5026 float imbal,pme_f_ratio,lossf,lossp=0;
5027 bool bLim;
5028 gmx_domdec_comm_t *comm;
5030 comm = dd->comm;
5031 if (DDMASTER(dd) && comm->nload > 0)
5033 npp = dd->nnodes;
5034 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5035 nnodes = npp + npme;
5036 imbal = comm->load_max*npp/comm->load_sum - 1;
5037 lossf = dd_force_imb_perf_loss(dd);
5038 sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
5039 fprintf(fplog,"%s",buf);
5040 fprintf(stderr,"\n");
5041 fprintf(stderr,"%s",buf);
5042 sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
5043 fprintf(fplog,"%s",buf);
5044 fprintf(stderr,"%s",buf);
5045 bLim = FALSE;
5046 if (comm->bDynLoadBal)
5048 sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5049 for(d=0; d<dd->ndim; d++)
5051 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5052 sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
5053 if (limp >= 50)
5055 bLim = TRUE;
5058 sprintf(buf+strlen(buf),"\n");
5059 fprintf(fplog,"%s",buf);
5060 fprintf(stderr,"%s",buf);
5062 if (npme > 0)
5064 pme_f_ratio = comm->load_pme/comm->load_mdf;
5065 lossp = (comm->load_pme - comm->load_mdf)/comm->load_step;
5066 if (lossp <= 0)
5068 lossp *= (float)npme/(float)nnodes;
5070 else
5072 lossp *= (float)npp/(float)nnodes;
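/* If lossp <= 0 the PME nodes have less work than the PP nodes, so only
 * the npme out of nnodes nodes are waiting and we weight by npme/nnodes;
 * otherwise the npp PP nodes are the ones waiting for PME results.
 */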
5074 sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
5075 fprintf(fplog,"%s",buf);
5076 fprintf(stderr,"%s",buf);
5077 sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
5078 fprintf(fplog,"%s",buf);
5079 fprintf(stderr,"%s",buf);
5081 fprintf(fplog,"\n");
5082 fprintf(stderr,"\n");
5084 if (lossf >= DD_PERF_LOSS)
5086 sprintf(buf,
5087 "NOTE: %.1f %% performance was lost due to load imbalance\n"
5088 " in the domain decomposition.\n",lossf*100);
5089 if (!comm->bDynLoadBal)
5091 sprintf(buf+strlen(buf)," You might want to use dynamic load balancing (option -dlb).\n");
5093 else if (bLim)
5095 sprintf(buf+strlen(buf)," You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5097 fprintf(fplog,"%s\n",buf);
5098 fprintf(stderr,"%s\n",buf);
5100 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5102 sprintf(buf,
5103 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5104 " had %s work to do than the PP nodes.\n"
5105 " You might want to %s the number of PME nodes\n"
5106 " or %s the cut-off and the grid spacing.\n",
5107 fabs(lossp*100),
5108 (lossp < 0) ? "less" : "more",
5109 (lossp < 0) ? "decrease" : "increase",
5110 (lossp < 0) ? "decrease" : "increase");
5111 fprintf(fplog,"%s\n",buf);
5112 fprintf(stderr,"%s\n",buf);
5117 static float dd_vol_min(gmx_domdec_t *dd)
5119 return dd->comm->load[0].cvol_min*dd->nnodes;
5122 static int dd_load_flags(gmx_domdec_t *dd)
5124 return dd->comm->load[0].flags;
5127 static float dd_f_imbal(gmx_domdec_t *dd)
5129 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5132 static float dd_pme_f_ratio(gmx_domdec_t *dd)
5134 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5137 static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
5139 int flags,d;
5140 char buf[22];
5142 flags = dd_load_flags(dd);
5143 if (flags)
5145 fprintf(fplog,
5146 "DD load balancing is limited by minimum cell size in dimension");
5147 for(d=0; d<dd->ndim; d++)
5149 if (flags & (1<<d))
5151 fprintf(fplog," %c",dim2char(dd->dim[d]));
5154 fprintf(fplog,"\n");
5156 fprintf(fplog,"DD step %s",gmx_step_str(step,buf));
5157 if (dd->comm->bDynLoadBal)
5159 fprintf(fplog," vol min/aver %5.3f%c",
5160 dd_vol_min(dd),flags ? '!' : ' ');
5162 fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
5163 if (dd->comm->cycl_n[ddCyclPME])
5165 fprintf(fplog," pme mesh/force %5.3f",dd_pme_f_ratio(dd));
5167 fprintf(fplog,"\n\n");
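/* Illustrative example of the resulting log line (values are made up):
 *   DD step 5000 vol min/aver 0.780! load imb.: force  8.5% pme mesh/force 0.830
 * where '!' marks that DLB was limited by the minimum cell size.
 */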
5170 static void dd_print_load_verbose(gmx_domdec_t *dd)
5172 if (dd->comm->bDynLoadBal)
5174 fprintf(stderr,"vol %4.2f%c ",
5175 dd_vol_min(dd),dd_load_flags(dd) ? '!' : ' ');
5177 fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
5178 if (dd->comm->cycl_n[ddCyclPME])
5180 fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
5184 #ifdef GMX_MPI
5185 static void make_load_communicator(gmx_domdec_t *dd,MPI_Group g_all,
5186 int dim_ind,ivec loc)
5188 MPI_Group g_row;
5189 MPI_Comm c_row;
5190 int dim,i,*rank;
5191 ivec loc_c;
5192 gmx_domdec_root_t *root;
5194 dim = dd->dim[dim_ind];
5195 copy_ivec(loc,loc_c);
5196 snew(rank,dd->nc[dim]);
5197 for(i=0; i<dd->nc[dim]; i++)
5199 loc_c[dim] = i;
5200 rank[i] = dd_index(dd->nc,loc_c);
5202 /* Here we create a new group, that does not necessarily
5203 * include our process. But MPI_Comm_create needs to be
5204 * called by all the processes in the original communicator.
5205 * Calling MPI_Group_free afterwards gives errors, so I assume
5206 * the group is also needed by all processes. (B. Hess)
5208 MPI_Group_incl(g_all,dd->nc[dim],rank,&g_row);
5209 MPI_Comm_create(dd->mpi_comm_all,g_row,&c_row);
5210 if (c_row != MPI_COMM_NULL)
5212 /* This process is part of the group */
5213 dd->comm->mpi_comm_load[dim_ind] = c_row;
5214 if (dd->comm->eDLB != edlbNO)
5216 if (dd->ci[dim] == dd->master_ci[dim])
5218 /* This is the root process of this row */
5219 snew(dd->comm->root[dim_ind],1);
5220 root = dd->comm->root[dim_ind];
5221 snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
5222 snew(root->old_cell_f,dd->nc[dim]+1);
5223 snew(root->bCellMin,dd->nc[dim]);
5224 if (dim_ind > 0)
5226 snew(root->cell_f_max0,dd->nc[dim]);
5227 snew(root->cell_f_min1,dd->nc[dim]);
5228 snew(root->bound_min,dd->nc[dim]);
5229 snew(root->bound_max,dd->nc[dim]);
5231 snew(root->buf_ncd,dd->nc[dim]);
5233 else
5235 /* This is not a root process, we only need to receive cell_f */
5236 snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
5239 if (dd->ci[dim] == dd->master_ci[dim])
5241 snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
5244 sfree(rank);
5246 #endif
5248 static void make_load_communicators(gmx_domdec_t *dd)
5250 #ifdef GMX_MPI
5251 MPI_Group g_all;
5252 int dim0,dim1,i,j;
5253 ivec loc;
5255 if (debug)
5256 fprintf(debug,"Making load communicators\n");
5258 MPI_Comm_group(dd->mpi_comm_all,&g_all);
5260 snew(dd->comm->load,dd->ndim);
5261 snew(dd->comm->mpi_comm_load,dd->ndim);
5263 clear_ivec(loc);
5264 make_load_communicator(dd,g_all,0,loc);
5265 if (dd->ndim > 1) {
5266 dim0 = dd->dim[0];
5267 for(i=0; i<dd->nc[dim0]; i++) {
5268 loc[dim0] = i;
5269 make_load_communicator(dd,g_all,1,loc);
5272 if (dd->ndim > 2) {
5273 dim0 = dd->dim[0];
5274 for(i=0; i<dd->nc[dim0]; i++) {
5275 loc[dim0] = i;
5276 dim1 = dd->dim[1];
5277 for(j=0; j<dd->nc[dim1]; j++) {
5278 loc[dim1] = j;
5279 make_load_communicator(dd,g_all,2,loc);
5284 MPI_Group_free(&g_all);
5286 if (debug)
5287 fprintf(debug,"Finished making load communicators\n");
5288 #endif
5291 void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
5293 bool bZYX;
5294 int d,dim,i,j,m;
5295 ivec tmp,s;
5296 int nzone,nzonep;
5297 ivec dd_zp[DD_MAXIZONE];
5298 gmx_domdec_zones_t *zones;
5299 gmx_domdec_ns_ranges_t *izone;
5301 for(d=0; d<dd->ndim; d++)
5303 dim = dd->dim[d];
5304 copy_ivec(dd->ci,tmp);
5305 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5306 dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
5307 copy_ivec(dd->ci,tmp);
5308 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5309 dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
5310 if (debug)
5312 fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5313 dd->rank,dim,
5314 dd->neighbor[d][0],
5315 dd->neighbor[d][1]);
5319 if (DDMASTER(dd))
5321 fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
5322 dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
5324 if (fplog)
5326 fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5327 dd->ndim,
5328 dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
5329 dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5331 switch (dd->ndim)
5333 case 3:
5334 nzone = dd_z3n;
5335 nzonep = dd_zp3n;
5336 for(i=0; i<nzonep; i++)
5338 copy_ivec(dd_zp3[i],dd_zp[i]);
5340 break;
5341 case 2:
5342 nzone = dd_z2n;
5343 nzonep = dd_zp2n;
5344 for(i=0; i<nzonep; i++)
5346 copy_ivec(dd_zp2[i],dd_zp[i]);
5348 break;
5349 case 1:
5350 nzone = dd_z1n;
5351 nzonep = dd_zp1n;
5352 for(i=0; i<nzonep; i++)
5354 copy_ivec(dd_zp1[i],dd_zp[i]);
5356 break;
5357 default:
5358 gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
5359 nzone = 0;
5360 nzonep = 0;
5363 zones = &dd->comm->zones;
5365 for(i=0; i<nzone; i++)
5367 m = 0;
5368 clear_ivec(zones->shift[i]);
5369 for(d=0; d<dd->ndim; d++)
5371 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5375 zones->n = nzone;
5376 for(i=0; i<nzone; i++)
5378 for(d=0; d<DIM; d++)
5380 s[d] = dd->ci[d] - zones->shift[i][d];
5381 if (s[d] < 0)
5383 s[d] += dd->nc[d];
5385 else if (s[d] >= dd->nc[d])
5387 s[d] -= dd->nc[d];
5391 zones->nizone = nzonep;
5392 for(i=0; i<zones->nizone; i++)
5394 if (dd_zp[i][0] != i)
5396 gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
5398 izone = &zones->izone[i];
5399 izone->j0 = dd_zp[i][1];
5400 izone->j1 = dd_zp[i][2];
5401 for(dim=0; dim<DIM; dim++)
5403 if (dd->nc[dim] == 1)
5405 /* All shifts should be allowed */
5406 izone->shift0[dim] = -1;
5407 izone->shift1[dim] = 1;
5409 else
5411 /*
5412 izone->shift0[d] = 0;
5413 izone->shift1[d] = 0;
5414 for(j=izone->j0; j<izone->j1; j++) {
5415 if (dd->shift[j][d] > dd->shift[i][d])
5416 izone->shift0[d] = -1;
5417 if (dd->shift[j][d] < dd->shift[i][d])
5418 izone->shift1[d] = 1;
5419 }
5420 */
5422 int shift_diff;
5424 /* Assume the shifts are not more than 1 cell */
5425 izone->shift0[dim] = 1;
5426 izone->shift1[dim] = -1;
5427 for(j=izone->j0; j<izone->j1; j++)
5429 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5430 if (shift_diff < izone->shift0[dim])
5432 izone->shift0[dim] = shift_diff;
5434 if (shift_diff > izone->shift1[dim])
5436 izone->shift1[dim] = shift_diff;
5443 if (dd->comm->eDLB != edlbNO)
5445 snew(dd->comm->root,dd->ndim);
5448 if (dd->comm->bRecordLoad)
5450 make_load_communicators(dd);
5454 static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
5456 gmx_domdec_t *dd;
5457 gmx_domdec_comm_t *comm;
5458 int i,rank,*buf;
5459 ivec periods;
5460 #ifdef GMX_MPI
5461 MPI_Comm comm_cart;
5462 #endif
5464 dd = cr->dd;
5465 comm = dd->comm;
5467 #ifdef GMX_MPI
5468 if (comm->bCartesianPP)
5470 /* Set up cartesian communication for the particle-particle part */
5471 if (fplog)
5473 fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
5474 dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
5477 for(i=0; i<DIM; i++)
5479 periods[i] = TRUE;
5481 MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
5482 &comm_cart);
5483 /* We overwrite the old communicator with the new cartesian one */
5484 cr->mpi_comm_mygroup = comm_cart;
5487 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5488 MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);
5490 if (comm->bCartesianPP_PME)
5492 /* Since we want to use the original Cartesian setup for the simulation,
5493 * and not the one after splitting, we need to make an index.
5495 snew(comm->ddindex2ddnodeid,dd->nnodes);
5496 comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
5497 gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
5498 /* Get the rank of the DD master,
5499 * above we made sure that the master node is a PP node.
5501 if (MASTER(cr))
5503 rank = dd->rank;
5505 else
5507 rank = 0;
5509 MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
5511 else if (comm->bCartesianPP)
5513 if (cr->npmenodes == 0)
5515 /* The PP communicator is also
5516 * the communicator for this simulation
5518 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5520 cr->nodeid = dd->rank;
5522 MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);
5524 /* We need to make an index to go from the coordinates
5525 * to the nodeid of this simulation.
5527 snew(comm->ddindex2simnodeid,dd->nnodes);
5528 snew(buf,dd->nnodes);
5529 if (cr->duty & DUTY_PP)
5531 buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
5533 /* Communicate the ddindex to simulation nodeid index */
5534 MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
5535 cr->mpi_comm_mysim);
5536 sfree(buf);
5538 /* Determine the master coordinates and rank.
5539 * The DD master should be the same node as the master of this sim.
5541 for(i=0; i<dd->nnodes; i++)
5543 if (comm->ddindex2simnodeid[i] == 0)
5545 ddindex2xyz(dd->nc,i,dd->master_ci);
5546 MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
5549 if (debug)
5551 fprintf(debug,"The master rank is %d\n",dd->masterrank);
5554 else
5556 /* No Cartesian communicators */
5557 /* We use the rank in dd->comm->all as DD index */
5558 ddindex2xyz(dd->nc,dd->rank,dd->ci);
5559 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5560 dd->masterrank = 0;
5561 clear_ivec(dd->master_ci);
5563 #endif
5565 if (fplog)
5567 fprintf(fplog,
5568 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5569 dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5571 if (debug)
5573 fprintf(debug,
5574 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5575 dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5579 static void receive_ddindex2simnodeid(t_commrec *cr)
5581 gmx_domdec_t *dd;
5583 gmx_domdec_comm_t *comm;
5584 int *buf;
5586 dd = cr->dd;
5587 comm = dd->comm;
5589 #ifdef GMX_MPI
5590 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
5592 snew(comm->ddindex2simnodeid,dd->nnodes);
5593 snew(buf,dd->nnodes);
5594 if (cr->duty & DUTY_PP)
5596 buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
5598 #ifdef GMX_MPI
5599 /* Communicate the ddindex to simulation nodeid index */
5600 MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
5601 cr->mpi_comm_mysim);
5602 #endif
5603 sfree(buf);
5605 #endif
5608 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
5609 int ncg,int natoms)
5611 gmx_domdec_master_t *ma;
5612 int i;
5614 snew(ma,1);
5616 snew(ma->ncg,dd->nnodes);
5617 snew(ma->index,dd->nnodes+1);
5618 snew(ma->cg,ncg);
5619 snew(ma->nat,dd->nnodes);
5620 snew(ma->ibuf,dd->nnodes*2);
5621 snew(ma->cell_x,DIM);
5622 for(i=0; i<DIM; i++)
5624 snew(ma->cell_x[i],dd->nc[i]+1);
5627 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
5629 ma->vbuf = NULL;
5631 else
5633 snew(ma->vbuf,natoms);
5636 return ma;
5639 static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
5640 int reorder)
5642 gmx_domdec_t *dd;
5643 gmx_domdec_comm_t *comm;
5644 int i,rank;
5645 bool bDiv[DIM];
5646 ivec periods;
5647 #ifdef GMX_MPI
5648 MPI_Comm comm_cart;
5649 #endif
5651 dd = cr->dd;
5652 comm = dd->comm;
5654 if (comm->bCartesianPP)
5656 for(i=1; i<DIM; i++)
5658 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
5660 if (bDiv[YY] || bDiv[ZZ])
5662 comm->bCartesianPP_PME = TRUE;
5663 /* If we have 2D PME decomposition, which is always in x+y,
5664 * we stack the PME only nodes in z.
5665 * Otherwise we choose the direction that provides the thinnest slab
5666 * of PME only nodes as this will have the least effect
5667 * on the PP communication.
5668 * But for the PME communication the opposite might be better.
5670 if (bDiv[ZZ] && (comm->npmenodes_minor > 1 ||
5671 !bDiv[YY] ||
5672 dd->nc[YY] > dd->nc[ZZ]))
5674 comm->cartpmedim = ZZ;
5676 else
5678 comm->cartpmedim = YY;
5680 comm->ntot[comm->cartpmedim]
5681 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
5683 else if (fplog)
5685 fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
5686 fprintf(fplog,
5687 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5691 #ifdef GMX_MPI
5692 if (comm->bCartesianPP_PME)
5694 if (fplog)
5696 fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
5699 for(i=0; i<DIM; i++)
5701 periods[i] = TRUE;
5703 MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
5704 &comm_cart);
5706 MPI_Comm_rank(comm_cart,&rank);
5707 if (MASTERNODE(cr) && rank != 0)
5709 gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
5712 /* With this assignment we lose the link to the original communicator,
5713 * which will usually be MPI_COMM_WORLD, unless we have a multi-simulation.
5715 cr->mpi_comm_mysim = comm_cart;
5716 cr->sim_nodeid = rank;
5718 MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);
5720 if (fplog)
5722 fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
5723 cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5726 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
5728 cr->duty = DUTY_PP;
5730 if (cr->npmenodes == 0 ||
5731 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
5733 cr->duty = DUTY_PME;
5736 /* Split the sim communicator into PP and PME only nodes */
5737 MPI_Comm_split(cr->mpi_comm_mysim,
5738 cr->duty,
5739 dd_index(comm->ntot,dd->ci),
5740 &cr->mpi_comm_mygroup);
5742 else
5744 switch (dd_node_order)
5746 case ddnoPP_PME:
5747 if (fplog)
5749 fprintf(fplog,"Order of the nodes: PP first, PME last\n");
5751 break;
5752 case ddnoINTERLEAVE:
5753 /* Interleave the PP-only and PME-only nodes,
5754 * as on clusters with dual-core machines this will double
5755 * the communication bandwidth of the PME processes
5756 * and thus speed up the PP <-> PME and inter PME communication.
5758 if (fplog)
5760 fprintf(fplog,"Interleaving PP and PME nodes\n");
5762 comm->pmenodes = dd_pmenodes(cr);
5763 break;
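/* Illustrative pattern (the actual assignment comes from dd_pmenodes()):
 * with 12 nodes of which 4 are PME-only, interleaving gives
 * PP PP PME PP PP PME PP PP PME PP PP PME.
 */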
5764 case ddnoCARTESIAN:
5765 break;
5766 default:
5767 gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
5770 if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1)
5772 cr->duty = DUTY_PME;
5774 else
5776 cr->duty = DUTY_PP;
5779 /* Split the sim communicator into PP and PME only nodes */
5780 MPI_Comm_split(cr->mpi_comm_mysim,
5781 cr->duty,
5782 cr->nodeid,
5783 &cr->mpi_comm_mygroup);
5784 MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid);
5786 #endif
5788 if (fplog)
5790 fprintf(fplog,"This is a %s only node\n\n",
5791 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
5795 void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
5797 gmx_domdec_t *dd;
5798 gmx_domdec_comm_t *comm;
5799 int CartReorder;
5801 dd = cr->dd;
5802 comm = dd->comm;
5804 copy_ivec(dd->nc,comm->ntot);
5806 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
5807 comm->bCartesianPP_PME = FALSE;
5809 /* Reorder the nodes by default. This might change the MPI ranks.
5810 * Real reordering is only supported on very few architectures,
5811 * Blue Gene is one of them.
5813 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
5815 if (cr->npmenodes > 0)
5817 /* Split the communicator into a PP and PME part */
5818 split_communicator(fplog,cr,dd_node_order,CartReorder);
5819 if (comm->bCartesianPP_PME)
5821 /* We (possibly) reordered the nodes in split_communicator,
5822 * so it is no longer required in make_pp_communicator.
5824 CartReorder = FALSE;
5827 else
5829 /* All nodes do PP and PME */
5830 #ifdef GMX_MPI
5831 /* We do not require separate communicators */
5832 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
5833 #endif
5836 if (cr->duty & DUTY_PP)
5838 /* Copy or make a new PP communicator */
5839 make_pp_communicator(fplog,cr,CartReorder);
5841 else
5843 receive_ddindex2simnodeid(cr);
5846 if (!(cr->duty & DUTY_PME))
5848 /* Set up the communication to our PME node */
5849 dd->pme_nodeid = dd_simnode2pmenode(cr,cr->sim_nodeid);
5850 dd->pme_receive_vir_ener = receive_vir_ener(cr);
5851 if (debug)
5853 fprintf(debug,"My pme_nodeid %d receive ener %d\n",
5854 dd->pme_nodeid,dd->pme_receive_vir_ener);
5857 else
5859 dd->pme_nodeid = -1;
5862 if (DDMASTER(dd))
5864 dd->ma = init_gmx_domdec_master_t(dd,
5865 comm->cgs_gl.nr,
5866 comm->cgs_gl.index[comm->cgs_gl.nr]);
5870 static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
5872 real *slb_frac,tot;
5873 int i,n;
5874 double dbl;
5876 slb_frac = NULL;
5877 if (nc > 1 && size_string != NULL)
5879 if (fplog)
5881 fprintf(fplog,"Using static load balancing for the %s direction\n",
5882 dir);
5884 snew(slb_frac,nc);
5885 tot = 0;
5886 for (i=0; i<nc; i++)
5888 dbl = 0;
5889 sscanf(size_string,"%lf%n",&dbl,&n);
5890 if (dbl == 0)
5892 gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
5894 slb_frac[i] = dbl;
5895 size_string += n;
5896 tot += slb_frac[i];
5898 /* Normalize */
5899 if (fplog)
5901 fprintf(fplog,"Relative cell sizes:");
5903 for (i=0; i<nc; i++)
5905 slb_frac[i] /= tot;
5906 if (fplog)
5908 fprintf(fplog," %5.3f",slb_frac[i]);
5911 if (fplog)
5913 fprintf(fplog,"\n");
5917 return slb_frac;
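/* Illustrative example: with nc = 4 and size_string = "1 2 2 1", e.g. from
 * the mdrun -ddcsx option, the normalized relative cell sizes become
 * 0.167, 0.333, 0.333 and 0.167 of the box along this direction.
 */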
5920 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
5922 int n,nmol,ftype;
5923 gmx_mtop_ilistloop_t iloop;
5924 t_ilist *il;
5926 n = 0;
5927 iloop = gmx_mtop_ilistloop_init(mtop);
5928 while (gmx_mtop_ilistloop_next(iloop,&il,&nmol))
5930 for(ftype=0; ftype<F_NRE; ftype++)
5932 if ((interaction_function[ftype].flags & IF_BOND) &&
5933 NRAL(ftype) > 2)
5935 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
5940 return n;
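/* Example: for angles NRAL(ftype) is 3 and each interaction occupies
 * 1 + 3 = 4 entries (type index plus 3 atoms) in il[ftype].iatoms, so
 * il[ftype].nr/4 angles are counted per molecule of this type.
 */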
5943 static int dd_nst_env(FILE *fplog,const char *env_var,int def)
5945 char *val;
5946 int nst;
5948 nst = def;
5949 val = getenv(env_var);
5950 if (val)
5952 if (sscanf(val,"%d",&nst) <= 0)
5954 nst = 1;
5956 if (fplog)
5958 fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
5959 env_var,val,nst);
5963 return nst;
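/* Usage example: running with the environment variable GMX_DD_DEBUG=1 set
 * makes dd_nst_env(fplog,"GMX_DD_DEBUG",0) return 1 and report the
 * override in the log file.
 */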
5966 static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
5968 if (MASTER(cr))
5970 fprintf(stderr,"\n%s\n",warn_string);
5972 if (fplog)
5974 fprintf(fplog,"\n%s\n",warn_string);
5978 static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
5979 t_inputrec *ir,FILE *fplog)
5981 if (ir->ePBC == epbcSCREW &&
5982 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
5984 gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
5987 if (ir->ns_type == ensSIMPLE)
5989 gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
5992 if (ir->nstlist == 0)
5994 gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
5997 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
5999 dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6003 static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
6005 int di,d;
6006 real r;
6008 r = ddbox->box_size[XX];
6009 for(di=0; di<dd->ndim; di++)
6011 d = dd->dim[di];
6012 /* Check using the initial average cell size */
6013 r = min(r,ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6016 return r;
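/* Illustrative example: for a rectangular 6 x 6 x 6 nm box (skew_fac = 1)
 * decomposed 4 x 2 x 1, the average cell sizes are 1.5, 3 and 6 nm along
 * x, y and z, so this returns 1.5 nm.
 */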
6019 static int check_dlb_support(FILE *fplog,t_commrec *cr,
6020 const char *dlb_opt,bool bRecordLoad,
6021 unsigned long Flags,t_inputrec *ir)
6023 gmx_domdec_t *dd;
6024 int eDLB=-1;
6025 char buf[STRLEN];
6027 switch (dlb_opt[0])
6029 case 'a': eDLB = edlbAUTO; break;
6030 case 'n': eDLB = edlbNO; break;
6031 case 'y': eDLB = edlbYES; break;
6032 default: gmx_incons("Unknown dlb_opt");
6035 if (Flags & MD_RERUN)
6037 return edlbNO;
6040 if (!EI_DYNAMICS(ir->eI))
6042 if (eDLB == edlbYES)
6044 sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
6045 dd_warning(cr,fplog,buf);
6048 return edlbNO;
6051 if (!bRecordLoad)
6053 dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6055 return edlbNO;
6058 if (Flags & MD_REPRODUCIBLE)
6060 switch (eDLB)
6062 case edlbNO:
6063 break;
6064 case edlbAUTO:
6065 dd_warning(cr,fplog,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
6066 eDLB = edlbNO;
6067 break;
6068 case edlbYES:
6069 dd_warning(cr,fplog,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6070 break;
6071 default:
6072 gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
6073 break;
6077 return eDLB;
6080 static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
6082 int dim;
6084 dd->ndim = 0;
6085 if (getenv("GMX_DD_ORDER_ZYX"))
6087 /* Decomposition order z,y,x */
6088 if (fplog)
6090 fprintf(fplog,"Using domain decomposition order z, y, x\n");
6092 for(dim=DIM-1; dim>=0; dim--)
6094 if (dd->nc[dim] > 1)
6096 dd->dim[dd->ndim++] = dim;
6100 else
6102 /* Decomposition order x,y,z */
6103 for(dim=0; dim<DIM; dim++)
6105 if (dd->nc[dim] > 1)
6107 dd->dim[dd->ndim++] = dim;
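/* Example: with nc = {4,2,1} the default x,y,z order gives ndim = 2 with
 * dim = {XX,YY}; setting GMX_DD_ORDER_ZYX reverses this to dim = {YY,XX}.
 */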
6113 static gmx_domdec_comm_t *init_dd_comm(void)
6115 gmx_domdec_comm_t *comm;
6116 int i;
6118 snew(comm,1);
6119 snew(comm->cggl_flag,DIM*2);
6120 snew(comm->cgcm_state,DIM*2);
6121 for(i=0; i<DIM*2; i++)
6123 comm->cggl_flag_nalloc[i] = 0;
6124 comm->cgcm_state_nalloc[i] = 0;
6127 comm->nalloc_int = 0;
6128 comm->buf_int = NULL;
6130 vec_rvec_init(&comm->vbuf);
6132 comm->n_load_have = 0;
6133 comm->n_load_collect = 0;
6135 for(i=0; i<ddnatNR-ddnatZONE; i++)
6137 comm->sum_nat[i] = 0;
6139 comm->ndecomp = 0;
6140 comm->nload = 0;
6141 comm->load_step = 0;
6142 comm->load_sum = 0;
6143 comm->load_max = 0;
6144 clear_ivec(comm->load_lim);
6145 comm->load_mdf = 0;
6146 comm->load_pme = 0;
6148 return comm;
6151 gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
6152 unsigned long Flags,
6153 ivec nc,
6154 real comm_distance_min,real rconstr,
6155 const char *dlb_opt,real dlb_scale,
6156 const char *sizex,const char *sizey,const char *sizez,
6157 gmx_mtop_t *mtop,t_inputrec *ir,
6158 matrix box,rvec *x,
6159 gmx_ddbox_t *ddbox,
6160 int *npme_x,int *npme_y)
6162 gmx_domdec_t *dd;
6163 gmx_domdec_comm_t *comm;
6164 int recload;
6165 int d,i,j;
6166 real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
6167 bool bC;
6168 char buf[STRLEN];
6170 if (fplog)
6172 fprintf(fplog,
6173 "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
6176 snew(dd,1);
6178 dd->comm = init_dd_comm();
6179 comm = dd->comm;
6180 /* Note: cggl_flag and cgcm_state are already allocated in init_dd_comm(),
6181 * so they must not be allocated again here (that would leak memory). */
6183 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6184 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6186 dd->bSendRecv2 = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
6187 comm->eFlop = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
6188 recload = dd_nst_env(fplog,"GMX_DD_LOAD",1);
6189 comm->nstSortCG = dd_nst_env(fplog,"GMX_DD_SORT",1);
6190 comm->nstDDDump = dd_nst_env(fplog,"GMX_DD_DUMP",0);
6191 comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
6192 comm->DD_debug = dd_nst_env(fplog,"GMX_DD_DEBUG",0);
6194 dd->pme_recv_f_alloc = 0;
6195 dd->pme_recv_f_buf = NULL;
6197 if (dd->bSendRecv2 && fplog)
6199 fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6201 if (comm->eFlop)
6203 if (fplog)
6205 fprintf(fplog,"Will load balance based on FLOP count\n");
6207 if (comm->eFlop > 1)
6209 srand(1+cr->nodeid);
6211 comm->bRecordLoad = TRUE;
6213 else
6215 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6219 comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);
6221 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6222 if (fplog)
6224 fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
6226 dd->bGridJump = comm->bDynLoadBal;
6228 if (comm->nstSortCG)
6230 if (fplog)
6232 if (comm->nstSortCG == 1)
6234 fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
6236 else
6238 fprintf(fplog,"Will sort the charge groups every %d steps\n",
6239 comm->nstSortCG);
6242 snew(comm->sort,1);
6244 else
6246 if (fplog)
6248 fprintf(fplog,"Will not sort the charge groups\n");
6252 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6253 if (comm->bInterCGBondeds)
6255 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6257 else
6259 comm->bInterCGMultiBody = FALSE;
6262 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6264 if (ir->rlistlong == 0)
6266 /* Set the cut-off to some very large value,
6267 * so we don't need if statements everywhere in the code.
6268 * We use sqrt, since the cut-off is squared in some places.
6270 comm->cutoff = GMX_CUTOFF_INF;
6272 else
6274 comm->cutoff = ir->rlistlong;
6276 comm->cutoff_mbody = 0;
6278 comm->cellsize_limit = 0;
6279 comm->bBondComm = FALSE;
6281 if (comm->bInterCGBondeds)
6283 if (comm_distance_min > 0)
6285 comm->cutoff_mbody = comm_distance_min;
6286 if (Flags & MD_DDBONDCOMM)
6288 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6290 else
6292 comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
6294 r_bonded_limit = comm->cutoff_mbody;
6296 else if (ir->bPeriodicMols)
6298 /* Can not easily determine the required cut-off */
6299 dd_warning(cr,fplog,"NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
6300 comm->cutoff_mbody = comm->cutoff/2;
6301 r_bonded_limit = comm->cutoff_mbody;
6303 else
6305 if (MASTER(cr))
6307 dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
6308 Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
6310 gmx_bcast(sizeof(r_2b),&r_2b,cr);
6311 gmx_bcast(sizeof(r_mb),&r_mb,cr);
6313 /* We use an initial margin of 10% for the minimum cell size,
6314 * except when we are just below the non-bonded cut-off.
6316 if (Flags & MD_DDBONDCOMM)
6318 if (max(r_2b,r_mb) > comm->cutoff)
6320 r_bonded = max(r_2b,r_mb);
6321 r_bonded_limit = 1.1*r_bonded;
6322 comm->bBondComm = TRUE;
6324 else
6326 r_bonded = r_mb;
6327 r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
6329 /* We determine cutoff_mbody later */
6331 else
6333 /* No special bonded communication,
6334 * simply increase the DD cut-off.
6336 r_bonded_limit = 1.1*max(r_2b,r_mb);
6337 comm->cutoff_mbody = r_bonded_limit;
6338 comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
6341 comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
6342 if (fplog)
6344 fprintf(fplog,
6345 "Minimum cell size due to bonded interactions: %.3f nm\n",
6346 comm->cellsize_limit);
6350 if (dd->bInterCGcons && rconstr <= 0)
6352 /* There is a cell size limit due to the constraints (P-LINCS) */
6353 rconstr = constr_r_max(fplog,mtop,ir);
6354 if (fplog)
6356 fprintf(fplog,
6357 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6358 rconstr);
6359 if (rconstr > comm->cellsize_limit)
6361 fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
6365 else if (rconstr > 0 && fplog)
6367 /* Here we do not check for dd->bInterCGcons,
6368 * because one can also set a cell size limit for virtual sites only
6369 * and at this point we don't know yet if there are intercg v-sites.
6371 fprintf(fplog,
6372 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6373 rconstr);
6375 comm->cellsize_limit = max(comm->cellsize_limit,rconstr);
6377 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6379 if (nc[XX] > 0)
6381 copy_ivec(nc,dd->nc);
6382 set_dd_dim(fplog,dd);
6383 set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);
6385 if (cr->npmenodes == -1)
6387 cr->npmenodes = 0;
6389 acs = average_cellsize_min(dd,ddbox);
6390 if (acs < comm->cellsize_limit)
6392 if (fplog)
6394 fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
6396 gmx_fatal_collective(FARGS,cr,NULL,
6397 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6398 acs,comm->cellsize_limit);
6401 else
6403 set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);
6405 /* We need to choose the optimal DD grid and possibly PME nodes */
6406 limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
6407 comm->eDLB!=edlbNO,dlb_scale,
6408 comm->cellsize_limit,comm->cutoff,
6409 comm->bInterCGBondeds,comm->bInterCGMultiBody);
6411 if (dd->nc[XX] == 0)
6413 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6414 sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
6415 !bC ? "-rdd" : "-rcon",
6416 comm->eDLB!=edlbNO ? " or -dds" : "",
6417 bC ? " or your LINCS settings" : "");
6419 gmx_fatal_collective(FARGS,cr,NULL,
6420 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6421 "%s\n"
6422 "Look in the log file for details on the domain decomposition",
6423 cr->nnodes-cr->npmenodes,limit,buf);
6425 set_dd_dim(fplog,dd);
6428 if (fplog)
6430 fprintf(fplog,
6431 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6432 dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
6435 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6436 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6438 gmx_fatal_collective(FARGS,cr,NULL,
6439 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6440 dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
6442 if (cr->npmenodes > dd->nnodes)
6444 gmx_fatal_collective(FARGS,cr,NULL,
6445 "The number of separate PME node (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
6447 if (cr->npmenodes > 0)
6449 comm->npmenodes = cr->npmenodes;
6451 else
6453 comm->npmenodes = dd->nnodes;
6456 if (EEL_PME(ir->coulombtype))
6458 if (dd->nc[XX] > 1 && dd->nc[YY] > 1 &&
6459 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6460 getenv("GMX_PMEONEDD") == NULL)
6462 comm->npmedecompdim = 2;
6463 comm->npmenodes_major = dd->nc[XX];
6464 comm->npmenodes_minor = comm->npmenodes/comm->npmenodes_major;
6466 else
6468 comm->npmedecompdim = 1;
6469 comm->npmenodes_major = comm->npmenodes;
6470 comm->npmenodes_minor = comm->npmenodes/comm->npmenodes_major;
6472 if (fplog)
6474 fprintf(fplog,"PME domain decomposition: %d x %d x %d\n",
6475 comm->npmenodes_major,comm->npmenodes_minor,1);
6478 else
6480 comm->npmedecompdim = 0;
6481 comm->npmenodes_major = 0;
6482 comm->npmenodes_minor = 0;
6485 /* Technically we don't need both of these,
6486 * but it simplifies the code not to have to recalculate them.
6488 if (comm->npmedecompdim == 1 && dd->dim[0] == YY)
6490 *npme_x = 1;
6491 *npme_y = comm->npmenodes;
6493 else
6495 *npme_x = comm->npmenodes_major;
6496 *npme_y = comm->npmenodes_minor;
6499 snew(comm->slb_frac,DIM);
6500 if (comm->eDLB == edlbNO)
6502 comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
6503 comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
6504 comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
6507 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6509 if (comm->bBondComm || comm->eDLB != edlbNO)
6511 /* Set the bonded communication distance to halfway between
6512 * the minimum and the maximum,
6513 * since the extra communication cost is nearly zero.
6515 acs = average_cellsize_min(dd,ddbox);
6516 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6517 if (comm->eDLB != edlbNO)
6519 /* Check if this does not limit the scaling */
6520 comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
6522 if (!comm->bBondComm)
6524 /* Without bBondComm do not go beyond the n.b. cut-off */
6525 comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
6526 if (comm->cellsize_limit >= comm->cutoff)
6528 /* We don't lose a lot of efficiency
6529 * when increasing it to the n.b. cut-off.
6530 * It can even be slightly faster, because we need
6531 * less checks for the communication setup.
6533 comm->cutoff_mbody = comm->cutoff;
6536 /* Check if we did not end up below our original limit */
6537 comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);
6539 if (comm->cutoff_mbody > comm->cellsize_limit)
6541 comm->cellsize_limit = comm->cutoff_mbody;
6544 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6547 if (debug)
6549 fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
6550 "cellsize limit %f\n",
6551 comm->bBondComm,comm->cellsize_limit);
6554 if (MASTER(cr))
6556 check_dd_restrictions(cr,dd,ir,fplog);
6559 comm->partition_step = INT_MIN;
6560 dd->ddp_count = 0;
6562 clear_dd_cycle_counts(dd);
6564 return dd;
6567 static void set_dlb_limits(gmx_domdec_t *dd)
6570 int d;
6572 for(d=0; d<dd->ndim; d++)
6574 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
6575 dd->comm->cellsize_min[dd->dim[d]] =
6576 dd->comm->cellsize_min_dlb[dd->dim[d]];
6581 static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_large_int_t step)
6583 gmx_domdec_t *dd;
6584 gmx_domdec_comm_t *comm;
6585 real cellsize_min;
6586 int d,nc,i;
6587 char buf[STRLEN];
6589 dd = cr->dd;
6590 comm = dd->comm;
6592 if (fplog)
6594 fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,buf),dd_force_imb_perf_loss(dd)*100);
6597 cellsize_min = comm->cellsize_min[dd->dim[0]];
6598 for(d=1; d<dd->ndim; d++)
6600 cellsize_min = min(cellsize_min,comm->cellsize_min[dd->dim[d]]);
6603 if (cellsize_min < comm->cellsize_limit*1.05)
6605 dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
6607 /* Change DLB from "auto" to "no". */
6608 comm->eDLB = edlbNO;
6610 return;
6613 dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
6614 comm->bDynLoadBal = TRUE;
6615 dd->bGridJump = TRUE;
6617 set_dlb_limits(dd);
6619 /* We can set the required cell size info here,
6620 * so we do not need to communicate this.
6621 * The grid is completely uniform.
6623 for(d=0; d<dd->ndim; d++)
6625 if (comm->root[d])
6627 comm->load[d].sum_m = comm->load[d].sum;
6629 nc = dd->nc[dd->dim[d]];
6630 for(i=0; i<nc; i++)
6632 comm->root[d]->cell_f[i] = i/(real)nc;
6633 if (d > 0)
6635 comm->root[d]->cell_f_max0[i] = i /(real)nc;
6636 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
6639 comm->root[d]->cell_f[nc] = 1.0;
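/* Example: for nc = 4 the uniform boundaries are
 * cell_f = {0, 0.25, 0.5, 0.75, 1.0}, i.e. cell i spans
 * [i/nc, (i+1)/nc] in box-relative coordinates.
 */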
6644 static char *init_bLocalCG(gmx_mtop_t *mtop)
6646 int ncg,cg;
6647 char *bLocalCG;
6649 ncg = ncg_mtop(mtop);
6650 snew(bLocalCG,ncg);
6651 for(cg=0; cg<ncg; cg++)
6653 bLocalCG[cg] = FALSE;
6656 return bLocalCG;
6659 void dd_init_bondeds(FILE *fplog,
6660 gmx_domdec_t *dd,gmx_mtop_t *mtop,
6661 gmx_vsite_t *vsite,gmx_constr_t constr,
6662 t_inputrec *ir,bool bBCheck,cginfo_mb_t *cginfo_mb)
6664 gmx_domdec_comm_t *comm;
6665 bool bBondComm;
6666 int d;
6668 dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);
6670 comm = dd->comm;
6672 if (comm->bBondComm)
6674 /* Communicate atoms beyond the cut-off for bonded interactions */
6675 comm = dd->comm;
6677 comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);
6679 comm->bLocalCG = init_bLocalCG(mtop);
6681 else
6683 /* Only communicate atoms based on cut-off */
6684 comm->cglink = NULL;
6685 comm->bLocalCG = NULL;
6689 static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
6690 t_inputrec *ir,
6691 bool bDynLoadBal,real dlb_scale,
6692 gmx_ddbox_t *ddbox)
6694 gmx_domdec_comm_t *comm;
6695 int d;
6696 ivec np;
6697 real limit,shrink;
6698 char buf[64];
6700 if (fplog == NULL)
6702 return;
6705 comm = dd->comm;
6707 if (bDynLoadBal)
6709 fprintf(fplog,"The maximum number of communication pulses is:");
6710 for(d=0; d<dd->ndim; d++)
6712 fprintf(fplog," %c %d",dim2char(dd->dim[d]),comm->cd[d].np_dlb);
6714 fprintf(fplog,"\n");
6715 fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
6716 fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
6717 fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
6718 for(d=0; d<DIM; d++)
6720 if (dd->nc[d] > 1)
6722 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
6724 shrink = 0;
6726 else
6728 shrink =
6729 comm->cellsize_min_dlb[d]/
6730 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6732 fprintf(fplog," %c %.2f",dim2char(d),shrink);
6735 fprintf(fplog,"\n");
6737 else
6739 set_dd_cell_sizes_slb(dd,ddbox,FALSE,np);
6740 fprintf(fplog,"The initial number of communication pulses is:");
6741 for(d=0; d<dd->ndim; d++)
6743 fprintf(fplog," %c %d",dim2char(dd->dim[d]),np[dd->dim[d]]);
6745 fprintf(fplog,"\n");
6746 fprintf(fplog,"The initial domain decomposition cell size is:");
6747 for(d=0; d<DIM; d++) {
6748 if (dd->nc[d] > 1)
6750 fprintf(fplog," %c %.2f nm",
6751 dim2char(d),dd->comm->cellsize_min[d]);
6754 fprintf(fplog,"\n\n");
6757 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
6759 fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
6760 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6761 "non-bonded interactions","",comm->cutoff);
6763 if (bDynLoadBal)
6765 limit = dd->comm->cellsize_limit;
6767 else
6769 if (dynamic_dd_box(ddbox,ir))
6771 fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
6773 limit = dd->comm->cellsize_min[XX];
6774 for(d=1; d<DIM; d++)
6776 limit = min(limit,dd->comm->cellsize_min[d]);
6780 if (comm->bInterCGBondeds)
6782 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6783 "two-body bonded interactions","(-rdd)",
6784 max(comm->cutoff,comm->cutoff_mbody));
6785 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6786 "multi-body bonded interactions","(-rdd)",
6787 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff,limit));
6789 if (dd->vsite_comm)
6791 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6792 "virtual site constructions","(-rcon)",limit);
6794 if (dd->constraint_comm)
6796 sprintf(buf,"atoms separated by up to %d constraints",
6797 1+ir->nProjOrder);
6798 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6799 buf,"(-rcon)",limit);
6801 fprintf(fplog,"\n");
6804 fflush(fplog);
6807 void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
6808 t_inputrec *ir,t_forcerec *fr,
6809 gmx_ddbox_t *ddbox)
6811 gmx_domdec_comm_t *comm;
6812 int d,dim,npulse,npulse_d_max,npulse_d;
6813 bool bNoCutOff;
6814 int natoms_tot;
6815 real vol_frac;
6817 comm = dd->comm;
6819 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
6821 if (EEL_PME(ir->coulombtype))
6823 init_ddpme(dd,&comm->ddpme[0],0,comm->npmenodes_major);
6824 if (comm->npmedecompdim >= 2)
6826 init_ddpme(dd,&comm->ddpme[1],1,
6827 comm->npmenodes/comm->npmenodes_major);
6830 else
6832 comm->npmenodes = 0;
6833 if (dd->pme_nodeid >= 0)
6835 gmx_fatal_collective(FARGS,NULL,dd,
6836 "Can not have separate PME nodes without PME electrostatics");
6840 /* If each molecule is a single charge group
6841 * or we use domain decomposition for each periodic dimension,
6842 * we do not need to take pbc into account for the bonded interactions.
6844 if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
6845 (dd->nc[XX]>1 && dd->nc[YY]>1 && (dd->nc[ZZ]>1 || fr->ePBC==epbcXY)))
6847 fr->bMolPBC = FALSE;
6849 else
6851 fr->bMolPBC = TRUE;
6854 if (debug)
6856 fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
6858 if (comm->eDLB != edlbNO)
6860 /* Determine the maximum number of comm. pulses in one dimension */
6862 comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
6864 /* Determine the maximum required number of grid pulses */
6865 if (comm->cellsize_limit >= comm->cutoff)
6867 /* Only a single pulse is required */
6868 npulse = 1;
6870 else if (!bNoCutOff && comm->cellsize_limit > 0)
6872 /* We round down slightly here to avoid overhead due to the latency
6873 * of extra communication calls when the cut-off
6874 * would be only slightly longer than the cell size.
6875 * Later cellsize_limit is redetermined,
6876 * so we can not miss interactions due to this rounding.
6878 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
6880 else
6882 /* There is no cell size limit */
6883 npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
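/* Illustrative example of the rounding above: with cutoff = 1.015 nm and
 * cellsize_limit = 0.5 nm the ratio is 2.03, so (int)(0.96 + 2.03) gives
 * npulse = 2, whereas a plain ceiling would spend 3 pulses on a cut-off
 * only 3% longer than two cells.
 */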
6886 if (!bNoCutOff && npulse > 1)
6888 /* See if we can do with less pulses, based on dlb_scale */
6889 npulse_d_max = 0;
6890 for(d=0; d<dd->ndim; d++)
6892 dim = dd->dim[d];
6893 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
6894 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
6895 npulse_d_max = max(npulse_d_max,npulse_d);
6897 npulse = min(npulse,npulse_d_max);
6900 /* This env var can override npulse */
6901 d = dd_nst_env(fplog,"GMX_DD_NPULSE",0);
6902 if (d > 0)
6904 npulse = d;
6907 comm->maxpulse = 1;
6908 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
6909 for(d=0; d<dd->ndim; d++)
6911 comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1);
6912 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
6913 snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
6914 comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
6915 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
6917 comm->bVacDLBNoLimit = FALSE;
6921 /* cellsize_limit is set for LINCS in init_domain_decomposition */
6922 if (!comm->bVacDLBNoLimit)
6924 comm->cellsize_limit = max(comm->cellsize_limit,
6925 comm->cutoff/comm->maxpulse);
6927 comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
6928 /* Set the minimum cell size for each DD dimension */
6929 for(d=0; d<dd->ndim; d++)
6931 if (comm->bVacDLBNoLimit ||
6932 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
6934 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
6936 else
6938 comm->cellsize_min_dlb[dd->dim[d]] =
6939 comm->cutoff/comm->cd[d].np_dlb;
6942 if (comm->cutoff_mbody <= 0)
6944 comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
6946 if (comm->bDynLoadBal)
6948 set_dlb_limits(dd);
6952 print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
6953 if (comm->eDLB == edlbAUTO)
6955 if (fplog)
6957 fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
6959 print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
6962 if (ir->ePBC == epbcNONE)
6964 vol_frac = 1 - 1/(double)dd->nnodes;
6966 else
6968 vol_frac =
6969 (1 + comm_box_frac(dd->nc,comm->cutoff,ddbox))/(double)dd->nnodes;
6971 if (debug)
6973 fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
6975 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
6977 dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot);
6980 static void merge_cg_buffers(int ncell,
6981 gmx_domdec_comm_dim_t *cd, int pulse,
6982 int *ncg_cell,
6983 int *index_gl, int *recv_i,
6984 rvec *cg_cm, rvec *recv_vr,
6985 int *cgindex,
6986 cginfo_mb_t *cginfo_mb,int *cginfo)
6988 gmx_domdec_ind_t *ind,*ind_p;
6989 int p,cell,c,cg,cg0,cg1,cg_gl,nat;
6990 int shift,shift_at;
6992 ind = &cd->ind[pulse];
6994 /* First correct the already stored data */
6995 shift = ind->nrecv[ncell];
6996 for(cell=ncell-1; cell>=0; cell--)
6998 shift -= ind->nrecv[cell];
6999 if (shift > 0)
7001 /* Move the cg's present from previous grid pulses */
7002 cg0 = ncg_cell[ncell+cell];
7003 cg1 = ncg_cell[ncell+cell+1];
7004 cgindex[cg1+shift] = cgindex[cg1];
7005 for(cg=cg1-1; cg>=cg0; cg--)
7007 index_gl[cg+shift] = index_gl[cg];
7008 copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
7009 cgindex[cg+shift] = cgindex[cg];
7010 cginfo[cg+shift] = cginfo[cg];
7012 /* Correct the already stored send indices for the shift */
7013 for(p=1; p<=pulse; p++)
7015 ind_p = &cd->ind[p];
7016 cg0 = 0;
7017 for(c=0; c<cell; c++)
7019 cg0 += ind_p->nsend[c];
7021 cg1 = cg0 + ind_p->nsend[cell];
7022 for(cg=cg0; cg<cg1; cg++)
7024 ind_p->index[cg] += shift;
7030 /* Merge in the communicated buffers */
7031 shift = 0;
7032 shift_at = 0;
7033 cg0 = 0;
7034 for(cell=0; cell<ncell; cell++)
7036 cg1 = ncg_cell[ncell+cell+1] + shift;
7037 if (shift_at > 0)
7039 /* Correct the old cg indices */
7040 for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++)
7042 cgindex[cg+1] += shift_at;
7045 for(cg=0; cg<ind->nrecv[cell]; cg++)
7047 /* Copy this charge group from the buffer */
7048 index_gl[cg1] = recv_i[cg0];
7049 copy_rvec(recv_vr[cg0],cg_cm[cg1]);
7050 /* Add it to the cgindex */
7051 cg_gl = index_gl[cg1];
7052 cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
7053 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7054 cgindex[cg1+1] = cgindex[cg1] + nat;
7055 cg0++;
7056 cg1++;
7057 shift_at += nat;
7059 shift += ind->nrecv[cell];
7060 ncg_cell[ncell+cell+1] = cg1;
7064 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7065 int nzone,int cg0,const int *cgindex)
7067 int cg,zone,p;
7069 /* Store the atom block boundaries for easy copying of communication buffers
7071 cg = cg0;
7072 for(zone=0; zone<nzone; zone++)
7074 for(p=0; p<cd->np; p++) {
7075 cd->ind[p].cell2at0[zone] = cgindex[cg];
7076 cg += cd->ind[p].nrecv[zone];
7077 cd->ind[p].cell2at1[zone] = cgindex[cg];
7082 static bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
7084 int i;
7085 bool bMiss;
7087 bMiss = FALSE;
7088 for(i=link->index[cg_gl]; i<link->index[cg_gl+1]; i++)
7090 if (!bLocalCG[link->a[i]])
7092 bMiss = TRUE;
7096 return bMiss;
7099 static void setup_dd_communication(gmx_domdec_t *dd,
7100 matrix box,gmx_ddbox_t *ddbox,t_forcerec *fr)
7102 int dim_ind,dim,dim0,dim1=-1,dim2=-1,dimd,p,nat_tot;
7103 int nzone,nzone_send,zone,zonei,cg0,cg1;
7104 int c,i,j,cg,cg_gl,nrcg;
7105 int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
7106 gmx_domdec_comm_t *comm;
7107 gmx_domdec_zones_t *zones;
7108 gmx_domdec_comm_dim_t *cd;
7109 gmx_domdec_ind_t *ind;
7110 cginfo_mb_t *cginfo_mb;
7111 bool bBondComm,bDist2B,bDistMB,bDistMB_pulse,bDistBonded,bScrew;
7112 real r_mb,r_comm2,r_scomm2,r_bcomm2,r,r_0,r_1,r2,rb2,r2inc,inv_ncg,tric_sh;
7113 rvec rb,rn;
7114 real corner[DIM][4],corner_round_0=0,corner_round_1[4];
7115 real bcorner[DIM],bcorner_round_1=0;
7116 ivec tric_dist;
7117 rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
7118 real skew_fac2_d,skew_fac_01;
7119 rvec sf2_round;
7120 int nsend,nat;
7122 if (debug)
7124 fprintf(debug,"Setting up DD communication\n");
7127 comm = dd->comm;
7128 cg_cm = fr->cg_cm;
7130 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
7132 dim = dd->dim[dim_ind];
7134 /* Check if we need to use triclinic distances */
7135 tric_dist[dim_ind] = 0;
7136 for(i=0; i<=dim_ind; i++)
7138 if (ddbox->tric_dir[dd->dim[i]])
7140 tric_dist[dim_ind] = 1;
7145 bBondComm = comm->bBondComm;
7147 /* Do we need to determine extra distances for multi-body bondeds? */
7148 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
7150 /* Do we need to determine extra distances for only two-body bondeds? */
7151 bDist2B = (bBondComm && !bDistMB);
7153 r_comm2 = sqr(comm->cutoff);
7154 r_bcomm2 = sqr(comm->cutoff_mbody);
7156 if (debug)
7158 fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
7161 zones = &comm->zones;
7163 dim0 = dd->dim[0];
7164 /* The first dimension is equal for all cells */
7165 corner[0][0] = comm->cell_x0[dim0];
7166 if (bDistMB)
7168 bcorner[0] = corner[0][0];
7170 if (dd->ndim >= 2)
7172 dim1 = dd->dim[1];
7173 /* This cell row is only seen from the first row */
7174 corner[1][0] = comm->cell_x0[dim1];
7175 /* All rows can see this row */
7176 corner[1][1] = comm->cell_x0[dim1];
7177 if (dd->bGridJump)
7179 corner[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
7180 if (bDistMB)
7182 /* For the multi-body distance we need the maximum */
7183 bcorner[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
7186 /* Set the upper-right corner for rounding */
7187 corner_round_0 = comm->cell_x1[dim0];
7189 if (dd->ndim >= 3)
7191 dim2 = dd->dim[2];
7192 for(j=0; j<4; j++)
7194 corner[2][j] = comm->cell_x0[dim2];
7196 if (dd->bGridJump)
7198 /* Use the maximum of the i-cells that see a j-cell */
7199 for(i=0; i<zones->nizone; i++)
7201 for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
7203 if (j >= 4)
7205 corner[2][j-4] =
7206 max(corner[2][j-4],
7207 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7211 if (bDistMB)
7213 /* For the multi-body distance we need the maximum */
7214 bcorner[2] = comm->cell_x0[dim2];
7215 for(i=0; i<2; i++)
7217 for(j=0; j<2; j++)
7219 bcorner[2] = max(bcorner[2],
7220 comm->zone_d2[i][j].p1_0);
7226 /* Set the upper-right corner for rounding */
7227 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7228 * Only cell (0,0,0) can see cell 7 (1,1,1)
7230 corner_round_1[0] = comm->cell_x1[dim1];
7231 corner_round_1[3] = comm->cell_x1[dim1];
7232 if (dd->bGridJump)
7234 corner_round_1[0] = max(comm->cell_x1[dim1],
7235 comm->zone_d1[1].mch1);
7236 if (bDistMB)
7238 /* For the multi-body distance we need the maximum */
7239 bcorner_round_1 = max(comm->cell_x1[dim1],
7240 comm->zone_d1[1].p1_1);
7246 /* Triclinic stuff */
7247 normal = ddbox->normal;
7248 skew_fac_01 = 0;
7249 if (dd->ndim >= 2)
7251 v_0 = ddbox->v[dim0];
7252 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
7254 /* Determine the coupling coefficient for the distances
7255 * to the cell planes along dim0 and dim1 through dim2.
7256 * This is required for correct rounding.
7258 skew_fac_01 =
7259 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
7260 if (debug)
7262 fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
7266 if (dd->ndim >= 3)
7268 v_1 = ddbox->v[dim1];
7271 zone_cg_range = zones->cg_range;
7272 index_gl = dd->index_gl;
7273 cgindex = dd->cgindex;
7274 cginfo_mb = fr->cginfo_mb;
7276 zone_cg_range[0] = 0;
7277 zone_cg_range[1] = dd->ncg_home;
7278 comm->zone_ncg1[0] = dd->ncg_home;
7279 pos_cg = dd->ncg_home;
7281 nat_tot = dd->nat_home;
7282 nzone = 1;
7283 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
7285 dim = dd->dim[dim_ind];
7286 cd = &comm->cd[dim_ind];
7288 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
7290 /* No pbc in this dimension, the first node should not communicate */
7291 nzone_send = 0;
7293 else
7295 nzone_send = nzone;
7298 bScrew = (dd->bScrewPBC && dim == XX);
7300 v_d = ddbox->v[dim];
7301 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
7303 cd->bInPlace = TRUE;
7304 for(p=0; p<cd->np; p++)
7306 /* Only atoms communicated in the first pulse are used
7307 * for multi-body bonded interactions or for bBondComm.
7309 bDistBonded = ((bDistMB || bDist2B) && p == 0);
7310 bDistMB_pulse = (bDistMB && bDistBonded);
7312 ind = &cd->ind[p];
7313 nsend = 0;
7314 nat = 0;
7315 for(zone=0; zone<nzone_send; zone++)
7317 if (tric_dist[dim_ind] && dim_ind > 0)
7319 /* Determine slightly more optimized skew_fac's
7320 * for rounding.
7321 * This reduces the number of communicated atoms
7322 * by about 10% for 3D DD of rhombic dodecahedra.
7324 for(dimd=0; dimd<dim; dimd++)
7326 sf2_round[dimd] = 1;
7327 if (ddbox->tric_dir[dimd])
7329 for(i=dd->dim[dimd]+1; i<DIM; i++)
7331 /* If we are shifted in dimension i
7332 * and the cell plane is tilted forward
7333 * in dimension i, skip this coupling.
7335 if (!(zones->shift[nzone+zone][i] &&
7336 ddbox->v[dimd][i][dimd] >= 0))
7338 sf2_round[dimd] +=
7339 sqr(ddbox->v[dimd][i][dimd]);
7342 sf2_round[dimd] = 1/sf2_round[dimd];
7347 zonei = zone_perm[dim_ind][zone];
7348 if (p == 0)
7350 /* Here we permute the zones to obtain a convenient order
7351 * for neighbor searching
7353 cg0 = zone_cg_range[zonei];
7354 cg1 = zone_cg_range[zonei+1];
7356 else
7358 /* Look only at the cg's received in the previous grid pulse
7360 cg1 = zone_cg_range[nzone+zone+1];
7361 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
7363 ind->nsend[zone] = 0;
7364 for(cg=cg0; cg<cg1; cg++)
7366 r2 = 0;
7367 rb2 = 0;
7368 if (tric_dist[dim_ind] == 0)
7370 /* Rectangular direction, easy */
7371 r = cg_cm[cg][dim] - corner[dim_ind][zone];
7372 if (r > 0)
7374 r2 += r*r;
7376 if (bDistMB_pulse)
7378 r = cg_cm[cg][dim] - bcorner[dim_ind];
7379 if (r > 0)
7381 rb2 += r*r;
7384 /* Rounding gives at most a 16% reduction
7385 * in communicated atoms
7387 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7389 r = cg_cm[cg][dim0] - corner_round_0;
7390 /* This is the first dimension, so always r >= 0 */
7391 r2 += r*r;
7392 if (bDistMB_pulse)
7394 rb2 += r*r;
7397 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7399 r = cg_cm[cg][dim1] - corner_round_1[zone];
7400 if (r > 0)
7402 r2 += r*r;
7404 if (bDistMB_pulse)
7406 r = cg_cm[cg][dim1] - bcorner_round_1;
7407 if (r > 0)
7409 rb2 += r*r;
7414 else
7416 /* Triclinic direction, more complicated */
7417 clear_rvec(rn);
7418 clear_rvec(rb);
7419 /* Rounding, conservative as the skew_fac multiplication
7420 * will slightly underestimate the distance.
7422 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7424 rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
7425 for(i=dim0+1; i<DIM; i++)
7427 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7429 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7430 if (bDistMB_pulse)
7432 rb[dim0] = rn[dim0];
7433 rb2 = r2;
7435 /* Take care that the cell planes along dim0 might not
7436 * be orthogonal to those along dim1 and dim2.
7438 for(i=1; i<=dim_ind; i++)
7440 dimd = dd->dim[i];
7441 if (normal[dim0][dimd] > 0)
7443 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7444 if (bDistMB_pulse)
7446 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7451 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7453 rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
7454 tric_sh = 0;
7455 for(i=dim1+1; i<DIM; i++)
7457 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7459 rn[dim1] += tric_sh;
7460 if (rn[dim1] > 0)
7462 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7463 /* Take care of coupling of the distances
7464 * to the planes along dim0 and dim1 through dim2. */
7466 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7467 /* Take care that the cell planes along dim1
7468 * might not be orthogonal to that along dim2. */
7470 if (normal[dim1][dim2] > 0)
7472 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7475 if (bDistMB_pulse)
7477 rb[dim1] +=
7478 cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
7479 if (rb[dim1] > 0)
7481 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7482 /* Take care of coupling of the distances
7483 * to the planes along dim0 and dim1 through dim2. */
7485 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7486 /* Take care that the cell planes along dim1
7487 * might not be orthogonal to that along dim2. */
7489 if (normal[dim1][dim2] > 0)
7491 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7496 /* The distance along the communication direction */
7497 rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
7498 tric_sh = 0;
7499 for(i=dim+1; i<DIM; i++)
7501 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7503 rn[dim] += tric_sh;
7504 if (rn[dim] > 0)
7506 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7507 /* Take care of coupling of the distances
7508 * to the planes along dim0 and dim1 through dim2. */
7510 if (dim_ind == 1 && zonei == 1)
7512 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7515 if (bDistMB_pulse)
7517 clear_rvec(rb);
7518 rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
7519 if (rb[dim] > 0)
7521 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
7522 /* Take care of coupling of the distances
7523 * to the planes along dim0 and dim1 through dim2. */
7525 if (dim_ind == 1 && zonei == 1)
7527 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
7533 if (r2 < r_comm2 ||
7534 (bDistBonded &&
7535 ((bDistMB && rb2 < r_bcomm2) ||
7536 (bDist2B && r2 < r_bcomm2)) &&
7537 (!bBondComm ||
7538 (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
7539 missing_link(comm->cglink,index_gl[cg],
7540 comm->bLocalCG)))))
7542 /* Make an index to the local charge groups */
7543 if (nsend+1 > ind->nalloc)
7545 ind->nalloc = over_alloc_large(nsend+1);
7546 srenew(ind->index,ind->nalloc);
7548 if (nsend+1 > comm->nalloc_int)
7550 comm->nalloc_int = over_alloc_large(nsend+1);
7551 srenew(comm->buf_int,comm->nalloc_int);
7553 ind->index[nsend] = cg;
7554 comm->buf_int[nsend] = index_gl[cg];
7555 ind->nsend[zone]++;
7556 vec_rvec_check_alloc(&comm->vbuf,nsend+1);
7558 if (dd->ci[dim] == 0)
7560 /* Correct cg_cm for pbc */
7561 rvec_add(cg_cm[cg],box[dim],comm->vbuf.v[nsend]);
7562 if (bScrew)
7564 comm->vbuf.v[nsend][YY] =
7565 box[YY][YY]-comm->vbuf.v[nsend][YY];
7566 comm->vbuf.v[nsend][ZZ] =
7567 box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
7570 else
7572 copy_rvec(cg_cm[cg],comm->vbuf.v[nsend]);
7574 nsend++;
7575 nat += cgindex[cg+1] - cgindex[cg];
7579 /* Clear the counts in case we do not have pbc */
7580 for(zone=nzone_send; zone<nzone; zone++)
7582 ind->nsend[zone] = 0;
7584 ind->nsend[nzone] = nsend;
7585 ind->nsend[nzone+1] = nat;
7586 /* Communicate the number of cg's and atoms to receive */
7587 dd_sendrecv_int(dd, dim_ind, dddirBackward,
7588 ind->nsend, nzone+2,
7589 ind->nrecv, nzone+2);
7591 /* The rvec buffer is also required for atom buffers of size nsend
7592 * in dd_move_x and dd_move_f. */
7594 vec_rvec_check_alloc(&comm->vbuf,ind->nsend[nzone+1]);
7596 if (p > 0)
7598 /* We can receive in place if only the last zone is not empty */
7599 for(zone=0; zone<nzone-1; zone++)
7601 if (ind->nrecv[zone] > 0)
7603 cd->bInPlace = FALSE;
7606 if (!cd->bInPlace)
7608 /* The int buffer is only required here for the cg indices */
7609 if (ind->nrecv[nzone] > comm->nalloc_int2)
7611 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
7612 srenew(comm->buf_int2,comm->nalloc_int2);
7614 /* The rvec buffer is also required for atom buffers
7615 * of size nrecv in dd_move_x and dd_move_f. */
7617 i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
7618 vec_rvec_check_alloc(&comm->vbuf2,i);
7622 /* Make space for the global cg indices */
7623 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
7624 || dd->cg_nalloc == 0)
7626 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
7627 srenew(index_gl,dd->cg_nalloc);
7628 srenew(cgindex,dd->cg_nalloc+1);
7630 /* Communicate the global cg indices */
7631 if (cd->bInPlace)
7633 recv_i = index_gl + pos_cg;
7635 else
7637 recv_i = comm->buf_int2;
7639 dd_sendrecv_int(dd, dim_ind, dddirBackward,
7640 comm->buf_int, nsend,
7641 recv_i, ind->nrecv[nzone]);
7643 /* Make space for cg_cm */
7644 if (pos_cg + ind->nrecv[nzone] > fr->cg_nalloc)
7646 dd_realloc_fr_cg(fr,pos_cg + ind->nrecv[nzone]);
7647 cg_cm = fr->cg_cm;
7649 /* Communicate cg_cm */
7650 if (cd->bInPlace)
7652 recv_vr = cg_cm + pos_cg;
7654 else
7656 recv_vr = comm->vbuf2.v;
7658 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
7659 comm->vbuf.v, nsend,
7660 recv_vr, ind->nrecv[nzone]);
7662 /* Make the charge group index */
7663 if (cd->bInPlace)
7665 zone = (p == 0 ? 0 : nzone - 1);
7666 while (zone < nzone)
7668 for(cg=0; cg<ind->nrecv[zone]; cg++)
7670 cg_gl = index_gl[pos_cg];
7671 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
7672 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
7673 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
7674 if (bBondComm)
7676 /* Update the charge group presence,
7677 * so we can use it in the next pass of the loop. */
7679 comm->bLocalCG[cg_gl] = TRUE;
7681 pos_cg++;
7683 if (p == 0)
7685 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
7687 zone++;
7688 zone_cg_range[nzone+zone] = pos_cg;
7691 else
7693 /* This part of the code is never executed with bBondComm. */
7694 merge_cg_buffers(nzone,cd,p,zone_cg_range,
7695 index_gl,recv_i,cg_cm,recv_vr,
7696 cgindex,fr->cginfo_mb,fr->cginfo);
7697 pos_cg += ind->nrecv[nzone];
7699 nat_tot += ind->nrecv[nzone+1];
7701 if (!cd->bInPlace)
7703 /* Store the atom block for easy copying of communication buffers */
7704 make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
7706 nzone += nzone;
7708 dd->index_gl = index_gl;
7709 dd->cgindex = cgindex;
7711 dd->ncg_tot = zone_cg_range[zones->n];
7712 dd->nat_tot = nat_tot;
7713 comm->nat[ddnatHOME] = dd->nat_home;
7714 for(i=ddnatZONE; i<ddnatNR; i++)
7716 comm->nat[i] = dd->nat_tot;
7719 if (!bBondComm)
7721 /* We don't need to update cginfo, since that was already done above.
7722 * So we pass NULL for the forcerec. */
7724 dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
7725 NULL,comm->bLocalCG);
7728 if (debug)
7730 fprintf(debug,"Finished setting up DD communication, zones:");
7731 for(c=0; c<zones->n; c++)
7733 fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
7735 fprintf(debug,"\n");
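
/* Minimal sketch (a hypothetical helper, not part of the original file) of
 * the rectangular selection test used in the communication setup above: a
 * charge group is sent backward along dim when the squared one-sided
 * distance from its center to the zone corner stays below the cut-off
 * squared. Rounding terms and triclinic corrections are omitted here.
 */
static bool dd_example_cg_in_comm_range(const rvec cg_center, int dim,
                                        real corner_d, real r_comm2)
{
    real r, r2 = 0;

    /* One-sided distance along the communication dimension:
     * only centers beyond the corner contribute.
     */
    r = cg_center[dim] - corner_d;
    if (r > 0)
    {
        r2 += r*r;
    }

    return (r2 < r_comm2);
}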
7739 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
7741 int c;
7743 for(c=0; c<zones->nizone; c++)
7745 zones->izone[c].cg1 = zones->cg_range[c+1];
7746 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
7747 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
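
/* Usage sketch (hypothetical helper) for the boundaries set above: for each
 * i-zone c, neighbor searching takes i-cgs from cg_range[c] up to
 * izone[c].cg1 and j-cgs from jcg0 up to jcg1, i.e. from the start of zone
 * j0 to the start of zone j1.
 */
static int dd_example_count_zone_pairs(const gmx_domdec_zones_t *zones)
{
    int c, npairs = 0;

    for (c = 0; c < zones->nizone; c++)
    {
        /* Number of i-cg/j-cg pairs considered for this i-zone */
        npairs += (zones->izone[c].cg1  - zones->cg_range[c])*
                  (zones->izone[c].jcg1 - zones->izone[c].jcg0);
    }

    return npairs;
}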
7751 static int comp_cgsort(const void *a,const void *b)
7753 int comp;
7755 gmx_cgsort_t *cga,*cgb;
7756 cga = (gmx_cgsort_t *)a;
7757 cgb = (gmx_cgsort_t *)b;
7759 comp = cga->nsc - cgb->nsc;
7760 if (comp == 0)
7762 comp = cga->ind_gl - cgb->ind_gl;
7765 return comp;
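
/* Usage sketch for the comparator above: charge groups are ordered on the
 * ns grid cell index first, with the global index as tie-breaker, so the
 * result is unique and reproducible. Plain qsort is shown here; the code
 * below uses qsort_threadsafe with the same comparator. The data are
 * hypothetical.
 */
static void dd_example_comp_cgsort(void)
{
    gmx_cgsort_t s[3];

    s[0].nsc = 5; s[0].ind_gl = 40; s[0].ind = 0;
    s[1].nsc = 2; s[1].ind_gl = 17; s[1].ind = 1;
    s[2].nsc = 5; s[2].ind_gl = 11; s[2].ind = 2;

    qsort(s, 3, sizeof(s[0]), comp_cgsort);

    /* The local indices now come out as 1, 2, 0:
     * cell 2 first, then the two cgs in cell 5 ordered by ind_gl.
     */
}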
7768 static void order_int_cg(int n,gmx_cgsort_t *sort,
7769 int *a,int *buf)
7771 int i;
7773 /* Order the data */
7774 for(i=0; i<n; i++)
7776 buf[i] = a[sort[i].ind];
7779 /* Copy back to the original array */
7780 for(i=0; i<n; i++)
7782 a[i] = buf[i];
7786 static void order_vec_cg(int n,gmx_cgsort_t *sort,
7787 rvec *v,rvec *buf)
7789 int i;
7791 /* Order the data */
7792 for(i=0; i<n; i++)
7794 copy_rvec(v[sort[i].ind],buf[i]);
7797 /* Copy back to the original array */
7798 for(i=0; i<n; i++)
7800 copy_rvec(buf[i],v[i]);
7804 static void order_vec_atom(int ncg,int *cgindex,gmx_cgsort_t *sort,
7805 rvec *v,rvec *buf)
7807 int a,atot,cg,cg0,cg1,i;
7809 /* Order the data */
7810 a = 0;
7811 for(cg=0; cg<ncg; cg++)
7813 cg0 = cgindex[sort[cg].ind];
7814 cg1 = cgindex[sort[cg].ind+1];
7815 for(i=cg0; i<cg1; i++)
7817 copy_rvec(v[i],buf[a]);
7818 a++;
7821 atot = a;
7823 /* Copy back to the original array */
7824 for(a=0; a<atot; a++)
7826 copy_rvec(buf[a],v[a]);
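
/* The three order_* routines above share one pattern: gather into a scratch
 * buffer in the sorted order, then copy back, since an in-place permutation
 * would overwrite elements that are still needed. order_vec_atom
 * additionally expands each charge group to its atom range using the
 * prefix-sum convention of cgindex: charge group cg owns atoms cgindex[cg]
 * up to, but not including, cgindex[cg+1]. A hypothetical helper making
 * that convention explicit:
 */
static int dd_example_cg_natoms(const int *cgindex, int cg)
{
    return cgindex[cg+1] - cgindex[cg];
}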
7830 static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
7831 int nsort_new,gmx_cgsort_t *sort_new,
7832 gmx_cgsort_t *sort1)
7834 int i1,i2,i_new;
7836 /* The new indices are not very ordered, so we qsort them */
7837 qsort_threadsafe(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);
7839 /* sort2 is already ordered, so now we can merge the two arrays */
7840 i1 = 0;
7841 i2 = 0;
7842 i_new = 0;
7843 while(i2 < nsort2 || i_new < nsort_new)
7845 if (i2 == nsort2)
7847 sort1[i1++] = sort_new[i_new++];
7849 else if (i_new == nsort_new)
7851 sort1[i1++] = sort2[i2++];
7853 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
7854 (sort2[i2].nsc == sort_new[i_new].nsc &&
7855 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
7857 sort1[i1++] = sort2[i2++];
7859 else
7861 sort1[i1++] = sort_new[i_new++];
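
/* Usage sketch for ordered_sort: sort2 holds the cgs that stayed in their
 * ns cell (already ordered), sort_new the moved or newly arrived cgs in
 * arbitrary order; the merge lands fully ordered in sort1. The data below
 * are hypothetical.
 */
static void dd_example_ordered_sort(void)
{
    gmx_cgsort_t stay[2], moved[1], merged[3];

    stay[0].nsc  = 1; stay[0].ind_gl  = 3; stay[0].ind  = 0;
    stay[1].nsc  = 4; stay[1].ind_gl  = 8; stay[1].ind  = 1;
    moved[0].nsc = 2; moved[0].ind_gl = 5; moved[0].ind = 2;

    ordered_sort(2, stay, 1, moved, merged);

    /* merged is now ordered by nsc: 1, 2, 4 */
}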
7866 static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
7867 rvec *cgcm,t_forcerec *fr,t_state *state,
7868 int ncg_home_old)
7870 gmx_domdec_sort_t *sort;
7871 gmx_cgsort_t *cgsort,*sort_i;
7872 int ncg_new,nsort2,nsort_new,i,cell_index,*ibuf,cgsize;
7873 rvec *vbuf;
7875 sort = dd->comm->sort;
7877 if (dd->ncg_home > sort->sort_nalloc)
7879 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
7880 srenew(sort->sort1,sort->sort_nalloc);
7881 srenew(sort->sort2,sort->sort_nalloc);
7884 if (ncg_home_old >= 0)
7886 /* The charge groups that remained in the same ns grid cell
7887 * are already completely ordered, so we can sort efficiently by
7888 * sorting only the moved charge groups and merging them into the stationary list. */
7890 ncg_new = 0;
7891 nsort2 = 0;
7892 nsort_new = 0;
7893 for(i=0; i<dd->ncg_home; i++)
7895 /* Check if this cg did not move to another node */
7896 cell_index = fr->ns.grid->cell_index[i];
7897 if (cell_index != 4*fr->ns.grid->ncells)
7899 if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
7901 /* This cg is new on this node or moved to another ns grid cell */
7902 if (nsort_new >= sort->sort_new_nalloc)
7904 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
7905 srenew(sort->sort_new,sort->sort_new_nalloc);
7907 sort_i = &(sort->sort_new[nsort_new++]);
7909 else
7911 /* This cg did not move */
7912 sort_i = &(sort->sort2[nsort2++]);
7914 /* Sort on the ns grid cell indices
7915 * and the global topology index. */
7917 sort_i->nsc = cell_index;
7918 sort_i->ind_gl = dd->index_gl[i];
7919 sort_i->ind = i;
7920 ncg_new++;
7923 if (debug)
7925 fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
7926 nsort2,nsort_new);
7928 /* Sort efficiently */
7929 ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,sort->sort1);
7931 else
7933 cgsort = sort->sort1;
7934 ncg_new = 0;
7935 for(i=0; i<dd->ncg_home; i++)
7937 /* Sort on the ns grid cell indices
7938 * and the global topology index. */
7940 cgsort[i].nsc = fr->ns.grid->cell_index[i];
7941 cgsort[i].ind_gl = dd->index_gl[i];
7942 cgsort[i].ind = i;
7943 if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
7945 ncg_new++;
7948 if (debug)
7950 fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
7952 /* Determine the order of the charge groups using qsort */
7953 qsort_threadsafe(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
7955 cgsort = sort->sort1;
7957 /* We alloc with the old size, since cgindex is still old */
7958 vec_rvec_check_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
7959 vbuf = dd->comm->vbuf.v;
7961 /* Remove the charge groups which are no longer at home here */
7962 dd->ncg_home = ncg_new;
7964 /* Reorder the state */
7965 for(i=0; i<estNR; i++)
7967 if (EST_DISTR(i) && state->flags & (1<<i))
7969 switch (i)
7971 case estX:
7972 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->x,vbuf);
7973 break;
7974 case estV:
7975 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->v,vbuf);
7976 break;
7977 case estSDX:
7978 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->sd_X,vbuf);
7979 break;
7980 case estCGP:
7981 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->cg_p,vbuf);
7982 break;
7983 case estLD_RNG:
7984 case estLD_RNGI:
7985 case estDISRE_INITF:
7986 case estDISRE_RM3TAV:
7987 case estORIRE_INITF:
7988 case estORIRE_DTAV:
7989 /* No ordering required */
7990 break;
7991 default:
7992 gmx_incons("Unknown state entry encountered in dd_sort_state");
7993 break;
7997 /* Reorder cgcm */
7998 order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);
8000 if (dd->ncg_home+1 > sort->ibuf_nalloc)
8002 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
8003 srenew(sort->ibuf,sort->ibuf_nalloc);
8005 ibuf = sort->ibuf;
8006 /* Reorder the global cg index */
8007 order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
8008 /* Reorder the cginfo */
8009 order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
8010 /* Rebuild the local cg index */
8011 ibuf[0] = 0;
8012 for(i=0; i<dd->ncg_home; i++)
8014 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
8015 ibuf[i+1] = ibuf[i] + cgsize;
8017 for(i=0; i<dd->ncg_home+1; i++)
8019 dd->cgindex[i] = ibuf[i];
8021 /* Set the home atom number */
8022 dd->nat_home = dd->cgindex[dd->ncg_home];
8024 /* Copy the sorted ns cell indices back to the ns grid struct */
8025 for(i=0; i<dd->ncg_home; i++)
8027 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
8029 fr->ns.grid->nr = dd->ncg_home;
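
/* Sketch of the "moved away" convention used in dd_sort_state above: cgs
 * that left this node carry the out-of-range ns cell index 4*ncells, so
 * after sorting they collect at the tail of the array and are dropped
 * simply by shrinking ncg_home to ncg_new. A hypothetical helper counting
 * the cgs that remain home:
 */
static int dd_example_count_home_cgs(int ncg, const gmx_cgsort_t *sort,
                                     int ncells)
{
    int i, ncg_home = 0;

    for (i = 0; i < ncg; i++)
    {
        if (sort[i].nsc != 4*ncells)
        {
            ncg_home++;
        }
    }

    return ncg_home;
}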
8032 static void add_dd_statistics(gmx_domdec_t *dd)
8034 gmx_domdec_comm_t *comm;
8035 int ddnat;
8037 comm = dd->comm;
8039 for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
8041 comm->sum_nat[ddnat-ddnatZONE] +=
8042 comm->nat[ddnat] - comm->nat[ddnat-1];
8044 comm->ndecomp++;
8047 void reset_dd_statistics_counters(gmx_domdec_t *dd)
8049 gmx_domdec_comm_t *comm;
8050 int ddnat;
8052 comm = dd->comm;
8054 /* Reset all the statistics and counters for total run counting */
8055 for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
8057 comm->sum_nat[ddnat-ddnatZONE] = 0;
8059 comm->ndecomp = 0;
8060 comm->nload = 0;
8061 comm->load_step = 0;
8062 comm->load_sum = 0;
8063 comm->load_max = 0;
8064 clear_ivec(comm->load_lim);
8065 comm->load_mdf = 0;
8066 comm->load_pme = 0;
8069 void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
8071 gmx_domdec_comm_t *comm;
8072 int ddnat;
8073 double av;
8075 comm = cr->dd->comm;
8077 gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);
8079 if (fplog == NULL)
8081 return;
8084 fprintf(fplog,"\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
8086 for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
8088 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
8089 switch(ddnat)
8091 case ddnatZONE:
8092 fprintf(fplog,
8093 " av. #atoms communicated per step for force: %d x %.1f\n",
8094 2,av);
8095 break;
8096 case ddnatVSITE:
8097 if (cr->dd->vsite_comm)
8099 fprintf(fplog,
8100 " av. #atoms communicated per step for vsites: %d x %.1f\n",
8101 (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
8102 av);
8104 break;
8105 case ddnatCON:
8106 if (cr->dd->constraint_comm)
8108 fprintf(fplog,
8109 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
8110 1 + ir->nLincsIter,av);
8112 break;
8113 default:
8114 gmx_incons(" Unknown type for DD statistics");
8117 fprintf(fplog,"\n");
8119 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
8121 print_dd_load_av(fplog,cr->dd);
8125 void dd_partition_system(FILE *fplog,
8126 gmx_large_int_t step,
8127 t_commrec *cr,
8128 bool bMasterState,
8129 int nstglobalcomm,
8130 t_state *state_global,
8131 gmx_mtop_t *top_global,
8132 t_inputrec *ir,
8133 t_state *state_local,
8134 rvec **f,
8135 t_mdatoms *mdatoms,
8136 gmx_localtop_t *top_local,
8137 t_forcerec *fr,
8138 gmx_vsite_t *vsite,
8139 gmx_shellfc_t shellfc,
8140 gmx_constr_t constr,
8141 t_nrnb *nrnb,
8142 gmx_wallcycle_t wcycle,
8143 bool bVerbose)
8145 gmx_domdec_t *dd;
8146 gmx_domdec_comm_t *comm;
8147 gmx_ddbox_t ddbox={0};
8148 t_block *cgs_gl;
8149 gmx_large_int_t step_pcoupl;
8150 rvec cell_ns_x0,cell_ns_x1;
8151 int i,j,n,cg0=0,ncg_home_old=-1,nat_f_novirsum;
8152 bool bBoxChanged,bNStGlobalComm,bDoDLB,bCheckDLB,bTurnOnDLB,bLogLoad;
8153 bool bRedist,bSortCG,bResortAll;
8154 ivec ncells_old,np;
8155 real grid_density;
8156 char sbuf[22];
8158 dd = cr->dd;
8159 comm = dd->comm;
8161 bBoxChanged = (bMasterState || DEFORM(*ir));
8162 if (ir->epc != epcNO)
8164 /* With nstcalcenergy > 1 pressure coupling happens
8165 * one step after calculating the energies.
8166 * Box scaling happens at the end of the MD step,
8167 * after the DD partitioning.
8168 * We therefore have to do DLB in the first partitioning
8169 * after an MD step where P-coupling occurred.
8170 * We need to determine the last step in which p-coupling occurred.
8171 * MRS -- need to validate this for vv? */
8173 n = ir->nstcalcenergy;
8174 if (n == 1)
8176 step_pcoupl = step - 1;
8178 else
8180 step_pcoupl = ((step - 1)/n)*n + 1;
8182 if (step_pcoupl >= comm->partition_step)
8184 bBoxChanged = TRUE;
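/* Worked example of the step_pcoupl arithmetic above: with
 * nstcalcenergy n = 10, at step = 25 the most recent step at which
 * pressure coupling scaled the box is ((25-1)/10)*10 + 1 = 21;
 * with n = 1 it is simply step - 1 = 24. bBoxChanged is then forced
 * on whenever that step falls at or after the last partitioning step.
 */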
8188 bNStGlobalComm = (step >= comm->partition_step + nstglobalcomm);
8190 if (!comm->bDynLoadBal)
8192 bDoDLB = FALSE;
8194 else
8196 /* Should we do dynamic load balancing this step?
8197 * Since it requires (possibly expensive) global communication,
8198 * we might want to do DLB less frequently. */
8200 if (bBoxChanged || ir->epc != epcNO)
8202 bDoDLB = bBoxChanged;
8204 else
8206 bDoDLB = bNStGlobalComm;
8210 /* Check if we have recorded loads on the nodes */
8211 if (comm->bRecordLoad && dd_load_count(comm))
8213 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
8215 /* Check if we should use DLB at the second partitioning
8216 * and every 100 partitionings,
8217 * so the extra communication cost is negligible. */
8219 n = max(100,nstglobalcomm);
8220 bCheckDLB = (comm->n_load_collect == 0 ||
8221 comm->n_load_have % n == n-1);
8223 else
8225 bCheckDLB = FALSE;
8228 /* Print load every nstlog, first and last step to the log file */
8229 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
8230 comm->n_load_collect == 0 ||
8231 (step + ir->nstlist > ir->init_step + ir->nsteps));
8233 /* Avoid extra communication due to verbose screen output
8234 * when nstglobalcomm is set. */
8236 if (bDoDLB || bLogLoad || bCheckDLB ||
8237 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
8239 get_load_distribution(dd,wcycle);
8240 if (DDMASTER(dd))
8242 if (bLogLoad)
8244 dd_print_load(fplog,dd,step-1);
8246 if (bVerbose)
8248 dd_print_load_verbose(dd);
8251 comm->n_load_collect++;
8253 if (bCheckDLB) {
8254 /* Since the timings are node dependent, the master decides */
8255 if (DDMASTER(dd))
8257 bTurnOnDLB =
8258 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
8259 if (debug)
8261 fprintf(debug,"step %s, imb loss %f\n",
8262 gmx_step_str(step,sbuf),
8263 dd_force_imb_perf_loss(dd));
8266 dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
8267 if (bTurnOnDLB)
8269 turn_on_dlb(fplog,cr,step);
8270 bDoDLB = TRUE;
8274 comm->n_load_have++;
8277 cgs_gl = &comm->cgs_gl;
8279 bRedist = FALSE;
8280 if (bMasterState)
8282 /* Clear the old state */
8283 clear_dd_indices(dd,0,0);
8285 set_ddbox(dd,bMasterState,cr,ir,state_global->box,
8286 TRUE,cgs_gl,state_global->x,&ddbox);
8288 get_cg_distribution(fplog,step,dd,cgs_gl,
8289 state_global->box,&ddbox,state_global->x);
8291 dd_distribute_state(dd,cgs_gl,
8292 state_global,state_local,f);
8294 dd_make_local_cgs(dd,&top_local->cgs);
8296 if (dd->ncg_home > fr->cg_nalloc)
8298 dd_realloc_fr_cg(fr,dd->ncg_home);
8300 calc_cgcm(fplog,0,dd->ncg_home,
8301 &top_local->cgs,state_local->x,fr->cg_cm);
8303 inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
8305 dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
8307 cg0 = 0;
8309 else if (state_local->ddp_count != dd->ddp_count)
8311 if (state_local->ddp_count > dd->ddp_count)
8313 gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
8316 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
8318 gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
8321 /* Clear the old state */
8322 clear_dd_indices(dd,0,0);
8324 /* Build the new indices */
8325 rebuild_cgindex(dd,cgs_gl->index,state_local);
8326 make_dd_indices(dd,cgs_gl->index,0);
8328 /* Redetermine the cg COMs */
8329 calc_cgcm(fplog,0,dd->ncg_home,
8330 &top_local->cgs,state_local->x,fr->cg_cm);
8332 inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
8334 dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
8336 set_ddbox(dd,bMasterState,cr,ir,state_local->box,
8337 TRUE,&top_local->cgs,state_local->x,&ddbox);
8339 bRedist = comm->bDynLoadBal;
8341 else
8343 /* We have the full state, only redistribute the cgs */
8345 /* Clear the non-home indices */
8346 clear_dd_indices(dd,dd->ncg_home,dd->nat_home);
8348 /* Avoid global communication for dim's without pbc and -gcom */
8349 if (!bNStGlobalComm)
8351 copy_rvec(comm->box0 ,ddbox.box0 );
8352 copy_rvec(comm->box_size,ddbox.box_size);
8354 set_ddbox(dd,bMasterState,cr,ir,state_local->box,
8355 bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);
8357 bBoxChanged = TRUE;
8358 bRedist = TRUE;
8360 /* For dim's without pbc and -gcom */
8361 copy_rvec(ddbox.box0 ,comm->box0 );
8362 copy_rvec(ddbox.box_size,comm->box_size);
8364 set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
8365 step,wcycle);
8367 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
8369 write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
8372 /* Check if we should sort the charge groups */
8373 if (comm->nstSortCG > 0)
8375 bSortCG = (bMasterState ||
8376 (bRedist && (step % comm->nstSortCG == 0)));
8378 else
8380 bSortCG = FALSE;
8383 ncg_home_old = dd->ncg_home;
8385 if (bRedist)
8387 cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
8388 state_local,f,fr,mdatoms,
8389 !bSortCG,nrnb);
8392 get_nsgrid_boundaries(fr->ns.grid,dd,
8393 state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
8394 dd->ncg_home,fr->cg_cm,
8395 cell_ns_x0,cell_ns_x1,&grid_density);
8397 if (bBoxChanged)
8399 comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
8402 copy_ivec(fr->ns.grid->n,ncells_old);
8403 grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
8404 state_local->box,cell_ns_x0,cell_ns_x1,
8405 fr->rlistlong,grid_density);
8406 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
8407 copy_ivec(ddbox.tric_dir,comm->tric_dir);
8409 if (bSortCG)
8411 /* Sort the state on charge group position.
8412 * This enables exact restarts from this step.
8413 * It also improves performance by about 15% with larger numbers
8414 * of atoms per node. */
8417 /* Fill the ns grid with the home cell,
8418 * so we can sort with the indices. */
8420 set_zones_ncg_home(dd);
8421 fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
8422 0,dd->ncg_home,fr->cg_cm);
8424 /* Check if we can use the old order and ns grid cell indices
8425 * of the charge groups to sort the charge groups efficiently. */
8427 bResortAll = (bMasterState ||
8428 fr->ns.grid->n[XX] != ncells_old[XX] ||
8429 fr->ns.grid->n[YY] != ncells_old[YY] ||
8430 fr->ns.grid->n[ZZ] != ncells_old[ZZ]);
8432 if (debug)
8434 fprintf(debug,"Step %s, sorting the %d home charge groups\n",
8435 gmx_step_str(step,sbuf),dd->ncg_home);
8437 dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
8438 bResortAll ? -1 : ncg_home_old);
8439 /* Rebuild all the indices */
8440 cg0 = 0;
8441 ga2la_clear(dd->ga2la);
8444 /* Set up the communication and communicate the coordinates */
8445 setup_dd_communication(dd,state_local->box,&ddbox,fr);
8447 /* Set the indices */
8448 make_dd_indices(dd,cgs_gl->index,cg0);
8450 /* Set the charge group boundaries for neighbor searching */
8451 set_cg_boundaries(&comm->zones);
8454 /* write_dd_pdb("dd_home",step,"dump",top_global,cr,
8455 -1,state_local->x,state_local->box); */
8458 /* Extract a local topology from the global topology */
8459 for(i=0; i<dd->ndim; i++)
8461 np[dd->dim[i]] = comm->cd[i].np;
8463 dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
8464 comm->cellsize_min,np,
8465 fr,vsite,top_global,top_local);
8467 /* Set up the special atom communication */
8468 n = comm->nat[ddnatZONE];
8469 for(i=ddnatZONE+1; i<ddnatNR; i++)
8471 switch(i)
8473 case ddnatVSITE:
8474 if (vsite && vsite->n_intercg_vsite)
8476 n = dd_make_local_vsites(dd,n,top_local->idef.il);
8478 break;
8479 case ddnatCON:
8480 if (dd->bInterCGcons)
8482 /* Special code is only needed for inter-cg constraints */
8483 n = dd_make_local_constraints(dd,n,top_global,
8484 constr,ir->nProjOrder,
8485 &top_local->idef.il[F_CONSTR]);
8487 break;
8488 default:
8489 gmx_incons("Unknown special atom type setup");
8491 comm->nat[i] = n;
8494 /* Make space for the extra coordinates for virtual site
8495 * or constraint communication. */
8497 state_local->natoms = comm->nat[ddnatNR-1];
8498 if (state_local->natoms > state_local->nalloc)
8500 dd_realloc_state(state_local,f,state_local->natoms);
8503 if (fr->bF_NoVirSum)
8505 if (vsite && vsite->n_intercg_vsite)
8507 nat_f_novirsum = comm->nat[ddnatVSITE];
8509 else
8511 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
8513 nat_f_novirsum = dd->nat_tot;
8515 else
8517 nat_f_novirsum = dd->nat_home;
8521 else
8523 nat_f_novirsum = 0;
8526 /* Set the number of atoms required for the force calculation.
8527 * Forces need to be constrained when using a twin-range setup
8528 * or with energy minimization. For simple simulations we could
8529 * avoid some allocation, zeroing and copying, but this is
8530 * probably not worth the complications and checking. */
8532 forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
8533 dd->nat_tot,comm->nat[ddnatCON],nat_f_novirsum);
8535 /* We make all mdatoms up to nat_tot_con.
8536 * We could save some work by only setting invmass
8537 * between nat_tot and nat_tot_con. */
8539 /* This call also sets the new number of home particles to dd->nat_home */
8540 atoms2md(top_global,ir,
8541 comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);
8543 /* Now that we have the charges we can sort the FE interactions */
8544 dd_sort_local_top(dd,mdatoms,top_local);
8546 if (shellfc)
8548 /* Make the local shell stuff; currently no communication is done */
8549 make_local_shells(cr,mdatoms,shellfc);
8552 if (ir->implicit_solvent)
8554 make_local_gb(cr,fr->born,ir->gb_algorithm);
8557 if (!(cr->duty & DUTY_PME))
8559 /* Send the charges to our PME-only node */
8560 gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
8561 mdatoms->chargeA,mdatoms->chargeB,
8562 comm->ddpme[0].maxshift,comm->ddpme[1].maxshift);
8565 if (constr)
8567 set_constraints(constr,top_local,ir,mdatoms,cr);
8570 if (ir->ePull != epullNO)
8572 /* Update the local pull groups */
8573 dd_make_local_pull_groups(dd,ir->pull,mdatoms);
8576 add_dd_statistics(dd);
8578 /* Make sure we only count the cycles for this DD partitioning */
8579 clear_dd_cycle_counts(dd);
8581 /* Because the order of the atoms might have changed since
8582 * the last vsite construction, we need to communicate the constructing
8583 * atom coordinates again (for spreading the forces this MD step). */
8585 dd_move_x_vsites(dd,state_local->box,state_local->x);
8587 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
8589 dd_move_x(dd,state_local->box,state_local->x);
8590 write_dd_pdb("dd_dump",step,"dump",top_global,cr,
8591 -1,state_local->x,state_local->box);
8594 /* Store the partitioning step */
8595 comm->partition_step = step;
8597 /* Increase the DD partitioning counter */
8598 dd->ddp_count++;
8599 /* The state currently matches this DD partitioning count, store it */
8600 state_local->ddp_count = dd->ddp_count;
8601 if (bMasterState)
8603 /* The DD master node knows the complete cg distribution,
8604 * store the count so we can possibly skip the cg info communication. */
8606 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
8609 if (comm->DD_debug > 0)
8611 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
8612 check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
8613 "after partitioning");