Enforced rotation: code cleanup
[gromacs/adressmacs.git] / src / mdlib / domdec.c
blobabc08743e2af1b6e6b8dea8bec54e9db7a179b65
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
15 * And Hey:
16 * Gnomes, ROck Monsters And Chili Sauce
19 #ifdef HAVE_CONFIG_H
20 #include <config.h>
21 #endif
23 #include <stdio.h>
24 #include <time.h>
25 #include <math.h>
26 #include <string.h>
27 #include <stdlib.h>
28 #include "typedefs.h"
29 #include "smalloc.h"
30 #include "vec.h"
31 #include "domdec.h"
32 #include "domdec_network.h"
33 #include "nrnb.h"
34 #include "pbc.h"
35 #include "chargegroup.h"
36 #include "constr.h"
37 #include "mdatoms.h"
38 #include "names.h"
39 #include "pdbio.h"
40 #include "futil.h"
41 #include "force.h"
42 #include "pme.h"
43 #include "pull.h"
44 #include "pull_rotation.h"
45 #include "gmx_wallcycle.h"
46 #include "mdrun.h"
47 #include "nsgrid.h"
48 #include "shellfc.h"
49 #include "mtop_util.h"
50 #include "gmxfio.h"
51 #include "gmx_ga2la.h"
52 #include "gmx_sort.h"
54 #ifdef GMX_LIB_MPI
55 #include <mpi.h>
56 #endif
57 #ifdef GMX_THREADS
58 #include "tmpi.h"
59 #endif
61 #define DDRANK(dd,rank) (rank)
62 #define DDMASTERRANK(dd) (dd->masterrank)
64 typedef struct gmx_domdec_master
66 /* The cell boundaries */
67 real **cell_x;
68 /* The global charge group division */
69 int *ncg; /* Number of home charge groups for each node */
70 int *index; /* Index of nnodes+1 into cg */
71 int *cg; /* Global charge group index */
72 int *nat; /* Number of home atoms for each node. */
73 int *ibuf; /* Buffer for communication */
74 rvec *vbuf; /* Buffer for state scattering and gathering */
75 } gmx_domdec_master_t;
77 typedef struct
79 /* The numbers of charge groups to send and receive for each cell
80 * that requires communication, the last entry contains the total
81 * number of atoms that needs to be communicated.
83 int nsend[DD_MAXIZONE+2];
84 int nrecv[DD_MAXIZONE+2];
85 /* The charge groups to send */
86 int *index;
87 int nalloc;
88 /* The atom range for non-in-place communication */
89 int cell2at0[DD_MAXIZONE];
90 int cell2at1[DD_MAXIZONE];
91 } gmx_domdec_ind_t;
93 typedef struct
95 int np; /* Number of grid pulses in this dimension */
96 int np_dlb; /* For dlb, for use with edlbAUTO */
97 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
98 int np_nalloc;
99 gmx_bool bInPlace; /* Can we communicate in place? */
100 } gmx_domdec_comm_dim_t;
102 typedef struct
104 gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
105 real *cell_f; /* State var.: cell boundaries, box relative */
106 real *old_cell_f; /* Temp. var.: old cell size */
107 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
108 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
109 real *bound_min; /* Temp. var.: lower limit for cell boundary */
110 real *bound_max; /* Temp. var.: upper limit for cell boundary */
111 gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
112 real *buf_ncd; /* Temp. var. */
113 } gmx_domdec_root_t;
115 #define DD_NLOAD_MAX 9
117 /* Here floats are accurate enough, since these variables
118 * only influence the load balancing, not the actual MD results.
120 typedef struct
122 int nload;
123 float *load;
124 float sum;
125 float max;
126 float sum_m;
127 float cvol_min;
128 float mdf;
129 float pme;
130 int flags;
131 } gmx_domdec_load_t;
133 typedef struct
135 int nsc;
136 int ind_gl;
137 int ind;
138 } gmx_cgsort_t;
140 typedef struct
142 gmx_cgsort_t *sort1,*sort2;
143 int sort_nalloc;
144 gmx_cgsort_t *sort_new;
145 int sort_new_nalloc;
146 int *ibuf;
147 int ibuf_nalloc;
148 } gmx_domdec_sort_t;
150 typedef struct
152 rvec *v;
153 int nalloc;
154 } vec_rvec_t;
156 /* This enum determines the order of the coordinates.
157 * ddnatHOME and ddnatZONE should be first and second,
158 * the others can be ordered as wanted.
160 enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };
162 enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
163 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
165 typedef struct
167 int dim; /* The dimension */
168 gmx_bool dim_match;/* Tells if DD and PME dims match */
169 int nslab; /* The number of PME slabs in this dimension */
170 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
171 int *pp_min; /* The minimum pp node location, size nslab */
172 int *pp_max; /* The maximum pp node location,size nslab */
173 int maxshift; /* The maximum shift for coordinate redistribution in PME */
174 } gmx_ddpme_t;
176 typedef struct
178 real min0; /* The minimum bottom of this zone */
179 real max1; /* The maximum top of this zone */
180 real mch0; /* The maximum bottom communicaton height for this zone */
181 real mch1; /* The maximum top communicaton height for this zone */
182 real p1_0; /* The bottom value of the first cell in this zone */
183 real p1_1; /* The top value of the first cell in this zone */
184 } gmx_ddzone_t;
186 typedef struct gmx_domdec_comm
188 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
189 * unless stated otherwise.
192 /* The number of decomposition dimensions for PME, 0: no PME */
193 int npmedecompdim;
194 /* The number of nodes doing PME (PP/PME or only PME) */
195 int npmenodes;
196 int npmenodes_x;
197 int npmenodes_y;
198 /* The communication setup including the PME only nodes */
199 gmx_bool bCartesianPP_PME;
200 ivec ntot;
201 int cartpmedim;
202 int *pmenodes; /* size npmenodes */
203 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
204 * but with bCartesianPP_PME */
205 gmx_ddpme_t ddpme[2];
207 /* The DD particle-particle nodes only */
208 gmx_bool bCartesianPP;
209 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
211 /* The global charge groups */
212 t_block cgs_gl;
214 /* Should we sort the cgs */
215 int nstSortCG;
216 gmx_domdec_sort_t *sort;
218 /* Are there bonded and multi-body interactions between charge groups? */
219 gmx_bool bInterCGBondeds;
220 gmx_bool bInterCGMultiBody;
222 /* Data for the optional bonded interaction atom communication range */
223 gmx_bool bBondComm;
224 t_blocka *cglink;
225 char *bLocalCG;
227 /* The DLB option */
228 int eDLB;
229 /* Are we actually using DLB? */
230 gmx_bool bDynLoadBal;
232 /* Cell sizes for static load balancing, first index cartesian */
233 real **slb_frac;
235 /* The width of the communicated boundaries */
236 real cutoff_mbody;
237 real cutoff;
238 /* The minimum cell size (including triclinic correction) */
239 rvec cellsize_min;
240 /* For dlb, for use with edlbAUTO */
241 rvec cellsize_min_dlb;
242 /* The lower limit for the DD cell size with DLB */
243 real cellsize_limit;
244 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
245 gmx_bool bVacDLBNoLimit;
247 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
248 ivec tric_dir;
249 /* box0 and box_size are required with dim's without pbc and -gcom */
250 rvec box0;
251 rvec box_size;
253 /* The cell boundaries */
254 rvec cell_x0;
255 rvec cell_x1;
257 /* The old location of the cell boundaries, to check cg displacements */
258 rvec old_cell_x0;
259 rvec old_cell_x1;
261 /* The communication setup and charge group boundaries for the zones */
262 gmx_domdec_zones_t zones;
264 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
265 * cell boundaries of neighboring cells for dynamic load balancing.
267 gmx_ddzone_t zone_d1[2];
268 gmx_ddzone_t zone_d2[2][2];
270 /* The coordinate/force communication setup and indices */
271 gmx_domdec_comm_dim_t cd[DIM];
272 /* The maximum number of cells to communicate with in one dimension */
273 int maxpulse;
275 /* Which cg distribution is stored on the master node */
276 int master_cg_ddp_count;
278 /* The number of cg's received from the direct neighbors */
279 int zone_ncg1[DD_MAXZONE];
281 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
282 int nat[ddnatNR];
284 /* Communication buffer for general use */
285 int *buf_int;
286 int nalloc_int;
288 /* Communication buffer for general use */
289 vec_rvec_t vbuf;
291 /* Communication buffers only used with multiple grid pulses */
292 int *buf_int2;
293 int nalloc_int2;
294 vec_rvec_t vbuf2;
296 /* Communication buffers for local redistribution */
297 int **cggl_flag;
298 int cggl_flag_nalloc[DIM*2];
299 rvec **cgcm_state;
300 int cgcm_state_nalloc[DIM*2];
302 /* Cell sizes for dynamic load balancing */
303 gmx_domdec_root_t **root;
304 real *cell_f_row;
305 real cell_f0[DIM];
306 real cell_f1[DIM];
307 real cell_f_max0[DIM];
308 real cell_f_min1[DIM];
310 /* Stuff for load communication */
311 gmx_bool bRecordLoad;
312 gmx_domdec_load_t *load;
313 #ifdef GMX_MPI
314 MPI_Comm *mpi_comm_load;
315 #endif
316 /* Cycle counters */
317 float cycl[ddCyclNr];
318 int cycl_n[ddCyclNr];
319 float cycl_max[ddCyclNr];
320 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
321 int eFlop;
322 double flop;
323 int flop_n;
324 /* Have often have did we have load measurements */
325 int n_load_have;
326 /* Have often have we collected the load measurements */
327 int n_load_collect;
329 /* Statistics */
330 double sum_nat[ddnatNR-ddnatZONE];
331 int ndecomp;
332 int nload;
333 double load_step;
334 double load_sum;
335 double load_max;
336 ivec load_lim;
337 double load_mdf;
338 double load_pme;
340 /* The last partition step */
341 gmx_large_int_t partition_step;
343 /* Debugging */
344 int nstDDDump;
345 int nstDDDumpGrid;
346 int DD_debug;
347 } gmx_domdec_comm_t;
349 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
350 #define DD_CGIBS 2
352 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
353 #define DD_FLAG_NRCG 65535
354 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
355 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
357 /* Zone permutation required to obtain consecutive charge groups
358 * for neighbor searching.
360 static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
362 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
363 * components see only j zones with that component 0.
366 /* The DD zone order */
367 static const ivec dd_zo[DD_MAXZONE] =
368 {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
370 /* The 3D setup */
371 #define dd_z3n 8
372 #define dd_zp3n 4
373 static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
375 /* The 2D setup */
376 #define dd_z2n 4
377 #define dd_zp2n 2
378 static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};
380 /* The 1D setup */
381 #define dd_z1n 2
382 #define dd_zp1n 1
383 static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
385 /* Factors used to avoid problems due to rounding issues */
386 #define DD_CELL_MARGIN 1.0001
387 #define DD_CELL_MARGIN2 1.00005
388 /* Factor to account for pressure scaling during nstlist steps */
389 #define DD_PRES_SCALE_MARGIN 1.02
391 /* Allowed performance loss before we DLB or warn */
392 #define DD_PERF_LOSS 0.05
394 #define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
396 /* Use separate MPI send and receive commands
397 * when nnodes <= GMX_DD_NNODES_SENDRECV.
398 * This saves memory (and some copying for small nnodes).
399 * For high parallelization scatter and gather calls are used.
401 #define GMX_DD_NNODES_SENDRECV 4
405 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
407 static void index2xyz(ivec nc,int ind,ivec xyz)
409 xyz[XX] = ind % nc[XX];
410 xyz[YY] = (ind / nc[XX]) % nc[YY];
411 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
415 /* This order is required to minimize the coordinate communication in PME
416 * which uses decomposition in the x direction.
418 #define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
420 static void ddindex2xyz(ivec nc,int ind,ivec xyz)
422 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
423 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
424 xyz[ZZ] = ind % nc[ZZ];
427 static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
429 int ddindex;
430 int ddnodeid=-1;
432 ddindex = dd_index(dd->nc,c);
433 if (dd->comm->bCartesianPP_PME)
435 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
437 else if (dd->comm->bCartesianPP)
439 #ifdef GMX_MPI
440 MPI_Cart_rank(dd->mpi_comm_all,c,&ddnodeid);
441 #endif
443 else
445 ddnodeid = ddindex;
448 return ddnodeid;
451 static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
453 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
456 int ddglatnr(gmx_domdec_t *dd,int i)
458 int atnr;
460 if (dd == NULL)
462 atnr = i + 1;
464 else
466 if (i >= dd->comm->nat[ddnatNR-1])
468 gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,dd->comm->nat[ddnatNR-1]);
470 atnr = dd->gatindex[i] + 1;
473 return atnr;
476 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
478 return &dd->comm->cgs_gl;
481 static void vec_rvec_init(vec_rvec_t *v)
483 v->nalloc = 0;
484 v->v = NULL;
487 static void vec_rvec_check_alloc(vec_rvec_t *v,int n)
489 if (n > v->nalloc)
491 v->nalloc = over_alloc_dd(n);
492 srenew(v->v,v->nalloc);
496 void dd_store_state(gmx_domdec_t *dd,t_state *state)
498 int i;
500 if (state->ddp_count != dd->ddp_count)
502 gmx_incons("The state does not the domain decomposition state");
505 state->ncg_gl = dd->ncg_home;
506 if (state->ncg_gl > state->cg_gl_nalloc)
508 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
509 srenew(state->cg_gl,state->cg_gl_nalloc);
511 for(i=0; i<state->ncg_gl; i++)
513 state->cg_gl[i] = dd->index_gl[i];
516 state->ddp_count_cg_gl = dd->ddp_count;
519 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
521 return &dd->comm->zones;
524 void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
525 int *jcg0,int *jcg1,ivec shift0,ivec shift1)
527 gmx_domdec_zones_t *zones;
528 int izone,d,dim;
530 zones = &dd->comm->zones;
532 izone = 0;
533 while (icg >= zones->izone[izone].cg1)
535 izone++;
538 if (izone == 0)
540 *jcg0 = icg;
542 else if (izone < zones->nizone)
544 *jcg0 = zones->izone[izone].jcg0;
546 else
548 gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
549 icg,izone,zones->nizone);
552 *jcg1 = zones->izone[izone].jcg1;
554 for(d=0; d<dd->ndim; d++)
556 dim = dd->dim[d];
557 shift0[dim] = zones->izone[izone].shift0[dim];
558 shift1[dim] = zones->izone[izone].shift1[dim];
559 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
561 /* A conservative approach, this can be optimized */
562 shift0[dim] -= 1;
563 shift1[dim] += 1;
568 int dd_natoms_vsite(gmx_domdec_t *dd)
570 return dd->comm->nat[ddnatVSITE];
573 void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
575 *at_start = dd->comm->nat[ddnatCON-1];
576 *at_end = dd->comm->nat[ddnatCON];
579 void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
581 int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
582 int *index,*cgindex;
583 gmx_domdec_comm_t *comm;
584 gmx_domdec_comm_dim_t *cd;
585 gmx_domdec_ind_t *ind;
586 rvec shift={0,0,0},*buf,*rbuf;
587 gmx_bool bPBC,bScrew;
589 comm = dd->comm;
591 cgindex = dd->cgindex;
593 buf = comm->vbuf.v;
595 nzone = 1;
596 nat_tot = dd->nat_home;
597 for(d=0; d<dd->ndim; d++)
599 bPBC = (dd->ci[dd->dim[d]] == 0);
600 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
601 if (bPBC)
603 copy_rvec(box[dd->dim[d]],shift);
605 cd = &comm->cd[d];
606 for(p=0; p<cd->np; p++)
608 ind = &cd->ind[p];
609 index = ind->index;
610 n = 0;
611 if (!bPBC)
613 for(i=0; i<ind->nsend[nzone]; i++)
615 at0 = cgindex[index[i]];
616 at1 = cgindex[index[i]+1];
617 for(j=at0; j<at1; j++)
619 copy_rvec(x[j],buf[n]);
620 n++;
624 else if (!bScrew)
626 for(i=0; i<ind->nsend[nzone]; i++)
628 at0 = cgindex[index[i]];
629 at1 = cgindex[index[i]+1];
630 for(j=at0; j<at1; j++)
632 /* We need to shift the coordinates */
633 rvec_add(x[j],shift,buf[n]);
634 n++;
638 else
640 for(i=0; i<ind->nsend[nzone]; i++)
642 at0 = cgindex[index[i]];
643 at1 = cgindex[index[i]+1];
644 for(j=at0; j<at1; j++)
646 /* Shift x */
647 buf[n][XX] = x[j][XX] + shift[XX];
648 /* Rotate y and z.
649 * This operation requires a special shift force
650 * treatment, which is performed in calc_vir.
652 buf[n][YY] = box[YY][YY] - x[j][YY];
653 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
654 n++;
659 if (cd->bInPlace)
661 rbuf = x + nat_tot;
663 else
665 rbuf = comm->vbuf2.v;
667 /* Send and receive the coordinates */
668 dd_sendrecv_rvec(dd, d, dddirBackward,
669 buf, ind->nsend[nzone+1],
670 rbuf, ind->nrecv[nzone+1]);
671 if (!cd->bInPlace)
673 j = 0;
674 for(zone=0; zone<nzone; zone++)
676 for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
678 copy_rvec(rbuf[j],x[i]);
679 j++;
683 nat_tot += ind->nrecv[nzone+1];
685 nzone += nzone;
689 void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
691 int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
692 int *index,*cgindex;
693 gmx_domdec_comm_t *comm;
694 gmx_domdec_comm_dim_t *cd;
695 gmx_domdec_ind_t *ind;
696 rvec *buf,*sbuf;
697 ivec vis;
698 int is;
699 gmx_bool bPBC,bScrew;
701 comm = dd->comm;
703 cgindex = dd->cgindex;
705 buf = comm->vbuf.v;
707 n = 0;
708 nzone = comm->zones.n/2;
709 nat_tot = dd->nat_tot;
710 for(d=dd->ndim-1; d>=0; d--)
712 bPBC = (dd->ci[dd->dim[d]] == 0);
713 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
714 if (fshift == NULL && !bScrew)
716 bPBC = FALSE;
718 /* Determine which shift vector we need */
719 clear_ivec(vis);
720 vis[dd->dim[d]] = 1;
721 is = IVEC2IS(vis);
723 cd = &comm->cd[d];
724 for(p=cd->np-1; p>=0; p--) {
725 ind = &cd->ind[p];
726 nat_tot -= ind->nrecv[nzone+1];
727 if (cd->bInPlace)
729 sbuf = f + nat_tot;
731 else
733 sbuf = comm->vbuf2.v;
734 j = 0;
735 for(zone=0; zone<nzone; zone++)
737 for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
739 copy_rvec(f[i],sbuf[j]);
740 j++;
744 /* Communicate the forces */
745 dd_sendrecv_rvec(dd, d, dddirForward,
746 sbuf, ind->nrecv[nzone+1],
747 buf, ind->nsend[nzone+1]);
748 index = ind->index;
749 /* Add the received forces */
750 n = 0;
751 if (!bPBC)
753 for(i=0; i<ind->nsend[nzone]; i++)
755 at0 = cgindex[index[i]];
756 at1 = cgindex[index[i]+1];
757 for(j=at0; j<at1; j++)
759 rvec_inc(f[j],buf[n]);
760 n++;
764 else if (!bScrew)
766 for(i=0; i<ind->nsend[nzone]; i++)
768 at0 = cgindex[index[i]];
769 at1 = cgindex[index[i]+1];
770 for(j=at0; j<at1; j++)
772 rvec_inc(f[j],buf[n]);
773 /* Add this force to the shift force */
774 rvec_inc(fshift[is],buf[n]);
775 n++;
779 else
781 for(i=0; i<ind->nsend[nzone]; i++)
783 at0 = cgindex[index[i]];
784 at1 = cgindex[index[i]+1];
785 for(j=at0; j<at1; j++)
787 /* Rotate the force */
788 f[j][XX] += buf[n][XX];
789 f[j][YY] -= buf[n][YY];
790 f[j][ZZ] -= buf[n][ZZ];
791 if (fshift)
793 /* Add this force to the shift force */
794 rvec_inc(fshift[is],buf[n]);
796 n++;
801 nzone /= 2;
805 void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
807 int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
808 int *index,*cgindex;
809 gmx_domdec_comm_t *comm;
810 gmx_domdec_comm_dim_t *cd;
811 gmx_domdec_ind_t *ind;
812 real *buf,*rbuf;
814 comm = dd->comm;
816 cgindex = dd->cgindex;
818 buf = &comm->vbuf.v[0][0];
820 nzone = 1;
821 nat_tot = dd->nat_home;
822 for(d=0; d<dd->ndim; d++)
824 cd = &comm->cd[d];
825 for(p=0; p<cd->np; p++)
827 ind = &cd->ind[p];
828 index = ind->index;
829 n = 0;
830 for(i=0; i<ind->nsend[nzone]; i++)
832 at0 = cgindex[index[i]];
833 at1 = cgindex[index[i]+1];
834 for(j=at0; j<at1; j++)
836 buf[n] = v[j];
837 n++;
841 if (cd->bInPlace)
843 rbuf = v + nat_tot;
845 else
847 rbuf = &comm->vbuf2.v[0][0];
849 /* Send and receive the coordinates */
850 dd_sendrecv_real(dd, d, dddirBackward,
851 buf, ind->nsend[nzone+1],
852 rbuf, ind->nrecv[nzone+1]);
853 if (!cd->bInPlace)
855 j = 0;
856 for(zone=0; zone<nzone; zone++)
858 for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
860 v[i] = rbuf[j];
861 j++;
865 nat_tot += ind->nrecv[nzone+1];
867 nzone += nzone;
871 void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
873 int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
874 int *index,*cgindex;
875 gmx_domdec_comm_t *comm;
876 gmx_domdec_comm_dim_t *cd;
877 gmx_domdec_ind_t *ind;
878 real *buf,*sbuf;
880 comm = dd->comm;
882 cgindex = dd->cgindex;
884 buf = &comm->vbuf.v[0][0];
886 n = 0;
887 nzone = comm->zones.n/2;
888 nat_tot = dd->nat_tot;
889 for(d=dd->ndim-1; d>=0; d--)
891 cd = &comm->cd[d];
892 for(p=cd->np-1; p>=0; p--) {
893 ind = &cd->ind[p];
894 nat_tot -= ind->nrecv[nzone+1];
895 if (cd->bInPlace)
897 sbuf = v + nat_tot;
899 else
901 sbuf = &comm->vbuf2.v[0][0];
902 j = 0;
903 for(zone=0; zone<nzone; zone++)
905 for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
907 sbuf[j] = v[i];
908 j++;
912 /* Communicate the forces */
913 dd_sendrecv_real(dd, d, dddirForward,
914 sbuf, ind->nrecv[nzone+1],
915 buf, ind->nsend[nzone+1]);
916 index = ind->index;
917 /* Add the received forces */
918 n = 0;
919 for(i=0; i<ind->nsend[nzone]; i++)
921 at0 = cgindex[index[i]];
922 at1 = cgindex[index[i]+1];
923 for(j=at0; j<at1; j++)
925 v[j] += buf[n];
926 n++;
930 nzone /= 2;
934 static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
936 fprintf(fp,"zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
937 d,i,j,
938 zone->min0,zone->max1,
939 zone->mch0,zone->mch0,
940 zone->p1_0,zone->p1_1);
943 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
944 int ddimind,int direction,
945 gmx_ddzone_t *buf_s,int n_s,
946 gmx_ddzone_t *buf_r,int n_r)
948 rvec vbuf_s[5*2],vbuf_r[5*2];
949 int i;
951 for(i=0; i<n_s; i++)
953 vbuf_s[i*2 ][0] = buf_s[i].min0;
954 vbuf_s[i*2 ][1] = buf_s[i].max1;
955 vbuf_s[i*2 ][2] = buf_s[i].mch0;
956 vbuf_s[i*2+1][0] = buf_s[i].mch1;
957 vbuf_s[i*2+1][1] = buf_s[i].p1_0;
958 vbuf_s[i*2+1][2] = buf_s[i].p1_1;
961 dd_sendrecv_rvec(dd, ddimind, direction,
962 vbuf_s, n_s*2,
963 vbuf_r, n_r*2);
965 for(i=0; i<n_r; i++)
967 buf_r[i].min0 = vbuf_r[i*2 ][0];
968 buf_r[i].max1 = vbuf_r[i*2 ][1];
969 buf_r[i].mch0 = vbuf_r[i*2 ][2];
970 buf_r[i].mch1 = vbuf_r[i*2+1][0];
971 buf_r[i].p1_0 = vbuf_r[i*2+1][1];
972 buf_r[i].p1_1 = vbuf_r[i*2+1][2];
976 static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
977 rvec cell_ns_x0,rvec cell_ns_x1)
979 int d,d1,dim,dim1,pos,buf_size,i,j,k,p,npulse,npulse_min;
980 gmx_ddzone_t *zp,buf_s[5],buf_r[5],buf_e[5];
981 rvec extr_s[2],extr_r[2];
982 rvec dh;
983 real dist_d,c=0,det;
984 gmx_domdec_comm_t *comm;
985 gmx_bool bPBC,bUse;
987 comm = dd->comm;
989 for(d=1; d<dd->ndim; d++)
991 dim = dd->dim[d];
992 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
993 zp->min0 = cell_ns_x0[dim];
994 zp->max1 = cell_ns_x1[dim];
995 zp->mch0 = cell_ns_x0[dim];
996 zp->mch1 = cell_ns_x1[dim];
997 zp->p1_0 = cell_ns_x0[dim];
998 zp->p1_1 = cell_ns_x1[dim];
1001 for(d=dd->ndim-2; d>=0; d--)
1003 dim = dd->dim[d];
1004 bPBC = (dim < ddbox->npbcdim);
1006 /* Use an rvec to store two reals */
1007 extr_s[d][0] = comm->cell_f0[d+1];
1008 extr_s[d][1] = comm->cell_f1[d+1];
1009 extr_s[d][2] = 0;
1011 pos = 0;
1012 /* Store the extremes in the backward sending buffer,
1013 * so the get updated separately from the forward communication.
1015 for(d1=d; d1<dd->ndim-1; d1++)
1017 /* We invert the order to be able to use the same loop for buf_e */
1018 buf_s[pos].min0 = extr_s[d1][1];
1019 buf_s[pos].max1 = extr_s[d1][0];
1020 buf_s[pos].mch0 = 0;
1021 buf_s[pos].mch1 = 0;
1022 /* Store the cell corner of the dimension we communicate along */
1023 buf_s[pos].p1_0 = comm->cell_x0[dim];
1024 buf_s[pos].p1_1 = 0;
1025 pos++;
1028 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1029 pos++;
1031 if (dd->ndim == 3 && d == 0)
1033 buf_s[pos] = comm->zone_d2[0][1];
1034 pos++;
1035 buf_s[pos] = comm->zone_d1[0];
1036 pos++;
1039 /* We only need to communicate the extremes
1040 * in the forward direction
1042 npulse = comm->cd[d].np;
1043 if (bPBC)
1045 /* Take the minimum to avoid double communication */
1046 npulse_min = min(npulse,dd->nc[dim]-1-npulse);
1048 else
1050 /* Without PBC we should really not communicate over
1051 * the boundaries, but implementing that complicates
1052 * the communication setup and therefore we simply
1053 * do all communication, but ignore some data.
1055 npulse_min = npulse;
1057 for(p=0; p<npulse_min; p++)
1059 /* Communicate the extremes forward */
1060 bUse = (bPBC || dd->ci[dim] > 0);
1062 dd_sendrecv_rvec(dd, d, dddirForward,
1063 extr_s+d, dd->ndim-d-1,
1064 extr_r+d, dd->ndim-d-1);
1066 if (bUse)
1068 for(d1=d; d1<dd->ndim-1; d1++)
1070 extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
1071 extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
1076 buf_size = pos;
1077 for(p=0; p<npulse; p++)
1079 /* Communicate all the zone information backward */
1080 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1082 dd_sendrecv_ddzone(dd, d, dddirBackward,
1083 buf_s, buf_size,
1084 buf_r, buf_size);
1086 clear_rvec(dh);
1087 if (p > 0)
1089 for(d1=d+1; d1<dd->ndim; d1++)
1091 /* Determine the decrease of maximum required
1092 * communication height along d1 due to the distance along d,
1093 * this avoids a lot of useless atom communication.
1095 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1097 if (ddbox->tric_dir[dim])
1099 /* c is the off-diagonal coupling between the cell planes
1100 * along directions d and d1.
1102 c = ddbox->v[dim][dd->dim[d1]][dim];
1104 else
1106 c = 0;
1108 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1109 if (det > 0)
1111 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1113 else
1115 /* A negative value signals out of range */
1116 dh[d1] = -1;
1121 /* Accumulate the extremes over all pulses */
1122 for(i=0; i<buf_size; i++)
1124 if (p == 0)
1126 buf_e[i] = buf_r[i];
1128 else
1130 if (bUse)
1132 buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
1133 buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
1136 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1138 d1 = 1;
1140 else
1142 d1 = d + 1;
1144 if (bUse && dh[d1] >= 0)
1146 buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
1147 buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
1150 /* Copy the received buffer to the send buffer,
1151 * to pass the data through with the next pulse.
1153 buf_s[i] = buf_r[i];
1155 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1156 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1158 /* Store the extremes */
1159 pos = 0;
1161 for(d1=d; d1<dd->ndim-1; d1++)
1163 extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
1164 extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
1165 pos++;
1168 if (d == 1 || (d == 0 && dd->ndim == 3))
1170 for(i=d; i<2; i++)
1172 comm->zone_d2[1-d][i] = buf_e[pos];
1173 pos++;
1176 if (d == 0)
1178 comm->zone_d1[1] = buf_e[pos];
1179 pos++;
1185 if (dd->ndim >= 2)
1187 dim = dd->dim[1];
1188 for(i=0; i<2; i++)
1190 if (debug)
1192 print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
1194 cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
1195 cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
1198 if (dd->ndim >= 3)
1200 dim = dd->dim[2];
1201 for(i=0; i<2; i++)
1203 for(j=0; j<2; j++)
1205 if (debug)
1207 print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
1209 cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
1210 cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
1214 for(d=1; d<dd->ndim; d++)
1216 comm->cell_f_max0[d] = extr_s[d-1][0];
1217 comm->cell_f_min1[d] = extr_s[d-1][1];
1218 if (debug)
1220 fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
1221 d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
1226 static void dd_collect_cg(gmx_domdec_t *dd,
1227 t_state *state_local)
1229 gmx_domdec_master_t *ma=NULL;
1230 int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;
1231 t_block *cgs_gl;
1233 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1235 /* The master has the correct distribution */
1236 return;
1239 if (state_local->ddp_count == dd->ddp_count)
1241 ncg_home = dd->ncg_home;
1242 cg = dd->index_gl;
1243 nat_home = dd->nat_home;
1245 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1247 cgs_gl = &dd->comm->cgs_gl;
1249 ncg_home = state_local->ncg_gl;
1250 cg = state_local->cg_gl;
1251 nat_home = 0;
1252 for(i=0; i<ncg_home; i++)
1254 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1257 else
1259 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1262 buf2[0] = dd->ncg_home;
1263 buf2[1] = dd->nat_home;
1264 if (DDMASTER(dd))
1266 ma = dd->ma;
1267 ibuf = ma->ibuf;
1269 else
1271 ibuf = NULL;
1273 /* Collect the charge group and atom counts on the master */
1274 dd_gather(dd,2*sizeof(int),buf2,ibuf);
1276 if (DDMASTER(dd))
1278 ma->index[0] = 0;
1279 for(i=0; i<dd->nnodes; i++)
1281 ma->ncg[i] = ma->ibuf[2*i];
1282 ma->nat[i] = ma->ibuf[2*i+1];
1283 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1286 /* Make byte counts and indices */
1287 for(i=0; i<dd->nnodes; i++)
1289 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1290 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1292 if (debug)
1294 fprintf(debug,"Initial charge group distribution: ");
1295 for(i=0; i<dd->nnodes; i++)
1296 fprintf(debug," %d",ma->ncg[i]);
1297 fprintf(debug,"\n");
1301 /* Collect the charge group indices on the master */
1302 dd_gatherv(dd,
1303 dd->ncg_home*sizeof(int),dd->index_gl,
1304 DDMASTER(dd) ? ma->ibuf : NULL,
1305 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1306 DDMASTER(dd) ? ma->cg : NULL);
1308 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1311 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1312 rvec *lv,rvec *v)
1314 gmx_domdec_master_t *ma;
1315 int n,i,c,a,nalloc=0;
1316 rvec *buf=NULL;
1317 t_block *cgs_gl;
1319 ma = dd->ma;
1321 if (!DDMASTER(dd))
1323 #ifdef GMX_MPI
1324 MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
1325 dd->rank,dd->mpi_comm_all);
1326 #endif
1327 } else {
1328 /* Copy the master coordinates to the global array */
1329 cgs_gl = &dd->comm->cgs_gl;
1331 n = DDMASTERRANK(dd);
1332 a = 0;
1333 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1335 for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
1337 copy_rvec(lv[a++],v[c]);
1341 for(n=0; n<dd->nnodes; n++)
1343 if (n != dd->rank)
1345 if (ma->nat[n] > nalloc)
1347 nalloc = over_alloc_dd(ma->nat[n]);
1348 srenew(buf,nalloc);
1350 #ifdef GMX_MPI
1351 MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
1352 n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
1353 #endif
1354 a = 0;
1355 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1357 for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
1359 copy_rvec(buf[a++],v[c]);
1364 sfree(buf);
1368 static void get_commbuffer_counts(gmx_domdec_t *dd,
1369 int **counts,int **disps)
1371 gmx_domdec_master_t *ma;
1372 int n;
1374 ma = dd->ma;
1376 /* Make the rvec count and displacment arrays */
1377 *counts = ma->ibuf;
1378 *disps = ma->ibuf + dd->nnodes;
1379 for(n=0; n<dd->nnodes; n++)
1381 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1382 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1386 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1387 rvec *lv,rvec *v)
1389 gmx_domdec_master_t *ma;
1390 int *rcounts=NULL,*disps=NULL;
1391 int n,i,c,a;
1392 rvec *buf=NULL;
1393 t_block *cgs_gl;
1395 ma = dd->ma;
1397 if (DDMASTER(dd))
1399 get_commbuffer_counts(dd,&rcounts,&disps);
1401 buf = ma->vbuf;
1404 dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);
1406 if (DDMASTER(dd))
1408 cgs_gl = &dd->comm->cgs_gl;
1410 a = 0;
1411 for(n=0; n<dd->nnodes; n++)
1413 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1415 for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
1417 copy_rvec(buf[a++],v[c]);
1424 void dd_collect_vec(gmx_domdec_t *dd,
1425 t_state *state_local,rvec *lv,rvec *v)
1427 gmx_domdec_master_t *ma;
1428 int n,i,c,a,nalloc=0;
1429 rvec *buf=NULL;
1431 dd_collect_cg(dd,state_local);
1433 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1435 dd_collect_vec_sendrecv(dd,lv,v);
1437 else
1439 dd_collect_vec_gatherv(dd,lv,v);
1444 void dd_collect_state(gmx_domdec_t *dd,
1445 t_state *state_local,t_state *state)
1447 int est,i,j,nh;
1449 nh = state->nhchainlength;
1451 if (DDMASTER(dd))
1453 state->lambda = state_local->lambda;
1454 state->veta = state_local->veta;
1455 state->vol0 = state_local->vol0;
1456 copy_mat(state_local->box,state->box);
1457 copy_mat(state_local->boxv,state->boxv);
1458 copy_mat(state_local->svir_prev,state->svir_prev);
1459 copy_mat(state_local->fvir_prev,state->fvir_prev);
1460 copy_mat(state_local->pres_prev,state->pres_prev);
1463 for(i=0; i<state_local->ngtc; i++)
1465 for(j=0; j<nh; j++) {
1466 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1467 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1469 state->therm_integral[i] = state_local->therm_integral[i];
1471 for(i=0; i<state_local->nnhpres; i++)
1473 for(j=0; j<nh; j++) {
1474 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1475 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1479 for(est=0; est<estNR; est++)
1481 if (EST_DISTR(est) && state_local->flags & (1<<est))
1483 switch (est) {
1484 case estX:
1485 dd_collect_vec(dd,state_local,state_local->x,state->x);
1486 break;
1487 case estV:
1488 dd_collect_vec(dd,state_local,state_local->v,state->v);
1489 break;
1490 case estSDX:
1491 dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
1492 break;
1493 case estCGP:
1494 dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
1495 break;
1496 case estLD_RNG:
1497 if (state->nrngi == 1)
1499 if (DDMASTER(dd))
1501 for(i=0; i<state_local->nrng; i++)
1503 state->ld_rng[i] = state_local->ld_rng[i];
1507 else
1509 dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
1510 state_local->ld_rng,state->ld_rng);
1512 break;
1513 case estLD_RNGI:
1514 if (state->nrngi == 1)
1516 if (DDMASTER(dd))
1518 state->ld_rngi[0] = state_local->ld_rngi[0];
1521 else
1523 dd_gather(dd,sizeof(state->ld_rngi[0]),
1524 state_local->ld_rngi,state->ld_rngi);
1526 break;
1527 case estDISRE_INITF:
1528 case estDISRE_RM3TAV:
1529 case estORIRE_INITF:
1530 case estORIRE_DTAV:
1531 break;
1532 default:
1533 gmx_incons("Unknown state entry encountered in dd_collect_state");
1539 static void dd_realloc_fr_cg(t_forcerec *fr,int nalloc)
1541 if (debug)
1543 fprintf(debug,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr->cg_nalloc,nalloc,over_alloc_dd(nalloc));
1545 fr->cg_nalloc = over_alloc_dd(nalloc);
1546 srenew(fr->cg_cm,fr->cg_nalloc);
1547 srenew(fr->cginfo,fr->cg_nalloc);
1550 static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
1552 int est;
1554 if (debug)
1556 fprintf(debug,"Reallocating state: currently %d, required %d, allocating %d\n",state->nalloc,nalloc,over_alloc_dd(nalloc));
1559 state->nalloc = over_alloc_dd(nalloc);
1561 for(est=0; est<estNR; est++)
1563 if (EST_DISTR(est) && state->flags & (1<<est))
1565 switch(est) {
1566 case estX:
1567 srenew(state->x,state->nalloc);
1568 break;
1569 case estV:
1570 srenew(state->v,state->nalloc);
1571 break;
1572 case estSDX:
1573 srenew(state->sd_X,state->nalloc);
1574 break;
1575 case estCGP:
1576 srenew(state->cg_p,state->nalloc);
1577 break;
1578 case estLD_RNG:
1579 case estLD_RNGI:
1580 case estDISRE_INITF:
1581 case estDISRE_RM3TAV:
1582 case estORIRE_INITF:
1583 case estORIRE_DTAV:
1584 /* No reallocation required */
1585 break;
1586 default:
1587 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1592 if (f != NULL)
1594 srenew(*f,state->nalloc);
1598 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
1599 rvec *v,rvec *lv)
1601 gmx_domdec_master_t *ma;
1602 int n,i,c,a,nalloc=0;
1603 rvec *buf=NULL;
1605 if (DDMASTER(dd))
1607 ma = dd->ma;
1609 for(n=0; n<dd->nnodes; n++)
1611 if (n != dd->rank)
1613 if (ma->nat[n] > nalloc)
1615 nalloc = over_alloc_dd(ma->nat[n]);
1616 srenew(buf,nalloc);
1618 /* Use lv as a temporary buffer */
1619 a = 0;
1620 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1622 for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
1624 copy_rvec(v[c],buf[a++]);
1627 if (a != ma->nat[n])
1629 gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
1630 a,ma->nat[n]);
1633 #ifdef GMX_MPI
1634 MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
1635 DDRANK(dd,n),n,dd->mpi_comm_all);
1636 #endif
1639 sfree(buf);
1640 n = DDMASTERRANK(dd);
1641 a = 0;
1642 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1644 for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
1646 copy_rvec(v[c],lv[a++]);
1650 else
1652 #ifdef GMX_MPI
1653 MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
1654 MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE);
1655 #endif
1659 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
1660 rvec *v,rvec *lv)
1662 gmx_domdec_master_t *ma;
1663 int *scounts=NULL,*disps=NULL;
1664 int n,i,c,a,nalloc=0;
1665 rvec *buf=NULL;
1667 if (DDMASTER(dd))
1669 ma = dd->ma;
1671 get_commbuffer_counts(dd,&scounts,&disps);
1673 buf = ma->vbuf;
1674 a = 0;
1675 for(n=0; n<dd->nnodes; n++)
1677 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1679 for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
1681 copy_rvec(v[c],buf[a++]);
1687 dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
1690 static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
1692 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1694 dd_distribute_vec_sendrecv(dd,cgs,v,lv);
1696 else
1698 dd_distribute_vec_scatterv(dd,cgs,v,lv);
1702 static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
1703 t_state *state,t_state *state_local,
1704 rvec **f)
1706 int i,j,ngtch,ngtcp,nh;
1708 nh = state->nhchainlength;
1710 if (DDMASTER(dd))
1712 state_local->lambda = state->lambda;
1713 state_local->veta = state->veta;
1714 state_local->vol0 = state->vol0;
1715 copy_mat(state->box,state_local->box);
1716 copy_mat(state->box_rel,state_local->box_rel);
1717 copy_mat(state->boxv,state_local->boxv);
1718 copy_mat(state->svir_prev,state_local->svir_prev);
1719 copy_mat(state->fvir_prev,state_local->fvir_prev);
1720 for(i=0; i<state_local->ngtc; i++)
1722 for(j=0; j<nh; j++) {
1723 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1724 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1726 state_local->therm_integral[i] = state->therm_integral[i];
1728 for(i=0; i<state_local->nnhpres; i++)
1730 for(j=0; j<nh; j++) {
1731 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1732 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1736 dd_bcast(dd,sizeof(real),&state_local->lambda);
1737 dd_bcast(dd,sizeof(real),&state_local->veta);
1738 dd_bcast(dd,sizeof(real),&state_local->vol0);
1739 dd_bcast(dd,sizeof(state_local->box),state_local->box);
1740 dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
1741 dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
1742 dd_bcast(dd,sizeof(state_local->svir_prev),state_local->svir_prev);
1743 dd_bcast(dd,sizeof(state_local->fvir_prev),state_local->fvir_prev);
1744 dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_xi);
1745 dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_vxi);
1746 dd_bcast(dd,state_local->ngtc*sizeof(double),state_local->therm_integral);
1747 dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_xi);
1748 dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_vxi);
1750 if (dd->nat_home > state_local->nalloc)
1752 dd_realloc_state(state_local,f,dd->nat_home);
1754 for(i=0; i<estNR; i++)
1756 if (EST_DISTR(i) && state_local->flags & (1<<i))
1758 switch (i) {
1759 case estX:
1760 dd_distribute_vec(dd,cgs,state->x,state_local->x);
1761 break;
1762 case estV:
1763 dd_distribute_vec(dd,cgs,state->v,state_local->v);
1764 break;
1765 case estSDX:
1766 dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
1767 break;
1768 case estCGP:
1769 dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
1770 break;
1771 case estLD_RNG:
1772 if (state->nrngi == 1)
1774 dd_bcastc(dd,
1775 state_local->nrng*sizeof(state_local->ld_rng[0]),
1776 state->ld_rng,state_local->ld_rng);
1778 else
1780 dd_scatter(dd,
1781 state_local->nrng*sizeof(state_local->ld_rng[0]),
1782 state->ld_rng,state_local->ld_rng);
1784 break;
1785 case estLD_RNGI:
1786 if (state->nrngi == 1)
1788 dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
1789 state->ld_rngi,state_local->ld_rngi);
1791 else
1793 dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
1794 state->ld_rngi,state_local->ld_rngi);
1796 break;
1797 case estDISRE_INITF:
1798 case estDISRE_RM3TAV:
1799 case estORIRE_INITF:
1800 case estORIRE_DTAV:
1801 /* Not implemented yet */
1802 break;
1803 default:
1804 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1810 static char dim2char(int dim)
1812 char c='?';
1814 switch (dim)
1816 case XX: c = 'X'; break;
1817 case YY: c = 'Y'; break;
1818 case ZZ: c = 'Z'; break;
1819 default: gmx_fatal(FARGS,"Unknown dim %d",dim);
1822 return c;
1825 static void write_dd_grid_pdb(const char *fn,gmx_large_int_t step,
1826 gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
1828 rvec grid_s[2],*grid_r=NULL,cx,r;
1829 char fname[STRLEN],format[STRLEN],buf[22];
1830 FILE *out;
1831 int a,i,d,z,y,x;
1832 matrix tric;
1833 real vol;
1835 copy_rvec(dd->comm->cell_x0,grid_s[0]);
1836 copy_rvec(dd->comm->cell_x1,grid_s[1]);
1838 if (DDMASTER(dd))
1840 snew(grid_r,2*dd->nnodes);
1843 dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);
1845 if (DDMASTER(dd))
1847 for(d=0; d<DIM; d++)
1849 for(i=0; i<DIM; i++)
1851 if (d == i)
1853 tric[d][i] = 1;
1855 else
1857 if (dd->nc[d] > 1 && d < ddbox->npbcdim)
1859 tric[d][i] = box[i][d]/box[i][i];
1861 else
1863 tric[d][i] = 0;
1868 sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
1869 sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
1870 out = gmx_fio_fopen(fname,"w");
1871 gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
1872 a = 1;
1873 for(i=0; i<dd->nnodes; i++)
1875 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1876 for(d=0; d<DIM; d++)
1878 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1880 for(z=0; z<2; z++)
1882 for(y=0; y<2; y++)
1884 for(x=0; x<2; x++)
1886 cx[XX] = grid_r[i*2+x][XX];
1887 cx[YY] = grid_r[i*2+y][YY];
1888 cx[ZZ] = grid_r[i*2+z][ZZ];
1889 mvmul(tric,cx,r);
1890 fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,
1891 10*r[XX],10*r[YY],10*r[ZZ],1.0,vol);
1895 for(d=0; d<DIM; d++)
1897 for(x=0; x<4; x++)
1899 switch(d)
1901 case 0: y = 1 + i*8 + 2*x; break;
1902 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1903 case 2: y = 1 + i*8 + x; break;
1905 fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
1909 gmx_fio_fclose(out);
1910 sfree(grid_r);
1914 void write_dd_pdb(const char *fn,gmx_large_int_t step,const char *title,
1915 gmx_mtop_t *mtop,t_commrec *cr,
1916 int natoms,rvec x[],matrix box)
1918 char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
1919 FILE *out;
1920 int i,ii,resnr,c;
1921 char *atomname,*resname;
1922 real b;
1923 gmx_domdec_t *dd;
1925 dd = cr->dd;
1926 if (natoms == -1)
1928 natoms = dd->comm->nat[ddnatVSITE];
1931 sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);
1933 sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
1934 sprintf(format4,"%s%s\n",pdbformat4,"%6.2f%6.2f");
1936 out = gmx_fio_fopen(fname,"w");
1938 fprintf(out,"TITLE %s\n",title);
1939 gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
1940 for(i=0; i<natoms; i++)
1942 ii = dd->gatindex[i];
1943 gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
1944 if (i < dd->comm->nat[ddnatZONE])
1946 c = 0;
1947 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
1949 c++;
1951 b = c;
1953 else if (i < dd->comm->nat[ddnatVSITE])
1955 b = dd->comm->zones.n;
1957 else
1959 b = dd->comm->zones.n + 1;
1961 fprintf(out,strlen(atomname)<4 ? format : format4,
1962 "ATOM",(ii+1)%100000,
1963 atomname,resname,' ',resnr%10000,' ',
1964 10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b);
1966 fprintf(out,"TER\n");
1968 gmx_fio_fclose(out);
1971 real dd_cutoff_mbody(gmx_domdec_t *dd)
1973 gmx_domdec_comm_t *comm;
1974 int di;
1975 real r;
1977 comm = dd->comm;
1979 r = -1;
1980 if (comm->bInterCGBondeds)
1982 if (comm->cutoff_mbody > 0)
1984 r = comm->cutoff_mbody;
1986 else
1988 /* cutoff_mbody=0 means we do not have DLB */
1989 r = comm->cellsize_min[dd->dim[0]];
1990 for(di=1; di<dd->ndim; di++)
1992 r = min(r,comm->cellsize_min[dd->dim[di]]);
1994 if (comm->bBondComm)
1996 r = max(r,comm->cutoff_mbody);
1998 else
2000 r = min(r,comm->cutoff);
2005 return r;
2008 real dd_cutoff_twobody(gmx_domdec_t *dd)
2010 real r_mb;
2012 r_mb = dd_cutoff_mbody(dd);
2014 return max(dd->comm->cutoff,r_mb);
2018 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
2020 int nc,ntot;
2022 nc = dd->nc[dd->comm->cartpmedim];
2023 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2024 copy_ivec(coord,coord_pme);
2025 coord_pme[dd->comm->cartpmedim] =
2026 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2029 static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
2031 /* Here we assign a PME node to communicate with this DD node
2032 * by assuming that the major index of both is x.
2033 * We add cr->npmenodes/2 to obtain an even distribution.
2035 return (ddindex*npme + npme/2)/ndd;
2038 static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
2040 return low_ddindex2pmeindex(dd->nnodes,dd->comm->npmenodes,ddindex);
2043 static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
2045 return low_ddindex2pmeindex(cr->dd->nnodes,cr->npmenodes,ddindex);
2048 static int *dd_pmenodes(t_commrec *cr)
2050 int *pmenodes;
2051 int n,i,p0,p1;
2053 snew(pmenodes,cr->npmenodes);
2054 n = 0;
2055 for(i=0; i<cr->dd->nnodes; i++) {
2056 p0 = cr_ddindex2pmeindex(cr,i);
2057 p1 = cr_ddindex2pmeindex(cr,i+1);
2058 if (i+1 == cr->dd->nnodes || p1 > p0) {
2059 if (debug)
2060 fprintf(debug,"pmenode[%d] = %d\n",n,i+1+n);
2061 pmenodes[n] = i + 1 + n;
2062 n++;
2066 return pmenodes;
2069 static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
2071 gmx_domdec_t *dd;
2072 ivec coords,coords_pme,nc;
2073 int slab;
2075 dd = cr->dd;
2077 if (dd->comm->bCartesian) {
2078 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2079 dd_coords2pmecoords(dd,coords,coords_pme);
2080 copy_ivec(dd->ntot,nc);
2081 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2082 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2084 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2085 } else {
2086 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2089 coords[XX] = x;
2090 coords[YY] = y;
2091 coords[ZZ] = z;
2092 slab = ddindex2pmeindex(dd,dd_index(dd->nc,coords));
2094 return slab;
2097 static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
2099 gmx_domdec_comm_t *comm;
2100 ivec coords;
2101 int ddindex,nodeid=-1;
2103 comm = cr->dd->comm;
2105 coords[XX] = x;
2106 coords[YY] = y;
2107 coords[ZZ] = z;
2108 if (comm->bCartesianPP_PME)
2110 #ifdef GMX_MPI
2111 MPI_Cart_rank(cr->mpi_comm_mysim,coords,&nodeid);
2112 #endif
2114 else
2116 ddindex = dd_index(cr->dd->nc,coords);
2117 if (comm->bCartesianPP)
2119 nodeid = comm->ddindex2simnodeid[ddindex];
2121 else
2123 if (comm->pmenodes)
2125 nodeid = ddindex + gmx_ddcoord2pmeindex(cr,x,y,z);
2127 else
2129 nodeid = ddindex;
2134 return nodeid;
2137 static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
2139 gmx_domdec_t *dd;
2140 gmx_domdec_comm_t *comm;
2141 ivec coord,coord_pme;
2142 int i;
2143 int pmenode=-1;
2145 dd = cr->dd;
2146 comm = dd->comm;
2148 /* This assumes a uniform x domain decomposition grid cell size */
2149 if (comm->bCartesianPP_PME)
2151 #ifdef GMX_MPI
2152 MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
2153 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2155 /* This is a PP node */
2156 dd_cart_coord2pmecoord(dd,coord,coord_pme);
2157 MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
2159 #endif
2161 else if (comm->bCartesianPP)
2163 if (sim_nodeid < dd->nnodes)
2165 pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
2168 else
2170 /* This assumes DD cells with identical x coordinates
2171 * are numbered sequentially.
2173 if (dd->comm->pmenodes == NULL)
2175 if (sim_nodeid < dd->nnodes)
2177 /* The DD index equals the nodeid */
2178 pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
2181 else
2183 i = 0;
2184 while (sim_nodeid > dd->comm->pmenodes[i])
2186 i++;
2188 if (sim_nodeid < dd->comm->pmenodes[i])
2190 pmenode = dd->comm->pmenodes[i];
2195 return pmenode;
2198 gmx_bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
2200 gmx_bool bPMEOnlyNode;
2202 if (DOMAINDECOMP(cr))
2204 bPMEOnlyNode = (dd_simnode2pmenode(cr,sim_nodeid) == -1);
2206 else
2208 bPMEOnlyNode = FALSE;
2211 return bPMEOnlyNode;
2214 void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
2215 int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
2217 gmx_domdec_t *dd;
2218 int x,y,z;
2219 ivec coord,coord_pme;
2221 dd = cr->dd;
2223 snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2225 *nmy_ddnodes = 0;
2226 for(x=0; x<dd->nc[XX]; x++)
2228 for(y=0; y<dd->nc[YY]; y++)
2230 for(z=0; z<dd->nc[ZZ]; z++)
2232 if (dd->comm->bCartesianPP_PME)
2234 coord[XX] = x;
2235 coord[YY] = y;
2236 coord[ZZ] = z;
2237 dd_cart_coord2pmecoord(dd,coord,coord_pme);
2238 if (dd->ci[XX] == coord_pme[XX] &&
2239 dd->ci[YY] == coord_pme[YY] &&
2240 dd->ci[ZZ] == coord_pme[ZZ])
2241 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
2243 else
2245 /* The slab corresponds to the nodeid in the PME group */
2246 if (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid)
2248 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
2255 /* The last PP-only node is the peer node */
2256 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2258 if (debug)
2260 fprintf(debug,"Receive coordinates from PP nodes:");
2261 for(x=0; x<*nmy_ddnodes; x++)
2263 fprintf(debug," %d",(*my_ddnodes)[x]);
2265 fprintf(debug,"\n");
2269 static gmx_bool receive_vir_ener(t_commrec *cr)
2271 gmx_domdec_comm_t *comm;
2272 int pmenode,coords[DIM],rank;
2273 gmx_bool bReceive;
2275 bReceive = TRUE;
2276 if (cr->npmenodes < cr->dd->nnodes)
2278 comm = cr->dd->comm;
2279 if (comm->bCartesianPP_PME)
2281 pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
2282 #ifdef GMX_MPI
2283 MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
2284 coords[comm->cartpmedim]++;
2285 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2287 MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
2288 if (dd_simnode2pmenode(cr,rank) == pmenode)
2290 /* This is not the last PP node for pmenode */
2291 bReceive = FALSE;
2294 #endif
2296 else
2298 pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
2299 if (cr->sim_nodeid+1 < cr->nnodes &&
2300 dd_simnode2pmenode(cr,cr->sim_nodeid+1) == pmenode)
2302 /* This is not the last PP node for pmenode */
2303 bReceive = FALSE;
2308 return bReceive;
2311 static void set_zones_ncg_home(gmx_domdec_t *dd)
2313 gmx_domdec_zones_t *zones;
2314 int i;
2316 zones = &dd->comm->zones;
2318 zones->cg_range[0] = 0;
2319 for(i=1; i<zones->n+1; i++)
2321 zones->cg_range[i] = dd->ncg_home;
2325 static void rebuild_cgindex(gmx_domdec_t *dd,int *gcgs_index,t_state *state)
2327 int nat,i,*ind,*dd_cg_gl,*cgindex,cg_gl;
2329 ind = state->cg_gl;
2330 dd_cg_gl = dd->index_gl;
2331 cgindex = dd->cgindex;
2332 nat = 0;
2333 cgindex[0] = nat;
2334 for(i=0; i<state->ncg_gl; i++)
2336 cgindex[i] = nat;
2337 cg_gl = ind[i];
2338 dd_cg_gl[i] = cg_gl;
2339 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2341 cgindex[i] = nat;
2343 dd->ncg_home = state->ncg_gl;
2344 dd->nat_home = nat;
2346 set_zones_ncg_home(dd);
2349 static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
2351 while (cg >= cginfo_mb->cg_end)
2353 cginfo_mb++;
2356 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2359 static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
2360 t_forcerec *fr,char *bLocalCG)
2362 cginfo_mb_t *cginfo_mb;
2363 int *cginfo;
2364 int cg;
2366 if (fr != NULL)
2368 cginfo_mb = fr->cginfo_mb;
2369 cginfo = fr->cginfo;
2371 for(cg=cg0; cg<cg1; cg++)
2373 cginfo[cg] = ddcginfo(cginfo_mb,index_gl[cg]);
2377 if (bLocalCG != NULL)
2379 for(cg=cg0; cg<cg1; cg++)
2381 bLocalCG[index_gl[cg]] = TRUE;
2386 static void make_dd_indices(gmx_domdec_t *dd,int *gcgs_index,int cg_start)
2388 int nzone,zone,zone1,cg0,cg,cg_gl,a,a_gl;
2389 int *zone2cg,*zone_ncg1,*index_gl,*gatindex;
2390 gmx_ga2la_t *ga2la;
2391 char *bLocalCG;
2393 bLocalCG = dd->comm->bLocalCG;
2395 if (dd->nat_tot > dd->gatindex_nalloc)
2397 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2398 srenew(dd->gatindex,dd->gatindex_nalloc);
2401 nzone = dd->comm->zones.n;
2402 zone2cg = dd->comm->zones.cg_range;
2403 zone_ncg1 = dd->comm->zone_ncg1;
2404 index_gl = dd->index_gl;
2405 gatindex = dd->gatindex;
2407 if (zone2cg[1] != dd->ncg_home)
2409 gmx_incons("dd->ncg_zone is not up to date");
2412 /* Make the local to global and global to local atom index */
2413 a = dd->cgindex[cg_start];
2414 for(zone=0; zone<nzone; zone++)
2416 if (zone == 0)
2418 cg0 = cg_start;
2420 else
2422 cg0 = zone2cg[zone];
2424 for(cg=cg0; cg<zone2cg[zone+1]; cg++)
2426 zone1 = zone;
2427 if (cg - cg0 >= zone_ncg1[zone])
2429 /* Signal that this cg is from more than one zone away */
2430 zone1 += nzone;
2432 cg_gl = index_gl[cg];
2433 for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
2435 gatindex[a] = a_gl;
2436 ga2la_set(dd->ga2la,a_gl,a,zone1);
2437 a++;
2443 static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
2444 const char *where)
2446 int ncg,i,ngl,nerr;
2448 nerr = 0;
2449 if (bLocalCG == NULL)
2451 return nerr;
2453 for(i=0; i<dd->ncg_tot; i++)
2455 if (!bLocalCG[dd->index_gl[i]])
2457 fprintf(stderr,
2458 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,i+1,dd->index_gl[i]+1,dd->ncg_home);
2459 nerr++;
2462 ngl = 0;
2463 for(i=0; i<ncg_sys; i++)
2465 if (bLocalCG[i])
2467 ngl++;
2470 if (ngl != dd->ncg_tot)
2472 fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,ngl,dd->ncg_tot);
2473 nerr++;
2476 return nerr;
2479 static void check_index_consistency(gmx_domdec_t *dd,
2480 int natoms_sys,int ncg_sys,
2481 const char *where)
2483 int nerr,ngl,i,a,cell;
2484 int *have;
2486 nerr = 0;
2488 if (dd->comm->DD_debug > 1)
2490 snew(have,natoms_sys);
2491 for(a=0; a<dd->nat_tot; a++)
2493 if (have[dd->gatindex[a]] > 0)
2495 fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,dd->gatindex[a]+1,have[dd->gatindex[a]],a+1);
2497 else
2499 have[dd->gatindex[a]] = a + 1;
2502 sfree(have);
2505 snew(have,dd->nat_tot);
2507 ngl = 0;
2508 for(i=0; i<natoms_sys; i++)
2510 if (ga2la_get(dd->ga2la,i,&a,&cell))
2512 if (a >= dd->nat_tot)
2514 fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,i+1,a+1,dd->nat_tot);
2515 nerr++;
2517 else
2519 have[a] = 1;
2520 if (dd->gatindex[a] != i)
2522 fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,i+1,a+1,dd->gatindex[a]+1);
2523 nerr++;
2526 ngl++;
2529 if (ngl != dd->nat_tot)
2531 fprintf(stderr,
2532 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2533 dd->rank,where,ngl,dd->nat_tot);
2535 for(a=0; a<dd->nat_tot; a++)
2537 if (have[a] == 0)
2539 fprintf(stderr,
2540 "DD node %d, %s: local atom %d, global %d has no global index\n",
2541 dd->rank,where,a+1,dd->gatindex[a]+1);
2544 sfree(have);
2546 nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);
2548 if (nerr > 0) {
2549 gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
2550 dd->rank,where,nerr);
2554 static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
2556 int i;
2557 char *bLocalCG;
2559 if (a_start == 0)
2561 /* Clear the whole list without searching */
2562 ga2la_clear(dd->ga2la);
2564 else
2566 for(i=a_start; i<dd->nat_tot; i++)
2568 ga2la_del(dd->ga2la,dd->gatindex[i]);
2572 bLocalCG = dd->comm->bLocalCG;
2573 if (bLocalCG)
2575 for(i=cg_start; i<dd->ncg_tot; i++)
2577 bLocalCG[dd->index_gl[i]] = FALSE;
2581 dd_clear_local_vsite_indices(dd);
2583 if (dd->constraints)
2585 dd_clear_local_constraint_indices(dd);
2589 static real grid_jump_limit(gmx_domdec_comm_t *comm,int dim_ind)
2591 real grid_jump_limit;
2593 /* The distance between the boundaries of cells at distance
2594 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2595 * and by the fact that cells should not be shifted by more than
2596 * half their size, such that cg's only shift by one cell
2597 * at redecomposition.
2599 grid_jump_limit = comm->cellsize_limit;
2600 if (!comm->bVacDLBNoLimit)
2602 grid_jump_limit = max(grid_jump_limit,
2603 comm->cutoff/comm->cd[dim_ind].np);
2606 return grid_jump_limit;
2609 static void check_grid_jump(gmx_large_int_t step,gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
2611 gmx_domdec_comm_t *comm;
2612 int d,dim;
2613 real limit,bfac;
2615 comm = dd->comm;
2617 for(d=1; d<dd->ndim; d++)
2619 dim = dd->dim[d];
2620 limit = grid_jump_limit(comm,d);
2621 bfac = ddbox->box_size[dim];
2622 if (ddbox->tric_dir[dim])
2624 bfac *= ddbox->skew_fac[dim];
2626 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2627 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2629 char buf[22];
2630 gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
2631 gmx_step_str(step,buf),
2632 dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
2637 static int dd_load_count(gmx_domdec_comm_t *comm)
2639 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2642 static float dd_force_load(gmx_domdec_comm_t *comm)
2644 float load;
2646 if (comm->eFlop)
2648 load = comm->flop;
2649 if (comm->eFlop > 1)
2651 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2654 else
2656 load = comm->cycl[ddCyclF];
2657 if (comm->cycl_n[ddCyclF] > 1)
2659 /* Subtract the maximum of the last n cycle counts
2660 * to get rid of possible high counts due to other soures,
2661 * for instance system activity, that would otherwise
2662 * affect the dynamic load balancing.
2664 load -= comm->cycl_max[ddCyclF];
2668 return load;
2671 static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dim,real **dim_f)
2673 gmx_domdec_comm_t *comm;
2674 int i;
2676 comm = dd->comm;
2678 snew(*dim_f,dd->nc[dim]+1);
2679 (*dim_f)[0] = 0;
2680 for(i=1; i<dd->nc[dim]; i++)
2682 if (comm->slb_frac[dim])
2684 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
2686 else
2688 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
2691 (*dim_f)[dd->nc[dim]] = 1;
2694 static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,int dimind)
2696 int pmeindex,slab,nso,i;
2697 ivec xyz;
2699 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
2701 ddpme->dim = YY;
2703 else
2705 ddpme->dim = dimind;
2707 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
2709 ddpme->nslab = (ddpme->dim == 0 ?
2710 dd->comm->npmenodes_x :
2711 dd->comm->npmenodes_y);
2713 if (ddpme->nslab <= 1)
2715 return;
2718 nso = dd->comm->npmenodes/ddpme->nslab;
2719 /* Determine for each PME slab the PP location range for dimension dim */
2720 snew(ddpme->pp_min,ddpme->nslab);
2721 snew(ddpme->pp_max,ddpme->nslab);
2722 for(slab=0; slab<ddpme->nslab; slab++) {
2723 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2724 ddpme->pp_max[slab] = 0;
2726 for(i=0; i<dd->nnodes; i++) {
2727 ddindex2xyz(dd->nc,i,xyz);
2728 /* For y only use our y/z slab.
2729 * This assumes that the PME x grid size matches the DD grid size.
2731 if (dimind == 0 || xyz[XX] == dd->ci[XX]) {
2732 pmeindex = ddindex2pmeindex(dd,i);
2733 if (dimind == 0) {
2734 slab = pmeindex/nso;
2735 } else {
2736 slab = pmeindex % ddpme->nslab;
2738 ddpme->pp_min[slab] = min(ddpme->pp_min[slab],xyz[dimind]);
2739 ddpme->pp_max[slab] = max(ddpme->pp_max[slab],xyz[dimind]);
2743 set_slb_pme_dim_f(dd,ddpme->dim,&ddpme->slb_dim_f);
2746 int dd_pme_maxshift_x(gmx_domdec_t *dd)
2748 if (dd->comm->ddpme[0].dim == XX)
2750 return dd->comm->ddpme[0].maxshift;
2752 else
2754 return 0;
2758 int dd_pme_maxshift_y(gmx_domdec_t *dd)
2760 if (dd->comm->ddpme[0].dim == YY)
2762 return dd->comm->ddpme[0].maxshift;
2764 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2766 return dd->comm->ddpme[1].maxshift;
2768 else
2770 return 0;
2774 static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
2775 gmx_bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
2777 gmx_domdec_comm_t *comm;
2778 int nc,ns,s;
2779 int *xmin,*xmax;
2780 real range,pme_boundary;
2781 int sh;
2783 comm = dd->comm;
2784 nc = dd->nc[ddpme->dim];
2785 ns = ddpme->nslab;
2787 if (!ddpme->dim_match)
2789 /* PP decomposition is not along dim: the worst situation */
2790 sh = ns/2;
2792 else if (ns <= 3 || (bUniform && ns == nc))
2794 /* The optimal situation */
2795 sh = 1;
2797 else
2799 /* We need to check for all pme nodes which nodes they
2800 * could possibly need to communicate with.
2802 xmin = ddpme->pp_min;
2803 xmax = ddpme->pp_max;
2804 /* Allow for atoms to be maximally 2/3 times the cut-off
2805 * out of their DD cell. This is a reasonable balance between
2806 * between performance and support for most charge-group/cut-off
2807 * combinations.
2809 range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
2810 /* Avoid extra communication when we are exactly at a boundary */
2811 range *= 0.999;
2813 sh = 1;
2814 for(s=0; s<ns; s++)
2816 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2817 pme_boundary = (real)s/ns;
2818 while (sh+1 < ns &&
2819 ((s-(sh+1) >= 0 &&
2820 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
2821 (s-(sh+1) < 0 &&
2822 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
2824 sh++;
2826 pme_boundary = (real)(s+1)/ns;
2827 while (sh+1 < ns &&
2828 ((s+(sh+1) < ns &&
2829 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
2830 (s+(sh+1) >= ns &&
2831 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
2833 sh++;
2838 ddpme->maxshift = sh;
2840 if (debug)
2842 fprintf(debug,"PME slab communication range for dim %d is %d\n",
2843 ddpme->dim,ddpme->maxshift);
2847 static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
2849 int d,dim;
2851 for(d=0; d<dd->ndim; d++)
2853 dim = dd->dim[d];
2854 if (dim < ddbox->nboundeddim &&
2855 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
2856 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
2858 gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2859 dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
2860 dd->nc[dim],dd->comm->cellsize_limit);
2865 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
2866 gmx_bool bMaster,ivec npulse)
2868 gmx_domdec_comm_t *comm;
2869 int d,j;
2870 rvec cellsize_min;
2871 real *cell_x,cell_dx,cellsize;
2873 comm = dd->comm;
2875 for(d=0; d<DIM; d++)
2877 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
2878 npulse[d] = 1;
2879 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
2881 /* Uniform grid */
2882 cell_dx = ddbox->box_size[d]/dd->nc[d];
2883 if (bMaster)
2885 for(j=0; j<dd->nc[d]+1; j++)
2887 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
2890 else
2892 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
2893 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
2895 cellsize = cell_dx*ddbox->skew_fac[d];
2896 while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
2898 npulse[d]++;
2900 cellsize_min[d] = cellsize;
2902 else
2904 /* Statically load balanced grid */
2905 /* Also when we are not doing a master distribution we determine
2906 * all cell borders in a loop to obtain identical values
2907 * to the master distribution case and to determine npulse.
2909 if (bMaster)
2911 cell_x = dd->ma->cell_x[d];
2913 else
2915 snew(cell_x,dd->nc[d]+1);
2917 cell_x[0] = ddbox->box0[d];
2918 for(j=0; j<dd->nc[d]; j++)
2920 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
2921 cell_x[j+1] = cell_x[j] + cell_dx;
2922 cellsize = cell_dx*ddbox->skew_fac[d];
2923 while (cellsize*npulse[d] < comm->cutoff &&
2924 npulse[d] < dd->nc[d]-1)
2926 npulse[d]++;
2928 cellsize_min[d] = min(cellsize_min[d],cellsize);
2930 if (!bMaster)
2932 comm->cell_x0[d] = cell_x[dd->ci[d]];
2933 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
2934 sfree(cell_x);
2937 /* The following limitation is to avoid that a cell would receive
2938 * some of its own home charge groups back over the periodic boundary.
2939 * Double charge groups cause trouble with the global indices.
2941 if (d < ddbox->npbcdim &&
2942 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
2944 gmx_fatal_collective(FARGS,NULL,dd,
2945 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
2946 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
2947 comm->cutoff,
2948 dd->nc[d],dd->nc[d],
2949 dd->nnodes > dd->nc[d] ? "cells" : "processors");
2953 if (!comm->bDynLoadBal)
2955 copy_rvec(cellsize_min,comm->cellsize_min);
2958 for(d=0; d<comm->npmedecompdim; d++)
2960 set_pme_maxshift(dd,&comm->ddpme[d],
2961 comm->slb_frac[dd->dim[d]]==NULL,ddbox,
2962 comm->ddpme[d].slb_dim_f);
2967 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
2968 int d,int dim,gmx_domdec_root_t *root,
2969 gmx_ddbox_t *ddbox,
2970 gmx_bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
2972 gmx_domdec_comm_t *comm;
2973 int ncd,i,j,nmin,nmin_old;
2974 gmx_bool bLimLo,bLimHi;
2975 real *cell_size;
2976 real fac,halfway,cellsize_limit_f_i,region_size;
2977 gmx_bool bPBC,bLastHi=FALSE;
2978 int nrange[]={range[0],range[1]};
2980 region_size= root->cell_f[range[1]]-root->cell_f[range[0]];
2982 comm = dd->comm;
2984 ncd = dd->nc[dim];
2986 bPBC = (dim < ddbox->npbcdim);
2988 cell_size = root->buf_ncd;
2990 if (debug)
2992 fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
2995 /* First we need to check if the scaling does not make cells
2996 * smaller than the smallest allowed size.
2997 * We need to do this iteratively, since if a cell is too small,
2998 * it needs to be enlarged, which makes all the other cells smaller,
2999 * which could in turn make another cell smaller than allowed.
3001 for(i=range[0]; i<range[1]; i++)
3003 root->bCellMin[i] = FALSE;
3005 nmin = 0;
3008 nmin_old = nmin;
3009 /* We need the total for normalization */
3010 fac = 0;
3011 for(i=range[0]; i<range[1]; i++)
3013 if (root->bCellMin[i] == FALSE)
3015 fac += cell_size[i];
3018 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
3019 /* Determine the cell boundaries */
3020 for(i=range[0]; i<range[1]; i++)
3022 if (root->bCellMin[i] == FALSE)
3024 cell_size[i] *= fac;
3025 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3027 cellsize_limit_f_i = 0;
3029 else
3031 cellsize_limit_f_i = cellsize_limit_f;
3033 if (cell_size[i] < cellsize_limit_f_i)
3035 root->bCellMin[i] = TRUE;
3036 cell_size[i] = cellsize_limit_f_i;
3037 nmin++;
3040 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3043 while (nmin > nmin_old);
3045 i=range[1]-1;
3046 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3047 /* For this check we should not use DD_CELL_MARGIN,
3048 * but a slightly smaller factor,
3049 * since rounding could get use below the limit.
3051 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3053 char buf[22];
3054 gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3055 gmx_step_str(step,buf),
3056 dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
3057 ncd,comm->cellsize_min[dim]);
3060 root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);
3062 if (!bUniform)
3064 /* Check if the boundary did not displace more than halfway
3065 * each of the cells it bounds, as this could cause problems,
3066 * especially when the differences between cell sizes are large.
3067 * If changes are applied, they will not make cells smaller
3068 * than the cut-off, as we check all the boundaries which
3069 * might be affected by a change and if the old state was ok,
3070 * the cells will at most be shrunk back to their old size.
3072 for(i=range[0]+1; i<range[1]; i++)
3074 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3075 if (root->cell_f[i] < halfway)
3077 root->cell_f[i] = halfway;
3078 /* Check if the change also causes shifts of the next boundaries */
3079 for(j=i+1; j<range[1]; j++)
3081 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3082 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3085 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3086 if (root->cell_f[i] > halfway)
3088 root->cell_f[i] = halfway;
3089 /* Check if the change also causes shifts of the next boundaries */
3090 for(j=i-1; j>=range[0]+1; j--)
3092 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3093 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3099 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3100 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3101 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3102 * for a and b nrange is used */
3103 if (d > 0)
3105 /* Take care of the staggering of the cell boundaries */
3106 if (bUniform)
3108 for(i=range[0]; i<range[1]; i++)
3110 root->cell_f_max0[i] = root->cell_f[i];
3111 root->cell_f_min1[i] = root->cell_f[i+1];
3114 else
3116 for(i=range[0]+1; i<range[1]; i++)
3118 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3119 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3120 if (bLimLo && bLimHi)
3122 /* Both limits violated, try the best we can */
3123 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3124 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3125 nrange[0]=range[0];
3126 nrange[1]=i;
3127 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3129 nrange[0]=i;
3130 nrange[1]=range[1];
3131 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3133 return;
3135 else if (bLimLo)
3137 /* root->cell_f[i] = root->bound_min[i]; */
3138 nrange[1]=i; /* only store violation location. There could be a LimLo violation following with an higher index */
3139 bLastHi=FALSE;
3141 else if (bLimHi && !bLastHi)
3143 bLastHi=TRUE;
3144 if (nrange[1] < range[1]) /* found a LimLo before */
3146 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3147 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3148 nrange[0]=nrange[1];
3150 root->cell_f[i] = root->bound_max[i];
3151 nrange[1]=i;
3152 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3153 nrange[0]=i;
3154 nrange[1]=range[1];
3157 if (nrange[1] < range[1]) /* found last a LimLo */
3159 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3160 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3161 nrange[0]=nrange[1];
3162 nrange[1]=range[1];
3163 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3165 else if (nrange[0] > range[0]) /* found at least one LimHi */
3167 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3174 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3175 int d,int dim,gmx_domdec_root_t *root,
3176 gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
3177 gmx_bool bUniform,gmx_large_int_t step)
3179 gmx_domdec_comm_t *comm;
3180 int ncd,d1,i,j,pos;
3181 real *cell_size;
3182 real load_aver,load_i,imbalance,change,change_max,sc;
3183 real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
3184 real change_limit = 0.1;
3185 real relax = 0.5;
3186 gmx_bool bPBC;
3187 int range[] = { 0, 0 };
3189 comm = dd->comm;
3191 ncd = dd->nc[dim];
3193 bPBC = (dim < ddbox->npbcdim);
3195 cell_size = root->buf_ncd;
3197 /* Store the original boundaries */
3198 for(i=0; i<ncd+1; i++)
3200 root->old_cell_f[i] = root->cell_f[i];
3202 if (bUniform) {
3203 for(i=0; i<ncd; i++)
3205 cell_size[i] = 1.0/ncd;
3208 else if (dd_load_count(comm))
3210 load_aver = comm->load[d].sum_m/ncd;
3211 change_max = 0;
3212 for(i=0; i<ncd; i++)
3214 /* Determine the relative imbalance of cell i */
3215 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3216 imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
3217 /* Determine the change of the cell size using underrelaxation */
3218 change = -relax*imbalance;
3219 change_max = max(change_max,max(change,-change));
3221 /* Limit the amount of scaling.
3222 * We need to use the same rescaling for all cells in one row,
3223 * otherwise the load balancing might not converge.
3225 sc = relax;
3226 if (change_max > change_limit)
3228 sc *= change_limit/change_max;
3230 for(i=0; i<ncd; i++)
3232 /* Determine the relative imbalance of cell i */
3233 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3234 imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
3235 /* Determine the change of the cell size using underrelaxation */
3236 change = -sc*imbalance;
3237 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3241 cellsize_limit_f = comm->cellsize_min[dim]/ddbox->box_size[dim];
3242 cellsize_limit_f *= DD_CELL_MARGIN;
3243 dist_min_f_hard = grid_jump_limit(comm,d)/ddbox->box_size[dim];
3244 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3245 if (ddbox->tric_dir[dim])
3247 cellsize_limit_f /= ddbox->skew_fac[dim];
3248 dist_min_f /= ddbox->skew_fac[dim];
3250 if (bDynamicBox && d > 0)
3252 dist_min_f *= DD_PRES_SCALE_MARGIN;
3254 if (d > 0 && !bUniform)
3256 /* Make sure that the grid is not shifted too much */
3257 for(i=1; i<ncd; i++) {
3258 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3260 gmx_incons("Inconsistent DD boundary staggering limits!");
3262 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3263 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3264 if (space > 0) {
3265 root->bound_min[i] += 0.5*space;
3267 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3268 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3269 if (space < 0) {
3270 root->bound_max[i] += 0.5*space;
3272 if (debug)
3274 fprintf(debug,
3275 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3276 d,i,
3277 root->cell_f_max0[i-1] + dist_min_f,
3278 root->bound_min[i],root->cell_f[i],root->bound_max[i],
3279 root->cell_f_min1[i] - dist_min_f);
3283 range[1]=ncd;
3284 root->cell_f[0] = 0;
3285 root->cell_f[ncd] = 1;
3286 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3289 /* After the checks above, the cells should obey the cut-off
3290 * restrictions, but it does not hurt to check.
3292 for(i=0; i<ncd; i++)
3294 if (debug)
3296 fprintf(debug,"Relative bounds dim %d cell %d: %f %f\n",
3297 dim,i,root->cell_f[i],root->cell_f[i+1]);
3300 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3301 root->cell_f[i+1] - root->cell_f[i] <
3302 cellsize_limit_f/DD_CELL_MARGIN)
3304 char buf[22];
3305 fprintf(stderr,
3306 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3307 gmx_step_str(step,buf),dim2char(dim),i,
3308 (root->cell_f[i+1] - root->cell_f[i])
3309 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3313 pos = ncd + 1;
3314 /* Store the cell boundaries of the lower dimensions at the end */
3315 for(d1=0; d1<d; d1++)
3317 root->cell_f[pos++] = comm->cell_f0[d1];
3318 root->cell_f[pos++] = comm->cell_f1[d1];
3321 if (d < comm->npmedecompdim)
3323 /* The master determines the maximum shift for
3324 * the coordinate communication between separate PME nodes.
3326 set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
3328 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3329 if (d >= 1)
3331 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3335 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3336 gmx_ddbox_t *ddbox,int dimind)
3338 gmx_domdec_comm_t *comm;
3339 int dim;
3341 comm = dd->comm;
3343 /* Set the cell dimensions */
3344 dim = dd->dim[dimind];
3345 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3346 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3347 if (dim >= ddbox->nboundeddim)
3349 comm->cell_x0[dim] += ddbox->box0[dim];
3350 comm->cell_x1[dim] += ddbox->box0[dim];
3354 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3355 int d,int dim,real *cell_f_row,
3356 gmx_ddbox_t *ddbox)
3358 gmx_domdec_comm_t *comm;
3359 int d1,dim1,pos;
3361 comm = dd->comm;
3363 #ifdef GMX_MPI
3364 /* Each node would only need to know two fractions,
3365 * but it is probably cheaper to broadcast the whole array.
3367 MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
3368 0,comm->mpi_comm_load[d]);
3369 #endif
3370 /* Copy the fractions for this dimension from the buffer */
3371 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3372 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3373 /* The whole array was communicated, so set the buffer position */
3374 pos = dd->nc[dim] + 1;
3375 for(d1=0; d1<=d; d1++)
3377 if (d1 < d)
3379 /* Copy the cell fractions of the lower dimensions */
3380 comm->cell_f0[d1] = cell_f_row[pos++];
3381 comm->cell_f1[d1] = cell_f_row[pos++];
3383 relative_to_absolute_cell_bounds(dd,ddbox,d1);
3385 /* Convert the communicated shift from float to int */
3386 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3387 if (d >= 1)
3389 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3393 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3394 gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
3395 gmx_bool bUniform,gmx_large_int_t step)
3397 gmx_domdec_comm_t *comm;
3398 int d,dim,d1;
3399 gmx_bool bRowMember,bRowRoot;
3400 real *cell_f_row;
3402 comm = dd->comm;
3404 for(d=0; d<dd->ndim; d++)
3406 dim = dd->dim[d];
3407 bRowMember = TRUE;
3408 bRowRoot = TRUE;
3409 for(d1=d; d1<dd->ndim; d1++)
3411 if (dd->ci[dd->dim[d1]] > 0)
3413 if (d1 > d)
3415 bRowMember = FALSE;
3417 bRowRoot = FALSE;
3420 if (bRowMember)
3422 if (bRowRoot)
3424 set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
3425 ddbox,bDynamicBox,bUniform,step);
3426 cell_f_row = comm->root[d]->cell_f;
3428 else
3430 cell_f_row = comm->cell_f_row;
3432 distribute_dd_cell_sizes_dlb(dd,d,dim,cell_f_row,ddbox);
3437 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
3439 int d;
3441 /* This function assumes the box is static and should therefore
3442 * not be called when the box has changed since the last
3443 * call to dd_partition_system.
3445 for(d=0; d<dd->ndim; d++)
3447 relative_to_absolute_cell_bounds(dd,ddbox,d);
3453 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3454 gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
3455 gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
3456 gmx_wallcycle_t wcycle)
3458 gmx_domdec_comm_t *comm;
3459 int dim;
3461 comm = dd->comm;
3463 if (bDoDLB)
3465 wallcycle_start(wcycle,ewcDDCOMMBOUND);
3466 set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
3467 wallcycle_stop(wcycle,ewcDDCOMMBOUND);
3469 else if (bDynamicBox)
3471 set_dd_cell_sizes_dlb_nochange(dd,ddbox);
3474 /* Set the dimensions for which no DD is used */
3475 for(dim=0; dim<DIM; dim++) {
3476 if (dd->nc[dim] == 1) {
3477 comm->cell_x0[dim] = 0;
3478 comm->cell_x1[dim] = ddbox->box_size[dim];
3479 if (dim >= ddbox->nboundeddim)
3481 comm->cell_x0[dim] += ddbox->box0[dim];
3482 comm->cell_x1[dim] += ddbox->box0[dim];
3488 static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
3490 int d,np,i;
3491 gmx_domdec_comm_dim_t *cd;
3493 for(d=0; d<dd->ndim; d++)
3495 cd = &dd->comm->cd[d];
3496 np = npulse[dd->dim[d]];
3497 if (np > cd->np_nalloc)
3499 if (debug)
3501 fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
3502 dim2char(dd->dim[d]),np);
3504 if (DDMASTER(dd) && cd->np_nalloc > 0)
3506 fprintf(stderr,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[d]),np);
3508 srenew(cd->ind,np);
3509 for(i=cd->np_nalloc; i<np; i++)
3511 cd->ind[i].index = NULL;
3512 cd->ind[i].nalloc = 0;
3514 cd->np_nalloc = np;
3516 cd->np = np;
3521 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3522 gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
3523 gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
3524 gmx_wallcycle_t wcycle)
3526 gmx_domdec_comm_t *comm;
3527 int d;
3528 ivec npulse;
3530 comm = dd->comm;
3532 /* Copy the old cell boundaries for the cg displacement check */
3533 copy_rvec(comm->cell_x0,comm->old_cell_x0);
3534 copy_rvec(comm->cell_x1,comm->old_cell_x1);
3536 if (comm->bDynLoadBal)
3538 if (DDMASTER(dd))
3540 check_box_size(dd,ddbox);
3542 set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
3544 else
3546 set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
3547 realloc_comm_ind(dd,npulse);
3550 if (debug)
3552 for(d=0; d<DIM; d++)
3554 fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
3555 d,comm->cell_x0[d],comm->cell_x1[d],ddbox->skew_fac[d]);
3560 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3561 gmx_ddbox_t *ddbox,
3562 rvec cell_ns_x0,rvec cell_ns_x1,
3563 gmx_large_int_t step)
3565 gmx_domdec_comm_t *comm;
3566 int dim_ind,dim;
3568 comm = dd->comm;
3570 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
3572 dim = dd->dim[dim_ind];
3574 /* Without PBC we don't have restrictions on the outer cells */
3575 if (!(dim >= ddbox->npbcdim &&
3576 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3577 comm->bDynLoadBal &&
3578 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3579 comm->cellsize_min[dim])
3581 char buf[22];
3582 gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3583 gmx_step_str(step,buf),dim2char(dim),
3584 comm->cell_x1[dim] - comm->cell_x0[dim],
3585 ddbox->skew_fac[dim],
3586 dd->comm->cellsize_min[dim],
3587 dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
3591 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3593 /* Communicate the boundaries and update cell_ns_x0/1 */
3594 dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
3595 if (dd->bGridJump && dd->ndim > 1)
3597 check_grid_jump(step,dd,ddbox);
3602 static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
3604 if (YY < npbcdim)
3606 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3608 else
3610 tcm[YY][XX] = 0;
3612 if (ZZ < npbcdim)
3614 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3615 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3617 else
3619 tcm[ZZ][XX] = 0;
3620 tcm[ZZ][YY] = 0;
3624 static void check_screw_box(matrix box)
3626 /* Mathematical limitation */
3627 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3629 gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3632 /* Limitation due to the asymmetry of the eighth shell method */
3633 if (box[ZZ][YY] != 0)
3635 gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
3639 static void distribute_cg(FILE *fplog,gmx_large_int_t step,
3640 matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
3641 gmx_domdec_t *dd)
3643 gmx_domdec_master_t *ma;
3644 int **tmp_ind=NULL,*tmp_nalloc=NULL;
3645 int i,icg,j,k,k0,k1,d,npbcdim;
3646 matrix tcm;
3647 rvec box_size,cg_cm;
3648 ivec ind;
3649 real nrcg,inv_ncg,pos_d;
3650 atom_id *cgindex;
3651 gmx_bool bUnbounded,bScrew;
3653 ma = dd->ma;
3655 if (tmp_ind == NULL)
3657 snew(tmp_nalloc,dd->nnodes);
3658 snew(tmp_ind,dd->nnodes);
3659 for(i=0; i<dd->nnodes; i++)
3661 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3662 snew(tmp_ind[i],tmp_nalloc[i]);
3666 /* Clear the count */
3667 for(i=0; i<dd->nnodes; i++)
3669 ma->ncg[i] = 0;
3670 ma->nat[i] = 0;
3673 make_tric_corr_matrix(dd->npbcdim,box,tcm);
3675 cgindex = cgs->index;
3677 /* Compute the center of geometry for all charge groups */
3678 for(icg=0; icg<cgs->nr; icg++)
3680 k0 = cgindex[icg];
3681 k1 = cgindex[icg+1];
3682 nrcg = k1 - k0;
3683 if (nrcg == 1)
3685 copy_rvec(pos[k0],cg_cm);
3687 else
3689 inv_ncg = 1.0/nrcg;
3691 clear_rvec(cg_cm);
3692 for(k=k0; (k<k1); k++)
3694 rvec_inc(cg_cm,pos[k]);
3696 for(d=0; (d<DIM); d++)
3698 cg_cm[d] *= inv_ncg;
3701 /* Put the charge group in the box and determine the cell index */
3702 for(d=DIM-1; d>=0; d--) {
3703 pos_d = cg_cm[d];
3704 if (d < dd->npbcdim)
3706 bScrew = (dd->bScrewPBC && d == XX);
3707 if (tric_dir[d] && dd->nc[d] > 1)
3709 /* Use triclinic coordintates for this dimension */
3710 for(j=d+1; j<DIM; j++)
3712 pos_d += cg_cm[j]*tcm[j][d];
3715 while(pos_d >= box[d][d])
3717 pos_d -= box[d][d];
3718 rvec_dec(cg_cm,box[d]);
3719 if (bScrew)
3721 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3722 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3724 for(k=k0; (k<k1); k++)
3726 rvec_dec(pos[k],box[d]);
3727 if (bScrew)
3729 pos[k][YY] = box[YY][YY] - pos[k][YY];
3730 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3734 while(pos_d < 0)
3736 pos_d += box[d][d];
3737 rvec_inc(cg_cm,box[d]);
3738 if (bScrew)
3740 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3741 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3743 for(k=k0; (k<k1); k++)
3745 rvec_inc(pos[k],box[d]);
3746 if (bScrew) {
3747 pos[k][YY] = box[YY][YY] - pos[k][YY];
3748 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3753 /* This could be done more efficiently */
3754 ind[d] = 0;
3755 while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
3757 ind[d]++;
3760 i = dd_index(dd->nc,ind);
3761 if (ma->ncg[i] == tmp_nalloc[i])
3763 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
3764 srenew(tmp_ind[i],tmp_nalloc[i]);
3766 tmp_ind[i][ma->ncg[i]] = icg;
3767 ma->ncg[i]++;
3768 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
3771 k1 = 0;
3772 for(i=0; i<dd->nnodes; i++)
3774 ma->index[i] = k1;
3775 for(k=0; k<ma->ncg[i]; k++)
3777 ma->cg[k1++] = tmp_ind[i][k];
3780 ma->index[dd->nnodes] = k1;
3782 for(i=0; i<dd->nnodes; i++)
3784 sfree(tmp_ind[i]);
3786 sfree(tmp_ind);
3787 sfree(tmp_nalloc);
3789 if (fplog)
3791 char buf[22];
3792 fprintf(fplog,"Charge group distribution at step %s:",
3793 gmx_step_str(step,buf));
3794 for(i=0; i<dd->nnodes; i++)
3796 fprintf(fplog," %d",ma->ncg[i]);
3798 fprintf(fplog,"\n");
3802 static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
3803 t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
3804 rvec pos[])
3806 gmx_domdec_master_t *ma=NULL;
3807 ivec npulse;
3808 int i,cg_gl;
3809 int *ibuf,buf2[2] = { 0, 0 };
3811 if (DDMASTER(dd))
3813 ma = dd->ma;
3815 if (dd->bScrewPBC)
3817 check_screw_box(box);
3820 set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);
3822 distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
3823 for(i=0; i<dd->nnodes; i++)
3825 ma->ibuf[2*i] = ma->ncg[i];
3826 ma->ibuf[2*i+1] = ma->nat[i];
3828 ibuf = ma->ibuf;
3830 else
3832 ibuf = NULL;
3834 dd_scatter(dd,2*sizeof(int),ibuf,buf2);
3836 dd->ncg_home = buf2[0];
3837 dd->nat_home = buf2[1];
3838 dd->ncg_tot = dd->ncg_home;
3839 dd->nat_tot = dd->nat_home;
3840 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
3842 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
3843 srenew(dd->index_gl,dd->cg_nalloc);
3844 srenew(dd->cgindex,dd->cg_nalloc+1);
3846 if (DDMASTER(dd))
3848 for(i=0; i<dd->nnodes; i++)
3850 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
3851 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
3855 dd_scatterv(dd,
3856 DDMASTER(dd) ? ma->ibuf : NULL,
3857 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
3858 DDMASTER(dd) ? ma->cg : NULL,
3859 dd->ncg_home*sizeof(int),dd->index_gl);
3861 /* Determine the home charge group sizes */
3862 dd->cgindex[0] = 0;
3863 for(i=0; i<dd->ncg_home; i++)
3865 cg_gl = dd->index_gl[i];
3866 dd->cgindex[i+1] =
3867 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
3870 if (debug)
3872 fprintf(debug,"Home charge groups:\n");
3873 for(i=0; i<dd->ncg_home; i++)
3875 fprintf(debug," %d",dd->index_gl[i]);
3876 if (i % 10 == 9)
3877 fprintf(debug,"\n");
3879 fprintf(debug,"\n");
3883 static int compact_and_copy_vec_at(int ncg,int *move,
3884 int *cgindex,
3885 int nvec,int vec,
3886 rvec *src,gmx_domdec_comm_t *comm,
3887 gmx_bool bCompact)
3889 int m,icg,i,i0,i1,nrcg;
3890 int home_pos;
3891 int pos_vec[DIM*2];
3893 home_pos = 0;
3895 for(m=0; m<DIM*2; m++)
3897 pos_vec[m] = 0;
3900 i0 = 0;
3901 for(icg=0; icg<ncg; icg++)
3903 i1 = cgindex[icg+1];
3904 m = move[icg];
3905 if (m == -1)
3907 if (bCompact)
3909 /* Compact the home array in place */
3910 for(i=i0; i<i1; i++)
3912 copy_rvec(src[i],src[home_pos++]);
3916 else
3918 /* Copy to the communication buffer */
3919 nrcg = i1 - i0;
3920 pos_vec[m] += 1 + vec*nrcg;
3921 for(i=i0; i<i1; i++)
3923 copy_rvec(src[i],comm->cgcm_state[m][pos_vec[m]++]);
3925 pos_vec[m] += (nvec - vec - 1)*nrcg;
3927 if (!bCompact)
3929 home_pos += i1 - i0;
3931 i0 = i1;
3934 return home_pos;
3937 static int compact_and_copy_vec_cg(int ncg,int *move,
3938 int *cgindex,
3939 int nvec,rvec *src,gmx_domdec_comm_t *comm,
3940 gmx_bool bCompact)
3942 int m,icg,i0,i1,nrcg;
3943 int home_pos;
3944 int pos_vec[DIM*2];
3946 home_pos = 0;
3948 for(m=0; m<DIM*2; m++)
3950 pos_vec[m] = 0;
3953 i0 = 0;
3954 for(icg=0; icg<ncg; icg++)
3956 i1 = cgindex[icg+1];
3957 m = move[icg];
3958 if (m == -1)
3960 if (bCompact)
3962 /* Compact the home array in place */
3963 copy_rvec(src[icg],src[home_pos++]);
3966 else
3968 nrcg = i1 - i0;
3969 /* Copy to the communication buffer */
3970 copy_rvec(src[icg],comm->cgcm_state[m][pos_vec[m]]);
3971 pos_vec[m] += 1 + nrcg*nvec;
3973 i0 = i1;
3975 if (!bCompact)
3977 home_pos = ncg;
3980 return home_pos;
3983 static int compact_ind(int ncg,int *move,
3984 int *index_gl,int *cgindex,
3985 int *gatindex,
3986 gmx_ga2la_t ga2la,char *bLocalCG,
3987 int *cginfo)
3989 int cg,nat,a0,a1,a,a_gl;
3990 int home_pos;
3992 home_pos = 0;
3993 nat = 0;
3994 for(cg=0; cg<ncg; cg++)
3996 a0 = cgindex[cg];
3997 a1 = cgindex[cg+1];
3998 if (move[cg] == -1)
4000 /* Compact the home arrays in place.
4001 * Anything that can be done here avoids access to global arrays.
4003 cgindex[home_pos] = nat;
4004 for(a=a0; a<a1; a++)
4006 a_gl = gatindex[a];
4007 gatindex[nat] = a_gl;
4008 /* The cell number stays 0, so we don't need to set it */
4009 ga2la_change_la(ga2la,a_gl,nat);
4010 nat++;
4012 index_gl[home_pos] = index_gl[cg];
4013 cginfo[home_pos] = cginfo[cg];
4014 /* The charge group remains local, so bLocalCG does not change */
4015 home_pos++;
4017 else
4019 /* Clear the global indices */
4020 for(a=a0; a<a1; a++)
4022 ga2la_del(ga2la,gatindex[a]);
4024 if (bLocalCG)
4026 bLocalCG[index_gl[cg]] = FALSE;
4030 cgindex[home_pos] = nat;
4032 return home_pos;
4035 static void clear_and_mark_ind(int ncg,int *move,
4036 int *index_gl,int *cgindex,int *gatindex,
4037 gmx_ga2la_t ga2la,char *bLocalCG,
4038 int *cell_index)
4040 int cg,a0,a1,a;
4042 for(cg=0; cg<ncg; cg++)
4044 if (move[cg] >= 0)
4046 a0 = cgindex[cg];
4047 a1 = cgindex[cg+1];
4048 /* Clear the global indices */
4049 for(a=a0; a<a1; a++)
4051 ga2la_del(ga2la,gatindex[a]);
4053 if (bLocalCG)
4055 bLocalCG[index_gl[cg]] = FALSE;
4057 /* Signal that this cg has moved using the ns cell index.
4058 * Here we set it to -1.
4059 * fill_grid will change it from -1 to 4*grid->ncells.
4061 cell_index[cg] = -1;
4066 static void print_cg_move(FILE *fplog,
4067 gmx_domdec_t *dd,
4068 gmx_large_int_t step,int cg,int dim,int dir,
4069 gmx_bool bHaveLimitdAndCMOld,real limitd,
4070 rvec cm_old,rvec cm_new,real pos_d)
4072 gmx_domdec_comm_t *comm;
4073 char buf[22];
4075 comm = dd->comm;
4077 fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
4078 if (bHaveLimitdAndCMOld)
4080 fprintf(fplog,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition (%f) in direction %c\n",
4081 ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
4083 else
4085 fprintf(fplog,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4086 ddglatnr(dd,dd->cgindex[cg]),dim2char(dim));
4088 fprintf(fplog,"distance out of cell %f\n",
4089 dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4090 if (bHaveLimitdAndCMOld)
4092 fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
4093 cm_old[XX],cm_old[YY],cm_old[ZZ]);
4095 fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
4096 cm_new[XX],cm_new[YY],cm_new[ZZ]);
4097 fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
4098 dim2char(dim),
4099 comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
4100 fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
4101 dim2char(dim),
4102 comm->cell_x0[dim],comm->cell_x1[dim]);
4105 static void cg_move_error(FILE *fplog,
4106 gmx_domdec_t *dd,
4107 gmx_large_int_t step,int cg,int dim,int dir,
4108 gmx_bool bHaveLimitdAndCMOld,real limitd,
4109 rvec cm_old,rvec cm_new,real pos_d)
4111 if (fplog)
4113 print_cg_move(fplog, dd,step,cg,dim,dir,
4114 bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
4116 print_cg_move(stderr,dd,step,cg,dim,dir,
4117 bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
4118 gmx_fatal(FARGS,
4119 "A charge group moved too far between two domain decomposition steps\n"
4120 "This usually means that your system is not well equilibrated");
4123 static void rotate_state_atom(t_state *state,int a)
4125 int est;
4127 for(est=0; est<estNR; est++)
4129 if (EST_DISTR(est) && state->flags & (1<<est)) {
4130 switch (est) {
4131 case estX:
4132 /* Rotate the complete state; for a rectangular box only */
4133 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4134 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4135 break;
4136 case estV:
4137 state->v[a][YY] = -state->v[a][YY];
4138 state->v[a][ZZ] = -state->v[a][ZZ];
4139 break;
4140 case estSDX:
4141 state->sd_X[a][YY] = -state->sd_X[a][YY];
4142 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4143 break;
4144 case estCGP:
4145 state->cg_p[a][YY] = -state->cg_p[a][YY];
4146 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4147 break;
4148 case estDISRE_INITF:
4149 case estDISRE_RM3TAV:
4150 case estORIRE_INITF:
4151 case estORIRE_DTAV:
4152 /* These are distances, so not affected by rotation */
4153 break;
4154 default:
4155 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4161 static int dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
4162 gmx_domdec_t *dd,ivec tric_dir,
4163 t_state *state,rvec **f,
4164 t_forcerec *fr,t_mdatoms *md,
4165 gmx_bool bCompact,
4166 t_nrnb *nrnb)
4168 int *move;
4169 int npbcdim;
4170 int ncg[DIM*2],nat[DIM*2];
4171 int c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
4172 int mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
4173 int sbuf[2],rbuf[2];
4174 int home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
4175 int flag;
4176 gmx_bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
4177 gmx_bool bScrew;
4178 ivec dev;
4179 real inv_ncg,pos_d;
4180 matrix tcm;
4181 rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
4182 atom_id *cgindex;
4183 cginfo_mb_t *cginfo_mb;
4184 gmx_domdec_comm_t *comm;
4186 if (dd->bScrewPBC)
4188 check_screw_box(state->box);
4191 comm = dd->comm;
4192 cg_cm = fr->cg_cm;
4194 for(i=0; i<estNR; i++)
4196 if (EST_DISTR(i))
4198 switch (i)
4200 case estX: /* Always present */ break;
4201 case estV: bV = (state->flags & (1<<i)); break;
4202 case estSDX: bSDX = (state->flags & (1<<i)); break;
4203 case estCGP: bCGP = (state->flags & (1<<i)); break;
4204 case estLD_RNG:
4205 case estLD_RNGI:
4206 case estDISRE_INITF:
4207 case estDISRE_RM3TAV:
4208 case estORIRE_INITF:
4209 case estORIRE_DTAV:
4210 /* No processing required */
4211 break;
4212 default:
4213 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4218 if (dd->ncg_tot > comm->nalloc_int)
4220 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4221 srenew(comm->buf_int,comm->nalloc_int);
4223 move = comm->buf_int;
4225 /* Clear the count */
4226 for(c=0; c<dd->ndim*2; c++)
4228 ncg[c] = 0;
4229 nat[c] = 0;
4232 npbcdim = dd->npbcdim;
4234 for(d=0; (d<DIM); d++)
4236 limitd[d] = dd->comm->cellsize_min[d];
4237 if (d >= npbcdim && dd->ci[d] == 0)
4239 cell_x0[d] = -GMX_FLOAT_MAX;
4241 else
4243 cell_x0[d] = comm->cell_x0[d];
4245 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4247 cell_x1[d] = GMX_FLOAT_MAX;
4249 else
4251 cell_x1[d] = comm->cell_x1[d];
4253 if (d < npbcdim)
4255 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4256 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4258 else
4260 /* We check after communication if a charge group moved
4261 * more than one cell. Set the pre-comm check limit to float_max.
4263 limit0[d] = -GMX_FLOAT_MAX;
4264 limit1[d] = GMX_FLOAT_MAX;
4268 make_tric_corr_matrix(npbcdim,state->box,tcm);
4270 cgindex = dd->cgindex;
4272 /* Compute the center of geometry for all home charge groups
4273 * and put them in the box and determine where they should go.
4275 for(cg=0; cg<dd->ncg_home; cg++)
4277 k0 = cgindex[cg];
4278 k1 = cgindex[cg+1];
4279 nrcg = k1 - k0;
4280 if (nrcg == 1)
4282 copy_rvec(state->x[k0],cm_new);
4284 else
4286 inv_ncg = 1.0/nrcg;
4288 clear_rvec(cm_new);
4289 for(k=k0; (k<k1); k++)
4291 rvec_inc(cm_new,state->x[k]);
4293 for(d=0; (d<DIM); d++)
4295 cm_new[d] = inv_ncg*cm_new[d];
4299 clear_ivec(dev);
4300 /* Do pbc and check DD cell boundary crossings */
4301 for(d=DIM-1; d>=0; d--)
4303 if (dd->nc[d] > 1)
4305 bScrew = (dd->bScrewPBC && d == XX);
4306 /* Determine the location of this cg in lattice coordinates */
4307 pos_d = cm_new[d];
4308 if (tric_dir[d])
4310 for(d2=d+1; d2<DIM; d2++)
4312 pos_d += cm_new[d2]*tcm[d2][d];
4315 /* Put the charge group in the triclinic unit-cell */
4316 if (pos_d >= cell_x1[d])
4318 if (pos_d >= limit1[d])
4320 cg_move_error(fplog,dd,step,cg,d,1,TRUE,limitd[d],
4321 cg_cm[cg],cm_new,pos_d);
4323 dev[d] = 1;
4324 if (dd->ci[d] == dd->nc[d] - 1)
4326 rvec_dec(cm_new,state->box[d]);
4327 if (bScrew)
4329 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4330 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4332 for(k=k0; (k<k1); k++)
4334 rvec_dec(state->x[k],state->box[d]);
4335 if (bScrew)
4337 rotate_state_atom(state,k);
4342 else if (pos_d < cell_x0[d])
4344 if (pos_d < limit0[d])
4346 cg_move_error(fplog,dd,step,cg,d,-1,TRUE,limitd[d],
4347 cg_cm[cg],cm_new,pos_d);
4349 dev[d] = -1;
4350 if (dd->ci[d] == 0)
4352 rvec_inc(cm_new,state->box[d]);
4353 if (bScrew)
4355 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4356 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4358 for(k=k0; (k<k1); k++)
4360 rvec_inc(state->x[k],state->box[d]);
4361 if (bScrew)
4363 rotate_state_atom(state,k);
4369 else if (d < npbcdim)
4371 /* Put the charge group in the rectangular unit-cell */
4372 while (cm_new[d] >= state->box[d][d])
4374 rvec_dec(cm_new,state->box[d]);
4375 for(k=k0; (k<k1); k++)
4377 rvec_dec(state->x[k],state->box[d]);
4380 while (cm_new[d] < 0)
4382 rvec_inc(cm_new,state->box[d]);
4383 for(k=k0; (k<k1); k++)
4385 rvec_inc(state->x[k],state->box[d]);
4391 copy_rvec(cm_new,cg_cm[cg]);
4393 /* Determine where this cg should go */
4394 flag = 0;
4395 mc = -1;
4396 for(d=0; d<dd->ndim; d++)
4398 dim = dd->dim[d];
4399 if (dev[dim] == 1)
4401 flag |= DD_FLAG_FW(d);
4402 if (mc == -1)
4404 mc = d*2;
4407 else if (dev[dim] == -1)
4409 flag |= DD_FLAG_BW(d);
4410 if (mc == -1) {
4411 if (dd->nc[dim] > 2)
4413 mc = d*2 + 1;
4415 else
4417 mc = d*2;
4422 move[cg] = mc;
4423 if (mc >= 0)
4425 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4427 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4428 srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4430 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4431 /* We store the cg size in the lower 16 bits
4432 * and the place where the charge group should go
4433 * in the next 6 bits. This saves some communication volume.
4435 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4436 ncg[mc] += 1;
4437 nat[mc] += nrcg;
4441 inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
4442 inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
4444 nvec = 1;
4445 if (bV)
4447 nvec++;
4449 if (bSDX)
4451 nvec++;
4453 if (bCGP)
4455 nvec++;
4458 /* Make sure the communication buffers are large enough */
4459 for(mc=0; mc<dd->ndim*2; mc++)
4461 nvr = ncg[mc] + nat[mc]*nvec;
4462 if (nvr > comm->cgcm_state_nalloc[mc])
4464 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4465 srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
4469 /* Recalculating cg_cm might be cheaper than communicating,
4470 * but that could give rise to rounding issues.
4472 home_pos_cg =
4473 compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
4474 nvec,cg_cm,comm,bCompact);
4476 vec = 0;
4477 home_pos_at =
4478 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4479 nvec,vec++,state->x,comm,bCompact);
4480 if (bV)
4482 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4483 nvec,vec++,state->v,comm,bCompact);
4485 if (bSDX)
4487 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4488 nvec,vec++,state->sd_X,comm,bCompact);
4490 if (bCGP)
4492 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4493 nvec,vec++,state->cg_p,comm,bCompact);
4496 if (bCompact)
4498 compact_ind(dd->ncg_home,move,
4499 dd->index_gl,dd->cgindex,dd->gatindex,
4500 dd->ga2la,comm->bLocalCG,
4501 fr->cginfo);
4503 else
4505 clear_and_mark_ind(dd->ncg_home,move,
4506 dd->index_gl,dd->cgindex,dd->gatindex,
4507 dd->ga2la,comm->bLocalCG,
4508 fr->ns.grid->cell_index);
4511 cginfo_mb = fr->cginfo_mb;
4513 ncg_stay_home = home_pos_cg;
4514 for(d=0; d<dd->ndim; d++)
4516 dim = dd->dim[d];
4517 ncg_recv = 0;
4518 nat_recv = 0;
4519 nvr = 0;
4520 for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
4522 cdd = d*2 + dir;
4523 /* Communicate the cg and atom counts */
4524 sbuf[0] = ncg[cdd];
4525 sbuf[1] = nat[cdd];
4526 if (debug)
4528 fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
4529 d,dir,sbuf[0],sbuf[1]);
4531 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4533 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4535 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4536 srenew(comm->buf_int,comm->nalloc_int);
4539 /* Communicate the charge group indices, sizes and flags */
4540 dd_sendrecv_int(dd, d, dir,
4541 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4542 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4544 nvs = ncg[cdd] + nat[cdd]*nvec;
4545 i = rbuf[0] + rbuf[1] *nvec;
4546 vec_rvec_check_alloc(&comm->vbuf,nvr+i);
4548 /* Communicate cgcm and state */
4549 dd_sendrecv_rvec(dd, d, dir,
4550 comm->cgcm_state[cdd], nvs,
4551 comm->vbuf.v+nvr, i);
4552 ncg_recv += rbuf[0];
4553 nat_recv += rbuf[1];
4554 nvr += i;
4557 /* Process the received charge groups */
4558 buf_pos = 0;
4559 for(cg=0; cg<ncg_recv; cg++)
4561 flag = comm->buf_int[cg*DD_CGIBS+1];
4563 if (dim >= npbcdim && dd->nc[dim] > 2)
4565 /* No pbc in this dim and more than one domain boundary.
4566 * We to a separate check if a charge did not move too far.
4568 if (((flag & DD_FLAG_FW(d)) &&
4569 comm->vbuf.v[buf_pos][d] > cell_x1[dim]) ||
4570 ((flag & DD_FLAG_BW(d)) &&
4571 comm->vbuf.v[buf_pos][d] < cell_x0[dim]))
4573 cg_move_error(fplog,dd,step,cg,d,
4574 (flag & DD_FLAG_FW(d)) ? 1 : 0,
4575 FALSE,0,
4576 comm->vbuf.v[buf_pos],
4577 comm->vbuf.v[buf_pos],
4578 comm->vbuf.v[buf_pos][d]);
4582 mc = -1;
4583 if (d < dd->ndim-1)
4585 /* Check which direction this cg should go */
4586 for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
4588 if (dd->bGridJump)
4590 /* The cell boundaries for dimension d2 are not equal
4591 * for each cell row of the lower dimension(s),
4592 * therefore we might need to redetermine where
4593 * this cg should go.
4595 dim2 = dd->dim[d2];
4596 /* If this cg crosses the box boundary in dimension d2
4597 * we can use the communicated flag, so we do not
4598 * have to worry about pbc.
4600 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4601 (flag & DD_FLAG_FW(d2))) ||
4602 (dd->ci[dim2] == 0 &&
4603 (flag & DD_FLAG_BW(d2)))))
4605 /* Clear the two flags for this dimension */
4606 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4607 /* Determine the location of this cg
4608 * in lattice coordinates
4610 pos_d = comm->vbuf.v[buf_pos][dim2];
4611 if (tric_dir[dim2])
4613 for(d3=dim2+1; d3<DIM; d3++)
4615 pos_d +=
4616 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4619 /* Check of we are not at the box edge.
4620 * pbc is only handled in the first step above,
4621 * but this check could move over pbc while
4622 * the first step did not due to different rounding.
4624 if (pos_d >= cell_x1[dim2] &&
4625 dd->ci[dim2] != dd->nc[dim2]-1)
4627 flag |= DD_FLAG_FW(d2);
4629 else if (pos_d < cell_x0[dim2] &&
4630 dd->ci[dim2] != 0)
4632 flag |= DD_FLAG_BW(d2);
4634 comm->buf_int[cg*DD_CGIBS+1] = flag;
4637 /* Set to which neighboring cell this cg should go */
4638 if (flag & DD_FLAG_FW(d2))
4640 mc = d2*2;
4642 else if (flag & DD_FLAG_BW(d2))
4644 if (dd->nc[dd->dim[d2]] > 2)
4646 mc = d2*2+1;
4648 else
4650 mc = d2*2;
4656 nrcg = flag & DD_FLAG_NRCG;
4657 if (mc == -1)
4659 if (home_pos_cg+1 > dd->cg_nalloc)
4661 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
4662 srenew(dd->index_gl,dd->cg_nalloc);
4663 srenew(dd->cgindex,dd->cg_nalloc+1);
4665 /* Set the global charge group index and size */
4666 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
4667 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
4668 /* Copy the state from the buffer */
4669 if (home_pos_cg >= fr->cg_nalloc)
4671 dd_realloc_fr_cg(fr,home_pos_cg+1);
4672 cg_cm = fr->cg_cm;
4674 copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
4675 /* Set the cginfo */
4676 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
4677 dd->index_gl[home_pos_cg]);
4678 if (comm->bLocalCG)
4680 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
4683 if (home_pos_at+nrcg > state->nalloc)
4685 dd_realloc_state(state,f,home_pos_at+nrcg);
4687 for(i=0; i<nrcg; i++)
4689 copy_rvec(comm->vbuf.v[buf_pos++],
4690 state->x[home_pos_at+i]);
4692 if (bV)
4694 for(i=0; i<nrcg; i++)
4696 copy_rvec(comm->vbuf.v[buf_pos++],
4697 state->v[home_pos_at+i]);
4700 if (bSDX)
4702 for(i=0; i<nrcg; i++)
4704 copy_rvec(comm->vbuf.v[buf_pos++],
4705 state->sd_X[home_pos_at+i]);
4708 if (bCGP)
4710 for(i=0; i<nrcg; i++)
4712 copy_rvec(comm->vbuf.v[buf_pos++],
4713 state->cg_p[home_pos_at+i]);
4716 home_pos_cg += 1;
4717 home_pos_at += nrcg;
4719 else
4721 /* Reallocate the buffers if necessary */
4722 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4724 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4725 srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4727 nvr = ncg[mc] + nat[mc]*nvec;
4728 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
4730 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
4731 srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
4733 /* Copy from the receive to the send buffers */
4734 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
4735 comm->buf_int + cg*DD_CGIBS,
4736 DD_CGIBS*sizeof(int));
4737 memcpy(comm->cgcm_state[mc][nvr],
4738 comm->vbuf.v[buf_pos],
4739 (1+nrcg*nvec)*sizeof(rvec));
4740 buf_pos += 1 + nrcg*nvec;
4741 ncg[mc] += 1;
4742 nat[mc] += nrcg;
4747 /* With sorting (!bCompact) the indices are now only partially up to date
4748 * and ncg_home and nat_home are not the real count, since there are
4749 * "holes" in the arrays for the charge groups that moved to neighbors.
4751 dd->ncg_home = home_pos_cg;
4752 dd->nat_home = home_pos_at;
4754 if (debug)
4756 fprintf(debug,"Finished repartitioning\n");
4759 return ncg_stay_home;
4762 void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
4764 dd->comm->cycl[ddCycl] += cycles;
4765 dd->comm->cycl_n[ddCycl]++;
4766 if (cycles > dd->comm->cycl_max[ddCycl])
4768 dd->comm->cycl_max[ddCycl] = cycles;
4772 static double force_flop_count(t_nrnb *nrnb)
4774 int i;
4775 double sum;
4776 const char *name;
4778 sum = 0;
4779 for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
4781 /* To get closer to the real timings, we half the count
4782 * for the normal loops and again half it for water loops.
4784 name = nrnb_str(i);
4785 if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
4787 sum += nrnb->n[i]*0.25*cost_nrnb(i);
4789 else
4791 sum += nrnb->n[i]*0.50*cost_nrnb(i);
4794 for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
4796 name = nrnb_str(i);
4797 if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
4798 sum += nrnb->n[i]*cost_nrnb(i);
4800 for(i=eNR_BONDS; i<=eNR_WALLS; i++)
4802 sum += nrnb->n[i]*cost_nrnb(i);
4805 return sum;
4808 void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
4810 if (dd->comm->eFlop)
4812 dd->comm->flop -= force_flop_count(nrnb);
4815 void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
4817 if (dd->comm->eFlop)
4819 dd->comm->flop += force_flop_count(nrnb);
4820 dd->comm->flop_n++;
4824 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
4826 int i;
4828 for(i=0; i<ddCyclNr; i++)
4830 dd->comm->cycl[i] = 0;
4831 dd->comm->cycl_n[i] = 0;
4832 dd->comm->cycl_max[i] = 0;
4834 dd->comm->flop = 0;
4835 dd->comm->flop_n = 0;
4838 static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
4840 gmx_domdec_comm_t *comm;
4841 gmx_domdec_load_t *load;
4842 gmx_domdec_root_t *root=NULL;
4843 int d,dim,cid,i,pos;
4844 float cell_frac=0,sbuf[DD_NLOAD_MAX];
4845 gmx_bool bSepPME;
4847 if (debug)
4849 fprintf(debug,"get_load_distribution start\n");
4852 wallcycle_start(wcycle,ewcDDCOMMLOAD);
4854 comm = dd->comm;
4856 bSepPME = (dd->pme_nodeid >= 0);
4858 for(d=dd->ndim-1; d>=0; d--)
4860 dim = dd->dim[d];
4861 /* Check if we participate in the communication in this dimension */
4862 if (d == dd->ndim-1 ||
4863 (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
4865 load = &comm->load[d];
4866 if (dd->bGridJump)
4868 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
4870 pos = 0;
4871 if (d == dd->ndim-1)
4873 sbuf[pos++] = dd_force_load(comm);
4874 sbuf[pos++] = sbuf[0];
4875 if (dd->bGridJump)
4877 sbuf[pos++] = sbuf[0];
4878 sbuf[pos++] = cell_frac;
4879 if (d > 0)
4881 sbuf[pos++] = comm->cell_f_max0[d];
4882 sbuf[pos++] = comm->cell_f_min1[d];
4885 if (bSepPME)
4887 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
4888 sbuf[pos++] = comm->cycl[ddCyclPME];
4891 else
4893 sbuf[pos++] = comm->load[d+1].sum;
4894 sbuf[pos++] = comm->load[d+1].max;
4895 if (dd->bGridJump)
4897 sbuf[pos++] = comm->load[d+1].sum_m;
4898 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
4899 sbuf[pos++] = comm->load[d+1].flags;
4900 if (d > 0)
4902 sbuf[pos++] = comm->cell_f_max0[d];
4903 sbuf[pos++] = comm->cell_f_min1[d];
4906 if (bSepPME)
4908 sbuf[pos++] = comm->load[d+1].mdf;
4909 sbuf[pos++] = comm->load[d+1].pme;
4912 load->nload = pos;
4913 /* Communicate a row in DD direction d.
4914 * The communicators are setup such that the root always has rank 0.
4916 #ifdef GMX_MPI
4917 MPI_Gather(sbuf ,load->nload*sizeof(float),MPI_BYTE,
4918 load->load,load->nload*sizeof(float),MPI_BYTE,
4919 0,comm->mpi_comm_load[d]);
4920 #endif
4921 if (dd->ci[dim] == dd->master_ci[dim])
4923 /* We are the root, process this row */
4924 if (comm->bDynLoadBal)
4926 root = comm->root[d];
4928 load->sum = 0;
4929 load->max = 0;
4930 load->sum_m = 0;
4931 load->cvol_min = 1;
4932 load->flags = 0;
4933 load->mdf = 0;
4934 load->pme = 0;
4935 pos = 0;
4936 for(i=0; i<dd->nc[dim]; i++)
4938 load->sum += load->load[pos++];
4939 load->max = max(load->max,load->load[pos]);
4940 pos++;
4941 if (dd->bGridJump)
4943 if (root->bLimited)
4945 /* This direction could not be load balanced properly,
4946 * therefore we need to use the maximum iso the average load.
4948 load->sum_m = max(load->sum_m,load->load[pos]);
4950 else
4952 load->sum_m += load->load[pos];
4954 pos++;
4955 load->cvol_min = min(load->cvol_min,load->load[pos]);
4956 pos++;
4957 if (d < dd->ndim-1)
4959 load->flags = (int)(load->load[pos++] + 0.5);
4961 if (d > 0)
4963 root->cell_f_max0[i] = load->load[pos++];
4964 root->cell_f_min1[i] = load->load[pos++];
4967 if (bSepPME)
4969 load->mdf = max(load->mdf,load->load[pos]);
4970 pos++;
4971 load->pme = max(load->pme,load->load[pos]);
4972 pos++;
4975 if (comm->bDynLoadBal && root->bLimited)
4977 load->sum_m *= dd->nc[dim];
4978 load->flags |= (1<<d);
4984 if (DDMASTER(dd))
4986 comm->nload += dd_load_count(comm);
4987 comm->load_step += comm->cycl[ddCyclStep];
4988 comm->load_sum += comm->load[0].sum;
4989 comm->load_max += comm->load[0].max;
4990 if (comm->bDynLoadBal)
4992 for(d=0; d<dd->ndim; d++)
4994 if (comm->load[0].flags & (1<<d))
4996 comm->load_lim[d]++;
5000 if (bSepPME)
5002 comm->load_mdf += comm->load[0].mdf;
5003 comm->load_pme += comm->load[0].pme;
5007 wallcycle_stop(wcycle,ewcDDCOMMLOAD);
5009 if (debug)
5011 fprintf(debug,"get_load_distribution finished\n");
5015 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
5017 /* Return the relative performance loss on the total run time
5018 * due to the force calculation load imbalance.
5020 if (dd->comm->nload > 0)
5022 return
5023 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
5024 (dd->comm->load_step*dd->nnodes);
5026 else
5028 return 0;
5032 static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
5034 char buf[STRLEN];
5035 int npp,npme,nnodes,d,limp;
5036 float imbal,pme_f_ratio,lossf,lossp=0;
5037 gmx_bool bLim;
5038 gmx_domdec_comm_t *comm;
5040 comm = dd->comm;
5041 if (DDMASTER(dd) && comm->nload > 0)
5043 npp = dd->nnodes;
5044 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
5045 nnodes = npp + npme;
5046 imbal = comm->load_max*npp/comm->load_sum - 1;
5047 lossf = dd_force_imb_perf_loss(dd);
5048 sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
5049 fprintf(fplog,"%s",buf);
5050 fprintf(stderr,"\n");
5051 fprintf(stderr,"%s",buf);
5052 sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
5053 fprintf(fplog,"%s",buf);
5054 fprintf(stderr,"%s",buf);
5055 bLim = FALSE;
5056 if (comm->bDynLoadBal)
5058 sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5059 for(d=0; d<dd->ndim; d++)
5061 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
5062 sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
5063 if (limp >= 50)
5065 bLim = TRUE;
5068 sprintf(buf+strlen(buf),"\n");
5069 fprintf(fplog,"%s",buf);
5070 fprintf(stderr,"%s",buf);
5072 if (npme > 0)
5074 pme_f_ratio = comm->load_pme/comm->load_mdf;
5075 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5076 if (lossp <= 0)
5078 lossp *= (float)npme/(float)nnodes;
5080 else
5082 lossp *= (float)npp/(float)nnodes;
5084 sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
5085 fprintf(fplog,"%s",buf);
5086 fprintf(stderr,"%s",buf);
5087 sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
5088 fprintf(fplog,"%s",buf);
5089 fprintf(stderr,"%s",buf);
5091 fprintf(fplog,"\n");
5092 fprintf(stderr,"\n");
5094 if (lossf >= DD_PERF_LOSS)
5096 sprintf(buf,
5097 "NOTE: %.1f %% performance was lost due to load imbalance\n"
5098 " in the domain decomposition.\n",lossf*100);
5099 if (!comm->bDynLoadBal)
5101 sprintf(buf+strlen(buf)," You might want to use dynamic load balancing (option -dlb.)\n");
5103 else if (bLim)
5105 sprintf(buf+strlen(buf)," You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5107 fprintf(fplog,"%s\n",buf);
5108 fprintf(stderr,"%s\n",buf);
5110 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5112 sprintf(buf,
5113 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5114 " had %s work to do than the PP nodes.\n"
5115 " You might want to %s the number of PME nodes\n"
5116 " or %s the cut-off and the grid spacing.\n",
5117 fabs(lossp*100),
5118 (lossp < 0) ? "less" : "more",
5119 (lossp < 0) ? "decrease" : "increase",
5120 (lossp < 0) ? "decrease" : "increase");
5121 fprintf(fplog,"%s\n",buf);
5122 fprintf(stderr,"%s\n",buf);
5127 static float dd_vol_min(gmx_domdec_t *dd)
5129 return dd->comm->load[0].cvol_min*dd->nnodes;
5132 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
5134 return dd->comm->load[0].flags;
5137 static float dd_f_imbal(gmx_domdec_t *dd)
5139 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5142 static float dd_pme_f_ratio(gmx_domdec_t *dd)
5144 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5147 static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
5149 int flags,d;
5150 char buf[22];
5152 flags = dd_load_flags(dd);
5153 if (flags)
5155 fprintf(fplog,
5156 "DD load balancing is limited by minimum cell size in dimension");
5157 for(d=0; d<dd->ndim; d++)
5159 if (flags & (1<<d))
5161 fprintf(fplog," %c",dim2char(dd->dim[d]));
5164 fprintf(fplog,"\n");
5166 fprintf(fplog,"DD step %s",gmx_step_str(step,buf));
5167 if (dd->comm->bDynLoadBal)
5169 fprintf(fplog," vol min/aver %5.3f%c",
5170 dd_vol_min(dd),flags ? '!' : ' ');
5172 fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
5173 if (dd->comm->cycl_n[ddCyclPME])
5175 fprintf(fplog," pme mesh/force %5.3f",dd_pme_f_ratio(dd));
5177 fprintf(fplog,"\n\n");
5180 static void dd_print_load_verbose(gmx_domdec_t *dd)
5182 if (dd->comm->bDynLoadBal)
5184 fprintf(stderr,"vol %4.2f%c ",
5185 dd_vol_min(dd),dd_load_flags(dd) ? '!' : ' ');
5187 fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
5188 if (dd->comm->cycl_n[ddCyclPME])
5190 fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
5194 #ifdef GMX_MPI
5195 static void make_load_communicator(gmx_domdec_t *dd,MPI_Group g_all,
5196 int dim_ind,ivec loc)
5198 MPI_Group g_row;
5199 MPI_Comm c_row;
5200 int dim,i,*rank;
5201 ivec loc_c;
5202 gmx_domdec_root_t *root;
5204 dim = dd->dim[dim_ind];
5205 copy_ivec(loc,loc_c);
5206 snew(rank,dd->nc[dim]);
5207 for(i=0; i<dd->nc[dim]; i++)
5209 loc_c[dim] = i;
5210 rank[i] = dd_index(dd->nc,loc_c);
5212 /* Here we create a new group, that does not necessarily
5213 * include our process. But MPI_Comm_create needs to be
5214 * called by all the processes in the original communicator.
5215 * Calling MPI_Group_free afterwards gives errors, so I assume
5216 * also the group is needed by all processes. (B. Hess)
5218 MPI_Group_incl(g_all,dd->nc[dim],rank,&g_row);
5219 MPI_Comm_create(dd->mpi_comm_all,g_row,&c_row);
5220 if (c_row != MPI_COMM_NULL)
5222 /* This process is part of the group */
5223 dd->comm->mpi_comm_load[dim_ind] = c_row;
5224 if (dd->comm->eDLB != edlbNO)
5226 if (dd->ci[dim] == dd->master_ci[dim])
5228 /* This is the root process of this row */
5229 snew(dd->comm->root[dim_ind],1);
5230 root = dd->comm->root[dim_ind];
5231 snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
5232 snew(root->old_cell_f,dd->nc[dim]+1);
5233 snew(root->bCellMin,dd->nc[dim]);
5234 if (dim_ind > 0)
5236 snew(root->cell_f_max0,dd->nc[dim]);
5237 snew(root->cell_f_min1,dd->nc[dim]);
5238 snew(root->bound_min,dd->nc[dim]);
5239 snew(root->bound_max,dd->nc[dim]);
5241 snew(root->buf_ncd,dd->nc[dim]);
5243 else
5245 /* This is not a root process, we only need to receive cell_f */
5246 snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
5249 if (dd->ci[dim] == dd->master_ci[dim])
5251 snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
5254 sfree(rank);
5256 #endif
5258 static void make_load_communicators(gmx_domdec_t *dd)
5260 #ifdef GMX_MPI
5261 MPI_Group g_all;
5262 int dim0,dim1,i,j;
5263 ivec loc;
5265 if (debug)
5266 fprintf(debug,"Making load communicators\n");
5268 MPI_Comm_group(dd->mpi_comm_all,&g_all);
5270 snew(dd->comm->load,dd->ndim);
5271 snew(dd->comm->mpi_comm_load,dd->ndim);
5273 clear_ivec(loc);
5274 make_load_communicator(dd,g_all,0,loc);
5275 if (dd->ndim > 1) {
5276 dim0 = dd->dim[0];
5277 for(i=0; i<dd->nc[dim0]; i++) {
5278 loc[dim0] = i;
5279 make_load_communicator(dd,g_all,1,loc);
5282 if (dd->ndim > 2) {
5283 dim0 = dd->dim[0];
5284 for(i=0; i<dd->nc[dim0]; i++) {
5285 loc[dim0] = i;
5286 dim1 = dd->dim[1];
5287 for(j=0; j<dd->nc[dim1]; j++) {
5288 loc[dim1] = j;
5289 make_load_communicator(dd,g_all,2,loc);
5294 MPI_Group_free(&g_all);
5296 if (debug)
5297 fprintf(debug,"Finished making load communicators\n");
5298 #endif
5301 void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
5303 gmx_bool bZYX;
5304 int d,dim,i,j,m;
5305 ivec tmp,s;
5306 int nzone,nzonep;
5307 ivec dd_zp[DD_MAXIZONE];
5308 gmx_domdec_zones_t *zones;
5309 gmx_domdec_ns_ranges_t *izone;
5311 for(d=0; d<dd->ndim; d++)
5313 dim = dd->dim[d];
5314 copy_ivec(dd->ci,tmp);
5315 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5316 dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
5317 copy_ivec(dd->ci,tmp);
5318 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5319 dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
5320 if (debug)
5322 fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5323 dd->rank,dim,
5324 dd->neighbor[d][0],
5325 dd->neighbor[d][1]);
5329 if (DDMASTER(dd))
5331 fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
5332 dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
5334 if (fplog)
5336 fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5337 dd->ndim,
5338 dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
5339 dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5341 switch (dd->ndim)
5343 case 3:
5344 nzone = dd_z3n;
5345 nzonep = dd_zp3n;
5346 for(i=0; i<nzonep; i++)
5348 copy_ivec(dd_zp3[i],dd_zp[i]);
5350 break;
5351 case 2:
5352 nzone = dd_z2n;
5353 nzonep = dd_zp2n;
5354 for(i=0; i<nzonep; i++)
5356 copy_ivec(dd_zp2[i],dd_zp[i]);
5358 break;
5359 case 1:
5360 nzone = dd_z1n;
5361 nzonep = dd_zp1n;
5362 for(i=0; i<nzonep; i++)
5364 copy_ivec(dd_zp1[i],dd_zp[i]);
5366 break;
5367 default:
5368 gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
5369 nzone = 0;
5370 nzonep = 0;
5373 zones = &dd->comm->zones;
5375 for(i=0; i<nzone; i++)
5377 m = 0;
5378 clear_ivec(zones->shift[i]);
5379 for(d=0; d<dd->ndim; d++)
5381 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5385 zones->n = nzone;
5386 for(i=0; i<nzone; i++)
5388 for(d=0; d<DIM; d++)
5390 s[d] = dd->ci[d] - zones->shift[i][d];
5391 if (s[d] < 0)
5393 s[d] += dd->nc[d];
5395 else if (s[d] >= dd->nc[d])
5397 s[d] -= dd->nc[d];
5401 zones->nizone = nzonep;
5402 for(i=0; i<zones->nizone; i++)
5404 if (dd_zp[i][0] != i)
5406 gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
5408 izone = &zones->izone[i];
5409 izone->j0 = dd_zp[i][1];
5410 izone->j1 = dd_zp[i][2];
5411 for(dim=0; dim<DIM; dim++)
5413 if (dd->nc[dim] == 1)
5415 /* All shifts should be allowed */
5416 izone->shift0[dim] = -1;
5417 izone->shift1[dim] = 1;
5419 else
5422 izone->shift0[d] = 0;
5423 izone->shift1[d] = 0;
5424 for(j=izone->j0; j<izone->j1; j++) {
5425 if (dd->shift[j][d] > dd->shift[i][d])
5426 izone->shift0[d] = -1;
5427 if (dd->shift[j][d] < dd->shift[i][d])
5428 izone->shift1[d] = 1;
5432 int shift_diff;
5434 /* Assume the shift are not more than 1 cell */
5435 izone->shift0[dim] = 1;
5436 izone->shift1[dim] = -1;
5437 for(j=izone->j0; j<izone->j1; j++)
5439 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5440 if (shift_diff < izone->shift0[dim])
5442 izone->shift0[dim] = shift_diff;
5444 if (shift_diff > izone->shift1[dim])
5446 izone->shift1[dim] = shift_diff;
5453 if (dd->comm->eDLB != edlbNO)
5455 snew(dd->comm->root,dd->ndim);
5458 if (dd->comm->bRecordLoad)
5460 make_load_communicators(dd);
5464 static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
5466 gmx_domdec_t *dd;
5467 gmx_domdec_comm_t *comm;
5468 int i,rank,*buf;
5469 ivec periods;
5470 #ifdef GMX_MPI
5471 MPI_Comm comm_cart;
5472 #endif
5474 dd = cr->dd;
5475 comm = dd->comm;
5477 #ifdef GMX_MPI
5478 if (comm->bCartesianPP)
5480 /* Set up cartesian communication for the particle-particle part */
5481 if (fplog)
5483 fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
5484 dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
5487 for(i=0; i<DIM; i++)
5489 periods[i] = TRUE;
5491 MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
5492 &comm_cart);
5493 /* We overwrite the old communicator with the new cartesian one */
5494 cr->mpi_comm_mygroup = comm_cart;
5497 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5498 MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);
5500 if (comm->bCartesianPP_PME)
5502 /* Since we want to use the original cartesian setup for sim,
5503 * and not the one after split, we need to make an index.
5505 snew(comm->ddindex2ddnodeid,dd->nnodes);
5506 comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
5507 gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
5508 /* Get the rank of the DD master,
5509 * above we made sure that the master node is a PP node.
5511 if (MASTER(cr))
5513 rank = dd->rank;
5515 else
5517 rank = 0;
5519 MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
5521 else if (comm->bCartesianPP)
5523 if (cr->npmenodes == 0)
5525 /* The PP communicator is also
5526 * the communicator for this simulation
5528 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5530 cr->nodeid = dd->rank;
5532 MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);
5534 /* We need to make an index to go from the coordinates
5535 * to the nodeid of this simulation.
5537 snew(comm->ddindex2simnodeid,dd->nnodes);
5538 snew(buf,dd->nnodes);
5539 if (cr->duty & DUTY_PP)
5541 buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
5543 /* Communicate the ddindex to simulation nodeid index */
5544 MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
5545 cr->mpi_comm_mysim);
5546 sfree(buf);
5548 /* Determine the master coordinates and rank.
5549 * The DD master should be the same node as the master of this sim.
5551 for(i=0; i<dd->nnodes; i++)
5553 if (comm->ddindex2simnodeid[i] == 0)
5555 ddindex2xyz(dd->nc,i,dd->master_ci);
5556 MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
5559 if (debug)
5561 fprintf(debug,"The master rank is %d\n",dd->masterrank);
5564 else
5566 /* No Cartesian communicators */
5567 /* We use the rank in dd->comm->all as DD index */
5568 ddindex2xyz(dd->nc,dd->rank,dd->ci);
5569 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5570 dd->masterrank = 0;
5571 clear_ivec(dd->master_ci);
5573 #endif
5575 if (fplog)
5577 fprintf(fplog,
5578 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5579 dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5581 if (debug)
5583 fprintf(debug,
5584 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5585 dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5589 static void receive_ddindex2simnodeid(t_commrec *cr)
5591 gmx_domdec_t *dd;
5593 gmx_domdec_comm_t *comm;
5594 int *buf;
5596 dd = cr->dd;
5597 comm = dd->comm;
5599 #ifdef GMX_MPI
5600 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
5602 snew(comm->ddindex2simnodeid,dd->nnodes);
5603 snew(buf,dd->nnodes);
5604 if (cr->duty & DUTY_PP)
5606 buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
5608 #ifdef GMX_MPI
5609 /* Communicate the ddindex to simulation nodeid index */
5610 MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
5611 cr->mpi_comm_mysim);
5612 #endif
5613 sfree(buf);
5615 #endif
5618 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
5619 int ncg,int natoms)
5621 gmx_domdec_master_t *ma;
5622 int i;
5624 snew(ma,1);
5626 snew(ma->ncg,dd->nnodes);
5627 snew(ma->index,dd->nnodes+1);
5628 snew(ma->cg,ncg);
5629 snew(ma->nat,dd->nnodes);
5630 snew(ma->ibuf,dd->nnodes*2);
5631 snew(ma->cell_x,DIM);
5632 for(i=0; i<DIM; i++)
5634 snew(ma->cell_x[i],dd->nc[i]+1);
5637 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
5639 ma->vbuf = NULL;
5641 else
5643 snew(ma->vbuf,natoms);
5646 return ma;
5649 static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
5650 int reorder)
5652 gmx_domdec_t *dd;
5653 gmx_domdec_comm_t *comm;
5654 int i,rank;
5655 gmx_bool bDiv[DIM];
5656 ivec periods;
5657 #ifdef GMX_MPI
5658 MPI_Comm comm_cart;
5659 #endif
5661 dd = cr->dd;
5662 comm = dd->comm;
5664 if (comm->bCartesianPP)
5666 for(i=1; i<DIM; i++)
5668 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
5670 if (bDiv[YY] || bDiv[ZZ])
5672 comm->bCartesianPP_PME = TRUE;
5673 /* If we have 2D PME decomposition, which is always in x+y,
5674 * we stack the PME only nodes in z.
5675 * Otherwise we choose the direction that provides the thinnest slab
5676 * of PME only nodes as this will have the least effect
5677 * on the PP communication.
5678 * But for the PME communication the opposite might be better.
5680 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
5681 !bDiv[YY] ||
5682 dd->nc[YY] > dd->nc[ZZ]))
5684 comm->cartpmedim = ZZ;
5686 else
5688 comm->cartpmedim = YY;
5690 comm->ntot[comm->cartpmedim]
5691 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
5693 else if (fplog)
5695 fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
5696 fprintf(fplog,
5697 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5701 #ifdef GMX_MPI
5702 if (comm->bCartesianPP_PME)
5704 if (fplog)
5706 fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
5709 for(i=0; i<DIM; i++)
5711 periods[i] = TRUE;
5713 MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
5714 &comm_cart);
5716 MPI_Comm_rank(comm_cart,&rank);
5717 if (MASTERNODE(cr) && rank != 0)
5719 gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
5722 /* With this assigment we loose the link to the original communicator
5723 * which will usually be MPI_COMM_WORLD, unless have multisim.
5725 cr->mpi_comm_mysim = comm_cart;
5726 cr->sim_nodeid = rank;
5728 MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);
5730 if (fplog)
5732 fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
5733 cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5736 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
5738 cr->duty = DUTY_PP;
5740 if (cr->npmenodes == 0 ||
5741 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
5743 cr->duty = DUTY_PME;
5746 /* Split the sim communicator into PP and PME only nodes */
5747 MPI_Comm_split(cr->mpi_comm_mysim,
5748 cr->duty,
5749 dd_index(comm->ntot,dd->ci),
5750 &cr->mpi_comm_mygroup);
5752 else
5754 switch (dd_node_order)
5756 case ddnoPP_PME:
5757 if (fplog)
5759 fprintf(fplog,"Order of the nodes: PP first, PME last\n");
5761 break;
5762 case ddnoINTERLEAVE:
5763 /* Interleave the PP-only and PME-only nodes,
5764 * as on clusters with dual-core machines this will double
5765 * the communication bandwidth of the PME processes
5766 * and thus speed up the PP <-> PME and inter PME communication.
5768 if (fplog)
5770 fprintf(fplog,"Interleaving PP and PME nodes\n");
5772 comm->pmenodes = dd_pmenodes(cr);
5773 break;
5774 case ddnoCARTESIAN:
5775 break;
5776 default:
5777 gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
5780 if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1)
5782 cr->duty = DUTY_PME;
5784 else
5786 cr->duty = DUTY_PP;
5789 /* Split the sim communicator into PP and PME only nodes */
5790 MPI_Comm_split(cr->mpi_comm_mysim,
5791 cr->duty,
5792 cr->nodeid,
5793 &cr->mpi_comm_mygroup);
5794 MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid);
5796 #endif
5798 if (fplog)
5800 fprintf(fplog,"This is a %s only node\n\n",
5801 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
5805 void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
5807 gmx_domdec_t *dd;
5808 gmx_domdec_comm_t *comm;
5809 int CartReorder;
5811 dd = cr->dd;
5812 comm = dd->comm;
5814 copy_ivec(dd->nc,comm->ntot);
5816 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
5817 comm->bCartesianPP_PME = FALSE;
5819 /* Reorder the nodes by default. This might change the MPI ranks.
5820 * Real reordering is only supported on very few architectures,
5821 * Blue Gene is one of them.
5823 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
5825 if (cr->npmenodes > 0)
5827 /* Split the communicator into a PP and PME part */
5828 split_communicator(fplog,cr,dd_node_order,CartReorder);
5829 if (comm->bCartesianPP_PME)
5831 /* We (possibly) reordered the nodes in split_communicator,
5832 * so it is no longer required in make_pp_communicator.
5834 CartReorder = FALSE;
5837 else
5839 /* All nodes do PP and PME */
5840 #ifdef GMX_MPI
5841 /* We do not require separate communicators */
5842 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
5843 #endif
5846 if (cr->duty & DUTY_PP)
5848 /* Copy or make a new PP communicator */
5849 make_pp_communicator(fplog,cr,CartReorder);
5851 else
5853 receive_ddindex2simnodeid(cr);
5856 if (!(cr->duty & DUTY_PME))
5858 /* Set up the commnuication to our PME node */
5859 dd->pme_nodeid = dd_simnode2pmenode(cr,cr->sim_nodeid);
5860 dd->pme_receive_vir_ener = receive_vir_ener(cr);
5861 if (debug)
5863 fprintf(debug,"My pme_nodeid %d receive ener %d\n",
5864 dd->pme_nodeid,dd->pme_receive_vir_ener);
5867 else
5869 dd->pme_nodeid = -1;
5872 if (DDMASTER(dd))
5874 dd->ma = init_gmx_domdec_master_t(dd,
5875 comm->cgs_gl.nr,
5876 comm->cgs_gl.index[comm->cgs_gl.nr]);
5880 static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
5882 real *slb_frac,tot;
5883 int i,n;
5884 double dbl;
5886 slb_frac = NULL;
5887 if (nc > 1 && size_string != NULL)
5889 if (fplog)
5891 fprintf(fplog,"Using static load balancing for the %s direction\n",
5892 dir);
5894 snew(slb_frac,nc);
5895 tot = 0;
5896 for (i=0; i<nc; i++)
5898 dbl = 0;
5899 sscanf(size_string,"%lf%n",&dbl,&n);
5900 if (dbl == 0)
5902 gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
5904 slb_frac[i] = dbl;
5905 size_string += n;
5906 tot += slb_frac[i];
5908 /* Normalize */
5909 if (fplog)
5911 fprintf(fplog,"Relative cell sizes:");
5913 for (i=0; i<nc; i++)
5915 slb_frac[i] /= tot;
5916 if (fplog)
5918 fprintf(fplog," %5.3f",slb_frac[i]);
5921 if (fplog)
5923 fprintf(fplog,"\n");
5927 return slb_frac;
5930 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
5932 int n,nmol,ftype;
5933 gmx_mtop_ilistloop_t iloop;
5934 t_ilist *il;
5936 n = 0;
5937 iloop = gmx_mtop_ilistloop_init(mtop);
5938 while (gmx_mtop_ilistloop_next(iloop,&il,&nmol))
5940 for(ftype=0; ftype<F_NRE; ftype++)
5942 if ((interaction_function[ftype].flags & IF_BOND) &&
5943 NRAL(ftype) > 2)
5945 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
5950 return n;
5953 static int dd_nst_env(FILE *fplog,const char *env_var,int def)
5955 char *val;
5956 int nst;
5958 nst = def;
5959 val = getenv(env_var);
5960 if (val)
5962 if (sscanf(val,"%d",&nst) <= 0)
5964 nst = 1;
5966 if (fplog)
5968 fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
5969 env_var,val,nst);
5973 return nst;
5976 static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
5978 if (MASTER(cr))
5980 fprintf(stderr,"\n%s\n",warn_string);
5982 if (fplog)
5984 fprintf(fplog,"\n%s\n",warn_string);
5988 static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
5989 t_inputrec *ir,FILE *fplog)
5991 if (ir->ePBC == epbcSCREW &&
5992 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
5994 gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
5997 if (ir->ns_type == ensSIMPLE)
5999 gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
6002 if (ir->nstlist == 0)
6004 gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
6007 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
6009 dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6013 static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
6015 int di,d;
6016 real r;
6018 r = ddbox->box_size[XX];
6019 for(di=0; di<dd->ndim; di++)
6021 d = dd->dim[di];
6022 /* Check using the initial average cell size */
6023 r = min(r,ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6026 return r;
6029 static int check_dlb_support(FILE *fplog,t_commrec *cr,
6030 const char *dlb_opt,gmx_bool bRecordLoad,
6031 unsigned long Flags,t_inputrec *ir)
6033 gmx_domdec_t *dd;
6034 int eDLB=-1;
6035 char buf[STRLEN];
6037 switch (dlb_opt[0])
6039 case 'a': eDLB = edlbAUTO; break;
6040 case 'n': eDLB = edlbNO; break;
6041 case 'y': eDLB = edlbYES; break;
6042 default: gmx_incons("Unknown dlb_opt");
6045 if (Flags & MD_RERUN)
6047 return edlbNO;
6050 if (!EI_DYNAMICS(ir->eI))
6052 if (eDLB == edlbYES)
6054 sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
6055 dd_warning(cr,fplog,buf);
6058 return edlbNO;
6061 if (!bRecordLoad)
6063 dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6065 return edlbNO;
6068 if (Flags & MD_REPRODUCIBLE)
6070 switch (eDLB)
6072 case edlbNO:
6073 break;
6074 case edlbAUTO:
6075 dd_warning(cr,fplog,"NOTE: reproducability requested, will not use dynamic load balancing\n");
6076 eDLB = edlbNO;
6077 break;
6078 case edlbYES:
6079 dd_warning(cr,fplog,"WARNING: reproducability requested with dynamic load balancing, the simulation will NOT be binary reproducable\n");
6080 break;
6081 default:
6082 gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
6083 break;
6087 return eDLB;
6090 static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
6092 int dim;
6094 dd->ndim = 0;
6095 if (getenv("GMX_DD_ORDER_ZYX") != NULL)
6097 /* Decomposition order z,y,x */
6098 if (fplog)
6100 fprintf(fplog,"Using domain decomposition order z, y, x\n");
6102 for(dim=DIM-1; dim>=0; dim--)
6104 if (dd->nc[dim] > 1)
6106 dd->dim[dd->ndim++] = dim;
6110 else
6112 /* Decomposition order x,y,z */
6113 for(dim=0; dim<DIM; dim++)
6115 if (dd->nc[dim] > 1)
6117 dd->dim[dd->ndim++] = dim;
6123 static gmx_domdec_comm_t *init_dd_comm()
6125 gmx_domdec_comm_t *comm;
6126 int i;
6128 snew(comm,1);
6129 snew(comm->cggl_flag,DIM*2);
6130 snew(comm->cgcm_state,DIM*2);
6131 for(i=0; i<DIM*2; i++)
6133 comm->cggl_flag_nalloc[i] = 0;
6134 comm->cgcm_state_nalloc[i] = 0;
6137 comm->nalloc_int = 0;
6138 comm->buf_int = NULL;
6140 vec_rvec_init(&comm->vbuf);
6142 comm->n_load_have = 0;
6143 comm->n_load_collect = 0;
6145 for(i=0; i<ddnatNR-ddnatZONE; i++)
6147 comm->sum_nat[i] = 0;
6149 comm->ndecomp = 0;
6150 comm->nload = 0;
6151 comm->load_step = 0;
6152 comm->load_sum = 0;
6153 comm->load_max = 0;
6154 clear_ivec(comm->load_lim);
6155 comm->load_mdf = 0;
6156 comm->load_pme = 0;
6158 return comm;
6161 gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
6162 unsigned long Flags,
6163 ivec nc,
6164 real comm_distance_min,real rconstr,
6165 const char *dlb_opt,real dlb_scale,
6166 const char *sizex,const char *sizey,const char *sizez,
6167 gmx_mtop_t *mtop,t_inputrec *ir,
6168 matrix box,rvec *x,
6169 gmx_ddbox_t *ddbox,
6170 int *npme_x,int *npme_y)
6172 gmx_domdec_t *dd;
6173 gmx_domdec_comm_t *comm;
6174 int recload;
6175 int d,i,j;
6176 real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
6177 gmx_bool bC;
6178 char buf[STRLEN];
6180 if (fplog)
6182 fprintf(fplog,
6183 "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
6186 snew(dd,1);
6188 dd->comm = init_dd_comm();
6189 comm = dd->comm;
6190 snew(comm->cggl_flag,DIM*2);
6191 snew(comm->cgcm_state,DIM*2);
6193 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6194 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6196 dd->bSendRecv2 = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
6197 comm->eFlop = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
6198 recload = dd_nst_env(fplog,"GMX_DD_LOAD",1);
6199 comm->nstSortCG = dd_nst_env(fplog,"GMX_DD_SORT",1);
6200 comm->nstDDDump = dd_nst_env(fplog,"GMX_DD_DUMP",0);
6201 comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
6202 comm->DD_debug = dd_nst_env(fplog,"GMX_DD_DEBUG",0);
6204 dd->pme_recv_f_alloc = 0;
6205 dd->pme_recv_f_buf = NULL;
6207 if (dd->bSendRecv2 && fplog)
6209 fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6211 if (comm->eFlop)
6213 if (fplog)
6215 fprintf(fplog,"Will load balance based on FLOP count\n");
6217 if (comm->eFlop > 1)
6219 srand(1+cr->nodeid);
6221 comm->bRecordLoad = TRUE;
6223 else
6225 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6229 comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);
6231 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6232 if (fplog)
6234 fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
6236 dd->bGridJump = comm->bDynLoadBal;
6238 if (comm->nstSortCG)
6240 if (fplog)
6242 if (comm->nstSortCG == 1)
6244 fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
6246 else
6248 fprintf(fplog,"Will sort the charge groups every %d steps\n",
6249 comm->nstSortCG);
6252 snew(comm->sort,1);
6254 else
6256 if (fplog)
6258 fprintf(fplog,"Will not sort the charge groups\n");
6262 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6263 if (comm->bInterCGBondeds)
6265 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6267 else
6269 comm->bInterCGMultiBody = FALSE;
6272 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6274 if (ir->rlistlong == 0)
6276 /* Set the cut-off to some very large value,
6277 * so we don't need if statements everywhere in the code.
6278 * We use sqrt, since the cut-off is squared in some places.
6280 comm->cutoff = GMX_CUTOFF_INF;
6282 else
6284 comm->cutoff = ir->rlistlong;
6286 comm->cutoff_mbody = 0;
6288 comm->cellsize_limit = 0;
6289 comm->bBondComm = FALSE;
6291 if (comm->bInterCGBondeds)
6293 if (comm_distance_min > 0)
6295 comm->cutoff_mbody = comm_distance_min;
6296 if (Flags & MD_DDBONDCOMM)
6298 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6300 else
6302 comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
6304 r_bonded_limit = comm->cutoff_mbody;
6306 else if (ir->bPeriodicMols)
6308 /* Can not easily determine the required cut-off */
6309 dd_warning(cr,fplog,"NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
6310 comm->cutoff_mbody = comm->cutoff/2;
6311 r_bonded_limit = comm->cutoff_mbody;
6313 else
6315 if (MASTER(cr))
6317 dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
6318 Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
6320 gmx_bcast(sizeof(r_2b),&r_2b,cr);
6321 gmx_bcast(sizeof(r_mb),&r_mb,cr);
6323 /* We use an initial margin of 10% for the minimum cell size,
6324 * except when we are just below the non-bonded cut-off.
6326 if (Flags & MD_DDBONDCOMM)
6328 if (max(r_2b,r_mb) > comm->cutoff)
6330 r_bonded = max(r_2b,r_mb);
6331 r_bonded_limit = 1.1*r_bonded;
6332 comm->bBondComm = TRUE;
6334 else
6336 r_bonded = r_mb;
6337 r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
6339 /* We determine cutoff_mbody later */
6341 else
6343 /* No special bonded communication,
6344 * simply increase the DD cut-off.
6346 r_bonded_limit = 1.1*max(r_2b,r_mb);
6347 comm->cutoff_mbody = r_bonded_limit;
6348 comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
6351 comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
6352 if (fplog)
6354 fprintf(fplog,
6355 "Minimum cell size due to bonded interactions: %.3f nm\n",
6356 comm->cellsize_limit);
6360 if (dd->bInterCGcons && rconstr <= 0)
6362 /* There is a cell size limit due to the constraints (P-LINCS) */
6363 rconstr = constr_r_max(fplog,mtop,ir);
6364 if (fplog)
6366 fprintf(fplog,
6367 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6368 rconstr);
6369 if (rconstr > comm->cellsize_limit)
6371 fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
6375 else if (rconstr > 0 && fplog)
6377 /* Here we do not check for dd->bInterCGcons,
6378 * because one can also set a cell size limit for virtual sites only
6379 * and at this point we don't know yet if there are intercg v-sites.
6381 fprintf(fplog,
6382 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6383 rconstr);
6385 comm->cellsize_limit = max(comm->cellsize_limit,rconstr);
6387 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6389 if (nc[XX] > 0)
6391 copy_ivec(nc,dd->nc);
6392 set_dd_dim(fplog,dd);
6393 set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);
6395 if (cr->npmenodes == -1)
6397 cr->npmenodes = 0;
6399 acs = average_cellsize_min(dd,ddbox);
6400 if (acs < comm->cellsize_limit)
6402 if (fplog)
6404 fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
6406 gmx_fatal_collective(FARGS,cr,NULL,
6407 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6408 acs,comm->cellsize_limit);
6411 else
6413 set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);
6415 /* We need to choose the optimal DD grid and possibly PME nodes */
6416 limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
6417 comm->eDLB!=edlbNO,dlb_scale,
6418 comm->cellsize_limit,comm->cutoff,
6419 comm->bInterCGBondeds,comm->bInterCGMultiBody);
6421 if (dd->nc[XX] == 0)
6423 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6424 sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
6425 !bC ? "-rdd" : "-rcon",
6426 comm->eDLB!=edlbNO ? " or -dds" : "",
6427 bC ? " or your LINCS settings" : "");
6429 gmx_fatal_collective(FARGS,cr,NULL,
6430 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6431 "%s\n"
6432 "Look in the log file for details on the domain decomposition",
6433 cr->nnodes-cr->npmenodes,limit,buf);
6435 set_dd_dim(fplog,dd);
6438 if (fplog)
6440 fprintf(fplog,
6441 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6442 dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
6445 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6446 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6448 gmx_fatal_collective(FARGS,cr,NULL,
6449 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6450 dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
6452 if (cr->npmenodes > dd->nnodes)
6454 gmx_fatal_collective(FARGS,cr,NULL,
6455 "The number of separate PME node (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
6457 if (cr->npmenodes > 0)
6459 comm->npmenodes = cr->npmenodes;
6461 else
6463 comm->npmenodes = dd->nnodes;
6466 if (EEL_PME(ir->coulombtype))
6468 /* The following choices should match those
6469 * in comm_cost_est in domdec_setup.c.
6470 * Note that here the checks have to take into account
6471 * that the decomposition might occur in a different order than xyz
6472 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6473 * in which case they will not match those in comm_cost_est,
6474 * but since that is mainly for testing purposes that's fine.
6476 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
6477 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6478 getenv("GMX_PMEONEDD") == NULL)
6480 comm->npmedecompdim = 2;
6481 comm->npmenodes_x = dd->nc[XX];
6482 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
6484 else
6486 /* In case nc is 1 in both x and y we could still choose to
6487 * decompose pme in y instead of x, but we use x for simplicity.
6489 comm->npmedecompdim = 1;
6490 if (dd->dim[0] == YY)
6492 comm->npmenodes_x = 1;
6493 comm->npmenodes_y = comm->npmenodes;
6495 else
6497 comm->npmenodes_x = comm->npmenodes;
6498 comm->npmenodes_y = 1;
6501 if (fplog)
6503 fprintf(fplog,"PME domain decomposition: %d x %d x %d\n",
6504 comm->npmenodes_x,comm->npmenodes_y,1);
6507 else
6509 comm->npmedecompdim = 0;
6510 comm->npmenodes_x = 0;
6511 comm->npmenodes_y = 0;
6514 /* Technically we don't need both of these,
6515 * but it simplifies code not having to recalculate it.
6517 *npme_x = comm->npmenodes_x;
6518 *npme_y = comm->npmenodes_y;
6520 snew(comm->slb_frac,DIM);
6521 if (comm->eDLB == edlbNO)
6523 comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
6524 comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
6525 comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
6528 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6530 if (comm->bBondComm || comm->eDLB != edlbNO)
6532 /* Set the bonded communication distance to halfway
6533 * the minimum and the maximum,
6534 * since the extra communication cost is nearly zero.
6536 acs = average_cellsize_min(dd,ddbox);
6537 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6538 if (comm->eDLB != edlbNO)
6540 /* Check if this does not limit the scaling */
6541 comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
6543 if (!comm->bBondComm)
6545 /* Without bBondComm do not go beyond the n.b. cut-off */
6546 comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
6547 if (comm->cellsize_limit >= comm->cutoff)
6549 /* We don't loose a lot of efficieny
6550 * when increasing it to the n.b. cut-off.
6551 * It can even be slightly faster, because we need
6552 * less checks for the communication setup.
6554 comm->cutoff_mbody = comm->cutoff;
6557 /* Check if we did not end up below our original limit */
6558 comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);
6560 if (comm->cutoff_mbody > comm->cellsize_limit)
6562 comm->cellsize_limit = comm->cutoff_mbody;
6565 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6568 if (debug)
6570 fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
6571 "cellsize limit %f\n",
6572 comm->bBondComm,comm->cellsize_limit);
6575 if (MASTER(cr))
6577 check_dd_restrictions(cr,dd,ir,fplog);
6580 comm->partition_step = INT_MIN;
6581 dd->ddp_count = 0;
6583 clear_dd_cycle_counts(dd);
6585 return dd;
6588 static void set_dlb_limits(gmx_domdec_t *dd)
6591 int d;
6593 for(d=0; d<dd->ndim; d++)
6595 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
6596 dd->comm->cellsize_min[dd->dim[d]] =
6597 dd->comm->cellsize_min_dlb[dd->dim[d]];
6602 static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_large_int_t step)
6604 gmx_domdec_t *dd;
6605 gmx_domdec_comm_t *comm;
6606 real cellsize_min;
6607 int d,nc,i;
6608 char buf[STRLEN];
6610 dd = cr->dd;
6611 comm = dd->comm;
6613 if (fplog)
6615 fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,buf),dd_force_imb_perf_loss(dd)*100);
6618 cellsize_min = comm->cellsize_min[dd->dim[0]];
6619 for(d=1; d<dd->ndim; d++)
6621 cellsize_min = min(cellsize_min,comm->cellsize_min[dd->dim[d]]);
6624 if (cellsize_min < comm->cellsize_limit*1.05)
6626 dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
6628 /* Change DLB from "auto" to "no". */
6629 comm->eDLB = edlbNO;
6631 return;
6634 dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
6635 comm->bDynLoadBal = TRUE;
6636 dd->bGridJump = TRUE;
6638 set_dlb_limits(dd);
6640 /* We can set the required cell size info here,
6641 * so we do not need to communicate this.
6642 * The grid is completely uniform.
6644 for(d=0; d<dd->ndim; d++)
6646 if (comm->root[d])
6648 comm->load[d].sum_m = comm->load[d].sum;
6650 nc = dd->nc[dd->dim[d]];
6651 for(i=0; i<nc; i++)
6653 comm->root[d]->cell_f[i] = i/(real)nc;
6654 if (d > 0)
6656 comm->root[d]->cell_f_max0[i] = i /(real)nc;
6657 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
6660 comm->root[d]->cell_f[nc] = 1.0;
6665 static char *init_bLocalCG(gmx_mtop_t *mtop)
6667 int ncg,cg;
6668 char *bLocalCG;
6670 ncg = ncg_mtop(mtop);
6671 snew(bLocalCG,ncg);
6672 for(cg=0; cg<ncg; cg++)
6674 bLocalCG[cg] = FALSE;
6677 return bLocalCG;
6680 void dd_init_bondeds(FILE *fplog,
6681 gmx_domdec_t *dd,gmx_mtop_t *mtop,
6682 gmx_vsite_t *vsite,gmx_constr_t constr,
6683 t_inputrec *ir,gmx_bool bBCheck,cginfo_mb_t *cginfo_mb)
6685 gmx_domdec_comm_t *comm;
6686 gmx_bool bBondComm;
6687 int d;
6689 dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);
6691 comm = dd->comm;
6693 if (comm->bBondComm)
6695 /* Communicate atoms beyond the cut-off for bonded interactions */
6696 comm = dd->comm;
6698 comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);
6700 comm->bLocalCG = init_bLocalCG(mtop);
6702 else
6704 /* Only communicate atoms based on cut-off */
6705 comm->cglink = NULL;
6706 comm->bLocalCG = NULL;
6710 static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
6711 t_inputrec *ir,
6712 gmx_bool bDynLoadBal,real dlb_scale,
6713 gmx_ddbox_t *ddbox)
6715 gmx_domdec_comm_t *comm;
6716 int d;
6717 ivec np;
6718 real limit,shrink;
6719 char buf[64];
6721 if (fplog == NULL)
6723 return;
6726 comm = dd->comm;
6728 if (bDynLoadBal)
6730 fprintf(fplog,"The maximum number of communication pulses is:");
6731 for(d=0; d<dd->ndim; d++)
6733 fprintf(fplog," %c %d",dim2char(dd->dim[d]),comm->cd[d].np_dlb);
6735 fprintf(fplog,"\n");
6736 fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
6737 fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
6738 fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
6739 for(d=0; d<DIM; d++)
6741 if (dd->nc[d] > 1)
6743 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
6745 shrink = 0;
6747 else
6749 shrink =
6750 comm->cellsize_min_dlb[d]/
6751 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6753 fprintf(fplog," %c %.2f",dim2char(d),shrink);
6756 fprintf(fplog,"\n");
6758 else
6760 set_dd_cell_sizes_slb(dd,ddbox,FALSE,np);
6761 fprintf(fplog,"The initial number of communication pulses is:");
6762 for(d=0; d<dd->ndim; d++)
6764 fprintf(fplog," %c %d",dim2char(dd->dim[d]),np[dd->dim[d]]);
6766 fprintf(fplog,"\n");
6767 fprintf(fplog,"The initial domain decomposition cell size is:");
6768 for(d=0; d<DIM; d++) {
6769 if (dd->nc[d] > 1)
6771 fprintf(fplog," %c %.2f nm",
6772 dim2char(d),dd->comm->cellsize_min[d]);
6775 fprintf(fplog,"\n\n");
6778 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
6780 fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
6781 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6782 "non-bonded interactions","",comm->cutoff);
6784 if (bDynLoadBal)
6786 limit = dd->comm->cellsize_limit;
6788 else
6790 if (dynamic_dd_box(ddbox,ir))
6792 fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
6794 limit = dd->comm->cellsize_min[XX];
6795 for(d=1; d<DIM; d++)
6797 limit = min(limit,dd->comm->cellsize_min[d]);
6801 if (comm->bInterCGBondeds)
6803 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6804 "two-body bonded interactions","(-rdd)",
6805 max(comm->cutoff,comm->cutoff_mbody));
6806 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6807 "multi-body bonded interactions","(-rdd)",
6808 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff,limit));
6810 if (dd->vsite_comm)
6812 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6813 "virtual site constructions","(-rcon)",limit);
6815 if (dd->constraint_comm)
6817 sprintf(buf,"atoms separated by up to %d constraints",
6818 1+ir->nProjOrder);
6819 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6820 buf,"(-rcon)",limit);
6822 fprintf(fplog,"\n");
6825 fflush(fplog);
6828 void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
6829 t_inputrec *ir,t_forcerec *fr,
6830 gmx_ddbox_t *ddbox)
6832 gmx_domdec_comm_t *comm;
6833 int d,dim,npulse,npulse_d_max,npulse_d;
6834 gmx_bool bNoCutOff;
6835 int natoms_tot;
6836 real vol_frac;
6838 comm = dd->comm;
6840 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
6842 if (EEL_PME(ir->coulombtype))
6844 init_ddpme(dd,&comm->ddpme[0],0);
6845 if (comm->npmedecompdim >= 2)
6847 init_ddpme(dd,&comm->ddpme[1],1);
6850 else
6852 comm->npmenodes = 0;
6853 if (dd->pme_nodeid >= 0)
6855 gmx_fatal_collective(FARGS,NULL,dd,
6856 "Can not have separate PME nodes without PME electrostatics");
6860 /* If each molecule is a single charge group
6861 * or we use domain decomposition for each periodic dimension,
6862 * we do not need to take pbc into account for the bonded interactions.
6864 if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
6865 (dd->nc[XX]>1 && dd->nc[YY]>1 && (dd->nc[ZZ]>1 || fr->ePBC==epbcXY)))
6867 fr->bMolPBC = FALSE;
6869 else
6871 fr->bMolPBC = TRUE;
6874 if (debug)
6876 fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
6878 if (comm->eDLB != edlbNO)
6880 /* Determine the maximum number of comm. pulses in one dimension */
6882 comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
6884 /* Determine the maximum required number of grid pulses */
6885 if (comm->cellsize_limit >= comm->cutoff)
6887 /* Only a single pulse is required */
6888 npulse = 1;
6890 else if (!bNoCutOff && comm->cellsize_limit > 0)
6892 /* We round down slightly here to avoid overhead due to the latency
6893 * of extra communication calls when the cut-off
6894 * would be only slightly longer than the cell size.
6895 * Later cellsize_limit is redetermined,
6896 * so we can not miss interactions due to this rounding.
6898 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
6900 else
6902 /* There is no cell size limit */
6903 npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
6906 if (!bNoCutOff && npulse > 1)
6908 /* See if we can do with less pulses, based on dlb_scale */
6909 npulse_d_max = 0;
6910 for(d=0; d<dd->ndim; d++)
6912 dim = dd->dim[d];
6913 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
6914 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
6915 npulse_d_max = max(npulse_d_max,npulse_d);
6917 npulse = min(npulse,npulse_d_max);
6920 /* This env var can override npulse */
6921 d = dd_nst_env(fplog,"GMX_DD_NPULSE",0);
6922 if (d > 0)
6924 npulse = d;
6927 comm->maxpulse = 1;
6928 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
6929 for(d=0; d<dd->ndim; d++)
6931 comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1);
6932 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
6933 snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
6934 comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
6935 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
6937 comm->bVacDLBNoLimit = FALSE;
6941 /* cellsize_limit is set for LINCS in init_domain_decomposition */
6942 if (!comm->bVacDLBNoLimit)
6944 comm->cellsize_limit = max(comm->cellsize_limit,
6945 comm->cutoff/comm->maxpulse);
6947 comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
6948 /* Set the minimum cell size for each DD dimension */
6949 for(d=0; d<dd->ndim; d++)
6951 if (comm->bVacDLBNoLimit ||
6952 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
6954 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
6956 else
6958 comm->cellsize_min_dlb[dd->dim[d]] =
6959 comm->cutoff/comm->cd[d].np_dlb;
6962 if (comm->cutoff_mbody <= 0)
6964 comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
6966 if (comm->bDynLoadBal)
6968 set_dlb_limits(dd);
6972 print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
6973 if (comm->eDLB == edlbAUTO)
6975 if (fplog)
6977 fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
6979 print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
6982 if (ir->ePBC == epbcNONE)
6984 vol_frac = 1 - 1/(double)dd->nnodes;
6986 else
6988 vol_frac =
6989 (1 + comm_box_frac(dd->nc,comm->cutoff,ddbox))/(double)dd->nnodes;
6991 if (debug)
6993 fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
6995 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
6997 dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot);
7000 static void merge_cg_buffers(int ncell,
7001 gmx_domdec_comm_dim_t *cd, int pulse,
7002 int *ncg_cell,
7003 int *index_gl, int *recv_i,
7004 rvec *cg_cm, rvec *recv_vr,
7005 int *cgindex,
7006 cginfo_mb_t *cginfo_mb,int *cginfo)
7008 gmx_domdec_ind_t *ind,*ind_p;
7009 int p,cell,c,cg,cg0,cg1,cg_gl,nat;
7010 int shift,shift_at;
7012 ind = &cd->ind[pulse];
7014 /* First correct the already stored data */
7015 shift = ind->nrecv[ncell];
7016 for(cell=ncell-1; cell>=0; cell--)
7018 shift -= ind->nrecv[cell];
7019 if (shift > 0)
7021 /* Move the cg's present from previous grid pulses */
7022 cg0 = ncg_cell[ncell+cell];
7023 cg1 = ncg_cell[ncell+cell+1];
7024 cgindex[cg1+shift] = cgindex[cg1];
7025 for(cg=cg1-1; cg>=cg0; cg--)
7027 index_gl[cg+shift] = index_gl[cg];
7028 copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
7029 cgindex[cg+shift] = cgindex[cg];
7030 cginfo[cg+shift] = cginfo[cg];
7032 /* Correct the already stored send indices for the shift */
7033 for(p=1; p<=pulse; p++)
7035 ind_p = &cd->ind[p];
7036 cg0 = 0;
7037 for(c=0; c<cell; c++)
7039 cg0 += ind_p->nsend[c];
7041 cg1 = cg0 + ind_p->nsend[cell];
7042 for(cg=cg0; cg<cg1; cg++)
7044 ind_p->index[cg] += shift;
7050 /* Merge in the communicated buffers */
7051 shift = 0;
7052 shift_at = 0;
7053 cg0 = 0;
7054 for(cell=0; cell<ncell; cell++)
7056 cg1 = ncg_cell[ncell+cell+1] + shift;
7057 if (shift_at > 0)
7059 /* Correct the old cg indices */
7060 for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++)
7062 cgindex[cg+1] += shift_at;
7065 for(cg=0; cg<ind->nrecv[cell]; cg++)
7067 /* Copy this charge group from the buffer */
7068 index_gl[cg1] = recv_i[cg0];
7069 copy_rvec(recv_vr[cg0],cg_cm[cg1]);
7070 /* Add it to the cgindex */
7071 cg_gl = index_gl[cg1];
7072 cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
7073 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
7074 cgindex[cg1+1] = cgindex[cg1] + nat;
7075 cg0++;
7076 cg1++;
7077 shift_at += nat;
7079 shift += ind->nrecv[cell];
7080 ncg_cell[ncell+cell+1] = cg1;
7084 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
7085 int nzone,int cg0,const int *cgindex)
7087 int cg,zone,p;
7089 /* Store the atom block boundaries for easy copying of communication buffers
7091 cg = cg0;
7092 for(zone=0; zone<nzone; zone++)
7094 for(p=0; p<cd->np; p++) {
7095 cd->ind[p].cell2at0[zone] = cgindex[cg];
7096 cg += cd->ind[p].nrecv[zone];
7097 cd->ind[p].cell2at1[zone] = cgindex[cg];
7102 static gmx_bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
7104 int i;
7105 gmx_bool bMiss;
7107 bMiss = FALSE;
7108 for(i=link->index[cg_gl]; i<link->index[cg_gl+1]; i++)
7110 if (!bLocalCG[link->a[i]])
7112 bMiss = TRUE;
7116 return bMiss;
7119 static void setup_dd_communication(gmx_domdec_t *dd,
7120 matrix box,gmx_ddbox_t *ddbox,t_forcerec *fr)
7122 int dim_ind,dim,dim0,dim1=-1,dim2=-1,dimd,p,nat_tot;
7123 int nzone,nzone_send,zone,zonei,cg0,cg1;
7124 int c,i,j,cg,cg_gl,nrcg;
7125 int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
7126 gmx_domdec_comm_t *comm;
7127 gmx_domdec_zones_t *zones;
7128 gmx_domdec_comm_dim_t *cd;
7129 gmx_domdec_ind_t *ind;
7130 cginfo_mb_t *cginfo_mb;
7131 gmx_bool bBondComm,bDist2B,bDistMB,bDistMB_pulse,bDistBonded,bScrew;
7132 real r_mb,r_comm2,r_scomm2,r_bcomm2,r,r_0,r_1,r2,rb2,r2inc,inv_ncg,tric_sh;
7133 rvec rb,rn;
7134 real corner[DIM][4],corner_round_0=0,corner_round_1[4];
7135 real bcorner[DIM],bcorner_round_1=0;
7136 ivec tric_dist;
7137 rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
7138 real skew_fac2_d,skew_fac_01;
7139 rvec sf2_round;
7140 int nsend,nat;
7142 if (debug)
7144 fprintf(debug,"Setting up DD communication\n");
7147 comm = dd->comm;
7148 cg_cm = fr->cg_cm;
7150 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
7152 dim = dd->dim[dim_ind];
7154 /* Check if we need to use triclinic distances */
7155 tric_dist[dim_ind] = 0;
7156 for(i=0; i<=dim_ind; i++)
7158 if (ddbox->tric_dir[dd->dim[i]])
7160 tric_dist[dim_ind] = 1;
7165 bBondComm = comm->bBondComm;
7167 /* Do we need to determine extra distances for multi-body bondeds? */
7168 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
7170 /* Do we need to determine extra distances for only two-body bondeds? */
7171 bDist2B = (bBondComm && !bDistMB);
7173 r_comm2 = sqr(comm->cutoff);
7174 r_bcomm2 = sqr(comm->cutoff_mbody);
7176 if (debug)
7178 fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
7181 zones = &comm->zones;
7183 dim0 = dd->dim[0];
7184 /* The first dimension is equal for all cells */
7185 corner[0][0] = comm->cell_x0[dim0];
7186 if (bDistMB)
7188 bcorner[0] = corner[0][0];
7190 if (dd->ndim >= 2)
7192 dim1 = dd->dim[1];
7193 /* This cell row is only seen from the first row */
7194 corner[1][0] = comm->cell_x0[dim1];
7195 /* All rows can see this row */
7196 corner[1][1] = comm->cell_x0[dim1];
7197 if (dd->bGridJump)
7199 corner[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
7200 if (bDistMB)
7202 /* For the multi-body distance we need the maximum */
7203 bcorner[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
7206 /* Set the upper-right corner for rounding */
7207 corner_round_0 = comm->cell_x1[dim0];
7209 if (dd->ndim >= 3)
7211 dim2 = dd->dim[2];
7212 for(j=0; j<4; j++)
7214 corner[2][j] = comm->cell_x0[dim2];
7216 if (dd->bGridJump)
7218 /* Use the maximum of the i-cells that see a j-cell */
7219 for(i=0; i<zones->nizone; i++)
7221 for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
7223 if (j >= 4)
7225 corner[2][j-4] =
7226 max(corner[2][j-4],
7227 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7231 if (bDistMB)
7233 /* For the multi-body distance we need the maximum */
7234 bcorner[2] = comm->cell_x0[dim2];
7235 for(i=0; i<2; i++)
7237 for(j=0; j<2; j++)
7239 bcorner[2] = max(bcorner[2],
7240 comm->zone_d2[i][j].p1_0);
7246 /* Set the upper-right corner for rounding */
7247 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7248 * Only cell (0,0,0) can see cell 7 (1,1,1)
7250 corner_round_1[0] = comm->cell_x1[dim1];
7251 corner_round_1[3] = comm->cell_x1[dim1];
7252 if (dd->bGridJump)
7254 corner_round_1[0] = max(comm->cell_x1[dim1],
7255 comm->zone_d1[1].mch1);
7256 if (bDistMB)
7258 /* For the multi-body distance we need the maximum */
7259 bcorner_round_1 = max(comm->cell_x1[dim1],
7260 comm->zone_d1[1].p1_1);
7266 /* Triclinic stuff */
7267 normal = ddbox->normal;
7268 skew_fac_01 = 0;
7269 if (dd->ndim >= 2)
7271 v_0 = ddbox->v[dim0];
7272 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
7274 /* Determine the coupling coefficient for the distances
7275 * to the cell planes along dim0 and dim1 through dim2.
7276 * This is required for correct rounding.
7278 skew_fac_01 =
7279 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
7280 if (debug)
7282 fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
7286 if (dd->ndim >= 3)
7288 v_1 = ddbox->v[dim1];
7291 zone_cg_range = zones->cg_range;
7292 index_gl = dd->index_gl;
7293 cgindex = dd->cgindex;
7294 cginfo_mb = fr->cginfo_mb;
7296 zone_cg_range[0] = 0;
7297 zone_cg_range[1] = dd->ncg_home;
7298 comm->zone_ncg1[0] = dd->ncg_home;
7299 pos_cg = dd->ncg_home;
7301 nat_tot = dd->nat_home;
7302 nzone = 1;
7303 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
7305 dim = dd->dim[dim_ind];
7306 cd = &comm->cd[dim_ind];
7308 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
7310 /* No pbc in this dimension, the first node should not comm. */
7311 nzone_send = 0;
7313 else
7315 nzone_send = nzone;
7318 bScrew = (dd->bScrewPBC && dim == XX);
7320 v_d = ddbox->v[dim];
7321 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
7323 cd->bInPlace = TRUE;
7324 for(p=0; p<cd->np; p++)
7326 /* Only atoms communicated in the first pulse are used
7327 * for multi-body bonded interactions or for bBondComm.
7329 bDistBonded = ((bDistMB || bDist2B) && p == 0);
7330 bDistMB_pulse = (bDistMB && bDistBonded);
7332 ind = &cd->ind[p];
7333 nsend = 0;
7334 nat = 0;
7335 for(zone=0; zone<nzone_send; zone++)
7337 if (tric_dist[dim_ind] && dim_ind > 0)
7339 /* Determine slightly more optimized skew_fac's
7340 * for rounding.
7341 * This reduces the number of communicated atoms
7342 * by about 10% for 3D DD of rhombic dodecahedra.
7344 for(dimd=0; dimd<dim; dimd++)
7346 sf2_round[dimd] = 1;
7347 if (ddbox->tric_dir[dimd])
7349 for(i=dd->dim[dimd]+1; i<DIM; i++)
7351 /* If we are shifted in dimension i
7352 * and the cell plane is tilted forward
7353 * in dimension i, skip this coupling.
7355 if (!(zones->shift[nzone+zone][i] &&
7356 ddbox->v[dimd][i][dimd] >= 0))
7358 sf2_round[dimd] +=
7359 sqr(ddbox->v[dimd][i][dimd]);
7362 sf2_round[dimd] = 1/sf2_round[dimd];
7367 zonei = zone_perm[dim_ind][zone];
7368 if (p == 0)
7370 /* Here we permutate the zones to obtain a convenient order
7371 * for neighbor searching
7373 cg0 = zone_cg_range[zonei];
7374 cg1 = zone_cg_range[zonei+1];
7376 else
7378 /* Look only at the cg's received in the previous grid pulse
7380 cg1 = zone_cg_range[nzone+zone+1];
7381 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
7383 ind->nsend[zone] = 0;
7384 for(cg=cg0; cg<cg1; cg++)
7386 r2 = 0;
7387 rb2 = 0;
7388 if (tric_dist[dim_ind] == 0)
7390 /* Rectangular direction, easy */
7391 r = cg_cm[cg][dim] - corner[dim_ind][zone];
7392 if (r > 0)
7394 r2 += r*r;
7396 if (bDistMB_pulse)
7398 r = cg_cm[cg][dim] - bcorner[dim_ind];
7399 if (r > 0)
7401 rb2 += r*r;
7404 /* Rounding gives at most a 16% reduction
7405 * in communicated atoms
7407 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7409 r = cg_cm[cg][dim0] - corner_round_0;
7410 /* This is the first dimension, so always r >= 0 */
7411 r2 += r*r;
7412 if (bDistMB_pulse)
7414 rb2 += r*r;
7417 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7419 r = cg_cm[cg][dim1] - corner_round_1[zone];
7420 if (r > 0)
7422 r2 += r*r;
7424 if (bDistMB_pulse)
7426 r = cg_cm[cg][dim1] - bcorner_round_1;
7427 if (r > 0)
7429 rb2 += r*r;
7434 else
7436 /* Triclinic direction, more complicated */
7437 clear_rvec(rn);
7438 clear_rvec(rb);
7439 /* Rounding, conservative as the skew_fac multiplication
7440 * will slightly underestimate the distance.
7442 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7444 rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
7445 for(i=dim0+1; i<DIM; i++)
7447 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7449 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7450 if (bDistMB_pulse)
7452 rb[dim0] = rn[dim0];
7453 rb2 = r2;
7455 /* Take care that the cell planes along dim0 might not
7456 * be orthogonal to those along dim1 and dim2.
7458 for(i=1; i<=dim_ind; i++)
7460 dimd = dd->dim[i];
7461 if (normal[dim0][dimd] > 0)
7463 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7464 if (bDistMB_pulse)
7466 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7471 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7473 rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
7474 tric_sh = 0;
7475 for(i=dim1+1; i<DIM; i++)
7477 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7479 rn[dim1] += tric_sh;
7480 if (rn[dim1] > 0)
7482 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7483 /* Take care of coupling of the distances
7484 * to the planes along dim0 and dim1 through dim2.
7486 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7487 /* Take care that the cell planes along dim1
7488 * might not be orthogonal to that along dim2.
7490 if (normal[dim1][dim2] > 0)
7492 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7495 if (bDistMB_pulse)
7497 rb[dim1] +=
7498 cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
7499 if (rb[dim1] > 0)
7501 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7502 /* Take care of coupling of the distances
7503 * to the planes along dim0 and dim1 through dim2.
7505 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7506 /* Take care that the cell planes along dim1
7507 * might not be orthogonal to that along dim2.
7509 if (normal[dim1][dim2] > 0)
7511 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7516 /* The distance along the communication direction */
7517 rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
7518 tric_sh = 0;
7519 for(i=dim+1; i<DIM; i++)
7521 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7523 rn[dim] += tric_sh;
7524 if (rn[dim] > 0)
7526 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7527 /* Take care of coupling of the distances
7528 * to the planes along dim0 and dim1 through dim2.
7530 if (dim_ind == 1 && zonei == 1)
7532 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7535 if (bDistMB_pulse)
7537 clear_rvec(rb);
7538 rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
7539 if (rb[dim] > 0)
7541 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
7542 /* Take care of coupling of the distances
7543 * to the planes along dim0 and dim1 through dim2.
7545 if (dim_ind == 1 && zonei == 1)
7547 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
7553 if (r2 < r_comm2 ||
7554 (bDistBonded &&
7555 ((bDistMB && rb2 < r_bcomm2) ||
7556 (bDist2B && r2 < r_bcomm2)) &&
7557 (!bBondComm ||
7558 (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
7559 missing_link(comm->cglink,index_gl[cg],
7560 comm->bLocalCG)))))
7562 /* Make an index to the local charge groups */
7563 if (nsend+1 > ind->nalloc)
7565 ind->nalloc = over_alloc_large(nsend+1);
7566 srenew(ind->index,ind->nalloc);
7568 if (nsend+1 > comm->nalloc_int)
7570 comm->nalloc_int = over_alloc_large(nsend+1);
7571 srenew(comm->buf_int,comm->nalloc_int);
7573 ind->index[nsend] = cg;
7574 comm->buf_int[nsend] = index_gl[cg];
7575 ind->nsend[zone]++;
7576 vec_rvec_check_alloc(&comm->vbuf,nsend+1);
7578 if (dd->ci[dim] == 0)
7580 /* Correct cg_cm for pbc */
7581 rvec_add(cg_cm[cg],box[dim],comm->vbuf.v[nsend]);
7582 if (bScrew)
7584 comm->vbuf.v[nsend][YY] =
7585 box[YY][YY]-comm->vbuf.v[nsend][YY];
7586 comm->vbuf.v[nsend][ZZ] =
7587 box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
7590 else
7592 copy_rvec(cg_cm[cg],comm->vbuf.v[nsend]);
7594 nsend++;
7595 nat += cgindex[cg+1] - cgindex[cg];
7599 /* Clear the counts in case we do not have pbc */
7600 for(zone=nzone_send; zone<nzone; zone++)
7602 ind->nsend[zone] = 0;
7604 ind->nsend[nzone] = nsend;
7605 ind->nsend[nzone+1] = nat;
7606 /* Communicate the number of cg's and atoms to receive */
7607 dd_sendrecv_int(dd, dim_ind, dddirBackward,
7608 ind->nsend, nzone+2,
7609 ind->nrecv, nzone+2);
7611 /* The rvec buffer is also required for atom buffers of size nsend
7612 * in dd_move_x and dd_move_f.
7614 vec_rvec_check_alloc(&comm->vbuf,ind->nsend[nzone+1]);
7616 if (p > 0)
7618 /* We can receive in place if only the last zone is not empty */
7619 for(zone=0; zone<nzone-1; zone++)
7621 if (ind->nrecv[zone] > 0)
7623 cd->bInPlace = FALSE;
7626 if (!cd->bInPlace)
7628 /* The int buffer is only required here for the cg indices */
7629 if (ind->nrecv[nzone] > comm->nalloc_int2)
7631 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
7632 srenew(comm->buf_int2,comm->nalloc_int2);
7634 /* The rvec buffer is also required for atom buffers
7635 * of size nrecv in dd_move_x and dd_move_f.
7637 i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
7638 vec_rvec_check_alloc(&comm->vbuf2,i);
7642 /* Make space for the global cg indices */
7643 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
7644 || dd->cg_nalloc == 0)
7646 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
7647 srenew(index_gl,dd->cg_nalloc);
7648 srenew(cgindex,dd->cg_nalloc+1);
7650 /* Communicate the global cg indices */
7651 if (cd->bInPlace)
7653 recv_i = index_gl + pos_cg;
7655 else
7657 recv_i = comm->buf_int2;
7659 dd_sendrecv_int(dd, dim_ind, dddirBackward,
7660 comm->buf_int, nsend,
7661 recv_i, ind->nrecv[nzone]);
7663 /* Make space for cg_cm */
7664 if (pos_cg + ind->nrecv[nzone] > fr->cg_nalloc)
7666 dd_realloc_fr_cg(fr,pos_cg + ind->nrecv[nzone]);
7667 cg_cm = fr->cg_cm;
7669 /* Communicate cg_cm */
7670 if (cd->bInPlace)
7672 recv_vr = cg_cm + pos_cg;
7674 else
7676 recv_vr = comm->vbuf2.v;
7678 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
7679 comm->vbuf.v, nsend,
7680 recv_vr, ind->nrecv[nzone]);
7682 /* Make the charge group index */
7683 if (cd->bInPlace)
7685 zone = (p == 0 ? 0 : nzone - 1);
7686 while (zone < nzone)
7688 for(cg=0; cg<ind->nrecv[zone]; cg++)
7690 cg_gl = index_gl[pos_cg];
7691 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
7692 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
7693 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
7694 if (bBondComm)
7696 /* Update the charge group presence,
7697 * so we can use it in the next pass of the loop.
7699 comm->bLocalCG[cg_gl] = TRUE;
7701 pos_cg++;
7703 if (p == 0)
7705 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
7707 zone++;
7708 zone_cg_range[nzone+zone] = pos_cg;
7711 else
7713 /* This part of the code is never executed with bBondComm. */
7714 merge_cg_buffers(nzone,cd,p,zone_cg_range,
7715 index_gl,recv_i,cg_cm,recv_vr,
7716 cgindex,fr->cginfo_mb,fr->cginfo);
7717 pos_cg += ind->nrecv[nzone];
7719 nat_tot += ind->nrecv[nzone+1];
7721 if (!cd->bInPlace)
7723 /* Store the atom block for easy copying of communication buffers */
7724 make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
7726 nzone += nzone;
7728 dd->index_gl = index_gl;
7729 dd->cgindex = cgindex;
7731 dd->ncg_tot = zone_cg_range[zones->n];
7732 dd->nat_tot = nat_tot;
7733 comm->nat[ddnatHOME] = dd->nat_home;
7734 for(i=ddnatZONE; i<ddnatNR; i++)
7736 comm->nat[i] = dd->nat_tot;
7739 if (!bBondComm)
7741 /* We don't need to update cginfo, since that was alrady done above.
7742 * So we pass NULL for the forcerec.
7744 dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
7745 NULL,comm->bLocalCG);
7748 if (debug)
7750 fprintf(debug,"Finished setting up DD communication, zones:");
7751 for(c=0; c<zones->n; c++)
7753 fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
7755 fprintf(debug,"\n");
7759 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
7761 int c;
7763 for(c=0; c<zones->nizone; c++)
7765 zones->izone[c].cg1 = zones->cg_range[c+1];
7766 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
7767 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
7771 static int comp_cgsort(const void *a,const void *b)
7773 int comp;
7775 gmx_cgsort_t *cga,*cgb;
7776 cga = (gmx_cgsort_t *)a;
7777 cgb = (gmx_cgsort_t *)b;
7779 comp = cga->nsc - cgb->nsc;
7780 if (comp == 0)
7782 comp = cga->ind_gl - cgb->ind_gl;
7785 return comp;
7788 static void order_int_cg(int n,gmx_cgsort_t *sort,
7789 int *a,int *buf)
7791 int i;
7793 /* Order the data */
7794 for(i=0; i<n; i++)
7796 buf[i] = a[sort[i].ind];
7799 /* Copy back to the original array */
7800 for(i=0; i<n; i++)
7802 a[i] = buf[i];
7806 static void order_vec_cg(int n,gmx_cgsort_t *sort,
7807 rvec *v,rvec *buf)
7809 int i;
7811 /* Order the data */
7812 for(i=0; i<n; i++)
7814 copy_rvec(v[sort[i].ind],buf[i]);
7817 /* Copy back to the original array */
7818 for(i=0; i<n; i++)
7820 copy_rvec(buf[i],v[i]);
7824 static void order_vec_atom(int ncg,int *cgindex,gmx_cgsort_t *sort,
7825 rvec *v,rvec *buf)
7827 int a,atot,cg,cg0,cg1,i;
7829 /* Order the data */
7830 a = 0;
7831 for(cg=0; cg<ncg; cg++)
7833 cg0 = cgindex[sort[cg].ind];
7834 cg1 = cgindex[sort[cg].ind+1];
7835 for(i=cg0; i<cg1; i++)
7837 copy_rvec(v[i],buf[a]);
7838 a++;
7841 atot = a;
7843 /* Copy back to the original array */
7844 for(a=0; a<atot; a++)
7846 copy_rvec(buf[a],v[a]);
7850 static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
7851 int nsort_new,gmx_cgsort_t *sort_new,
7852 gmx_cgsort_t *sort1)
7854 int i1,i2,i_new;
7856 /* The new indices are not very ordered, so we qsort them */
7857 qsort_threadsafe(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);
7859 /* sort2 is already ordered, so now we can merge the two arrays */
7860 i1 = 0;
7861 i2 = 0;
7862 i_new = 0;
7863 while(i2 < nsort2 || i_new < nsort_new)
7865 if (i2 == nsort2)
7867 sort1[i1++] = sort_new[i_new++];
7869 else if (i_new == nsort_new)
7871 sort1[i1++] = sort2[i2++];
7873 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
7874 (sort2[i2].nsc == sort_new[i_new].nsc &&
7875 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
7877 sort1[i1++] = sort2[i2++];
7879 else
7881 sort1[i1++] = sort_new[i_new++];
7886 static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
7887 rvec *cgcm,t_forcerec *fr,t_state *state,
7888 int ncg_home_old)
7890 gmx_domdec_sort_t *sort;
7891 gmx_cgsort_t *cgsort,*sort_i;
7892 int ncg_new,nsort2,nsort_new,i,cell_index,*ibuf,cgsize;
7893 rvec *vbuf;
7895 sort = dd->comm->sort;
7897 if (dd->ncg_home > sort->sort_nalloc)
7899 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
7900 srenew(sort->sort1,sort->sort_nalloc);
7901 srenew(sort->sort2,sort->sort_nalloc);
7904 if (ncg_home_old >= 0)
7906 /* The charge groups that remained in the same ns grid cell
7907 * are completely ordered. So we can sort efficiently by sorting
7908 * the charge groups that did move into the stationary list.
7910 ncg_new = 0;
7911 nsort2 = 0;
7912 nsort_new = 0;
7913 for(i=0; i<dd->ncg_home; i++)
7915 /* Check if this cg did not move to another node */
7916 cell_index = fr->ns.grid->cell_index[i];
7917 if (cell_index != 4*fr->ns.grid->ncells)
7919 if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
7921 /* This cg is new on this node or moved ns grid cell */
7922 if (nsort_new >= sort->sort_new_nalloc)
7924 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
7925 srenew(sort->sort_new,sort->sort_new_nalloc);
7927 sort_i = &(sort->sort_new[nsort_new++]);
7929 else
7931 /* This cg did not move */
7932 sort_i = &(sort->sort2[nsort2++]);
7934 /* Sort on the ns grid cell indices
7935 * and the global topology index
7937 sort_i->nsc = cell_index;
7938 sort_i->ind_gl = dd->index_gl[i];
7939 sort_i->ind = i;
7940 ncg_new++;
7943 if (debug)
7945 fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
7946 nsort2,nsort_new);
7948 /* Sort efficiently */
7949 ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,sort->sort1);
7951 else
7953 cgsort = sort->sort1;
7954 ncg_new = 0;
7955 for(i=0; i<dd->ncg_home; i++)
7957 /* Sort on the ns grid cell indices
7958 * and the global topology index
7960 cgsort[i].nsc = fr->ns.grid->cell_index[i];
7961 cgsort[i].ind_gl = dd->index_gl[i];
7962 cgsort[i].ind = i;
7963 if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
7965 ncg_new++;
7968 if (debug)
7970 fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
7972 /* Determine the order of the charge groups using qsort */
7973 qsort_threadsafe(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
7975 cgsort = sort->sort1;
7977 /* We alloc with the old size, since cgindex is still old */
7978 vec_rvec_check_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
7979 vbuf = dd->comm->vbuf.v;
7981 /* Remove the charge groups which are no longer at home here */
7982 dd->ncg_home = ncg_new;
7984 /* Reorder the state */
7985 for(i=0; i<estNR; i++)
7987 if (EST_DISTR(i) && state->flags & (1<<i))
7989 switch (i)
7991 case estX:
7992 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->x,vbuf);
7993 break;
7994 case estV:
7995 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->v,vbuf);
7996 break;
7997 case estSDX:
7998 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->sd_X,vbuf);
7999 break;
8000 case estCGP:
8001 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->cg_p,vbuf);
8002 break;
8003 case estLD_RNG:
8004 case estLD_RNGI:
8005 case estDISRE_INITF:
8006 case estDISRE_RM3TAV:
8007 case estORIRE_INITF:
8008 case estORIRE_DTAV:
8009 /* No ordering required */
8010 break;
8011 default:
8012 gmx_incons("Unknown state entry encountered in dd_sort_state");
8013 break;
8017 /* Reorder cgcm */
8018 order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);
8020 if (dd->ncg_home+1 > sort->ibuf_nalloc)
8022 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
8023 srenew(sort->ibuf,sort->ibuf_nalloc);
8025 ibuf = sort->ibuf;
8026 /* Reorder the global cg index */
8027 order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
8028 /* Reorder the cginfo */
8029 order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
8030 /* Rebuild the local cg index */
8031 ibuf[0] = 0;
8032 for(i=0; i<dd->ncg_home; i++)
8034 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
8035 ibuf[i+1] = ibuf[i] + cgsize;
8037 for(i=0; i<dd->ncg_home+1; i++)
8039 dd->cgindex[i] = ibuf[i];
8041 /* Set the home atom number */
8042 dd->nat_home = dd->cgindex[dd->ncg_home];
8044 /* Copy the sorted ns cell indices back to the ns grid struct */
8045 for(i=0; i<dd->ncg_home; i++)
8047 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
8049 fr->ns.grid->nr = dd->ncg_home;
8052 static void add_dd_statistics(gmx_domdec_t *dd)
8054 gmx_domdec_comm_t *comm;
8055 int ddnat;
8057 comm = dd->comm;
8059 for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
8061 comm->sum_nat[ddnat-ddnatZONE] +=
8062 comm->nat[ddnat] - comm->nat[ddnat-1];
8064 comm->ndecomp++;
8067 void reset_dd_statistics_counters(gmx_domdec_t *dd)
8069 gmx_domdec_comm_t *comm;
8070 int ddnat;
8072 comm = dd->comm;
8074 /* Reset all the statistics and counters for total run counting */
8075 for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
8077 comm->sum_nat[ddnat-ddnatZONE] = 0;
8079 comm->ndecomp = 0;
8080 comm->nload = 0;
8081 comm->load_step = 0;
8082 comm->load_sum = 0;
8083 comm->load_max = 0;
8084 clear_ivec(comm->load_lim);
8085 comm->load_mdf = 0;
8086 comm->load_pme = 0;
8089 void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
8091 gmx_domdec_comm_t *comm;
8092 int ddnat;
8093 double av;
8095 comm = cr->dd->comm;
8097 gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);
8099 if (fplog == NULL)
8101 return;
8104 fprintf(fplog,"\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
8106 for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
8108 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
8109 switch(ddnat)
8111 case ddnatZONE:
8112 fprintf(fplog,
8113 " av. #atoms communicated per step for force: %d x %.1f\n",
8114 2,av);
8115 break;
8116 case ddnatVSITE:
8117 if (cr->dd->vsite_comm)
8119 fprintf(fplog,
8120 " av. #atoms communicated per step for vsites: %d x %.1f\n",
8121 (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
8122 av);
8124 break;
8125 case ddnatCON:
8126 if (cr->dd->constraint_comm)
8128 fprintf(fplog,
8129 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
8130 1 + ir->nLincsIter,av);
8132 break;
8133 default:
8134 gmx_incons(" Unknown type for DD statistics");
8137 fprintf(fplog,"\n");
8139 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
8141 print_dd_load_av(fplog,cr->dd);
8145 void dd_partition_system(FILE *fplog,
8146 gmx_large_int_t step,
8147 t_commrec *cr,
8148 gmx_bool bMasterState,
8149 int nstglobalcomm,
8150 t_state *state_global,
8151 gmx_mtop_t *top_global,
8152 t_inputrec *ir,
8153 t_state *state_local,
8154 rvec **f,
8155 t_mdatoms *mdatoms,
8156 gmx_localtop_t *top_local,
8157 t_forcerec *fr,
8158 gmx_vsite_t *vsite,
8159 gmx_shellfc_t shellfc,
8160 gmx_constr_t constr,
8161 t_nrnb *nrnb,
8162 gmx_wallcycle_t wcycle,
8163 gmx_bool bVerbose)
8165 gmx_domdec_t *dd;
8166 gmx_domdec_comm_t *comm;
8167 gmx_ddbox_t ddbox={0};
8168 t_block *cgs_gl;
8169 gmx_large_int_t step_pcoupl;
8170 rvec cell_ns_x0,cell_ns_x1;
8171 int i,j,n,cg0=0,ncg_home_old=-1,nat_f_novirsum;
8172 gmx_bool bBoxChanged,bNStGlobalComm,bDoDLB,bCheckDLB,bTurnOnDLB,bLogLoad;
8173 gmx_bool bRedist,bSortCG,bResortAll;
8174 ivec ncells_old,np;
8175 real grid_density;
8176 char sbuf[22];
8178 dd = cr->dd;
8179 comm = dd->comm;
8181 bBoxChanged = (bMasterState || DEFORM(*ir));
8182 if (ir->epc != epcNO)
8184 /* With nstcalcenery > 1 pressure coupling happens.
8185 * one step after calculating the energies.
8186 * Box scaling happens at the end of the MD step,
8187 * after the DD partitioning.
8188 * We therefore have to do DLB in the first partitioning
8189 * after an MD step where P-coupling occured.
8190 * We need to determine the last step in which p-coupling occurred.
8191 * MRS -- need to validate this for vv?
8193 n = ir->nstcalcenergy;
8194 if (n == 1)
8196 step_pcoupl = step - 1;
8198 else
8200 step_pcoupl = ((step - 1)/n)*n + 1;
8202 if (step_pcoupl >= comm->partition_step)
8204 bBoxChanged = TRUE;
8208 bNStGlobalComm = (step >= comm->partition_step + nstglobalcomm);
8210 if (!comm->bDynLoadBal)
8212 bDoDLB = FALSE;
8214 else
8216 /* Should we do dynamic load balacing this step?
8217 * Since it requires (possibly expensive) global communication,
8218 * we might want to do DLB less frequently.
8220 if (bBoxChanged || ir->epc != epcNO)
8222 bDoDLB = bBoxChanged;
8224 else
8226 bDoDLB = bNStGlobalComm;
8230 /* Check if we have recorded loads on the nodes */
8231 if (comm->bRecordLoad && dd_load_count(comm))
8233 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
8235 /* Check if we should use DLB at the second partitioning
8236 * and every 100 partitionings,
8237 * so the extra communication cost is negligible.
8239 n = max(100,nstglobalcomm);
8240 bCheckDLB = (comm->n_load_collect == 0 ||
8241 comm->n_load_have % n == n-1);
8243 else
8245 bCheckDLB = FALSE;
8248 /* Print load every nstlog, first and last step to the log file */
8249 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
8250 comm->n_load_collect == 0 ||
8251 (step + ir->nstlist > ir->init_step + ir->nsteps));
8253 /* Avoid extra communication due to verbose screen output
8254 * when nstglobalcomm is set.
8256 if (bDoDLB || bLogLoad || bCheckDLB ||
8257 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
8259 get_load_distribution(dd,wcycle);
8260 if (DDMASTER(dd))
8262 if (bLogLoad)
8264 dd_print_load(fplog,dd,step-1);
8266 if (bVerbose)
8268 dd_print_load_verbose(dd);
8271 comm->n_load_collect++;
8273 if (bCheckDLB) {
8274 /* Since the timings are node dependent, the master decides */
8275 if (DDMASTER(dd))
8277 bTurnOnDLB =
8278 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
8279 if (debug)
8281 fprintf(debug,"step %s, imb loss %f\n",
8282 gmx_step_str(step,sbuf),
8283 dd_force_imb_perf_loss(dd));
8286 dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
8287 if (bTurnOnDLB)
8289 turn_on_dlb(fplog,cr,step);
8290 bDoDLB = TRUE;
8294 comm->n_load_have++;
8297 cgs_gl = &comm->cgs_gl;
8299 bRedist = FALSE;
8300 if (bMasterState)
8302 /* Clear the old state */
8303 clear_dd_indices(dd,0,0);
8305 set_ddbox(dd,bMasterState,cr,ir,state_global->box,
8306 TRUE,cgs_gl,state_global->x,&ddbox);
8308 get_cg_distribution(fplog,step,dd,cgs_gl,
8309 state_global->box,&ddbox,state_global->x);
8311 dd_distribute_state(dd,cgs_gl,
8312 state_global,state_local,f);
8314 dd_make_local_cgs(dd,&top_local->cgs);
8316 if (dd->ncg_home > fr->cg_nalloc)
8318 dd_realloc_fr_cg(fr,dd->ncg_home);
8320 calc_cgcm(fplog,0,dd->ncg_home,
8321 &top_local->cgs,state_local->x,fr->cg_cm);
8323 inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
8325 dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
8327 cg0 = 0;
8329 else if (state_local->ddp_count != dd->ddp_count)
8331 if (state_local->ddp_count > dd->ddp_count)
8333 gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
8336 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
8338 gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
8341 /* Clear the old state */
8342 clear_dd_indices(dd,0,0);
8344 /* Build the new indices */
8345 rebuild_cgindex(dd,cgs_gl->index,state_local);
8346 make_dd_indices(dd,cgs_gl->index,0);
8348 /* Redetermine the cg COMs */
8349 calc_cgcm(fplog,0,dd->ncg_home,
8350 &top_local->cgs,state_local->x,fr->cg_cm);
8352 inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
8354 dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
8356 set_ddbox(dd,bMasterState,cr,ir,state_local->box,
8357 TRUE,&top_local->cgs,state_local->x,&ddbox);
8359 bRedist = comm->bDynLoadBal;
8361 else
8363 /* We have the full state, only redistribute the cgs */
8365 /* Clear the non-home indices */
8366 clear_dd_indices(dd,dd->ncg_home,dd->nat_home);
8368 /* Avoid global communication for dim's without pbc and -gcom */
8369 if (!bNStGlobalComm)
8371 copy_rvec(comm->box0 ,ddbox.box0 );
8372 copy_rvec(comm->box_size,ddbox.box_size);
8374 set_ddbox(dd,bMasterState,cr,ir,state_local->box,
8375 bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);
8377 bBoxChanged = TRUE;
8378 bRedist = TRUE;
8380 /* For dim's without pbc and -gcom */
8381 copy_rvec(ddbox.box0 ,comm->box0 );
8382 copy_rvec(ddbox.box_size,comm->box_size);
8384 set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
8385 step,wcycle);
8387 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
8389 write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
8392 /* Check if we should sort the charge groups */
8393 if (comm->nstSortCG > 0)
8395 bSortCG = (bMasterState ||
8396 (bRedist && (step % comm->nstSortCG == 0)));
8398 else
8400 bSortCG = FALSE;
8403 ncg_home_old = dd->ncg_home;
8405 if (bRedist)
8407 cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
8408 state_local,f,fr,mdatoms,
8409 !bSortCG,nrnb);
8412 get_nsgrid_boundaries(fr->ns.grid,dd,
8413 state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
8414 dd->ncg_home,fr->cg_cm,
8415 cell_ns_x0,cell_ns_x1,&grid_density);
8417 if (bBoxChanged)
8419 comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
8422 copy_ivec(fr->ns.grid->n,ncells_old);
8423 grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
8424 state_local->box,cell_ns_x0,cell_ns_x1,
8425 fr->rlistlong,grid_density);
8426 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
8427 copy_ivec(ddbox.tric_dir,comm->tric_dir);
8429 if (bSortCG)
8431 /* Sort the state on charge group position.
8432 * This enables exact restarts from this step.
8433 * It also improves performance by about 15% with larger numbers
8434 * of atoms per node.
8437 /* Fill the ns grid with the home cell,
8438 * so we can sort with the indices.
8440 set_zones_ncg_home(dd);
8441 fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
8442 0,dd->ncg_home,fr->cg_cm);
8444 /* Check if we can user the old order and ns grid cell indices
8445 * of the charge groups to sort the charge groups efficiently.
8447 bResortAll = (bMasterState ||
8448 fr->ns.grid->n[XX] != ncells_old[XX] ||
8449 fr->ns.grid->n[YY] != ncells_old[YY] ||
8450 fr->ns.grid->n[ZZ] != ncells_old[ZZ]);
8452 if (debug)
8454 fprintf(debug,"Step %s, sorting the %d home charge groups\n",
8455 gmx_step_str(step,sbuf),dd->ncg_home);
8457 dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
8458 bResortAll ? -1 : ncg_home_old);
8459 /* Rebuild all the indices */
8460 cg0 = 0;
8461 ga2la_clear(dd->ga2la);
8464 /* Setup up the communication and communicate the coordinates */
8465 setup_dd_communication(dd,state_local->box,&ddbox,fr);
8467 /* Set the indices */
8468 make_dd_indices(dd,cgs_gl->index,cg0);
8470 /* Set the charge group boundaries for neighbor searching */
8471 set_cg_boundaries(&comm->zones);
8474 write_dd_pdb("dd_home",step,"dump",top_global,cr,
8475 -1,state_local->x,state_local->box);
8478 /* Extract a local topology from the global topology */
8479 for(i=0; i<dd->ndim; i++)
8481 np[dd->dim[i]] = comm->cd[i].np;
8483 dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
8484 comm->cellsize_min,np,
8485 fr,vsite,top_global,top_local);
8487 /* Set up the special atom communication */
8488 n = comm->nat[ddnatZONE];
8489 for(i=ddnatZONE+1; i<ddnatNR; i++)
8491 switch(i)
8493 case ddnatVSITE:
8494 if (vsite && vsite->n_intercg_vsite)
8496 n = dd_make_local_vsites(dd,n,top_local->idef.il);
8498 break;
8499 case ddnatCON:
8500 if (dd->bInterCGcons)
8502 /* Only for inter-cg constraints we need special code */
8503 n = dd_make_local_constraints(dd,n,top_global,
8504 constr,ir->nProjOrder,
8505 &top_local->idef.il[F_CONSTR]);
8507 break;
8508 default:
8509 gmx_incons("Unknown special atom type setup");
8511 comm->nat[i] = n;
8514 /* Make space for the extra coordinates for virtual site
8515 * or constraint communication.
8517 state_local->natoms = comm->nat[ddnatNR-1];
8518 if (state_local->natoms > state_local->nalloc)
8520 dd_realloc_state(state_local,f,state_local->natoms);
8523 if (fr->bF_NoVirSum)
8525 if (vsite && vsite->n_intercg_vsite)
8527 nat_f_novirsum = comm->nat[ddnatVSITE];
8529 else
8531 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
8533 nat_f_novirsum = dd->nat_tot;
8535 else
8537 nat_f_novirsum = dd->nat_home;
8541 else
8543 nat_f_novirsum = 0;
8546 /* Set the number of atoms required for the force calculation.
8547 * Forces need to be constrained when using a twin-range setup
8548 * or with energy minimization. For simple simulations we could
8549 * avoid some allocation, zeroing and copying, but this is
8550 * probably not worth the complications ande checking.
8552 forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
8553 dd->nat_tot,comm->nat[ddnatCON],nat_f_novirsum);
8555 /* We make the all mdatoms up to nat_tot_con.
8556 * We could save some work by only setting invmass
8557 * between nat_tot and nat_tot_con.
8559 /* This call also sets the new number of home particles to dd->nat_home */
8560 atoms2md(top_global,ir,
8561 comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);
8563 /* Now we have the charges we can sort the FE interactions */
8564 dd_sort_local_top(dd,mdatoms,top_local);
8566 if (shellfc)
8568 /* Make the local shell stuff, currently no communication is done */
8569 make_local_shells(cr,mdatoms,shellfc);
8572 if (ir->implicit_solvent)
8574 make_local_gb(cr,fr->born,ir->gb_algorithm);
8577 if (!(cr->duty & DUTY_PME))
8579 /* Send the charges to our PME only node */
8580 gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
8581 mdatoms->chargeA,mdatoms->chargeB,
8582 dd_pme_maxshift_x(dd),dd_pme_maxshift_y(dd));
8585 if (constr)
8587 set_constraints(constr,top_local,ir,mdatoms,cr);
8590 if (ir->ePull != epullNO)
8592 /* Update the local pull groups */
8593 dd_make_local_pull_groups(dd,ir->pull,mdatoms);
8596 if (ir->bRot)
8598 /* Update the local rotation groups */
8599 dd_make_local_rotation_groups(dd,ir->rot);
8603 add_dd_statistics(dd);
8605 /* Make sure we only count the cycles for this DD partitioning */
8606 clear_dd_cycle_counts(dd);
8608 /* Because the order of the atoms might have changed since
8609 * the last vsite construction, we need to communicate the constructing
8610 * atom coordinates again (for spreading the forces this MD step).
8612 dd_move_x_vsites(dd,state_local->box,state_local->x);
8614 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
8616 dd_move_x(dd,state_local->box,state_local->x);
8617 write_dd_pdb("dd_dump",step,"dump",top_global,cr,
8618 -1,state_local->x,state_local->box);
8621 /* Store the partitioning step */
8622 comm->partition_step = step;
8624 /* Increase the DD partitioning counter */
8625 dd->ddp_count++;
8626 /* The state currently matches this DD partitioning count, store it */
8627 state_local->ddp_count = dd->ddp_count;
8628 if (bMasterState)
8630 /* The DD master node knows the complete cg distribution,
8631 * store the count so we can possibly skip the cg info communication.
8633 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
8636 if (comm->DD_debug > 0)
8638 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
8639 check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
8640 "after partitioning");