1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
32 #include "domdec_network.h"
35 #include "chargegroup.h"
44 #include "gmx_wallcycle.h"
48 #include "mtop_util.h"
50 #include "gmx_ga2la.h"
60 #define DDRANK(dd,rank) (rank)
61 #define DDMASTERRANK(dd) (dd->masterrank)
63 typedef struct gmx_domdec_master
65 /* The cell boundaries */
67 /* The global charge group division */
68 int *ncg
; /* Number of home charge groups for each node */
69 int *index
; /* Index of nnodes+1 into cg */
70 int *cg
; /* Global charge group index */
71 int *nat
; /* Number of home atoms for each node. */
72 int *ibuf
; /* Buffer for communication */
73 rvec
*vbuf
; /* Buffer for state scattering and gathering */
74 } gmx_domdec_master_t
;
78 /* The numbers of charge groups to send and receive for each cell
79 * that requires communication, the last entry contains the total
80 * number of atoms that needs to be communicated.
82 int nsend
[DD_MAXIZONE
+2];
83 int nrecv
[DD_MAXIZONE
+2];
84 /* The charge groups to send */
87 /* The atom range for non-in-place communication */
88 int cell2at0
[DD_MAXIZONE
];
89 int cell2at1
[DD_MAXIZONE
];
94 int np
; /* Number of grid pulses in this dimension */
95 int np_dlb
; /* For dlb, for use with edlbAUTO */
96 gmx_domdec_ind_t
*ind
; /* The indices to communicate, size np */
98 bool bInPlace
; /* Can we communicate in place? */
99 } gmx_domdec_comm_dim_t
;
103 bool *bCellMin
; /* Temp. var.: is this cell size at the limit */
104 real
*cell_f
; /* State var.: cell boundaries, box relative */
105 real
*old_cell_f
; /* Temp. var.: old cell size */
106 real
*cell_f_max0
; /* State var.: max lower boundary, incl neighbors */
107 real
*cell_f_min1
; /* State var.: min upper boundary, incl neighbors */
108 real
*bound_min
; /* Temp. var.: lower limit for cell boundary */
109 real
*bound_max
; /* Temp. var.: upper limit for cell boundary */
110 bool bLimited
; /* State var.: is DLB limited in this dim and row */
111 real
*buf_ncd
; /* Temp. var. */
114 #define DD_NLOAD_MAX 9
116 /* Here floats are accurate enough, since these variables
117 * only influence the load balancing, not the actual MD results.
141 gmx_cgsort_t
*sort1
,*sort2
;
143 gmx_cgsort_t
*sort_new
;
155 /* This enum determines the order of the coordinates.
156 * ddnatHOME and ddnatZONE should be first and second,
157 * the others can be ordered as wanted.
159 enum { ddnatHOME
, ddnatZONE
, ddnatVSITE
, ddnatCON
, ddnatNR
};
161 enum { edlbAUTO
, edlbNO
, edlbYES
, edlbNR
};
162 const char *edlb_names
[edlbNR
] = { "auto", "no", "yes" };
166 int dim
; /* The dimension */
167 bool dim_match
;/* Tells if DD and PME dims match */
168 int nslab
; /* The number of PME slabs in this dimension */
169 real
*slb_dim_f
; /* Cell sizes for determining the PME comm. with SLB */
170 int *pp_min
; /* The minimum pp node location, size nslab */
171 int *pp_max
; /* The maximum pp node location,size nslab */
172 int maxshift
; /* The maximum shift for coordinate redistribution in PME */
177 real min0
; /* The minimum bottom of this zone */
178 real max1
; /* The maximum top of this zone */
179 real mch0
; /* The maximum bottom communicaton height for this zone */
180 real mch1
; /* The maximum top communicaton height for this zone */
181 real p1_0
; /* The bottom value of the first cell in this zone */
182 real p1_1
; /* The top value of the first cell in this zone */
185 typedef struct gmx_domdec_comm
187 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
188 * unless stated otherwise.
191 /* The number of decomposition dimensions for PME, 0: no PME */
193 /* The number of nodes doing PME (PP/PME or only PME) */
197 /* The communication setup including the PME only nodes */
198 bool bCartesianPP_PME
;
201 int *pmenodes
; /* size npmenodes */
202 int *ddindex2simnodeid
; /* size npmenodes, only with bCartesianPP
203 * but with bCartesianPP_PME */
204 gmx_ddpme_t ddpme
[2];
206 /* The DD particle-particle nodes only */
208 int *ddindex2ddnodeid
; /* size npmenode, only with bCartesianPP_PME */
210 /* The global charge groups */
213 /* Should we sort the cgs */
215 gmx_domdec_sort_t
*sort
;
217 /* Are there bonded and multi-body interactions between charge groups? */
218 bool bInterCGBondeds
;
219 bool bInterCGMultiBody
;
221 /* Data for the optional bonded interaction atom communication range */
228 /* Are we actually using DLB? */
231 /* Cell sizes for static load balancing, first index cartesian */
234 /* The width of the communicated boundaries */
237 /* The minimum cell size (including triclinic correction) */
239 /* For dlb, for use with edlbAUTO */
240 rvec cellsize_min_dlb
;
241 /* The lower limit for the DD cell size with DLB */
243 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
246 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
248 /* box0 and box_size are required with dim's without pbc and -gcom */
252 /* The cell boundaries */
256 /* The old location of the cell boundaries, to check cg displacements */
260 /* The communication setup and charge group boundaries for the zones */
261 gmx_domdec_zones_t zones
;
263 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
264 * cell boundaries of neighboring cells for dynamic load balancing.
266 gmx_ddzone_t zone_d1
[2];
267 gmx_ddzone_t zone_d2
[2][2];
269 /* The coordinate/force communication setup and indices */
270 gmx_domdec_comm_dim_t cd
[DIM
];
271 /* The maximum number of cells to communicate with in one dimension */
274 /* Which cg distribution is stored on the master node */
275 int master_cg_ddp_count
;
277 /* The number of cg's received from the direct neighbors */
278 int zone_ncg1
[DD_MAXZONE
];
280 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
283 /* Communication buffer for general use */
287 /* Communication buffer for general use */
290 /* Communication buffers only used with multiple grid pulses */
295 /* Communication buffers for local redistribution */
297 int cggl_flag_nalloc
[DIM
*2];
299 int cgcm_state_nalloc
[DIM
*2];
301 /* Cell sizes for dynamic load balancing */
302 gmx_domdec_root_t
**root
;
306 real cell_f_max0
[DIM
];
307 real cell_f_min1
[DIM
];
309 /* Stuff for load communication */
311 gmx_domdec_load_t
*load
;
313 MPI_Comm
*mpi_comm_load
;
316 float cycl
[ddCyclNr
];
317 int cycl_n
[ddCyclNr
];
318 float cycl_max
[ddCyclNr
];
319 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
323 /* Have often have did we have load measurements */
325 /* Have often have we collected the load measurements */
329 double sum_nat
[ddnatNR
-ddnatZONE
];
339 /* The last partition step */
340 gmx_large_int_t partition_step
;
348 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
351 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
352 #define DD_FLAG_NRCG 65535
353 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
354 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
356 /* Zone permutation required to obtain consecutive charge groups
357 * for neighbor searching.
359 static const int zone_perm
[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
361 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
362 * components see only j zones with that component 0.
365 /* The DD zone order */
366 static const ivec dd_zo
[DD_MAXZONE
] =
367 {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
372 static const ivec dd_zp3
[dd_zp3n
] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
377 static const ivec dd_zp2
[dd_zp2n
] = {{0,0,4},{1,3,4}};
382 static const ivec dd_zp1
[dd_zp1n
] = {{0,0,2}};
384 /* Factors used to avoid problems due to rounding issues */
385 #define DD_CELL_MARGIN 1.0001
386 #define DD_CELL_MARGIN2 1.00005
387 /* Factor to account for pressure scaling during nstlist steps */
388 #define DD_PRES_SCALE_MARGIN 1.02
390 /* Allowed performance loss before we DLB or warn */
391 #define DD_PERF_LOSS 0.05
393 #define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
395 /* Use separate MPI send and receive commands
396 * when nnodes <= GMX_DD_NNODES_SENDRECV.
397 * This saves memory (and some copying for small nnodes).
398 * For high parallelization scatter and gather calls are used.
400 #define GMX_DD_NNODES_SENDRECV 4
404 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
406 static void index2xyz(ivec nc,int ind,ivec xyz)
408 xyz[XX] = ind % nc[XX];
409 xyz[YY] = (ind / nc[XX]) % nc[YY];
410 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
414 /* This order is required to minimize the coordinate communication in PME
415 * which uses decomposition in the x direction.
417 #define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
419 static void ddindex2xyz(ivec nc
,int ind
,ivec xyz
)
421 xyz
[XX
] = ind
/ (nc
[YY
]*nc
[ZZ
]);
422 xyz
[YY
] = (ind
/ nc
[ZZ
]) % nc
[YY
];
423 xyz
[ZZ
] = ind
% nc
[ZZ
];
426 static int ddcoord2ddnodeid(gmx_domdec_t
*dd
,ivec c
)
431 ddindex
= dd_index(dd
->nc
,c
);
432 if (dd
->comm
->bCartesianPP_PME
)
434 ddnodeid
= dd
->comm
->ddindex2ddnodeid
[ddindex
];
436 else if (dd
->comm
->bCartesianPP
)
439 MPI_Cart_rank(dd
->mpi_comm_all
,c
,&ddnodeid
);
450 static bool dynamic_dd_box(gmx_ddbox_t
*ddbox
,t_inputrec
*ir
)
452 return (ddbox
->nboundeddim
< DIM
|| DYNAMIC_BOX(*ir
));
455 int ddglatnr(gmx_domdec_t
*dd
,int i
)
465 if (i
>= dd
->comm
->nat
[ddnatNR
-1])
467 gmx_fatal(FARGS
,"glatnr called with %d, which is larger than the local number of atoms (%d)",i
,dd
->comm
->nat
[ddnatNR
-1]);
469 atnr
= dd
->gatindex
[i
] + 1;
475 t_block
*dd_charge_groups_global(gmx_domdec_t
*dd
)
477 return &dd
->comm
->cgs_gl
;
480 static void vec_rvec_init(vec_rvec_t
*v
)
486 static void vec_rvec_check_alloc(vec_rvec_t
*v
,int n
)
490 v
->nalloc
= over_alloc_dd(n
);
491 srenew(v
->v
,v
->nalloc
);
495 void dd_store_state(gmx_domdec_t
*dd
,t_state
*state
)
499 if (state
->ddp_count
!= dd
->ddp_count
)
501 gmx_incons("The state does not the domain decomposition state");
504 state
->ncg_gl
= dd
->ncg_home
;
505 if (state
->ncg_gl
> state
->cg_gl_nalloc
)
507 state
->cg_gl_nalloc
= over_alloc_dd(state
->ncg_gl
);
508 srenew(state
->cg_gl
,state
->cg_gl_nalloc
);
510 for(i
=0; i
<state
->ncg_gl
; i
++)
512 state
->cg_gl
[i
] = dd
->index_gl
[i
];
515 state
->ddp_count_cg_gl
= dd
->ddp_count
;
518 gmx_domdec_zones_t
*domdec_zones(gmx_domdec_t
*dd
)
520 return &dd
->comm
->zones
;
523 void dd_get_ns_ranges(gmx_domdec_t
*dd
,int icg
,
524 int *jcg0
,int *jcg1
,ivec shift0
,ivec shift1
)
526 gmx_domdec_zones_t
*zones
;
529 zones
= &dd
->comm
->zones
;
532 while (icg
>= zones
->izone
[izone
].cg1
)
541 else if (izone
< zones
->nizone
)
543 *jcg0
= zones
->izone
[izone
].jcg0
;
547 gmx_fatal(FARGS
,"DD icg %d out of range: izone (%d) >= nizone (%d)",
548 icg
,izone
,zones
->nizone
);
551 *jcg1
= zones
->izone
[izone
].jcg1
;
553 for(d
=0; d
<dd
->ndim
; d
++)
556 shift0
[dim
] = zones
->izone
[izone
].shift0
[dim
];
557 shift1
[dim
] = zones
->izone
[izone
].shift1
[dim
];
558 if (dd
->comm
->tric_dir
[dim
] || (dd
->bGridJump
&& d
> 0))
560 /* A conservative approach, this can be optimized */
567 int dd_natoms_vsite(gmx_domdec_t
*dd
)
569 return dd
->comm
->nat
[ddnatVSITE
];
572 void dd_get_constraint_range(gmx_domdec_t
*dd
,int *at_start
,int *at_end
)
574 *at_start
= dd
->comm
->nat
[ddnatCON
-1];
575 *at_end
= dd
->comm
->nat
[ddnatCON
];
578 void dd_move_x(gmx_domdec_t
*dd
,matrix box
,rvec x
[])
580 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
582 gmx_domdec_comm_t
*comm
;
583 gmx_domdec_comm_dim_t
*cd
;
584 gmx_domdec_ind_t
*ind
;
585 rvec shift
={0,0,0},*buf
,*rbuf
;
590 cgindex
= dd
->cgindex
;
595 nat_tot
= dd
->nat_home
;
596 for(d
=0; d
<dd
->ndim
; d
++)
598 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
599 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
602 copy_rvec(box
[dd
->dim
[d
]],shift
);
605 for(p
=0; p
<cd
->np
; p
++)
612 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
614 at0
= cgindex
[index
[i
]];
615 at1
= cgindex
[index
[i
]+1];
616 for(j
=at0
; j
<at1
; j
++)
618 copy_rvec(x
[j
],buf
[n
]);
625 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
627 at0
= cgindex
[index
[i
]];
628 at1
= cgindex
[index
[i
]+1];
629 for(j
=at0
; j
<at1
; j
++)
631 /* We need to shift the coordinates */
632 rvec_add(x
[j
],shift
,buf
[n
]);
639 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
641 at0
= cgindex
[index
[i
]];
642 at1
= cgindex
[index
[i
]+1];
643 for(j
=at0
; j
<at1
; j
++)
646 buf
[n
][XX
] = x
[j
][XX
] + shift
[XX
];
648 * This operation requires a special shift force
649 * treatment, which is performed in calc_vir.
651 buf
[n
][YY
] = box
[YY
][YY
] - x
[j
][YY
];
652 buf
[n
][ZZ
] = box
[ZZ
][ZZ
] - x
[j
][ZZ
];
664 rbuf
= comm
->vbuf2
.v
;
666 /* Send and receive the coordinates */
667 dd_sendrecv_rvec(dd
, d
, dddirBackward
,
668 buf
, ind
->nsend
[nzone
+1],
669 rbuf
, ind
->nrecv
[nzone
+1]);
673 for(zone
=0; zone
<nzone
; zone
++)
675 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
677 copy_rvec(rbuf
[j
],x
[i
]);
682 nat_tot
+= ind
->nrecv
[nzone
+1];
688 void dd_move_f(gmx_domdec_t
*dd
,rvec f
[],rvec
*fshift
)
690 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
692 gmx_domdec_comm_t
*comm
;
693 gmx_domdec_comm_dim_t
*cd
;
694 gmx_domdec_ind_t
*ind
;
702 cgindex
= dd
->cgindex
;
707 nzone
= comm
->zones
.n
/2;
708 nat_tot
= dd
->nat_tot
;
709 for(d
=dd
->ndim
-1; d
>=0; d
--)
711 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
712 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
713 if (fshift
== NULL
&& !bScrew
)
717 /* Determine which shift vector we need */
723 for(p
=cd
->np
-1; p
>=0; p
--) {
725 nat_tot
-= ind
->nrecv
[nzone
+1];
732 sbuf
= comm
->vbuf2
.v
;
734 for(zone
=0; zone
<nzone
; zone
++)
736 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
738 copy_rvec(f
[i
],sbuf
[j
]);
743 /* Communicate the forces */
744 dd_sendrecv_rvec(dd
, d
, dddirForward
,
745 sbuf
, ind
->nrecv
[nzone
+1],
746 buf
, ind
->nsend
[nzone
+1]);
748 /* Add the received forces */
752 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
754 at0
= cgindex
[index
[i
]];
755 at1
= cgindex
[index
[i
]+1];
756 for(j
=at0
; j
<at1
; j
++)
758 rvec_inc(f
[j
],buf
[n
]);
765 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
767 at0
= cgindex
[index
[i
]];
768 at1
= cgindex
[index
[i
]+1];
769 for(j
=at0
; j
<at1
; j
++)
771 rvec_inc(f
[j
],buf
[n
]);
772 /* Add this force to the shift force */
773 rvec_inc(fshift
[is
],buf
[n
]);
780 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
782 at0
= cgindex
[index
[i
]];
783 at1
= cgindex
[index
[i
]+1];
784 for(j
=at0
; j
<at1
; j
++)
786 /* Rotate the force */
787 f
[j
][XX
] += buf
[n
][XX
];
788 f
[j
][YY
] -= buf
[n
][YY
];
789 f
[j
][ZZ
] -= buf
[n
][ZZ
];
792 /* Add this force to the shift force */
793 rvec_inc(fshift
[is
],buf
[n
]);
804 void dd_atom_spread_real(gmx_domdec_t
*dd
,real v
[])
806 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
808 gmx_domdec_comm_t
*comm
;
809 gmx_domdec_comm_dim_t
*cd
;
810 gmx_domdec_ind_t
*ind
;
815 cgindex
= dd
->cgindex
;
817 buf
= &comm
->vbuf
.v
[0][0];
820 nat_tot
= dd
->nat_home
;
821 for(d
=0; d
<dd
->ndim
; d
++)
824 for(p
=0; p
<cd
->np
; p
++)
829 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
831 at0
= cgindex
[index
[i
]];
832 at1
= cgindex
[index
[i
]+1];
833 for(j
=at0
; j
<at1
; j
++)
846 rbuf
= &comm
->vbuf2
.v
[0][0];
848 /* Send and receive the coordinates */
849 dd_sendrecv_real(dd
, d
, dddirBackward
,
850 buf
, ind
->nsend
[nzone
+1],
851 rbuf
, ind
->nrecv
[nzone
+1]);
855 for(zone
=0; zone
<nzone
; zone
++)
857 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
864 nat_tot
+= ind
->nrecv
[nzone
+1];
870 void dd_atom_sum_real(gmx_domdec_t
*dd
,real v
[])
872 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
874 gmx_domdec_comm_t
*comm
;
875 gmx_domdec_comm_dim_t
*cd
;
876 gmx_domdec_ind_t
*ind
;
881 cgindex
= dd
->cgindex
;
883 buf
= &comm
->vbuf
.v
[0][0];
886 nzone
= comm
->zones
.n
/2;
887 nat_tot
= dd
->nat_tot
;
888 for(d
=dd
->ndim
-1; d
>=0; d
--)
891 for(p
=cd
->np
-1; p
>=0; p
--) {
893 nat_tot
-= ind
->nrecv
[nzone
+1];
900 sbuf
= &comm
->vbuf2
.v
[0][0];
902 for(zone
=0; zone
<nzone
; zone
++)
904 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
911 /* Communicate the forces */
912 dd_sendrecv_real(dd
, d
, dddirForward
,
913 sbuf
, ind
->nrecv
[nzone
+1],
914 buf
, ind
->nsend
[nzone
+1]);
916 /* Add the received forces */
918 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
920 at0
= cgindex
[index
[i
]];
921 at1
= cgindex
[index
[i
]+1];
922 for(j
=at0
; j
<at1
; j
++)
933 static void print_ddzone(FILE *fp
,int d
,int i
,int j
,gmx_ddzone_t
*zone
)
935 fprintf(fp
,"zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
937 zone
->min0
,zone
->max1
,
938 zone
->mch0
,zone
->mch0
,
939 zone
->p1_0
,zone
->p1_1
);
942 static void dd_sendrecv_ddzone(const gmx_domdec_t
*dd
,
943 int ddimind
,int direction
,
944 gmx_ddzone_t
*buf_s
,int n_s
,
945 gmx_ddzone_t
*buf_r
,int n_r
)
947 rvec vbuf_s
[5*2],vbuf_r
[5*2];
952 vbuf_s
[i
*2 ][0] = buf_s
[i
].min0
;
953 vbuf_s
[i
*2 ][1] = buf_s
[i
].max1
;
954 vbuf_s
[i
*2 ][2] = buf_s
[i
].mch0
;
955 vbuf_s
[i
*2+1][0] = buf_s
[i
].mch1
;
956 vbuf_s
[i
*2+1][1] = buf_s
[i
].p1_0
;
957 vbuf_s
[i
*2+1][2] = buf_s
[i
].p1_1
;
960 dd_sendrecv_rvec(dd
, ddimind
, direction
,
966 buf_r
[i
].min0
= vbuf_r
[i
*2 ][0];
967 buf_r
[i
].max1
= vbuf_r
[i
*2 ][1];
968 buf_r
[i
].mch0
= vbuf_r
[i
*2 ][2];
969 buf_r
[i
].mch1
= vbuf_r
[i
*2+1][0];
970 buf_r
[i
].p1_0
= vbuf_r
[i
*2+1][1];
971 buf_r
[i
].p1_1
= vbuf_r
[i
*2+1][2];
975 static void dd_move_cellx(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
,
976 rvec cell_ns_x0
,rvec cell_ns_x1
)
978 int d
,d1
,dim
,dim1
,pos
,buf_size
,i
,j
,k
,p
,npulse
,npulse_min
;
979 gmx_ddzone_t
*zp
,buf_s
[5],buf_r
[5],buf_e
[5];
980 rvec extr_s
[2],extr_r
[2];
983 gmx_domdec_comm_t
*comm
;
988 for(d
=1; d
<dd
->ndim
; d
++)
991 zp
= (d
== 1) ? &comm
->zone_d1
[0] : &comm
->zone_d2
[0][0];
992 zp
->min0
= cell_ns_x0
[dim
];
993 zp
->max1
= cell_ns_x1
[dim
];
994 zp
->mch0
= cell_ns_x0
[dim
];
995 zp
->mch1
= cell_ns_x1
[dim
];
996 zp
->p1_0
= cell_ns_x0
[dim
];
997 zp
->p1_1
= cell_ns_x1
[dim
];
1000 for(d
=dd
->ndim
-2; d
>=0; d
--)
1003 bPBC
= (dim
< ddbox
->npbcdim
);
1005 /* Use an rvec to store two reals */
1006 extr_s
[d
][0] = comm
->cell_f0
[d
+1];
1007 extr_s
[d
][1] = comm
->cell_f1
[d
+1];
1011 /* Store the extremes in the backward sending buffer,
1012 * so the get updated separately from the forward communication.
1014 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1016 /* We invert the order to be able to use the same loop for buf_e */
1017 buf_s
[pos
].min0
= extr_s
[d1
][1];
1018 buf_s
[pos
].max1
= extr_s
[d1
][0];
1019 buf_s
[pos
].mch0
= 0;
1020 buf_s
[pos
].mch1
= 0;
1021 /* Store the cell corner of the dimension we communicate along */
1022 buf_s
[pos
].p1_0
= comm
->cell_x0
[dim
];
1023 buf_s
[pos
].p1_1
= 0;
1027 buf_s
[pos
] = (dd
->ndim
== 2) ? comm
->zone_d1
[0] : comm
->zone_d2
[0][0];
1030 if (dd
->ndim
== 3 && d
== 0)
1032 buf_s
[pos
] = comm
->zone_d2
[0][1];
1034 buf_s
[pos
] = comm
->zone_d1
[0];
1038 /* We only need to communicate the extremes
1039 * in the forward direction
1041 npulse
= comm
->cd
[d
].np
;
1044 /* Take the minimum to avoid double communication */
1045 npulse_min
= min(npulse
,dd
->nc
[dim
]-1-npulse
);
1049 /* Without PBC we should really not communicate over
1050 * the boundaries, but implementing that complicates
1051 * the communication setup and therefore we simply
1052 * do all communication, but ignore some data.
1054 npulse_min
= npulse
;
1056 for(p
=0; p
<npulse_min
; p
++)
1058 /* Communicate the extremes forward */
1059 bUse
= (bPBC
|| dd
->ci
[dim
] > 0);
1061 dd_sendrecv_rvec(dd
, d
, dddirForward
,
1062 extr_s
+d
, dd
->ndim
-d
-1,
1063 extr_r
+d
, dd
->ndim
-d
-1);
1067 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1069 extr_s
[d1
][0] = max(extr_s
[d1
][0],extr_r
[d1
][0]);
1070 extr_s
[d1
][1] = min(extr_s
[d1
][1],extr_r
[d1
][1]);
1076 for(p
=0; p
<npulse
; p
++)
1078 /* Communicate all the zone information backward */
1079 bUse
= (bPBC
|| dd
->ci
[dim
] < dd
->nc
[dim
] - 1);
1081 dd_sendrecv_ddzone(dd
, d
, dddirBackward
,
1088 for(d1
=d
+1; d1
<dd
->ndim
; d1
++)
1090 /* Determine the decrease of maximum required
1091 * communication height along d1 due to the distance along d,
1092 * this avoids a lot of useless atom communication.
1094 dist_d
= comm
->cell_x1
[dim
] - buf_r
[0].p1_0
;
1096 if (ddbox
->tric_dir
[dim
])
1098 /* c is the off-diagonal coupling between the cell planes
1099 * along directions d and d1.
1101 c
= ddbox
->v
[dim
][dd
->dim
[d1
]][dim
];
1107 det
= (1 + c
*c
)*comm
->cutoff
*comm
->cutoff
- dist_d
*dist_d
;
1110 dh
[d1
] = comm
->cutoff
- (c
*dist_d
+ sqrt(det
))/(1 + c
*c
);
1114 /* A negative value signals out of range */
1120 /* Accumulate the extremes over all pulses */
1121 for(i
=0; i
<buf_size
; i
++)
1125 buf_e
[i
] = buf_r
[i
];
1131 buf_e
[i
].min0
= min(buf_e
[i
].min0
,buf_r
[i
].min0
);
1132 buf_e
[i
].max1
= max(buf_e
[i
].max1
,buf_r
[i
].max1
);
1135 if (dd
->ndim
== 3 && d
== 0 && i
== buf_size
- 1)
1143 if (bUse
&& dh
[d1
] >= 0)
1145 buf_e
[i
].mch0
= max(buf_e
[i
].mch0
,buf_r
[i
].mch0
-dh
[d1
]);
1146 buf_e
[i
].mch1
= max(buf_e
[i
].mch1
,buf_r
[i
].mch1
-dh
[d1
]);
1149 /* Copy the received buffer to the send buffer,
1150 * to pass the data through with the next pulse.
1152 buf_s
[i
] = buf_r
[i
];
1154 if (((bPBC
|| dd
->ci
[dim
]+npulse
< dd
->nc
[dim
]) && p
== npulse
-1) ||
1155 (!bPBC
&& dd
->ci
[dim
]+1+p
== dd
->nc
[dim
]-1))
1157 /* Store the extremes */
1160 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1162 extr_s
[d1
][1] = min(extr_s
[d1
][1],buf_e
[pos
].min0
);
1163 extr_s
[d1
][0] = max(extr_s
[d1
][0],buf_e
[pos
].max1
);
1167 if (d
== 1 || (d
== 0 && dd
->ndim
== 3))
1171 comm
->zone_d2
[1-d
][i
] = buf_e
[pos
];
1177 comm
->zone_d1
[1] = buf_e
[pos
];
1191 print_ddzone(debug
,1,i
,0,&comm
->zone_d1
[i
]);
1193 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d1
[i
].min0
);
1194 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d1
[i
].max1
);
1206 print_ddzone(debug
,2,i
,j
,&comm
->zone_d2
[i
][j
]);
1208 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d2
[i
][j
].min0
);
1209 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d2
[i
][j
].max1
);
1213 for(d
=1; d
<dd
->ndim
; d
++)
1215 comm
->cell_f_max0
[d
] = extr_s
[d
-1][0];
1216 comm
->cell_f_min1
[d
] = extr_s
[d
-1][1];
1219 fprintf(debug
,"Cell fraction d %d, max0 %f, min1 %f\n",
1220 d
,comm
->cell_f_max0
[d
],comm
->cell_f_min1
[d
]);
1225 static void dd_collect_cg(gmx_domdec_t
*dd
,
1226 t_state
*state_local
)
1228 gmx_domdec_master_t
*ma
=NULL
;
1229 int buf2
[2],*ibuf
,i
,ncg_home
=0,*cg
=NULL
,nat_home
=0;
1232 if (state_local
->ddp_count
== dd
->comm
->master_cg_ddp_count
)
1234 /* The master has the correct distribution */
1238 if (state_local
->ddp_count
== dd
->ddp_count
)
1240 ncg_home
= dd
->ncg_home
;
1242 nat_home
= dd
->nat_home
;
1244 else if (state_local
->ddp_count_cg_gl
== state_local
->ddp_count
)
1246 cgs_gl
= &dd
->comm
->cgs_gl
;
1248 ncg_home
= state_local
->ncg_gl
;
1249 cg
= state_local
->cg_gl
;
1251 for(i
=0; i
<ncg_home
; i
++)
1253 nat_home
+= cgs_gl
->index
[cg
[i
]+1] - cgs_gl
->index
[cg
[i
]];
1258 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1261 buf2
[0] = dd
->ncg_home
;
1262 buf2
[1] = dd
->nat_home
;
1272 /* Collect the charge group and atom counts on the master */
1273 dd_gather(dd
,2*sizeof(int),buf2
,ibuf
);
1278 for(i
=0; i
<dd
->nnodes
; i
++)
1280 ma
->ncg
[i
] = ma
->ibuf
[2*i
];
1281 ma
->nat
[i
] = ma
->ibuf
[2*i
+1];
1282 ma
->index
[i
+1] = ma
->index
[i
] + ma
->ncg
[i
];
1285 /* Make byte counts and indices */
1286 for(i
=0; i
<dd
->nnodes
; i
++)
1288 ma
->ibuf
[i
] = ma
->ncg
[i
]*sizeof(int);
1289 ma
->ibuf
[dd
->nnodes
+i
] = ma
->index
[i
]*sizeof(int);
1293 fprintf(debug
,"Initial charge group distribution: ");
1294 for(i
=0; i
<dd
->nnodes
; i
++)
1295 fprintf(debug
," %d",ma
->ncg
[i
]);
1296 fprintf(debug
,"\n");
1300 /* Collect the charge group indices on the master */
1302 dd
->ncg_home
*sizeof(int),dd
->index_gl
,
1303 DDMASTER(dd
) ? ma
->ibuf
: NULL
,
1304 DDMASTER(dd
) ? ma
->ibuf
+dd
->nnodes
: NULL
,
1305 DDMASTER(dd
) ? ma
->cg
: NULL
);
1307 dd
->comm
->master_cg_ddp_count
= state_local
->ddp_count
;
1310 static void dd_collect_vec_sendrecv(gmx_domdec_t
*dd
,
1313 gmx_domdec_master_t
*ma
;
1314 int n
,i
,c
,a
,nalloc
=0;
1323 MPI_Send(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1324 dd
->rank
,dd
->mpi_comm_all
);
1327 /* Copy the master coordinates to the global array */
1328 cgs_gl
= &dd
->comm
->cgs_gl
;
1330 n
= DDMASTERRANK(dd
);
1332 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1334 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1336 copy_rvec(lv
[a
++],v
[c
]);
1340 for(n
=0; n
<dd
->nnodes
; n
++)
1344 if (ma
->nat
[n
] > nalloc
)
1346 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1350 MPI_Recv(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,DDRANK(dd
,n
),
1351 n
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1354 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1356 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1358 copy_rvec(buf
[a
++],v
[c
]);
1367 static void get_commbuffer_counts(gmx_domdec_t
*dd
,
1368 int **counts
,int **disps
)
1370 gmx_domdec_master_t
*ma
;
1375 /* Make the rvec count and displacment arrays */
1377 *disps
= ma
->ibuf
+ dd
->nnodes
;
1378 for(n
=0; n
<dd
->nnodes
; n
++)
1380 (*counts
)[n
] = ma
->nat
[n
]*sizeof(rvec
);
1381 (*disps
)[n
] = (n
== 0 ? 0 : (*disps
)[n
-1] + (*counts
)[n
-1]);
1385 static void dd_collect_vec_gatherv(gmx_domdec_t
*dd
,
1388 gmx_domdec_master_t
*ma
;
1389 int *rcounts
=NULL
,*disps
=NULL
;
1398 get_commbuffer_counts(dd
,&rcounts
,&disps
);
1403 dd_gatherv(dd
,dd
->nat_home
*sizeof(rvec
),lv
,rcounts
,disps
,buf
);
1407 cgs_gl
= &dd
->comm
->cgs_gl
;
1410 for(n
=0; n
<dd
->nnodes
; n
++)
1412 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1414 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1416 copy_rvec(buf
[a
++],v
[c
]);
1423 void dd_collect_vec(gmx_domdec_t
*dd
,
1424 t_state
*state_local
,rvec
*lv
,rvec
*v
)
1426 gmx_domdec_master_t
*ma
;
1427 int n
,i
,c
,a
,nalloc
=0;
1430 dd_collect_cg(dd
,state_local
);
1432 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1434 dd_collect_vec_sendrecv(dd
,lv
,v
);
1438 dd_collect_vec_gatherv(dd
,lv
,v
);
1443 void dd_collect_state(gmx_domdec_t
*dd
,
1444 t_state
*state_local
,t_state
*state
)
1448 nh
= state
->nhchainlength
;
1452 state
->lambda
= state_local
->lambda
;
1453 state
->veta
= state_local
->veta
;
1454 state
->vol0
= state_local
->vol0
;
1455 copy_mat(state_local
->box
,state
->box
);
1456 copy_mat(state_local
->boxv
,state
->boxv
);
1457 copy_mat(state_local
->svir_prev
,state
->svir_prev
);
1458 copy_mat(state_local
->fvir_prev
,state
->fvir_prev
);
1459 copy_mat(state_local
->pres_prev
,state
->pres_prev
);
1462 for(i
=0; i
<state_local
->ngtc
; i
++)
1464 for(j
=0; j
<nh
; j
++) {
1465 state
->nosehoover_xi
[i
*nh
+j
] = state_local
->nosehoover_xi
[i
*nh
+j
];
1466 state
->nosehoover_vxi
[i
*nh
+j
] = state_local
->nosehoover_vxi
[i
*nh
+j
];
1468 state
->therm_integral
[i
] = state_local
->therm_integral
[i
];
1470 for(i
=0; i
<state_local
->nnhpres
; i
++)
1472 for(j
=0; j
<nh
; j
++) {
1473 state
->nhpres_xi
[i
*nh
+j
] = state_local
->nhpres_xi
[i
*nh
+j
];
1474 state
->nhpres_vxi
[i
*nh
+j
] = state_local
->nhpres_vxi
[i
*nh
+j
];
1478 for(est
=0; est
<estNR
; est
++)
1480 if (EST_DISTR(est
) && state_local
->flags
& (1<<est
))
1484 dd_collect_vec(dd
,state_local
,state_local
->x
,state
->x
);
1487 dd_collect_vec(dd
,state_local
,state_local
->v
,state
->v
);
1490 dd_collect_vec(dd
,state_local
,state_local
->sd_X
,state
->sd_X
);
1493 dd_collect_vec(dd
,state_local
,state_local
->cg_p
,state
->cg_p
);
1496 if (state
->nrngi
== 1)
1500 for(i
=0; i
<state_local
->nrng
; i
++)
1502 state
->ld_rng
[i
] = state_local
->ld_rng
[i
];
1508 dd_gather(dd
,state_local
->nrng
*sizeof(state
->ld_rng
[0]),
1509 state_local
->ld_rng
,state
->ld_rng
);
1513 if (state
->nrngi
== 1)
1517 state
->ld_rngi
[0] = state_local
->ld_rngi
[0];
1522 dd_gather(dd
,sizeof(state
->ld_rngi
[0]),
1523 state_local
->ld_rngi
,state
->ld_rngi
);
1526 case estDISRE_INITF
:
1527 case estDISRE_RM3TAV
:
1528 case estORIRE_INITF
:
1532 gmx_incons("Unknown state entry encountered in dd_collect_state");
1538 static void dd_realloc_fr_cg(t_forcerec
*fr
,int nalloc
)
1542 fprintf(debug
,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr
->cg_nalloc
,nalloc
,over_alloc_dd(nalloc
));
1544 fr
->cg_nalloc
= over_alloc_dd(nalloc
);
1545 srenew(fr
->cg_cm
,fr
->cg_nalloc
);
1546 srenew(fr
->cginfo
,fr
->cg_nalloc
);
1549 static void dd_realloc_state(t_state
*state
,rvec
**f
,int nalloc
)
1555 fprintf(debug
,"Reallocating state: currently %d, required %d, allocating %d\n",state
->nalloc
,nalloc
,over_alloc_dd(nalloc
));
1558 state
->nalloc
= over_alloc_dd(nalloc
);
1560 for(est
=0; est
<estNR
; est
++)
1562 if (EST_DISTR(est
) && state
->flags
& (1<<est
))
1566 srenew(state
->x
,state
->nalloc
);
1569 srenew(state
->v
,state
->nalloc
);
1572 srenew(state
->sd_X
,state
->nalloc
);
1575 srenew(state
->cg_p
,state
->nalloc
);
1579 case estDISRE_INITF
:
1580 case estDISRE_RM3TAV
:
1581 case estORIRE_INITF
:
1583 /* No reallocation required */
1586 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1593 srenew(*f
,state
->nalloc
);
1597 static void dd_distribute_vec_sendrecv(gmx_domdec_t
*dd
,t_block
*cgs
,
1600 gmx_domdec_master_t
*ma
;
1601 int n
,i
,c
,a
,nalloc
=0;
1608 for(n
=0; n
<dd
->nnodes
; n
++)
1612 if (ma
->nat
[n
] > nalloc
)
1614 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1617 /* Use lv as a temporary buffer */
1619 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1621 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1623 copy_rvec(v
[c
],buf
[a
++]);
1626 if (a
!= ma
->nat
[n
])
1628 gmx_fatal(FARGS
,"Internal error a (%d) != nat (%d)",
1633 MPI_Send(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,
1634 DDRANK(dd
,n
),n
,dd
->mpi_comm_all
);
1639 n
= DDMASTERRANK(dd
);
1641 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1643 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1645 copy_rvec(v
[c
],lv
[a
++]);
1652 MPI_Recv(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1653 MPI_ANY_TAG
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1658 static void dd_distribute_vec_scatterv(gmx_domdec_t
*dd
,t_block
*cgs
,
1661 gmx_domdec_master_t
*ma
;
1662 int *scounts
=NULL
,*disps
=NULL
;
1663 int n
,i
,c
,a
,nalloc
=0;
1670 get_commbuffer_counts(dd
,&scounts
,&disps
);
1674 for(n
=0; n
<dd
->nnodes
; n
++)
1676 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1678 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1680 copy_rvec(v
[c
],buf
[a
++]);
1686 dd_scatterv(dd
,scounts
,disps
,buf
,dd
->nat_home
*sizeof(rvec
),lv
);
1689 static void dd_distribute_vec(gmx_domdec_t
*dd
,t_block
*cgs
,rvec
*v
,rvec
*lv
)
1691 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1693 dd_distribute_vec_sendrecv(dd
,cgs
,v
,lv
);
1697 dd_distribute_vec_scatterv(dd
,cgs
,v
,lv
);
1701 static void dd_distribute_state(gmx_domdec_t
*dd
,t_block
*cgs
,
1702 t_state
*state
,t_state
*state_local
,
1705 int i
,j
,ngtch
,ngtcp
,nh
;
1707 nh
= state
->nhchainlength
;
1711 state_local
->lambda
= state
->lambda
;
1712 state_local
->veta
= state
->veta
;
1713 state_local
->vol0
= state
->vol0
;
1714 copy_mat(state
->box
,state_local
->box
);
1715 copy_mat(state
->box_rel
,state_local
->box_rel
);
1716 copy_mat(state
->boxv
,state_local
->boxv
);
1717 copy_mat(state
->svir_prev
,state_local
->svir_prev
);
1718 copy_mat(state
->fvir_prev
,state_local
->fvir_prev
);
1719 for(i
=0; i
<state_local
->ngtc
; i
++)
1721 for(j
=0; j
<nh
; j
++) {
1722 state_local
->nosehoover_xi
[i
*nh
+j
] = state
->nosehoover_xi
[i
*nh
+j
];
1723 state_local
->nosehoover_vxi
[i
*nh
+j
] = state
->nosehoover_vxi
[i
*nh
+j
];
1725 state_local
->therm_integral
[i
] = state
->therm_integral
[i
];
1727 for(i
=0; i
<state_local
->nnhpres
; i
++)
1729 for(j
=0; j
<nh
; j
++) {
1730 state_local
->nhpres_xi
[i
*nh
+j
] = state
->nhpres_xi
[i
*nh
+j
];
1731 state_local
->nhpres_vxi
[i
*nh
+j
] = state
->nhpres_vxi
[i
*nh
+j
];
1735 dd_bcast(dd
,sizeof(real
),&state_local
->lambda
);
1736 dd_bcast(dd
,sizeof(real
),&state_local
->veta
);
1737 dd_bcast(dd
,sizeof(real
),&state_local
->vol0
);
1738 dd_bcast(dd
,sizeof(state_local
->box
),state_local
->box
);
1739 dd_bcast(dd
,sizeof(state_local
->box_rel
),state_local
->box_rel
);
1740 dd_bcast(dd
,sizeof(state_local
->boxv
),state_local
->boxv
);
1741 dd_bcast(dd
,sizeof(state_local
->svir_prev
),state_local
->svir_prev
);
1742 dd_bcast(dd
,sizeof(state_local
->fvir_prev
),state_local
->fvir_prev
);
1743 dd_bcast(dd
,((state_local
->ngtc
*nh
)*sizeof(double)),state_local
->nosehoover_xi
);
1744 dd_bcast(dd
,((state_local
->ngtc
*nh
)*sizeof(double)),state_local
->nosehoover_vxi
);
1745 dd_bcast(dd
,state_local
->ngtc
*sizeof(double),state_local
->therm_integral
);
1746 dd_bcast(dd
,((state_local
->nnhpres
*nh
)*sizeof(double)),state_local
->nhpres_xi
);
1747 dd_bcast(dd
,((state_local
->nnhpres
*nh
)*sizeof(double)),state_local
->nhpres_vxi
);
1749 if (dd
->nat_home
> state_local
->nalloc
)
1751 dd_realloc_state(state_local
,f
,dd
->nat_home
);
1753 for(i
=0; i
<estNR
; i
++)
1755 if (EST_DISTR(i
) && state_local
->flags
& (1<<i
))
1759 dd_distribute_vec(dd
,cgs
,state
->x
,state_local
->x
);
1762 dd_distribute_vec(dd
,cgs
,state
->v
,state_local
->v
);
1765 dd_distribute_vec(dd
,cgs
,state
->sd_X
,state_local
->sd_X
);
1768 dd_distribute_vec(dd
,cgs
,state
->cg_p
,state_local
->cg_p
);
1771 if (state
->nrngi
== 1)
1774 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1775 state
->ld_rng
,state_local
->ld_rng
);
1780 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1781 state
->ld_rng
,state_local
->ld_rng
);
1785 if (state
->nrngi
== 1)
1787 dd_bcastc(dd
,sizeof(state_local
->ld_rngi
[0]),
1788 state
->ld_rngi
,state_local
->ld_rngi
);
1792 dd_scatter(dd
,sizeof(state_local
->ld_rngi
[0]),
1793 state
->ld_rngi
,state_local
->ld_rngi
);
1796 case estDISRE_INITF
:
1797 case estDISRE_RM3TAV
:
1798 case estORIRE_INITF
:
1800 /* Not implemented yet */
1803 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1809 static char dim2char(int dim
)
1815 case XX
: c
= 'X'; break;
1816 case YY
: c
= 'Y'; break;
1817 case ZZ
: c
= 'Z'; break;
1818 default: gmx_fatal(FARGS
,"Unknown dim %d",dim
);
1824 static void write_dd_grid_pdb(const char *fn
,gmx_large_int_t step
,
1825 gmx_domdec_t
*dd
,matrix box
,gmx_ddbox_t
*ddbox
)
1827 rvec grid_s
[2],*grid_r
=NULL
,cx
,r
;
1828 char fname
[STRLEN
],format
[STRLEN
],buf
[22];
1834 copy_rvec(dd
->comm
->cell_x0
,grid_s
[0]);
1835 copy_rvec(dd
->comm
->cell_x1
,grid_s
[1]);
1839 snew(grid_r
,2*dd
->nnodes
);
1842 dd_gather(dd
,2*sizeof(rvec
),grid_s
[0],DDMASTER(dd
) ? grid_r
[0] : NULL
);
1846 for(d
=0; d
<DIM
; d
++)
1848 for(i
=0; i
<DIM
; i
++)
1856 if (dd
->nc
[d
] > 1 && d
< ddbox
->npbcdim
)
1858 tric
[d
][i
] = box
[i
][d
]/box
[i
][i
];
1867 sprintf(fname
,"%s_%s.pdb",fn
,gmx_step_str(step
,buf
));
1868 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
1869 out
= gmx_fio_fopen(fname
,"w");
1870 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
1872 for(i
=0; i
<dd
->nnodes
; i
++)
1874 vol
= dd
->nnodes
/(box
[XX
][XX
]*box
[YY
][YY
]*box
[ZZ
][ZZ
]);
1875 for(d
=0; d
<DIM
; d
++)
1877 vol
*= grid_r
[i
*2+1][d
] - grid_r
[i
*2][d
];
1885 cx
[XX
] = grid_r
[i
*2+x
][XX
];
1886 cx
[YY
] = grid_r
[i
*2+y
][YY
];
1887 cx
[ZZ
] = grid_r
[i
*2+z
][ZZ
];
1889 fprintf(out
,format
,"ATOM",a
++,"CA","GLY",' ',1+i
,
1890 10*r
[XX
],10*r
[YY
],10*r
[ZZ
],1.0,vol
);
1894 for(d
=0; d
<DIM
; d
++)
1900 case 0: y
= 1 + i
*8 + 2*x
; break;
1901 case 1: y
= 1 + i
*8 + 2*x
- (x
% 2); break;
1902 case 2: y
= 1 + i
*8 + x
; break;
1904 fprintf(out
,"%6s%5d%5d\n","CONECT",y
,y
+(1<<d
));
1908 gmx_fio_fclose(out
);
1913 void write_dd_pdb(const char *fn
,gmx_large_int_t step
,const char *title
,
1914 gmx_mtop_t
*mtop
,t_commrec
*cr
,
1915 int natoms
,rvec x
[],matrix box
)
1917 char fname
[STRLEN
],format
[STRLEN
],format4
[STRLEN
],buf
[22];
1920 char *atomname
,*resname
;
1927 natoms
= dd
->comm
->nat
[ddnatVSITE
];
1930 sprintf(fname
,"%s_%s_n%d.pdb",fn
,gmx_step_str(step
,buf
),cr
->sim_nodeid
);
1932 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
1933 sprintf(format4
,"%s%s\n",pdbformat4
,"%6.2f%6.2f");
1935 out
= gmx_fio_fopen(fname
,"w");
1937 fprintf(out
,"TITLE %s\n",title
);
1938 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
1939 for(i
=0; i
<natoms
; i
++)
1941 ii
= dd
->gatindex
[i
];
1942 gmx_mtop_atominfo_global(mtop
,ii
,&atomname
,&resnr
,&resname
);
1943 if (i
< dd
->comm
->nat
[ddnatZONE
])
1946 while (i
>= dd
->cgindex
[dd
->comm
->zones
.cg_range
[c
+1]])
1952 else if (i
< dd
->comm
->nat
[ddnatVSITE
])
1954 b
= dd
->comm
->zones
.n
;
1958 b
= dd
->comm
->zones
.n
+ 1;
1960 fprintf(out
,strlen(atomname
)<4 ? format
: format4
,
1961 "ATOM",(ii
+1)%100000,
1962 atomname
,resname
,' ',resnr
%10000,' ',
1963 10*x
[i
][XX
],10*x
[i
][YY
],10*x
[i
][ZZ
],1.0,b
);
1965 fprintf(out
,"TER\n");
1967 gmx_fio_fclose(out
);
1970 real
dd_cutoff_mbody(gmx_domdec_t
*dd
)
1972 gmx_domdec_comm_t
*comm
;
1979 if (comm
->bInterCGBondeds
)
1981 if (comm
->cutoff_mbody
> 0)
1983 r
= comm
->cutoff_mbody
;
1987 /* cutoff_mbody=0 means we do not have DLB */
1988 r
= comm
->cellsize_min
[dd
->dim
[0]];
1989 for(di
=1; di
<dd
->ndim
; di
++)
1991 r
= min(r
,comm
->cellsize_min
[dd
->dim
[di
]]);
1993 if (comm
->bBondComm
)
1995 r
= max(r
,comm
->cutoff_mbody
);
1999 r
= min(r
,comm
->cutoff
);
2007 real
dd_cutoff_twobody(gmx_domdec_t
*dd
)
2011 r_mb
= dd_cutoff_mbody(dd
);
2013 return max(dd
->comm
->cutoff
,r_mb
);
2017 static void dd_cart_coord2pmecoord(gmx_domdec_t
*dd
,ivec coord
,ivec coord_pme
)
2021 nc
= dd
->nc
[dd
->comm
->cartpmedim
];
2022 ntot
= dd
->comm
->ntot
[dd
->comm
->cartpmedim
];
2023 copy_ivec(coord
,coord_pme
);
2024 coord_pme
[dd
->comm
->cartpmedim
] =
2025 nc
+ (coord
[dd
->comm
->cartpmedim
]*(ntot
- nc
) + (ntot
- nc
)/2)/nc
;
2028 static int low_ddindex2pmeindex(int ndd
,int npme
,int ddindex
)
2030 /* Here we assign a PME node to communicate with this DD node
2031 * by assuming that the major index of both is x.
2032 * We add cr->npmenodes/2 to obtain an even distribution.
2034 return (ddindex
*npme
+ npme
/2)/ndd
;
2037 static int ddindex2pmeindex(const gmx_domdec_t
*dd
,int ddindex
)
2039 return low_ddindex2pmeindex(dd
->nnodes
,dd
->comm
->npmenodes
,ddindex
);
2042 static int cr_ddindex2pmeindex(const t_commrec
*cr
,int ddindex
)
2044 return low_ddindex2pmeindex(cr
->dd
->nnodes
,cr
->npmenodes
,ddindex
);
2047 static int *dd_pmenodes(t_commrec
*cr
)
2052 snew(pmenodes
,cr
->npmenodes
);
2054 for(i
=0; i
<cr
->dd
->nnodes
; i
++) {
2055 p0
= cr_ddindex2pmeindex(cr
,i
);
2056 p1
= cr_ddindex2pmeindex(cr
,i
+1);
2057 if (i
+1 == cr
->dd
->nnodes
|| p1
> p0
) {
2059 fprintf(debug
,"pmenode[%d] = %d\n",n
,i
+1+n
);
2060 pmenodes
[n
] = i
+ 1 + n
;
2068 static int gmx_ddcoord2pmeindex(t_commrec
*cr
,int x
,int y
,int z
)
2071 ivec coords
,coords_pme
,nc
;
2076 if (dd->comm->bCartesian) {
2077 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2078 dd_coords2pmecoords(dd,coords,coords_pme);
2079 copy_ivec(dd->ntot,nc);
2080 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2081 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2083 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2085 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2091 slab
= ddindex2pmeindex(dd
,dd_index(dd
->nc
,coords
));
2096 static int ddcoord2simnodeid(t_commrec
*cr
,int x
,int y
,int z
)
2098 gmx_domdec_comm_t
*comm
;
2100 int ddindex
,nodeid
=-1;
2102 comm
= cr
->dd
->comm
;
2107 if (comm
->bCartesianPP_PME
)
2110 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&nodeid
);
2115 ddindex
= dd_index(cr
->dd
->nc
,coords
);
2116 if (comm
->bCartesianPP
)
2118 nodeid
= comm
->ddindex2simnodeid
[ddindex
];
2124 nodeid
= ddindex
+ gmx_ddcoord2pmeindex(cr
,x
,y
,z
);
2136 static int dd_simnode2pmenode(t_commrec
*cr
,int sim_nodeid
)
2139 gmx_domdec_comm_t
*comm
;
2140 ivec coord
,coord_pme
;
2147 /* This assumes a uniform x domain decomposition grid cell size */
2148 if (comm
->bCartesianPP_PME
)
2151 MPI_Cart_coords(cr
->mpi_comm_mysim
,sim_nodeid
,DIM
,coord
);
2152 if (coord
[comm
->cartpmedim
] < dd
->nc
[comm
->cartpmedim
])
2154 /* This is a PP node */
2155 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2156 MPI_Cart_rank(cr
->mpi_comm_mysim
,coord_pme
,&pmenode
);
2160 else if (comm
->bCartesianPP
)
2162 if (sim_nodeid
< dd
->nnodes
)
2164 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2169 /* This assumes DD cells with identical x coordinates
2170 * are numbered sequentially.
2172 if (dd
->comm
->pmenodes
== NULL
)
2174 if (sim_nodeid
< dd
->nnodes
)
2176 /* The DD index equals the nodeid */
2177 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2183 while (sim_nodeid
> dd
->comm
->pmenodes
[i
])
2187 if (sim_nodeid
< dd
->comm
->pmenodes
[i
])
2189 pmenode
= dd
->comm
->pmenodes
[i
];
2197 bool gmx_pmeonlynode(t_commrec
*cr
,int sim_nodeid
)
2201 if (DOMAINDECOMP(cr
))
2203 bPMEOnlyNode
= (dd_simnode2pmenode(cr
,sim_nodeid
) == -1);
2207 bPMEOnlyNode
= FALSE
;
2210 return bPMEOnlyNode
;
2213 void get_pme_ddnodes(t_commrec
*cr
,int pmenodeid
,
2214 int *nmy_ddnodes
,int **my_ddnodes
,int *node_peer
)
2218 ivec coord
,coord_pme
;
2222 snew(*my_ddnodes
,(dd
->nnodes
+cr
->npmenodes
-1)/cr
->npmenodes
);
2225 for(x
=0; x
<dd
->nc
[XX
]; x
++)
2227 for(y
=0; y
<dd
->nc
[YY
]; y
++)
2229 for(z
=0; z
<dd
->nc
[ZZ
]; z
++)
2231 if (dd
->comm
->bCartesianPP_PME
)
2236 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2237 if (dd
->ci
[XX
] == coord_pme
[XX
] &&
2238 dd
->ci
[YY
] == coord_pme
[YY
] &&
2239 dd
->ci
[ZZ
] == coord_pme
[ZZ
])
2240 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2244 /* The slab corresponds to the nodeid in the PME group */
2245 if (gmx_ddcoord2pmeindex(cr
,x
,y
,z
) == pmenodeid
)
2247 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2254 /* The last PP-only node is the peer node */
2255 *node_peer
= (*my_ddnodes
)[*nmy_ddnodes
-1];
2259 fprintf(debug
,"Receive coordinates from PP nodes:");
2260 for(x
=0; x
<*nmy_ddnodes
; x
++)
2262 fprintf(debug
," %d",(*my_ddnodes
)[x
]);
2264 fprintf(debug
,"\n");
2268 static bool receive_vir_ener(t_commrec
*cr
)
2270 gmx_domdec_comm_t
*comm
;
2271 int pmenode
,coords
[DIM
],rank
;
2275 if (cr
->npmenodes
< cr
->dd
->nnodes
)
2277 comm
= cr
->dd
->comm
;
2278 if (comm
->bCartesianPP_PME
)
2280 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2282 MPI_Cart_coords(cr
->mpi_comm_mysim
,cr
->sim_nodeid
,DIM
,coords
);
2283 coords
[comm
->cartpmedim
]++;
2284 if (coords
[comm
->cartpmedim
] < cr
->dd
->nc
[comm
->cartpmedim
])
2286 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&rank
);
2287 if (dd_simnode2pmenode(cr
,rank
) == pmenode
)
2289 /* This is not the last PP node for pmenode */
2297 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2298 if (cr
->sim_nodeid
+1 < cr
->nnodes
&&
2299 dd_simnode2pmenode(cr
,cr
->sim_nodeid
+1) == pmenode
)
2301 /* This is not the last PP node for pmenode */
2310 static void set_zones_ncg_home(gmx_domdec_t
*dd
)
2312 gmx_domdec_zones_t
*zones
;
2315 zones
= &dd
->comm
->zones
;
2317 zones
->cg_range
[0] = 0;
2318 for(i
=1; i
<zones
->n
+1; i
++)
2320 zones
->cg_range
[i
] = dd
->ncg_home
;
2324 static void rebuild_cgindex(gmx_domdec_t
*dd
,int *gcgs_index
,t_state
*state
)
2326 int nat
,i
,*ind
,*dd_cg_gl
,*cgindex
,cg_gl
;
2329 dd_cg_gl
= dd
->index_gl
;
2330 cgindex
= dd
->cgindex
;
2333 for(i
=0; i
<state
->ncg_gl
; i
++)
2337 dd_cg_gl
[i
] = cg_gl
;
2338 nat
+= gcgs_index
[cg_gl
+1] - gcgs_index
[cg_gl
];
2342 dd
->ncg_home
= state
->ncg_gl
;
2345 set_zones_ncg_home(dd
);
2348 static int ddcginfo(const cginfo_mb_t
*cginfo_mb
,int cg
)
2350 while (cg
>= cginfo_mb
->cg_end
)
2355 return cginfo_mb
->cginfo
[(cg
- cginfo_mb
->cg_start
) % cginfo_mb
->cg_mod
];
2358 static void dd_set_cginfo(int *index_gl
,int cg0
,int cg1
,
2359 t_forcerec
*fr
,char *bLocalCG
)
2361 cginfo_mb_t
*cginfo_mb
;
2367 cginfo_mb
= fr
->cginfo_mb
;
2368 cginfo
= fr
->cginfo
;
2370 for(cg
=cg0
; cg
<cg1
; cg
++)
2372 cginfo
[cg
] = ddcginfo(cginfo_mb
,index_gl
[cg
]);
2376 if (bLocalCG
!= NULL
)
2378 for(cg
=cg0
; cg
<cg1
; cg
++)
2380 bLocalCG
[index_gl
[cg
]] = TRUE
;
2385 static void make_dd_indices(gmx_domdec_t
*dd
,int *gcgs_index
,int cg_start
)
2387 int nzone
,zone
,zone1
,cg0
,cg
,cg_gl
,a
,a_gl
;
2388 int *zone2cg
,*zone_ncg1
,*index_gl
,*gatindex
;
2392 bLocalCG
= dd
->comm
->bLocalCG
;
2394 if (dd
->nat_tot
> dd
->gatindex_nalloc
)
2396 dd
->gatindex_nalloc
= over_alloc_dd(dd
->nat_tot
);
2397 srenew(dd
->gatindex
,dd
->gatindex_nalloc
);
2400 nzone
= dd
->comm
->zones
.n
;
2401 zone2cg
= dd
->comm
->zones
.cg_range
;
2402 zone_ncg1
= dd
->comm
->zone_ncg1
;
2403 index_gl
= dd
->index_gl
;
2404 gatindex
= dd
->gatindex
;
2406 if (zone2cg
[1] != dd
->ncg_home
)
2408 gmx_incons("dd->ncg_zone is not up to date");
2411 /* Make the local to global and global to local atom index */
2412 a
= dd
->cgindex
[cg_start
];
2413 for(zone
=0; zone
<nzone
; zone
++)
2421 cg0
= zone2cg
[zone
];
2423 for(cg
=cg0
; cg
<zone2cg
[zone
+1]; cg
++)
2426 if (cg
- cg0
>= zone_ncg1
[zone
])
2428 /* Signal that this cg is from more than one zone away */
2431 cg_gl
= index_gl
[cg
];
2432 for(a_gl
=gcgs_index
[cg_gl
]; a_gl
<gcgs_index
[cg_gl
+1]; a_gl
++)
2435 ga2la_set(dd
->ga2la
,a_gl
,a
,zone1
);
2442 static int check_bLocalCG(gmx_domdec_t
*dd
,int ncg_sys
,const char *bLocalCG
,
2448 if (bLocalCG
== NULL
)
2452 for(i
=0; i
<dd
->ncg_tot
; i
++)
2454 if (!bLocalCG
[dd
->index_gl
[i
]])
2457 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd
->rank
,where
,i
+1,dd
->index_gl
[i
]+1,dd
->ncg_home
);
2462 for(i
=0; i
<ncg_sys
; i
++)
2469 if (ngl
!= dd
->ncg_tot
)
2471 fprintf(stderr
,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd
->rank
,where
,ngl
,dd
->ncg_tot
);
2478 static void check_index_consistency(gmx_domdec_t
*dd
,
2479 int natoms_sys
,int ncg_sys
,
2482 int nerr
,ngl
,i
,a
,cell
;
2487 if (dd
->comm
->DD_debug
> 1)
2489 snew(have
,natoms_sys
);
2490 for(a
=0; a
<dd
->nat_tot
; a
++)
2492 if (have
[dd
->gatindex
[a
]] > 0)
2494 fprintf(stderr
,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd
->rank
,dd
->gatindex
[a
]+1,have
[dd
->gatindex
[a
]],a
+1);
2498 have
[dd
->gatindex
[a
]] = a
+ 1;
2504 snew(have
,dd
->nat_tot
);
2507 for(i
=0; i
<natoms_sys
; i
++)
2509 if (ga2la_get(dd
->ga2la
,i
,&a
,&cell
))
2511 if (a
>= dd
->nat_tot
)
2513 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd
->rank
,i
+1,a
+1,dd
->nat_tot
);
2519 if (dd
->gatindex
[a
] != i
)
2521 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd
->rank
,i
+1,a
+1,dd
->gatindex
[a
]+1);
2528 if (ngl
!= dd
->nat_tot
)
2531 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2532 dd
->rank
,where
,ngl
,dd
->nat_tot
);
2534 for(a
=0; a
<dd
->nat_tot
; a
++)
2539 "DD node %d, %s: local atom %d, global %d has no global index\n",
2540 dd
->rank
,where
,a
+1,dd
->gatindex
[a
]+1);
2545 nerr
+= check_bLocalCG(dd
,ncg_sys
,dd
->comm
->bLocalCG
,where
);
2548 gmx_fatal(FARGS
,"DD node %d, %s: %d atom/cg index inconsistencies",
2549 dd
->rank
,where
,nerr
);
2553 static void clear_dd_indices(gmx_domdec_t
*dd
,int cg_start
,int a_start
)
2560 /* Clear the whole list without searching */
2561 ga2la_clear(dd
->ga2la
);
2565 for(i
=a_start
; i
<dd
->nat_tot
; i
++)
2567 ga2la_del(dd
->ga2la
,dd
->gatindex
[i
]);
2571 bLocalCG
= dd
->comm
->bLocalCG
;
2574 for(i
=cg_start
; i
<dd
->ncg_tot
; i
++)
2576 bLocalCG
[dd
->index_gl
[i
]] = FALSE
;
2580 dd_clear_local_vsite_indices(dd
);
2582 if (dd
->constraints
)
2584 dd_clear_local_constraint_indices(dd
);
2588 static real
grid_jump_limit(gmx_domdec_comm_t
*comm
,int dim_ind
)
2590 real grid_jump_limit
;
2592 /* The distance between the boundaries of cells at distance
2593 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2594 * and by the fact that cells should not be shifted by more than
2595 * half their size, such that cg's only shift by one cell
2596 * at redecomposition.
2598 grid_jump_limit
= comm
->cellsize_limit
;
2599 if (!comm
->bVacDLBNoLimit
)
2601 grid_jump_limit
= max(grid_jump_limit
,
2602 comm
->cutoff
/comm
->cd
[dim_ind
].np
);
2605 return grid_jump_limit
;
2608 static void check_grid_jump(gmx_large_int_t step
,gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
2610 gmx_domdec_comm_t
*comm
;
2616 for(d
=1; d
<dd
->ndim
; d
++)
2619 limit
= grid_jump_limit(comm
,d
);
2620 bfac
= ddbox
->box_size
[dim
];
2621 if (ddbox
->tric_dir
[dim
])
2623 bfac
*= ddbox
->skew_fac
[dim
];
2625 if ((comm
->cell_f1
[d
] - comm
->cell_f_max0
[d
])*bfac
< limit
||
2626 (comm
->cell_f0
[d
] - comm
->cell_f_min1
[d
])*bfac
> -limit
)
2629 gmx_fatal(FARGS
,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
2630 gmx_step_str(step
,buf
),
2631 dim2char(dim
),dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
2636 static int dd_load_count(gmx_domdec_comm_t
*comm
)
2638 return (comm
->eFlop
? comm
->flop_n
: comm
->cycl_n
[ddCyclF
]);
2641 static float dd_force_load(gmx_domdec_comm_t
*comm
)
2648 if (comm
->eFlop
> 1)
2650 load
*= 1.0 + (comm
->eFlop
- 1)*(0.1*rand()/RAND_MAX
- 0.05);
2655 load
= comm
->cycl
[ddCyclF
];
2656 if (comm
->cycl_n
[ddCyclF
] > 1)
2658 /* Subtract the maximum of the last n cycle counts
2659 * to get rid of possible high counts due to other soures,
2660 * for instance system activity, that would otherwise
2661 * affect the dynamic load balancing.
2663 load
-= comm
->cycl_max
[ddCyclF
];
2670 static void set_slb_pme_dim_f(gmx_domdec_t
*dd
,int dim
,real
**dim_f
)
2672 gmx_domdec_comm_t
*comm
;
2677 snew(*dim_f
,dd
->nc
[dim
]+1);
2679 for(i
=1; i
<dd
->nc
[dim
]; i
++)
2681 if (comm
->slb_frac
[dim
])
2683 (*dim_f
)[i
] = (*dim_f
)[i
-1] + comm
->slb_frac
[dim
][i
-1];
2687 (*dim_f
)[i
] = (real
)i
/(real
)dd
->nc
[dim
];
2690 (*dim_f
)[dd
->nc
[dim
]] = 1;
2693 static void init_ddpme(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,
2694 int dimind
,int nslab
)
2696 int pmeindex
,slab
,nso
,i
;
2699 if (dd
->comm
->npmedecompdim
== 1 && dd
->dim
[0] == YY
)
2705 ddpme
->dim
= dimind
;
2707 ddpme
->dim_match
= (ddpme
->dim
== dd
->dim
[dimind
]);
2709 ddpme
->nslab
= nslab
;
2711 if (ddpme
->nslab
<= 1)
2716 nso
= dd
->comm
->npmenodes
/nslab
;
2717 /* Determine for each PME slab the PP location range for dimension dim */
2718 snew(ddpme
->pp_min
,nslab
);
2719 snew(ddpme
->pp_max
,nslab
);
2720 for(slab
=0; slab
<nslab
; slab
++) {
2721 ddpme
->pp_min
[slab
] = dd
->nc
[dd
->dim
[dimind
]] - 1;
2722 ddpme
->pp_max
[slab
] = 0;
2724 for(i
=0; i
<dd
->nnodes
; i
++) {
2725 ddindex2xyz(dd
->nc
,i
,xyz
);
2726 /* For y only use our y/z slab.
2727 * This assumes that the PME x grid size matches the DD grid size.
2729 if (dimind
== 0 || xyz
[XX
] == dd
->ci
[XX
]) {
2730 pmeindex
= ddindex2pmeindex(dd
,i
);
2732 slab
= pmeindex
/nso
;
2734 slab
= pmeindex
% nslab
;
2736 ddpme
->pp_min
[slab
] = min(ddpme
->pp_min
[slab
],xyz
[dimind
]);
2737 ddpme
->pp_max
[slab
] = max(ddpme
->pp_max
[slab
],xyz
[dimind
]);
2741 set_slb_pme_dim_f(dd
,ddpme
->dim
,&ddpme
->slb_dim_f
);
2744 int dd_pme_maxshift0(gmx_domdec_t
*dd
)
2746 return dd
->comm
->ddpme
[0].maxshift
;
2749 int dd_pme_maxshift1(gmx_domdec_t
*dd
)
2751 /* This should return the maxshift for dim Y,
2752 * where comm indexes ddpme with dimind.
2754 if (dd
->comm
->npmedecompdim
== 1 && dd
->dim
[0] == YY
)
2756 return dd
->comm
->ddpme
[0].maxshift
;
2760 return dd
->comm
->ddpme
[1].maxshift
;
2764 static void set_pme_maxshift(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,
2765 bool bUniform
,gmx_ddbox_t
*ddbox
,real
*cell_f
)
2767 gmx_domdec_comm_t
*comm
;
2770 real range
,pme_boundary
;
2774 nc
= dd
->nc
[ddpme
->dim
];
2777 if (!ddpme
->dim_match
)
2779 /* PP decomposition is not along dim: the worst situation */
2782 else if (ns
<= 3 || (bUniform
&& ns
== nc
))
2784 /* The optimal situation */
2789 /* We need to check for all pme nodes which nodes they
2790 * could possibly need to communicate with.
2792 xmin
= ddpme
->pp_min
;
2793 xmax
= ddpme
->pp_max
;
2794 /* Allow for atoms to be maximally 2/3 times the cut-off
2795 * out of their DD cell. This is a reasonable balance between
2796 * between performance and support for most charge-group/cut-off
2799 range
= 2.0/3.0*comm
->cutoff
/ddbox
->box_size
[ddpme
->dim
];
2800 /* Avoid extra communication when we are exactly at a boundary */
2806 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2807 pme_boundary
= (real
)s
/ns
;
2810 cell_f
[xmax
[s
-(sh
+1) ]+1] + range
> pme_boundary
) ||
2812 cell_f
[xmax
[s
-(sh
+1)+ns
]+1] - 1 + range
> pme_boundary
)))
2816 pme_boundary
= (real
)(s
+1)/ns
;
2819 cell_f
[xmin
[s
+(sh
+1) ] ] - range
< pme_boundary
) ||
2821 cell_f
[xmin
[s
+(sh
+1)-ns
] ] + 1 - range
< pme_boundary
)))
2828 ddpme
->maxshift
= sh
;
2832 fprintf(debug
,"PME slab communication range for dim %d is %d\n",
2833 ddpme
->dim
,ddpme
->maxshift
);
2837 static void check_box_size(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
2841 for(d
=0; d
<dd
->ndim
; d
++)
2844 if (dim
< ddbox
->nboundeddim
&&
2845 ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
] <
2846 dd
->nc
[dim
]*dd
->comm
->cellsize_limit
*DD_CELL_MARGIN
)
2848 gmx_fatal(FARGS
,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2849 dim2char(dim
),ddbox
->box_size
[dim
],ddbox
->skew_fac
[dim
],
2850 dd
->nc
[dim
],dd
->comm
->cellsize_limit
);
2855 static void set_dd_cell_sizes_slb(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
,
2856 bool bMaster
,ivec npulse
)
2858 gmx_domdec_comm_t
*comm
;
2861 real
*cell_x
,cell_dx
,cellsize
;
2865 for(d
=0; d
<DIM
; d
++)
2867 cellsize_min
[d
] = ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
];
2869 if (dd
->nc
[d
] == 1 || comm
->slb_frac
[d
] == NULL
)
2872 cell_dx
= ddbox
->box_size
[d
]/dd
->nc
[d
];
2875 for(j
=0; j
<dd
->nc
[d
]+1; j
++)
2877 dd
->ma
->cell_x
[d
][j
] = ddbox
->box0
[d
] + j
*cell_dx
;
2882 comm
->cell_x0
[d
] = ddbox
->box0
[d
] + (dd
->ci
[d
] )*cell_dx
;
2883 comm
->cell_x1
[d
] = ddbox
->box0
[d
] + (dd
->ci
[d
]+1)*cell_dx
;
2885 cellsize
= cell_dx
*ddbox
->skew_fac
[d
];
2886 while (cellsize
*npulse
[d
] < comm
->cutoff
&& npulse
[d
] < dd
->nc
[d
]-1)
2890 cellsize_min
[d
] = cellsize
;
2894 /* Statically load balanced grid */
2895 /* Also when we are not doing a master distribution we determine
2896 * all cell borders in a loop to obtain identical values
2897 * to the master distribution case and to determine npulse.
2901 cell_x
= dd
->ma
->cell_x
[d
];
2905 snew(cell_x
,dd
->nc
[d
]+1);
2907 cell_x
[0] = ddbox
->box0
[d
];
2908 for(j
=0; j
<dd
->nc
[d
]; j
++)
2910 cell_dx
= ddbox
->box_size
[d
]*comm
->slb_frac
[d
][j
];
2911 cell_x
[j
+1] = cell_x
[j
] + cell_dx
;
2912 cellsize
= cell_dx
*ddbox
->skew_fac
[d
];
2913 while (cellsize
*npulse
[d
] < comm
->cutoff
&&
2914 npulse
[d
] < dd
->nc
[d
]-1)
2918 cellsize_min
[d
] = min(cellsize_min
[d
],cellsize
);
2922 comm
->cell_x0
[d
] = cell_x
[dd
->ci
[d
]];
2923 comm
->cell_x1
[d
] = cell_x
[dd
->ci
[d
]+1];
2927 /* The following limitation is to avoid that a cell would receive
2928 * some of its own home charge groups back over the periodic boundary.
2929 * Double charge groups cause trouble with the global indices.
2931 if (d
< ddbox
->npbcdim
&&
2932 dd
->nc
[d
] > 1 && npulse
[d
] >= dd
->nc
[d
])
2934 gmx_fatal_collective(FARGS
,NULL
,dd
,
2935 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
2936 dim2char(d
),ddbox
->box_size
[d
],ddbox
->skew_fac
[d
],
2938 dd
->nc
[d
],dd
->nc
[d
],
2939 dd
->nnodes
> dd
->nc
[d
] ? "cells" : "processors");
2943 if (!comm
->bDynLoadBal
)
2945 copy_rvec(cellsize_min
,comm
->cellsize_min
);
2948 for(d
=0; d
<comm
->npmedecompdim
; d
++)
2950 set_pme_maxshift(dd
,&comm
->ddpme
[d
],
2951 comm
->slb_frac
[dd
->dim
[d
]]==NULL
,ddbox
,
2952 comm
->ddpme
[d
].slb_dim_f
);
2957 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t
*dd
,
2958 int d
,int dim
,gmx_domdec_root_t
*root
,
2960 bool bUniform
,gmx_large_int_t step
, real cellsize_limit_f
, int range
[])
2962 gmx_domdec_comm_t
*comm
;
2963 int ncd
,i
,j
,nmin
,nmin_old
;
2966 real fac
,halfway
,cellsize_limit_f_i
,region_size
;
2967 bool bPBC
,bLastHi
=FALSE
;
2968 int nrange
[]={range
[0],range
[1]};
2970 region_size
= root
->cell_f
[range
[1]]-root
->cell_f
[range
[0]];
2976 bPBC
= (dim
< ddbox
->npbcdim
);
2978 cell_size
= root
->buf_ncd
;
2982 fprintf(debug
,"enforce_limits: %d %d\n",range
[0],range
[1]);
2985 /* First we need to check if the scaling does not make cells
2986 * smaller than the smallest allowed size.
2987 * We need to do this iteratively, since if a cell is too small,
2988 * it needs to be enlarged, which makes all the other cells smaller,
2989 * which could in turn make another cell smaller than allowed.
2991 for(i
=range
[0]; i
<range
[1]; i
++)
2993 root
->bCellMin
[i
] = FALSE
;
2999 /* We need the total for normalization */
3001 for(i
=range
[0]; i
<range
[1]; i
++)
3003 if (root
->bCellMin
[i
] == FALSE
)
3005 fac
+= cell_size
[i
];
3008 fac
= ( region_size
- nmin
*cellsize_limit_f
)/fac
; /* substracting cells already set to cellsize_limit_f */
3009 /* Determine the cell boundaries */
3010 for(i
=range
[0]; i
<range
[1]; i
++)
3012 if (root
->bCellMin
[i
] == FALSE
)
3014 cell_size
[i
] *= fac
;
3015 if (!bPBC
&& (i
== 0 || i
== dd
->nc
[dim
] -1))
3017 cellsize_limit_f_i
= 0;
3021 cellsize_limit_f_i
= cellsize_limit_f
;
3023 if (cell_size
[i
] < cellsize_limit_f_i
)
3025 root
->bCellMin
[i
] = TRUE
;
3026 cell_size
[i
] = cellsize_limit_f_i
;
3030 root
->cell_f
[i
+1] = root
->cell_f
[i
] + cell_size
[i
];
3033 while (nmin
> nmin_old
);
3036 cell_size
[i
] = root
->cell_f
[i
+1] - root
->cell_f
[i
];
3037 /* For this check we should not use DD_CELL_MARGIN,
3038 * but a slightly smaller factor,
3039 * since rounding could get use below the limit.
3041 if (bPBC
&& cell_size
[i
] < cellsize_limit_f
*DD_CELL_MARGIN2
/DD_CELL_MARGIN
)
3044 gmx_fatal(FARGS
,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3045 gmx_step_str(step
,buf
),
3046 dim2char(dim
),ddbox
->box_size
[dim
],ddbox
->skew_fac
[dim
],
3047 ncd
,comm
->cellsize_min
[dim
]);
3050 root
->bLimited
= (nmin
> 0) || (range
[0]>0) || (range
[1]<ncd
);
3054 /* Check if the boundary did not displace more than halfway
3055 * each of the cells it bounds, as this could cause problems,
3056 * especially when the differences between cell sizes are large.
3057 * If changes are applied, they will not make cells smaller
3058 * than the cut-off, as we check all the boundaries which
3059 * might be affected by a change and if the old state was ok,
3060 * the cells will at most be shrunk back to their old size.
3062 for(i
=range
[0]+1; i
<range
[1]; i
++)
3064 halfway
= 0.5*(root
->old_cell_f
[i
] + root
->old_cell_f
[i
-1]);
3065 if (root
->cell_f
[i
] < halfway
)
3067 root
->cell_f
[i
] = halfway
;
3068 /* Check if the change also causes shifts of the next boundaries */
3069 for(j
=i
+1; j
<range
[1]; j
++)
3071 if (root
->cell_f
[j
] < root
->cell_f
[j
-1] + cellsize_limit_f
)
3072 root
->cell_f
[j
] = root
->cell_f
[j
-1] + cellsize_limit_f
;
3075 halfway
= 0.5*(root
->old_cell_f
[i
] + root
->old_cell_f
[i
+1]);
3076 if (root
->cell_f
[i
] > halfway
)
3078 root
->cell_f
[i
] = halfway
;
3079 /* Check if the change also causes shifts of the next boundaries */
3080 for(j
=i
-1; j
>=range
[0]+1; j
--)
3082 if (root
->cell_f
[j
] > root
->cell_f
[j
+1] - cellsize_limit_f
)
3083 root
->cell_f
[j
] = root
->cell_f
[j
+1] - cellsize_limit_f
;
3089 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3090 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3091 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3092 * for a and b nrange is used */
3095 /* Take care of the staggering of the cell boundaries */
3098 for(i
=range
[0]; i
<range
[1]; i
++)
3100 root
->cell_f_max0
[i
] = root
->cell_f
[i
];
3101 root
->cell_f_min1
[i
] = root
->cell_f
[i
+1];
3106 for(i
=range
[0]+1; i
<range
[1]; i
++)
3108 bLimLo
= (root
->cell_f
[i
] < root
->bound_min
[i
]);
3109 bLimHi
= (root
->cell_f
[i
] > root
->bound_max
[i
]);
3110 if (bLimLo
&& bLimHi
)
3112 /* Both limits violated, try the best we can */
3113 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3114 root
->cell_f
[i
] = 0.5*(root
->bound_min
[i
] + root
->bound_max
[i
]);
3117 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3121 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3127 /* root->cell_f[i] = root->bound_min[i]; */
3128 nrange
[1]=i
; /* only store violation location. There could be a LimLo violation following with an higher index */
3131 else if (bLimHi
&& !bLastHi
)
3134 if (nrange
[1] < range
[1]) /* found a LimLo before */
3136 root
->cell_f
[nrange
[1]] = root
->bound_min
[nrange
[1]];
3137 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3138 nrange
[0]=nrange
[1];
3140 root
->cell_f
[i
] = root
->bound_max
[i
];
3142 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3147 if (nrange
[1] < range
[1]) /* found last a LimLo */
3149 root
->cell_f
[nrange
[1]] = root
->bound_min
[nrange
[1]];
3150 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3151 nrange
[0]=nrange
[1];
3153 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3155 else if (nrange
[0] > range
[0]) /* found at least one LimHi */
3157 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3164 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t
*dd
,
3165 int d
,int dim
,gmx_domdec_root_t
*root
,
3166 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3167 bool bUniform
,gmx_large_int_t step
)
3169 gmx_domdec_comm_t
*comm
;
3172 real load_aver
,load_i
,imbalance
,change
,change_max
,sc
;
3173 real cellsize_limit_f
,dist_min_f
,dist_min_f_hard
,space
;
3174 real change_limit
= 0.1;
3177 int range
[] = { 0, 0 };
3183 bPBC
= (dim
< ddbox
->npbcdim
);
3185 cell_size
= root
->buf_ncd
;
3187 /* Store the original boundaries */
3188 for(i
=0; i
<ncd
+1; i
++)
3190 root
->old_cell_f
[i
] = root
->cell_f
[i
];
3193 for(i
=0; i
<ncd
; i
++)
3195 cell_size
[i
] = 1.0/ncd
;
3198 else if (dd_load_count(comm
))
3200 load_aver
= comm
->load
[d
].sum_m
/ncd
;
3202 for(i
=0; i
<ncd
; i
++)
3204 /* Determine the relative imbalance of cell i */
3205 load_i
= comm
->load
[d
].load
[i
*comm
->load
[d
].nload
+2];
3206 imbalance
= (load_i
- load_aver
)/(load_aver
>0 ? load_aver
: 1);
3207 /* Determine the change of the cell size using underrelaxation */
3208 change
= -relax
*imbalance
;
3209 change_max
= max(change_max
,max(change
,-change
));
3211 /* Limit the amount of scaling.
3212 * We need to use the same rescaling for all cells in one row,
3213 * otherwise the load balancing might not converge.
3216 if (change_max
> change_limit
)
3218 sc
*= change_limit
/change_max
;
3220 for(i
=0; i
<ncd
; i
++)
3222 /* Determine the relative imbalance of cell i */
3223 load_i
= comm
->load
[d
].load
[i
*comm
->load
[d
].nload
+2];
3224 imbalance
= (load_i
- load_aver
)/(load_aver
>0 ? load_aver
: 1);
3225 /* Determine the change of the cell size using underrelaxation */
3226 change
= -sc
*imbalance
;
3227 cell_size
[i
] = (root
->cell_f
[i
+1]-root
->cell_f
[i
])*(1 + change
);
3231 cellsize_limit_f
= comm
->cellsize_min
[dim
]/ddbox
->box_size
[dim
];
3232 cellsize_limit_f
*= DD_CELL_MARGIN
;
3233 dist_min_f_hard
= grid_jump_limit(comm
,d
)/ddbox
->box_size
[dim
];
3234 dist_min_f
= dist_min_f_hard
* DD_CELL_MARGIN
;
3235 if (ddbox
->tric_dir
[dim
])
3237 cellsize_limit_f
/= ddbox
->skew_fac
[dim
];
3238 dist_min_f
/= ddbox
->skew_fac
[dim
];
3240 if (bDynamicBox
&& d
> 0)
3242 dist_min_f
*= DD_PRES_SCALE_MARGIN
;
3244 if (d
> 0 && !bUniform
)
3246 /* Make sure that the grid is not shifted too much */
3247 for(i
=1; i
<ncd
; i
++) {
3248 if (root
->cell_f_min1
[i
] - root
->cell_f_max0
[i
-1] < 2 * dist_min_f_hard
)
3250 gmx_incons("Inconsistent DD boundary staggering limits!");
3252 root
->bound_min
[i
] = root
->cell_f_max0
[i
-1] + dist_min_f
;
3253 space
= root
->cell_f
[i
] - (root
->cell_f_max0
[i
-1] + dist_min_f
);
3255 root
->bound_min
[i
] += 0.5*space
;
3257 root
->bound_max
[i
] = root
->cell_f_min1
[i
] - dist_min_f
;
3258 space
= root
->cell_f
[i
] - (root
->cell_f_min1
[i
] - dist_min_f
);
3260 root
->bound_max
[i
] += 0.5*space
;
3265 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3267 root
->cell_f_max0
[i
-1] + dist_min_f
,
3268 root
->bound_min
[i
],root
->cell_f
[i
],root
->bound_max
[i
],
3269 root
->cell_f_min1
[i
] - dist_min_f
);
3274 root
->cell_f
[0] = 0;
3275 root
->cell_f
[ncd
] = 1;
3276 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, range
);
3279 /* After the checks above, the cells should obey the cut-off
3280 * restrictions, but it does not hurt to check.
3282 for(i
=0; i
<ncd
; i
++)
3286 fprintf(debug
,"Relative bounds dim %d cell %d: %f %f\n",
3287 dim
,i
,root
->cell_f
[i
],root
->cell_f
[i
+1]);
3290 if ((bPBC
|| (i
!= 0 && i
!= dd
->nc
[dim
]-1)) &&
3291 root
->cell_f
[i
+1] - root
->cell_f
[i
] <
3292 cellsize_limit_f
/DD_CELL_MARGIN
)
3296 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3297 gmx_step_str(step
,buf
),dim2char(dim
),i
,
3298 (root
->cell_f
[i
+1] - root
->cell_f
[i
])
3299 *ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
]);
3304 /* Store the cell boundaries of the lower dimensions at the end */
3305 for(d1
=0; d1
<d
; d1
++)
3307 root
->cell_f
[pos
++] = comm
->cell_f0
[d1
];
3308 root
->cell_f
[pos
++] = comm
->cell_f1
[d1
];
3311 if (d
< comm
->npmedecompdim
)
3313 /* The master determines the maximum shift for
3314 * the coordinate communication between separate PME nodes.
3316 set_pme_maxshift(dd
,&comm
->ddpme
[d
],bUniform
,ddbox
,root
->cell_f
);
3318 root
->cell_f
[pos
++] = comm
->ddpme
[0].maxshift
;
3321 root
->cell_f
[pos
++] = comm
->ddpme
[1].maxshift
;
3325 static void relative_to_absolute_cell_bounds(gmx_domdec_t
*dd
,
3326 gmx_ddbox_t
*ddbox
,int dimind
)
3328 gmx_domdec_comm_t
*comm
;
3333 /* Set the cell dimensions */
3334 dim
= dd
->dim
[dimind
];
3335 comm
->cell_x0
[dim
] = comm
->cell_f0
[dimind
]*ddbox
->box_size
[dim
];
3336 comm
->cell_x1
[dim
] = comm
->cell_f1
[dimind
]*ddbox
->box_size
[dim
];
3337 if (dim
>= ddbox
->nboundeddim
)
3339 comm
->cell_x0
[dim
] += ddbox
->box0
[dim
];
3340 comm
->cell_x1
[dim
] += ddbox
->box0
[dim
];
3344 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t
*dd
,
3345 int d
,int dim
,real
*cell_f_row
,
3348 gmx_domdec_comm_t
*comm
;
3354 /* Each node would only need to know two fractions,
3355 * but it is probably cheaper to broadcast the whole array.
3357 MPI_Bcast(cell_f_row
,DD_CELL_F_SIZE(dd
,d
)*sizeof(real
),MPI_BYTE
,
3358 0,comm
->mpi_comm_load
[d
]);
3360 /* Copy the fractions for this dimension from the buffer */
3361 comm
->cell_f0
[d
] = cell_f_row
[dd
->ci
[dim
] ];
3362 comm
->cell_f1
[d
] = cell_f_row
[dd
->ci
[dim
]+1];
3363 /* The whole array was communicated, so set the buffer position */
3364 pos
= dd
->nc
[dim
] + 1;
3365 for(d1
=0; d1
<=d
; d1
++)
3369 /* Copy the cell fractions of the lower dimensions */
3370 comm
->cell_f0
[d1
] = cell_f_row
[pos
++];
3371 comm
->cell_f1
[d1
] = cell_f_row
[pos
++];
3373 relative_to_absolute_cell_bounds(dd
,ddbox
,d1
);
3375 /* Convert the communicated shift from float to int */
3376 comm
->ddpme
[0].maxshift
= (int)(cell_f_row
[pos
++] + 0.5);
3379 comm
->ddpme
[1].maxshift
= (int)(cell_f_row
[pos
++] + 0.5);
3383 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t
*dd
,
3384 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3385 bool bUniform
,gmx_large_int_t step
)
3387 gmx_domdec_comm_t
*comm
;
3389 bool bRowMember
,bRowRoot
;
3394 for(d
=0; d
<dd
->ndim
; d
++)
3399 for(d1
=d
; d1
<dd
->ndim
; d1
++)
3401 if (dd
->ci
[dd
->dim
[d1
]] > 0)
3414 set_dd_cell_sizes_dlb_root(dd
,d
,dim
,comm
->root
[d
],
3415 ddbox
,bDynamicBox
,bUniform
,step
);
3416 cell_f_row
= comm
->root
[d
]->cell_f
;
3420 cell_f_row
= comm
->cell_f_row
;
3422 distribute_dd_cell_sizes_dlb(dd
,d
,dim
,cell_f_row
,ddbox
);
3427 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
3431 /* This function assumes the box is static and should therefore
3432 * not be called when the box has changed since the last
3433 * call to dd_partition_system.
3435 for(d
=0; d
<dd
->ndim
; d
++)
3437 relative_to_absolute_cell_bounds(dd
,ddbox
,d
);
3443 static void set_dd_cell_sizes_dlb(gmx_domdec_t
*dd
,
3444 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3445 bool bUniform
,bool bDoDLB
,gmx_large_int_t step
,
3446 gmx_wallcycle_t wcycle
)
3448 gmx_domdec_comm_t
*comm
;
3455 wallcycle_start(wcycle
,ewcDDCOMMBOUND
);
3456 set_dd_cell_sizes_dlb_change(dd
,ddbox
,bDynamicBox
,bUniform
,step
);
3457 wallcycle_stop(wcycle
,ewcDDCOMMBOUND
);
3459 else if (bDynamicBox
)
3461 set_dd_cell_sizes_dlb_nochange(dd
,ddbox
);
3464 /* Set the dimensions for which no DD is used */
3465 for(dim
=0; dim
<DIM
; dim
++) {
3466 if (dd
->nc
[dim
] == 1) {
3467 comm
->cell_x0
[dim
] = 0;
3468 comm
->cell_x1
[dim
] = ddbox
->box_size
[dim
];
3469 if (dim
>= ddbox
->nboundeddim
)
3471 comm
->cell_x0
[dim
] += ddbox
->box0
[dim
];
3472 comm
->cell_x1
[dim
] += ddbox
->box0
[dim
];
3478 static void realloc_comm_ind(gmx_domdec_t
*dd
,ivec npulse
)
3481 gmx_domdec_comm_dim_t
*cd
;
3483 for(d
=0; d
<dd
->ndim
; d
++)
3485 cd
= &dd
->comm
->cd
[d
];
3486 np
= npulse
[dd
->dim
[d
]];
3487 if (np
> cd
->np_nalloc
)
3491 fprintf(debug
,"(Re)allocing cd for %c to %d pulses\n",
3492 dim2char(dd
->dim
[d
]),np
);
3494 if (DDMASTER(dd
) && cd
->np_nalloc
> 0)
3496 fprintf(stderr
,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd
->dim
[d
]),np
);
3499 for(i
=cd
->np_nalloc
; i
<np
; i
++)
3501 cd
->ind
[i
].index
= NULL
;
3502 cd
->ind
[i
].nalloc
= 0;
3511 static void set_dd_cell_sizes(gmx_domdec_t
*dd
,
3512 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3513 bool bUniform
,bool bDoDLB
,gmx_large_int_t step
,
3514 gmx_wallcycle_t wcycle
)
3516 gmx_domdec_comm_t
*comm
;
3522 /* Copy the old cell boundaries for the cg displacement check */
3523 copy_rvec(comm
->cell_x0
,comm
->old_cell_x0
);
3524 copy_rvec(comm
->cell_x1
,comm
->old_cell_x1
);
3526 if (comm
->bDynLoadBal
)
3530 check_box_size(dd
,ddbox
);
3532 set_dd_cell_sizes_dlb(dd
,ddbox
,bDynamicBox
,bUniform
,bDoDLB
,step
,wcycle
);
3536 set_dd_cell_sizes_slb(dd
,ddbox
,FALSE
,npulse
);
3537 realloc_comm_ind(dd
,npulse
);
3542 for(d
=0; d
<DIM
; d
++)
3544 fprintf(debug
,"cell_x[%d] %f - %f skew_fac %f\n",
3545 d
,comm
->cell_x0
[d
],comm
->cell_x1
[d
],ddbox
->skew_fac
[d
]);
3550 static void comm_dd_ns_cell_sizes(gmx_domdec_t
*dd
,
3552 rvec cell_ns_x0
,rvec cell_ns_x1
,
3553 gmx_large_int_t step
)
3555 gmx_domdec_comm_t
*comm
;
3560 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
3562 dim
= dd
->dim
[dim_ind
];
3564 /* Without PBC we don't have restrictions on the outer cells */
3565 if (!(dim
>= ddbox
->npbcdim
&&
3566 (dd
->ci
[dim
] == 0 || dd
->ci
[dim
] == dd
->nc
[dim
] - 1)) &&
3567 comm
->bDynLoadBal
&&
3568 (comm
->cell_x1
[dim
] - comm
->cell_x0
[dim
])*ddbox
->skew_fac
[dim
] <
3569 comm
->cellsize_min
[dim
])
3572 gmx_fatal(FARGS
,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3573 gmx_step_str(step
,buf
),dim2char(dim
),
3574 comm
->cell_x1
[dim
] - comm
->cell_x0
[dim
],
3575 ddbox
->skew_fac
[dim
],
3576 dd
->comm
->cellsize_min
[dim
],
3577 dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
3581 if ((dd
->bGridJump
&& dd
->ndim
> 1) || ddbox
->nboundeddim
< DIM
)
3583 /* Communicate the boundaries and update cell_ns_x0/1 */
3584 dd_move_cellx(dd
,ddbox
,cell_ns_x0
,cell_ns_x1
);
3585 if (dd
->bGridJump
&& dd
->ndim
> 1)
3587 check_grid_jump(step
,dd
,ddbox
);
3592 static void make_tric_corr_matrix(int npbcdim
,matrix box
,matrix tcm
)
3596 tcm
[YY
][XX
] = -box
[YY
][XX
]/box
[YY
][YY
];
3604 tcm
[ZZ
][XX
] = -(box
[ZZ
][YY
]*tcm
[YY
][XX
] + box
[ZZ
][XX
])/box
[ZZ
][ZZ
];
3605 tcm
[ZZ
][YY
] = -box
[ZZ
][YY
]/box
[ZZ
][ZZ
];
3614 static void check_screw_box(matrix box
)
3616 /* Mathematical limitation */
3617 if (box
[YY
][XX
] != 0 || box
[ZZ
][XX
] != 0)
3619 gmx_fatal(FARGS
,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3622 /* Limitation due to the asymmetry of the eighth shell method */
3623 if (box
[ZZ
][YY
] != 0)
3625 gmx_fatal(FARGS
,"pbc=screw with non-zero box_zy is not supported");
3629 static void distribute_cg(FILE *fplog
,gmx_large_int_t step
,
3630 matrix box
,ivec tric_dir
,t_block
*cgs
,rvec pos
[],
3633 gmx_domdec_master_t
*ma
;
3634 int **tmp_ind
=NULL
,*tmp_nalloc
=NULL
;
3635 int i
,icg
,j
,k
,k0
,k1
,d
,npbcdim
;
3637 rvec box_size
,cg_cm
;
3639 real nrcg
,inv_ncg
,pos_d
;
3641 bool bUnbounded
,bScrew
;
3645 if (tmp_ind
== NULL
)
3647 snew(tmp_nalloc
,dd
->nnodes
);
3648 snew(tmp_ind
,dd
->nnodes
);
3649 for(i
=0; i
<dd
->nnodes
; i
++)
3651 tmp_nalloc
[i
] = over_alloc_large(cgs
->nr
/dd
->nnodes
+1);
3652 snew(tmp_ind
[i
],tmp_nalloc
[i
]);
3656 /* Clear the count */
3657 for(i
=0; i
<dd
->nnodes
; i
++)
3663 make_tric_corr_matrix(dd
->npbcdim
,box
,tcm
);
3665 cgindex
= cgs
->index
;
3667 /* Compute the center of geometry for all charge groups */
3668 for(icg
=0; icg
<cgs
->nr
; icg
++)
3671 k1
= cgindex
[icg
+1];
3675 copy_rvec(pos
[k0
],cg_cm
);
3682 for(k
=k0
; (k
<k1
); k
++)
3684 rvec_inc(cg_cm
,pos
[k
]);
3686 for(d
=0; (d
<DIM
); d
++)
3688 cg_cm
[d
] *= inv_ncg
;
3691 /* Put the charge group in the box and determine the cell index */
3692 for(d
=DIM
-1; d
>=0; d
--) {
3694 if (d
< dd
->npbcdim
)
3696 bScrew
= (dd
->bScrewPBC
&& d
== XX
);
3697 if (tric_dir
[d
] && dd
->nc
[d
] > 1)
3699 /* Use triclinic coordintates for this dimension */
3700 for(j
=d
+1; j
<DIM
; j
++)
3702 pos_d
+= cg_cm
[j
]*tcm
[j
][d
];
3705 while(pos_d
>= box
[d
][d
])
3708 rvec_dec(cg_cm
,box
[d
]);
3711 cg_cm
[YY
] = box
[YY
][YY
] - cg_cm
[YY
];
3712 cg_cm
[ZZ
] = box
[ZZ
][ZZ
] - cg_cm
[ZZ
];
3714 for(k
=k0
; (k
<k1
); k
++)
3716 rvec_dec(pos
[k
],box
[d
]);
3719 pos
[k
][YY
] = box
[YY
][YY
] - pos
[k
][YY
];
3720 pos
[k
][ZZ
] = box
[ZZ
][ZZ
] - pos
[k
][ZZ
];
3727 rvec_inc(cg_cm
,box
[d
]);
3730 cg_cm
[YY
] = box
[YY
][YY
] - cg_cm
[YY
];
3731 cg_cm
[ZZ
] = box
[ZZ
][ZZ
] - cg_cm
[ZZ
];
3733 for(k
=k0
; (k
<k1
); k
++)
3735 rvec_inc(pos
[k
],box
[d
]);
3737 pos
[k
][YY
] = box
[YY
][YY
] - pos
[k
][YY
];
3738 pos
[k
][ZZ
] = box
[ZZ
][ZZ
] - pos
[k
][ZZ
];
3743 /* This could be done more efficiently */
3745 while(ind
[d
]+1 < dd
->nc
[d
] && pos_d
>= ma
->cell_x
[d
][ind
[d
]+1])
3750 i
= dd_index(dd
->nc
,ind
);
3751 if (ma
->ncg
[i
] == tmp_nalloc
[i
])
3753 tmp_nalloc
[i
] = over_alloc_large(ma
->ncg
[i
]+1);
3754 srenew(tmp_ind
[i
],tmp_nalloc
[i
]);
3756 tmp_ind
[i
][ma
->ncg
[i
]] = icg
;
3758 ma
->nat
[i
] += cgindex
[icg
+1] - cgindex
[icg
];
3762 for(i
=0; i
<dd
->nnodes
; i
++)
3765 for(k
=0; k
<ma
->ncg
[i
]; k
++)
3767 ma
->cg
[k1
++] = tmp_ind
[i
][k
];
3770 ma
->index
[dd
->nnodes
] = k1
;
3772 for(i
=0; i
<dd
->nnodes
; i
++)
3782 fprintf(fplog
,"Charge group distribution at step %s:",
3783 gmx_step_str(step
,buf
));
3784 for(i
=0; i
<dd
->nnodes
; i
++)
3786 fprintf(fplog
," %d",ma
->ncg
[i
]);
3788 fprintf(fplog
,"\n");
3792 static void get_cg_distribution(FILE *fplog
,gmx_large_int_t step
,gmx_domdec_t
*dd
,
3793 t_block
*cgs
,matrix box
,gmx_ddbox_t
*ddbox
,
3796 gmx_domdec_master_t
*ma
=NULL
;
3799 int *ibuf
,buf2
[2] = { 0, 0 };
3807 check_screw_box(box
);
3810 set_dd_cell_sizes_slb(dd
,ddbox
,TRUE
,npulse
);
3812 distribute_cg(fplog
,step
,box
,ddbox
->tric_dir
,cgs
,pos
,dd
);
3813 for(i
=0; i
<dd
->nnodes
; i
++)
3815 ma
->ibuf
[2*i
] = ma
->ncg
[i
];
3816 ma
->ibuf
[2*i
+1] = ma
->nat
[i
];
3824 dd_scatter(dd
,2*sizeof(int),ibuf
,buf2
);
3826 dd
->ncg_home
= buf2
[0];
3827 dd
->nat_home
= buf2
[1];
3828 dd
->ncg_tot
= dd
->ncg_home
;
3829 dd
->nat_tot
= dd
->nat_home
;
3830 if (dd
->ncg_home
> dd
->cg_nalloc
|| dd
->cg_nalloc
== 0)
3832 dd
->cg_nalloc
= over_alloc_dd(dd
->ncg_home
);
3833 srenew(dd
->index_gl
,dd
->cg_nalloc
);
3834 srenew(dd
->cgindex
,dd
->cg_nalloc
+1);
3838 for(i
=0; i
<dd
->nnodes
; i
++)
3840 ma
->ibuf
[i
] = ma
->ncg
[i
]*sizeof(int);
3841 ma
->ibuf
[dd
->nnodes
+i
] = ma
->index
[i
]*sizeof(int);
3846 DDMASTER(dd
) ? ma
->ibuf
: NULL
,
3847 DDMASTER(dd
) ? ma
->ibuf
+dd
->nnodes
: NULL
,
3848 DDMASTER(dd
) ? ma
->cg
: NULL
,
3849 dd
->ncg_home
*sizeof(int),dd
->index_gl
);
3851 /* Determine the home charge group sizes */
3853 for(i
=0; i
<dd
->ncg_home
; i
++)
3855 cg_gl
= dd
->index_gl
[i
];
3857 dd
->cgindex
[i
] + cgs
->index
[cg_gl
+1] - cgs
->index
[cg_gl
];
3862 fprintf(debug
,"Home charge groups:\n");
3863 for(i
=0; i
<dd
->ncg_home
; i
++)
3865 fprintf(debug
," %d",dd
->index_gl
[i
]);
3867 fprintf(debug
,"\n");
3869 fprintf(debug
,"\n");
3873 static int compact_and_copy_vec_at(int ncg
,int *move
,
3876 rvec
*src
,gmx_domdec_comm_t
*comm
,
3879 int m
,icg
,i
,i0
,i1
,nrcg
;
3885 for(m
=0; m
<DIM
*2; m
++)
3891 for(icg
=0; icg
<ncg
; icg
++)
3893 i1
= cgindex
[icg
+1];
3899 /* Compact the home array in place */
3900 for(i
=i0
; i
<i1
; i
++)
3902 copy_rvec(src
[i
],src
[home_pos
++]);
3908 /* Copy to the communication buffer */
3910 pos_vec
[m
] += 1 + vec
*nrcg
;
3911 for(i
=i0
; i
<i1
; i
++)
3913 copy_rvec(src
[i
],comm
->cgcm_state
[m
][pos_vec
[m
]++]);
3915 pos_vec
[m
] += (nvec
- vec
- 1)*nrcg
;
3919 home_pos
+= i1
- i0
;
3927 static int compact_and_copy_vec_cg(int ncg
,int *move
,
3929 int nvec
,rvec
*src
,gmx_domdec_comm_t
*comm
,
3932 int m
,icg
,i0
,i1
,nrcg
;
3938 for(m
=0; m
<DIM
*2; m
++)
3944 for(icg
=0; icg
<ncg
; icg
++)
3946 i1
= cgindex
[icg
+1];
3952 /* Compact the home array in place */
3953 copy_rvec(src
[icg
],src
[home_pos
++]);
3959 /* Copy to the communication buffer */
3960 copy_rvec(src
[icg
],comm
->cgcm_state
[m
][pos_vec
[m
]]);
3961 pos_vec
[m
] += 1 + nrcg
*nvec
;
3973 static int compact_ind(int ncg
,int *move
,
3974 int *index_gl
,int *cgindex
,
3976 gmx_ga2la_t ga2la
,char *bLocalCG
,
3979 int cg
,nat
,a0
,a1
,a
,a_gl
;
3984 for(cg
=0; cg
<ncg
; cg
++)
3990 /* Compact the home arrays in place.
3991 * Anything that can be done here avoids access to global arrays.
3993 cgindex
[home_pos
] = nat
;
3994 for(a
=a0
; a
<a1
; a
++)
3997 gatindex
[nat
] = a_gl
;
3998 /* The cell number stays 0, so we don't need to set it */
3999 ga2la_change_la(ga2la
,a_gl
,nat
);
4002 index_gl
[home_pos
] = index_gl
[cg
];
4003 cginfo
[home_pos
] = cginfo
[cg
];
4004 /* The charge group remains local, so bLocalCG does not change */
4009 /* Clear the global indices */
4010 for(a
=a0
; a
<a1
; a
++)
4012 ga2la_del(ga2la
,gatindex
[a
]);
4016 bLocalCG
[index_gl
[cg
]] = FALSE
;
4020 cgindex
[home_pos
] = nat
;
4025 static void clear_and_mark_ind(int ncg
,int *move
,
4026 int *index_gl
,int *cgindex
,int *gatindex
,
4027 gmx_ga2la_t ga2la
,char *bLocalCG
,
4032 for(cg
=0; cg
<ncg
; cg
++)
4038 /* Clear the global indices */
4039 for(a
=a0
; a
<a1
; a
++)
4041 ga2la_del(ga2la
,gatindex
[a
]);
4045 bLocalCG
[index_gl
[cg
]] = FALSE
;
4047 /* Signal that this cg has moved using the ns cell index.
4048 * Here we set it to -1.
4049 * fill_grid will change it from -1 to 4*grid->ncells.
4051 cell_index
[cg
] = -1;
4056 static void print_cg_move(FILE *fplog
,
4058 gmx_large_int_t step
,int cg
,int dim
,int dir
,
4059 bool bHaveLimitdAndCMOld
,real limitd
,
4060 rvec cm_old
,rvec cm_new
,real pos_d
)
4062 gmx_domdec_comm_t
*comm
;
4067 fprintf(fplog
,"\nStep %s:\n",gmx_step_str(step
,buf
));
4068 if (bHaveLimitdAndCMOld
)
4070 fprintf(fplog
,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition (%f) in direction %c\n",
4071 ddglatnr(dd
,dd
->cgindex
[cg
]),limitd
,dim2char(dim
));
4075 fprintf(fplog
,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4076 ddglatnr(dd
,dd
->cgindex
[cg
]),dim2char(dim
));
4078 fprintf(fplog
,"distance out of cell %f\n",
4079 dir
==1 ? pos_d
- comm
->cell_x1
[dim
] : pos_d
- comm
->cell_x0
[dim
]);
4080 if (bHaveLimitdAndCMOld
)
4082 fprintf(fplog
,"Old coordinates: %8.3f %8.3f %8.3f\n",
4083 cm_old
[XX
],cm_old
[YY
],cm_old
[ZZ
]);
4085 fprintf(fplog
,"New coordinates: %8.3f %8.3f %8.3f\n",
4086 cm_new
[XX
],cm_new
[YY
],cm_new
[ZZ
]);
4087 fprintf(fplog
,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
4089 comm
->old_cell_x0
[dim
],comm
->old_cell_x1
[dim
]);
4090 fprintf(fplog
,"New cell boundaries in direction %c: %8.3f %8.3f\n",
4092 comm
->cell_x0
[dim
],comm
->cell_x1
[dim
]);
4095 static void cg_move_error(FILE *fplog
,
4097 gmx_large_int_t step
,int cg
,int dim
,int dir
,
4098 bool bHaveLimitdAndCMOld
,real limitd
,
4099 rvec cm_old
,rvec cm_new
,real pos_d
)
4103 print_cg_move(fplog
, dd
,step
,cg
,dim
,dir
,
4104 bHaveLimitdAndCMOld
,limitd
,cm_old
,cm_new
,pos_d
);
4106 print_cg_move(stderr
,dd
,step
,cg
,dim
,dir
,
4107 bHaveLimitdAndCMOld
,limitd
,cm_old
,cm_new
,pos_d
);
4109 "A charge group moved too far between two domain decomposition steps\n"
4110 "This usually means that your system is not well equilibrated");
4113 static void rotate_state_atom(t_state
*state
,int a
)
4117 for(est
=0; est
<estNR
; est
++)
4119 if (EST_DISTR(est
) && state
->flags
& (1<<est
)) {
4122 /* Rotate the complete state; for a rectangular box only */
4123 state
->x
[a
][YY
] = state
->box
[YY
][YY
] - state
->x
[a
][YY
];
4124 state
->x
[a
][ZZ
] = state
->box
[ZZ
][ZZ
] - state
->x
[a
][ZZ
];
4127 state
->v
[a
][YY
] = -state
->v
[a
][YY
];
4128 state
->v
[a
][ZZ
] = -state
->v
[a
][ZZ
];
4131 state
->sd_X
[a
][YY
] = -state
->sd_X
[a
][YY
];
4132 state
->sd_X
[a
][ZZ
] = -state
->sd_X
[a
][ZZ
];
4135 state
->cg_p
[a
][YY
] = -state
->cg_p
[a
][YY
];
4136 state
->cg_p
[a
][ZZ
] = -state
->cg_p
[a
][ZZ
];
4138 case estDISRE_INITF
:
4139 case estDISRE_RM3TAV
:
4140 case estORIRE_INITF
:
4142 /* These are distances, so not affected by rotation */
4145 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4151 static int dd_redistribute_cg(FILE *fplog
,gmx_large_int_t step
,
4152 gmx_domdec_t
*dd
,ivec tric_dir
,
4153 t_state
*state
,rvec
**f
,
4154 t_forcerec
*fr
,t_mdatoms
*md
,
4160 int ncg
[DIM
*2],nat
[DIM
*2];
4161 int c
,i
,cg
,k
,k0
,k1
,d
,dim
,dim2
,dir
,d2
,d3
,d4
,cell_d
;
4162 int mc
,cdd
,nrcg
,ncg_recv
,nat_recv
,nvs
,nvr
,nvec
,vec
;
4163 int sbuf
[2],rbuf
[2];
4164 int home_pos_cg
,home_pos_at
,ncg_stay_home
,buf_pos
;
4166 bool bV
=FALSE
,bSDX
=FALSE
,bCGP
=FALSE
;
4171 rvec
*cg_cm
,cell_x0
,cell_x1
,limitd
,limit0
,limit1
,cm_new
;
4173 cginfo_mb_t
*cginfo_mb
;
4174 gmx_domdec_comm_t
*comm
;
4178 check_screw_box(state
->box
);
4184 for(i
=0; i
<estNR
; i
++)
4190 case estX
: /* Always present */ break;
4191 case estV
: bV
= (state
->flags
& (1<<i
)); break;
4192 case estSDX
: bSDX
= (state
->flags
& (1<<i
)); break;
4193 case estCGP
: bCGP
= (state
->flags
& (1<<i
)); break;
4196 case estDISRE_INITF
:
4197 case estDISRE_RM3TAV
:
4198 case estORIRE_INITF
:
4200 /* No processing required */
4203 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4208 if (dd
->ncg_tot
> comm
->nalloc_int
)
4210 comm
->nalloc_int
= over_alloc_dd(dd
->ncg_tot
);
4211 srenew(comm
->buf_int
,comm
->nalloc_int
);
4213 move
= comm
->buf_int
;
4215 /* Clear the count */
4216 for(c
=0; c
<dd
->ndim
*2; c
++)
4222 npbcdim
= dd
->npbcdim
;
4224 for(d
=0; (d
<DIM
); d
++)
4226 limitd
[d
] = dd
->comm
->cellsize_min
[d
];
4227 if (d
>= npbcdim
&& dd
->ci
[d
] == 0)
4229 cell_x0
[d
] = -GMX_FLOAT_MAX
;
4233 cell_x0
[d
] = comm
->cell_x0
[d
];
4235 if (d
>= npbcdim
&& dd
->ci
[d
] == dd
->nc
[d
] - 1)
4237 cell_x1
[d
] = GMX_FLOAT_MAX
;
4241 cell_x1
[d
] = comm
->cell_x1
[d
];
4245 limit0
[d
] = comm
->old_cell_x0
[d
] - limitd
[d
];
4246 limit1
[d
] = comm
->old_cell_x1
[d
] + limitd
[d
];
4250 /* We check after communication if a charge group moved
4251 * more than one cell. Set the pre-comm check limit to float_max.
4253 limit0
[d
] = -GMX_FLOAT_MAX
;
4254 limit1
[d
] = GMX_FLOAT_MAX
;
4258 make_tric_corr_matrix(npbcdim
,state
->box
,tcm
);
4260 cgindex
= dd
->cgindex
;
4262 /* Compute the center of geometry for all home charge groups
4263 * and put them in the box and determine where they should go.
4265 for(cg
=0; cg
<dd
->ncg_home
; cg
++)
4272 copy_rvec(state
->x
[k0
],cm_new
);
4279 for(k
=k0
; (k
<k1
); k
++)
4281 rvec_inc(cm_new
,state
->x
[k
]);
4283 for(d
=0; (d
<DIM
); d
++)
4285 cm_new
[d
] = inv_ncg
*cm_new
[d
];
4290 /* Do pbc and check DD cell boundary crossings */
4291 for(d
=DIM
-1; d
>=0; d
--)
4295 bScrew
= (dd
->bScrewPBC
&& d
== XX
);
4296 /* Determine the location of this cg in lattice coordinates */
4300 for(d2
=d
+1; d2
<DIM
; d2
++)
4302 pos_d
+= cm_new
[d2
]*tcm
[d2
][d
];
4305 /* Put the charge group in the triclinic unit-cell */
4306 if (pos_d
>= cell_x1
[d
])
4308 if (pos_d
>= limit1
[d
])
4310 cg_move_error(fplog
,dd
,step
,cg
,d
,1,TRUE
,limitd
[d
],
4311 cg_cm
[cg
],cm_new
,pos_d
);
4314 if (dd
->ci
[d
] == dd
->nc
[d
] - 1)
4316 rvec_dec(cm_new
,state
->box
[d
]);
4319 cm_new
[YY
] = state
->box
[YY
][YY
] - cm_new
[YY
];
4320 cm_new
[ZZ
] = state
->box
[ZZ
][ZZ
] - cm_new
[ZZ
];
4322 for(k
=k0
; (k
<k1
); k
++)
4324 rvec_dec(state
->x
[k
],state
->box
[d
]);
4327 rotate_state_atom(state
,k
);
4332 else if (pos_d
< cell_x0
[d
])
4334 if (pos_d
< limit0
[d
])
4336 cg_move_error(fplog
,dd
,step
,cg
,d
,-1,TRUE
,limitd
[d
],
4337 cg_cm
[cg
],cm_new
,pos_d
);
4342 rvec_inc(cm_new
,state
->box
[d
]);
4345 cm_new
[YY
] = state
->box
[YY
][YY
] - cm_new
[YY
];
4346 cm_new
[ZZ
] = state
->box
[ZZ
][ZZ
] - cm_new
[ZZ
];
4348 for(k
=k0
; (k
<k1
); k
++)
4350 rvec_inc(state
->x
[k
],state
->box
[d
]);
4353 rotate_state_atom(state
,k
);
4359 else if (d
< npbcdim
)
4361 /* Put the charge group in the rectangular unit-cell */
4362 while (cm_new
[d
] >= state
->box
[d
][d
])
4364 rvec_dec(cm_new
,state
->box
[d
]);
4365 for(k
=k0
; (k
<k1
); k
++)
4367 rvec_dec(state
->x
[k
],state
->box
[d
]);
4370 while (cm_new
[d
] < 0)
4372 rvec_inc(cm_new
,state
->box
[d
]);
4373 for(k
=k0
; (k
<k1
); k
++)
4375 rvec_inc(state
->x
[k
],state
->box
[d
]);
4381 copy_rvec(cm_new
,cg_cm
[cg
]);
4383 /* Determine where this cg should go */
4386 for(d
=0; d
<dd
->ndim
; d
++)
4391 flag
|= DD_FLAG_FW(d
);
4397 else if (dev
[dim
] == -1)
4399 flag
|= DD_FLAG_BW(d
);
4401 if (dd
->nc
[dim
] > 2)
4415 if (ncg
[mc
]+1 > comm
->cggl_flag_nalloc
[mc
])
4417 comm
->cggl_flag_nalloc
[mc
] = over_alloc_dd(ncg
[mc
]+1);
4418 srenew(comm
->cggl_flag
[mc
],comm
->cggl_flag_nalloc
[mc
]*DD_CGIBS
);
4420 comm
->cggl_flag
[mc
][ncg
[mc
]*DD_CGIBS
] = dd
->index_gl
[cg
];
4421 /* We store the cg size in the lower 16 bits
4422 * and the place where the charge group should go
4423 * in the next 6 bits. This saves some communication volume.
4425 comm
->cggl_flag
[mc
][ncg
[mc
]*DD_CGIBS
+1] = nrcg
| flag
;
4431 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
4432 inc_nrnb(nrnb
,eNR_RESETX
,dd
->ncg_home
);
4448 /* Make sure the communication buffers are large enough */
4449 for(mc
=0; mc
<dd
->ndim
*2; mc
++)
4451 nvr
= ncg
[mc
] + nat
[mc
]*nvec
;
4452 if (nvr
> comm
->cgcm_state_nalloc
[mc
])
4454 comm
->cgcm_state_nalloc
[mc
] = over_alloc_dd(nvr
);
4455 srenew(comm
->cgcm_state
[mc
],comm
->cgcm_state_nalloc
[mc
]);
4459 /* Recalculating cg_cm might be cheaper than communicating,
4460 * but that could give rise to rounding issues.
4463 compact_and_copy_vec_cg(dd
->ncg_home
,move
,cgindex
,
4464 nvec
,cg_cm
,comm
,bCompact
);
4468 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4469 nvec
,vec
++,state
->x
,comm
,bCompact
);
4472 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4473 nvec
,vec
++,state
->v
,comm
,bCompact
);
4477 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4478 nvec
,vec
++,state
->sd_X
,comm
,bCompact
);
4482 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4483 nvec
,vec
++,state
->cg_p
,comm
,bCompact
);
4488 compact_ind(dd
->ncg_home
,move
,
4489 dd
->index_gl
,dd
->cgindex
,dd
->gatindex
,
4490 dd
->ga2la
,comm
->bLocalCG
,
4495 clear_and_mark_ind(dd
->ncg_home
,move
,
4496 dd
->index_gl
,dd
->cgindex
,dd
->gatindex
,
4497 dd
->ga2la
,comm
->bLocalCG
,
4498 fr
->ns
.grid
->cell_index
);
4501 cginfo_mb
= fr
->cginfo_mb
;
4503 ncg_stay_home
= home_pos_cg
;
4504 for(d
=0; d
<dd
->ndim
; d
++)
4510 for(dir
=0; dir
<(dd
->nc
[dim
]==2 ? 1 : 2); dir
++)
4513 /* Communicate the cg and atom counts */
4518 fprintf(debug
,"Sending ddim %d dir %d: ncg %d nat %d\n",
4519 d
,dir
,sbuf
[0],sbuf
[1]);
4521 dd_sendrecv_int(dd
, d
, dir
, sbuf
, 2, rbuf
, 2);
4523 if ((ncg_recv
+rbuf
[0])*DD_CGIBS
> comm
->nalloc_int
)
4525 comm
->nalloc_int
= over_alloc_dd((ncg_recv
+rbuf
[0])*DD_CGIBS
);
4526 srenew(comm
->buf_int
,comm
->nalloc_int
);
4529 /* Communicate the charge group indices, sizes and flags */
4530 dd_sendrecv_int(dd
, d
, dir
,
4531 comm
->cggl_flag
[cdd
], sbuf
[0]*DD_CGIBS
,
4532 comm
->buf_int
+ncg_recv
*DD_CGIBS
, rbuf
[0]*DD_CGIBS
);
4534 nvs
= ncg
[cdd
] + nat
[cdd
]*nvec
;
4535 i
= rbuf
[0] + rbuf
[1] *nvec
;
4536 vec_rvec_check_alloc(&comm
->vbuf
,nvr
+i
);
4538 /* Communicate cgcm and state */
4539 dd_sendrecv_rvec(dd
, d
, dir
,
4540 comm
->cgcm_state
[cdd
], nvs
,
4541 comm
->vbuf
.v
+nvr
, i
);
4542 ncg_recv
+= rbuf
[0];
4543 nat_recv
+= rbuf
[1];
4547 /* Process the received charge groups */
4549 for(cg
=0; cg
<ncg_recv
; cg
++)
4551 flag
= comm
->buf_int
[cg
*DD_CGIBS
+1];
4553 if (dim
>= npbcdim
&& dd
->nc
[dim
] > 2)
4555 /* No pbc in this dim and more than one domain boundary.
4556 * We to a separate check if a charge did not move too far.
4558 if (((flag
& DD_FLAG_FW(d
)) &&
4559 comm
->vbuf
.v
[buf_pos
][d
] > cell_x1
[dim
]) ||
4560 ((flag
& DD_FLAG_BW(d
)) &&
4561 comm
->vbuf
.v
[buf_pos
][d
] < cell_x0
[dim
]))
4563 cg_move_error(fplog
,dd
,step
,cg
,d
,
4564 (flag
& DD_FLAG_FW(d
)) ? 1 : 0,
4566 comm
->vbuf
.v
[buf_pos
],
4567 comm
->vbuf
.v
[buf_pos
],
4568 comm
->vbuf
.v
[buf_pos
][d
]);
4575 /* Check which direction this cg should go */
4576 for(d2
=d
+1; (d2
<dd
->ndim
&& mc
==-1); d2
++)
4580 /* The cell boundaries for dimension d2 are not equal
4581 * for each cell row of the lower dimension(s),
4582 * therefore we might need to redetermine where
4583 * this cg should go.
4586 /* If this cg crosses the box boundary in dimension d2
4587 * we can use the communicated flag, so we do not
4588 * have to worry about pbc.
4590 if (!((dd
->ci
[dim2
] == dd
->nc
[dim2
]-1 &&
4591 (flag
& DD_FLAG_FW(d2
))) ||
4592 (dd
->ci
[dim2
] == 0 &&
4593 (flag
& DD_FLAG_BW(d2
)))))
4595 /* Clear the two flags for this dimension */
4596 flag
&= ~(DD_FLAG_FW(d2
) | DD_FLAG_BW(d2
));
4597 /* Determine the location of this cg
4598 * in lattice coordinates
4600 pos_d
= comm
->vbuf
.v
[buf_pos
][dim2
];
4603 for(d3
=dim2
+1; d3
<DIM
; d3
++)
4606 comm
->vbuf
.v
[buf_pos
][d3
]*tcm
[d3
][dim2
];
4609 /* Check of we are not at the box edge.
4610 * pbc is only handled in the first step above,
4611 * but this check could move over pbc while
4612 * the first step did not due to different rounding.
4614 if (pos_d
>= cell_x1
[dim2
] &&
4615 dd
->ci
[dim2
] != dd
->nc
[dim2
]-1)
4617 flag
|= DD_FLAG_FW(d2
);
4619 else if (pos_d
< cell_x0
[dim2
] &&
4622 flag
|= DD_FLAG_BW(d2
);
4624 comm
->buf_int
[cg
*DD_CGIBS
+1] = flag
;
4627 /* Set to which neighboring cell this cg should go */
4628 if (flag
& DD_FLAG_FW(d2
))
4632 else if (flag
& DD_FLAG_BW(d2
))
4634 if (dd
->nc
[dd
->dim
[d2
]] > 2)
4646 nrcg
= flag
& DD_FLAG_NRCG
;
4649 if (home_pos_cg
+1 > dd
->cg_nalloc
)
4651 dd
->cg_nalloc
= over_alloc_dd(home_pos_cg
+1);
4652 srenew(dd
->index_gl
,dd
->cg_nalloc
);
4653 srenew(dd
->cgindex
,dd
->cg_nalloc
+1);
4655 /* Set the global charge group index and size */
4656 dd
->index_gl
[home_pos_cg
] = comm
->buf_int
[cg
*DD_CGIBS
];
4657 dd
->cgindex
[home_pos_cg
+1] = dd
->cgindex
[home_pos_cg
] + nrcg
;
4658 /* Copy the state from the buffer */
4659 if (home_pos_cg
>= fr
->cg_nalloc
)
4661 dd_realloc_fr_cg(fr
,home_pos_cg
+1);
4664 copy_rvec(comm
->vbuf
.v
[buf_pos
++],cg_cm
[home_pos_cg
]);
4665 /* Set the cginfo */
4666 fr
->cginfo
[home_pos_cg
] = ddcginfo(cginfo_mb
,
4667 dd
->index_gl
[home_pos_cg
]);
4670 comm
->bLocalCG
[dd
->index_gl
[home_pos_cg
]] = TRUE
;
4673 if (home_pos_at
+nrcg
> state
->nalloc
)
4675 dd_realloc_state(state
,f
,home_pos_at
+nrcg
);
4677 for(i
=0; i
<nrcg
; i
++)
4679 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4680 state
->x
[home_pos_at
+i
]);
4684 for(i
=0; i
<nrcg
; i
++)
4686 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4687 state
->v
[home_pos_at
+i
]);
4692 for(i
=0; i
<nrcg
; i
++)
4694 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4695 state
->sd_X
[home_pos_at
+i
]);
4700 for(i
=0; i
<nrcg
; i
++)
4702 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4703 state
->cg_p
[home_pos_at
+i
]);
4707 home_pos_at
+= nrcg
;
4711 /* Reallocate the buffers if necessary */
4712 if (ncg
[mc
]+1 > comm
->cggl_flag_nalloc
[mc
])
4714 comm
->cggl_flag_nalloc
[mc
] = over_alloc_dd(ncg
[mc
]+1);
4715 srenew(comm
->cggl_flag
[mc
],comm
->cggl_flag_nalloc
[mc
]*DD_CGIBS
);
4717 nvr
= ncg
[mc
] + nat
[mc
]*nvec
;
4718 if (nvr
+ 1 + nrcg
*nvec
> comm
->cgcm_state_nalloc
[mc
])
4720 comm
->cgcm_state_nalloc
[mc
] = over_alloc_dd(nvr
+ 1 + nrcg
*nvec
);
4721 srenew(comm
->cgcm_state
[mc
],comm
->cgcm_state_nalloc
[mc
]);
4723 /* Copy from the receive to the send buffers */
4724 memcpy(comm
->cggl_flag
[mc
] + ncg
[mc
]*DD_CGIBS
,
4725 comm
->buf_int
+ cg
*DD_CGIBS
,
4726 DD_CGIBS
*sizeof(int));
4727 memcpy(comm
->cgcm_state
[mc
][nvr
],
4728 comm
->vbuf
.v
[buf_pos
],
4729 (1+nrcg
*nvec
)*sizeof(rvec
));
4730 buf_pos
+= 1 + nrcg
*nvec
;
4737 /* With sorting (!bCompact) the indices are now only partially up to date
4738 * and ncg_home and nat_home are not the real count, since there are
4739 * "holes" in the arrays for the charge groups that moved to neighbors.
4741 dd
->ncg_home
= home_pos_cg
;
4742 dd
->nat_home
= home_pos_at
;
4746 fprintf(debug
,"Finished repartitioning\n");
4749 return ncg_stay_home
;
4752 void dd_cycles_add(gmx_domdec_t
*dd
,float cycles
,int ddCycl
)
4754 dd
->comm
->cycl
[ddCycl
] += cycles
;
4755 dd
->comm
->cycl_n
[ddCycl
]++;
4756 if (cycles
> dd
->comm
->cycl_max
[ddCycl
])
4758 dd
->comm
->cycl_max
[ddCycl
] = cycles
;
4762 static double force_flop_count(t_nrnb
*nrnb
)
4769 for(i
=eNR_NBKERNEL010
; i
<eNR_NBKERNEL_FREE_ENERGY
; i
++)
4771 /* To get closer to the real timings, we half the count
4772 * for the normal loops and again half it for water loops.
4775 if (strstr(name
,"W3") != NULL
|| strstr(name
,"W4") != NULL
)
4777 sum
+= nrnb
->n
[i
]*0.25*cost_nrnb(i
);
4781 sum
+= nrnb
->n
[i
]*0.50*cost_nrnb(i
);
4784 for(i
=eNR_NBKERNEL_FREE_ENERGY
; i
<=eNR_NB14
; i
++)
4787 if (strstr(name
,"W3") != NULL
|| strstr(name
,"W4") != NULL
)
4788 sum
+= nrnb
->n
[i
]*cost_nrnb(i
);
4790 for(i
=eNR_BONDS
; i
<=eNR_WALLS
; i
++)
4792 sum
+= nrnb
->n
[i
]*cost_nrnb(i
);
4798 void dd_force_flop_start(gmx_domdec_t
*dd
,t_nrnb
*nrnb
)
4800 if (dd
->comm
->eFlop
)
4802 dd
->comm
->flop
-= force_flop_count(nrnb
);
4805 void dd_force_flop_stop(gmx_domdec_t
*dd
,t_nrnb
*nrnb
)
4807 if (dd
->comm
->eFlop
)
4809 dd
->comm
->flop
+= force_flop_count(nrnb
);
4814 static void clear_dd_cycle_counts(gmx_domdec_t
*dd
)
4818 for(i
=0; i
<ddCyclNr
; i
++)
4820 dd
->comm
->cycl
[i
] = 0;
4821 dd
->comm
->cycl_n
[i
] = 0;
4822 dd
->comm
->cycl_max
[i
] = 0;
4825 dd
->comm
->flop_n
= 0;
4828 static void get_load_distribution(gmx_domdec_t
*dd
,gmx_wallcycle_t wcycle
)
4830 gmx_domdec_comm_t
*comm
;
4831 gmx_domdec_load_t
*load
;
4832 gmx_domdec_root_t
*root
=NULL
;
4833 int d
,dim
,cid
,i
,pos
;
4834 float cell_frac
=0,sbuf
[DD_NLOAD_MAX
];
4839 fprintf(debug
,"get_load_distribution start\n");
4842 wallcycle_start(wcycle
,ewcDDCOMMLOAD
);
4846 bSepPME
= (dd
->pme_nodeid
>= 0);
4848 for(d
=dd
->ndim
-1; d
>=0; d
--)
4851 /* Check if we participate in the communication in this dimension */
4852 if (d
== dd
->ndim
-1 ||
4853 (dd
->ci
[dd
->dim
[d
+1]]==0 && dd
->ci
[dd
->dim
[dd
->ndim
-1]]==0))
4855 load
= &comm
->load
[d
];
4858 cell_frac
= comm
->cell_f1
[d
] - comm
->cell_f0
[d
];
4861 if (d
== dd
->ndim
-1)
4863 sbuf
[pos
++] = dd_force_load(comm
);
4864 sbuf
[pos
++] = sbuf
[0];
4867 sbuf
[pos
++] = sbuf
[0];
4868 sbuf
[pos
++] = cell_frac
;
4871 sbuf
[pos
++] = comm
->cell_f_max0
[d
];
4872 sbuf
[pos
++] = comm
->cell_f_min1
[d
];
4877 sbuf
[pos
++] = comm
->cycl
[ddCyclPPduringPME
];
4878 sbuf
[pos
++] = comm
->cycl
[ddCyclPME
];
4883 sbuf
[pos
++] = comm
->load
[d
+1].sum
;
4884 sbuf
[pos
++] = comm
->load
[d
+1].max
;
4887 sbuf
[pos
++] = comm
->load
[d
+1].sum_m
;
4888 sbuf
[pos
++] = comm
->load
[d
+1].cvol_min
*cell_frac
;
4889 sbuf
[pos
++] = comm
->load
[d
+1].flags
;
4892 sbuf
[pos
++] = comm
->cell_f_max0
[d
];
4893 sbuf
[pos
++] = comm
->cell_f_min1
[d
];
4898 sbuf
[pos
++] = comm
->load
[d
+1].mdf
;
4899 sbuf
[pos
++] = comm
->load
[d
+1].pme
;
4903 /* Communicate a row in DD direction d.
4904 * The communicators are setup such that the root always has rank 0.
4907 MPI_Gather(sbuf
,load
->nload
*sizeof(float),MPI_BYTE
,
4908 load
->load
,load
->nload
*sizeof(float),MPI_BYTE
,
4909 0,comm
->mpi_comm_load
[d
]);
4911 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
4913 /* We are the root, process this row */
4914 if (comm
->bDynLoadBal
)
4916 root
= comm
->root
[d
];
4926 for(i
=0; i
<dd
->nc
[dim
]; i
++)
4928 load
->sum
+= load
->load
[pos
++];
4929 load
->max
= max(load
->max
,load
->load
[pos
]);
4935 /* This direction could not be load balanced properly,
4936 * therefore we need to use the maximum iso the average load.
4938 load
->sum_m
= max(load
->sum_m
,load
->load
[pos
]);
4942 load
->sum_m
+= load
->load
[pos
];
4945 load
->cvol_min
= min(load
->cvol_min
,load
->load
[pos
]);
4949 load
->flags
= (int)(load
->load
[pos
++] + 0.5);
4953 root
->cell_f_max0
[i
] = load
->load
[pos
++];
4954 root
->cell_f_min1
[i
] = load
->load
[pos
++];
4959 load
->mdf
= max(load
->mdf
,load
->load
[pos
]);
4961 load
->pme
= max(load
->pme
,load
->load
[pos
]);
4965 if (comm
->bDynLoadBal
&& root
->bLimited
)
4967 load
->sum_m
*= dd
->nc
[dim
];
4968 load
->flags
|= (1<<d
);
4976 comm
->nload
+= dd_load_count(comm
);
4977 comm
->load_step
+= comm
->cycl
[ddCyclStep
];
4978 comm
->load_sum
+= comm
->load
[0].sum
;
4979 comm
->load_max
+= comm
->load
[0].max
;
4980 if (comm
->bDynLoadBal
)
4982 for(d
=0; d
<dd
->ndim
; d
++)
4984 if (comm
->load
[0].flags
& (1<<d
))
4986 comm
->load_lim
[d
]++;
4992 comm
->load_mdf
+= comm
->load
[0].mdf
;
4993 comm
->load_pme
+= comm
->load
[0].pme
;
4997 wallcycle_stop(wcycle
,ewcDDCOMMLOAD
);
5001 fprintf(debug
,"get_load_distribution finished\n");
5005 static float dd_force_imb_perf_loss(gmx_domdec_t
*dd
)
5007 /* Return the relative performance loss on the total run time
5008 * due to the force calculation load imbalance.
5010 if (dd
->comm
->nload
> 0)
5013 (dd
->comm
->load_max
*dd
->nnodes
- dd
->comm
->load_sum
)/
5014 (dd
->comm
->load_step
*dd
->nnodes
);
5022 static void print_dd_load_av(FILE *fplog
,gmx_domdec_t
*dd
)
5025 int npp
,npme
,nnodes
,d
,limp
;
5026 float imbal
,pme_f_ratio
,lossf
,lossp
=0;
5028 gmx_domdec_comm_t
*comm
;
5031 if (DDMASTER(dd
) && comm
->nload
> 0)
5034 npme
= (dd
->pme_nodeid
>= 0) ? comm
->npmenodes
: 0;
5035 nnodes
= npp
+ npme
;
5036 imbal
= comm
->load_max
*npp
/comm
->load_sum
- 1;
5037 lossf
= dd_force_imb_perf_loss(dd
);
5038 sprintf(buf
," Average load imbalance: %.1f %%\n",imbal
*100);
5039 fprintf(fplog
,"%s",buf
);
5040 fprintf(stderr
,"\n");
5041 fprintf(stderr
,"%s",buf
);
5042 sprintf(buf
," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf
*100);
5043 fprintf(fplog
,"%s",buf
);
5044 fprintf(stderr
,"%s",buf
);
5046 if (comm
->bDynLoadBal
)
5048 sprintf(buf
," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5049 for(d
=0; d
<dd
->ndim
; d
++)
5051 limp
= (200*comm
->load_lim
[d
]+1)/(2*comm
->nload
);
5052 sprintf(buf
+strlen(buf
)," %c %d %%",dim2char(dd
->dim
[d
]),limp
);
5058 sprintf(buf
+strlen(buf
),"\n");
5059 fprintf(fplog
,"%s",buf
);
5060 fprintf(stderr
,"%s",buf
);
5064 pme_f_ratio
= comm
->load_pme
/comm
->load_mdf
;
5065 lossp
= (comm
->load_pme
-comm
->load_mdf
)/comm
->load_step
;
5068 lossp
*= (float)npme
/(float)nnodes
;
5072 lossp
*= (float)npp
/(float)nnodes
;
5074 sprintf(buf
," Average PME mesh/force load: %5.3f\n",pme_f_ratio
);
5075 fprintf(fplog
,"%s",buf
);
5076 fprintf(stderr
,"%s",buf
);
5077 sprintf(buf
," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp
)*100);
5078 fprintf(fplog
,"%s",buf
);
5079 fprintf(stderr
,"%s",buf
);
5081 fprintf(fplog
,"\n");
5082 fprintf(stderr
,"\n");
5084 if (lossf
>= DD_PERF_LOSS
)
5087 "NOTE: %.1f %% performance was lost due to load imbalance\n"
5088 " in the domain decomposition.\n",lossf
*100);
5089 if (!comm
->bDynLoadBal
)
5091 sprintf(buf
+strlen(buf
)," You might want to use dynamic load balancing (option -dlb.)\n");
5095 sprintf(buf
+strlen(buf
)," You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5097 fprintf(fplog
,"%s\n",buf
);
5098 fprintf(stderr
,"%s\n",buf
);
5100 if (npme
> 0 && fabs(lossp
) >= DD_PERF_LOSS
)
5103 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5104 " had %s work to do than the PP nodes.\n"
5105 " You might want to %s the number of PME nodes\n"
5106 " or %s the cut-off and the grid spacing.\n",
5108 (lossp
< 0) ? "less" : "more",
5109 (lossp
< 0) ? "decrease" : "increase",
5110 (lossp
< 0) ? "decrease" : "increase");
5111 fprintf(fplog
,"%s\n",buf
);
5112 fprintf(stderr
,"%s\n",buf
);
5117 static float dd_vol_min(gmx_domdec_t
*dd
)
5119 return dd
->comm
->load
[0].cvol_min
*dd
->nnodes
;
5122 static bool dd_load_flags(gmx_domdec_t
*dd
)
5124 return dd
->comm
->load
[0].flags
;
5127 static float dd_f_imbal(gmx_domdec_t
*dd
)
5129 return dd
->comm
->load
[0].max
*dd
->nnodes
/dd
->comm
->load
[0].sum
- 1;
5132 static float dd_pme_f_ratio(gmx_domdec_t
*dd
)
5134 return dd
->comm
->load
[0].pme
/dd
->comm
->load
[0].mdf
;
5137 static void dd_print_load(FILE *fplog
,gmx_domdec_t
*dd
,gmx_large_int_t step
)
5142 flags
= dd_load_flags(dd
);
5146 "DD load balancing is limited by minimum cell size in dimension");
5147 for(d
=0; d
<dd
->ndim
; d
++)
5151 fprintf(fplog
," %c",dim2char(dd
->dim
[d
]));
5154 fprintf(fplog
,"\n");
5156 fprintf(fplog
,"DD step %s",gmx_step_str(step
,buf
));
5157 if (dd
->comm
->bDynLoadBal
)
5159 fprintf(fplog
," vol min/aver %5.3f%c",
5160 dd_vol_min(dd
),flags
? '!' : ' ');
5162 fprintf(fplog
," load imb.: force %4.1f%%",dd_f_imbal(dd
)*100);
5163 if (dd
->comm
->cycl_n
[ddCyclPME
])
5165 fprintf(fplog
," pme mesh/force %5.3f",dd_pme_f_ratio(dd
));
5167 fprintf(fplog
,"\n\n");
5170 static void dd_print_load_verbose(gmx_domdec_t
*dd
)
5172 if (dd
->comm
->bDynLoadBal
)
5174 fprintf(stderr
,"vol %4.2f%c ",
5175 dd_vol_min(dd
),dd_load_flags(dd
) ? '!' : ' ');
5177 fprintf(stderr
,"imb F %2d%% ",(int)(dd_f_imbal(dd
)*100+0.5));
5178 if (dd
->comm
->cycl_n
[ddCyclPME
])
5180 fprintf(stderr
,"pme/F %4.2f ",dd_pme_f_ratio(dd
));
5185 static void make_load_communicator(gmx_domdec_t
*dd
,MPI_Group g_all
,
5186 int dim_ind
,ivec loc
)
5192 gmx_domdec_root_t
*root
;
5194 dim
= dd
->dim
[dim_ind
];
5195 copy_ivec(loc
,loc_c
);
5196 snew(rank
,dd
->nc
[dim
]);
5197 for(i
=0; i
<dd
->nc
[dim
]; i
++)
5200 rank
[i
] = dd_index(dd
->nc
,loc_c
);
5202 /* Here we create a new group, that does not necessarily
5203 * include our process. But MPI_Comm_create needs to be
5204 * called by all the processes in the original communicator.
5205 * Calling MPI_Group_free afterwards gives errors, so I assume
5206 * also the group is needed by all processes. (B. Hess)
5208 MPI_Group_incl(g_all
,dd
->nc
[dim
],rank
,&g_row
);
5209 MPI_Comm_create(dd
->mpi_comm_all
,g_row
,&c_row
);
5210 if (c_row
!= MPI_COMM_NULL
)
5212 /* This process is part of the group */
5213 dd
->comm
->mpi_comm_load
[dim_ind
] = c_row
;
5214 if (dd
->comm
->eDLB
!= edlbNO
)
5216 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5218 /* This is the root process of this row */
5219 snew(dd
->comm
->root
[dim_ind
],1);
5220 root
= dd
->comm
->root
[dim_ind
];
5221 snew(root
->cell_f
,DD_CELL_F_SIZE(dd
,dim_ind
));
5222 snew(root
->old_cell_f
,dd
->nc
[dim
]+1);
5223 snew(root
->bCellMin
,dd
->nc
[dim
]);
5226 snew(root
->cell_f_max0
,dd
->nc
[dim
]);
5227 snew(root
->cell_f_min1
,dd
->nc
[dim
]);
5228 snew(root
->bound_min
,dd
->nc
[dim
]);
5229 snew(root
->bound_max
,dd
->nc
[dim
]);
5231 snew(root
->buf_ncd
,dd
->nc
[dim
]);
5235 /* This is not a root process, we only need to receive cell_f */
5236 snew(dd
->comm
->cell_f_row
,DD_CELL_F_SIZE(dd
,dim_ind
));
5239 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5241 snew(dd
->comm
->load
[dim_ind
].load
,dd
->nc
[dim
]*DD_NLOAD_MAX
);
5248 static void make_load_communicators(gmx_domdec_t
*dd
)
5256 fprintf(debug
,"Making load communicators\n");
5258 MPI_Comm_group(dd
->mpi_comm_all
,&g_all
);
5260 snew(dd
->comm
->load
,dd
->ndim
);
5261 snew(dd
->comm
->mpi_comm_load
,dd
->ndim
);
5264 make_load_communicator(dd
,g_all
,0,loc
);
5267 for(i
=0; i
<dd
->nc
[dim0
]; i
++) {
5269 make_load_communicator(dd
,g_all
,1,loc
);
5274 for(i
=0; i
<dd
->nc
[dim0
]; i
++) {
5277 for(j
=0; j
<dd
->nc
[dim1
]; j
++) {
5279 make_load_communicator(dd
,g_all
,2,loc
);
5284 MPI_Group_free(&g_all
);
5287 fprintf(debug
,"Finished making load communicators\n");
5291 void setup_dd_grid(FILE *fplog
,gmx_domdec_t
*dd
)
5297 ivec dd_zp
[DD_MAXIZONE
];
5298 gmx_domdec_zones_t
*zones
;
5299 gmx_domdec_ns_ranges_t
*izone
;
5301 for(d
=0; d
<dd
->ndim
; d
++)
5304 copy_ivec(dd
->ci
,tmp
);
5305 tmp
[dim
] = (tmp
[dim
] + 1) % dd
->nc
[dim
];
5306 dd
->neighbor
[d
][0] = ddcoord2ddnodeid(dd
,tmp
);
5307 copy_ivec(dd
->ci
,tmp
);
5308 tmp
[dim
] = (tmp
[dim
] - 1 + dd
->nc
[dim
]) % dd
->nc
[dim
];
5309 dd
->neighbor
[d
][1] = ddcoord2ddnodeid(dd
,tmp
);
5312 fprintf(debug
,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5315 dd
->neighbor
[d
][1]);
5321 fprintf(stderr
,"Making %dD domain decomposition %d x %d x %d\n",
5322 dd
->ndim
,dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
]);
5326 fprintf(fplog
,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5328 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
],
5329 dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5336 for(i
=0; i
<nzonep
; i
++)
5338 copy_ivec(dd_zp3
[i
],dd_zp
[i
]);
5344 for(i
=0; i
<nzonep
; i
++)
5346 copy_ivec(dd_zp2
[i
],dd_zp
[i
]);
5352 for(i
=0; i
<nzonep
; i
++)
5354 copy_ivec(dd_zp1
[i
],dd_zp
[i
]);
5358 gmx_fatal(FARGS
,"Can only do 1, 2 or 3D domain decomposition");
5363 zones
= &dd
->comm
->zones
;
5365 for(i
=0; i
<nzone
; i
++)
5368 clear_ivec(zones
->shift
[i
]);
5369 for(d
=0; d
<dd
->ndim
; d
++)
5371 zones
->shift
[i
][dd
->dim
[d
]] = dd_zo
[i
][m
++];
5376 for(i
=0; i
<nzone
; i
++)
5378 for(d
=0; d
<DIM
; d
++)
5380 s
[d
] = dd
->ci
[d
] - zones
->shift
[i
][d
];
5385 else if (s
[d
] >= dd
->nc
[d
])
5391 zones
->nizone
= nzonep
;
5392 for(i
=0; i
<zones
->nizone
; i
++)
5394 if (dd_zp
[i
][0] != i
)
5396 gmx_fatal(FARGS
,"Internal inconsistency in the dd grid setup");
5398 izone
= &zones
->izone
[i
];
5399 izone
->j0
= dd_zp
[i
][1];
5400 izone
->j1
= dd_zp
[i
][2];
5401 for(dim
=0; dim
<DIM
; dim
++)
5403 if (dd
->nc
[dim
] == 1)
5405 /* All shifts should be allowed */
5406 izone
->shift0
[dim
] = -1;
5407 izone
->shift1
[dim
] = 1;
5412 izone->shift0[d] = 0;
5413 izone->shift1[d] = 0;
5414 for(j=izone->j0; j<izone->j1; j++) {
5415 if (dd->shift[j][d] > dd->shift[i][d])
5416 izone->shift0[d] = -1;
5417 if (dd->shift[j][d] < dd->shift[i][d])
5418 izone->shift1[d] = 1;
5424 /* Assume the shift are not more than 1 cell */
5425 izone
->shift0
[dim
] = 1;
5426 izone
->shift1
[dim
] = -1;
5427 for(j
=izone
->j0
; j
<izone
->j1
; j
++)
5429 shift_diff
= zones
->shift
[j
][dim
] - zones
->shift
[i
][dim
];
5430 if (shift_diff
< izone
->shift0
[dim
])
5432 izone
->shift0
[dim
] = shift_diff
;
5434 if (shift_diff
> izone
->shift1
[dim
])
5436 izone
->shift1
[dim
] = shift_diff
;
5443 if (dd
->comm
->eDLB
!= edlbNO
)
5445 snew(dd
->comm
->root
,dd
->ndim
);
5448 if (dd
->comm
->bRecordLoad
)
5450 make_load_communicators(dd
);
5454 static void make_pp_communicator(FILE *fplog
,t_commrec
*cr
,int reorder
)
5457 gmx_domdec_comm_t
*comm
;
5468 if (comm
->bCartesianPP
)
5470 /* Set up cartesian communication for the particle-particle part */
5473 fprintf(fplog
,"Will use a Cartesian communicator: %d x %d x %d\n",
5474 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
]);
5477 for(i
=0; i
<DIM
; i
++)
5481 MPI_Cart_create(cr
->mpi_comm_mygroup
,DIM
,dd
->nc
,periods
,reorder
,
5483 /* We overwrite the old communicator with the new cartesian one */
5484 cr
->mpi_comm_mygroup
= comm_cart
;
5487 dd
->mpi_comm_all
= cr
->mpi_comm_mygroup
;
5488 MPI_Comm_rank(dd
->mpi_comm_all
,&dd
->rank
);
5490 if (comm
->bCartesianPP_PME
)
5492 /* Since we want to use the original cartesian setup for sim,
5493 * and not the one after split, we need to make an index.
5495 snew(comm
->ddindex2ddnodeid
,dd
->nnodes
);
5496 comm
->ddindex2ddnodeid
[dd_index(dd
->nc
,dd
->ci
)] = dd
->rank
;
5497 gmx_sumi(dd
->nnodes
,comm
->ddindex2ddnodeid
,cr
);
5498 /* Get the rank of the DD master,
5499 * above we made sure that the master node is a PP node.
5509 MPI_Allreduce(&rank
,&dd
->masterrank
,1,MPI_INT
,MPI_SUM
,dd
->mpi_comm_all
);
5511 else if (comm
->bCartesianPP
)
5513 if (cr
->npmenodes
== 0)
5515 /* The PP communicator is also
5516 * the communicator for this simulation
5518 cr
->mpi_comm_mysim
= cr
->mpi_comm_mygroup
;
5520 cr
->nodeid
= dd
->rank
;
5522 MPI_Cart_coords(dd
->mpi_comm_all
,dd
->rank
,DIM
,dd
->ci
);
5524 /* We need to make an index to go from the coordinates
5525 * to the nodeid of this simulation.
5527 snew(comm
->ddindex2simnodeid
,dd
->nnodes
);
5528 snew(buf
,dd
->nnodes
);
5529 if (cr
->duty
& DUTY_PP
)
5531 buf
[dd_index(dd
->nc
,dd
->ci
)] = cr
->sim_nodeid
;
5533 /* Communicate the ddindex to simulation nodeid index */
5534 MPI_Allreduce(buf
,comm
->ddindex2simnodeid
,dd
->nnodes
,MPI_INT
,MPI_SUM
,
5535 cr
->mpi_comm_mysim
);
5538 /* Determine the master coordinates and rank.
5539 * The DD master should be the same node as the master of this sim.
5541 for(i
=0; i
<dd
->nnodes
; i
++)
5543 if (comm
->ddindex2simnodeid
[i
] == 0)
5545 ddindex2xyz(dd
->nc
,i
,dd
->master_ci
);
5546 MPI_Cart_rank(dd
->mpi_comm_all
,dd
->master_ci
,&dd
->masterrank
);
5551 fprintf(debug
,"The master rank is %d\n",dd
->masterrank
);
5556 /* No Cartesian communicators */
5557 /* We use the rank in dd->comm->all as DD index */
5558 ddindex2xyz(dd
->nc
,dd
->rank
,dd
->ci
);
5559 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5561 clear_ivec(dd
->master_ci
);
5568 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5569 dd
->rank
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5574 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5575 dd
->rank
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5579 static void receive_ddindex2simnodeid(t_commrec
*cr
)
5583 gmx_domdec_comm_t
*comm
;
5590 if (!comm
->bCartesianPP_PME
&& comm
->bCartesianPP
)
5592 snew(comm
->ddindex2simnodeid
,dd
->nnodes
);
5593 snew(buf
,dd
->nnodes
);
5594 if (cr
->duty
& DUTY_PP
)
5596 buf
[dd_index(dd
->nc
,dd
->ci
)] = cr
->sim_nodeid
;
5599 /* Communicate the ddindex to simulation nodeid index */
5600 MPI_Allreduce(buf
,comm
->ddindex2simnodeid
,dd
->nnodes
,MPI_INT
,MPI_SUM
,
5601 cr
->mpi_comm_mysim
);
5608 static gmx_domdec_master_t
*init_gmx_domdec_master_t(gmx_domdec_t
*dd
,
5611 gmx_domdec_master_t
*ma
;
5616 snew(ma
->ncg
,dd
->nnodes
);
5617 snew(ma
->index
,dd
->nnodes
+1);
5619 snew(ma
->nat
,dd
->nnodes
);
5620 snew(ma
->ibuf
,dd
->nnodes
*2);
5621 snew(ma
->cell_x
,DIM
);
5622 for(i
=0; i
<DIM
; i
++)
5624 snew(ma
->cell_x
[i
],dd
->nc
[i
]+1);
5627 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
5633 snew(ma
->vbuf
,natoms
);
5639 static void split_communicator(FILE *fplog
,t_commrec
*cr
,int dd_node_order
,
5643 gmx_domdec_comm_t
*comm
;
5654 if (comm
->bCartesianPP
)
5656 for(i
=1; i
<DIM
; i
++)
5658 bDiv
[i
] = ((cr
->npmenodes
*dd
->nc
[i
]) % (dd
->nnodes
) == 0);
5660 if (bDiv
[YY
] || bDiv
[ZZ
])
5662 comm
->bCartesianPP_PME
= TRUE
;
5663 /* If we have 2D PME decomposition, which is always in x+y,
5664 * we stack the PME only nodes in z.
5665 * Otherwise we choose the direction that provides the thinnest slab
5666 * of PME only nodes as this will have the least effect
5667 * on the PP communication.
5668 * But for the PME communication the opposite might be better.
5670 if (bDiv
[ZZ
] && (comm
->npmenodes_minor
> 1 ||
5672 dd
->nc
[YY
] > dd
->nc
[ZZ
]))
5674 comm
->cartpmedim
= ZZ
;
5678 comm
->cartpmedim
= YY
;
5680 comm
->ntot
[comm
->cartpmedim
]
5681 += (cr
->npmenodes
*dd
->nc
[comm
->cartpmedim
])/dd
->nnodes
;
5685 fprintf(fplog
,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr
->npmenodes
,dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[XX
],dd
->nc
[ZZ
]);
5687 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5692 if (comm
->bCartesianPP_PME
)
5696 fprintf(fplog
,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm
->ntot
[XX
],comm
->ntot
[YY
],comm
->ntot
[ZZ
]);
5699 for(i
=0; i
<DIM
; i
++)
5703 MPI_Cart_create(cr
->mpi_comm_mysim
,DIM
,comm
->ntot
,periods
,reorder
,
5706 MPI_Comm_rank(comm_cart
,&rank
);
5707 if (MASTERNODE(cr
) && rank
!= 0)
5709 gmx_fatal(FARGS
,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
5712 /* With this assigment we loose the link to the original communicator
5713 * which will usually be MPI_COMM_WORLD, unless have multisim.
5715 cr
->mpi_comm_mysim
= comm_cart
;
5716 cr
->sim_nodeid
= rank
;
5718 MPI_Cart_coords(cr
->mpi_comm_mysim
,cr
->sim_nodeid
,DIM
,dd
->ci
);
5722 fprintf(fplog
,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
5723 cr
->sim_nodeid
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5726 if (dd
->ci
[comm
->cartpmedim
] < dd
->nc
[comm
->cartpmedim
])
5730 if (cr
->npmenodes
== 0 ||
5731 dd
->ci
[comm
->cartpmedim
] >= dd
->nc
[comm
->cartpmedim
])
5733 cr
->duty
= DUTY_PME
;
5736 /* Split the sim communicator into PP and PME only nodes */
5737 MPI_Comm_split(cr
->mpi_comm_mysim
,
5739 dd_index(comm
->ntot
,dd
->ci
),
5740 &cr
->mpi_comm_mygroup
);
5744 switch (dd_node_order
)
5749 fprintf(fplog
,"Order of the nodes: PP first, PME last\n");
5752 case ddnoINTERLEAVE
:
5753 /* Interleave the PP-only and PME-only nodes,
5754 * as on clusters with dual-core machines this will double
5755 * the communication bandwidth of the PME processes
5756 * and thus speed up the PP <-> PME and inter PME communication.
5760 fprintf(fplog
,"Interleaving PP and PME nodes\n");
5762 comm
->pmenodes
= dd_pmenodes(cr
);
5767 gmx_fatal(FARGS
,"Unknown dd_node_order=%d",dd_node_order
);
5770 if (dd_simnode2pmenode(cr
,cr
->sim_nodeid
) == -1)
5772 cr
->duty
= DUTY_PME
;
5779 /* Split the sim communicator into PP and PME only nodes */
5780 MPI_Comm_split(cr
->mpi_comm_mysim
,
5783 &cr
->mpi_comm_mygroup
);
5784 MPI_Comm_rank(cr
->mpi_comm_mygroup
,&cr
->nodeid
);
5790 fprintf(fplog
,"This is a %s only node\n\n",
5791 (cr
->duty
& DUTY_PP
) ? "particle-particle" : "PME-mesh");
5795 void make_dd_communicators(FILE *fplog
,t_commrec
*cr
,int dd_node_order
)
5798 gmx_domdec_comm_t
*comm
;
5804 copy_ivec(dd
->nc
,comm
->ntot
);
5806 comm
->bCartesianPP
= (dd_node_order
== ddnoCARTESIAN
);
5807 comm
->bCartesianPP_PME
= FALSE
;
5809 /* Reorder the nodes by default. This might change the MPI ranks.
5810 * Real reordering is only supported on very few architectures,
5811 * Blue Gene is one of them.
5813 CartReorder
= (getenv("GMX_NO_CART_REORDER") == NULL
);
5815 if (cr
->npmenodes
> 0)
5817 /* Split the communicator into a PP and PME part */
5818 split_communicator(fplog
,cr
,dd_node_order
,CartReorder
);
5819 if (comm
->bCartesianPP_PME
)
5821 /* We (possibly) reordered the nodes in split_communicator,
5822 * so it is no longer required in make_pp_communicator.
5824 CartReorder
= FALSE
;
5829 /* All nodes do PP and PME */
5831 /* We do not require separate communicators */
5832 cr
->mpi_comm_mygroup
= cr
->mpi_comm_mysim
;
5836 if (cr
->duty
& DUTY_PP
)
5838 /* Copy or make a new PP communicator */
5839 make_pp_communicator(fplog
,cr
,CartReorder
);
5843 receive_ddindex2simnodeid(cr
);
5846 if (!(cr
->duty
& DUTY_PME
))
5848 /* Set up the commnuication to our PME node */
5849 dd
->pme_nodeid
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
5850 dd
->pme_receive_vir_ener
= receive_vir_ener(cr
);
5853 fprintf(debug
,"My pme_nodeid %d receive ener %d\n",
5854 dd
->pme_nodeid
,dd
->pme_receive_vir_ener
);
5859 dd
->pme_nodeid
= -1;
5864 dd
->ma
= init_gmx_domdec_master_t(dd
,
5866 comm
->cgs_gl
.index
[comm
->cgs_gl
.nr
]);
5870 static real
*get_slb_frac(FILE *fplog
,const char *dir
,int nc
,const char *size_string
)
5877 if (nc
> 1 && size_string
!= NULL
)
5881 fprintf(fplog
,"Using static load balancing for the %s direction\n",
5886 for (i
=0; i
<nc
; i
++)
5889 sscanf(size_string
,"%lf%n",&dbl
,&n
);
5892 gmx_fatal(FARGS
,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir
,size_string
);
5901 fprintf(fplog
,"Relative cell sizes:");
5903 for (i
=0; i
<nc
; i
++)
5908 fprintf(fplog
," %5.3f",slb_frac
[i
]);
5913 fprintf(fplog
,"\n");
5920 static int multi_body_bondeds_count(gmx_mtop_t
*mtop
)
5923 gmx_mtop_ilistloop_t iloop
;
5927 iloop
= gmx_mtop_ilistloop_init(mtop
);
5928 while (gmx_mtop_ilistloop_next(iloop
,&il
,&nmol
))
5930 for(ftype
=0; ftype
<F_NRE
; ftype
++)
5932 if ((interaction_function
[ftype
].flags
& IF_BOND
) &&
5935 n
+= nmol
*il
[ftype
].nr
/(1 + NRAL(ftype
));
5943 static int dd_nst_env(FILE *fplog
,const char *env_var
,int def
)
5949 val
= getenv(env_var
);
5952 if (sscanf(val
,"%d",&nst
) <= 0)
5958 fprintf(fplog
,"Found env.var. %s = %s, using value %d\n",
5966 static void dd_warning(t_commrec
*cr
,FILE *fplog
,const char *warn_string
)
5970 fprintf(stderr
,"\n%s\n",warn_string
);
5974 fprintf(fplog
,"\n%s\n",warn_string
);
5978 static void check_dd_restrictions(t_commrec
*cr
,gmx_domdec_t
*dd
,
5979 t_inputrec
*ir
,FILE *fplog
)
5981 if (ir
->ePBC
== epbcSCREW
&&
5982 (dd
->nc
[XX
] == 1 || dd
->nc
[YY
] > 1 || dd
->nc
[ZZ
] > 1))
5984 gmx_fatal(FARGS
,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names
[ir
->ePBC
]);
5987 if (ir
->ns_type
== ensSIMPLE
)
5989 gmx_fatal(FARGS
,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
5992 if (ir
->nstlist
== 0)
5994 gmx_fatal(FARGS
,"Domain decomposition does not work with nstlist=0");
5997 if (ir
->comm_mode
== ecmANGULAR
&& ir
->ePBC
!= epbcNONE
)
5999 dd_warning(cr
,fplog
,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6003 static real
average_cellsize_min(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
6008 r
= ddbox
->box_size
[XX
];
6009 for(di
=0; di
<dd
->ndim
; di
++)
6012 /* Check using the initial average cell size */
6013 r
= min(r
,ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
]/dd
->nc
[d
]);
6019 static int check_dlb_support(FILE *fplog
,t_commrec
*cr
,
6020 const char *dlb_opt
,bool bRecordLoad
,
6021 unsigned long Flags
,t_inputrec
*ir
)
6029 case 'a': eDLB
= edlbAUTO
; break;
6030 case 'n': eDLB
= edlbNO
; break;
6031 case 'y': eDLB
= edlbYES
; break;
6032 default: gmx_incons("Unknown dlb_opt");
6035 if (Flags
& MD_RERUN
)
6040 if (!EI_DYNAMICS(ir
->eI
))
6042 if (eDLB
== edlbYES
)
6044 sprintf(buf
,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir
->eI
));
6045 dd_warning(cr
,fplog
,buf
);
6053 dd_warning(cr
,fplog
,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6058 if (Flags
& MD_REPRODUCIBLE
)
6065 dd_warning(cr
,fplog
,"NOTE: reproducability requested, will not use dynamic load balancing\n");
6069 dd_warning(cr
,fplog
,"WARNING: reproducability requested with dynamic load balancing, the simulation will NOT be binary reproducable\n");
6072 gmx_fatal(FARGS
,"Death horror: undefined case (%d) for load balancing choice",eDLB
);
6080 static void set_dd_dim(FILE *fplog
,gmx_domdec_t
*dd
)
6085 if (getenv("GMX_DD_ORDER_ZYX"))
6087 /* Decomposition order z,y,x */
6090 fprintf(fplog
,"Using domain decomposition order z, y, x\n");
6092 for(dim
=DIM
-1; dim
>=0; dim
--)
6094 if (dd
->nc
[dim
] > 1)
6096 dd
->dim
[dd
->ndim
++] = dim
;
6102 /* Decomposition order x,y,z */
6103 for(dim
=0; dim
<DIM
; dim
++)
6105 if (dd
->nc
[dim
] > 1)
6107 dd
->dim
[dd
->ndim
++] = dim
;
6113 static gmx_domdec_comm_t
*init_dd_comm()
6115 gmx_domdec_comm_t
*comm
;
6119 snew(comm
->cggl_flag
,DIM
*2);
6120 snew(comm
->cgcm_state
,DIM
*2);
6121 for(i
=0; i
<DIM
*2; i
++)
6123 comm
->cggl_flag_nalloc
[i
] = 0;
6124 comm
->cgcm_state_nalloc
[i
] = 0;
6127 comm
->nalloc_int
= 0;
6128 comm
->buf_int
= NULL
;
6130 vec_rvec_init(&comm
->vbuf
);
6132 comm
->n_load_have
= 0;
6133 comm
->n_load_collect
= 0;
6135 for(i
=0; i
<ddnatNR
-ddnatZONE
; i
++)
6137 comm
->sum_nat
[i
] = 0;
6141 comm
->load_step
= 0;
6144 clear_ivec(comm
->load_lim
);
6151 gmx_domdec_t
*init_domain_decomposition(FILE *fplog
,t_commrec
*cr
,
6152 unsigned long Flags
,
6154 real comm_distance_min
,real rconstr
,
6155 const char *dlb_opt
,real dlb_scale
,
6156 const char *sizex
,const char *sizey
,const char *sizez
,
6157 gmx_mtop_t
*mtop
,t_inputrec
*ir
,
6160 int *npme_x
,int *npme_y
)
6163 gmx_domdec_comm_t
*comm
;
6166 real r_2b
,r_mb
,r_bonded
=-1,r_bonded_limit
=-1,limit
,acs
;
6173 "\nInitializing Domain Decomposition on %d nodes\n",cr
->nnodes
);
6178 dd
->comm
= init_dd_comm();
6180 snew(comm
->cggl_flag
,DIM
*2);
6181 snew(comm
->cgcm_state
,DIM
*2);
6183 dd
->npbcdim
= ePBC2npbcdim(ir
->ePBC
);
6184 dd
->bScrewPBC
= (ir
->ePBC
== epbcSCREW
);
6186 dd
->bSendRecv2
= dd_nst_env(fplog
,"GMX_DD_SENDRECV2",0);
6187 comm
->eFlop
= dd_nst_env(fplog
,"GMX_DLB_FLOP",0);
6188 recload
= dd_nst_env(fplog
,"GMX_DD_LOAD",1);
6189 comm
->nstSortCG
= dd_nst_env(fplog
,"GMX_DD_SORT",1);
6190 comm
->nstDDDump
= dd_nst_env(fplog
,"GMX_DD_DUMP",0);
6191 comm
->nstDDDumpGrid
= dd_nst_env(fplog
,"GMX_DD_DUMP_GRID",0);
6192 comm
->DD_debug
= dd_nst_env(fplog
,"GMX_DD_DEBUG",0);
6194 dd
->pme_recv_f_alloc
= 0;
6195 dd
->pme_recv_f_buf
= NULL
;
6197 if (dd
->bSendRecv2
&& fplog
)
6199 fprintf(fplog
,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6205 fprintf(fplog
,"Will load balance based on FLOP count\n");
6207 if (comm
->eFlop
> 1)
6209 srand(1+cr
->nodeid
);
6211 comm
->bRecordLoad
= TRUE
;
6215 comm
->bRecordLoad
= (wallcycle_have_counter() && recload
> 0);
6219 comm
->eDLB
= check_dlb_support(fplog
,cr
,dlb_opt
,comm
->bRecordLoad
,Flags
,ir
);
6221 comm
->bDynLoadBal
= (comm
->eDLB
== edlbYES
);
6224 fprintf(fplog
,"Dynamic load balancing: %s\n",edlb_names
[comm
->eDLB
]);
6226 dd
->bGridJump
= comm
->bDynLoadBal
;
6228 if (comm
->nstSortCG
)
6232 if (comm
->nstSortCG
== 1)
6234 fprintf(fplog
,"Will sort the charge groups at every domain (re)decomposition\n");
6238 fprintf(fplog
,"Will sort the charge groups every %d steps\n",
6248 fprintf(fplog
,"Will not sort the charge groups\n");
6252 comm
->bInterCGBondeds
= (ncg_mtop(mtop
) > mtop
->mols
.nr
);
6253 if (comm
->bInterCGBondeds
)
6255 comm
->bInterCGMultiBody
= (multi_body_bondeds_count(mtop
) > 0);
6259 comm
->bInterCGMultiBody
= FALSE
;
6262 dd
->bInterCGcons
= inter_charge_group_constraints(mtop
);
6264 if (ir
->rlistlong
== 0)
6266 /* Set the cut-off to some very large value,
6267 * so we don't need if statements everywhere in the code.
6268 * We use sqrt, since the cut-off is squared in some places.
6270 comm
->cutoff
= GMX_CUTOFF_INF
;
6274 comm
->cutoff
= ir
->rlistlong
;
6276 comm
->cutoff_mbody
= 0;
6278 comm
->cellsize_limit
= 0;
6279 comm
->bBondComm
= FALSE
;
6281 if (comm
->bInterCGBondeds
)
6283 if (comm_distance_min
> 0)
6285 comm
->cutoff_mbody
= comm_distance_min
;
6286 if (Flags
& MD_DDBONDCOMM
)
6288 comm
->bBondComm
= (comm
->cutoff_mbody
> comm
->cutoff
);
6292 comm
->cutoff
= max(comm
->cutoff
,comm
->cutoff_mbody
);
6294 r_bonded_limit
= comm
->cutoff_mbody
;
6296 else if (ir
->bPeriodicMols
)
6298 /* Can not easily determine the required cut-off */
6299 dd_warning(cr
,fplog
,"NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
6300 comm
->cutoff_mbody
= comm
->cutoff
/2;
6301 r_bonded_limit
= comm
->cutoff_mbody
;
6307 dd_bonded_cg_distance(fplog
,dd
,mtop
,ir
,x
,box
,
6308 Flags
& MD_DDBONDCHECK
,&r_2b
,&r_mb
);
6310 gmx_bcast(sizeof(r_2b
),&r_2b
,cr
);
6311 gmx_bcast(sizeof(r_mb
),&r_mb
,cr
);
6313 /* We use an initial margin of 10% for the minimum cell size,
6314 * except when we are just below the non-bonded cut-off.
6316 if (Flags
& MD_DDBONDCOMM
)
6318 if (max(r_2b
,r_mb
) > comm
->cutoff
)
6320 r_bonded
= max(r_2b
,r_mb
);
6321 r_bonded_limit
= 1.1*r_bonded
;
6322 comm
->bBondComm
= TRUE
;
6327 r_bonded_limit
= min(1.1*r_bonded
,comm
->cutoff
);
6329 /* We determine cutoff_mbody later */
6333 /* No special bonded communication,
6334 * simply increase the DD cut-off.
6336 r_bonded_limit
= 1.1*max(r_2b
,r_mb
);
6337 comm
->cutoff_mbody
= r_bonded_limit
;
6338 comm
->cutoff
= max(comm
->cutoff
,comm
->cutoff_mbody
);
6341 comm
->cellsize_limit
= max(comm
->cellsize_limit
,r_bonded_limit
);
6345 "Minimum cell size due to bonded interactions: %.3f nm\n",
6346 comm
->cellsize_limit
);
6350 if (dd
->bInterCGcons
&& rconstr
<= 0)
6352 /* There is a cell size limit due to the constraints (P-LINCS) */
6353 rconstr
= constr_r_max(fplog
,mtop
,ir
);
6357 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6359 if (rconstr
> comm
->cellsize_limit
)
6361 fprintf(fplog
,"This distance will limit the DD cell size, you can override this with -rcon\n");
6365 else if (rconstr
> 0 && fplog
)
6367 /* Here we do not check for dd->bInterCGcons,
6368 * because one can also set a cell size limit for virtual sites only
6369 * and at this point we don't know yet if there are intercg v-sites.
6372 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6375 comm
->cellsize_limit
= max(comm
->cellsize_limit
,rconstr
);
6377 comm
->cgs_gl
= gmx_mtop_global_cgs(mtop
);
6381 copy_ivec(nc
,dd
->nc
);
6382 set_dd_dim(fplog
,dd
);
6383 set_ddbox_cr(cr
,&dd
->nc
,ir
,box
,&comm
->cgs_gl
,x
,ddbox
);
6385 if (cr
->npmenodes
== -1)
6389 acs
= average_cellsize_min(dd
,ddbox
);
6390 if (acs
< comm
->cellsize_limit
)
6394 fprintf(fplog
,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs
,comm
->cellsize_limit
);
6396 gmx_fatal_collective(FARGS
,cr
,NULL
,
6397 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6398 acs
,comm
->cellsize_limit
);
6403 set_ddbox_cr(cr
,NULL
,ir
,box
,&comm
->cgs_gl
,x
,ddbox
);
6405 /* We need to choose the optimal DD grid and possibly PME nodes */
6406 limit
= dd_choose_grid(fplog
,cr
,dd
,ir
,mtop
,box
,ddbox
,
6407 comm
->eDLB
!=edlbNO
,dlb_scale
,
6408 comm
->cellsize_limit
,comm
->cutoff
,
6409 comm
->bInterCGBondeds
,comm
->bInterCGMultiBody
);
6411 if (dd
->nc
[XX
] == 0)
6413 bC
= (dd
->bInterCGcons
&& rconstr
> r_bonded_limit
);
6414 sprintf(buf
,"Change the number of nodes or mdrun option %s%s%s",
6415 !bC
? "-rdd" : "-rcon",
6416 comm
->eDLB
!=edlbNO
? " or -dds" : "",
6417 bC
? " or your LINCS settings" : "");
6419 gmx_fatal_collective(FARGS
,cr
,NULL
,
6420 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6422 "Look in the log file for details on the domain decomposition",
6423 cr
->nnodes
-cr
->npmenodes
,limit
,buf
);
6425 set_dd_dim(fplog
,dd
);
6431 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6432 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
],cr
->npmenodes
);
6435 dd
->nnodes
= dd
->nc
[XX
]*dd
->nc
[YY
]*dd
->nc
[ZZ
];
6436 if (cr
->nnodes
- dd
->nnodes
!= cr
->npmenodes
)
6438 gmx_fatal_collective(FARGS
,cr
,NULL
,
6439 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6440 dd
->nnodes
,cr
->nnodes
- cr
->npmenodes
,cr
->nnodes
);
6442 if (cr
->npmenodes
> dd
->nnodes
)
6444 gmx_fatal_collective(FARGS
,cr
,NULL
,
6445 "The number of separate PME node (%d) is larger than the number of PP nodes (%d), this is not supported.",cr
->npmenodes
,dd
->nnodes
);
6447 if (cr
->npmenodes
> 0)
6449 comm
->npmenodes
= cr
->npmenodes
;
6453 comm
->npmenodes
= dd
->nnodes
;
6456 if (EEL_PME(ir
->coulombtype
))
6458 if (dd
->nc
[XX
] > 1 && dd
->nc
[YY
] > 1 &&
6459 comm
->npmenodes
> dd
->nc
[XX
] && comm
->npmenodes
% dd
->nc
[XX
] == 0 &&
6460 getenv("GMX_PMEONEDD") == NULL
)
6462 comm
->npmedecompdim
= 2;
6463 comm
->npmenodes_major
= dd
->nc
[XX
];
6464 comm
->npmenodes_minor
= comm
->npmenodes
/comm
->npmenodes_major
;
6468 comm
->npmedecompdim
= 1;
6469 comm
->npmenodes_major
= comm
->npmenodes
;
6470 comm
->npmenodes_minor
= comm
->npmenodes
/comm
->npmenodes_major
;
6474 fprintf(fplog
,"PME domain decomposition: %d x %d x %d\n",
6475 comm
->npmenodes_major
,comm
->npmenodes_minor
,1);
6480 comm
->npmedecompdim
= 0;
6481 comm
->npmenodes_major
= 0;
6482 comm
->npmenodes_minor
= 0;
6485 /* Technically we don't need both of these,
6486 * but it simplifies code not having to recalculate it.
6488 if (comm
->npmedecompdim
== 1 && dd
->dim
[0] == YY
)
6491 *npme_y
= comm
->npmenodes
;
6495 *npme_x
= comm
->npmenodes_major
;
6496 *npme_y
= comm
->npmenodes_minor
;
6499 snew(comm
->slb_frac
,DIM
);
6500 if (comm
->eDLB
== edlbNO
)
6502 comm
->slb_frac
[XX
] = get_slb_frac(fplog
,"x",dd
->nc
[XX
],sizex
);
6503 comm
->slb_frac
[YY
] = get_slb_frac(fplog
,"y",dd
->nc
[YY
],sizey
);
6504 comm
->slb_frac
[ZZ
] = get_slb_frac(fplog
,"z",dd
->nc
[ZZ
],sizez
);
6507 if (comm
->bInterCGBondeds
&& comm
->cutoff_mbody
== 0)
6509 if (comm
->bBondComm
|| comm
->eDLB
!= edlbNO
)
6511 /* Set the bonded communication distance to halfway
6512 * the minimum and the maximum,
6513 * since the extra communication cost is nearly zero.
6515 acs
= average_cellsize_min(dd
,ddbox
);
6516 comm
->cutoff_mbody
= 0.5*(r_bonded
+ acs
);
6517 if (comm
->eDLB
!= edlbNO
)
6519 /* Check if this does not limit the scaling */
6520 comm
->cutoff_mbody
= min(comm
->cutoff_mbody
,dlb_scale
*acs
);
6522 if (!comm
->bBondComm
)
6524 /* Without bBondComm do not go beyond the n.b. cut-off */
6525 comm
->cutoff_mbody
= min(comm
->cutoff_mbody
,comm
->cutoff
);
6526 if (comm
->cellsize_limit
>= comm
->cutoff
)
6528 /* We don't loose a lot of efficieny
6529 * when increasing it to the n.b. cut-off.
6530 * It can even be slightly faster, because we need
6531 * less checks for the communication setup.
6533 comm
->cutoff_mbody
= comm
->cutoff
;
6536 /* Check if we did not end up below our original limit */
6537 comm
->cutoff_mbody
= max(comm
->cutoff_mbody
,r_bonded_limit
);
6539 if (comm
->cutoff_mbody
> comm
->cellsize_limit
)
6541 comm
->cellsize_limit
= comm
->cutoff_mbody
;
6544 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6549 fprintf(debug
,"Bonded atom communication beyond the cut-off: %d\n"
6550 "cellsize limit %f\n",
6551 comm
->bBondComm
,comm
->cellsize_limit
);
6556 check_dd_restrictions(cr
,dd
,ir
,fplog
);
6559 comm
->partition_step
= INT_MIN
;
6562 clear_dd_cycle_counts(dd
);
6567 static void set_dlb_limits(gmx_domdec_t
*dd
)
6572 for(d
=0; d
<dd
->ndim
; d
++)
6574 dd
->comm
->cd
[d
].np
= dd
->comm
->cd
[d
].np_dlb
;
6575 dd
->comm
->cellsize_min
[dd
->dim
[d
]] =
6576 dd
->comm
->cellsize_min_dlb
[dd
->dim
[d
]];
6581 static void turn_on_dlb(FILE *fplog
,t_commrec
*cr
,gmx_large_int_t step
)
6584 gmx_domdec_comm_t
*comm
;
6594 fprintf(fplog
,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step
,buf
),dd_force_imb_perf_loss(dd
)*100);
6597 cellsize_min
= comm
->cellsize_min
[dd
->dim
[0]];
6598 for(d
=1; d
<dd
->ndim
; d
++)
6600 cellsize_min
= min(cellsize_min
,comm
->cellsize_min
[dd
->dim
[d
]]);
6603 if (cellsize_min
< comm
->cellsize_limit
*1.05)
6605 dd_warning(cr
,fplog
,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
6607 /* Change DLB from "auto" to "no". */
6608 comm
->eDLB
= edlbNO
;
6613 dd_warning(cr
,fplog
,"NOTE: Turning on dynamic load balancing\n");
6614 comm
->bDynLoadBal
= TRUE
;
6615 dd
->bGridJump
= TRUE
;
6619 /* We can set the required cell size info here,
6620 * so we do not need to communicate this.
6621 * The grid is completely uniform.
6623 for(d
=0; d
<dd
->ndim
; d
++)
6627 comm
->load
[d
].sum_m
= comm
->load
[d
].sum
;
6629 nc
= dd
->nc
[dd
->dim
[d
]];
6632 comm
->root
[d
]->cell_f
[i
] = i
/(real
)nc
;
6635 comm
->root
[d
]->cell_f_max0
[i
] = i
/(real
)nc
;
6636 comm
->root
[d
]->cell_f_min1
[i
] = (i
+1)/(real
)nc
;
6639 comm
->root
[d
]->cell_f
[nc
] = 1.0;
6644 static char *init_bLocalCG(gmx_mtop_t
*mtop
)
6649 ncg
= ncg_mtop(mtop
);
6651 for(cg
=0; cg
<ncg
; cg
++)
6653 bLocalCG
[cg
] = FALSE
;
6659 void dd_init_bondeds(FILE *fplog
,
6660 gmx_domdec_t
*dd
,gmx_mtop_t
*mtop
,
6661 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
6662 t_inputrec
*ir
,bool bBCheck
,cginfo_mb_t
*cginfo_mb
)
6664 gmx_domdec_comm_t
*comm
;
6668 dd_make_reverse_top(fplog
,dd
,mtop
,vsite
,constr
,ir
,bBCheck
);
6672 if (comm
->bBondComm
)
6674 /* Communicate atoms beyond the cut-off for bonded interactions */
6677 comm
->cglink
= make_charge_group_links(mtop
,dd
,cginfo_mb
);
6679 comm
->bLocalCG
= init_bLocalCG(mtop
);
6683 /* Only communicate atoms based on cut-off */
6684 comm
->cglink
= NULL
;
6685 comm
->bLocalCG
= NULL
;
6689 static void print_dd_settings(FILE *fplog
,gmx_domdec_t
*dd
,
6691 bool bDynLoadBal
,real dlb_scale
,
6694 gmx_domdec_comm_t
*comm
;
6709 fprintf(fplog
,"The maximum number of communication pulses is:");
6710 for(d
=0; d
<dd
->ndim
; d
++)
6712 fprintf(fplog
," %c %d",dim2char(dd
->dim
[d
]),comm
->cd
[d
].np_dlb
);
6714 fprintf(fplog
,"\n");
6715 fprintf(fplog
,"The minimum size for domain decomposition cells is %.3f nm\n",comm
->cellsize_limit
);
6716 fprintf(fplog
,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale
);
6717 fprintf(fplog
,"The allowed shrink of domain decomposition cells is:");
6718 for(d
=0; d
<DIM
; d
++)
6722 if (d
>= ddbox
->npbcdim
&& dd
->nc
[d
] == 2)
6729 comm
->cellsize_min_dlb
[d
]/
6730 (ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
]/dd
->nc
[d
]);
6732 fprintf(fplog
," %c %.2f",dim2char(d
),shrink
);
6735 fprintf(fplog
,"\n");
6739 set_dd_cell_sizes_slb(dd
,ddbox
,FALSE
,np
);
6740 fprintf(fplog
,"The initial number of communication pulses is:");
6741 for(d
=0; d
<dd
->ndim
; d
++)
6743 fprintf(fplog
," %c %d",dim2char(dd
->dim
[d
]),np
[dd
->dim
[d
]]);
6745 fprintf(fplog
,"\n");
6746 fprintf(fplog
,"The initial domain decomposition cell size is:");
6747 for(d
=0; d
<DIM
; d
++) {
6750 fprintf(fplog
," %c %.2f nm",
6751 dim2char(d
),dd
->comm
->cellsize_min
[d
]);
6754 fprintf(fplog
,"\n\n");
6757 if (comm
->bInterCGBondeds
|| dd
->vsite_comm
|| dd
->constraint_comm
)
6759 fprintf(fplog
,"The maximum allowed distance for charge groups involved in interactions is:\n");
6760 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6761 "non-bonded interactions","",comm
->cutoff
);
6765 limit
= dd
->comm
->cellsize_limit
;
6769 if (dynamic_dd_box(ddbox
,ir
))
6771 fprintf(fplog
,"(the following are initial values, they could change due to box deformation)\n");
6773 limit
= dd
->comm
->cellsize_min
[XX
];
6774 for(d
=1; d
<DIM
; d
++)
6776 limit
= min(limit
,dd
->comm
->cellsize_min
[d
]);
6780 if (comm
->bInterCGBondeds
)
6782 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6783 "two-body bonded interactions","(-rdd)",
6784 max(comm
->cutoff
,comm
->cutoff_mbody
));
6785 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6786 "multi-body bonded interactions","(-rdd)",
6787 (comm
->bBondComm
|| dd
->bGridJump
) ? comm
->cutoff_mbody
: min(comm
->cutoff
,limit
));
6791 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6792 "virtual site constructions","(-rcon)",limit
);
6794 if (dd
->constraint_comm
)
6796 sprintf(buf
,"atoms separated by up to %d constraints",
6798 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6799 buf
,"(-rcon)",limit
);
6801 fprintf(fplog
,"\n");
6807 void set_dd_parameters(FILE *fplog
,gmx_domdec_t
*dd
,real dlb_scale
,
6808 t_inputrec
*ir
,t_forcerec
*fr
,
6811 gmx_domdec_comm_t
*comm
;
6812 int d
,dim
,npulse
,npulse_d_max
,npulse_d
;
6819 bNoCutOff
= (ir
->rvdw
== 0 || ir
->rcoulomb
== 0);
6821 if (EEL_PME(ir
->coulombtype
))
6823 init_ddpme(dd
,&comm
->ddpme
[0],0,comm
->npmenodes_major
);
6824 if (comm
->npmedecompdim
>= 2)
6826 init_ddpme(dd
,&comm
->ddpme
[1],1,
6827 comm
->npmenodes
/comm
->npmenodes_major
);
6832 comm
->npmenodes
= 0;
6833 if (dd
->pme_nodeid
>= 0)
6835 gmx_fatal_collective(FARGS
,NULL
,dd
,
6836 "Can not have separate PME nodes without PME electrostatics");
6840 /* If each molecule is a single charge group
6841 * or we use domain decomposition for each periodic dimension,
6842 * we do not need to take pbc into account for the bonded interactions.
6844 if (fr
->ePBC
== epbcNONE
|| !comm
->bInterCGBondeds
||
6845 (dd
->nc
[XX
]>1 && dd
->nc
[YY
]>1 && (dd
->nc
[ZZ
]>1 || fr
->ePBC
==epbcXY
)))
6847 fr
->bMolPBC
= FALSE
;
6856 fprintf(debug
,"The DD cut-off is %f\n",comm
->cutoff
);
6858 if (comm
->eDLB
!= edlbNO
)
6860 /* Determine the maximum number of comm. pulses in one dimension */
6862 comm
->cellsize_limit
= max(comm
->cellsize_limit
,comm
->cutoff_mbody
);
6864 /* Determine the maximum required number of grid pulses */
6865 if (comm
->cellsize_limit
>= comm
->cutoff
)
6867 /* Only a single pulse is required */
6870 else if (!bNoCutOff
&& comm
->cellsize_limit
> 0)
6872 /* We round down slightly here to avoid overhead due to the latency
6873 * of extra communication calls when the cut-off
6874 * would be only slightly longer than the cell size.
6875 * Later cellsize_limit is redetermined,
6876 * so we can not miss interactions due to this rounding.
6878 npulse
= (int)(0.96 + comm
->cutoff
/comm
->cellsize_limit
);
6882 /* There is no cell size limit */
6883 npulse
= max(dd
->nc
[XX
]-1,max(dd
->nc
[YY
]-1,dd
->nc
[ZZ
]-1));
6886 if (!bNoCutOff
&& npulse
> 1)
6888 /* See if we can do with less pulses, based on dlb_scale */
6890 for(d
=0; d
<dd
->ndim
; d
++)
6893 npulse_d
= (int)(1 + dd
->nc
[dim
]*comm
->cutoff
6894 /(ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
]*dlb_scale
));
6895 npulse_d_max
= max(npulse_d_max
,npulse_d
);
6897 npulse
= min(npulse
,npulse_d_max
);
6900 /* This env var can override npulse */
6901 d
= dd_nst_env(fplog
,"GMX_DD_NPULSE",0);
6908 comm
->bVacDLBNoLimit
= (ir
->ePBC
== epbcNONE
);
6909 for(d
=0; d
<dd
->ndim
; d
++)
6911 comm
->cd
[d
].np_dlb
= min(npulse
,dd
->nc
[dd
->dim
[d
]]-1);
6912 comm
->cd
[d
].np_nalloc
= comm
->cd
[d
].np_dlb
;
6913 snew(comm
->cd
[d
].ind
,comm
->cd
[d
].np_nalloc
);
6914 comm
->maxpulse
= max(comm
->maxpulse
,comm
->cd
[d
].np_dlb
);
6915 if (comm
->cd
[d
].np_dlb
< dd
->nc
[dd
->dim
[d
]]-1)
6917 comm
->bVacDLBNoLimit
= FALSE
;
6921 /* cellsize_limit is set for LINCS in init_domain_decomposition */
6922 if (!comm
->bVacDLBNoLimit
)
6924 comm
->cellsize_limit
= max(comm
->cellsize_limit
,
6925 comm
->cutoff
/comm
->maxpulse
);
6927 comm
->cellsize_limit
= max(comm
->cellsize_limit
,comm
->cutoff_mbody
);
6928 /* Set the minimum cell size for each DD dimension */
6929 for(d
=0; d
<dd
->ndim
; d
++)
6931 if (comm
->bVacDLBNoLimit
||
6932 comm
->cd
[d
].np_dlb
*comm
->cellsize_limit
>= comm
->cutoff
)
6934 comm
->cellsize_min_dlb
[dd
->dim
[d
]] = comm
->cellsize_limit
;
6938 comm
->cellsize_min_dlb
[dd
->dim
[d
]] =
6939 comm
->cutoff
/comm
->cd
[d
].np_dlb
;
6942 if (comm
->cutoff_mbody
<= 0)
6944 comm
->cutoff_mbody
= min(comm
->cutoff
,comm
->cellsize_limit
);
6946 if (comm
->bDynLoadBal
)
6952 print_dd_settings(fplog
,dd
,ir
,comm
->bDynLoadBal
,dlb_scale
,ddbox
);
6953 if (comm
->eDLB
== edlbAUTO
)
6957 fprintf(fplog
,"When dynamic load balancing gets turned on, these settings will change to:\n");
6959 print_dd_settings(fplog
,dd
,ir
,TRUE
,dlb_scale
,ddbox
);
6962 if (ir
->ePBC
== epbcNONE
)
6964 vol_frac
= 1 - 1/(double)dd
->nnodes
;
6969 (1 + comm_box_frac(dd
->nc
,comm
->cutoff
,ddbox
))/(double)dd
->nnodes
;
6973 fprintf(debug
,"Volume fraction for all DD zones: %f\n",vol_frac
);
6975 natoms_tot
= comm
->cgs_gl
.index
[comm
->cgs_gl
.nr
];
6977 dd
->ga2la
= ga2la_init(natoms_tot
,vol_frac
*natoms_tot
);
6980 static void merge_cg_buffers(int ncell
,
6981 gmx_domdec_comm_dim_t
*cd
, int pulse
,
6983 int *index_gl
, int *recv_i
,
6984 rvec
*cg_cm
, rvec
*recv_vr
,
6986 cginfo_mb_t
*cginfo_mb
,int *cginfo
)
6988 gmx_domdec_ind_t
*ind
,*ind_p
;
6989 int p
,cell
,c
,cg
,cg0
,cg1
,cg_gl
,nat
;
6992 ind
= &cd
->ind
[pulse
];
6994 /* First correct the already stored data */
6995 shift
= ind
->nrecv
[ncell
];
6996 for(cell
=ncell
-1; cell
>=0; cell
--)
6998 shift
-= ind
->nrecv
[cell
];
7001 /* Move the cg's present from previous grid pulses */
7002 cg0
= ncg_cell
[ncell
+cell
];
7003 cg1
= ncg_cell
[ncell
+cell
+1];
7004 cgindex
[cg1
+shift
] = cgindex
[cg1
];
7005 for(cg
=cg1
-1; cg
>=cg0
; cg
--)
7007 index_gl
[cg
+shift
] = index_gl
[cg
];
7008 copy_rvec(cg_cm
[cg
],cg_cm
[cg
+shift
]);
7009 cgindex
[cg
+shift
] = cgindex
[cg
];
7010 cginfo
[cg
+shift
] = cginfo
[cg
];
7012 /* Correct the already stored send indices for the shift */
7013 for(p
=1; p
<=pulse
; p
++)
7015 ind_p
= &cd
->ind
[p
];
7017 for(c
=0; c
<cell
; c
++)
7019 cg0
+= ind_p
->nsend
[c
];
7021 cg1
= cg0
+ ind_p
->nsend
[cell
];
7022 for(cg
=cg0
; cg
<cg1
; cg
++)
7024 ind_p
->index
[cg
] += shift
;
7030 /* Merge in the communicated buffers */
7034 for(cell
=0; cell
<ncell
; cell
++)
7036 cg1
= ncg_cell
[ncell
+cell
+1] + shift
;
7039 /* Correct the old cg indices */
7040 for(cg
=ncg_cell
[ncell
+cell
]; cg
<cg1
; cg
++)
7042 cgindex
[cg
+1] += shift_at
;
7045 for(cg
=0; cg
<ind
->nrecv
[cell
]; cg
++)
7047 /* Copy this charge group from the buffer */
7048 index_gl
[cg1
] = recv_i
[cg0
];
7049 copy_rvec(recv_vr
[cg0
],cg_cm
[cg1
]);
7050 /* Add it to the cgindex */
7051 cg_gl
= index_gl
[cg1
];
7052 cginfo
[cg1
] = ddcginfo(cginfo_mb
,cg_gl
);
7053 nat
= GET_CGINFO_NATOMS(cginfo
[cg1
]);
7054 cgindex
[cg1
+1] = cgindex
[cg1
] + nat
;
7059 shift
+= ind
->nrecv
[cell
];
7060 ncg_cell
[ncell
+cell
+1] = cg1
;
7064 static void make_cell2at_index(gmx_domdec_comm_dim_t
*cd
,
7065 int nzone
,int cg0
,const int *cgindex
)
7069 /* Store the atom block boundaries for easy copying of communication buffers
7072 for(zone
=0; zone
<nzone
; zone
++)
7074 for(p
=0; p
<cd
->np
; p
++) {
7075 cd
->ind
[p
].cell2at0
[zone
] = cgindex
[cg
];
7076 cg
+= cd
->ind
[p
].nrecv
[zone
];
7077 cd
->ind
[p
].cell2at1
[zone
] = cgindex
[cg
];
7082 static bool missing_link(t_blocka
*link
,int cg_gl
,char *bLocalCG
)
7088 for(i
=link
->index
[cg_gl
]; i
<link
->index
[cg_gl
+1]; i
++)
7090 if (!bLocalCG
[link
->a
[i
]])
7099 static void setup_dd_communication(gmx_domdec_t
*dd
,
7100 matrix box
,gmx_ddbox_t
*ddbox
,t_forcerec
*fr
)
7102 int dim_ind
,dim
,dim0
,dim1
=-1,dim2
=-1,dimd
,p
,nat_tot
;
7103 int nzone
,nzone_send
,zone
,zonei
,cg0
,cg1
;
7104 int c
,i
,j
,cg
,cg_gl
,nrcg
;
7105 int *zone_cg_range
,pos_cg
,*index_gl
,*cgindex
,*recv_i
;
7106 gmx_domdec_comm_t
*comm
;
7107 gmx_domdec_zones_t
*zones
;
7108 gmx_domdec_comm_dim_t
*cd
;
7109 gmx_domdec_ind_t
*ind
;
7110 cginfo_mb_t
*cginfo_mb
;
7111 bool bBondComm
,bDist2B
,bDistMB
,bDistMB_pulse
,bDistBonded
,bScrew
;
7112 real r_mb
,r_comm2
,r_scomm2
,r_bcomm2
,r
,r_0
,r_1
,r2
,rb2
,r2inc
,inv_ncg
,tric_sh
;
7114 real corner
[DIM
][4],corner_round_0
=0,corner_round_1
[4];
7115 real bcorner
[DIM
],bcorner_round_1
=0;
7117 rvec
*cg_cm
,*normal
,*v_d
,*v_0
=NULL
,*v_1
=NULL
,*recv_vr
;
7118 real skew_fac2_d
,skew_fac_01
;
7124 fprintf(debug
,"Setting up DD communication\n");
7130 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
7132 dim
= dd
->dim
[dim_ind
];
7134 /* Check if we need to use triclinic distances */
7135 tric_dist
[dim_ind
] = 0;
7136 for(i
=0; i
<=dim_ind
; i
++)
7138 if (ddbox
->tric_dir
[dd
->dim
[i
]])
7140 tric_dist
[dim_ind
] = 1;
7145 bBondComm
= comm
->bBondComm
;
7147 /* Do we need to determine extra distances for multi-body bondeds? */
7148 bDistMB
= (comm
->bInterCGMultiBody
&& dd
->bGridJump
&& dd
->ndim
> 1);
7150 /* Do we need to determine extra distances for only two-body bondeds? */
7151 bDist2B
= (bBondComm
&& !bDistMB
);
7153 r_comm2
= sqr(comm
->cutoff
);
7154 r_bcomm2
= sqr(comm
->cutoff_mbody
);
7158 fprintf(debug
,"bBondComm %d, r_bc %f\n",bBondComm
,sqrt(r_bcomm2
));
7161 zones
= &comm
->zones
;
7164 /* The first dimension is equal for all cells */
7165 corner
[0][0] = comm
->cell_x0
[dim0
];
7168 bcorner
[0] = corner
[0][0];
7173 /* This cell row is only seen from the first row */
7174 corner
[1][0] = comm
->cell_x0
[dim1
];
7175 /* All rows can see this row */
7176 corner
[1][1] = comm
->cell_x0
[dim1
];
7179 corner
[1][1] = max(comm
->cell_x0
[dim1
],comm
->zone_d1
[1].mch0
);
7182 /* For the multi-body distance we need the maximum */
7183 bcorner
[1] = max(comm
->cell_x0
[dim1
],comm
->zone_d1
[1].p1_0
);
7186 /* Set the upper-right corner for rounding */
7187 corner_round_0
= comm
->cell_x1
[dim0
];
7194 corner
[2][j
] = comm
->cell_x0
[dim2
];
7198 /* Use the maximum of the i-cells that see a j-cell */
7199 for(i
=0; i
<zones
->nizone
; i
++)
7201 for(j
=zones
->izone
[i
].j0
; j
<zones
->izone
[i
].j1
; j
++)
7207 comm
->zone_d2
[zones
->shift
[i
][dim0
]][zones
->shift
[i
][dim1
]].mch0
);
7213 /* For the multi-body distance we need the maximum */
7214 bcorner
[2] = comm
->cell_x0
[dim2
];
7219 bcorner
[2] = max(bcorner
[2],
7220 comm
->zone_d2
[i
][j
].p1_0
);
7226 /* Set the upper-right corner for rounding */
7227 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7228 * Only cell (0,0,0) can see cell 7 (1,1,1)
7230 corner_round_1
[0] = comm
->cell_x1
[dim1
];
7231 corner_round_1
[3] = comm
->cell_x1
[dim1
];
7234 corner_round_1
[0] = max(comm
->cell_x1
[dim1
],
7235 comm
->zone_d1
[1].mch1
);
7238 /* For the multi-body distance we need the maximum */
7239 bcorner_round_1
= max(comm
->cell_x1
[dim1
],
7240 comm
->zone_d1
[1].p1_1
);
7246 /* Triclinic stuff */
7247 normal
= ddbox
->normal
;
7251 v_0
= ddbox
->v
[dim0
];
7252 if (ddbox
->tric_dir
[dim0
] && ddbox
->tric_dir
[dim1
])
7254 /* Determine the coupling coefficient for the distances
7255 * to the cell planes along dim0 and dim1 through dim2.
7256 * This is required for correct rounding.
7259 ddbox
->v
[dim0
][dim1
+1][dim0
]*ddbox
->v
[dim1
][dim1
+1][dim1
];
7262 fprintf(debug
,"\nskew_fac_01 %f\n",skew_fac_01
);
7268 v_1
= ddbox
->v
[dim1
];
7271 zone_cg_range
= zones
->cg_range
;
7272 index_gl
= dd
->index_gl
;
7273 cgindex
= dd
->cgindex
;
7274 cginfo_mb
= fr
->cginfo_mb
;
7276 zone_cg_range
[0] = 0;
7277 zone_cg_range
[1] = dd
->ncg_home
;
7278 comm
->zone_ncg1
[0] = dd
->ncg_home
;
7279 pos_cg
= dd
->ncg_home
;
7281 nat_tot
= dd
->nat_home
;
7283 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
7285 dim
= dd
->dim
[dim_ind
];
7286 cd
= &comm
->cd
[dim_ind
];
7288 if (dim
>= ddbox
->npbcdim
&& dd
->ci
[dim
] == 0)
7290 /* No pbc in this dimension, the first node should not comm. */
7298 bScrew
= (dd
->bScrewPBC
&& dim
== XX
);
7300 v_d
= ddbox
->v
[dim
];
7301 skew_fac2_d
= sqr(ddbox
->skew_fac
[dim
]);
7303 cd
->bInPlace
= TRUE
;
7304 for(p
=0; p
<cd
->np
; p
++)
7306 /* Only atoms communicated in the first pulse are used
7307 * for multi-body bonded interactions or for bBondComm.
7309 bDistBonded
= ((bDistMB
|| bDist2B
) && p
== 0);
7310 bDistMB_pulse
= (bDistMB
&& bDistBonded
);
7315 for(zone
=0; zone
<nzone_send
; zone
++)
7317 if (tric_dist
[dim_ind
] && dim_ind
> 0)
7319 /* Determine slightly more optimized skew_fac's
7321 * This reduces the number of communicated atoms
7322 * by about 10% for 3D DD of rhombic dodecahedra.
7324 for(dimd
=0; dimd
<dim
; dimd
++)
7326 sf2_round
[dimd
] = 1;
7327 if (ddbox
->tric_dir
[dimd
])
7329 for(i
=dd
->dim
[dimd
]+1; i
<DIM
; i
++)
7331 /* If we are shifted in dimension i
7332 * and the cell plane is tilted forward
7333 * in dimension i, skip this coupling.
7335 if (!(zones
->shift
[nzone
+zone
][i
] &&
7336 ddbox
->v
[dimd
][i
][dimd
] >= 0))
7339 sqr(ddbox
->v
[dimd
][i
][dimd
]);
7342 sf2_round
[dimd
] = 1/sf2_round
[dimd
];
7347 zonei
= zone_perm
[dim_ind
][zone
];
7350 /* Here we permutate the zones to obtain a convenient order
7351 * for neighbor searching
7353 cg0
= zone_cg_range
[zonei
];
7354 cg1
= zone_cg_range
[zonei
+1];
7358 /* Look only at the cg's received in the previous grid pulse
7360 cg1
= zone_cg_range
[nzone
+zone
+1];
7361 cg0
= cg1
- cd
->ind
[p
-1].nrecv
[zone
];
7363 ind
->nsend
[zone
] = 0;
7364 for(cg
=cg0
; cg
<cg1
; cg
++)
7368 if (tric_dist
[dim_ind
] == 0)
7370 /* Rectangular direction, easy */
7371 r
= cg_cm
[cg
][dim
] - corner
[dim_ind
][zone
];
7378 r
= cg_cm
[cg
][dim
] - bcorner
[dim_ind
];
7384 /* Rounding gives at most a 16% reduction
7385 * in communicated atoms
7387 if (dim_ind
>= 1 && (zonei
== 1 || zonei
== 2))
7389 r
= cg_cm
[cg
][dim0
] - corner_round_0
;
7390 /* This is the first dimension, so always r >= 0 */
7397 if (dim_ind
== 2 && (zonei
== 2 || zonei
== 3))
7399 r
= cg_cm
[cg
][dim1
] - corner_round_1
[zone
];
7406 r
= cg_cm
[cg
][dim1
] - bcorner_round_1
;
7416 /* Triclinic direction, more complicated */
7419 /* Rounding, conservative as the skew_fac multiplication
7420 * will slightly underestimate the distance.
7422 if (dim_ind
>= 1 && (zonei
== 1 || zonei
== 2))
7424 rn
[dim0
] = cg_cm
[cg
][dim0
] - corner_round_0
;
7425 for(i
=dim0
+1; i
<DIM
; i
++)
7427 rn
[dim0
] -= cg_cm
[cg
][i
]*v_0
[i
][dim0
];
7429 r2
= rn
[dim0
]*rn
[dim0
]*sf2_round
[dim0
];
7432 rb
[dim0
] = rn
[dim0
];
7435 /* Take care that the cell planes along dim0 might not
7436 * be orthogonal to those along dim1 and dim2.
7438 for(i
=1; i
<=dim_ind
; i
++)
7441 if (normal
[dim0
][dimd
] > 0)
7443 rn
[dimd
] -= rn
[dim0
]*normal
[dim0
][dimd
];
7446 rb
[dimd
] -= rb
[dim0
]*normal
[dim0
][dimd
];
7451 if (dim_ind
== 2 && (zonei
== 2 || zonei
== 3))
7453 rn
[dim1
] += cg_cm
[cg
][dim1
] - corner_round_1
[zone
];
7455 for(i
=dim1
+1; i
<DIM
; i
++)
7457 tric_sh
-= cg_cm
[cg
][i
]*v_1
[i
][dim1
];
7459 rn
[dim1
] += tric_sh
;
7462 r2
+= rn
[dim1
]*rn
[dim1
]*sf2_round
[dim1
];
7463 /* Take care of coupling of the distances
7464 * to the planes along dim0 and dim1 through dim2.
7466 r2
-= rn
[dim0
]*rn
[dim1
]*skew_fac_01
;
7467 /* Take care that the cell planes along dim1
7468 * might not be orthogonal to that along dim2.
7470 if (normal
[dim1
][dim2
] > 0)
7472 rn
[dim2
] -= rn
[dim1
]*normal
[dim1
][dim2
];
7478 cg_cm
[cg
][dim1
] - bcorner_round_1
+ tric_sh
;
7481 rb2
+= rb
[dim1
]*rb
[dim1
]*sf2_round
[dim1
];
7482 /* Take care of coupling of the distances
7483 * to the planes along dim0 and dim1 through dim2.
7485 rb2
-= rb
[dim0
]*rb
[dim1
]*skew_fac_01
;
7486 /* Take care that the cell planes along dim1
7487 * might not be orthogonal to that along dim2.
7489 if (normal
[dim1
][dim2
] > 0)
7491 rb
[dim2
] -= rb
[dim1
]*normal
[dim1
][dim2
];
7496 /* The distance along the communication direction */
7497 rn
[dim
] += cg_cm
[cg
][dim
] - corner
[dim_ind
][zone
];
7499 for(i
=dim
+1; i
<DIM
; i
++)
7501 tric_sh
-= cg_cm
[cg
][i
]*v_d
[i
][dim
];
7506 r2
+= rn
[dim
]*rn
[dim
]*skew_fac2_d
;
7507 /* Take care of coupling of the distances
7508 * to the planes along dim0 and dim1 through dim2.
7510 if (dim_ind
== 1 && zonei
== 1)
7512 r2
-= rn
[dim0
]*rn
[dim
]*skew_fac_01
;
7518 rb
[dim
] += cg_cm
[cg
][dim
] - bcorner
[dim_ind
] + tric_sh
;
7521 rb2
+= rb
[dim
]*rb
[dim
]*skew_fac2_d
;
7522 /* Take care of coupling of the distances
7523 * to the planes along dim0 and dim1 through dim2.
7525 if (dim_ind
== 1 && zonei
== 1)
7527 rb2
-= rb
[dim0
]*rb
[dim
]*skew_fac_01
;
7535 ((bDistMB
&& rb2
< r_bcomm2
) ||
7536 (bDist2B
&& r2
< r_bcomm2
)) &&
7538 (GET_CGINFO_BOND_INTER(fr
->cginfo
[cg
]) &&
7539 missing_link(comm
->cglink
,index_gl
[cg
],
7542 /* Make an index to the local charge groups */
7543 if (nsend
+1 > ind
->nalloc
)
7545 ind
->nalloc
= over_alloc_large(nsend
+1);
7546 srenew(ind
->index
,ind
->nalloc
);
7548 if (nsend
+1 > comm
->nalloc_int
)
7550 comm
->nalloc_int
= over_alloc_large(nsend
+1);
7551 srenew(comm
->buf_int
,comm
->nalloc_int
);
7553 ind
->index
[nsend
] = cg
;
7554 comm
->buf_int
[nsend
] = index_gl
[cg
];
7556 vec_rvec_check_alloc(&comm
->vbuf
,nsend
+1);
7558 if (dd
->ci
[dim
] == 0)
7560 /* Correct cg_cm for pbc */
7561 rvec_add(cg_cm
[cg
],box
[dim
],comm
->vbuf
.v
[nsend
]);
7564 comm
->vbuf
.v
[nsend
][YY
] =
7565 box
[YY
][YY
]-comm
->vbuf
.v
[nsend
][YY
];
7566 comm
->vbuf
.v
[nsend
][ZZ
] =
7567 box
[ZZ
][ZZ
]-comm
->vbuf
.v
[nsend
][ZZ
];
7572 copy_rvec(cg_cm
[cg
],comm
->vbuf
.v
[nsend
]);
7575 nat
+= cgindex
[cg
+1] - cgindex
[cg
];
7579 /* Clear the counts in case we do not have pbc */
7580 for(zone
=nzone_send
; zone
<nzone
; zone
++)
7582 ind
->nsend
[zone
] = 0;
7584 ind
->nsend
[nzone
] = nsend
;
7585 ind
->nsend
[nzone
+1] = nat
;
7586 /* Communicate the number of cg's and atoms to receive */
7587 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
7588 ind
->nsend
, nzone
+2,
7589 ind
->nrecv
, nzone
+2);
7591 /* The rvec buffer is also required for atom buffers of size nsend
7592 * in dd_move_x and dd_move_f.
7594 vec_rvec_check_alloc(&comm
->vbuf
,ind
->nsend
[nzone
+1]);
7598 /* We can receive in place if only the last zone is not empty */
7599 for(zone
=0; zone
<nzone
-1; zone
++)
7601 if (ind
->nrecv
[zone
] > 0)
7603 cd
->bInPlace
= FALSE
;
7608 /* The int buffer is only required here for the cg indices */
7609 if (ind
->nrecv
[nzone
] > comm
->nalloc_int2
)
7611 comm
->nalloc_int2
= over_alloc_dd(ind
->nrecv
[nzone
]);
7612 srenew(comm
->buf_int2
,comm
->nalloc_int2
);
7614 /* The rvec buffer is also required for atom buffers
7615 * of size nrecv in dd_move_x and dd_move_f.
7617 i
= max(cd
->ind
[0].nrecv
[nzone
+1],ind
->nrecv
[nzone
+1]);
7618 vec_rvec_check_alloc(&comm
->vbuf2
,i
);
7622 /* Make space for the global cg indices */
7623 if (pos_cg
+ ind
->nrecv
[nzone
] > dd
->cg_nalloc
7624 || dd
->cg_nalloc
== 0)
7626 dd
->cg_nalloc
= over_alloc_dd(pos_cg
+ ind
->nrecv
[nzone
]);
7627 srenew(index_gl
,dd
->cg_nalloc
);
7628 srenew(cgindex
,dd
->cg_nalloc
+1);
7630 /* Communicate the global cg indices */
7633 recv_i
= index_gl
+ pos_cg
;
7637 recv_i
= comm
->buf_int2
;
7639 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
7640 comm
->buf_int
, nsend
,
7641 recv_i
, ind
->nrecv
[nzone
]);
7643 /* Make space for cg_cm */
7644 if (pos_cg
+ ind
->nrecv
[nzone
] > fr
->cg_nalloc
)
7646 dd_realloc_fr_cg(fr
,pos_cg
+ ind
->nrecv
[nzone
]);
7649 /* Communicate cg_cm */
7652 recv_vr
= cg_cm
+ pos_cg
;
7656 recv_vr
= comm
->vbuf2
.v
;
7658 dd_sendrecv_rvec(dd
, dim_ind
, dddirBackward
,
7659 comm
->vbuf
.v
, nsend
,
7660 recv_vr
, ind
->nrecv
[nzone
]);
7662 /* Make the charge group index */
7665 zone
= (p
== 0 ? 0 : nzone
- 1);
7666 while (zone
< nzone
)
7668 for(cg
=0; cg
<ind
->nrecv
[zone
]; cg
++)
7670 cg_gl
= index_gl
[pos_cg
];
7671 fr
->cginfo
[pos_cg
] = ddcginfo(cginfo_mb
,cg_gl
);
7672 nrcg
= GET_CGINFO_NATOMS(fr
->cginfo
[pos_cg
]);
7673 cgindex
[pos_cg
+1] = cgindex
[pos_cg
] + nrcg
;
7676 /* Update the charge group presence,
7677 * so we can use it in the next pass of the loop.
7679 comm
->bLocalCG
[cg_gl
] = TRUE
;
7685 comm
->zone_ncg1
[nzone
+zone
] = ind
->nrecv
[zone
];
7688 zone_cg_range
[nzone
+zone
] = pos_cg
;
7693 /* This part of the code is never executed with bBondComm. */
7694 merge_cg_buffers(nzone
,cd
,p
,zone_cg_range
,
7695 index_gl
,recv_i
,cg_cm
,recv_vr
,
7696 cgindex
,fr
->cginfo_mb
,fr
->cginfo
);
7697 pos_cg
+= ind
->nrecv
[nzone
];
7699 nat_tot
+= ind
->nrecv
[nzone
+1];
7703 /* Store the atom block for easy copying of communication buffers */
7704 make_cell2at_index(cd
,nzone
,zone_cg_range
[nzone
],cgindex
);
7708 dd
->index_gl
= index_gl
;
7709 dd
->cgindex
= cgindex
;
7711 dd
->ncg_tot
= zone_cg_range
[zones
->n
];
7712 dd
->nat_tot
= nat_tot
;
7713 comm
->nat
[ddnatHOME
] = dd
->nat_home
;
7714 for(i
=ddnatZONE
; i
<ddnatNR
; i
++)
7716 comm
->nat
[i
] = dd
->nat_tot
;
7721 /* We don't need to update cginfo, since that was alrady done above.
7722 * So we pass NULL for the forcerec.
7724 dd_set_cginfo(dd
->index_gl
,dd
->ncg_home
,dd
->ncg_tot
,
7725 NULL
,comm
->bLocalCG
);
7730 fprintf(debug
,"Finished setting up DD communication, zones:");
7731 for(c
=0; c
<zones
->n
; c
++)
7733 fprintf(debug
," %d",zones
->cg_range
[c
+1]-zones
->cg_range
[c
]);
7735 fprintf(debug
,"\n");
7739 static void set_cg_boundaries(gmx_domdec_zones_t
*zones
)
7743 for(c
=0; c
<zones
->nizone
; c
++)
7745 zones
->izone
[c
].cg1
= zones
->cg_range
[c
+1];
7746 zones
->izone
[c
].jcg0
= zones
->cg_range
[zones
->izone
[c
].j0
];
7747 zones
->izone
[c
].jcg1
= zones
->cg_range
[zones
->izone
[c
].j1
];
7751 static int comp_cgsort(const void *a
,const void *b
)
7755 gmx_cgsort_t
*cga
,*cgb
;
7756 cga
= (gmx_cgsort_t
*)a
;
7757 cgb
= (gmx_cgsort_t
*)b
;
7759 comp
= cga
->nsc
- cgb
->nsc
;
7762 comp
= cga
->ind_gl
- cgb
->ind_gl
;
7768 static void order_int_cg(int n
,gmx_cgsort_t
*sort
,
7773 /* Order the data */
7776 buf
[i
] = a
[sort
[i
].ind
];
7779 /* Copy back to the original array */
7786 static void order_vec_cg(int n
,gmx_cgsort_t
*sort
,
7791 /* Order the data */
7794 copy_rvec(v
[sort
[i
].ind
],buf
[i
]);
7797 /* Copy back to the original array */
7800 copy_rvec(buf
[i
],v
[i
]);
7804 static void order_vec_atom(int ncg
,int *cgindex
,gmx_cgsort_t
*sort
,
7807 int a
,atot
,cg
,cg0
,cg1
,i
;
7809 /* Order the data */
7811 for(cg
=0; cg
<ncg
; cg
++)
7813 cg0
= cgindex
[sort
[cg
].ind
];
7814 cg1
= cgindex
[sort
[cg
].ind
+1];
7815 for(i
=cg0
; i
<cg1
; i
++)
7817 copy_rvec(v
[i
],buf
[a
]);
7823 /* Copy back to the original array */
7824 for(a
=0; a
<atot
; a
++)
7826 copy_rvec(buf
[a
],v
[a
]);
7830 static void ordered_sort(int nsort2
,gmx_cgsort_t
*sort2
,
7831 int nsort_new
,gmx_cgsort_t
*sort_new
,
7832 gmx_cgsort_t
*sort1
)
7836 /* The new indices are not very ordered, so we qsort them */
7837 qsort_threadsafe(sort_new
,nsort_new
,sizeof(sort_new
[0]),comp_cgsort
);
7839 /* sort2 is already ordered, so now we can merge the two arrays */
7843 while(i2
< nsort2
|| i_new
< nsort_new
)
7847 sort1
[i1
++] = sort_new
[i_new
++];
7849 else if (i_new
== nsort_new
)
7851 sort1
[i1
++] = sort2
[i2
++];
7853 else if (sort2
[i2
].nsc
< sort_new
[i_new
].nsc
||
7854 (sort2
[i2
].nsc
== sort_new
[i_new
].nsc
&&
7855 sort2
[i2
].ind_gl
< sort_new
[i_new
].ind_gl
))
7857 sort1
[i1
++] = sort2
[i2
++];
7861 sort1
[i1
++] = sort_new
[i_new
++];
7866 static void dd_sort_state(gmx_domdec_t
*dd
,int ePBC
,
7867 rvec
*cgcm
,t_forcerec
*fr
,t_state
*state
,
7870 gmx_domdec_sort_t
*sort
;
7871 gmx_cgsort_t
*cgsort
,*sort_i
;
7872 int ncg_new
,nsort2
,nsort_new
,i
,cell_index
,*ibuf
,cgsize
;
7875 sort
= dd
->comm
->sort
;
7877 if (dd
->ncg_home
> sort
->sort_nalloc
)
7879 sort
->sort_nalloc
= over_alloc_dd(dd
->ncg_home
);
7880 srenew(sort
->sort1
,sort
->sort_nalloc
);
7881 srenew(sort
->sort2
,sort
->sort_nalloc
);
7884 if (ncg_home_old
>= 0)
7886 /* The charge groups that remained in the same ns grid cell
7887 * are completely ordered. So we can sort efficiently by sorting
7888 * the charge groups that did move into the stationary list.
7893 for(i
=0; i
<dd
->ncg_home
; i
++)
7895 /* Check if this cg did not move to another node */
7896 cell_index
= fr
->ns
.grid
->cell_index
[i
];
7897 if (cell_index
!= 4*fr
->ns
.grid
->ncells
)
7899 if (i
>= ncg_home_old
|| cell_index
!= sort
->sort1
[i
].nsc
)
7901 /* This cg is new on this node or moved ns grid cell */
7902 if (nsort_new
>= sort
->sort_new_nalloc
)
7904 sort
->sort_new_nalloc
= over_alloc_dd(nsort_new
+1);
7905 srenew(sort
->sort_new
,sort
->sort_new_nalloc
);
7907 sort_i
= &(sort
->sort_new
[nsort_new
++]);
7911 /* This cg did not move */
7912 sort_i
= &(sort
->sort2
[nsort2
++]);
7914 /* Sort on the ns grid cell indices
7915 * and the global topology index
7917 sort_i
->nsc
= cell_index
;
7918 sort_i
->ind_gl
= dd
->index_gl
[i
];
7925 fprintf(debug
,"ordered sort cgs: stationary %d moved %d\n",
7928 /* Sort efficiently */
7929 ordered_sort(nsort2
,sort
->sort2
,nsort_new
,sort
->sort_new
,sort
->sort1
);
7933 cgsort
= sort
->sort1
;
7935 for(i
=0; i
<dd
->ncg_home
; i
++)
7937 /* Sort on the ns grid cell indices
7938 * and the global topology index
7940 cgsort
[i
].nsc
= fr
->ns
.grid
->cell_index
[i
];
7941 cgsort
[i
].ind_gl
= dd
->index_gl
[i
];
7943 if (cgsort
[i
].nsc
!= 4*fr
->ns
.grid
->ncells
)
7950 fprintf(debug
,"qsort cgs: %d new home %d\n",dd
->ncg_home
,ncg_new
);
7952 /* Determine the order of the charge groups using qsort */
7953 qsort_threadsafe(cgsort
,dd
->ncg_home
,sizeof(cgsort
[0]),comp_cgsort
);
7955 cgsort
= sort
->sort1
;
7957 /* We alloc with the old size, since cgindex is still old */
7958 vec_rvec_check_alloc(&dd
->comm
->vbuf
,dd
->cgindex
[dd
->ncg_home
]);
7959 vbuf
= dd
->comm
->vbuf
.v
;
7961 /* Remove the charge groups which are no longer at home here */
7962 dd
->ncg_home
= ncg_new
;
7964 /* Reorder the state */
7965 for(i
=0; i
<estNR
; i
++)
7967 if (EST_DISTR(i
) && state
->flags
& (1<<i
))
7972 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->x
,vbuf
);
7975 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->v
,vbuf
);
7978 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->sd_X
,vbuf
);
7981 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->cg_p
,vbuf
);
7985 case estDISRE_INITF
:
7986 case estDISRE_RM3TAV
:
7987 case estORIRE_INITF
:
7989 /* No ordering required */
7992 gmx_incons("Unknown state entry encountered in dd_sort_state");
7998 order_vec_cg(dd
->ncg_home
,cgsort
,cgcm
,vbuf
);
8000 if (dd
->ncg_home
+1 > sort
->ibuf_nalloc
)
8002 sort
->ibuf_nalloc
= over_alloc_dd(dd
->ncg_home
+1);
8003 srenew(sort
->ibuf
,sort
->ibuf_nalloc
);
8006 /* Reorder the global cg index */
8007 order_int_cg(dd
->ncg_home
,cgsort
,dd
->index_gl
,ibuf
);
8008 /* Reorder the cginfo */
8009 order_int_cg(dd
->ncg_home
,cgsort
,fr
->cginfo
,ibuf
);
8010 /* Rebuild the local cg index */
8012 for(i
=0; i
<dd
->ncg_home
; i
++)
8014 cgsize
= dd
->cgindex
[cgsort
[i
].ind
+1] - dd
->cgindex
[cgsort
[i
].ind
];
8015 ibuf
[i
+1] = ibuf
[i
] + cgsize
;
8017 for(i
=0; i
<dd
->ncg_home
+1; i
++)
8019 dd
->cgindex
[i
] = ibuf
[i
];
8021 /* Set the home atom number */
8022 dd
->nat_home
= dd
->cgindex
[dd
->ncg_home
];
8024 /* Copy the sorted ns cell indices back to the ns grid struct */
8025 for(i
=0; i
<dd
->ncg_home
; i
++)
8027 fr
->ns
.grid
->cell_index
[i
] = cgsort
[i
].nsc
;
8029 fr
->ns
.grid
->nr
= dd
->ncg_home
;
8032 static void add_dd_statistics(gmx_domdec_t
*dd
)
8034 gmx_domdec_comm_t
*comm
;
8039 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8041 comm
->sum_nat
[ddnat
-ddnatZONE
] +=
8042 comm
->nat
[ddnat
] - comm
->nat
[ddnat
-1];
8047 void reset_dd_statistics_counters(gmx_domdec_t
*dd
)
8049 gmx_domdec_comm_t
*comm
;
8054 /* Reset all the statistics and counters for total run counting */
8055 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8057 comm
->sum_nat
[ddnat
-ddnatZONE
] = 0;
8061 comm
->load_step
= 0;
8064 clear_ivec(comm
->load_lim
);
8069 void print_dd_statistics(t_commrec
*cr
,t_inputrec
*ir
,FILE *fplog
)
8071 gmx_domdec_comm_t
*comm
;
8075 comm
= cr
->dd
->comm
;
8077 gmx_sumd(ddnatNR
-ddnatZONE
,comm
->sum_nat
,cr
);
8084 fprintf(fplog
,"\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
8086 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8088 av
= comm
->sum_nat
[ddnat
-ddnatZONE
]/comm
->ndecomp
;
8093 " av. #atoms communicated per step for force: %d x %.1f\n",
8097 if (cr
->dd
->vsite_comm
)
8100 " av. #atoms communicated per step for vsites: %d x %.1f\n",
8101 (EEL_PME(ir
->coulombtype
) || ir
->coulombtype
==eelEWALD
) ? 3 : 2,
8106 if (cr
->dd
->constraint_comm
)
8109 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
8110 1 + ir
->nLincsIter
,av
);
8114 gmx_incons(" Unknown type for DD statistics");
8117 fprintf(fplog
,"\n");
8119 if (comm
->bRecordLoad
&& EI_DYNAMICS(ir
->eI
))
8121 print_dd_load_av(fplog
,cr
->dd
);
8125 void dd_partition_system(FILE *fplog
,
8126 gmx_large_int_t step
,
8130 t_state
*state_global
,
8131 gmx_mtop_t
*top_global
,
8133 t_state
*state_local
,
8136 gmx_localtop_t
*top_local
,
8139 gmx_shellfc_t shellfc
,
8140 gmx_constr_t constr
,
8142 gmx_wallcycle_t wcycle
,
8146 gmx_domdec_comm_t
*comm
;
8147 gmx_ddbox_t ddbox
={0};
8149 gmx_large_int_t step_pcoupl
;
8150 rvec cell_ns_x0
,cell_ns_x1
;
8151 int i
,j
,n
,cg0
=0,ncg_home_old
=-1,nat_f_novirsum
;
8152 bool bBoxChanged
,bNStGlobalComm
,bDoDLB
,bCheckDLB
,bTurnOnDLB
,bLogLoad
;
8153 bool bRedist
,bSortCG
,bResortAll
;
8161 bBoxChanged
= (bMasterState
|| DEFORM(*ir
));
8162 if (ir
->epc
!= epcNO
)
8164 /* With nstcalcenery > 1 pressure coupling happens.
8165 * one step after calculating the energies.
8166 * Box scaling happens at the end of the MD step,
8167 * after the DD partitioning.
8168 * We therefore have to do DLB in the first partitioning
8169 * after an MD step where P-coupling occured.
8170 * We need to determine the last step in which p-coupling occurred.
8171 * MRS -- need to validate this for vv?
8173 n
= ir
->nstcalcenergy
;
8176 step_pcoupl
= step
- 1;
8180 step_pcoupl
= ((step
- 1)/n
)*n
+ 1;
8182 if (step_pcoupl
>= comm
->partition_step
)
8188 bNStGlobalComm
= (step
>= comm
->partition_step
+ nstglobalcomm
);
8190 if (!comm
->bDynLoadBal
)
8196 /* Should we do dynamic load balacing this step?
8197 * Since it requires (possibly expensive) global communication,
8198 * we might want to do DLB less frequently.
8200 if (bBoxChanged
|| ir
->epc
!= epcNO
)
8202 bDoDLB
= bBoxChanged
;
8206 bDoDLB
= bNStGlobalComm
;
8210 /* Check if we have recorded loads on the nodes */
8211 if (comm
->bRecordLoad
&& dd_load_count(comm
))
8213 if (comm
->eDLB
== edlbAUTO
&& !comm
->bDynLoadBal
)
8215 /* Check if we should use DLB at the second partitioning
8216 * and every 100 partitionings,
8217 * so the extra communication cost is negligible.
8219 n
= max(100,nstglobalcomm
);
8220 bCheckDLB
= (comm
->n_load_collect
== 0 ||
8221 comm
->n_load_have
% n
== n
-1);
8228 /* Print load every nstlog, first and last step to the log file */
8229 bLogLoad
= ((ir
->nstlog
> 0 && step
% ir
->nstlog
== 0) ||
8230 comm
->n_load_collect
== 0 ||
8231 (step
+ ir
->nstlist
> ir
->init_step
+ ir
->nsteps
));
8233 /* Avoid extra communication due to verbose screen output
8234 * when nstglobalcomm is set.
8236 if (bDoDLB
|| bLogLoad
|| bCheckDLB
||
8237 (bVerbose
&& (ir
->nstlist
== 0 || nstglobalcomm
<= ir
->nstlist
)))
8239 get_load_distribution(dd
,wcycle
);
8244 dd_print_load(fplog
,dd
,step
-1);
8248 dd_print_load_verbose(dd
);
8251 comm
->n_load_collect
++;
8254 /* Since the timings are node dependent, the master decides */
8258 (dd_force_imb_perf_loss(dd
) >= DD_PERF_LOSS
);
8261 fprintf(debug
,"step %s, imb loss %f\n",
8262 gmx_step_str(step
,sbuf
),
8263 dd_force_imb_perf_loss(dd
));
8266 dd_bcast(dd
,sizeof(bTurnOnDLB
),&bTurnOnDLB
);
8269 turn_on_dlb(fplog
,cr
,step
);
8274 comm
->n_load_have
++;
8277 cgs_gl
= &comm
->cgs_gl
;
8282 /* Clear the old state */
8283 clear_dd_indices(dd
,0,0);
8285 set_ddbox(dd
,bMasterState
,cr
,ir
,state_global
->box
,
8286 TRUE
,cgs_gl
,state_global
->x
,&ddbox
);
8288 get_cg_distribution(fplog
,step
,dd
,cgs_gl
,
8289 state_global
->box
,&ddbox
,state_global
->x
);
8291 dd_distribute_state(dd
,cgs_gl
,
8292 state_global
,state_local
,f
);
8294 dd_make_local_cgs(dd
,&top_local
->cgs
);
8296 if (dd
->ncg_home
> fr
->cg_nalloc
)
8298 dd_realloc_fr_cg(fr
,dd
->ncg_home
);
8300 calc_cgcm(fplog
,0,dd
->ncg_home
,
8301 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
8303 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
8305 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
8309 else if (state_local
->ddp_count
!= dd
->ddp_count
)
8311 if (state_local
->ddp_count
> dd
->ddp_count
)
8313 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local
->ddp_count
,dd
->ddp_count
);
8316 if (state_local
->ddp_count_cg_gl
!= state_local
->ddp_count
)
8318 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local
->ddp_count_cg_gl
,state_local
->ddp_count
);
8321 /* Clear the old state */
8322 clear_dd_indices(dd
,0,0);
8324 /* Build the new indices */
8325 rebuild_cgindex(dd
,cgs_gl
->index
,state_local
);
8326 make_dd_indices(dd
,cgs_gl
->index
,0);
8328 /* Redetermine the cg COMs */
8329 calc_cgcm(fplog
,0,dd
->ncg_home
,
8330 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
8332 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
8334 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
8336 set_ddbox(dd
,bMasterState
,cr
,ir
,state_local
->box
,
8337 TRUE
,&top_local
->cgs
,state_local
->x
,&ddbox
);
8339 bRedist
= comm
->bDynLoadBal
;
8343 /* We have the full state, only redistribute the cgs */
8345 /* Clear the non-home indices */
8346 clear_dd_indices(dd
,dd
->ncg_home
,dd
->nat_home
);
8348 /* Avoid global communication for dim's without pbc and -gcom */
8349 if (!bNStGlobalComm
)
8351 copy_rvec(comm
->box0
,ddbox
.box0
);
8352 copy_rvec(comm
->box_size
,ddbox
.box_size
);
8354 set_ddbox(dd
,bMasterState
,cr
,ir
,state_local
->box
,
8355 bNStGlobalComm
,&top_local
->cgs
,state_local
->x
,&ddbox
);
8360 /* For dim's without pbc and -gcom */
8361 copy_rvec(ddbox
.box0
,comm
->box0
);
8362 copy_rvec(ddbox
.box_size
,comm
->box_size
);
8364 set_dd_cell_sizes(dd
,&ddbox
,dynamic_dd_box(&ddbox
,ir
),bMasterState
,bDoDLB
,
8367 if (comm
->nstDDDumpGrid
> 0 && step
% comm
->nstDDDumpGrid
== 0)
8369 write_dd_grid_pdb("dd_grid",step
,dd
,state_local
->box
,&ddbox
);
8372 /* Check if we should sort the charge groups */
8373 if (comm
->nstSortCG
> 0)
8375 bSortCG
= (bMasterState
||
8376 (bRedist
&& (step
% comm
->nstSortCG
== 0)));
8383 ncg_home_old
= dd
->ncg_home
;
8387 cg0
= dd_redistribute_cg(fplog
,step
,dd
,ddbox
.tric_dir
,
8388 state_local
,f
,fr
,mdatoms
,
8392 get_nsgrid_boundaries(fr
->ns
.grid
,dd
,
8393 state_local
->box
,&ddbox
,&comm
->cell_x0
,&comm
->cell_x1
,
8394 dd
->ncg_home
,fr
->cg_cm
,
8395 cell_ns_x0
,cell_ns_x1
,&grid_density
);
8399 comm_dd_ns_cell_sizes(dd
,&ddbox
,cell_ns_x0
,cell_ns_x1
,step
);
8402 copy_ivec(fr
->ns
.grid
->n
,ncells_old
);
8403 grid_first(fplog
,fr
->ns
.grid
,dd
,&ddbox
,fr
->ePBC
,
8404 state_local
->box
,cell_ns_x0
,cell_ns_x1
,
8405 fr
->rlistlong
,grid_density
);
8406 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
8407 copy_ivec(ddbox
.tric_dir
,comm
->tric_dir
);
8411 /* Sort the state on charge group position.
8412 * This enables exact restarts from this step.
8413 * It also improves performance by about 15% with larger numbers
8414 * of atoms per node.
8417 /* Fill the ns grid with the home cell,
8418 * so we can sort with the indices.
8420 set_zones_ncg_home(dd
);
8421 fill_grid(fplog
,&comm
->zones
,fr
->ns
.grid
,dd
->ncg_home
,
8422 0,dd
->ncg_home
,fr
->cg_cm
);
8424 /* Check if we can user the old order and ns grid cell indices
8425 * of the charge groups to sort the charge groups efficiently.
8427 bResortAll
= (bMasterState
||
8428 fr
->ns
.grid
->n
[XX
] != ncells_old
[XX
] ||
8429 fr
->ns
.grid
->n
[YY
] != ncells_old
[YY
] ||
8430 fr
->ns
.grid
->n
[ZZ
] != ncells_old
[ZZ
]);
8434 fprintf(debug
,"Step %s, sorting the %d home charge groups\n",
8435 gmx_step_str(step
,sbuf
),dd
->ncg_home
);
8437 dd_sort_state(dd
,ir
->ePBC
,fr
->cg_cm
,fr
,state_local
,
8438 bResortAll
? -1 : ncg_home_old
);
8439 /* Rebuild all the indices */
8441 ga2la_clear(dd
->ga2la
);
8444 /* Setup up the communication and communicate the coordinates */
8445 setup_dd_communication(dd
,state_local
->box
,&ddbox
,fr
);
8447 /* Set the indices */
8448 make_dd_indices(dd
,cgs_gl
->index
,cg0
);
8450 /* Set the charge group boundaries for neighbor searching */
8451 set_cg_boundaries(&comm
->zones
);
8454 write_dd_pdb("dd_home",step,"dump",top_global,cr,
8455 -1,state_local->x,state_local->box);
8458 /* Extract a local topology from the global topology */
8459 for(i
=0; i
<dd
->ndim
; i
++)
8461 np
[dd
->dim
[i
]] = comm
->cd
[i
].np
;
8463 dd_make_local_top(fplog
,dd
,&comm
->zones
,dd
->npbcdim
,state_local
->box
,
8464 comm
->cellsize_min
,np
,
8465 fr
,vsite
,top_global
,top_local
);
8467 /* Set up the special atom communication */
8468 n
= comm
->nat
[ddnatZONE
];
8469 for(i
=ddnatZONE
+1; i
<ddnatNR
; i
++)
8474 if (vsite
&& vsite
->n_intercg_vsite
)
8476 n
= dd_make_local_vsites(dd
,n
,top_local
->idef
.il
);
8480 if (dd
->bInterCGcons
)
8482 /* Only for inter-cg constraints we need special code */
8483 n
= dd_make_local_constraints(dd
,n
,top_global
,
8484 constr
,ir
->nProjOrder
,
8485 &top_local
->idef
.il
[F_CONSTR
]);
8489 gmx_incons("Unknown special atom type setup");
8494 /* Make space for the extra coordinates for virtual site
8495 * or constraint communication.
8497 state_local
->natoms
= comm
->nat
[ddnatNR
-1];
8498 if (state_local
->natoms
> state_local
->nalloc
)
8500 dd_realloc_state(state_local
,f
,state_local
->natoms
);
8503 if (fr
->bF_NoVirSum
)
8505 if (vsite
&& vsite
->n_intercg_vsite
)
8507 nat_f_novirsum
= comm
->nat
[ddnatVSITE
];
8511 if (EEL_FULL(ir
->coulombtype
) && dd
->n_intercg_excl
> 0)
8513 nat_f_novirsum
= dd
->nat_tot
;
8517 nat_f_novirsum
= dd
->nat_home
;
8526 /* Set the number of atoms required for the force calculation.
8527 * Forces need to be constrained when using a twin-range setup
8528 * or with energy minimization. For simple simulations we could
8529 * avoid some allocation, zeroing and copying, but this is
8530 * probably not worth the complications ande checking.
8532 forcerec_set_ranges(fr
,dd
->ncg_home
,dd
->ncg_tot
,
8533 dd
->nat_tot
,comm
->nat
[ddnatCON
],nat_f_novirsum
);
8535 /* We make the all mdatoms up to nat_tot_con.
8536 * We could save some work by only setting invmass
8537 * between nat_tot and nat_tot_con.
8539 /* This call also sets the new number of home particles to dd->nat_home */
8540 atoms2md(top_global
,ir
,
8541 comm
->nat
[ddnatCON
],dd
->gatindex
,0,dd
->nat_home
,mdatoms
);
8543 /* Now we have the charges we can sort the FE interactions */
8544 dd_sort_local_top(dd
,mdatoms
,top_local
);
8548 /* Make the local shell stuff, currently no communication is done */
8549 make_local_shells(cr
,mdatoms
,shellfc
);
8552 if (ir
->implicit_solvent
)
8554 make_local_gb(cr
,fr
->born
,ir
->gb_algorithm
);
8557 if (!(cr
->duty
& DUTY_PME
))
8559 /* Send the charges to our PME only node */
8560 gmx_pme_send_q(cr
,mdatoms
->nChargePerturbed
,
8561 mdatoms
->chargeA
,mdatoms
->chargeB
,
8562 comm
->ddpme
[0].maxshift
,comm
->ddpme
[1].maxshift
);
8567 set_constraints(constr
,top_local
,ir
,mdatoms
,cr
);
8570 if (ir
->ePull
!= epullNO
)
8572 /* Update the local pull groups */
8573 dd_make_local_pull_groups(dd
,ir
->pull
,mdatoms
);
8576 add_dd_statistics(dd
);
8578 /* Make sure we only count the cycles for this DD partitioning */
8579 clear_dd_cycle_counts(dd
);
8581 /* Because the order of the atoms might have changed since
8582 * the last vsite construction, we need to communicate the constructing
8583 * atom coordinates again (for spreading the forces this MD step).
8585 dd_move_x_vsites(dd
,state_local
->box
,state_local
->x
);
8587 if (comm
->nstDDDump
> 0 && step
% comm
->nstDDDump
== 0)
8589 dd_move_x(dd
,state_local
->box
,state_local
->x
);
8590 write_dd_pdb("dd_dump",step
,"dump",top_global
,cr
,
8591 -1,state_local
->x
,state_local
->box
);
8594 /* Store the partitioning step */
8595 comm
->partition_step
= step
;
8597 /* Increase the DD partitioning counter */
8599 /* The state currently matches this DD partitioning count, store it */
8600 state_local
->ddp_count
= dd
->ddp_count
;
8603 /* The DD master node knows the complete cg distribution,
8604 * store the count so we can possibly skip the cg info communication.
8606 comm
->master_cg_ddp_count
= (bSortCG
? 0 : dd
->ddp_count
);
8609 if (comm
->DD_debug
> 0)
8611 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
8612 check_index_consistency(dd
,top_global
->natoms
,ncg_mtop(top_global
),
8613 "after partitioning");