1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
32 #include "domdec_network.h"
35 #include "chargegroup.h"
44 #include "gmx_wallcycle.h"
48 #include "mtop_util.h"
50 #include "gmx_ga2la.h"
60 #define DDRANK(dd,rank) (rank)
61 #define DDMASTERRANK(dd) (dd->masterrank)
63 typedef struct gmx_domdec_master
65 /* The cell boundaries */
67 /* The global charge group division */
68 int *ncg
; /* Number of home charge groups for each node */
69 int *index
; /* Index of nnodes+1 into cg */
70 int *cg
; /* Global charge group index */
71 int *nat
; /* Number of home atoms for each node. */
72 int *ibuf
; /* Buffer for communication */
73 rvec
*vbuf
; /* Buffer for state scattering and gathering */
74 } gmx_domdec_master_t
;
78 /* The numbers of charge groups to send and receive for each cell
79 * that requires communication, the last entry contains the total
80 * number of atoms that needs to be communicated.
82 int nsend
[DD_MAXIZONE
+2];
83 int nrecv
[DD_MAXIZONE
+2];
84 /* The charge groups to send */
87 /* The atom range for non-in-place communication */
88 int cell2at0
[DD_MAXIZONE
];
89 int cell2at1
[DD_MAXIZONE
];
94 int np
; /* Number of grid pulses in this dimension */
95 int np_dlb
; /* For dlb, for use with edlbAUTO */
96 gmx_domdec_ind_t
*ind
; /* The indices to communicate, size np */
98 bool bInPlace
; /* Can we communicate in place? */
99 } gmx_domdec_comm_dim_t
;
103 bool *bCellMin
; /* Temp. var.: is this cell size at the limit */
104 real
*cell_f
; /* State var.: cell boundaries, box relative */
105 real
*old_cell_f
; /* Temp. var.: old cell size */
106 real
*cell_f_max0
; /* State var.: max lower boundary, incl neighbors */
107 real
*cell_f_min1
; /* State var.: min upper boundary, incl neighbors */
108 real
*bound_min
; /* Temp. var.: lower limit for cell boundary */
109 real
*bound_max
; /* Temp. var.: upper limit for cell boundary */
110 bool bLimited
; /* State var.: is DLB limited in this dim and row */
111 real
*buf_ncd
; /* Temp. var. */
114 #define DD_NLOAD_MAX 9
116 /* Here floats are accurate enough, since these variables
117 * only influence the load balancing, not the actual MD results.
141 gmx_cgsort_t
*sort1
,*sort2
;
143 gmx_cgsort_t
*sort_new
;
155 /* This enum determines the order of the coordinates.
156 * ddnatHOME and ddnatZONE should be first and second,
157 * the others can be ordered as wanted.
159 enum { ddnatHOME
, ddnatZONE
, ddnatVSITE
, ddnatCON
, ddnatNR
};
161 enum { edlbAUTO
, edlbNO
, edlbYES
, edlbNR
};
162 const char *edlb_names
[edlbNR
] = { "auto", "no", "yes" };
166 int dim
; /* The dimension */
167 bool dim_match
;/* Tells if DD and PME dims match */
168 int nslab
; /* The number of PME slabs in this dimension */
169 real
*slb_dim_f
; /* Cell sizes for determining the PME comm. with SLB */
170 int *pp_min
; /* The minimum pp node location, size nslab */
171 int *pp_max
; /* The maximum pp node location,size nslab */
172 int maxshift
; /* The maximum shift for coordinate redistribution in PME */
177 real min0
; /* The minimum bottom of this zone */
178 real max1
; /* The maximum top of this zone */
179 real mch0
; /* The maximum bottom communicaton height for this zone */
180 real mch1
; /* The maximum top communicaton height for this zone */
181 real p1_0
; /* The bottom value of the first cell in this zone */
182 real p1_1
; /* The top value of the first cell in this zone */
185 typedef struct gmx_domdec_comm
187 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
188 * unless stated otherwise.
191 /* The number of decomposition dimensions for PME, 0: no PME */
193 /* The number of nodes doing PME (PP/PME or only PME) */
197 /* The communication setup including the PME only nodes */
198 bool bCartesianPP_PME
;
201 int *pmenodes
; /* size npmenodes */
202 int *ddindex2simnodeid
; /* size npmenodes, only with bCartesianPP
203 * but with bCartesianPP_PME */
204 gmx_ddpme_t ddpme
[2];
206 /* The DD particle-particle nodes only */
208 int *ddindex2ddnodeid
; /* size npmenode, only with bCartesianPP_PME */
210 /* The global charge groups */
213 /* Should we sort the cgs */
215 gmx_domdec_sort_t
*sort
;
217 /* Are there bonded and multi-body interactions between charge groups? */
218 bool bInterCGBondeds
;
219 bool bInterCGMultiBody
;
221 /* Data for the optional bonded interaction atom communication range */
228 /* Are we actually using DLB? */
231 /* Cell sizes for static load balancing, first index cartesian */
234 /* The width of the communicated boundaries */
237 /* The minimum cell size (including triclinic correction) */
239 /* For dlb, for use with edlbAUTO */
240 rvec cellsize_min_dlb
;
241 /* The lower limit for the DD cell size with DLB */
243 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
246 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
248 /* box0 and box_size are required with dim's without pbc and -gcom */
252 /* The cell boundaries */
256 /* The old location of the cell boundaries, to check cg displacements */
260 /* The communication setup and charge group boundaries for the zones */
261 gmx_domdec_zones_t zones
;
263 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
264 * cell boundaries of neighboring cells for dynamic load balancing.
266 gmx_ddzone_t zone_d1
[2];
267 gmx_ddzone_t zone_d2
[2][2];
269 /* The coordinate/force communication setup and indices */
270 gmx_domdec_comm_dim_t cd
[DIM
];
271 /* The maximum number of cells to communicate with in one dimension */
274 /* Which cg distribution is stored on the master node */
275 int master_cg_ddp_count
;
277 /* The number of cg's received from the direct neighbors */
278 int zone_ncg1
[DD_MAXZONE
];
280 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
283 /* Communication buffer for general use */
287 /* Communication buffer for general use */
290 /* Communication buffers only used with multiple grid pulses */
295 /* Communication buffers for local redistribution */
297 int cggl_flag_nalloc
[DIM
*2];
299 int cgcm_state_nalloc
[DIM
*2];
301 /* Cell sizes for dynamic load balancing */
302 gmx_domdec_root_t
**root
;
306 real cell_f_max0
[DIM
];
307 real cell_f_min1
[DIM
];
309 /* Stuff for load communication */
311 gmx_domdec_load_t
*load
;
313 MPI_Comm
*mpi_comm_load
;
316 float cycl
[ddCyclNr
];
317 int cycl_n
[ddCyclNr
];
318 float cycl_max
[ddCyclNr
];
319 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
323 /* Have often have did we have load measurements */
325 /* Have often have we collected the load measurements */
329 double sum_nat
[ddnatNR
-ddnatZONE
];
339 /* The last partition step */
340 gmx_large_int_t partition_step
;
348 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
351 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
352 #define DD_FLAG_NRCG 65535
353 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
354 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
356 /* Zone permutation required to obtain consecutive charge groups
357 * for neighbor searching.
359 static const int zone_perm
[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
361 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
362 * components see only j zones with that component 0.
365 /* The DD zone order */
366 static const ivec dd_zo
[DD_MAXZONE
] =
367 {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
372 static const ivec dd_zp3
[dd_zp3n
] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
377 static const ivec dd_zp2
[dd_zp2n
] = {{0,0,4},{1,3,4}};
382 static const ivec dd_zp1
[dd_zp1n
] = {{0,0,2}};
384 /* Factors used to avoid problems due to rounding issues */
385 #define DD_CELL_MARGIN 1.0001
386 #define DD_CELL_MARGIN2 1.00005
387 /* Factor to account for pressure scaling during nstlist steps */
388 #define DD_PRES_SCALE_MARGIN 1.02
390 /* Allowed performance loss before we DLB or warn */
391 #define DD_PERF_LOSS 0.05
393 #define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
395 /* Use separate MPI send and receive commands
396 * when nnodes <= GMX_DD_NNODES_SENDRECV.
397 * This saves memory (and some copying for small nnodes).
398 * For high parallelization scatter and gather calls are used.
400 #define GMX_DD_NNODES_SENDRECV 4
404 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
406 static void index2xyz(ivec nc,int ind,ivec xyz)
408 xyz[XX] = ind % nc[XX];
409 xyz[YY] = (ind / nc[XX]) % nc[YY];
410 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
414 /* This order is required to minimize the coordinate communication in PME
415 * which uses decomposition in the x direction.
417 #define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
419 static void ddindex2xyz(ivec nc
,int ind
,ivec xyz
)
421 xyz
[XX
] = ind
/ (nc
[YY
]*nc
[ZZ
]);
422 xyz
[YY
] = (ind
/ nc
[ZZ
]) % nc
[YY
];
423 xyz
[ZZ
] = ind
% nc
[ZZ
];
426 static int ddcoord2ddnodeid(gmx_domdec_t
*dd
,ivec c
)
431 ddindex
= dd_index(dd
->nc
,c
);
432 if (dd
->comm
->bCartesianPP_PME
)
434 ddnodeid
= dd
->comm
->ddindex2ddnodeid
[ddindex
];
436 else if (dd
->comm
->bCartesianPP
)
439 MPI_Cart_rank(dd
->mpi_comm_all
,c
,&ddnodeid
);
450 static bool dynamic_dd_box(gmx_ddbox_t
*ddbox
,t_inputrec
*ir
)
452 return (ddbox
->nboundeddim
< DIM
|| DYNAMIC_BOX(*ir
));
455 int ddglatnr(gmx_domdec_t
*dd
,int i
)
465 if (i
>= dd
->comm
->nat
[ddnatNR
-1])
467 gmx_fatal(FARGS
,"glatnr called with %d, which is larger than the local number of atoms (%d)",i
,dd
->comm
->nat
[ddnatNR
-1]);
469 atnr
= dd
->gatindex
[i
] + 1;
475 t_block
*dd_charge_groups_global(gmx_domdec_t
*dd
)
477 return &dd
->comm
->cgs_gl
;
480 static void vec_rvec_init(vec_rvec_t
*v
)
486 static void vec_rvec_check_alloc(vec_rvec_t
*v
,int n
)
490 v
->nalloc
= over_alloc_dd(n
);
491 srenew(v
->v
,v
->nalloc
);
495 void dd_store_state(gmx_domdec_t
*dd
,t_state
*state
)
499 if (state
->ddp_count
!= dd
->ddp_count
)
501 gmx_incons("The state does not the domain decomposition state");
504 state
->ncg_gl
= dd
->ncg_home
;
505 if (state
->ncg_gl
> state
->cg_gl_nalloc
)
507 state
->cg_gl_nalloc
= over_alloc_dd(state
->ncg_gl
);
508 srenew(state
->cg_gl
,state
->cg_gl_nalloc
);
510 for(i
=0; i
<state
->ncg_gl
; i
++)
512 state
->cg_gl
[i
] = dd
->index_gl
[i
];
515 state
->ddp_count_cg_gl
= dd
->ddp_count
;
518 gmx_domdec_zones_t
*domdec_zones(gmx_domdec_t
*dd
)
520 return &dd
->comm
->zones
;
523 void dd_get_ns_ranges(gmx_domdec_t
*dd
,int icg
,
524 int *jcg0
,int *jcg1
,ivec shift0
,ivec shift1
)
526 gmx_domdec_zones_t
*zones
;
529 zones
= &dd
->comm
->zones
;
532 while (icg
>= zones
->izone
[izone
].cg1
)
541 else if (izone
< zones
->nizone
)
543 *jcg0
= zones
->izone
[izone
].jcg0
;
547 gmx_fatal(FARGS
,"DD icg %d out of range: izone (%d) >= nizone (%d)",
548 icg
,izone
,zones
->nizone
);
551 *jcg1
= zones
->izone
[izone
].jcg1
;
553 for(d
=0; d
<dd
->ndim
; d
++)
556 shift0
[dim
] = zones
->izone
[izone
].shift0
[dim
];
557 shift1
[dim
] = zones
->izone
[izone
].shift1
[dim
];
558 if (dd
->comm
->tric_dir
[dim
] || (dd
->bGridJump
&& d
> 0))
560 /* A conservative approach, this can be optimized */
567 int dd_natoms_vsite(gmx_domdec_t
*dd
)
569 return dd
->comm
->nat
[ddnatVSITE
];
572 void dd_get_constraint_range(gmx_domdec_t
*dd
,int *at_start
,int *at_end
)
574 *at_start
= dd
->comm
->nat
[ddnatCON
-1];
575 *at_end
= dd
->comm
->nat
[ddnatCON
];
578 void dd_move_x(gmx_domdec_t
*dd
,matrix box
,rvec x
[])
580 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
582 gmx_domdec_comm_t
*comm
;
583 gmx_domdec_comm_dim_t
*cd
;
584 gmx_domdec_ind_t
*ind
;
585 rvec shift
={0,0,0},*buf
,*rbuf
;
590 cgindex
= dd
->cgindex
;
595 nat_tot
= dd
->nat_home
;
596 for(d
=0; d
<dd
->ndim
; d
++)
598 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
599 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
602 copy_rvec(box
[dd
->dim
[d
]],shift
);
605 for(p
=0; p
<cd
->np
; p
++)
612 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
614 at0
= cgindex
[index
[i
]];
615 at1
= cgindex
[index
[i
]+1];
616 for(j
=at0
; j
<at1
; j
++)
618 copy_rvec(x
[j
],buf
[n
]);
625 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
627 at0
= cgindex
[index
[i
]];
628 at1
= cgindex
[index
[i
]+1];
629 for(j
=at0
; j
<at1
; j
++)
631 /* We need to shift the coordinates */
632 rvec_add(x
[j
],shift
,buf
[n
]);
639 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
641 at0
= cgindex
[index
[i
]];
642 at1
= cgindex
[index
[i
]+1];
643 for(j
=at0
; j
<at1
; j
++)
646 buf
[n
][XX
] = x
[j
][XX
] + shift
[XX
];
648 * This operation requires a special shift force
649 * treatment, which is performed in calc_vir.
651 buf
[n
][YY
] = box
[YY
][YY
] - x
[j
][YY
];
652 buf
[n
][ZZ
] = box
[ZZ
][ZZ
] - x
[j
][ZZ
];
664 rbuf
= comm
->vbuf2
.v
;
666 /* Send and receive the coordinates */
667 dd_sendrecv_rvec(dd
, d
, dddirBackward
,
668 buf
, ind
->nsend
[nzone
+1],
669 rbuf
, ind
->nrecv
[nzone
+1]);
673 for(zone
=0; zone
<nzone
; zone
++)
675 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
677 copy_rvec(rbuf
[j
],x
[i
]);
682 nat_tot
+= ind
->nrecv
[nzone
+1];
688 void dd_move_f(gmx_domdec_t
*dd
,rvec f
[],rvec
*fshift
)
690 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
692 gmx_domdec_comm_t
*comm
;
693 gmx_domdec_comm_dim_t
*cd
;
694 gmx_domdec_ind_t
*ind
;
702 cgindex
= dd
->cgindex
;
707 nzone
= comm
->zones
.n
/2;
708 nat_tot
= dd
->nat_tot
;
709 for(d
=dd
->ndim
-1; d
>=0; d
--)
711 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
712 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
713 if (fshift
== NULL
&& !bScrew
)
717 /* Determine which shift vector we need */
723 for(p
=cd
->np
-1; p
>=0; p
--) {
725 nat_tot
-= ind
->nrecv
[nzone
+1];
732 sbuf
= comm
->vbuf2
.v
;
734 for(zone
=0; zone
<nzone
; zone
++)
736 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
738 copy_rvec(f
[i
],sbuf
[j
]);
743 /* Communicate the forces */
744 dd_sendrecv_rvec(dd
, d
, dddirForward
,
745 sbuf
, ind
->nrecv
[nzone
+1],
746 buf
, ind
->nsend
[nzone
+1]);
748 /* Add the received forces */
752 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
754 at0
= cgindex
[index
[i
]];
755 at1
= cgindex
[index
[i
]+1];
756 for(j
=at0
; j
<at1
; j
++)
758 rvec_inc(f
[j
],buf
[n
]);
765 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
767 at0
= cgindex
[index
[i
]];
768 at1
= cgindex
[index
[i
]+1];
769 for(j
=at0
; j
<at1
; j
++)
771 rvec_inc(f
[j
],buf
[n
]);
772 /* Add this force to the shift force */
773 rvec_inc(fshift
[is
],buf
[n
]);
780 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
782 at0
= cgindex
[index
[i
]];
783 at1
= cgindex
[index
[i
]+1];
784 for(j
=at0
; j
<at1
; j
++)
786 /* Rotate the force */
787 f
[j
][XX
] += buf
[n
][XX
];
788 f
[j
][YY
] -= buf
[n
][YY
];
789 f
[j
][ZZ
] -= buf
[n
][ZZ
];
792 /* Add this force to the shift force */
793 rvec_inc(fshift
[is
],buf
[n
]);
804 void dd_atom_spread_real(gmx_domdec_t
*dd
,real v
[])
806 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
808 gmx_domdec_comm_t
*comm
;
809 gmx_domdec_comm_dim_t
*cd
;
810 gmx_domdec_ind_t
*ind
;
815 cgindex
= dd
->cgindex
;
817 buf
= &comm
->vbuf
.v
[0][0];
820 nat_tot
= dd
->nat_home
;
821 for(d
=0; d
<dd
->ndim
; d
++)
824 for(p
=0; p
<cd
->np
; p
++)
829 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
831 at0
= cgindex
[index
[i
]];
832 at1
= cgindex
[index
[i
]+1];
833 for(j
=at0
; j
<at1
; j
++)
846 rbuf
= &comm
->vbuf2
.v
[0][0];
848 /* Send and receive the coordinates */
849 dd_sendrecv_real(dd
, d
, dddirBackward
,
850 buf
, ind
->nsend
[nzone
+1],
851 rbuf
, ind
->nrecv
[nzone
+1]);
855 for(zone
=0; zone
<nzone
; zone
++)
857 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
864 nat_tot
+= ind
->nrecv
[nzone
+1];
870 void dd_atom_sum_real(gmx_domdec_t
*dd
,real v
[])
872 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
874 gmx_domdec_comm_t
*comm
;
875 gmx_domdec_comm_dim_t
*cd
;
876 gmx_domdec_ind_t
*ind
;
881 cgindex
= dd
->cgindex
;
883 buf
= &comm
->vbuf
.v
[0][0];
886 nzone
= comm
->zones
.n
/2;
887 nat_tot
= dd
->nat_tot
;
888 for(d
=dd
->ndim
-1; d
>=0; d
--)
891 for(p
=cd
->np
-1; p
>=0; p
--) {
893 nat_tot
-= ind
->nrecv
[nzone
+1];
900 sbuf
= &comm
->vbuf2
.v
[0][0];
902 for(zone
=0; zone
<nzone
; zone
++)
904 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
911 /* Communicate the forces */
912 dd_sendrecv_real(dd
, d
, dddirForward
,
913 sbuf
, ind
->nrecv
[nzone
+1],
914 buf
, ind
->nsend
[nzone
+1]);
916 /* Add the received forces */
918 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
920 at0
= cgindex
[index
[i
]];
921 at1
= cgindex
[index
[i
]+1];
922 for(j
=at0
; j
<at1
; j
++)
933 static void print_ddzone(FILE *fp
,int d
,int i
,int j
,gmx_ddzone_t
*zone
)
935 fprintf(fp
,"zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
937 zone
->min0
,zone
->max1
,
938 zone
->mch0
,zone
->mch0
,
939 zone
->p1_0
,zone
->p1_1
);
942 static void dd_sendrecv_ddzone(const gmx_domdec_t
*dd
,
943 int ddimind
,int direction
,
944 gmx_ddzone_t
*buf_s
,int n_s
,
945 gmx_ddzone_t
*buf_r
,int n_r
)
947 rvec vbuf_s
[5*2],vbuf_r
[5*2];
952 vbuf_s
[i
*2 ][0] = buf_s
[i
].min0
;
953 vbuf_s
[i
*2 ][1] = buf_s
[i
].max1
;
954 vbuf_s
[i
*2 ][2] = buf_s
[i
].mch0
;
955 vbuf_s
[i
*2+1][0] = buf_s
[i
].mch1
;
956 vbuf_s
[i
*2+1][1] = buf_s
[i
].p1_0
;
957 vbuf_s
[i
*2+1][2] = buf_s
[i
].p1_1
;
960 dd_sendrecv_rvec(dd
, ddimind
, direction
,
966 buf_r
[i
].min0
= vbuf_r
[i
*2 ][0];
967 buf_r
[i
].max1
= vbuf_r
[i
*2 ][1];
968 buf_r
[i
].mch0
= vbuf_r
[i
*2 ][2];
969 buf_r
[i
].mch1
= vbuf_r
[i
*2+1][0];
970 buf_r
[i
].p1_0
= vbuf_r
[i
*2+1][1];
971 buf_r
[i
].p1_1
= vbuf_r
[i
*2+1][2];
975 static void dd_move_cellx(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
,
976 rvec cell_ns_x0
,rvec cell_ns_x1
)
978 int d
,d1
,dim
,dim1
,pos
,buf_size
,i
,j
,k
,p
,npulse
,npulse_min
;
979 gmx_ddzone_t
*zp
,buf_s
[5],buf_r
[5],buf_e
[5];
980 rvec extr_s
[2],extr_r
[2];
983 gmx_domdec_comm_t
*comm
;
988 for(d
=1; d
<dd
->ndim
; d
++)
991 zp
= (d
== 1) ? &comm
->zone_d1
[0] : &comm
->zone_d2
[0][0];
992 zp
->min0
= cell_ns_x0
[dim
];
993 zp
->max1
= cell_ns_x1
[dim
];
994 zp
->mch0
= cell_ns_x0
[dim
];
995 zp
->mch1
= cell_ns_x1
[dim
];
996 zp
->p1_0
= cell_ns_x0
[dim
];
997 zp
->p1_1
= cell_ns_x1
[dim
];
1000 for(d
=dd
->ndim
-2; d
>=0; d
--)
1003 bPBC
= (dim
< ddbox
->npbcdim
);
1005 /* Use an rvec to store two reals */
1006 extr_s
[d
][0] = comm
->cell_f0
[d
+1];
1007 extr_s
[d
][1] = comm
->cell_f1
[d
+1];
1011 /* Store the extremes in the backward sending buffer,
1012 * so the get updated separately from the forward communication.
1014 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1016 /* We invert the order to be able to use the same loop for buf_e */
1017 buf_s
[pos
].min0
= extr_s
[d1
][1];
1018 buf_s
[pos
].max1
= extr_s
[d1
][0];
1019 buf_s
[pos
].mch0
= 0;
1020 buf_s
[pos
].mch1
= 0;
1021 /* Store the cell corner of the dimension we communicate along */
1022 buf_s
[pos
].p1_0
= comm
->cell_x0
[dim
];
1023 buf_s
[pos
].p1_1
= 0;
1027 buf_s
[pos
] = (dd
->ndim
== 2) ? comm
->zone_d1
[0] : comm
->zone_d2
[0][0];
1030 if (dd
->ndim
== 3 && d
== 0)
1032 buf_s
[pos
] = comm
->zone_d2
[0][1];
1034 buf_s
[pos
] = comm
->zone_d1
[0];
1038 /* We only need to communicate the extremes
1039 * in the forward direction
1041 npulse
= comm
->cd
[d
].np
;
1044 /* Take the minimum to avoid double communication */
1045 npulse_min
= min(npulse
,dd
->nc
[dim
]-1-npulse
);
1049 /* Without PBC we should really not communicate over
1050 * the boundaries, but implementing that complicates
1051 * the communication setup and therefore we simply
1052 * do all communication, but ignore some data.
1054 npulse_min
= npulse
;
1056 for(p
=0; p
<npulse_min
; p
++)
1058 /* Communicate the extremes forward */
1059 bUse
= (bPBC
|| dd
->ci
[dim
] > 0);
1061 dd_sendrecv_rvec(dd
, d
, dddirForward
,
1062 extr_s
+d
, dd
->ndim
-d
-1,
1063 extr_r
+d
, dd
->ndim
-d
-1);
1067 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1069 extr_s
[d1
][0] = max(extr_s
[d1
][0],extr_r
[d1
][0]);
1070 extr_s
[d1
][1] = min(extr_s
[d1
][1],extr_r
[d1
][1]);
1076 for(p
=0; p
<npulse
; p
++)
1078 /* Communicate all the zone information backward */
1079 bUse
= (bPBC
|| dd
->ci
[dim
] < dd
->nc
[dim
] - 1);
1081 dd_sendrecv_ddzone(dd
, d
, dddirBackward
,
1088 for(d1
=d
+1; d1
<dd
->ndim
; d1
++)
1090 /* Determine the decrease of maximum required
1091 * communication height along d1 due to the distance along d,
1092 * this avoids a lot of useless atom communication.
1094 dist_d
= comm
->cell_x1
[dim
] - buf_r
[0].p1_0
;
1096 if (ddbox
->tric_dir
[dim
])
1098 /* c is the off-diagonal coupling between the cell planes
1099 * along directions d and d1.
1101 c
= ddbox
->v
[dim
][dd
->dim
[d1
]][dim
];
1107 det
= (1 + c
*c
)*comm
->cutoff
*comm
->cutoff
- dist_d
*dist_d
;
1110 dh
[d1
] = comm
->cutoff
- (c
*dist_d
+ sqrt(det
))/(1 + c
*c
);
1114 /* A negative value signals out of range */
1120 /* Accumulate the extremes over all pulses */
1121 for(i
=0; i
<buf_size
; i
++)
1125 buf_e
[i
] = buf_r
[i
];
1131 buf_e
[i
].min0
= min(buf_e
[i
].min0
,buf_r
[i
].min0
);
1132 buf_e
[i
].max1
= max(buf_e
[i
].max1
,buf_r
[i
].max1
);
1135 if (dd
->ndim
== 3 && d
== 0 && i
== buf_size
- 1)
1143 if (bUse
&& dh
[d1
] >= 0)
1145 buf_e
[i
].mch0
= max(buf_e
[i
].mch0
,buf_r
[i
].mch0
-dh
[d1
]);
1146 buf_e
[i
].mch1
= max(buf_e
[i
].mch1
,buf_r
[i
].mch1
-dh
[d1
]);
1149 /* Copy the received buffer to the send buffer,
1150 * to pass the data through with the next pulse.
1152 buf_s
[i
] = buf_r
[i
];
1154 if (((bPBC
|| dd
->ci
[dim
]+npulse
< dd
->nc
[dim
]) && p
== npulse
-1) ||
1155 (!bPBC
&& dd
->ci
[dim
]+1+p
== dd
->nc
[dim
]-1))
1157 /* Store the extremes */
1160 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1162 extr_s
[d1
][1] = min(extr_s
[d1
][1],buf_e
[pos
].min0
);
1163 extr_s
[d1
][0] = max(extr_s
[d1
][0],buf_e
[pos
].max1
);
1167 if (d
== 1 || (d
== 0 && dd
->ndim
== 3))
1171 comm
->zone_d2
[1-d
][i
] = buf_e
[pos
];
1177 comm
->zone_d1
[1] = buf_e
[pos
];
1191 print_ddzone(debug
,1,i
,0,&comm
->zone_d1
[i
]);
1193 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d1
[i
].min0
);
1194 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d1
[i
].max1
);
1206 print_ddzone(debug
,2,i
,j
,&comm
->zone_d2
[i
][j
]);
1208 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d2
[i
][j
].min0
);
1209 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d2
[i
][j
].max1
);
1213 for(d
=1; d
<dd
->ndim
; d
++)
1215 comm
->cell_f_max0
[d
] = extr_s
[d
-1][0];
1216 comm
->cell_f_min1
[d
] = extr_s
[d
-1][1];
1219 fprintf(debug
,"Cell fraction d %d, max0 %f, min1 %f\n",
1220 d
,comm
->cell_f_max0
[d
],comm
->cell_f_min1
[d
]);
1225 static void dd_collect_cg(gmx_domdec_t
*dd
,
1226 t_state
*state_local
)
1228 gmx_domdec_master_t
*ma
=NULL
;
1229 int buf2
[2],*ibuf
,i
,ncg_home
=0,*cg
=NULL
,nat_home
=0;
1232 if (state_local
->ddp_count
== dd
->comm
->master_cg_ddp_count
)
1234 /* The master has the correct distribution */
1238 if (state_local
->ddp_count
== dd
->ddp_count
)
1240 ncg_home
= dd
->ncg_home
;
1242 nat_home
= dd
->nat_home
;
1244 else if (state_local
->ddp_count_cg_gl
== state_local
->ddp_count
)
1246 cgs_gl
= &dd
->comm
->cgs_gl
;
1248 ncg_home
= state_local
->ncg_gl
;
1249 cg
= state_local
->cg_gl
;
1251 for(i
=0; i
<ncg_home
; i
++)
1253 nat_home
+= cgs_gl
->index
[cg
[i
]+1] - cgs_gl
->index
[cg
[i
]];
1258 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1261 buf2
[0] = dd
->ncg_home
;
1262 buf2
[1] = dd
->nat_home
;
1272 /* Collect the charge group and atom counts on the master */
1273 dd_gather(dd
,2*sizeof(int),buf2
,ibuf
);
1278 for(i
=0; i
<dd
->nnodes
; i
++)
1280 ma
->ncg
[i
] = ma
->ibuf
[2*i
];
1281 ma
->nat
[i
] = ma
->ibuf
[2*i
+1];
1282 ma
->index
[i
+1] = ma
->index
[i
] + ma
->ncg
[i
];
1285 /* Make byte counts and indices */
1286 for(i
=0; i
<dd
->nnodes
; i
++)
1288 ma
->ibuf
[i
] = ma
->ncg
[i
]*sizeof(int);
1289 ma
->ibuf
[dd
->nnodes
+i
] = ma
->index
[i
]*sizeof(int);
1293 fprintf(debug
,"Initial charge group distribution: ");
1294 for(i
=0; i
<dd
->nnodes
; i
++)
1295 fprintf(debug
," %d",ma
->ncg
[i
]);
1296 fprintf(debug
,"\n");
1300 /* Collect the charge group indices on the master */
1302 dd
->ncg_home
*sizeof(int),dd
->index_gl
,
1303 DDMASTER(dd
) ? ma
->ibuf
: NULL
,
1304 DDMASTER(dd
) ? ma
->ibuf
+dd
->nnodes
: NULL
,
1305 DDMASTER(dd
) ? ma
->cg
: NULL
);
1307 dd
->comm
->master_cg_ddp_count
= state_local
->ddp_count
;
1310 static void dd_collect_vec_sendrecv(gmx_domdec_t
*dd
,
1313 gmx_domdec_master_t
*ma
;
1314 int n
,i
,c
,a
,nalloc
=0;
1323 MPI_Send(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1324 dd
->rank
,dd
->mpi_comm_all
);
1327 /* Copy the master coordinates to the global array */
1328 cgs_gl
= &dd
->comm
->cgs_gl
;
1330 n
= DDMASTERRANK(dd
);
1332 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1334 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1336 copy_rvec(lv
[a
++],v
[c
]);
1340 for(n
=0; n
<dd
->nnodes
; n
++)
1344 if (ma
->nat
[n
] > nalloc
)
1346 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1350 MPI_Recv(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,DDRANK(dd
,n
),
1351 n
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1354 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1356 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1358 copy_rvec(buf
[a
++],v
[c
]);
1367 static void get_commbuffer_counts(gmx_domdec_t
*dd
,
1368 int **counts
,int **disps
)
1370 gmx_domdec_master_t
*ma
;
1375 /* Make the rvec count and displacment arrays */
1377 *disps
= ma
->ibuf
+ dd
->nnodes
;
1378 for(n
=0; n
<dd
->nnodes
; n
++)
1380 (*counts
)[n
] = ma
->nat
[n
]*sizeof(rvec
);
1381 (*disps
)[n
] = (n
== 0 ? 0 : (*disps
)[n
-1] + (*counts
)[n
-1]);
1385 static void dd_collect_vec_gatherv(gmx_domdec_t
*dd
,
1388 gmx_domdec_master_t
*ma
;
1389 int *rcounts
=NULL
,*disps
=NULL
;
1398 get_commbuffer_counts(dd
,&rcounts
,&disps
);
1403 dd_gatherv(dd
,dd
->nat_home
*sizeof(rvec
),lv
,rcounts
,disps
,buf
);
1407 cgs_gl
= &dd
->comm
->cgs_gl
;
1410 for(n
=0; n
<dd
->nnodes
; n
++)
1412 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1414 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1416 copy_rvec(buf
[a
++],v
[c
]);
1423 void dd_collect_vec(gmx_domdec_t
*dd
,
1424 t_state
*state_local
,rvec
*lv
,rvec
*v
)
1426 gmx_domdec_master_t
*ma
;
1427 int n
,i
,c
,a
,nalloc
=0;
1430 dd_collect_cg(dd
,state_local
);
1432 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1434 dd_collect_vec_sendrecv(dd
,lv
,v
);
1438 dd_collect_vec_gatherv(dd
,lv
,v
);
1443 void dd_collect_state(gmx_domdec_t
*dd
,
1444 t_state
*state_local
,t_state
*state
)
1448 nh
= state
->nhchainlength
;
1452 state
->lambda
= state_local
->lambda
;
1453 state
->veta
= state_local
->veta
;
1454 state
->vol0
= state_local
->vol0
;
1455 copy_mat(state_local
->box
,state
->box
);
1456 copy_mat(state_local
->boxv
,state
->boxv
);
1457 copy_mat(state_local
->svir_prev
,state
->svir_prev
);
1458 copy_mat(state_local
->fvir_prev
,state
->fvir_prev
);
1459 copy_mat(state_local
->pres_prev
,state
->pres_prev
);
1462 for(i
=0; i
<state_local
->ngtc
; i
++)
1464 for(j
=0; j
<nh
; j
++) {
1465 state
->nosehoover_xi
[i
*nh
+j
] = state_local
->nosehoover_xi
[i
*nh
+j
];
1466 state
->nosehoover_vxi
[i
*nh
+j
] = state_local
->nosehoover_vxi
[i
*nh
+j
];
1468 state
->therm_integral
[i
] = state_local
->therm_integral
[i
];
1470 for(i
=0; i
<state_local
->nnhpres
; i
++)
1472 for(j
=0; j
<nh
; j
++) {
1473 state
->nhpres_xi
[i
*nh
+j
] = state_local
->nhpres_xi
[i
*nh
+j
];
1474 state
->nhpres_vxi
[i
*nh
+j
] = state_local
->nhpres_vxi
[i
*nh
+j
];
1478 for(est
=0; est
<estNR
; est
++)
1480 if (EST_DISTR(est
) && state_local
->flags
& (1<<est
))
1484 dd_collect_vec(dd
,state_local
,state_local
->x
,state
->x
);
1487 dd_collect_vec(dd
,state_local
,state_local
->v
,state
->v
);
1490 dd_collect_vec(dd
,state_local
,state_local
->sd_X
,state
->sd_X
);
1493 dd_collect_vec(dd
,state_local
,state_local
->cg_p
,state
->cg_p
);
1496 if (state
->nrngi
== 1)
1500 for(i
=0; i
<state_local
->nrng
; i
++)
1502 state
->ld_rng
[i
] = state_local
->ld_rng
[i
];
1508 dd_gather(dd
,state_local
->nrng
*sizeof(state
->ld_rng
[0]),
1509 state_local
->ld_rng
,state
->ld_rng
);
1513 if (state
->nrngi
== 1)
1517 state
->ld_rngi
[0] = state_local
->ld_rngi
[0];
1522 dd_gather(dd
,sizeof(state
->ld_rngi
[0]),
1523 state_local
->ld_rngi
,state
->ld_rngi
);
1526 case estDISRE_INITF
:
1527 case estDISRE_RM3TAV
:
1528 case estORIRE_INITF
:
1532 gmx_incons("Unknown state entry encountered in dd_collect_state");
1538 static void dd_realloc_fr_cg(t_forcerec
*fr
,int nalloc
)
1542 fprintf(debug
,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr
->cg_nalloc
,nalloc
,over_alloc_dd(nalloc
));
1544 fr
->cg_nalloc
= over_alloc_dd(nalloc
);
1545 srenew(fr
->cg_cm
,fr
->cg_nalloc
);
1546 srenew(fr
->cginfo
,fr
->cg_nalloc
);
1549 static void dd_realloc_state(t_state
*state
,rvec
**f
,int nalloc
)
1555 fprintf(debug
,"Reallocating state: currently %d, required %d, allocating %d\n",state
->nalloc
,nalloc
,over_alloc_dd(nalloc
));
1558 state
->nalloc
= over_alloc_dd(nalloc
);
1560 for(est
=0; est
<estNR
; est
++)
1562 if (EST_DISTR(est
) && state
->flags
& (1<<est
))
1566 srenew(state
->x
,state
->nalloc
);
1569 srenew(state
->v
,state
->nalloc
);
1572 srenew(state
->sd_X
,state
->nalloc
);
1575 srenew(state
->cg_p
,state
->nalloc
);
1579 case estDISRE_INITF
:
1580 case estDISRE_RM3TAV
:
1581 case estORIRE_INITF
:
1583 /* No reallocation required */
1586 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1593 srenew(*f
,state
->nalloc
);
1597 static void dd_distribute_vec_sendrecv(gmx_domdec_t
*dd
,t_block
*cgs
,
1600 gmx_domdec_master_t
*ma
;
1601 int n
,i
,c
,a
,nalloc
=0;
1608 for(n
=0; n
<dd
->nnodes
; n
++)
1612 if (ma
->nat
[n
] > nalloc
)
1614 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1617 /* Use lv as a temporary buffer */
1619 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1621 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1623 copy_rvec(v
[c
],buf
[a
++]);
1626 if (a
!= ma
->nat
[n
])
1628 gmx_fatal(FARGS
,"Internal error a (%d) != nat (%d)",
1633 MPI_Send(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,
1634 DDRANK(dd
,n
),n
,dd
->mpi_comm_all
);
1639 n
= DDMASTERRANK(dd
);
1641 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1643 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1645 copy_rvec(v
[c
],lv
[a
++]);
1652 MPI_Recv(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1653 MPI_ANY_TAG
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1658 static void dd_distribute_vec_scatterv(gmx_domdec_t
*dd
,t_block
*cgs
,
1661 gmx_domdec_master_t
*ma
;
1662 int *scounts
=NULL
,*disps
=NULL
;
1663 int n
,i
,c
,a
,nalloc
=0;
1670 get_commbuffer_counts(dd
,&scounts
,&disps
);
1674 for(n
=0; n
<dd
->nnodes
; n
++)
1676 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1678 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1680 copy_rvec(v
[c
],buf
[a
++]);
1686 dd_scatterv(dd
,scounts
,disps
,buf
,dd
->nat_home
*sizeof(rvec
),lv
);
1689 static void dd_distribute_vec(gmx_domdec_t
*dd
,t_block
*cgs
,rvec
*v
,rvec
*lv
)
1691 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1693 dd_distribute_vec_sendrecv(dd
,cgs
,v
,lv
);
1697 dd_distribute_vec_scatterv(dd
,cgs
,v
,lv
);
1701 static void dd_distribute_state(gmx_domdec_t
*dd
,t_block
*cgs
,
1702 t_state
*state
,t_state
*state_local
,
1705 int i
,j
,ngtch
,ngtcp
,nh
;
1707 nh
= state
->nhchainlength
;
1711 state_local
->lambda
= state
->lambda
;
1712 state_local
->veta
= state
->veta
;
1713 state_local
->vol0
= state
->vol0
;
1714 copy_mat(state
->box
,state_local
->box
);
1715 copy_mat(state
->box_rel
,state_local
->box_rel
);
1716 copy_mat(state
->boxv
,state_local
->boxv
);
1717 copy_mat(state
->svir_prev
,state_local
->svir_prev
);
1718 copy_mat(state
->fvir_prev
,state_local
->fvir_prev
);
1719 for(i
=0; i
<state_local
->ngtc
; i
++)
1721 for(j
=0; j
<nh
; j
++) {
1722 state_local
->nosehoover_xi
[i
*nh
+j
] = state
->nosehoover_xi
[i
*nh
+j
];
1723 state_local
->nosehoover_vxi
[i
*nh
+j
] = state
->nosehoover_vxi
[i
*nh
+j
];
1725 state_local
->therm_integral
[i
] = state
->therm_integral
[i
];
1727 for(i
=0; i
<state_local
->nnhpres
; i
++)
1729 for(j
=0; j
<nh
; j
++) {
1730 state_local
->nhpres_xi
[i
*nh
+j
] = state
->nhpres_xi
[i
*nh
+j
];
1731 state_local
->nhpres_vxi
[i
*nh
+j
] = state
->nhpres_vxi
[i
*nh
+j
];
1735 dd_bcast(dd
,sizeof(real
),&state_local
->lambda
);
1736 dd_bcast(dd
,sizeof(real
),&state_local
->veta
);
1737 dd_bcast(dd
,sizeof(real
),&state_local
->vol0
);
1738 dd_bcast(dd
,sizeof(state_local
->box
),state_local
->box
);
1739 dd_bcast(dd
,sizeof(state_local
->box_rel
),state_local
->box_rel
);
1740 dd_bcast(dd
,sizeof(state_local
->boxv
),state_local
->boxv
);
1741 dd_bcast(dd
,sizeof(state_local
->svir_prev
),state_local
->svir_prev
);
1742 dd_bcast(dd
,sizeof(state_local
->fvir_prev
),state_local
->fvir_prev
);
1743 dd_bcast(dd
,((state_local
->ngtc
*nh
)*sizeof(double)),state_local
->nosehoover_xi
);
1744 dd_bcast(dd
,((state_local
->ngtc
*nh
)*sizeof(double)),state_local
->nosehoover_vxi
);
1745 dd_bcast(dd
,state_local
->ngtc
*sizeof(double),state_local
->therm_integral
);
1746 dd_bcast(dd
,((state_local
->nnhpres
*nh
)*sizeof(double)),state_local
->nhpres_xi
);
1747 dd_bcast(dd
,((state_local
->nnhpres
*nh
)*sizeof(double)),state_local
->nhpres_vxi
);
1749 if (dd
->nat_home
> state_local
->nalloc
)
1751 dd_realloc_state(state_local
,f
,dd
->nat_home
);
1753 for(i
=0; i
<estNR
; i
++)
1755 if (EST_DISTR(i
) && state_local
->flags
& (1<<i
))
1759 dd_distribute_vec(dd
,cgs
,state
->x
,state_local
->x
);
1762 dd_distribute_vec(dd
,cgs
,state
->v
,state_local
->v
);
1765 dd_distribute_vec(dd
,cgs
,state
->sd_X
,state_local
->sd_X
);
1768 dd_distribute_vec(dd
,cgs
,state
->cg_p
,state_local
->cg_p
);
1771 if (state
->nrngi
== 1)
1774 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1775 state
->ld_rng
,state_local
->ld_rng
);
1780 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1781 state
->ld_rng
,state_local
->ld_rng
);
1785 if (state
->nrngi
== 1)
1787 dd_bcastc(dd
,sizeof(state_local
->ld_rngi
[0]),
1788 state
->ld_rngi
,state_local
->ld_rngi
);
1792 dd_scatter(dd
,sizeof(state_local
->ld_rngi
[0]),
1793 state
->ld_rngi
,state_local
->ld_rngi
);
1796 case estDISRE_INITF
:
1797 case estDISRE_RM3TAV
:
1798 case estORIRE_INITF
:
1800 /* Not implemented yet */
1803 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1809 static char dim2char(int dim
)
1815 case XX
: c
= 'X'; break;
1816 case YY
: c
= 'Y'; break;
1817 case ZZ
: c
= 'Z'; break;
1818 default: gmx_fatal(FARGS
,"Unknown dim %d",dim
);
1824 static void write_dd_grid_pdb(const char *fn
,gmx_large_int_t step
,
1825 gmx_domdec_t
*dd
,matrix box
,gmx_ddbox_t
*ddbox
)
1827 rvec grid_s
[2],*grid_r
=NULL
,cx
,r
;
1828 char fname
[STRLEN
],format
[STRLEN
],buf
[22];
1834 copy_rvec(dd
->comm
->cell_x0
,grid_s
[0]);
1835 copy_rvec(dd
->comm
->cell_x1
,grid_s
[1]);
1839 snew(grid_r
,2*dd
->nnodes
);
1842 dd_gather(dd
,2*sizeof(rvec
),grid_s
[0],DDMASTER(dd
) ? grid_r
[0] : NULL
);
1846 for(d
=0; d
<DIM
; d
++)
1848 for(i
=0; i
<DIM
; i
++)
1856 if (dd
->nc
[d
] > 1 && d
< ddbox
->npbcdim
)
1858 tric
[d
][i
] = box
[i
][d
]/box
[i
][i
];
1867 sprintf(fname
,"%s_%s.pdb",fn
,gmx_step_str(step
,buf
));
1868 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
1869 out
= gmx_fio_fopen(fname
,"w");
1870 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
1872 for(i
=0; i
<dd
->nnodes
; i
++)
1874 vol
= dd
->nnodes
/(box
[XX
][XX
]*box
[YY
][YY
]*box
[ZZ
][ZZ
]);
1875 for(d
=0; d
<DIM
; d
++)
1877 vol
*= grid_r
[i
*2+1][d
] - grid_r
[i
*2][d
];
1885 cx
[XX
] = grid_r
[i
*2+x
][XX
];
1886 cx
[YY
] = grid_r
[i
*2+y
][YY
];
1887 cx
[ZZ
] = grid_r
[i
*2+z
][ZZ
];
1889 fprintf(out
,format
,"ATOM",a
++,"CA","GLY",' ',1+i
,
1890 10*r
[XX
],10*r
[YY
],10*r
[ZZ
],1.0,vol
);
1894 for(d
=0; d
<DIM
; d
++)
1900 case 0: y
= 1 + i
*8 + 2*x
; break;
1901 case 1: y
= 1 + i
*8 + 2*x
- (x
% 2); break;
1902 case 2: y
= 1 + i
*8 + x
; break;
1904 fprintf(out
,"%6s%5d%5d\n","CONECT",y
,y
+(1<<d
));
1908 gmx_fio_fclose(out
);
1913 void write_dd_pdb(const char *fn
,gmx_large_int_t step
,const char *title
,
1914 gmx_mtop_t
*mtop
,t_commrec
*cr
,
1915 int natoms
,rvec x
[],matrix box
)
1917 char fname
[STRLEN
],format
[STRLEN
],format4
[STRLEN
],buf
[22];
1920 char *atomname
,*resname
;
1927 natoms
= dd
->comm
->nat
[ddnatVSITE
];
1930 sprintf(fname
,"%s_%s_n%d.pdb",fn
,gmx_step_str(step
,buf
),cr
->sim_nodeid
);
1932 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
1933 sprintf(format4
,"%s%s\n",pdbformat4
,"%6.2f%6.2f");
1935 out
= gmx_fio_fopen(fname
,"w");
1937 fprintf(out
,"TITLE %s\n",title
);
1938 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
1939 for(i
=0; i
<natoms
; i
++)
1941 ii
= dd
->gatindex
[i
];
1942 gmx_mtop_atominfo_global(mtop
,ii
,&atomname
,&resnr
,&resname
);
1943 if (i
< dd
->comm
->nat
[ddnatZONE
])
1946 while (i
>= dd
->cgindex
[dd
->comm
->zones
.cg_range
[c
+1]])
1952 else if (i
< dd
->comm
->nat
[ddnatVSITE
])
1954 b
= dd
->comm
->zones
.n
;
1958 b
= dd
->comm
->zones
.n
+ 1;
1960 fprintf(out
,strlen(atomname
)<4 ? format
: format4
,
1961 "ATOM",(ii
+1)%100000,
1962 atomname
,resname
,' ',resnr
%10000,' ',
1963 10*x
[i
][XX
],10*x
[i
][YY
],10*x
[i
][ZZ
],1.0,b
);
1965 fprintf(out
,"TER\n");
1967 gmx_fio_fclose(out
);
1970 real
dd_cutoff_mbody(gmx_domdec_t
*dd
)
1972 gmx_domdec_comm_t
*comm
;
1979 if (comm
->bInterCGBondeds
)
1981 if (comm
->cutoff_mbody
> 0)
1983 r
= comm
->cutoff_mbody
;
1987 /* cutoff_mbody=0 means we do not have DLB */
1988 r
= comm
->cellsize_min
[dd
->dim
[0]];
1989 for(di
=1; di
<dd
->ndim
; di
++)
1991 r
= min(r
,comm
->cellsize_min
[dd
->dim
[di
]]);
1993 if (comm
->bBondComm
)
1995 r
= max(r
,comm
->cutoff_mbody
);
1999 r
= min(r
,comm
->cutoff
);
2007 real
dd_cutoff_twobody(gmx_domdec_t
*dd
)
2011 r_mb
= dd_cutoff_mbody(dd
);
2013 return max(dd
->comm
->cutoff
,r_mb
);
2017 static void dd_cart_coord2pmecoord(gmx_domdec_t
*dd
,ivec coord
,ivec coord_pme
)
2021 nc
= dd
->nc
[dd
->comm
->cartpmedim
];
2022 ntot
= dd
->comm
->ntot
[dd
->comm
->cartpmedim
];
2023 copy_ivec(coord
,coord_pme
);
2024 coord_pme
[dd
->comm
->cartpmedim
] =
2025 nc
+ (coord
[dd
->comm
->cartpmedim
]*(ntot
- nc
) + (ntot
- nc
)/2)/nc
;
2028 static int low_ddindex2pmeindex(int ndd
,int npme
,int ddindex
)
2030 /* Here we assign a PME node to communicate with this DD node
2031 * by assuming that the major index of both is x.
2032 * We add cr->npmenodes/2 to obtain an even distribution.
2034 return (ddindex
*npme
+ npme
/2)/ndd
;
2037 static int ddindex2pmeindex(const gmx_domdec_t
*dd
,int ddindex
)
2039 return low_ddindex2pmeindex(dd
->nnodes
,dd
->comm
->npmenodes
,ddindex
);
2042 static int cr_ddindex2pmeindex(const t_commrec
*cr
,int ddindex
)
2044 return low_ddindex2pmeindex(cr
->dd
->nnodes
,cr
->npmenodes
,ddindex
);
2047 static int *dd_pmenodes(t_commrec
*cr
)
2052 snew(pmenodes
,cr
->npmenodes
);
2054 for(i
=0; i
<cr
->dd
->nnodes
; i
++) {
2055 p0
= cr_ddindex2pmeindex(cr
,i
);
2056 p1
= cr_ddindex2pmeindex(cr
,i
+1);
2057 if (i
+1 == cr
->dd
->nnodes
|| p1
> p0
) {
2059 fprintf(debug
,"pmenode[%d] = %d\n",n
,i
+1+n
);
2060 pmenodes
[n
] = i
+ 1 + n
;
2068 static int gmx_ddcoord2pmeindex(t_commrec
*cr
,int x
,int y
,int z
)
2071 ivec coords
,coords_pme
,nc
;
2076 if (dd->comm->bCartesian) {
2077 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2078 dd_coords2pmecoords(dd,coords,coords_pme);
2079 copy_ivec(dd->ntot,nc);
2080 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2081 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2083 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2085 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2091 slab
= ddindex2pmeindex(dd
,dd_index(dd
->nc
,coords
));
2096 static int ddcoord2simnodeid(t_commrec
*cr
,int x
,int y
,int z
)
2098 gmx_domdec_comm_t
*comm
;
2100 int ddindex
,nodeid
=-1;
2102 comm
= cr
->dd
->comm
;
2107 if (comm
->bCartesianPP_PME
)
2110 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&nodeid
);
2115 ddindex
= dd_index(cr
->dd
->nc
,coords
);
2116 if (comm
->bCartesianPP
)
2118 nodeid
= comm
->ddindex2simnodeid
[ddindex
];
2124 nodeid
= ddindex
+ gmx_ddcoord2pmeindex(cr
,x
,y
,z
);
2136 static int dd_simnode2pmenode(t_commrec
*cr
,int sim_nodeid
)
2139 gmx_domdec_comm_t
*comm
;
2140 ivec coord
,coord_pme
;
2147 /* This assumes a uniform x domain decomposition grid cell size */
2148 if (comm
->bCartesianPP_PME
)
2151 MPI_Cart_coords(cr
->mpi_comm_mysim
,sim_nodeid
,DIM
,coord
);
2152 if (coord
[comm
->cartpmedim
] < dd
->nc
[comm
->cartpmedim
])
2154 /* This is a PP node */
2155 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2156 MPI_Cart_rank(cr
->mpi_comm_mysim
,coord_pme
,&pmenode
);
2160 else if (comm
->bCartesianPP
)
2162 if (sim_nodeid
< dd
->nnodes
)
2164 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2169 /* This assumes DD cells with identical x coordinates
2170 * are numbered sequentially.
2172 if (dd
->comm
->pmenodes
== NULL
)
2174 if (sim_nodeid
< dd
->nnodes
)
2176 /* The DD index equals the nodeid */
2177 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2183 while (sim_nodeid
> dd
->comm
->pmenodes
[i
])
2187 if (sim_nodeid
< dd
->comm
->pmenodes
[i
])
2189 pmenode
= dd
->comm
->pmenodes
[i
];
2197 bool gmx_pmeonlynode(t_commrec
*cr
,int sim_nodeid
)
2201 if (DOMAINDECOMP(cr
))
2203 bPMEOnlyNode
= (dd_simnode2pmenode(cr
,sim_nodeid
) == -1);
2207 bPMEOnlyNode
= FALSE
;
2210 return bPMEOnlyNode
;
2213 void get_pme_ddnodes(t_commrec
*cr
,int pmenodeid
,
2214 int *nmy_ddnodes
,int **my_ddnodes
,int *node_peer
)
2218 ivec coord
,coord_pme
;
2222 snew(*my_ddnodes
,(dd
->nnodes
+cr
->npmenodes
-1)/cr
->npmenodes
);
2225 for(x
=0; x
<dd
->nc
[XX
]; x
++)
2227 for(y
=0; y
<dd
->nc
[YY
]; y
++)
2229 for(z
=0; z
<dd
->nc
[ZZ
]; z
++)
2231 if (dd
->comm
->bCartesianPP_PME
)
2236 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2237 if (dd
->ci
[XX
] == coord_pme
[XX
] &&
2238 dd
->ci
[YY
] == coord_pme
[YY
] &&
2239 dd
->ci
[ZZ
] == coord_pme
[ZZ
])
2240 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2244 /* The slab corresponds to the nodeid in the PME group */
2245 if (gmx_ddcoord2pmeindex(cr
,x
,y
,z
) == pmenodeid
)
2247 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2254 /* The last PP-only node is the peer node */
2255 *node_peer
= (*my_ddnodes
)[*nmy_ddnodes
-1];
2259 fprintf(debug
,"Receive coordinates from PP nodes:");
2260 for(x
=0; x
<*nmy_ddnodes
; x
++)
2262 fprintf(debug
," %d",(*my_ddnodes
)[x
]);
2264 fprintf(debug
,"\n");
2268 static bool receive_vir_ener(t_commrec
*cr
)
2270 gmx_domdec_comm_t
*comm
;
2271 int pmenode
,coords
[DIM
],rank
;
2275 if (cr
->npmenodes
< cr
->dd
->nnodes
)
2277 comm
= cr
->dd
->comm
;
2278 if (comm
->bCartesianPP_PME
)
2280 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2282 MPI_Cart_coords(cr
->mpi_comm_mysim
,cr
->sim_nodeid
,DIM
,coords
);
2283 coords
[comm
->cartpmedim
]++;
2284 if (coords
[comm
->cartpmedim
] < cr
->dd
->nc
[comm
->cartpmedim
])
2286 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&rank
);
2287 if (dd_simnode2pmenode(cr
,rank
) == pmenode
)
2289 /* This is not the last PP node for pmenode */
2297 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2298 if (cr
->sim_nodeid
+1 < cr
->nnodes
&&
2299 dd_simnode2pmenode(cr
,cr
->sim_nodeid
+1) == pmenode
)
2301 /* This is not the last PP node for pmenode */
2310 static void set_zones_ncg_home(gmx_domdec_t
*dd
)
2312 gmx_domdec_zones_t
*zones
;
2315 zones
= &dd
->comm
->zones
;
2317 zones
->cg_range
[0] = 0;
2318 for(i
=1; i
<zones
->n
+1; i
++)
2320 zones
->cg_range
[i
] = dd
->ncg_home
;
2324 static void rebuild_cgindex(gmx_domdec_t
*dd
,int *gcgs_index
,t_state
*state
)
2326 int nat
,i
,*ind
,*dd_cg_gl
,*cgindex
,cg_gl
;
2329 dd_cg_gl
= dd
->index_gl
;
2330 cgindex
= dd
->cgindex
;
2333 for(i
=0; i
<state
->ncg_gl
; i
++)
2337 dd_cg_gl
[i
] = cg_gl
;
2338 nat
+= gcgs_index
[cg_gl
+1] - gcgs_index
[cg_gl
];
2342 dd
->ncg_home
= state
->ncg_gl
;
2345 set_zones_ncg_home(dd
);
2348 static int ddcginfo(const cginfo_mb_t
*cginfo_mb
,int cg
)
2350 while (cg
>= cginfo_mb
->cg_end
)
2355 return cginfo_mb
->cginfo
[(cg
- cginfo_mb
->cg_start
) % cginfo_mb
->cg_mod
];
2358 static void dd_set_cginfo(int *index_gl
,int cg0
,int cg1
,
2359 t_forcerec
*fr
,char *bLocalCG
)
2361 cginfo_mb_t
*cginfo_mb
;
2367 cginfo_mb
= fr
->cginfo_mb
;
2368 cginfo
= fr
->cginfo
;
2370 for(cg
=cg0
; cg
<cg1
; cg
++)
2372 cginfo
[cg
] = ddcginfo(cginfo_mb
,index_gl
[cg
]);
2376 if (bLocalCG
!= NULL
)
2378 for(cg
=cg0
; cg
<cg1
; cg
++)
2380 bLocalCG
[index_gl
[cg
]] = TRUE
;
2385 static void make_dd_indices(gmx_domdec_t
*dd
,int *gcgs_index
,int cg_start
)
2387 int nzone
,zone
,zone1
,cg0
,cg
,cg_gl
,a
,a_gl
;
2388 int *zone2cg
,*zone_ncg1
,*index_gl
,*gatindex
;
2392 bLocalCG
= dd
->comm
->bLocalCG
;
2394 if (dd
->nat_tot
> dd
->gatindex_nalloc
)
2396 dd
->gatindex_nalloc
= over_alloc_dd(dd
->nat_tot
);
2397 srenew(dd
->gatindex
,dd
->gatindex_nalloc
);
2400 nzone
= dd
->comm
->zones
.n
;
2401 zone2cg
= dd
->comm
->zones
.cg_range
;
2402 zone_ncg1
= dd
->comm
->zone_ncg1
;
2403 index_gl
= dd
->index_gl
;
2404 gatindex
= dd
->gatindex
;
2406 if (zone2cg
[1] != dd
->ncg_home
)
2408 gmx_incons("dd->ncg_zone is not up to date");
2411 /* Make the local to global and global to local atom index */
2412 a
= dd
->cgindex
[cg_start
];
2413 for(zone
=0; zone
<nzone
; zone
++)
2421 cg0
= zone2cg
[zone
];
2423 for(cg
=cg0
; cg
<zone2cg
[zone
+1]; cg
++)
2426 if (cg
- cg0
>= zone_ncg1
[zone
])
2428 /* Signal that this cg is from more than one zone away */
2431 cg_gl
= index_gl
[cg
];
2432 for(a_gl
=gcgs_index
[cg_gl
]; a_gl
<gcgs_index
[cg_gl
+1]; a_gl
++)
2435 ga2la_set(dd
->ga2la
,a_gl
,a
,zone1
);
2442 static int check_bLocalCG(gmx_domdec_t
*dd
,int ncg_sys
,const char *bLocalCG
,
2448 if (bLocalCG
== NULL
)
2452 for(i
=0; i
<dd
->ncg_tot
; i
++)
2454 if (!bLocalCG
[dd
->index_gl
[i
]])
2457 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd
->rank
,where
,i
+1,dd
->index_gl
[i
]+1,dd
->ncg_home
);
2462 for(i
=0; i
<ncg_sys
; i
++)
2469 if (ngl
!= dd
->ncg_tot
)
2471 fprintf(stderr
,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd
->rank
,where
,ngl
,dd
->ncg_tot
);
2478 static void check_index_consistency(gmx_domdec_t
*dd
,
2479 int natoms_sys
,int ncg_sys
,
2482 int nerr
,ngl
,i
,a
,cell
;
2487 if (dd
->comm
->DD_debug
> 1)
2489 snew(have
,natoms_sys
);
2490 for(a
=0; a
<dd
->nat_tot
; a
++)
2492 if (have
[dd
->gatindex
[a
]] > 0)
2494 fprintf(stderr
,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd
->rank
,dd
->gatindex
[a
]+1,have
[dd
->gatindex
[a
]],a
+1);
2498 have
[dd
->gatindex
[a
]] = a
+ 1;
2504 snew(have
,dd
->nat_tot
);
2507 for(i
=0; i
<natoms_sys
; i
++)
2509 if (ga2la_get(dd
->ga2la
,i
,&a
,&cell
))
2511 if (a
>= dd
->nat_tot
)
2513 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd
->rank
,i
+1,a
+1,dd
->nat_tot
);
2519 if (dd
->gatindex
[a
] != i
)
2521 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd
->rank
,i
+1,a
+1,dd
->gatindex
[a
]+1);
2528 if (ngl
!= dd
->nat_tot
)
2531 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2532 dd
->rank
,where
,ngl
,dd
->nat_tot
);
2534 for(a
=0; a
<dd
->nat_tot
; a
++)
2539 "DD node %d, %s: local atom %d, global %d has no global index\n",
2540 dd
->rank
,where
,a
+1,dd
->gatindex
[a
]+1);
2545 nerr
+= check_bLocalCG(dd
,ncg_sys
,dd
->comm
->bLocalCG
,where
);
2548 gmx_fatal(FARGS
,"DD node %d, %s: %d atom/cg index inconsistencies",
2549 dd
->rank
,where
,nerr
);
2553 static void clear_dd_indices(gmx_domdec_t
*dd
,int cg_start
,int a_start
)
2560 /* Clear the whole list without searching */
2561 ga2la_clear(dd
->ga2la
);
2565 for(i
=a_start
; i
<dd
->nat_tot
; i
++)
2567 ga2la_del(dd
->ga2la
,dd
->gatindex
[i
]);
2571 bLocalCG
= dd
->comm
->bLocalCG
;
2574 for(i
=cg_start
; i
<dd
->ncg_tot
; i
++)
2576 bLocalCG
[dd
->index_gl
[i
]] = FALSE
;
2580 dd_clear_local_vsite_indices(dd
);
2582 if (dd
->constraints
)
2584 dd_clear_local_constraint_indices(dd
);
2588 static real
grid_jump_limit(gmx_domdec_comm_t
*comm
,int dim_ind
)
2590 real grid_jump_limit
;
2592 /* The distance between the boundaries of cells at distance
2593 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2594 * and by the fact that cells should not be shifted by more than
2595 * half their size, such that cg's only shift by one cell
2596 * at redecomposition.
2598 grid_jump_limit
= comm
->cellsize_limit
;
2599 if (!comm
->bVacDLBNoLimit
)
2601 grid_jump_limit
= max(grid_jump_limit
,
2602 comm
->cutoff
/comm
->cd
[dim_ind
].np
);
2605 return grid_jump_limit
;
2608 static void check_grid_jump(gmx_large_int_t step
,gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
2610 gmx_domdec_comm_t
*comm
;
2616 for(d
=1; d
<dd
->ndim
; d
++)
2619 limit
= grid_jump_limit(comm
,d
);
2620 bfac
= ddbox
->box_size
[dim
];
2621 if (ddbox
->tric_dir
[dim
])
2623 bfac
*= ddbox
->skew_fac
[dim
];
2625 if ((comm
->cell_f1
[d
] - comm
->cell_f_max0
[d
])*bfac
< limit
||
2626 (comm
->cell_f0
[d
] - comm
->cell_f_min1
[d
])*bfac
> -limit
)
2629 gmx_fatal(FARGS
,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
2630 gmx_step_str(step
,buf
),
2631 dim2char(dim
),dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
2636 static int dd_load_count(gmx_domdec_comm_t
*comm
)
2638 return (comm
->eFlop
? comm
->flop_n
: comm
->cycl_n
[ddCyclF
]);
2641 static float dd_force_load(gmx_domdec_comm_t
*comm
)
2648 if (comm
->eFlop
> 1)
2650 load
*= 1.0 + (comm
->eFlop
- 1)*(0.1*rand()/RAND_MAX
- 0.05);
2655 load
= comm
->cycl
[ddCyclF
];
2656 if (comm
->cycl_n
[ddCyclF
] > 1)
2658 /* Subtract the maximum of the last n cycle counts
2659 * to get rid of possible high counts due to other soures,
2660 * for instance system activity, that would otherwise
2661 * affect the dynamic load balancing.
2663 load
-= comm
->cycl_max
[ddCyclF
];
2670 static void set_slb_pme_dim_f(gmx_domdec_t
*dd
,int dim
,real
**dim_f
)
2672 gmx_domdec_comm_t
*comm
;
2677 snew(*dim_f
,dd
->nc
[dim
]+1);
2679 for(i
=1; i
<dd
->nc
[dim
]; i
++)
2681 if (comm
->slb_frac
[dim
])
2683 (*dim_f
)[i
] = (*dim_f
)[i
-1] + comm
->slb_frac
[dim
][i
-1];
2687 (*dim_f
)[i
] = (real
)i
/(real
)dd
->nc
[dim
];
2690 (*dim_f
)[dd
->nc
[dim
]] = 1;
2693 static void init_ddpme(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,int dimind
)
2695 int pmeindex
,slab
,nso
,i
;
2698 if (dimind
== 0 && dd
->dim
[0] == YY
&& dd
->comm
->npmenodes_x
== 1)
2704 ddpme
->dim
= dimind
;
2706 ddpme
->dim_match
= (ddpme
->dim
== dd
->dim
[dimind
]);
2708 ddpme
->nslab
= (ddpme
->dim
== 0 ?
2709 dd
->comm
->npmenodes_x
:
2710 dd
->comm
->npmenodes_y
);
2712 if (ddpme
->nslab
<= 1)
2717 nso
= dd
->comm
->npmenodes
/ddpme
->nslab
;
2718 /* Determine for each PME slab the PP location range for dimension dim */
2719 snew(ddpme
->pp_min
,ddpme
->nslab
);
2720 snew(ddpme
->pp_max
,ddpme
->nslab
);
2721 for(slab
=0; slab
<ddpme
->nslab
; slab
++) {
2722 ddpme
->pp_min
[slab
] = dd
->nc
[dd
->dim
[dimind
]] - 1;
2723 ddpme
->pp_max
[slab
] = 0;
2725 for(i
=0; i
<dd
->nnodes
; i
++) {
2726 ddindex2xyz(dd
->nc
,i
,xyz
);
2727 /* For y only use our y/z slab.
2728 * This assumes that the PME x grid size matches the DD grid size.
2730 if (dimind
== 0 || xyz
[XX
] == dd
->ci
[XX
]) {
2731 pmeindex
= ddindex2pmeindex(dd
,i
);
2733 slab
= pmeindex
/nso
;
2735 slab
= pmeindex
% ddpme
->nslab
;
2737 ddpme
->pp_min
[slab
] = min(ddpme
->pp_min
[slab
],xyz
[dimind
]);
2738 ddpme
->pp_max
[slab
] = max(ddpme
->pp_max
[slab
],xyz
[dimind
]);
2742 set_slb_pme_dim_f(dd
,ddpme
->dim
,&ddpme
->slb_dim_f
);
2745 int dd_pme_maxshift_x(gmx_domdec_t
*dd
)
2747 if (dd
->comm
->ddpme
[0].dim
== XX
)
2749 return dd
->comm
->ddpme
[0].maxshift
;
2757 int dd_pme_maxshift_y(gmx_domdec_t
*dd
)
2759 if (dd
->comm
->ddpme
[0].dim
== YY
)
2761 return dd
->comm
->ddpme
[0].maxshift
;
2763 else if (dd
->comm
->npmedecompdim
>= 2 && dd
->comm
->ddpme
[1].dim
== YY
)
2765 return dd
->comm
->ddpme
[1].maxshift
;
2773 static void set_pme_maxshift(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,
2774 bool bUniform
,gmx_ddbox_t
*ddbox
,real
*cell_f
)
2776 gmx_domdec_comm_t
*comm
;
2779 real range
,pme_boundary
;
2783 nc
= dd
->nc
[ddpme
->dim
];
2786 if (!ddpme
->dim_match
)
2788 /* PP decomposition is not along dim: the worst situation */
2791 else if (ns
<= 3 || (bUniform
&& ns
== nc
))
2793 /* The optimal situation */
2798 /* We need to check for all pme nodes which nodes they
2799 * could possibly need to communicate with.
2801 xmin
= ddpme
->pp_min
;
2802 xmax
= ddpme
->pp_max
;
2803 /* Allow for atoms to be maximally 2/3 times the cut-off
2804 * out of their DD cell. This is a reasonable balance between
2805 * between performance and support for most charge-group/cut-off
2808 range
= 2.0/3.0*comm
->cutoff
/ddbox
->box_size
[ddpme
->dim
];
2809 /* Avoid extra communication when we are exactly at a boundary */
2815 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2816 pme_boundary
= (real
)s
/ns
;
2819 cell_f
[xmax
[s
-(sh
+1) ]+1] + range
> pme_boundary
) ||
2821 cell_f
[xmax
[s
-(sh
+1)+ns
]+1] - 1 + range
> pme_boundary
)))
2825 pme_boundary
= (real
)(s
+1)/ns
;
2828 cell_f
[xmin
[s
+(sh
+1) ] ] - range
< pme_boundary
) ||
2830 cell_f
[xmin
[s
+(sh
+1)-ns
] ] + 1 - range
< pme_boundary
)))
2837 ddpme
->maxshift
= sh
;
2841 fprintf(debug
,"PME slab communication range for dim %d is %d\n",
2842 ddpme
->dim
,ddpme
->maxshift
);
2846 static void check_box_size(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
2850 for(d
=0; d
<dd
->ndim
; d
++)
2853 if (dim
< ddbox
->nboundeddim
&&
2854 ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
] <
2855 dd
->nc
[dim
]*dd
->comm
->cellsize_limit
*DD_CELL_MARGIN
)
2857 gmx_fatal(FARGS
,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2858 dim2char(dim
),ddbox
->box_size
[dim
],ddbox
->skew_fac
[dim
],
2859 dd
->nc
[dim
],dd
->comm
->cellsize_limit
);
2864 static void set_dd_cell_sizes_slb(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
,
2865 bool bMaster
,ivec npulse
)
2867 gmx_domdec_comm_t
*comm
;
2870 real
*cell_x
,cell_dx
,cellsize
;
2874 for(d
=0; d
<DIM
; d
++)
2876 cellsize_min
[d
] = ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
];
2878 if (dd
->nc
[d
] == 1 || comm
->slb_frac
[d
] == NULL
)
2881 cell_dx
= ddbox
->box_size
[d
]/dd
->nc
[d
];
2884 for(j
=0; j
<dd
->nc
[d
]+1; j
++)
2886 dd
->ma
->cell_x
[d
][j
] = ddbox
->box0
[d
] + j
*cell_dx
;
2891 comm
->cell_x0
[d
] = ddbox
->box0
[d
] + (dd
->ci
[d
] )*cell_dx
;
2892 comm
->cell_x1
[d
] = ddbox
->box0
[d
] + (dd
->ci
[d
]+1)*cell_dx
;
2894 cellsize
= cell_dx
*ddbox
->skew_fac
[d
];
2895 while (cellsize
*npulse
[d
] < comm
->cutoff
&& npulse
[d
] < dd
->nc
[d
]-1)
2899 cellsize_min
[d
] = cellsize
;
2903 /* Statically load balanced grid */
2904 /* Also when we are not doing a master distribution we determine
2905 * all cell borders in a loop to obtain identical values
2906 * to the master distribution case and to determine npulse.
2910 cell_x
= dd
->ma
->cell_x
[d
];
2914 snew(cell_x
,dd
->nc
[d
]+1);
2916 cell_x
[0] = ddbox
->box0
[d
];
2917 for(j
=0; j
<dd
->nc
[d
]; j
++)
2919 cell_dx
= ddbox
->box_size
[d
]*comm
->slb_frac
[d
][j
];
2920 cell_x
[j
+1] = cell_x
[j
] + cell_dx
;
2921 cellsize
= cell_dx
*ddbox
->skew_fac
[d
];
2922 while (cellsize
*npulse
[d
] < comm
->cutoff
&&
2923 npulse
[d
] < dd
->nc
[d
]-1)
2927 cellsize_min
[d
] = min(cellsize_min
[d
],cellsize
);
2931 comm
->cell_x0
[d
] = cell_x
[dd
->ci
[d
]];
2932 comm
->cell_x1
[d
] = cell_x
[dd
->ci
[d
]+1];
2936 /* The following limitation is to avoid that a cell would receive
2937 * some of its own home charge groups back over the periodic boundary.
2938 * Double charge groups cause trouble with the global indices.
2940 if (d
< ddbox
->npbcdim
&&
2941 dd
->nc
[d
] > 1 && npulse
[d
] >= dd
->nc
[d
])
2943 gmx_fatal_collective(FARGS
,NULL
,dd
,
2944 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
2945 dim2char(d
),ddbox
->box_size
[d
],ddbox
->skew_fac
[d
],
2947 dd
->nc
[d
],dd
->nc
[d
],
2948 dd
->nnodes
> dd
->nc
[d
] ? "cells" : "processors");
2952 if (!comm
->bDynLoadBal
)
2954 copy_rvec(cellsize_min
,comm
->cellsize_min
);
2957 for(d
=0; d
<comm
->npmedecompdim
; d
++)
2959 set_pme_maxshift(dd
,&comm
->ddpme
[d
],
2960 comm
->slb_frac
[dd
->dim
[d
]]==NULL
,ddbox
,
2961 comm
->ddpme
[d
].slb_dim_f
);
2966 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t
*dd
,
2967 int d
,int dim
,gmx_domdec_root_t
*root
,
2969 bool bUniform
,gmx_large_int_t step
, real cellsize_limit_f
, int range
[])
2971 gmx_domdec_comm_t
*comm
;
2972 int ncd
,i
,j
,nmin
,nmin_old
;
2975 real fac
,halfway
,cellsize_limit_f_i
,region_size
;
2976 bool bPBC
,bLastHi
=FALSE
;
2977 int nrange
[]={range
[0],range
[1]};
2979 region_size
= root
->cell_f
[range
[1]]-root
->cell_f
[range
[0]];
2985 bPBC
= (dim
< ddbox
->npbcdim
);
2987 cell_size
= root
->buf_ncd
;
2991 fprintf(debug
,"enforce_limits: %d %d\n",range
[0],range
[1]);
2994 /* First we need to check if the scaling does not make cells
2995 * smaller than the smallest allowed size.
2996 * We need to do this iteratively, since if a cell is too small,
2997 * it needs to be enlarged, which makes all the other cells smaller,
2998 * which could in turn make another cell smaller than allowed.
3000 for(i
=range
[0]; i
<range
[1]; i
++)
3002 root
->bCellMin
[i
] = FALSE
;
3008 /* We need the total for normalization */
3010 for(i
=range
[0]; i
<range
[1]; i
++)
3012 if (root
->bCellMin
[i
] == FALSE
)
3014 fac
+= cell_size
[i
];
3017 fac
= ( region_size
- nmin
*cellsize_limit_f
)/fac
; /* substracting cells already set to cellsize_limit_f */
3018 /* Determine the cell boundaries */
3019 for(i
=range
[0]; i
<range
[1]; i
++)
3021 if (root
->bCellMin
[i
] == FALSE
)
3023 cell_size
[i
] *= fac
;
3024 if (!bPBC
&& (i
== 0 || i
== dd
->nc
[dim
] -1))
3026 cellsize_limit_f_i
= 0;
3030 cellsize_limit_f_i
= cellsize_limit_f
;
3032 if (cell_size
[i
] < cellsize_limit_f_i
)
3034 root
->bCellMin
[i
] = TRUE
;
3035 cell_size
[i
] = cellsize_limit_f_i
;
3039 root
->cell_f
[i
+1] = root
->cell_f
[i
] + cell_size
[i
];
3042 while (nmin
> nmin_old
);
3045 cell_size
[i
] = root
->cell_f
[i
+1] - root
->cell_f
[i
];
3046 /* For this check we should not use DD_CELL_MARGIN,
3047 * but a slightly smaller factor,
3048 * since rounding could get use below the limit.
3050 if (bPBC
&& cell_size
[i
] < cellsize_limit_f
*DD_CELL_MARGIN2
/DD_CELL_MARGIN
)
3053 gmx_fatal(FARGS
,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3054 gmx_step_str(step
,buf
),
3055 dim2char(dim
),ddbox
->box_size
[dim
],ddbox
->skew_fac
[dim
],
3056 ncd
,comm
->cellsize_min
[dim
]);
3059 root
->bLimited
= (nmin
> 0) || (range
[0]>0) || (range
[1]<ncd
);
3063 /* Check if the boundary did not displace more than halfway
3064 * each of the cells it bounds, as this could cause problems,
3065 * especially when the differences between cell sizes are large.
3066 * If changes are applied, they will not make cells smaller
3067 * than the cut-off, as we check all the boundaries which
3068 * might be affected by a change and if the old state was ok,
3069 * the cells will at most be shrunk back to their old size.
3071 for(i
=range
[0]+1; i
<range
[1]; i
++)
3073 halfway
= 0.5*(root
->old_cell_f
[i
] + root
->old_cell_f
[i
-1]);
3074 if (root
->cell_f
[i
] < halfway
)
3076 root
->cell_f
[i
] = halfway
;
3077 /* Check if the change also causes shifts of the next boundaries */
3078 for(j
=i
+1; j
<range
[1]; j
++)
3080 if (root
->cell_f
[j
] < root
->cell_f
[j
-1] + cellsize_limit_f
)
3081 root
->cell_f
[j
] = root
->cell_f
[j
-1] + cellsize_limit_f
;
3084 halfway
= 0.5*(root
->old_cell_f
[i
] + root
->old_cell_f
[i
+1]);
3085 if (root
->cell_f
[i
] > halfway
)
3087 root
->cell_f
[i
] = halfway
;
3088 /* Check if the change also causes shifts of the next boundaries */
3089 for(j
=i
-1; j
>=range
[0]+1; j
--)
3091 if (root
->cell_f
[j
] > root
->cell_f
[j
+1] - cellsize_limit_f
)
3092 root
->cell_f
[j
] = root
->cell_f
[j
+1] - cellsize_limit_f
;
3098 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3099 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3100 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3101 * for a and b nrange is used */
3104 /* Take care of the staggering of the cell boundaries */
3107 for(i
=range
[0]; i
<range
[1]; i
++)
3109 root
->cell_f_max0
[i
] = root
->cell_f
[i
];
3110 root
->cell_f_min1
[i
] = root
->cell_f
[i
+1];
3115 for(i
=range
[0]+1; i
<range
[1]; i
++)
3117 bLimLo
= (root
->cell_f
[i
] < root
->bound_min
[i
]);
3118 bLimHi
= (root
->cell_f
[i
] > root
->bound_max
[i
]);
3119 if (bLimLo
&& bLimHi
)
3121 /* Both limits violated, try the best we can */
3122 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3123 root
->cell_f
[i
] = 0.5*(root
->bound_min
[i
] + root
->bound_max
[i
]);
3126 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3130 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3136 /* root->cell_f[i] = root->bound_min[i]; */
3137 nrange
[1]=i
; /* only store violation location. There could be a LimLo violation following with an higher index */
3140 else if (bLimHi
&& !bLastHi
)
3143 if (nrange
[1] < range
[1]) /* found a LimLo before */
3145 root
->cell_f
[nrange
[1]] = root
->bound_min
[nrange
[1]];
3146 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3147 nrange
[0]=nrange
[1];
3149 root
->cell_f
[i
] = root
->bound_max
[i
];
3151 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3156 if (nrange
[1] < range
[1]) /* found last a LimLo */
3158 root
->cell_f
[nrange
[1]] = root
->bound_min
[nrange
[1]];
3159 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3160 nrange
[0]=nrange
[1];
3162 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3164 else if (nrange
[0] > range
[0]) /* found at least one LimHi */
3166 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3173 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t
*dd
,
3174 int d
,int dim
,gmx_domdec_root_t
*root
,
3175 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3176 bool bUniform
,gmx_large_int_t step
)
3178 gmx_domdec_comm_t
*comm
;
3181 real load_aver
,load_i
,imbalance
,change
,change_max
,sc
;
3182 real cellsize_limit_f
,dist_min_f
,dist_min_f_hard
,space
;
3183 real change_limit
= 0.1;
3186 int range
[] = { 0, 0 };
3192 bPBC
= (dim
< ddbox
->npbcdim
);
3194 cell_size
= root
->buf_ncd
;
3196 /* Store the original boundaries */
3197 for(i
=0; i
<ncd
+1; i
++)
3199 root
->old_cell_f
[i
] = root
->cell_f
[i
];
3202 for(i
=0; i
<ncd
; i
++)
3204 cell_size
[i
] = 1.0/ncd
;
3207 else if (dd_load_count(comm
))
3209 load_aver
= comm
->load
[d
].sum_m
/ncd
;
3211 for(i
=0; i
<ncd
; i
++)
3213 /* Determine the relative imbalance of cell i */
3214 load_i
= comm
->load
[d
].load
[i
*comm
->load
[d
].nload
+2];
3215 imbalance
= (load_i
- load_aver
)/(load_aver
>0 ? load_aver
: 1);
3216 /* Determine the change of the cell size using underrelaxation */
3217 change
= -relax
*imbalance
;
3218 change_max
= max(change_max
,max(change
,-change
));
3220 /* Limit the amount of scaling.
3221 * We need to use the same rescaling for all cells in one row,
3222 * otherwise the load balancing might not converge.
3225 if (change_max
> change_limit
)
3227 sc
*= change_limit
/change_max
;
3229 for(i
=0; i
<ncd
; i
++)
3231 /* Determine the relative imbalance of cell i */
3232 load_i
= comm
->load
[d
].load
[i
*comm
->load
[d
].nload
+2];
3233 imbalance
= (load_i
- load_aver
)/(load_aver
>0 ? load_aver
: 1);
3234 /* Determine the change of the cell size using underrelaxation */
3235 change
= -sc
*imbalance
;
3236 cell_size
[i
] = (root
->cell_f
[i
+1]-root
->cell_f
[i
])*(1 + change
);
3240 cellsize_limit_f
= comm
->cellsize_min
[dim
]/ddbox
->box_size
[dim
];
3241 cellsize_limit_f
*= DD_CELL_MARGIN
;
3242 dist_min_f_hard
= grid_jump_limit(comm
,d
)/ddbox
->box_size
[dim
];
3243 dist_min_f
= dist_min_f_hard
* DD_CELL_MARGIN
;
3244 if (ddbox
->tric_dir
[dim
])
3246 cellsize_limit_f
/= ddbox
->skew_fac
[dim
];
3247 dist_min_f
/= ddbox
->skew_fac
[dim
];
3249 if (bDynamicBox
&& d
> 0)
3251 dist_min_f
*= DD_PRES_SCALE_MARGIN
;
3253 if (d
> 0 && !bUniform
)
3255 /* Make sure that the grid is not shifted too much */
3256 for(i
=1; i
<ncd
; i
++) {
3257 if (root
->cell_f_min1
[i
] - root
->cell_f_max0
[i
-1] < 2 * dist_min_f_hard
)
3259 gmx_incons("Inconsistent DD boundary staggering limits!");
3261 root
->bound_min
[i
] = root
->cell_f_max0
[i
-1] + dist_min_f
;
3262 space
= root
->cell_f
[i
] - (root
->cell_f_max0
[i
-1] + dist_min_f
);
3264 root
->bound_min
[i
] += 0.5*space
;
3266 root
->bound_max
[i
] = root
->cell_f_min1
[i
] - dist_min_f
;
3267 space
= root
->cell_f
[i
] - (root
->cell_f_min1
[i
] - dist_min_f
);
3269 root
->bound_max
[i
] += 0.5*space
;
3274 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3276 root
->cell_f_max0
[i
-1] + dist_min_f
,
3277 root
->bound_min
[i
],root
->cell_f
[i
],root
->bound_max
[i
],
3278 root
->cell_f_min1
[i
] - dist_min_f
);
3283 root
->cell_f
[0] = 0;
3284 root
->cell_f
[ncd
] = 1;
3285 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, range
);
3288 /* After the checks above, the cells should obey the cut-off
3289 * restrictions, but it does not hurt to check.
3291 for(i
=0; i
<ncd
; i
++)
3295 fprintf(debug
,"Relative bounds dim %d cell %d: %f %f\n",
3296 dim
,i
,root
->cell_f
[i
],root
->cell_f
[i
+1]);
3299 if ((bPBC
|| (i
!= 0 && i
!= dd
->nc
[dim
]-1)) &&
3300 root
->cell_f
[i
+1] - root
->cell_f
[i
] <
3301 cellsize_limit_f
/DD_CELL_MARGIN
)
3305 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3306 gmx_step_str(step
,buf
),dim2char(dim
),i
,
3307 (root
->cell_f
[i
+1] - root
->cell_f
[i
])
3308 *ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
]);
3313 /* Store the cell boundaries of the lower dimensions at the end */
3314 for(d1
=0; d1
<d
; d1
++)
3316 root
->cell_f
[pos
++] = comm
->cell_f0
[d1
];
3317 root
->cell_f
[pos
++] = comm
->cell_f1
[d1
];
3320 if (d
< comm
->npmedecompdim
)
3322 /* The master determines the maximum shift for
3323 * the coordinate communication between separate PME nodes.
3325 set_pme_maxshift(dd
,&comm
->ddpme
[d
],bUniform
,ddbox
,root
->cell_f
);
3327 root
->cell_f
[pos
++] = comm
->ddpme
[0].maxshift
;
3330 root
->cell_f
[pos
++] = comm
->ddpme
[1].maxshift
;
3334 static void relative_to_absolute_cell_bounds(gmx_domdec_t
*dd
,
3335 gmx_ddbox_t
*ddbox
,int dimind
)
3337 gmx_domdec_comm_t
*comm
;
3342 /* Set the cell dimensions */
3343 dim
= dd
->dim
[dimind
];
3344 comm
->cell_x0
[dim
] = comm
->cell_f0
[dimind
]*ddbox
->box_size
[dim
];
3345 comm
->cell_x1
[dim
] = comm
->cell_f1
[dimind
]*ddbox
->box_size
[dim
];
3346 if (dim
>= ddbox
->nboundeddim
)
3348 comm
->cell_x0
[dim
] += ddbox
->box0
[dim
];
3349 comm
->cell_x1
[dim
] += ddbox
->box0
[dim
];
3353 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t
*dd
,
3354 int d
,int dim
,real
*cell_f_row
,
3357 gmx_domdec_comm_t
*comm
;
3363 /* Each node would only need to know two fractions,
3364 * but it is probably cheaper to broadcast the whole array.
3366 MPI_Bcast(cell_f_row
,DD_CELL_F_SIZE(dd
,d
)*sizeof(real
),MPI_BYTE
,
3367 0,comm
->mpi_comm_load
[d
]);
3369 /* Copy the fractions for this dimension from the buffer */
3370 comm
->cell_f0
[d
] = cell_f_row
[dd
->ci
[dim
] ];
3371 comm
->cell_f1
[d
] = cell_f_row
[dd
->ci
[dim
]+1];
3372 /* The whole array was communicated, so set the buffer position */
3373 pos
= dd
->nc
[dim
] + 1;
3374 for(d1
=0; d1
<=d
; d1
++)
3378 /* Copy the cell fractions of the lower dimensions */
3379 comm
->cell_f0
[d1
] = cell_f_row
[pos
++];
3380 comm
->cell_f1
[d1
] = cell_f_row
[pos
++];
3382 relative_to_absolute_cell_bounds(dd
,ddbox
,d1
);
3384 /* Convert the communicated shift from float to int */
3385 comm
->ddpme
[0].maxshift
= (int)(cell_f_row
[pos
++] + 0.5);
3388 comm
->ddpme
[1].maxshift
= (int)(cell_f_row
[pos
++] + 0.5);
3392 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t
*dd
,
3393 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3394 bool bUniform
,gmx_large_int_t step
)
3396 gmx_domdec_comm_t
*comm
;
3398 bool bRowMember
,bRowRoot
;
3403 for(d
=0; d
<dd
->ndim
; d
++)
3408 for(d1
=d
; d1
<dd
->ndim
; d1
++)
3410 if (dd
->ci
[dd
->dim
[d1
]] > 0)
3423 set_dd_cell_sizes_dlb_root(dd
,d
,dim
,comm
->root
[d
],
3424 ddbox
,bDynamicBox
,bUniform
,step
);
3425 cell_f_row
= comm
->root
[d
]->cell_f
;
3429 cell_f_row
= comm
->cell_f_row
;
3431 distribute_dd_cell_sizes_dlb(dd
,d
,dim
,cell_f_row
,ddbox
);
3436 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
3440 /* This function assumes the box is static and should therefore
3441 * not be called when the box has changed since the last
3442 * call to dd_partition_system.
3444 for(d
=0; d
<dd
->ndim
; d
++)
3446 relative_to_absolute_cell_bounds(dd
,ddbox
,d
);
3452 static void set_dd_cell_sizes_dlb(gmx_domdec_t
*dd
,
3453 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3454 bool bUniform
,bool bDoDLB
,gmx_large_int_t step
,
3455 gmx_wallcycle_t wcycle
)
3457 gmx_domdec_comm_t
*comm
;
3464 wallcycle_start(wcycle
,ewcDDCOMMBOUND
);
3465 set_dd_cell_sizes_dlb_change(dd
,ddbox
,bDynamicBox
,bUniform
,step
);
3466 wallcycle_stop(wcycle
,ewcDDCOMMBOUND
);
3468 else if (bDynamicBox
)
3470 set_dd_cell_sizes_dlb_nochange(dd
,ddbox
);
3473 /* Set the dimensions for which no DD is used */
3474 for(dim
=0; dim
<DIM
; dim
++) {
3475 if (dd
->nc
[dim
] == 1) {
3476 comm
->cell_x0
[dim
] = 0;
3477 comm
->cell_x1
[dim
] = ddbox
->box_size
[dim
];
3478 if (dim
>= ddbox
->nboundeddim
)
3480 comm
->cell_x0
[dim
] += ddbox
->box0
[dim
];
3481 comm
->cell_x1
[dim
] += ddbox
->box0
[dim
];
3487 static void realloc_comm_ind(gmx_domdec_t
*dd
,ivec npulse
)
3490 gmx_domdec_comm_dim_t
*cd
;
3492 for(d
=0; d
<dd
->ndim
; d
++)
3494 cd
= &dd
->comm
->cd
[d
];
3495 np
= npulse
[dd
->dim
[d
]];
3496 if (np
> cd
->np_nalloc
)
3500 fprintf(debug
,"(Re)allocing cd for %c to %d pulses\n",
3501 dim2char(dd
->dim
[d
]),np
);
3503 if (DDMASTER(dd
) && cd
->np_nalloc
> 0)
3505 fprintf(stderr
,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd
->dim
[d
]),np
);
3508 for(i
=cd
->np_nalloc
; i
<np
; i
++)
3510 cd
->ind
[i
].index
= NULL
;
3511 cd
->ind
[i
].nalloc
= 0;
3520 static void set_dd_cell_sizes(gmx_domdec_t
*dd
,
3521 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3522 bool bUniform
,bool bDoDLB
,gmx_large_int_t step
,
3523 gmx_wallcycle_t wcycle
)
3525 gmx_domdec_comm_t
*comm
;
3531 /* Copy the old cell boundaries for the cg displacement check */
3532 copy_rvec(comm
->cell_x0
,comm
->old_cell_x0
);
3533 copy_rvec(comm
->cell_x1
,comm
->old_cell_x1
);
3535 if (comm
->bDynLoadBal
)
3539 check_box_size(dd
,ddbox
);
3541 set_dd_cell_sizes_dlb(dd
,ddbox
,bDynamicBox
,bUniform
,bDoDLB
,step
,wcycle
);
3545 set_dd_cell_sizes_slb(dd
,ddbox
,FALSE
,npulse
);
3546 realloc_comm_ind(dd
,npulse
);
3551 for(d
=0; d
<DIM
; d
++)
3553 fprintf(debug
,"cell_x[%d] %f - %f skew_fac %f\n",
3554 d
,comm
->cell_x0
[d
],comm
->cell_x1
[d
],ddbox
->skew_fac
[d
]);
3559 static void comm_dd_ns_cell_sizes(gmx_domdec_t
*dd
,
3561 rvec cell_ns_x0
,rvec cell_ns_x1
,
3562 gmx_large_int_t step
)
3564 gmx_domdec_comm_t
*comm
;
3569 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
3571 dim
= dd
->dim
[dim_ind
];
3573 /* Without PBC we don't have restrictions on the outer cells */
3574 if (!(dim
>= ddbox
->npbcdim
&&
3575 (dd
->ci
[dim
] == 0 || dd
->ci
[dim
] == dd
->nc
[dim
] - 1)) &&
3576 comm
->bDynLoadBal
&&
3577 (comm
->cell_x1
[dim
] - comm
->cell_x0
[dim
])*ddbox
->skew_fac
[dim
] <
3578 comm
->cellsize_min
[dim
])
3581 gmx_fatal(FARGS
,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3582 gmx_step_str(step
,buf
),dim2char(dim
),
3583 comm
->cell_x1
[dim
] - comm
->cell_x0
[dim
],
3584 ddbox
->skew_fac
[dim
],
3585 dd
->comm
->cellsize_min
[dim
],
3586 dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
3590 if ((dd
->bGridJump
&& dd
->ndim
> 1) || ddbox
->nboundeddim
< DIM
)
3592 /* Communicate the boundaries and update cell_ns_x0/1 */
3593 dd_move_cellx(dd
,ddbox
,cell_ns_x0
,cell_ns_x1
);
3594 if (dd
->bGridJump
&& dd
->ndim
> 1)
3596 check_grid_jump(step
,dd
,ddbox
);
3601 static void make_tric_corr_matrix(int npbcdim
,matrix box
,matrix tcm
)
3605 tcm
[YY
][XX
] = -box
[YY
][XX
]/box
[YY
][YY
];
3613 tcm
[ZZ
][XX
] = -(box
[ZZ
][YY
]*tcm
[YY
][XX
] + box
[ZZ
][XX
])/box
[ZZ
][ZZ
];
3614 tcm
[ZZ
][YY
] = -box
[ZZ
][YY
]/box
[ZZ
][ZZ
];
3623 static void check_screw_box(matrix box
)
3625 /* Mathematical limitation */
3626 if (box
[YY
][XX
] != 0 || box
[ZZ
][XX
] != 0)
3628 gmx_fatal(FARGS
,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3631 /* Limitation due to the asymmetry of the eighth shell method */
3632 if (box
[ZZ
][YY
] != 0)
3634 gmx_fatal(FARGS
,"pbc=screw with non-zero box_zy is not supported");
3638 static void distribute_cg(FILE *fplog
,gmx_large_int_t step
,
3639 matrix box
,ivec tric_dir
,t_block
*cgs
,rvec pos
[],
3642 gmx_domdec_master_t
*ma
;
3643 int **tmp_ind
=NULL
,*tmp_nalloc
=NULL
;
3644 int i
,icg
,j
,k
,k0
,k1
,d
,npbcdim
;
3646 rvec box_size
,cg_cm
;
3648 real nrcg
,inv_ncg
,pos_d
;
3650 bool bUnbounded
,bScrew
;
3654 if (tmp_ind
== NULL
)
3656 snew(tmp_nalloc
,dd
->nnodes
);
3657 snew(tmp_ind
,dd
->nnodes
);
3658 for(i
=0; i
<dd
->nnodes
; i
++)
3660 tmp_nalloc
[i
] = over_alloc_large(cgs
->nr
/dd
->nnodes
+1);
3661 snew(tmp_ind
[i
],tmp_nalloc
[i
]);
3665 /* Clear the count */
3666 for(i
=0; i
<dd
->nnodes
; i
++)
3672 make_tric_corr_matrix(dd
->npbcdim
,box
,tcm
);
3674 cgindex
= cgs
->index
;
3676 /* Compute the center of geometry for all charge groups */
3677 for(icg
=0; icg
<cgs
->nr
; icg
++)
3680 k1
= cgindex
[icg
+1];
3684 copy_rvec(pos
[k0
],cg_cm
);
3691 for(k
=k0
; (k
<k1
); k
++)
3693 rvec_inc(cg_cm
,pos
[k
]);
3695 for(d
=0; (d
<DIM
); d
++)
3697 cg_cm
[d
] *= inv_ncg
;
3700 /* Put the charge group in the box and determine the cell index */
3701 for(d
=DIM
-1; d
>=0; d
--) {
3703 if (d
< dd
->npbcdim
)
3705 bScrew
= (dd
->bScrewPBC
&& d
== XX
);
3706 if (tric_dir
[d
] && dd
->nc
[d
] > 1)
3708 /* Use triclinic coordintates for this dimension */
3709 for(j
=d
+1; j
<DIM
; j
++)
3711 pos_d
+= cg_cm
[j
]*tcm
[j
][d
];
3714 while(pos_d
>= box
[d
][d
])
3717 rvec_dec(cg_cm
,box
[d
]);
3720 cg_cm
[YY
] = box
[YY
][YY
] - cg_cm
[YY
];
3721 cg_cm
[ZZ
] = box
[ZZ
][ZZ
] - cg_cm
[ZZ
];
3723 for(k
=k0
; (k
<k1
); k
++)
3725 rvec_dec(pos
[k
],box
[d
]);
3728 pos
[k
][YY
] = box
[YY
][YY
] - pos
[k
][YY
];
3729 pos
[k
][ZZ
] = box
[ZZ
][ZZ
] - pos
[k
][ZZ
];
3736 rvec_inc(cg_cm
,box
[d
]);
3739 cg_cm
[YY
] = box
[YY
][YY
] - cg_cm
[YY
];
3740 cg_cm
[ZZ
] = box
[ZZ
][ZZ
] - cg_cm
[ZZ
];
3742 for(k
=k0
; (k
<k1
); k
++)
3744 rvec_inc(pos
[k
],box
[d
]);
3746 pos
[k
][YY
] = box
[YY
][YY
] - pos
[k
][YY
];
3747 pos
[k
][ZZ
] = box
[ZZ
][ZZ
] - pos
[k
][ZZ
];
3752 /* This could be done more efficiently */
3754 while(ind
[d
]+1 < dd
->nc
[d
] && pos_d
>= ma
->cell_x
[d
][ind
[d
]+1])
3759 i
= dd_index(dd
->nc
,ind
);
3760 if (ma
->ncg
[i
] == tmp_nalloc
[i
])
3762 tmp_nalloc
[i
] = over_alloc_large(ma
->ncg
[i
]+1);
3763 srenew(tmp_ind
[i
],tmp_nalloc
[i
]);
3765 tmp_ind
[i
][ma
->ncg
[i
]] = icg
;
3767 ma
->nat
[i
] += cgindex
[icg
+1] - cgindex
[icg
];
3771 for(i
=0; i
<dd
->nnodes
; i
++)
3774 for(k
=0; k
<ma
->ncg
[i
]; k
++)
3776 ma
->cg
[k1
++] = tmp_ind
[i
][k
];
3779 ma
->index
[dd
->nnodes
] = k1
;
3781 for(i
=0; i
<dd
->nnodes
; i
++)
3791 fprintf(fplog
,"Charge group distribution at step %s:",
3792 gmx_step_str(step
,buf
));
3793 for(i
=0; i
<dd
->nnodes
; i
++)
3795 fprintf(fplog
," %d",ma
->ncg
[i
]);
3797 fprintf(fplog
,"\n");
3801 static void get_cg_distribution(FILE *fplog
,gmx_large_int_t step
,gmx_domdec_t
*dd
,
3802 t_block
*cgs
,matrix box
,gmx_ddbox_t
*ddbox
,
3805 gmx_domdec_master_t
*ma
=NULL
;
3808 int *ibuf
,buf2
[2] = { 0, 0 };
3816 check_screw_box(box
);
3819 set_dd_cell_sizes_slb(dd
,ddbox
,TRUE
,npulse
);
3821 distribute_cg(fplog
,step
,box
,ddbox
->tric_dir
,cgs
,pos
,dd
);
3822 for(i
=0; i
<dd
->nnodes
; i
++)
3824 ma
->ibuf
[2*i
] = ma
->ncg
[i
];
3825 ma
->ibuf
[2*i
+1] = ma
->nat
[i
];
3833 dd_scatter(dd
,2*sizeof(int),ibuf
,buf2
);
3835 dd
->ncg_home
= buf2
[0];
3836 dd
->nat_home
= buf2
[1];
3837 dd
->ncg_tot
= dd
->ncg_home
;
3838 dd
->nat_tot
= dd
->nat_home
;
3839 if (dd
->ncg_home
> dd
->cg_nalloc
|| dd
->cg_nalloc
== 0)
3841 dd
->cg_nalloc
= over_alloc_dd(dd
->ncg_home
);
3842 srenew(dd
->index_gl
,dd
->cg_nalloc
);
3843 srenew(dd
->cgindex
,dd
->cg_nalloc
+1);
3847 for(i
=0; i
<dd
->nnodes
; i
++)
3849 ma
->ibuf
[i
] = ma
->ncg
[i
]*sizeof(int);
3850 ma
->ibuf
[dd
->nnodes
+i
] = ma
->index
[i
]*sizeof(int);
3855 DDMASTER(dd
) ? ma
->ibuf
: NULL
,
3856 DDMASTER(dd
) ? ma
->ibuf
+dd
->nnodes
: NULL
,
3857 DDMASTER(dd
) ? ma
->cg
: NULL
,
3858 dd
->ncg_home
*sizeof(int),dd
->index_gl
);
3860 /* Determine the home charge group sizes */
3862 for(i
=0; i
<dd
->ncg_home
; i
++)
3864 cg_gl
= dd
->index_gl
[i
];
3866 dd
->cgindex
[i
] + cgs
->index
[cg_gl
+1] - cgs
->index
[cg_gl
];
3871 fprintf(debug
,"Home charge groups:\n");
3872 for(i
=0; i
<dd
->ncg_home
; i
++)
3874 fprintf(debug
," %d",dd
->index_gl
[i
]);
3876 fprintf(debug
,"\n");
3878 fprintf(debug
,"\n");
3882 static int compact_and_copy_vec_at(int ncg
,int *move
,
3885 rvec
*src
,gmx_domdec_comm_t
*comm
,
3888 int m
,icg
,i
,i0
,i1
,nrcg
;
3894 for(m
=0; m
<DIM
*2; m
++)
3900 for(icg
=0; icg
<ncg
; icg
++)
3902 i1
= cgindex
[icg
+1];
3908 /* Compact the home array in place */
3909 for(i
=i0
; i
<i1
; i
++)
3911 copy_rvec(src
[i
],src
[home_pos
++]);
3917 /* Copy to the communication buffer */
3919 pos_vec
[m
] += 1 + vec
*nrcg
;
3920 for(i
=i0
; i
<i1
; i
++)
3922 copy_rvec(src
[i
],comm
->cgcm_state
[m
][pos_vec
[m
]++]);
3924 pos_vec
[m
] += (nvec
- vec
- 1)*nrcg
;
3928 home_pos
+= i1
- i0
;
3936 static int compact_and_copy_vec_cg(int ncg
,int *move
,
3938 int nvec
,rvec
*src
,gmx_domdec_comm_t
*comm
,
3941 int m
,icg
,i0
,i1
,nrcg
;
3947 for(m
=0; m
<DIM
*2; m
++)
3953 for(icg
=0; icg
<ncg
; icg
++)
3955 i1
= cgindex
[icg
+1];
3961 /* Compact the home array in place */
3962 copy_rvec(src
[icg
],src
[home_pos
++]);
3968 /* Copy to the communication buffer */
3969 copy_rvec(src
[icg
],comm
->cgcm_state
[m
][pos_vec
[m
]]);
3970 pos_vec
[m
] += 1 + nrcg
*nvec
;
3982 static int compact_ind(int ncg
,int *move
,
3983 int *index_gl
,int *cgindex
,
3985 gmx_ga2la_t ga2la
,char *bLocalCG
,
3988 int cg
,nat
,a0
,a1
,a
,a_gl
;
3993 for(cg
=0; cg
<ncg
; cg
++)
3999 /* Compact the home arrays in place.
4000 * Anything that can be done here avoids access to global arrays.
4002 cgindex
[home_pos
] = nat
;
4003 for(a
=a0
; a
<a1
; a
++)
4006 gatindex
[nat
] = a_gl
;
4007 /* The cell number stays 0, so we don't need to set it */
4008 ga2la_change_la(ga2la
,a_gl
,nat
);
4011 index_gl
[home_pos
] = index_gl
[cg
];
4012 cginfo
[home_pos
] = cginfo
[cg
];
4013 /* The charge group remains local, so bLocalCG does not change */
4018 /* Clear the global indices */
4019 for(a
=a0
; a
<a1
; a
++)
4021 ga2la_del(ga2la
,gatindex
[a
]);
4025 bLocalCG
[index_gl
[cg
]] = FALSE
;
4029 cgindex
[home_pos
] = nat
;
4034 static void clear_and_mark_ind(int ncg
,int *move
,
4035 int *index_gl
,int *cgindex
,int *gatindex
,
4036 gmx_ga2la_t ga2la
,char *bLocalCG
,
4041 for(cg
=0; cg
<ncg
; cg
++)
4047 /* Clear the global indices */
4048 for(a
=a0
; a
<a1
; a
++)
4050 ga2la_del(ga2la
,gatindex
[a
]);
4054 bLocalCG
[index_gl
[cg
]] = FALSE
;
4056 /* Signal that this cg has moved using the ns cell index.
4057 * Here we set it to -1.
4058 * fill_grid will change it from -1 to 4*grid->ncells.
4060 cell_index
[cg
] = -1;
4065 static void print_cg_move(FILE *fplog
,
4067 gmx_large_int_t step
,int cg
,int dim
,int dir
,
4068 bool bHaveLimitdAndCMOld
,real limitd
,
4069 rvec cm_old
,rvec cm_new
,real pos_d
)
4071 gmx_domdec_comm_t
*comm
;
4076 fprintf(fplog
,"\nStep %s:\n",gmx_step_str(step
,buf
));
4077 if (bHaveLimitdAndCMOld
)
4079 fprintf(fplog
,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition (%f) in direction %c\n",
4080 ddglatnr(dd
,dd
->cgindex
[cg
]),limitd
,dim2char(dim
));
4084 fprintf(fplog
,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4085 ddglatnr(dd
,dd
->cgindex
[cg
]),dim2char(dim
));
4087 fprintf(fplog
,"distance out of cell %f\n",
4088 dir
==1 ? pos_d
- comm
->cell_x1
[dim
] : pos_d
- comm
->cell_x0
[dim
]);
4089 if (bHaveLimitdAndCMOld
)
4091 fprintf(fplog
,"Old coordinates: %8.3f %8.3f %8.3f\n",
4092 cm_old
[XX
],cm_old
[YY
],cm_old
[ZZ
]);
4094 fprintf(fplog
,"New coordinates: %8.3f %8.3f %8.3f\n",
4095 cm_new
[XX
],cm_new
[YY
],cm_new
[ZZ
]);
4096 fprintf(fplog
,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
4098 comm
->old_cell_x0
[dim
],comm
->old_cell_x1
[dim
]);
4099 fprintf(fplog
,"New cell boundaries in direction %c: %8.3f %8.3f\n",
4101 comm
->cell_x0
[dim
],comm
->cell_x1
[dim
]);
4104 static void cg_move_error(FILE *fplog
,
4106 gmx_large_int_t step
,int cg
,int dim
,int dir
,
4107 bool bHaveLimitdAndCMOld
,real limitd
,
4108 rvec cm_old
,rvec cm_new
,real pos_d
)
4112 print_cg_move(fplog
, dd
,step
,cg
,dim
,dir
,
4113 bHaveLimitdAndCMOld
,limitd
,cm_old
,cm_new
,pos_d
);
4115 print_cg_move(stderr
,dd
,step
,cg
,dim
,dir
,
4116 bHaveLimitdAndCMOld
,limitd
,cm_old
,cm_new
,pos_d
);
4118 "A charge group moved too far between two domain decomposition steps\n"
4119 "This usually means that your system is not well equilibrated");
4122 static void rotate_state_atom(t_state
*state
,int a
)
4126 for(est
=0; est
<estNR
; est
++)
4128 if (EST_DISTR(est
) && state
->flags
& (1<<est
)) {
4131 /* Rotate the complete state; for a rectangular box only */
4132 state
->x
[a
][YY
] = state
->box
[YY
][YY
] - state
->x
[a
][YY
];
4133 state
->x
[a
][ZZ
] = state
->box
[ZZ
][ZZ
] - state
->x
[a
][ZZ
];
4136 state
->v
[a
][YY
] = -state
->v
[a
][YY
];
4137 state
->v
[a
][ZZ
] = -state
->v
[a
][ZZ
];
4140 state
->sd_X
[a
][YY
] = -state
->sd_X
[a
][YY
];
4141 state
->sd_X
[a
][ZZ
] = -state
->sd_X
[a
][ZZ
];
4144 state
->cg_p
[a
][YY
] = -state
->cg_p
[a
][YY
];
4145 state
->cg_p
[a
][ZZ
] = -state
->cg_p
[a
][ZZ
];
4147 case estDISRE_INITF
:
4148 case estDISRE_RM3TAV
:
4149 case estORIRE_INITF
:
4151 /* These are distances, so not affected by rotation */
4154 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4160 static int dd_redistribute_cg(FILE *fplog
,gmx_large_int_t step
,
4161 gmx_domdec_t
*dd
,ivec tric_dir
,
4162 t_state
*state
,rvec
**f
,
4163 t_forcerec
*fr
,t_mdatoms
*md
,
4169 int ncg
[DIM
*2],nat
[DIM
*2];
4170 int c
,i
,cg
,k
,k0
,k1
,d
,dim
,dim2
,dir
,d2
,d3
,d4
,cell_d
;
4171 int mc
,cdd
,nrcg
,ncg_recv
,nat_recv
,nvs
,nvr
,nvec
,vec
;
4172 int sbuf
[2],rbuf
[2];
4173 int home_pos_cg
,home_pos_at
,ncg_stay_home
,buf_pos
;
4175 bool bV
=FALSE
,bSDX
=FALSE
,bCGP
=FALSE
;
4180 rvec
*cg_cm
,cell_x0
,cell_x1
,limitd
,limit0
,limit1
,cm_new
;
4182 cginfo_mb_t
*cginfo_mb
;
4183 gmx_domdec_comm_t
*comm
;
4187 check_screw_box(state
->box
);
4193 for(i
=0; i
<estNR
; i
++)
4199 case estX
: /* Always present */ break;
4200 case estV
: bV
= (state
->flags
& (1<<i
)); break;
4201 case estSDX
: bSDX
= (state
->flags
& (1<<i
)); break;
4202 case estCGP
: bCGP
= (state
->flags
& (1<<i
)); break;
4205 case estDISRE_INITF
:
4206 case estDISRE_RM3TAV
:
4207 case estORIRE_INITF
:
4209 /* No processing required */
4212 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4217 if (dd
->ncg_tot
> comm
->nalloc_int
)
4219 comm
->nalloc_int
= over_alloc_dd(dd
->ncg_tot
);
4220 srenew(comm
->buf_int
,comm
->nalloc_int
);
4222 move
= comm
->buf_int
;
4224 /* Clear the count */
4225 for(c
=0; c
<dd
->ndim
*2; c
++)
4231 npbcdim
= dd
->npbcdim
;
4233 for(d
=0; (d
<DIM
); d
++)
4235 limitd
[d
] = dd
->comm
->cellsize_min
[d
];
4236 if (d
>= npbcdim
&& dd
->ci
[d
] == 0)
4238 cell_x0
[d
] = -GMX_FLOAT_MAX
;
4242 cell_x0
[d
] = comm
->cell_x0
[d
];
4244 if (d
>= npbcdim
&& dd
->ci
[d
] == dd
->nc
[d
] - 1)
4246 cell_x1
[d
] = GMX_FLOAT_MAX
;
4250 cell_x1
[d
] = comm
->cell_x1
[d
];
4254 limit0
[d
] = comm
->old_cell_x0
[d
] - limitd
[d
];
4255 limit1
[d
] = comm
->old_cell_x1
[d
] + limitd
[d
];
4259 /* We check after communication if a charge group moved
4260 * more than one cell. Set the pre-comm check limit to float_max.
4262 limit0
[d
] = -GMX_FLOAT_MAX
;
4263 limit1
[d
] = GMX_FLOAT_MAX
;
4267 make_tric_corr_matrix(npbcdim
,state
->box
,tcm
);
4269 cgindex
= dd
->cgindex
;
4271 /* Compute the center of geometry for all home charge groups
4272 * and put them in the box and determine where they should go.
4274 for(cg
=0; cg
<dd
->ncg_home
; cg
++)
4281 copy_rvec(state
->x
[k0
],cm_new
);
4288 for(k
=k0
; (k
<k1
); k
++)
4290 rvec_inc(cm_new
,state
->x
[k
]);
4292 for(d
=0; (d
<DIM
); d
++)
4294 cm_new
[d
] = inv_ncg
*cm_new
[d
];
4299 /* Do pbc and check DD cell boundary crossings */
4300 for(d
=DIM
-1; d
>=0; d
--)
4304 bScrew
= (dd
->bScrewPBC
&& d
== XX
);
4305 /* Determine the location of this cg in lattice coordinates */
4309 for(d2
=d
+1; d2
<DIM
; d2
++)
4311 pos_d
+= cm_new
[d2
]*tcm
[d2
][d
];
4314 /* Put the charge group in the triclinic unit-cell */
4315 if (pos_d
>= cell_x1
[d
])
4317 if (pos_d
>= limit1
[d
])
4319 cg_move_error(fplog
,dd
,step
,cg
,d
,1,TRUE
,limitd
[d
],
4320 cg_cm
[cg
],cm_new
,pos_d
);
4323 if (dd
->ci
[d
] == dd
->nc
[d
] - 1)
4325 rvec_dec(cm_new
,state
->box
[d
]);
4328 cm_new
[YY
] = state
->box
[YY
][YY
] - cm_new
[YY
];
4329 cm_new
[ZZ
] = state
->box
[ZZ
][ZZ
] - cm_new
[ZZ
];
4331 for(k
=k0
; (k
<k1
); k
++)
4333 rvec_dec(state
->x
[k
],state
->box
[d
]);
4336 rotate_state_atom(state
,k
);
4341 else if (pos_d
< cell_x0
[d
])
4343 if (pos_d
< limit0
[d
])
4345 cg_move_error(fplog
,dd
,step
,cg
,d
,-1,TRUE
,limitd
[d
],
4346 cg_cm
[cg
],cm_new
,pos_d
);
4351 rvec_inc(cm_new
,state
->box
[d
]);
4354 cm_new
[YY
] = state
->box
[YY
][YY
] - cm_new
[YY
];
4355 cm_new
[ZZ
] = state
->box
[ZZ
][ZZ
] - cm_new
[ZZ
];
4357 for(k
=k0
; (k
<k1
); k
++)
4359 rvec_inc(state
->x
[k
],state
->box
[d
]);
4362 rotate_state_atom(state
,k
);
4368 else if (d
< npbcdim
)
4370 /* Put the charge group in the rectangular unit-cell */
4371 while (cm_new
[d
] >= state
->box
[d
][d
])
4373 rvec_dec(cm_new
,state
->box
[d
]);
4374 for(k
=k0
; (k
<k1
); k
++)
4376 rvec_dec(state
->x
[k
],state
->box
[d
]);
4379 while (cm_new
[d
] < 0)
4381 rvec_inc(cm_new
,state
->box
[d
]);
4382 for(k
=k0
; (k
<k1
); k
++)
4384 rvec_inc(state
->x
[k
],state
->box
[d
]);
4390 copy_rvec(cm_new
,cg_cm
[cg
]);
4392 /* Determine where this cg should go */
4395 for(d
=0; d
<dd
->ndim
; d
++)
4400 flag
|= DD_FLAG_FW(d
);
4406 else if (dev
[dim
] == -1)
4408 flag
|= DD_FLAG_BW(d
);
4410 if (dd
->nc
[dim
] > 2)
4424 if (ncg
[mc
]+1 > comm
->cggl_flag_nalloc
[mc
])
4426 comm
->cggl_flag_nalloc
[mc
] = over_alloc_dd(ncg
[mc
]+1);
4427 srenew(comm
->cggl_flag
[mc
],comm
->cggl_flag_nalloc
[mc
]*DD_CGIBS
);
4429 comm
->cggl_flag
[mc
][ncg
[mc
]*DD_CGIBS
] = dd
->index_gl
[cg
];
4430 /* We store the cg size in the lower 16 bits
4431 * and the place where the charge group should go
4432 * in the next 6 bits. This saves some communication volume.
4434 comm
->cggl_flag
[mc
][ncg
[mc
]*DD_CGIBS
+1] = nrcg
| flag
;
4440 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
4441 inc_nrnb(nrnb
,eNR_RESETX
,dd
->ncg_home
);
4457 /* Make sure the communication buffers are large enough */
4458 for(mc
=0; mc
<dd
->ndim
*2; mc
++)
4460 nvr
= ncg
[mc
] + nat
[mc
]*nvec
;
4461 if (nvr
> comm
->cgcm_state_nalloc
[mc
])
4463 comm
->cgcm_state_nalloc
[mc
] = over_alloc_dd(nvr
);
4464 srenew(comm
->cgcm_state
[mc
],comm
->cgcm_state_nalloc
[mc
]);
4468 /* Recalculating cg_cm might be cheaper than communicating,
4469 * but that could give rise to rounding issues.
4472 compact_and_copy_vec_cg(dd
->ncg_home
,move
,cgindex
,
4473 nvec
,cg_cm
,comm
,bCompact
);
4477 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4478 nvec
,vec
++,state
->x
,comm
,bCompact
);
4481 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4482 nvec
,vec
++,state
->v
,comm
,bCompact
);
4486 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4487 nvec
,vec
++,state
->sd_X
,comm
,bCompact
);
4491 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4492 nvec
,vec
++,state
->cg_p
,comm
,bCompact
);
4497 compact_ind(dd
->ncg_home
,move
,
4498 dd
->index_gl
,dd
->cgindex
,dd
->gatindex
,
4499 dd
->ga2la
,comm
->bLocalCG
,
4504 clear_and_mark_ind(dd
->ncg_home
,move
,
4505 dd
->index_gl
,dd
->cgindex
,dd
->gatindex
,
4506 dd
->ga2la
,comm
->bLocalCG
,
4507 fr
->ns
.grid
->cell_index
);
4510 cginfo_mb
= fr
->cginfo_mb
;
4512 ncg_stay_home
= home_pos_cg
;
4513 for(d
=0; d
<dd
->ndim
; d
++)
4519 for(dir
=0; dir
<(dd
->nc
[dim
]==2 ? 1 : 2); dir
++)
4522 /* Communicate the cg and atom counts */
4527 fprintf(debug
,"Sending ddim %d dir %d: ncg %d nat %d\n",
4528 d
,dir
,sbuf
[0],sbuf
[1]);
4530 dd_sendrecv_int(dd
, d
, dir
, sbuf
, 2, rbuf
, 2);
4532 if ((ncg_recv
+rbuf
[0])*DD_CGIBS
> comm
->nalloc_int
)
4534 comm
->nalloc_int
= over_alloc_dd((ncg_recv
+rbuf
[0])*DD_CGIBS
);
4535 srenew(comm
->buf_int
,comm
->nalloc_int
);
4538 /* Communicate the charge group indices, sizes and flags */
4539 dd_sendrecv_int(dd
, d
, dir
,
4540 comm
->cggl_flag
[cdd
], sbuf
[0]*DD_CGIBS
,
4541 comm
->buf_int
+ncg_recv
*DD_CGIBS
, rbuf
[0]*DD_CGIBS
);
4543 nvs
= ncg
[cdd
] + nat
[cdd
]*nvec
;
4544 i
= rbuf
[0] + rbuf
[1] *nvec
;
4545 vec_rvec_check_alloc(&comm
->vbuf
,nvr
+i
);
4547 /* Communicate cgcm and state */
4548 dd_sendrecv_rvec(dd
, d
, dir
,
4549 comm
->cgcm_state
[cdd
], nvs
,
4550 comm
->vbuf
.v
+nvr
, i
);
4551 ncg_recv
+= rbuf
[0];
4552 nat_recv
+= rbuf
[1];
4556 /* Process the received charge groups */
4558 for(cg
=0; cg
<ncg_recv
; cg
++)
4560 flag
= comm
->buf_int
[cg
*DD_CGIBS
+1];
4562 if (dim
>= npbcdim
&& dd
->nc
[dim
] > 2)
4564 /* No pbc in this dim and more than one domain boundary.
4565 * We to a separate check if a charge did not move too far.
4567 if (((flag
& DD_FLAG_FW(d
)) &&
4568 comm
->vbuf
.v
[buf_pos
][d
] > cell_x1
[dim
]) ||
4569 ((flag
& DD_FLAG_BW(d
)) &&
4570 comm
->vbuf
.v
[buf_pos
][d
] < cell_x0
[dim
]))
4572 cg_move_error(fplog
,dd
,step
,cg
,d
,
4573 (flag
& DD_FLAG_FW(d
)) ? 1 : 0,
4575 comm
->vbuf
.v
[buf_pos
],
4576 comm
->vbuf
.v
[buf_pos
],
4577 comm
->vbuf
.v
[buf_pos
][d
]);
4584 /* Check which direction this cg should go */
4585 for(d2
=d
+1; (d2
<dd
->ndim
&& mc
==-1); d2
++)
4589 /* The cell boundaries for dimension d2 are not equal
4590 * for each cell row of the lower dimension(s),
4591 * therefore we might need to redetermine where
4592 * this cg should go.
4595 /* If this cg crosses the box boundary in dimension d2
4596 * we can use the communicated flag, so we do not
4597 * have to worry about pbc.
4599 if (!((dd
->ci
[dim2
] == dd
->nc
[dim2
]-1 &&
4600 (flag
& DD_FLAG_FW(d2
))) ||
4601 (dd
->ci
[dim2
] == 0 &&
4602 (flag
& DD_FLAG_BW(d2
)))))
4604 /* Clear the two flags for this dimension */
4605 flag
&= ~(DD_FLAG_FW(d2
) | DD_FLAG_BW(d2
));
4606 /* Determine the location of this cg
4607 * in lattice coordinates
4609 pos_d
= comm
->vbuf
.v
[buf_pos
][dim2
];
4612 for(d3
=dim2
+1; d3
<DIM
; d3
++)
4615 comm
->vbuf
.v
[buf_pos
][d3
]*tcm
[d3
][dim2
];
4618 /* Check of we are not at the box edge.
4619 * pbc is only handled in the first step above,
4620 * but this check could move over pbc while
4621 * the first step did not due to different rounding.
4623 if (pos_d
>= cell_x1
[dim2
] &&
4624 dd
->ci
[dim2
] != dd
->nc
[dim2
]-1)
4626 flag
|= DD_FLAG_FW(d2
);
4628 else if (pos_d
< cell_x0
[dim2
] &&
4631 flag
|= DD_FLAG_BW(d2
);
4633 comm
->buf_int
[cg
*DD_CGIBS
+1] = flag
;
4636 /* Set to which neighboring cell this cg should go */
4637 if (flag
& DD_FLAG_FW(d2
))
4641 else if (flag
& DD_FLAG_BW(d2
))
4643 if (dd
->nc
[dd
->dim
[d2
]] > 2)
4655 nrcg
= flag
& DD_FLAG_NRCG
;
4658 if (home_pos_cg
+1 > dd
->cg_nalloc
)
4660 dd
->cg_nalloc
= over_alloc_dd(home_pos_cg
+1);
4661 srenew(dd
->index_gl
,dd
->cg_nalloc
);
4662 srenew(dd
->cgindex
,dd
->cg_nalloc
+1);
4664 /* Set the global charge group index and size */
4665 dd
->index_gl
[home_pos_cg
] = comm
->buf_int
[cg
*DD_CGIBS
];
4666 dd
->cgindex
[home_pos_cg
+1] = dd
->cgindex
[home_pos_cg
] + nrcg
;
4667 /* Copy the state from the buffer */
4668 if (home_pos_cg
>= fr
->cg_nalloc
)
4670 dd_realloc_fr_cg(fr
,home_pos_cg
+1);
4673 copy_rvec(comm
->vbuf
.v
[buf_pos
++],cg_cm
[home_pos_cg
]);
4674 /* Set the cginfo */
4675 fr
->cginfo
[home_pos_cg
] = ddcginfo(cginfo_mb
,
4676 dd
->index_gl
[home_pos_cg
]);
4679 comm
->bLocalCG
[dd
->index_gl
[home_pos_cg
]] = TRUE
;
4682 if (home_pos_at
+nrcg
> state
->nalloc
)
4684 dd_realloc_state(state
,f
,home_pos_at
+nrcg
);
4686 for(i
=0; i
<nrcg
; i
++)
4688 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4689 state
->x
[home_pos_at
+i
]);
4693 for(i
=0; i
<nrcg
; i
++)
4695 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4696 state
->v
[home_pos_at
+i
]);
4701 for(i
=0; i
<nrcg
; i
++)
4703 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4704 state
->sd_X
[home_pos_at
+i
]);
4709 for(i
=0; i
<nrcg
; i
++)
4711 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4712 state
->cg_p
[home_pos_at
+i
]);
4716 home_pos_at
+= nrcg
;
4720 /* Reallocate the buffers if necessary */
4721 if (ncg
[mc
]+1 > comm
->cggl_flag_nalloc
[mc
])
4723 comm
->cggl_flag_nalloc
[mc
] = over_alloc_dd(ncg
[mc
]+1);
4724 srenew(comm
->cggl_flag
[mc
],comm
->cggl_flag_nalloc
[mc
]*DD_CGIBS
);
4726 nvr
= ncg
[mc
] + nat
[mc
]*nvec
;
4727 if (nvr
+ 1 + nrcg
*nvec
> comm
->cgcm_state_nalloc
[mc
])
4729 comm
->cgcm_state_nalloc
[mc
] = over_alloc_dd(nvr
+ 1 + nrcg
*nvec
);
4730 srenew(comm
->cgcm_state
[mc
],comm
->cgcm_state_nalloc
[mc
]);
4732 /* Copy from the receive to the send buffers */
4733 memcpy(comm
->cggl_flag
[mc
] + ncg
[mc
]*DD_CGIBS
,
4734 comm
->buf_int
+ cg
*DD_CGIBS
,
4735 DD_CGIBS
*sizeof(int));
4736 memcpy(comm
->cgcm_state
[mc
][nvr
],
4737 comm
->vbuf
.v
[buf_pos
],
4738 (1+nrcg
*nvec
)*sizeof(rvec
));
4739 buf_pos
+= 1 + nrcg
*nvec
;
4746 /* With sorting (!bCompact) the indices are now only partially up to date
4747 * and ncg_home and nat_home are not the real count, since there are
4748 * "holes" in the arrays for the charge groups that moved to neighbors.
4750 dd
->ncg_home
= home_pos_cg
;
4751 dd
->nat_home
= home_pos_at
;
4755 fprintf(debug
,"Finished repartitioning\n");
4758 return ncg_stay_home
;
4761 void dd_cycles_add(gmx_domdec_t
*dd
,float cycles
,int ddCycl
)
4763 dd
->comm
->cycl
[ddCycl
] += cycles
;
4764 dd
->comm
->cycl_n
[ddCycl
]++;
4765 if (cycles
> dd
->comm
->cycl_max
[ddCycl
])
4767 dd
->comm
->cycl_max
[ddCycl
] = cycles
;
4771 static double force_flop_count(t_nrnb
*nrnb
)
4778 for(i
=eNR_NBKERNEL010
; i
<eNR_NBKERNEL_FREE_ENERGY
; i
++)
4780 /* To get closer to the real timings, we half the count
4781 * for the normal loops and again half it for water loops.
4784 if (strstr(name
,"W3") != NULL
|| strstr(name
,"W4") != NULL
)
4786 sum
+= nrnb
->n
[i
]*0.25*cost_nrnb(i
);
4790 sum
+= nrnb
->n
[i
]*0.50*cost_nrnb(i
);
4793 for(i
=eNR_NBKERNEL_FREE_ENERGY
; i
<=eNR_NB14
; i
++)
4796 if (strstr(name
,"W3") != NULL
|| strstr(name
,"W4") != NULL
)
4797 sum
+= nrnb
->n
[i
]*cost_nrnb(i
);
4799 for(i
=eNR_BONDS
; i
<=eNR_WALLS
; i
++)
4801 sum
+= nrnb
->n
[i
]*cost_nrnb(i
);
4807 void dd_force_flop_start(gmx_domdec_t
*dd
,t_nrnb
*nrnb
)
4809 if (dd
->comm
->eFlop
)
4811 dd
->comm
->flop
-= force_flop_count(nrnb
);
4814 void dd_force_flop_stop(gmx_domdec_t
*dd
,t_nrnb
*nrnb
)
4816 if (dd
->comm
->eFlop
)
4818 dd
->comm
->flop
+= force_flop_count(nrnb
);
4823 static void clear_dd_cycle_counts(gmx_domdec_t
*dd
)
4827 for(i
=0; i
<ddCyclNr
; i
++)
4829 dd
->comm
->cycl
[i
] = 0;
4830 dd
->comm
->cycl_n
[i
] = 0;
4831 dd
->comm
->cycl_max
[i
] = 0;
4834 dd
->comm
->flop_n
= 0;
4837 static void get_load_distribution(gmx_domdec_t
*dd
,gmx_wallcycle_t wcycle
)
4839 gmx_domdec_comm_t
*comm
;
4840 gmx_domdec_load_t
*load
;
4841 gmx_domdec_root_t
*root
=NULL
;
4842 int d
,dim
,cid
,i
,pos
;
4843 float cell_frac
=0,sbuf
[DD_NLOAD_MAX
];
4848 fprintf(debug
,"get_load_distribution start\n");
4851 wallcycle_start(wcycle
,ewcDDCOMMLOAD
);
4855 bSepPME
= (dd
->pme_nodeid
>= 0);
4857 for(d
=dd
->ndim
-1; d
>=0; d
--)
4860 /* Check if we participate in the communication in this dimension */
4861 if (d
== dd
->ndim
-1 ||
4862 (dd
->ci
[dd
->dim
[d
+1]]==0 && dd
->ci
[dd
->dim
[dd
->ndim
-1]]==0))
4864 load
= &comm
->load
[d
];
4867 cell_frac
= comm
->cell_f1
[d
] - comm
->cell_f0
[d
];
4870 if (d
== dd
->ndim
-1)
4872 sbuf
[pos
++] = dd_force_load(comm
);
4873 sbuf
[pos
++] = sbuf
[0];
4876 sbuf
[pos
++] = sbuf
[0];
4877 sbuf
[pos
++] = cell_frac
;
4880 sbuf
[pos
++] = comm
->cell_f_max0
[d
];
4881 sbuf
[pos
++] = comm
->cell_f_min1
[d
];
4886 sbuf
[pos
++] = comm
->cycl
[ddCyclPPduringPME
];
4887 sbuf
[pos
++] = comm
->cycl
[ddCyclPME
];
4892 sbuf
[pos
++] = comm
->load
[d
+1].sum
;
4893 sbuf
[pos
++] = comm
->load
[d
+1].max
;
4896 sbuf
[pos
++] = comm
->load
[d
+1].sum_m
;
4897 sbuf
[pos
++] = comm
->load
[d
+1].cvol_min
*cell_frac
;
4898 sbuf
[pos
++] = comm
->load
[d
+1].flags
;
4901 sbuf
[pos
++] = comm
->cell_f_max0
[d
];
4902 sbuf
[pos
++] = comm
->cell_f_min1
[d
];
4907 sbuf
[pos
++] = comm
->load
[d
+1].mdf
;
4908 sbuf
[pos
++] = comm
->load
[d
+1].pme
;
4912 /* Communicate a row in DD direction d.
4913 * The communicators are setup such that the root always has rank 0.
4916 MPI_Gather(sbuf
,load
->nload
*sizeof(float),MPI_BYTE
,
4917 load
->load
,load
->nload
*sizeof(float),MPI_BYTE
,
4918 0,comm
->mpi_comm_load
[d
]);
4920 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
4922 /* We are the root, process this row */
4923 if (comm
->bDynLoadBal
)
4925 root
= comm
->root
[d
];
4935 for(i
=0; i
<dd
->nc
[dim
]; i
++)
4937 load
->sum
+= load
->load
[pos
++];
4938 load
->max
= max(load
->max
,load
->load
[pos
]);
4944 /* This direction could not be load balanced properly,
4945 * therefore we need to use the maximum iso the average load.
4947 load
->sum_m
= max(load
->sum_m
,load
->load
[pos
]);
4951 load
->sum_m
+= load
->load
[pos
];
4954 load
->cvol_min
= min(load
->cvol_min
,load
->load
[pos
]);
4958 load
->flags
= (int)(load
->load
[pos
++] + 0.5);
4962 root
->cell_f_max0
[i
] = load
->load
[pos
++];
4963 root
->cell_f_min1
[i
] = load
->load
[pos
++];
4968 load
->mdf
= max(load
->mdf
,load
->load
[pos
]);
4970 load
->pme
= max(load
->pme
,load
->load
[pos
]);
4974 if (comm
->bDynLoadBal
&& root
->bLimited
)
4976 load
->sum_m
*= dd
->nc
[dim
];
4977 load
->flags
|= (1<<d
);
4985 comm
->nload
+= dd_load_count(comm
);
4986 comm
->load_step
+= comm
->cycl
[ddCyclStep
];
4987 comm
->load_sum
+= comm
->load
[0].sum
;
4988 comm
->load_max
+= comm
->load
[0].max
;
4989 if (comm
->bDynLoadBal
)
4991 for(d
=0; d
<dd
->ndim
; d
++)
4993 if (comm
->load
[0].flags
& (1<<d
))
4995 comm
->load_lim
[d
]++;
5001 comm
->load_mdf
+= comm
->load
[0].mdf
;
5002 comm
->load_pme
+= comm
->load
[0].pme
;
5006 wallcycle_stop(wcycle
,ewcDDCOMMLOAD
);
5010 fprintf(debug
,"get_load_distribution finished\n");
5014 static float dd_force_imb_perf_loss(gmx_domdec_t
*dd
)
5016 /* Return the relative performance loss on the total run time
5017 * due to the force calculation load imbalance.
5019 if (dd
->comm
->nload
> 0)
5022 (dd
->comm
->load_max
*dd
->nnodes
- dd
->comm
->load_sum
)/
5023 (dd
->comm
->load_step
*dd
->nnodes
);
5031 static void print_dd_load_av(FILE *fplog
,gmx_domdec_t
*dd
)
5034 int npp
,npme
,nnodes
,d
,limp
;
5035 float imbal
,pme_f_ratio
,lossf
,lossp
=0;
5037 gmx_domdec_comm_t
*comm
;
5040 if (DDMASTER(dd
) && comm
->nload
> 0)
5043 npme
= (dd
->pme_nodeid
>= 0) ? comm
->npmenodes
: 0;
5044 nnodes
= npp
+ npme
;
5045 imbal
= comm
->load_max
*npp
/comm
->load_sum
- 1;
5046 lossf
= dd_force_imb_perf_loss(dd
);
5047 sprintf(buf
," Average load imbalance: %.1f %%\n",imbal
*100);
5048 fprintf(fplog
,"%s",buf
);
5049 fprintf(stderr
,"\n");
5050 fprintf(stderr
,"%s",buf
);
5051 sprintf(buf
," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf
*100);
5052 fprintf(fplog
,"%s",buf
);
5053 fprintf(stderr
,"%s",buf
);
5055 if (comm
->bDynLoadBal
)
5057 sprintf(buf
," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5058 for(d
=0; d
<dd
->ndim
; d
++)
5060 limp
= (200*comm
->load_lim
[d
]+1)/(2*comm
->nload
);
5061 sprintf(buf
+strlen(buf
)," %c %d %%",dim2char(dd
->dim
[d
]),limp
);
5067 sprintf(buf
+strlen(buf
),"\n");
5068 fprintf(fplog
,"%s",buf
);
5069 fprintf(stderr
,"%s",buf
);
5073 pme_f_ratio
= comm
->load_pme
/comm
->load_mdf
;
5074 lossp
= (comm
->load_pme
-comm
->load_mdf
)/comm
->load_step
;
5077 lossp
*= (float)npme
/(float)nnodes
;
5081 lossp
*= (float)npp
/(float)nnodes
;
5083 sprintf(buf
," Average PME mesh/force load: %5.3f\n",pme_f_ratio
);
5084 fprintf(fplog
,"%s",buf
);
5085 fprintf(stderr
,"%s",buf
);
5086 sprintf(buf
," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp
)*100);
5087 fprintf(fplog
,"%s",buf
);
5088 fprintf(stderr
,"%s",buf
);
5090 fprintf(fplog
,"\n");
5091 fprintf(stderr
,"\n");
5093 if (lossf
>= DD_PERF_LOSS
)
5096 "NOTE: %.1f %% performance was lost due to load imbalance\n"
5097 " in the domain decomposition.\n",lossf
*100);
5098 if (!comm
->bDynLoadBal
)
5100 sprintf(buf
+strlen(buf
)," You might want to use dynamic load balancing (option -dlb.)\n");
5104 sprintf(buf
+strlen(buf
)," You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5106 fprintf(fplog
,"%s\n",buf
);
5107 fprintf(stderr
,"%s\n",buf
);
5109 if (npme
> 0 && fabs(lossp
) >= DD_PERF_LOSS
)
5112 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5113 " had %s work to do than the PP nodes.\n"
5114 " You might want to %s the number of PME nodes\n"
5115 " or %s the cut-off and the grid spacing.\n",
5117 (lossp
< 0) ? "less" : "more",
5118 (lossp
< 0) ? "decrease" : "increase",
5119 (lossp
< 0) ? "decrease" : "increase");
5120 fprintf(fplog
,"%s\n",buf
);
5121 fprintf(stderr
,"%s\n",buf
);
5126 static float dd_vol_min(gmx_domdec_t
*dd
)
5128 return dd
->comm
->load
[0].cvol_min
*dd
->nnodes
;
5131 static bool dd_load_flags(gmx_domdec_t
*dd
)
5133 return dd
->comm
->load
[0].flags
;
5136 static float dd_f_imbal(gmx_domdec_t
*dd
)
5138 return dd
->comm
->load
[0].max
*dd
->nnodes
/dd
->comm
->load
[0].sum
- 1;
5141 static float dd_pme_f_ratio(gmx_domdec_t
*dd
)
5143 return dd
->comm
->load
[0].pme
/dd
->comm
->load
[0].mdf
;
5146 static void dd_print_load(FILE *fplog
,gmx_domdec_t
*dd
,gmx_large_int_t step
)
5151 flags
= dd_load_flags(dd
);
5155 "DD load balancing is limited by minimum cell size in dimension");
5156 for(d
=0; d
<dd
->ndim
; d
++)
5160 fprintf(fplog
," %c",dim2char(dd
->dim
[d
]));
5163 fprintf(fplog
,"\n");
5165 fprintf(fplog
,"DD step %s",gmx_step_str(step
,buf
));
5166 if (dd
->comm
->bDynLoadBal
)
5168 fprintf(fplog
," vol min/aver %5.3f%c",
5169 dd_vol_min(dd
),flags
? '!' : ' ');
5171 fprintf(fplog
," load imb.: force %4.1f%%",dd_f_imbal(dd
)*100);
5172 if (dd
->comm
->cycl_n
[ddCyclPME
])
5174 fprintf(fplog
," pme mesh/force %5.3f",dd_pme_f_ratio(dd
));
5176 fprintf(fplog
,"\n\n");
5179 static void dd_print_load_verbose(gmx_domdec_t
*dd
)
5181 if (dd
->comm
->bDynLoadBal
)
5183 fprintf(stderr
,"vol %4.2f%c ",
5184 dd_vol_min(dd
),dd_load_flags(dd
) ? '!' : ' ');
5186 fprintf(stderr
,"imb F %2d%% ",(int)(dd_f_imbal(dd
)*100+0.5));
5187 if (dd
->comm
->cycl_n
[ddCyclPME
])
5189 fprintf(stderr
,"pme/F %4.2f ",dd_pme_f_ratio(dd
));
5194 static void make_load_communicator(gmx_domdec_t
*dd
,MPI_Group g_all
,
5195 int dim_ind
,ivec loc
)
5201 gmx_domdec_root_t
*root
;
5203 dim
= dd
->dim
[dim_ind
];
5204 copy_ivec(loc
,loc_c
);
5205 snew(rank
,dd
->nc
[dim
]);
5206 for(i
=0; i
<dd
->nc
[dim
]; i
++)
5209 rank
[i
] = dd_index(dd
->nc
,loc_c
);
5211 /* Here we create a new group, that does not necessarily
5212 * include our process. But MPI_Comm_create needs to be
5213 * called by all the processes in the original communicator.
5214 * Calling MPI_Group_free afterwards gives errors, so I assume
5215 * also the group is needed by all processes. (B. Hess)
5217 MPI_Group_incl(g_all
,dd
->nc
[dim
],rank
,&g_row
);
5218 MPI_Comm_create(dd
->mpi_comm_all
,g_row
,&c_row
);
5219 if (c_row
!= MPI_COMM_NULL
)
5221 /* This process is part of the group */
5222 dd
->comm
->mpi_comm_load
[dim_ind
] = c_row
;
5223 if (dd
->comm
->eDLB
!= edlbNO
)
5225 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5227 /* This is the root process of this row */
5228 snew(dd
->comm
->root
[dim_ind
],1);
5229 root
= dd
->comm
->root
[dim_ind
];
5230 snew(root
->cell_f
,DD_CELL_F_SIZE(dd
,dim_ind
));
5231 snew(root
->old_cell_f
,dd
->nc
[dim
]+1);
5232 snew(root
->bCellMin
,dd
->nc
[dim
]);
5235 snew(root
->cell_f_max0
,dd
->nc
[dim
]);
5236 snew(root
->cell_f_min1
,dd
->nc
[dim
]);
5237 snew(root
->bound_min
,dd
->nc
[dim
]);
5238 snew(root
->bound_max
,dd
->nc
[dim
]);
5240 snew(root
->buf_ncd
,dd
->nc
[dim
]);
5244 /* This is not a root process, we only need to receive cell_f */
5245 snew(dd
->comm
->cell_f_row
,DD_CELL_F_SIZE(dd
,dim_ind
));
5248 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5250 snew(dd
->comm
->load
[dim_ind
].load
,dd
->nc
[dim
]*DD_NLOAD_MAX
);
5257 static void make_load_communicators(gmx_domdec_t
*dd
)
5265 fprintf(debug
,"Making load communicators\n");
5267 MPI_Comm_group(dd
->mpi_comm_all
,&g_all
);
5269 snew(dd
->comm
->load
,dd
->ndim
);
5270 snew(dd
->comm
->mpi_comm_load
,dd
->ndim
);
5273 make_load_communicator(dd
,g_all
,0,loc
);
5276 for(i
=0; i
<dd
->nc
[dim0
]; i
++) {
5278 make_load_communicator(dd
,g_all
,1,loc
);
5283 for(i
=0; i
<dd
->nc
[dim0
]; i
++) {
5286 for(j
=0; j
<dd
->nc
[dim1
]; j
++) {
5288 make_load_communicator(dd
,g_all
,2,loc
);
5293 MPI_Group_free(&g_all
);
5296 fprintf(debug
,"Finished making load communicators\n");
5300 void setup_dd_grid(FILE *fplog
,gmx_domdec_t
*dd
)
5306 ivec dd_zp
[DD_MAXIZONE
];
5307 gmx_domdec_zones_t
*zones
;
5308 gmx_domdec_ns_ranges_t
*izone
;
5310 for(d
=0; d
<dd
->ndim
; d
++)
5313 copy_ivec(dd
->ci
,tmp
);
5314 tmp
[dim
] = (tmp
[dim
] + 1) % dd
->nc
[dim
];
5315 dd
->neighbor
[d
][0] = ddcoord2ddnodeid(dd
,tmp
);
5316 copy_ivec(dd
->ci
,tmp
);
5317 tmp
[dim
] = (tmp
[dim
] - 1 + dd
->nc
[dim
]) % dd
->nc
[dim
];
5318 dd
->neighbor
[d
][1] = ddcoord2ddnodeid(dd
,tmp
);
5321 fprintf(debug
,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5324 dd
->neighbor
[d
][1]);
5330 fprintf(stderr
,"Making %dD domain decomposition %d x %d x %d\n",
5331 dd
->ndim
,dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
]);
5335 fprintf(fplog
,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5337 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
],
5338 dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5345 for(i
=0; i
<nzonep
; i
++)
5347 copy_ivec(dd_zp3
[i
],dd_zp
[i
]);
5353 for(i
=0; i
<nzonep
; i
++)
5355 copy_ivec(dd_zp2
[i
],dd_zp
[i
]);
5361 for(i
=0; i
<nzonep
; i
++)
5363 copy_ivec(dd_zp1
[i
],dd_zp
[i
]);
5367 gmx_fatal(FARGS
,"Can only do 1, 2 or 3D domain decomposition");
5372 zones
= &dd
->comm
->zones
;
5374 for(i
=0; i
<nzone
; i
++)
5377 clear_ivec(zones
->shift
[i
]);
5378 for(d
=0; d
<dd
->ndim
; d
++)
5380 zones
->shift
[i
][dd
->dim
[d
]] = dd_zo
[i
][m
++];
5385 for(i
=0; i
<nzone
; i
++)
5387 for(d
=0; d
<DIM
; d
++)
5389 s
[d
] = dd
->ci
[d
] - zones
->shift
[i
][d
];
5394 else if (s
[d
] >= dd
->nc
[d
])
5400 zones
->nizone
= nzonep
;
5401 for(i
=0; i
<zones
->nizone
; i
++)
5403 if (dd_zp
[i
][0] != i
)
5405 gmx_fatal(FARGS
,"Internal inconsistency in the dd grid setup");
5407 izone
= &zones
->izone
[i
];
5408 izone
->j0
= dd_zp
[i
][1];
5409 izone
->j1
= dd_zp
[i
][2];
5410 for(dim
=0; dim
<DIM
; dim
++)
5412 if (dd
->nc
[dim
] == 1)
5414 /* All shifts should be allowed */
5415 izone
->shift0
[dim
] = -1;
5416 izone
->shift1
[dim
] = 1;
5421 izone->shift0[d] = 0;
5422 izone->shift1[d] = 0;
5423 for(j=izone->j0; j<izone->j1; j++) {
5424 if (dd->shift[j][d] > dd->shift[i][d])
5425 izone->shift0[d] = -1;
5426 if (dd->shift[j][d] < dd->shift[i][d])
5427 izone->shift1[d] = 1;
5433 /* Assume the shift are not more than 1 cell */
5434 izone
->shift0
[dim
] = 1;
5435 izone
->shift1
[dim
] = -1;
5436 for(j
=izone
->j0
; j
<izone
->j1
; j
++)
5438 shift_diff
= zones
->shift
[j
][dim
] - zones
->shift
[i
][dim
];
5439 if (shift_diff
< izone
->shift0
[dim
])
5441 izone
->shift0
[dim
] = shift_diff
;
5443 if (shift_diff
> izone
->shift1
[dim
])
5445 izone
->shift1
[dim
] = shift_diff
;
5452 if (dd
->comm
->eDLB
!= edlbNO
)
5454 snew(dd
->comm
->root
,dd
->ndim
);
5457 if (dd
->comm
->bRecordLoad
)
5459 make_load_communicators(dd
);
5463 static void make_pp_communicator(FILE *fplog
,t_commrec
*cr
,int reorder
)
5466 gmx_domdec_comm_t
*comm
;
5477 if (comm
->bCartesianPP
)
5479 /* Set up cartesian communication for the particle-particle part */
5482 fprintf(fplog
,"Will use a Cartesian communicator: %d x %d x %d\n",
5483 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
]);
5486 for(i
=0; i
<DIM
; i
++)
5490 MPI_Cart_create(cr
->mpi_comm_mygroup
,DIM
,dd
->nc
,periods
,reorder
,
5492 /* We overwrite the old communicator with the new cartesian one */
5493 cr
->mpi_comm_mygroup
= comm_cart
;
5496 dd
->mpi_comm_all
= cr
->mpi_comm_mygroup
;
5497 MPI_Comm_rank(dd
->mpi_comm_all
,&dd
->rank
);
5499 if (comm
->bCartesianPP_PME
)
5501 /* Since we want to use the original cartesian setup for sim,
5502 * and not the one after split, we need to make an index.
5504 snew(comm
->ddindex2ddnodeid
,dd
->nnodes
);
5505 comm
->ddindex2ddnodeid
[dd_index(dd
->nc
,dd
->ci
)] = dd
->rank
;
5506 gmx_sumi(dd
->nnodes
,comm
->ddindex2ddnodeid
,cr
);
5507 /* Get the rank of the DD master,
5508 * above we made sure that the master node is a PP node.
5518 MPI_Allreduce(&rank
,&dd
->masterrank
,1,MPI_INT
,MPI_SUM
,dd
->mpi_comm_all
);
5520 else if (comm
->bCartesianPP
)
5522 if (cr
->npmenodes
== 0)
5524 /* The PP communicator is also
5525 * the communicator for this simulation
5527 cr
->mpi_comm_mysim
= cr
->mpi_comm_mygroup
;
5529 cr
->nodeid
= dd
->rank
;
5531 MPI_Cart_coords(dd
->mpi_comm_all
,dd
->rank
,DIM
,dd
->ci
);
5533 /* We need to make an index to go from the coordinates
5534 * to the nodeid of this simulation.
5536 snew(comm
->ddindex2simnodeid
,dd
->nnodes
);
5537 snew(buf
,dd
->nnodes
);
5538 if (cr
->duty
& DUTY_PP
)
5540 buf
[dd_index(dd
->nc
,dd
->ci
)] = cr
->sim_nodeid
;
5542 /* Communicate the ddindex to simulation nodeid index */
5543 MPI_Allreduce(buf
,comm
->ddindex2simnodeid
,dd
->nnodes
,MPI_INT
,MPI_SUM
,
5544 cr
->mpi_comm_mysim
);
5547 /* Determine the master coordinates and rank.
5548 * The DD master should be the same node as the master of this sim.
5550 for(i
=0; i
<dd
->nnodes
; i
++)
5552 if (comm
->ddindex2simnodeid
[i
] == 0)
5554 ddindex2xyz(dd
->nc
,i
,dd
->master_ci
);
5555 MPI_Cart_rank(dd
->mpi_comm_all
,dd
->master_ci
,&dd
->masterrank
);
5560 fprintf(debug
,"The master rank is %d\n",dd
->masterrank
);
5565 /* No Cartesian communicators */
5566 /* We use the rank in dd->comm->all as DD index */
5567 ddindex2xyz(dd
->nc
,dd
->rank
,dd
->ci
);
5568 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5570 clear_ivec(dd
->master_ci
);
5577 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5578 dd
->rank
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5583 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5584 dd
->rank
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5588 static void receive_ddindex2simnodeid(t_commrec
*cr
)
5592 gmx_domdec_comm_t
*comm
;
5599 if (!comm
->bCartesianPP_PME
&& comm
->bCartesianPP
)
5601 snew(comm
->ddindex2simnodeid
,dd
->nnodes
);
5602 snew(buf
,dd
->nnodes
);
5603 if (cr
->duty
& DUTY_PP
)
5605 buf
[dd_index(dd
->nc
,dd
->ci
)] = cr
->sim_nodeid
;
5608 /* Communicate the ddindex to simulation nodeid index */
5609 MPI_Allreduce(buf
,comm
->ddindex2simnodeid
,dd
->nnodes
,MPI_INT
,MPI_SUM
,
5610 cr
->mpi_comm_mysim
);
5617 static gmx_domdec_master_t
*init_gmx_domdec_master_t(gmx_domdec_t
*dd
,
5620 gmx_domdec_master_t
*ma
;
5625 snew(ma
->ncg
,dd
->nnodes
);
5626 snew(ma
->index
,dd
->nnodes
+1);
5628 snew(ma
->nat
,dd
->nnodes
);
5629 snew(ma
->ibuf
,dd
->nnodes
*2);
5630 snew(ma
->cell_x
,DIM
);
5631 for(i
=0; i
<DIM
; i
++)
5633 snew(ma
->cell_x
[i
],dd
->nc
[i
]+1);
5636 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
5642 snew(ma
->vbuf
,natoms
);
5648 static void split_communicator(FILE *fplog
,t_commrec
*cr
,int dd_node_order
,
5652 gmx_domdec_comm_t
*comm
;
5663 if (comm
->bCartesianPP
)
5665 for(i
=1; i
<DIM
; i
++)
5667 bDiv
[i
] = ((cr
->npmenodes
*dd
->nc
[i
]) % (dd
->nnodes
) == 0);
5669 if (bDiv
[YY
] || bDiv
[ZZ
])
5671 comm
->bCartesianPP_PME
= TRUE
;
5672 /* If we have 2D PME decomposition, which is always in x+y,
5673 * we stack the PME only nodes in z.
5674 * Otherwise we choose the direction that provides the thinnest slab
5675 * of PME only nodes as this will have the least effect
5676 * on the PP communication.
5677 * But for the PME communication the opposite might be better.
5679 if (bDiv
[ZZ
] && (comm
->npmenodes_y
> 1 ||
5681 dd
->nc
[YY
] > dd
->nc
[ZZ
]))
5683 comm
->cartpmedim
= ZZ
;
5687 comm
->cartpmedim
= YY
;
5689 comm
->ntot
[comm
->cartpmedim
]
5690 += (cr
->npmenodes
*dd
->nc
[comm
->cartpmedim
])/dd
->nnodes
;
5694 fprintf(fplog
,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr
->npmenodes
,dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[XX
],dd
->nc
[ZZ
]);
5696 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5701 if (comm
->bCartesianPP_PME
)
5705 fprintf(fplog
,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm
->ntot
[XX
],comm
->ntot
[YY
],comm
->ntot
[ZZ
]);
5708 for(i
=0; i
<DIM
; i
++)
5712 MPI_Cart_create(cr
->mpi_comm_mysim
,DIM
,comm
->ntot
,periods
,reorder
,
5715 MPI_Comm_rank(comm_cart
,&rank
);
5716 if (MASTERNODE(cr
) && rank
!= 0)
5718 gmx_fatal(FARGS
,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
5721 /* With this assigment we loose the link to the original communicator
5722 * which will usually be MPI_COMM_WORLD, unless have multisim.
5724 cr
->mpi_comm_mysim
= comm_cart
;
5725 cr
->sim_nodeid
= rank
;
5727 MPI_Cart_coords(cr
->mpi_comm_mysim
,cr
->sim_nodeid
,DIM
,dd
->ci
);
5731 fprintf(fplog
,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
5732 cr
->sim_nodeid
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5735 if (dd
->ci
[comm
->cartpmedim
] < dd
->nc
[comm
->cartpmedim
])
5739 if (cr
->npmenodes
== 0 ||
5740 dd
->ci
[comm
->cartpmedim
] >= dd
->nc
[comm
->cartpmedim
])
5742 cr
->duty
= DUTY_PME
;
5745 /* Split the sim communicator into PP and PME only nodes */
5746 MPI_Comm_split(cr
->mpi_comm_mysim
,
5748 dd_index(comm
->ntot
,dd
->ci
),
5749 &cr
->mpi_comm_mygroup
);
5753 switch (dd_node_order
)
5758 fprintf(fplog
,"Order of the nodes: PP first, PME last\n");
5761 case ddnoINTERLEAVE
:
5762 /* Interleave the PP-only and PME-only nodes,
5763 * as on clusters with dual-core machines this will double
5764 * the communication bandwidth of the PME processes
5765 * and thus speed up the PP <-> PME and inter PME communication.
5769 fprintf(fplog
,"Interleaving PP and PME nodes\n");
5771 comm
->pmenodes
= dd_pmenodes(cr
);
5776 gmx_fatal(FARGS
,"Unknown dd_node_order=%d",dd_node_order
);
5779 if (dd_simnode2pmenode(cr
,cr
->sim_nodeid
) == -1)
5781 cr
->duty
= DUTY_PME
;
5788 /* Split the sim communicator into PP and PME only nodes */
5789 MPI_Comm_split(cr
->mpi_comm_mysim
,
5792 &cr
->mpi_comm_mygroup
);
5793 MPI_Comm_rank(cr
->mpi_comm_mygroup
,&cr
->nodeid
);
5799 fprintf(fplog
,"This is a %s only node\n\n",
5800 (cr
->duty
& DUTY_PP
) ? "particle-particle" : "PME-mesh");
5804 void make_dd_communicators(FILE *fplog
,t_commrec
*cr
,int dd_node_order
)
5807 gmx_domdec_comm_t
*comm
;
5813 copy_ivec(dd
->nc
,comm
->ntot
);
5815 comm
->bCartesianPP
= (dd_node_order
== ddnoCARTESIAN
);
5816 comm
->bCartesianPP_PME
= FALSE
;
5818 /* Reorder the nodes by default. This might change the MPI ranks.
5819 * Real reordering is only supported on very few architectures,
5820 * Blue Gene is one of them.
5822 CartReorder
= (getenv("GMX_NO_CART_REORDER") == NULL
);
5824 if (cr
->npmenodes
> 0)
5826 /* Split the communicator into a PP and PME part */
5827 split_communicator(fplog
,cr
,dd_node_order
,CartReorder
);
5828 if (comm
->bCartesianPP_PME
)
5830 /* We (possibly) reordered the nodes in split_communicator,
5831 * so it is no longer required in make_pp_communicator.
5833 CartReorder
= FALSE
;
5838 /* All nodes do PP and PME */
5840 /* We do not require separate communicators */
5841 cr
->mpi_comm_mygroup
= cr
->mpi_comm_mysim
;
5845 if (cr
->duty
& DUTY_PP
)
5847 /* Copy or make a new PP communicator */
5848 make_pp_communicator(fplog
,cr
,CartReorder
);
5852 receive_ddindex2simnodeid(cr
);
5855 if (!(cr
->duty
& DUTY_PME
))
5857 /* Set up the commnuication to our PME node */
5858 dd
->pme_nodeid
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
5859 dd
->pme_receive_vir_ener
= receive_vir_ener(cr
);
5862 fprintf(debug
,"My pme_nodeid %d receive ener %d\n",
5863 dd
->pme_nodeid
,dd
->pme_receive_vir_ener
);
5868 dd
->pme_nodeid
= -1;
5873 dd
->ma
= init_gmx_domdec_master_t(dd
,
5875 comm
->cgs_gl
.index
[comm
->cgs_gl
.nr
]);
5879 static real
*get_slb_frac(FILE *fplog
,const char *dir
,int nc
,const char *size_string
)
5886 if (nc
> 1 && size_string
!= NULL
)
5890 fprintf(fplog
,"Using static load balancing for the %s direction\n",
5895 for (i
=0; i
<nc
; i
++)
5898 sscanf(size_string
,"%lf%n",&dbl
,&n
);
5901 gmx_fatal(FARGS
,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir
,size_string
);
5910 fprintf(fplog
,"Relative cell sizes:");
5912 for (i
=0; i
<nc
; i
++)
5917 fprintf(fplog
," %5.3f",slb_frac
[i
]);
5922 fprintf(fplog
,"\n");
5929 static int multi_body_bondeds_count(gmx_mtop_t
*mtop
)
5932 gmx_mtop_ilistloop_t iloop
;
5936 iloop
= gmx_mtop_ilistloop_init(mtop
);
5937 while (gmx_mtop_ilistloop_next(iloop
,&il
,&nmol
))
5939 for(ftype
=0; ftype
<F_NRE
; ftype
++)
5941 if ((interaction_function
[ftype
].flags
& IF_BOND
) &&
5944 n
+= nmol
*il
[ftype
].nr
/(1 + NRAL(ftype
));
5952 static int dd_nst_env(FILE *fplog
,const char *env_var
,int def
)
5958 val
= getenv(env_var
);
5961 if (sscanf(val
,"%d",&nst
) <= 0)
5967 fprintf(fplog
,"Found env.var. %s = %s, using value %d\n",
5975 static void dd_warning(t_commrec
*cr
,FILE *fplog
,const char *warn_string
)
5979 fprintf(stderr
,"\n%s\n",warn_string
);
5983 fprintf(fplog
,"\n%s\n",warn_string
);
5987 static void check_dd_restrictions(t_commrec
*cr
,gmx_domdec_t
*dd
,
5988 t_inputrec
*ir
,FILE *fplog
)
5990 if (ir
->ePBC
== epbcSCREW
&&
5991 (dd
->nc
[XX
] == 1 || dd
->nc
[YY
] > 1 || dd
->nc
[ZZ
] > 1))
5993 gmx_fatal(FARGS
,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names
[ir
->ePBC
]);
5996 if (ir
->ns_type
== ensSIMPLE
)
5998 gmx_fatal(FARGS
,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
6001 if (ir
->nstlist
== 0)
6003 gmx_fatal(FARGS
,"Domain decomposition does not work with nstlist=0");
6006 if (ir
->comm_mode
== ecmANGULAR
&& ir
->ePBC
!= epbcNONE
)
6008 dd_warning(cr
,fplog
,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6012 static real
average_cellsize_min(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
6017 r
= ddbox
->box_size
[XX
];
6018 for(di
=0; di
<dd
->ndim
; di
++)
6021 /* Check using the initial average cell size */
6022 r
= min(r
,ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
]/dd
->nc
[d
]);
6028 static int check_dlb_support(FILE *fplog
,t_commrec
*cr
,
6029 const char *dlb_opt
,bool bRecordLoad
,
6030 unsigned long Flags
,t_inputrec
*ir
)
6038 case 'a': eDLB
= edlbAUTO
; break;
6039 case 'n': eDLB
= edlbNO
; break;
6040 case 'y': eDLB
= edlbYES
; break;
6041 default: gmx_incons("Unknown dlb_opt");
6044 if (Flags
& MD_RERUN
)
6049 if (!EI_DYNAMICS(ir
->eI
))
6051 if (eDLB
== edlbYES
)
6053 sprintf(buf
,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir
->eI
));
6054 dd_warning(cr
,fplog
,buf
);
6062 dd_warning(cr
,fplog
,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6067 if (Flags
& MD_REPRODUCIBLE
)
6074 dd_warning(cr
,fplog
,"NOTE: reproducability requested, will not use dynamic load balancing\n");
6078 dd_warning(cr
,fplog
,"WARNING: reproducability requested with dynamic load balancing, the simulation will NOT be binary reproducable\n");
6081 gmx_fatal(FARGS
,"Death horror: undefined case (%d) for load balancing choice",eDLB
);
6089 static void set_dd_dim(FILE *fplog
,gmx_domdec_t
*dd
)
6094 if (getenv("GMX_DD_ORDER_ZYX") != NULL
)
6096 /* Decomposition order z,y,x */
6099 fprintf(fplog
,"Using domain decomposition order z, y, x\n");
6101 for(dim
=DIM
-1; dim
>=0; dim
--)
6103 if (dd
->nc
[dim
] > 1)
6105 dd
->dim
[dd
->ndim
++] = dim
;
6111 /* Decomposition order x,y,z */
6112 for(dim
=0; dim
<DIM
; dim
++)
6114 if (dd
->nc
[dim
] > 1)
6116 dd
->dim
[dd
->ndim
++] = dim
;
6122 static gmx_domdec_comm_t
*init_dd_comm()
6124 gmx_domdec_comm_t
*comm
;
6128 snew(comm
->cggl_flag
,DIM
*2);
6129 snew(comm
->cgcm_state
,DIM
*2);
6130 for(i
=0; i
<DIM
*2; i
++)
6132 comm
->cggl_flag_nalloc
[i
] = 0;
6133 comm
->cgcm_state_nalloc
[i
] = 0;
6136 comm
->nalloc_int
= 0;
6137 comm
->buf_int
= NULL
;
6139 vec_rvec_init(&comm
->vbuf
);
6141 comm
->n_load_have
= 0;
6142 comm
->n_load_collect
= 0;
6144 for(i
=0; i
<ddnatNR
-ddnatZONE
; i
++)
6146 comm
->sum_nat
[i
] = 0;
6150 comm
->load_step
= 0;
6153 clear_ivec(comm
->load_lim
);
6160 gmx_domdec_t
*init_domain_decomposition(FILE *fplog
,t_commrec
*cr
,
6161 unsigned long Flags
,
6163 real comm_distance_min
,real rconstr
,
6164 const char *dlb_opt
,real dlb_scale
,
6165 const char *sizex
,const char *sizey
,const char *sizez
,
6166 gmx_mtop_t
*mtop
,t_inputrec
*ir
,
6169 int *npme_x
,int *npme_y
)
6172 gmx_domdec_comm_t
*comm
;
6175 real r_2b
,r_mb
,r_bonded
=-1,r_bonded_limit
=-1,limit
,acs
;
6182 "\nInitializing Domain Decomposition on %d nodes\n",cr
->nnodes
);
6187 dd
->comm
= init_dd_comm();
6189 snew(comm
->cggl_flag
,DIM
*2);
6190 snew(comm
->cgcm_state
,DIM
*2);
6192 dd
->npbcdim
= ePBC2npbcdim(ir
->ePBC
);
6193 dd
->bScrewPBC
= (ir
->ePBC
== epbcSCREW
);
6195 dd
->bSendRecv2
= dd_nst_env(fplog
,"GMX_DD_SENDRECV2",0);
6196 comm
->eFlop
= dd_nst_env(fplog
,"GMX_DLB_FLOP",0);
6197 recload
= dd_nst_env(fplog
,"GMX_DD_LOAD",1);
6198 comm
->nstSortCG
= dd_nst_env(fplog
,"GMX_DD_SORT",1);
6199 comm
->nstDDDump
= dd_nst_env(fplog
,"GMX_DD_DUMP",0);
6200 comm
->nstDDDumpGrid
= dd_nst_env(fplog
,"GMX_DD_DUMP_GRID",0);
6201 comm
->DD_debug
= dd_nst_env(fplog
,"GMX_DD_DEBUG",0);
6203 dd
->pme_recv_f_alloc
= 0;
6204 dd
->pme_recv_f_buf
= NULL
;
6206 if (dd
->bSendRecv2
&& fplog
)
6208 fprintf(fplog
,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6214 fprintf(fplog
,"Will load balance based on FLOP count\n");
6216 if (comm
->eFlop
> 1)
6218 srand(1+cr
->nodeid
);
6220 comm
->bRecordLoad
= TRUE
;
6224 comm
->bRecordLoad
= (wallcycle_have_counter() && recload
> 0);
6228 comm
->eDLB
= check_dlb_support(fplog
,cr
,dlb_opt
,comm
->bRecordLoad
,Flags
,ir
);
6230 comm
->bDynLoadBal
= (comm
->eDLB
== edlbYES
);
6233 fprintf(fplog
,"Dynamic load balancing: %s\n",edlb_names
[comm
->eDLB
]);
6235 dd
->bGridJump
= comm
->bDynLoadBal
;
6237 if (comm
->nstSortCG
)
6241 if (comm
->nstSortCG
== 1)
6243 fprintf(fplog
,"Will sort the charge groups at every domain (re)decomposition\n");
6247 fprintf(fplog
,"Will sort the charge groups every %d steps\n",
6257 fprintf(fplog
,"Will not sort the charge groups\n");
6261 comm
->bInterCGBondeds
= (ncg_mtop(mtop
) > mtop
->mols
.nr
);
6262 if (comm
->bInterCGBondeds
)
6264 comm
->bInterCGMultiBody
= (multi_body_bondeds_count(mtop
) > 0);
6268 comm
->bInterCGMultiBody
= FALSE
;
6271 dd
->bInterCGcons
= inter_charge_group_constraints(mtop
);
6273 if (ir
->rlistlong
== 0)
6275 /* Set the cut-off to some very large value,
6276 * so we don't need if statements everywhere in the code.
6277 * We use sqrt, since the cut-off is squared in some places.
6279 comm
->cutoff
= GMX_CUTOFF_INF
;
6283 comm
->cutoff
= ir
->rlistlong
;
6285 comm
->cutoff_mbody
= 0;
6287 comm
->cellsize_limit
= 0;
6288 comm
->bBondComm
= FALSE
;
6290 if (comm
->bInterCGBondeds
)
6292 if (comm_distance_min
> 0)
6294 comm
->cutoff_mbody
= comm_distance_min
;
6295 if (Flags
& MD_DDBONDCOMM
)
6297 comm
->bBondComm
= (comm
->cutoff_mbody
> comm
->cutoff
);
6301 comm
->cutoff
= max(comm
->cutoff
,comm
->cutoff_mbody
);
6303 r_bonded_limit
= comm
->cutoff_mbody
;
6305 else if (ir
->bPeriodicMols
)
6307 /* Can not easily determine the required cut-off */
6308 dd_warning(cr
,fplog
,"NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
6309 comm
->cutoff_mbody
= comm
->cutoff
/2;
6310 r_bonded_limit
= comm
->cutoff_mbody
;
6316 dd_bonded_cg_distance(fplog
,dd
,mtop
,ir
,x
,box
,
6317 Flags
& MD_DDBONDCHECK
,&r_2b
,&r_mb
);
6319 gmx_bcast(sizeof(r_2b
),&r_2b
,cr
);
6320 gmx_bcast(sizeof(r_mb
),&r_mb
,cr
);
6322 /* We use an initial margin of 10% for the minimum cell size,
6323 * except when we are just below the non-bonded cut-off.
6325 if (Flags
& MD_DDBONDCOMM
)
6327 if (max(r_2b
,r_mb
) > comm
->cutoff
)
6329 r_bonded
= max(r_2b
,r_mb
);
6330 r_bonded_limit
= 1.1*r_bonded
;
6331 comm
->bBondComm
= TRUE
;
6336 r_bonded_limit
= min(1.1*r_bonded
,comm
->cutoff
);
6338 /* We determine cutoff_mbody later */
6342 /* No special bonded communication,
6343 * simply increase the DD cut-off.
6345 r_bonded_limit
= 1.1*max(r_2b
,r_mb
);
6346 comm
->cutoff_mbody
= r_bonded_limit
;
6347 comm
->cutoff
= max(comm
->cutoff
,comm
->cutoff_mbody
);
6350 comm
->cellsize_limit
= max(comm
->cellsize_limit
,r_bonded_limit
);
6354 "Minimum cell size due to bonded interactions: %.3f nm\n",
6355 comm
->cellsize_limit
);
6359 if (dd
->bInterCGcons
&& rconstr
<= 0)
6361 /* There is a cell size limit due to the constraints (P-LINCS) */
6362 rconstr
= constr_r_max(fplog
,mtop
,ir
);
6366 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6368 if (rconstr
> comm
->cellsize_limit
)
6370 fprintf(fplog
,"This distance will limit the DD cell size, you can override this with -rcon\n");
6374 else if (rconstr
> 0 && fplog
)
6376 /* Here we do not check for dd->bInterCGcons,
6377 * because one can also set a cell size limit for virtual sites only
6378 * and at this point we don't know yet if there are intercg v-sites.
6381 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6384 comm
->cellsize_limit
= max(comm
->cellsize_limit
,rconstr
);
6386 comm
->cgs_gl
= gmx_mtop_global_cgs(mtop
);
6390 copy_ivec(nc
,dd
->nc
);
6391 set_dd_dim(fplog
,dd
);
6392 set_ddbox_cr(cr
,&dd
->nc
,ir
,box
,&comm
->cgs_gl
,x
,ddbox
);
6394 if (cr
->npmenodes
== -1)
6398 acs
= average_cellsize_min(dd
,ddbox
);
6399 if (acs
< comm
->cellsize_limit
)
6403 fprintf(fplog
,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs
,comm
->cellsize_limit
);
6405 gmx_fatal_collective(FARGS
,cr
,NULL
,
6406 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6407 acs
,comm
->cellsize_limit
);
6412 set_ddbox_cr(cr
,NULL
,ir
,box
,&comm
->cgs_gl
,x
,ddbox
);
6414 /* We need to choose the optimal DD grid and possibly PME nodes */
6415 limit
= dd_choose_grid(fplog
,cr
,dd
,ir
,mtop
,box
,ddbox
,
6416 comm
->eDLB
!=edlbNO
,dlb_scale
,
6417 comm
->cellsize_limit
,comm
->cutoff
,
6418 comm
->bInterCGBondeds
,comm
->bInterCGMultiBody
);
6420 if (dd
->nc
[XX
] == 0)
6422 bC
= (dd
->bInterCGcons
&& rconstr
> r_bonded_limit
);
6423 sprintf(buf
,"Change the number of nodes or mdrun option %s%s%s",
6424 !bC
? "-rdd" : "-rcon",
6425 comm
->eDLB
!=edlbNO
? " or -dds" : "",
6426 bC
? " or your LINCS settings" : "");
6428 gmx_fatal_collective(FARGS
,cr
,NULL
,
6429 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6431 "Look in the log file for details on the domain decomposition",
6432 cr
->nnodes
-cr
->npmenodes
,limit
,buf
);
6434 set_dd_dim(fplog
,dd
);
6440 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6441 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
],cr
->npmenodes
);
6444 dd
->nnodes
= dd
->nc
[XX
]*dd
->nc
[YY
]*dd
->nc
[ZZ
];
6445 if (cr
->nnodes
- dd
->nnodes
!= cr
->npmenodes
)
6447 gmx_fatal_collective(FARGS
,cr
,NULL
,
6448 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6449 dd
->nnodes
,cr
->nnodes
- cr
->npmenodes
,cr
->nnodes
);
6451 if (cr
->npmenodes
> dd
->nnodes
)
6453 gmx_fatal_collective(FARGS
,cr
,NULL
,
6454 "The number of separate PME node (%d) is larger than the number of PP nodes (%d), this is not supported.",cr
->npmenodes
,dd
->nnodes
);
6456 if (cr
->npmenodes
> 0)
6458 comm
->npmenodes
= cr
->npmenodes
;
6462 comm
->npmenodes
= dd
->nnodes
;
6465 if (EEL_PME(ir
->coulombtype
))
6467 /* The following choices should match those
6468 * in comm_cost_est in domdec_setup.c.
6469 * Note that here the checks have to take into account
6470 * that the decomposition might occur in a different order than xyz
6471 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6472 * in which case they will not match those in comm_cost_est,
6473 * but since that is mainly for testing purposes that's fine.
6475 if (dd
->ndim
>= 2 && dd
->dim
[0] == XX
&& dd
->dim
[1] == YY
&&
6476 comm
->npmenodes
> dd
->nc
[XX
] && comm
->npmenodes
% dd
->nc
[XX
] == 0 &&
6477 getenv("GMX_PMEONEDD") == NULL
)
6479 comm
->npmedecompdim
= 2;
6480 comm
->npmenodes_x
= dd
->nc
[XX
];
6481 comm
->npmenodes_y
= comm
->npmenodes
/comm
->npmenodes_x
;
6485 /* In case nc is 1 in both x and y we could still choose to
6486 * decompose pme in y instead of x, but we use x for simplicity.
6488 comm
->npmedecompdim
= 1;
6489 if (dd
->dim
[0] == YY
)
6491 comm
->npmenodes_x
= 1;
6492 comm
->npmenodes_y
= comm
->npmenodes
;
6496 comm
->npmenodes_x
= comm
->npmenodes
;
6497 comm
->npmenodes_y
= 1;
6502 fprintf(fplog
,"PME domain decomposition: %d x %d x %d\n",
6503 comm
->npmenodes_x
,comm
->npmenodes_y
,1);
6508 comm
->npmedecompdim
= 0;
6509 comm
->npmenodes_x
= 0;
6510 comm
->npmenodes_y
= 0;
6513 /* Technically we don't need both of these,
6514 * but it simplifies code not having to recalculate it.
6516 *npme_x
= comm
->npmenodes_x
;
6517 *npme_y
= comm
->npmenodes_y
;
6519 snew(comm
->slb_frac
,DIM
);
6520 if (comm
->eDLB
== edlbNO
)
6522 comm
->slb_frac
[XX
] = get_slb_frac(fplog
,"x",dd
->nc
[XX
],sizex
);
6523 comm
->slb_frac
[YY
] = get_slb_frac(fplog
,"y",dd
->nc
[YY
],sizey
);
6524 comm
->slb_frac
[ZZ
] = get_slb_frac(fplog
,"z",dd
->nc
[ZZ
],sizez
);
6527 if (comm
->bInterCGBondeds
&& comm
->cutoff_mbody
== 0)
6529 if (comm
->bBondComm
|| comm
->eDLB
!= edlbNO
)
6531 /* Set the bonded communication distance to halfway
6532 * the minimum and the maximum,
6533 * since the extra communication cost is nearly zero.
6535 acs
= average_cellsize_min(dd
,ddbox
);
6536 comm
->cutoff_mbody
= 0.5*(r_bonded
+ acs
);
6537 if (comm
->eDLB
!= edlbNO
)
6539 /* Check if this does not limit the scaling */
6540 comm
->cutoff_mbody
= min(comm
->cutoff_mbody
,dlb_scale
*acs
);
6542 if (!comm
->bBondComm
)
6544 /* Without bBondComm do not go beyond the n.b. cut-off */
6545 comm
->cutoff_mbody
= min(comm
->cutoff_mbody
,comm
->cutoff
);
6546 if (comm
->cellsize_limit
>= comm
->cutoff
)
6548 /* We don't loose a lot of efficieny
6549 * when increasing it to the n.b. cut-off.
6550 * It can even be slightly faster, because we need
6551 * less checks for the communication setup.
6553 comm
->cutoff_mbody
= comm
->cutoff
;
6556 /* Check if we did not end up below our original limit */
6557 comm
->cutoff_mbody
= max(comm
->cutoff_mbody
,r_bonded_limit
);
6559 if (comm
->cutoff_mbody
> comm
->cellsize_limit
)
6561 comm
->cellsize_limit
= comm
->cutoff_mbody
;
6564 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6569 fprintf(debug
,"Bonded atom communication beyond the cut-off: %d\n"
6570 "cellsize limit %f\n",
6571 comm
->bBondComm
,comm
->cellsize_limit
);
6576 check_dd_restrictions(cr
,dd
,ir
,fplog
);
6579 comm
->partition_step
= INT_MIN
;
6582 clear_dd_cycle_counts(dd
);
6587 static void set_dlb_limits(gmx_domdec_t
*dd
)
6592 for(d
=0; d
<dd
->ndim
; d
++)
6594 dd
->comm
->cd
[d
].np
= dd
->comm
->cd
[d
].np_dlb
;
6595 dd
->comm
->cellsize_min
[dd
->dim
[d
]] =
6596 dd
->comm
->cellsize_min_dlb
[dd
->dim
[d
]];
6601 static void turn_on_dlb(FILE *fplog
,t_commrec
*cr
,gmx_large_int_t step
)
6604 gmx_domdec_comm_t
*comm
;
6614 fprintf(fplog
,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step
,buf
),dd_force_imb_perf_loss(dd
)*100);
6617 cellsize_min
= comm
->cellsize_min
[dd
->dim
[0]];
6618 for(d
=1; d
<dd
->ndim
; d
++)
6620 cellsize_min
= min(cellsize_min
,comm
->cellsize_min
[dd
->dim
[d
]]);
6623 if (cellsize_min
< comm
->cellsize_limit
*1.05)
6625 dd_warning(cr
,fplog
,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
6627 /* Change DLB from "auto" to "no". */
6628 comm
->eDLB
= edlbNO
;
6633 dd_warning(cr
,fplog
,"NOTE: Turning on dynamic load balancing\n");
6634 comm
->bDynLoadBal
= TRUE
;
6635 dd
->bGridJump
= TRUE
;
6639 /* We can set the required cell size info here,
6640 * so we do not need to communicate this.
6641 * The grid is completely uniform.
6643 for(d
=0; d
<dd
->ndim
; d
++)
6647 comm
->load
[d
].sum_m
= comm
->load
[d
].sum
;
6649 nc
= dd
->nc
[dd
->dim
[d
]];
6652 comm
->root
[d
]->cell_f
[i
] = i
/(real
)nc
;
6655 comm
->root
[d
]->cell_f_max0
[i
] = i
/(real
)nc
;
6656 comm
->root
[d
]->cell_f_min1
[i
] = (i
+1)/(real
)nc
;
6659 comm
->root
[d
]->cell_f
[nc
] = 1.0;
6664 static char *init_bLocalCG(gmx_mtop_t
*mtop
)
6669 ncg
= ncg_mtop(mtop
);
6671 for(cg
=0; cg
<ncg
; cg
++)
6673 bLocalCG
[cg
] = FALSE
;
6679 void dd_init_bondeds(FILE *fplog
,
6680 gmx_domdec_t
*dd
,gmx_mtop_t
*mtop
,
6681 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
6682 t_inputrec
*ir
,bool bBCheck
,cginfo_mb_t
*cginfo_mb
)
6684 gmx_domdec_comm_t
*comm
;
6688 dd_make_reverse_top(fplog
,dd
,mtop
,vsite
,constr
,ir
,bBCheck
);
6692 if (comm
->bBondComm
)
6694 /* Communicate atoms beyond the cut-off for bonded interactions */
6697 comm
->cglink
= make_charge_group_links(mtop
,dd
,cginfo_mb
);
6699 comm
->bLocalCG
= init_bLocalCG(mtop
);
6703 /* Only communicate atoms based on cut-off */
6704 comm
->cglink
= NULL
;
6705 comm
->bLocalCG
= NULL
;
6709 static void print_dd_settings(FILE *fplog
,gmx_domdec_t
*dd
,
6711 bool bDynLoadBal
,real dlb_scale
,
6714 gmx_domdec_comm_t
*comm
;
6729 fprintf(fplog
,"The maximum number of communication pulses is:");
6730 for(d
=0; d
<dd
->ndim
; d
++)
6732 fprintf(fplog
," %c %d",dim2char(dd
->dim
[d
]),comm
->cd
[d
].np_dlb
);
6734 fprintf(fplog
,"\n");
6735 fprintf(fplog
,"The minimum size for domain decomposition cells is %.3f nm\n",comm
->cellsize_limit
);
6736 fprintf(fplog
,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale
);
6737 fprintf(fplog
,"The allowed shrink of domain decomposition cells is:");
6738 for(d
=0; d
<DIM
; d
++)
6742 if (d
>= ddbox
->npbcdim
&& dd
->nc
[d
] == 2)
6749 comm
->cellsize_min_dlb
[d
]/
6750 (ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
]/dd
->nc
[d
]);
6752 fprintf(fplog
," %c %.2f",dim2char(d
),shrink
);
6755 fprintf(fplog
,"\n");
6759 set_dd_cell_sizes_slb(dd
,ddbox
,FALSE
,np
);
6760 fprintf(fplog
,"The initial number of communication pulses is:");
6761 for(d
=0; d
<dd
->ndim
; d
++)
6763 fprintf(fplog
," %c %d",dim2char(dd
->dim
[d
]),np
[dd
->dim
[d
]]);
6765 fprintf(fplog
,"\n");
6766 fprintf(fplog
,"The initial domain decomposition cell size is:");
6767 for(d
=0; d
<DIM
; d
++) {
6770 fprintf(fplog
," %c %.2f nm",
6771 dim2char(d
),dd
->comm
->cellsize_min
[d
]);
6774 fprintf(fplog
,"\n\n");
6777 if (comm
->bInterCGBondeds
|| dd
->vsite_comm
|| dd
->constraint_comm
)
6779 fprintf(fplog
,"The maximum allowed distance for charge groups involved in interactions is:\n");
6780 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6781 "non-bonded interactions","",comm
->cutoff
);
6785 limit
= dd
->comm
->cellsize_limit
;
6789 if (dynamic_dd_box(ddbox
,ir
))
6791 fprintf(fplog
,"(the following are initial values, they could change due to box deformation)\n");
6793 limit
= dd
->comm
->cellsize_min
[XX
];
6794 for(d
=1; d
<DIM
; d
++)
6796 limit
= min(limit
,dd
->comm
->cellsize_min
[d
]);
6800 if (comm
->bInterCGBondeds
)
6802 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6803 "two-body bonded interactions","(-rdd)",
6804 max(comm
->cutoff
,comm
->cutoff_mbody
));
6805 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6806 "multi-body bonded interactions","(-rdd)",
6807 (comm
->bBondComm
|| dd
->bGridJump
) ? comm
->cutoff_mbody
: min(comm
->cutoff
,limit
));
6811 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6812 "virtual site constructions","(-rcon)",limit
);
6814 if (dd
->constraint_comm
)
6816 sprintf(buf
,"atoms separated by up to %d constraints",
6818 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6819 buf
,"(-rcon)",limit
);
6821 fprintf(fplog
,"\n");
6827 void set_dd_parameters(FILE *fplog
,gmx_domdec_t
*dd
,real dlb_scale
,
6828 t_inputrec
*ir
,t_forcerec
*fr
,
6831 gmx_domdec_comm_t
*comm
;
6832 int d
,dim
,npulse
,npulse_d_max
,npulse_d
;
6839 bNoCutOff
= (ir
->rvdw
== 0 || ir
->rcoulomb
== 0);
6841 if (EEL_PME(ir
->coulombtype
))
6843 init_ddpme(dd
,&comm
->ddpme
[0],0);
6844 if (comm
->npmedecompdim
>= 2)
6846 init_ddpme(dd
,&comm
->ddpme
[1],1);
6851 comm
->npmenodes
= 0;
6852 if (dd
->pme_nodeid
>= 0)
6854 gmx_fatal_collective(FARGS
,NULL
,dd
,
6855 "Can not have separate PME nodes without PME electrostatics");
6859 /* If each molecule is a single charge group
6860 * or we use domain decomposition for each periodic dimension,
6861 * we do not need to take pbc into account for the bonded interactions.
6863 if (fr
->ePBC
== epbcNONE
|| !comm
->bInterCGBondeds
||
6864 (dd
->nc
[XX
]>1 && dd
->nc
[YY
]>1 && (dd
->nc
[ZZ
]>1 || fr
->ePBC
==epbcXY
)))
6866 fr
->bMolPBC
= FALSE
;
6875 fprintf(debug
,"The DD cut-off is %f\n",comm
->cutoff
);
6877 if (comm
->eDLB
!= edlbNO
)
6879 /* Determine the maximum number of comm. pulses in one dimension */
6881 comm
->cellsize_limit
= max(comm
->cellsize_limit
,comm
->cutoff_mbody
);
6883 /* Determine the maximum required number of grid pulses */
6884 if (comm
->cellsize_limit
>= comm
->cutoff
)
6886 /* Only a single pulse is required */
6889 else if (!bNoCutOff
&& comm
->cellsize_limit
> 0)
6891 /* We round down slightly here to avoid overhead due to the latency
6892 * of extra communication calls when the cut-off
6893 * would be only slightly longer than the cell size.
6894 * Later cellsize_limit is redetermined,
6895 * so we can not miss interactions due to this rounding.
6897 npulse
= (int)(0.96 + comm
->cutoff
/comm
->cellsize_limit
);
6901 /* There is no cell size limit */
6902 npulse
= max(dd
->nc
[XX
]-1,max(dd
->nc
[YY
]-1,dd
->nc
[ZZ
]-1));
6905 if (!bNoCutOff
&& npulse
> 1)
6907 /* See if we can do with less pulses, based on dlb_scale */
6909 for(d
=0; d
<dd
->ndim
; d
++)
6912 npulse_d
= (int)(1 + dd
->nc
[dim
]*comm
->cutoff
6913 /(ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
]*dlb_scale
));
6914 npulse_d_max
= max(npulse_d_max
,npulse_d
);
6916 npulse
= min(npulse
,npulse_d_max
);
6919 /* This env var can override npulse */
6920 d
= dd_nst_env(fplog
,"GMX_DD_NPULSE",0);
6927 comm
->bVacDLBNoLimit
= (ir
->ePBC
== epbcNONE
);
6928 for(d
=0; d
<dd
->ndim
; d
++)
6930 comm
->cd
[d
].np_dlb
= min(npulse
,dd
->nc
[dd
->dim
[d
]]-1);
6931 comm
->cd
[d
].np_nalloc
= comm
->cd
[d
].np_dlb
;
6932 snew(comm
->cd
[d
].ind
,comm
->cd
[d
].np_nalloc
);
6933 comm
->maxpulse
= max(comm
->maxpulse
,comm
->cd
[d
].np_dlb
);
6934 if (comm
->cd
[d
].np_dlb
< dd
->nc
[dd
->dim
[d
]]-1)
6936 comm
->bVacDLBNoLimit
= FALSE
;
6940 /* cellsize_limit is set for LINCS in init_domain_decomposition */
6941 if (!comm
->bVacDLBNoLimit
)
6943 comm
->cellsize_limit
= max(comm
->cellsize_limit
,
6944 comm
->cutoff
/comm
->maxpulse
);
6946 comm
->cellsize_limit
= max(comm
->cellsize_limit
,comm
->cutoff_mbody
);
6947 /* Set the minimum cell size for each DD dimension */
6948 for(d
=0; d
<dd
->ndim
; d
++)
6950 if (comm
->bVacDLBNoLimit
||
6951 comm
->cd
[d
].np_dlb
*comm
->cellsize_limit
>= comm
->cutoff
)
6953 comm
->cellsize_min_dlb
[dd
->dim
[d
]] = comm
->cellsize_limit
;
6957 comm
->cellsize_min_dlb
[dd
->dim
[d
]] =
6958 comm
->cutoff
/comm
->cd
[d
].np_dlb
;
6961 if (comm
->cutoff_mbody
<= 0)
6963 comm
->cutoff_mbody
= min(comm
->cutoff
,comm
->cellsize_limit
);
6965 if (comm
->bDynLoadBal
)
6971 print_dd_settings(fplog
,dd
,ir
,comm
->bDynLoadBal
,dlb_scale
,ddbox
);
6972 if (comm
->eDLB
== edlbAUTO
)
6976 fprintf(fplog
,"When dynamic load balancing gets turned on, these settings will change to:\n");
6978 print_dd_settings(fplog
,dd
,ir
,TRUE
,dlb_scale
,ddbox
);
6981 if (ir
->ePBC
== epbcNONE
)
6983 vol_frac
= 1 - 1/(double)dd
->nnodes
;
6988 (1 + comm_box_frac(dd
->nc
,comm
->cutoff
,ddbox
))/(double)dd
->nnodes
;
6992 fprintf(debug
,"Volume fraction for all DD zones: %f\n",vol_frac
);
6994 natoms_tot
= comm
->cgs_gl
.index
[comm
->cgs_gl
.nr
];
6996 dd
->ga2la
= ga2la_init(natoms_tot
,vol_frac
*natoms_tot
);
6999 static void merge_cg_buffers(int ncell
,
7000 gmx_domdec_comm_dim_t
*cd
, int pulse
,
7002 int *index_gl
, int *recv_i
,
7003 rvec
*cg_cm
, rvec
*recv_vr
,
7005 cginfo_mb_t
*cginfo_mb
,int *cginfo
)
7007 gmx_domdec_ind_t
*ind
,*ind_p
;
7008 int p
,cell
,c
,cg
,cg0
,cg1
,cg_gl
,nat
;
7011 ind
= &cd
->ind
[pulse
];
7013 /* First correct the already stored data */
7014 shift
= ind
->nrecv
[ncell
];
7015 for(cell
=ncell
-1; cell
>=0; cell
--)
7017 shift
-= ind
->nrecv
[cell
];
7020 /* Move the cg's present from previous grid pulses */
7021 cg0
= ncg_cell
[ncell
+cell
];
7022 cg1
= ncg_cell
[ncell
+cell
+1];
7023 cgindex
[cg1
+shift
] = cgindex
[cg1
];
7024 for(cg
=cg1
-1; cg
>=cg0
; cg
--)
7026 index_gl
[cg
+shift
] = index_gl
[cg
];
7027 copy_rvec(cg_cm
[cg
],cg_cm
[cg
+shift
]);
7028 cgindex
[cg
+shift
] = cgindex
[cg
];
7029 cginfo
[cg
+shift
] = cginfo
[cg
];
7031 /* Correct the already stored send indices for the shift */
7032 for(p
=1; p
<=pulse
; p
++)
7034 ind_p
= &cd
->ind
[p
];
7036 for(c
=0; c
<cell
; c
++)
7038 cg0
+= ind_p
->nsend
[c
];
7040 cg1
= cg0
+ ind_p
->nsend
[cell
];
7041 for(cg
=cg0
; cg
<cg1
; cg
++)
7043 ind_p
->index
[cg
] += shift
;
7049 /* Merge in the communicated buffers */
7053 for(cell
=0; cell
<ncell
; cell
++)
7055 cg1
= ncg_cell
[ncell
+cell
+1] + shift
;
7058 /* Correct the old cg indices */
7059 for(cg
=ncg_cell
[ncell
+cell
]; cg
<cg1
; cg
++)
7061 cgindex
[cg
+1] += shift_at
;
7064 for(cg
=0; cg
<ind
->nrecv
[cell
]; cg
++)
7066 /* Copy this charge group from the buffer */
7067 index_gl
[cg1
] = recv_i
[cg0
];
7068 copy_rvec(recv_vr
[cg0
],cg_cm
[cg1
]);
7069 /* Add it to the cgindex */
7070 cg_gl
= index_gl
[cg1
];
7071 cginfo
[cg1
] = ddcginfo(cginfo_mb
,cg_gl
);
7072 nat
= GET_CGINFO_NATOMS(cginfo
[cg1
]);
7073 cgindex
[cg1
+1] = cgindex
[cg1
] + nat
;
7078 shift
+= ind
->nrecv
[cell
];
7079 ncg_cell
[ncell
+cell
+1] = cg1
;
7083 static void make_cell2at_index(gmx_domdec_comm_dim_t
*cd
,
7084 int nzone
,int cg0
,const int *cgindex
)
7088 /* Store the atom block boundaries for easy copying of communication buffers
7091 for(zone
=0; zone
<nzone
; zone
++)
7093 for(p
=0; p
<cd
->np
; p
++) {
7094 cd
->ind
[p
].cell2at0
[zone
] = cgindex
[cg
];
7095 cg
+= cd
->ind
[p
].nrecv
[zone
];
7096 cd
->ind
[p
].cell2at1
[zone
] = cgindex
[cg
];
7101 static bool missing_link(t_blocka
*link
,int cg_gl
,char *bLocalCG
)
7107 for(i
=link
->index
[cg_gl
]; i
<link
->index
[cg_gl
+1]; i
++)
7109 if (!bLocalCG
[link
->a
[i
]])
7118 static void setup_dd_communication(gmx_domdec_t
*dd
,
7119 matrix box
,gmx_ddbox_t
*ddbox
,t_forcerec
*fr
)
7121 int dim_ind
,dim
,dim0
,dim1
=-1,dim2
=-1,dimd
,p
,nat_tot
;
7122 int nzone
,nzone_send
,zone
,zonei
,cg0
,cg1
;
7123 int c
,i
,j
,cg
,cg_gl
,nrcg
;
7124 int *zone_cg_range
,pos_cg
,*index_gl
,*cgindex
,*recv_i
;
7125 gmx_domdec_comm_t
*comm
;
7126 gmx_domdec_zones_t
*zones
;
7127 gmx_domdec_comm_dim_t
*cd
;
7128 gmx_domdec_ind_t
*ind
;
7129 cginfo_mb_t
*cginfo_mb
;
7130 bool bBondComm
,bDist2B
,bDistMB
,bDistMB_pulse
,bDistBonded
,bScrew
;
7131 real r_mb
,r_comm2
,r_scomm2
,r_bcomm2
,r
,r_0
,r_1
,r2
,rb2
,r2inc
,inv_ncg
,tric_sh
;
7133 real corner
[DIM
][4],corner_round_0
=0,corner_round_1
[4];
7134 real bcorner
[DIM
],bcorner_round_1
=0;
7136 rvec
*cg_cm
,*normal
,*v_d
,*v_0
=NULL
,*v_1
=NULL
,*recv_vr
;
7137 real skew_fac2_d
,skew_fac_01
;
7143 fprintf(debug
,"Setting up DD communication\n");
7149 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
7151 dim
= dd
->dim
[dim_ind
];
7153 /* Check if we need to use triclinic distances */
7154 tric_dist
[dim_ind
] = 0;
7155 for(i
=0; i
<=dim_ind
; i
++)
7157 if (ddbox
->tric_dir
[dd
->dim
[i
]])
7159 tric_dist
[dim_ind
] = 1;
7164 bBondComm
= comm
->bBondComm
;
7166 /* Do we need to determine extra distances for multi-body bondeds? */
7167 bDistMB
= (comm
->bInterCGMultiBody
&& dd
->bGridJump
&& dd
->ndim
> 1);
7169 /* Do we need to determine extra distances for only two-body bondeds? */
7170 bDist2B
= (bBondComm
&& !bDistMB
);
7172 r_comm2
= sqr(comm
->cutoff
);
7173 r_bcomm2
= sqr(comm
->cutoff_mbody
);
7177 fprintf(debug
,"bBondComm %d, r_bc %f\n",bBondComm
,sqrt(r_bcomm2
));
7180 zones
= &comm
->zones
;
7183 /* The first dimension is equal for all cells */
7184 corner
[0][0] = comm
->cell_x0
[dim0
];
7187 bcorner
[0] = corner
[0][0];
7192 /* This cell row is only seen from the first row */
7193 corner
[1][0] = comm
->cell_x0
[dim1
];
7194 /* All rows can see this row */
7195 corner
[1][1] = comm
->cell_x0
[dim1
];
7198 corner
[1][1] = max(comm
->cell_x0
[dim1
],comm
->zone_d1
[1].mch0
);
7201 /* For the multi-body distance we need the maximum */
7202 bcorner
[1] = max(comm
->cell_x0
[dim1
],comm
->zone_d1
[1].p1_0
);
7205 /* Set the upper-right corner for rounding */
7206 corner_round_0
= comm
->cell_x1
[dim0
];
7213 corner
[2][j
] = comm
->cell_x0
[dim2
];
7217 /* Use the maximum of the i-cells that see a j-cell */
7218 for(i
=0; i
<zones
->nizone
; i
++)
7220 for(j
=zones
->izone
[i
].j0
; j
<zones
->izone
[i
].j1
; j
++)
7226 comm
->zone_d2
[zones
->shift
[i
][dim0
]][zones
->shift
[i
][dim1
]].mch0
);
7232 /* For the multi-body distance we need the maximum */
7233 bcorner
[2] = comm
->cell_x0
[dim2
];
7238 bcorner
[2] = max(bcorner
[2],
7239 comm
->zone_d2
[i
][j
].p1_0
);
7245 /* Set the upper-right corner for rounding */
7246 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7247 * Only cell (0,0,0) can see cell 7 (1,1,1)
7249 corner_round_1
[0] = comm
->cell_x1
[dim1
];
7250 corner_round_1
[3] = comm
->cell_x1
[dim1
];
7253 corner_round_1
[0] = max(comm
->cell_x1
[dim1
],
7254 comm
->zone_d1
[1].mch1
);
7257 /* For the multi-body distance we need the maximum */
7258 bcorner_round_1
= max(comm
->cell_x1
[dim1
],
7259 comm
->zone_d1
[1].p1_1
);
7265 /* Triclinic stuff */
7266 normal
= ddbox
->normal
;
7270 v_0
= ddbox
->v
[dim0
];
7271 if (ddbox
->tric_dir
[dim0
] && ddbox
->tric_dir
[dim1
])
7273 /* Determine the coupling coefficient for the distances
7274 * to the cell planes along dim0 and dim1 through dim2.
7275 * This is required for correct rounding.
7278 ddbox
->v
[dim0
][dim1
+1][dim0
]*ddbox
->v
[dim1
][dim1
+1][dim1
];
7281 fprintf(debug
,"\nskew_fac_01 %f\n",skew_fac_01
);
7287 v_1
= ddbox
->v
[dim1
];
7290 zone_cg_range
= zones
->cg_range
;
7291 index_gl
= dd
->index_gl
;
7292 cgindex
= dd
->cgindex
;
7293 cginfo_mb
= fr
->cginfo_mb
;
7295 zone_cg_range
[0] = 0;
7296 zone_cg_range
[1] = dd
->ncg_home
;
7297 comm
->zone_ncg1
[0] = dd
->ncg_home
;
7298 pos_cg
= dd
->ncg_home
;
7300 nat_tot
= dd
->nat_home
;
7302 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
7304 dim
= dd
->dim
[dim_ind
];
7305 cd
= &comm
->cd
[dim_ind
];
7307 if (dim
>= ddbox
->npbcdim
&& dd
->ci
[dim
] == 0)
7309 /* No pbc in this dimension, the first node should not comm. */
7317 bScrew
= (dd
->bScrewPBC
&& dim
== XX
);
7319 v_d
= ddbox
->v
[dim
];
7320 skew_fac2_d
= sqr(ddbox
->skew_fac
[dim
]);
7322 cd
->bInPlace
= TRUE
;
7323 for(p
=0; p
<cd
->np
; p
++)
7325 /* Only atoms communicated in the first pulse are used
7326 * for multi-body bonded interactions or for bBondComm.
7328 bDistBonded
= ((bDistMB
|| bDist2B
) && p
== 0);
7329 bDistMB_pulse
= (bDistMB
&& bDistBonded
);
7334 for(zone
=0; zone
<nzone_send
; zone
++)
7336 if (tric_dist
[dim_ind
] && dim_ind
> 0)
7338 /* Determine slightly more optimized skew_fac's
7340 * This reduces the number of communicated atoms
7341 * by about 10% for 3D DD of rhombic dodecahedra.
7343 for(dimd
=0; dimd
<dim
; dimd
++)
7345 sf2_round
[dimd
] = 1;
7346 if (ddbox
->tric_dir
[dimd
])
7348 for(i
=dd
->dim
[dimd
]+1; i
<DIM
; i
++)
7350 /* If we are shifted in dimension i
7351 * and the cell plane is tilted forward
7352 * in dimension i, skip this coupling.
7354 if (!(zones
->shift
[nzone
+zone
][i
] &&
7355 ddbox
->v
[dimd
][i
][dimd
] >= 0))
7358 sqr(ddbox
->v
[dimd
][i
][dimd
]);
7361 sf2_round
[dimd
] = 1/sf2_round
[dimd
];
7366 zonei
= zone_perm
[dim_ind
][zone
];
7369 /* Here we permutate the zones to obtain a convenient order
7370 * for neighbor searching
7372 cg0
= zone_cg_range
[zonei
];
7373 cg1
= zone_cg_range
[zonei
+1];
7377 /* Look only at the cg's received in the previous grid pulse
7379 cg1
= zone_cg_range
[nzone
+zone
+1];
7380 cg0
= cg1
- cd
->ind
[p
-1].nrecv
[zone
];
7382 ind
->nsend
[zone
] = 0;
7383 for(cg
=cg0
; cg
<cg1
; cg
++)
7387 if (tric_dist
[dim_ind
] == 0)
7389 /* Rectangular direction, easy */
7390 r
= cg_cm
[cg
][dim
] - corner
[dim_ind
][zone
];
7397 r
= cg_cm
[cg
][dim
] - bcorner
[dim_ind
];
7403 /* Rounding gives at most a 16% reduction
7404 * in communicated atoms
7406 if (dim_ind
>= 1 && (zonei
== 1 || zonei
== 2))
7408 r
= cg_cm
[cg
][dim0
] - corner_round_0
;
7409 /* This is the first dimension, so always r >= 0 */
7416 if (dim_ind
== 2 && (zonei
== 2 || zonei
== 3))
7418 r
= cg_cm
[cg
][dim1
] - corner_round_1
[zone
];
7425 r
= cg_cm
[cg
][dim1
] - bcorner_round_1
;
7435 /* Triclinic direction, more complicated */
7438 /* Rounding, conservative as the skew_fac multiplication
7439 * will slightly underestimate the distance.
7441 if (dim_ind
>= 1 && (zonei
== 1 || zonei
== 2))
7443 rn
[dim0
] = cg_cm
[cg
][dim0
] - corner_round_0
;
7444 for(i
=dim0
+1; i
<DIM
; i
++)
7446 rn
[dim0
] -= cg_cm
[cg
][i
]*v_0
[i
][dim0
];
7448 r2
= rn
[dim0
]*rn
[dim0
]*sf2_round
[dim0
];
7451 rb
[dim0
] = rn
[dim0
];
7454 /* Take care that the cell planes along dim0 might not
7455 * be orthogonal to those along dim1 and dim2.
7457 for(i
=1; i
<=dim_ind
; i
++)
7460 if (normal
[dim0
][dimd
] > 0)
7462 rn
[dimd
] -= rn
[dim0
]*normal
[dim0
][dimd
];
7465 rb
[dimd
] -= rb
[dim0
]*normal
[dim0
][dimd
];
7470 if (dim_ind
== 2 && (zonei
== 2 || zonei
== 3))
7472 rn
[dim1
] += cg_cm
[cg
][dim1
] - corner_round_1
[zone
];
7474 for(i
=dim1
+1; i
<DIM
; i
++)
7476 tric_sh
-= cg_cm
[cg
][i
]*v_1
[i
][dim1
];
7478 rn
[dim1
] += tric_sh
;
7481 r2
+= rn
[dim1
]*rn
[dim1
]*sf2_round
[dim1
];
7482 /* Take care of coupling of the distances
7483 * to the planes along dim0 and dim1 through dim2.
7485 r2
-= rn
[dim0
]*rn
[dim1
]*skew_fac_01
;
7486 /* Take care that the cell planes along dim1
7487 * might not be orthogonal to that along dim2.
7489 if (normal
[dim1
][dim2
] > 0)
7491 rn
[dim2
] -= rn
[dim1
]*normal
[dim1
][dim2
];
7497 cg_cm
[cg
][dim1
] - bcorner_round_1
+ tric_sh
;
7500 rb2
+= rb
[dim1
]*rb
[dim1
]*sf2_round
[dim1
];
7501 /* Take care of coupling of the distances
7502 * to the planes along dim0 and dim1 through dim2.
7504 rb2
-= rb
[dim0
]*rb
[dim1
]*skew_fac_01
;
7505 /* Take care that the cell planes along dim1
7506 * might not be orthogonal to that along dim2.
7508 if (normal
[dim1
][dim2
] > 0)
7510 rb
[dim2
] -= rb
[dim1
]*normal
[dim1
][dim2
];
7515 /* The distance along the communication direction */
7516 rn
[dim
] += cg_cm
[cg
][dim
] - corner
[dim_ind
][zone
];
7518 for(i
=dim
+1; i
<DIM
; i
++)
7520 tric_sh
-= cg_cm
[cg
][i
]*v_d
[i
][dim
];
7525 r2
+= rn
[dim
]*rn
[dim
]*skew_fac2_d
;
7526 /* Take care of coupling of the distances
7527 * to the planes along dim0 and dim1 through dim2.
7529 if (dim_ind
== 1 && zonei
== 1)
7531 r2
-= rn
[dim0
]*rn
[dim
]*skew_fac_01
;
7537 rb
[dim
] += cg_cm
[cg
][dim
] - bcorner
[dim_ind
] + tric_sh
;
7540 rb2
+= rb
[dim
]*rb
[dim
]*skew_fac2_d
;
7541 /* Take care of coupling of the distances
7542 * to the planes along dim0 and dim1 through dim2.
7544 if (dim_ind
== 1 && zonei
== 1)
7546 rb2
-= rb
[dim0
]*rb
[dim
]*skew_fac_01
;
7554 ((bDistMB
&& rb2
< r_bcomm2
) ||
7555 (bDist2B
&& r2
< r_bcomm2
)) &&
7557 (GET_CGINFO_BOND_INTER(fr
->cginfo
[cg
]) &&
7558 missing_link(comm
->cglink
,index_gl
[cg
],
7561 /* Make an index to the local charge groups */
7562 if (nsend
+1 > ind
->nalloc
)
7564 ind
->nalloc
= over_alloc_large(nsend
+1);
7565 srenew(ind
->index
,ind
->nalloc
);
7567 if (nsend
+1 > comm
->nalloc_int
)
7569 comm
->nalloc_int
= over_alloc_large(nsend
+1);
7570 srenew(comm
->buf_int
,comm
->nalloc_int
);
7572 ind
->index
[nsend
] = cg
;
7573 comm
->buf_int
[nsend
] = index_gl
[cg
];
7575 vec_rvec_check_alloc(&comm
->vbuf
,nsend
+1);
7577 if (dd
->ci
[dim
] == 0)
7579 /* Correct cg_cm for pbc */
7580 rvec_add(cg_cm
[cg
],box
[dim
],comm
->vbuf
.v
[nsend
]);
7583 comm
->vbuf
.v
[nsend
][YY
] =
7584 box
[YY
][YY
]-comm
->vbuf
.v
[nsend
][YY
];
7585 comm
->vbuf
.v
[nsend
][ZZ
] =
7586 box
[ZZ
][ZZ
]-comm
->vbuf
.v
[nsend
][ZZ
];
7591 copy_rvec(cg_cm
[cg
],comm
->vbuf
.v
[nsend
]);
7594 nat
+= cgindex
[cg
+1] - cgindex
[cg
];
7598 /* Clear the counts in case we do not have pbc */
7599 for(zone
=nzone_send
; zone
<nzone
; zone
++)
7601 ind
->nsend
[zone
] = 0;
7603 ind
->nsend
[nzone
] = nsend
;
7604 ind
->nsend
[nzone
+1] = nat
;
7605 /* Communicate the number of cg's and atoms to receive */
7606 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
7607 ind
->nsend
, nzone
+2,
7608 ind
->nrecv
, nzone
+2);
7610 /* The rvec buffer is also required for atom buffers of size nsend
7611 * in dd_move_x and dd_move_f.
7613 vec_rvec_check_alloc(&comm
->vbuf
,ind
->nsend
[nzone
+1]);
7617 /* We can receive in place if only the last zone is not empty */
7618 for(zone
=0; zone
<nzone
-1; zone
++)
7620 if (ind
->nrecv
[zone
] > 0)
7622 cd
->bInPlace
= FALSE
;
7627 /* The int buffer is only required here for the cg indices */
7628 if (ind
->nrecv
[nzone
] > comm
->nalloc_int2
)
7630 comm
->nalloc_int2
= over_alloc_dd(ind
->nrecv
[nzone
]);
7631 srenew(comm
->buf_int2
,comm
->nalloc_int2
);
7633 /* The rvec buffer is also required for atom buffers
7634 * of size nrecv in dd_move_x and dd_move_f.
7636 i
= max(cd
->ind
[0].nrecv
[nzone
+1],ind
->nrecv
[nzone
+1]);
7637 vec_rvec_check_alloc(&comm
->vbuf2
,i
);
7641 /* Make space for the global cg indices */
7642 if (pos_cg
+ ind
->nrecv
[nzone
] > dd
->cg_nalloc
7643 || dd
->cg_nalloc
== 0)
7645 dd
->cg_nalloc
= over_alloc_dd(pos_cg
+ ind
->nrecv
[nzone
]);
7646 srenew(index_gl
,dd
->cg_nalloc
);
7647 srenew(cgindex
,dd
->cg_nalloc
+1);
7649 /* Communicate the global cg indices */
7652 recv_i
= index_gl
+ pos_cg
;
7656 recv_i
= comm
->buf_int2
;
7658 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
7659 comm
->buf_int
, nsend
,
7660 recv_i
, ind
->nrecv
[nzone
]);
7662 /* Make space for cg_cm */
7663 if (pos_cg
+ ind
->nrecv
[nzone
] > fr
->cg_nalloc
)
7665 dd_realloc_fr_cg(fr
,pos_cg
+ ind
->nrecv
[nzone
]);
7668 /* Communicate cg_cm */
7671 recv_vr
= cg_cm
+ pos_cg
;
7675 recv_vr
= comm
->vbuf2
.v
;
7677 dd_sendrecv_rvec(dd
, dim_ind
, dddirBackward
,
7678 comm
->vbuf
.v
, nsend
,
7679 recv_vr
, ind
->nrecv
[nzone
]);
7681 /* Make the charge group index */
7684 zone
= (p
== 0 ? 0 : nzone
- 1);
7685 while (zone
< nzone
)
7687 for(cg
=0; cg
<ind
->nrecv
[zone
]; cg
++)
7689 cg_gl
= index_gl
[pos_cg
];
7690 fr
->cginfo
[pos_cg
] = ddcginfo(cginfo_mb
,cg_gl
);
7691 nrcg
= GET_CGINFO_NATOMS(fr
->cginfo
[pos_cg
]);
7692 cgindex
[pos_cg
+1] = cgindex
[pos_cg
] + nrcg
;
7695 /* Update the charge group presence,
7696 * so we can use it in the next pass of the loop.
7698 comm
->bLocalCG
[cg_gl
] = TRUE
;
7704 comm
->zone_ncg1
[nzone
+zone
] = ind
->nrecv
[zone
];
7707 zone_cg_range
[nzone
+zone
] = pos_cg
;
7712 /* This part of the code is never executed with bBondComm. */
7713 merge_cg_buffers(nzone
,cd
,p
,zone_cg_range
,
7714 index_gl
,recv_i
,cg_cm
,recv_vr
,
7715 cgindex
,fr
->cginfo_mb
,fr
->cginfo
);
7716 pos_cg
+= ind
->nrecv
[nzone
];
7718 nat_tot
+= ind
->nrecv
[nzone
+1];
7722 /* Store the atom block for easy copying of communication buffers */
7723 make_cell2at_index(cd
,nzone
,zone_cg_range
[nzone
],cgindex
);
7727 dd
->index_gl
= index_gl
;
7728 dd
->cgindex
= cgindex
;
7730 dd
->ncg_tot
= zone_cg_range
[zones
->n
];
7731 dd
->nat_tot
= nat_tot
;
7732 comm
->nat
[ddnatHOME
] = dd
->nat_home
;
7733 for(i
=ddnatZONE
; i
<ddnatNR
; i
++)
7735 comm
->nat
[i
] = dd
->nat_tot
;
7740 /* We don't need to update cginfo, since that was alrady done above.
7741 * So we pass NULL for the forcerec.
7743 dd_set_cginfo(dd
->index_gl
,dd
->ncg_home
,dd
->ncg_tot
,
7744 NULL
,comm
->bLocalCG
);
7749 fprintf(debug
,"Finished setting up DD communication, zones:");
7750 for(c
=0; c
<zones
->n
; c
++)
7752 fprintf(debug
," %d",zones
->cg_range
[c
+1]-zones
->cg_range
[c
]);
7754 fprintf(debug
,"\n");
7758 static void set_cg_boundaries(gmx_domdec_zones_t
*zones
)
7762 for(c
=0; c
<zones
->nizone
; c
++)
7764 zones
->izone
[c
].cg1
= zones
->cg_range
[c
+1];
7765 zones
->izone
[c
].jcg0
= zones
->cg_range
[zones
->izone
[c
].j0
];
7766 zones
->izone
[c
].jcg1
= zones
->cg_range
[zones
->izone
[c
].j1
];
7770 static int comp_cgsort(const void *a
,const void *b
)
7774 gmx_cgsort_t
*cga
,*cgb
;
7775 cga
= (gmx_cgsort_t
*)a
;
7776 cgb
= (gmx_cgsort_t
*)b
;
7778 comp
= cga
->nsc
- cgb
->nsc
;
7781 comp
= cga
->ind_gl
- cgb
->ind_gl
;
7787 static void order_int_cg(int n
,gmx_cgsort_t
*sort
,
7792 /* Order the data */
7795 buf
[i
] = a
[sort
[i
].ind
];
7798 /* Copy back to the original array */
7805 static void order_vec_cg(int n
,gmx_cgsort_t
*sort
,
7810 /* Order the data */
7813 copy_rvec(v
[sort
[i
].ind
],buf
[i
]);
7816 /* Copy back to the original array */
7819 copy_rvec(buf
[i
],v
[i
]);
7823 static void order_vec_atom(int ncg
,int *cgindex
,gmx_cgsort_t
*sort
,
7826 int a
,atot
,cg
,cg0
,cg1
,i
;
7828 /* Order the data */
7830 for(cg
=0; cg
<ncg
; cg
++)
7832 cg0
= cgindex
[sort
[cg
].ind
];
7833 cg1
= cgindex
[sort
[cg
].ind
+1];
7834 for(i
=cg0
; i
<cg1
; i
++)
7836 copy_rvec(v
[i
],buf
[a
]);
7842 /* Copy back to the original array */
7843 for(a
=0; a
<atot
; a
++)
7845 copy_rvec(buf
[a
],v
[a
]);
7849 static void ordered_sort(int nsort2
,gmx_cgsort_t
*sort2
,
7850 int nsort_new
,gmx_cgsort_t
*sort_new
,
7851 gmx_cgsort_t
*sort1
)
7855 /* The new indices are not very ordered, so we qsort them */
7856 qsort_threadsafe(sort_new
,nsort_new
,sizeof(sort_new
[0]),comp_cgsort
);
7858 /* sort2 is already ordered, so now we can merge the two arrays */
7862 while(i2
< nsort2
|| i_new
< nsort_new
)
7866 sort1
[i1
++] = sort_new
[i_new
++];
7868 else if (i_new
== nsort_new
)
7870 sort1
[i1
++] = sort2
[i2
++];
7872 else if (sort2
[i2
].nsc
< sort_new
[i_new
].nsc
||
7873 (sort2
[i2
].nsc
== sort_new
[i_new
].nsc
&&
7874 sort2
[i2
].ind_gl
< sort_new
[i_new
].ind_gl
))
7876 sort1
[i1
++] = sort2
[i2
++];
7880 sort1
[i1
++] = sort_new
[i_new
++];
7885 static void dd_sort_state(gmx_domdec_t
*dd
,int ePBC
,
7886 rvec
*cgcm
,t_forcerec
*fr
,t_state
*state
,
7889 gmx_domdec_sort_t
*sort
;
7890 gmx_cgsort_t
*cgsort
,*sort_i
;
7891 int ncg_new
,nsort2
,nsort_new
,i
,cell_index
,*ibuf
,cgsize
;
7894 sort
= dd
->comm
->sort
;
7896 if (dd
->ncg_home
> sort
->sort_nalloc
)
7898 sort
->sort_nalloc
= over_alloc_dd(dd
->ncg_home
);
7899 srenew(sort
->sort1
,sort
->sort_nalloc
);
7900 srenew(sort
->sort2
,sort
->sort_nalloc
);
7903 if (ncg_home_old
>= 0)
7905 /* The charge groups that remained in the same ns grid cell
7906 * are completely ordered. So we can sort efficiently by sorting
7907 * the charge groups that did move into the stationary list.
7912 for(i
=0; i
<dd
->ncg_home
; i
++)
7914 /* Check if this cg did not move to another node */
7915 cell_index
= fr
->ns
.grid
->cell_index
[i
];
7916 if (cell_index
!= 4*fr
->ns
.grid
->ncells
)
7918 if (i
>= ncg_home_old
|| cell_index
!= sort
->sort1
[i
].nsc
)
7920 /* This cg is new on this node or moved ns grid cell */
7921 if (nsort_new
>= sort
->sort_new_nalloc
)
7923 sort
->sort_new_nalloc
= over_alloc_dd(nsort_new
+1);
7924 srenew(sort
->sort_new
,sort
->sort_new_nalloc
);
7926 sort_i
= &(sort
->sort_new
[nsort_new
++]);
7930 /* This cg did not move */
7931 sort_i
= &(sort
->sort2
[nsort2
++]);
7933 /* Sort on the ns grid cell indices
7934 * and the global topology index
7936 sort_i
->nsc
= cell_index
;
7937 sort_i
->ind_gl
= dd
->index_gl
[i
];
7944 fprintf(debug
,"ordered sort cgs: stationary %d moved %d\n",
7947 /* Sort efficiently */
7948 ordered_sort(nsort2
,sort
->sort2
,nsort_new
,sort
->sort_new
,sort
->sort1
);
7952 cgsort
= sort
->sort1
;
7954 for(i
=0; i
<dd
->ncg_home
; i
++)
7956 /* Sort on the ns grid cell indices
7957 * and the global topology index
7959 cgsort
[i
].nsc
= fr
->ns
.grid
->cell_index
[i
];
7960 cgsort
[i
].ind_gl
= dd
->index_gl
[i
];
7962 if (cgsort
[i
].nsc
!= 4*fr
->ns
.grid
->ncells
)
7969 fprintf(debug
,"qsort cgs: %d new home %d\n",dd
->ncg_home
,ncg_new
);
7971 /* Determine the order of the charge groups using qsort */
7972 qsort_threadsafe(cgsort
,dd
->ncg_home
,sizeof(cgsort
[0]),comp_cgsort
);
7974 cgsort
= sort
->sort1
;
7976 /* We alloc with the old size, since cgindex is still old */
7977 vec_rvec_check_alloc(&dd
->comm
->vbuf
,dd
->cgindex
[dd
->ncg_home
]);
7978 vbuf
= dd
->comm
->vbuf
.v
;
7980 /* Remove the charge groups which are no longer at home here */
7981 dd
->ncg_home
= ncg_new
;
7983 /* Reorder the state */
7984 for(i
=0; i
<estNR
; i
++)
7986 if (EST_DISTR(i
) && state
->flags
& (1<<i
))
7991 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->x
,vbuf
);
7994 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->v
,vbuf
);
7997 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->sd_X
,vbuf
);
8000 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->cg_p
,vbuf
);
8004 case estDISRE_INITF
:
8005 case estDISRE_RM3TAV
:
8006 case estORIRE_INITF
:
8008 /* No ordering required */
8011 gmx_incons("Unknown state entry encountered in dd_sort_state");
8017 order_vec_cg(dd
->ncg_home
,cgsort
,cgcm
,vbuf
);
8019 if (dd
->ncg_home
+1 > sort
->ibuf_nalloc
)
8021 sort
->ibuf_nalloc
= over_alloc_dd(dd
->ncg_home
+1);
8022 srenew(sort
->ibuf
,sort
->ibuf_nalloc
);
8025 /* Reorder the global cg index */
8026 order_int_cg(dd
->ncg_home
,cgsort
,dd
->index_gl
,ibuf
);
8027 /* Reorder the cginfo */
8028 order_int_cg(dd
->ncg_home
,cgsort
,fr
->cginfo
,ibuf
);
8029 /* Rebuild the local cg index */
8031 for(i
=0; i
<dd
->ncg_home
; i
++)
8033 cgsize
= dd
->cgindex
[cgsort
[i
].ind
+1] - dd
->cgindex
[cgsort
[i
].ind
];
8034 ibuf
[i
+1] = ibuf
[i
] + cgsize
;
8036 for(i
=0; i
<dd
->ncg_home
+1; i
++)
8038 dd
->cgindex
[i
] = ibuf
[i
];
8040 /* Set the home atom number */
8041 dd
->nat_home
= dd
->cgindex
[dd
->ncg_home
];
8043 /* Copy the sorted ns cell indices back to the ns grid struct */
8044 for(i
=0; i
<dd
->ncg_home
; i
++)
8046 fr
->ns
.grid
->cell_index
[i
] = cgsort
[i
].nsc
;
8048 fr
->ns
.grid
->nr
= dd
->ncg_home
;
8051 static void add_dd_statistics(gmx_domdec_t
*dd
)
8053 gmx_domdec_comm_t
*comm
;
8058 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8060 comm
->sum_nat
[ddnat
-ddnatZONE
] +=
8061 comm
->nat
[ddnat
] - comm
->nat
[ddnat
-1];
8066 void reset_dd_statistics_counters(gmx_domdec_t
*dd
)
8068 gmx_domdec_comm_t
*comm
;
8073 /* Reset all the statistics and counters for total run counting */
8074 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8076 comm
->sum_nat
[ddnat
-ddnatZONE
] = 0;
8080 comm
->load_step
= 0;
8083 clear_ivec(comm
->load_lim
);
8088 void print_dd_statistics(t_commrec
*cr
,t_inputrec
*ir
,FILE *fplog
)
8090 gmx_domdec_comm_t
*comm
;
8094 comm
= cr
->dd
->comm
;
8096 gmx_sumd(ddnatNR
-ddnatZONE
,comm
->sum_nat
,cr
);
8103 fprintf(fplog
,"\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
8105 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8107 av
= comm
->sum_nat
[ddnat
-ddnatZONE
]/comm
->ndecomp
;
8112 " av. #atoms communicated per step for force: %d x %.1f\n",
8116 if (cr
->dd
->vsite_comm
)
8119 " av. #atoms communicated per step for vsites: %d x %.1f\n",
8120 (EEL_PME(ir
->coulombtype
) || ir
->coulombtype
==eelEWALD
) ? 3 : 2,
8125 if (cr
->dd
->constraint_comm
)
8128 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
8129 1 + ir
->nLincsIter
,av
);
8133 gmx_incons(" Unknown type for DD statistics");
8136 fprintf(fplog
,"\n");
8138 if (comm
->bRecordLoad
&& EI_DYNAMICS(ir
->eI
))
8140 print_dd_load_av(fplog
,cr
->dd
);
8144 void dd_partition_system(FILE *fplog
,
8145 gmx_large_int_t step
,
8149 t_state
*state_global
,
8150 gmx_mtop_t
*top_global
,
8152 t_state
*state_local
,
8155 gmx_localtop_t
*top_local
,
8158 gmx_shellfc_t shellfc
,
8159 gmx_constr_t constr
,
8161 gmx_wallcycle_t wcycle
,
8165 gmx_domdec_comm_t
*comm
;
8166 gmx_ddbox_t ddbox
={0};
8168 gmx_large_int_t step_pcoupl
;
8169 rvec cell_ns_x0
,cell_ns_x1
;
8170 int i
,j
,n
,cg0
=0,ncg_home_old
=-1,nat_f_novirsum
;
8171 bool bBoxChanged
,bNStGlobalComm
,bDoDLB
,bCheckDLB
,bTurnOnDLB
,bLogLoad
;
8172 bool bRedist
,bSortCG
,bResortAll
;
8180 bBoxChanged
= (bMasterState
|| DEFORM(*ir
));
8181 if (ir
->epc
!= epcNO
)
8183 /* With nstcalcenery > 1 pressure coupling happens.
8184 * one step after calculating the energies.
8185 * Box scaling happens at the end of the MD step,
8186 * after the DD partitioning.
8187 * We therefore have to do DLB in the first partitioning
8188 * after an MD step where P-coupling occured.
8189 * We need to determine the last step in which p-coupling occurred.
8190 * MRS -- need to validate this for vv?
8192 n
= ir
->nstcalcenergy
;
8195 step_pcoupl
= step
- 1;
8199 step_pcoupl
= ((step
- 1)/n
)*n
+ 1;
8201 if (step_pcoupl
>= comm
->partition_step
)
8207 bNStGlobalComm
= (step
>= comm
->partition_step
+ nstglobalcomm
);
8209 if (!comm
->bDynLoadBal
)
8215 /* Should we do dynamic load balacing this step?
8216 * Since it requires (possibly expensive) global communication,
8217 * we might want to do DLB less frequently.
8219 if (bBoxChanged
|| ir
->epc
!= epcNO
)
8221 bDoDLB
= bBoxChanged
;
8225 bDoDLB
= bNStGlobalComm
;
8229 /* Check if we have recorded loads on the nodes */
8230 if (comm
->bRecordLoad
&& dd_load_count(comm
))
8232 if (comm
->eDLB
== edlbAUTO
&& !comm
->bDynLoadBal
)
8234 /* Check if we should use DLB at the second partitioning
8235 * and every 100 partitionings,
8236 * so the extra communication cost is negligible.
8238 n
= max(100,nstglobalcomm
);
8239 bCheckDLB
= (comm
->n_load_collect
== 0 ||
8240 comm
->n_load_have
% n
== n
-1);
8247 /* Print load every nstlog, first and last step to the log file */
8248 bLogLoad
= ((ir
->nstlog
> 0 && step
% ir
->nstlog
== 0) ||
8249 comm
->n_load_collect
== 0 ||
8250 (step
+ ir
->nstlist
> ir
->init_step
+ ir
->nsteps
));
8252 /* Avoid extra communication due to verbose screen output
8253 * when nstglobalcomm is set.
8255 if (bDoDLB
|| bLogLoad
|| bCheckDLB
||
8256 (bVerbose
&& (ir
->nstlist
== 0 || nstglobalcomm
<= ir
->nstlist
)))
8258 get_load_distribution(dd
,wcycle
);
8263 dd_print_load(fplog
,dd
,step
-1);
8267 dd_print_load_verbose(dd
);
8270 comm
->n_load_collect
++;
8273 /* Since the timings are node dependent, the master decides */
8277 (dd_force_imb_perf_loss(dd
) >= DD_PERF_LOSS
);
8280 fprintf(debug
,"step %s, imb loss %f\n",
8281 gmx_step_str(step
,sbuf
),
8282 dd_force_imb_perf_loss(dd
));
8285 dd_bcast(dd
,sizeof(bTurnOnDLB
),&bTurnOnDLB
);
8288 turn_on_dlb(fplog
,cr
,step
);
8293 comm
->n_load_have
++;
8296 cgs_gl
= &comm
->cgs_gl
;
8301 /* Clear the old state */
8302 clear_dd_indices(dd
,0,0);
8304 set_ddbox(dd
,bMasterState
,cr
,ir
,state_global
->box
,
8305 TRUE
,cgs_gl
,state_global
->x
,&ddbox
);
8307 get_cg_distribution(fplog
,step
,dd
,cgs_gl
,
8308 state_global
->box
,&ddbox
,state_global
->x
);
8310 dd_distribute_state(dd
,cgs_gl
,
8311 state_global
,state_local
,f
);
8313 dd_make_local_cgs(dd
,&top_local
->cgs
);
8315 if (dd
->ncg_home
> fr
->cg_nalloc
)
8317 dd_realloc_fr_cg(fr
,dd
->ncg_home
);
8319 calc_cgcm(fplog
,0,dd
->ncg_home
,
8320 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
8322 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
8324 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
8328 else if (state_local
->ddp_count
!= dd
->ddp_count
)
8330 if (state_local
->ddp_count
> dd
->ddp_count
)
8332 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local
->ddp_count
,dd
->ddp_count
);
8335 if (state_local
->ddp_count_cg_gl
!= state_local
->ddp_count
)
8337 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local
->ddp_count_cg_gl
,state_local
->ddp_count
);
8340 /* Clear the old state */
8341 clear_dd_indices(dd
,0,0);
8343 /* Build the new indices */
8344 rebuild_cgindex(dd
,cgs_gl
->index
,state_local
);
8345 make_dd_indices(dd
,cgs_gl
->index
,0);
8347 /* Redetermine the cg COMs */
8348 calc_cgcm(fplog
,0,dd
->ncg_home
,
8349 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
8351 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
8353 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
8355 set_ddbox(dd
,bMasterState
,cr
,ir
,state_local
->box
,
8356 TRUE
,&top_local
->cgs
,state_local
->x
,&ddbox
);
8358 bRedist
= comm
->bDynLoadBal
;
8362 /* We have the full state, only redistribute the cgs */
8364 /* Clear the non-home indices */
8365 clear_dd_indices(dd
,dd
->ncg_home
,dd
->nat_home
);
8367 /* Avoid global communication for dim's without pbc and -gcom */
8368 if (!bNStGlobalComm
)
8370 copy_rvec(comm
->box0
,ddbox
.box0
);
8371 copy_rvec(comm
->box_size
,ddbox
.box_size
);
8373 set_ddbox(dd
,bMasterState
,cr
,ir
,state_local
->box
,
8374 bNStGlobalComm
,&top_local
->cgs
,state_local
->x
,&ddbox
);
8379 /* For dim's without pbc and -gcom */
8380 copy_rvec(ddbox
.box0
,comm
->box0
);
8381 copy_rvec(ddbox
.box_size
,comm
->box_size
);
8383 set_dd_cell_sizes(dd
,&ddbox
,dynamic_dd_box(&ddbox
,ir
),bMasterState
,bDoDLB
,
8386 if (comm
->nstDDDumpGrid
> 0 && step
% comm
->nstDDDumpGrid
== 0)
8388 write_dd_grid_pdb("dd_grid",step
,dd
,state_local
->box
,&ddbox
);
8391 /* Check if we should sort the charge groups */
8392 if (comm
->nstSortCG
> 0)
8394 bSortCG
= (bMasterState
||
8395 (bRedist
&& (step
% comm
->nstSortCG
== 0)));
8402 ncg_home_old
= dd
->ncg_home
;
8406 cg0
= dd_redistribute_cg(fplog
,step
,dd
,ddbox
.tric_dir
,
8407 state_local
,f
,fr
,mdatoms
,
8411 get_nsgrid_boundaries(fr
->ns
.grid
,dd
,
8412 state_local
->box
,&ddbox
,&comm
->cell_x0
,&comm
->cell_x1
,
8413 dd
->ncg_home
,fr
->cg_cm
,
8414 cell_ns_x0
,cell_ns_x1
,&grid_density
);
8418 comm_dd_ns_cell_sizes(dd
,&ddbox
,cell_ns_x0
,cell_ns_x1
,step
);
8421 copy_ivec(fr
->ns
.grid
->n
,ncells_old
);
8422 grid_first(fplog
,fr
->ns
.grid
,dd
,&ddbox
,fr
->ePBC
,
8423 state_local
->box
,cell_ns_x0
,cell_ns_x1
,
8424 fr
->rlistlong
,grid_density
);
8425 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
8426 copy_ivec(ddbox
.tric_dir
,comm
->tric_dir
);
8430 /* Sort the state on charge group position.
8431 * This enables exact restarts from this step.
8432 * It also improves performance by about 15% with larger numbers
8433 * of atoms per node.
8436 /* Fill the ns grid with the home cell,
8437 * so we can sort with the indices.
8439 set_zones_ncg_home(dd
);
8440 fill_grid(fplog
,&comm
->zones
,fr
->ns
.grid
,dd
->ncg_home
,
8441 0,dd
->ncg_home
,fr
->cg_cm
);
8443 /* Check if we can user the old order and ns grid cell indices
8444 * of the charge groups to sort the charge groups efficiently.
8446 bResortAll
= (bMasterState
||
8447 fr
->ns
.grid
->n
[XX
] != ncells_old
[XX
] ||
8448 fr
->ns
.grid
->n
[YY
] != ncells_old
[YY
] ||
8449 fr
->ns
.grid
->n
[ZZ
] != ncells_old
[ZZ
]);
8453 fprintf(debug
,"Step %s, sorting the %d home charge groups\n",
8454 gmx_step_str(step
,sbuf
),dd
->ncg_home
);
8456 dd_sort_state(dd
,ir
->ePBC
,fr
->cg_cm
,fr
,state_local
,
8457 bResortAll
? -1 : ncg_home_old
);
8458 /* Rebuild all the indices */
8460 ga2la_clear(dd
->ga2la
);
8463 /* Setup up the communication and communicate the coordinates */
8464 setup_dd_communication(dd
,state_local
->box
,&ddbox
,fr
);
8466 /* Set the indices */
8467 make_dd_indices(dd
,cgs_gl
->index
,cg0
);
8469 /* Set the charge group boundaries for neighbor searching */
8470 set_cg_boundaries(&comm
->zones
);
8473 write_dd_pdb("dd_home",step,"dump",top_global,cr,
8474 -1,state_local->x,state_local->box);
8477 /* Extract a local topology from the global topology */
8478 for(i
=0; i
<dd
->ndim
; i
++)
8480 np
[dd
->dim
[i
]] = comm
->cd
[i
].np
;
8482 dd_make_local_top(fplog
,dd
,&comm
->zones
,dd
->npbcdim
,state_local
->box
,
8483 comm
->cellsize_min
,np
,
8484 fr
,vsite
,top_global
,top_local
);
8486 /* Set up the special atom communication */
8487 n
= comm
->nat
[ddnatZONE
];
8488 for(i
=ddnatZONE
+1; i
<ddnatNR
; i
++)
8493 if (vsite
&& vsite
->n_intercg_vsite
)
8495 n
= dd_make_local_vsites(dd
,n
,top_local
->idef
.il
);
8499 if (dd
->bInterCGcons
)
8501 /* Only for inter-cg constraints we need special code */
8502 n
= dd_make_local_constraints(dd
,n
,top_global
,
8503 constr
,ir
->nProjOrder
,
8504 &top_local
->idef
.il
[F_CONSTR
]);
8508 gmx_incons("Unknown special atom type setup");
8513 /* Make space for the extra coordinates for virtual site
8514 * or constraint communication.
8516 state_local
->natoms
= comm
->nat
[ddnatNR
-1];
8517 if (state_local
->natoms
> state_local
->nalloc
)
8519 dd_realloc_state(state_local
,f
,state_local
->natoms
);
8522 if (fr
->bF_NoVirSum
)
8524 if (vsite
&& vsite
->n_intercg_vsite
)
8526 nat_f_novirsum
= comm
->nat
[ddnatVSITE
];
8530 if (EEL_FULL(ir
->coulombtype
) && dd
->n_intercg_excl
> 0)
8532 nat_f_novirsum
= dd
->nat_tot
;
8536 nat_f_novirsum
= dd
->nat_home
;
8545 /* Set the number of atoms required for the force calculation.
8546 * Forces need to be constrained when using a twin-range setup
8547 * or with energy minimization. For simple simulations we could
8548 * avoid some allocation, zeroing and copying, but this is
8549 * probably not worth the complications ande checking.
8551 forcerec_set_ranges(fr
,dd
->ncg_home
,dd
->ncg_tot
,
8552 dd
->nat_tot
,comm
->nat
[ddnatCON
],nat_f_novirsum
);
8554 /* We make the all mdatoms up to nat_tot_con.
8555 * We could save some work by only setting invmass
8556 * between nat_tot and nat_tot_con.
8558 /* This call also sets the new number of home particles to dd->nat_home */
8559 atoms2md(top_global
,ir
,
8560 comm
->nat
[ddnatCON
],dd
->gatindex
,0,dd
->nat_home
,mdatoms
);
8562 /* Now we have the charges we can sort the FE interactions */
8563 dd_sort_local_top(dd
,mdatoms
,top_local
);
8567 /* Make the local shell stuff, currently no communication is done */
8568 make_local_shells(cr
,mdatoms
,shellfc
);
8571 if (ir
->implicit_solvent
)
8573 make_local_gb(cr
,fr
->born
,ir
->gb_algorithm
);
8576 if (!(cr
->duty
& DUTY_PME
))
8578 /* Send the charges to our PME only node */
8579 gmx_pme_send_q(cr
,mdatoms
->nChargePerturbed
,
8580 mdatoms
->chargeA
,mdatoms
->chargeB
,
8581 dd_pme_maxshift_x(dd
),dd_pme_maxshift_y(dd
));
8586 set_constraints(constr
,top_local
,ir
,mdatoms
,cr
);
8589 if (ir
->ePull
!= epullNO
)
8591 /* Update the local pull groups */
8592 dd_make_local_pull_groups(dd
,ir
->pull
,mdatoms
);
8595 add_dd_statistics(dd
);
8597 /* Make sure we only count the cycles for this DD partitioning */
8598 clear_dd_cycle_counts(dd
);
8600 /* Because the order of the atoms might have changed since
8601 * the last vsite construction, we need to communicate the constructing
8602 * atom coordinates again (for spreading the forces this MD step).
8604 dd_move_x_vsites(dd
,state_local
->box
,state_local
->x
);
8606 if (comm
->nstDDDump
> 0 && step
% comm
->nstDDDump
== 0)
8608 dd_move_x(dd
,state_local
->box
,state_local
->x
);
8609 write_dd_pdb("dd_dump",step
,"dump",top_global
,cr
,
8610 -1,state_local
->x
,state_local
->box
);
8613 /* Store the partitioning step */
8614 comm
->partition_step
= step
;
8616 /* Increase the DD partitioning counter */
8618 /* The state currently matches this DD partitioning count, store it */
8619 state_local
->ddp_count
= dd
->ddp_count
;
8622 /* The DD master node knows the complete cg distribution,
8623 * store the count so we can possibly skip the cg info communication.
8625 comm
->master_cg_ddp_count
= (bSortCG
? 0 : dd
->ddp_count
);
8628 if (comm
->DD_debug
> 0)
8630 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
8631 check_index_consistency(dd
,top_global
->natoms
,ncg_mtop(top_global
),
8632 "after partitioning");