1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
30 #include "gmx_fatal.h"
31 #include "gmx_fatal_collective.h"
34 #include "domdec_network.h"
37 #include "chargegroup.h"
46 #include "pull_rotation.h"
47 #include "gmx_wallcycle.h"
51 #include "mtop_util.h"
53 #include "gmx_ga2la.h"
55 #include "nbnxn_search.h"
57 #include "gmx_omp_nthreads.h"
66 #define DDRANK(dd,rank) (rank)
67 #define DDMASTERRANK(dd) (dd->masterrank)
69 typedef struct gmx_domdec_master
71 /* The cell boundaries */
73 /* The global charge group division */
74 int *ncg
; /* Number of home charge groups for each node */
75 int *index
; /* Index of nnodes+1 into cg */
76 int *cg
; /* Global charge group index */
77 int *nat
; /* Number of home atoms for each node. */
78 int *ibuf
; /* Buffer for communication */
79 rvec
*vbuf
; /* Buffer for state scattering and gathering */
80 } gmx_domdec_master_t
;
84 /* The numbers of charge groups to send and receive for each cell
85 * that requires communication, the last entry contains the total
86 * number of atoms that needs to be communicated.
88 int nsend
[DD_MAXIZONE
+2];
89 int nrecv
[DD_MAXIZONE
+2];
90 /* The charge groups to send */
93 /* The atom range for non-in-place communication */
94 int cell2at0
[DD_MAXIZONE
];
95 int cell2at1
[DD_MAXIZONE
];
100 int np
; /* Number of grid pulses in this dimension */
101 int np_dlb
; /* For dlb, for use with edlbAUTO */
102 gmx_domdec_ind_t
*ind
; /* The indices to communicate, size np */
104 gmx_bool bInPlace
; /* Can we communicate in place? */
105 } gmx_domdec_comm_dim_t
;
109 gmx_bool
*bCellMin
; /* Temp. var.: is this cell size at the limit */
110 real
*cell_f
; /* State var.: cell boundaries, box relative */
111 real
*old_cell_f
; /* Temp. var.: old cell size */
112 real
*cell_f_max0
; /* State var.: max lower boundary, incl neighbors */
113 real
*cell_f_min1
; /* State var.: min upper boundary, incl neighbors */
114 real
*bound_min
; /* Temp. var.: lower limit for cell boundary */
115 real
*bound_max
; /* Temp. var.: upper limit for cell boundary */
116 gmx_bool bLimited
; /* State var.: is DLB limited in this dim and row */
117 real
*buf_ncd
; /* Temp. var. */
120 #define DD_NLOAD_MAX 9
122 /* Here floats are accurate enough, since these variables
123 * only influence the load balancing, not the actual MD results.
150 gmx_cgsort_t
*sort_new
;
162 /* This enum determines the order of the coordinates.
163 * ddnatHOME and ddnatZONE should be first and second,
164 * the others can be ordered as wanted.
166 enum { ddnatHOME
, ddnatZONE
, ddnatVSITE
, ddnatCON
, ddnatNR
};
168 enum { edlbAUTO
, edlbNO
, edlbYES
, edlbNR
};
169 const char *edlb_names
[edlbNR
] = { "auto", "no", "yes" };
173 int dim
; /* The dimension */
174 gmx_bool dim_match
;/* Tells if DD and PME dims match */
175 int nslab
; /* The number of PME slabs in this dimension */
176 real
*slb_dim_f
; /* Cell sizes for determining the PME comm. with SLB */
177 int *pp_min
; /* The minimum pp node location, size nslab */
178 int *pp_max
; /* The maximum pp node location,size nslab */
179 int maxshift
; /* The maximum shift for coordinate redistribution in PME */
184 real min0
; /* The minimum bottom of this zone */
185 real max1
; /* The maximum top of this zone */
186 real min1
; /* The minimum top of this zone */
187 real mch0
; /* The maximum bottom communicaton height for this zone */
188 real mch1
; /* The maximum top communicaton height for this zone */
189 real p1_0
; /* The bottom value of the first cell in this zone */
190 real p1_1
; /* The top value of the first cell in this zone */
195 gmx_domdec_ind_t ind
;
202 } dd_comm_setup_work_t
;
204 typedef struct gmx_domdec_comm
206 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
207 * unless stated otherwise.
210 /* The number of decomposition dimensions for PME, 0: no PME */
212 /* The number of nodes doing PME (PP/PME or only PME) */
216 /* The communication setup including the PME only nodes */
217 gmx_bool bCartesianPP_PME
;
220 int *pmenodes
; /* size npmenodes */
221 int *ddindex2simnodeid
; /* size npmenodes, only with bCartesianPP
222 * but with bCartesianPP_PME */
223 gmx_ddpme_t ddpme
[2];
225 /* The DD particle-particle nodes only */
226 gmx_bool bCartesianPP
;
227 int *ddindex2ddnodeid
; /* size npmenode, only with bCartesianPP_PME */
229 /* The global charge groups */
232 /* Should we sort the cgs */
234 gmx_domdec_sort_t
*sort
;
236 /* Are there charge groups? */
239 /* Are there bonded and multi-body interactions between charge groups? */
240 gmx_bool bInterCGBondeds
;
241 gmx_bool bInterCGMultiBody
;
243 /* Data for the optional bonded interaction atom communication range */
250 /* Are we actually using DLB? */
251 gmx_bool bDynLoadBal
;
253 /* Cell sizes for static load balancing, first index cartesian */
256 /* The width of the communicated boundaries */
259 /* The minimum cell size (including triclinic correction) */
261 /* For dlb, for use with edlbAUTO */
262 rvec cellsize_min_dlb
;
263 /* The lower limit for the DD cell size with DLB */
265 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
266 gmx_bool bVacDLBNoLimit
;
268 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
270 /* box0 and box_size are required with dim's without pbc and -gcom */
274 /* The cell boundaries */
278 /* The old location of the cell boundaries, to check cg displacements */
282 /* The communication setup and charge group boundaries for the zones */
283 gmx_domdec_zones_t zones
;
285 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
286 * cell boundaries of neighboring cells for dynamic load balancing.
288 gmx_ddzone_t zone_d1
[2];
289 gmx_ddzone_t zone_d2
[2][2];
291 /* The coordinate/force communication setup and indices */
292 gmx_domdec_comm_dim_t cd
[DIM
];
293 /* The maximum number of cells to communicate with in one dimension */
296 /* Which cg distribution is stored on the master node */
297 int master_cg_ddp_count
;
299 /* The number of cg's received from the direct neighbors */
300 int zone_ncg1
[DD_MAXZONE
];
302 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
305 /* Array for signalling if atoms have moved to another domain */
309 /* Communication buffer for general use */
313 /* Communication buffer for general use */
316 /* Temporary storage for thread parallel communication setup */
318 dd_comm_setup_work_t
*dth
;
320 /* Communication buffers only used with multiple grid pulses */
325 /* Communication buffers for local redistribution */
327 int cggl_flag_nalloc
[DIM
*2];
329 int cgcm_state_nalloc
[DIM
*2];
331 /* Cell sizes for dynamic load balancing */
332 gmx_domdec_root_t
**root
;
336 real cell_f_max0
[DIM
];
337 real cell_f_min1
[DIM
];
339 /* Stuff for load communication */
340 gmx_bool bRecordLoad
;
341 gmx_domdec_load_t
*load
;
343 MPI_Comm
*mpi_comm_load
;
346 /* Maximum DLB scaling per load balancing step in percent */
350 float cycl
[ddCyclNr
];
351 int cycl_n
[ddCyclNr
];
352 float cycl_max
[ddCyclNr
];
353 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
357 /* Have often have did we have load measurements */
359 /* Have often have we collected the load measurements */
363 double sum_nat
[ddnatNR
-ddnatZONE
];
373 /* The last partition step */
374 gmx_large_int_t partition_step
;
382 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
385 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
386 #define DD_FLAG_NRCG 65535
387 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
388 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
390 /* Zone permutation required to obtain consecutive charge groups
391 * for neighbor searching.
393 static const int zone_perm
[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
395 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
396 * components see only j zones with that component 0.
399 /* The DD zone order */
400 static const ivec dd_zo
[DD_MAXZONE
] =
401 {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
406 static const ivec dd_zp3
[dd_zp3n
] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
411 static const ivec dd_zp2
[dd_zp2n
] = {{0,0,4},{1,3,4}};
416 static const ivec dd_zp1
[dd_zp1n
] = {{0,0,2}};
418 /* Factors used to avoid problems due to rounding issues */
419 #define DD_CELL_MARGIN 1.0001
420 #define DD_CELL_MARGIN2 1.00005
421 /* Factor to account for pressure scaling during nstlist steps */
422 #define DD_PRES_SCALE_MARGIN 1.02
424 /* Allowed performance loss before we DLB or warn */
425 #define DD_PERF_LOSS 0.05
427 #define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
429 /* Use separate MPI send and receive commands
430 * when nnodes <= GMX_DD_NNODES_SENDRECV.
431 * This saves memory (and some copying for small nnodes).
432 * For high parallelization scatter and gather calls are used.
434 #define GMX_DD_NNODES_SENDRECV 4
438 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
440 static void index2xyz(ivec nc,int ind,ivec xyz)
442 xyz[XX] = ind % nc[XX];
443 xyz[YY] = (ind / nc[XX]) % nc[YY];
444 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
448 /* This order is required to minimize the coordinate communication in PME
449 * which uses decomposition in the x direction.
451 #define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
453 static void ddindex2xyz(ivec nc
,int ind
,ivec xyz
)
455 xyz
[XX
] = ind
/ (nc
[YY
]*nc
[ZZ
]);
456 xyz
[YY
] = (ind
/ nc
[ZZ
]) % nc
[YY
];
457 xyz
[ZZ
] = ind
% nc
[ZZ
];
460 static int ddcoord2ddnodeid(gmx_domdec_t
*dd
,ivec c
)
465 ddindex
= dd_index(dd
->nc
,c
);
466 if (dd
->comm
->bCartesianPP_PME
)
468 ddnodeid
= dd
->comm
->ddindex2ddnodeid
[ddindex
];
470 else if (dd
->comm
->bCartesianPP
)
473 MPI_Cart_rank(dd
->mpi_comm_all
,c
,&ddnodeid
);
484 static gmx_bool
dynamic_dd_box(gmx_ddbox_t
*ddbox
,t_inputrec
*ir
)
486 return (ddbox
->nboundeddim
< DIM
|| DYNAMIC_BOX(*ir
));
489 int ddglatnr(gmx_domdec_t
*dd
,int i
)
499 if (i
>= dd
->comm
->nat
[ddnatNR
-1])
501 gmx_fatal(FARGS
,"glatnr called with %d, which is larger than the local number of atoms (%d)",i
,dd
->comm
->nat
[ddnatNR
-1]);
503 atnr
= dd
->gatindex
[i
] + 1;
509 t_block
*dd_charge_groups_global(gmx_domdec_t
*dd
)
511 return &dd
->comm
->cgs_gl
;
514 static void vec_rvec_init(vec_rvec_t
*v
)
520 static void vec_rvec_check_alloc(vec_rvec_t
*v
,int n
)
524 v
->nalloc
= over_alloc_dd(n
);
525 srenew(v
->v
,v
->nalloc
);
529 void dd_store_state(gmx_domdec_t
*dd
,t_state
*state
)
533 if (state
->ddp_count
!= dd
->ddp_count
)
535 gmx_incons("The state does not the domain decomposition state");
538 state
->ncg_gl
= dd
->ncg_home
;
539 if (state
->ncg_gl
> state
->cg_gl_nalloc
)
541 state
->cg_gl_nalloc
= over_alloc_dd(state
->ncg_gl
);
542 srenew(state
->cg_gl
,state
->cg_gl_nalloc
);
544 for(i
=0; i
<state
->ncg_gl
; i
++)
546 state
->cg_gl
[i
] = dd
->index_gl
[i
];
549 state
->ddp_count_cg_gl
= dd
->ddp_count
;
552 gmx_domdec_zones_t
*domdec_zones(gmx_domdec_t
*dd
)
554 return &dd
->comm
->zones
;
557 void dd_get_ns_ranges(gmx_domdec_t
*dd
,int icg
,
558 int *jcg0
,int *jcg1
,ivec shift0
,ivec shift1
)
560 gmx_domdec_zones_t
*zones
;
563 zones
= &dd
->comm
->zones
;
566 while (icg
>= zones
->izone
[izone
].cg1
)
575 else if (izone
< zones
->nizone
)
577 *jcg0
= zones
->izone
[izone
].jcg0
;
581 gmx_fatal(FARGS
,"DD icg %d out of range: izone (%d) >= nizone (%d)",
582 icg
,izone
,zones
->nizone
);
585 *jcg1
= zones
->izone
[izone
].jcg1
;
587 for(d
=0; d
<dd
->ndim
; d
++)
590 shift0
[dim
] = zones
->izone
[izone
].shift0
[dim
];
591 shift1
[dim
] = zones
->izone
[izone
].shift1
[dim
];
592 if (dd
->comm
->tric_dir
[dim
] || (dd
->bGridJump
&& d
> 0))
594 /* A conservative approach, this can be optimized */
601 int dd_natoms_vsite(gmx_domdec_t
*dd
)
603 return dd
->comm
->nat
[ddnatVSITE
];
606 void dd_get_constraint_range(gmx_domdec_t
*dd
,int *at_start
,int *at_end
)
608 *at_start
= dd
->comm
->nat
[ddnatCON
-1];
609 *at_end
= dd
->comm
->nat
[ddnatCON
];
612 void dd_move_x(gmx_domdec_t
*dd
,matrix box
,rvec x
[])
614 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
616 gmx_domdec_comm_t
*comm
;
617 gmx_domdec_comm_dim_t
*cd
;
618 gmx_domdec_ind_t
*ind
;
619 rvec shift
={0,0,0},*buf
,*rbuf
;
620 gmx_bool bPBC
,bScrew
;
624 cgindex
= dd
->cgindex
;
629 nat_tot
= dd
->nat_home
;
630 for(d
=0; d
<dd
->ndim
; d
++)
632 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
633 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
636 copy_rvec(box
[dd
->dim
[d
]],shift
);
639 for(p
=0; p
<cd
->np
; p
++)
646 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
648 at0
= cgindex
[index
[i
]];
649 at1
= cgindex
[index
[i
]+1];
650 for(j
=at0
; j
<at1
; j
++)
652 copy_rvec(x
[j
],buf
[n
]);
659 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
661 at0
= cgindex
[index
[i
]];
662 at1
= cgindex
[index
[i
]+1];
663 for(j
=at0
; j
<at1
; j
++)
665 /* We need to shift the coordinates */
666 rvec_add(x
[j
],shift
,buf
[n
]);
673 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
675 at0
= cgindex
[index
[i
]];
676 at1
= cgindex
[index
[i
]+1];
677 for(j
=at0
; j
<at1
; j
++)
680 buf
[n
][XX
] = x
[j
][XX
] + shift
[XX
];
682 * This operation requires a special shift force
683 * treatment, which is performed in calc_vir.
685 buf
[n
][YY
] = box
[YY
][YY
] - x
[j
][YY
];
686 buf
[n
][ZZ
] = box
[ZZ
][ZZ
] - x
[j
][ZZ
];
698 rbuf
= comm
->vbuf2
.v
;
700 /* Send and receive the coordinates */
701 dd_sendrecv_rvec(dd
, d
, dddirBackward
,
702 buf
, ind
->nsend
[nzone
+1],
703 rbuf
, ind
->nrecv
[nzone
+1]);
707 for(zone
=0; zone
<nzone
; zone
++)
709 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
711 copy_rvec(rbuf
[j
],x
[i
]);
716 nat_tot
+= ind
->nrecv
[nzone
+1];
722 void dd_move_f(gmx_domdec_t
*dd
,rvec f
[],rvec
*fshift
)
724 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
726 gmx_domdec_comm_t
*comm
;
727 gmx_domdec_comm_dim_t
*cd
;
728 gmx_domdec_ind_t
*ind
;
732 gmx_bool bPBC
,bScrew
;
736 cgindex
= dd
->cgindex
;
741 nzone
= comm
->zones
.n
/2;
742 nat_tot
= dd
->nat_tot
;
743 for(d
=dd
->ndim
-1; d
>=0; d
--)
745 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
746 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
747 if (fshift
== NULL
&& !bScrew
)
751 /* Determine which shift vector we need */
757 for(p
=cd
->np
-1; p
>=0; p
--) {
759 nat_tot
-= ind
->nrecv
[nzone
+1];
766 sbuf
= comm
->vbuf2
.v
;
768 for(zone
=0; zone
<nzone
; zone
++)
770 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
772 copy_rvec(f
[i
],sbuf
[j
]);
777 /* Communicate the forces */
778 dd_sendrecv_rvec(dd
, d
, dddirForward
,
779 sbuf
, ind
->nrecv
[nzone
+1],
780 buf
, ind
->nsend
[nzone
+1]);
782 /* Add the received forces */
786 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
788 at0
= cgindex
[index
[i
]];
789 at1
= cgindex
[index
[i
]+1];
790 for(j
=at0
; j
<at1
; j
++)
792 rvec_inc(f
[j
],buf
[n
]);
799 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
801 at0
= cgindex
[index
[i
]];
802 at1
= cgindex
[index
[i
]+1];
803 for(j
=at0
; j
<at1
; j
++)
805 rvec_inc(f
[j
],buf
[n
]);
806 /* Add this force to the shift force */
807 rvec_inc(fshift
[is
],buf
[n
]);
814 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
816 at0
= cgindex
[index
[i
]];
817 at1
= cgindex
[index
[i
]+1];
818 for(j
=at0
; j
<at1
; j
++)
820 /* Rotate the force */
821 f
[j
][XX
] += buf
[n
][XX
];
822 f
[j
][YY
] -= buf
[n
][YY
];
823 f
[j
][ZZ
] -= buf
[n
][ZZ
];
826 /* Add this force to the shift force */
827 rvec_inc(fshift
[is
],buf
[n
]);
838 void dd_atom_spread_real(gmx_domdec_t
*dd
,real v
[])
840 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
842 gmx_domdec_comm_t
*comm
;
843 gmx_domdec_comm_dim_t
*cd
;
844 gmx_domdec_ind_t
*ind
;
849 cgindex
= dd
->cgindex
;
851 buf
= &comm
->vbuf
.v
[0][0];
854 nat_tot
= dd
->nat_home
;
855 for(d
=0; d
<dd
->ndim
; d
++)
858 for(p
=0; p
<cd
->np
; p
++)
863 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
865 at0
= cgindex
[index
[i
]];
866 at1
= cgindex
[index
[i
]+1];
867 for(j
=at0
; j
<at1
; j
++)
880 rbuf
= &comm
->vbuf2
.v
[0][0];
882 /* Send and receive the coordinates */
883 dd_sendrecv_real(dd
, d
, dddirBackward
,
884 buf
, ind
->nsend
[nzone
+1],
885 rbuf
, ind
->nrecv
[nzone
+1]);
889 for(zone
=0; zone
<nzone
; zone
++)
891 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
898 nat_tot
+= ind
->nrecv
[nzone
+1];
904 void dd_atom_sum_real(gmx_domdec_t
*dd
,real v
[])
906 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
908 gmx_domdec_comm_t
*comm
;
909 gmx_domdec_comm_dim_t
*cd
;
910 gmx_domdec_ind_t
*ind
;
915 cgindex
= dd
->cgindex
;
917 buf
= &comm
->vbuf
.v
[0][0];
920 nzone
= comm
->zones
.n
/2;
921 nat_tot
= dd
->nat_tot
;
922 for(d
=dd
->ndim
-1; d
>=0; d
--)
925 for(p
=cd
->np
-1; p
>=0; p
--) {
927 nat_tot
-= ind
->nrecv
[nzone
+1];
934 sbuf
= &comm
->vbuf2
.v
[0][0];
936 for(zone
=0; zone
<nzone
; zone
++)
938 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
945 /* Communicate the forces */
946 dd_sendrecv_real(dd
, d
, dddirForward
,
947 sbuf
, ind
->nrecv
[nzone
+1],
948 buf
, ind
->nsend
[nzone
+1]);
950 /* Add the received forces */
952 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
954 at0
= cgindex
[index
[i
]];
955 at1
= cgindex
[index
[i
]+1];
956 for(j
=at0
; j
<at1
; j
++)
967 static void print_ddzone(FILE *fp
,int d
,int i
,int j
,gmx_ddzone_t
*zone
)
969 fprintf(fp
,"zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
971 zone
->min0
,zone
->max1
,
972 zone
->mch0
,zone
->mch0
,
973 zone
->p1_0
,zone
->p1_1
);
977 #define DDZONECOMM_MAXZONE 5
978 #define DDZONECOMM_BUFSIZE 3
980 static void dd_sendrecv_ddzone(const gmx_domdec_t
*dd
,
981 int ddimind
,int direction
,
982 gmx_ddzone_t
*buf_s
,int n_s
,
983 gmx_ddzone_t
*buf_r
,int n_r
)
985 #define ZBS DDZONECOMM_BUFSIZE
986 rvec vbuf_s
[DDZONECOMM_MAXZONE
*ZBS
];
987 rvec vbuf_r
[DDZONECOMM_MAXZONE
*ZBS
];
992 vbuf_s
[i
*ZBS
][0] = buf_s
[i
].min0
;
993 vbuf_s
[i
*ZBS
][1] = buf_s
[i
].max1
;
994 vbuf_s
[i
*ZBS
][2] = buf_s
[i
].min1
;
995 vbuf_s
[i
*ZBS
+1][0] = buf_s
[i
].mch0
;
996 vbuf_s
[i
*ZBS
+1][1] = buf_s
[i
].mch1
;
997 vbuf_s
[i
*ZBS
+1][2] = 0;
998 vbuf_s
[i
*ZBS
+2][0] = buf_s
[i
].p1_0
;
999 vbuf_s
[i
*ZBS
+2][1] = buf_s
[i
].p1_1
;
1000 vbuf_s
[i
*ZBS
+2][2] = 0;
1003 dd_sendrecv_rvec(dd
, ddimind
, direction
,
1007 for(i
=0; i
<n_r
; i
++)
1009 buf_r
[i
].min0
= vbuf_r
[i
*ZBS
][0];
1010 buf_r
[i
].max1
= vbuf_r
[i
*ZBS
][1];
1011 buf_r
[i
].min1
= vbuf_r
[i
*ZBS
][2];
1012 buf_r
[i
].mch0
= vbuf_r
[i
*ZBS
+1][0];
1013 buf_r
[i
].mch1
= vbuf_r
[i
*ZBS
+1][1];
1014 buf_r
[i
].p1_0
= vbuf_r
[i
*ZBS
+2][0];
1015 buf_r
[i
].p1_1
= vbuf_r
[i
*ZBS
+2][1];
1021 static void dd_move_cellx(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
,
1022 rvec cell_ns_x0
,rvec cell_ns_x1
)
1024 int d
,d1
,dim
,dim1
,pos
,buf_size
,i
,j
,k
,p
,npulse
,npulse_min
;
1026 gmx_ddzone_t buf_s
[DDZONECOMM_MAXZONE
];
1027 gmx_ddzone_t buf_r
[DDZONECOMM_MAXZONE
];
1028 gmx_ddzone_t buf_e
[DDZONECOMM_MAXZONE
];
1029 rvec extr_s
[2],extr_r
[2];
1031 real dist_d
,c
=0,det
;
1032 gmx_domdec_comm_t
*comm
;
1037 for(d
=1; d
<dd
->ndim
; d
++)
1040 zp
= (d
== 1) ? &comm
->zone_d1
[0] : &comm
->zone_d2
[0][0];
1041 zp
->min0
= cell_ns_x0
[dim
];
1042 zp
->max1
= cell_ns_x1
[dim
];
1043 zp
->min1
= cell_ns_x1
[dim
];
1044 zp
->mch0
= cell_ns_x0
[dim
];
1045 zp
->mch1
= cell_ns_x1
[dim
];
1046 zp
->p1_0
= cell_ns_x0
[dim
];
1047 zp
->p1_1
= cell_ns_x1
[dim
];
1050 for(d
=dd
->ndim
-2; d
>=0; d
--)
1053 bPBC
= (dim
< ddbox
->npbcdim
);
1055 /* Use an rvec to store two reals */
1056 extr_s
[d
][0] = comm
->cell_f0
[d
+1];
1057 extr_s
[d
][1] = comm
->cell_f1
[d
+1];
1058 extr_s
[d
][2] = comm
->cell_f1
[d
+1];
1061 /* Store the extremes in the backward sending buffer,
1062 * so the get updated separately from the forward communication.
1064 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1066 /* We invert the order to be able to use the same loop for buf_e */
1067 buf_s
[pos
].min0
= extr_s
[d1
][1];
1068 buf_s
[pos
].max1
= extr_s
[d1
][0];
1069 buf_s
[pos
].min1
= extr_s
[d1
][2];
1070 buf_s
[pos
].mch0
= 0;
1071 buf_s
[pos
].mch1
= 0;
1072 /* Store the cell corner of the dimension we communicate along */
1073 buf_s
[pos
].p1_0
= comm
->cell_x0
[dim
];
1074 buf_s
[pos
].p1_1
= 0;
1078 buf_s
[pos
] = (dd
->ndim
== 2) ? comm
->zone_d1
[0] : comm
->zone_d2
[0][0];
1081 if (dd
->ndim
== 3 && d
== 0)
1083 buf_s
[pos
] = comm
->zone_d2
[0][1];
1085 buf_s
[pos
] = comm
->zone_d1
[0];
1089 /* We only need to communicate the extremes
1090 * in the forward direction
1092 npulse
= comm
->cd
[d
].np
;
1095 /* Take the minimum to avoid double communication */
1096 npulse_min
= min(npulse
,dd
->nc
[dim
]-1-npulse
);
1100 /* Without PBC we should really not communicate over
1101 * the boundaries, but implementing that complicates
1102 * the communication setup and therefore we simply
1103 * do all communication, but ignore some data.
1105 npulse_min
= npulse
;
1107 for(p
=0; p
<npulse_min
; p
++)
1109 /* Communicate the extremes forward */
1110 bUse
= (bPBC
|| dd
->ci
[dim
] > 0);
1112 dd_sendrecv_rvec(dd
, d
, dddirForward
,
1113 extr_s
+d
, dd
->ndim
-d
-1,
1114 extr_r
+d
, dd
->ndim
-d
-1);
1118 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1120 extr_s
[d1
][0] = max(extr_s
[d1
][0],extr_r
[d1
][0]);
1121 extr_s
[d1
][1] = min(extr_s
[d1
][1],extr_r
[d1
][1]);
1122 extr_s
[d1
][2] = min(extr_s
[d1
][2],extr_r
[d1
][2]);
1128 for(p
=0; p
<npulse
; p
++)
1130 /* Communicate all the zone information backward */
1131 bUse
= (bPBC
|| dd
->ci
[dim
] < dd
->nc
[dim
] - 1);
1133 dd_sendrecv_ddzone(dd
, d
, dddirBackward
,
1140 for(d1
=d
+1; d1
<dd
->ndim
; d1
++)
1142 /* Determine the decrease of maximum required
1143 * communication height along d1 due to the distance along d,
1144 * this avoids a lot of useless atom communication.
1146 dist_d
= comm
->cell_x1
[dim
] - buf_r
[0].p1_0
;
1148 if (ddbox
->tric_dir
[dim
])
1150 /* c is the off-diagonal coupling between the cell planes
1151 * along directions d and d1.
1153 c
= ddbox
->v
[dim
][dd
->dim
[d1
]][dim
];
1159 det
= (1 + c
*c
)*comm
->cutoff
*comm
->cutoff
- dist_d
*dist_d
;
1162 dh
[d1
] = comm
->cutoff
- (c
*dist_d
+ sqrt(det
))/(1 + c
*c
);
1166 /* A negative value signals out of range */
1172 /* Accumulate the extremes over all pulses */
1173 for(i
=0; i
<buf_size
; i
++)
1177 buf_e
[i
] = buf_r
[i
];
1183 buf_e
[i
].min0
= min(buf_e
[i
].min0
,buf_r
[i
].min0
);
1184 buf_e
[i
].max1
= max(buf_e
[i
].max1
,buf_r
[i
].max1
);
1185 buf_e
[i
].min1
= min(buf_e
[i
].min1
,buf_r
[i
].min1
);
1188 if (dd
->ndim
== 3 && d
== 0 && i
== buf_size
- 1)
1196 if (bUse
&& dh
[d1
] >= 0)
1198 buf_e
[i
].mch0
= max(buf_e
[i
].mch0
,buf_r
[i
].mch0
-dh
[d1
]);
1199 buf_e
[i
].mch1
= max(buf_e
[i
].mch1
,buf_r
[i
].mch1
-dh
[d1
]);
1202 /* Copy the received buffer to the send buffer,
1203 * to pass the data through with the next pulse.
1205 buf_s
[i
] = buf_r
[i
];
1207 if (((bPBC
|| dd
->ci
[dim
]+npulse
< dd
->nc
[dim
]) && p
== npulse
-1) ||
1208 (!bPBC
&& dd
->ci
[dim
]+1+p
== dd
->nc
[dim
]-1))
1210 /* Store the extremes */
1213 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1215 extr_s
[d1
][1] = min(extr_s
[d1
][1],buf_e
[pos
].min0
);
1216 extr_s
[d1
][0] = max(extr_s
[d1
][0],buf_e
[pos
].max1
);
1217 extr_s
[d1
][2] = min(extr_s
[d1
][2],buf_e
[pos
].min1
);
1221 if (d
== 1 || (d
== 0 && dd
->ndim
== 3))
1225 comm
->zone_d2
[1-d
][i
] = buf_e
[pos
];
1231 comm
->zone_d1
[1] = buf_e
[pos
];
1245 print_ddzone(debug
,1,i
,0,&comm
->zone_d1
[i
]);
1247 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d1
[i
].min0
);
1248 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d1
[i
].max1
);
1260 print_ddzone(debug
,2,i
,j
,&comm
->zone_d2
[i
][j
]);
1262 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d2
[i
][j
].min0
);
1263 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d2
[i
][j
].max1
);
1267 for(d
=1; d
<dd
->ndim
; d
++)
1269 comm
->cell_f_max0
[d
] = extr_s
[d
-1][0];
1270 comm
->cell_f_min1
[d
] = extr_s
[d
-1][1];
1273 fprintf(debug
,"Cell fraction d %d, max0 %f, min1 %f\n",
1274 d
,comm
->cell_f_max0
[d
],comm
->cell_f_min1
[d
]);
1279 static void dd_collect_cg(gmx_domdec_t
*dd
,
1280 t_state
*state_local
)
1282 gmx_domdec_master_t
*ma
=NULL
;
1283 int buf2
[2],*ibuf
,i
,ncg_home
=0,*cg
=NULL
,nat_home
=0;
1286 if (state_local
->ddp_count
== dd
->comm
->master_cg_ddp_count
)
1288 /* The master has the correct distribution */
1292 if (state_local
->ddp_count
== dd
->ddp_count
)
1294 ncg_home
= dd
->ncg_home
;
1296 nat_home
= dd
->nat_home
;
1298 else if (state_local
->ddp_count_cg_gl
== state_local
->ddp_count
)
1300 cgs_gl
= &dd
->comm
->cgs_gl
;
1302 ncg_home
= state_local
->ncg_gl
;
1303 cg
= state_local
->cg_gl
;
1305 for(i
=0; i
<ncg_home
; i
++)
1307 nat_home
+= cgs_gl
->index
[cg
[i
]+1] - cgs_gl
->index
[cg
[i
]];
1312 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1315 buf2
[0] = dd
->ncg_home
;
1316 buf2
[1] = dd
->nat_home
;
1326 /* Collect the charge group and atom counts on the master */
1327 dd_gather(dd
,2*sizeof(int),buf2
,ibuf
);
1332 for(i
=0; i
<dd
->nnodes
; i
++)
1334 ma
->ncg
[i
] = ma
->ibuf
[2*i
];
1335 ma
->nat
[i
] = ma
->ibuf
[2*i
+1];
1336 ma
->index
[i
+1] = ma
->index
[i
] + ma
->ncg
[i
];
1339 /* Make byte counts and indices */
1340 for(i
=0; i
<dd
->nnodes
; i
++)
1342 ma
->ibuf
[i
] = ma
->ncg
[i
]*sizeof(int);
1343 ma
->ibuf
[dd
->nnodes
+i
] = ma
->index
[i
]*sizeof(int);
1347 fprintf(debug
,"Initial charge group distribution: ");
1348 for(i
=0; i
<dd
->nnodes
; i
++)
1349 fprintf(debug
," %d",ma
->ncg
[i
]);
1350 fprintf(debug
,"\n");
1354 /* Collect the charge group indices on the master */
1356 dd
->ncg_home
*sizeof(int),dd
->index_gl
,
1357 DDMASTER(dd
) ? ma
->ibuf
: NULL
,
1358 DDMASTER(dd
) ? ma
->ibuf
+dd
->nnodes
: NULL
,
1359 DDMASTER(dd
) ? ma
->cg
: NULL
);
1361 dd
->comm
->master_cg_ddp_count
= state_local
->ddp_count
;
1364 static void dd_collect_vec_sendrecv(gmx_domdec_t
*dd
,
1367 gmx_domdec_master_t
*ma
;
1368 int n
,i
,c
,a
,nalloc
=0;
1377 MPI_Send(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1378 dd
->rank
,dd
->mpi_comm_all
);
1381 /* Copy the master coordinates to the global array */
1382 cgs_gl
= &dd
->comm
->cgs_gl
;
1384 n
= DDMASTERRANK(dd
);
1386 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1388 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1390 copy_rvec(lv
[a
++],v
[c
]);
1394 for(n
=0; n
<dd
->nnodes
; n
++)
1398 if (ma
->nat
[n
] > nalloc
)
1400 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1404 MPI_Recv(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,DDRANK(dd
,n
),
1405 n
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1408 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1410 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1412 copy_rvec(buf
[a
++],v
[c
]);
1421 static void get_commbuffer_counts(gmx_domdec_t
*dd
,
1422 int **counts
,int **disps
)
1424 gmx_domdec_master_t
*ma
;
1429 /* Make the rvec count and displacment arrays */
1431 *disps
= ma
->ibuf
+ dd
->nnodes
;
1432 for(n
=0; n
<dd
->nnodes
; n
++)
1434 (*counts
)[n
] = ma
->nat
[n
]*sizeof(rvec
);
1435 (*disps
)[n
] = (n
== 0 ? 0 : (*disps
)[n
-1] + (*counts
)[n
-1]);
1439 static void dd_collect_vec_gatherv(gmx_domdec_t
*dd
,
1442 gmx_domdec_master_t
*ma
;
1443 int *rcounts
=NULL
,*disps
=NULL
;
1452 get_commbuffer_counts(dd
,&rcounts
,&disps
);
1457 dd_gatherv(dd
,dd
->nat_home
*sizeof(rvec
),lv
,rcounts
,disps
,buf
);
1461 cgs_gl
= &dd
->comm
->cgs_gl
;
1464 for(n
=0; n
<dd
->nnodes
; n
++)
1466 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1468 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1470 copy_rvec(buf
[a
++],v
[c
]);
1477 void dd_collect_vec(gmx_domdec_t
*dd
,
1478 t_state
*state_local
,rvec
*lv
,rvec
*v
)
1480 gmx_domdec_master_t
*ma
;
1481 int n
,i
,c
,a
,nalloc
=0;
1484 dd_collect_cg(dd
,state_local
);
1486 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1488 dd_collect_vec_sendrecv(dd
,lv
,v
);
1492 dd_collect_vec_gatherv(dd
,lv
,v
);
1497 void dd_collect_state(gmx_domdec_t
*dd
,
1498 t_state
*state_local
,t_state
*state
)
1502 nh
= state
->nhchainlength
;
1506 for (i
=0;i
<efptNR
;i
++) {
1507 state
->lambda
[i
] = state_local
->lambda
[i
];
1509 state
->fep_state
= state_local
->fep_state
;
1510 state
->veta
= state_local
->veta
;
1511 state
->vol0
= state_local
->vol0
;
1512 copy_mat(state_local
->box
,state
->box
);
1513 copy_mat(state_local
->boxv
,state
->boxv
);
1514 copy_mat(state_local
->svir_prev
,state
->svir_prev
);
1515 copy_mat(state_local
->fvir_prev
,state
->fvir_prev
);
1516 copy_mat(state_local
->pres_prev
,state
->pres_prev
);
1519 for(i
=0; i
<state_local
->ngtc
; i
++)
1521 for(j
=0; j
<nh
; j
++) {
1522 state
->nosehoover_xi
[i
*nh
+j
] = state_local
->nosehoover_xi
[i
*nh
+j
];
1523 state
->nosehoover_vxi
[i
*nh
+j
] = state_local
->nosehoover_vxi
[i
*nh
+j
];
1525 state
->therm_integral
[i
] = state_local
->therm_integral
[i
];
1527 for(i
=0; i
<state_local
->nnhpres
; i
++)
1529 for(j
=0; j
<nh
; j
++) {
1530 state
->nhpres_xi
[i
*nh
+j
] = state_local
->nhpres_xi
[i
*nh
+j
];
1531 state
->nhpres_vxi
[i
*nh
+j
] = state_local
->nhpres_vxi
[i
*nh
+j
];
1535 for(est
=0; est
<estNR
; est
++)
1537 if (EST_DISTR(est
) && (state_local
->flags
& (1<<est
)))
1541 dd_collect_vec(dd
,state_local
,state_local
->x
,state
->x
);
1544 dd_collect_vec(dd
,state_local
,state_local
->v
,state
->v
);
1547 dd_collect_vec(dd
,state_local
,state_local
->sd_X
,state
->sd_X
);
1550 dd_collect_vec(dd
,state_local
,state_local
->cg_p
,state
->cg_p
);
1553 if (state
->nrngi
== 1)
1557 for(i
=0; i
<state_local
->nrng
; i
++)
1559 state
->ld_rng
[i
] = state_local
->ld_rng
[i
];
1565 dd_gather(dd
,state_local
->nrng
*sizeof(state
->ld_rng
[0]),
1566 state_local
->ld_rng
,state
->ld_rng
);
1570 if (state
->nrngi
== 1)
1574 state
->ld_rngi
[0] = state_local
->ld_rngi
[0];
1579 dd_gather(dd
,sizeof(state
->ld_rngi
[0]),
1580 state_local
->ld_rngi
,state
->ld_rngi
);
1583 case estDISRE_INITF
:
1584 case estDISRE_RM3TAV
:
1585 case estORIRE_INITF
:
1589 gmx_incons("Unknown state entry encountered in dd_collect_state");
1595 static void dd_realloc_state(t_state
*state
,rvec
**f
,int nalloc
)
1601 fprintf(debug
,"Reallocating state: currently %d, required %d, allocating %d\n",state
->nalloc
,nalloc
,over_alloc_dd(nalloc
));
1604 state
->nalloc
= over_alloc_dd(nalloc
);
1606 for(est
=0; est
<estNR
; est
++)
1608 if (EST_DISTR(est
) && (state
->flags
& (1<<est
)))
1612 srenew(state
->x
,state
->nalloc
);
1615 srenew(state
->v
,state
->nalloc
);
1618 srenew(state
->sd_X
,state
->nalloc
);
1621 srenew(state
->cg_p
,state
->nalloc
);
1625 case estDISRE_INITF
:
1626 case estDISRE_RM3TAV
:
1627 case estORIRE_INITF
:
1629 /* No reallocation required */
1632 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1639 srenew(*f
,state
->nalloc
);
1643 static void dd_check_alloc_ncg(t_forcerec
*fr
,t_state
*state
,rvec
**f
,
1646 if (nalloc
> fr
->cg_nalloc
)
1650 fprintf(debug
,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr
->cg_nalloc
,nalloc
,over_alloc_dd(nalloc
));
1652 fr
->cg_nalloc
= over_alloc_dd(nalloc
);
1653 srenew(fr
->cginfo
,fr
->cg_nalloc
);
1654 if (fr
->cutoff_scheme
== ecutsGROUP
)
1656 srenew(fr
->cg_cm
,fr
->cg_nalloc
);
1659 if (fr
->cutoff_scheme
== ecutsVERLET
&& nalloc
> state
->nalloc
)
1661 /* We don't use charge groups, we use x in state to set up
1662 * the atom communication.
1664 dd_realloc_state(state
,f
,nalloc
);
1668 static void dd_distribute_vec_sendrecv(gmx_domdec_t
*dd
,t_block
*cgs
,
1671 gmx_domdec_master_t
*ma
;
1672 int n
,i
,c
,a
,nalloc
=0;
1679 for(n
=0; n
<dd
->nnodes
; n
++)
1683 if (ma
->nat
[n
] > nalloc
)
1685 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1688 /* Use lv as a temporary buffer */
1690 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1692 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1694 copy_rvec(v
[c
],buf
[a
++]);
1697 if (a
!= ma
->nat
[n
])
1699 gmx_fatal(FARGS
,"Internal error a (%d) != nat (%d)",
1704 MPI_Send(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,
1705 DDRANK(dd
,n
),n
,dd
->mpi_comm_all
);
1710 n
= DDMASTERRANK(dd
);
1712 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1714 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1716 copy_rvec(v
[c
],lv
[a
++]);
1723 MPI_Recv(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1724 MPI_ANY_TAG
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1729 static void dd_distribute_vec_scatterv(gmx_domdec_t
*dd
,t_block
*cgs
,
1732 gmx_domdec_master_t
*ma
;
1733 int *scounts
=NULL
,*disps
=NULL
;
1734 int n
,i
,c
,a
,nalloc
=0;
1741 get_commbuffer_counts(dd
,&scounts
,&disps
);
1745 for(n
=0; n
<dd
->nnodes
; n
++)
1747 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1749 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1751 copy_rvec(v
[c
],buf
[a
++]);
1757 dd_scatterv(dd
,scounts
,disps
,buf
,dd
->nat_home
*sizeof(rvec
),lv
);
1760 static void dd_distribute_vec(gmx_domdec_t
*dd
,t_block
*cgs
,rvec
*v
,rvec
*lv
)
1762 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1764 dd_distribute_vec_sendrecv(dd
,cgs
,v
,lv
);
1768 dd_distribute_vec_scatterv(dd
,cgs
,v
,lv
);
1772 static void dd_distribute_state(gmx_domdec_t
*dd
,t_block
*cgs
,
1773 t_state
*state
,t_state
*state_local
,
1778 nh
= state
->nhchainlength
;
1782 for(i
=0;i
<efptNR
;i
++)
1784 state_local
->lambda
[i
] = state
->lambda
[i
];
1786 state_local
->fep_state
= state
->fep_state
;
1787 state_local
->veta
= state
->veta
;
1788 state_local
->vol0
= state
->vol0
;
1789 copy_mat(state
->box
,state_local
->box
);
1790 copy_mat(state
->box_rel
,state_local
->box_rel
);
1791 copy_mat(state
->boxv
,state_local
->boxv
);
1792 copy_mat(state
->svir_prev
,state_local
->svir_prev
);
1793 copy_mat(state
->fvir_prev
,state_local
->fvir_prev
);
1794 for(i
=0; i
<state_local
->ngtc
; i
++)
1796 for(j
=0; j
<nh
; j
++) {
1797 state_local
->nosehoover_xi
[i
*nh
+j
] = state
->nosehoover_xi
[i
*nh
+j
];
1798 state_local
->nosehoover_vxi
[i
*nh
+j
] = state
->nosehoover_vxi
[i
*nh
+j
];
1800 state_local
->therm_integral
[i
] = state
->therm_integral
[i
];
1802 for(i
=0; i
<state_local
->nnhpres
; i
++)
1804 for(j
=0; j
<nh
; j
++) {
1805 state_local
->nhpres_xi
[i
*nh
+j
] = state
->nhpres_xi
[i
*nh
+j
];
1806 state_local
->nhpres_vxi
[i
*nh
+j
] = state
->nhpres_vxi
[i
*nh
+j
];
1810 dd_bcast(dd
,((efptNR
)*sizeof(real
)),state_local
->lambda
);
1811 dd_bcast(dd
,sizeof(int),&state_local
->fep_state
);
1812 dd_bcast(dd
,sizeof(real
),&state_local
->veta
);
1813 dd_bcast(dd
,sizeof(real
),&state_local
->vol0
);
1814 dd_bcast(dd
,sizeof(state_local
->box
),state_local
->box
);
1815 dd_bcast(dd
,sizeof(state_local
->box_rel
),state_local
->box_rel
);
1816 dd_bcast(dd
,sizeof(state_local
->boxv
),state_local
->boxv
);
1817 dd_bcast(dd
,sizeof(state_local
->svir_prev
),state_local
->svir_prev
);
1818 dd_bcast(dd
,sizeof(state_local
->fvir_prev
),state_local
->fvir_prev
);
1819 dd_bcast(dd
,((state_local
->ngtc
*nh
)*sizeof(double)),state_local
->nosehoover_xi
);
1820 dd_bcast(dd
,((state_local
->ngtc
*nh
)*sizeof(double)),state_local
->nosehoover_vxi
);
1821 dd_bcast(dd
,state_local
->ngtc
*sizeof(double),state_local
->therm_integral
);
1822 dd_bcast(dd
,((state_local
->nnhpres
*nh
)*sizeof(double)),state_local
->nhpres_xi
);
1823 dd_bcast(dd
,((state_local
->nnhpres
*nh
)*sizeof(double)),state_local
->nhpres_vxi
);
1825 if (dd
->nat_home
> state_local
->nalloc
)
1827 dd_realloc_state(state_local
,f
,dd
->nat_home
);
1829 for(i
=0; i
<estNR
; i
++)
1831 if (EST_DISTR(i
) && (state_local
->flags
& (1<<i
)))
1835 dd_distribute_vec(dd
,cgs
,state
->x
,state_local
->x
);
1838 dd_distribute_vec(dd
,cgs
,state
->v
,state_local
->v
);
1841 dd_distribute_vec(dd
,cgs
,state
->sd_X
,state_local
->sd_X
);
1844 dd_distribute_vec(dd
,cgs
,state
->cg_p
,state_local
->cg_p
);
1847 if (state
->nrngi
== 1)
1850 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1851 state
->ld_rng
,state_local
->ld_rng
);
1856 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1857 state
->ld_rng
,state_local
->ld_rng
);
1861 if (state
->nrngi
== 1)
1863 dd_bcastc(dd
,sizeof(state_local
->ld_rngi
[0]),
1864 state
->ld_rngi
,state_local
->ld_rngi
);
1868 dd_scatter(dd
,sizeof(state_local
->ld_rngi
[0]),
1869 state
->ld_rngi
,state_local
->ld_rngi
);
1872 case estDISRE_INITF
:
1873 case estDISRE_RM3TAV
:
1874 case estORIRE_INITF
:
1876 /* Not implemented yet */
1879 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1885 static char dim2char(int dim
)
1891 case XX
: c
= 'X'; break;
1892 case YY
: c
= 'Y'; break;
1893 case ZZ
: c
= 'Z'; break;
1894 default: gmx_fatal(FARGS
,"Unknown dim %d",dim
);
1900 static void write_dd_grid_pdb(const char *fn
,gmx_large_int_t step
,
1901 gmx_domdec_t
*dd
,matrix box
,gmx_ddbox_t
*ddbox
)
1903 rvec grid_s
[2],*grid_r
=NULL
,cx
,r
;
1904 char fname
[STRLEN
],format
[STRLEN
],buf
[22];
1910 copy_rvec(dd
->comm
->cell_x0
,grid_s
[0]);
1911 copy_rvec(dd
->comm
->cell_x1
,grid_s
[1]);
1915 snew(grid_r
,2*dd
->nnodes
);
1918 dd_gather(dd
,2*sizeof(rvec
),grid_s
[0],DDMASTER(dd
) ? grid_r
[0] : NULL
);
1922 for(d
=0; d
<DIM
; d
++)
1924 for(i
=0; i
<DIM
; i
++)
1932 if (d
< ddbox
->npbcdim
&& dd
->nc
[d
] > 1)
1934 tric
[d
][i
] = box
[i
][d
]/box
[i
][i
];
1943 sprintf(fname
,"%s_%s.pdb",fn
,gmx_step_str(step
,buf
));
1944 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
1945 out
= gmx_fio_fopen(fname
,"w");
1946 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
1948 for(i
=0; i
<dd
->nnodes
; i
++)
1950 vol
= dd
->nnodes
/(box
[XX
][XX
]*box
[YY
][YY
]*box
[ZZ
][ZZ
]);
1951 for(d
=0; d
<DIM
; d
++)
1953 vol
*= grid_r
[i
*2+1][d
] - grid_r
[i
*2][d
];
1961 cx
[XX
] = grid_r
[i
*2+x
][XX
];
1962 cx
[YY
] = grid_r
[i
*2+y
][YY
];
1963 cx
[ZZ
] = grid_r
[i
*2+z
][ZZ
];
1965 fprintf(out
,format
,"ATOM",a
++,"CA","GLY",' ',1+i
,
1966 10*r
[XX
],10*r
[YY
],10*r
[ZZ
],1.0,vol
);
1970 for(d
=0; d
<DIM
; d
++)
1976 case 0: y
= 1 + i
*8 + 2*x
; break;
1977 case 1: y
= 1 + i
*8 + 2*x
- (x
% 2); break;
1978 case 2: y
= 1 + i
*8 + x
; break;
1980 fprintf(out
,"%6s%5d%5d\n","CONECT",y
,y
+(1<<d
));
1984 gmx_fio_fclose(out
);
1989 void write_dd_pdb(const char *fn
,gmx_large_int_t step
,const char *title
,
1990 gmx_mtop_t
*mtop
,t_commrec
*cr
,
1991 int natoms
,rvec x
[],matrix box
)
1993 char fname
[STRLEN
],format
[STRLEN
],format4
[STRLEN
],buf
[22];
1996 char *atomname
,*resname
;
2003 natoms
= dd
->comm
->nat
[ddnatVSITE
];
2006 sprintf(fname
,"%s_%s_n%d.pdb",fn
,gmx_step_str(step
,buf
),cr
->sim_nodeid
);
2008 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
2009 sprintf(format4
,"%s%s\n",pdbformat4
,"%6.2f%6.2f");
2011 out
= gmx_fio_fopen(fname
,"w");
2013 fprintf(out
,"TITLE %s\n",title
);
2014 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
2015 for(i
=0; i
<natoms
; i
++)
2017 ii
= dd
->gatindex
[i
];
2018 gmx_mtop_atominfo_global(mtop
,ii
,&atomname
,&resnr
,&resname
);
2019 if (i
< dd
->comm
->nat
[ddnatZONE
])
2022 while (i
>= dd
->cgindex
[dd
->comm
->zones
.cg_range
[c
+1]])
2028 else if (i
< dd
->comm
->nat
[ddnatVSITE
])
2030 b
= dd
->comm
->zones
.n
;
2034 b
= dd
->comm
->zones
.n
+ 1;
2036 fprintf(out
,strlen(atomname
)<4 ? format
: format4
,
2037 "ATOM",(ii
+1)%100000,
2038 atomname
,resname
,' ',resnr
%10000,' ',
2039 10*x
[i
][XX
],10*x
[i
][YY
],10*x
[i
][ZZ
],1.0,b
);
2041 fprintf(out
,"TER\n");
2043 gmx_fio_fclose(out
);
2046 real
dd_cutoff_mbody(gmx_domdec_t
*dd
)
2048 gmx_domdec_comm_t
*comm
;
2055 if (comm
->bInterCGBondeds
)
2057 if (comm
->cutoff_mbody
> 0)
2059 r
= comm
->cutoff_mbody
;
2063 /* cutoff_mbody=0 means we do not have DLB */
2064 r
= comm
->cellsize_min
[dd
->dim
[0]];
2065 for(di
=1; di
<dd
->ndim
; di
++)
2067 r
= min(r
,comm
->cellsize_min
[dd
->dim
[di
]]);
2069 if (comm
->bBondComm
)
2071 r
= max(r
,comm
->cutoff_mbody
);
2075 r
= min(r
,comm
->cutoff
);
2083 real
dd_cutoff_twobody(gmx_domdec_t
*dd
)
2087 r_mb
= dd_cutoff_mbody(dd
);
2089 return max(dd
->comm
->cutoff
,r_mb
);
2093 static void dd_cart_coord2pmecoord(gmx_domdec_t
*dd
,ivec coord
,ivec coord_pme
)
2097 nc
= dd
->nc
[dd
->comm
->cartpmedim
];
2098 ntot
= dd
->comm
->ntot
[dd
->comm
->cartpmedim
];
2099 copy_ivec(coord
,coord_pme
);
2100 coord_pme
[dd
->comm
->cartpmedim
] =
2101 nc
+ (coord
[dd
->comm
->cartpmedim
]*(ntot
- nc
) + (ntot
- nc
)/2)/nc
;
2104 static int low_ddindex2pmeindex(int ndd
,int npme
,int ddindex
)
2106 /* Here we assign a PME node to communicate with this DD node
2107 * by assuming that the major index of both is x.
2108 * We add cr->npmenodes/2 to obtain an even distribution.
2110 return (ddindex
*npme
+ npme
/2)/ndd
;
2113 static int ddindex2pmeindex(const gmx_domdec_t
*dd
,int ddindex
)
2115 return low_ddindex2pmeindex(dd
->nnodes
,dd
->comm
->npmenodes
,ddindex
);
2118 static int cr_ddindex2pmeindex(const t_commrec
*cr
,int ddindex
)
2120 return low_ddindex2pmeindex(cr
->dd
->nnodes
,cr
->npmenodes
,ddindex
);
2123 static int *dd_pmenodes(t_commrec
*cr
)
2128 snew(pmenodes
,cr
->npmenodes
);
2130 for(i
=0; i
<cr
->dd
->nnodes
; i
++) {
2131 p0
= cr_ddindex2pmeindex(cr
,i
);
2132 p1
= cr_ddindex2pmeindex(cr
,i
+1);
2133 if (i
+1 == cr
->dd
->nnodes
|| p1
> p0
) {
2135 fprintf(debug
,"pmenode[%d] = %d\n",n
,i
+1+n
);
2136 pmenodes
[n
] = i
+ 1 + n
;
2144 static int gmx_ddcoord2pmeindex(t_commrec
*cr
,int x
,int y
,int z
)
2147 ivec coords
,coords_pme
,nc
;
2152 if (dd->comm->bCartesian) {
2153 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2154 dd_coords2pmecoords(dd,coords,coords_pme);
2155 copy_ivec(dd->ntot,nc);
2156 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2157 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2159 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2161 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2167 slab
= ddindex2pmeindex(dd
,dd_index(dd
->nc
,coords
));
2172 static int ddcoord2simnodeid(t_commrec
*cr
,int x
,int y
,int z
)
2174 gmx_domdec_comm_t
*comm
;
2176 int ddindex
,nodeid
=-1;
2178 comm
= cr
->dd
->comm
;
2183 if (comm
->bCartesianPP_PME
)
2186 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&nodeid
);
2191 ddindex
= dd_index(cr
->dd
->nc
,coords
);
2192 if (comm
->bCartesianPP
)
2194 nodeid
= comm
->ddindex2simnodeid
[ddindex
];
2200 nodeid
= ddindex
+ gmx_ddcoord2pmeindex(cr
,x
,y
,z
);
2212 static int dd_simnode2pmenode(t_commrec
*cr
,int sim_nodeid
)
2215 gmx_domdec_comm_t
*comm
;
2216 ivec coord
,coord_pme
;
2223 /* This assumes a uniform x domain decomposition grid cell size */
2224 if (comm
->bCartesianPP_PME
)
2227 MPI_Cart_coords(cr
->mpi_comm_mysim
,sim_nodeid
,DIM
,coord
);
2228 if (coord
[comm
->cartpmedim
] < dd
->nc
[comm
->cartpmedim
])
2230 /* This is a PP node */
2231 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2232 MPI_Cart_rank(cr
->mpi_comm_mysim
,coord_pme
,&pmenode
);
2236 else if (comm
->bCartesianPP
)
2238 if (sim_nodeid
< dd
->nnodes
)
2240 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2245 /* This assumes DD cells with identical x coordinates
2246 * are numbered sequentially.
2248 if (dd
->comm
->pmenodes
== NULL
)
2250 if (sim_nodeid
< dd
->nnodes
)
2252 /* The DD index equals the nodeid */
2253 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2259 while (sim_nodeid
> dd
->comm
->pmenodes
[i
])
2263 if (sim_nodeid
< dd
->comm
->pmenodes
[i
])
2265 pmenode
= dd
->comm
->pmenodes
[i
];
2273 gmx_bool
gmx_pmeonlynode(t_commrec
*cr
,int sim_nodeid
)
2275 gmx_bool bPMEOnlyNode
;
2277 if (DOMAINDECOMP(cr
))
2279 bPMEOnlyNode
= (dd_simnode2pmenode(cr
,sim_nodeid
) == -1);
2283 bPMEOnlyNode
= FALSE
;
2286 return bPMEOnlyNode
;
2289 void get_pme_ddnodes(t_commrec
*cr
,int pmenodeid
,
2290 int *nmy_ddnodes
,int **my_ddnodes
,int *node_peer
)
2294 ivec coord
,coord_pme
;
2298 snew(*my_ddnodes
,(dd
->nnodes
+cr
->npmenodes
-1)/cr
->npmenodes
);
2301 for(x
=0; x
<dd
->nc
[XX
]; x
++)
2303 for(y
=0; y
<dd
->nc
[YY
]; y
++)
2305 for(z
=0; z
<dd
->nc
[ZZ
]; z
++)
2307 if (dd
->comm
->bCartesianPP_PME
)
2312 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2313 if (dd
->ci
[XX
] == coord_pme
[XX
] &&
2314 dd
->ci
[YY
] == coord_pme
[YY
] &&
2315 dd
->ci
[ZZ
] == coord_pme
[ZZ
])
2316 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2320 /* The slab corresponds to the nodeid in the PME group */
2321 if (gmx_ddcoord2pmeindex(cr
,x
,y
,z
) == pmenodeid
)
2323 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2330 /* The last PP-only node is the peer node */
2331 *node_peer
= (*my_ddnodes
)[*nmy_ddnodes
-1];
2335 fprintf(debug
,"Receive coordinates from PP nodes:");
2336 for(x
=0; x
<*nmy_ddnodes
; x
++)
2338 fprintf(debug
," %d",(*my_ddnodes
)[x
]);
2340 fprintf(debug
,"\n");
2344 static gmx_bool
receive_vir_ener(t_commrec
*cr
)
2346 gmx_domdec_comm_t
*comm
;
2347 int pmenode
,coords
[DIM
],rank
;
2351 if (cr
->npmenodes
< cr
->dd
->nnodes
)
2353 comm
= cr
->dd
->comm
;
2354 if (comm
->bCartesianPP_PME
)
2356 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2358 MPI_Cart_coords(cr
->mpi_comm_mysim
,cr
->sim_nodeid
,DIM
,coords
);
2359 coords
[comm
->cartpmedim
]++;
2360 if (coords
[comm
->cartpmedim
] < cr
->dd
->nc
[comm
->cartpmedim
])
2362 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&rank
);
2363 if (dd_simnode2pmenode(cr
,rank
) == pmenode
)
2365 /* This is not the last PP node for pmenode */
2373 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2374 if (cr
->sim_nodeid
+1 < cr
->nnodes
&&
2375 dd_simnode2pmenode(cr
,cr
->sim_nodeid
+1) == pmenode
)
2377 /* This is not the last PP node for pmenode */
2386 static void set_zones_ncg_home(gmx_domdec_t
*dd
)
2388 gmx_domdec_zones_t
*zones
;
2391 zones
= &dd
->comm
->zones
;
2393 zones
->cg_range
[0] = 0;
2394 for(i
=1; i
<zones
->n
+1; i
++)
2396 zones
->cg_range
[i
] = dd
->ncg_home
;
2400 static void rebuild_cgindex(gmx_domdec_t
*dd
,
2401 const int *gcgs_index
,t_state
*state
)
2403 int nat
,i
,*ind
,*dd_cg_gl
,*cgindex
,cg_gl
;
2406 dd_cg_gl
= dd
->index_gl
;
2407 cgindex
= dd
->cgindex
;
2410 for(i
=0; i
<state
->ncg_gl
; i
++)
2414 dd_cg_gl
[i
] = cg_gl
;
2415 nat
+= gcgs_index
[cg_gl
+1] - gcgs_index
[cg_gl
];
2419 dd
->ncg_home
= state
->ncg_gl
;
2422 set_zones_ncg_home(dd
);
2425 static int ddcginfo(const cginfo_mb_t
*cginfo_mb
,int cg
)
2427 while (cg
>= cginfo_mb
->cg_end
)
2432 return cginfo_mb
->cginfo
[(cg
- cginfo_mb
->cg_start
) % cginfo_mb
->cg_mod
];
2435 static void dd_set_cginfo(int *index_gl
,int cg0
,int cg1
,
2436 t_forcerec
*fr
,char *bLocalCG
)
2438 cginfo_mb_t
*cginfo_mb
;
2444 cginfo_mb
= fr
->cginfo_mb
;
2445 cginfo
= fr
->cginfo
;
2447 for(cg
=cg0
; cg
<cg1
; cg
++)
2449 cginfo
[cg
] = ddcginfo(cginfo_mb
,index_gl
[cg
]);
2453 if (bLocalCG
!= NULL
)
2455 for(cg
=cg0
; cg
<cg1
; cg
++)
2457 bLocalCG
[index_gl
[cg
]] = TRUE
;
2462 static void make_dd_indices(gmx_domdec_t
*dd
,
2463 const int *gcgs_index
,int cg_start
)
2465 int nzone
,zone
,zone1
,cg0
,cg1
,cg1_p1
,cg
,cg_gl
,a
,a_gl
;
2466 int *zone2cg
,*zone_ncg1
,*index_gl
,*gatindex
;
2471 bLocalCG
= dd
->comm
->bLocalCG
;
2473 if (dd
->nat_tot
> dd
->gatindex_nalloc
)
2475 dd
->gatindex_nalloc
= over_alloc_dd(dd
->nat_tot
);
2476 srenew(dd
->gatindex
,dd
->gatindex_nalloc
);
2479 nzone
= dd
->comm
->zones
.n
;
2480 zone2cg
= dd
->comm
->zones
.cg_range
;
2481 zone_ncg1
= dd
->comm
->zone_ncg1
;
2482 index_gl
= dd
->index_gl
;
2483 gatindex
= dd
->gatindex
;
2484 bCGs
= dd
->comm
->bCGs
;
2486 if (zone2cg
[1] != dd
->ncg_home
)
2488 gmx_incons("dd->ncg_zone is not up to date");
2491 /* Make the local to global and global to local atom index */
2492 a
= dd
->cgindex
[cg_start
];
2493 for(zone
=0; zone
<nzone
; zone
++)
2501 cg0
= zone2cg
[zone
];
2503 cg1
= zone2cg
[zone
+1];
2504 cg1_p1
= cg0
+ zone_ncg1
[zone
];
2506 for(cg
=cg0
; cg
<cg1
; cg
++)
2511 /* Signal that this cg is from more than one pulse away */
2514 cg_gl
= index_gl
[cg
];
2517 for(a_gl
=gcgs_index
[cg_gl
]; a_gl
<gcgs_index
[cg_gl
+1]; a_gl
++)
2520 ga2la_set(dd
->ga2la
,a_gl
,a
,zone1
);
2526 gatindex
[a
] = cg_gl
;
2527 ga2la_set(dd
->ga2la
,cg_gl
,a
,zone1
);
2534 static int check_bLocalCG(gmx_domdec_t
*dd
,int ncg_sys
,const char *bLocalCG
,
2540 if (bLocalCG
== NULL
)
2544 for(i
=0; i
<dd
->ncg_tot
; i
++)
2546 if (!bLocalCG
[dd
->index_gl
[i
]])
2549 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd
->rank
,where
,i
+1,dd
->index_gl
[i
]+1,dd
->ncg_home
);
2554 for(i
=0; i
<ncg_sys
; i
++)
2561 if (ngl
!= dd
->ncg_tot
)
2563 fprintf(stderr
,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd
->rank
,where
,ngl
,dd
->ncg_tot
);
2570 static void check_index_consistency(gmx_domdec_t
*dd
,
2571 int natoms_sys
,int ncg_sys
,
2574 int nerr
,ngl
,i
,a
,cell
;
2579 if (dd
->comm
->DD_debug
> 1)
2581 snew(have
,natoms_sys
);
2582 for(a
=0; a
<dd
->nat_tot
; a
++)
2584 if (have
[dd
->gatindex
[a
]] > 0)
2586 fprintf(stderr
,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd
->rank
,dd
->gatindex
[a
]+1,have
[dd
->gatindex
[a
]],a
+1);
2590 have
[dd
->gatindex
[a
]] = a
+ 1;
2596 snew(have
,dd
->nat_tot
);
2599 for(i
=0; i
<natoms_sys
; i
++)
2601 if (ga2la_get(dd
->ga2la
,i
,&a
,&cell
))
2603 if (a
>= dd
->nat_tot
)
2605 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd
->rank
,i
+1,a
+1,dd
->nat_tot
);
2611 if (dd
->gatindex
[a
] != i
)
2613 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd
->rank
,i
+1,a
+1,dd
->gatindex
[a
]+1);
2620 if (ngl
!= dd
->nat_tot
)
2623 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2624 dd
->rank
,where
,ngl
,dd
->nat_tot
);
2626 for(a
=0; a
<dd
->nat_tot
; a
++)
2631 "DD node %d, %s: local atom %d, global %d has no global index\n",
2632 dd
->rank
,where
,a
+1,dd
->gatindex
[a
]+1);
2637 nerr
+= check_bLocalCG(dd
,ncg_sys
,dd
->comm
->bLocalCG
,where
);
2640 gmx_fatal(FARGS
,"DD node %d, %s: %d atom/cg index inconsistencies",
2641 dd
->rank
,where
,nerr
);
2645 static void clear_dd_indices(gmx_domdec_t
*dd
,int cg_start
,int a_start
)
2652 /* Clear the whole list without searching */
2653 ga2la_clear(dd
->ga2la
);
2657 for(i
=a_start
; i
<dd
->nat_tot
; i
++)
2659 ga2la_del(dd
->ga2la
,dd
->gatindex
[i
]);
2663 bLocalCG
= dd
->comm
->bLocalCG
;
2666 for(i
=cg_start
; i
<dd
->ncg_tot
; i
++)
2668 bLocalCG
[dd
->index_gl
[i
]] = FALSE
;
2672 dd_clear_local_vsite_indices(dd
);
2674 if (dd
->constraints
)
2676 dd_clear_local_constraint_indices(dd
);
2680 static real
grid_jump_limit(gmx_domdec_comm_t
*comm
,real cutoff
,
2683 real grid_jump_limit
;
2685 /* The distance between the boundaries of cells at distance
2686 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2687 * and by the fact that cells should not be shifted by more than
2688 * half their size, such that cg's only shift by one cell
2689 * at redecomposition.
2691 grid_jump_limit
= comm
->cellsize_limit
;
2692 if (!comm
->bVacDLBNoLimit
)
2694 grid_jump_limit
= max(grid_jump_limit
,
2695 cutoff
/comm
->cd
[dim_ind
].np
);
2698 return grid_jump_limit
;
2701 static gmx_bool
check_grid_jump(gmx_large_int_t step
,
2707 gmx_domdec_comm_t
*comm
;
2716 for(d
=1; d
<dd
->ndim
; d
++)
2719 limit
= grid_jump_limit(comm
,cutoff
,d
);
2720 bfac
= ddbox
->box_size
[dim
];
2721 if (ddbox
->tric_dir
[dim
])
2723 bfac
*= ddbox
->skew_fac
[dim
];
2725 if ((comm
->cell_f1
[d
] - comm
->cell_f_max0
[d
])*bfac
< limit
||
2726 (comm
->cell_f0
[d
] - comm
->cell_f_min1
[d
])*bfac
> -limit
)
2734 /* This error should never be triggered under normal
2735 * circumstances, but you never know ...
2737 gmx_fatal(FARGS
,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2738 gmx_step_str(step
,buf
),
2739 dim2char(dim
),dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
2747 static int dd_load_count(gmx_domdec_comm_t
*comm
)
2749 return (comm
->eFlop
? comm
->flop_n
: comm
->cycl_n
[ddCyclF
]);
2752 static float dd_force_load(gmx_domdec_comm_t
*comm
)
2759 if (comm
->eFlop
> 1)
2761 load
*= 1.0 + (comm
->eFlop
- 1)*(0.1*rand()/RAND_MAX
- 0.05);
2766 load
= comm
->cycl
[ddCyclF
];
2767 if (comm
->cycl_n
[ddCyclF
] > 1)
2769 /* Subtract the maximum of the last n cycle counts
2770 * to get rid of possible high counts due to other soures,
2771 * for instance system activity, that would otherwise
2772 * affect the dynamic load balancing.
2774 load
-= comm
->cycl_max
[ddCyclF
];
2781 static void set_slb_pme_dim_f(gmx_domdec_t
*dd
,int dim
,real
**dim_f
)
2783 gmx_domdec_comm_t
*comm
;
2788 snew(*dim_f
,dd
->nc
[dim
]+1);
2790 for(i
=1; i
<dd
->nc
[dim
]; i
++)
2792 if (comm
->slb_frac
[dim
])
2794 (*dim_f
)[i
] = (*dim_f
)[i
-1] + comm
->slb_frac
[dim
][i
-1];
2798 (*dim_f
)[i
] = (real
)i
/(real
)dd
->nc
[dim
];
2801 (*dim_f
)[dd
->nc
[dim
]] = 1;
2804 static void init_ddpme(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,int dimind
)
2806 int pmeindex
,slab
,nso
,i
;
2809 if (dimind
== 0 && dd
->dim
[0] == YY
&& dd
->comm
->npmenodes_x
== 1)
2815 ddpme
->dim
= dimind
;
2817 ddpme
->dim_match
= (ddpme
->dim
== dd
->dim
[dimind
]);
2819 ddpme
->nslab
= (ddpme
->dim
== 0 ?
2820 dd
->comm
->npmenodes_x
:
2821 dd
->comm
->npmenodes_y
);
2823 if (ddpme
->nslab
<= 1)
2828 nso
= dd
->comm
->npmenodes
/ddpme
->nslab
;
2829 /* Determine for each PME slab the PP location range for dimension dim */
2830 snew(ddpme
->pp_min
,ddpme
->nslab
);
2831 snew(ddpme
->pp_max
,ddpme
->nslab
);
2832 for(slab
=0; slab
<ddpme
->nslab
; slab
++) {
2833 ddpme
->pp_min
[slab
] = dd
->nc
[dd
->dim
[dimind
]] - 1;
2834 ddpme
->pp_max
[slab
] = 0;
2836 for(i
=0; i
<dd
->nnodes
; i
++) {
2837 ddindex2xyz(dd
->nc
,i
,xyz
);
2838 /* For y only use our y/z slab.
2839 * This assumes that the PME x grid size matches the DD grid size.
2841 if (dimind
== 0 || xyz
[XX
] == dd
->ci
[XX
]) {
2842 pmeindex
= ddindex2pmeindex(dd
,i
);
2844 slab
= pmeindex
/nso
;
2846 slab
= pmeindex
% ddpme
->nslab
;
2848 ddpme
->pp_min
[slab
] = min(ddpme
->pp_min
[slab
],xyz
[dimind
]);
2849 ddpme
->pp_max
[slab
] = max(ddpme
->pp_max
[slab
],xyz
[dimind
]);
2853 set_slb_pme_dim_f(dd
,ddpme
->dim
,&ddpme
->slb_dim_f
);
2856 int dd_pme_maxshift_x(gmx_domdec_t
*dd
)
2858 if (dd
->comm
->ddpme
[0].dim
== XX
)
2860 return dd
->comm
->ddpme
[0].maxshift
;
2868 int dd_pme_maxshift_y(gmx_domdec_t
*dd
)
2870 if (dd
->comm
->ddpme
[0].dim
== YY
)
2872 return dd
->comm
->ddpme
[0].maxshift
;
2874 else if (dd
->comm
->npmedecompdim
>= 2 && dd
->comm
->ddpme
[1].dim
== YY
)
2876 return dd
->comm
->ddpme
[1].maxshift
;
2884 static void set_pme_maxshift(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,
2885 gmx_bool bUniform
,gmx_ddbox_t
*ddbox
,real
*cell_f
)
2887 gmx_domdec_comm_t
*comm
;
2890 real range
,pme_boundary
;
2894 nc
= dd
->nc
[ddpme
->dim
];
2897 if (!ddpme
->dim_match
)
2899 /* PP decomposition is not along dim: the worst situation */
2902 else if (ns
<= 3 || (bUniform
&& ns
== nc
))
2904 /* The optimal situation */
2909 /* We need to check for all pme nodes which nodes they
2910 * could possibly need to communicate with.
2912 xmin
= ddpme
->pp_min
;
2913 xmax
= ddpme
->pp_max
;
2914 /* Allow for atoms to be maximally 2/3 times the cut-off
2915 * out of their DD cell. This is a reasonable balance between
2916 * between performance and support for most charge-group/cut-off
2919 range
= 2.0/3.0*comm
->cutoff
/ddbox
->box_size
[ddpme
->dim
];
2920 /* Avoid extra communication when we are exactly at a boundary */
2926 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2927 pme_boundary
= (real
)s
/ns
;
2930 cell_f
[xmax
[s
-(sh
+1) ]+1] + range
> pme_boundary
) ||
2932 cell_f
[xmax
[s
-(sh
+1)+ns
]+1] - 1 + range
> pme_boundary
)))
2936 pme_boundary
= (real
)(s
+1)/ns
;
2939 cell_f
[xmin
[s
+(sh
+1) ] ] - range
< pme_boundary
) ||
2941 cell_f
[xmin
[s
+(sh
+1)-ns
] ] + 1 - range
< pme_boundary
)))
2948 ddpme
->maxshift
= sh
;
2952 fprintf(debug
,"PME slab communication range for dim %d is %d\n",
2953 ddpme
->dim
,ddpme
->maxshift
);
2957 static void check_box_size(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
2961 for(d
=0; d
<dd
->ndim
; d
++)
2964 if (dim
< ddbox
->nboundeddim
&&
2965 ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
] <
2966 dd
->nc
[dim
]*dd
->comm
->cellsize_limit
*DD_CELL_MARGIN
)
2968 gmx_fatal(FARGS
,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2969 dim2char(dim
),ddbox
->box_size
[dim
],ddbox
->skew_fac
[dim
],
2970 dd
->nc
[dim
],dd
->comm
->cellsize_limit
);
2975 static void set_dd_cell_sizes_slb(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
,
2976 gmx_bool bMaster
,ivec npulse
)
2978 gmx_domdec_comm_t
*comm
;
2981 real
*cell_x
,cell_dx
,cellsize
;
2985 for(d
=0; d
<DIM
; d
++)
2987 cellsize_min
[d
] = ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
];
2989 if (dd
->nc
[d
] == 1 || comm
->slb_frac
[d
] == NULL
)
2992 cell_dx
= ddbox
->box_size
[d
]/dd
->nc
[d
];
2995 for(j
=0; j
<dd
->nc
[d
]+1; j
++)
2997 dd
->ma
->cell_x
[d
][j
] = ddbox
->box0
[d
] + j
*cell_dx
;
3002 comm
->cell_x0
[d
] = ddbox
->box0
[d
] + (dd
->ci
[d
] )*cell_dx
;
3003 comm
->cell_x1
[d
] = ddbox
->box0
[d
] + (dd
->ci
[d
]+1)*cell_dx
;
3005 cellsize
= cell_dx
*ddbox
->skew_fac
[d
];
3006 while (cellsize
*npulse
[d
] < comm
->cutoff
&& npulse
[d
] < dd
->nc
[d
]-1)
3010 cellsize_min
[d
] = cellsize
;
3014 /* Statically load balanced grid */
3015 /* Also when we are not doing a master distribution we determine
3016 * all cell borders in a loop to obtain identical values
3017 * to the master distribution case and to determine npulse.
3021 cell_x
= dd
->ma
->cell_x
[d
];
3025 snew(cell_x
,dd
->nc
[d
]+1);
3027 cell_x
[0] = ddbox
->box0
[d
];
3028 for(j
=0; j
<dd
->nc
[d
]; j
++)
3030 cell_dx
= ddbox
->box_size
[d
]*comm
->slb_frac
[d
][j
];
3031 cell_x
[j
+1] = cell_x
[j
] + cell_dx
;
3032 cellsize
= cell_dx
*ddbox
->skew_fac
[d
];
3033 while (cellsize
*npulse
[d
] < comm
->cutoff
&&
3034 npulse
[d
] < dd
->nc
[d
]-1)
3038 cellsize_min
[d
] = min(cellsize_min
[d
],cellsize
);
3042 comm
->cell_x0
[d
] = cell_x
[dd
->ci
[d
]];
3043 comm
->cell_x1
[d
] = cell_x
[dd
->ci
[d
]+1];
3047 /* The following limitation is to avoid that a cell would receive
3048 * some of its own home charge groups back over the periodic boundary.
3049 * Double charge groups cause trouble with the global indices.
3051 if (d
< ddbox
->npbcdim
&&
3052 dd
->nc
[d
] > 1 && npulse
[d
] >= dd
->nc
[d
])
3054 gmx_fatal_collective(FARGS
,NULL
,dd
,
3055 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3056 dim2char(d
),ddbox
->box_size
[d
],ddbox
->skew_fac
[d
],
3058 dd
->nc
[d
],dd
->nc
[d
],
3059 dd
->nnodes
> dd
->nc
[d
] ? "cells" : "processors");
3063 if (!comm
->bDynLoadBal
)
3065 copy_rvec(cellsize_min
,comm
->cellsize_min
);
3068 for(d
=0; d
<comm
->npmedecompdim
; d
++)
3070 set_pme_maxshift(dd
,&comm
->ddpme
[d
],
3071 comm
->slb_frac
[dd
->dim
[d
]]==NULL
,ddbox
,
3072 comm
->ddpme
[d
].slb_dim_f
);
3077 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t
*dd
,
3078 int d
,int dim
,gmx_domdec_root_t
*root
,
3080 gmx_bool bUniform
,gmx_large_int_t step
, real cellsize_limit_f
, int range
[])
3082 gmx_domdec_comm_t
*comm
;
3083 int ncd
,i
,j
,nmin
,nmin_old
;
3084 gmx_bool bLimLo
,bLimHi
;
3086 real fac
,halfway
,cellsize_limit_f_i
,region_size
;
3087 gmx_bool bPBC
,bLastHi
=FALSE
;
3088 int nrange
[]={range
[0],range
[1]};
3090 region_size
= root
->cell_f
[range
[1]]-root
->cell_f
[range
[0]];
3096 bPBC
= (dim
< ddbox
->npbcdim
);
3098 cell_size
= root
->buf_ncd
;
3102 fprintf(debug
,"enforce_limits: %d %d\n",range
[0],range
[1]);
3105 /* First we need to check if the scaling does not make cells
3106 * smaller than the smallest allowed size.
3107 * We need to do this iteratively, since if a cell is too small,
3108 * it needs to be enlarged, which makes all the other cells smaller,
3109 * which could in turn make another cell smaller than allowed.
3111 for(i
=range
[0]; i
<range
[1]; i
++)
3113 root
->bCellMin
[i
] = FALSE
;
3119 /* We need the total for normalization */
3121 for(i
=range
[0]; i
<range
[1]; i
++)
3123 if (root
->bCellMin
[i
] == FALSE
)
3125 fac
+= cell_size
[i
];
3128 fac
= ( region_size
- nmin
*cellsize_limit_f
)/fac
; /* substracting cells already set to cellsize_limit_f */
3129 /* Determine the cell boundaries */
3130 for(i
=range
[0]; i
<range
[1]; i
++)
3132 if (root
->bCellMin
[i
] == FALSE
)
3134 cell_size
[i
] *= fac
;
3135 if (!bPBC
&& (i
== 0 || i
== dd
->nc
[dim
] -1))
3137 cellsize_limit_f_i
= 0;
3141 cellsize_limit_f_i
= cellsize_limit_f
;
3143 if (cell_size
[i
] < cellsize_limit_f_i
)
3145 root
->bCellMin
[i
] = TRUE
;
3146 cell_size
[i
] = cellsize_limit_f_i
;
3150 root
->cell_f
[i
+1] = root
->cell_f
[i
] + cell_size
[i
];
3153 while (nmin
> nmin_old
);
3156 cell_size
[i
] = root
->cell_f
[i
+1] - root
->cell_f
[i
];
3157 /* For this check we should not use DD_CELL_MARGIN,
3158 * but a slightly smaller factor,
3159 * since rounding could get use below the limit.
3161 if (bPBC
&& cell_size
[i
] < cellsize_limit_f
*DD_CELL_MARGIN2
/DD_CELL_MARGIN
)
3164 gmx_fatal(FARGS
,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3165 gmx_step_str(step
,buf
),
3166 dim2char(dim
),ddbox
->box_size
[dim
],ddbox
->skew_fac
[dim
],
3167 ncd
,comm
->cellsize_min
[dim
]);
3170 root
->bLimited
= (nmin
> 0) || (range
[0]>0) || (range
[1]<ncd
);
3174 /* Check if the boundary did not displace more than halfway
3175 * each of the cells it bounds, as this could cause problems,
3176 * especially when the differences between cell sizes are large.
3177 * If changes are applied, they will not make cells smaller
3178 * than the cut-off, as we check all the boundaries which
3179 * might be affected by a change and if the old state was ok,
3180 * the cells will at most be shrunk back to their old size.
3182 for(i
=range
[0]+1; i
<range
[1]; i
++)
3184 halfway
= 0.5*(root
->old_cell_f
[i
] + root
->old_cell_f
[i
-1]);
3185 if (root
->cell_f
[i
] < halfway
)
3187 root
->cell_f
[i
] = halfway
;
3188 /* Check if the change also causes shifts of the next boundaries */
3189 for(j
=i
+1; j
<range
[1]; j
++)
3191 if (root
->cell_f
[j
] < root
->cell_f
[j
-1] + cellsize_limit_f
)
3192 root
->cell_f
[j
] = root
->cell_f
[j
-1] + cellsize_limit_f
;
3195 halfway
= 0.5*(root
->old_cell_f
[i
] + root
->old_cell_f
[i
+1]);
3196 if (root
->cell_f
[i
] > halfway
)
3198 root
->cell_f
[i
] = halfway
;
3199 /* Check if the change also causes shifts of the next boundaries */
3200 for(j
=i
-1; j
>=range
[0]+1; j
--)
3202 if (root
->cell_f
[j
] > root
->cell_f
[j
+1] - cellsize_limit_f
)
3203 root
->cell_f
[j
] = root
->cell_f
[j
+1] - cellsize_limit_f
;
3209 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3210 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3211 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3212 * for a and b nrange is used */
3215 /* Take care of the staggering of the cell boundaries */
3218 for(i
=range
[0]; i
<range
[1]; i
++)
3220 root
->cell_f_max0
[i
] = root
->cell_f
[i
];
3221 root
->cell_f_min1
[i
] = root
->cell_f
[i
+1];
3226 for(i
=range
[0]+1; i
<range
[1]; i
++)
3228 bLimLo
= (root
->cell_f
[i
] < root
->bound_min
[i
]);
3229 bLimHi
= (root
->cell_f
[i
] > root
->bound_max
[i
]);
3230 if (bLimLo
&& bLimHi
)
3232 /* Both limits violated, try the best we can */
3233 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3234 root
->cell_f
[i
] = 0.5*(root
->bound_min
[i
] + root
->bound_max
[i
]);
3237 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3241 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3247 /* root->cell_f[i] = root->bound_min[i]; */
3248 nrange
[1]=i
; /* only store violation location. There could be a LimLo violation following with an higher index */
3251 else if (bLimHi
&& !bLastHi
)
3254 if (nrange
[1] < range
[1]) /* found a LimLo before */
3256 root
->cell_f
[nrange
[1]] = root
->bound_min
[nrange
[1]];
3257 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3258 nrange
[0]=nrange
[1];
3260 root
->cell_f
[i
] = root
->bound_max
[i
];
3262 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3267 if (nrange
[1] < range
[1]) /* found last a LimLo */
3269 root
->cell_f
[nrange
[1]] = root
->bound_min
[nrange
[1]];
3270 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3271 nrange
[0]=nrange
[1];
3273 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3275 else if (nrange
[0] > range
[0]) /* found at least one LimHi */
3277 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3284 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t
*dd
,
3285 int d
,int dim
,gmx_domdec_root_t
*root
,
3286 gmx_ddbox_t
*ddbox
,gmx_bool bDynamicBox
,
3287 gmx_bool bUniform
,gmx_large_int_t step
)
3289 gmx_domdec_comm_t
*comm
;
3292 real load_aver
,load_i
,imbalance
,change
,change_max
,sc
;
3293 real cellsize_limit_f
,dist_min_f
,dist_min_f_hard
,space
;
3297 int range
[] = { 0, 0 };
3301 /* Convert the maximum change from the input percentage to a fraction */
3302 change_limit
= comm
->dlb_scale_lim
*0.01;
3306 bPBC
= (dim
< ddbox
->npbcdim
);
3308 cell_size
= root
->buf_ncd
;
3310 /* Store the original boundaries */
3311 for(i
=0; i
<ncd
+1; i
++)
3313 root
->old_cell_f
[i
] = root
->cell_f
[i
];
3316 for(i
=0; i
<ncd
; i
++)
3318 cell_size
[i
] = 1.0/ncd
;
3321 else if (dd_load_count(comm
))
3323 load_aver
= comm
->load
[d
].sum_m
/ncd
;
3325 for(i
=0; i
<ncd
; i
++)
3327 /* Determine the relative imbalance of cell i */
3328 load_i
= comm
->load
[d
].load
[i
*comm
->load
[d
].nload
+2];
3329 imbalance
= (load_i
- load_aver
)/(load_aver
>0 ? load_aver
: 1);
3330 /* Determine the change of the cell size using underrelaxation */
3331 change
= -relax
*imbalance
;
3332 change_max
= max(change_max
,max(change
,-change
));
3334 /* Limit the amount of scaling.
3335 * We need to use the same rescaling for all cells in one row,
3336 * otherwise the load balancing might not converge.
3339 if (change_max
> change_limit
)
3341 sc
*= change_limit
/change_max
;
3343 for(i
=0; i
<ncd
; i
++)
3345 /* Determine the relative imbalance of cell i */
3346 load_i
= comm
->load
[d
].load
[i
*comm
->load
[d
].nload
+2];
3347 imbalance
= (load_i
- load_aver
)/(load_aver
>0 ? load_aver
: 1);
3348 /* Determine the change of the cell size using underrelaxation */
3349 change
= -sc
*imbalance
;
3350 cell_size
[i
] = (root
->cell_f
[i
+1]-root
->cell_f
[i
])*(1 + change
);
3354 cellsize_limit_f
= comm
->cellsize_min
[dim
]/ddbox
->box_size
[dim
];
3355 cellsize_limit_f
*= DD_CELL_MARGIN
;
3356 dist_min_f_hard
= grid_jump_limit(comm
,comm
->cutoff
,d
)/ddbox
->box_size
[dim
];
3357 dist_min_f
= dist_min_f_hard
* DD_CELL_MARGIN
;
3358 if (ddbox
->tric_dir
[dim
])
3360 cellsize_limit_f
/= ddbox
->skew_fac
[dim
];
3361 dist_min_f
/= ddbox
->skew_fac
[dim
];
3363 if (bDynamicBox
&& d
> 0)
3365 dist_min_f
*= DD_PRES_SCALE_MARGIN
;
3367 if (d
> 0 && !bUniform
)
3369 /* Make sure that the grid is not shifted too much */
3370 for(i
=1; i
<ncd
; i
++) {
3371 if (root
->cell_f_min1
[i
] - root
->cell_f_max0
[i
-1] < 2 * dist_min_f_hard
)
3373 gmx_incons("Inconsistent DD boundary staggering limits!");
3375 root
->bound_min
[i
] = root
->cell_f_max0
[i
-1] + dist_min_f
;
3376 space
= root
->cell_f
[i
] - (root
->cell_f_max0
[i
-1] + dist_min_f
);
3378 root
->bound_min
[i
] += 0.5*space
;
3380 root
->bound_max
[i
] = root
->cell_f_min1
[i
] - dist_min_f
;
3381 space
= root
->cell_f
[i
] - (root
->cell_f_min1
[i
] - dist_min_f
);
3383 root
->bound_max
[i
] += 0.5*space
;
3388 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3390 root
->cell_f_max0
[i
-1] + dist_min_f
,
3391 root
->bound_min
[i
],root
->cell_f
[i
],root
->bound_max
[i
],
3392 root
->cell_f_min1
[i
] - dist_min_f
);
3397 root
->cell_f
[0] = 0;
3398 root
->cell_f
[ncd
] = 1;
3399 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, range
);
3402 /* After the checks above, the cells should obey the cut-off
3403 * restrictions, but it does not hurt to check.
3405 for(i
=0; i
<ncd
; i
++)
3409 fprintf(debug
,"Relative bounds dim %d cell %d: %f %f\n",
3410 dim
,i
,root
->cell_f
[i
],root
->cell_f
[i
+1]);
3413 if ((bPBC
|| (i
!= 0 && i
!= dd
->nc
[dim
]-1)) &&
3414 root
->cell_f
[i
+1] - root
->cell_f
[i
] <
3415 cellsize_limit_f
/DD_CELL_MARGIN
)
3419 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3420 gmx_step_str(step
,buf
),dim2char(dim
),i
,
3421 (root
->cell_f
[i
+1] - root
->cell_f
[i
])
3422 *ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
]);
3427 /* Store the cell boundaries of the lower dimensions at the end */
3428 for(d1
=0; d1
<d
; d1
++)
3430 root
->cell_f
[pos
++] = comm
->cell_f0
[d1
];
3431 root
->cell_f
[pos
++] = comm
->cell_f1
[d1
];
3434 if (d
< comm
->npmedecompdim
)
3436 /* The master determines the maximum shift for
3437 * the coordinate communication between separate PME nodes.
3439 set_pme_maxshift(dd
,&comm
->ddpme
[d
],bUniform
,ddbox
,root
->cell_f
);
3441 root
->cell_f
[pos
++] = comm
->ddpme
[0].maxshift
;
3444 root
->cell_f
[pos
++] = comm
->ddpme
[1].maxshift
;
3448 static void relative_to_absolute_cell_bounds(gmx_domdec_t
*dd
,
3449 gmx_ddbox_t
*ddbox
,int dimind
)
3451 gmx_domdec_comm_t
*comm
;
3456 /* Set the cell dimensions */
3457 dim
= dd
->dim
[dimind
];
3458 comm
->cell_x0
[dim
] = comm
->cell_f0
[dimind
]*ddbox
->box_size
[dim
];
3459 comm
->cell_x1
[dim
] = comm
->cell_f1
[dimind
]*ddbox
->box_size
[dim
];
3460 if (dim
>= ddbox
->nboundeddim
)
3462 comm
->cell_x0
[dim
] += ddbox
->box0
[dim
];
3463 comm
->cell_x1
[dim
] += ddbox
->box0
[dim
];
3467 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t
*dd
,
3468 int d
,int dim
,real
*cell_f_row
,
3471 gmx_domdec_comm_t
*comm
;
3477 /* Each node would only need to know two fractions,
3478 * but it is probably cheaper to broadcast the whole array.
3480 MPI_Bcast(cell_f_row
,DD_CELL_F_SIZE(dd
,d
)*sizeof(real
),MPI_BYTE
,
3481 0,comm
->mpi_comm_load
[d
]);
3483 /* Copy the fractions for this dimension from the buffer */
3484 comm
->cell_f0
[d
] = cell_f_row
[dd
->ci
[dim
] ];
3485 comm
->cell_f1
[d
] = cell_f_row
[dd
->ci
[dim
]+1];
3486 /* The whole array was communicated, so set the buffer position */
3487 pos
= dd
->nc
[dim
] + 1;
3488 for(d1
=0; d1
<=d
; d1
++)
3492 /* Copy the cell fractions of the lower dimensions */
3493 comm
->cell_f0
[d1
] = cell_f_row
[pos
++];
3494 comm
->cell_f1
[d1
] = cell_f_row
[pos
++];
3496 relative_to_absolute_cell_bounds(dd
,ddbox
,d1
);
3498 /* Convert the communicated shift from float to int */
3499 comm
->ddpme
[0].maxshift
= (int)(cell_f_row
[pos
++] + 0.5);
3502 comm
->ddpme
[1].maxshift
= (int)(cell_f_row
[pos
++] + 0.5);
3506 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t
*dd
,
3507 gmx_ddbox_t
*ddbox
,gmx_bool bDynamicBox
,
3508 gmx_bool bUniform
,gmx_large_int_t step
)
3510 gmx_domdec_comm_t
*comm
;
3512 gmx_bool bRowMember
,bRowRoot
;
3517 for(d
=0; d
<dd
->ndim
; d
++)
3522 for(d1
=d
; d1
<dd
->ndim
; d1
++)
3524 if (dd
->ci
[dd
->dim
[d1
]] > 0)
3537 set_dd_cell_sizes_dlb_root(dd
,d
,dim
,comm
->root
[d
],
3538 ddbox
,bDynamicBox
,bUniform
,step
);
3539 cell_f_row
= comm
->root
[d
]->cell_f
;
3543 cell_f_row
= comm
->cell_f_row
;
3545 distribute_dd_cell_sizes_dlb(dd
,d
,dim
,cell_f_row
,ddbox
);
3550 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
3554 /* This function assumes the box is static and should therefore
3555 * not be called when the box has changed since the last
3556 * call to dd_partition_system.
3558 for(d
=0; d
<dd
->ndim
; d
++)
3560 relative_to_absolute_cell_bounds(dd
,ddbox
,d
);
3566 static void set_dd_cell_sizes_dlb(gmx_domdec_t
*dd
,
3567 gmx_ddbox_t
*ddbox
,gmx_bool bDynamicBox
,
3568 gmx_bool bUniform
,gmx_bool bDoDLB
,gmx_large_int_t step
,
3569 gmx_wallcycle_t wcycle
)
3571 gmx_domdec_comm_t
*comm
;
3578 wallcycle_start(wcycle
,ewcDDCOMMBOUND
);
3579 set_dd_cell_sizes_dlb_change(dd
,ddbox
,bDynamicBox
,bUniform
,step
);
3580 wallcycle_stop(wcycle
,ewcDDCOMMBOUND
);
3582 else if (bDynamicBox
)
3584 set_dd_cell_sizes_dlb_nochange(dd
,ddbox
);
3587 /* Set the dimensions for which no DD is used */
3588 for(dim
=0; dim
<DIM
; dim
++) {
3589 if (dd
->nc
[dim
] == 1) {
3590 comm
->cell_x0
[dim
] = 0;
3591 comm
->cell_x1
[dim
] = ddbox
->box_size
[dim
];
3592 if (dim
>= ddbox
->nboundeddim
)
3594 comm
->cell_x0
[dim
] += ddbox
->box0
[dim
];
3595 comm
->cell_x1
[dim
] += ddbox
->box0
[dim
];
3601 static void realloc_comm_ind(gmx_domdec_t
*dd
,ivec npulse
)
3604 gmx_domdec_comm_dim_t
*cd
;
3606 for(d
=0; d
<dd
->ndim
; d
++)
3608 cd
= &dd
->comm
->cd
[d
];
3609 np
= npulse
[dd
->dim
[d
]];
3610 if (np
> cd
->np_nalloc
)
3614 fprintf(debug
,"(Re)allocing cd for %c to %d pulses\n",
3615 dim2char(dd
->dim
[d
]),np
);
3617 if (DDMASTER(dd
) && cd
->np_nalloc
> 0)
3619 fprintf(stderr
,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd
->dim
[d
]),np
);
3622 for(i
=cd
->np_nalloc
; i
<np
; i
++)
3624 cd
->ind
[i
].index
= NULL
;
3625 cd
->ind
[i
].nalloc
= 0;
3634 static void set_dd_cell_sizes(gmx_domdec_t
*dd
,
3635 gmx_ddbox_t
*ddbox
,gmx_bool bDynamicBox
,
3636 gmx_bool bUniform
,gmx_bool bDoDLB
,gmx_large_int_t step
,
3637 gmx_wallcycle_t wcycle
)
3639 gmx_domdec_comm_t
*comm
;
3645 /* Copy the old cell boundaries for the cg displacement check */
3646 copy_rvec(comm
->cell_x0
,comm
->old_cell_x0
);
3647 copy_rvec(comm
->cell_x1
,comm
->old_cell_x1
);
3649 if (comm
->bDynLoadBal
)
3653 check_box_size(dd
,ddbox
);
3655 set_dd_cell_sizes_dlb(dd
,ddbox
,bDynamicBox
,bUniform
,bDoDLB
,step
,wcycle
);
3659 set_dd_cell_sizes_slb(dd
,ddbox
,FALSE
,npulse
);
3660 realloc_comm_ind(dd
,npulse
);
3665 for(d
=0; d
<DIM
; d
++)
3667 fprintf(debug
,"cell_x[%d] %f - %f skew_fac %f\n",
3668 d
,comm
->cell_x0
[d
],comm
->cell_x1
[d
],ddbox
->skew_fac
[d
]);
3673 static void comm_dd_ns_cell_sizes(gmx_domdec_t
*dd
,
3675 rvec cell_ns_x0
,rvec cell_ns_x1
,
3676 gmx_large_int_t step
)
3678 gmx_domdec_comm_t
*comm
;
3683 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
3685 dim
= dd
->dim
[dim_ind
];
3687 /* Without PBC we don't have restrictions on the outer cells */
3688 if (!(dim
>= ddbox
->npbcdim
&&
3689 (dd
->ci
[dim
] == 0 || dd
->ci
[dim
] == dd
->nc
[dim
] - 1)) &&
3690 comm
->bDynLoadBal
&&
3691 (comm
->cell_x1
[dim
] - comm
->cell_x0
[dim
])*ddbox
->skew_fac
[dim
] <
3692 comm
->cellsize_min
[dim
])
3695 gmx_fatal(FARGS
,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3696 gmx_step_str(step
,buf
),dim2char(dim
),
3697 comm
->cell_x1
[dim
] - comm
->cell_x0
[dim
],
3698 ddbox
->skew_fac
[dim
],
3699 dd
->comm
->cellsize_min
[dim
],
3700 dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
3704 if ((dd
->bGridJump
&& dd
->ndim
> 1) || ddbox
->nboundeddim
< DIM
)
3706 /* Communicate the boundaries and update cell_ns_x0/1 */
3707 dd_move_cellx(dd
,ddbox
,cell_ns_x0
,cell_ns_x1
);
3708 if (dd
->bGridJump
&& dd
->ndim
> 1)
3710 check_grid_jump(step
,dd
,dd
->comm
->cutoff
,ddbox
,TRUE
);
3715 static void make_tric_corr_matrix(int npbcdim
,matrix box
,matrix tcm
)
3719 tcm
[YY
][XX
] = -box
[YY
][XX
]/box
[YY
][YY
];
3727 tcm
[ZZ
][XX
] = -(box
[ZZ
][YY
]*tcm
[YY
][XX
] + box
[ZZ
][XX
])/box
[ZZ
][ZZ
];
3728 tcm
[ZZ
][YY
] = -box
[ZZ
][YY
]/box
[ZZ
][ZZ
];
3737 static void check_screw_box(matrix box
)
3739 /* Mathematical limitation */
3740 if (box
[YY
][XX
] != 0 || box
[ZZ
][XX
] != 0)
3742 gmx_fatal(FARGS
,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3745 /* Limitation due to the asymmetry of the eighth shell method */
3746 if (box
[ZZ
][YY
] != 0)
3748 gmx_fatal(FARGS
,"pbc=screw with non-zero box_zy is not supported");
3752 static void distribute_cg(FILE *fplog
,gmx_large_int_t step
,
3753 matrix box
,ivec tric_dir
,t_block
*cgs
,rvec pos
[],
3756 gmx_domdec_master_t
*ma
;
3757 int **tmp_ind
=NULL
,*tmp_nalloc
=NULL
;
3758 int i
,icg
,j
,k
,k0
,k1
,d
,npbcdim
;
3760 rvec box_size
,cg_cm
;
3762 real nrcg
,inv_ncg
,pos_d
;
3764 gmx_bool bUnbounded
,bScrew
;
3768 if (tmp_ind
== NULL
)
3770 snew(tmp_nalloc
,dd
->nnodes
);
3771 snew(tmp_ind
,dd
->nnodes
);
3772 for(i
=0; i
<dd
->nnodes
; i
++)
3774 tmp_nalloc
[i
] = over_alloc_large(cgs
->nr
/dd
->nnodes
+1);
3775 snew(tmp_ind
[i
],tmp_nalloc
[i
]);
3779 /* Clear the count */
3780 for(i
=0; i
<dd
->nnodes
; i
++)
3786 make_tric_corr_matrix(dd
->npbcdim
,box
,tcm
);
3788 cgindex
= cgs
->index
;
3790 /* Compute the center of geometry for all charge groups */
3791 for(icg
=0; icg
<cgs
->nr
; icg
++)
3794 k1
= cgindex
[icg
+1];
3798 copy_rvec(pos
[k0
],cg_cm
);
3805 for(k
=k0
; (k
<k1
); k
++)
3807 rvec_inc(cg_cm
,pos
[k
]);
3809 for(d
=0; (d
<DIM
); d
++)
3811 cg_cm
[d
] *= inv_ncg
;
3814 /* Put the charge group in the box and determine the cell index */
3815 for(d
=DIM
-1; d
>=0; d
--) {
3817 if (d
< dd
->npbcdim
)
3819 bScrew
= (dd
->bScrewPBC
&& d
== XX
);
3820 if (tric_dir
[d
] && dd
->nc
[d
] > 1)
3822 /* Use triclinic coordintates for this dimension */
3823 for(j
=d
+1; j
<DIM
; j
++)
3825 pos_d
+= cg_cm
[j
]*tcm
[j
][d
];
3828 while(pos_d
>= box
[d
][d
])
3831 rvec_dec(cg_cm
,box
[d
]);
3834 cg_cm
[YY
] = box
[YY
][YY
] - cg_cm
[YY
];
3835 cg_cm
[ZZ
] = box
[ZZ
][ZZ
] - cg_cm
[ZZ
];
3837 for(k
=k0
; (k
<k1
); k
++)
3839 rvec_dec(pos
[k
],box
[d
]);
3842 pos
[k
][YY
] = box
[YY
][YY
] - pos
[k
][YY
];
3843 pos
[k
][ZZ
] = box
[ZZ
][ZZ
] - pos
[k
][ZZ
];
3850 rvec_inc(cg_cm
,box
[d
]);
3853 cg_cm
[YY
] = box
[YY
][YY
] - cg_cm
[YY
];
3854 cg_cm
[ZZ
] = box
[ZZ
][ZZ
] - cg_cm
[ZZ
];
3856 for(k
=k0
; (k
<k1
); k
++)
3858 rvec_inc(pos
[k
],box
[d
]);
3860 pos
[k
][YY
] = box
[YY
][YY
] - pos
[k
][YY
];
3861 pos
[k
][ZZ
] = box
[ZZ
][ZZ
] - pos
[k
][ZZ
];
3866 /* This could be done more efficiently */
3868 while(ind
[d
]+1 < dd
->nc
[d
] && pos_d
>= ma
->cell_x
[d
][ind
[d
]+1])
3873 i
= dd_index(dd
->nc
,ind
);
3874 if (ma
->ncg
[i
] == tmp_nalloc
[i
])
3876 tmp_nalloc
[i
] = over_alloc_large(ma
->ncg
[i
]+1);
3877 srenew(tmp_ind
[i
],tmp_nalloc
[i
]);
3879 tmp_ind
[i
][ma
->ncg
[i
]] = icg
;
3881 ma
->nat
[i
] += cgindex
[icg
+1] - cgindex
[icg
];
3885 for(i
=0; i
<dd
->nnodes
; i
++)
3888 for(k
=0; k
<ma
->ncg
[i
]; k
++)
3890 ma
->cg
[k1
++] = tmp_ind
[i
][k
];
3893 ma
->index
[dd
->nnodes
] = k1
;
3895 for(i
=0; i
<dd
->nnodes
; i
++)
3905 fprintf(fplog
,"Charge group distribution at step %s:",
3906 gmx_step_str(step
,buf
));
3907 for(i
=0; i
<dd
->nnodes
; i
++)
3909 fprintf(fplog
," %d",ma
->ncg
[i
]);
3911 fprintf(fplog
,"\n");
3915 static void get_cg_distribution(FILE *fplog
,gmx_large_int_t step
,gmx_domdec_t
*dd
,
3916 t_block
*cgs
,matrix box
,gmx_ddbox_t
*ddbox
,
3919 gmx_domdec_master_t
*ma
=NULL
;
3922 int *ibuf
,buf2
[2] = { 0, 0 };
3923 gmx_bool bMaster
= DDMASTER(dd
);
3930 check_screw_box(box
);
3933 set_dd_cell_sizes_slb(dd
,ddbox
,TRUE
,npulse
);
3935 distribute_cg(fplog
,step
,box
,ddbox
->tric_dir
,cgs
,pos
,dd
);
3936 for(i
=0; i
<dd
->nnodes
; i
++)
3938 ma
->ibuf
[2*i
] = ma
->ncg
[i
];
3939 ma
->ibuf
[2*i
+1] = ma
->nat
[i
];
3947 dd_scatter(dd
,2*sizeof(int),ibuf
,buf2
);
3949 dd
->ncg_home
= buf2
[0];
3950 dd
->nat_home
= buf2
[1];
3951 dd
->ncg_tot
= dd
->ncg_home
;
3952 dd
->nat_tot
= dd
->nat_home
;
3953 if (dd
->ncg_home
> dd
->cg_nalloc
|| dd
->cg_nalloc
== 0)
3955 dd
->cg_nalloc
= over_alloc_dd(dd
->ncg_home
);
3956 srenew(dd
->index_gl
,dd
->cg_nalloc
);
3957 srenew(dd
->cgindex
,dd
->cg_nalloc
+1);
3961 for(i
=0; i
<dd
->nnodes
; i
++)
3963 ma
->ibuf
[i
] = ma
->ncg
[i
]*sizeof(int);
3964 ma
->ibuf
[dd
->nnodes
+i
] = ma
->index
[i
]*sizeof(int);
3969 DDMASTER(dd
) ? ma
->ibuf
: NULL
,
3970 DDMASTER(dd
) ? ma
->ibuf
+dd
->nnodes
: NULL
,
3971 DDMASTER(dd
) ? ma
->cg
: NULL
,
3972 dd
->ncg_home
*sizeof(int),dd
->index_gl
);
3974 /* Determine the home charge group sizes */
3976 for(i
=0; i
<dd
->ncg_home
; i
++)
3978 cg_gl
= dd
->index_gl
[i
];
3980 dd
->cgindex
[i
] + cgs
->index
[cg_gl
+1] - cgs
->index
[cg_gl
];
3985 fprintf(debug
,"Home charge groups:\n");
3986 for(i
=0; i
<dd
->ncg_home
; i
++)
3988 fprintf(debug
," %d",dd
->index_gl
[i
]);
3990 fprintf(debug
,"\n");
3992 fprintf(debug
,"\n");
3996 static int compact_and_copy_vec_at(int ncg
,int *move
,
3999 rvec
*src
,gmx_domdec_comm_t
*comm
,
4002 int m
,icg
,i
,i0
,i1
,nrcg
;
4008 for(m
=0; m
<DIM
*2; m
++)
4014 for(icg
=0; icg
<ncg
; icg
++)
4016 i1
= cgindex
[icg
+1];
4022 /* Compact the home array in place */
4023 for(i
=i0
; i
<i1
; i
++)
4025 copy_rvec(src
[i
],src
[home_pos
++]);
4031 /* Copy to the communication buffer */
4033 pos_vec
[m
] += 1 + vec
*nrcg
;
4034 for(i
=i0
; i
<i1
; i
++)
4036 copy_rvec(src
[i
],comm
->cgcm_state
[m
][pos_vec
[m
]++]);
4038 pos_vec
[m
] += (nvec
- vec
- 1)*nrcg
;
4042 home_pos
+= i1
- i0
;
4050 static int compact_and_copy_vec_cg(int ncg
,int *move
,
4052 int nvec
,rvec
*src
,gmx_domdec_comm_t
*comm
,
4055 int m
,icg
,i0
,i1
,nrcg
;
4061 for(m
=0; m
<DIM
*2; m
++)
4067 for(icg
=0; icg
<ncg
; icg
++)
4069 i1
= cgindex
[icg
+1];
4075 /* Compact the home array in place */
4076 copy_rvec(src
[icg
],src
[home_pos
++]);
4082 /* Copy to the communication buffer */
4083 copy_rvec(src
[icg
],comm
->cgcm_state
[m
][pos_vec
[m
]]);
4084 pos_vec
[m
] += 1 + nrcg
*nvec
;
4096 static int compact_ind(int ncg
,int *move
,
4097 int *index_gl
,int *cgindex
,
4099 gmx_ga2la_t ga2la
,char *bLocalCG
,
4102 int cg
,nat
,a0
,a1
,a
,a_gl
;
4107 for(cg
=0; cg
<ncg
; cg
++)
4113 /* Compact the home arrays in place.
4114 * Anything that can be done here avoids access to global arrays.
4116 cgindex
[home_pos
] = nat
;
4117 for(a
=a0
; a
<a1
; a
++)
4120 gatindex
[nat
] = a_gl
;
4121 /* The cell number stays 0, so we don't need to set it */
4122 ga2la_change_la(ga2la
,a_gl
,nat
);
4125 index_gl
[home_pos
] = index_gl
[cg
];
4126 cginfo
[home_pos
] = cginfo
[cg
];
4127 /* The charge group remains local, so bLocalCG does not change */
4132 /* Clear the global indices */
4133 for(a
=a0
; a
<a1
; a
++)
4135 ga2la_del(ga2la
,gatindex
[a
]);
4139 bLocalCG
[index_gl
[cg
]] = FALSE
;
4143 cgindex
[home_pos
] = nat
;
4148 static void clear_and_mark_ind(int ncg
,int *move
,
4149 int *index_gl
,int *cgindex
,int *gatindex
,
4150 gmx_ga2la_t ga2la
,char *bLocalCG
,
4155 for(cg
=0; cg
<ncg
; cg
++)
4161 /* Clear the global indices */
4162 for(a
=a0
; a
<a1
; a
++)
4164 ga2la_del(ga2la
,gatindex
[a
]);
4168 bLocalCG
[index_gl
[cg
]] = FALSE
;
4170 /* Signal that this cg has moved using the ns cell index.
4171 * Here we set it to -1. fill_grid will change it
4172 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4174 cell_index
[cg
] = -1;
4179 static void print_cg_move(FILE *fplog
,
4181 gmx_large_int_t step
,int cg
,int dim
,int dir
,
4182 gmx_bool bHaveLimitdAndCMOld
,real limitd
,
4183 rvec cm_old
,rvec cm_new
,real pos_d
)
4185 gmx_domdec_comm_t
*comm
;
4190 fprintf(fplog
,"\nStep %s:\n",gmx_step_str(step
,buf
));
4191 if (bHaveLimitdAndCMOld
)
4193 fprintf(fplog
,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4194 ddglatnr(dd
,dd
->cgindex
[cg
]),limitd
,dim2char(dim
));
4198 fprintf(fplog
,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4199 ddglatnr(dd
,dd
->cgindex
[cg
]),dim2char(dim
));
4201 fprintf(fplog
,"distance out of cell %f\n",
4202 dir
==1 ? pos_d
- comm
->cell_x1
[dim
] : pos_d
- comm
->cell_x0
[dim
]);
4203 if (bHaveLimitdAndCMOld
)
4205 fprintf(fplog
,"Old coordinates: %8.3f %8.3f %8.3f\n",
4206 cm_old
[XX
],cm_old
[YY
],cm_old
[ZZ
]);
4208 fprintf(fplog
,"New coordinates: %8.3f %8.3f %8.3f\n",
4209 cm_new
[XX
],cm_new
[YY
],cm_new
[ZZ
]);
4210 fprintf(fplog
,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
4212 comm
->old_cell_x0
[dim
],comm
->old_cell_x1
[dim
]);
4213 fprintf(fplog
,"New cell boundaries in direction %c: %8.3f %8.3f\n",
4215 comm
->cell_x0
[dim
],comm
->cell_x1
[dim
]);
4218 static void cg_move_error(FILE *fplog
,
4220 gmx_large_int_t step
,int cg
,int dim
,int dir
,
4221 gmx_bool bHaveLimitdAndCMOld
,real limitd
,
4222 rvec cm_old
,rvec cm_new
,real pos_d
)
4226 print_cg_move(fplog
, dd
,step
,cg
,dim
,dir
,
4227 bHaveLimitdAndCMOld
,limitd
,cm_old
,cm_new
,pos_d
);
4229 print_cg_move(stderr
,dd
,step
,cg
,dim
,dir
,
4230 bHaveLimitdAndCMOld
,limitd
,cm_old
,cm_new
,pos_d
);
4232 "A charge group moved too far between two domain decomposition steps\n"
4233 "This usually means that your system is not well equilibrated");
4236 static void rotate_state_atom(t_state
*state
,int a
)
4240 for(est
=0; est
<estNR
; est
++)
4242 if (EST_DISTR(est
) && (state
->flags
& (1<<est
))) {
4245 /* Rotate the complete state; for a rectangular box only */
4246 state
->x
[a
][YY
] = state
->box
[YY
][YY
] - state
->x
[a
][YY
];
4247 state
->x
[a
][ZZ
] = state
->box
[ZZ
][ZZ
] - state
->x
[a
][ZZ
];
4250 state
->v
[a
][YY
] = -state
->v
[a
][YY
];
4251 state
->v
[a
][ZZ
] = -state
->v
[a
][ZZ
];
4254 state
->sd_X
[a
][YY
] = -state
->sd_X
[a
][YY
];
4255 state
->sd_X
[a
][ZZ
] = -state
->sd_X
[a
][ZZ
];
4258 state
->cg_p
[a
][YY
] = -state
->cg_p
[a
][YY
];
4259 state
->cg_p
[a
][ZZ
] = -state
->cg_p
[a
][ZZ
];
4261 case estDISRE_INITF
:
4262 case estDISRE_RM3TAV
:
4263 case estORIRE_INITF
:
4265 /* These are distances, so not affected by rotation */
4268 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4274 static int *get_moved(gmx_domdec_comm_t
*comm
,int natoms
)
4276 if (natoms
> comm
->moved_nalloc
)
4278 /* Contents should be preserved here */
4279 comm
->moved_nalloc
= over_alloc_dd(natoms
);
4280 srenew(comm
->moved
,comm
->moved_nalloc
);
4286 static void calc_cg_move(FILE *fplog
,gmx_large_int_t step
,
4289 ivec tric_dir
,matrix tcm
,
4290 rvec cell_x0
,rvec cell_x1
,
4291 rvec limitd
,rvec limit0
,rvec limit1
,
4293 int cg_start
,int cg_end
,
4298 int c
,i
,cg
,k
,k0
,k1
,d
,dim
,dim2
,dir
,d2
,d3
,d4
,cell_d
;
4299 int mc
,cdd
,nrcg
,ncg_recv
,nat_recv
,nvs
,nvr
,nvec
,vec
;
4306 npbcdim
= dd
->npbcdim
;
4308 for(cg
=cg_start
; cg
<cg_end
; cg
++)
4315 copy_rvec(state
->x
[k0
],cm_new
);
4322 for(k
=k0
; (k
<k1
); k
++)
4324 rvec_inc(cm_new
,state
->x
[k
]);
4326 for(d
=0; (d
<DIM
); d
++)
4328 cm_new
[d
] = inv_ncg
*cm_new
[d
];
4333 /* Do pbc and check DD cell boundary crossings */
4334 for(d
=DIM
-1; d
>=0; d
--)
4338 bScrew
= (dd
->bScrewPBC
&& d
== XX
);
4339 /* Determine the location of this cg in lattice coordinates */
4343 for(d2
=d
+1; d2
<DIM
; d2
++)
4345 pos_d
+= cm_new
[d2
]*tcm
[d2
][d
];
4348 /* Put the charge group in the triclinic unit-cell */
4349 if (pos_d
>= cell_x1
[d
])
4351 if (pos_d
>= limit1
[d
])
4353 cg_move_error(fplog
,dd
,step
,cg
,d
,1,TRUE
,limitd
[d
],
4354 cg_cm
[cg
],cm_new
,pos_d
);
4357 if (dd
->ci
[d
] == dd
->nc
[d
] - 1)
4359 rvec_dec(cm_new
,state
->box
[d
]);
4362 cm_new
[YY
] = state
->box
[YY
][YY
] - cm_new
[YY
];
4363 cm_new
[ZZ
] = state
->box
[ZZ
][ZZ
] - cm_new
[ZZ
];
4365 for(k
=k0
; (k
<k1
); k
++)
4367 rvec_dec(state
->x
[k
],state
->box
[d
]);
4370 rotate_state_atom(state
,k
);
4375 else if (pos_d
< cell_x0
[d
])
4377 if (pos_d
< limit0
[d
])
4379 cg_move_error(fplog
,dd
,step
,cg
,d
,-1,TRUE
,limitd
[d
],
4380 cg_cm
[cg
],cm_new
,pos_d
);
4385 rvec_inc(cm_new
,state
->box
[d
]);
4388 cm_new
[YY
] = state
->box
[YY
][YY
] - cm_new
[YY
];
4389 cm_new
[ZZ
] = state
->box
[ZZ
][ZZ
] - cm_new
[ZZ
];
4391 for(k
=k0
; (k
<k1
); k
++)
4393 rvec_inc(state
->x
[k
],state
->box
[d
]);
4396 rotate_state_atom(state
,k
);
4402 else if (d
< npbcdim
)
4404 /* Put the charge group in the rectangular unit-cell */
4405 while (cm_new
[d
] >= state
->box
[d
][d
])
4407 rvec_dec(cm_new
,state
->box
[d
]);
4408 for(k
=k0
; (k
<k1
); k
++)
4410 rvec_dec(state
->x
[k
],state
->box
[d
]);
4413 while (cm_new
[d
] < 0)
4415 rvec_inc(cm_new
,state
->box
[d
]);
4416 for(k
=k0
; (k
<k1
); k
++)
4418 rvec_inc(state
->x
[k
],state
->box
[d
]);
4424 copy_rvec(cm_new
,cg_cm
[cg
]);
4426 /* Determine where this cg should go */
4429 for(d
=0; d
<dd
->ndim
; d
++)
4434 flag
|= DD_FLAG_FW(d
);
4440 else if (dev
[dim
] == -1)
4442 flag
|= DD_FLAG_BW(d
);
4444 if (dd
->nc
[dim
] > 2)
4455 /* Temporarily store the flag in move */
4456 move
[cg
] = mc
+ flag
;
4460 static void dd_redistribute_cg(FILE *fplog
,gmx_large_int_t step
,
4461 gmx_domdec_t
*dd
,ivec tric_dir
,
4462 t_state
*state
,rvec
**f
,
4463 t_forcerec
*fr
,t_mdatoms
*md
,
4471 int ncg
[DIM
*2],nat
[DIM
*2];
4472 int c
,i
,cg
,k
,k0
,k1
,d
,dim
,dim2
,dir
,d2
,d3
,d4
,cell_d
;
4473 int mc
,cdd
,nrcg
,ncg_recv
,nat_recv
,nvs
,nvr
,nvec
,vec
;
4474 int sbuf
[2],rbuf
[2];
4475 int home_pos_cg
,home_pos_at
,buf_pos
;
4477 gmx_bool bV
=FALSE
,bSDX
=FALSE
,bCGP
=FALSE
;
4482 rvec
*cg_cm
=NULL
,cell_x0
,cell_x1
,limitd
,limit0
,limit1
,cm_new
;
4484 cginfo_mb_t
*cginfo_mb
;
4485 gmx_domdec_comm_t
*comm
;
4491 check_screw_box(state
->box
);
4495 if (fr
->cutoff_scheme
== ecutsGROUP
)
4500 for(i
=0; i
<estNR
; i
++)
4506 case estX
: /* Always present */ break;
4507 case estV
: bV
= (state
->flags
& (1<<i
)); break;
4508 case estSDX
: bSDX
= (state
->flags
& (1<<i
)); break;
4509 case estCGP
: bCGP
= (state
->flags
& (1<<i
)); break;
4512 case estDISRE_INITF
:
4513 case estDISRE_RM3TAV
:
4514 case estORIRE_INITF
:
4516 /* No processing required */
4519 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4524 if (dd
->ncg_tot
> comm
->nalloc_int
)
4526 comm
->nalloc_int
= over_alloc_dd(dd
->ncg_tot
);
4527 srenew(comm
->buf_int
,comm
->nalloc_int
);
4529 move
= comm
->buf_int
;
4531 /* Clear the count */
4532 for(c
=0; c
<dd
->ndim
*2; c
++)
4538 npbcdim
= dd
->npbcdim
;
4540 for(d
=0; (d
<DIM
); d
++)
4542 limitd
[d
] = dd
->comm
->cellsize_min
[d
];
4543 if (d
>= npbcdim
&& dd
->ci
[d
] == 0)
4545 cell_x0
[d
] = -GMX_FLOAT_MAX
;
4549 cell_x0
[d
] = comm
->cell_x0
[d
];
4551 if (d
>= npbcdim
&& dd
->ci
[d
] == dd
->nc
[d
] - 1)
4553 cell_x1
[d
] = GMX_FLOAT_MAX
;
4557 cell_x1
[d
] = comm
->cell_x1
[d
];
4561 limit0
[d
] = comm
->old_cell_x0
[d
] - limitd
[d
];
4562 limit1
[d
] = comm
->old_cell_x1
[d
] + limitd
[d
];
4566 /* We check after communication if a charge group moved
4567 * more than one cell. Set the pre-comm check limit to float_max.
4569 limit0
[d
] = -GMX_FLOAT_MAX
;
4570 limit1
[d
] = GMX_FLOAT_MAX
;
4574 make_tric_corr_matrix(npbcdim
,state
->box
,tcm
);
4576 cgindex
= dd
->cgindex
;
4578 nthread
= gmx_omp_nthreads_get(emntDomdec
);
4580 /* Compute the center of geometry for all home charge groups
4581 * and put them in the box and determine where they should go.
4583 #pragma omp parallel for num_threads(nthread) schedule(static)
4584 for(thread
=0; thread
<nthread
; thread
++)
4586 calc_cg_move(fplog
,step
,dd
,state
,tric_dir
,tcm
,
4587 cell_x0
,cell_x1
,limitd
,limit0
,limit1
,
4589 ( thread
*dd
->ncg_home
)/nthread
,
4590 ((thread
+1)*dd
->ncg_home
)/nthread
,
4591 fr
->cutoff_scheme
==ecutsGROUP
? cg_cm
: state
->x
,
4595 for(cg
=0; cg
<dd
->ncg_home
; cg
++)
4600 flag
= mc
& ~DD_FLAG_NRCG
;
4601 mc
= mc
& DD_FLAG_NRCG
;
4604 if (ncg
[mc
]+1 > comm
->cggl_flag_nalloc
[mc
])
4606 comm
->cggl_flag_nalloc
[mc
] = over_alloc_dd(ncg
[mc
]+1);
4607 srenew(comm
->cggl_flag
[mc
],comm
->cggl_flag_nalloc
[mc
]*DD_CGIBS
);
4609 comm
->cggl_flag
[mc
][ncg
[mc
]*DD_CGIBS
] = dd
->index_gl
[cg
];
4610 /* We store the cg size in the lower 16 bits
4611 * and the place where the charge group should go
4612 * in the next 6 bits. This saves some communication volume.
4614 nrcg
= cgindex
[cg
+1] - cgindex
[cg
];
4615 comm
->cggl_flag
[mc
][ncg
[mc
]*DD_CGIBS
+1] = nrcg
| flag
;
4621 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
4622 inc_nrnb(nrnb
,eNR_RESETX
,dd
->ncg_home
);
4625 for(i
=0; i
<dd
->ndim
*2; i
++)
4627 *ncg_moved
+= ncg
[i
];
4644 /* Make sure the communication buffers are large enough */
4645 for(mc
=0; mc
<dd
->ndim
*2; mc
++)
4647 nvr
= ncg
[mc
] + nat
[mc
]*nvec
;
4648 if (nvr
> comm
->cgcm_state_nalloc
[mc
])
4650 comm
->cgcm_state_nalloc
[mc
] = over_alloc_dd(nvr
);
4651 srenew(comm
->cgcm_state
[mc
],comm
->cgcm_state_nalloc
[mc
]);
4655 switch (fr
->cutoff_scheme
)
4658 /* Recalculating cg_cm might be cheaper than communicating,
4659 * but that could give rise to rounding issues.
4662 compact_and_copy_vec_cg(dd
->ncg_home
,move
,cgindex
,
4663 nvec
,cg_cm
,comm
,bCompact
);
4666 /* Without charge groups we send the moved atom coordinates
4667 * over twice. This is so the code below can be used without
4668 * many conditionals for both for with and without charge groups.
4671 compact_and_copy_vec_cg(dd
->ncg_home
,move
,cgindex
,
4672 nvec
,state
->x
,comm
,FALSE
);
4675 home_pos_cg
-= *ncg_moved
;
4679 gmx_incons("unimplemented");
4685 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4686 nvec
,vec
++,state
->x
,comm
,bCompact
);
4689 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4690 nvec
,vec
++,state
->v
,comm
,bCompact
);
4694 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4695 nvec
,vec
++,state
->sd_X
,comm
,bCompact
);
4699 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4700 nvec
,vec
++,state
->cg_p
,comm
,bCompact
);
4705 compact_ind(dd
->ncg_home
,move
,
4706 dd
->index_gl
,dd
->cgindex
,dd
->gatindex
,
4707 dd
->ga2la
,comm
->bLocalCG
,
4712 if (fr
->cutoff_scheme
== ecutsVERLET
)
4714 moved
= get_moved(comm
,dd
->ncg_home
);
4716 for(k
=0; k
<dd
->ncg_home
; k
++)
4723 moved
= fr
->ns
.grid
->cell_index
;
4726 clear_and_mark_ind(dd
->ncg_home
,move
,
4727 dd
->index_gl
,dd
->cgindex
,dd
->gatindex
,
4728 dd
->ga2la
,comm
->bLocalCG
,
4732 cginfo_mb
= fr
->cginfo_mb
;
4734 *ncg_stay_home
= home_pos_cg
;
4735 for(d
=0; d
<dd
->ndim
; d
++)
4741 for(dir
=0; dir
<(dd
->nc
[dim
]==2 ? 1 : 2); dir
++)
4744 /* Communicate the cg and atom counts */
4749 fprintf(debug
,"Sending ddim %d dir %d: ncg %d nat %d\n",
4750 d
,dir
,sbuf
[0],sbuf
[1]);
4752 dd_sendrecv_int(dd
, d
, dir
, sbuf
, 2, rbuf
, 2);
4754 if ((ncg_recv
+rbuf
[0])*DD_CGIBS
> comm
->nalloc_int
)
4756 comm
->nalloc_int
= over_alloc_dd((ncg_recv
+rbuf
[0])*DD_CGIBS
);
4757 srenew(comm
->buf_int
,comm
->nalloc_int
);
4760 /* Communicate the charge group indices, sizes and flags */
4761 dd_sendrecv_int(dd
, d
, dir
,
4762 comm
->cggl_flag
[cdd
], sbuf
[0]*DD_CGIBS
,
4763 comm
->buf_int
+ncg_recv
*DD_CGIBS
, rbuf
[0]*DD_CGIBS
);
4765 nvs
= ncg
[cdd
] + nat
[cdd
]*nvec
;
4766 i
= rbuf
[0] + rbuf
[1] *nvec
;
4767 vec_rvec_check_alloc(&comm
->vbuf
,nvr
+i
);
4769 /* Communicate cgcm and state */
4770 dd_sendrecv_rvec(dd
, d
, dir
,
4771 comm
->cgcm_state
[cdd
], nvs
,
4772 comm
->vbuf
.v
+nvr
, i
);
4773 ncg_recv
+= rbuf
[0];
4774 nat_recv
+= rbuf
[1];
4778 /* Process the received charge groups */
4780 for(cg
=0; cg
<ncg_recv
; cg
++)
4782 flag
= comm
->buf_int
[cg
*DD_CGIBS
+1];
4784 if (dim
>= npbcdim
&& dd
->nc
[dim
] > 2)
4786 /* No pbc in this dim and more than one domain boundary.
4787 * We do a separate check if a charge group didn't move too far.
4789 if (((flag
& DD_FLAG_FW(d
)) &&
4790 comm
->vbuf
.v
[buf_pos
][dim
] > cell_x1
[dim
]) ||
4791 ((flag
& DD_FLAG_BW(d
)) &&
4792 comm
->vbuf
.v
[buf_pos
][dim
] < cell_x0
[dim
]))
4794 cg_move_error(fplog
,dd
,step
,cg
,dim
,
4795 (flag
& DD_FLAG_FW(d
)) ? 1 : 0,
4797 comm
->vbuf
.v
[buf_pos
],
4798 comm
->vbuf
.v
[buf_pos
],
4799 comm
->vbuf
.v
[buf_pos
][dim
]);
4806 /* Check which direction this cg should go */
4807 for(d2
=d
+1; (d2
<dd
->ndim
&& mc
==-1); d2
++)
4811 /* The cell boundaries for dimension d2 are not equal
4812 * for each cell row of the lower dimension(s),
4813 * therefore we might need to redetermine where
4814 * this cg should go.
4817 /* If this cg crosses the box boundary in dimension d2
4818 * we can use the communicated flag, so we do not
4819 * have to worry about pbc.
4821 if (!((dd
->ci
[dim2
] == dd
->nc
[dim2
]-1 &&
4822 (flag
& DD_FLAG_FW(d2
))) ||
4823 (dd
->ci
[dim2
] == 0 &&
4824 (flag
& DD_FLAG_BW(d2
)))))
4826 /* Clear the two flags for this dimension */
4827 flag
&= ~(DD_FLAG_FW(d2
) | DD_FLAG_BW(d2
));
4828 /* Determine the location of this cg
4829 * in lattice coordinates
4831 pos_d
= comm
->vbuf
.v
[buf_pos
][dim2
];
4834 for(d3
=dim2
+1; d3
<DIM
; d3
++)
4837 comm
->vbuf
.v
[buf_pos
][d3
]*tcm
[d3
][dim2
];
4840 /* Check of we are not at the box edge.
4841 * pbc is only handled in the first step above,
4842 * but this check could move over pbc while
4843 * the first step did not due to different rounding.
4845 if (pos_d
>= cell_x1
[dim2
] &&
4846 dd
->ci
[dim2
] != dd
->nc
[dim2
]-1)
4848 flag
|= DD_FLAG_FW(d2
);
4850 else if (pos_d
< cell_x0
[dim2
] &&
4853 flag
|= DD_FLAG_BW(d2
);
4855 comm
->buf_int
[cg
*DD_CGIBS
+1] = flag
;
4858 /* Set to which neighboring cell this cg should go */
4859 if (flag
& DD_FLAG_FW(d2
))
4863 else if (flag
& DD_FLAG_BW(d2
))
4865 if (dd
->nc
[dd
->dim
[d2
]] > 2)
4877 nrcg
= flag
& DD_FLAG_NRCG
;
4880 if (home_pos_cg
+1 > dd
->cg_nalloc
)
4882 dd
->cg_nalloc
= over_alloc_dd(home_pos_cg
+1);
4883 srenew(dd
->index_gl
,dd
->cg_nalloc
);
4884 srenew(dd
->cgindex
,dd
->cg_nalloc
+1);
4886 /* Set the global charge group index and size */
4887 dd
->index_gl
[home_pos_cg
] = comm
->buf_int
[cg
*DD_CGIBS
];
4888 dd
->cgindex
[home_pos_cg
+1] = dd
->cgindex
[home_pos_cg
] + nrcg
;
4889 /* Copy the state from the buffer */
4890 dd_check_alloc_ncg(fr
,state
,f
,home_pos_cg
+1);
4891 if (fr
->cutoff_scheme
== ecutsGROUP
)
4894 copy_rvec(comm
->vbuf
.v
[buf_pos
],cg_cm
[home_pos_cg
]);
4898 /* Set the cginfo */
4899 fr
->cginfo
[home_pos_cg
] = ddcginfo(cginfo_mb
,
4900 dd
->index_gl
[home_pos_cg
]);
4903 comm
->bLocalCG
[dd
->index_gl
[home_pos_cg
]] = TRUE
;
4906 if (home_pos_at
+nrcg
> state
->nalloc
)
4908 dd_realloc_state(state
,f
,home_pos_at
+nrcg
);
4910 for(i
=0; i
<nrcg
; i
++)
4912 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4913 state
->x
[home_pos_at
+i
]);
4917 for(i
=0; i
<nrcg
; i
++)
4919 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4920 state
->v
[home_pos_at
+i
]);
4925 for(i
=0; i
<nrcg
; i
++)
4927 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4928 state
->sd_X
[home_pos_at
+i
]);
4933 for(i
=0; i
<nrcg
; i
++)
4935 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4936 state
->cg_p
[home_pos_at
+i
]);
4940 home_pos_at
+= nrcg
;
4944 /* Reallocate the buffers if necessary */
4945 if (ncg
[mc
]+1 > comm
->cggl_flag_nalloc
[mc
])
4947 comm
->cggl_flag_nalloc
[mc
] = over_alloc_dd(ncg
[mc
]+1);
4948 srenew(comm
->cggl_flag
[mc
],comm
->cggl_flag_nalloc
[mc
]*DD_CGIBS
);
4950 nvr
= ncg
[mc
] + nat
[mc
]*nvec
;
4951 if (nvr
+ 1 + nrcg
*nvec
> comm
->cgcm_state_nalloc
[mc
])
4953 comm
->cgcm_state_nalloc
[mc
] = over_alloc_dd(nvr
+ 1 + nrcg
*nvec
);
4954 srenew(comm
->cgcm_state
[mc
],comm
->cgcm_state_nalloc
[mc
]);
4956 /* Copy from the receive to the send buffers */
4957 memcpy(comm
->cggl_flag
[mc
] + ncg
[mc
]*DD_CGIBS
,
4958 comm
->buf_int
+ cg
*DD_CGIBS
,
4959 DD_CGIBS
*sizeof(int));
4960 memcpy(comm
->cgcm_state
[mc
][nvr
],
4961 comm
->vbuf
.v
[buf_pos
],
4962 (1+nrcg
*nvec
)*sizeof(rvec
));
4963 buf_pos
+= 1 + nrcg
*nvec
;
4970 /* With sorting (!bCompact) the indices are now only partially up to date
4971 * and ncg_home and nat_home are not the real count, since there are
4972 * "holes" in the arrays for the charge groups that moved to neighbors.
4974 if (fr
->cutoff_scheme
== ecutsVERLET
)
4976 moved
= get_moved(comm
,home_pos_cg
);
4978 for(i
=dd
->ncg_home
; i
<home_pos_cg
; i
++)
4983 dd
->ncg_home
= home_pos_cg
;
4984 dd
->nat_home
= home_pos_at
;
4989 "Finished repartitioning: cgs moved out %d, new home %d\n",
4990 *ncg_moved
,dd
->ncg_home
-*ncg_moved
);
4995 void dd_cycles_add(gmx_domdec_t
*dd
,float cycles
,int ddCycl
)
4997 dd
->comm
->cycl
[ddCycl
] += cycles
;
4998 dd
->comm
->cycl_n
[ddCycl
]++;
4999 if (cycles
> dd
->comm
->cycl_max
[ddCycl
])
5001 dd
->comm
->cycl_max
[ddCycl
] = cycles
;
5005 static double force_flop_count(t_nrnb
*nrnb
)
5012 for(i
=eNR_NBKERNEL010
; i
<eNR_NBKERNEL_FREE_ENERGY
; i
++)
5014 /* To get closer to the real timings, we half the count
5015 * for the normal loops and again half it for water loops.
5018 if (strstr(name
,"W3") != NULL
|| strstr(name
,"W4") != NULL
)
5020 sum
+= nrnb
->n
[i
]*0.25*cost_nrnb(i
);
5024 sum
+= nrnb
->n
[i
]*0.50*cost_nrnb(i
);
5027 for(i
=eNR_NBKERNEL_FREE_ENERGY
; i
<=eNR_NB14
; i
++)
5030 if (strstr(name
,"W3") != NULL
|| strstr(name
,"W4") != NULL
)
5031 sum
+= nrnb
->n
[i
]*cost_nrnb(i
);
5033 for(i
=eNR_BONDS
; i
<=eNR_WALLS
; i
++)
5035 sum
+= nrnb
->n
[i
]*cost_nrnb(i
);
5041 void dd_force_flop_start(gmx_domdec_t
*dd
,t_nrnb
*nrnb
)
5043 if (dd
->comm
->eFlop
)
5045 dd
->comm
->flop
-= force_flop_count(nrnb
);
5048 void dd_force_flop_stop(gmx_domdec_t
*dd
,t_nrnb
*nrnb
)
5050 if (dd
->comm
->eFlop
)
5052 dd
->comm
->flop
+= force_flop_count(nrnb
);
5057 static void clear_dd_cycle_counts(gmx_domdec_t
*dd
)
5061 for(i
=0; i
<ddCyclNr
; i
++)
5063 dd
->comm
->cycl
[i
] = 0;
5064 dd
->comm
->cycl_n
[i
] = 0;
5065 dd
->comm
->cycl_max
[i
] = 0;
5068 dd
->comm
->flop_n
= 0;
5071 static void get_load_distribution(gmx_domdec_t
*dd
,gmx_wallcycle_t wcycle
)
5073 gmx_domdec_comm_t
*comm
;
5074 gmx_domdec_load_t
*load
;
5075 gmx_domdec_root_t
*root
=NULL
;
5076 int d
,dim
,cid
,i
,pos
;
5077 float cell_frac
=0,sbuf
[DD_NLOAD_MAX
];
5082 fprintf(debug
,"get_load_distribution start\n");
5085 wallcycle_start(wcycle
,ewcDDCOMMLOAD
);
5089 bSepPME
= (dd
->pme_nodeid
>= 0);
5091 for(d
=dd
->ndim
-1; d
>=0; d
--)
5094 /* Check if we participate in the communication in this dimension */
5095 if (d
== dd
->ndim
-1 ||
5096 (dd
->ci
[dd
->dim
[d
+1]]==0 && dd
->ci
[dd
->dim
[dd
->ndim
-1]]==0))
5098 load
= &comm
->load
[d
];
5101 cell_frac
= comm
->cell_f1
[d
] - comm
->cell_f0
[d
];
5104 if (d
== dd
->ndim
-1)
5106 sbuf
[pos
++] = dd_force_load(comm
);
5107 sbuf
[pos
++] = sbuf
[0];
5110 sbuf
[pos
++] = sbuf
[0];
5111 sbuf
[pos
++] = cell_frac
;
5114 sbuf
[pos
++] = comm
->cell_f_max0
[d
];
5115 sbuf
[pos
++] = comm
->cell_f_min1
[d
];
5120 sbuf
[pos
++] = comm
->cycl
[ddCyclPPduringPME
];
5121 sbuf
[pos
++] = comm
->cycl
[ddCyclPME
];
5126 sbuf
[pos
++] = comm
->load
[d
+1].sum
;
5127 sbuf
[pos
++] = comm
->load
[d
+1].max
;
5130 sbuf
[pos
++] = comm
->load
[d
+1].sum_m
;
5131 sbuf
[pos
++] = comm
->load
[d
+1].cvol_min
*cell_frac
;
5132 sbuf
[pos
++] = comm
->load
[d
+1].flags
;
5135 sbuf
[pos
++] = comm
->cell_f_max0
[d
];
5136 sbuf
[pos
++] = comm
->cell_f_min1
[d
];
5141 sbuf
[pos
++] = comm
->load
[d
+1].mdf
;
5142 sbuf
[pos
++] = comm
->load
[d
+1].pme
;
5146 /* Communicate a row in DD direction d.
5147 * The communicators are setup such that the root always has rank 0.
5150 MPI_Gather(sbuf
,load
->nload
*sizeof(float),MPI_BYTE
,
5151 load
->load
,load
->nload
*sizeof(float),MPI_BYTE
,
5152 0,comm
->mpi_comm_load
[d
]);
5154 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5156 /* We are the root, process this row */
5157 if (comm
->bDynLoadBal
)
5159 root
= comm
->root
[d
];
5169 for(i
=0; i
<dd
->nc
[dim
]; i
++)
5171 load
->sum
+= load
->load
[pos
++];
5172 load
->max
= max(load
->max
,load
->load
[pos
]);
5178 /* This direction could not be load balanced properly,
5179 * therefore we need to use the maximum iso the average load.
5181 load
->sum_m
= max(load
->sum_m
,load
->load
[pos
]);
5185 load
->sum_m
+= load
->load
[pos
];
5188 load
->cvol_min
= min(load
->cvol_min
,load
->load
[pos
]);
5192 load
->flags
= (int)(load
->load
[pos
++] + 0.5);
5196 root
->cell_f_max0
[i
] = load
->load
[pos
++];
5197 root
->cell_f_min1
[i
] = load
->load
[pos
++];
5202 load
->mdf
= max(load
->mdf
,load
->load
[pos
]);
5204 load
->pme
= max(load
->pme
,load
->load
[pos
]);
5208 if (comm
->bDynLoadBal
&& root
->bLimited
)
5210 load
->sum_m
*= dd
->nc
[dim
];
5211 load
->flags
|= (1<<d
);
5219 comm
->nload
+= dd_load_count(comm
);
5220 comm
->load_step
+= comm
->cycl
[ddCyclStep
];
5221 comm
->load_sum
+= comm
->load
[0].sum
;
5222 comm
->load_max
+= comm
->load
[0].max
;
5223 if (comm
->bDynLoadBal
)
5225 for(d
=0; d
<dd
->ndim
; d
++)
5227 if (comm
->load
[0].flags
& (1<<d
))
5229 comm
->load_lim
[d
]++;
5235 comm
->load_mdf
+= comm
->load
[0].mdf
;
5236 comm
->load_pme
+= comm
->load
[0].pme
;
5240 wallcycle_stop(wcycle
,ewcDDCOMMLOAD
);
5244 fprintf(debug
,"get_load_distribution finished\n");
5248 static float dd_force_imb_perf_loss(gmx_domdec_t
*dd
)
5250 /* Return the relative performance loss on the total run time
5251 * due to the force calculation load imbalance.
5253 if (dd
->comm
->nload
> 0)
5256 (dd
->comm
->load_max
*dd
->nnodes
- dd
->comm
->load_sum
)/
5257 (dd
->comm
->load_step
*dd
->nnodes
);
5265 static void print_dd_load_av(FILE *fplog
,gmx_domdec_t
*dd
)
5268 int npp
,npme
,nnodes
,d
,limp
;
5269 float imbal
,pme_f_ratio
,lossf
,lossp
=0;
5271 gmx_domdec_comm_t
*comm
;
5274 if (DDMASTER(dd
) && comm
->nload
> 0)
5277 npme
= (dd
->pme_nodeid
>= 0) ? comm
->npmenodes
: 0;
5278 nnodes
= npp
+ npme
;
5279 imbal
= comm
->load_max
*npp
/comm
->load_sum
- 1;
5280 lossf
= dd_force_imb_perf_loss(dd
);
5281 sprintf(buf
," Average load imbalance: %.1f %%\n",imbal
*100);
5282 fprintf(fplog
,"%s",buf
);
5283 fprintf(stderr
,"\n");
5284 fprintf(stderr
,"%s",buf
);
5285 sprintf(buf
," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf
*100);
5286 fprintf(fplog
,"%s",buf
);
5287 fprintf(stderr
,"%s",buf
);
5289 if (comm
->bDynLoadBal
)
5291 sprintf(buf
," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5292 for(d
=0; d
<dd
->ndim
; d
++)
5294 limp
= (200*comm
->load_lim
[d
]+1)/(2*comm
->nload
);
5295 sprintf(buf
+strlen(buf
)," %c %d %%",dim2char(dd
->dim
[d
]),limp
);
5301 sprintf(buf
+strlen(buf
),"\n");
5302 fprintf(fplog
,"%s",buf
);
5303 fprintf(stderr
,"%s",buf
);
5307 pme_f_ratio
= comm
->load_pme
/comm
->load_mdf
;
5308 lossp
= (comm
->load_pme
-comm
->load_mdf
)/comm
->load_step
;
5311 lossp
*= (float)npme
/(float)nnodes
;
5315 lossp
*= (float)npp
/(float)nnodes
;
5317 sprintf(buf
," Average PME mesh/force load: %5.3f\n",pme_f_ratio
);
5318 fprintf(fplog
,"%s",buf
);
5319 fprintf(stderr
,"%s",buf
);
5320 sprintf(buf
," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp
)*100);
5321 fprintf(fplog
,"%s",buf
);
5322 fprintf(stderr
,"%s",buf
);
5324 fprintf(fplog
,"\n");
5325 fprintf(stderr
,"\n");
5327 if (lossf
>= DD_PERF_LOSS
)
5330 "NOTE: %.1f %% performance was lost due to load imbalance\n"
5331 " in the domain decomposition.\n",lossf
*100);
5332 if (!comm
->bDynLoadBal
)
5334 sprintf(buf
+strlen(buf
)," You might want to use dynamic load balancing (option -dlb.)\n");
5338 sprintf(buf
+strlen(buf
)," You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5340 fprintf(fplog
,"%s\n",buf
);
5341 fprintf(stderr
,"%s\n",buf
);
5343 if (npme
> 0 && fabs(lossp
) >= DD_PERF_LOSS
)
5346 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5347 " had %s work to do than the PP nodes.\n"
5348 " You might want to %s the number of PME nodes\n"
5349 " or %s the cut-off and the grid spacing.\n",
5351 (lossp
< 0) ? "less" : "more",
5352 (lossp
< 0) ? "decrease" : "increase",
5353 (lossp
< 0) ? "decrease" : "increase");
5354 fprintf(fplog
,"%s\n",buf
);
5355 fprintf(stderr
,"%s\n",buf
);
5360 static float dd_vol_min(gmx_domdec_t
*dd
)
5362 return dd
->comm
->load
[0].cvol_min
*dd
->nnodes
;
5365 static gmx_bool
dd_load_flags(gmx_domdec_t
*dd
)
5367 return dd
->comm
->load
[0].flags
;
5370 static float dd_f_imbal(gmx_domdec_t
*dd
)
5372 return dd
->comm
->load
[0].max
*dd
->nnodes
/dd
->comm
->load
[0].sum
- 1;
5375 float dd_pme_f_ratio(gmx_domdec_t
*dd
)
5377 if (dd
->comm
->cycl_n
[ddCyclPME
] > 0)
5379 return dd
->comm
->load
[0].pme
/dd
->comm
->load
[0].mdf
;
5387 static void dd_print_load(FILE *fplog
,gmx_domdec_t
*dd
,gmx_large_int_t step
)
5392 flags
= dd_load_flags(dd
);
5396 "DD load balancing is limited by minimum cell size in dimension");
5397 for(d
=0; d
<dd
->ndim
; d
++)
5401 fprintf(fplog
," %c",dim2char(dd
->dim
[d
]));
5404 fprintf(fplog
,"\n");
5406 fprintf(fplog
,"DD step %s",gmx_step_str(step
,buf
));
5407 if (dd
->comm
->bDynLoadBal
)
5409 fprintf(fplog
," vol min/aver %5.3f%c",
5410 dd_vol_min(dd
),flags
? '!' : ' ');
5412 fprintf(fplog
," load imb.: force %4.1f%%",dd_f_imbal(dd
)*100);
5413 if (dd
->comm
->cycl_n
[ddCyclPME
])
5415 fprintf(fplog
," pme mesh/force %5.3f",dd_pme_f_ratio(dd
));
5417 fprintf(fplog
,"\n\n");
5420 static void dd_print_load_verbose(gmx_domdec_t
*dd
)
5422 if (dd
->comm
->bDynLoadBal
)
5424 fprintf(stderr
,"vol %4.2f%c ",
5425 dd_vol_min(dd
),dd_load_flags(dd
) ? '!' : ' ');
5427 fprintf(stderr
,"imb F %2d%% ",(int)(dd_f_imbal(dd
)*100+0.5));
5428 if (dd
->comm
->cycl_n
[ddCyclPME
])
5430 fprintf(stderr
,"pme/F %4.2f ",dd_pme_f_ratio(dd
));
5435 static void make_load_communicator(gmx_domdec_t
*dd
, int dim_ind
,ivec loc
)
5440 gmx_domdec_root_t
*root
;
5441 gmx_bool bPartOfGroup
= FALSE
;
5443 dim
= dd
->dim
[dim_ind
];
5444 copy_ivec(loc
,loc_c
);
5445 for(i
=0; i
<dd
->nc
[dim
]; i
++)
5448 rank
= dd_index(dd
->nc
,loc_c
);
5449 if (rank
== dd
->rank
)
5451 /* This process is part of the group */
5452 bPartOfGroup
= TRUE
;
5455 MPI_Comm_split(dd
->mpi_comm_all
, bPartOfGroup
?0:MPI_UNDEFINED
, dd
->rank
,
5459 dd
->comm
->mpi_comm_load
[dim_ind
] = c_row
;
5460 if (dd
->comm
->eDLB
!= edlbNO
)
5462 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5464 /* This is the root process of this row */
5465 snew(dd
->comm
->root
[dim_ind
],1);
5466 root
= dd
->comm
->root
[dim_ind
];
5467 snew(root
->cell_f
,DD_CELL_F_SIZE(dd
,dim_ind
));
5468 snew(root
->old_cell_f
,dd
->nc
[dim
]+1);
5469 snew(root
->bCellMin
,dd
->nc
[dim
]);
5472 snew(root
->cell_f_max0
,dd
->nc
[dim
]);
5473 snew(root
->cell_f_min1
,dd
->nc
[dim
]);
5474 snew(root
->bound_min
,dd
->nc
[dim
]);
5475 snew(root
->bound_max
,dd
->nc
[dim
]);
5477 snew(root
->buf_ncd
,dd
->nc
[dim
]);
5481 /* This is not a root process, we only need to receive cell_f */
5482 snew(dd
->comm
->cell_f_row
,DD_CELL_F_SIZE(dd
,dim_ind
));
5485 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5487 snew(dd
->comm
->load
[dim_ind
].load
,dd
->nc
[dim
]*DD_NLOAD_MAX
);
5493 static void make_load_communicators(gmx_domdec_t
*dd
)
5500 fprintf(debug
,"Making load communicators\n");
5502 snew(dd
->comm
->load
,dd
->ndim
);
5503 snew(dd
->comm
->mpi_comm_load
,dd
->ndim
);
5506 make_load_communicator(dd
,0,loc
);
5509 for(i
=0; i
<dd
->nc
[dim0
]; i
++) {
5511 make_load_communicator(dd
,1,loc
);
5516 for(i
=0; i
<dd
->nc
[dim0
]; i
++) {
5519 for(j
=0; j
<dd
->nc
[dim1
]; j
++) {
5521 make_load_communicator(dd
,2,loc
);
5527 fprintf(debug
,"Finished making load communicators\n");
5531 void setup_dd_grid(FILE *fplog
,gmx_domdec_t
*dd
)
5537 ivec dd_zp
[DD_MAXIZONE
];
5538 gmx_domdec_zones_t
*zones
;
5539 gmx_domdec_ns_ranges_t
*izone
;
5541 for(d
=0; d
<dd
->ndim
; d
++)
5544 copy_ivec(dd
->ci
,tmp
);
5545 tmp
[dim
] = (tmp
[dim
] + 1) % dd
->nc
[dim
];
5546 dd
->neighbor
[d
][0] = ddcoord2ddnodeid(dd
,tmp
);
5547 copy_ivec(dd
->ci
,tmp
);
5548 tmp
[dim
] = (tmp
[dim
] - 1 + dd
->nc
[dim
]) % dd
->nc
[dim
];
5549 dd
->neighbor
[d
][1] = ddcoord2ddnodeid(dd
,tmp
);
5552 fprintf(debug
,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5555 dd
->neighbor
[d
][1]);
5561 fprintf(stderr
,"Making %dD domain decomposition %d x %d x %d\n",
5562 dd
->ndim
,dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
]);
5566 fprintf(fplog
,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5568 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
],
5569 dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5576 for(i
=0; i
<nzonep
; i
++)
5578 copy_ivec(dd_zp3
[i
],dd_zp
[i
]);
5584 for(i
=0; i
<nzonep
; i
++)
5586 copy_ivec(dd_zp2
[i
],dd_zp
[i
]);
5592 for(i
=0; i
<nzonep
; i
++)
5594 copy_ivec(dd_zp1
[i
],dd_zp
[i
]);
5598 gmx_fatal(FARGS
,"Can only do 1, 2 or 3D domain decomposition");
5603 zones
= &dd
->comm
->zones
;
5605 for(i
=0; i
<nzone
; i
++)
5608 clear_ivec(zones
->shift
[i
]);
5609 for(d
=0; d
<dd
->ndim
; d
++)
5611 zones
->shift
[i
][dd
->dim
[d
]] = dd_zo
[i
][m
++];
5616 for(i
=0; i
<nzone
; i
++)
5618 for(d
=0; d
<DIM
; d
++)
5620 s
[d
] = dd
->ci
[d
] - zones
->shift
[i
][d
];
5625 else if (s
[d
] >= dd
->nc
[d
])
5631 zones
->nizone
= nzonep
;
5632 for(i
=0; i
<zones
->nizone
; i
++)
5634 if (dd_zp
[i
][0] != i
)
5636 gmx_fatal(FARGS
,"Internal inconsistency in the dd grid setup");
5638 izone
= &zones
->izone
[i
];
5639 izone
->j0
= dd_zp
[i
][1];
5640 izone
->j1
= dd_zp
[i
][2];
5641 for(dim
=0; dim
<DIM
; dim
++)
5643 if (dd
->nc
[dim
] == 1)
5645 /* All shifts should be allowed */
5646 izone
->shift0
[dim
] = -1;
5647 izone
->shift1
[dim
] = 1;
5652 izone->shift0[d] = 0;
5653 izone->shift1[d] = 0;
5654 for(j=izone->j0; j<izone->j1; j++) {
5655 if (dd->shift[j][d] > dd->shift[i][d])
5656 izone->shift0[d] = -1;
5657 if (dd->shift[j][d] < dd->shift[i][d])
5658 izone->shift1[d] = 1;
5664 /* Assume the shift are not more than 1 cell */
5665 izone
->shift0
[dim
] = 1;
5666 izone
->shift1
[dim
] = -1;
5667 for(j
=izone
->j0
; j
<izone
->j1
; j
++)
5669 shift_diff
= zones
->shift
[j
][dim
] - zones
->shift
[i
][dim
];
5670 if (shift_diff
< izone
->shift0
[dim
])
5672 izone
->shift0
[dim
] = shift_diff
;
5674 if (shift_diff
> izone
->shift1
[dim
])
5676 izone
->shift1
[dim
] = shift_diff
;
5683 if (dd
->comm
->eDLB
!= edlbNO
)
5685 snew(dd
->comm
->root
,dd
->ndim
);
5688 if (dd
->comm
->bRecordLoad
)
5690 make_load_communicators(dd
);
5694 static void make_pp_communicator(FILE *fplog
,t_commrec
*cr
,int reorder
)
5697 gmx_domdec_comm_t
*comm
;
5708 if (comm
->bCartesianPP
)
5710 /* Set up cartesian communication for the particle-particle part */
5713 fprintf(fplog
,"Will use a Cartesian communicator: %d x %d x %d\n",
5714 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
]);
5717 for(i
=0; i
<DIM
; i
++)
5721 MPI_Cart_create(cr
->mpi_comm_mygroup
,DIM
,dd
->nc
,periods
,reorder
,
5723 /* We overwrite the old communicator with the new cartesian one */
5724 cr
->mpi_comm_mygroup
= comm_cart
;
5727 dd
->mpi_comm_all
= cr
->mpi_comm_mygroup
;
5728 MPI_Comm_rank(dd
->mpi_comm_all
,&dd
->rank
);
5730 if (comm
->bCartesianPP_PME
)
5732 /* Since we want to use the original cartesian setup for sim,
5733 * and not the one after split, we need to make an index.
5735 snew(comm
->ddindex2ddnodeid
,dd
->nnodes
);
5736 comm
->ddindex2ddnodeid
[dd_index(dd
->nc
,dd
->ci
)] = dd
->rank
;
5737 gmx_sumi(dd
->nnodes
,comm
->ddindex2ddnodeid
,cr
);
5738 /* Get the rank of the DD master,
5739 * above we made sure that the master node is a PP node.
5749 MPI_Allreduce(&rank
,&dd
->masterrank
,1,MPI_INT
,MPI_SUM
,dd
->mpi_comm_all
);
5751 else if (comm
->bCartesianPP
)
5753 if (cr
->npmenodes
== 0)
5755 /* The PP communicator is also
5756 * the communicator for this simulation
5758 cr
->mpi_comm_mysim
= cr
->mpi_comm_mygroup
;
5760 cr
->nodeid
= dd
->rank
;
5762 MPI_Cart_coords(dd
->mpi_comm_all
,dd
->rank
,DIM
,dd
->ci
);
5764 /* We need to make an index to go from the coordinates
5765 * to the nodeid of this simulation.
5767 snew(comm
->ddindex2simnodeid
,dd
->nnodes
);
5768 snew(buf
,dd
->nnodes
);
5769 if (cr
->duty
& DUTY_PP
)
5771 buf
[dd_index(dd
->nc
,dd
->ci
)] = cr
->sim_nodeid
;
5773 /* Communicate the ddindex to simulation nodeid index */
5774 MPI_Allreduce(buf
,comm
->ddindex2simnodeid
,dd
->nnodes
,MPI_INT
,MPI_SUM
,
5775 cr
->mpi_comm_mysim
);
5778 /* Determine the master coordinates and rank.
5779 * The DD master should be the same node as the master of this sim.
5781 for(i
=0; i
<dd
->nnodes
; i
++)
5783 if (comm
->ddindex2simnodeid
[i
] == 0)
5785 ddindex2xyz(dd
->nc
,i
,dd
->master_ci
);
5786 MPI_Cart_rank(dd
->mpi_comm_all
,dd
->master_ci
,&dd
->masterrank
);
5791 fprintf(debug
,"The master rank is %d\n",dd
->masterrank
);
5796 /* No Cartesian communicators */
5797 /* We use the rank in dd->comm->all as DD index */
5798 ddindex2xyz(dd
->nc
,dd
->rank
,dd
->ci
);
5799 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5801 clear_ivec(dd
->master_ci
);
5808 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5809 dd
->rank
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5814 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5815 dd
->rank
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5819 static void receive_ddindex2simnodeid(t_commrec
*cr
)
5823 gmx_domdec_comm_t
*comm
;
5830 if (!comm
->bCartesianPP_PME
&& comm
->bCartesianPP
)
5832 snew(comm
->ddindex2simnodeid
,dd
->nnodes
);
5833 snew(buf
,dd
->nnodes
);
5834 if (cr
->duty
& DUTY_PP
)
5836 buf
[dd_index(dd
->nc
,dd
->ci
)] = cr
->sim_nodeid
;
5839 /* Communicate the ddindex to simulation nodeid index */
5840 MPI_Allreduce(buf
,comm
->ddindex2simnodeid
,dd
->nnodes
,MPI_INT
,MPI_SUM
,
5841 cr
->mpi_comm_mysim
);
5848 static gmx_domdec_master_t
*init_gmx_domdec_master_t(gmx_domdec_t
*dd
,
5851 gmx_domdec_master_t
*ma
;
5856 snew(ma
->ncg
,dd
->nnodes
);
5857 snew(ma
->index
,dd
->nnodes
+1);
5859 snew(ma
->nat
,dd
->nnodes
);
5860 snew(ma
->ibuf
,dd
->nnodes
*2);
5861 snew(ma
->cell_x
,DIM
);
5862 for(i
=0; i
<DIM
; i
++)
5864 snew(ma
->cell_x
[i
],dd
->nc
[i
]+1);
5867 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
5873 snew(ma
->vbuf
,natoms
);
5879 static void split_communicator(FILE *fplog
,t_commrec
*cr
,int dd_node_order
,
5883 gmx_domdec_comm_t
*comm
;
5894 if (comm
->bCartesianPP
)
5896 for(i
=1; i
<DIM
; i
++)
5898 bDiv
[i
] = ((cr
->npmenodes
*dd
->nc
[i
]) % (dd
->nnodes
) == 0);
5900 if (bDiv
[YY
] || bDiv
[ZZ
])
5902 comm
->bCartesianPP_PME
= TRUE
;
5903 /* If we have 2D PME decomposition, which is always in x+y,
5904 * we stack the PME only nodes in z.
5905 * Otherwise we choose the direction that provides the thinnest slab
5906 * of PME only nodes as this will have the least effect
5907 * on the PP communication.
5908 * But for the PME communication the opposite might be better.
5910 if (bDiv
[ZZ
] && (comm
->npmenodes_y
> 1 ||
5912 dd
->nc
[YY
] > dd
->nc
[ZZ
]))
5914 comm
->cartpmedim
= ZZ
;
5918 comm
->cartpmedim
= YY
;
5920 comm
->ntot
[comm
->cartpmedim
]
5921 += (cr
->npmenodes
*dd
->nc
[comm
->cartpmedim
])/dd
->nnodes
;
5925 fprintf(fplog
,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr
->npmenodes
,dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[XX
],dd
->nc
[ZZ
]);
5927 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5932 if (comm
->bCartesianPP_PME
)
5936 fprintf(fplog
,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm
->ntot
[XX
],comm
->ntot
[YY
],comm
->ntot
[ZZ
]);
5939 for(i
=0; i
<DIM
; i
++)
5943 MPI_Cart_create(cr
->mpi_comm_mysim
,DIM
,comm
->ntot
,periods
,reorder
,
5946 MPI_Comm_rank(comm_cart
,&rank
);
5947 if (MASTERNODE(cr
) && rank
!= 0)
5949 gmx_fatal(FARGS
,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
5952 /* With this assigment we loose the link to the original communicator
5953 * which will usually be MPI_COMM_WORLD, unless have multisim.
5955 cr
->mpi_comm_mysim
= comm_cart
;
5956 cr
->sim_nodeid
= rank
;
5958 MPI_Cart_coords(cr
->mpi_comm_mysim
,cr
->sim_nodeid
,DIM
,dd
->ci
);
5962 fprintf(fplog
,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
5963 cr
->sim_nodeid
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5966 if (dd
->ci
[comm
->cartpmedim
] < dd
->nc
[comm
->cartpmedim
])
5970 if (cr
->npmenodes
== 0 ||
5971 dd
->ci
[comm
->cartpmedim
] >= dd
->nc
[comm
->cartpmedim
])
5973 cr
->duty
= DUTY_PME
;
5976 /* Split the sim communicator into PP and PME only nodes */
5977 MPI_Comm_split(cr
->mpi_comm_mysim
,
5979 dd_index(comm
->ntot
,dd
->ci
),
5980 &cr
->mpi_comm_mygroup
);
5984 switch (dd_node_order
)
5989 fprintf(fplog
,"Order of the nodes: PP first, PME last\n");
5992 case ddnoINTERLEAVE
:
5993 /* Interleave the PP-only and PME-only nodes,
5994 * as on clusters with dual-core machines this will double
5995 * the communication bandwidth of the PME processes
5996 * and thus speed up the PP <-> PME and inter PME communication.
6000 fprintf(fplog
,"Interleaving PP and PME nodes\n");
6002 comm
->pmenodes
= dd_pmenodes(cr
);
6007 gmx_fatal(FARGS
,"Unknown dd_node_order=%d",dd_node_order
);
6010 if (dd_simnode2pmenode(cr
,cr
->sim_nodeid
) == -1)
6012 cr
->duty
= DUTY_PME
;
6019 /* Split the sim communicator into PP and PME only nodes */
6020 MPI_Comm_split(cr
->mpi_comm_mysim
,
6023 &cr
->mpi_comm_mygroup
);
6024 MPI_Comm_rank(cr
->mpi_comm_mygroup
,&cr
->nodeid
);
6030 fprintf(fplog
,"This is a %s only node\n\n",
6031 (cr
->duty
& DUTY_PP
) ? "particle-particle" : "PME-mesh");
6035 void make_dd_communicators(FILE *fplog
,t_commrec
*cr
,int dd_node_order
)
6038 gmx_domdec_comm_t
*comm
;
6044 copy_ivec(dd
->nc
,comm
->ntot
);
6046 comm
->bCartesianPP
= (dd_node_order
== ddnoCARTESIAN
);
6047 comm
->bCartesianPP_PME
= FALSE
;
6049 /* Reorder the nodes by default. This might change the MPI ranks.
6050 * Real reordering is only supported on very few architectures,
6051 * Blue Gene is one of them.
6053 CartReorder
= (getenv("GMX_NO_CART_REORDER") == NULL
);
6055 if (cr
->npmenodes
> 0)
6057 /* Split the communicator into a PP and PME part */
6058 split_communicator(fplog
,cr
,dd_node_order
,CartReorder
);
6059 if (comm
->bCartesianPP_PME
)
6061 /* We (possibly) reordered the nodes in split_communicator,
6062 * so it is no longer required in make_pp_communicator.
6064 CartReorder
= FALSE
;
6069 /* All nodes do PP and PME */
6071 /* We do not require separate communicators */
6072 cr
->mpi_comm_mygroup
= cr
->mpi_comm_mysim
;
6076 if (cr
->duty
& DUTY_PP
)
6078 /* Copy or make a new PP communicator */
6079 make_pp_communicator(fplog
,cr
,CartReorder
);
6083 receive_ddindex2simnodeid(cr
);
6086 if (!(cr
->duty
& DUTY_PME
))
6088 /* Set up the commnuication to our PME node */
6089 dd
->pme_nodeid
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
6090 dd
->pme_receive_vir_ener
= receive_vir_ener(cr
);
6093 fprintf(debug
,"My pme_nodeid %d receive ener %d\n",
6094 dd
->pme_nodeid
,dd
->pme_receive_vir_ener
);
6099 dd
->pme_nodeid
= -1;
6104 dd
->ma
= init_gmx_domdec_master_t(dd
,
6106 comm
->cgs_gl
.index
[comm
->cgs_gl
.nr
]);
6110 static real
*get_slb_frac(FILE *fplog
,const char *dir
,int nc
,const char *size_string
)
6117 if (nc
> 1 && size_string
!= NULL
)
6121 fprintf(fplog
,"Using static load balancing for the %s direction\n",
6126 for (i
=0; i
<nc
; i
++)
6129 sscanf(size_string
,"%lf%n",&dbl
,&n
);
6132 gmx_fatal(FARGS
,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir
,size_string
);
6141 fprintf(fplog
,"Relative cell sizes:");
6143 for (i
=0; i
<nc
; i
++)
6148 fprintf(fplog
," %5.3f",slb_frac
[i
]);
6153 fprintf(fplog
,"\n");
6160 static int multi_body_bondeds_count(gmx_mtop_t
*mtop
)
6163 gmx_mtop_ilistloop_t iloop
;
6167 iloop
= gmx_mtop_ilistloop_init(mtop
);
6168 while (gmx_mtop_ilistloop_next(iloop
,&il
,&nmol
))
6170 for(ftype
=0; ftype
<F_NRE
; ftype
++)
6172 if ((interaction_function
[ftype
].flags
& IF_BOND
) &&
6175 n
+= nmol
*il
[ftype
].nr
/(1 + NRAL(ftype
));
6183 static int dd_nst_env(FILE *fplog
,const char *env_var
,int def
)
6189 val
= getenv(env_var
);
6192 if (sscanf(val
,"%d",&nst
) <= 0)
6198 fprintf(fplog
,"Found env.var. %s = %s, using value %d\n",
6206 static void dd_warning(t_commrec
*cr
,FILE *fplog
,const char *warn_string
)
6210 fprintf(stderr
,"\n%s\n",warn_string
);
6214 fprintf(fplog
,"\n%s\n",warn_string
);
6218 static void check_dd_restrictions(t_commrec
*cr
,gmx_domdec_t
*dd
,
6219 t_inputrec
*ir
,FILE *fplog
)
6221 if (ir
->ePBC
== epbcSCREW
&&
6222 (dd
->nc
[XX
] == 1 || dd
->nc
[YY
] > 1 || dd
->nc
[ZZ
] > 1))
6224 gmx_fatal(FARGS
,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names
[ir
->ePBC
]);
6227 if (ir
->ns_type
== ensSIMPLE
)
6229 gmx_fatal(FARGS
,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
6232 if (ir
->nstlist
== 0)
6234 gmx_fatal(FARGS
,"Domain decomposition does not work with nstlist=0");
6237 if (ir
->comm_mode
== ecmANGULAR
&& ir
->ePBC
!= epbcNONE
)
6239 dd_warning(cr
,fplog
,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6243 static real
average_cellsize_min(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
6248 r
= ddbox
->box_size
[XX
];
6249 for(di
=0; di
<dd
->ndim
; di
++)
6252 /* Check using the initial average cell size */
6253 r
= min(r
,ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
]/dd
->nc
[d
]);
6259 static int check_dlb_support(FILE *fplog
,t_commrec
*cr
,
6260 const char *dlb_opt
,gmx_bool bRecordLoad
,
6261 unsigned long Flags
,t_inputrec
*ir
)
6269 case 'a': eDLB
= edlbAUTO
; break;
6270 case 'n': eDLB
= edlbNO
; break;
6271 case 'y': eDLB
= edlbYES
; break;
6272 default: gmx_incons("Unknown dlb_opt");
6275 if (Flags
& MD_RERUN
)
6280 if (!EI_DYNAMICS(ir
->eI
))
6282 if (eDLB
== edlbYES
)
6284 sprintf(buf
,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir
->eI
));
6285 dd_warning(cr
,fplog
,buf
);
6293 dd_warning(cr
,fplog
,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6298 if (Flags
& MD_REPRODUCIBLE
)
6305 dd_warning(cr
,fplog
,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
6309 dd_warning(cr
,fplog
,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6312 gmx_fatal(FARGS
,"Death horror: undefined case (%d) for load balancing choice",eDLB
);
6320 static void set_dd_dim(FILE *fplog
,gmx_domdec_t
*dd
)
6325 if (getenv("GMX_DD_ORDER_ZYX") != NULL
)
6327 /* Decomposition order z,y,x */
6330 fprintf(fplog
,"Using domain decomposition order z, y, x\n");
6332 for(dim
=DIM
-1; dim
>=0; dim
--)
6334 if (dd
->nc
[dim
] > 1)
6336 dd
->dim
[dd
->ndim
++] = dim
;
6342 /* Decomposition order x,y,z */
6343 for(dim
=0; dim
<DIM
; dim
++)
6345 if (dd
->nc
[dim
] > 1)
6347 dd
->dim
[dd
->ndim
++] = dim
;
6353 static gmx_domdec_comm_t
*init_dd_comm()
6355 gmx_domdec_comm_t
*comm
;
6359 snew(comm
->cggl_flag
,DIM
*2);
6360 snew(comm
->cgcm_state
,DIM
*2);
6361 for(i
=0; i
<DIM
*2; i
++)
6363 comm
->cggl_flag_nalloc
[i
] = 0;
6364 comm
->cgcm_state_nalloc
[i
] = 0;
6367 comm
->nalloc_int
= 0;
6368 comm
->buf_int
= NULL
;
6370 vec_rvec_init(&comm
->vbuf
);
6372 comm
->n_load_have
= 0;
6373 comm
->n_load_collect
= 0;
6375 for(i
=0; i
<ddnatNR
-ddnatZONE
; i
++)
6377 comm
->sum_nat
[i
] = 0;
6381 comm
->load_step
= 0;
6384 clear_ivec(comm
->load_lim
);
6391 gmx_domdec_t
*init_domain_decomposition(FILE *fplog
,t_commrec
*cr
,
6392 unsigned long Flags
,
6394 real comm_distance_min
,real rconstr
,
6395 const char *dlb_opt
,real dlb_scale
,
6396 const char *sizex
,const char *sizey
,const char *sizez
,
6397 gmx_mtop_t
*mtop
,t_inputrec
*ir
,
6400 int *npme_x
,int *npme_y
)
6403 gmx_domdec_comm_t
*comm
;
6406 real r_2b
,r_mb
,r_bonded
=-1,r_bonded_limit
=-1,limit
,acs
;
6413 "\nInitializing Domain Decomposition on %d nodes\n",cr
->nnodes
);
6418 dd
->comm
= init_dd_comm();
6420 snew(comm
->cggl_flag
,DIM
*2);
6421 snew(comm
->cgcm_state
,DIM
*2);
6423 dd
->npbcdim
= ePBC2npbcdim(ir
->ePBC
);
6424 dd
->bScrewPBC
= (ir
->ePBC
== epbcSCREW
);
6426 dd
->bSendRecv2
= dd_nst_env(fplog
,"GMX_DD_SENDRECV2",0);
6427 comm
->dlb_scale_lim
= dd_nst_env(fplog
,"GMX_DLB_MAX",10);
6428 comm
->eFlop
= dd_nst_env(fplog
,"GMX_DLB_FLOP",0);
6429 recload
= dd_nst_env(fplog
,"GMX_DD_LOAD",1);
6430 comm
->nstSortCG
= dd_nst_env(fplog
,"GMX_DD_SORT",1);
6431 comm
->nstDDDump
= dd_nst_env(fplog
,"GMX_DD_DUMP",0);
6432 comm
->nstDDDumpGrid
= dd_nst_env(fplog
,"GMX_DD_DUMP_GRID",0);
6433 comm
->DD_debug
= dd_nst_env(fplog
,"GMX_DD_DEBUG",0);
6435 dd
->pme_recv_f_alloc
= 0;
6436 dd
->pme_recv_f_buf
= NULL
;
6438 if (dd
->bSendRecv2
&& fplog
)
6440 fprintf(fplog
,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6446 fprintf(fplog
,"Will load balance based on FLOP count\n");
6448 if (comm
->eFlop
> 1)
6450 srand(1+cr
->nodeid
);
6452 comm
->bRecordLoad
= TRUE
;
6456 comm
->bRecordLoad
= (wallcycle_have_counter() && recload
> 0);
6460 comm
->eDLB
= check_dlb_support(fplog
,cr
,dlb_opt
,comm
->bRecordLoad
,Flags
,ir
);
6462 comm
->bDynLoadBal
= (comm
->eDLB
== edlbYES
);
6465 fprintf(fplog
,"Dynamic load balancing: %s\n",edlb_names
[comm
->eDLB
]);
6467 dd
->bGridJump
= comm
->bDynLoadBal
;
6469 if (comm
->nstSortCG
)
6473 if (comm
->nstSortCG
== 1)
6475 fprintf(fplog
,"Will sort the charge groups at every domain (re)decomposition\n");
6479 fprintf(fplog
,"Will sort the charge groups every %d steps\n",
6489 fprintf(fplog
,"Will not sort the charge groups\n");
6493 comm
->bCGs
= (ncg_mtop(mtop
) < mtop
->natoms
);
6495 comm
->bInterCGBondeds
= (ncg_mtop(mtop
) > mtop
->mols
.nr
);
6496 if (comm
->bInterCGBondeds
)
6498 comm
->bInterCGMultiBody
= (multi_body_bondeds_count(mtop
) > 0);
6502 comm
->bInterCGMultiBody
= FALSE
;
6505 dd
->bInterCGcons
= inter_charge_group_constraints(mtop
);
6506 dd
->bInterCGsettles
= inter_charge_group_settles(mtop
);
6508 if (ir
->rlistlong
== 0)
6510 /* Set the cut-off to some very large value,
6511 * so we don't need if statements everywhere in the code.
6512 * We use sqrt, since the cut-off is squared in some places.
6514 comm
->cutoff
= GMX_CUTOFF_INF
;
6518 comm
->cutoff
= ir
->rlistlong
;
6520 comm
->cutoff_mbody
= 0;
6522 comm
->cellsize_limit
= 0;
6523 comm
->bBondComm
= FALSE
;
6525 if (comm
->bInterCGBondeds
)
6527 if (comm_distance_min
> 0)
6529 comm
->cutoff_mbody
= comm_distance_min
;
6530 if (Flags
& MD_DDBONDCOMM
)
6532 comm
->bBondComm
= (comm
->cutoff_mbody
> comm
->cutoff
);
6536 comm
->cutoff
= max(comm
->cutoff
,comm
->cutoff_mbody
);
6538 r_bonded_limit
= comm
->cutoff_mbody
;
6540 else if (ir
->bPeriodicMols
)
6542 /* Can not easily determine the required cut-off */
6543 dd_warning(cr
,fplog
,"NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6544 comm
->cutoff_mbody
= comm
->cutoff
/2;
6545 r_bonded_limit
= comm
->cutoff_mbody
;
6551 dd_bonded_cg_distance(fplog
,dd
,mtop
,ir
,x
,box
,
6552 Flags
& MD_DDBONDCHECK
,&r_2b
,&r_mb
);
6554 gmx_bcast(sizeof(r_2b
),&r_2b
,cr
);
6555 gmx_bcast(sizeof(r_mb
),&r_mb
,cr
);
6557 /* We use an initial margin of 10% for the minimum cell size,
6558 * except when we are just below the non-bonded cut-off.
6560 if (Flags
& MD_DDBONDCOMM
)
6562 if (max(r_2b
,r_mb
) > comm
->cutoff
)
6564 r_bonded
= max(r_2b
,r_mb
);
6565 r_bonded_limit
= 1.1*r_bonded
;
6566 comm
->bBondComm
= TRUE
;
6571 r_bonded_limit
= min(1.1*r_bonded
,comm
->cutoff
);
6573 /* We determine cutoff_mbody later */
6577 /* No special bonded communication,
6578 * simply increase the DD cut-off.
6580 r_bonded_limit
= 1.1*max(r_2b
,r_mb
);
6581 comm
->cutoff_mbody
= r_bonded_limit
;
6582 comm
->cutoff
= max(comm
->cutoff
,comm
->cutoff_mbody
);
6585 comm
->cellsize_limit
= max(comm
->cellsize_limit
,r_bonded_limit
);
6589 "Minimum cell size due to bonded interactions: %.3f nm\n",
6590 comm
->cellsize_limit
);
6594 if (dd
->bInterCGcons
&& rconstr
<= 0)
6596 /* There is a cell size limit due to the constraints (P-LINCS) */
6597 rconstr
= constr_r_max(fplog
,mtop
,ir
);
6601 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6603 if (rconstr
> comm
->cellsize_limit
)
6605 fprintf(fplog
,"This distance will limit the DD cell size, you can override this with -rcon\n");
6609 else if (rconstr
> 0 && fplog
)
6611 /* Here we do not check for dd->bInterCGcons,
6612 * because one can also set a cell size limit for virtual sites only
6613 * and at this point we don't know yet if there are intercg v-sites.
6616 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6619 comm
->cellsize_limit
= max(comm
->cellsize_limit
,rconstr
);
6621 comm
->cgs_gl
= gmx_mtop_global_cgs(mtop
);
6625 copy_ivec(nc
,dd
->nc
);
6626 set_dd_dim(fplog
,dd
);
6627 set_ddbox_cr(cr
,&dd
->nc
,ir
,box
,&comm
->cgs_gl
,x
,ddbox
);
6629 if (cr
->npmenodes
== -1)
6633 acs
= average_cellsize_min(dd
,ddbox
);
6634 if (acs
< comm
->cellsize_limit
)
6638 fprintf(fplog
,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs
,comm
->cellsize_limit
);
6640 gmx_fatal_collective(FARGS
,cr
,NULL
,
6641 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6642 acs
,comm
->cellsize_limit
);
6647 set_ddbox_cr(cr
,NULL
,ir
,box
,&comm
->cgs_gl
,x
,ddbox
);
6649 /* We need to choose the optimal DD grid and possibly PME nodes */
6650 limit
= dd_choose_grid(fplog
,cr
,dd
,ir
,mtop
,box
,ddbox
,
6651 comm
->eDLB
!=edlbNO
,dlb_scale
,
6652 comm
->cellsize_limit
,comm
->cutoff
,
6653 comm
->bInterCGBondeds
,comm
->bInterCGMultiBody
);
6655 if (dd
->nc
[XX
] == 0)
6657 bC
= (dd
->bInterCGcons
&& rconstr
> r_bonded_limit
);
6658 sprintf(buf
,"Change the number of nodes or mdrun option %s%s%s",
6659 !bC
? "-rdd" : "-rcon",
6660 comm
->eDLB
!=edlbNO
? " or -dds" : "",
6661 bC
? " or your LINCS settings" : "");
6663 gmx_fatal_collective(FARGS
,cr
,NULL
,
6664 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6666 "Look in the log file for details on the domain decomposition",
6667 cr
->nnodes
-cr
->npmenodes
,limit
,buf
);
6669 set_dd_dim(fplog
,dd
);
6675 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6676 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
],cr
->npmenodes
);
6679 dd
->nnodes
= dd
->nc
[XX
]*dd
->nc
[YY
]*dd
->nc
[ZZ
];
6680 if (cr
->nnodes
- dd
->nnodes
!= cr
->npmenodes
)
6682 gmx_fatal_collective(FARGS
,cr
,NULL
,
6683 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6684 dd
->nnodes
,cr
->nnodes
- cr
->npmenodes
,cr
->nnodes
);
6686 if (cr
->npmenodes
> dd
->nnodes
)
6688 gmx_fatal_collective(FARGS
,cr
,NULL
,
6689 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.",cr
->npmenodes
,dd
->nnodes
);
6691 if (cr
->npmenodes
> 0)
6693 comm
->npmenodes
= cr
->npmenodes
;
6697 comm
->npmenodes
= dd
->nnodes
;
6700 if (EEL_PME(ir
->coulombtype
))
6702 /* The following choices should match those
6703 * in comm_cost_est in domdec_setup.c.
6704 * Note that here the checks have to take into account
6705 * that the decomposition might occur in a different order than xyz
6706 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6707 * in which case they will not match those in comm_cost_est,
6708 * but since that is mainly for testing purposes that's fine.
6710 if (dd
->ndim
>= 2 && dd
->dim
[0] == XX
&& dd
->dim
[1] == YY
&&
6711 comm
->npmenodes
> dd
->nc
[XX
] && comm
->npmenodes
% dd
->nc
[XX
] == 0 &&
6712 getenv("GMX_PMEONEDD") == NULL
)
6714 comm
->npmedecompdim
= 2;
6715 comm
->npmenodes_x
= dd
->nc
[XX
];
6716 comm
->npmenodes_y
= comm
->npmenodes
/comm
->npmenodes_x
;
6720 /* In case nc is 1 in both x and y we could still choose to
6721 * decompose pme in y instead of x, but we use x for simplicity.
6723 comm
->npmedecompdim
= 1;
6724 if (dd
->dim
[0] == YY
)
6726 comm
->npmenodes_x
= 1;
6727 comm
->npmenodes_y
= comm
->npmenodes
;
6731 comm
->npmenodes_x
= comm
->npmenodes
;
6732 comm
->npmenodes_y
= 1;
6737 fprintf(fplog
,"PME domain decomposition: %d x %d x %d\n",
6738 comm
->npmenodes_x
,comm
->npmenodes_y
,1);
6743 comm
->npmedecompdim
= 0;
6744 comm
->npmenodes_x
= 0;
6745 comm
->npmenodes_y
= 0;
6748 /* Technically we don't need both of these,
6749 * but it simplifies code not having to recalculate it.
6751 *npme_x
= comm
->npmenodes_x
;
6752 *npme_y
= comm
->npmenodes_y
;
6754 snew(comm
->slb_frac
,DIM
);
6755 if (comm
->eDLB
== edlbNO
)
6757 comm
->slb_frac
[XX
] = get_slb_frac(fplog
,"x",dd
->nc
[XX
],sizex
);
6758 comm
->slb_frac
[YY
] = get_slb_frac(fplog
,"y",dd
->nc
[YY
],sizey
);
6759 comm
->slb_frac
[ZZ
] = get_slb_frac(fplog
,"z",dd
->nc
[ZZ
],sizez
);
6762 if (comm
->bInterCGBondeds
&& comm
->cutoff_mbody
== 0)
6764 if (comm
->bBondComm
|| comm
->eDLB
!= edlbNO
)
6766 /* Set the bonded communication distance to halfway
6767 * the minimum and the maximum,
6768 * since the extra communication cost is nearly zero.
6770 acs
= average_cellsize_min(dd
,ddbox
);
6771 comm
->cutoff_mbody
= 0.5*(r_bonded
+ acs
);
6772 if (comm
->eDLB
!= edlbNO
)
6774 /* Check if this does not limit the scaling */
6775 comm
->cutoff_mbody
= min(comm
->cutoff_mbody
,dlb_scale
*acs
);
6777 if (!comm
->bBondComm
)
6779 /* Without bBondComm do not go beyond the n.b. cut-off */
6780 comm
->cutoff_mbody
= min(comm
->cutoff_mbody
,comm
->cutoff
);
6781 if (comm
->cellsize_limit
>= comm
->cutoff
)
6783 /* We don't loose a lot of efficieny
6784 * when increasing it to the n.b. cut-off.
6785 * It can even be slightly faster, because we need
6786 * less checks for the communication setup.
6788 comm
->cutoff_mbody
= comm
->cutoff
;
6791 /* Check if we did not end up below our original limit */
6792 comm
->cutoff_mbody
= max(comm
->cutoff_mbody
,r_bonded_limit
);
6794 if (comm
->cutoff_mbody
> comm
->cellsize_limit
)
6796 comm
->cellsize_limit
= comm
->cutoff_mbody
;
6799 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6804 fprintf(debug
,"Bonded atom communication beyond the cut-off: %d\n"
6805 "cellsize limit %f\n",
6806 comm
->bBondComm
,comm
->cellsize_limit
);
6811 check_dd_restrictions(cr
,dd
,ir
,fplog
);
6814 comm
->partition_step
= INT_MIN
;
6817 clear_dd_cycle_counts(dd
);
6822 static void set_dlb_limits(gmx_domdec_t
*dd
)
6827 for(d
=0; d
<dd
->ndim
; d
++)
6829 dd
->comm
->cd
[d
].np
= dd
->comm
->cd
[d
].np_dlb
;
6830 dd
->comm
->cellsize_min
[dd
->dim
[d
]] =
6831 dd
->comm
->cellsize_min_dlb
[dd
->dim
[d
]];
6836 static void turn_on_dlb(FILE *fplog
,t_commrec
*cr
,gmx_large_int_t step
)
6839 gmx_domdec_comm_t
*comm
;
6849 fprintf(fplog
,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step
,buf
),dd_force_imb_perf_loss(dd
)*100);
6852 cellsize_min
= comm
->cellsize_min
[dd
->dim
[0]];
6853 for(d
=1; d
<dd
->ndim
; d
++)
6855 cellsize_min
= min(cellsize_min
,comm
->cellsize_min
[dd
->dim
[d
]]);
6858 if (cellsize_min
< comm
->cellsize_limit
*1.05)
6860 dd_warning(cr
,fplog
,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
6862 /* Change DLB from "auto" to "no". */
6863 comm
->eDLB
= edlbNO
;
6868 dd_warning(cr
,fplog
,"NOTE: Turning on dynamic load balancing\n");
6869 comm
->bDynLoadBal
= TRUE
;
6870 dd
->bGridJump
= TRUE
;
6874 /* We can set the required cell size info here,
6875 * so we do not need to communicate this.
6876 * The grid is completely uniform.
6878 for(d
=0; d
<dd
->ndim
; d
++)
6882 comm
->load
[d
].sum_m
= comm
->load
[d
].sum
;
6884 nc
= dd
->nc
[dd
->dim
[d
]];
6887 comm
->root
[d
]->cell_f
[i
] = i
/(real
)nc
;
6890 comm
->root
[d
]->cell_f_max0
[i
] = i
/(real
)nc
;
6891 comm
->root
[d
]->cell_f_min1
[i
] = (i
+1)/(real
)nc
;
6894 comm
->root
[d
]->cell_f
[nc
] = 1.0;
6899 static char *init_bLocalCG(gmx_mtop_t
*mtop
)
6904 ncg
= ncg_mtop(mtop
);
6906 for(cg
=0; cg
<ncg
; cg
++)
6908 bLocalCG
[cg
] = FALSE
;
6914 void dd_init_bondeds(FILE *fplog
,
6915 gmx_domdec_t
*dd
,gmx_mtop_t
*mtop
,
6916 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
6917 t_inputrec
*ir
,gmx_bool bBCheck
,cginfo_mb_t
*cginfo_mb
)
6919 gmx_domdec_comm_t
*comm
;
6923 dd_make_reverse_top(fplog
,dd
,mtop
,vsite
,constr
,ir
,bBCheck
);
6927 if (comm
->bBondComm
)
6929 /* Communicate atoms beyond the cut-off for bonded interactions */
6932 comm
->cglink
= make_charge_group_links(mtop
,dd
,cginfo_mb
);
6934 comm
->bLocalCG
= init_bLocalCG(mtop
);
6938 /* Only communicate atoms based on cut-off */
6939 comm
->cglink
= NULL
;
6940 comm
->bLocalCG
= NULL
;
6944 static void print_dd_settings(FILE *fplog
,gmx_domdec_t
*dd
,
6946 gmx_bool bDynLoadBal
,real dlb_scale
,
6949 gmx_domdec_comm_t
*comm
;
6964 fprintf(fplog
,"The maximum number of communication pulses is:");
6965 for(d
=0; d
<dd
->ndim
; d
++)
6967 fprintf(fplog
," %c %d",dim2char(dd
->dim
[d
]),comm
->cd
[d
].np_dlb
);
6969 fprintf(fplog
,"\n");
6970 fprintf(fplog
,"The minimum size for domain decomposition cells is %.3f nm\n",comm
->cellsize_limit
);
6971 fprintf(fplog
,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale
);
6972 fprintf(fplog
,"The allowed shrink of domain decomposition cells is:");
6973 for(d
=0; d
<DIM
; d
++)
6977 if (d
>= ddbox
->npbcdim
&& dd
->nc
[d
] == 2)
6984 comm
->cellsize_min_dlb
[d
]/
6985 (ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
]/dd
->nc
[d
]);
6987 fprintf(fplog
," %c %.2f",dim2char(d
),shrink
);
6990 fprintf(fplog
,"\n");
6994 set_dd_cell_sizes_slb(dd
,ddbox
,FALSE
,np
);
6995 fprintf(fplog
,"The initial number of communication pulses is:");
6996 for(d
=0; d
<dd
->ndim
; d
++)
6998 fprintf(fplog
," %c %d",dim2char(dd
->dim
[d
]),np
[dd
->dim
[d
]]);
7000 fprintf(fplog
,"\n");
7001 fprintf(fplog
,"The initial domain decomposition cell size is:");
7002 for(d
=0; d
<DIM
; d
++) {
7005 fprintf(fplog
," %c %.2f nm",
7006 dim2char(d
),dd
->comm
->cellsize_min
[d
]);
7009 fprintf(fplog
,"\n\n");
7012 if (comm
->bInterCGBondeds
|| dd
->vsite_comm
|| dd
->constraint_comm
)
7014 fprintf(fplog
,"The maximum allowed distance for charge groups involved in interactions is:\n");
7015 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
7016 "non-bonded interactions","",comm
->cutoff
);
7020 limit
= dd
->comm
->cellsize_limit
;
7024 if (dynamic_dd_box(ddbox
,ir
))
7026 fprintf(fplog
,"(the following are initial values, they could change due to box deformation)\n");
7028 limit
= dd
->comm
->cellsize_min
[XX
];
7029 for(d
=1; d
<DIM
; d
++)
7031 limit
= min(limit
,dd
->comm
->cellsize_min
[d
]);
7035 if (comm
->bInterCGBondeds
)
7037 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
7038 "two-body bonded interactions","(-rdd)",
7039 max(comm
->cutoff
,comm
->cutoff_mbody
));
7040 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
7041 "multi-body bonded interactions","(-rdd)",
7042 (comm
->bBondComm
|| dd
->bGridJump
) ? comm
->cutoff_mbody
: min(comm
->cutoff
,limit
));
7046 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
7047 "virtual site constructions","(-rcon)",limit
);
7049 if (dd
->constraint_comm
)
7051 sprintf(buf
,"atoms separated by up to %d constraints",
7053 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
7054 buf
,"(-rcon)",limit
);
7056 fprintf(fplog
,"\n");
7062 static void set_cell_limits_dlb(gmx_domdec_t
*dd
,
7064 const t_inputrec
*ir
,
7065 const gmx_ddbox_t
*ddbox
)
7067 gmx_domdec_comm_t
*comm
;
7068 int d
,dim
,npulse
,npulse_d_max
,npulse_d
;
7073 bNoCutOff
= (ir
->rvdw
== 0 || ir
->rcoulomb
== 0);
7075 /* Determine the maximum number of comm. pulses in one dimension */
7077 comm
->cellsize_limit
= max(comm
->cellsize_limit
,comm
->cutoff_mbody
);
7079 /* Determine the maximum required number of grid pulses */
7080 if (comm
->cellsize_limit
>= comm
->cutoff
)
7082 /* Only a single pulse is required */
7085 else if (!bNoCutOff
&& comm
->cellsize_limit
> 0)
7087 /* We round down slightly here to avoid overhead due to the latency
7088 * of extra communication calls when the cut-off
7089 * would be only slightly longer than the cell size.
7090 * Later cellsize_limit is redetermined,
7091 * so we can not miss interactions due to this rounding.
7093 npulse
= (int)(0.96 + comm
->cutoff
/comm
->cellsize_limit
);
7097 /* There is no cell size limit */
7098 npulse
= max(dd
->nc
[XX
]-1,max(dd
->nc
[YY
]-1,dd
->nc
[ZZ
]-1));
7101 if (!bNoCutOff
&& npulse
> 1)
7103 /* See if we can do with less pulses, based on dlb_scale */
7105 for(d
=0; d
<dd
->ndim
; d
++)
7108 npulse_d
= (int)(1 + dd
->nc
[dim
]*comm
->cutoff
7109 /(ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
]*dlb_scale
));
7110 npulse_d_max
= max(npulse_d_max
,npulse_d
);
7112 npulse
= min(npulse
,npulse_d_max
);
7115 /* This env var can override npulse */
7116 d
= dd_nst_env(debug
,"GMX_DD_NPULSE",0);
7123 comm
->bVacDLBNoLimit
= (ir
->ePBC
== epbcNONE
);
7124 for(d
=0; d
<dd
->ndim
; d
++)
7126 comm
->cd
[d
].np_dlb
= min(npulse
,dd
->nc
[dd
->dim
[d
]]-1);
7127 comm
->cd
[d
].np_nalloc
= comm
->cd
[d
].np_dlb
;
7128 snew(comm
->cd
[d
].ind
,comm
->cd
[d
].np_nalloc
);
7129 comm
->maxpulse
= max(comm
->maxpulse
,comm
->cd
[d
].np_dlb
);
7130 if (comm
->cd
[d
].np_dlb
< dd
->nc
[dd
->dim
[d
]]-1)
7132 comm
->bVacDLBNoLimit
= FALSE
;
7136 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7137 if (!comm
->bVacDLBNoLimit
)
7139 comm
->cellsize_limit
= max(comm
->cellsize_limit
,
7140 comm
->cutoff
/comm
->maxpulse
);
7142 comm
->cellsize_limit
= max(comm
->cellsize_limit
,comm
->cutoff_mbody
);
7143 /* Set the minimum cell size for each DD dimension */
7144 for(d
=0; d
<dd
->ndim
; d
++)
7146 if (comm
->bVacDLBNoLimit
||
7147 comm
->cd
[d
].np_dlb
*comm
->cellsize_limit
>= comm
->cutoff
)
7149 comm
->cellsize_min_dlb
[dd
->dim
[d
]] = comm
->cellsize_limit
;
7153 comm
->cellsize_min_dlb
[dd
->dim
[d
]] =
7154 comm
->cutoff
/comm
->cd
[d
].np_dlb
;
7157 if (comm
->cutoff_mbody
<= 0)
7159 comm
->cutoff_mbody
= min(comm
->cutoff
,comm
->cellsize_limit
);
7161 if (comm
->bDynLoadBal
)
7167 gmx_bool
dd_bonded_molpbc(gmx_domdec_t
*dd
,int ePBC
)
7169 /* If each molecule is a single charge group
7170 * or we use domain decomposition for each periodic dimension,
7171 * we do not need to take pbc into account for the bonded interactions.
7173 return (ePBC
!= epbcNONE
&& dd
->comm
->bInterCGBondeds
&&
7176 (dd
->nc
[ZZ
]>1 || ePBC
==epbcXY
)));
7179 void set_dd_parameters(FILE *fplog
,gmx_domdec_t
*dd
,real dlb_scale
,
7180 t_inputrec
*ir
,t_forcerec
*fr
,
7183 gmx_domdec_comm_t
*comm
;
7189 /* Initialize the thread data.
7190 * This can not be done in init_domain_decomposition,
7191 * as the numbers of threads is determined later.
7193 comm
->nth
= gmx_omp_nthreads_get(emntDomdec
);
7196 snew(comm
->dth
,comm
->nth
);
7199 if (EEL_PME(ir
->coulombtype
))
7201 init_ddpme(dd
,&comm
->ddpme
[0],0);
7202 if (comm
->npmedecompdim
>= 2)
7204 init_ddpme(dd
,&comm
->ddpme
[1],1);
7209 comm
->npmenodes
= 0;
7210 if (dd
->pme_nodeid
>= 0)
7212 gmx_fatal_collective(FARGS
,NULL
,dd
,
7213 "Can not have separate PME nodes without PME electrostatics");
7219 fprintf(debug
,"The DD cut-off is %f\n",comm
->cutoff
);
7221 if (comm
->eDLB
!= edlbNO
)
7223 set_cell_limits_dlb(dd
,dlb_scale
,ir
,ddbox
);
7226 print_dd_settings(fplog
,dd
,ir
,comm
->bDynLoadBal
,dlb_scale
,ddbox
);
7227 if (comm
->eDLB
== edlbAUTO
)
7231 fprintf(fplog
,"When dynamic load balancing gets turned on, these settings will change to:\n");
7233 print_dd_settings(fplog
,dd
,ir
,TRUE
,dlb_scale
,ddbox
);
7236 if (ir
->ePBC
== epbcNONE
)
7238 vol_frac
= 1 - 1/(double)dd
->nnodes
;
7243 (1 + comm_box_frac(dd
->nc
,comm
->cutoff
,ddbox
))/(double)dd
->nnodes
;
7247 fprintf(debug
,"Volume fraction for all DD zones: %f\n",vol_frac
);
7249 natoms_tot
= comm
->cgs_gl
.index
[comm
->cgs_gl
.nr
];
7251 dd
->ga2la
= ga2la_init(natoms_tot
,vol_frac
*natoms_tot
);
7254 gmx_bool
change_dd_cutoff(t_commrec
*cr
,t_state
*state
,t_inputrec
*ir
,
7265 set_ddbox(dd
,FALSE
,cr
,ir
,state
->box
,
7266 TRUE
,&dd
->comm
->cgs_gl
,state
->x
,&ddbox
);
7270 for(d
=0; d
<dd
->ndim
; d
++)
7274 inv_cell_size
= DD_CELL_MARGIN
*dd
->nc
[dim
]/ddbox
.box_size
[dim
];
7275 if (dynamic_dd_box(&ddbox
,ir
))
7277 inv_cell_size
*= DD_PRES_SCALE_MARGIN
;
7280 np
= 1 + (int)(cutoff_req
*inv_cell_size
*ddbox
.skew_fac
[dim
]);
7282 if (dd
->comm
->eDLB
!= edlbNO
&& dim
< ddbox
.npbcdim
&&
7283 dd
->comm
->cd
[d
].np_dlb
> 0)
7285 if (np
> dd
->comm
->cd
[d
].np_dlb
)
7290 /* If a current local cell size is smaller than the requested
7291 * cut-off, we could still fix it, but this gets very complicated.
7292 * Without fixing here, we might actually need more checks.
7294 if ((dd
->comm
->cell_x1
[dim
] - dd
->comm
->cell_x0
[dim
])*ddbox
.skew_fac
[dim
]*dd
->comm
->cd
[d
].np_dlb
< cutoff_req
)
7301 if (dd
->comm
->eDLB
!= edlbNO
)
7303 if (check_grid_jump(0,dd
,cutoff_req
,&ddbox
,FALSE
))
7308 gmx_sumi(1,&LocallyLimited
,cr
);
7310 if (LocallyLimited
> 0)
7316 dd
->comm
->cutoff
= cutoff_req
;
7321 static void merge_cg_buffers(int ncell
,
7322 gmx_domdec_comm_dim_t
*cd
, int pulse
,
7324 int *index_gl
, int *recv_i
,
7325 rvec
*cg_cm
, rvec
*recv_vr
,
7327 cginfo_mb_t
*cginfo_mb
,int *cginfo
)
7329 gmx_domdec_ind_t
*ind
,*ind_p
;
7330 int p
,cell
,c
,cg
,cg0
,cg1
,cg_gl
,nat
;
7333 ind
= &cd
->ind
[pulse
];
7335 /* First correct the already stored data */
7336 shift
= ind
->nrecv
[ncell
];
7337 for(cell
=ncell
-1; cell
>=0; cell
--)
7339 shift
-= ind
->nrecv
[cell
];
7342 /* Move the cg's present from previous grid pulses */
7343 cg0
= ncg_cell
[ncell
+cell
];
7344 cg1
= ncg_cell
[ncell
+cell
+1];
7345 cgindex
[cg1
+shift
] = cgindex
[cg1
];
7346 for(cg
=cg1
-1; cg
>=cg0
; cg
--)
7348 index_gl
[cg
+shift
] = index_gl
[cg
];
7349 copy_rvec(cg_cm
[cg
],cg_cm
[cg
+shift
]);
7350 cgindex
[cg
+shift
] = cgindex
[cg
];
7351 cginfo
[cg
+shift
] = cginfo
[cg
];
7353 /* Correct the already stored send indices for the shift */
7354 for(p
=1; p
<=pulse
; p
++)
7356 ind_p
= &cd
->ind
[p
];
7358 for(c
=0; c
<cell
; c
++)
7360 cg0
+= ind_p
->nsend
[c
];
7362 cg1
= cg0
+ ind_p
->nsend
[cell
];
7363 for(cg
=cg0
; cg
<cg1
; cg
++)
7365 ind_p
->index
[cg
] += shift
;
7371 /* Merge in the communicated buffers */
7375 for(cell
=0; cell
<ncell
; cell
++)
7377 cg1
= ncg_cell
[ncell
+cell
+1] + shift
;
7380 /* Correct the old cg indices */
7381 for(cg
=ncg_cell
[ncell
+cell
]; cg
<cg1
; cg
++)
7383 cgindex
[cg
+1] += shift_at
;
7386 for(cg
=0; cg
<ind
->nrecv
[cell
]; cg
++)
7388 /* Copy this charge group from the buffer */
7389 index_gl
[cg1
] = recv_i
[cg0
];
7390 copy_rvec(recv_vr
[cg0
],cg_cm
[cg1
]);
7391 /* Add it to the cgindex */
7392 cg_gl
= index_gl
[cg1
];
7393 cginfo
[cg1
] = ddcginfo(cginfo_mb
,cg_gl
);
7394 nat
= GET_CGINFO_NATOMS(cginfo
[cg1
]);
7395 cgindex
[cg1
+1] = cgindex
[cg1
] + nat
;
7400 shift
+= ind
->nrecv
[cell
];
7401 ncg_cell
[ncell
+cell
+1] = cg1
;
7405 static void make_cell2at_index(gmx_domdec_comm_dim_t
*cd
,
7406 int nzone
,int cg0
,const int *cgindex
)
7410 /* Store the atom block boundaries for easy copying of communication buffers
7413 for(zone
=0; zone
<nzone
; zone
++)
7415 for(p
=0; p
<cd
->np
; p
++) {
7416 cd
->ind
[p
].cell2at0
[zone
] = cgindex
[cg
];
7417 cg
+= cd
->ind
[p
].nrecv
[zone
];
7418 cd
->ind
[p
].cell2at1
[zone
] = cgindex
[cg
];
7423 static gmx_bool
missing_link(t_blocka
*link
,int cg_gl
,char *bLocalCG
)
7429 for(i
=link
->index
[cg_gl
]; i
<link
->index
[cg_gl
+1]; i
++)
7431 if (!bLocalCG
[link
->a
[i
]])
7440 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7442 real c
[DIM
][4]; /* the corners for the non-bonded communication */
7443 real cr0
; /* corner for rounding */
7444 real cr1
[4]; /* corners for rounding */
7445 real bc
[DIM
]; /* corners for bounded communication */
7446 real bcr1
; /* corner for rounding for bonded communication */
7449 /* Determine the corners of the domain(s) we are communicating with */
7451 set_dd_corners(const gmx_domdec_t
*dd
,
7452 int dim0
, int dim1
, int dim2
,
7456 const gmx_domdec_comm_t
*comm
;
7457 const gmx_domdec_zones_t
*zones
;
7462 zones
= &comm
->zones
;
7464 /* Keep the compiler happy */
7468 /* The first dimension is equal for all cells */
7469 c
->c
[0][0] = comm
->cell_x0
[dim0
];
7472 c
->bc
[0] = c
->c
[0][0];
7477 /* This cell row is only seen from the first row */
7478 c
->c
[1][0] = comm
->cell_x0
[dim1
];
7479 /* All rows can see this row */
7480 c
->c
[1][1] = comm
->cell_x0
[dim1
];
7483 c
->c
[1][1] = max(comm
->cell_x0
[dim1
],comm
->zone_d1
[1].mch0
);
7486 /* For the multi-body distance we need the maximum */
7487 c
->bc
[1] = max(comm
->cell_x0
[dim1
],comm
->zone_d1
[1].p1_0
);
7490 /* Set the upper-right corner for rounding */
7491 c
->cr0
= comm
->cell_x1
[dim0
];
7498 c
->c
[2][j
] = comm
->cell_x0
[dim2
];
7502 /* Use the maximum of the i-cells that see a j-cell */
7503 for(i
=0; i
<zones
->nizone
; i
++)
7505 for(j
=zones
->izone
[i
].j0
; j
<zones
->izone
[i
].j1
; j
++)
7511 comm
->zone_d2
[zones
->shift
[i
][dim0
]][zones
->shift
[i
][dim1
]].mch0
);
7517 /* For the multi-body distance we need the maximum */
7518 c
->bc
[2] = comm
->cell_x0
[dim2
];
7523 c
->bc
[2] = max(c
->bc
[2],comm
->zone_d2
[i
][j
].p1_0
);
7529 /* Set the upper-right corner for rounding */
7530 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7531 * Only cell (0,0,0) can see cell 7 (1,1,1)
7533 c
->cr1
[0] = comm
->cell_x1
[dim1
];
7534 c
->cr1
[3] = comm
->cell_x1
[dim1
];
7537 c
->cr1
[0] = max(comm
->cell_x1
[dim1
],comm
->zone_d1
[1].mch1
);
7540 /* For the multi-body distance we need the maximum */
7541 c
->bcr1
= max(comm
->cell_x1
[dim1
],comm
->zone_d1
[1].p1_1
);
7548 /* Determine which cg's we need to send in this pulse from this zone */
7550 get_zone_pulse_cgs(gmx_domdec_t
*dd
,
7551 int zonei
, int zone
,
7553 const int *index_gl
,
7555 int dim
, int dim_ind
,
7556 int dim0
, int dim1
, int dim2
,
7557 real r_comm2
, real r_bcomm2
,
7561 real skew_fac2_d
, real skew_fac_01
,
7562 rvec
*v_d
, rvec
*v_0
, rvec
*v_1
,
7563 const dd_corners_t
*c
,
7565 gmx_bool bDistBonded
,
7571 gmx_domdec_ind_t
*ind
,
7572 int **ibuf
, int *ibuf_nalloc
,
7578 gmx_domdec_comm_t
*comm
;
7580 gmx_bool bDistMB_pulse
;
7582 real r2
,rb2
,r
,tric_sh
;
7585 int nsend_z
,nsend
,nat
;
7589 bScrew
= (dd
->bScrewPBC
&& dim
== XX
);
7591 bDistMB_pulse
= (bDistMB
&& bDistBonded
);
7597 for(cg
=cg0
; cg
<cg1
; cg
++)
7601 if (tric_dist
[dim_ind
] == 0)
7603 /* Rectangular direction, easy */
7604 r
= cg_cm
[cg
][dim
] - c
->c
[dim_ind
][zone
];
7611 r
= cg_cm
[cg
][dim
] - c
->bc
[dim_ind
];
7617 /* Rounding gives at most a 16% reduction
7618 * in communicated atoms
7620 if (dim_ind
>= 1 && (zonei
== 1 || zonei
== 2))
7622 r
= cg_cm
[cg
][dim0
] - c
->cr0
;
7623 /* This is the first dimension, so always r >= 0 */
7630 if (dim_ind
== 2 && (zonei
== 2 || zonei
== 3))
7632 r
= cg_cm
[cg
][dim1
] - c
->cr1
[zone
];
7639 r
= cg_cm
[cg
][dim1
] - c
->bcr1
;
7649 /* Triclinic direction, more complicated */
7652 /* Rounding, conservative as the skew_fac multiplication
7653 * will slightly underestimate the distance.
7655 if (dim_ind
>= 1 && (zonei
== 1 || zonei
== 2))
7657 rn
[dim0
] = cg_cm
[cg
][dim0
] - c
->cr0
;
7658 for(i
=dim0
+1; i
<DIM
; i
++)
7660 rn
[dim0
] -= cg_cm
[cg
][i
]*v_0
[i
][dim0
];
7662 r2
= rn
[dim0
]*rn
[dim0
]*sf2_round
[dim0
];
7665 rb
[dim0
] = rn
[dim0
];
7668 /* Take care that the cell planes along dim0 might not
7669 * be orthogonal to those along dim1 and dim2.
7671 for(i
=1; i
<=dim_ind
; i
++)
7674 if (normal
[dim0
][dimd
] > 0)
7676 rn
[dimd
] -= rn
[dim0
]*normal
[dim0
][dimd
];
7679 rb
[dimd
] -= rb
[dim0
]*normal
[dim0
][dimd
];
7684 if (dim_ind
== 2 && (zonei
== 2 || zonei
== 3))
7686 rn
[dim1
] += cg_cm
[cg
][dim1
] - c
->cr1
[zone
];
7688 for(i
=dim1
+1; i
<DIM
; i
++)
7690 tric_sh
-= cg_cm
[cg
][i
]*v_1
[i
][dim1
];
7692 rn
[dim1
] += tric_sh
;
7695 r2
+= rn
[dim1
]*rn
[dim1
]*sf2_round
[dim1
];
7696 /* Take care of coupling of the distances
7697 * to the planes along dim0 and dim1 through dim2.
7699 r2
-= rn
[dim0
]*rn
[dim1
]*skew_fac_01
;
7700 /* Take care that the cell planes along dim1
7701 * might not be orthogonal to that along dim2.
7703 if (normal
[dim1
][dim2
] > 0)
7705 rn
[dim2
] -= rn
[dim1
]*normal
[dim1
][dim2
];
7711 cg_cm
[cg
][dim1
] - c
->bcr1
+ tric_sh
;
7714 rb2
+= rb
[dim1
]*rb
[dim1
]*sf2_round
[dim1
];
7715 /* Take care of coupling of the distances
7716 * to the planes along dim0 and dim1 through dim2.
7718 rb2
-= rb
[dim0
]*rb
[dim1
]*skew_fac_01
;
7719 /* Take care that the cell planes along dim1
7720 * might not be orthogonal to that along dim2.
7722 if (normal
[dim1
][dim2
] > 0)
7724 rb
[dim2
] -= rb
[dim1
]*normal
[dim1
][dim2
];
7729 /* The distance along the communication direction */
7730 rn
[dim
] += cg_cm
[cg
][dim
] - c
->c
[dim_ind
][zone
];
7732 for(i
=dim
+1; i
<DIM
; i
++)
7734 tric_sh
-= cg_cm
[cg
][i
]*v_d
[i
][dim
];
7739 r2
+= rn
[dim
]*rn
[dim
]*skew_fac2_d
;
7740 /* Take care of coupling of the distances
7741 * to the planes along dim0 and dim1 through dim2.
7743 if (dim_ind
== 1 && zonei
== 1)
7745 r2
-= rn
[dim0
]*rn
[dim
]*skew_fac_01
;
7751 rb
[dim
] += cg_cm
[cg
][dim
] - c
->bc
[dim_ind
] + tric_sh
;
7754 rb2
+= rb
[dim
]*rb
[dim
]*skew_fac2_d
;
7755 /* Take care of coupling of the distances
7756 * to the planes along dim0 and dim1 through dim2.
7758 if (dim_ind
== 1 && zonei
== 1)
7760 rb2
-= rb
[dim0
]*rb
[dim
]*skew_fac_01
;
7768 ((bDistMB
&& rb2
< r_bcomm2
) ||
7769 (bDist2B
&& r2
< r_bcomm2
)) &&
7771 (GET_CGINFO_BOND_INTER(cginfo
[cg
]) &&
7772 missing_link(comm
->cglink
,index_gl
[cg
],
7775 /* Make an index to the local charge groups */
7776 if (nsend
+1 > ind
->nalloc
)
7778 ind
->nalloc
= over_alloc_large(nsend
+1);
7779 srenew(ind
->index
,ind
->nalloc
);
7781 if (nsend
+1 > *ibuf_nalloc
)
7783 *ibuf_nalloc
= over_alloc_large(nsend
+1);
7784 srenew(*ibuf
,*ibuf_nalloc
);
7786 ind
->index
[nsend
] = cg
;
7787 (*ibuf
)[nsend
] = index_gl
[cg
];
7789 vec_rvec_check_alloc(vbuf
,nsend
+1);
7791 if (dd
->ci
[dim
] == 0)
7793 /* Correct cg_cm for pbc */
7794 rvec_add(cg_cm
[cg
],box
[dim
],vbuf
->v
[nsend
]);
7797 vbuf
->v
[nsend
][YY
] = box
[YY
][YY
] - vbuf
->v
[nsend
][YY
];
7798 vbuf
->v
[nsend
][ZZ
] = box
[ZZ
][ZZ
] - vbuf
->v
[nsend
][ZZ
];
7803 copy_rvec(cg_cm
[cg
],vbuf
->v
[nsend
]);
7806 nat
+= cgindex
[cg
+1] - cgindex
[cg
];
7812 *nsend_z_ptr
= nsend_z
;
7815 static void setup_dd_communication(gmx_domdec_t
*dd
,
7816 matrix box
,gmx_ddbox_t
*ddbox
,
7817 t_forcerec
*fr
,t_state
*state
,rvec
**f
)
7819 int dim_ind
,dim
,dim0
,dim1
,dim2
,dimd
,p
,nat_tot
;
7820 int nzone
,nzone_send
,zone
,zonei
,cg0
,cg1
;
7821 int c
,i
,j
,cg
,cg_gl
,nrcg
;
7822 int *zone_cg_range
,pos_cg
,*index_gl
,*cgindex
,*recv_i
;
7823 gmx_domdec_comm_t
*comm
;
7824 gmx_domdec_zones_t
*zones
;
7825 gmx_domdec_comm_dim_t
*cd
;
7826 gmx_domdec_ind_t
*ind
;
7827 cginfo_mb_t
*cginfo_mb
;
7828 gmx_bool bBondComm
,bDist2B
,bDistMB
,bDistBonded
;
7829 real r_mb
,r_comm2
,r_scomm2
,r_bcomm2
,r_0
,r_1
,r2inc
,inv_ncg
;
7830 dd_corners_t corners
;
7832 rvec
*cg_cm
,*normal
,*v_d
,*v_0
=NULL
,*v_1
=NULL
,*recv_vr
;
7833 real skew_fac2_d
,skew_fac_01
;
7840 fprintf(debug
,"Setting up DD communication\n");
7845 switch (fr
->cutoff_scheme
)
7854 gmx_incons("unimplemented");
7858 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
7860 dim
= dd
->dim
[dim_ind
];
7862 /* Check if we need to use triclinic distances */
7863 tric_dist
[dim_ind
] = 0;
7864 for(i
=0; i
<=dim_ind
; i
++)
7866 if (ddbox
->tric_dir
[dd
->dim
[i
]])
7868 tric_dist
[dim_ind
] = 1;
7873 bBondComm
= comm
->bBondComm
;
7875 /* Do we need to determine extra distances for multi-body bondeds? */
7876 bDistMB
= (comm
->bInterCGMultiBody
&& dd
->bGridJump
&& dd
->ndim
> 1);
7878 /* Do we need to determine extra distances for only two-body bondeds? */
7879 bDist2B
= (bBondComm
&& !bDistMB
);
7881 r_comm2
= sqr(comm
->cutoff
);
7882 r_bcomm2
= sqr(comm
->cutoff_mbody
);
7886 fprintf(debug
,"bBondComm %d, r_bc %f\n",bBondComm
,sqrt(r_bcomm2
));
7889 zones
= &comm
->zones
;
7892 dim1
= (dd
->ndim
>= 2 ? dd
->dim
[1] : -1);
7893 dim2
= (dd
->ndim
>= 3 ? dd
->dim
[2] : -1);
7895 set_dd_corners(dd
,dim0
,dim1
,dim2
,bDistMB
,&corners
);
7897 /* Triclinic stuff */
7898 normal
= ddbox
->normal
;
7902 v_0
= ddbox
->v
[dim0
];
7903 if (ddbox
->tric_dir
[dim0
] && ddbox
->tric_dir
[dim1
])
7905 /* Determine the coupling coefficient for the distances
7906 * to the cell planes along dim0 and dim1 through dim2.
7907 * This is required for correct rounding.
7910 ddbox
->v
[dim0
][dim1
+1][dim0
]*ddbox
->v
[dim1
][dim1
+1][dim1
];
7913 fprintf(debug
,"\nskew_fac_01 %f\n",skew_fac_01
);
7919 v_1
= ddbox
->v
[dim1
];
7922 zone_cg_range
= zones
->cg_range
;
7923 index_gl
= dd
->index_gl
;
7924 cgindex
= dd
->cgindex
;
7925 cginfo_mb
= fr
->cginfo_mb
;
7927 zone_cg_range
[0] = 0;
7928 zone_cg_range
[1] = dd
->ncg_home
;
7929 comm
->zone_ncg1
[0] = dd
->ncg_home
;
7930 pos_cg
= dd
->ncg_home
;
7932 nat_tot
= dd
->nat_home
;
7934 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
7936 dim
= dd
->dim
[dim_ind
];
7937 cd
= &comm
->cd
[dim_ind
];
7939 if (dim
>= ddbox
->npbcdim
&& dd
->ci
[dim
] == 0)
7941 /* No pbc in this dimension, the first node should not comm. */
7949 v_d
= ddbox
->v
[dim
];
7950 skew_fac2_d
= sqr(ddbox
->skew_fac
[dim
]);
7952 cd
->bInPlace
= TRUE
;
7953 for(p
=0; p
<cd
->np
; p
++)
7955 /* Only atoms communicated in the first pulse are used
7956 * for multi-body bonded interactions or for bBondComm.
7958 bDistBonded
= ((bDistMB
|| bDist2B
) && p
== 0);
7963 for(zone
=0; zone
<nzone_send
; zone
++)
7965 if (tric_dist
[dim_ind
] && dim_ind
> 0)
7967 /* Determine slightly more optimized skew_fac's
7969 * This reduces the number of communicated atoms
7970 * by about 10% for 3D DD of rhombic dodecahedra.
7972 for(dimd
=0; dimd
<dim
; dimd
++)
7974 sf2_round
[dimd
] = 1;
7975 if (ddbox
->tric_dir
[dimd
])
7977 for(i
=dd
->dim
[dimd
]+1; i
<DIM
; i
++)
7979 /* If we are shifted in dimension i
7980 * and the cell plane is tilted forward
7981 * in dimension i, skip this coupling.
7983 if (!(zones
->shift
[nzone
+zone
][i
] &&
7984 ddbox
->v
[dimd
][i
][dimd
] >= 0))
7987 sqr(ddbox
->v
[dimd
][i
][dimd
]);
7990 sf2_round
[dimd
] = 1/sf2_round
[dimd
];
7995 zonei
= zone_perm
[dim_ind
][zone
];
7998 /* Here we permutate the zones to obtain a convenient order
7999 * for neighbor searching
8001 cg0
= zone_cg_range
[zonei
];
8002 cg1
= zone_cg_range
[zonei
+1];
8006 /* Look only at the cg's received in the previous grid pulse
8008 cg1
= zone_cg_range
[nzone
+zone
+1];
8009 cg0
= cg1
- cd
->ind
[p
-1].nrecv
[zone
];
8012 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8013 for(th
=0; th
<comm
->nth
; th
++)
8015 gmx_domdec_ind_t
*ind_p
;
8016 int **ibuf_p
,*ibuf_nalloc_p
;
8018 int *nsend_p
,*nat_p
;
8024 /* Thread 0 writes in the comm buffers */
8026 ibuf_p
= &comm
->buf_int
;
8027 ibuf_nalloc_p
= &comm
->nalloc_int
;
8028 vbuf_p
= &comm
->vbuf
;
8031 nsend_zone_p
= &ind
->nsend
[zone
];
8035 /* Other threads write into temp buffers */
8036 ind_p
= &comm
->dth
[th
].ind
;
8037 ibuf_p
= &comm
->dth
[th
].ibuf
;
8038 ibuf_nalloc_p
= &comm
->dth
[th
].ibuf_nalloc
;
8039 vbuf_p
= &comm
->dth
[th
].vbuf
;
8040 nsend_p
= &comm
->dth
[th
].nsend
;
8041 nat_p
= &comm
->dth
[th
].nat
;
8042 nsend_zone_p
= &comm
->dth
[th
].nsend_zone
;
8044 comm
->dth
[th
].nsend
= 0;
8045 comm
->dth
[th
].nat
= 0;
8046 comm
->dth
[th
].nsend_zone
= 0;
8056 cg0_th
= cg0
+ ((cg1
- cg0
)* th
)/comm
->nth
;
8057 cg1_th
= cg0
+ ((cg1
- cg0
)*(th
+1))/comm
->nth
;
8060 /* Get the cg's for this pulse in this zone */
8061 get_zone_pulse_cgs(dd
,zonei
,zone
,cg0_th
,cg1_th
,
8063 dim
,dim_ind
,dim0
,dim1
,dim2
,
8066 normal
,skew_fac2_d
,skew_fac_01
,
8067 v_d
,v_0
,v_1
,&corners
,sf2_round
,
8068 bDistBonded
,bBondComm
,
8072 ibuf_p
,ibuf_nalloc_p
,
8078 /* Append data of threads>=1 to the communication buffers */
8079 for(th
=1; th
<comm
->nth
; th
++)
8081 dd_comm_setup_work_t
*dth
;
8084 dth
= &comm
->dth
[th
];
8086 ns1
= nsend
+ dth
->nsend_zone
;
8087 if (ns1
> ind
->nalloc
)
8089 ind
->nalloc
= over_alloc_dd(ns1
);
8090 srenew(ind
->index
,ind
->nalloc
);
8092 if (ns1
> comm
->nalloc_int
)
8094 comm
->nalloc_int
= over_alloc_dd(ns1
);
8095 srenew(comm
->buf_int
,comm
->nalloc_int
);
8097 if (ns1
> comm
->vbuf
.nalloc
)
8099 comm
->vbuf
.nalloc
= over_alloc_dd(ns1
);
8100 srenew(comm
->vbuf
.v
,comm
->vbuf
.nalloc
);
8103 for(i
=0; i
<dth
->nsend_zone
; i
++)
8105 ind
->index
[nsend
] = dth
->ind
.index
[i
];
8106 comm
->buf_int
[nsend
] = dth
->ibuf
[i
];
8107 copy_rvec(dth
->vbuf
.v
[i
],
8108 comm
->vbuf
.v
[nsend
]);
8112 ind
->nsend
[zone
] += dth
->nsend_zone
;
8115 /* Clear the counts in case we do not have pbc */
8116 for(zone
=nzone_send
; zone
<nzone
; zone
++)
8118 ind
->nsend
[zone
] = 0;
8120 ind
->nsend
[nzone
] = nsend
;
8121 ind
->nsend
[nzone
+1] = nat
;
8122 /* Communicate the number of cg's and atoms to receive */
8123 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
8124 ind
->nsend
, nzone
+2,
8125 ind
->nrecv
, nzone
+2);
8127 /* The rvec buffer is also required for atom buffers of size nsend
8128 * in dd_move_x and dd_move_f.
8130 vec_rvec_check_alloc(&comm
->vbuf
,ind
->nsend
[nzone
+1]);
8134 /* We can receive in place if only the last zone is not empty */
8135 for(zone
=0; zone
<nzone
-1; zone
++)
8137 if (ind
->nrecv
[zone
] > 0)
8139 cd
->bInPlace
= FALSE
;
8144 /* The int buffer is only required here for the cg indices */
8145 if (ind
->nrecv
[nzone
] > comm
->nalloc_int2
)
8147 comm
->nalloc_int2
= over_alloc_dd(ind
->nrecv
[nzone
]);
8148 srenew(comm
->buf_int2
,comm
->nalloc_int2
);
8150 /* The rvec buffer is also required for atom buffers
8151 * of size nrecv in dd_move_x and dd_move_f.
8153 i
= max(cd
->ind
[0].nrecv
[nzone
+1],ind
->nrecv
[nzone
+1]);
8154 vec_rvec_check_alloc(&comm
->vbuf2
,i
);
8158 /* Make space for the global cg indices */
8159 if (pos_cg
+ ind
->nrecv
[nzone
] > dd
->cg_nalloc
8160 || dd
->cg_nalloc
== 0)
8162 dd
->cg_nalloc
= over_alloc_dd(pos_cg
+ ind
->nrecv
[nzone
]);
8163 srenew(index_gl
,dd
->cg_nalloc
);
8164 srenew(cgindex
,dd
->cg_nalloc
+1);
8166 /* Communicate the global cg indices */
8169 recv_i
= index_gl
+ pos_cg
;
8173 recv_i
= comm
->buf_int2
;
8175 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
8176 comm
->buf_int
, nsend
,
8177 recv_i
, ind
->nrecv
[nzone
]);
8179 /* Make space for cg_cm */
8180 dd_check_alloc_ncg(fr
,state
,f
,pos_cg
+ ind
->nrecv
[nzone
]);
8181 if (fr
->cutoff_scheme
== ecutsGROUP
)
8189 /* Communicate cg_cm */
8192 recv_vr
= cg_cm
+ pos_cg
;
8196 recv_vr
= comm
->vbuf2
.v
;
8198 dd_sendrecv_rvec(dd
, dim_ind
, dddirBackward
,
8199 comm
->vbuf
.v
, nsend
,
8200 recv_vr
, ind
->nrecv
[nzone
]);
8202 /* Make the charge group index */
8205 zone
= (p
== 0 ? 0 : nzone
- 1);
8206 while (zone
< nzone
)
8208 for(cg
=0; cg
<ind
->nrecv
[zone
]; cg
++)
8210 cg_gl
= index_gl
[pos_cg
];
8211 fr
->cginfo
[pos_cg
] = ddcginfo(cginfo_mb
,cg_gl
);
8212 nrcg
= GET_CGINFO_NATOMS(fr
->cginfo
[pos_cg
]);
8213 cgindex
[pos_cg
+1] = cgindex
[pos_cg
] + nrcg
;
8216 /* Update the charge group presence,
8217 * so we can use it in the next pass of the loop.
8219 comm
->bLocalCG
[cg_gl
] = TRUE
;
8225 comm
->zone_ncg1
[nzone
+zone
] = ind
->nrecv
[zone
];
8228 zone_cg_range
[nzone
+zone
] = pos_cg
;
8233 /* This part of the code is never executed with bBondComm. */
8234 merge_cg_buffers(nzone
,cd
,p
,zone_cg_range
,
8235 index_gl
,recv_i
,cg_cm
,recv_vr
,
8236 cgindex
,fr
->cginfo_mb
,fr
->cginfo
);
8237 pos_cg
+= ind
->nrecv
[nzone
];
8239 nat_tot
+= ind
->nrecv
[nzone
+1];
8243 /* Store the atom block for easy copying of communication buffers */
8244 make_cell2at_index(cd
,nzone
,zone_cg_range
[nzone
],cgindex
);
8248 dd
->index_gl
= index_gl
;
8249 dd
->cgindex
= cgindex
;
8251 dd
->ncg_tot
= zone_cg_range
[zones
->n
];
8252 dd
->nat_tot
= nat_tot
;
8253 comm
->nat
[ddnatHOME
] = dd
->nat_home
;
8254 for(i
=ddnatZONE
; i
<ddnatNR
; i
++)
8256 comm
->nat
[i
] = dd
->nat_tot
;
8261 /* We don't need to update cginfo, since that was alrady done above.
8262 * So we pass NULL for the forcerec.
8264 dd_set_cginfo(dd
->index_gl
,dd
->ncg_home
,dd
->ncg_tot
,
8265 NULL
,comm
->bLocalCG
);
8270 fprintf(debug
,"Finished setting up DD communication, zones:");
8271 for(c
=0; c
<zones
->n
; c
++)
8273 fprintf(debug
," %d",zones
->cg_range
[c
+1]-zones
->cg_range
[c
]);
8275 fprintf(debug
,"\n");
8279 static void set_cg_boundaries(gmx_domdec_zones_t
*zones
)
8283 for(c
=0; c
<zones
->nizone
; c
++)
8285 zones
->izone
[c
].cg1
= zones
->cg_range
[c
+1];
8286 zones
->izone
[c
].jcg0
= zones
->cg_range
[zones
->izone
[c
].j0
];
8287 zones
->izone
[c
].jcg1
= zones
->cg_range
[zones
->izone
[c
].j1
];
8291 static void set_zones_size(gmx_domdec_t
*dd
,
8292 matrix box
,const gmx_ddbox_t
*ddbox
,
8293 int zone_start
,int zone_end
)
8295 gmx_domdec_comm_t
*comm
;
8296 gmx_domdec_zones_t
*zones
;
8298 int z
,zi
,zj0
,zj1
,d
,dim
;
8301 real size_j
,add_tric
;
8306 zones
= &comm
->zones
;
8308 /* Do we need to determine extra distances for multi-body bondeds? */
8309 bDistMB
= (comm
->bInterCGMultiBody
&& dd
->bGridJump
&& dd
->ndim
> 1);
8311 for(z
=zone_start
; z
<zone_end
; z
++)
8313 /* Copy cell limits to zone limits.
8314 * Valid for non-DD dims and non-shifted dims.
8316 copy_rvec(comm
->cell_x0
,zones
->size
[z
].x0
);
8317 copy_rvec(comm
->cell_x1
,zones
->size
[z
].x1
);
8320 for(d
=0; d
<dd
->ndim
; d
++)
8324 for(z
=0; z
<zones
->n
; z
++)
8326 /* With a staggered grid we have different sizes
8327 * for non-shifted dimensions.
8329 if (dd
->bGridJump
&& zones
->shift
[z
][dim
] == 0)
8333 zones
->size
[z
].x0
[dim
] = comm
->zone_d1
[zones
->shift
[z
][dd
->dim
[d
-1]]].min0
;
8334 zones
->size
[z
].x1
[dim
] = comm
->zone_d1
[zones
->shift
[z
][dd
->dim
[d
-1]]].max1
;
8338 zones
->size
[z
].x0
[dim
] = comm
->zone_d2
[zones
->shift
[z
][dd
->dim
[d
-2]]][zones
->shift
[z
][dd
->dim
[d
-1]]].min0
;
8339 zones
->size
[z
].x1
[dim
] = comm
->zone_d2
[zones
->shift
[z
][dd
->dim
[d
-2]]][zones
->shift
[z
][dd
->dim
[d
-1]]].max1
;
8345 rcmbs
= comm
->cutoff_mbody
;
8346 if (ddbox
->tric_dir
[dim
])
8348 rcs
/= ddbox
->skew_fac
[dim
];
8349 rcmbs
/= ddbox
->skew_fac
[dim
];
8352 /* Set the lower limit for the shifted zone dimensions */
8353 for(z
=zone_start
; z
<zone_end
; z
++)
8355 if (zones
->shift
[z
][dim
] > 0)
8358 if (!dd
->bGridJump
|| d
== 0)
8360 zones
->size
[z
].x0
[dim
] = comm
->cell_x1
[dim
];
8361 zones
->size
[z
].x1
[dim
] = comm
->cell_x1
[dim
] + rcs
;
8365 /* Here we take the lower limit of the zone from
8366 * the lowest domain of the zone below.
8370 zones
->size
[z
].x0
[dim
] =
8371 comm
->zone_d1
[zones
->shift
[z
][dd
->dim
[d
-1]]].min1
;
8377 zones
->size
[z
].x0
[dim
] =
8378 zones
->size
[zone_perm
[2][z
-4]].x0
[dim
];
8382 zones
->size
[z
].x0
[dim
] =
8383 comm
->zone_d2
[zones
->shift
[z
][dd
->dim
[d
-2]]][zones
->shift
[z
][dd
->dim
[d
-1]]].min1
;
8386 /* A temporary limit, is updated below */
8387 zones
->size
[z
].x1
[dim
] = zones
->size
[z
].x0
[dim
];
8391 for(zi
=0; zi
<zones
->nizone
; zi
++)
8393 if (zones
->shift
[zi
][dim
] == 0)
8395 /* This takes the whole zone into account.
8396 * With multiple pulses this will lead
8397 * to a larger zone then strictly necessary.
8399 zones
->size
[z
].x1
[dim
] = max(zones
->size
[z
].x1
[dim
],
8400 zones
->size
[zi
].x1
[dim
]+rcmbs
);
8408 /* Loop over the i-zones to set the upper limit of each
8411 for(zi
=0; zi
<zones
->nizone
; zi
++)
8413 if (zones
->shift
[zi
][dim
] == 0)
8415 for(z
=zones
->izone
[zi
].j0
; z
<zones
->izone
[zi
].j1
; z
++)
8417 if (zones
->shift
[z
][dim
] > 0)
8419 zones
->size
[z
].x1
[dim
] = max(zones
->size
[z
].x1
[dim
],
8420 zones
->size
[zi
].x1
[dim
]+rcs
);
8427 for(z
=zone_start
; z
<zone_end
; z
++)
8429 for(i
=0; i
<DIM
; i
++)
8431 zones
->size
[z
].bb_x0
[i
] = zones
->size
[z
].x0
[i
];
8432 zones
->size
[z
].bb_x1
[i
] = zones
->size
[z
].x1
[i
];
8434 for(j
=i
+1; j
<ddbox
->npbcdim
; j
++)
8436 /* With 1D domain decomposition the cg's are not in
8437 * the triclinic box, but trilinic x-y and rectangular y-z.
8439 if (box
[j
][i
] != 0 &&
8440 !(dd
->ndim
== 1 && i
== YY
&& j
== ZZ
))
8442 /* Correct for triclinic offset of the lower corner */
8443 add_tric
= zones
->size
[z
].x0
[j
]*box
[j
][i
]/box
[j
][j
];
8444 zones
->size
[z
].bb_x0
[i
] += add_tric
;
8445 zones
->size
[z
].bb_x1
[i
] += add_tric
;
8447 /* Correct for triclinic offset of the upper corner */
8448 size_j
= zones
->size
[z
].x1
[j
] - zones
->size
[z
].x0
[j
];
8449 add_tric
= size_j
*box
[j
][i
]/box
[j
][j
];
8453 zones
->size
[z
].bb_x0
[i
] += add_tric
;
8457 zones
->size
[z
].bb_x1
[i
] += add_tric
;
8464 if (zone_start
== 0)
8467 for(dim
=0; dim
<DIM
; dim
++)
8469 vol
*= zones
->size
[0].x1
[dim
] - zones
->size
[0].x0
[dim
];
8471 zones
->dens_zone0
= (zones
->cg_range
[1] - zones
->cg_range
[0])/vol
;
8476 for(z
=zone_start
; z
<zone_end
; z
++)
8478 fprintf(debug
,"zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8480 zones
->size
[z
].x0
[XX
],zones
->size
[z
].x1
[XX
],
8481 zones
->size
[z
].x0
[YY
],zones
->size
[z
].x1
[YY
],
8482 zones
->size
[z
].x0
[ZZ
],zones
->size
[z
].x1
[ZZ
]);
8483 fprintf(debug
,"zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8485 zones
->size
[z
].bb_x0
[XX
],zones
->size
[z
].bb_x1
[XX
],
8486 zones
->size
[z
].bb_x0
[YY
],zones
->size
[z
].bb_x1
[YY
],
8487 zones
->size
[z
].bb_x0
[ZZ
],zones
->size
[z
].bb_x1
[ZZ
]);
8492 static int comp_cgsort(const void *a
,const void *b
)
8496 gmx_cgsort_t
*cga
,*cgb
;
8497 cga
= (gmx_cgsort_t
*)a
;
8498 cgb
= (gmx_cgsort_t
*)b
;
8500 comp
= cga
->nsc
- cgb
->nsc
;
8503 comp
= cga
->ind_gl
- cgb
->ind_gl
;
8509 static void order_int_cg(int n
,const gmx_cgsort_t
*sort
,
8514 /* Order the data */
8517 buf
[i
] = a
[sort
[i
].ind
];
8520 /* Copy back to the original array */
8527 static void order_vec_cg(int n
,const gmx_cgsort_t
*sort
,
8532 /* Order the data */
8535 copy_rvec(v
[sort
[i
].ind
],buf
[i
]);
8538 /* Copy back to the original array */
8541 copy_rvec(buf
[i
],v
[i
]);
8545 static void order_vec_atom(int ncg
,const int *cgindex
,const gmx_cgsort_t
*sort
,
8548 int a
,atot
,cg
,cg0
,cg1
,i
;
8550 if (cgindex
== NULL
)
8552 /* Avoid the useless loop of the atoms within a cg */
8553 order_vec_cg(ncg
,sort
,v
,buf
);
8558 /* Order the data */
8560 for(cg
=0; cg
<ncg
; cg
++)
8562 cg0
= cgindex
[sort
[cg
].ind
];
8563 cg1
= cgindex
[sort
[cg
].ind
+1];
8564 for(i
=cg0
; i
<cg1
; i
++)
8566 copy_rvec(v
[i
],buf
[a
]);
8572 /* Copy back to the original array */
8573 for(a
=0; a
<atot
; a
++)
8575 copy_rvec(buf
[a
],v
[a
]);
8579 static void ordered_sort(int nsort2
,gmx_cgsort_t
*sort2
,
8580 int nsort_new
,gmx_cgsort_t
*sort_new
,
8581 gmx_cgsort_t
*sort1
)
8585 /* The new indices are not very ordered, so we qsort them */
8586 qsort_threadsafe(sort_new
,nsort_new
,sizeof(sort_new
[0]),comp_cgsort
);
8588 /* sort2 is already ordered, so now we can merge the two arrays */
8592 while(i2
< nsort2
|| i_new
< nsort_new
)
8596 sort1
[i1
++] = sort_new
[i_new
++];
8598 else if (i_new
== nsort_new
)
8600 sort1
[i1
++] = sort2
[i2
++];
8602 else if (sort2
[i2
].nsc
< sort_new
[i_new
].nsc
||
8603 (sort2
[i2
].nsc
== sort_new
[i_new
].nsc
&&
8604 sort2
[i2
].ind_gl
< sort_new
[i_new
].ind_gl
))
8606 sort1
[i1
++] = sort2
[i2
++];
8610 sort1
[i1
++] = sort_new
[i_new
++];
8615 static int dd_sort_order(gmx_domdec_t
*dd
,t_forcerec
*fr
,int ncg_home_old
)
8617 gmx_domdec_sort_t
*sort
;
8618 gmx_cgsort_t
*cgsort
,*sort_i
;
8619 int ncg_new
,nsort2
,nsort_new
,i
,*a
,moved
,*ibuf
;
8620 int sort_last
,sort_skip
;
8622 sort
= dd
->comm
->sort
;
8624 a
= fr
->ns
.grid
->cell_index
;
8626 moved
= NSGRID_SIGNAL_MOVED_FAC
*fr
->ns
.grid
->ncells
;
8628 if (ncg_home_old
>= 0)
8630 /* The charge groups that remained in the same ns grid cell
8631 * are completely ordered. So we can sort efficiently by sorting
8632 * the charge groups that did move into the stationary list.
8637 for(i
=0; i
<dd
->ncg_home
; i
++)
8639 /* Check if this cg did not move to another node */
8642 if (i
>= ncg_home_old
|| a
[i
] != sort
->sort
[i
].nsc
)
8644 /* This cg is new on this node or moved ns grid cell */
8645 if (nsort_new
>= sort
->sort_new_nalloc
)
8647 sort
->sort_new_nalloc
= over_alloc_dd(nsort_new
+1);
8648 srenew(sort
->sort_new
,sort
->sort_new_nalloc
);
8650 sort_i
= &(sort
->sort_new
[nsort_new
++]);
8654 /* This cg did not move */
8655 sort_i
= &(sort
->sort2
[nsort2
++]);
8657 /* Sort on the ns grid cell indices
8658 * and the global topology index.
8659 * index_gl is irrelevant with cell ns,
8660 * but we set it here anyhow to avoid a conditional.
8663 sort_i
->ind_gl
= dd
->index_gl
[i
];
8670 fprintf(debug
,"ordered sort cgs: stationary %d moved %d\n",
8673 /* Sort efficiently */
8674 ordered_sort(nsort2
,sort
->sort2
,nsort_new
,sort
->sort_new
,
8679 cgsort
= sort
->sort
;
8681 for(i
=0; i
<dd
->ncg_home
; i
++)
8683 /* Sort on the ns grid cell indices
8684 * and the global topology index
8686 cgsort
[i
].nsc
= a
[i
];
8687 cgsort
[i
].ind_gl
= dd
->index_gl
[i
];
8689 if (cgsort
[i
].nsc
< moved
)
8696 fprintf(debug
,"qsort cgs: %d new home %d\n",dd
->ncg_home
,ncg_new
);
8698 /* Determine the order of the charge groups using qsort */
8699 qsort_threadsafe(cgsort
,dd
->ncg_home
,sizeof(cgsort
[0]),comp_cgsort
);
8705 static int dd_sort_order_nbnxn(gmx_domdec_t
*dd
,t_forcerec
*fr
)
8708 int ncg_new
,i
,*a
,na
;
8710 sort
= dd
->comm
->sort
->sort
;
8712 nbnxn_get_atomorder(fr
->nbv
->nbs
,&a
,&na
);
8719 sort
[ncg_new
].ind
= a
[i
];
8727 static void dd_sort_state(gmx_domdec_t
*dd
,int ePBC
,
8728 rvec
*cgcm
,t_forcerec
*fr
,t_state
*state
,
8731 gmx_domdec_sort_t
*sort
;
8732 gmx_cgsort_t
*cgsort
,*sort_i
;
8734 int ncg_new
,i
,*ibuf
,cgsize
;
8737 sort
= dd
->comm
->sort
;
8739 if (dd
->ncg_home
> sort
->sort_nalloc
)
8741 sort
->sort_nalloc
= over_alloc_dd(dd
->ncg_home
);
8742 srenew(sort
->sort
,sort
->sort_nalloc
);
8743 srenew(sort
->sort2
,sort
->sort_nalloc
);
8745 cgsort
= sort
->sort
;
8747 switch (fr
->cutoff_scheme
)
8750 ncg_new
= dd_sort_order(dd
,fr
,ncg_home_old
);
8753 ncg_new
= dd_sort_order_nbnxn(dd
,fr
);
8756 gmx_incons("unimplemented");
8760 /* We alloc with the old size, since cgindex is still old */
8761 vec_rvec_check_alloc(&dd
->comm
->vbuf
,dd
->cgindex
[dd
->ncg_home
]);
8762 vbuf
= dd
->comm
->vbuf
.v
;
8766 cgindex
= dd
->cgindex
;
8773 /* Remove the charge groups which are no longer at home here */
8774 dd
->ncg_home
= ncg_new
;
8777 fprintf(debug
,"Set the new home charge group count to %d\n",
8781 /* Reorder the state */
8782 for(i
=0; i
<estNR
; i
++)
8784 if (EST_DISTR(i
) && (state
->flags
& (1<<i
)))
8789 order_vec_atom(dd
->ncg_home
,cgindex
,cgsort
,state
->x
,vbuf
);
8792 order_vec_atom(dd
->ncg_home
,cgindex
,cgsort
,state
->v
,vbuf
);
8795 order_vec_atom(dd
->ncg_home
,cgindex
,cgsort
,state
->sd_X
,vbuf
);
8798 order_vec_atom(dd
->ncg_home
,cgindex
,cgsort
,state
->cg_p
,vbuf
);
8802 case estDISRE_INITF
:
8803 case estDISRE_RM3TAV
:
8804 case estORIRE_INITF
:
8806 /* No ordering required */
8809 gmx_incons("Unknown state entry encountered in dd_sort_state");
8814 if (fr
->cutoff_scheme
== ecutsGROUP
)
8817 order_vec_cg(dd
->ncg_home
,cgsort
,cgcm
,vbuf
);
8820 if (dd
->ncg_home
+1 > sort
->ibuf_nalloc
)
8822 sort
->ibuf_nalloc
= over_alloc_dd(dd
->ncg_home
+1);
8823 srenew(sort
->ibuf
,sort
->ibuf_nalloc
);
8826 /* Reorder the global cg index */
8827 order_int_cg(dd
->ncg_home
,cgsort
,dd
->index_gl
,ibuf
);
8828 /* Reorder the cginfo */
8829 order_int_cg(dd
->ncg_home
,cgsort
,fr
->cginfo
,ibuf
);
8830 /* Rebuild the local cg index */
8834 for(i
=0; i
<dd
->ncg_home
; i
++)
8836 cgsize
= dd
->cgindex
[cgsort
[i
].ind
+1] - dd
->cgindex
[cgsort
[i
].ind
];
8837 ibuf
[i
+1] = ibuf
[i
] + cgsize
;
8839 for(i
=0; i
<dd
->ncg_home
+1; i
++)
8841 dd
->cgindex
[i
] = ibuf
[i
];
8846 for(i
=0; i
<dd
->ncg_home
+1; i
++)
8851 /* Set the home atom number */
8852 dd
->nat_home
= dd
->cgindex
[dd
->ncg_home
];
8854 if (fr
->cutoff_scheme
== ecutsVERLET
)
8856 /* The atoms are now exactly in grid order, update the grid order */
8857 nbnxn_set_atomorder(fr
->nbv
->nbs
);
8861 /* Copy the sorted ns cell indices back to the ns grid struct */
8862 for(i
=0; i
<dd
->ncg_home
; i
++)
8864 fr
->ns
.grid
->cell_index
[i
] = cgsort
[i
].nsc
;
8866 fr
->ns
.grid
->nr
= dd
->ncg_home
;
8870 static void add_dd_statistics(gmx_domdec_t
*dd
)
8872 gmx_domdec_comm_t
*comm
;
8877 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8879 comm
->sum_nat
[ddnat
-ddnatZONE
] +=
8880 comm
->nat
[ddnat
] - comm
->nat
[ddnat
-1];
8885 void reset_dd_statistics_counters(gmx_domdec_t
*dd
)
8887 gmx_domdec_comm_t
*comm
;
8892 /* Reset all the statistics and counters for total run counting */
8893 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8895 comm
->sum_nat
[ddnat
-ddnatZONE
] = 0;
8899 comm
->load_step
= 0;
8902 clear_ivec(comm
->load_lim
);
8907 void print_dd_statistics(t_commrec
*cr
,t_inputrec
*ir
,FILE *fplog
)
8909 gmx_domdec_comm_t
*comm
;
8913 comm
= cr
->dd
->comm
;
8915 gmx_sumd(ddnatNR
-ddnatZONE
,comm
->sum_nat
,cr
);
8922 fprintf(fplog
,"\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
8924 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8926 av
= comm
->sum_nat
[ddnat
-ddnatZONE
]/comm
->ndecomp
;
8931 " av. #atoms communicated per step for force: %d x %.1f\n",
8935 if (cr
->dd
->vsite_comm
)
8938 " av. #atoms communicated per step for vsites: %d x %.1f\n",
8939 (EEL_PME(ir
->coulombtype
) || ir
->coulombtype
==eelEWALD
) ? 3 : 2,
8944 if (cr
->dd
->constraint_comm
)
8947 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
8948 1 + ir
->nLincsIter
,av
);
8952 gmx_incons(" Unknown type for DD statistics");
8955 fprintf(fplog
,"\n");
8957 if (comm
->bRecordLoad
&& EI_DYNAMICS(ir
->eI
))
8959 print_dd_load_av(fplog
,cr
->dd
);
8963 void dd_partition_system(FILE *fplog
,
8964 gmx_large_int_t step
,
8966 gmx_bool bMasterState
,
8968 t_state
*state_global
,
8969 gmx_mtop_t
*top_global
,
8971 t_state
*state_local
,
8974 gmx_localtop_t
*top_local
,
8977 gmx_shellfc_t shellfc
,
8978 gmx_constr_t constr
,
8980 gmx_wallcycle_t wcycle
,
8984 gmx_domdec_comm_t
*comm
;
8985 gmx_ddbox_t ddbox
={0};
8987 gmx_large_int_t step_pcoupl
;
8988 rvec cell_ns_x0
,cell_ns_x1
;
8989 int i
,j
,n
,cg0
=0,ncg_home_old
=-1,ncg_moved
,nat_f_novirsum
;
8990 gmx_bool bBoxChanged
,bNStGlobalComm
,bDoDLB
,bCheckDLB
,bTurnOnDLB
,bLogLoad
;
8991 gmx_bool bRedist
,bSortCG
,bResortAll
;
8992 ivec ncells_old
={0,0,0},ncells_new
={0,0,0},np
;
8999 bBoxChanged
= (bMasterState
|| DEFORM(*ir
));
9000 if (ir
->epc
!= epcNO
)
9002 /* With nstpcouple > 1 pressure coupling happens.
9003 * one step after calculating the pressure.
9004 * Box scaling happens at the end of the MD step,
9005 * after the DD partitioning.
9006 * We therefore have to do DLB in the first partitioning
9007 * after an MD step where P-coupling occured.
9008 * We need to determine the last step in which p-coupling occurred.
9009 * MRS -- need to validate this for vv?
9014 step_pcoupl
= step
- 1;
9018 step_pcoupl
= ((step
- 1)/n
)*n
+ 1;
9020 if (step_pcoupl
>= comm
->partition_step
)
9026 bNStGlobalComm
= (step
% nstglobalcomm
== 0);
9028 if (!comm
->bDynLoadBal
)
9034 /* Should we do dynamic load balacing this step?
9035 * Since it requires (possibly expensive) global communication,
9036 * we might want to do DLB less frequently.
9038 if (bBoxChanged
|| ir
->epc
!= epcNO
)
9040 bDoDLB
= bBoxChanged
;
9044 bDoDLB
= bNStGlobalComm
;
9048 /* Check if we have recorded loads on the nodes */
9049 if (comm
->bRecordLoad
&& dd_load_count(comm
))
9051 if (comm
->eDLB
== edlbAUTO
&& !comm
->bDynLoadBal
)
9053 /* Check if we should use DLB at the second partitioning
9054 * and every 100 partitionings,
9055 * so the extra communication cost is negligible.
9057 n
= max(100,nstglobalcomm
);
9058 bCheckDLB
= (comm
->n_load_collect
== 0 ||
9059 comm
->n_load_have
% n
== n
-1);
9066 /* Print load every nstlog, first and last step to the log file */
9067 bLogLoad
= ((ir
->nstlog
> 0 && step
% ir
->nstlog
== 0) ||
9068 comm
->n_load_collect
== 0 ||
9070 (step
+ ir
->nstlist
> ir
->init_step
+ ir
->nsteps
)));
9072 /* Avoid extra communication due to verbose screen output
9073 * when nstglobalcomm is set.
9075 if (bDoDLB
|| bLogLoad
|| bCheckDLB
||
9076 (bVerbose
&& (ir
->nstlist
== 0 || nstglobalcomm
<= ir
->nstlist
)))
9078 get_load_distribution(dd
,wcycle
);
9083 dd_print_load(fplog
,dd
,step
-1);
9087 dd_print_load_verbose(dd
);
9090 comm
->n_load_collect
++;
9093 /* Since the timings are node dependent, the master decides */
9097 (dd_force_imb_perf_loss(dd
) >= DD_PERF_LOSS
);
9100 fprintf(debug
,"step %s, imb loss %f\n",
9101 gmx_step_str(step
,sbuf
),
9102 dd_force_imb_perf_loss(dd
));
9105 dd_bcast(dd
,sizeof(bTurnOnDLB
),&bTurnOnDLB
);
9108 turn_on_dlb(fplog
,cr
,step
);
9113 comm
->n_load_have
++;
9116 cgs_gl
= &comm
->cgs_gl
;
9121 /* Clear the old state */
9122 clear_dd_indices(dd
,0,0);
9124 set_ddbox(dd
,bMasterState
,cr
,ir
,state_global
->box
,
9125 TRUE
,cgs_gl
,state_global
->x
,&ddbox
);
9127 get_cg_distribution(fplog
,step
,dd
,cgs_gl
,
9128 state_global
->box
,&ddbox
,state_global
->x
);
9130 dd_distribute_state(dd
,cgs_gl
,
9131 state_global
,state_local
,f
);
9133 dd_make_local_cgs(dd
,&top_local
->cgs
);
9135 /* Ensure that we have space for the new distribution */
9136 dd_check_alloc_ncg(fr
,state_local
,f
,dd
->ncg_home
);
9138 if (fr
->cutoff_scheme
== ecutsGROUP
)
9140 calc_cgcm(fplog
,0,dd
->ncg_home
,
9141 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
9144 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
9146 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
9150 else if (state_local
->ddp_count
!= dd
->ddp_count
)
9152 if (state_local
->ddp_count
> dd
->ddp_count
)
9154 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local
->ddp_count
,dd
->ddp_count
);
9157 if (state_local
->ddp_count_cg_gl
!= state_local
->ddp_count
)
9159 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local
->ddp_count_cg_gl
,state_local
->ddp_count
);
9162 /* Clear the old state */
9163 clear_dd_indices(dd
,0,0);
9165 /* Build the new indices */
9166 rebuild_cgindex(dd
,cgs_gl
->index
,state_local
);
9167 make_dd_indices(dd
,cgs_gl
->index
,0);
9169 if (fr
->cutoff_scheme
== ecutsGROUP
)
9171 /* Redetermine the cg COMs */
9172 calc_cgcm(fplog
,0,dd
->ncg_home
,
9173 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
9176 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
9178 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
9180 set_ddbox(dd
,bMasterState
,cr
,ir
,state_local
->box
,
9181 TRUE
,&top_local
->cgs
,state_local
->x
,&ddbox
);
9183 bRedist
= comm
->bDynLoadBal
;
9187 /* We have the full state, only redistribute the cgs */
9189 /* Clear the non-home indices */
9190 clear_dd_indices(dd
,dd
->ncg_home
,dd
->nat_home
);
9192 /* Avoid global communication for dim's without pbc and -gcom */
9193 if (!bNStGlobalComm
)
9195 copy_rvec(comm
->box0
,ddbox
.box0
);
9196 copy_rvec(comm
->box_size
,ddbox
.box_size
);
9198 set_ddbox(dd
,bMasterState
,cr
,ir
,state_local
->box
,
9199 bNStGlobalComm
,&top_local
->cgs
,state_local
->x
,&ddbox
);
9204 /* For dim's without pbc and -gcom */
9205 copy_rvec(ddbox
.box0
,comm
->box0
);
9206 copy_rvec(ddbox
.box_size
,comm
->box_size
);
9208 set_dd_cell_sizes(dd
,&ddbox
,dynamic_dd_box(&ddbox
,ir
),bMasterState
,bDoDLB
,
9211 if (comm
->nstDDDumpGrid
> 0 && step
% comm
->nstDDDumpGrid
== 0)
9213 write_dd_grid_pdb("dd_grid",step
,dd
,state_local
->box
,&ddbox
);
9216 /* Check if we should sort the charge groups */
9217 if (comm
->nstSortCG
> 0)
9219 bSortCG
= (bMasterState
||
9220 (bRedist
&& (step
% comm
->nstSortCG
== 0)));
9227 ncg_home_old
= dd
->ncg_home
;
9232 wallcycle_sub_start(wcycle
,ewcsDD_REDIST
);
9234 dd_redistribute_cg(fplog
,step
,dd
,ddbox
.tric_dir
,
9235 state_local
,f
,fr
,mdatoms
,
9236 !bSortCG
,nrnb
,&cg0
,&ncg_moved
);
9238 wallcycle_sub_stop(wcycle
,ewcsDD_REDIST
);
9241 get_nsgrid_boundaries(ddbox
.nboundeddim
,state_local
->box
,
9243 &comm
->cell_x0
,&comm
->cell_x1
,
9244 dd
->ncg_home
,fr
->cg_cm
,
9245 cell_ns_x0
,cell_ns_x1
,&grid_density
);
9249 comm_dd_ns_cell_sizes(dd
,&ddbox
,cell_ns_x0
,cell_ns_x1
,step
);
9252 switch (fr
->cutoff_scheme
)
9255 copy_ivec(fr
->ns
.grid
->n
,ncells_old
);
9256 grid_first(fplog
,fr
->ns
.grid
,dd
,&ddbox
,fr
->ePBC
,
9257 state_local
->box
,cell_ns_x0
,cell_ns_x1
,
9258 fr
->rlistlong
,grid_density
);
9261 nbnxn_get_ncells(fr
->nbv
->nbs
,&ncells_old
[XX
],&ncells_old
[YY
]);
9264 gmx_incons("unimplemented");
9266 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9267 copy_ivec(ddbox
.tric_dir
,comm
->tric_dir
);
9271 wallcycle_sub_start(wcycle
,ewcsDD_GRID
);
9273 /* Sort the state on charge group position.
9274 * This enables exact restarts from this step.
9275 * It also improves performance by about 15% with larger numbers
9276 * of atoms per node.
9279 /* Fill the ns grid with the home cell,
9280 * so we can sort with the indices.
9282 set_zones_ncg_home(dd
);
9284 switch (fr
->cutoff_scheme
)
9287 set_zones_size(dd
,state_local
->box
,&ddbox
,0,1);
9289 nbnxn_put_on_grid(fr
->nbv
->nbs
,fr
->ePBC
,state_local
->box
,
9291 comm
->zones
.size
[0].bb_x0
,
9292 comm
->zones
.size
[0].bb_x1
,
9294 comm
->zones
.dens_zone0
,
9297 ncg_moved
,comm
->moved
,
9298 fr
->nbv
->grp
[eintLocal
].kernel_type
,
9299 fr
->nbv
->grp
[eintLocal
].nbat
);
9301 nbnxn_get_ncells(fr
->nbv
->nbs
,&ncells_new
[XX
],&ncells_new
[YY
]);
9304 fill_grid(fplog
,&comm
->zones
,fr
->ns
.grid
,dd
->ncg_home
,
9305 0,dd
->ncg_home
,fr
->cg_cm
);
9307 copy_ivec(fr
->ns
.grid
->n
,ncells_new
);
9310 gmx_incons("unimplemented");
9313 bResortAll
= bMasterState
;
9315 /* Check if we can user the old order and ns grid cell indices
9316 * of the charge groups to sort the charge groups efficiently.
9318 if (ncells_new
[XX
] != ncells_old
[XX
] ||
9319 ncells_new
[YY
] != ncells_old
[YY
] ||
9320 ncells_new
[ZZ
] != ncells_old
[ZZ
])
9327 fprintf(debug
,"Step %s, sorting the %d home charge groups\n",
9328 gmx_step_str(step
,sbuf
),dd
->ncg_home
);
9330 dd_sort_state(dd
,ir
->ePBC
,fr
->cg_cm
,fr
,state_local
,
9331 bResortAll
? -1 : ncg_home_old
);
9332 /* Rebuild all the indices */
9334 ga2la_clear(dd
->ga2la
);
9336 wallcycle_sub_stop(wcycle
,ewcsDD_GRID
);
9339 wallcycle_sub_start(wcycle
,ewcsDD_SETUPCOMM
);
9341 /* Setup up the communication and communicate the coordinates */
9342 setup_dd_communication(dd
,state_local
->box
,&ddbox
,fr
,state_local
,f
);
9344 /* Set the indices */
9345 make_dd_indices(dd
,cgs_gl
->index
,cg0
);
9347 /* Set the charge group boundaries for neighbor searching */
9348 set_cg_boundaries(&comm
->zones
);
9350 if (fr
->cutoff_scheme
== ecutsVERLET
)
9352 set_zones_size(dd
,state_local
->box
,&ddbox
,
9353 bSortCG
? 1 : 0,comm
->zones
.n
);
9356 wallcycle_sub_stop(wcycle
,ewcsDD_SETUPCOMM
);
9359 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9360 -1,state_local->x,state_local->box);
9363 wallcycle_sub_start(wcycle
,ewcsDD_MAKETOP
);
9365 /* Extract a local topology from the global topology */
9366 for(i
=0; i
<dd
->ndim
; i
++)
9368 np
[dd
->dim
[i
]] = comm
->cd
[i
].np
;
9370 dd_make_local_top(fplog
,dd
,&comm
->zones
,dd
->npbcdim
,state_local
->box
,
9371 comm
->cellsize_min
,np
,
9373 fr
->cutoff_scheme
==ecutsGROUP
? fr
->cg_cm
: state_local
->x
,
9374 vsite
,top_global
,top_local
);
9376 wallcycle_sub_stop(wcycle
,ewcsDD_MAKETOP
);
9378 wallcycle_sub_start(wcycle
,ewcsDD_MAKECONSTR
);
9380 /* Set up the special atom communication */
9381 n
= comm
->nat
[ddnatZONE
];
9382 for(i
=ddnatZONE
+1; i
<ddnatNR
; i
++)
9387 if (vsite
&& vsite
->n_intercg_vsite
)
9389 n
= dd_make_local_vsites(dd
,n
,top_local
->idef
.il
);
9393 if (dd
->bInterCGcons
|| dd
->bInterCGsettles
)
9395 /* Only for inter-cg constraints we need special code */
9396 n
= dd_make_local_constraints(dd
,n
,top_global
,fr
->cginfo
,
9397 constr
,ir
->nProjOrder
,
9398 top_local
->idef
.il
);
9402 gmx_incons("Unknown special atom type setup");
9407 wallcycle_sub_stop(wcycle
,ewcsDD_MAKECONSTR
);
9409 wallcycle_sub_start(wcycle
,ewcsDD_TOPOTHER
);
9411 /* Make space for the extra coordinates for virtual site
9412 * or constraint communication.
9414 state_local
->natoms
= comm
->nat
[ddnatNR
-1];
9415 if (state_local
->natoms
> state_local
->nalloc
)
9417 dd_realloc_state(state_local
,f
,state_local
->natoms
);
9420 if (fr
->bF_NoVirSum
)
9422 if (vsite
&& vsite
->n_intercg_vsite
)
9424 nat_f_novirsum
= comm
->nat
[ddnatVSITE
];
9428 if (EEL_FULL(ir
->coulombtype
) && dd
->n_intercg_excl
> 0)
9430 nat_f_novirsum
= dd
->nat_tot
;
9434 nat_f_novirsum
= dd
->nat_home
;
9443 /* Set the number of atoms required for the force calculation.
9444 * Forces need to be constrained when using a twin-range setup
9445 * or with energy minimization. For simple simulations we could
9446 * avoid some allocation, zeroing and copying, but this is
9447 * probably not worth the complications ande checking.
9449 forcerec_set_ranges(fr
,dd
->ncg_home
,dd
->ncg_tot
,
9450 dd
->nat_tot
,comm
->nat
[ddnatCON
],nat_f_novirsum
);
9452 /* We make the all mdatoms up to nat_tot_con.
9453 * We could save some work by only setting invmass
9454 * between nat_tot and nat_tot_con.
9456 /* This call also sets the new number of home particles to dd->nat_home */
9457 atoms2md(top_global
,ir
,
9458 comm
->nat
[ddnatCON
],dd
->gatindex
,0,dd
->nat_home
,mdatoms
);
9460 /* Now we have the charges we can sort the FE interactions */
9461 dd_sort_local_top(dd
,mdatoms
,top_local
);
9465 /* Make the local shell stuff, currently no communication is done */
9466 make_local_shells(cr
,mdatoms
,shellfc
);
9469 if (ir
->implicit_solvent
)
9471 make_local_gb(cr
,fr
->born
,ir
->gb_algorithm
);
9474 init_bonded_thread_force_reduction(fr
,&top_local
->idef
);
9476 if (!(cr
->duty
& DUTY_PME
))
9478 /* Send the charges to our PME only node */
9479 gmx_pme_send_q(cr
,mdatoms
->nChargePerturbed
,
9480 mdatoms
->chargeA
,mdatoms
->chargeB
,
9481 dd_pme_maxshift_x(dd
),dd_pme_maxshift_y(dd
));
9486 set_constraints(constr
,top_local
,ir
,mdatoms
,cr
);
9489 if (ir
->ePull
!= epullNO
)
9491 /* Update the local pull groups */
9492 dd_make_local_pull_groups(dd
,ir
->pull
,mdatoms
);
9497 /* Update the local rotation groups */
9498 dd_make_local_rotation_groups(dd
,ir
->rot
);
9502 add_dd_statistics(dd
);
9504 /* Make sure we only count the cycles for this DD partitioning */
9505 clear_dd_cycle_counts(dd
);
9507 /* Because the order of the atoms might have changed since
9508 * the last vsite construction, we need to communicate the constructing
9509 * atom coordinates again (for spreading the forces this MD step).
9511 dd_move_x_vsites(dd
,state_local
->box
,state_local
->x
);
9513 wallcycle_sub_stop(wcycle
,ewcsDD_TOPOTHER
);
9515 if (comm
->nstDDDump
> 0 && step
% comm
->nstDDDump
== 0)
9517 dd_move_x(dd
,state_local
->box
,state_local
->x
);
9518 write_dd_pdb("dd_dump",step
,"dump",top_global
,cr
,
9519 -1,state_local
->x
,state_local
->box
);
9522 /* Store the partitioning step */
9523 comm
->partition_step
= step
;
9525 /* Increase the DD partitioning counter */
9527 /* The state currently matches this DD partitioning count, store it */
9528 state_local
->ddp_count
= dd
->ddp_count
;
9531 /* The DD master node knows the complete cg distribution,
9532 * store the count so we can possibly skip the cg info communication.
9534 comm
->master_cg_ddp_count
= (bSortCG
? 0 : dd
->ddp_count
);
9537 if (comm
->DD_debug
> 0)
9539 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9540 check_index_consistency(dd
,top_global
->natoms
,ncg_mtop(top_global
),
9541 "after partitioning");