2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2008
5 * Copyright (c) 2012, by the GROMACS development team, led by
6 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
7 * others, as listed in the AUTHORS file in the top-level source
8 * directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gmx_fatal.h"
49 #include "gmx_fatal_collective.h"
52 #include "domdec_network.h"
55 #include "chargegroup.h"
64 #include "pull_rotation.h"
65 #include "gmx_wallcycle.h"
69 #include "mtop_util.h"
71 #include "gmx_ga2la.h"
73 #include "nbnxn_search.h"
75 #include "gmx_omp_nthreads.h"
84 #define DDRANK(dd,rank) (rank)
85 #define DDMASTERRANK(dd) (dd->masterrank)
87 typedef struct gmx_domdec_master
89 /* The cell boundaries */
91 /* The global charge group division */
92 int *ncg
; /* Number of home charge groups for each node */
93 int *index
; /* Index of nnodes+1 into cg */
94 int *cg
; /* Global charge group index */
95 int *nat
; /* Number of home atoms for each node. */
96 int *ibuf
; /* Buffer for communication */
97 rvec
*vbuf
; /* Buffer for state scattering and gathering */
98 } gmx_domdec_master_t
;
102 /* The numbers of charge groups to send and receive for each cell
103 * that requires communication, the last entry contains the total
104 * number of atoms that needs to be communicated.
106 int nsend
[DD_MAXIZONE
+2];
107 int nrecv
[DD_MAXIZONE
+2];
108 /* The charge groups to send */
111 /* The atom range for non-in-place communication */
112 int cell2at0
[DD_MAXIZONE
];
113 int cell2at1
[DD_MAXIZONE
];
118 int np
; /* Number of grid pulses in this dimension */
119 int np_dlb
; /* For dlb, for use with edlbAUTO */
120 gmx_domdec_ind_t
*ind
; /* The indices to communicate, size np */
122 gmx_bool bInPlace
; /* Can we communicate in place? */
123 } gmx_domdec_comm_dim_t
;
127 gmx_bool
*bCellMin
; /* Temp. var.: is this cell size at the limit */
128 real
*cell_f
; /* State var.: cell boundaries, box relative */
129 real
*old_cell_f
; /* Temp. var.: old cell size */
130 real
*cell_f_max0
; /* State var.: max lower boundary, incl neighbors */
131 real
*cell_f_min1
; /* State var.: min upper boundary, incl neighbors */
132 real
*bound_min
; /* Temp. var.: lower limit for cell boundary */
133 real
*bound_max
; /* Temp. var.: upper limit for cell boundary */
134 gmx_bool bLimited
; /* State var.: is DLB limited in this dim and row */
135 real
*buf_ncd
; /* Temp. var. */
138 #define DD_NLOAD_MAX 9
140 /* Here floats are accurate enough, since these variables
141 * only influence the load balancing, not the actual MD results.
168 gmx_cgsort_t
*sort_new
;
180 /* This enum determines the order of the coordinates.
181 * ddnatHOME and ddnatZONE should be first and second,
182 * the others can be ordered as wanted.
184 enum { ddnatHOME
, ddnatZONE
, ddnatVSITE
, ddnatCON
, ddnatNR
};
186 enum { edlbAUTO
, edlbNO
, edlbYES
, edlbNR
};
187 const char *edlb_names
[edlbNR
] = { "auto", "no", "yes" };
191 int dim
; /* The dimension */
192 gmx_bool dim_match
;/* Tells if DD and PME dims match */
193 int nslab
; /* The number of PME slabs in this dimension */
194 real
*slb_dim_f
; /* Cell sizes for determining the PME comm. with SLB */
195 int *pp_min
; /* The minimum pp node location, size nslab */
196 int *pp_max
; /* The maximum pp node location,size nslab */
197 int maxshift
; /* The maximum shift for coordinate redistribution in PME */
202 real min0
; /* The minimum bottom of this zone */
203 real max1
; /* The maximum top of this zone */
204 real min1
; /* The minimum top of this zone */
205 real mch0
; /* The maximum bottom communication height for this zone */
206 real mch1
; /* The maximum top communication height for this zone */
207 real p1_0
; /* The bottom value of the first cell in this zone */
208 real p1_1
; /* The top value of the first cell in this zone */
213 gmx_domdec_ind_t ind
;
220 } dd_comm_setup_work_t
;
222 typedef struct gmx_domdec_comm
224 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
225 * unless stated otherwise.
228 /* The number of decomposition dimensions for PME, 0: no PME */
230 /* The number of nodes doing PME (PP/PME or only PME) */
234 /* The communication setup including the PME only nodes */
235 gmx_bool bCartesianPP_PME
;
238 int *pmenodes
; /* size npmenodes */
239 int *ddindex2simnodeid
; /* size npmenodes, only with bCartesianPP
240 * but with bCartesianPP_PME */
241 gmx_ddpme_t ddpme
[2];
243 /* The DD particle-particle nodes only */
244 gmx_bool bCartesianPP
;
245 int *ddindex2ddnodeid
; /* size npmenode, only with bCartesianPP_PME */
247 /* The global charge groups */
250 /* Should we sort the cgs */
252 gmx_domdec_sort_t
*sort
;
254 /* Are there charge groups? */
257 /* Are there bonded and multi-body interactions between charge groups? */
258 gmx_bool bInterCGBondeds
;
259 gmx_bool bInterCGMultiBody
;
261 /* Data for the optional bonded interaction atom communication range */
268 /* Are we actually using DLB? */
269 gmx_bool bDynLoadBal
;
271 /* Cell sizes for static load balancing, first index cartesian */
274 /* The width of the communicated boundaries */
277 /* The minimum cell size (including triclinic correction) */
279 /* For dlb, for use with edlbAUTO */
280 rvec cellsize_min_dlb
;
281 /* The lower limit for the DD cell size with DLB */
283 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
284 gmx_bool bVacDLBNoLimit
;
286 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
288 /* box0 and box_size are required with dim's without pbc and -gcom */
292 /* The cell boundaries */
296 /* The old location of the cell boundaries, to check cg displacements */
300 /* The communication setup and charge group boundaries for the zones */
301 gmx_domdec_zones_t zones
;
303 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
304 * cell boundaries of neighboring cells for dynamic load balancing.
306 gmx_ddzone_t zone_d1
[2];
307 gmx_ddzone_t zone_d2
[2][2];
309 /* The coordinate/force communication setup and indices */
310 gmx_domdec_comm_dim_t cd
[DIM
];
311 /* The maximum number of cells to communicate with in one dimension */
314 /* Which cg distribution is stored on the master node */
315 int master_cg_ddp_count
;
317 /* The number of cg's received from the direct neighbors */
318 int zone_ncg1
[DD_MAXZONE
];
320 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
323 /* Array for signalling if atoms have moved to another domain */
327 /* Communication buffer for general use */
331 /* Communication buffer for general use */
334 /* Temporary storage for thread parallel communication setup */
336 dd_comm_setup_work_t
*dth
;
338 /* Communication buffers only used with multiple grid pulses */
343 /* Communication buffers for local redistribution */
345 int cggl_flag_nalloc
[DIM
*2];
347 int cgcm_state_nalloc
[DIM
*2];
349 /* Cell sizes for dynamic load balancing */
350 gmx_domdec_root_t
**root
;
354 real cell_f_max0
[DIM
];
355 real cell_f_min1
[DIM
];
357 /* Stuff for load communication */
358 gmx_bool bRecordLoad
;
359 gmx_domdec_load_t
*load
;
361 MPI_Comm
*mpi_comm_load
;
364 /* Maximum DLB scaling per load balancing step in percent */
368 float cycl
[ddCyclNr
];
369 int cycl_n
[ddCyclNr
];
370 float cycl_max
[ddCyclNr
];
371 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise) */
375 /* How often did we have load measurements */
377 /* How often have we collected the load measurements */
381 double sum_nat
[ddnatNR
-ddnatZONE
];
391 /* The last partition step */
392 gmx_large_int_t partition_step
;
400 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
403 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
404 #define DD_FLAG_NRCG 65535
405 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
406 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
408 /* Zone permutation required to obtain consecutive charge groups
409 * for neighbor searching.
411 static const int zone_perm
[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
413 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
414 * components see only j zones with that component 0.
417 /* The DD zone order */
418 static const ivec dd_zo
[DD_MAXZONE
] =
419 {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
424 static const ivec dd_zp3
[dd_zp3n
] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
429 static const ivec dd_zp2
[dd_zp2n
] = {{0,0,4},{1,3,4}};
434 static const ivec dd_zp1
[dd_zp1n
] = {{0,0,2}};
436 /* Factors used to avoid problems due to rounding issues */
437 #define DD_CELL_MARGIN 1.0001
438 #define DD_CELL_MARGIN2 1.00005
439 /* Factor to account for pressure scaling during nstlist steps */
440 #define DD_PRES_SCALE_MARGIN 1.02
442 /* Allowed performance loss before we DLB or warn */
443 #define DD_PERF_LOSS 0.05
445 #define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
447 /* Use separate MPI send and receive commands
448 * when nnodes <= GMX_DD_NNODES_SENDRECV.
449 * This saves memory (and some copying for small nnodes).
450 * For high parallelization scatter and gather calls are used.
452 #define GMX_DD_NNODES_SENDRECV 4
456 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
458 static void index2xyz(ivec nc,int ind,ivec xyz)
460 xyz[XX] = ind % nc[XX];
461 xyz[YY] = (ind / nc[XX]) % nc[YY];
462 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
466 /* This order is required to minimize the coordinate communication in PME
467 * which uses decomposition in the x direction.
469 #define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
471 static void ddindex2xyz(ivec nc
,int ind
,ivec xyz
)
473 xyz
[XX
] = ind
/ (nc
[YY
]*nc
[ZZ
]);
474 xyz
[YY
] = (ind
/ nc
[ZZ
]) % nc
[YY
];
475 xyz
[ZZ
] = ind
% nc
[ZZ
];
478 static int ddcoord2ddnodeid(gmx_domdec_t
*dd
,ivec c
)
483 ddindex
= dd_index(dd
->nc
,c
);
484 if (dd
->comm
->bCartesianPP_PME
)
486 ddnodeid
= dd
->comm
->ddindex2ddnodeid
[ddindex
];
488 else if (dd
->comm
->bCartesianPP
)
491 MPI_Cart_rank(dd
->mpi_comm_all
,c
,&ddnodeid
);
502 static gmx_bool
dynamic_dd_box(gmx_ddbox_t
*ddbox
,t_inputrec
*ir
)
504 return (ddbox
->nboundeddim
< DIM
|| DYNAMIC_BOX(*ir
));
507 int ddglatnr(gmx_domdec_t
*dd
,int i
)
517 if (i
>= dd
->comm
->nat
[ddnatNR
-1])
519 gmx_fatal(FARGS
,"glatnr called with %d, which is larger than the local number of atoms (%d)",i
,dd
->comm
->nat
[ddnatNR
-1]);
521 atnr
= dd
->gatindex
[i
] + 1;
527 t_block
*dd_charge_groups_global(gmx_domdec_t
*dd
)
529 return &dd
->comm
->cgs_gl
;
532 static void vec_rvec_init(vec_rvec_t
*v
)
538 static void vec_rvec_check_alloc(vec_rvec_t
*v
,int n
)
542 v
->nalloc
= over_alloc_dd(n
);
543 srenew(v
->v
,v
->nalloc
);
547 void dd_store_state(gmx_domdec_t
*dd
,t_state
*state
)
551 if (state
->ddp_count
!= dd
->ddp_count
)
553 gmx_incons("The state does not the domain decomposition state");
556 state
->ncg_gl
= dd
->ncg_home
;
557 if (state
->ncg_gl
> state
->cg_gl_nalloc
)
559 state
->cg_gl_nalloc
= over_alloc_dd(state
->ncg_gl
);
560 srenew(state
->cg_gl
,state
->cg_gl_nalloc
);
562 for(i
=0; i
<state
->ncg_gl
; i
++)
564 state
->cg_gl
[i
] = dd
->index_gl
[i
];
567 state
->ddp_count_cg_gl
= dd
->ddp_count
;
570 gmx_domdec_zones_t
*domdec_zones(gmx_domdec_t
*dd
)
572 return &dd
->comm
->zones
;
575 void dd_get_ns_ranges(gmx_domdec_t
*dd
,int icg
,
576 int *jcg0
,int *jcg1
,ivec shift0
,ivec shift1
)
578 gmx_domdec_zones_t
*zones
;
581 zones
= &dd
->comm
->zones
;
584 while (icg
>= zones
->izone
[izone
].cg1
)
593 else if (izone
< zones
->nizone
)
595 *jcg0
= zones
->izone
[izone
].jcg0
;
599 gmx_fatal(FARGS
,"DD icg %d out of range: izone (%d) >= nizone (%d)",
600 icg
,izone
,zones
->nizone
);
603 *jcg1
= zones
->izone
[izone
].jcg1
;
605 for(d
=0; d
<dd
->ndim
; d
++)
608 shift0
[dim
] = zones
->izone
[izone
].shift0
[dim
];
609 shift1
[dim
] = zones
->izone
[izone
].shift1
[dim
];
610 if (dd
->comm
->tric_dir
[dim
] || (dd
->bGridJump
&& d
> 0))
612 /* A conservative approach, this can be optimized */
619 int dd_natoms_vsite(gmx_domdec_t
*dd
)
621 return dd
->comm
->nat
[ddnatVSITE
];
624 void dd_get_constraint_range(gmx_domdec_t
*dd
,int *at_start
,int *at_end
)
626 *at_start
= dd
->comm
->nat
[ddnatCON
-1];
627 *at_end
= dd
->comm
->nat
[ddnatCON
];
630 void dd_move_x(gmx_domdec_t
*dd
,matrix box
,rvec x
[])
632 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
634 gmx_domdec_comm_t
*comm
;
635 gmx_domdec_comm_dim_t
*cd
;
636 gmx_domdec_ind_t
*ind
;
637 rvec shift
={0,0,0},*buf
,*rbuf
;
638 gmx_bool bPBC
,bScrew
;
642 cgindex
= dd
->cgindex
;
647 nat_tot
= dd
->nat_home
;
648 for(d
=0; d
<dd
->ndim
; d
++)
650 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
651 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
654 copy_rvec(box
[dd
->dim
[d
]],shift
);
657 for(p
=0; p
<cd
->np
; p
++)
664 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
666 at0
= cgindex
[index
[i
]];
667 at1
= cgindex
[index
[i
]+1];
668 for(j
=at0
; j
<at1
; j
++)
670 copy_rvec(x
[j
],buf
[n
]);
677 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
679 at0
= cgindex
[index
[i
]];
680 at1
= cgindex
[index
[i
]+1];
681 for(j
=at0
; j
<at1
; j
++)
683 /* We need to shift the coordinates */
684 rvec_add(x
[j
],shift
,buf
[n
]);
691 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
693 at0
= cgindex
[index
[i
]];
694 at1
= cgindex
[index
[i
]+1];
695 for(j
=at0
; j
<at1
; j
++)
698 buf
[n
][XX
] = x
[j
][XX
] + shift
[XX
];
700 * This operation requires a special shift force
701 * treatment, which is performed in calc_vir.
703 buf
[n
][YY
] = box
[YY
][YY
] - x
[j
][YY
];
704 buf
[n
][ZZ
] = box
[ZZ
][ZZ
] - x
[j
][ZZ
];
716 rbuf
= comm
->vbuf2
.v
;
718 /* Send and receive the coordinates */
719 dd_sendrecv_rvec(dd
, d
, dddirBackward
,
720 buf
, ind
->nsend
[nzone
+1],
721 rbuf
, ind
->nrecv
[nzone
+1]);
725 for(zone
=0; zone
<nzone
; zone
++)
727 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
729 copy_rvec(rbuf
[j
],x
[i
]);
734 nat_tot
+= ind
->nrecv
[nzone
+1];
740 void dd_move_f(gmx_domdec_t
*dd
,rvec f
[],rvec
*fshift
)
742 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
744 gmx_domdec_comm_t
*comm
;
745 gmx_domdec_comm_dim_t
*cd
;
746 gmx_domdec_ind_t
*ind
;
750 gmx_bool bPBC
,bScrew
;
754 cgindex
= dd
->cgindex
;
759 nzone
= comm
->zones
.n
/2;
760 nat_tot
= dd
->nat_tot
;
761 for(d
=dd
->ndim
-1; d
>=0; d
--)
763 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
764 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
765 if (fshift
== NULL
&& !bScrew
)
769 /* Determine which shift vector we need */
775 for(p
=cd
->np
-1; p
>=0; p
--) {
777 nat_tot
-= ind
->nrecv
[nzone
+1];
784 sbuf
= comm
->vbuf2
.v
;
786 for(zone
=0; zone
<nzone
; zone
++)
788 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
790 copy_rvec(f
[i
],sbuf
[j
]);
795 /* Communicate the forces */
796 dd_sendrecv_rvec(dd
, d
, dddirForward
,
797 sbuf
, ind
->nrecv
[nzone
+1],
798 buf
, ind
->nsend
[nzone
+1]);
800 /* Add the received forces */
804 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
806 at0
= cgindex
[index
[i
]];
807 at1
= cgindex
[index
[i
]+1];
808 for(j
=at0
; j
<at1
; j
++)
810 rvec_inc(f
[j
],buf
[n
]);
817 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
819 at0
= cgindex
[index
[i
]];
820 at1
= cgindex
[index
[i
]+1];
821 for(j
=at0
; j
<at1
; j
++)
823 rvec_inc(f
[j
],buf
[n
]);
824 /* Add this force to the shift force */
825 rvec_inc(fshift
[is
],buf
[n
]);
832 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
834 at0
= cgindex
[index
[i
]];
835 at1
= cgindex
[index
[i
]+1];
836 for(j
=at0
; j
<at1
; j
++)
838 /* Rotate the force */
839 f
[j
][XX
] += buf
[n
][XX
];
840 f
[j
][YY
] -= buf
[n
][YY
];
841 f
[j
][ZZ
] -= buf
[n
][ZZ
];
844 /* Add this force to the shift force */
845 rvec_inc(fshift
[is
],buf
[n
]);
856 void dd_atom_spread_real(gmx_domdec_t
*dd
,real v
[])
858 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
860 gmx_domdec_comm_t
*comm
;
861 gmx_domdec_comm_dim_t
*cd
;
862 gmx_domdec_ind_t
*ind
;
867 cgindex
= dd
->cgindex
;
869 buf
= &comm
->vbuf
.v
[0][0];
872 nat_tot
= dd
->nat_home
;
873 for(d
=0; d
<dd
->ndim
; d
++)
876 for(p
=0; p
<cd
->np
; p
++)
881 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
883 at0
= cgindex
[index
[i
]];
884 at1
= cgindex
[index
[i
]+1];
885 for(j
=at0
; j
<at1
; j
++)
898 rbuf
= &comm
->vbuf2
.v
[0][0];
900 /* Send and receive the values */
901 dd_sendrecv_real(dd
, d
, dddirBackward
,
902 buf
, ind
->nsend
[nzone
+1],
903 rbuf
, ind
->nrecv
[nzone
+1]);
907 for(zone
=0; zone
<nzone
; zone
++)
909 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
916 nat_tot
+= ind
->nrecv
[nzone
+1];
922 void dd_atom_sum_real(gmx_domdec_t
*dd
,real v
[])
924 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
926 gmx_domdec_comm_t
*comm
;
927 gmx_domdec_comm_dim_t
*cd
;
928 gmx_domdec_ind_t
*ind
;
933 cgindex
= dd
->cgindex
;
935 buf
= &comm
->vbuf
.v
[0][0];
938 nzone
= comm
->zones
.n
/2;
939 nat_tot
= dd
->nat_tot
;
940 for(d
=dd
->ndim
-1; d
>=0; d
--)
943 for(p
=cd
->np
-1; p
>=0; p
--) {
945 nat_tot
-= ind
->nrecv
[nzone
+1];
952 sbuf
= &comm
->vbuf2
.v
[0][0];
954 for(zone
=0; zone
<nzone
; zone
++)
956 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
963 /* Communicate the values */
964 dd_sendrecv_real(dd
, d
, dddirForward
,
965 sbuf
, ind
->nrecv
[nzone
+1],
966 buf
, ind
->nsend
[nzone
+1]);
968 /* Add the received values */
970 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
972 at0
= cgindex
[index
[i
]];
973 at1
= cgindex
[index
[i
]+1];
974 for(j
=at0
; j
<at1
; j
++)
985 static void print_ddzone(FILE *fp
,int d
,int i
,int j
,gmx_ddzone_t
*zone
)
987 fprintf(fp
,"zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
989 zone
->min0
,zone
->max1
,
990 zone
->mch0
,zone
->mch0
,
991 zone
->p1_0
,zone
->p1_1
);
995 #define DDZONECOMM_MAXZONE 5
996 #define DDZONECOMM_BUFSIZE 3
998 static void dd_sendrecv_ddzone(const gmx_domdec_t
*dd
,
999 int ddimind
,int direction
,
1000 gmx_ddzone_t
*buf_s
,int n_s
,
1001 gmx_ddzone_t
*buf_r
,int n_r
)
1003 #define ZBS DDZONECOMM_BUFSIZE
1004 rvec vbuf_s
[DDZONECOMM_MAXZONE
*ZBS
];
1005 rvec vbuf_r
[DDZONECOMM_MAXZONE
*ZBS
];
1008 for(i
=0; i
<n_s
; i
++)
1010 vbuf_s
[i
*ZBS
][0] = buf_s
[i
].min0
;
1011 vbuf_s
[i
*ZBS
][1] = buf_s
[i
].max1
;
1012 vbuf_s
[i
*ZBS
][2] = buf_s
[i
].min1
;
1013 vbuf_s
[i
*ZBS
+1][0] = buf_s
[i
].mch0
;
1014 vbuf_s
[i
*ZBS
+1][1] = buf_s
[i
].mch1
;
1015 vbuf_s
[i
*ZBS
+1][2] = 0;
1016 vbuf_s
[i
*ZBS
+2][0] = buf_s
[i
].p1_0
;
1017 vbuf_s
[i
*ZBS
+2][1] = buf_s
[i
].p1_1
;
1018 vbuf_s
[i
*ZBS
+2][2] = 0;
1021 dd_sendrecv_rvec(dd
, ddimind
, direction
,
1025 for(i
=0; i
<n_r
; i
++)
1027 buf_r
[i
].min0
= vbuf_r
[i
*ZBS
][0];
1028 buf_r
[i
].max1
= vbuf_r
[i
*ZBS
][1];
1029 buf_r
[i
].min1
= vbuf_r
[i
*ZBS
][2];
1030 buf_r
[i
].mch0
= vbuf_r
[i
*ZBS
+1][0];
1031 buf_r
[i
].mch1
= vbuf_r
[i
*ZBS
+1][1];
1032 buf_r
[i
].p1_0
= vbuf_r
[i
*ZBS
+2][0];
1033 buf_r
[i
].p1_1
= vbuf_r
[i
*ZBS
+2][1];
1039 static void dd_move_cellx(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
,
1040 rvec cell_ns_x0
,rvec cell_ns_x1
)
1042 int d
,d1
,dim
,dim1
,pos
,buf_size
,i
,j
,k
,p
,npulse
,npulse_min
;
1044 gmx_ddzone_t buf_s
[DDZONECOMM_MAXZONE
];
1045 gmx_ddzone_t buf_r
[DDZONECOMM_MAXZONE
];
1046 gmx_ddzone_t buf_e
[DDZONECOMM_MAXZONE
];
1047 rvec extr_s
[2],extr_r
[2];
1049 real dist_d
,c
=0,det
;
1050 gmx_domdec_comm_t
*comm
;
1055 for(d
=1; d
<dd
->ndim
; d
++)
1058 zp
= (d
== 1) ? &comm
->zone_d1
[0] : &comm
->zone_d2
[0][0];
1059 zp
->min0
= cell_ns_x0
[dim
];
1060 zp
->max1
= cell_ns_x1
[dim
];
1061 zp
->min1
= cell_ns_x1
[dim
];
1062 zp
->mch0
= cell_ns_x0
[dim
];
1063 zp
->mch1
= cell_ns_x1
[dim
];
1064 zp
->p1_0
= cell_ns_x0
[dim
];
1065 zp
->p1_1
= cell_ns_x1
[dim
];
1068 for(d
=dd
->ndim
-2; d
>=0; d
--)
1071 bPBC
= (dim
< ddbox
->npbcdim
);
1073 /* Use an rvec to store two reals */
1074 extr_s
[d
][0] = comm
->cell_f0
[d
+1];
1075 extr_s
[d
][1] = comm
->cell_f1
[d
+1];
1076 extr_s
[d
][2] = comm
->cell_f1
[d
+1];
1079 /* Store the extremes in the backward sending buffer,
1080 * so they get updated separately from the forward communication.
1082 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1084 /* We invert the order to be able to use the same loop for buf_e */
1085 buf_s
[pos
].min0
= extr_s
[d1
][1];
1086 buf_s
[pos
].max1
= extr_s
[d1
][0];
1087 buf_s
[pos
].min1
= extr_s
[d1
][2];
1088 buf_s
[pos
].mch0
= 0;
1089 buf_s
[pos
].mch1
= 0;
1090 /* Store the cell corner of the dimension we communicate along */
1091 buf_s
[pos
].p1_0
= comm
->cell_x0
[dim
];
1092 buf_s
[pos
].p1_1
= 0;
1096 buf_s
[pos
] = (dd
->ndim
== 2) ? comm
->zone_d1
[0] : comm
->zone_d2
[0][0];
1099 if (dd
->ndim
== 3 && d
== 0)
1101 buf_s
[pos
] = comm
->zone_d2
[0][1];
1103 buf_s
[pos
] = comm
->zone_d1
[0];
1107 /* We only need to communicate the extremes
1108 * in the forward direction
1110 npulse
= comm
->cd
[d
].np
;
1113 /* Take the minimum to avoid double communication */
1114 npulse_min
= min(npulse
,dd
->nc
[dim
]-1-npulse
);
1118 /* Without PBC we should really not communicate over
1119 * the boundaries, but implementing that complicates
1120 * the communication setup and therefore we simply
1121 * do all communication, but ignore some data.
1123 npulse_min
= npulse
;
1125 for(p
=0; p
<npulse_min
; p
++)
1127 /* Communicate the extremes forward */
1128 bUse
= (bPBC
|| dd
->ci
[dim
] > 0);
1130 dd_sendrecv_rvec(dd
, d
, dddirForward
,
1131 extr_s
+d
, dd
->ndim
-d
-1,
1132 extr_r
+d
, dd
->ndim
-d
-1);
1136 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1138 extr_s
[d1
][0] = max(extr_s
[d1
][0],extr_r
[d1
][0]);
1139 extr_s
[d1
][1] = min(extr_s
[d1
][1],extr_r
[d1
][1]);
1140 extr_s
[d1
][2] = min(extr_s
[d1
][2],extr_r
[d1
][2]);
1146 for(p
=0; p
<npulse
; p
++)
1148 /* Communicate all the zone information backward */
1149 bUse
= (bPBC
|| dd
->ci
[dim
] < dd
->nc
[dim
] - 1);
1151 dd_sendrecv_ddzone(dd
, d
, dddirBackward
,
1158 for(d1
=d
+1; d1
<dd
->ndim
; d1
++)
1160 /* Determine the decrease of maximum required
1161 * communication height along d1 due to the distance along d,
1162 * this avoids a lot of useless atom communication.
1164 dist_d
= comm
->cell_x1
[dim
] - buf_r
[0].p1_0
;
1166 if (ddbox
->tric_dir
[dim
])
1168 /* c is the off-diagonal coupling between the cell planes
1169 * along directions d and d1.
1171 c
= ddbox
->v
[dim
][dd
->dim
[d1
]][dim
];
1177 det
= (1 + c
*c
)*comm
->cutoff
*comm
->cutoff
- dist_d
*dist_d
;
1180 dh
[d1
] = comm
->cutoff
- (c
*dist_d
+ sqrt(det
))/(1 + c
*c
);
1184 /* A negative value signals out of range */
1190 /* Accumulate the extremes over all pulses */
1191 for(i
=0; i
<buf_size
; i
++)
1195 buf_e
[i
] = buf_r
[i
];
1201 buf_e
[i
].min0
= min(buf_e
[i
].min0
,buf_r
[i
].min0
);
1202 buf_e
[i
].max1
= max(buf_e
[i
].max1
,buf_r
[i
].max1
);
1203 buf_e
[i
].min1
= min(buf_e
[i
].min1
,buf_r
[i
].min1
);
1206 if (dd
->ndim
== 3 && d
== 0 && i
== buf_size
- 1)
1214 if (bUse
&& dh
[d1
] >= 0)
1216 buf_e
[i
].mch0
= max(buf_e
[i
].mch0
,buf_r
[i
].mch0
-dh
[d1
]);
1217 buf_e
[i
].mch1
= max(buf_e
[i
].mch1
,buf_r
[i
].mch1
-dh
[d1
]);
1220 /* Copy the received buffer to the send buffer,
1221 * to pass the data through with the next pulse.
1223 buf_s
[i
] = buf_r
[i
];
1225 if (((bPBC
|| dd
->ci
[dim
]+npulse
< dd
->nc
[dim
]) && p
== npulse
-1) ||
1226 (!bPBC
&& dd
->ci
[dim
]+1+p
== dd
->nc
[dim
]-1))
1228 /* Store the extremes */
1231 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1233 extr_s
[d1
][1] = min(extr_s
[d1
][1],buf_e
[pos
].min0
);
1234 extr_s
[d1
][0] = max(extr_s
[d1
][0],buf_e
[pos
].max1
);
1235 extr_s
[d1
][2] = min(extr_s
[d1
][2],buf_e
[pos
].min1
);
1239 if (d
== 1 || (d
== 0 && dd
->ndim
== 3))
1243 comm
->zone_d2
[1-d
][i
] = buf_e
[pos
];
1249 comm
->zone_d1
[1] = buf_e
[pos
];
1263 print_ddzone(debug
,1,i
,0,&comm
->zone_d1
[i
]);
1265 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d1
[i
].min0
);
1266 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d1
[i
].max1
);
1278 print_ddzone(debug
,2,i
,j
,&comm
->zone_d2
[i
][j
]);
1280 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d2
[i
][j
].min0
);
1281 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d2
[i
][j
].max1
);
1285 for(d
=1; d
<dd
->ndim
; d
++)
1287 comm
->cell_f_max0
[d
] = extr_s
[d
-1][0];
1288 comm
->cell_f_min1
[d
] = extr_s
[d
-1][1];
1291 fprintf(debug
,"Cell fraction d %d, max0 %f, min1 %f\n",
1292 d
,comm
->cell_f_max0
[d
],comm
->cell_f_min1
[d
]);
1297 static void dd_collect_cg(gmx_domdec_t
*dd
,
1298 t_state
*state_local
)
1300 gmx_domdec_master_t
*ma
=NULL
;
1301 int buf2
[2],*ibuf
,i
,ncg_home
=0,*cg
=NULL
,nat_home
=0;
1304 if (state_local
->ddp_count
== dd
->comm
->master_cg_ddp_count
)
1306 /* The master has the correct distribution */
1310 if (state_local
->ddp_count
== dd
->ddp_count
)
1312 ncg_home
= dd
->ncg_home
;
1314 nat_home
= dd
->nat_home
;
1316 else if (state_local
->ddp_count_cg_gl
== state_local
->ddp_count
)
1318 cgs_gl
= &dd
->comm
->cgs_gl
;
1320 ncg_home
= state_local
->ncg_gl
;
1321 cg
= state_local
->cg_gl
;
1323 for(i
=0; i
<ncg_home
; i
++)
1325 nat_home
+= cgs_gl
->index
[cg
[i
]+1] - cgs_gl
->index
[cg
[i
]];
1330 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1333 buf2
[0] = dd
->ncg_home
;
1334 buf2
[1] = dd
->nat_home
;
1344 /* Collect the charge group and atom counts on the master */
1345 dd_gather(dd
,2*sizeof(int),buf2
,ibuf
);
1350 for(i
=0; i
<dd
->nnodes
; i
++)
1352 ma
->ncg
[i
] = ma
->ibuf
[2*i
];
1353 ma
->nat
[i
] = ma
->ibuf
[2*i
+1];
1354 ma
->index
[i
+1] = ma
->index
[i
] + ma
->ncg
[i
];
1357 /* Make byte counts and indices */
1358 for(i
=0; i
<dd
->nnodes
; i
++)
1360 ma
->ibuf
[i
] = ma
->ncg
[i
]*sizeof(int);
1361 ma
->ibuf
[dd
->nnodes
+i
] = ma
->index
[i
]*sizeof(int);
1365 fprintf(debug
,"Initial charge group distribution: ");
1366 for(i
=0; i
<dd
->nnodes
; i
++)
1367 fprintf(debug
," %d",ma
->ncg
[i
]);
1368 fprintf(debug
,"\n");
1372 /* Collect the charge group indices on the master */
1374 dd
->ncg_home
*sizeof(int),dd
->index_gl
,
1375 DDMASTER(dd
) ? ma
->ibuf
: NULL
,
1376 DDMASTER(dd
) ? ma
->ibuf
+dd
->nnodes
: NULL
,
1377 DDMASTER(dd
) ? ma
->cg
: NULL
);
1379 dd
->comm
->master_cg_ddp_count
= state_local
->ddp_count
;
1382 static void dd_collect_vec_sendrecv(gmx_domdec_t
*dd
,
1385 gmx_domdec_master_t
*ma
;
1386 int n
,i
,c
,a
,nalloc
=0;
1395 MPI_Send(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1396 dd
->rank
,dd
->mpi_comm_all
);
1399 /* Copy the master coordinates to the global array */
1400 cgs_gl
= &dd
->comm
->cgs_gl
;
1402 n
= DDMASTERRANK(dd
);
1404 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1406 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1408 copy_rvec(lv
[a
++],v
[c
]);
1412 for(n
=0; n
<dd
->nnodes
; n
++)
1416 if (ma
->nat
[n
] > nalloc
)
1418 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1422 MPI_Recv(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,DDRANK(dd
,n
),
1423 n
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1426 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1428 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1430 copy_rvec(buf
[a
++],v
[c
]);
1439 static void get_commbuffer_counts(gmx_domdec_t
*dd
,
1440 int **counts
,int **disps
)
1442 gmx_domdec_master_t
*ma
;
1447 /* Make the rvec count and displacement arrays */
1449 *disps
= ma
->ibuf
+ dd
->nnodes
;
1450 for(n
=0; n
<dd
->nnodes
; n
++)
1452 (*counts
)[n
] = ma
->nat
[n
]*sizeof(rvec
);
1453 (*disps
)[n
] = (n
== 0 ? 0 : (*disps
)[n
-1] + (*counts
)[n
-1]);
1457 static void dd_collect_vec_gatherv(gmx_domdec_t
*dd
,
1460 gmx_domdec_master_t
*ma
;
1461 int *rcounts
=NULL
,*disps
=NULL
;
1470 get_commbuffer_counts(dd
,&rcounts
,&disps
);
1475 dd_gatherv(dd
,dd
->nat_home
*sizeof(rvec
),lv
,rcounts
,disps
,buf
);
1479 cgs_gl
= &dd
->comm
->cgs_gl
;
1482 for(n
=0; n
<dd
->nnodes
; n
++)
1484 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1486 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1488 copy_rvec(buf
[a
++],v
[c
]);
1495 void dd_collect_vec(gmx_domdec_t
*dd
,
1496 t_state
*state_local
,rvec
*lv
,rvec
*v
)
1498 gmx_domdec_master_t
*ma
;
1499 int n
,i
,c
,a
,nalloc
=0;
1502 dd_collect_cg(dd
,state_local
);
1504 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1506 dd_collect_vec_sendrecv(dd
,lv
,v
);
1510 dd_collect_vec_gatherv(dd
,lv
,v
);
1515 void dd_collect_state(gmx_domdec_t
*dd
,
1516 t_state
*state_local
,t_state
*state
)
1520 nh
= state
->nhchainlength
;
1524 for (i
=0;i
<efptNR
;i
++) {
1525 state
->lambda
[i
] = state_local
->lambda
[i
];
1527 state
->fep_state
= state_local
->fep_state
;
1528 state
->veta
= state_local
->veta
;
1529 state
->vol0
= state_local
->vol0
;
1530 copy_mat(state_local
->box
,state
->box
);
1531 copy_mat(state_local
->boxv
,state
->boxv
);
1532 copy_mat(state_local
->svir_prev
,state
->svir_prev
);
1533 copy_mat(state_local
->fvir_prev
,state
->fvir_prev
);
1534 copy_mat(state_local
->pres_prev
,state
->pres_prev
);
1537 for(i
=0; i
<state_local
->ngtc
; i
++)
1539 for(j
=0; j
<nh
; j
++) {
1540 state
->nosehoover_xi
[i
*nh
+j
] = state_local
->nosehoover_xi
[i
*nh
+j
];
1541 state
->nosehoover_vxi
[i
*nh
+j
] = state_local
->nosehoover_vxi
[i
*nh
+j
];
1543 state
->therm_integral
[i
] = state_local
->therm_integral
[i
];
1545 for(i
=0; i
<state_local
->nnhpres
; i
++)
1547 for(j
=0; j
<nh
; j
++) {
1548 state
->nhpres_xi
[i
*nh
+j
] = state_local
->nhpres_xi
[i
*nh
+j
];
1549 state
->nhpres_vxi
[i
*nh
+j
] = state_local
->nhpres_vxi
[i
*nh
+j
];
1553 for(est
=0; est
<estNR
; est
++)
1555 if (EST_DISTR(est
) && (state_local
->flags
& (1<<est
)))
1559 dd_collect_vec(dd
,state_local
,state_local
->x
,state
->x
);
1562 dd_collect_vec(dd
,state_local
,state_local
->v
,state
->v
);
1565 dd_collect_vec(dd
,state_local
,state_local
->sd_X
,state
->sd_X
);
1568 dd_collect_vec(dd
,state_local
,state_local
->cg_p
,state
->cg_p
);
1571 if (state
->nrngi
== 1)
1575 for(i
=0; i
<state_local
->nrng
; i
++)
1577 state
->ld_rng
[i
] = state_local
->ld_rng
[i
];
1583 dd_gather(dd
,state_local
->nrng
*sizeof(state
->ld_rng
[0]),
1584 state_local
->ld_rng
,state
->ld_rng
);
1588 if (state
->nrngi
== 1)
1592 state
->ld_rngi
[0] = state_local
->ld_rngi
[0];
1597 dd_gather(dd
,sizeof(state
->ld_rngi
[0]),
1598 state_local
->ld_rngi
,state
->ld_rngi
);
1601 case estDISRE_INITF
:
1602 case estDISRE_RM3TAV
:
1603 case estORIRE_INITF
:
1607 gmx_incons("Unknown state entry encountered in dd_collect_state");
/* Grow the per-atom arrays of a state to hold at least nalloc entries.
 *
 * state->nalloc is set to over_alloc_dd(nalloc). The loop then visits every
 * state entry index est in [0, estNR); for entries that satisfy
 * EST_DISTR(est) and are enabled in state->flags, the arrays x, v, sd_X and
 * cg_p are resized with srenew() to state->nalloc elements. The
 * estDISRE_INITF / estDISRE_RM3TAV / estORIRE_INITF entries need no
 * reallocation, and any other entry is reported through gmx_incons().
 * Finally the force array *f is resized to the same element count.
 *
 * NOTE(review): the switch/case labels that select which srenew() call runs
 * for which entry, and any guards around the debug print and the final
 * srenew(*f, ...), are not visible in this extract - confirm against the
 * full source before relying on the exact dispatch.
 */
1613 static void dd_realloc_state(t_state
*state
,rvec
**f
,int nalloc
)
1619 fprintf(debug
,"Reallocating state: currently %d, required %d, allocating %d\n",state
->nalloc
,nalloc
,over_alloc_dd(nalloc
));
1622 state
->nalloc
= over_alloc_dd(nalloc
);
1624 for(est
=0; est
<estNR
; est
++)
1626 if (EST_DISTR(est
) && (state
->flags
& (1<<est
)))
1630 srenew(state
->x
,state
->nalloc
);
1633 srenew(state
->v
,state
->nalloc
);
1636 srenew(state
->sd_X
,state
->nalloc
);
1639 srenew(state
->cg_p
,state
->nalloc
);
1643 case estDISRE_INITF
:
1644 case estDISRE_RM3TAV
:
1645 case estORIRE_INITF
:
1647 /* No reallocation required */
1650 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1657 srenew(*f
,state
->nalloc
);
1661 static void dd_check_alloc_ncg(t_forcerec
*fr
,t_state
*state
,rvec
**f
,
1664 if (nalloc
> fr
->cg_nalloc
)
1668 fprintf(debug
,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr
->cg_nalloc
,nalloc
,over_alloc_dd(nalloc
));
1670 fr
->cg_nalloc
= over_alloc_dd(nalloc
);
1671 srenew(fr
->cginfo
,fr
->cg_nalloc
);
1672 if (fr
->cutoff_scheme
== ecutsGROUP
)
1674 srenew(fr
->cg_cm
,fr
->cg_nalloc
);
1677 if (fr
->cutoff_scheme
== ecutsVERLET
&& nalloc
> state
->nalloc
)
1679 /* We don't use charge groups, we use x in state to set up
1680 * the atom communication.
1682 dd_realloc_state(state
,f
,nalloc
);
1686 static void dd_distribute_vec_sendrecv(gmx_domdec_t
*dd
,t_block
*cgs
,
1689 gmx_domdec_master_t
*ma
;
1690 int n
,i
,c
,a
,nalloc
=0;
1697 for(n
=0; n
<dd
->nnodes
; n
++)
1701 if (ma
->nat
[n
] > nalloc
)
1703 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1706 /* Use lv as a temporary buffer */
1708 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1710 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1712 copy_rvec(v
[c
],buf
[a
++]);
1715 if (a
!= ma
->nat
[n
])
1717 gmx_fatal(FARGS
,"Internal error a (%d) != nat (%d)",
1722 MPI_Send(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,
1723 DDRANK(dd
,n
),n
,dd
->mpi_comm_all
);
1728 n
= DDMASTERRANK(dd
);
1730 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1732 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1734 copy_rvec(v
[c
],lv
[a
++]);
1741 MPI_Recv(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1742 MPI_ANY_TAG
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1747 static void dd_distribute_vec_scatterv(gmx_domdec_t
*dd
,t_block
*cgs
,
1750 gmx_domdec_master_t
*ma
;
1751 int *scounts
=NULL
,*disps
=NULL
;
1752 int n
,i
,c
,a
,nalloc
=0;
1759 get_commbuffer_counts(dd
,&scounts
,&disps
);
1763 for(n
=0; n
<dd
->nnodes
; n
++)
1765 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1767 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1769 copy_rvec(v
[c
],buf
[a
++]);
1775 dd_scatterv(dd
,scounts
,disps
,buf
,dd
->nat_home
*sizeof(rvec
),lv
);
1778 static void dd_distribute_vec(gmx_domdec_t
*dd
,t_block
*cgs
,rvec
*v
,rvec
*lv
)
1780 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1782 dd_distribute_vec_sendrecv(dd
,cgs
,v
,lv
);
1786 dd_distribute_vec_scatterv(dd
,cgs
,v
,lv
);
1790 static void dd_distribute_state(gmx_domdec_t
*dd
,t_block
*cgs
,
1791 t_state
*state
,t_state
*state_local
,
1796 nh
= state
->nhchainlength
;
1800 for(i
=0;i
<efptNR
;i
++)
1802 state_local
->lambda
[i
] = state
->lambda
[i
];
1804 state_local
->fep_state
= state
->fep_state
;
1805 state_local
->veta
= state
->veta
;
1806 state_local
->vol0
= state
->vol0
;
1807 copy_mat(state
->box
,state_local
->box
);
1808 copy_mat(state
->box_rel
,state_local
->box_rel
);
1809 copy_mat(state
->boxv
,state_local
->boxv
);
1810 copy_mat(state
->svir_prev
,state_local
->svir_prev
);
1811 copy_mat(state
->fvir_prev
,state_local
->fvir_prev
);
1812 for(i
=0; i
<state_local
->ngtc
; i
++)
1814 for(j
=0; j
<nh
; j
++) {
1815 state_local
->nosehoover_xi
[i
*nh
+j
] = state
->nosehoover_xi
[i
*nh
+j
];
1816 state_local
->nosehoover_vxi
[i
*nh
+j
] = state
->nosehoover_vxi
[i
*nh
+j
];
1818 state_local
->therm_integral
[i
] = state
->therm_integral
[i
];
1820 for(i
=0; i
<state_local
->nnhpres
; i
++)
1822 for(j
=0; j
<nh
; j
++) {
1823 state_local
->nhpres_xi
[i
*nh
+j
] = state
->nhpres_xi
[i
*nh
+j
];
1824 state_local
->nhpres_vxi
[i
*nh
+j
] = state
->nhpres_vxi
[i
*nh
+j
];
1828 dd_bcast(dd
,((efptNR
)*sizeof(real
)),state_local
->lambda
);
1829 dd_bcast(dd
,sizeof(int),&state_local
->fep_state
);
1830 dd_bcast(dd
,sizeof(real
),&state_local
->veta
);
1831 dd_bcast(dd
,sizeof(real
),&state_local
->vol0
);
1832 dd_bcast(dd
,sizeof(state_local
->box
),state_local
->box
);
1833 dd_bcast(dd
,sizeof(state_local
->box_rel
),state_local
->box_rel
);
1834 dd_bcast(dd
,sizeof(state_local
->boxv
),state_local
->boxv
);
1835 dd_bcast(dd
,sizeof(state_local
->svir_prev
),state_local
->svir_prev
);
1836 dd_bcast(dd
,sizeof(state_local
->fvir_prev
),state_local
->fvir_prev
);
1837 dd_bcast(dd
,((state_local
->ngtc
*nh
)*sizeof(double)),state_local
->nosehoover_xi
);
1838 dd_bcast(dd
,((state_local
->ngtc
*nh
)*sizeof(double)),state_local
->nosehoover_vxi
);
1839 dd_bcast(dd
,state_local
->ngtc
*sizeof(double),state_local
->therm_integral
);
1840 dd_bcast(dd
,((state_local
->nnhpres
*nh
)*sizeof(double)),state_local
->nhpres_xi
);
1841 dd_bcast(dd
,((state_local
->nnhpres
*nh
)*sizeof(double)),state_local
->nhpres_vxi
);
1843 if (dd
->nat_home
> state_local
->nalloc
)
1845 dd_realloc_state(state_local
,f
,dd
->nat_home
);
1847 for(i
=0; i
<estNR
; i
++)
1849 if (EST_DISTR(i
) && (state_local
->flags
& (1<<i
)))
1853 dd_distribute_vec(dd
,cgs
,state
->x
,state_local
->x
);
1856 dd_distribute_vec(dd
,cgs
,state
->v
,state_local
->v
);
1859 dd_distribute_vec(dd
,cgs
,state
->sd_X
,state_local
->sd_X
);
1862 dd_distribute_vec(dd
,cgs
,state
->cg_p
,state_local
->cg_p
);
1865 if (state
->nrngi
== 1)
1868 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1869 state
->ld_rng
,state_local
->ld_rng
);
1874 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1875 state
->ld_rng
,state_local
->ld_rng
);
1879 if (state
->nrngi
== 1)
1881 dd_bcastc(dd
,sizeof(state_local
->ld_rngi
[0]),
1882 state
->ld_rngi
,state_local
->ld_rngi
);
1886 dd_scatter(dd
,sizeof(state_local
->ld_rngi
[0]),
1887 state
->ld_rngi
,state_local
->ld_rngi
);
1890 case estDISRE_INITF
:
1891 case estDISRE_RM3TAV
:
1892 case estORIRE_INITF
:
1894 /* Not implemented yet */
1897 gmx_incons("Unknown state entry encountered in dd_distribute_state");
/* Translate a dimension index into its axis letter.
 *
 * XX, YY and ZZ map to 'X', 'Y' and 'Z'; any other value of dim ends in
 * gmx_fatal() with the message "Unknown dim %d".
 *
 * NOTE(review): the declaration of c, the switch header and the return
 * statement are not visible in this extract.
 */
1903 static char dim2char(int dim
)
1909 case XX
: c
= 'X'; break;
1910 case YY
: c
= 'Y'; break;
1911 case ZZ
: c
= 'Z'; break;
1912 default: gmx_fatal(FARGS
,"Unknown dim %d",dim
);
1918 static void write_dd_grid_pdb(const char *fn
,gmx_large_int_t step
,
1919 gmx_domdec_t
*dd
,matrix box
,gmx_ddbox_t
*ddbox
)
1921 rvec grid_s
[2],*grid_r
=NULL
,cx
,r
;
1922 char fname
[STRLEN
],format
[STRLEN
],buf
[22];
1928 copy_rvec(dd
->comm
->cell_x0
,grid_s
[0]);
1929 copy_rvec(dd
->comm
->cell_x1
,grid_s
[1]);
1933 snew(grid_r
,2*dd
->nnodes
);
1936 dd_gather(dd
,2*sizeof(rvec
),grid_s
[0],DDMASTER(dd
) ? grid_r
[0] : NULL
);
1940 for(d
=0; d
<DIM
; d
++)
1942 for(i
=0; i
<DIM
; i
++)
1950 if (d
< ddbox
->npbcdim
&& dd
->nc
[d
] > 1)
1952 tric
[d
][i
] = box
[i
][d
]/box
[i
][i
];
1961 sprintf(fname
,"%s_%s.pdb",fn
,gmx_step_str(step
,buf
));
1962 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
1963 out
= gmx_fio_fopen(fname
,"w");
1964 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
1966 for(i
=0; i
<dd
->nnodes
; i
++)
1968 vol
= dd
->nnodes
/(box
[XX
][XX
]*box
[YY
][YY
]*box
[ZZ
][ZZ
]);
1969 for(d
=0; d
<DIM
; d
++)
1971 vol
*= grid_r
[i
*2+1][d
] - grid_r
[i
*2][d
];
1979 cx
[XX
] = grid_r
[i
*2+x
][XX
];
1980 cx
[YY
] = grid_r
[i
*2+y
][YY
];
1981 cx
[ZZ
] = grid_r
[i
*2+z
][ZZ
];
1983 fprintf(out
,format
,"ATOM",a
++,"CA","GLY",' ',1+i
,
1984 10*r
[XX
],10*r
[YY
],10*r
[ZZ
],1.0,vol
);
1988 for(d
=0; d
<DIM
; d
++)
1994 case 0: y
= 1 + i
*8 + 2*x
; break;
1995 case 1: y
= 1 + i
*8 + 2*x
- (x
% 2); break;
1996 case 2: y
= 1 + i
*8 + x
; break;
1998 fprintf(out
,"%6s%5d%5d\n","CONECT",y
,y
+(1<<d
));
2002 gmx_fio_fclose(out
);
2007 void write_dd_pdb(const char *fn
,gmx_large_int_t step
,const char *title
,
2008 gmx_mtop_t
*mtop
,t_commrec
*cr
,
2009 int natoms
,rvec x
[],matrix box
)
2011 char fname
[STRLEN
],format
[STRLEN
],format4
[STRLEN
],buf
[22];
2014 char *atomname
,*resname
;
2021 natoms
= dd
->comm
->nat
[ddnatVSITE
];
2024 sprintf(fname
,"%s_%s_n%d.pdb",fn
,gmx_step_str(step
,buf
),cr
->sim_nodeid
);
2026 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
2027 sprintf(format4
,"%s%s\n",pdbformat4
,"%6.2f%6.2f");
2029 out
= gmx_fio_fopen(fname
,"w");
2031 fprintf(out
,"TITLE %s\n",title
);
2032 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
2033 for(i
=0; i
<natoms
; i
++)
2035 ii
= dd
->gatindex
[i
];
2036 gmx_mtop_atominfo_global(mtop
,ii
,&atomname
,&resnr
,&resname
);
2037 if (i
< dd
->comm
->nat
[ddnatZONE
])
2040 while (i
>= dd
->cgindex
[dd
->comm
->zones
.cg_range
[c
+1]])
2046 else if (i
< dd
->comm
->nat
[ddnatVSITE
])
2048 b
= dd
->comm
->zones
.n
;
2052 b
= dd
->comm
->zones
.n
+ 1;
2054 fprintf(out
,strlen(atomname
)<4 ? format
: format4
,
2055 "ATOM",(ii
+1)%100000,
2056 atomname
,resname
,' ',resnr
%10000,' ',
2057 10*x
[i
][XX
],10*x
[i
][YY
],10*x
[i
][ZZ
],1.0,b
);
2059 fprintf(out
,"TER\n");
2061 gmx_fio_fclose(out
);
2064 real
dd_cutoff_mbody(gmx_domdec_t
*dd
)
2066 gmx_domdec_comm_t
*comm
;
2073 if (comm
->bInterCGBondeds
)
2075 if (comm
->cutoff_mbody
> 0)
2077 r
= comm
->cutoff_mbody
;
2081 /* cutoff_mbody=0 means we do not have DLB */
2082 r
= comm
->cellsize_min
[dd
->dim
[0]];
2083 for(di
=1; di
<dd
->ndim
; di
++)
2085 r
= min(r
,comm
->cellsize_min
[dd
->dim
[di
]]);
2087 if (comm
->bBondComm
)
2089 r
= max(r
,comm
->cutoff_mbody
);
2093 r
= min(r
,comm
->cutoff
);
2101 real
dd_cutoff_twobody(gmx_domdec_t
*dd
)
2105 r_mb
= dd_cutoff_mbody(dd
);
2107 return max(dd
->comm
->cutoff
,r_mb
);
2111 static void dd_cart_coord2pmecoord(gmx_domdec_t
*dd
,ivec coord
,ivec coord_pme
)
2115 nc
= dd
->nc
[dd
->comm
->cartpmedim
];
2116 ntot
= dd
->comm
->ntot
[dd
->comm
->cartpmedim
];
2117 copy_ivec(coord
,coord_pme
);
2118 coord_pme
[dd
->comm
->cartpmedim
] =
2119 nc
+ (coord
[dd
->comm
->cartpmedim
]*(ntot
- nc
) + (ntot
- nc
)/2)/nc
;
/* Map a DD node index onto the index of the PME node it communicates with.
 *
 * ndd     - number of DD nodes
 * npme    - number of PME nodes
 * ddindex - index of the DD node
 *
 * The assignment assumes that the major index of both the DD and the PME
 * node numbering is x. Half the PME node count is added before the integer
 * division to obtain an even distribution.
 */
static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
{
    const int half_npme = npme/2;
    const int scaled    = ddindex*npme + half_npme;

    return scaled/ndd;
}
2131 static int ddindex2pmeindex(const gmx_domdec_t
*dd
,int ddindex
)
2133 return low_ddindex2pmeindex(dd
->nnodes
,dd
->comm
->npmenodes
,ddindex
);
2136 static int cr_ddindex2pmeindex(const t_commrec
*cr
,int ddindex
)
2138 return low_ddindex2pmeindex(cr
->dd
->nnodes
,cr
->npmenodes
,ddindex
);
2141 static int *dd_pmenodes(t_commrec
*cr
)
2146 snew(pmenodes
,cr
->npmenodes
);
2148 for(i
=0; i
<cr
->dd
->nnodes
; i
++) {
2149 p0
= cr_ddindex2pmeindex(cr
,i
);
2150 p1
= cr_ddindex2pmeindex(cr
,i
+1);
2151 if (i
+1 == cr
->dd
->nnodes
|| p1
> p0
) {
2153 fprintf(debug
,"pmenode[%d] = %d\n",n
,i
+1+n
);
2154 pmenodes
[n
] = i
+ 1 + n
;
2162 static int gmx_ddcoord2pmeindex(t_commrec
*cr
,int x
,int y
,int z
)
2165 ivec coords
,coords_pme
,nc
;
2170 if (dd->comm->bCartesian) {
2171 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2172 dd_coords2pmecoords(dd,coords,coords_pme);
2173 copy_ivec(dd->ntot,nc);
2174 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2175 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2177 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2179 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2185 slab
= ddindex2pmeindex(dd
,dd_index(dd
->nc
,coords
));
2190 static int ddcoord2simnodeid(t_commrec
*cr
,int x
,int y
,int z
)
2192 gmx_domdec_comm_t
*comm
;
2194 int ddindex
,nodeid
=-1;
2196 comm
= cr
->dd
->comm
;
2201 if (comm
->bCartesianPP_PME
)
2204 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&nodeid
);
2209 ddindex
= dd_index(cr
->dd
->nc
,coords
);
2210 if (comm
->bCartesianPP
)
2212 nodeid
= comm
->ddindex2simnodeid
[ddindex
];
2218 nodeid
= ddindex
+ gmx_ddcoord2pmeindex(cr
,x
,y
,z
);
2230 static int dd_simnode2pmenode(t_commrec
*cr
,int sim_nodeid
)
2233 gmx_domdec_comm_t
*comm
;
2234 ivec coord
,coord_pme
;
2241 /* This assumes a uniform x domain decomposition grid cell size */
2242 if (comm
->bCartesianPP_PME
)
2245 MPI_Cart_coords(cr
->mpi_comm_mysim
,sim_nodeid
,DIM
,coord
);
2246 if (coord
[comm
->cartpmedim
] < dd
->nc
[comm
->cartpmedim
])
2248 /* This is a PP node */
2249 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2250 MPI_Cart_rank(cr
->mpi_comm_mysim
,coord_pme
,&pmenode
);
2254 else if (comm
->bCartesianPP
)
2256 if (sim_nodeid
< dd
->nnodes
)
2258 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2263 /* This assumes DD cells with identical x coordinates
2264 * are numbered sequentially.
2266 if (dd
->comm
->pmenodes
== NULL
)
2268 if (sim_nodeid
< dd
->nnodes
)
2270 /* The DD index equals the nodeid */
2271 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2277 while (sim_nodeid
> dd
->comm
->pmenodes
[i
])
2281 if (sim_nodeid
< dd
->comm
->pmenodes
[i
])
2283 pmenode
= dd
->comm
->pmenodes
[i
];
2291 gmx_bool
gmx_pmeonlynode(t_commrec
*cr
,int sim_nodeid
)
2293 gmx_bool bPMEOnlyNode
;
2295 if (DOMAINDECOMP(cr
))
2297 bPMEOnlyNode
= (dd_simnode2pmenode(cr
,sim_nodeid
) == -1);
2301 bPMEOnlyNode
= FALSE
;
2304 return bPMEOnlyNode
;
2307 void get_pme_ddnodes(t_commrec
*cr
,int pmenodeid
,
2308 int *nmy_ddnodes
,int **my_ddnodes
,int *node_peer
)
2312 ivec coord
,coord_pme
;
2316 snew(*my_ddnodes
,(dd
->nnodes
+cr
->npmenodes
-1)/cr
->npmenodes
);
2319 for(x
=0; x
<dd
->nc
[XX
]; x
++)
2321 for(y
=0; y
<dd
->nc
[YY
]; y
++)
2323 for(z
=0; z
<dd
->nc
[ZZ
]; z
++)
2325 if (dd
->comm
->bCartesianPP_PME
)
2330 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2331 if (dd
->ci
[XX
] == coord_pme
[XX
] &&
2332 dd
->ci
[YY
] == coord_pme
[YY
] &&
2333 dd
->ci
[ZZ
] == coord_pme
[ZZ
])
2334 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2338 /* The slab corresponds to the nodeid in the PME group */
2339 if (gmx_ddcoord2pmeindex(cr
,x
,y
,z
) == pmenodeid
)
2341 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2348 /* The last PP-only node is the peer node */
2349 *node_peer
= (*my_ddnodes
)[*nmy_ddnodes
-1];
2353 fprintf(debug
,"Receive coordinates from PP nodes:");
2354 for(x
=0; x
<*nmy_ddnodes
; x
++)
2356 fprintf(debug
," %d",(*my_ddnodes
)[x
]);
2358 fprintf(debug
,"\n");
2362 static gmx_bool
receive_vir_ener(t_commrec
*cr
)
2364 gmx_domdec_comm_t
*comm
;
2365 int pmenode
,coords
[DIM
],rank
;
2369 if (cr
->npmenodes
< cr
->dd
->nnodes
)
2371 comm
= cr
->dd
->comm
;
2372 if (comm
->bCartesianPP_PME
)
2374 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2376 MPI_Cart_coords(cr
->mpi_comm_mysim
,cr
->sim_nodeid
,DIM
,coords
);
2377 coords
[comm
->cartpmedim
]++;
2378 if (coords
[comm
->cartpmedim
] < cr
->dd
->nc
[comm
->cartpmedim
])
2380 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&rank
);
2381 if (dd_simnode2pmenode(cr
,rank
) == pmenode
)
2383 /* This is not the last PP node for pmenode */
2391 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2392 if (cr
->sim_nodeid
+1 < cr
->nnodes
&&
2393 dd_simnode2pmenode(cr
,cr
->sim_nodeid
+1) == pmenode
)
2395 /* This is not the last PP node for pmenode */
2404 static void set_zones_ncg_home(gmx_domdec_t
*dd
)
2406 gmx_domdec_zones_t
*zones
;
2409 zones
= &dd
->comm
->zones
;
2411 zones
->cg_range
[0] = 0;
2412 for(i
=1; i
<zones
->n
+1; i
++)
2414 zones
->cg_range
[i
] = dd
->ncg_home
;
2418 static void rebuild_cgindex(gmx_domdec_t
*dd
,
2419 const int *gcgs_index
,t_state
*state
)
2421 int nat
,i
,*ind
,*dd_cg_gl
,*cgindex
,cg_gl
;
2424 dd_cg_gl
= dd
->index_gl
;
2425 cgindex
= dd
->cgindex
;
2428 for(i
=0; i
<state
->ncg_gl
; i
++)
2432 dd_cg_gl
[i
] = cg_gl
;
2433 nat
+= gcgs_index
[cg_gl
+1] - gcgs_index
[cg_gl
];
2437 dd
->ncg_home
= state
->ncg_gl
;
2440 set_zones_ncg_home(dd
);
/* Return the cginfo value for charge group cg.
 *
 * A loop runs while cg is not below cginfo_mb->cg_end; the value is then
 * read from cginfo_mb->cginfo at index (cg - cg_start) % cg_mod.
 *
 * NOTE(review): the body of the while loop is not visible in this extract;
 * presumably it advances cginfo_mb to the next entry - confirm.
 */
2443 static int ddcginfo(const cginfo_mb_t
*cginfo_mb
,int cg
)
2445 while (cg
>= cginfo_mb
->cg_end
)
2450 return cginfo_mb
->cginfo
[(cg
- cginfo_mb
->cg_start
) % cginfo_mb
->cg_mod
];
/* Fill in charge-group info for the local charge groups cg0 <= cg < cg1.
 *
 * For each local index cg, fr->cginfo[cg] is set to the ddcginfo() lookup
 * of the global index index_gl[cg] in fr->cginfo_mb. When bLocalCG is not
 * NULL, bLocalCG[index_gl[cg]] is additionally set to TRUE for the same
 * range.
 *
 * NOTE(review): several original lines between the declarations and the
 * first assignment are missing from this extract, so an enclosing guard
 * around the cginfo loop cannot be ruled out.
 */
2453 static void dd_set_cginfo(int *index_gl
,int cg0
,int cg1
,
2454 t_forcerec
*fr
,char *bLocalCG
)
2456 cginfo_mb_t
*cginfo_mb
;
2462 cginfo_mb
= fr
->cginfo_mb
;
2463 cginfo
= fr
->cginfo
;
2465 for(cg
=cg0
; cg
<cg1
; cg
++)
2467 cginfo
[cg
] = ddcginfo(cginfo_mb
,index_gl
[cg
]);
2471 if (bLocalCG
!= NULL
)
2473 for(cg
=cg0
; cg
<cg1
; cg
++)
2475 bLocalCG
[index_gl
[cg
]] = TRUE
;
2480 static void make_dd_indices(gmx_domdec_t
*dd
,
2481 const int *gcgs_index
,int cg_start
)
2483 int nzone
,zone
,zone1
,cg0
,cg1
,cg1_p1
,cg
,cg_gl
,a
,a_gl
;
2484 int *zone2cg
,*zone_ncg1
,*index_gl
,*gatindex
;
2489 bLocalCG
= dd
->comm
->bLocalCG
;
2491 if (dd
->nat_tot
> dd
->gatindex_nalloc
)
2493 dd
->gatindex_nalloc
= over_alloc_dd(dd
->nat_tot
);
2494 srenew(dd
->gatindex
,dd
->gatindex_nalloc
);
2497 nzone
= dd
->comm
->zones
.n
;
2498 zone2cg
= dd
->comm
->zones
.cg_range
;
2499 zone_ncg1
= dd
->comm
->zone_ncg1
;
2500 index_gl
= dd
->index_gl
;
2501 gatindex
= dd
->gatindex
;
2502 bCGs
= dd
->comm
->bCGs
;
2504 if (zone2cg
[1] != dd
->ncg_home
)
2506 gmx_incons("dd->ncg_zone is not up to date");
2509 /* Make the local to global and global to local atom index */
2510 a
= dd
->cgindex
[cg_start
];
2511 for(zone
=0; zone
<nzone
; zone
++)
2519 cg0
= zone2cg
[zone
];
2521 cg1
= zone2cg
[zone
+1];
2522 cg1_p1
= cg0
+ zone_ncg1
[zone
];
2524 for(cg
=cg0
; cg
<cg1
; cg
++)
2529 /* Signal that this cg is from more than one pulse away */
2532 cg_gl
= index_gl
[cg
];
2535 for(a_gl
=gcgs_index
[cg_gl
]; a_gl
<gcgs_index
[cg_gl
+1]; a_gl
++)
2538 ga2la_set(dd
->ga2la
,a_gl
,a
,zone1
);
2544 gatindex
[a
] = cg_gl
;
2545 ga2la_set(dd
->ga2la
,cg_gl
,a
,zone1
);
2552 static int check_bLocalCG(gmx_domdec_t
*dd
,int ncg_sys
,const char *bLocalCG
,
2558 if (bLocalCG
== NULL
)
2562 for(i
=0; i
<dd
->ncg_tot
; i
++)
2564 if (!bLocalCG
[dd
->index_gl
[i
]])
2567 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd
->rank
,where
,i
+1,dd
->index_gl
[i
]+1,dd
->ncg_home
);
2572 for(i
=0; i
<ncg_sys
; i
++)
2579 if (ngl
!= dd
->ncg_tot
)
2581 fprintf(stderr
,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd
->rank
,where
,ngl
,dd
->ncg_tot
);
2588 static void check_index_consistency(gmx_domdec_t
*dd
,
2589 int natoms_sys
,int ncg_sys
,
2592 int nerr
,ngl
,i
,a
,cell
;
2597 if (dd
->comm
->DD_debug
> 1)
2599 snew(have
,natoms_sys
);
2600 for(a
=0; a
<dd
->nat_tot
; a
++)
2602 if (have
[dd
->gatindex
[a
]] > 0)
2604 fprintf(stderr
,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd
->rank
,dd
->gatindex
[a
]+1,have
[dd
->gatindex
[a
]],a
+1);
2608 have
[dd
->gatindex
[a
]] = a
+ 1;
2614 snew(have
,dd
->nat_tot
);
2617 for(i
=0; i
<natoms_sys
; i
++)
2619 if (ga2la_get(dd
->ga2la
,i
,&a
,&cell
))
2621 if (a
>= dd
->nat_tot
)
2623 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd
->rank
,i
+1,a
+1,dd
->nat_tot
);
2629 if (dd
->gatindex
[a
] != i
)
2631 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd
->rank
,i
+1,a
+1,dd
->gatindex
[a
]+1);
2638 if (ngl
!= dd
->nat_tot
)
2641 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2642 dd
->rank
,where
,ngl
,dd
->nat_tot
);
2644 for(a
=0; a
<dd
->nat_tot
; a
++)
2649 "DD node %d, %s: local atom %d, global %d has no global index\n",
2650 dd
->rank
,where
,a
+1,dd
->gatindex
[a
]+1);
2655 nerr
+= check_bLocalCG(dd
,ncg_sys
,dd
->comm
->bLocalCG
,where
);
2658 gmx_fatal(FARGS
,"DD node %d, %s: %d atom/cg index inconsistencies",
2659 dd
->rank
,where
,nerr
);
/* Remove local index information held in dd.
 *
 * Two ways of clearing the global-to-local atom table are visible: a
 * wholesale ga2la_clear(), and a loop calling ga2la_del() for the global
 * indices of local atoms a_start .. dd->nat_tot-1. The bLocalCG entries of
 * charge groups cg_start .. dd->ncg_tot-1 are reset to FALSE, the local
 * vsite indices are cleared, and the local constraint indices are cleared
 * when dd->constraints is set.
 *
 * NOTE(review): the conditions that choose between ga2la_clear() and the
 * ga2la_del() loop, and any guard around the bLocalCG loop, are not visible
 * in this extract.
 */
2663 static void clear_dd_indices(gmx_domdec_t
*dd
,int cg_start
,int a_start
)
2670 /* Clear the whole list without searching */
2671 ga2la_clear(dd
->ga2la
);
2675 for(i
=a_start
; i
<dd
->nat_tot
; i
++)
2677 ga2la_del(dd
->ga2la
,dd
->gatindex
[i
]);
2681 bLocalCG
= dd
->comm
->bLocalCG
;
2684 for(i
=cg_start
; i
<dd
->ncg_tot
; i
++)
2686 bLocalCG
[dd
->index_gl
[i
]] = FALSE
;
2690 dd_clear_local_vsite_indices(dd
);
2692 if (dd
->constraints
)
2694 dd_clear_local_constraint_indices(dd
);
2698 static real
grid_jump_limit(gmx_domdec_comm_t
*comm
,real cutoff
,
2701 real grid_jump_limit
;
2703 /* The distance between the boundaries of cells at distance
2704 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2705 * and by the fact that cells should not be shifted by more than
2706 * half their size, such that cg's only shift by one cell
2707 * at redecomposition.
2709 grid_jump_limit
= comm
->cellsize_limit
;
2710 if (!comm
->bVacDLBNoLimit
)
2712 grid_jump_limit
= max(grid_jump_limit
,
2713 cutoff
/comm
->cd
[dim_ind
].np
);
2716 return grid_jump_limit
;
2719 static gmx_bool
check_grid_jump(gmx_large_int_t step
,
2725 gmx_domdec_comm_t
*comm
;
2734 for(d
=1; d
<dd
->ndim
; d
++)
2737 limit
= grid_jump_limit(comm
,cutoff
,d
);
2738 bfac
= ddbox
->box_size
[dim
];
2739 if (ddbox
->tric_dir
[dim
])
2741 bfac
*= ddbox
->skew_fac
[dim
];
2743 if ((comm
->cell_f1
[d
] - comm
->cell_f_max0
[d
])*bfac
< limit
||
2744 (comm
->cell_f0
[d
] - comm
->cell_f_min1
[d
])*bfac
> -limit
)
2752 /* This error should never be triggered under normal
2753 * circumstances, but you never know ...
2755 gmx_fatal(FARGS
,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
2756 gmx_step_str(step
,buf
),
2757 dim2char(dim
),dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
2765 static int dd_load_count(gmx_domdec_comm_t
*comm
)
2767 return (comm
->eFlop
? comm
->flop_n
: comm
->cycl_n
[ddCyclF
]);
2770 static float dd_force_load(gmx_domdec_comm_t
*comm
)
2777 if (comm
->eFlop
> 1)
2779 load
*= 1.0 + (comm
->eFlop
- 1)*(0.1*rand()/RAND_MAX
- 0.05);
2784 load
= comm
->cycl
[ddCyclF
];
2785 if (comm
->cycl_n
[ddCyclF
] > 1)
2787 /* Subtract the maximum of the last n cycle counts
2788 * to get rid of possible high counts due to other soures,
2789 * for instance system activity, that would otherwise
2790 * affect the dynamic load balancing.
2792 load
-= comm
->cycl_max
[ddCyclF
];
/* Allocate and fill *dim_f with the cell boundary fractions along dim.
 *
 * *dim_f receives dd->nc[dim]+1 entries from snew(). For the interior
 * boundaries i = 1 .. nc-1 the value is either the running sum
 * (*dim_f)[i-1] + comm->slb_frac[dim][i-1] when comm->slb_frac[dim] is set,
 * or the uniform fraction i/nc otherwise. The last entry is set to 1.
 *
 * NOTE(review): the assignment of comm, the declaration of i and any
 * explicit initialisation of (*dim_f)[0] are not visible in this extract.
 */
2799 static void set_slb_pme_dim_f(gmx_domdec_t
*dd
,int dim
,real
**dim_f
)
2801 gmx_domdec_comm_t
*comm
;
2806 snew(*dim_f
,dd
->nc
[dim
]+1);
2808 for(i
=1; i
<dd
->nc
[dim
]; i
++)
2810 if (comm
->slb_frac
[dim
])
2812 (*dim_f
)[i
] = (*dim_f
)[i
-1] + comm
->slb_frac
[dim
][i
-1];
2816 (*dim_f
)[i
] = (real
)i
/(real
)dd
->nc
[dim
];
2819 (*dim_f
)[dd
->nc
[dim
]] = 1;
2822 static void init_ddpme(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,int dimind
)
2824 int pmeindex
,slab
,nso
,i
;
2827 if (dimind
== 0 && dd
->dim
[0] == YY
&& dd
->comm
->npmenodes_x
== 1)
2833 ddpme
->dim
= dimind
;
2835 ddpme
->dim_match
= (ddpme
->dim
== dd
->dim
[dimind
]);
2837 ddpme
->nslab
= (ddpme
->dim
== 0 ?
2838 dd
->comm
->npmenodes_x
:
2839 dd
->comm
->npmenodes_y
);
2841 if (ddpme
->nslab
<= 1)
2846 nso
= dd
->comm
->npmenodes
/ddpme
->nslab
;
2847 /* Determine for each PME slab the PP location range for dimension dim */
2848 snew(ddpme
->pp_min
,ddpme
->nslab
);
2849 snew(ddpme
->pp_max
,ddpme
->nslab
);
2850 for(slab
=0; slab
<ddpme
->nslab
; slab
++) {
2851 ddpme
->pp_min
[slab
] = dd
->nc
[dd
->dim
[dimind
]] - 1;
2852 ddpme
->pp_max
[slab
] = 0;
2854 for(i
=0; i
<dd
->nnodes
; i
++) {
2855 ddindex2xyz(dd
->nc
,i
,xyz
);
2856 /* For y only use our y/z slab.
2857 * This assumes that the PME x grid size matches the DD grid size.
2859 if (dimind
== 0 || xyz
[XX
] == dd
->ci
[XX
]) {
2860 pmeindex
= ddindex2pmeindex(dd
,i
);
2862 slab
= pmeindex
/nso
;
2864 slab
= pmeindex
% ddpme
->nslab
;
2866 ddpme
->pp_min
[slab
] = min(ddpme
->pp_min
[slab
],xyz
[dimind
]);
2867 ddpme
->pp_max
[slab
] = max(ddpme
->pp_max
[slab
],xyz
[dimind
]);
2871 set_slb_pme_dim_f(dd
,ddpme
->dim
,&ddpme
->slb_dim_f
);
/* Return the PME maxshift value for the x dimension.
 *
 * When the first PME decomposition entry (dd->comm->ddpme[0]) is along XX,
 * its maxshift field is returned.
 *
 * NOTE(review): the return value for the other case is not visible in
 * this extract.
 */
2874 int dd_pme_maxshift_x(gmx_domdec_t
*dd
)
2876 if (dd
->comm
->ddpme
[0].dim
== XX
)
2878 return dd
->comm
->ddpme
[0].maxshift
;
/* Return the PME maxshift value for the y dimension.
 *
 * The first PME decomposition entry is used when it is along YY; otherwise
 * the second entry is used, provided at least two PME decomposition
 * dimensions exist (npmedecompdim >= 2) and that entry is along YY.
 *
 * NOTE(review): the return value when neither entry is along YY is not
 * visible in this extract.
 */
2886 int dd_pme_maxshift_y(gmx_domdec_t
*dd
)
2888 if (dd
->comm
->ddpme
[0].dim
== YY
)
2890 return dd
->comm
->ddpme
[0].maxshift
;
2892 else if (dd
->comm
->npmedecompdim
>= 2 && dd
->comm
->ddpme
[1].dim
== YY
)
2894 return dd
->comm
->ddpme
[1].maxshift
;
2902 static void set_pme_maxshift(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,
2903 gmx_bool bUniform
,gmx_ddbox_t
*ddbox
,real
*cell_f
)
2905 gmx_domdec_comm_t
*comm
;
2908 real range
,pme_boundary
;
2912 nc
= dd
->nc
[ddpme
->dim
];
2915 if (!ddpme
->dim_match
)
2917 /* PP decomposition is not along dim: the worst situation */
2920 else if (ns
<= 3 || (bUniform
&& ns
== nc
))
2922 /* The optimal situation */
2927 /* We need to check for all pme nodes which nodes they
2928 * could possibly need to communicate with.
2930 xmin
= ddpme
->pp_min
;
2931 xmax
= ddpme
->pp_max
;
2932 /* Allow for atoms to be maximally 2/3 times the cut-off
2933 * out of their DD cell. This is a reasonable balance between
2934 * performance and support for most charge-group/cut-off
2937 range
= 2.0/3.0*comm
->cutoff
/ddbox
->box_size
[ddpme
->dim
];
2938 /* Avoid extra communication when we are exactly at a boundary */
2944 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2945 pme_boundary
= (real
)s
/ns
;
2948 cell_f
[xmax
[s
-(sh
+1) ]+1] + range
> pme_boundary
) ||
2950 cell_f
[xmax
[s
-(sh
+1)+ns
]+1] - 1 + range
> pme_boundary
)))
2954 pme_boundary
= (real
)(s
+1)/ns
;
2957 cell_f
[xmin
[s
+(sh
+1) ] ] - range
< pme_boundary
) ||
2959 cell_f
[xmin
[s
+(sh
+1)-ns
] ] + 1 - range
< pme_boundary
)))
2966 ddpme
->maxshift
= sh
;
2970 fprintf(debug
,"PME slab communication range for dim %d is %d\n",
2971 ddpme
->dim
,ddpme
->maxshift
);
/* Abort when the box is too small for the DD grid in a decomposed dimension.
 *
 * The loop runs over the dd->ndim decomposition dimensions. For a dimension
 * dim below ddbox->nboundeddim, gmx_fatal() is raised when
 *     box_size[dim]*skew_fac[dim]
 * is smaller than
 *     dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN.
 *
 * NOTE(review): the statement that derives dim from the loop index d is
 * not visible in this extract; presumably dim = dd->dim[d] - confirm.
 */
2975 static void check_box_size(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
2979 for(d
=0; d
<dd
->ndim
; d
++)
2982 if (dim
< ddbox
->nboundeddim
&&
2983 ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
] <
2984 dd
->nc
[dim
]*dd
->comm
->cellsize_limit
*DD_CELL_MARGIN
)
2986 gmx_fatal(FARGS
,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2987 dim2char(dim
),ddbox
->box_size
[dim
],ddbox
->skew_fac
[dim
],
2988 dd
->nc
[dim
],dd
->comm
->cellsize_limit
);
2993 static void set_dd_cell_sizes_slb(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
,
2994 gmx_bool bMaster
,ivec npulse
)
2996 gmx_domdec_comm_t
*comm
;
2999 real
*cell_x
,cell_dx
,cellsize
;
3003 for(d
=0; d
<DIM
; d
++)
3005 cellsize_min
[d
] = ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
];
3007 if (dd
->nc
[d
] == 1 || comm
->slb_frac
[d
] == NULL
)
3010 cell_dx
= ddbox
->box_size
[d
]/dd
->nc
[d
];
3013 for(j
=0; j
<dd
->nc
[d
]+1; j
++)
3015 dd
->ma
->cell_x
[d
][j
] = ddbox
->box0
[d
] + j
*cell_dx
;
3020 comm
->cell_x0
[d
] = ddbox
->box0
[d
] + (dd
->ci
[d
] )*cell_dx
;
3021 comm
->cell_x1
[d
] = ddbox
->box0
[d
] + (dd
->ci
[d
]+1)*cell_dx
;
3023 cellsize
= cell_dx
*ddbox
->skew_fac
[d
];
3024 while (cellsize
*npulse
[d
] < comm
->cutoff
&& npulse
[d
] < dd
->nc
[d
]-1)
3028 cellsize_min
[d
] = cellsize
;
3032 /* Statically load balanced grid */
3033 /* Also when we are not doing a master distribution we determine
3034 * all cell borders in a loop to obtain identical values
3035 * to the master distribution case and to determine npulse.
3039 cell_x
= dd
->ma
->cell_x
[d
];
3043 snew(cell_x
,dd
->nc
[d
]+1);
3045 cell_x
[0] = ddbox
->box0
[d
];
3046 for(j
=0; j
<dd
->nc
[d
]; j
++)
3048 cell_dx
= ddbox
->box_size
[d
]*comm
->slb_frac
[d
][j
];
3049 cell_x
[j
+1] = cell_x
[j
] + cell_dx
;
3050 cellsize
= cell_dx
*ddbox
->skew_fac
[d
];
3051 while (cellsize
*npulse
[d
] < comm
->cutoff
&&
3052 npulse
[d
] < dd
->nc
[d
]-1)
3056 cellsize_min
[d
] = min(cellsize_min
[d
],cellsize
);
3060 comm
->cell_x0
[d
] = cell_x
[dd
->ci
[d
]];
3061 comm
->cell_x1
[d
] = cell_x
[dd
->ci
[d
]+1];
3065 /* The following limitation is to avoid that a cell would receive
3066 * some of its own home charge groups back over the periodic boundary.
3067 * Double charge groups cause trouble with the global indices.
3069 if (d
< ddbox
->npbcdim
&&
3070 dd
->nc
[d
] > 1 && npulse
[d
] >= dd
->nc
[d
])
3072 gmx_fatal_collective(FARGS
,NULL
,dd
,
3073 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
3074 dim2char(d
),ddbox
->box_size
[d
],ddbox
->skew_fac
[d
],
3076 dd
->nc
[d
],dd
->nc
[d
],
3077 dd
->nnodes
> dd
->nc
[d
] ? "cells" : "processors");
3081 if (!comm
->bDynLoadBal
)
3083 copy_rvec(cellsize_min
,comm
->cellsize_min
);
3086 for(d
=0; d
<comm
->npmedecompdim
; d
++)
3088 set_pme_maxshift(dd
,&comm
->ddpme
[d
],
3089 comm
->slb_frac
[dd
->dim
[d
]]==NULL
,ddbox
,
3090 comm
->ddpme
[d
].slb_dim_f
);
3095 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t
*dd
,
3096 int d
,int dim
,gmx_domdec_root_t
*root
,
3098 gmx_bool bUniform
,gmx_large_int_t step
, real cellsize_limit_f
, int range
[])
3100 gmx_domdec_comm_t
*comm
;
3101 int ncd
,i
,j
,nmin
,nmin_old
;
3102 gmx_bool bLimLo
,bLimHi
;
3104 real fac
,halfway
,cellsize_limit_f_i
,region_size
;
3105 gmx_bool bPBC
,bLastHi
=FALSE
;
3106 int nrange
[]={range
[0],range
[1]};
3108 region_size
= root
->cell_f
[range
[1]]-root
->cell_f
[range
[0]];
3114 bPBC
= (dim
< ddbox
->npbcdim
);
3116 cell_size
= root
->buf_ncd
;
3120 fprintf(debug
,"enforce_limits: %d %d\n",range
[0],range
[1]);
3123 /* First we need to check if the scaling does not make cells
3124 * smaller than the smallest allowed size.
3125 * We need to do this iteratively, since if a cell is too small,
3126 * it needs to be enlarged, which makes all the other cells smaller,
3127 * which could in turn make another cell smaller than allowed.
3129 for(i
=range
[0]; i
<range
[1]; i
++)
3131 root
->bCellMin
[i
] = FALSE
;
3137 /* We need the total for normalization */
3139 for(i
=range
[0]; i
<range
[1]; i
++)
3141 if (root
->bCellMin
[i
] == FALSE
)
3143 fac
+= cell_size
[i
];
3146 fac
= ( region_size
- nmin
*cellsize_limit_f
)/fac
; /* substracting cells already set to cellsize_limit_f */
3147 /* Determine the cell boundaries */
3148 for(i
=range
[0]; i
<range
[1]; i
++)
3150 if (root
->bCellMin
[i
] == FALSE
)
3152 cell_size
[i
] *= fac
;
3153 if (!bPBC
&& (i
== 0 || i
== dd
->nc
[dim
] -1))
3155 cellsize_limit_f_i
= 0;
3159 cellsize_limit_f_i
= cellsize_limit_f
;
3161 if (cell_size
[i
] < cellsize_limit_f_i
)
3163 root
->bCellMin
[i
] = TRUE
;
3164 cell_size
[i
] = cellsize_limit_f_i
;
3168 root
->cell_f
[i
+1] = root
->cell_f
[i
] + cell_size
[i
];
3171 while (nmin
> nmin_old
);
3174 cell_size
[i
] = root
->cell_f
[i
+1] - root
->cell_f
[i
];
3175 /* For this check we should not use DD_CELL_MARGIN,
3176 * but a slightly smaller factor,
3177 * since rounding could get us below the limit.
3179 if (bPBC
&& cell_size
[i
] < cellsize_limit_f
*DD_CELL_MARGIN2
/DD_CELL_MARGIN
)
3182 gmx_fatal(FARGS
,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3183 gmx_step_str(step
,buf
),
3184 dim2char(dim
),ddbox
->box_size
[dim
],ddbox
->skew_fac
[dim
],
3185 ncd
,comm
->cellsize_min
[dim
]);
3188 root
->bLimited
= (nmin
> 0) || (range
[0]>0) || (range
[1]<ncd
);
3192 /* Check if the boundary did not displace more than halfway
3193 * each of the cells it bounds, as this could cause problems,
3194 * especially when the differences between cell sizes are large.
3195 * If changes are applied, they will not make cells smaller
3196 * than the cut-off, as we check all the boundaries which
3197 * might be affected by a change and if the old state was ok,
3198 * the cells will at most be shrunk back to their old size.
3200 for(i
=range
[0]+1; i
<range
[1]; i
++)
3202 halfway
= 0.5*(root
->old_cell_f
[i
] + root
->old_cell_f
[i
-1]);
3203 if (root
->cell_f
[i
] < halfway
)
3205 root
->cell_f
[i
] = halfway
;
3206 /* Check if the change also causes shifts of the next boundaries */
3207 for(j
=i
+1; j
<range
[1]; j
++)
3209 if (root
->cell_f
[j
] < root
->cell_f
[j
-1] + cellsize_limit_f
)
3210 root
->cell_f
[j
] = root
->cell_f
[j
-1] + cellsize_limit_f
;
3213 halfway
= 0.5*(root
->old_cell_f
[i
] + root
->old_cell_f
[i
+1]);
3214 if (root
->cell_f
[i
] > halfway
)
3216 root
->cell_f
[i
] = halfway
;
3217 /* Check if the change also causes shifts of the next boundaries */
3218 for(j
=i
-1; j
>=range
[0]+1; j
--)
3220 if (root
->cell_f
[j
] > root
->cell_f
[j
+1] - cellsize_limit_f
)
3221 root
->cell_f
[j
] = root
->cell_f
[j
+1] - cellsize_limit_f
;
3227 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3228 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3229 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3230 * for a and b nrange is used */
3233 /* Take care of the staggering of the cell boundaries */
3236 for(i
=range
[0]; i
<range
[1]; i
++)
3238 root
->cell_f_max0
[i
] = root
->cell_f
[i
];
3239 root
->cell_f_min1
[i
] = root
->cell_f
[i
+1];
3244 for(i
=range
[0]+1; i
<range
[1]; i
++)
3246 bLimLo
= (root
->cell_f
[i
] < root
->bound_min
[i
]);
3247 bLimHi
= (root
->cell_f
[i
] > root
->bound_max
[i
]);
3248 if (bLimLo
&& bLimHi
)
3250 /* Both limits violated, try the best we can */
3251 /* For this case we split the original range (range) in two parts and care about the other limitations in the next iteration. */
3252 root
->cell_f
[i
] = 0.5*(root
->bound_min
[i
] + root
->bound_max
[i
]);
3255 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3259 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3265 /* root->cell_f[i] = root->bound_min[i]; */
3266 nrange
[1]=i
; /* only store violation location. There could be a LimLo violation following with an higher index */
3269 else if (bLimHi
&& !bLastHi
)
3272 if (nrange
[1] < range
[1]) /* found a LimLo before */
3274 root
->cell_f
[nrange
[1]] = root
->bound_min
[nrange
[1]];
3275 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3276 nrange
[0]=nrange
[1];
3278 root
->cell_f
[i
] = root
->bound_max
[i
];
3280 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3285 if (nrange
[1] < range
[1]) /* found last a LimLo */
3287 root
->cell_f
[nrange
[1]] = root
->bound_min
[nrange
[1]];
3288 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3289 nrange
[0]=nrange
[1];
3291 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3293 else if (nrange
[0] > range
[0]) /* found at least one LimHi */
3295 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3302 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t
*dd
,
3303 int d
,int dim
,gmx_domdec_root_t
*root
,
3304 gmx_ddbox_t
*ddbox
,gmx_bool bDynamicBox
,
3305 gmx_bool bUniform
,gmx_large_int_t step
)
3307 gmx_domdec_comm_t
*comm
;
3310 real load_aver
,load_i
,imbalance
,change
,change_max
,sc
;
3311 real cellsize_limit_f
,dist_min_f
,dist_min_f_hard
,space
;
3315 int range
[] = { 0, 0 };
3319 /* Convert the maximum change from the input percentage to a fraction */
3320 change_limit
= comm
->dlb_scale_lim
*0.01;
3324 bPBC
= (dim
< ddbox
->npbcdim
);
3326 cell_size
= root
->buf_ncd
;
3328 /* Store the original boundaries */
3329 for(i
=0; i
<ncd
+1; i
++)
3331 root
->old_cell_f
[i
] = root
->cell_f
[i
];
3334 for(i
=0; i
<ncd
; i
++)
3336 cell_size
[i
] = 1.0/ncd
;
3339 else if (dd_load_count(comm
))
3341 load_aver
= comm
->load
[d
].sum_m
/ncd
;
3343 for(i
=0; i
<ncd
; i
++)
3345 /* Determine the relative imbalance of cell i */
3346 load_i
= comm
->load
[d
].load
[i
*comm
->load
[d
].nload
+2];
3347 imbalance
= (load_i
- load_aver
)/(load_aver
>0 ? load_aver
: 1);
3348 /* Determine the change of the cell size using underrelaxation */
3349 change
= -relax
*imbalance
;
3350 change_max
= max(change_max
,max(change
,-change
));
3352 /* Limit the amount of scaling.
3353 * We need to use the same rescaling for all cells in one row,
3354 * otherwise the load balancing might not converge.
3357 if (change_max
> change_limit
)
3359 sc
*= change_limit
/change_max
;
3361 for(i
=0; i
<ncd
; i
++)
3363 /* Determine the relative imbalance of cell i */
3364 load_i
= comm
->load
[d
].load
[i
*comm
->load
[d
].nload
+2];
3365 imbalance
= (load_i
- load_aver
)/(load_aver
>0 ? load_aver
: 1);
3366 /* Determine the change of the cell size using underrelaxation */
3367 change
= -sc
*imbalance
;
3368 cell_size
[i
] = (root
->cell_f
[i
+1]-root
->cell_f
[i
])*(1 + change
);
3372 cellsize_limit_f
= comm
->cellsize_min
[dim
]/ddbox
->box_size
[dim
];
3373 cellsize_limit_f
*= DD_CELL_MARGIN
;
3374 dist_min_f_hard
= grid_jump_limit(comm
,comm
->cutoff
,d
)/ddbox
->box_size
[dim
];
3375 dist_min_f
= dist_min_f_hard
* DD_CELL_MARGIN
;
3376 if (ddbox
->tric_dir
[dim
])
3378 cellsize_limit_f
/= ddbox
->skew_fac
[dim
];
3379 dist_min_f
/= ddbox
->skew_fac
[dim
];
3381 if (bDynamicBox
&& d
> 0)
3383 dist_min_f
*= DD_PRES_SCALE_MARGIN
;
3385 if (d
> 0 && !bUniform
)
3387 /* Make sure that the grid is not shifted too much */
3388 for(i
=1; i
<ncd
; i
++) {
3389 if (root
->cell_f_min1
[i
] - root
->cell_f_max0
[i
-1] < 2 * dist_min_f_hard
)
3391 gmx_incons("Inconsistent DD boundary staggering limits!");
3393 root
->bound_min
[i
] = root
->cell_f_max0
[i
-1] + dist_min_f
;
3394 space
= root
->cell_f
[i
] - (root
->cell_f_max0
[i
-1] + dist_min_f
);
3396 root
->bound_min
[i
] += 0.5*space
;
3398 root
->bound_max
[i
] = root
->cell_f_min1
[i
] - dist_min_f
;
3399 space
= root
->cell_f
[i
] - (root
->cell_f_min1
[i
] - dist_min_f
);
3401 root
->bound_max
[i
] += 0.5*space
;
3406 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3408 root
->cell_f_max0
[i
-1] + dist_min_f
,
3409 root
->bound_min
[i
],root
->cell_f
[i
],root
->bound_max
[i
],
3410 root
->cell_f_min1
[i
] - dist_min_f
);
3415 root
->cell_f
[0] = 0;
3416 root
->cell_f
[ncd
] = 1;
3417 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, range
);
3420 /* After the checks above, the cells should obey the cut-off
3421 * restrictions, but it does not hurt to check.
3423 for(i
=0; i
<ncd
; i
++)
3427 fprintf(debug
,"Relative bounds dim %d cell %d: %f %f\n",
3428 dim
,i
,root
->cell_f
[i
],root
->cell_f
[i
+1]);
3431 if ((bPBC
|| (i
!= 0 && i
!= dd
->nc
[dim
]-1)) &&
3432 root
->cell_f
[i
+1] - root
->cell_f
[i
] <
3433 cellsize_limit_f
/DD_CELL_MARGIN
)
3437 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3438 gmx_step_str(step
,buf
),dim2char(dim
),i
,
3439 (root
->cell_f
[i
+1] - root
->cell_f
[i
])
3440 *ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
]);
3445 /* Store the cell boundaries of the lower dimensions at the end */
3446 for(d1
=0; d1
<d
; d1
++)
3448 root
->cell_f
[pos
++] = comm
->cell_f0
[d1
];
3449 root
->cell_f
[pos
++] = comm
->cell_f1
[d1
];
3452 if (d
< comm
->npmedecompdim
)
3454 /* The master determines the maximum shift for
3455 * the coordinate communication between separate PME nodes.
3457 set_pme_maxshift(dd
,&comm
->ddpme
[d
],bUniform
,ddbox
,root
->cell_f
);
3459 root
->cell_f
[pos
++] = comm
->ddpme
[0].maxshift
;
3462 root
->cell_f
[pos
++] = comm
->ddpme
[1].maxshift
;
3466 static void relative_to_absolute_cell_bounds(gmx_domdec_t
*dd
,
3467 gmx_ddbox_t
*ddbox
,int dimind
)
3469 gmx_domdec_comm_t
*comm
;
3474 /* Set the cell dimensions */
3475 dim
= dd
->dim
[dimind
];
3476 comm
->cell_x0
[dim
] = comm
->cell_f0
[dimind
]*ddbox
->box_size
[dim
];
3477 comm
->cell_x1
[dim
] = comm
->cell_f1
[dimind
]*ddbox
->box_size
[dim
];
3478 if (dim
>= ddbox
->nboundeddim
)
3480 comm
->cell_x0
[dim
] += ddbox
->box0
[dim
];
3481 comm
->cell_x1
[dim
] += ddbox
->box0
[dim
];
3485 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t
*dd
,
3486 int d
,int dim
,real
*cell_f_row
,
3489 gmx_domdec_comm_t
*comm
;
3495 /* Each node would only need to know two fractions,
3496 * but it is probably cheaper to broadcast the whole array.
3498 MPI_Bcast(cell_f_row
,DD_CELL_F_SIZE(dd
,d
)*sizeof(real
),MPI_BYTE
,
3499 0,comm
->mpi_comm_load
[d
]);
3501 /* Copy the fractions for this dimension from the buffer */
3502 comm
->cell_f0
[d
] = cell_f_row
[dd
->ci
[dim
] ];
3503 comm
->cell_f1
[d
] = cell_f_row
[dd
->ci
[dim
]+1];
3504 /* The whole array was communicated, so set the buffer position */
3505 pos
= dd
->nc
[dim
] + 1;
3506 for(d1
=0; d1
<=d
; d1
++)
3510 /* Copy the cell fractions of the lower dimensions */
3511 comm
->cell_f0
[d1
] = cell_f_row
[pos
++];
3512 comm
->cell_f1
[d1
] = cell_f_row
[pos
++];
3514 relative_to_absolute_cell_bounds(dd
,ddbox
,d1
);
3516 /* Convert the communicated shift from float to int */
3517 comm
->ddpme
[0].maxshift
= (int)(cell_f_row
[pos
++] + 0.5);
3520 comm
->ddpme
[1].maxshift
= (int)(cell_f_row
[pos
++] + 0.5);
3524 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t
*dd
,
3525 gmx_ddbox_t
*ddbox
,gmx_bool bDynamicBox
,
3526 gmx_bool bUniform
,gmx_large_int_t step
)
3528 gmx_domdec_comm_t
*comm
;
3530 gmx_bool bRowMember
,bRowRoot
;
3535 for(d
=0; d
<dd
->ndim
; d
++)
3540 for(d1
=d
; d1
<dd
->ndim
; d1
++)
3542 if (dd
->ci
[dd
->dim
[d1
]] > 0)
3555 set_dd_cell_sizes_dlb_root(dd
,d
,dim
,comm
->root
[d
],
3556 ddbox
,bDynamicBox
,bUniform
,step
);
3557 cell_f_row
= comm
->root
[d
]->cell_f
;
3561 cell_f_row
= comm
->cell_f_row
;
3563 distribute_dd_cell_sizes_dlb(dd
,d
,dim
,cell_f_row
,ddbox
);
3568 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
3572 /* This function assumes the box is static and should therefore
3573 * not be called when the box has changed since the last
3574 * call to dd_partition_system.
3576 for(d
=0; d
<dd
->ndim
; d
++)
3578 relative_to_absolute_cell_bounds(dd
,ddbox
,d
);
3584 static void set_dd_cell_sizes_dlb(gmx_domdec_t
*dd
,
3585 gmx_ddbox_t
*ddbox
,gmx_bool bDynamicBox
,
3586 gmx_bool bUniform
,gmx_bool bDoDLB
,gmx_large_int_t step
,
3587 gmx_wallcycle_t wcycle
)
3589 gmx_domdec_comm_t
*comm
;
3596 wallcycle_start(wcycle
,ewcDDCOMMBOUND
);
3597 set_dd_cell_sizes_dlb_change(dd
,ddbox
,bDynamicBox
,bUniform
,step
);
3598 wallcycle_stop(wcycle
,ewcDDCOMMBOUND
);
3600 else if (bDynamicBox
)
3602 set_dd_cell_sizes_dlb_nochange(dd
,ddbox
);
3605 /* Set the dimensions for which no DD is used */
3606 for(dim
=0; dim
<DIM
; dim
++) {
3607 if (dd
->nc
[dim
] == 1) {
3608 comm
->cell_x0
[dim
] = 0;
3609 comm
->cell_x1
[dim
] = ddbox
->box_size
[dim
];
3610 if (dim
>= ddbox
->nboundeddim
)
3612 comm
->cell_x0
[dim
] += ddbox
->box0
[dim
];
3613 comm
->cell_x1
[dim
] += ddbox
->box0
[dim
];
3619 static void realloc_comm_ind(gmx_domdec_t
*dd
,ivec npulse
)
3622 gmx_domdec_comm_dim_t
*cd
;
3624 for(d
=0; d
<dd
->ndim
; d
++)
3626 cd
= &dd
->comm
->cd
[d
];
3627 np
= npulse
[dd
->dim
[d
]];
3628 if (np
> cd
->np_nalloc
)
3632 fprintf(debug
,"(Re)allocing cd for %c to %d pulses\n",
3633 dim2char(dd
->dim
[d
]),np
);
3635 if (DDMASTER(dd
) && cd
->np_nalloc
> 0)
3637 fprintf(stderr
,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd
->dim
[d
]),np
);
3640 for(i
=cd
->np_nalloc
; i
<np
; i
++)
3642 cd
->ind
[i
].index
= NULL
;
3643 cd
->ind
[i
].nalloc
= 0;
3652 static void set_dd_cell_sizes(gmx_domdec_t
*dd
,
3653 gmx_ddbox_t
*ddbox
,gmx_bool bDynamicBox
,
3654 gmx_bool bUniform
,gmx_bool bDoDLB
,gmx_large_int_t step
,
3655 gmx_wallcycle_t wcycle
)
3657 gmx_domdec_comm_t
*comm
;
3663 /* Copy the old cell boundaries for the cg displacement check */
3664 copy_rvec(comm
->cell_x0
,comm
->old_cell_x0
);
3665 copy_rvec(comm
->cell_x1
,comm
->old_cell_x1
);
3667 if (comm
->bDynLoadBal
)
3671 check_box_size(dd
,ddbox
);
3673 set_dd_cell_sizes_dlb(dd
,ddbox
,bDynamicBox
,bUniform
,bDoDLB
,step
,wcycle
);
3677 set_dd_cell_sizes_slb(dd
,ddbox
,FALSE
,npulse
);
3678 realloc_comm_ind(dd
,npulse
);
3683 for(d
=0; d
<DIM
; d
++)
3685 fprintf(debug
,"cell_x[%d] %f - %f skew_fac %f\n",
3686 d
,comm
->cell_x0
[d
],comm
->cell_x1
[d
],ddbox
->skew_fac
[d
]);
3691 static void comm_dd_ns_cell_sizes(gmx_domdec_t
*dd
,
3693 rvec cell_ns_x0
,rvec cell_ns_x1
,
3694 gmx_large_int_t step
)
3696 gmx_domdec_comm_t
*comm
;
3701 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
3703 dim
= dd
->dim
[dim_ind
];
3705 /* Without PBC we don't have restrictions on the outer cells */
3706 if (!(dim
>= ddbox
->npbcdim
&&
3707 (dd
->ci
[dim
] == 0 || dd
->ci
[dim
] == dd
->nc
[dim
] - 1)) &&
3708 comm
->bDynLoadBal
&&
3709 (comm
->cell_x1
[dim
] - comm
->cell_x0
[dim
])*ddbox
->skew_fac
[dim
] <
3710 comm
->cellsize_min
[dim
])
3713 gmx_fatal(FARGS
,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3714 gmx_step_str(step
,buf
),dim2char(dim
),
3715 comm
->cell_x1
[dim
] - comm
->cell_x0
[dim
],
3716 ddbox
->skew_fac
[dim
],
3717 dd
->comm
->cellsize_min
[dim
],
3718 dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
3722 if ((dd
->bGridJump
&& dd
->ndim
> 1) || ddbox
->nboundeddim
< DIM
)
3724 /* Communicate the boundaries and update cell_ns_x0/1 */
3725 dd_move_cellx(dd
,ddbox
,cell_ns_x0
,cell_ns_x1
);
3726 if (dd
->bGridJump
&& dd
->ndim
> 1)
3728 check_grid_jump(step
,dd
,dd
->comm
->cutoff
,ddbox
,TRUE
);
3733 static void make_tric_corr_matrix(int npbcdim
,matrix box
,matrix tcm
)
3737 tcm
[YY
][XX
] = -box
[YY
][XX
]/box
[YY
][YY
];
3745 tcm
[ZZ
][XX
] = -(box
[ZZ
][YY
]*tcm
[YY
][XX
] + box
[ZZ
][XX
])/box
[ZZ
][ZZ
];
3746 tcm
[ZZ
][YY
] = -box
[ZZ
][YY
]/box
[ZZ
][ZZ
];
3755 static void check_screw_box(matrix box
)
3757 /* Mathematical limitation */
3758 if (box
[YY
][XX
] != 0 || box
[ZZ
][XX
] != 0)
3760 gmx_fatal(FARGS
,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3763 /* Limitation due to the asymmetry of the eighth shell method */
3764 if (box
[ZZ
][YY
] != 0)
3766 gmx_fatal(FARGS
,"pbc=screw with non-zero box_zy is not supported");
3770 static void distribute_cg(FILE *fplog
,gmx_large_int_t step
,
3771 matrix box
,ivec tric_dir
,t_block
*cgs
,rvec pos
[],
3774 gmx_domdec_master_t
*ma
;
3775 int **tmp_ind
=NULL
,*tmp_nalloc
=NULL
;
3776 int i
,icg
,j
,k
,k0
,k1
,d
,npbcdim
;
3778 rvec box_size
,cg_cm
;
3780 real nrcg
,inv_ncg
,pos_d
;
3782 gmx_bool bUnbounded
,bScrew
;
3786 if (tmp_ind
== NULL
)
3788 snew(tmp_nalloc
,dd
->nnodes
);
3789 snew(tmp_ind
,dd
->nnodes
);
3790 for(i
=0; i
<dd
->nnodes
; i
++)
3792 tmp_nalloc
[i
] = over_alloc_large(cgs
->nr
/dd
->nnodes
+1);
3793 snew(tmp_ind
[i
],tmp_nalloc
[i
]);
3797 /* Clear the count */
3798 for(i
=0; i
<dd
->nnodes
; i
++)
3804 make_tric_corr_matrix(dd
->npbcdim
,box
,tcm
);
3806 cgindex
= cgs
->index
;
3808 /* Compute the center of geometry for all charge groups */
3809 for(icg
=0; icg
<cgs
->nr
; icg
++)
3812 k1
= cgindex
[icg
+1];
3816 copy_rvec(pos
[k0
],cg_cm
);
3823 for(k
=k0
; (k
<k1
); k
++)
3825 rvec_inc(cg_cm
,pos
[k
]);
3827 for(d
=0; (d
<DIM
); d
++)
3829 cg_cm
[d
] *= inv_ncg
;
3832 /* Put the charge group in the box and determine the cell index */
3833 for(d
=DIM
-1; d
>=0; d
--) {
3835 if (d
< dd
->npbcdim
)
3837 bScrew
= (dd
->bScrewPBC
&& d
== XX
);
3838 if (tric_dir
[d
] && dd
->nc
[d
] > 1)
3840 /* Use triclinic coordintates for this dimension */
3841 for(j
=d
+1; j
<DIM
; j
++)
3843 pos_d
+= cg_cm
[j
]*tcm
[j
][d
];
3846 while(pos_d
>= box
[d
][d
])
3849 rvec_dec(cg_cm
,box
[d
]);
3852 cg_cm
[YY
] = box
[YY
][YY
] - cg_cm
[YY
];
3853 cg_cm
[ZZ
] = box
[ZZ
][ZZ
] - cg_cm
[ZZ
];
3855 for(k
=k0
; (k
<k1
); k
++)
3857 rvec_dec(pos
[k
],box
[d
]);
3860 pos
[k
][YY
] = box
[YY
][YY
] - pos
[k
][YY
];
3861 pos
[k
][ZZ
] = box
[ZZ
][ZZ
] - pos
[k
][ZZ
];
3868 rvec_inc(cg_cm
,box
[d
]);
3871 cg_cm
[YY
] = box
[YY
][YY
] - cg_cm
[YY
];
3872 cg_cm
[ZZ
] = box
[ZZ
][ZZ
] - cg_cm
[ZZ
];
3874 for(k
=k0
; (k
<k1
); k
++)
3876 rvec_inc(pos
[k
],box
[d
]);
3878 pos
[k
][YY
] = box
[YY
][YY
] - pos
[k
][YY
];
3879 pos
[k
][ZZ
] = box
[ZZ
][ZZ
] - pos
[k
][ZZ
];
3884 /* This could be done more efficiently */
3886 while(ind
[d
]+1 < dd
->nc
[d
] && pos_d
>= ma
->cell_x
[d
][ind
[d
]+1])
3891 i
= dd_index(dd
->nc
,ind
);
3892 if (ma
->ncg
[i
] == tmp_nalloc
[i
])
3894 tmp_nalloc
[i
] = over_alloc_large(ma
->ncg
[i
]+1);
3895 srenew(tmp_ind
[i
],tmp_nalloc
[i
]);
3897 tmp_ind
[i
][ma
->ncg
[i
]] = icg
;
3899 ma
->nat
[i
] += cgindex
[icg
+1] - cgindex
[icg
];
3903 for(i
=0; i
<dd
->nnodes
; i
++)
3906 for(k
=0; k
<ma
->ncg
[i
]; k
++)
3908 ma
->cg
[k1
++] = tmp_ind
[i
][k
];
3911 ma
->index
[dd
->nnodes
] = k1
;
3913 for(i
=0; i
<dd
->nnodes
; i
++)
3923 fprintf(fplog
,"Charge group distribution at step %s:",
3924 gmx_step_str(step
,buf
));
3925 for(i
=0; i
<dd
->nnodes
; i
++)
3927 fprintf(fplog
," %d",ma
->ncg
[i
]);
3929 fprintf(fplog
,"\n");
3933 static void get_cg_distribution(FILE *fplog
,gmx_large_int_t step
,gmx_domdec_t
*dd
,
3934 t_block
*cgs
,matrix box
,gmx_ddbox_t
*ddbox
,
3937 gmx_domdec_master_t
*ma
=NULL
;
3940 int *ibuf
,buf2
[2] = { 0, 0 };
3941 gmx_bool bMaster
= DDMASTER(dd
);
3948 check_screw_box(box
);
3951 set_dd_cell_sizes_slb(dd
,ddbox
,TRUE
,npulse
);
3953 distribute_cg(fplog
,step
,box
,ddbox
->tric_dir
,cgs
,pos
,dd
);
3954 for(i
=0; i
<dd
->nnodes
; i
++)
3956 ma
->ibuf
[2*i
] = ma
->ncg
[i
];
3957 ma
->ibuf
[2*i
+1] = ma
->nat
[i
];
3965 dd_scatter(dd
,2*sizeof(int),ibuf
,buf2
);
3967 dd
->ncg_home
= buf2
[0];
3968 dd
->nat_home
= buf2
[1];
3969 dd
->ncg_tot
= dd
->ncg_home
;
3970 dd
->nat_tot
= dd
->nat_home
;
3971 if (dd
->ncg_home
> dd
->cg_nalloc
|| dd
->cg_nalloc
== 0)
3973 dd
->cg_nalloc
= over_alloc_dd(dd
->ncg_home
);
3974 srenew(dd
->index_gl
,dd
->cg_nalloc
);
3975 srenew(dd
->cgindex
,dd
->cg_nalloc
+1);
3979 for(i
=0; i
<dd
->nnodes
; i
++)
3981 ma
->ibuf
[i
] = ma
->ncg
[i
]*sizeof(int);
3982 ma
->ibuf
[dd
->nnodes
+i
] = ma
->index
[i
]*sizeof(int);
3987 DDMASTER(dd
) ? ma
->ibuf
: NULL
,
3988 DDMASTER(dd
) ? ma
->ibuf
+dd
->nnodes
: NULL
,
3989 DDMASTER(dd
) ? ma
->cg
: NULL
,
3990 dd
->ncg_home
*sizeof(int),dd
->index_gl
);
3992 /* Determine the home charge group sizes */
3994 for(i
=0; i
<dd
->ncg_home
; i
++)
3996 cg_gl
= dd
->index_gl
[i
];
3998 dd
->cgindex
[i
] + cgs
->index
[cg_gl
+1] - cgs
->index
[cg_gl
];
4003 fprintf(debug
,"Home charge groups:\n");
4004 for(i
=0; i
<dd
->ncg_home
; i
++)
4006 fprintf(debug
," %d",dd
->index_gl
[i
]);
4008 fprintf(debug
,"\n");
4010 fprintf(debug
,"\n");
4014 static int compact_and_copy_vec_at(int ncg
,int *move
,
4017 rvec
*src
,gmx_domdec_comm_t
*comm
,
4020 int m
,icg
,i
,i0
,i1
,nrcg
;
4026 for(m
=0; m
<DIM
*2; m
++)
4032 for(icg
=0; icg
<ncg
; icg
++)
4034 i1
= cgindex
[icg
+1];
4040 /* Compact the home array in place */
4041 for(i
=i0
; i
<i1
; i
++)
4043 copy_rvec(src
[i
],src
[home_pos
++]);
4049 /* Copy to the communication buffer */
4051 pos_vec
[m
] += 1 + vec
*nrcg
;
4052 for(i
=i0
; i
<i1
; i
++)
4054 copy_rvec(src
[i
],comm
->cgcm_state
[m
][pos_vec
[m
]++]);
4056 pos_vec
[m
] += (nvec
- vec
- 1)*nrcg
;
4060 home_pos
+= i1
- i0
;
4068 static int compact_and_copy_vec_cg(int ncg
,int *move
,
4070 int nvec
,rvec
*src
,gmx_domdec_comm_t
*comm
,
4073 int m
,icg
,i0
,i1
,nrcg
;
4079 for(m
=0; m
<DIM
*2; m
++)
4085 for(icg
=0; icg
<ncg
; icg
++)
4087 i1
= cgindex
[icg
+1];
4093 /* Compact the home array in place */
4094 copy_rvec(src
[icg
],src
[home_pos
++]);
4100 /* Copy to the communication buffer */
4101 copy_rvec(src
[icg
],comm
->cgcm_state
[m
][pos_vec
[m
]]);
4102 pos_vec
[m
] += 1 + nrcg
*nvec
;
4114 static int compact_ind(int ncg
,int *move
,
4115 int *index_gl
,int *cgindex
,
4117 gmx_ga2la_t ga2la
,char *bLocalCG
,
4120 int cg
,nat
,a0
,a1
,a
,a_gl
;
4125 for(cg
=0; cg
<ncg
; cg
++)
4131 /* Compact the home arrays in place.
4132 * Anything that can be done here avoids access to global arrays.
4134 cgindex
[home_pos
] = nat
;
4135 for(a
=a0
; a
<a1
; a
++)
4138 gatindex
[nat
] = a_gl
;
4139 /* The cell number stays 0, so we don't need to set it */
4140 ga2la_change_la(ga2la
,a_gl
,nat
);
4143 index_gl
[home_pos
] = index_gl
[cg
];
4144 cginfo
[home_pos
] = cginfo
[cg
];
4145 /* The charge group remains local, so bLocalCG does not change */
4150 /* Clear the global indices */
4151 for(a
=a0
; a
<a1
; a
++)
4153 ga2la_del(ga2la
,gatindex
[a
]);
4157 bLocalCG
[index_gl
[cg
]] = FALSE
;
4161 cgindex
[home_pos
] = nat
;
4166 static void clear_and_mark_ind(int ncg
,int *move
,
4167 int *index_gl
,int *cgindex
,int *gatindex
,
4168 gmx_ga2la_t ga2la
,char *bLocalCG
,
4173 for(cg
=0; cg
<ncg
; cg
++)
4179 /* Clear the global indices */
4180 for(a
=a0
; a
<a1
; a
++)
4182 ga2la_del(ga2la
,gatindex
[a
]);
4186 bLocalCG
[index_gl
[cg
]] = FALSE
;
4188 /* Signal that this cg has moved using the ns cell index.
4189 * Here we set it to -1. fill_grid will change it
4190 * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
4192 cell_index
[cg
] = -1;
4197 static void print_cg_move(FILE *fplog
,
4199 gmx_large_int_t step
,int cg
,int dim
,int dir
,
4200 gmx_bool bHaveLimitdAndCMOld
,real limitd
,
4201 rvec cm_old
,rvec cm_new
,real pos_d
)
4203 gmx_domdec_comm_t
*comm
;
4208 fprintf(fplog
,"\nStep %s:\n",gmx_step_str(step
,buf
));
4209 if (bHaveLimitdAndCMOld
)
4211 fprintf(fplog
,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
4212 ddglatnr(dd
,dd
->cgindex
[cg
]),limitd
,dim2char(dim
));
4216 fprintf(fplog
,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
4217 ddglatnr(dd
,dd
->cgindex
[cg
]),dim2char(dim
));
4219 fprintf(fplog
,"distance out of cell %f\n",
4220 dir
==1 ? pos_d
- comm
->cell_x1
[dim
] : pos_d
- comm
->cell_x0
[dim
]);
4221 if (bHaveLimitdAndCMOld
)
4223 fprintf(fplog
,"Old coordinates: %8.3f %8.3f %8.3f\n",
4224 cm_old
[XX
],cm_old
[YY
],cm_old
[ZZ
]);
4226 fprintf(fplog
,"New coordinates: %8.3f %8.3f %8.3f\n",
4227 cm_new
[XX
],cm_new
[YY
],cm_new
[ZZ
]);
4228 fprintf(fplog
,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
4230 comm
->old_cell_x0
[dim
],comm
->old_cell_x1
[dim
]);
4231 fprintf(fplog
,"New cell boundaries in direction %c: %8.3f %8.3f\n",
4233 comm
->cell_x0
[dim
],comm
->cell_x1
[dim
]);
4236 static void cg_move_error(FILE *fplog
,
4238 gmx_large_int_t step
,int cg
,int dim
,int dir
,
4239 gmx_bool bHaveLimitdAndCMOld
,real limitd
,
4240 rvec cm_old
,rvec cm_new
,real pos_d
)
4244 print_cg_move(fplog
, dd
,step
,cg
,dim
,dir
,
4245 bHaveLimitdAndCMOld
,limitd
,cm_old
,cm_new
,pos_d
);
4247 print_cg_move(stderr
,dd
,step
,cg
,dim
,dir
,
4248 bHaveLimitdAndCMOld
,limitd
,cm_old
,cm_new
,pos_d
);
4250 "A charge group moved too far between two domain decomposition steps\n"
4251 "This usually means that your system is not well equilibrated");
4254 static void rotate_state_atom(t_state
*state
,int a
)
4258 for(est
=0; est
<estNR
; est
++)
4260 if (EST_DISTR(est
) && (state
->flags
& (1<<est
))) {
4263 /* Rotate the complete state; for a rectangular box only */
4264 state
->x
[a
][YY
] = state
->box
[YY
][YY
] - state
->x
[a
][YY
];
4265 state
->x
[a
][ZZ
] = state
->box
[ZZ
][ZZ
] - state
->x
[a
][ZZ
];
4268 state
->v
[a
][YY
] = -state
->v
[a
][YY
];
4269 state
->v
[a
][ZZ
] = -state
->v
[a
][ZZ
];
4272 state
->sd_X
[a
][YY
] = -state
->sd_X
[a
][YY
];
4273 state
->sd_X
[a
][ZZ
] = -state
->sd_X
[a
][ZZ
];
4276 state
->cg_p
[a
][YY
] = -state
->cg_p
[a
][YY
];
4277 state
->cg_p
[a
][ZZ
] = -state
->cg_p
[a
][ZZ
];
4279 case estDISRE_INITF
:
4280 case estDISRE_RM3TAV
:
4281 case estORIRE_INITF
:
4283 /* These are distances, so not affected by rotation */
4286 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4292 static int *get_moved(gmx_domdec_comm_t
*comm
,int natoms
)
4294 if (natoms
> comm
->moved_nalloc
)
4296 /* Contents should be preserved here */
4297 comm
->moved_nalloc
= over_alloc_dd(natoms
);
4298 srenew(comm
->moved
,comm
->moved_nalloc
);
4304 static void calc_cg_move(FILE *fplog
,gmx_large_int_t step
,
4307 ivec tric_dir
,matrix tcm
,
4308 rvec cell_x0
,rvec cell_x1
,
4309 rvec limitd
,rvec limit0
,rvec limit1
,
4311 int cg_start
,int cg_end
,
4316 int c
,i
,cg
,k
,k0
,k1
,d
,dim
,dim2
,dir
,d2
,d3
,d4
,cell_d
;
4317 int mc
,cdd
,nrcg
,ncg_recv
,nat_recv
,nvs
,nvr
,nvec
,vec
;
4324 npbcdim
= dd
->npbcdim
;
4326 for(cg
=cg_start
; cg
<cg_end
; cg
++)
4333 copy_rvec(state
->x
[k0
],cm_new
);
4340 for(k
=k0
; (k
<k1
); k
++)
4342 rvec_inc(cm_new
,state
->x
[k
]);
4344 for(d
=0; (d
<DIM
); d
++)
4346 cm_new
[d
] = inv_ncg
*cm_new
[d
];
4351 /* Do pbc and check DD cell boundary crossings */
4352 for(d
=DIM
-1; d
>=0; d
--)
4356 bScrew
= (dd
->bScrewPBC
&& d
== XX
);
4357 /* Determine the location of this cg in lattice coordinates */
4361 for(d2
=d
+1; d2
<DIM
; d2
++)
4363 pos_d
+= cm_new
[d2
]*tcm
[d2
][d
];
4366 /* Put the charge group in the triclinic unit-cell */
4367 if (pos_d
>= cell_x1
[d
])
4369 if (pos_d
>= limit1
[d
])
4371 cg_move_error(fplog
,dd
,step
,cg
,d
,1,TRUE
,limitd
[d
],
4372 cg_cm
[cg
],cm_new
,pos_d
);
4375 if (dd
->ci
[d
] == dd
->nc
[d
] - 1)
4377 rvec_dec(cm_new
,state
->box
[d
]);
4380 cm_new
[YY
] = state
->box
[YY
][YY
] - cm_new
[YY
];
4381 cm_new
[ZZ
] = state
->box
[ZZ
][ZZ
] - cm_new
[ZZ
];
4383 for(k
=k0
; (k
<k1
); k
++)
4385 rvec_dec(state
->x
[k
],state
->box
[d
]);
4388 rotate_state_atom(state
,k
);
4393 else if (pos_d
< cell_x0
[d
])
4395 if (pos_d
< limit0
[d
])
4397 cg_move_error(fplog
,dd
,step
,cg
,d
,-1,TRUE
,limitd
[d
],
4398 cg_cm
[cg
],cm_new
,pos_d
);
4403 rvec_inc(cm_new
,state
->box
[d
]);
4406 cm_new
[YY
] = state
->box
[YY
][YY
] - cm_new
[YY
];
4407 cm_new
[ZZ
] = state
->box
[ZZ
][ZZ
] - cm_new
[ZZ
];
4409 for(k
=k0
; (k
<k1
); k
++)
4411 rvec_inc(state
->x
[k
],state
->box
[d
]);
4414 rotate_state_atom(state
,k
);
4420 else if (d
< npbcdim
)
4422 /* Put the charge group in the rectangular unit-cell */
4423 while (cm_new
[d
] >= state
->box
[d
][d
])
4425 rvec_dec(cm_new
,state
->box
[d
]);
4426 for(k
=k0
; (k
<k1
); k
++)
4428 rvec_dec(state
->x
[k
],state
->box
[d
]);
4431 while (cm_new
[d
] < 0)
4433 rvec_inc(cm_new
,state
->box
[d
]);
4434 for(k
=k0
; (k
<k1
); k
++)
4436 rvec_inc(state
->x
[k
],state
->box
[d
]);
4442 copy_rvec(cm_new
,cg_cm
[cg
]);
4444 /* Determine where this cg should go */
4447 for(d
=0; d
<dd
->ndim
; d
++)
4452 flag
|= DD_FLAG_FW(d
);
4458 else if (dev
[dim
] == -1)
4460 flag
|= DD_FLAG_BW(d
);
4462 if (dd
->nc
[dim
] > 2)
4473 /* Temporarily store the flag in move */
4474 move
[cg
] = mc
+ flag
;
4478 static void dd_redistribute_cg(FILE *fplog
,gmx_large_int_t step
,
4479 gmx_domdec_t
*dd
,ivec tric_dir
,
4480 t_state
*state
,rvec
**f
,
4481 t_forcerec
*fr
,t_mdatoms
*md
,
4489 int ncg
[DIM
*2],nat
[DIM
*2];
4490 int c
,i
,cg
,k
,k0
,k1
,d
,dim
,dim2
,dir
,d2
,d3
,d4
,cell_d
;
4491 int mc
,cdd
,nrcg
,ncg_recv
,nat_recv
,nvs
,nvr
,nvec
,vec
;
4492 int sbuf
[2],rbuf
[2];
4493 int home_pos_cg
,home_pos_at
,buf_pos
;
4495 gmx_bool bV
=FALSE
,bSDX
=FALSE
,bCGP
=FALSE
;
4500 rvec
*cg_cm
=NULL
,cell_x0
,cell_x1
,limitd
,limit0
,limit1
,cm_new
;
4502 cginfo_mb_t
*cginfo_mb
;
4503 gmx_domdec_comm_t
*comm
;
4509 check_screw_box(state
->box
);
4513 if (fr
->cutoff_scheme
== ecutsGROUP
)
4518 for(i
=0; i
<estNR
; i
++)
4524 case estX
: /* Always present */ break;
4525 case estV
: bV
= (state
->flags
& (1<<i
)); break;
4526 case estSDX
: bSDX
= (state
->flags
& (1<<i
)); break;
4527 case estCGP
: bCGP
= (state
->flags
& (1<<i
)); break;
4530 case estDISRE_INITF
:
4531 case estDISRE_RM3TAV
:
4532 case estORIRE_INITF
:
4534 /* No processing required */
4537 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4542 if (dd
->ncg_tot
> comm
->nalloc_int
)
4544 comm
->nalloc_int
= over_alloc_dd(dd
->ncg_tot
);
4545 srenew(comm
->buf_int
,comm
->nalloc_int
);
4547 move
= comm
->buf_int
;
4549 /* Clear the count */
4550 for(c
=0; c
<dd
->ndim
*2; c
++)
4556 npbcdim
= dd
->npbcdim
;
4558 for(d
=0; (d
<DIM
); d
++)
4560 limitd
[d
] = dd
->comm
->cellsize_min
[d
];
4561 if (d
>= npbcdim
&& dd
->ci
[d
] == 0)
4563 cell_x0
[d
] = -GMX_FLOAT_MAX
;
4567 cell_x0
[d
] = comm
->cell_x0
[d
];
4569 if (d
>= npbcdim
&& dd
->ci
[d
] == dd
->nc
[d
] - 1)
4571 cell_x1
[d
] = GMX_FLOAT_MAX
;
4575 cell_x1
[d
] = comm
->cell_x1
[d
];
4579 limit0
[d
] = comm
->old_cell_x0
[d
] - limitd
[d
];
4580 limit1
[d
] = comm
->old_cell_x1
[d
] + limitd
[d
];
4584 /* We check after communication if a charge group moved
4585 * more than one cell. Set the pre-comm check limit to float_max.
4587 limit0
[d
] = -GMX_FLOAT_MAX
;
4588 limit1
[d
] = GMX_FLOAT_MAX
;
4592 make_tric_corr_matrix(npbcdim
,state
->box
,tcm
);
4594 cgindex
= dd
->cgindex
;
4596 nthread
= gmx_omp_nthreads_get(emntDomdec
);
4598 /* Compute the center of geometry for all home charge groups
4599 * and put them in the box and determine where they should go.
4601 #pragma omp parallel for num_threads(nthread) schedule(static)
4602 for(thread
=0; thread
<nthread
; thread
++)
4604 calc_cg_move(fplog
,step
,dd
,state
,tric_dir
,tcm
,
4605 cell_x0
,cell_x1
,limitd
,limit0
,limit1
,
4607 ( thread
*dd
->ncg_home
)/nthread
,
4608 ((thread
+1)*dd
->ncg_home
)/nthread
,
4609 fr
->cutoff_scheme
==ecutsGROUP
? cg_cm
: state
->x
,
4613 for(cg
=0; cg
<dd
->ncg_home
; cg
++)
4618 flag
= mc
& ~DD_FLAG_NRCG
;
4619 mc
= mc
& DD_FLAG_NRCG
;
4622 if (ncg
[mc
]+1 > comm
->cggl_flag_nalloc
[mc
])
4624 comm
->cggl_flag_nalloc
[mc
] = over_alloc_dd(ncg
[mc
]+1);
4625 srenew(comm
->cggl_flag
[mc
],comm
->cggl_flag_nalloc
[mc
]*DD_CGIBS
);
4627 comm
->cggl_flag
[mc
][ncg
[mc
]*DD_CGIBS
] = dd
->index_gl
[cg
];
4628 /* We store the cg size in the lower 16 bits
4629 * and the place where the charge group should go
4630 * in the next 6 bits. This saves some communication volume.
4632 nrcg
= cgindex
[cg
+1] - cgindex
[cg
];
4633 comm
->cggl_flag
[mc
][ncg
[mc
]*DD_CGIBS
+1] = nrcg
| flag
;
4639 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
4640 inc_nrnb(nrnb
,eNR_RESETX
,dd
->ncg_home
);
4643 for(i
=0; i
<dd
->ndim
*2; i
++)
4645 *ncg_moved
+= ncg
[i
];
4662 /* Make sure the communication buffers are large enough */
4663 for(mc
=0; mc
<dd
->ndim
*2; mc
++)
4665 nvr
= ncg
[mc
] + nat
[mc
]*nvec
;
4666 if (nvr
> comm
->cgcm_state_nalloc
[mc
])
4668 comm
->cgcm_state_nalloc
[mc
] = over_alloc_dd(nvr
);
4669 srenew(comm
->cgcm_state
[mc
],comm
->cgcm_state_nalloc
[mc
]);
4673 switch (fr
->cutoff_scheme
)
4676 /* Recalculating cg_cm might be cheaper than communicating,
4677 * but that could give rise to rounding issues.
4680 compact_and_copy_vec_cg(dd
->ncg_home
,move
,cgindex
,
4681 nvec
,cg_cm
,comm
,bCompact
);
4684 /* Without charge groups we send the moved atom coordinates
4685 * over twice. This is so the code below can be used without
4686 * many conditionals for both for with and without charge groups.
4689 compact_and_copy_vec_cg(dd
->ncg_home
,move
,cgindex
,
4690 nvec
,state
->x
,comm
,FALSE
);
4693 home_pos_cg
-= *ncg_moved
;
4697 gmx_incons("unimplemented");
4703 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4704 nvec
,vec
++,state
->x
,comm
,bCompact
);
4707 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4708 nvec
,vec
++,state
->v
,comm
,bCompact
);
4712 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4713 nvec
,vec
++,state
->sd_X
,comm
,bCompact
);
4717 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4718 nvec
,vec
++,state
->cg_p
,comm
,bCompact
);
4723 compact_ind(dd
->ncg_home
,move
,
4724 dd
->index_gl
,dd
->cgindex
,dd
->gatindex
,
4725 dd
->ga2la
,comm
->bLocalCG
,
4730 if (fr
->cutoff_scheme
== ecutsVERLET
)
4732 moved
= get_moved(comm
,dd
->ncg_home
);
4734 for(k
=0; k
<dd
->ncg_home
; k
++)
4741 moved
= fr
->ns
.grid
->cell_index
;
4744 clear_and_mark_ind(dd
->ncg_home
,move
,
4745 dd
->index_gl
,dd
->cgindex
,dd
->gatindex
,
4746 dd
->ga2la
,comm
->bLocalCG
,
4750 cginfo_mb
= fr
->cginfo_mb
;
4752 *ncg_stay_home
= home_pos_cg
;
4753 for(d
=0; d
<dd
->ndim
; d
++)
4759 for(dir
=0; dir
<(dd
->nc
[dim
]==2 ? 1 : 2); dir
++)
4762 /* Communicate the cg and atom counts */
4767 fprintf(debug
,"Sending ddim %d dir %d: ncg %d nat %d\n",
4768 d
,dir
,sbuf
[0],sbuf
[1]);
4770 dd_sendrecv_int(dd
, d
, dir
, sbuf
, 2, rbuf
, 2);
4772 if ((ncg_recv
+rbuf
[0])*DD_CGIBS
> comm
->nalloc_int
)
4774 comm
->nalloc_int
= over_alloc_dd((ncg_recv
+rbuf
[0])*DD_CGIBS
);
4775 srenew(comm
->buf_int
,comm
->nalloc_int
);
4778 /* Communicate the charge group indices, sizes and flags */
4779 dd_sendrecv_int(dd
, d
, dir
,
4780 comm
->cggl_flag
[cdd
], sbuf
[0]*DD_CGIBS
,
4781 comm
->buf_int
+ncg_recv
*DD_CGIBS
, rbuf
[0]*DD_CGIBS
);
4783 nvs
= ncg
[cdd
] + nat
[cdd
]*nvec
;
4784 i
= rbuf
[0] + rbuf
[1] *nvec
;
4785 vec_rvec_check_alloc(&comm
->vbuf
,nvr
+i
);
4787 /* Communicate cgcm and state */
4788 dd_sendrecv_rvec(dd
, d
, dir
,
4789 comm
->cgcm_state
[cdd
], nvs
,
4790 comm
->vbuf
.v
+nvr
, i
);
4791 ncg_recv
+= rbuf
[0];
4792 nat_recv
+= rbuf
[1];
4796 /* Process the received charge groups */
4798 for(cg
=0; cg
<ncg_recv
; cg
++)
4800 flag
= comm
->buf_int
[cg
*DD_CGIBS
+1];
4802 if (dim
>= npbcdim
&& dd
->nc
[dim
] > 2)
4804 /* No pbc in this dim and more than one domain boundary.
4805 * We do a separate check if a charge group didn't move too far.
4807 if (((flag
& DD_FLAG_FW(d
)) &&
4808 comm
->vbuf
.v
[buf_pos
][dim
] > cell_x1
[dim
]) ||
4809 ((flag
& DD_FLAG_BW(d
)) &&
4810 comm
->vbuf
.v
[buf_pos
][dim
] < cell_x0
[dim
]))
4812 cg_move_error(fplog
,dd
,step
,cg
,dim
,
4813 (flag
& DD_FLAG_FW(d
)) ? 1 : 0,
4815 comm
->vbuf
.v
[buf_pos
],
4816 comm
->vbuf
.v
[buf_pos
],
4817 comm
->vbuf
.v
[buf_pos
][dim
]);
4824 /* Check which direction this cg should go */
4825 for(d2
=d
+1; (d2
<dd
->ndim
&& mc
==-1); d2
++)
4829 /* The cell boundaries for dimension d2 are not equal
4830 * for each cell row of the lower dimension(s),
4831 * therefore we might need to redetermine where
4832 * this cg should go.
4835 /* If this cg crosses the box boundary in dimension d2
4836 * we can use the communicated flag, so we do not
4837 * have to worry about pbc.
4839 if (!((dd
->ci
[dim2
] == dd
->nc
[dim2
]-1 &&
4840 (flag
& DD_FLAG_FW(d2
))) ||
4841 (dd
->ci
[dim2
] == 0 &&
4842 (flag
& DD_FLAG_BW(d2
)))))
4844 /* Clear the two flags for this dimension */
4845 flag
&= ~(DD_FLAG_FW(d2
) | DD_FLAG_BW(d2
));
4846 /* Determine the location of this cg
4847 * in lattice coordinates
4849 pos_d
= comm
->vbuf
.v
[buf_pos
][dim2
];
4852 for(d3
=dim2
+1; d3
<DIM
; d3
++)
4855 comm
->vbuf
.v
[buf_pos
][d3
]*tcm
[d3
][dim2
];
4858 /* Check if we are not at the box edge.
4859 * pbc is only handled in the first step above,
4860 * but this check could move over pbc while
4861 * the first step did not due to different rounding.
4863 if (pos_d
>= cell_x1
[dim2
] &&
4864 dd
->ci
[dim2
] != dd
->nc
[dim2
]-1)
4866 flag
|= DD_FLAG_FW(d2
);
4868 else if (pos_d
< cell_x0
[dim2
] &&
4871 flag
|= DD_FLAG_BW(d2
);
4873 comm
->buf_int
[cg
*DD_CGIBS
+1] = flag
;
4876 /* Set to which neighboring cell this cg should go */
4877 if (flag
& DD_FLAG_FW(d2
))
4881 else if (flag
& DD_FLAG_BW(d2
))
4883 if (dd
->nc
[dd
->dim
[d2
]] > 2)
4895 nrcg
= flag
& DD_FLAG_NRCG
;
4898 if (home_pos_cg
+1 > dd
->cg_nalloc
)
4900 dd
->cg_nalloc
= over_alloc_dd(home_pos_cg
+1);
4901 srenew(dd
->index_gl
,dd
->cg_nalloc
);
4902 srenew(dd
->cgindex
,dd
->cg_nalloc
+1);
4904 /* Set the global charge group index and size */
4905 dd
->index_gl
[home_pos_cg
] = comm
->buf_int
[cg
*DD_CGIBS
];
4906 dd
->cgindex
[home_pos_cg
+1] = dd
->cgindex
[home_pos_cg
] + nrcg
;
4907 /* Copy the state from the buffer */
4908 dd_check_alloc_ncg(fr
,state
,f
,home_pos_cg
+1);
4909 if (fr
->cutoff_scheme
== ecutsGROUP
)
4912 copy_rvec(comm
->vbuf
.v
[buf_pos
],cg_cm
[home_pos_cg
]);
4916 /* Set the cginfo */
4917 fr
->cginfo
[home_pos_cg
] = ddcginfo(cginfo_mb
,
4918 dd
->index_gl
[home_pos_cg
]);
4921 comm
->bLocalCG
[dd
->index_gl
[home_pos_cg
]] = TRUE
;
4924 if (home_pos_at
+nrcg
> state
->nalloc
)
4926 dd_realloc_state(state
,f
,home_pos_at
+nrcg
);
4928 for(i
=0; i
<nrcg
; i
++)
4930 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4931 state
->x
[home_pos_at
+i
]);
4935 for(i
=0; i
<nrcg
; i
++)
4937 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4938 state
->v
[home_pos_at
+i
]);
4943 for(i
=0; i
<nrcg
; i
++)
4945 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4946 state
->sd_X
[home_pos_at
+i
]);
4951 for(i
=0; i
<nrcg
; i
++)
4953 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4954 state
->cg_p
[home_pos_at
+i
]);
4958 home_pos_at
+= nrcg
;
4962 /* Reallocate the buffers if necessary */
4963 if (ncg
[mc
]+1 > comm
->cggl_flag_nalloc
[mc
])
4965 comm
->cggl_flag_nalloc
[mc
] = over_alloc_dd(ncg
[mc
]+1);
4966 srenew(comm
->cggl_flag
[mc
],comm
->cggl_flag_nalloc
[mc
]*DD_CGIBS
);
4968 nvr
= ncg
[mc
] + nat
[mc
]*nvec
;
4969 if (nvr
+ 1 + nrcg
*nvec
> comm
->cgcm_state_nalloc
[mc
])
4971 comm
->cgcm_state_nalloc
[mc
] = over_alloc_dd(nvr
+ 1 + nrcg
*nvec
);
4972 srenew(comm
->cgcm_state
[mc
],comm
->cgcm_state_nalloc
[mc
]);
4974 /* Copy from the receive to the send buffers */
4975 memcpy(comm
->cggl_flag
[mc
] + ncg
[mc
]*DD_CGIBS
,
4976 comm
->buf_int
+ cg
*DD_CGIBS
,
4977 DD_CGIBS
*sizeof(int));
4978 memcpy(comm
->cgcm_state
[mc
][nvr
],
4979 comm
->vbuf
.v
[buf_pos
],
4980 (1+nrcg
*nvec
)*sizeof(rvec
));
4981 buf_pos
+= 1 + nrcg
*nvec
;
4988 /* With sorting (!bCompact) the indices are now only partially up to date
4989 * and ncg_home and nat_home are not the real count, since there are
4990 * "holes" in the arrays for the charge groups that moved to neighbors.
4992 if (fr
->cutoff_scheme
== ecutsVERLET
)
4994 moved
= get_moved(comm
,home_pos_cg
);
4996 for(i
=dd
->ncg_home
; i
<home_pos_cg
; i
++)
5001 dd
->ncg_home
= home_pos_cg
;
5002 dd
->nat_home
= home_pos_at
;
5007 "Finished repartitioning: cgs moved out %d, new home %d\n",
5008 *ncg_moved
,dd
->ncg_home
-*ncg_moved
);
5013 void dd_cycles_add(gmx_domdec_t
*dd
,float cycles
,int ddCycl
)
5015 dd
->comm
->cycl
[ddCycl
] += cycles
;
5016 dd
->comm
->cycl_n
[ddCycl
]++;
5017 if (cycles
> dd
->comm
->cycl_max
[ddCycl
])
5019 dd
->comm
->cycl_max
[ddCycl
] = cycles
;
5023 static double force_flop_count(t_nrnb
*nrnb
)
5030 for(i
=0; i
<eNR_NBKERNEL_FREE_ENERGY
; i
++)
5032 /* To get closer to the real timings, we halve the count
5033 * for the normal loops and again halve it for water loops.
5036 if (strstr(name
,"W3") != NULL
|| strstr(name
,"W4") != NULL
)
5038 sum
+= nrnb
->n
[i
]*0.25*cost_nrnb(i
);
5042 sum
+= nrnb
->n
[i
]*0.50*cost_nrnb(i
);
5045 for(i
=eNR_NBKERNEL_FREE_ENERGY
; i
<=eNR_NB14
; i
++)
5048 if (strstr(name
,"W3") != NULL
|| strstr(name
,"W4") != NULL
)
5049 sum
+= nrnb
->n
[i
]*cost_nrnb(i
);
5051 for(i
=eNR_BONDS
; i
<=eNR_WALLS
; i
++)
5053 sum
+= nrnb
->n
[i
]*cost_nrnb(i
);
5059 void dd_force_flop_start(gmx_domdec_t
*dd
,t_nrnb
*nrnb
)
5061 if (dd
->comm
->eFlop
)
5063 dd
->comm
->flop
-= force_flop_count(nrnb
);
5066 void dd_force_flop_stop(gmx_domdec_t
*dd
,t_nrnb
*nrnb
)
5068 if (dd
->comm
->eFlop
)
5070 dd
->comm
->flop
+= force_flop_count(nrnb
);
5075 static void clear_dd_cycle_counts(gmx_domdec_t
*dd
)
5079 for(i
=0; i
<ddCyclNr
; i
++)
5081 dd
->comm
->cycl
[i
] = 0;
5082 dd
->comm
->cycl_n
[i
] = 0;
5083 dd
->comm
->cycl_max
[i
] = 0;
5086 dd
->comm
->flop_n
= 0;
5089 static void get_load_distribution(gmx_domdec_t
*dd
,gmx_wallcycle_t wcycle
)
5091 gmx_domdec_comm_t
*comm
;
5092 gmx_domdec_load_t
*load
;
5093 gmx_domdec_root_t
*root
=NULL
;
5094 int d
,dim
,cid
,i
,pos
;
5095 float cell_frac
=0,sbuf
[DD_NLOAD_MAX
];
5100 fprintf(debug
,"get_load_distribution start\n");
5103 wallcycle_start(wcycle
,ewcDDCOMMLOAD
);
5107 bSepPME
= (dd
->pme_nodeid
>= 0);
5109 for(d
=dd
->ndim
-1; d
>=0; d
--)
5112 /* Check if we participate in the communication in this dimension */
5113 if (d
== dd
->ndim
-1 ||
5114 (dd
->ci
[dd
->dim
[d
+1]]==0 && dd
->ci
[dd
->dim
[dd
->ndim
-1]]==0))
5116 load
= &comm
->load
[d
];
5119 cell_frac
= comm
->cell_f1
[d
] - comm
->cell_f0
[d
];
5122 if (d
== dd
->ndim
-1)
5124 sbuf
[pos
++] = dd_force_load(comm
);
5125 sbuf
[pos
++] = sbuf
[0];
5128 sbuf
[pos
++] = sbuf
[0];
5129 sbuf
[pos
++] = cell_frac
;
5132 sbuf
[pos
++] = comm
->cell_f_max0
[d
];
5133 sbuf
[pos
++] = comm
->cell_f_min1
[d
];
5138 sbuf
[pos
++] = comm
->cycl
[ddCyclPPduringPME
];
5139 sbuf
[pos
++] = comm
->cycl
[ddCyclPME
];
5144 sbuf
[pos
++] = comm
->load
[d
+1].sum
;
5145 sbuf
[pos
++] = comm
->load
[d
+1].max
;
5148 sbuf
[pos
++] = comm
->load
[d
+1].sum_m
;
5149 sbuf
[pos
++] = comm
->load
[d
+1].cvol_min
*cell_frac
;
5150 sbuf
[pos
++] = comm
->load
[d
+1].flags
;
5153 sbuf
[pos
++] = comm
->cell_f_max0
[d
];
5154 sbuf
[pos
++] = comm
->cell_f_min1
[d
];
5159 sbuf
[pos
++] = comm
->load
[d
+1].mdf
;
5160 sbuf
[pos
++] = comm
->load
[d
+1].pme
;
5164 /* Communicate a row in DD direction d.
5165 * The communicators are setup such that the root always has rank 0.
5168 MPI_Gather(sbuf
,load
->nload
*sizeof(float),MPI_BYTE
,
5169 load
->load
,load
->nload
*sizeof(float),MPI_BYTE
,
5170 0,comm
->mpi_comm_load
[d
]);
5172 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5174 /* We are the root, process this row */
5175 if (comm
->bDynLoadBal
)
5177 root
= comm
->root
[d
];
5187 for(i
=0; i
<dd
->nc
[dim
]; i
++)
5189 load
->sum
+= load
->load
[pos
++];
5190 load
->max
= max(load
->max
,load
->load
[pos
]);
5196 /* This direction could not be load balanced properly,
5197 * therefore we need to use the maximum instead of the average load.
5199 load
->sum_m
= max(load
->sum_m
,load
->load
[pos
]);
5203 load
->sum_m
+= load
->load
[pos
];
5206 load
->cvol_min
= min(load
->cvol_min
,load
->load
[pos
]);
5210 load
->flags
= (int)(load
->load
[pos
++] + 0.5);
5214 root
->cell_f_max0
[i
] = load
->load
[pos
++];
5215 root
->cell_f_min1
[i
] = load
->load
[pos
++];
5220 load
->mdf
= max(load
->mdf
,load
->load
[pos
]);
5222 load
->pme
= max(load
->pme
,load
->load
[pos
]);
5226 if (comm
->bDynLoadBal
&& root
->bLimited
)
5228 load
->sum_m
*= dd
->nc
[dim
];
5229 load
->flags
|= (1<<d
);
5237 comm
->nload
+= dd_load_count(comm
);
5238 comm
->load_step
+= comm
->cycl
[ddCyclStep
];
5239 comm
->load_sum
+= comm
->load
[0].sum
;
5240 comm
->load_max
+= comm
->load
[0].max
;
5241 if (comm
->bDynLoadBal
)
5243 for(d
=0; d
<dd
->ndim
; d
++)
5245 if (comm
->load
[0].flags
& (1<<d
))
5247 comm
->load_lim
[d
]++;
5253 comm
->load_mdf
+= comm
->load
[0].mdf
;
5254 comm
->load_pme
+= comm
->load
[0].pme
;
5258 wallcycle_stop(wcycle
,ewcDDCOMMLOAD
);
5262 fprintf(debug
,"get_load_distribution finished\n");
5266 static float dd_force_imb_perf_loss(gmx_domdec_t
*dd
)
5268 /* Return the relative performance loss on the total run time
5269 * due to the force calculation load imbalance.
5271 if (dd
->comm
->nload
> 0)
5274 (dd
->comm
->load_max
*dd
->nnodes
- dd
->comm
->load_sum
)/
5275 (dd
->comm
->load_step
*dd
->nnodes
);
5283 static void print_dd_load_av(FILE *fplog
,gmx_domdec_t
*dd
)
5286 int npp
,npme
,nnodes
,d
,limp
;
5287 float imbal
,pme_f_ratio
,lossf
,lossp
=0;
5289 gmx_domdec_comm_t
*comm
;
5292 if (DDMASTER(dd
) && comm
->nload
> 0)
5295 npme
= (dd
->pme_nodeid
>= 0) ? comm
->npmenodes
: 0;
5296 nnodes
= npp
+ npme
;
5297 imbal
= comm
->load_max
*npp
/comm
->load_sum
- 1;
5298 lossf
= dd_force_imb_perf_loss(dd
);
5299 sprintf(buf
," Average load imbalance: %.1f %%\n",imbal
*100);
5300 fprintf(fplog
,"%s",buf
);
5301 fprintf(stderr
,"\n");
5302 fprintf(stderr
,"%s",buf
);
5303 sprintf(buf
," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf
*100);
5304 fprintf(fplog
,"%s",buf
);
5305 fprintf(stderr
,"%s",buf
);
5307 if (comm
->bDynLoadBal
)
5309 sprintf(buf
," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
5310 for(d
=0; d
<dd
->ndim
; d
++)
5312 limp
= (200*comm
->load_lim
[d
]+1)/(2*comm
->nload
);
5313 sprintf(buf
+strlen(buf
)," %c %d %%",dim2char(dd
->dim
[d
]),limp
);
5319 sprintf(buf
+strlen(buf
),"\n");
5320 fprintf(fplog
,"%s",buf
);
5321 fprintf(stderr
,"%s",buf
);
5325 pme_f_ratio
= comm
->load_pme
/comm
->load_mdf
;
5326 lossp
= (comm
->load_pme
-comm
->load_mdf
)/comm
->load_step
;
5329 lossp
*= (float)npme
/(float)nnodes
;
5333 lossp
*= (float)npp
/(float)nnodes
;
5335 sprintf(buf
," Average PME mesh/force load: %5.3f\n",pme_f_ratio
);
5336 fprintf(fplog
,"%s",buf
);
5337 fprintf(stderr
,"%s",buf
);
5338 sprintf(buf
," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp
)*100);
5339 fprintf(fplog
,"%s",buf
);
5340 fprintf(stderr
,"%s",buf
);
5342 fprintf(fplog
,"\n");
5343 fprintf(stderr
,"\n");
5345 if (lossf
>= DD_PERF_LOSS
)
5348 "NOTE: %.1f %% performance was lost due to load imbalance\n"
5349 " in the domain decomposition.\n",lossf
*100);
5350 if (!comm
->bDynLoadBal
)
5352 sprintf(buf
+strlen(buf
)," You might want to use dynamic load balancing (option -dlb.)\n");
5356 sprintf(buf
+strlen(buf
)," You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5358 fprintf(fplog
,"%s\n",buf
);
5359 fprintf(stderr
,"%s\n",buf
);
5361 if (npme
> 0 && fabs(lossp
) >= DD_PERF_LOSS
)
5364 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5365 " had %s work to do than the PP nodes.\n"
5366 " You might want to %s the number of PME nodes\n"
5367 " or %s the cut-off and the grid spacing.\n",
5369 (lossp
< 0) ? "less" : "more",
5370 (lossp
< 0) ? "decrease" : "increase",
5371 (lossp
< 0) ? "decrease" : "increase");
5372 fprintf(fplog
,"%s\n",buf
);
5373 fprintf(stderr
,"%s\n",buf
);
5378 static float dd_vol_min(gmx_domdec_t
*dd
)
5380 return dd
->comm
->load
[0].cvol_min
*dd
->nnodes
;
5383 static gmx_bool
dd_load_flags(gmx_domdec_t
*dd
)
5385 return dd
->comm
->load
[0].flags
;
5388 static float dd_f_imbal(gmx_domdec_t
*dd
)
5390 return dd
->comm
->load
[0].max
*dd
->nnodes
/dd
->comm
->load
[0].sum
- 1;
5393 float dd_pme_f_ratio(gmx_domdec_t
*dd
)
5395 if (dd
->comm
->cycl_n
[ddCyclPME
] > 0)
5397 return dd
->comm
->load
[0].pme
/dd
->comm
->load
[0].mdf
;
5405 static void dd_print_load(FILE *fplog
,gmx_domdec_t
*dd
,gmx_large_int_t step
)
5410 flags
= dd_load_flags(dd
);
5414 "DD load balancing is limited by minimum cell size in dimension");
5415 for(d
=0; d
<dd
->ndim
; d
++)
5419 fprintf(fplog
," %c",dim2char(dd
->dim
[d
]));
5422 fprintf(fplog
,"\n");
5424 fprintf(fplog
,"DD step %s",gmx_step_str(step
,buf
));
5425 if (dd
->comm
->bDynLoadBal
)
5427 fprintf(fplog
," vol min/aver %5.3f%c",
5428 dd_vol_min(dd
),flags
? '!' : ' ');
5430 fprintf(fplog
," load imb.: force %4.1f%%",dd_f_imbal(dd
)*100);
5431 if (dd
->comm
->cycl_n
[ddCyclPME
])
5433 fprintf(fplog
," pme mesh/force %5.3f",dd_pme_f_ratio(dd
));
5435 fprintf(fplog
,"\n\n");
5438 static void dd_print_load_verbose(gmx_domdec_t
*dd
)
5440 if (dd
->comm
->bDynLoadBal
)
5442 fprintf(stderr
,"vol %4.2f%c ",
5443 dd_vol_min(dd
),dd_load_flags(dd
) ? '!' : ' ');
5445 fprintf(stderr
,"imb F %2d%% ",(int)(dd_f_imbal(dd
)*100+0.5));
5446 if (dd
->comm
->cycl_n
[ddCyclPME
])
5448 fprintf(stderr
,"pme/F %4.2f ",dd_pme_f_ratio(dd
));
5453 static void make_load_communicator(gmx_domdec_t
*dd
, int dim_ind
,ivec loc
)
5458 gmx_domdec_root_t
*root
;
5459 gmx_bool bPartOfGroup
= FALSE
;
5461 dim
= dd
->dim
[dim_ind
];
5462 copy_ivec(loc
,loc_c
);
5463 for(i
=0; i
<dd
->nc
[dim
]; i
++)
5466 rank
= dd_index(dd
->nc
,loc_c
);
5467 if (rank
== dd
->rank
)
5469 /* This process is part of the group */
5470 bPartOfGroup
= TRUE
;
5473 MPI_Comm_split(dd
->mpi_comm_all
, bPartOfGroup
?0:MPI_UNDEFINED
, dd
->rank
,
5477 dd
->comm
->mpi_comm_load
[dim_ind
] = c_row
;
5478 if (dd
->comm
->eDLB
!= edlbNO
)
5480 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5482 /* This is the root process of this row */
5483 snew(dd
->comm
->root
[dim_ind
],1);
5484 root
= dd
->comm
->root
[dim_ind
];
5485 snew(root
->cell_f
,DD_CELL_F_SIZE(dd
,dim_ind
));
5486 snew(root
->old_cell_f
,dd
->nc
[dim
]+1);
5487 snew(root
->bCellMin
,dd
->nc
[dim
]);
5490 snew(root
->cell_f_max0
,dd
->nc
[dim
]);
5491 snew(root
->cell_f_min1
,dd
->nc
[dim
]);
5492 snew(root
->bound_min
,dd
->nc
[dim
]);
5493 snew(root
->bound_max
,dd
->nc
[dim
]);
5495 snew(root
->buf_ncd
,dd
->nc
[dim
]);
5499 /* This is not a root process, we only need to receive cell_f */
5500 snew(dd
->comm
->cell_f_row
,DD_CELL_F_SIZE(dd
,dim_ind
));
5503 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5505 snew(dd
->comm
->load
[dim_ind
].load
,dd
->nc
[dim
]*DD_NLOAD_MAX
);
5511 static void make_load_communicators(gmx_domdec_t
*dd
)
5518 fprintf(debug
,"Making load communicators\n");
5520 snew(dd
->comm
->load
,dd
->ndim
);
5521 snew(dd
->comm
->mpi_comm_load
,dd
->ndim
);
5524 make_load_communicator(dd
,0,loc
);
5527 for(i
=0; i
<dd
->nc
[dim0
]; i
++) {
5529 make_load_communicator(dd
,1,loc
);
5534 for(i
=0; i
<dd
->nc
[dim0
]; i
++) {
5537 for(j
=0; j
<dd
->nc
[dim1
]; j
++) {
5539 make_load_communicator(dd
,2,loc
);
5545 fprintf(debug
,"Finished making load communicators\n");
5549 void setup_dd_grid(FILE *fplog
,gmx_domdec_t
*dd
)
5555 ivec dd_zp
[DD_MAXIZONE
];
5556 gmx_domdec_zones_t
*zones
;
5557 gmx_domdec_ns_ranges_t
*izone
;
5559 for(d
=0; d
<dd
->ndim
; d
++)
5562 copy_ivec(dd
->ci
,tmp
);
5563 tmp
[dim
] = (tmp
[dim
] + 1) % dd
->nc
[dim
];
5564 dd
->neighbor
[d
][0] = ddcoord2ddnodeid(dd
,tmp
);
5565 copy_ivec(dd
->ci
,tmp
);
5566 tmp
[dim
] = (tmp
[dim
] - 1 + dd
->nc
[dim
]) % dd
->nc
[dim
];
5567 dd
->neighbor
[d
][1] = ddcoord2ddnodeid(dd
,tmp
);
5570 fprintf(debug
,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5573 dd
->neighbor
[d
][1]);
5579 fprintf(stderr
,"Making %dD domain decomposition %d x %d x %d\n",
5580 dd
->ndim
,dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
]);
5584 fprintf(fplog
,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5586 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
],
5587 dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5594 for(i
=0; i
<nzonep
; i
++)
5596 copy_ivec(dd_zp3
[i
],dd_zp
[i
]);
5602 for(i
=0; i
<nzonep
; i
++)
5604 copy_ivec(dd_zp2
[i
],dd_zp
[i
]);
5610 for(i
=0; i
<nzonep
; i
++)
5612 copy_ivec(dd_zp1
[i
],dd_zp
[i
]);
5616 gmx_fatal(FARGS
,"Can only do 1, 2 or 3D domain decomposition");
5621 zones
= &dd
->comm
->zones
;
5623 for(i
=0; i
<nzone
; i
++)
5626 clear_ivec(zones
->shift
[i
]);
5627 for(d
=0; d
<dd
->ndim
; d
++)
5629 zones
->shift
[i
][dd
->dim
[d
]] = dd_zo
[i
][m
++];
5634 for(i
=0; i
<nzone
; i
++)
5636 for(d
=0; d
<DIM
; d
++)
5638 s
[d
] = dd
->ci
[d
] - zones
->shift
[i
][d
];
5643 else if (s
[d
] >= dd
->nc
[d
])
5649 zones
->nizone
= nzonep
;
5650 for(i
=0; i
<zones
->nizone
; i
++)
5652 if (dd_zp
[i
][0] != i
)
5654 gmx_fatal(FARGS
,"Internal inconsistency in the dd grid setup");
5656 izone
= &zones
->izone
[i
];
5657 izone
->j0
= dd_zp
[i
][1];
5658 izone
->j1
= dd_zp
[i
][2];
5659 for(dim
=0; dim
<DIM
; dim
++)
5661 if (dd
->nc
[dim
] == 1)
5663 /* All shifts should be allowed */
5664 izone
->shift0
[dim
] = -1;
5665 izone
->shift1
[dim
] = 1;
5670 izone->shift0[d] = 0;
5671 izone->shift1[d] = 0;
5672 for(j=izone->j0; j<izone->j1; j++) {
5673 if (dd->shift[j][d] > dd->shift[i][d])
5674 izone->shift0[d] = -1;
5675 if (dd->shift[j][d] < dd->shift[i][d])
5676 izone->shift1[d] = 1;
5682 /* Assume the shift are not more than 1 cell */
5683 izone
->shift0
[dim
] = 1;
5684 izone
->shift1
[dim
] = -1;
5685 for(j
=izone
->j0
; j
<izone
->j1
; j
++)
5687 shift_diff
= zones
->shift
[j
][dim
] - zones
->shift
[i
][dim
];
5688 if (shift_diff
< izone
->shift0
[dim
])
5690 izone
->shift0
[dim
] = shift_diff
;
5692 if (shift_diff
> izone
->shift1
[dim
])
5694 izone
->shift1
[dim
] = shift_diff
;
5701 if (dd
->comm
->eDLB
!= edlbNO
)
5703 snew(dd
->comm
->root
,dd
->ndim
);
5706 if (dd
->comm
->bRecordLoad
)
5708 make_load_communicators(dd
);
5712 static void make_pp_communicator(FILE *fplog
,t_commrec
*cr
,int reorder
)
5715 gmx_domdec_comm_t
*comm
;
5726 if (comm
->bCartesianPP
)
5728 /* Set up cartesian communication for the particle-particle part */
5731 fprintf(fplog
,"Will use a Cartesian communicator: %d x %d x %d\n",
5732 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
]);
5735 for(i
=0; i
<DIM
; i
++)
5739 MPI_Cart_create(cr
->mpi_comm_mygroup
,DIM
,dd
->nc
,periods
,reorder
,
5741 /* We overwrite the old communicator with the new cartesian one */
5742 cr
->mpi_comm_mygroup
= comm_cart
;
5745 dd
->mpi_comm_all
= cr
->mpi_comm_mygroup
;
5746 MPI_Comm_rank(dd
->mpi_comm_all
,&dd
->rank
);
5748 if (comm
->bCartesianPP_PME
)
5750 /* Since we want to use the original cartesian setup for sim,
5751 * and not the one after split, we need to make an index.
5753 snew(comm
->ddindex2ddnodeid
,dd
->nnodes
);
5754 comm
->ddindex2ddnodeid
[dd_index(dd
->nc
,dd
->ci
)] = dd
->rank
;
5755 gmx_sumi(dd
->nnodes
,comm
->ddindex2ddnodeid
,cr
);
5756 /* Get the rank of the DD master,
5757 * above we made sure that the master node is a PP node.
5767 MPI_Allreduce(&rank
,&dd
->masterrank
,1,MPI_INT
,MPI_SUM
,dd
->mpi_comm_all
);
5769 else if (comm
->bCartesianPP
)
5771 if (cr
->npmenodes
== 0)
5773 /* The PP communicator is also
5774 * the communicator for this simulation
5776 cr
->mpi_comm_mysim
= cr
->mpi_comm_mygroup
;
5778 cr
->nodeid
= dd
->rank
;
5780 MPI_Cart_coords(dd
->mpi_comm_all
,dd
->rank
,DIM
,dd
->ci
);
5782 /* We need to make an index to go from the coordinates
5783 * to the nodeid of this simulation.
5785 snew(comm
->ddindex2simnodeid
,dd
->nnodes
);
5786 snew(buf
,dd
->nnodes
);
5787 if (cr
->duty
& DUTY_PP
)
5789 buf
[dd_index(dd
->nc
,dd
->ci
)] = cr
->sim_nodeid
;
5791 /* Communicate the ddindex to simulation nodeid index */
5792 MPI_Allreduce(buf
,comm
->ddindex2simnodeid
,dd
->nnodes
,MPI_INT
,MPI_SUM
,
5793 cr
->mpi_comm_mysim
);
5796 /* Determine the master coordinates and rank.
5797 * The DD master should be the same node as the master of this sim.
5799 for(i
=0; i
<dd
->nnodes
; i
++)
5801 if (comm
->ddindex2simnodeid
[i
] == 0)
5803 ddindex2xyz(dd
->nc
,i
,dd
->master_ci
);
5804 MPI_Cart_rank(dd
->mpi_comm_all
,dd
->master_ci
,&dd
->masterrank
);
5809 fprintf(debug
,"The master rank is %d\n",dd
->masterrank
);
5814 /* No Cartesian communicators */
5815 /* We use the rank in dd->comm->all as DD index */
5816 ddindex2xyz(dd
->nc
,dd
->rank
,dd
->ci
);
5817 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5819 clear_ivec(dd
->master_ci
);
5826 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5827 dd
->rank
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5832 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5833 dd
->rank
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5837 static void receive_ddindex2simnodeid(t_commrec
*cr
)
5841 gmx_domdec_comm_t
*comm
;
5848 if (!comm
->bCartesianPP_PME
&& comm
->bCartesianPP
)
5850 snew(comm
->ddindex2simnodeid
,dd
->nnodes
);
5851 snew(buf
,dd
->nnodes
);
5852 if (cr
->duty
& DUTY_PP
)
5854 buf
[dd_index(dd
->nc
,dd
->ci
)] = cr
->sim_nodeid
;
5857 /* Communicate the ddindex to simulation nodeid index */
5858 MPI_Allreduce(buf
,comm
->ddindex2simnodeid
,dd
->nnodes
,MPI_INT
,MPI_SUM
,
5859 cr
->mpi_comm_mysim
);
5866 static gmx_domdec_master_t
*init_gmx_domdec_master_t(gmx_domdec_t
*dd
,
5869 gmx_domdec_master_t
*ma
;
5874 snew(ma
->ncg
,dd
->nnodes
);
5875 snew(ma
->index
,dd
->nnodes
+1);
5877 snew(ma
->nat
,dd
->nnodes
);
5878 snew(ma
->ibuf
,dd
->nnodes
*2);
5879 snew(ma
->cell_x
,DIM
);
5880 for(i
=0; i
<DIM
; i
++)
5882 snew(ma
->cell_x
[i
],dd
->nc
[i
]+1);
5885 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
5891 snew(ma
->vbuf
,natoms
);
5897 static void split_communicator(FILE *fplog
,t_commrec
*cr
,int dd_node_order
,
5901 gmx_domdec_comm_t
*comm
;
5912 if (comm
->bCartesianPP
)
5914 for(i
=1; i
<DIM
; i
++)
5916 bDiv
[i
] = ((cr
->npmenodes
*dd
->nc
[i
]) % (dd
->nnodes
) == 0);
5918 if (bDiv
[YY
] || bDiv
[ZZ
])
5920 comm
->bCartesianPP_PME
= TRUE
;
5921 /* If we have 2D PME decomposition, which is always in x+y,
5922 * we stack the PME only nodes in z.
5923 * Otherwise we choose the direction that provides the thinnest slab
5924 * of PME only nodes as this will have the least effect
5925 * on the PP communication.
5926 * But for the PME communication the opposite might be better.
5928 if (bDiv
[ZZ
] && (comm
->npmenodes_y
> 1 ||
5930 dd
->nc
[YY
] > dd
->nc
[ZZ
]))
5932 comm
->cartpmedim
= ZZ
;
5936 comm
->cartpmedim
= YY
;
5938 comm
->ntot
[comm
->cartpmedim
]
5939 += (cr
->npmenodes
*dd
->nc
[comm
->cartpmedim
])/dd
->nnodes
;
5943 fprintf(fplog
,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr
->npmenodes
,dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[XX
],dd
->nc
[ZZ
]);
5945 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5950 if (comm
->bCartesianPP_PME
)
5954 fprintf(fplog
,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm
->ntot
[XX
],comm
->ntot
[YY
],comm
->ntot
[ZZ
]);
5957 for(i
=0; i
<DIM
; i
++)
5961 MPI_Cart_create(cr
->mpi_comm_mysim
,DIM
,comm
->ntot
,periods
,reorder
,
5964 MPI_Comm_rank(comm_cart
,&rank
);
5965 if (MASTERNODE(cr
) && rank
!= 0)
5967 gmx_fatal(FARGS
,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
5970 /* With this assignment we lose the link to the original communicator
5971 * which will usually be MPI_COMM_WORLD, unless we have multisim.
5973 cr
->mpi_comm_mysim
= comm_cart
;
5974 cr
->sim_nodeid
= rank
;
5976 MPI_Cart_coords(cr
->mpi_comm_mysim
,cr
->sim_nodeid
,DIM
,dd
->ci
);
5980 fprintf(fplog
,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
5981 cr
->sim_nodeid
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5984 if (dd
->ci
[comm
->cartpmedim
] < dd
->nc
[comm
->cartpmedim
])
5988 if (cr
->npmenodes
== 0 ||
5989 dd
->ci
[comm
->cartpmedim
] >= dd
->nc
[comm
->cartpmedim
])
5991 cr
->duty
= DUTY_PME
;
5994 /* Split the sim communicator into PP and PME only nodes */
5995 MPI_Comm_split(cr
->mpi_comm_mysim
,
5997 dd_index(comm
->ntot
,dd
->ci
),
5998 &cr
->mpi_comm_mygroup
);
6002 switch (dd_node_order
)
6007 fprintf(fplog
,"Order of the nodes: PP first, PME last\n");
6010 case ddnoINTERLEAVE
:
6011 /* Interleave the PP-only and PME-only nodes,
6012 * as on clusters with dual-core machines this will double
6013 * the communication bandwidth of the PME processes
6014 * and thus speed up the PP <-> PME and inter PME communication.
6018 fprintf(fplog
,"Interleaving PP and PME nodes\n");
6020 comm
->pmenodes
= dd_pmenodes(cr
);
6025 gmx_fatal(FARGS
,"Unknown dd_node_order=%d",dd_node_order
);
6028 if (dd_simnode2pmenode(cr
,cr
->sim_nodeid
) == -1)
6030 cr
->duty
= DUTY_PME
;
6037 /* Split the sim communicator into PP and PME only nodes */
6038 MPI_Comm_split(cr
->mpi_comm_mysim
,
6041 &cr
->mpi_comm_mygroup
);
6042 MPI_Comm_rank(cr
->mpi_comm_mygroup
,&cr
->nodeid
);
6048 fprintf(fplog
,"This is a %s only node\n\n",
6049 (cr
->duty
& DUTY_PP
) ? "particle-particle" : "PME-mesh");
6053 void make_dd_communicators(FILE *fplog
,t_commrec
*cr
,int dd_node_order
)
6056 gmx_domdec_comm_t
*comm
;
6062 copy_ivec(dd
->nc
,comm
->ntot
);
6064 comm
->bCartesianPP
= (dd_node_order
== ddnoCARTESIAN
);
6065 comm
->bCartesianPP_PME
= FALSE
;
6067 /* Reorder the nodes by default. This might change the MPI ranks.
6068 * Real reordering is only supported on very few architectures,
6069 * Blue Gene is one of them.
6071 CartReorder
= (getenv("GMX_NO_CART_REORDER") == NULL
);
6073 if (cr
->npmenodes
> 0)
6075 /* Split the communicator into a PP and PME part */
6076 split_communicator(fplog
,cr
,dd_node_order
,CartReorder
);
6077 if (comm
->bCartesianPP_PME
)
6079 /* We (possibly) reordered the nodes in split_communicator,
6080 * so it is no longer required in make_pp_communicator.
6082 CartReorder
= FALSE
;
6087 /* All nodes do PP and PME */
6089 /* We do not require separate communicators */
6090 cr
->mpi_comm_mygroup
= cr
->mpi_comm_mysim
;
6094 if (cr
->duty
& DUTY_PP
)
6096 /* Copy or make a new PP communicator */
6097 make_pp_communicator(fplog
,cr
,CartReorder
);
6101 receive_ddindex2simnodeid(cr
);
6104 if (!(cr
->duty
& DUTY_PME
))
6106 /* Set up the communication to our PME node */
6107 dd
->pme_nodeid
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
6108 dd
->pme_receive_vir_ener
= receive_vir_ener(cr
);
6111 fprintf(debug
,"My pme_nodeid %d receive ener %d\n",
6112 dd
->pme_nodeid
,dd
->pme_receive_vir_ener
);
6117 dd
->pme_nodeid
= -1;
6122 dd
->ma
= init_gmx_domdec_master_t(dd
,
6124 comm
->cgs_gl
.index
[comm
->cgs_gl
.nr
]);
6128 static real
*get_slb_frac(FILE *fplog
,const char *dir
,int nc
,const char *size_string
)
6135 if (nc
> 1 && size_string
!= NULL
)
6139 fprintf(fplog
,"Using static load balancing for the %s direction\n",
6144 for (i
=0; i
<nc
; i
++)
6147 sscanf(size_string
,"%lf%n",&dbl
,&n
);
6150 gmx_fatal(FARGS
,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir
,size_string
);
6159 fprintf(fplog
,"Relative cell sizes:");
6161 for (i
=0; i
<nc
; i
++)
6166 fprintf(fplog
," %5.3f",slb_frac
[i
]);
6171 fprintf(fplog
,"\n");
6178 static int multi_body_bondeds_count(gmx_mtop_t
*mtop
)
6181 gmx_mtop_ilistloop_t iloop
;
6185 iloop
= gmx_mtop_ilistloop_init(mtop
);
6186 while (gmx_mtop_ilistloop_next(iloop
,&il
,&nmol
))
6188 for(ftype
=0; ftype
<F_NRE
; ftype
++)
6190 if ((interaction_function
[ftype
].flags
& IF_BOND
) &&
6193 n
+= nmol
*il
[ftype
].nr
/(1 + NRAL(ftype
));
6201 static int dd_nst_env(FILE *fplog
,const char *env_var
,int def
)
6207 val
= getenv(env_var
);
6210 if (sscanf(val
,"%d",&nst
) <= 0)
6216 fprintf(fplog
,"Found env.var. %s = %s, using value %d\n",
6224 static void dd_warning(t_commrec
*cr
,FILE *fplog
,const char *warn_string
)
6228 fprintf(stderr
,"\n%s\n",warn_string
);
6232 fprintf(fplog
,"\n%s\n",warn_string
);
6236 static void check_dd_restrictions(t_commrec
*cr
,gmx_domdec_t
*dd
,
6237 t_inputrec
*ir
,FILE *fplog
)
6239 if (ir
->ePBC
== epbcSCREW
&&
6240 (dd
->nc
[XX
] == 1 || dd
->nc
[YY
] > 1 || dd
->nc
[ZZ
] > 1))
6242 gmx_fatal(FARGS
,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names
[ir
->ePBC
]);
6245 if (ir
->ns_type
== ensSIMPLE
)
6247 gmx_fatal(FARGS
,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
6250 if (ir
->nstlist
== 0)
6252 gmx_fatal(FARGS
,"Domain decomposition does not work with nstlist=0");
6255 if (ir
->comm_mode
== ecmANGULAR
&& ir
->ePBC
!= epbcNONE
)
6257 dd_warning(cr
,fplog
,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
6261 static real
average_cellsize_min(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
6266 r
= ddbox
->box_size
[XX
];
6267 for(di
=0; di
<dd
->ndim
; di
++)
6270 /* Check using the initial average cell size */
6271 r
= min(r
,ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
]/dd
->nc
[d
]);
6277 static int check_dlb_support(FILE *fplog
,t_commrec
*cr
,
6278 const char *dlb_opt
,gmx_bool bRecordLoad
,
6279 unsigned long Flags
,t_inputrec
*ir
)
6287 case 'a': eDLB
= edlbAUTO
; break;
6288 case 'n': eDLB
= edlbNO
; break;
6289 case 'y': eDLB
= edlbYES
; break;
6290 default: gmx_incons("Unknown dlb_opt");
6293 if (Flags
& MD_RERUN
)
6298 if (!EI_DYNAMICS(ir
->eI
))
6300 if (eDLB
== edlbYES
)
6302 sprintf(buf
,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir
->eI
));
6303 dd_warning(cr
,fplog
,buf
);
6311 dd_warning(cr
,fplog
,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
6316 if (Flags
& MD_REPRODUCIBLE
)
6323 dd_warning(cr
,fplog
,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
6327 dd_warning(cr
,fplog
,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
6330 gmx_fatal(FARGS
,"Death horror: undefined case (%d) for load balancing choice",eDLB
);
6338 static void set_dd_dim(FILE *fplog
,gmx_domdec_t
*dd
)
6343 if (getenv("GMX_DD_ORDER_ZYX") != NULL
)
6345 /* Decomposition order z,y,x */
6348 fprintf(fplog
,"Using domain decomposition order z, y, x\n");
6350 for(dim
=DIM
-1; dim
>=0; dim
--)
6352 if (dd
->nc
[dim
] > 1)
6354 dd
->dim
[dd
->ndim
++] = dim
;
6360 /* Decomposition order x,y,z */
6361 for(dim
=0; dim
<DIM
; dim
++)
6363 if (dd
->nc
[dim
] > 1)
6365 dd
->dim
[dd
->ndim
++] = dim
;
6371 static gmx_domdec_comm_t
*init_dd_comm()
6373 gmx_domdec_comm_t
*comm
;
6377 snew(comm
->cggl_flag
,DIM
*2);
6378 snew(comm
->cgcm_state
,DIM
*2);
6379 for(i
=0; i
<DIM
*2; i
++)
6381 comm
->cggl_flag_nalloc
[i
] = 0;
6382 comm
->cgcm_state_nalloc
[i
] = 0;
6385 comm
->nalloc_int
= 0;
6386 comm
->buf_int
= NULL
;
6388 vec_rvec_init(&comm
->vbuf
);
6390 comm
->n_load_have
= 0;
6391 comm
->n_load_collect
= 0;
6393 for(i
=0; i
<ddnatNR
-ddnatZONE
; i
++)
6395 comm
->sum_nat
[i
] = 0;
6399 comm
->load_step
= 0;
6402 clear_ivec(comm
->load_lim
);
6409 gmx_domdec_t
*init_domain_decomposition(FILE *fplog
,t_commrec
*cr
,
6410 unsigned long Flags
,
6412 real comm_distance_min
,real rconstr
,
6413 const char *dlb_opt
,real dlb_scale
,
6414 const char *sizex
,const char *sizey
,const char *sizez
,
6415 gmx_mtop_t
*mtop
,t_inputrec
*ir
,
6418 int *npme_x
,int *npme_y
)
6421 gmx_domdec_comm_t
*comm
;
6424 real r_2b
,r_mb
,r_bonded
=-1,r_bonded_limit
=-1,limit
,acs
;
6431 "\nInitializing Domain Decomposition on %d nodes\n",cr
->nnodes
);
6436 dd
->comm
= init_dd_comm();
6438 snew(comm
->cggl_flag
,DIM
*2);
6439 snew(comm
->cgcm_state
,DIM
*2);
6441 dd
->npbcdim
= ePBC2npbcdim(ir
->ePBC
);
6442 dd
->bScrewPBC
= (ir
->ePBC
== epbcSCREW
);
6444 dd
->bSendRecv2
= dd_nst_env(fplog
,"GMX_DD_SENDRECV2",0);
6445 comm
->dlb_scale_lim
= dd_nst_env(fplog
,"GMX_DLB_MAX",10);
6446 comm
->eFlop
= dd_nst_env(fplog
,"GMX_DLB_FLOP",0);
6447 recload
= dd_nst_env(fplog
,"GMX_DD_LOAD",1);
6448 comm
->nstSortCG
= dd_nst_env(fplog
,"GMX_DD_SORT",1);
6449 comm
->nstDDDump
= dd_nst_env(fplog
,"GMX_DD_DUMP",0);
6450 comm
->nstDDDumpGrid
= dd_nst_env(fplog
,"GMX_DD_DUMP_GRID",0);
6451 comm
->DD_debug
= dd_nst_env(fplog
,"GMX_DD_DEBUG",0);
6453 dd
->pme_recv_f_alloc
= 0;
6454 dd
->pme_recv_f_buf
= NULL
;
6456 if (dd
->bSendRecv2
&& fplog
)
6458 fprintf(fplog
,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6464 fprintf(fplog
,"Will load balance based on FLOP count\n");
6466 if (comm
->eFlop
> 1)
6468 srand(1+cr
->nodeid
);
6470 comm
->bRecordLoad
= TRUE
;
6474 comm
->bRecordLoad
= (wallcycle_have_counter() && recload
> 0);
6478 comm
->eDLB
= check_dlb_support(fplog
,cr
,dlb_opt
,comm
->bRecordLoad
,Flags
,ir
);
6480 comm
->bDynLoadBal
= (comm
->eDLB
== edlbYES
);
6483 fprintf(fplog
,"Dynamic load balancing: %s\n",edlb_names
[comm
->eDLB
]);
6485 dd
->bGridJump
= comm
->bDynLoadBal
;
6487 if (comm
->nstSortCG
)
6491 if (comm
->nstSortCG
== 1)
6493 fprintf(fplog
,"Will sort the charge groups at every domain (re)decomposition\n");
6497 fprintf(fplog
,"Will sort the charge groups every %d steps\n",
6507 fprintf(fplog
,"Will not sort the charge groups\n");
6511 comm
->bCGs
= (ncg_mtop(mtop
) < mtop
->natoms
);
6513 comm
->bInterCGBondeds
= (ncg_mtop(mtop
) > mtop
->mols
.nr
);
6514 if (comm
->bInterCGBondeds
)
6516 comm
->bInterCGMultiBody
= (multi_body_bondeds_count(mtop
) > 0);
6520 comm
->bInterCGMultiBody
= FALSE
;
6523 dd
->bInterCGcons
= inter_charge_group_constraints(mtop
);
6524 dd
->bInterCGsettles
= inter_charge_group_settles(mtop
);
6526 if (ir
->rlistlong
== 0)
6528 /* Set the cut-off to some very large value,
6529 * so we don't need if statements everywhere in the code.
6530 * We use sqrt, since the cut-off is squared in some places.
6532 comm
->cutoff
= GMX_CUTOFF_INF
;
6536 comm
->cutoff
= ir
->rlistlong
;
6538 comm
->cutoff_mbody
= 0;
6540 comm
->cellsize_limit
= 0;
6541 comm
->bBondComm
= FALSE
;
6543 if (comm
->bInterCGBondeds
)
6545 if (comm_distance_min
> 0)
6547 comm
->cutoff_mbody
= comm_distance_min
;
6548 if (Flags
& MD_DDBONDCOMM
)
6550 comm
->bBondComm
= (comm
->cutoff_mbody
> comm
->cutoff
);
6554 comm
->cutoff
= max(comm
->cutoff
,comm
->cutoff_mbody
);
6556 r_bonded_limit
= comm
->cutoff_mbody
;
6558 else if (ir
->bPeriodicMols
)
6560 /* Can not easily determine the required cut-off */
6561 dd_warning(cr
,fplog
,"NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
6562 comm
->cutoff_mbody
= comm
->cutoff
/2;
6563 r_bonded_limit
= comm
->cutoff_mbody
;
6569 dd_bonded_cg_distance(fplog
,dd
,mtop
,ir
,x
,box
,
6570 Flags
& MD_DDBONDCHECK
,&r_2b
,&r_mb
);
6572 gmx_bcast(sizeof(r_2b
),&r_2b
,cr
);
6573 gmx_bcast(sizeof(r_mb
),&r_mb
,cr
);
6575 /* We use an initial margin of 10% for the minimum cell size,
6576 * except when we are just below the non-bonded cut-off.
6578 if (Flags
& MD_DDBONDCOMM
)
6580 if (max(r_2b
,r_mb
) > comm
->cutoff
)
6582 r_bonded
= max(r_2b
,r_mb
);
6583 r_bonded_limit
= 1.1*r_bonded
;
6584 comm
->bBondComm
= TRUE
;
6589 r_bonded_limit
= min(1.1*r_bonded
,comm
->cutoff
);
6591 /* We determine cutoff_mbody later */
6595 /* No special bonded communication,
6596 * simply increase the DD cut-off.
6598 r_bonded_limit
= 1.1*max(r_2b
,r_mb
);
6599 comm
->cutoff_mbody
= r_bonded_limit
;
6600 comm
->cutoff
= max(comm
->cutoff
,comm
->cutoff_mbody
);
6603 comm
->cellsize_limit
= max(comm
->cellsize_limit
,r_bonded_limit
);
6607 "Minimum cell size due to bonded interactions: %.3f nm\n",
6608 comm
->cellsize_limit
);
6612 if (dd
->bInterCGcons
&& rconstr
<= 0)
6614 /* There is a cell size limit due to the constraints (P-LINCS) */
6615 rconstr
= constr_r_max(fplog
,mtop
,ir
);
6619 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6621 if (rconstr
> comm
->cellsize_limit
)
6623 fprintf(fplog
,"This distance will limit the DD cell size, you can override this with -rcon\n");
6627 else if (rconstr
> 0 && fplog
)
6629 /* Here we do not check for dd->bInterCGcons,
6630 * because one can also set a cell size limit for virtual sites only
6631 * and at this point we don't know yet if there are intercg v-sites.
6634 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6637 comm
->cellsize_limit
= max(comm
->cellsize_limit
,rconstr
);
6639 comm
->cgs_gl
= gmx_mtop_global_cgs(mtop
);
6643 copy_ivec(nc
,dd
->nc
);
6644 set_dd_dim(fplog
,dd
);
6645 set_ddbox_cr(cr
,&dd
->nc
,ir
,box
,&comm
->cgs_gl
,x
,ddbox
);
6647 if (cr
->npmenodes
== -1)
6651 acs
= average_cellsize_min(dd
,ddbox
);
6652 if (acs
< comm
->cellsize_limit
)
6656 fprintf(fplog
,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs
,comm
->cellsize_limit
);
6658 gmx_fatal_collective(FARGS
,cr
,NULL
,
6659 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6660 acs
,comm
->cellsize_limit
);
6665 set_ddbox_cr(cr
,NULL
,ir
,box
,&comm
->cgs_gl
,x
,ddbox
);
6667 /* We need to choose the optimal DD grid and possibly PME nodes */
6668 limit
= dd_choose_grid(fplog
,cr
,dd
,ir
,mtop
,box
,ddbox
,
6669 comm
->eDLB
!=edlbNO
,dlb_scale
,
6670 comm
->cellsize_limit
,comm
->cutoff
,
6671 comm
->bInterCGBondeds
,comm
->bInterCGMultiBody
);
6673 if (dd
->nc
[XX
] == 0)
6675 bC
= (dd
->bInterCGcons
&& rconstr
> r_bonded_limit
);
6676 sprintf(buf
,"Change the number of nodes or mdrun option %s%s%s",
6677 !bC
? "-rdd" : "-rcon",
6678 comm
->eDLB
!=edlbNO
? " or -dds" : "",
6679 bC
? " or your LINCS settings" : "");
6681 gmx_fatal_collective(FARGS
,cr
,NULL
,
6682 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6684 "Look in the log file for details on the domain decomposition",
6685 cr
->nnodes
-cr
->npmenodes
,limit
,buf
);
6687 set_dd_dim(fplog
,dd
);
6693 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6694 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
],cr
->npmenodes
);
6697 dd
->nnodes
= dd
->nc
[XX
]*dd
->nc
[YY
]*dd
->nc
[ZZ
];
6698 if (cr
->nnodes
- dd
->nnodes
!= cr
->npmenodes
)
6700 gmx_fatal_collective(FARGS
,cr
,NULL
,
6701 "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6702 dd
->nnodes
,cr
->nnodes
- cr
->npmenodes
,cr
->nnodes
);
6704 if (cr
->npmenodes
> dd
->nnodes
)
6706 gmx_fatal_collective(FARGS
,cr
,NULL
,
6707 "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.",cr
->npmenodes
,dd
->nnodes
);
6709 if (cr
->npmenodes
> 0)
6711 comm
->npmenodes
= cr
->npmenodes
;
6715 comm
->npmenodes
= dd
->nnodes
;
6718 if (EEL_PME(ir
->coulombtype
))
6720 /* The following choices should match those
6721 * in comm_cost_est in domdec_setup.c.
6722 * Note that here the checks have to take into account
6723 * that the decomposition might occur in a different order than xyz
6724 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
6725 * in which case they will not match those in comm_cost_est,
6726 * but since that is mainly for testing purposes that's fine.
6728 if (dd
->ndim
>= 2 && dd
->dim
[0] == XX
&& dd
->dim
[1] == YY
&&
6729 comm
->npmenodes
> dd
->nc
[XX
] && comm
->npmenodes
% dd
->nc
[XX
] == 0 &&
6730 getenv("GMX_PMEONEDD") == NULL
)
6732 comm
->npmedecompdim
= 2;
6733 comm
->npmenodes_x
= dd
->nc
[XX
];
6734 comm
->npmenodes_y
= comm
->npmenodes
/comm
->npmenodes_x
;
6738 /* In case nc is 1 in both x and y we could still choose to
6739 * decompose pme in y instead of x, but we use x for simplicity.
6741 comm
->npmedecompdim
= 1;
6742 if (dd
->dim
[0] == YY
)
6744 comm
->npmenodes_x
= 1;
6745 comm
->npmenodes_y
= comm
->npmenodes
;
6749 comm
->npmenodes_x
= comm
->npmenodes
;
6750 comm
->npmenodes_y
= 1;
6755 fprintf(fplog
,"PME domain decomposition: %d x %d x %d\n",
6756 comm
->npmenodes_x
,comm
->npmenodes_y
,1);
6761 comm
->npmedecompdim
= 0;
6762 comm
->npmenodes_x
= 0;
6763 comm
->npmenodes_y
= 0;
6766 /* Technically we don't need both of these,
6767 * but it simplifies code not having to recalculate it.
6769 *npme_x
= comm
->npmenodes_x
;
6770 *npme_y
= comm
->npmenodes_y
;
6772 snew(comm
->slb_frac
,DIM
);
6773 if (comm
->eDLB
== edlbNO
)
6775 comm
->slb_frac
[XX
] = get_slb_frac(fplog
,"x",dd
->nc
[XX
],sizex
);
6776 comm
->slb_frac
[YY
] = get_slb_frac(fplog
,"y",dd
->nc
[YY
],sizey
);
6777 comm
->slb_frac
[ZZ
] = get_slb_frac(fplog
,"z",dd
->nc
[ZZ
],sizez
);
6780 if (comm
->bInterCGBondeds
&& comm
->cutoff_mbody
== 0)
6782 if (comm
->bBondComm
|| comm
->eDLB
!= edlbNO
)
6784 /* Set the bonded communication distance to halfway
6785 * between the minimum and the maximum,
6786 * since the extra communication cost is nearly zero.
6788 acs
= average_cellsize_min(dd
,ddbox
);
6789 comm
->cutoff_mbody
= 0.5*(r_bonded
+ acs
);
6790 if (comm
->eDLB
!= edlbNO
)
6792 /* Check if this does not limit the scaling */
6793 comm
->cutoff_mbody
= min(comm
->cutoff_mbody
,dlb_scale
*acs
);
6795 if (!comm
->bBondComm
)
6797 /* Without bBondComm do not go beyond the n.b. cut-off */
6798 comm
->cutoff_mbody
= min(comm
->cutoff_mbody
,comm
->cutoff
);
6799 if (comm
->cellsize_limit
>= comm
->cutoff
)
6801 /* We don't lose a lot of efficiency
6802 * when increasing it to the n.b. cut-off.
6803 * It can even be slightly faster, because we need
6804 * fewer checks for the communication setup.
6806 comm
->cutoff_mbody
= comm
->cutoff
;
6809 /* Check if we did not end up below our original limit */
6810 comm
->cutoff_mbody
= max(comm
->cutoff_mbody
,r_bonded_limit
);
6812 if (comm
->cutoff_mbody
> comm
->cellsize_limit
)
6814 comm
->cellsize_limit
= comm
->cutoff_mbody
;
6817 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6822 fprintf(debug
,"Bonded atom communication beyond the cut-off: %d\n"
6823 "cellsize limit %f\n",
6824 comm
->bBondComm
,comm
->cellsize_limit
);
6829 check_dd_restrictions(cr
,dd
,ir
,fplog
);
6832 comm
->partition_step
= INT_MIN
;
6835 clear_dd_cycle_counts(dd
);
6840 static void set_dlb_limits(gmx_domdec_t
*dd
)
6845 for(d
=0; d
<dd
->ndim
; d
++)
6847 dd
->comm
->cd
[d
].np
= dd
->comm
->cd
[d
].np_dlb
;
6848 dd
->comm
->cellsize_min
[dd
->dim
[d
]] =
6849 dd
->comm
->cellsize_min_dlb
[dd
->dim
[d
]];
6854 static void turn_on_dlb(FILE *fplog
,t_commrec
*cr
,gmx_large_int_t step
)
6857 gmx_domdec_comm_t
*comm
;
6867 fprintf(fplog
,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step
,buf
),dd_force_imb_perf_loss(dd
)*100);
6870 cellsize_min
= comm
->cellsize_min
[dd
->dim
[0]];
6871 for(d
=1; d
<dd
->ndim
; d
++)
6873 cellsize_min
= min(cellsize_min
,comm
->cellsize_min
[dd
->dim
[d
]]);
6876 if (cellsize_min
< comm
->cellsize_limit
*1.05)
6878 dd_warning(cr
,fplog
,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
6880 /* Change DLB from "auto" to "no". */
6881 comm
->eDLB
= edlbNO
;
6886 dd_warning(cr
,fplog
,"NOTE: Turning on dynamic load balancing\n");
6887 comm
->bDynLoadBal
= TRUE
;
6888 dd
->bGridJump
= TRUE
;
6892 /* We can set the required cell size info here,
6893 * so we do not need to communicate this.
6894 * The grid is completely uniform.
6896 for(d
=0; d
<dd
->ndim
; d
++)
6900 comm
->load
[d
].sum_m
= comm
->load
[d
].sum
;
6902 nc
= dd
->nc
[dd
->dim
[d
]];
6905 comm
->root
[d
]->cell_f
[i
] = i
/(real
)nc
;
6908 comm
->root
[d
]->cell_f_max0
[i
] = i
/(real
)nc
;
6909 comm
->root
[d
]->cell_f_min1
[i
] = (i
+1)/(real
)nc
;
6912 comm
->root
[d
]->cell_f
[nc
] = 1.0;
6917 static char *init_bLocalCG(gmx_mtop_t
*mtop
)
6922 ncg
= ncg_mtop(mtop
);
6924 for(cg
=0; cg
<ncg
; cg
++)
6926 bLocalCG
[cg
] = FALSE
;
6932 void dd_init_bondeds(FILE *fplog
,
6933 gmx_domdec_t
*dd
,gmx_mtop_t
*mtop
,
6934 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
6935 t_inputrec
*ir
,gmx_bool bBCheck
,cginfo_mb_t
*cginfo_mb
)
6937 gmx_domdec_comm_t
*comm
;
6941 dd_make_reverse_top(fplog
,dd
,mtop
,vsite
,constr
,ir
,bBCheck
);
6945 if (comm
->bBondComm
)
6947 /* Communicate atoms beyond the cut-off for bonded interactions */
6950 comm
->cglink
= make_charge_group_links(mtop
,dd
,cginfo_mb
);
6952 comm
->bLocalCG
= init_bLocalCG(mtop
);
6956 /* Only communicate atoms based on cut-off */
6957 comm
->cglink
= NULL
;
6958 comm
->bLocalCG
= NULL
;
6962 static void print_dd_settings(FILE *fplog
,gmx_domdec_t
*dd
,
6964 gmx_bool bDynLoadBal
,real dlb_scale
,
6967 gmx_domdec_comm_t
*comm
;
6982 fprintf(fplog
,"The maximum number of communication pulses is:");
6983 for(d
=0; d
<dd
->ndim
; d
++)
6985 fprintf(fplog
," %c %d",dim2char(dd
->dim
[d
]),comm
->cd
[d
].np_dlb
);
6987 fprintf(fplog
,"\n");
6988 fprintf(fplog
,"The minimum size for domain decomposition cells is %.3f nm\n",comm
->cellsize_limit
);
6989 fprintf(fplog
,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale
);
6990 fprintf(fplog
,"The allowed shrink of domain decomposition cells is:");
6991 for(d
=0; d
<DIM
; d
++)
6995 if (d
>= ddbox
->npbcdim
&& dd
->nc
[d
] == 2)
7002 comm
->cellsize_min_dlb
[d
]/
7003 (ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
]/dd
->nc
[d
]);
7005 fprintf(fplog
," %c %.2f",dim2char(d
),shrink
);
7008 fprintf(fplog
,"\n");
7012 set_dd_cell_sizes_slb(dd
,ddbox
,FALSE
,np
);
7013 fprintf(fplog
,"The initial number of communication pulses is:");
7014 for(d
=0; d
<dd
->ndim
; d
++)
7016 fprintf(fplog
," %c %d",dim2char(dd
->dim
[d
]),np
[dd
->dim
[d
]]);
7018 fprintf(fplog
,"\n");
7019 fprintf(fplog
,"The initial domain decomposition cell size is:");
7020 for(d
=0; d
<DIM
; d
++) {
7023 fprintf(fplog
," %c %.2f nm",
7024 dim2char(d
),dd
->comm
->cellsize_min
[d
]);
7027 fprintf(fplog
,"\n\n");
7030 if (comm
->bInterCGBondeds
|| dd
->vsite_comm
|| dd
->constraint_comm
)
7032 fprintf(fplog
,"The maximum allowed distance for charge groups involved in interactions is:\n");
7033 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
7034 "non-bonded interactions","",comm
->cutoff
);
7038 limit
= dd
->comm
->cellsize_limit
;
7042 if (dynamic_dd_box(ddbox
,ir
))
7044 fprintf(fplog
,"(the following are initial values, they could change due to box deformation)\n");
7046 limit
= dd
->comm
->cellsize_min
[XX
];
7047 for(d
=1; d
<DIM
; d
++)
7049 limit
= min(limit
,dd
->comm
->cellsize_min
[d
]);
7053 if (comm
->bInterCGBondeds
)
7055 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
7056 "two-body bonded interactions","(-rdd)",
7057 max(comm
->cutoff
,comm
->cutoff_mbody
));
7058 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
7059 "multi-body bonded interactions","(-rdd)",
7060 (comm
->bBondComm
|| dd
->bGridJump
) ? comm
->cutoff_mbody
: min(comm
->cutoff
,limit
));
7064 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
7065 "virtual site constructions","(-rcon)",limit
);
7067 if (dd
->constraint_comm
)
7069 sprintf(buf
,"atoms separated by up to %d constraints",
7071 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
7072 buf
,"(-rcon)",limit
);
7074 fprintf(fplog
,"\n");
7080 static void set_cell_limits_dlb(gmx_domdec_t
*dd
,
7082 const t_inputrec
*ir
,
7083 const gmx_ddbox_t
*ddbox
)
7085 gmx_domdec_comm_t
*comm
;
7086 int d
,dim
,npulse
,npulse_d_max
,npulse_d
;
7091 bNoCutOff
= (ir
->rvdw
== 0 || ir
->rcoulomb
== 0);
7093 /* Determine the maximum number of comm. pulses in one dimension */
7095 comm
->cellsize_limit
= max(comm
->cellsize_limit
,comm
->cutoff_mbody
);
7097 /* Determine the maximum required number of grid pulses */
7098 if (comm
->cellsize_limit
>= comm
->cutoff
)
7100 /* Only a single pulse is required */
7103 else if (!bNoCutOff
&& comm
->cellsize_limit
> 0)
7105 /* We round down slightly here to avoid overhead due to the latency
7106 * of extra communication calls when the cut-off
7107 * would be only slightly longer than the cell size.
7108 * Later cellsize_limit is redetermined,
7109 * so we can not miss interactions due to this rounding.
7111 npulse
= (int)(0.96 + comm
->cutoff
/comm
->cellsize_limit
);
7115 /* There is no cell size limit */
7116 npulse
= max(dd
->nc
[XX
]-1,max(dd
->nc
[YY
]-1,dd
->nc
[ZZ
]-1));
7119 if (!bNoCutOff
&& npulse
> 1)
7121 /* See if we can do with fewer pulses, based on dlb_scale */
7123 for(d
=0; d
<dd
->ndim
; d
++)
7126 npulse_d
= (int)(1 + dd
->nc
[dim
]*comm
->cutoff
7127 /(ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
]*dlb_scale
));
7128 npulse_d_max
= max(npulse_d_max
,npulse_d
);
7130 npulse
= min(npulse
,npulse_d_max
);
7133 /* This env var can override npulse */
7134 d
= dd_nst_env(debug
,"GMX_DD_NPULSE",0);
7141 comm
->bVacDLBNoLimit
= (ir
->ePBC
== epbcNONE
);
7142 for(d
=0; d
<dd
->ndim
; d
++)
7144 comm
->cd
[d
].np_dlb
= min(npulse
,dd
->nc
[dd
->dim
[d
]]-1);
7145 comm
->cd
[d
].np_nalloc
= comm
->cd
[d
].np_dlb
;
7146 snew(comm
->cd
[d
].ind
,comm
->cd
[d
].np_nalloc
);
7147 comm
->maxpulse
= max(comm
->maxpulse
,comm
->cd
[d
].np_dlb
);
7148 if (comm
->cd
[d
].np_dlb
< dd
->nc
[dd
->dim
[d
]]-1)
7150 comm
->bVacDLBNoLimit
= FALSE
;
7154 /* cellsize_limit is set for LINCS in init_domain_decomposition */
7155 if (!comm
->bVacDLBNoLimit
)
7157 comm
->cellsize_limit
= max(comm
->cellsize_limit
,
7158 comm
->cutoff
/comm
->maxpulse
);
7160 comm
->cellsize_limit
= max(comm
->cellsize_limit
,comm
->cutoff_mbody
);
7161 /* Set the minimum cell size for each DD dimension */
7162 for(d
=0; d
<dd
->ndim
; d
++)
7164 if (comm
->bVacDLBNoLimit
||
7165 comm
->cd
[d
].np_dlb
*comm
->cellsize_limit
>= comm
->cutoff
)
7167 comm
->cellsize_min_dlb
[dd
->dim
[d
]] = comm
->cellsize_limit
;
7171 comm
->cellsize_min_dlb
[dd
->dim
[d
]] =
7172 comm
->cutoff
/comm
->cd
[d
].np_dlb
;
7175 if (comm
->cutoff_mbody
<= 0)
7177 comm
->cutoff_mbody
= min(comm
->cutoff
,comm
->cellsize_limit
);
7179 if (comm
->bDynLoadBal
)
7185 gmx_bool
dd_bonded_molpbc(gmx_domdec_t
*dd
,int ePBC
)
7187 /* If each molecule is a single charge group
7188 * or we use domain decomposition for each periodic dimension,
7189 * we do not need to take pbc into account for the bonded interactions.
7191 return (ePBC
!= epbcNONE
&& dd
->comm
->bInterCGBondeds
&&
7194 (dd
->nc
[ZZ
]>1 || ePBC
==epbcXY
)));
7197 void set_dd_parameters(FILE *fplog
,gmx_domdec_t
*dd
,real dlb_scale
,
7198 t_inputrec
*ir
,t_forcerec
*fr
,
7201 gmx_domdec_comm_t
*comm
;
7207 /* Initialize the thread data.
7208 * This can not be done in init_domain_decomposition,
7209 * as the number of threads is determined later.
7211 comm
->nth
= gmx_omp_nthreads_get(emntDomdec
);
7214 snew(comm
->dth
,comm
->nth
);
7217 if (EEL_PME(ir
->coulombtype
))
7219 init_ddpme(dd
,&comm
->ddpme
[0],0);
7220 if (comm
->npmedecompdim
>= 2)
7222 init_ddpme(dd
,&comm
->ddpme
[1],1);
7227 comm
->npmenodes
= 0;
7228 if (dd
->pme_nodeid
>= 0)
7230 gmx_fatal_collective(FARGS
,NULL
,dd
,
7231 "Can not have separate PME nodes without PME electrostatics");
7237 fprintf(debug
,"The DD cut-off is %f\n",comm
->cutoff
);
7239 if (comm
->eDLB
!= edlbNO
)
7241 set_cell_limits_dlb(dd
,dlb_scale
,ir
,ddbox
);
7244 print_dd_settings(fplog
,dd
,ir
,comm
->bDynLoadBal
,dlb_scale
,ddbox
);
7245 if (comm
->eDLB
== edlbAUTO
)
7249 fprintf(fplog
,"When dynamic load balancing gets turned on, these settings will change to:\n");
7251 print_dd_settings(fplog
,dd
,ir
,TRUE
,dlb_scale
,ddbox
);
7254 if (ir
->ePBC
== epbcNONE
)
7256 vol_frac
= 1 - 1/(double)dd
->nnodes
;
7261 (1 + comm_box_frac(dd
->nc
,comm
->cutoff
,ddbox
))/(double)dd
->nnodes
;
7265 fprintf(debug
,"Volume fraction for all DD zones: %f\n",vol_frac
);
7267 natoms_tot
= comm
->cgs_gl
.index
[comm
->cgs_gl
.nr
];
7269 dd
->ga2la
= ga2la_init(natoms_tot
,vol_frac
*natoms_tot
);
7272 gmx_bool
change_dd_cutoff(t_commrec
*cr
,t_state
*state
,t_inputrec
*ir
,
7283 set_ddbox(dd
,FALSE
,cr
,ir
,state
->box
,
7284 TRUE
,&dd
->comm
->cgs_gl
,state
->x
,&ddbox
);
7288 for(d
=0; d
<dd
->ndim
; d
++)
7292 inv_cell_size
= DD_CELL_MARGIN
*dd
->nc
[dim
]/ddbox
.box_size
[dim
];
7293 if (dynamic_dd_box(&ddbox
,ir
))
7295 inv_cell_size
*= DD_PRES_SCALE_MARGIN
;
7298 np
= 1 + (int)(cutoff_req
*inv_cell_size
*ddbox
.skew_fac
[dim
]);
7300 if (dd
->comm
->eDLB
!= edlbNO
&& dim
< ddbox
.npbcdim
&&
7301 dd
->comm
->cd
[d
].np_dlb
> 0)
7303 if (np
> dd
->comm
->cd
[d
].np_dlb
)
7308 /* If a current local cell size is smaller than the requested
7309 * cut-off, we could still fix it, but this gets very complicated.
7310 * Without fixing here, we might actually need more checks.
7312 if ((dd
->comm
->cell_x1
[dim
] - dd
->comm
->cell_x0
[dim
])*ddbox
.skew_fac
[dim
]*dd
->comm
->cd
[d
].np_dlb
< cutoff_req
)
7319 if (dd
->comm
->eDLB
!= edlbNO
)
7321 /* If DLB is not active yet, we don't need to check the grid jumps.
7322 * Actually we shouldn't, because then the grid jump data is not set.
7324 if (dd
->comm
->bDynLoadBal
&&
7325 check_grid_jump(0,dd
,cutoff_req
,&ddbox
,FALSE
))
7330 gmx_sumi(1,&LocallyLimited
,cr
);
7332 if (LocallyLimited
> 0)
7338 dd
->comm
->cutoff
= cutoff_req
;
7343 static void merge_cg_buffers(int ncell
,
7344 gmx_domdec_comm_dim_t
*cd
, int pulse
,
7346 int *index_gl
, int *recv_i
,
7347 rvec
*cg_cm
, rvec
*recv_vr
,
7349 cginfo_mb_t
*cginfo_mb
,int *cginfo
)
7351 gmx_domdec_ind_t
*ind
,*ind_p
;
7352 int p
,cell
,c
,cg
,cg0
,cg1
,cg_gl
,nat
;
7355 ind
= &cd
->ind
[pulse
];
7357 /* First correct the already stored data */
7358 shift
= ind
->nrecv
[ncell
];
7359 for(cell
=ncell
-1; cell
>=0; cell
--)
7361 shift
-= ind
->nrecv
[cell
];
7364 /* Move the cg's present from previous grid pulses */
7365 cg0
= ncg_cell
[ncell
+cell
];
7366 cg1
= ncg_cell
[ncell
+cell
+1];
7367 cgindex
[cg1
+shift
] = cgindex
[cg1
];
7368 for(cg
=cg1
-1; cg
>=cg0
; cg
--)
7370 index_gl
[cg
+shift
] = index_gl
[cg
];
7371 copy_rvec(cg_cm
[cg
],cg_cm
[cg
+shift
]);
7372 cgindex
[cg
+shift
] = cgindex
[cg
];
7373 cginfo
[cg
+shift
] = cginfo
[cg
];
7375 /* Correct the already stored send indices for the shift */
7376 for(p
=1; p
<=pulse
; p
++)
7378 ind_p
= &cd
->ind
[p
];
7380 for(c
=0; c
<cell
; c
++)
7382 cg0
+= ind_p
->nsend
[c
];
7384 cg1
= cg0
+ ind_p
->nsend
[cell
];
7385 for(cg
=cg0
; cg
<cg1
; cg
++)
7387 ind_p
->index
[cg
] += shift
;
7393 /* Merge in the communicated buffers */
7397 for(cell
=0; cell
<ncell
; cell
++)
7399 cg1
= ncg_cell
[ncell
+cell
+1] + shift
;
7402 /* Correct the old cg indices */
7403 for(cg
=ncg_cell
[ncell
+cell
]; cg
<cg1
; cg
++)
7405 cgindex
[cg
+1] += shift_at
;
7408 for(cg
=0; cg
<ind
->nrecv
[cell
]; cg
++)
7410 /* Copy this charge group from the buffer */
7411 index_gl
[cg1
] = recv_i
[cg0
];
7412 copy_rvec(recv_vr
[cg0
],cg_cm
[cg1
]);
7413 /* Add it to the cgindex */
7414 cg_gl
= index_gl
[cg1
];
7415 cginfo
[cg1
] = ddcginfo(cginfo_mb
,cg_gl
);
7416 nat
= GET_CGINFO_NATOMS(cginfo
[cg1
]);
7417 cgindex
[cg1
+1] = cgindex
[cg1
] + nat
;
7422 shift
+= ind
->nrecv
[cell
];
7423 ncg_cell
[ncell
+cell
+1] = cg1
;
7427 static void make_cell2at_index(gmx_domdec_comm_dim_t
*cd
,
7428 int nzone
,int cg0
,const int *cgindex
)
7432 /* Store the atom block boundaries for easy copying of communication buffers
7435 for(zone
=0; zone
<nzone
; zone
++)
7437 for(p
=0; p
<cd
->np
; p
++) {
7438 cd
->ind
[p
].cell2at0
[zone
] = cgindex
[cg
];
7439 cg
+= cd
->ind
[p
].nrecv
[zone
];
7440 cd
->ind
[p
].cell2at1
[zone
] = cgindex
[cg
];
7445 static gmx_bool
missing_link(t_blocka
*link
,int cg_gl
,char *bLocalCG
)
7451 for(i
=link
->index
[cg_gl
]; i
<link
->index
[cg_gl
+1]; i
++)
7453 if (!bLocalCG
[link
->a
[i
]])
7462 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
7464 real c
[DIM
][4]; /* the corners for the non-bonded communication */
7465 real cr0
; /* corner for rounding */
7466 real cr1
[4]; /* corners for rounding */
7467 real bc
[DIM
]; /* corners for bonded communication */
7468 real bcr1
; /* corner for rounding for bonded communication */
7471 /* Determine the corners of the domain(s) we are communicating with */
7473 set_dd_corners(const gmx_domdec_t
*dd
,
7474 int dim0
, int dim1
, int dim2
,
7478 const gmx_domdec_comm_t
*comm
;
7479 const gmx_domdec_zones_t
*zones
;
7484 zones
= &comm
->zones
;
7486 /* Keep the compiler happy */
7490 /* The first dimension is equal for all cells */
7491 c
->c
[0][0] = comm
->cell_x0
[dim0
];
7494 c
->bc
[0] = c
->c
[0][0];
7499 /* This cell row is only seen from the first row */
7500 c
->c
[1][0] = comm
->cell_x0
[dim1
];
7501 /* All rows can see this row */
7502 c
->c
[1][1] = comm
->cell_x0
[dim1
];
7505 c
->c
[1][1] = max(comm
->cell_x0
[dim1
],comm
->zone_d1
[1].mch0
);
7508 /* For the multi-body distance we need the maximum */
7509 c
->bc
[1] = max(comm
->cell_x0
[dim1
],comm
->zone_d1
[1].p1_0
);
7512 /* Set the upper-right corner for rounding */
7513 c
->cr0
= comm
->cell_x1
[dim0
];
7520 c
->c
[2][j
] = comm
->cell_x0
[dim2
];
7524 /* Use the maximum of the i-cells that see a j-cell */
7525 for(i
=0; i
<zones
->nizone
; i
++)
7527 for(j
=zones
->izone
[i
].j0
; j
<zones
->izone
[i
].j1
; j
++)
7533 comm
->zone_d2
[zones
->shift
[i
][dim0
]][zones
->shift
[i
][dim1
]].mch0
);
7539 /* For the multi-body distance we need the maximum */
7540 c
->bc
[2] = comm
->cell_x0
[dim2
];
7545 c
->bc
[2] = max(c
->bc
[2],comm
->zone_d2
[i
][j
].p1_0
);
7551 /* Set the upper-right corner for rounding */
7552 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7553 * Only cell (0,0,0) can see cell 7 (1,1,1)
7555 c
->cr1
[0] = comm
->cell_x1
[dim1
];
7556 c
->cr1
[3] = comm
->cell_x1
[dim1
];
7559 c
->cr1
[0] = max(comm
->cell_x1
[dim1
],comm
->zone_d1
[1].mch1
);
7562 /* For the multi-body distance we need the maximum */
7563 c
->bcr1
= max(comm
->cell_x1
[dim1
],comm
->zone_d1
[1].p1_1
);
7570 /* Determine which cg's we need to send in this pulse from this zone */
7572 get_zone_pulse_cgs(gmx_domdec_t
*dd
,
7573 int zonei
, int zone
,
7575 const int *index_gl
,
7577 int dim
, int dim_ind
,
7578 int dim0
, int dim1
, int dim2
,
7579 real r_comm2
, real r_bcomm2
,
7583 real skew_fac2_d
, real skew_fac_01
,
7584 rvec
*v_d
, rvec
*v_0
, rvec
*v_1
,
7585 const dd_corners_t
*c
,
7587 gmx_bool bDistBonded
,
7593 gmx_domdec_ind_t
*ind
,
7594 int **ibuf
, int *ibuf_nalloc
,
7600 gmx_domdec_comm_t
*comm
;
7602 gmx_bool bDistMB_pulse
;
7604 real r2
,rb2
,r
,tric_sh
;
7607 int nsend_z
,nsend
,nat
;
7611 bScrew
= (dd
->bScrewPBC
&& dim
== XX
);
7613 bDistMB_pulse
= (bDistMB
&& bDistBonded
);
7619 for(cg
=cg0
; cg
<cg1
; cg
++)
7623 if (tric_dist
[dim_ind
] == 0)
7625 /* Rectangular direction, easy */
7626 r
= cg_cm
[cg
][dim
] - c
->c
[dim_ind
][zone
];
7633 r
= cg_cm
[cg
][dim
] - c
->bc
[dim_ind
];
7639 /* Rounding gives at most a 16% reduction
7640 * in communicated atoms
7642 if (dim_ind
>= 1 && (zonei
== 1 || zonei
== 2))
7644 r
= cg_cm
[cg
][dim0
] - c
->cr0
;
7645 /* This is the first dimension, so always r >= 0 */
7652 if (dim_ind
== 2 && (zonei
== 2 || zonei
== 3))
7654 r
= cg_cm
[cg
][dim1
] - c
->cr1
[zone
];
7661 r
= cg_cm
[cg
][dim1
] - c
->bcr1
;
7671 /* Triclinic direction, more complicated */
7674 /* Rounding, conservative as the skew_fac multiplication
7675 * will slightly underestimate the distance.
7677 if (dim_ind
>= 1 && (zonei
== 1 || zonei
== 2))
7679 rn
[dim0
] = cg_cm
[cg
][dim0
] - c
->cr0
;
7680 for(i
=dim0
+1; i
<DIM
; i
++)
7682 rn
[dim0
] -= cg_cm
[cg
][i
]*v_0
[i
][dim0
];
7684 r2
= rn
[dim0
]*rn
[dim0
]*sf2_round
[dim0
];
7687 rb
[dim0
] = rn
[dim0
];
7690 /* Take care that the cell planes along dim0 might not
7691 * be orthogonal to those along dim1 and dim2.
7693 for(i
=1; i
<=dim_ind
; i
++)
7696 if (normal
[dim0
][dimd
] > 0)
7698 rn
[dimd
] -= rn
[dim0
]*normal
[dim0
][dimd
];
7701 rb
[dimd
] -= rb
[dim0
]*normal
[dim0
][dimd
];
7706 if (dim_ind
== 2 && (zonei
== 2 || zonei
== 3))
7708 rn
[dim1
] += cg_cm
[cg
][dim1
] - c
->cr1
[zone
];
7710 for(i
=dim1
+1; i
<DIM
; i
++)
7712 tric_sh
-= cg_cm
[cg
][i
]*v_1
[i
][dim1
];
7714 rn
[dim1
] += tric_sh
;
7717 r2
+= rn
[dim1
]*rn
[dim1
]*sf2_round
[dim1
];
7718 /* Take care of coupling of the distances
7719 * to the planes along dim0 and dim1 through dim2.
7721 r2
-= rn
[dim0
]*rn
[dim1
]*skew_fac_01
;
7722 /* Take care that the cell planes along dim1
7723 * might not be orthogonal to that along dim2.
7725 if (normal
[dim1
][dim2
] > 0)
7727 rn
[dim2
] -= rn
[dim1
]*normal
[dim1
][dim2
];
7733 cg_cm
[cg
][dim1
] - c
->bcr1
+ tric_sh
;
7736 rb2
+= rb
[dim1
]*rb
[dim1
]*sf2_round
[dim1
];
7737 /* Take care of coupling of the distances
7738 * to the planes along dim0 and dim1 through dim2.
7740 rb2
-= rb
[dim0
]*rb
[dim1
]*skew_fac_01
;
7741 /* Take care that the cell planes along dim1
7742 * might not be orthogonal to that along dim2.
7744 if (normal
[dim1
][dim2
] > 0)
7746 rb
[dim2
] -= rb
[dim1
]*normal
[dim1
][dim2
];
7751 /* The distance along the communication direction */
7752 rn
[dim
] += cg_cm
[cg
][dim
] - c
->c
[dim_ind
][zone
];
7754 for(i
=dim
+1; i
<DIM
; i
++)
7756 tric_sh
-= cg_cm
[cg
][i
]*v_d
[i
][dim
];
7761 r2
+= rn
[dim
]*rn
[dim
]*skew_fac2_d
;
7762 /* Take care of coupling of the distances
7763 * to the planes along dim0 and dim1 through dim2.
7765 if (dim_ind
== 1 && zonei
== 1)
7767 r2
-= rn
[dim0
]*rn
[dim
]*skew_fac_01
;
7773 rb
[dim
] += cg_cm
[cg
][dim
] - c
->bc
[dim_ind
] + tric_sh
;
7776 rb2
+= rb
[dim
]*rb
[dim
]*skew_fac2_d
;
7777 /* Take care of coupling of the distances
7778 * to the planes along dim0 and dim1 through dim2.
7780 if (dim_ind
== 1 && zonei
== 1)
7782 rb2
-= rb
[dim0
]*rb
[dim
]*skew_fac_01
;
7790 ((bDistMB
&& rb2
< r_bcomm2
) ||
7791 (bDist2B
&& r2
< r_bcomm2
)) &&
7793 (GET_CGINFO_BOND_INTER(cginfo
[cg
]) &&
7794 missing_link(comm
->cglink
,index_gl
[cg
],
7797 /* Make an index to the local charge groups */
7798 if (nsend
+1 > ind
->nalloc
)
7800 ind
->nalloc
= over_alloc_large(nsend
+1);
7801 srenew(ind
->index
,ind
->nalloc
);
7803 if (nsend
+1 > *ibuf_nalloc
)
7805 *ibuf_nalloc
= over_alloc_large(nsend
+1);
7806 srenew(*ibuf
,*ibuf_nalloc
);
7808 ind
->index
[nsend
] = cg
;
7809 (*ibuf
)[nsend
] = index_gl
[cg
];
7811 vec_rvec_check_alloc(vbuf
,nsend
+1);
7813 if (dd
->ci
[dim
] == 0)
7815 /* Correct cg_cm for pbc */
7816 rvec_add(cg_cm
[cg
],box
[dim
],vbuf
->v
[nsend
]);
7819 vbuf
->v
[nsend
][YY
] = box
[YY
][YY
] - vbuf
->v
[nsend
][YY
];
7820 vbuf
->v
[nsend
][ZZ
] = box
[ZZ
][ZZ
] - vbuf
->v
[nsend
][ZZ
];
7825 copy_rvec(cg_cm
[cg
],vbuf
->v
[nsend
]);
7828 nat
+= cgindex
[cg
+1] - cgindex
[cg
];
7834 *nsend_z_ptr
= nsend_z
;
7837 static void setup_dd_communication(gmx_domdec_t
*dd
,
7838 matrix box
,gmx_ddbox_t
*ddbox
,
7839 t_forcerec
*fr
,t_state
*state
,rvec
**f
)
7841 int dim_ind
,dim
,dim0
,dim1
,dim2
,dimd
,p
,nat_tot
;
7842 int nzone
,nzone_send
,zone
,zonei
,cg0
,cg1
;
7843 int c
,i
,j
,cg
,cg_gl
,nrcg
;
7844 int *zone_cg_range
,pos_cg
,*index_gl
,*cgindex
,*recv_i
;
7845 gmx_domdec_comm_t
*comm
;
7846 gmx_domdec_zones_t
*zones
;
7847 gmx_domdec_comm_dim_t
*cd
;
7848 gmx_domdec_ind_t
*ind
;
7849 cginfo_mb_t
*cginfo_mb
;
7850 gmx_bool bBondComm
,bDist2B
,bDistMB
,bDistBonded
;
7851 real r_mb
,r_comm2
,r_scomm2
,r_bcomm2
,r_0
,r_1
,r2inc
,inv_ncg
;
7852 dd_corners_t corners
;
7854 rvec
*cg_cm
,*normal
,*v_d
,*v_0
=NULL
,*v_1
=NULL
,*recv_vr
;
7855 real skew_fac2_d
,skew_fac_01
;
7862 fprintf(debug
,"Setting up DD communication\n");
7867 switch (fr
->cutoff_scheme
)
7876 gmx_incons("unimplemented");
7880 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
7882 dim
= dd
->dim
[dim_ind
];
7884 /* Check if we need to use triclinic distances */
7885 tric_dist
[dim_ind
] = 0;
7886 for(i
=0; i
<=dim_ind
; i
++)
7888 if (ddbox
->tric_dir
[dd
->dim
[i
]])
7890 tric_dist
[dim_ind
] = 1;
7895 bBondComm
= comm
->bBondComm
;
7897 /* Do we need to determine extra distances for multi-body bondeds? */
7898 bDistMB
= (comm
->bInterCGMultiBody
&& dd
->bGridJump
&& dd
->ndim
> 1);
7900 /* Do we need to determine extra distances for only two-body bondeds? */
7901 bDist2B
= (bBondComm
&& !bDistMB
);
7903 r_comm2
= sqr(comm
->cutoff
);
7904 r_bcomm2
= sqr(comm
->cutoff_mbody
);
7908 fprintf(debug
,"bBondComm %d, r_bc %f\n",bBondComm
,sqrt(r_bcomm2
));
7911 zones
= &comm
->zones
;
7914 dim1
= (dd
->ndim
>= 2 ? dd
->dim
[1] : -1);
7915 dim2
= (dd
->ndim
>= 3 ? dd
->dim
[2] : -1);
7917 set_dd_corners(dd
,dim0
,dim1
,dim2
,bDistMB
,&corners
);
7919 /* Triclinic stuff */
7920 normal
= ddbox
->normal
;
7924 v_0
= ddbox
->v
[dim0
];
7925 if (ddbox
->tric_dir
[dim0
] && ddbox
->tric_dir
[dim1
])
7927 /* Determine the coupling coefficient for the distances
7928 * to the cell planes along dim0 and dim1 through dim2.
7929 * This is required for correct rounding.
7932 ddbox
->v
[dim0
][dim1
+1][dim0
]*ddbox
->v
[dim1
][dim1
+1][dim1
];
7935 fprintf(debug
,"\nskew_fac_01 %f\n",skew_fac_01
);
7941 v_1
= ddbox
->v
[dim1
];
7944 zone_cg_range
= zones
->cg_range
;
7945 index_gl
= dd
->index_gl
;
7946 cgindex
= dd
->cgindex
;
7947 cginfo_mb
= fr
->cginfo_mb
;
7949 zone_cg_range
[0] = 0;
7950 zone_cg_range
[1] = dd
->ncg_home
;
7951 comm
->zone_ncg1
[0] = dd
->ncg_home
;
7952 pos_cg
= dd
->ncg_home
;
7954 nat_tot
= dd
->nat_home
;
7956 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
7958 dim
= dd
->dim
[dim_ind
];
7959 cd
= &comm
->cd
[dim_ind
];
7961 if (dim
>= ddbox
->npbcdim
&& dd
->ci
[dim
] == 0)
7963 /* No pbc in this dimension, the first node should not comm. */
7971 v_d
= ddbox
->v
[dim
];
7972 skew_fac2_d
= sqr(ddbox
->skew_fac
[dim
]);
7974 cd
->bInPlace
= TRUE
;
7975 for(p
=0; p
<cd
->np
; p
++)
7977 /* Only atoms communicated in the first pulse are used
7978 * for multi-body bonded interactions or for bBondComm.
7980 bDistBonded
= ((bDistMB
|| bDist2B
) && p
== 0);
7985 for(zone
=0; zone
<nzone_send
; zone
++)
7987 if (tric_dist
[dim_ind
] && dim_ind
> 0)
7989 /* Determine slightly more optimized skew_fac's
7991 * This reduces the number of communicated atoms
7992 * by about 10% for 3D DD of rhombic dodecahedra.
7994 for(dimd
=0; dimd
<dim
; dimd
++)
7996 sf2_round
[dimd
] = 1;
7997 if (ddbox
->tric_dir
[dimd
])
7999 for(i
=dd
->dim
[dimd
]+1; i
<DIM
; i
++)
8001 /* If we are shifted in dimension i
8002 * and the cell plane is tilted forward
8003 * in dimension i, skip this coupling.
8005 if (!(zones
->shift
[nzone
+zone
][i
] &&
8006 ddbox
->v
[dimd
][i
][dimd
] >= 0))
8009 sqr(ddbox
->v
[dimd
][i
][dimd
]);
8012 sf2_round
[dimd
] = 1/sf2_round
[dimd
];
8017 zonei
= zone_perm
[dim_ind
][zone
];
8020 /* Here we permute the zones to obtain a convenient order
8021 * for neighbor searching
8023 cg0
= zone_cg_range
[zonei
];
8024 cg1
= zone_cg_range
[zonei
+1];
8028 /* Look only at the cg's received in the previous grid pulse
8030 cg1
= zone_cg_range
[nzone
+zone
+1];
8031 cg0
= cg1
- cd
->ind
[p
-1].nrecv
[zone
];
8034 #pragma omp parallel for num_threads(comm->nth) schedule(static)
8035 for(th
=0; th
<comm
->nth
; th
++)
8037 gmx_domdec_ind_t
*ind_p
;
8038 int **ibuf_p
,*ibuf_nalloc_p
;
8040 int *nsend_p
,*nat_p
;
8046 /* Thread 0 writes in the comm buffers */
8048 ibuf_p
= &comm
->buf_int
;
8049 ibuf_nalloc_p
= &comm
->nalloc_int
;
8050 vbuf_p
= &comm
->vbuf
;
8053 nsend_zone_p
= &ind
->nsend
[zone
];
8057 /* Other threads write into temp buffers */
8058 ind_p
= &comm
->dth
[th
].ind
;
8059 ibuf_p
= &comm
->dth
[th
].ibuf
;
8060 ibuf_nalloc_p
= &comm
->dth
[th
].ibuf_nalloc
;
8061 vbuf_p
= &comm
->dth
[th
].vbuf
;
8062 nsend_p
= &comm
->dth
[th
].nsend
;
8063 nat_p
= &comm
->dth
[th
].nat
;
8064 nsend_zone_p
= &comm
->dth
[th
].nsend_zone
;
8066 comm
->dth
[th
].nsend
= 0;
8067 comm
->dth
[th
].nat
= 0;
8068 comm
->dth
[th
].nsend_zone
= 0;
8078 cg0_th
= cg0
+ ((cg1
- cg0
)* th
)/comm
->nth
;
8079 cg1_th
= cg0
+ ((cg1
- cg0
)*(th
+1))/comm
->nth
;
8082 /* Get the cg's for this pulse in this zone */
8083 get_zone_pulse_cgs(dd
,zonei
,zone
,cg0_th
,cg1_th
,
8085 dim
,dim_ind
,dim0
,dim1
,dim2
,
8088 normal
,skew_fac2_d
,skew_fac_01
,
8089 v_d
,v_0
,v_1
,&corners
,sf2_round
,
8090 bDistBonded
,bBondComm
,
8094 ibuf_p
,ibuf_nalloc_p
,
8100 /* Append data of threads>=1 to the communication buffers */
8101 for(th
=1; th
<comm
->nth
; th
++)
8103 dd_comm_setup_work_t
*dth
;
8106 dth
= &comm
->dth
[th
];
8108 ns1
= nsend
+ dth
->nsend_zone
;
8109 if (ns1
> ind
->nalloc
)
8111 ind
->nalloc
= over_alloc_dd(ns1
);
8112 srenew(ind
->index
,ind
->nalloc
);
8114 if (ns1
> comm
->nalloc_int
)
8116 comm
->nalloc_int
= over_alloc_dd(ns1
);
8117 srenew(comm
->buf_int
,comm
->nalloc_int
);
8119 if (ns1
> comm
->vbuf
.nalloc
)
8121 comm
->vbuf
.nalloc
= over_alloc_dd(ns1
);
8122 srenew(comm
->vbuf
.v
,comm
->vbuf
.nalloc
);
8125 for(i
=0; i
<dth
->nsend_zone
; i
++)
8127 ind
->index
[nsend
] = dth
->ind
.index
[i
];
8128 comm
->buf_int
[nsend
] = dth
->ibuf
[i
];
8129 copy_rvec(dth
->vbuf
.v
[i
],
8130 comm
->vbuf
.v
[nsend
]);
8134 ind
->nsend
[zone
] += dth
->nsend_zone
;
8137 /* Clear the counts in case we do not have pbc */
8138 for(zone
=nzone_send
; zone
<nzone
; zone
++)
8140 ind
->nsend
[zone
] = 0;
8142 ind
->nsend
[nzone
] = nsend
;
8143 ind
->nsend
[nzone
+1] = nat
;
8144 /* Communicate the number of cg's and atoms to receive */
8145 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
8146 ind
->nsend
, nzone
+2,
8147 ind
->nrecv
, nzone
+2);
8149 /* The rvec buffer is also required for atom buffers of size nsend
8150 * in dd_move_x and dd_move_f.
8152 vec_rvec_check_alloc(&comm
->vbuf
,ind
->nsend
[nzone
+1]);
8156 /* We can receive in place if only the last zone is not empty */
8157 for(zone
=0; zone
<nzone
-1; zone
++)
8159 if (ind
->nrecv
[zone
] > 0)
8161 cd
->bInPlace
= FALSE
;
8166 /* The int buffer is only required here for the cg indices */
8167 if (ind
->nrecv
[nzone
] > comm
->nalloc_int2
)
8169 comm
->nalloc_int2
= over_alloc_dd(ind
->nrecv
[nzone
]);
8170 srenew(comm
->buf_int2
,comm
->nalloc_int2
);
8172 /* The rvec buffer is also required for atom buffers
8173 * of size nrecv in dd_move_x and dd_move_f.
8175 i
= max(cd
->ind
[0].nrecv
[nzone
+1],ind
->nrecv
[nzone
+1]);
8176 vec_rvec_check_alloc(&comm
->vbuf2
,i
);
8180 /* Make space for the global cg indices */
8181 if (pos_cg
+ ind
->nrecv
[nzone
] > dd
->cg_nalloc
8182 || dd
->cg_nalloc
== 0)
8184 dd
->cg_nalloc
= over_alloc_dd(pos_cg
+ ind
->nrecv
[nzone
]);
8185 srenew(index_gl
,dd
->cg_nalloc
);
8186 srenew(cgindex
,dd
->cg_nalloc
+1);
8188 /* Communicate the global cg indices */
8191 recv_i
= index_gl
+ pos_cg
;
8195 recv_i
= comm
->buf_int2
;
8197 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
8198 comm
->buf_int
, nsend
,
8199 recv_i
, ind
->nrecv
[nzone
]);
8201 /* Make space for cg_cm */
8202 dd_check_alloc_ncg(fr
,state
,f
,pos_cg
+ ind
->nrecv
[nzone
]);
8203 if (fr
->cutoff_scheme
== ecutsGROUP
)
8211 /* Communicate cg_cm */
8214 recv_vr
= cg_cm
+ pos_cg
;
8218 recv_vr
= comm
->vbuf2
.v
;
8220 dd_sendrecv_rvec(dd
, dim_ind
, dddirBackward
,
8221 comm
->vbuf
.v
, nsend
,
8222 recv_vr
, ind
->nrecv
[nzone
]);
8224 /* Make the charge group index */
8227 zone
= (p
== 0 ? 0 : nzone
- 1);
8228 while (zone
< nzone
)
8230 for(cg
=0; cg
<ind
->nrecv
[zone
]; cg
++)
8232 cg_gl
= index_gl
[pos_cg
];
8233 fr
->cginfo
[pos_cg
] = ddcginfo(cginfo_mb
,cg_gl
);
8234 nrcg
= GET_CGINFO_NATOMS(fr
->cginfo
[pos_cg
]);
8235 cgindex
[pos_cg
+1] = cgindex
[pos_cg
] + nrcg
;
8238 /* Update the charge group presence,
8239 * so we can use it in the next pass of the loop.
8241 comm
->bLocalCG
[cg_gl
] = TRUE
;
8247 comm
->zone_ncg1
[nzone
+zone
] = ind
->nrecv
[zone
];
8250 zone_cg_range
[nzone
+zone
] = pos_cg
;
8255 /* This part of the code is never executed with bBondComm. */
8256 merge_cg_buffers(nzone
,cd
,p
,zone_cg_range
,
8257 index_gl
,recv_i
,cg_cm
,recv_vr
,
8258 cgindex
,fr
->cginfo_mb
,fr
->cginfo
);
8259 pos_cg
+= ind
->nrecv
[nzone
];
8261 nat_tot
+= ind
->nrecv
[nzone
+1];
8265 /* Store the atom block for easy copying of communication buffers */
8266 make_cell2at_index(cd
,nzone
,zone_cg_range
[nzone
],cgindex
);
8270 dd
->index_gl
= index_gl
;
8271 dd
->cgindex
= cgindex
;
8273 dd
->ncg_tot
= zone_cg_range
[zones
->n
];
8274 dd
->nat_tot
= nat_tot
;
8275 comm
->nat
[ddnatHOME
] = dd
->nat_home
;
8276 for(i
=ddnatZONE
; i
<ddnatNR
; i
++)
8278 comm
->nat
[i
] = dd
->nat_tot
;
8283 /* We don't need to update cginfo, since that was already done above.
8284 * So we pass NULL for the forcerec.
8286 dd_set_cginfo(dd
->index_gl
,dd
->ncg_home
,dd
->ncg_tot
,
8287 NULL
,comm
->bLocalCG
);
8292 fprintf(debug
,"Finished setting up DD communication, zones:");
8293 for(c
=0; c
<zones
->n
; c
++)
8295 fprintf(debug
," %d",zones
->cg_range
[c
+1]-zones
->cg_range
[c
]);
8297 fprintf(debug
,"\n");
8301 static void set_cg_boundaries(gmx_domdec_zones_t
*zones
)
8305 for(c
=0; c
<zones
->nizone
; c
++)
8307 zones
->izone
[c
].cg1
= zones
->cg_range
[c
+1];
8308 zones
->izone
[c
].jcg0
= zones
->cg_range
[zones
->izone
[c
].j0
];
8309 zones
->izone
[c
].jcg1
= zones
->cg_range
[zones
->izone
[c
].j1
];
8313 static void set_zones_size(gmx_domdec_t
*dd
,
8314 matrix box
,const gmx_ddbox_t
*ddbox
,
8315 int zone_start
,int zone_end
)
8317 gmx_domdec_comm_t
*comm
;
8318 gmx_domdec_zones_t
*zones
;
8320 int z
,zi
,zj0
,zj1
,d
,dim
;
8323 real size_j
,add_tric
;
8328 zones
= &comm
->zones
;
8330 /* Do we need to determine extra distances for multi-body bondeds? */
8331 bDistMB
= (comm
->bInterCGMultiBody
&& dd
->bGridJump
&& dd
->ndim
> 1);
8333 for(z
=zone_start
; z
<zone_end
; z
++)
8335 /* Copy cell limits to zone limits.
8336 * Valid for non-DD dims and non-shifted dims.
8338 copy_rvec(comm
->cell_x0
,zones
->size
[z
].x0
);
8339 copy_rvec(comm
->cell_x1
,zones
->size
[z
].x1
);
8342 for(d
=0; d
<dd
->ndim
; d
++)
8346 for(z
=0; z
<zones
->n
; z
++)
8348 /* With a staggered grid we have different sizes
8349 * for non-shifted dimensions.
8351 if (dd
->bGridJump
&& zones
->shift
[z
][dim
] == 0)
8355 zones
->size
[z
].x0
[dim
] = comm
->zone_d1
[zones
->shift
[z
][dd
->dim
[d
-1]]].min0
;
8356 zones
->size
[z
].x1
[dim
] = comm
->zone_d1
[zones
->shift
[z
][dd
->dim
[d
-1]]].max1
;
8360 zones
->size
[z
].x0
[dim
] = comm
->zone_d2
[zones
->shift
[z
][dd
->dim
[d
-2]]][zones
->shift
[z
][dd
->dim
[d
-1]]].min0
;
8361 zones
->size
[z
].x1
[dim
] = comm
->zone_d2
[zones
->shift
[z
][dd
->dim
[d
-2]]][zones
->shift
[z
][dd
->dim
[d
-1]]].max1
;
8367 rcmbs
= comm
->cutoff_mbody
;
8368 if (ddbox
->tric_dir
[dim
])
8370 rcs
/= ddbox
->skew_fac
[dim
];
8371 rcmbs
/= ddbox
->skew_fac
[dim
];
8374 /* Set the lower limit for the shifted zone dimensions */
8375 for(z
=zone_start
; z
<zone_end
; z
++)
8377 if (zones
->shift
[z
][dim
] > 0)
8380 if (!dd
->bGridJump
|| d
== 0)
8382 zones
->size
[z
].x0
[dim
] = comm
->cell_x1
[dim
];
8383 zones
->size
[z
].x1
[dim
] = comm
->cell_x1
[dim
] + rcs
;
8387 /* Here we take the lower limit of the zone from
8388 * the lowest domain of the zone below.
8392 zones
->size
[z
].x0
[dim
] =
8393 comm
->zone_d1
[zones
->shift
[z
][dd
->dim
[d
-1]]].min1
;
8399 zones
->size
[z
].x0
[dim
] =
8400 zones
->size
[zone_perm
[2][z
-4]].x0
[dim
];
8404 zones
->size
[z
].x0
[dim
] =
8405 comm
->zone_d2
[zones
->shift
[z
][dd
->dim
[d
-2]]][zones
->shift
[z
][dd
->dim
[d
-1]]].min1
;
8408 /* A temporary limit, is updated below */
8409 zones
->size
[z
].x1
[dim
] = zones
->size
[z
].x0
[dim
];
8413 for(zi
=0; zi
<zones
->nizone
; zi
++)
8415 if (zones
->shift
[zi
][dim
] == 0)
8417 /* This takes the whole zone into account.
8418 * With multiple pulses this will lead
8419 * to a larger zone than strictly necessary.
8421 zones
->size
[z
].x1
[dim
] = max(zones
->size
[z
].x1
[dim
],
8422 zones
->size
[zi
].x1
[dim
]+rcmbs
);
8430 /* Loop over the i-zones to set the upper limit of each
8433 for(zi
=0; zi
<zones
->nizone
; zi
++)
8435 if (zones
->shift
[zi
][dim
] == 0)
8437 for(z
=zones
->izone
[zi
].j0
; z
<zones
->izone
[zi
].j1
; z
++)
8439 if (zones
->shift
[z
][dim
] > 0)
8441 zones
->size
[z
].x1
[dim
] = max(zones
->size
[z
].x1
[dim
],
8442 zones
->size
[zi
].x1
[dim
]+rcs
);
8449 for(z
=zone_start
; z
<zone_end
; z
++)
8451 /* Initialization only required to keep the compiler happy */
8452 rvec corner_min
={0,0,0},corner_max
={0,0,0},corner
;
8455 /* To determine the bounding box for a zone we need to find
8456 * the extreme corners of 4, 2 or 1 corners.
8458 nc
= 1 << (ddbox
->npbcdim
- 1);
8462 /* Set up a zone corner at x=0, ignoring triclinic couplings */
8466 corner
[YY
] = zones
->size
[z
].x0
[YY
];
8470 corner
[YY
] = zones
->size
[z
].x1
[YY
];
8474 corner
[ZZ
] = zones
->size
[z
].x0
[ZZ
];
8478 corner
[ZZ
] = zones
->size
[z
].x1
[ZZ
];
8480 if (dd
->ndim
== 1 && box
[ZZ
][YY
] != 0)
8482 /* With 1D domain decomposition the cg's are not in
8483 * the triclinic box, but triclinic x-y and rectangular y-z.
8484 * Shift y back, so it will later end up at 0.
8486 corner
[YY
] -= corner
[ZZ
]*box
[ZZ
][YY
]/box
[ZZ
][ZZ
];
8488 /* Apply the triclinic couplings */
8489 for(i
=YY
; i
<ddbox
->npbcdim
; i
++)
8493 corner
[j
] += corner
[i
]*box
[i
][j
]/box
[i
][i
];
8498 copy_rvec(corner
,corner_min
);
8499 copy_rvec(corner
,corner_max
);
8503 for(i
=0; i
<DIM
; i
++)
8505 corner_min
[i
] = min(corner_min
[i
],corner
[i
]);
8506 corner_max
[i
] = max(corner_max
[i
],corner
[i
]);
8510 /* Copy the extreme corners without offset along x */
8511 for(i
=0; i
<DIM
; i
++)
8513 zones
->size
[z
].bb_x0
[i
] = corner_min
[i
];
8514 zones
->size
[z
].bb_x1
[i
] = corner_max
[i
];
8516 /* Add the offset along x */
8517 zones
->size
[z
].bb_x0
[XX
] += zones
->size
[z
].x0
[XX
];
8518 zones
->size
[z
].bb_x1
[XX
] += zones
->size
[z
].x1
[XX
];
8521 if (zone_start
== 0)
8524 for(dim
=0; dim
<DIM
; dim
++)
8526 vol
*= zones
->size
[0].x1
[dim
] - zones
->size
[0].x0
[dim
];
8528 zones
->dens_zone0
= (zones
->cg_range
[1] - zones
->cg_range
[0])/vol
;
8533 for(z
=zone_start
; z
<zone_end
; z
++)
8535 fprintf(debug
,"zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8537 zones
->size
[z
].x0
[XX
],zones
->size
[z
].x1
[XX
],
8538 zones
->size
[z
].x0
[YY
],zones
->size
[z
].x1
[YY
],
8539 zones
->size
[z
].x0
[ZZ
],zones
->size
[z
].x1
[ZZ
]);
8540 fprintf(debug
,"zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
8542 zones
->size
[z
].bb_x0
[XX
],zones
->size
[z
].bb_x1
[XX
],
8543 zones
->size
[z
].bb_x0
[YY
],zones
->size
[z
].bb_x1
[YY
],
8544 zones
->size
[z
].bb_x0
[ZZ
],zones
->size
[z
].bb_x1
[ZZ
]);
8549 static int comp_cgsort(const void *a
,const void *b
)
8553 gmx_cgsort_t
*cga
,*cgb
;
8554 cga
= (gmx_cgsort_t
*)a
;
8555 cgb
= (gmx_cgsort_t
*)b
;
8557 comp
= cga
->nsc
- cgb
->nsc
;
8560 comp
= cga
->ind_gl
- cgb
->ind_gl
;
8566 static void order_int_cg(int n
,const gmx_cgsort_t
*sort
,
8571 /* Order the data */
8574 buf
[i
] = a
[sort
[i
].ind
];
8577 /* Copy back to the original array */
8584 static void order_vec_cg(int n
,const gmx_cgsort_t
*sort
,
8589 /* Order the data */
8592 copy_rvec(v
[sort
[i
].ind
],buf
[i
]);
8595 /* Copy back to the original array */
8598 copy_rvec(buf
[i
],v
[i
]);
8602 static void order_vec_atom(int ncg
,const int *cgindex
,const gmx_cgsort_t
*sort
,
8605 int a
,atot
,cg
,cg0
,cg1
,i
;
8607 if (cgindex
== NULL
)
8609 /* Avoid the useless loop of the atoms within a cg */
8610 order_vec_cg(ncg
,sort
,v
,buf
);
8615 /* Order the data */
8617 for(cg
=0; cg
<ncg
; cg
++)
8619 cg0
= cgindex
[sort
[cg
].ind
];
8620 cg1
= cgindex
[sort
[cg
].ind
+1];
8621 for(i
=cg0
; i
<cg1
; i
++)
8623 copy_rvec(v
[i
],buf
[a
]);
8629 /* Copy back to the original array */
8630 for(a
=0; a
<atot
; a
++)
8632 copy_rvec(buf
[a
],v
[a
]);
8636 static void ordered_sort(int nsort2
,gmx_cgsort_t
*sort2
,
8637 int nsort_new
,gmx_cgsort_t
*sort_new
,
8638 gmx_cgsort_t
*sort1
)
8642 /* The new indices are not very ordered, so we qsort them */
8643 qsort_threadsafe(sort_new
,nsort_new
,sizeof(sort_new
[0]),comp_cgsort
);
8645 /* sort2 is already ordered, so now we can merge the two arrays */
8649 while(i2
< nsort2
|| i_new
< nsort_new
)
8653 sort1
[i1
++] = sort_new
[i_new
++];
8655 else if (i_new
== nsort_new
)
8657 sort1
[i1
++] = sort2
[i2
++];
8659 else if (sort2
[i2
].nsc
< sort_new
[i_new
].nsc
||
8660 (sort2
[i2
].nsc
== sort_new
[i_new
].nsc
&&
8661 sort2
[i2
].ind_gl
< sort_new
[i_new
].ind_gl
))
8663 sort1
[i1
++] = sort2
[i2
++];
8667 sort1
[i1
++] = sort_new
[i_new
++];
8672 static int dd_sort_order(gmx_domdec_t
*dd
,t_forcerec
*fr
,int ncg_home_old
)
8674 gmx_domdec_sort_t
*sort
;
8675 gmx_cgsort_t
*cgsort
,*sort_i
;
8676 int ncg_new
,nsort2
,nsort_new
,i
,*a
,moved
,*ibuf
;
8677 int sort_last
,sort_skip
;
8679 sort
= dd
->comm
->sort
;
8681 a
= fr
->ns
.grid
->cell_index
;
8683 moved
= NSGRID_SIGNAL_MOVED_FAC
*fr
->ns
.grid
->ncells
;
8685 if (ncg_home_old
>= 0)
8687 /* The charge groups that remained in the same ns grid cell
8688 * are completely ordered. So we can sort efficiently by sorting
8689 * the charge groups that did move into the stationary list.
8694 for(i
=0; i
<dd
->ncg_home
; i
++)
8696 /* Check if this cg did not move to another node */
8699 if (i
>= ncg_home_old
|| a
[i
] != sort
->sort
[i
].nsc
)
8701 /* This cg is new on this node or moved ns grid cell */
8702 if (nsort_new
>= sort
->sort_new_nalloc
)
8704 sort
->sort_new_nalloc
= over_alloc_dd(nsort_new
+1);
8705 srenew(sort
->sort_new
,sort
->sort_new_nalloc
);
8707 sort_i
= &(sort
->sort_new
[nsort_new
++]);
8711 /* This cg did not move */
8712 sort_i
= &(sort
->sort2
[nsort2
++]);
8714 /* Sort on the ns grid cell indices
8715 * and the global topology index.
8716 * index_gl is irrelevant with cell ns,
8717 * but we set it here anyhow to avoid a conditional.
8720 sort_i
->ind_gl
= dd
->index_gl
[i
];
8727 fprintf(debug
,"ordered sort cgs: stationary %d moved %d\n",
8730 /* Sort efficiently */
8731 ordered_sort(nsort2
,sort
->sort2
,nsort_new
,sort
->sort_new
,
8736 cgsort
= sort
->sort
;
8738 for(i
=0; i
<dd
->ncg_home
; i
++)
8740 /* Sort on the ns grid cell indices
8741 * and the global topology index
8743 cgsort
[i
].nsc
= a
[i
];
8744 cgsort
[i
].ind_gl
= dd
->index_gl
[i
];
8746 if (cgsort
[i
].nsc
< moved
)
8753 fprintf(debug
,"qsort cgs: %d new home %d\n",dd
->ncg_home
,ncg_new
);
8755 /* Determine the order of the charge groups using qsort */
8756 qsort_threadsafe(cgsort
,dd
->ncg_home
,sizeof(cgsort
[0]),comp_cgsort
);
8762 static int dd_sort_order_nbnxn(gmx_domdec_t
*dd
,t_forcerec
*fr
)
8765 int ncg_new
,i
,*a
,na
;
8767 sort
= dd
->comm
->sort
->sort
;
8769 nbnxn_get_atomorder(fr
->nbv
->nbs
,&a
,&na
);
8776 sort
[ncg_new
].ind
= a
[i
];
8784 static void dd_sort_state(gmx_domdec_t
*dd
,int ePBC
,
8785 rvec
*cgcm
,t_forcerec
*fr
,t_state
*state
,
8788 gmx_domdec_sort_t
*sort
;
8789 gmx_cgsort_t
*cgsort
,*sort_i
;
8791 int ncg_new
,i
,*ibuf
,cgsize
;
8794 sort
= dd
->comm
->sort
;
8796 if (dd
->ncg_home
> sort
->sort_nalloc
)
8798 sort
->sort_nalloc
= over_alloc_dd(dd
->ncg_home
);
8799 srenew(sort
->sort
,sort
->sort_nalloc
);
8800 srenew(sort
->sort2
,sort
->sort_nalloc
);
8802 cgsort
= sort
->sort
;
8804 switch (fr
->cutoff_scheme
)
8807 ncg_new
= dd_sort_order(dd
,fr
,ncg_home_old
);
8810 ncg_new
= dd_sort_order_nbnxn(dd
,fr
);
8813 gmx_incons("unimplemented");
8817 /* We alloc with the old size, since cgindex is still old */
8818 vec_rvec_check_alloc(&dd
->comm
->vbuf
,dd
->cgindex
[dd
->ncg_home
]);
8819 vbuf
= dd
->comm
->vbuf
.v
;
8823 cgindex
= dd
->cgindex
;
8830 /* Remove the charge groups which are no longer at home here */
8831 dd
->ncg_home
= ncg_new
;
8834 fprintf(debug
,"Set the new home charge group count to %d\n",
8838 /* Reorder the state */
8839 for(i
=0; i
<estNR
; i
++)
8841 if (EST_DISTR(i
) && (state
->flags
& (1<<i
)))
8846 order_vec_atom(dd
->ncg_home
,cgindex
,cgsort
,state
->x
,vbuf
);
8849 order_vec_atom(dd
->ncg_home
,cgindex
,cgsort
,state
->v
,vbuf
);
8852 order_vec_atom(dd
->ncg_home
,cgindex
,cgsort
,state
->sd_X
,vbuf
);
8855 order_vec_atom(dd
->ncg_home
,cgindex
,cgsort
,state
->cg_p
,vbuf
);
8859 case estDISRE_INITF
:
8860 case estDISRE_RM3TAV
:
8861 case estORIRE_INITF
:
8863 /* No ordering required */
8866 gmx_incons("Unknown state entry encountered in dd_sort_state");
8871 if (fr
->cutoff_scheme
== ecutsGROUP
)
8874 order_vec_cg(dd
->ncg_home
,cgsort
,cgcm
,vbuf
);
8877 if (dd
->ncg_home
+1 > sort
->ibuf_nalloc
)
8879 sort
->ibuf_nalloc
= over_alloc_dd(dd
->ncg_home
+1);
8880 srenew(sort
->ibuf
,sort
->ibuf_nalloc
);
8883 /* Reorder the global cg index */
8884 order_int_cg(dd
->ncg_home
,cgsort
,dd
->index_gl
,ibuf
);
8885 /* Reorder the cginfo */
8886 order_int_cg(dd
->ncg_home
,cgsort
,fr
->cginfo
,ibuf
);
8887 /* Rebuild the local cg index */
8891 for(i
=0; i
<dd
->ncg_home
; i
++)
8893 cgsize
= dd
->cgindex
[cgsort
[i
].ind
+1] - dd
->cgindex
[cgsort
[i
].ind
];
8894 ibuf
[i
+1] = ibuf
[i
] + cgsize
;
8896 for(i
=0; i
<dd
->ncg_home
+1; i
++)
8898 dd
->cgindex
[i
] = ibuf
[i
];
8903 for(i
=0; i
<dd
->ncg_home
+1; i
++)
8908 /* Set the home atom number */
8909 dd
->nat_home
= dd
->cgindex
[dd
->ncg_home
];
8911 if (fr
->cutoff_scheme
== ecutsVERLET
)
8913 /* The atoms are now exactly in grid order, update the grid order */
8914 nbnxn_set_atomorder(fr
->nbv
->nbs
);
8918 /* Copy the sorted ns cell indices back to the ns grid struct */
8919 for(i
=0; i
<dd
->ncg_home
; i
++)
8921 fr
->ns
.grid
->cell_index
[i
] = cgsort
[i
].nsc
;
8923 fr
->ns
.grid
->nr
= dd
->ncg_home
;
8927 static void add_dd_statistics(gmx_domdec_t
*dd
)
8929 gmx_domdec_comm_t
*comm
;
8934 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8936 comm
->sum_nat
[ddnat
-ddnatZONE
] +=
8937 comm
->nat
[ddnat
] - comm
->nat
[ddnat
-1];
8942 void reset_dd_statistics_counters(gmx_domdec_t
*dd
)
8944 gmx_domdec_comm_t
*comm
;
8949 /* Reset all the statistics and counters for total run counting */
8950 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8952 comm
->sum_nat
[ddnat
-ddnatZONE
] = 0;
8956 comm
->load_step
= 0;
8959 clear_ivec(comm
->load_lim
);
8964 void print_dd_statistics(t_commrec
*cr
,t_inputrec
*ir
,FILE *fplog
)
8966 gmx_domdec_comm_t
*comm
;
8970 comm
= cr
->dd
->comm
;
8972 gmx_sumd(ddnatNR
-ddnatZONE
,comm
->sum_nat
,cr
);
8979 fprintf(fplog
,"\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
8981 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
8983 av
= comm
->sum_nat
[ddnat
-ddnatZONE
]/comm
->ndecomp
;
8988 " av. #atoms communicated per step for force: %d x %.1f\n",
8992 if (cr
->dd
->vsite_comm
)
8995 " av. #atoms communicated per step for vsites: %d x %.1f\n",
8996 (EEL_PME(ir
->coulombtype
) || ir
->coulombtype
==eelEWALD
) ? 3 : 2,
9001 if (cr
->dd
->constraint_comm
)
9004 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
9005 1 + ir
->nLincsIter
,av
);
9009 gmx_incons(" Unknown type for DD statistics");
9012 fprintf(fplog
,"\n");
9014 if (comm
->bRecordLoad
&& EI_DYNAMICS(ir
->eI
))
9016 print_dd_load_av(fplog
,cr
->dd
);
9020 void dd_partition_system(FILE *fplog
,
9021 gmx_large_int_t step
,
9023 gmx_bool bMasterState
,
9025 t_state
*state_global
,
9026 gmx_mtop_t
*top_global
,
9028 t_state
*state_local
,
9031 gmx_localtop_t
*top_local
,
9034 gmx_shellfc_t shellfc
,
9035 gmx_constr_t constr
,
9037 gmx_wallcycle_t wcycle
,
9041 gmx_domdec_comm_t
*comm
;
9042 gmx_ddbox_t ddbox
={0};
9044 gmx_large_int_t step_pcoupl
;
9045 rvec cell_ns_x0
,cell_ns_x1
;
9046 int i
,j
,n
,cg0
=0,ncg_home_old
=-1,ncg_moved
,nat_f_novirsum
;
9047 gmx_bool bBoxChanged
,bNStGlobalComm
,bDoDLB
,bCheckDLB
,bTurnOnDLB
,bLogLoad
;
9048 gmx_bool bRedist
,bSortCG
,bResortAll
;
9049 ivec ncells_old
={0,0,0},ncells_new
={0,0,0},np
;
9056 bBoxChanged
= (bMasterState
|| DEFORM(*ir
));
9057 if (ir
->epc
!= epcNO
)
9059 /* With nstpcouple > 1 pressure coupling happens
9060 * one step after calculating the pressure.
9061 * Box scaling happens at the end of the MD step,
9062 * after the DD partitioning.
9063 * We therefore have to do DLB in the first partitioning
9064 * after an MD step where P-coupling occurred.
9065 * We need to determine the last step in which p-coupling occurred.
9066 * MRS -- need to validate this for vv?
9071 step_pcoupl
= step
- 1;
9075 step_pcoupl
= ((step
- 1)/n
)*n
+ 1;
9077 if (step_pcoupl
>= comm
->partition_step
)
9083 bNStGlobalComm
= (step
% nstglobalcomm
== 0);
9085 if (!comm
->bDynLoadBal
)
9091 /* Should we do dynamic load balancing this step?
9092 * Since it requires (possibly expensive) global communication,
9093 * we might want to do DLB less frequently.
9095 if (bBoxChanged
|| ir
->epc
!= epcNO
)
9097 bDoDLB
= bBoxChanged
;
9101 bDoDLB
= bNStGlobalComm
;
9105 /* Check if we have recorded loads on the nodes */
9106 if (comm
->bRecordLoad
&& dd_load_count(comm
))
9108 if (comm
->eDLB
== edlbAUTO
&& !comm
->bDynLoadBal
)
9110 /* Check if we should use DLB at the second partitioning
9111 * and every 100 partitionings,
9112 * so the extra communication cost is negligible.
9114 n
= max(100,nstglobalcomm
);
9115 bCheckDLB
= (comm
->n_load_collect
== 0 ||
9116 comm
->n_load_have
% n
== n
-1);
9123 /* Print load every nstlog, first and last step to the log file */
9124 bLogLoad
= ((ir
->nstlog
> 0 && step
% ir
->nstlog
== 0) ||
9125 comm
->n_load_collect
== 0 ||
9127 (step
+ ir
->nstlist
> ir
->init_step
+ ir
->nsteps
)));
9129 /* Avoid extra communication due to verbose screen output
9130 * when nstglobalcomm is set.
9132 if (bDoDLB
|| bLogLoad
|| bCheckDLB
||
9133 (bVerbose
&& (ir
->nstlist
== 0 || nstglobalcomm
<= ir
->nstlist
)))
9135 get_load_distribution(dd
,wcycle
);
9140 dd_print_load(fplog
,dd
,step
-1);
9144 dd_print_load_verbose(dd
);
9147 comm
->n_load_collect
++;
9150 /* Since the timings are node dependent, the master decides */
9154 (dd_force_imb_perf_loss(dd
) >= DD_PERF_LOSS
);
9157 fprintf(debug
,"step %s, imb loss %f\n",
9158 gmx_step_str(step
,sbuf
),
9159 dd_force_imb_perf_loss(dd
));
9162 dd_bcast(dd
,sizeof(bTurnOnDLB
),&bTurnOnDLB
);
9165 turn_on_dlb(fplog
,cr
,step
);
9170 comm
->n_load_have
++;
9173 cgs_gl
= &comm
->cgs_gl
;
9178 /* Clear the old state */
9179 clear_dd_indices(dd
,0,0);
9181 set_ddbox(dd
,bMasterState
,cr
,ir
,state_global
->box
,
9182 TRUE
,cgs_gl
,state_global
->x
,&ddbox
);
9184 get_cg_distribution(fplog
,step
,dd
,cgs_gl
,
9185 state_global
->box
,&ddbox
,state_global
->x
);
9187 dd_distribute_state(dd
,cgs_gl
,
9188 state_global
,state_local
,f
);
9190 dd_make_local_cgs(dd
,&top_local
->cgs
);
9192 /* Ensure that we have space for the new distribution */
9193 dd_check_alloc_ncg(fr
,state_local
,f
,dd
->ncg_home
);
9195 if (fr
->cutoff_scheme
== ecutsGROUP
)
9197 calc_cgcm(fplog
,0,dd
->ncg_home
,
9198 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
9201 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
9203 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
9207 else if (state_local
->ddp_count
!= dd
->ddp_count
)
9209 if (state_local
->ddp_count
> dd
->ddp_count
)
9211 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local
->ddp_count
,dd
->ddp_count
);
9214 if (state_local
->ddp_count_cg_gl
!= state_local
->ddp_count
)
9216 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local
->ddp_count_cg_gl
,state_local
->ddp_count
);
9219 /* Clear the old state */
9220 clear_dd_indices(dd
,0,0);
9222 /* Build the new indices */
9223 rebuild_cgindex(dd
,cgs_gl
->index
,state_local
);
9224 make_dd_indices(dd
,cgs_gl
->index
,0);
9226 if (fr
->cutoff_scheme
== ecutsGROUP
)
9228 /* Redetermine the cg COMs */
9229 calc_cgcm(fplog
,0,dd
->ncg_home
,
9230 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
9233 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
9235 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
9237 set_ddbox(dd
,bMasterState
,cr
,ir
,state_local
->box
,
9238 TRUE
,&top_local
->cgs
,state_local
->x
,&ddbox
);
9240 bRedist
= comm
->bDynLoadBal
;
9244 /* We have the full state, only redistribute the cgs */
9246 /* Clear the non-home indices */
9247 clear_dd_indices(dd
,dd
->ncg_home
,dd
->nat_home
);
9249 /* Avoid global communication for dim's without pbc and -gcom */
9250 if (!bNStGlobalComm
)
9252 copy_rvec(comm
->box0
,ddbox
.box0
);
9253 copy_rvec(comm
->box_size
,ddbox
.box_size
);
9255 set_ddbox(dd
,bMasterState
,cr
,ir
,state_local
->box
,
9256 bNStGlobalComm
,&top_local
->cgs
,state_local
->x
,&ddbox
);
9261 /* For dim's without pbc and -gcom */
9262 copy_rvec(ddbox
.box0
,comm
->box0
);
9263 copy_rvec(ddbox
.box_size
,comm
->box_size
);
9265 set_dd_cell_sizes(dd
,&ddbox
,dynamic_dd_box(&ddbox
,ir
),bMasterState
,bDoDLB
,
9268 if (comm
->nstDDDumpGrid
> 0 && step
% comm
->nstDDDumpGrid
== 0)
9270 write_dd_grid_pdb("dd_grid",step
,dd
,state_local
->box
,&ddbox
);
9273 /* Check if we should sort the charge groups */
9274 if (comm
->nstSortCG
> 0)
9276 bSortCG
= (bMasterState
||
9277 (bRedist
&& (step
% comm
->nstSortCG
== 0)));
9284 ncg_home_old
= dd
->ncg_home
;
9289 wallcycle_sub_start(wcycle
,ewcsDD_REDIST
);
9291 dd_redistribute_cg(fplog
,step
,dd
,ddbox
.tric_dir
,
9292 state_local
,f
,fr
,mdatoms
,
9293 !bSortCG
,nrnb
,&cg0
,&ncg_moved
);
9295 wallcycle_sub_stop(wcycle
,ewcsDD_REDIST
);
9298 get_nsgrid_boundaries(ddbox
.nboundeddim
,state_local
->box
,
9300 &comm
->cell_x0
,&comm
->cell_x1
,
9301 dd
->ncg_home
,fr
->cg_cm
,
9302 cell_ns_x0
,cell_ns_x1
,&grid_density
);
9306 comm_dd_ns_cell_sizes(dd
,&ddbox
,cell_ns_x0
,cell_ns_x1
,step
);
9309 switch (fr
->cutoff_scheme
)
9312 copy_ivec(fr
->ns
.grid
->n
,ncells_old
);
9313 grid_first(fplog
,fr
->ns
.grid
,dd
,&ddbox
,fr
->ePBC
,
9314 state_local
->box
,cell_ns_x0
,cell_ns_x1
,
9315 fr
->rlistlong
,grid_density
);
9318 nbnxn_get_ncells(fr
->nbv
->nbs
,&ncells_old
[XX
],&ncells_old
[YY
]);
9321 gmx_incons("unimplemented");
9323 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
9324 copy_ivec(ddbox
.tric_dir
,comm
->tric_dir
);
9328 wallcycle_sub_start(wcycle
,ewcsDD_GRID
);
9330 /* Sort the state on charge group position.
9331 * This enables exact restarts from this step.
9332 * It also improves performance by about 15% with larger numbers
9333 * of atoms per node.
9336 /* Fill the ns grid with the home cell,
9337 * so we can sort with the indices.
9339 set_zones_ncg_home(dd
);
9341 switch (fr
->cutoff_scheme
)
9344 set_zones_size(dd
,state_local
->box
,&ddbox
,0,1);
9346 nbnxn_put_on_grid(fr
->nbv
->nbs
,fr
->ePBC
,state_local
->box
,
9348 comm
->zones
.size
[0].bb_x0
,
9349 comm
->zones
.size
[0].bb_x1
,
9351 comm
->zones
.dens_zone0
,
9354 ncg_moved
,comm
->moved
,
9355 fr
->nbv
->grp
[eintLocal
].kernel_type
,
9356 fr
->nbv
->grp
[eintLocal
].nbat
);
9358 nbnxn_get_ncells(fr
->nbv
->nbs
,&ncells_new
[XX
],&ncells_new
[YY
]);
9361 fill_grid(fplog
,&comm
->zones
,fr
->ns
.grid
,dd
->ncg_home
,
9362 0,dd
->ncg_home
,fr
->cg_cm
);
9364 copy_ivec(fr
->ns
.grid
->n
,ncells_new
);
9367 gmx_incons("unimplemented");
9370 bResortAll
= bMasterState
;
9372 /* Check if we can use the old order and ns grid cell indices
9373 * of the charge groups to sort the charge groups efficiently.
9375 if (ncells_new
[XX
] != ncells_old
[XX
] ||
9376 ncells_new
[YY
] != ncells_old
[YY
] ||
9377 ncells_new
[ZZ
] != ncells_old
[ZZ
])
9384 fprintf(debug
,"Step %s, sorting the %d home charge groups\n",
9385 gmx_step_str(step
,sbuf
),dd
->ncg_home
);
9387 dd_sort_state(dd
,ir
->ePBC
,fr
->cg_cm
,fr
,state_local
,
9388 bResortAll
? -1 : ncg_home_old
);
9389 /* Rebuild all the indices */
9391 ga2la_clear(dd
->ga2la
);
9393 wallcycle_sub_stop(wcycle
,ewcsDD_GRID
);
9396 wallcycle_sub_start(wcycle
,ewcsDD_SETUPCOMM
);
9398 /* Set up the communication and communicate the coordinates */
9399 setup_dd_communication(dd
,state_local
->box
,&ddbox
,fr
,state_local
,f
);
9401 /* Set the indices */
9402 make_dd_indices(dd
,cgs_gl
->index
,cg0
);
9404 /* Set the charge group boundaries for neighbor searching */
9405 set_cg_boundaries(&comm
->zones
);
9407 if (fr
->cutoff_scheme
== ecutsVERLET
)
9409 set_zones_size(dd
,state_local
->box
,&ddbox
,
9410 bSortCG
? 1 : 0,comm
->zones
.n
);
9413 wallcycle_sub_stop(wcycle
,ewcsDD_SETUPCOMM
);
9416 write_dd_pdb("dd_home",step,"dump",top_global,cr,
9417 -1,state_local->x,state_local->box);
9420 wallcycle_sub_start(wcycle
,ewcsDD_MAKETOP
);
9422 /* Extract a local topology from the global topology */
9423 for(i
=0; i
<dd
->ndim
; i
++)
9425 np
[dd
->dim
[i
]] = comm
->cd
[i
].np
;
9427 dd_make_local_top(fplog
,dd
,&comm
->zones
,dd
->npbcdim
,state_local
->box
,
9428 comm
->cellsize_min
,np
,
9430 fr
->cutoff_scheme
==ecutsGROUP
? fr
->cg_cm
: state_local
->x
,
9431 vsite
,top_global
,top_local
);
9433 wallcycle_sub_stop(wcycle
,ewcsDD_MAKETOP
);
9435 wallcycle_sub_start(wcycle
,ewcsDD_MAKECONSTR
);
9437 /* Set up the special atom communication */
9438 n
= comm
->nat
[ddnatZONE
];
9439 for(i
=ddnatZONE
+1; i
<ddnatNR
; i
++)
9444 if (vsite
&& vsite
->n_intercg_vsite
)
9446 n
= dd_make_local_vsites(dd
,n
,top_local
->idef
.il
);
9450 if (dd
->bInterCGcons
|| dd
->bInterCGsettles
)
9452 /* Only for inter-cg constraints we need special code */
9453 n
= dd_make_local_constraints(dd
,n
,top_global
,fr
->cginfo
,
9454 constr
,ir
->nProjOrder
,
9455 top_local
->idef
.il
);
9459 gmx_incons("Unknown special atom type setup");
9464 wallcycle_sub_stop(wcycle
,ewcsDD_MAKECONSTR
);
9466 wallcycle_sub_start(wcycle
,ewcsDD_TOPOTHER
);
9468 /* Make space for the extra coordinates for virtual site
9469 * or constraint communication.
9471 state_local
->natoms
= comm
->nat
[ddnatNR
-1];
9472 if (state_local
->natoms
> state_local
->nalloc
)
9474 dd_realloc_state(state_local
,f
,state_local
->natoms
);
9477 if (fr
->bF_NoVirSum
)
9479 if (vsite
&& vsite
->n_intercg_vsite
)
9481 nat_f_novirsum
= comm
->nat
[ddnatVSITE
];
9485 if (EEL_FULL(ir
->coulombtype
) && dd
->n_intercg_excl
> 0)
9487 nat_f_novirsum
= dd
->nat_tot
;
9491 nat_f_novirsum
= dd
->nat_home
;
9500 /* Set the number of atoms required for the force calculation.
9501 * Forces need to be constrained when using a twin-range setup
9502 * or with energy minimization. For simple simulations we could
9503 * avoid some allocation, zeroing and copying, but this is
9504 * probably not worth the complications and checking.
9506 forcerec_set_ranges(fr
,dd
->ncg_home
,dd
->ncg_tot
,
9507 dd
->nat_tot
,comm
->nat
[ddnatCON
],nat_f_novirsum
);
9509 /* We make all the mdatoms up to nat_tot_con.
9510 * We could save some work by only setting invmass
9511 * between nat_tot and nat_tot_con.
9513 /* This call also sets the new number of home particles to dd->nat_home */
9514 atoms2md(top_global
,ir
,
9515 comm
->nat
[ddnatCON
],dd
->gatindex
,0,dd
->nat_home
,mdatoms
);
9517 /* Now we have the charges we can sort the FE interactions */
9518 dd_sort_local_top(dd
,mdatoms
,top_local
);
9522 /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
9523 split_vsites_over_threads(top_local
->idef
.il
,mdatoms
,FALSE
,vsite
);
9528 /* Make the local shell stuff, currently no communication is done */
9529 make_local_shells(cr
,mdatoms
,shellfc
);
9532 if (ir
->implicit_solvent
)
9534 make_local_gb(cr
,fr
->born
,ir
->gb_algorithm
);
9537 init_bonded_thread_force_reduction(fr
,&top_local
->idef
);
9539 if (!(cr
->duty
& DUTY_PME
))
9541 /* Send the charges to our PME only node */
9542 gmx_pme_send_q(cr
,mdatoms
->nChargePerturbed
,
9543 mdatoms
->chargeA
,mdatoms
->chargeB
,
9544 dd_pme_maxshift_x(dd
),dd_pme_maxshift_y(dd
));
9549 set_constraints(constr
,top_local
,ir
,mdatoms
,cr
);
9552 if (ir
->ePull
!= epullNO
)
9554 /* Update the local pull groups */
9555 dd_make_local_pull_groups(dd
,ir
->pull
,mdatoms
);
9560 /* Update the local rotation groups */
9561 dd_make_local_rotation_groups(dd
,ir
->rot
);
9565 add_dd_statistics(dd
);
9567 /* Make sure we only count the cycles for this DD partitioning */
9568 clear_dd_cycle_counts(dd
);
9570 /* Because the order of the atoms might have changed since
9571 * the last vsite construction, we need to communicate the constructing
9572 * atom coordinates again (for spreading the forces this MD step).
9574 dd_move_x_vsites(dd
,state_local
->box
,state_local
->x
);
9576 wallcycle_sub_stop(wcycle
,ewcsDD_TOPOTHER
);
9578 if (comm
->nstDDDump
> 0 && step
% comm
->nstDDDump
== 0)
9580 dd_move_x(dd
,state_local
->box
,state_local
->x
);
9581 write_dd_pdb("dd_dump",step
,"dump",top_global
,cr
,
9582 -1,state_local
->x
,state_local
->box
);
9585 /* Store the partitioning step */
9586 comm
->partition_step
= step
;
9588 /* Increase the DD partitioning counter */
9590 /* The state currently matches this DD partitioning count, store it */
9591 state_local
->ddp_count
= dd
->ddp_count
;
9594 /* The DD master node knows the complete cg distribution,
9595 * store the count so we can possibly skip the cg info communication.
9597 comm
->master_cg_ddp_count
= (bSortCG
? 0 : dd
->ddp_count
);
9600 if (comm
->DD_debug
> 0)
9602 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
9603 check_index_consistency(dd
,top_global
->natoms
,ncg_mtop(top_global
),
9604 "after partitioning");