1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
32 #include "domdec_network.h"
43 #include "gmx_wallcycle.h"
47 #include "mtop_util.h"
49 #include "gmx_ga2la.h"
58 #define DDRANK(dd,rank) (rank)
59 #define DDMASTERRANK(dd) (dd->masterrank)
61 typedef struct gmx_domdec_master
63 /* The cell boundaries */
65 /* The global charge group division */
66 int *ncg
; /* Number of home charge groups for each node */
67 int *index
; /* Index of nnodes+1 into cg */
68 int *cg
; /* Global charge group index */
69 int *nat
; /* Number of home atoms for each node. */
70 int *ibuf
; /* Buffer for communication */
71 rvec
*vbuf
; /* Buffer for state scattering and gathering */
72 } gmx_domdec_master_t
;
76 /* The numbers of charge groups to send and receive for each cell
77 * that requires communication, the last entry contains the total
78 * number of atoms that needs to be communicated.
80 int nsend
[DD_MAXIZONE
+2];
81 int nrecv
[DD_MAXIZONE
+2];
82 /* The charge groups to send */
85 /* The atom range for non-in-place communication */
86 int cell2at0
[DD_MAXIZONE
];
87 int cell2at1
[DD_MAXIZONE
];
92 int np
; /* Number of grid pulses in this dimension */
93 int np_dlb
; /* For dlb, for use with edlbAUTO */
94 gmx_domdec_ind_t
*ind
; /* The indices to communicate, size np */
96 bool bInPlace
; /* Can we communicate in place? */
97 } gmx_domdec_comm_dim_t
;
101 bool *bCellMin
; /* Temp. var.: is this cell size at the limit */
102 real
*cell_f
; /* State var.: cell boundaries, box relative */
103 real
*old_cell_f
; /* Temp. var.: old cell size */
104 real
*cell_f_max0
; /* State var.: max lower boundary, incl neighbors */
105 real
*cell_f_min1
; /* State var.: min upper boundary, incl neighbors */
106 real
*bound_min
; /* Temp. var.: lower limit for cell boundary */
107 real
*bound_max
; /* Temp. var.: upper limit for cell boundary */
108 bool bLimited
; /* State var.: is DLB limited in this dim and row */
109 real
*buf_ncd
; /* Temp. var. */
112 #define DD_NLOAD_MAX 9
114 /* Here floats are accurate enough, since these variables
115 * only influence the load balancing, not the actual MD results.
139 gmx_cgsort_t
*sort1
,*sort2
;
141 gmx_cgsort_t
*sort_new
;
153 /* This enum determines the order of the coordinates.
154 * ddnatHOME and ddnatZONE should be first and second,
155 * the others can be ordered as wanted.
157 enum { ddnatHOME
, ddnatZONE
, ddnatVSITE
, ddnatCON
, ddnatNR
};
159 enum { edlbAUTO
, edlbNO
, edlbYES
, edlbNR
};
160 const char *edlb_names
[edlbNR
] = { "auto", "no", "yes" };
164 int dimind
; /* The dimension index */
165 int nslab
; /* The number of PME slabs in this dimension */
166 real
*slb_dim_f
; /* Cell sizes for determining the PME comm. with SLB */
167 int *pp_min
; /* The minimum pp node location, size nslab */
168 int *pp_max
; /* The maximum pp node location,size nslab */
169 int maxshift
; /* The maximum shift for coordinate redistribution in PME */
174 real min0
; /* The minimum bottom of this zone */
175 real max1
; /* The maximum top of this zone */
176 real mch0
; /* The maximum bottom communicaton height for this zone */
177 real mch1
; /* The maximum top communicaton height for this zone */
178 real p1_0
; /* The bottom value of the first cell in this zone */
179 real p1_1
; /* The top value of the first cell in this zone */
182 typedef struct gmx_domdec_comm
184 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
185 * unless stated otherwise.
188 /* The number of decomposition dimensions for PME, 0: no PME */
190 /* The number of nodes doing PME (PP/PME or only PME) */
193 /* The communication setup including the PME only nodes */
194 bool bCartesianPP_PME
;
197 int *pmenodes
; /* size npmenodes */
198 int *ddindex2simnodeid
; /* size npmenodes, only with bCartesianPP
199 * but with bCartesianPP_PME */
200 gmx_ddpme_t ddpme
[2];
202 /* The DD particle-particle nodes only */
204 int *ddindex2ddnodeid
; /* size npmenode, only with bCartesianPP_PME */
206 /* The global charge groups */
209 /* Should we sort the cgs */
211 gmx_domdec_sort_t
*sort
;
213 /* Are there bonded and multi-body interactions between charge groups? */
214 bool bInterCGBondeds
;
215 bool bInterCGMultiBody
;
217 /* Data for the optional bonded interaction atom communication range */
224 /* Are we actually using DLB? */
227 /* Cell sizes for static load balancing, first index cartesian */
230 /* The width of the communicated boundaries */
233 /* The minimum cell size (including triclinic correction) */
235 /* For dlb, for use with edlbAUTO */
236 rvec cellsize_min_dlb
;
237 /* The lower limit for the DD cell size with DLB */
239 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
242 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
244 /* box0 and box_size are required with dim's without pbc and -gcom */
248 /* The cell boundaries */
252 /* The old location of the cell boundaries, to check cg displacements */
256 /* The communication setup and charge group boundaries for the zones */
257 gmx_domdec_zones_t zones
;
259 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
260 * cell boundaries of neighboring cells for dynamic load balancing.
262 gmx_ddzone_t zone_d1
[2];
263 gmx_ddzone_t zone_d2
[2][2];
265 /* The coordinate/force communication setup and indices */
266 gmx_domdec_comm_dim_t cd
[DIM
];
267 /* The maximum number of cells to communicate with in one dimension */
270 /* Which cg distribution is stored on the master node */
271 int master_cg_ddp_count
;
273 /* The number of cg's received from the direct neighbors */
274 int zone_ncg1
[DD_MAXZONE
];
276 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
279 /* Communication buffer for general use */
283 /* Communication buffer for general use */
286 /* Communication buffers only used with multiple grid pulses */
291 /* Communication buffers for local redistribution */
293 int cggl_flag_nalloc
[DIM
*2];
295 int cgcm_state_nalloc
[DIM
*2];
297 /* Cell sizes for dynamic load balancing */
298 gmx_domdec_root_t
**root
;
302 real cell_f_max0
[DIM
];
303 real cell_f_min1
[DIM
];
305 /* Stuff for load communication */
307 gmx_domdec_load_t
*load
;
309 MPI_Comm
*mpi_comm_load
;
312 float cycl
[ddCyclNr
];
313 int cycl_n
[ddCyclNr
];
314 float cycl_max
[ddCyclNr
];
315 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
319 /* Have often have did we have load measurements */
321 /* Have often have we collected the load measurements */
325 double sum_nat
[ddnatNR
-ddnatZONE
];
335 /* The last partition step */
336 gmx_step_t partition_step
;
344 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
347 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
348 #define DD_FLAG_NRCG 65535
349 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
350 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
352 /* Zone permutation required to obtain consecutive charge groups
353 * for neighbor searching.
355 static const int zone_perm
[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
357 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
358 * components see only j zones with that component 0.
361 /* The DD zone order */
362 static const ivec dd_zo
[DD_MAXZONE
] =
363 {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
368 static const ivec dd_zp3
[dd_zp3n
] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
373 static const ivec dd_zp2
[dd_zp2n
] = {{0,0,4},{1,3,4}};
378 static const ivec dd_zp1
[dd_zp1n
] = {{0,0,2}};
380 /* Factors used to avoid problems due to rounding issues */
381 #define DD_CELL_MARGIN 1.0001
382 #define DD_CELL_MARGIN2 1.00005
383 /* Factor to account for pressure scaling during nstlist steps */
384 #define DD_PRES_SCALE_MARGIN 1.02
386 /* Allowed performance loss before we DLB or warn */
387 #define DD_PERF_LOSS 0.05
389 #define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
391 /* Use separate MPI send and receive commands
392 * when nnodes <= GMX_DD_NNODES_SENDRECV.
393 * This saves memory (and some copying for small nnodes).
394 * For high parallelization scatter and gather calls are used.
396 #define GMX_DD_NNODES_SENDRECV 4
400 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
402 static void index2xyz(ivec nc,int ind,ivec xyz)
404 xyz[XX] = ind % nc[XX];
405 xyz[YY] = (ind / nc[XX]) % nc[YY];
406 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
410 /* This order is required to minimize the coordinate communication in PME
411 * which uses decomposition in the x direction.
413 #define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
415 static void ddindex2xyz(ivec nc
,int ind
,ivec xyz
)
417 xyz
[XX
] = ind
/ (nc
[YY
]*nc
[ZZ
]);
418 xyz
[YY
] = (ind
/ nc
[ZZ
]) % nc
[YY
];
419 xyz
[ZZ
] = ind
% nc
[ZZ
];
422 static int ddcoord2ddnodeid(gmx_domdec_t
*dd
,ivec c
)
427 ddindex
= dd_index(dd
->nc
,c
);
428 if (dd
->comm
->bCartesianPP_PME
)
430 ddnodeid
= dd
->comm
->ddindex2ddnodeid
[ddindex
];
432 else if (dd
->comm
->bCartesianPP
)
435 MPI_Cart_rank(dd
->mpi_comm_all
,c
,&ddnodeid
);
446 static bool dynamic_dd_box(gmx_ddbox_t
*ddbox
,t_inputrec
*ir
)
448 return (ddbox
->nboundeddim
< DIM
|| DYNAMIC_BOX(*ir
));
451 int ddglatnr(gmx_domdec_t
*dd
,int i
)
461 if (i
>= dd
->comm
->nat
[ddnatNR
-1])
463 gmx_fatal(FARGS
,"glatnr called with %d, which is larger than the local number of atoms (%d)",i
,dd
->comm
->nat
[ddnatNR
-1]);
465 atnr
= dd
->gatindex
[i
] + 1;
471 t_block
*dd_charge_groups_global(gmx_domdec_t
*dd
)
473 return &dd
->comm
->cgs_gl
;
476 static void check_vec_rvec_alloc(vec_rvec_t
*v
,int n
)
480 v
->nalloc
= over_alloc_dd(n
);
481 srenew(v
->v
,v
->nalloc
);
485 void dd_store_state(gmx_domdec_t
*dd
,t_state
*state
)
489 if (state
->ddp_count
!= dd
->ddp_count
)
491 gmx_incons("The state does not the domain decomposition state");
494 state
->ncg_gl
= dd
->ncg_home
;
495 if (state
->ncg_gl
> state
->cg_gl_nalloc
)
497 state
->cg_gl_nalloc
= over_alloc_dd(state
->ncg_gl
);
498 srenew(state
->cg_gl
,state
->cg_gl_nalloc
);
500 for(i
=0; i
<state
->ncg_gl
; i
++)
502 state
->cg_gl
[i
] = dd
->index_gl
[i
];
505 state
->ddp_count_cg_gl
= dd
->ddp_count
;
508 gmx_domdec_zones_t
*domdec_zones(gmx_domdec_t
*dd
)
510 return &dd
->comm
->zones
;
513 void dd_get_ns_ranges(gmx_domdec_t
*dd
,int icg
,
514 int *jcg0
,int *jcg1
,ivec shift0
,ivec shift1
)
516 gmx_domdec_zones_t
*zones
;
519 zones
= &dd
->comm
->zones
;
522 while (icg
>= zones
->izone
[izone
].cg1
)
531 else if (izone
< zones
->nizone
)
533 *jcg0
= zones
->izone
[izone
].jcg0
;
537 gmx_fatal(FARGS
,"DD icg %d out of range: izone (%d) >= nizone (%d)",
538 icg
,izone
,zones
->nizone
);
541 *jcg1
= zones
->izone
[izone
].jcg1
;
543 for(d
=0; d
<dd
->ndim
; d
++)
546 shift0
[dim
] = zones
->izone
[izone
].shift0
[dim
];
547 shift1
[dim
] = zones
->izone
[izone
].shift1
[dim
];
548 if (dd
->comm
->tric_dir
[dim
] || (dd
->bGridJump
&& d
> 0))
550 /* A conservative approach, this can be optimized */
557 int dd_natoms_vsite(gmx_domdec_t
*dd
)
559 return dd
->comm
->nat
[ddnatVSITE
];
562 void dd_get_constraint_range(gmx_domdec_t
*dd
,int *at_start
,int *at_end
)
564 *at_start
= dd
->comm
->nat
[ddnatCON
-1];
565 *at_end
= dd
->comm
->nat
[ddnatCON
];
568 void dd_move_x(gmx_domdec_t
*dd
,matrix box
,rvec x
[])
570 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
572 gmx_domdec_comm_t
*comm
;
573 gmx_domdec_comm_dim_t
*cd
;
574 gmx_domdec_ind_t
*ind
;
575 rvec shift
={0,0,0},*buf
,*rbuf
;
580 cgindex
= dd
->cgindex
;
585 nat_tot
= dd
->nat_home
;
586 for(d
=0; d
<dd
->ndim
; d
++)
588 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
589 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
592 copy_rvec(box
[dd
->dim
[d
]],shift
);
595 for(p
=0; p
<cd
->np
; p
++)
602 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
604 at0
= cgindex
[index
[i
]];
605 at1
= cgindex
[index
[i
]+1];
606 for(j
=at0
; j
<at1
; j
++)
608 copy_rvec(x
[j
],buf
[n
]);
615 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
617 at0
= cgindex
[index
[i
]];
618 at1
= cgindex
[index
[i
]+1];
619 for(j
=at0
; j
<at1
; j
++)
621 /* We need to shift the coordinates */
622 rvec_add(x
[j
],shift
,buf
[n
]);
629 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
631 at0
= cgindex
[index
[i
]];
632 at1
= cgindex
[index
[i
]+1];
633 for(j
=at0
; j
<at1
; j
++)
636 buf
[n
][XX
] = x
[j
][XX
] + shift
[XX
];
638 * This operation requires a special shift force
639 * treatment, which is performed in calc_vir.
641 buf
[n
][YY
] = box
[YY
][YY
] - x
[j
][YY
];
642 buf
[n
][ZZ
] = box
[ZZ
][ZZ
] - x
[j
][ZZ
];
654 rbuf
= comm
->vbuf2
.v
;
656 /* Send and receive the coordinates */
657 dd_sendrecv_rvec(dd
, d
, dddirBackward
,
658 buf
, ind
->nsend
[nzone
+1],
659 rbuf
, ind
->nrecv
[nzone
+1]);
663 for(zone
=0; zone
<nzone
; zone
++)
665 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
667 copy_rvec(rbuf
[j
],x
[i
]);
672 nat_tot
+= ind
->nrecv
[nzone
+1];
678 void dd_move_f(gmx_domdec_t
*dd
,rvec f
[],rvec
*fshift
)
680 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
682 gmx_domdec_comm_t
*comm
;
683 gmx_domdec_comm_dim_t
*cd
;
684 gmx_domdec_ind_t
*ind
;
692 cgindex
= dd
->cgindex
;
697 nzone
= comm
->zones
.n
/2;
698 nat_tot
= dd
->nat_tot
;
699 for(d
=dd
->ndim
-1; d
>=0; d
--)
701 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
702 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
703 if (fshift
== NULL
&& !bScrew
)
707 /* Determine which shift vector we need */
713 for(p
=cd
->np
-1; p
>=0; p
--) {
715 nat_tot
-= ind
->nrecv
[nzone
+1];
722 sbuf
= comm
->vbuf2
.v
;
724 for(zone
=0; zone
<nzone
; zone
++)
726 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
728 copy_rvec(f
[i
],sbuf
[j
]);
733 /* Communicate the forces */
734 dd_sendrecv_rvec(dd
, d
, dddirForward
,
735 sbuf
, ind
->nrecv
[nzone
+1],
736 buf
, ind
->nsend
[nzone
+1]);
738 /* Add the received forces */
742 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
744 at0
= cgindex
[index
[i
]];
745 at1
= cgindex
[index
[i
]+1];
746 for(j
=at0
; j
<at1
; j
++)
748 rvec_inc(f
[j
],buf
[n
]);
755 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
757 at0
= cgindex
[index
[i
]];
758 at1
= cgindex
[index
[i
]+1];
759 for(j
=at0
; j
<at1
; j
++)
761 rvec_inc(f
[j
],buf
[n
]);
762 /* Add this force to the shift force */
763 rvec_inc(fshift
[is
],buf
[n
]);
770 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
772 at0
= cgindex
[index
[i
]];
773 at1
= cgindex
[index
[i
]+1];
774 for(j
=at0
; j
<at1
; j
++)
776 /* Rotate the force */
777 f
[j
][XX
] += buf
[n
][XX
];
778 f
[j
][YY
] -= buf
[n
][YY
];
779 f
[j
][ZZ
] -= buf
[n
][ZZ
];
782 /* Add this force to the shift force */
783 rvec_inc(fshift
[is
],buf
[n
]);
794 void dd_atom_spread_real(gmx_domdec_t
*dd
,real v
[])
796 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
798 gmx_domdec_comm_t
*comm
;
799 gmx_domdec_comm_dim_t
*cd
;
800 gmx_domdec_ind_t
*ind
;
805 cgindex
= dd
->cgindex
;
807 buf
= &comm
->vbuf
.v
[0][0];
810 nat_tot
= dd
->nat_home
;
811 for(d
=0; d
<dd
->ndim
; d
++)
814 for(p
=0; p
<cd
->np
; p
++)
819 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
821 at0
= cgindex
[index
[i
]];
822 at1
= cgindex
[index
[i
]+1];
823 for(j
=at0
; j
<at1
; j
++)
836 rbuf
= &comm
->vbuf2
.v
[0][0];
838 /* Send and receive the coordinates */
839 dd_sendrecv_real(dd
, d
, dddirBackward
,
840 buf
, ind
->nsend
[nzone
+1],
841 rbuf
, ind
->nrecv
[nzone
+1]);
845 for(zone
=0; zone
<nzone
; zone
++)
847 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
854 nat_tot
+= ind
->nrecv
[nzone
+1];
860 void dd_atom_sum_real(gmx_domdec_t
*dd
,real v
[])
862 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
864 gmx_domdec_comm_t
*comm
;
865 gmx_domdec_comm_dim_t
*cd
;
866 gmx_domdec_ind_t
*ind
;
871 cgindex
= dd
->cgindex
;
873 buf
= &comm
->vbuf
.v
[0][0];
876 nzone
= comm
->zones
.n
/2;
877 nat_tot
= dd
->nat_tot
;
878 for(d
=dd
->ndim
-1; d
>=0; d
--)
881 for(p
=cd
->np
-1; p
>=0; p
--) {
883 nat_tot
-= ind
->nrecv
[nzone
+1];
890 sbuf
= &comm
->vbuf2
.v
[0][0];
892 for(zone
=0; zone
<nzone
; zone
++)
894 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
901 /* Communicate the forces */
902 dd_sendrecv_real(dd
, d
, dddirForward
,
903 sbuf
, ind
->nrecv
[nzone
+1],
904 buf
, ind
->nsend
[nzone
+1]);
906 /* Add the received forces */
908 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
910 at0
= cgindex
[index
[i
]];
911 at1
= cgindex
[index
[i
]+1];
912 for(j
=at0
; j
<at1
; j
++)
923 static void print_ddzone(FILE *fp
,int d
,int i
,int j
,gmx_ddzone_t
*zone
)
925 fprintf(fp
,"zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
927 zone
->min0
,zone
->max1
,
928 zone
->mch0
,zone
->mch0
,
929 zone
->p1_0
,zone
->p1_1
);
932 static void dd_sendrecv_ddzone(const gmx_domdec_t
*dd
,
933 int ddimind
,int direction
,
934 gmx_ddzone_t
*buf_s
,int n_s
,
935 gmx_ddzone_t
*buf_r
,int n_r
)
937 rvec vbuf_s
[5*2],vbuf_r
[5*2];
942 vbuf_s
[i
*2 ][0] = buf_s
[i
].min0
;
943 vbuf_s
[i
*2 ][1] = buf_s
[i
].max1
;
944 vbuf_s
[i
*2 ][2] = buf_s
[i
].mch0
;
945 vbuf_s
[i
*2+1][0] = buf_s
[i
].mch1
;
946 vbuf_s
[i
*2+1][1] = buf_s
[i
].p1_0
;
947 vbuf_s
[i
*2+1][2] = buf_s
[i
].p1_1
;
950 dd_sendrecv_rvec(dd
, ddimind
, direction
,
956 buf_r
[i
].min0
= vbuf_r
[i
*2 ][0];
957 buf_r
[i
].max1
= vbuf_r
[i
*2 ][1];
958 buf_r
[i
].mch0
= vbuf_r
[i
*2 ][2];
959 buf_r
[i
].mch1
= vbuf_r
[i
*2+1][0];
960 buf_r
[i
].p1_0
= vbuf_r
[i
*2+1][1];
961 buf_r
[i
].p1_1
= vbuf_r
[i
*2+1][2];
965 static void dd_move_cellx(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
,
966 rvec cell_ns_x0
,rvec cell_ns_x1
)
968 int d
,d1
,dim
,dim1
,pos
,buf_size
,i
,j
,k
,p
,npulse
,npulse_min
;
969 gmx_ddzone_t
*zp
,buf_s
[5],buf_r
[5],buf_e
[5];
970 rvec extr_s
[2],extr_r
[2];
973 gmx_domdec_comm_t
*comm
;
978 for(d
=1; d
<dd
->ndim
; d
++)
981 zp
= (d
== 1) ? &comm
->zone_d1
[0] : &comm
->zone_d2
[0][0];
982 zp
->min0
= cell_ns_x0
[dim
];
983 zp
->max1
= cell_ns_x1
[dim
];
984 zp
->mch0
= cell_ns_x0
[dim
];
985 zp
->mch1
= cell_ns_x1
[dim
];
986 zp
->p1_0
= cell_ns_x0
[dim
];
987 zp
->p1_1
= cell_ns_x1
[dim
];
990 for(d
=dd
->ndim
-2; d
>=0; d
--)
993 bPBC
= (dim
< ddbox
->npbcdim
);
995 /* Use an rvec to store two reals */
996 extr_s
[d
][0] = comm
->cell_f0
[d
+1];
997 extr_s
[d
][1] = comm
->cell_f1
[d
+1];
1001 /* Store the extremes in the backward sending buffer,
1002 * so the get updated separately from the forward communication.
1004 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1006 /* We invert the order to be able to use the same loop for buf_e */
1007 buf_s
[pos
].min0
= extr_s
[d1
][1];
1008 buf_s
[pos
].max1
= extr_s
[d1
][0];
1009 buf_s
[pos
].mch0
= 0;
1010 buf_s
[pos
].mch1
= 0;
1011 /* Store the cell corner of the dimension we communicate along */
1012 buf_s
[pos
].p1_0
= comm
->cell_x0
[dim
];
1013 buf_s
[pos
].p1_1
= 0;
1017 buf_s
[pos
] = (dd
->ndim
== 2) ? comm
->zone_d1
[0] : comm
->zone_d2
[0][0];
1020 if (dd
->ndim
== 3 && d
== 0)
1022 buf_s
[pos
] = comm
->zone_d2
[0][1];
1024 buf_s
[pos
] = comm
->zone_d1
[0];
1028 /* We only need to communicate the extremes
1029 * in the forward direction
1031 npulse
= comm
->cd
[d
].np
;
1034 /* Take the minimum to avoid double communication */
1035 npulse_min
= min(npulse
,dd
->nc
[dim
]-1-npulse
);
1039 /* Without PBC we should really not communicate over
1040 * the boundaries, but implementing that complicates
1041 * the communication setup and therefore we simply
1042 * do all communication, but ignore some data.
1044 npulse_min
= npulse
;
1046 for(p
=0; p
<npulse_min
; p
++)
1048 /* Communicate the extremes forward */
1049 bUse
= (bPBC
|| dd
->ci
[dim
] > 0);
1051 dd_sendrecv_rvec(dd
, d
, dddirForward
,
1052 extr_s
+d
, dd
->ndim
-d
-1,
1053 extr_r
+d
, dd
->ndim
-d
-1);
1057 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1059 extr_s
[d1
][0] = max(extr_s
[d1
][0],extr_r
[d1
][0]);
1060 extr_s
[d1
][1] = min(extr_s
[d1
][1],extr_r
[d1
][1]);
1066 for(p
=0; p
<npulse
; p
++)
1068 /* Communicate all the zone information backward */
1069 bUse
= (bPBC
|| dd
->ci
[dim
] < dd
->nc
[dim
] - 1);
1071 dd_sendrecv_ddzone(dd
, d
, dddirBackward
,
1078 for(d1
=d
+1; d1
<dd
->ndim
; d1
++)
1080 /* Determine the decrease of maximum required
1081 * communication height along d1 due to the distance along d,
1082 * this avoids a lot of useless atom communication.
1084 dist_d
= comm
->cell_x1
[dim
] - buf_r
[0].p1_0
;
1086 if (ddbox
->tric_dir
[dim
])
1088 /* c is the off-diagonal coupling between the cell planes
1089 * along directions d and d1.
1091 c
= ddbox
->v
[dim
][dd
->dim
[d1
]][dim
];
1097 det
= (1 + c
*c
)*comm
->cutoff
*comm
->cutoff
- dist_d
*dist_d
;
1100 dh
[d1
] = comm
->cutoff
- (c
*dist_d
+ sqrt(det
))/(1 + c
*c
);
1104 /* A negative value signals out of range */
1110 /* Accumulate the extremes over all pulses */
1111 for(i
=0; i
<buf_size
; i
++)
1115 buf_e
[i
] = buf_r
[i
];
1121 buf_e
[i
].min0
= min(buf_e
[i
].min0
,buf_r
[i
].min0
);
1122 buf_e
[i
].max1
= max(buf_e
[i
].max1
,buf_r
[i
].max1
);
1125 if (dd
->ndim
== 3 && d
== 0 && i
== buf_size
- 1)
1133 if (bUse
&& dh
[d1
] >= 0)
1135 buf_e
[i
].mch0
= max(buf_e
[i
].mch0
,buf_r
[i
].mch0
-dh
[d1
]);
1136 buf_e
[i
].mch1
= max(buf_e
[i
].mch1
,buf_r
[i
].mch1
-dh
[d1
]);
1139 /* Copy the received buffer to the send buffer,
1140 * to pass the data through with the next pulse.
1142 buf_s
[i
] = buf_r
[i
];
1144 if (((bPBC
|| dd
->ci
[dim
]+npulse
< dd
->nc
[dim
]) && p
== npulse
-1) ||
1145 (!bPBC
&& dd
->ci
[dim
]+1+p
== dd
->nc
[dim
]-1))
1147 /* Store the extremes */
1150 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1152 extr_s
[d1
][1] = min(extr_s
[d1
][1],buf_e
[pos
].min0
);
1153 extr_s
[d1
][0] = max(extr_s
[d1
][0],buf_e
[pos
].max1
);
1157 if (d
== 1 || (d
== 0 && dd
->ndim
== 3))
1161 comm
->zone_d2
[1-d
][i
] = buf_e
[pos
];
1167 comm
->zone_d1
[1] = buf_e
[pos
];
1181 print_ddzone(debug
,1,i
,0,&comm
->zone_d1
[i
]);
1183 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d1
[i
].min0
);
1184 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d1
[i
].max1
);
1196 print_ddzone(debug
,2,i
,j
,&comm
->zone_d2
[i
][j
]);
1198 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d2
[i
][j
].min0
);
1199 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d2
[i
][j
].max1
);
1203 for(d
=1; d
<dd
->ndim
; d
++)
1205 comm
->cell_f_max0
[d
] = extr_s
[d
-1][0];
1206 comm
->cell_f_min1
[d
] = extr_s
[d
-1][1];
1209 fprintf(debug
,"Cell fraction d %d, max0 %f, min1 %f\n",
1210 d
,comm
->cell_f_max0
[d
],comm
->cell_f_min1
[d
]);
1215 static void dd_collect_cg(gmx_domdec_t
*dd
,
1216 t_state
*state_local
)
1218 gmx_domdec_master_t
*ma
=NULL
;
1219 int buf2
[2],*ibuf
,i
,ncg_home
=0,*cg
=NULL
,nat_home
=0;
1222 if (state_local
->ddp_count
== dd
->comm
->master_cg_ddp_count
)
1224 /* The master has the correct distribution */
1228 if (state_local
->ddp_count
== dd
->ddp_count
)
1230 ncg_home
= dd
->ncg_home
;
1232 nat_home
= dd
->nat_home
;
1234 else if (state_local
->ddp_count_cg_gl
== state_local
->ddp_count
)
1236 cgs_gl
= &dd
->comm
->cgs_gl
;
1238 ncg_home
= state_local
->ncg_gl
;
1239 cg
= state_local
->cg_gl
;
1241 for(i
=0; i
<ncg_home
; i
++)
1243 nat_home
+= cgs_gl
->index
[cg
[i
]+1] - cgs_gl
->index
[cg
[i
]];
1248 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1251 buf2
[0] = dd
->ncg_home
;
1252 buf2
[1] = dd
->nat_home
;
1262 /* Collect the charge group and atom counts on the master */
1263 dd_gather(dd
,2*sizeof(int),buf2
,ibuf
);
1268 for(i
=0; i
<dd
->nnodes
; i
++)
1270 ma
->ncg
[i
] = ma
->ibuf
[2*i
];
1271 ma
->nat
[i
] = ma
->ibuf
[2*i
+1];
1272 ma
->index
[i
+1] = ma
->index
[i
] + ma
->ncg
[i
];
1275 /* Make byte counts and indices */
1276 for(i
=0; i
<dd
->nnodes
; i
++)
1278 ma
->ibuf
[i
] = ma
->ncg
[i
]*sizeof(int);
1279 ma
->ibuf
[dd
->nnodes
+i
] = ma
->index
[i
]*sizeof(int);
1283 fprintf(debug
,"Initial charge group distribution: ");
1284 for(i
=0; i
<dd
->nnodes
; i
++)
1285 fprintf(debug
," %d",ma
->ncg
[i
]);
1286 fprintf(debug
,"\n");
1290 /* Collect the charge group indices on the master */
1292 dd
->ncg_home
*sizeof(int),dd
->index_gl
,
1293 DDMASTER(dd
) ? ma
->ibuf
: NULL
,
1294 DDMASTER(dd
) ? ma
->ibuf
+dd
->nnodes
: NULL
,
1295 DDMASTER(dd
) ? ma
->cg
: NULL
);
1297 dd
->comm
->master_cg_ddp_count
= state_local
->ddp_count
;
1300 static void dd_collect_vec_sendrecv(gmx_domdec_t
*dd
,
1303 gmx_domdec_master_t
*ma
;
1304 int n
,i
,c
,a
,nalloc
=0;
1313 MPI_Send(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1314 dd
->rank
,dd
->mpi_comm_all
);
1317 /* Copy the master coordinates to the global array */
1318 cgs_gl
= &dd
->comm
->cgs_gl
;
1320 n
= DDMASTERRANK(dd
);
1322 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1324 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1326 copy_rvec(lv
[a
++],v
[c
]);
1330 for(n
=0; n
<dd
->nnodes
; n
++)
1334 if (ma
->nat
[n
] > nalloc
)
1336 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1340 MPI_Recv(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,DDRANK(dd
,n
),
1341 n
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1344 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1346 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1348 copy_rvec(buf
[a
++],v
[c
]);
1357 static void get_commbuffer_counts(gmx_domdec_t
*dd
,
1358 int **counts
,int **disps
)
1360 gmx_domdec_master_t
*ma
;
1365 /* Make the rvec count and displacment arrays */
1367 *disps
= ma
->ibuf
+ dd
->nnodes
;
1368 for(n
=0; n
<dd
->nnodes
; n
++)
1370 (*counts
)[n
] = ma
->nat
[n
]*sizeof(rvec
);
1371 (*disps
)[n
] = (n
== 0 ? 0 : (*disps
)[n
-1] + (*counts
)[n
-1]);
1375 static void dd_collect_vec_gatherv(gmx_domdec_t
*dd
,
1378 gmx_domdec_master_t
*ma
;
1379 int *rcounts
=NULL
,*disps
=NULL
;
1388 get_commbuffer_counts(dd
,&rcounts
,&disps
);
1393 dd_gatherv(dd
,dd
->nat_home
*sizeof(rvec
),lv
,rcounts
,disps
,buf
);
1397 cgs_gl
= &dd
->comm
->cgs_gl
;
1400 for(n
=0; n
<dd
->nnodes
; n
++)
1402 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1404 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1406 copy_rvec(buf
[a
++],v
[c
]);
1413 void dd_collect_vec(gmx_domdec_t
*dd
,
1414 t_state
*state_local
,rvec
*lv
,rvec
*v
)
1416 gmx_domdec_master_t
*ma
;
1417 int n
,i
,c
,a
,nalloc
=0;
1420 dd_collect_cg(dd
,state_local
);
1422 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1424 dd_collect_vec_sendrecv(dd
,lv
,v
);
1428 dd_collect_vec_gatherv(dd
,lv
,v
);
1433 void dd_collect_state(gmx_domdec_t
*dd
,
1434 t_state
*state_local
,t_state
*state
)
1440 state
->lambda
= state_local
->lambda
;
1441 copy_mat(state_local
->box
,state
->box
);
1442 copy_mat(state_local
->boxv
,state
->boxv
);
1443 copy_mat(state_local
->pres_prev
,state
->pres_prev
);
1444 for(i
=0; i
<state_local
->ngtc
; i
++)
1446 state
->nosehoover_xi
[i
] = state_local
->nosehoover_xi
[i
];
1447 state
->therm_integral
[i
] = state_local
->therm_integral
[i
];
1450 for(est
=estX
; est
<estNR
; est
++)
1452 if (state_local
->flags
& (1<<est
))
1456 dd_collect_vec(dd
,state_local
,state_local
->x
,state
->x
);
1459 dd_collect_vec(dd
,state_local
,state_local
->v
,state
->v
);
1462 dd_collect_vec(dd
,state_local
,state_local
->sd_X
,state
->sd_X
);
1465 dd_collect_vec(dd
,state_local
,state_local
->cg_p
,state
->cg_p
);
1468 if (state
->nrngi
== 1)
1472 for(i
=0; i
<state_local
->nrng
; i
++)
1474 state
->ld_rng
[i
] = state_local
->ld_rng
[i
];
1480 dd_gather(dd
,state_local
->nrng
*sizeof(state
->ld_rng
[0]),
1481 state_local
->ld_rng
,state
->ld_rng
);
1485 if (state
->nrngi
== 1)
1489 state
->ld_rngi
[0] = state_local
->ld_rngi
[0];
1494 dd_gather(dd
,sizeof(state
->ld_rngi
[0]),
1495 state_local
->ld_rngi
,state
->ld_rngi
);
1498 case estDISRE_INITF
:
1499 case estDISRE_RM3TAV
:
1500 case estORIRE_INITF
:
1504 gmx_incons("Unknown state entry encountered in dd_collect_state");
1510 static void dd_realloc_fr_cg(t_forcerec
*fr
,int nalloc
)
1514 fprintf(debug
,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr
->cg_nalloc
,nalloc
,over_alloc_dd(nalloc
));
1516 fr
->cg_nalloc
= over_alloc_dd(nalloc
);
1517 srenew(fr
->cg_cm
,fr
->cg_nalloc
);
1518 srenew(fr
->cginfo
,fr
->cg_nalloc
);
1521 static void dd_realloc_state(t_state
*state
,rvec
**f
,int nalloc
)
1527 fprintf(debug
,"Reallocating state: currently %d, required %d, allocating %d\n",state
->nalloc
,nalloc
,over_alloc_dd(nalloc
));
1530 state
->nalloc
= over_alloc_dd(nalloc
);
1532 for(est
=estX
; est
<estNR
; est
++)
1534 if (state
->flags
& (1<<est
))
1538 srenew(state
->x
,state
->nalloc
);
1541 srenew(state
->v
,state
->nalloc
);
1544 srenew(state
->sd_X
,state
->nalloc
);
1547 srenew(state
->cg_p
,state
->nalloc
);
1551 case estDISRE_INITF
:
1552 case estDISRE_RM3TAV
:
1553 case estORIRE_INITF
:
1555 /* No reallocation required */
1558 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1565 srenew(*f
,state
->nalloc
);
1569 static void dd_distribute_vec_sendrecv(gmx_domdec_t
*dd
,t_block
*cgs
,
1572 gmx_domdec_master_t
*ma
;
1573 int n
,i
,c
,a
,nalloc
=0;
1580 for(n
=0; n
<dd
->nnodes
; n
++)
1584 if (ma
->nat
[n
] > nalloc
)
1586 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1589 /* Use lv as a temporary buffer */
1591 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1593 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1595 copy_rvec(v
[c
],buf
[a
++]);
1598 if (a
!= ma
->nat
[n
])
1600 gmx_fatal(FARGS
,"Internal error a (%d) != nat (%d)",
1605 MPI_Send(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,
1606 DDRANK(dd
,n
),n
,dd
->mpi_comm_all
);
1611 n
= DDMASTERRANK(dd
);
1613 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1615 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1617 copy_rvec(v
[c
],lv
[a
++]);
1624 MPI_Recv(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1625 MPI_ANY_TAG
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1630 static void dd_distribute_vec_scatterv(gmx_domdec_t
*dd
,t_block
*cgs
,
1633 gmx_domdec_master_t
*ma
;
1634 int *scounts
=NULL
,*disps
=NULL
;
1635 int n
,i
,c
,a
,nalloc
=0;
1642 get_commbuffer_counts(dd
,&scounts
,&disps
);
1646 for(n
=0; n
<dd
->nnodes
; n
++)
1648 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1650 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1652 copy_rvec(v
[c
],buf
[a
++]);
1658 dd_scatterv(dd
,scounts
,disps
,buf
,dd
->nat_home
*sizeof(rvec
),lv
);
1661 static void dd_distribute_vec(gmx_domdec_t
*dd
,t_block
*cgs
,rvec
*v
,rvec
*lv
)
1663 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1665 dd_distribute_vec_sendrecv(dd
,cgs
,v
,lv
);
1669 dd_distribute_vec_scatterv(dd
,cgs
,v
,lv
);
1673 static void dd_distribute_state(gmx_domdec_t
*dd
,t_block
*cgs
,
1674 t_state
*state
,t_state
*state_local
,
1681 state_local
->lambda
= state
->lambda
;
1682 copy_mat(state
->box
,state_local
->box
);
1683 copy_mat(state
->box_rel
,state_local
->box_rel
);
1684 copy_mat(state
->boxv
,state_local
->boxv
);
1685 for(i
=0; i
<state_local
->ngtc
; i
++)
1687 state_local
->nosehoover_xi
[i
] = state
->nosehoover_xi
[i
];
1688 state_local
->therm_integral
[i
] = state
->therm_integral
[i
];
1691 dd_bcast(dd
,sizeof(real
),&state_local
->lambda
);
1692 dd_bcast(dd
,sizeof(state_local
->box
),state_local
->box
);
1693 dd_bcast(dd
,sizeof(state_local
->box_rel
),state_local
->box_rel
);
1694 dd_bcast(dd
,sizeof(state_local
->boxv
),state_local
->boxv
);
1695 dd_bcast(dd
,state_local
->ngtc
*sizeof(real
),state_local
->nosehoover_xi
);
1696 dd_bcast(dd
,state_local
->ngtc
*sizeof(real
),state_local
->therm_integral
);
1698 if (dd
->nat_home
> state_local
->nalloc
)
1700 dd_realloc_state(state_local
,f
,dd
->nat_home
);
1702 for(i
=estX
; i
<estNR
; i
++)
1704 if (state_local
->flags
& (1<<i
))
1708 dd_distribute_vec(dd
,cgs
,state
->x
,state_local
->x
);
1711 dd_distribute_vec(dd
,cgs
,state
->v
,state_local
->v
);
1714 dd_distribute_vec(dd
,cgs
,state
->sd_X
,state_local
->sd_X
);
1717 dd_distribute_vec(dd
,cgs
,state
->cg_p
,state_local
->cg_p
);
1720 if (state
->nrngi
== 1)
1723 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1724 state
->ld_rng
,state_local
->ld_rng
);
1729 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1730 state
->ld_rng
,state_local
->ld_rng
);
1734 if (state
->nrngi
== 1)
1736 dd_bcastc(dd
,sizeof(state_local
->ld_rngi
[0]),
1737 state
->ld_rngi
,state_local
->ld_rngi
);
1741 dd_scatter(dd
,sizeof(state_local
->ld_rngi
[0]),
1742 state
->ld_rngi
,state_local
->ld_rngi
);
1745 case estDISRE_INITF
:
1746 case estDISRE_RM3TAV
:
1747 case estORIRE_INITF
:
1749 /* Not implemented yet */
1752 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1758 static char dim2char(int dim
)
1764 case XX
: c
= 'X'; break;
1765 case YY
: c
= 'Y'; break;
1766 case ZZ
: c
= 'Z'; break;
1767 default: gmx_fatal(FARGS
,"Unknown dim %d",dim
);
1773 static void write_dd_grid_pdb(const char *fn
,gmx_step_t step
,
1774 gmx_domdec_t
*dd
,matrix box
,gmx_ddbox_t
*ddbox
)
1776 rvec grid_s
[2],*grid_r
=NULL
,cx
,r
;
1777 char fname
[STRLEN
],format
[STRLEN
],buf
[22];
1783 copy_rvec(dd
->comm
->cell_x0
,grid_s
[0]);
1784 copy_rvec(dd
->comm
->cell_x1
,grid_s
[1]);
1788 snew(grid_r
,2*dd
->nnodes
);
1791 dd_gather(dd
,2*sizeof(rvec
),grid_s
[0],DDMASTER(dd
) ? grid_r
[0] : NULL
);
1795 for(d
=0; d
<DIM
; d
++)
1797 for(i
=0; i
<DIM
; i
++)
1805 if (dd
->nc
[d
] > 1 && d
< ddbox
->npbcdim
)
1807 tric
[d
][i
] = box
[i
][d
]/box
[i
][i
];
1816 sprintf(fname
,"%s_%s.pdb",fn
,gmx_step_str(step
,buf
));
1817 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
1818 out
= gmx_fio_fopen(fname
,"w");
1819 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
1821 for(i
=0; i
<dd
->nnodes
; i
++)
1823 vol
= dd
->nnodes
/(box
[XX
][XX
]*box
[YY
][YY
]*box
[ZZ
][ZZ
]);
1824 for(d
=0; d
<DIM
; d
++)
1826 vol
*= grid_r
[i
*2+1][d
] - grid_r
[i
*2][d
];
1834 cx
[XX
] = grid_r
[i
*2+x
][XX
];
1835 cx
[YY
] = grid_r
[i
*2+y
][YY
];
1836 cx
[ZZ
] = grid_r
[i
*2+z
][ZZ
];
1838 fprintf(out
,format
,"ATOM",a
++,"CA","GLY",' ',1+i
,
1839 10*r
[XX
],10*r
[YY
],10*r
[ZZ
],1.0,vol
);
1843 for(d
=0; d
<DIM
; d
++)
1849 case 0: y
= 1 + i
*8 + 2*x
; break;
1850 case 1: y
= 1 + i
*8 + 2*x
- (x
% 2); break;
1851 case 2: y
= 1 + i
*8 + x
; break;
1853 fprintf(out
,"%6s%5d%5d\n","CONECT",y
,y
+(1<<d
));
1857 gmx_fio_fclose(out
);
1862 void write_dd_pdb(const char *fn
,gmx_step_t step
,const char *title
,
1863 gmx_mtop_t
*mtop
,t_commrec
*cr
,
1864 int natoms
,rvec x
[],matrix box
)
1866 char fname
[STRLEN
],format
[STRLEN
],format4
[STRLEN
],buf
[22];
1869 char *atomname
,*resname
;
1876 natoms
= dd
->comm
->nat
[ddnatVSITE
];
1879 sprintf(fname
,"%s_%s_n%d.pdb",fn
,gmx_step_str(step
,buf
),cr
->sim_nodeid
);
1881 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
1882 sprintf(format4
,"%s%s\n",pdbformat4
,"%6.2f%6.2f");
1884 out
= gmx_fio_fopen(fname
,"w");
1886 fprintf(out
,"TITLE %s\n",title
);
1887 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
1888 for(i
=0; i
<natoms
; i
++)
1890 ii
= dd
->gatindex
[i
];
1891 gmx_mtop_atominfo_global(mtop
,ii
,&atomname
,&resnr
,&resname
);
1892 if (i
< dd
->comm
->nat
[ddnatZONE
])
1895 while (i
>= dd
->cgindex
[dd
->comm
->zones
.cg_range
[c
+1]])
1901 else if (i
< dd
->comm
->nat
[ddnatVSITE
])
1903 b
= dd
->comm
->zones
.n
;
1907 b
= dd
->comm
->zones
.n
+ 1;
1909 fprintf(out
,strlen(atomname
)<4 ? format
: format4
,
1910 "ATOM",(ii
+1)%100000,
1911 atomname
,resname
,' ',(resnr
+1)%10000,' ',
1912 10*x
[i
][XX
],10*x
[i
][YY
],10*x
[i
][ZZ
],1.0,b
);
1914 fprintf(out
,"TER\n");
1916 gmx_fio_fclose(out
);
1919 real
dd_cutoff_mbody(gmx_domdec_t
*dd
)
1921 gmx_domdec_comm_t
*comm
;
1928 if (comm
->bInterCGBondeds
)
1930 if (comm
->cutoff_mbody
> 0)
1932 r
= comm
->cutoff_mbody
;
1936 /* cutoff_mbody=0 means we do not have DLB */
1937 r
= comm
->cellsize_min
[dd
->dim
[0]];
1938 for(di
=1; di
<dd
->ndim
; di
++)
1940 r
= min(r
,comm
->cellsize_min
[dd
->dim
[di
]]);
1942 if (comm
->bBondComm
)
1944 r
= max(r
,comm
->cutoff_mbody
);
1948 r
= min(r
,comm
->cutoff
);
1956 real
dd_cutoff_twobody(gmx_domdec_t
*dd
)
1960 r_mb
= dd_cutoff_mbody(dd
);
1962 return max(dd
->comm
->cutoff
,r_mb
);
1966 static void dd_cart_coord2pmecoord(gmx_domdec_t
*dd
,ivec coord
,ivec coord_pme
)
1970 nc
= dd
->nc
[dd
->comm
->cartpmedim
];
1971 ntot
= dd
->comm
->ntot
[dd
->comm
->cartpmedim
];
1972 copy_ivec(coord
,coord_pme
);
1973 coord_pme
[dd
->comm
->cartpmedim
] =
1974 nc
+ (coord
[dd
->comm
->cartpmedim
]*(ntot
- nc
) + (ntot
- nc
)/2)/nc
;
1977 static int low_ddindex2pmeindex(int ndd
,int npme
,int ddindex
)
1979 /* Here we assign a PME node to communicate with this DD node
1980 * by assuming that the major index of both is x.
1981 * We add cr->npmenodes/2 to obtain an even distribution.
1983 return (ddindex
*npme
+ npme
/2)/ndd
;
1986 static int ddindex2pmeindex(const gmx_domdec_t
*dd
,int ddindex
)
1988 return low_ddindex2pmeindex(dd
->nnodes
,dd
->comm
->npmenodes
,ddindex
);
1991 static int cr_ddindex2pmeindex(const t_commrec
*cr
,int ddindex
)
1993 return low_ddindex2pmeindex(cr
->dd
->nnodes
,cr
->npmenodes
,ddindex
);
1996 static int *dd_pmenodes(t_commrec
*cr
)
2001 snew(pmenodes
,cr
->npmenodes
);
2003 for(i
=0; i
<cr
->dd
->nnodes
; i
++) {
2004 p0
= cr_ddindex2pmeindex(cr
,i
);
2005 p1
= cr_ddindex2pmeindex(cr
,i
+1);
2006 if (i
+1 == cr
->dd
->nnodes
|| p1
> p0
) {
2008 fprintf(debug
,"pmenode[%d] = %d\n",n
,i
+1+n
);
2009 pmenodes
[n
] = i
+ 1 + n
;
2017 static int gmx_ddcoord2pmeindex(t_commrec
*cr
,int x
,int y
,int z
)
2020 ivec coords
,coords_pme
,nc
;
2025 if (dd->comm->bCartesian) {
2026 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2027 dd_coords2pmecoords(dd,coords,coords_pme);
2028 copy_ivec(dd->ntot,nc);
2029 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2030 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2032 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2034 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2040 slab
= ddindex2pmeindex(dd
,dd_index(dd
->nc
,coords
));
2045 static int ddcoord2simnodeid(t_commrec
*cr
,int x
,int y
,int z
)
2047 gmx_domdec_comm_t
*comm
;
2049 int ddindex
,nodeid
=-1;
2051 comm
= cr
->dd
->comm
;
2056 if (comm
->bCartesianPP_PME
)
2059 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&nodeid
);
2064 ddindex
= dd_index(cr
->dd
->nc
,coords
);
2065 if (comm
->bCartesianPP
)
2067 nodeid
= comm
->ddindex2simnodeid
[ddindex
];
2073 nodeid
= ddindex
+ gmx_ddcoord2pmeindex(cr
,x
,y
,z
);
2085 static int dd_simnode2pmenode(t_commrec
*cr
,int sim_nodeid
)
2088 gmx_domdec_comm_t
*comm
;
2089 ivec coord
,coord_pme
;
2096 /* This assumes a uniform x domain decomposition grid cell size */
2097 if (comm
->bCartesianPP_PME
)
2100 MPI_Cart_coords(cr
->mpi_comm_mysim
,sim_nodeid
,DIM
,coord
);
2101 if (coord
[comm
->cartpmedim
] < dd
->nc
[comm
->cartpmedim
])
2103 /* This is a PP node */
2104 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2105 MPI_Cart_rank(cr
->mpi_comm_mysim
,coord_pme
,&pmenode
);
2109 else if (comm
->bCartesianPP
)
2111 if (sim_nodeid
< dd
->nnodes
)
2113 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2118 /* This assumes DD cells with identical x coordinates
2119 * are numbered sequentially.
2121 if (dd
->comm
->pmenodes
== NULL
)
2123 if (sim_nodeid
< dd
->nnodes
)
2125 /* The DD index equals the nodeid */
2126 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2132 while (sim_nodeid
> dd
->comm
->pmenodes
[i
])
2136 if (sim_nodeid
< dd
->comm
->pmenodes
[i
])
2138 pmenode
= dd
->comm
->pmenodes
[i
];
2146 bool gmx_pmeonlynode(t_commrec
*cr
,int sim_nodeid
)
2150 if (DOMAINDECOMP(cr
))
2152 bPMEOnlyNode
= (dd_simnode2pmenode(cr
,sim_nodeid
) == -1);
2156 bPMEOnlyNode
= FALSE
;
2159 return bPMEOnlyNode
;
2162 void get_pme_ddnodes(t_commrec
*cr
,int pmenodeid
,
2163 int *nmy_ddnodes
,int **my_ddnodes
,int *node_peer
)
2167 ivec coord
,coord_pme
;
2171 snew(*my_ddnodes
,(dd
->nnodes
+cr
->npmenodes
-1)/cr
->npmenodes
);
2174 for(x
=0; x
<dd
->nc
[XX
]; x
++)
2176 for(y
=0; y
<dd
->nc
[YY
]; y
++)
2178 for(z
=0; z
<dd
->nc
[ZZ
]; z
++)
2180 if (dd
->comm
->bCartesianPP_PME
)
2185 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2186 if (dd
->ci
[XX
] == coord_pme
[XX
] &&
2187 dd
->ci
[YY
] == coord_pme
[YY
] &&
2188 dd
->ci
[ZZ
] == coord_pme
[ZZ
])
2189 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2193 /* The slab corresponds to the nodeid in the PME group */
2194 if (gmx_ddcoord2pmeindex(cr
,x
,y
,z
) == pmenodeid
)
2196 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2203 /* The last PP-only node is the peer node */
2204 *node_peer
= (*my_ddnodes
)[*nmy_ddnodes
-1];
2208 fprintf(debug
,"Receive coordinates from PP nodes:");
2209 for(x
=0; x
<*nmy_ddnodes
; x
++)
2211 fprintf(debug
," %d",(*my_ddnodes
)[x
]);
2213 fprintf(debug
,"\n");
2217 static bool receive_vir_ener(t_commrec
*cr
)
2219 gmx_domdec_comm_t
*comm
;
2220 int pmenode
,coords
[DIM
],rank
;
2224 if (cr
->npmenodes
< cr
->dd
->nnodes
)
2226 comm
= cr
->dd
->comm
;
2227 if (comm
->bCartesianPP_PME
)
2229 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2231 MPI_Cart_coords(cr
->mpi_comm_mysim
,cr
->sim_nodeid
,DIM
,coords
);
2232 coords
[comm
->cartpmedim
]++;
2233 if (coords
[comm
->cartpmedim
] < cr
->dd
->nc
[comm
->cartpmedim
])
2235 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&rank
);
2236 if (dd_simnode2pmenode(cr
,rank
) == pmenode
)
2238 /* This is not the last PP node for pmenode */
2246 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2247 if (cr
->sim_nodeid
+1 < cr
->nnodes
&&
2248 dd_simnode2pmenode(cr
,cr
->sim_nodeid
+1) == pmenode
)
2250 /* This is not the last PP node for pmenode */
2259 static void set_zones_ncg_home(gmx_domdec_t
*dd
)
2261 gmx_domdec_zones_t
*zones
;
2264 zones
= &dd
->comm
->zones
;
2266 zones
->cg_range
[0] = 0;
2267 for(i
=1; i
<zones
->n
+1; i
++)
2269 zones
->cg_range
[i
] = dd
->ncg_home
;
2273 static void rebuild_cgindex(gmx_domdec_t
*dd
,int *gcgs_index
,t_state
*state
)
2275 int nat
,i
,*ind
,*dd_cg_gl
,*cgindex
,cg_gl
;
2278 dd_cg_gl
= dd
->index_gl
;
2279 cgindex
= dd
->cgindex
;
2282 for(i
=0; i
<state
->ncg_gl
; i
++)
2286 dd_cg_gl
[i
] = cg_gl
;
2287 nat
+= gcgs_index
[cg_gl
+1] - gcgs_index
[cg_gl
];
2291 dd
->ncg_home
= state
->ncg_gl
;
2294 set_zones_ncg_home(dd
);
2297 static int ddcginfo(const cginfo_mb_t
*cginfo_mb
,int cg
)
2299 while (cg
>= cginfo_mb
->cg_end
)
2304 return cginfo_mb
->cginfo
[(cg
- cginfo_mb
->cg_start
) % cginfo_mb
->cg_mod
];
2307 static void dd_set_cginfo(int *index_gl
,int cg0
,int cg1
,
2308 t_forcerec
*fr
,char *bLocalCG
)
2310 cginfo_mb_t
*cginfo_mb
;
2316 cginfo_mb
= fr
->cginfo_mb
;
2317 cginfo
= fr
->cginfo
;
2319 for(cg
=cg0
; cg
<cg1
; cg
++)
2321 cginfo
[cg
] = ddcginfo(cginfo_mb
,index_gl
[cg
]);
2325 if (bLocalCG
!= NULL
)
2327 for(cg
=cg0
; cg
<cg1
; cg
++)
2329 bLocalCG
[index_gl
[cg
]] = TRUE
;
2334 static void make_dd_indices(gmx_domdec_t
*dd
,int *gcgs_index
,int cg_start
)
2336 int nzone
,zone
,zone1
,cg0
,cg
,cg_gl
,a
,a_gl
;
2337 int *zone2cg
,*zone_ncg1
,*index_gl
,*gatindex
;
2341 bLocalCG
= dd
->comm
->bLocalCG
;
2343 if (dd
->nat_tot
> dd
->gatindex_nalloc
)
2345 dd
->gatindex_nalloc
= over_alloc_dd(dd
->nat_tot
);
2346 srenew(dd
->gatindex
,dd
->gatindex_nalloc
);
2349 nzone
= dd
->comm
->zones
.n
;
2350 zone2cg
= dd
->comm
->zones
.cg_range
;
2351 zone_ncg1
= dd
->comm
->zone_ncg1
;
2352 index_gl
= dd
->index_gl
;
2353 gatindex
= dd
->gatindex
;
2355 if (zone2cg
[1] != dd
->ncg_home
)
2357 gmx_incons("dd->ncg_zone is not up to date");
2360 /* Make the local to global and global to local atom index */
2361 a
= dd
->cgindex
[cg_start
];
2362 for(zone
=0; zone
<nzone
; zone
++)
2370 cg0
= zone2cg
[zone
];
2372 for(cg
=cg0
; cg
<zone2cg
[zone
+1]; cg
++)
2375 if (cg
- cg0
>= zone_ncg1
[zone
])
2377 /* Signal that this cg is from more than one zone away */
2380 cg_gl
= index_gl
[cg
];
2381 for(a_gl
=gcgs_index
[cg_gl
]; a_gl
<gcgs_index
[cg_gl
+1]; a_gl
++)
2384 ga2la_set(dd
->ga2la
,a_gl
,a
,zone1
);
2391 static int check_bLocalCG(gmx_domdec_t
*dd
,int ncg_sys
,const char *bLocalCG
,
2397 if (bLocalCG
== NULL
)
2401 for(i
=0; i
<dd
->ncg_tot
; i
++)
2403 if (!bLocalCG
[dd
->index_gl
[i
]])
2406 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd
->rank
,where
,i
+1,dd
->index_gl
[i
]+1,dd
->ncg_home
);
2411 for(i
=0; i
<ncg_sys
; i
++)
2418 if (ngl
!= dd
->ncg_tot
)
2420 fprintf(stderr
,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd
->rank
,where
,ngl
,dd
->ncg_tot
);
2427 static void check_index_consistency(gmx_domdec_t
*dd
,
2428 int natoms_sys
,int ncg_sys
,
2431 int nerr
,ngl
,i
,a
,cell
;
2436 if (dd
->comm
->DD_debug
> 1)
2438 snew(have
,natoms_sys
);
2439 for(a
=0; a
<dd
->nat_tot
; a
++)
2441 if (have
[dd
->gatindex
[a
]] > 0)
2443 fprintf(stderr
,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd
->rank
,dd
->gatindex
[a
]+1,have
[dd
->gatindex
[a
]],a
+1);
2447 have
[dd
->gatindex
[a
]] = a
+ 1;
2453 snew(have
,dd
->nat_tot
);
2456 for(i
=0; i
<natoms_sys
; i
++)
2458 if (ga2la_get(dd
->ga2la
,i
,&a
,&cell
))
2460 if (a
>= dd
->nat_tot
)
2462 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd
->rank
,i
+1,a
+1,dd
->nat_tot
);
2468 if (dd
->gatindex
[a
] != i
)
2470 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd
->rank
,i
+1,a
+1,dd
->gatindex
[a
]+1);
2477 if (ngl
!= dd
->nat_tot
)
2480 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2481 dd
->rank
,where
,ngl
,dd
->nat_tot
);
2483 for(a
=0; a
<dd
->nat_tot
; a
++)
2488 "DD node %d, %s: local atom %d, global %d has no global index\n",
2489 dd
->rank
,where
,a
+1,dd
->gatindex
[a
]+1);
2494 nerr
+= check_bLocalCG(dd
,ncg_sys
,dd
->comm
->bLocalCG
,where
);
2497 gmx_fatal(FARGS
,"DD node %d, %s: %d atom/cg index inconsistencies",
2498 dd
->rank
,where
,nerr
);
2502 static void clear_dd_indices(gmx_domdec_t
*dd
,int cg_start
,int a_start
)
2509 /* Clear the whole list without searching */
2510 ga2la_clear(dd
->ga2la
);
2514 for(i
=a_start
; i
<dd
->nat_tot
; i
++)
2516 ga2la_del(dd
->ga2la
,dd
->gatindex
[i
]);
2520 bLocalCG
= dd
->comm
->bLocalCG
;
2523 for(i
=cg_start
; i
<dd
->ncg_tot
; i
++)
2525 bLocalCG
[dd
->index_gl
[i
]] = FALSE
;
2529 dd_clear_local_vsite_indices(dd
);
2531 if (dd
->constraints
)
2533 dd_clear_local_constraint_indices(dd
);
2537 static real
grid_jump_limit(gmx_domdec_comm_t
*comm
,int dim_ind
)
2539 real grid_jump_limit
;
2541 /* The distance between the boundaries of cells at distance
2542 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2543 * and by the fact that cells should not be shifted by more than
2544 * half their size, such that cg's only shift by one cell
2545 * at redecomposition.
2547 grid_jump_limit
= comm
->cellsize_limit
;
2548 if (!comm
->bVacDLBNoLimit
)
2550 grid_jump_limit
= max(grid_jump_limit
,
2551 comm
->cutoff
/comm
->cd
[dim_ind
].np
);
2554 return grid_jump_limit
;
2557 static void check_grid_jump(gmx_step_t step
,gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
2559 gmx_domdec_comm_t
*comm
;
2565 for(d
=1; d
<dd
->ndim
; d
++)
2568 limit
= grid_jump_limit(comm
,d
);
2569 bfac
= ddbox
->box_size
[dim
];
2570 if (ddbox
->tric_dir
[dim
])
2572 bfac
*= ddbox
->skew_fac
[dim
];
2574 if ((comm
->cell_f1
[d
] - comm
->cell_f_max0
[d
])*bfac
< limit
||
2575 (comm
->cell_f0
[d
] - comm
->cell_f_min1
[d
])*bfac
> -limit
)
2578 gmx_fatal(FARGS
,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
2579 gmx_step_str(step
,buf
),
2580 dim2char(dim
),dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
2585 static int dd_load_count(gmx_domdec_comm_t
*comm
)
2587 return (comm
->eFlop
? comm
->flop_n
: comm
->cycl_n
[ddCyclF
]);
2590 static float dd_force_load(gmx_domdec_comm_t
*comm
)
2597 if (comm
->eFlop
> 1)
2599 load
*= 1.0 + (comm
->eFlop
- 1)*(0.1*rand()/RAND_MAX
- 0.05);
2604 load
= comm
->cycl
[ddCyclF
];
2605 if (comm
->cycl_n
[ddCyclF
] > 1)
2607 /* Subtract the maximum of the last n cycle counts
2608 * to get rid of possible high counts due to other soures,
2609 * for instance system activity, that would otherwise
2610 * affect the dynamic load balancing.
2612 load
-= comm
->cycl_max
[ddCyclF
];
2619 static void set_slb_pme_dim_f(gmx_domdec_t
*dd
,int dimind
,real
**dim_f
)
2621 gmx_domdec_comm_t
*comm
;
2626 if (dd
->dim
[dimind
] != dimind
)
2632 snew(*dim_f
,dd
->nc
[dimind
]+1);
2634 for(i
=1; i
<dd
->nc
[dimind
]; i
++)
2636 if (comm
->slb_frac
[dimind
])
2638 (*dim_f
)[i
] = (*dim_f
)[i
-1] + comm
->slb_frac
[dimind
][i
-1];
2642 (*dim_f
)[i
] = (real
)i
/(real
)dd
->nc
[dimind
];
2645 (*dim_f
)[dd
->nc
[dimind
]] = 1;
2648 static void init_ddpme(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,
2649 int dimind
,int nslab
)
2651 int pmeindex
,slab
,nso
,i
;
2654 ddpme
->dimind
= dimind
;
2655 ddpme
->nslab
= nslab
;
2662 nso
= dd
->comm
->npmenodes
/nslab
;
2663 /* Determine for each PME slab the PP locacation range for dimension dim */
2664 snew(ddpme
->pp_min
,nslab
);
2665 snew(ddpme
->pp_max
,nslab
);
2666 for(slab
=0; slab
<nslab
; slab
++) {
2667 ddpme
->pp_min
[slab
] = dd
->nc
[dd
->dim
[dimind
]] - 1;
2668 ddpme
->pp_max
[slab
] = 0;
2670 for(i
=0; i
<dd
->nnodes
; i
++) {
2671 ddindex2xyz(dd
->nc
,i
,xyz
);
2672 /* For y only use our y/z slab.
2673 * This assumes that the PME x grid size matches the DD grid size.
2675 if (dimind
== 0 || xyz
[YY
] == dd
->ci
[YY
]) {
2676 pmeindex
= ddindex2pmeindex(dd
,i
);
2678 slab
= pmeindex
/nso
;
2680 slab
= pmeindex
% nslab
;
2682 ddpme
->pp_min
[slab
] = min(ddpme
->pp_min
[slab
],xyz
[dimind
]);
2683 ddpme
->pp_max
[slab
] = max(ddpme
->pp_max
[slab
],xyz
[dimind
]);
2687 set_slb_pme_dim_f(dd
,ddpme
->dimind
,&ddpme
->slb_dim_f
);
2690 int dd_pme_maxshift0(gmx_domdec_t
*dd
)
2692 return dd
->comm
->ddpme
[0].maxshift
;
2695 int dd_pme_maxshift1(gmx_domdec_t
*dd
)
2697 return dd
->comm
->ddpme
[1].maxshift
;
2700 static void set_pme_maxshift(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,
2701 bool bUniform
,gmx_ddbox_t
*ddbox
,real
*cell_f
)
2703 gmx_domdec_comm_t
*comm
;
2706 real range
,pme_boundary
;
2710 dim
= ddpme
->dimind
;
2714 if (dd
->dim
[dim
] != dim
)
2716 /* PP decomposition is not along dim: the worst situation */
2719 else if (ns
<= 3 || (bUniform
&& ns
== nc
))
2721 /* The optimal situation */
2726 /* We need to check for all pme nodes which nodes they
2727 * could possibly need to communicate with.
2729 xmin
= ddpme
->pp_min
;
2730 xmax
= ddpme
->pp_max
;
2731 /* Allow for atoms to be maximally half the cell size or cut-off
2732 * out of their DD cell.
2734 range
= 0.5*min(comm
->cellsize_min
[dim
],comm
->cutoff
);
2735 range
/= ddbox
->skew_fac
[dim
]*ddbox
->box_size
[dim
];
2736 /* Avoid unlucky rounding at exactly 0.5 */
2742 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2743 pme_boundary
= (real
)s
/ns
;
2746 cell_f
[xmax
[s
-(sh
+1) ]+1] + range
> pme_boundary
) ||
2748 cell_f
[xmax
[s
-(sh
+1)+ns
]+1] - 1 + range
> pme_boundary
)))
2752 pme_boundary
= (real
)(s
+1)/ns
;
2755 cell_f
[xmin
[s
+(sh
+1) ] ] - range
< pme_boundary
) ||
2757 cell_f
[xmin
[s
+(sh
+1)-ns
] ] + 1 - range
< pme_boundary
)))
2764 ddpme
->maxshift
= sh
;
2768 fprintf(debug
,"PME slab communication range for dimind %d is %d\n",
2769 ddpme
->dimind
,ddpme
->maxshift
);
2773 static void check_box_size(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
2777 for(d
=0; d
<dd
->ndim
; d
++)
2780 if (dim
< ddbox
->nboundeddim
&&
2781 ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
] <
2782 dd
->nc
[dim
]*dd
->comm
->cellsize_limit
*DD_CELL_MARGIN
)
2784 gmx_fatal(FARGS
,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2785 dim2char(dim
),ddbox
->box_size
[dim
],ddbox
->skew_fac
[dim
],
2786 dd
->nc
[dim
],dd
->comm
->cellsize_limit
);
2791 static void set_dd_cell_sizes_slb(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
,
2792 bool bMaster
,ivec npulse
)
2794 gmx_domdec_comm_t
*comm
;
2797 real
*cell_x
,cell_dx
,cellsize
;
2801 for(d
=0; d
<DIM
; d
++)
2803 cellsize_min
[d
] = ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
];
2805 if (dd
->nc
[d
] == 1 || comm
->slb_frac
[d
] == NULL
)
2808 cell_dx
= ddbox
->box_size
[d
]/dd
->nc
[d
];
2811 for(j
=0; j
<dd
->nc
[d
]+1; j
++)
2813 dd
->ma
->cell_x
[d
][j
] = ddbox
->box0
[d
] + j
*cell_dx
;
2818 comm
->cell_x0
[d
] = ddbox
->box0
[d
] + (dd
->ci
[d
] )*cell_dx
;
2819 comm
->cell_x1
[d
] = ddbox
->box0
[d
] + (dd
->ci
[d
]+1)*cell_dx
;
2821 cellsize
= cell_dx
*ddbox
->skew_fac
[d
];
2822 while (cellsize
*npulse
[d
] < comm
->cutoff
&& npulse
[d
] < dd
->nc
[d
]-1)
2826 cellsize_min
[d
] = cellsize
;
2830 /* Statically load balanced grid */
2831 /* Also when we are not doing a master distribution we determine
2832 * all cell borders in a loop to obtain identical values
2833 * to the master distribution case and to determine npulse.
2837 cell_x
= dd
->ma
->cell_x
[d
];
2841 snew(cell_x
,dd
->nc
[d
]+1);
2843 cell_x
[0] = ddbox
->box0
[d
];
2844 for(j
=0; j
<dd
->nc
[d
]; j
++)
2846 cell_dx
= ddbox
->box_size
[d
]*comm
->slb_frac
[d
][j
];
2847 cell_x
[j
+1] = cell_x
[j
] + cell_dx
;
2848 cellsize
= cell_dx
*ddbox
->skew_fac
[d
];
2849 while (cellsize
*npulse
[d
] < comm
->cutoff
&&
2850 npulse
[d
] < dd
->nc
[d
]-1)
2854 cellsize_min
[d
] = min(cellsize_min
[d
],cellsize
);
2858 comm
->cell_x0
[d
] = cell_x
[dd
->ci
[d
]];
2859 comm
->cell_x1
[d
] = cell_x
[dd
->ci
[d
]+1];
2863 /* The following limitation is to avoid that a cell would receive
2864 * some of its own home charge groups back over the periodic boundary.
2865 * Double charge groups cause trouble with the global indices.
2867 if (d
< ddbox
->npbcdim
&&
2868 dd
->nc
[d
] > 1 && npulse
[d
] >= dd
->nc
[d
])
2872 gmx_fatal(FARGS
,"The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
2873 dim2char(d
),ddbox
->box_size
[d
],ddbox
->skew_fac
[d
],
2875 dd
->nc
[d
],dd
->nc
[d
],
2876 dd
->nnodes
> dd
->nc
[d
] ? "cells" : "processors");
2879 MPI_Abort(MPI_COMM_WORLD
, 0);
2886 if (!comm
->bDynLoadBal
)
2888 copy_rvec(cellsize_min
,comm
->cellsize_min
);
2891 for(d
=0; d
<comm
->npmedecompdim
; d
++)
2893 set_pme_maxshift(dd
,&comm
->ddpme
[d
],
2894 comm
->slb_frac
[dd
->dim
[d
]]==NULL
,ddbox
,
2895 comm
->ddpme
[d
].slb_dim_f
);
2900 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t
*dd
,
2901 int d
,int dim
,gmx_domdec_root_t
*root
,
2903 bool bUniform
,gmx_step_t step
, real cellsize_limit_f
, int range
[])
2905 gmx_domdec_comm_t
*comm
;
2906 int ncd
,i
,j
,nmin
,nmin_old
;
2909 real fac
,halfway
,cellsize_limit_f_i
,region_size
;
2910 bool bPBC
,bLastHi
=FALSE
;
2911 int nrange
[]={range
[0],range
[1]};
2913 region_size
= root
->cell_f
[range
[1]]-root
->cell_f
[range
[0]];
2919 bPBC
= (dim
< ddbox
->npbcdim
);
2921 cell_size
= root
->buf_ncd
;
2925 fprintf(debug
,"enforce_limits: %d %d\n",range
[0],range
[1]);
2928 /* First we need to check if the scaling does not make cells
2929 * smaller than the smallest allowed size.
2930 * We need to do this iteratively, since if a cell is too small,
2931 * it needs to be enlarged, which makes all the other cells smaller,
2932 * which could in turn make another cell smaller than allowed.
2934 for(i
=range
[0]; i
<range
[1]; i
++)
2936 root
->bCellMin
[i
] = FALSE
;
2942 /* We need the total for normalization */
2944 for(i
=range
[0]; i
<range
[1]; i
++)
2946 if (root
->bCellMin
[i
] == FALSE
)
2948 fac
+= cell_size
[i
];
2951 fac
= ( region_size
- nmin
*cellsize_limit_f
)/fac
; /* substracting cells already set to cellsize_limit_f */
2952 /* Determine the cell boundaries */
2953 for(i
=range
[0]; i
<range
[1]; i
++)
2955 if (root
->bCellMin
[i
] == FALSE
)
2957 cell_size
[i
] *= fac
;
2958 if (!bPBC
&& (i
== 0 || i
== dd
->nc
[dim
] -1))
2960 cellsize_limit_f_i
= 0;
2964 cellsize_limit_f_i
= cellsize_limit_f
;
2966 if (cell_size
[i
] < cellsize_limit_f_i
)
2968 root
->bCellMin
[i
] = TRUE
;
2969 cell_size
[i
] = cellsize_limit_f_i
;
2973 root
->cell_f
[i
+1] = root
->cell_f
[i
] + cell_size
[i
];
2976 while (nmin
> nmin_old
);
2979 cell_size
[i
] = root
->cell_f
[i
+1] - root
->cell_f
[i
];
2980 /* For this check we should not use DD_CELL_MARGIN,
2981 * but a slightly smaller factor,
2982 * since rounding could get use below the limit.
2984 if (bPBC
&& cell_size
[i
] < cellsize_limit_f
*DD_CELL_MARGIN2
/DD_CELL_MARGIN
)
2987 gmx_fatal(FARGS
,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
2988 gmx_step_str(step
,buf
),
2989 dim2char(dim
),ddbox
->box_size
[dim
],ddbox
->skew_fac
[dim
],
2990 ncd
,comm
->cellsize_min
[dim
]);
2993 root
->bLimited
= (nmin
> 0) || (range
[0]>0) || (range
[1]<ncd
);
2997 /* Check if the boundary did not displace more than halfway
2998 * each of the cells it bounds, as this could cause problems,
2999 * especially when the differences between cell sizes are large.
3000 * If changes are applied, they will not make cells smaller
3001 * than the cut-off, as we check all the boundaries which
3002 * might be affected by a change and if the old state was ok,
3003 * the cells will at most be shrunk back to their old size.
3005 for(i
=range
[0]+1; i
<range
[1]; i
++)
3007 halfway
= 0.5*(root
->old_cell_f
[i
] + root
->old_cell_f
[i
-1]);
3008 if (root
->cell_f
[i
] < halfway
)
3010 root
->cell_f
[i
] = halfway
;
3011 /* Check if the change also causes shifts of the next boundaries */
3012 for(j
=i
+1; j
<range
[1]; j
++)
3014 if (root
->cell_f
[j
] < root
->cell_f
[j
-1] + cellsize_limit_f
)
3015 root
->cell_f
[j
] = root
->cell_f
[j
-1] + cellsize_limit_f
;
3018 halfway
= 0.5*(root
->old_cell_f
[i
] + root
->old_cell_f
[i
+1]);
3019 if (root
->cell_f
[i
] > halfway
)
3021 root
->cell_f
[i
] = halfway
;
3022 /* Check if the change also causes shifts of the next boundaries */
3023 for(j
=i
-1; j
>=range
[0]+1; j
--)
3025 if (root
->cell_f
[j
] > root
->cell_f
[j
+1] - cellsize_limit_f
)
3026 root
->cell_f
[j
] = root
->cell_f
[j
+1] - cellsize_limit_f
;
3032 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3033 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3034 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3035 * for a and b nrange is used */
3038 /* Take care of the staggering of the cell boundaries */
3041 for(i
=range
[0]; i
<range
[1]; i
++)
3043 root
->cell_f_max0
[i
] = root
->cell_f
[i
];
3044 root
->cell_f_min1
[i
] = root
->cell_f
[i
+1];
3049 for(i
=range
[0]+1; i
<range
[1]; i
++)
3051 bLimLo
= (root
->cell_f
[i
] < root
->bound_min
[i
]);
3052 bLimHi
= (root
->cell_f
[i
] > root
->bound_max
[i
]);
3053 if (bLimLo
&& bLimHi
)
3055 /* Both limits violated, try the best we can */
3056 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3057 root
->cell_f
[i
] = 0.5*(root
->bound_min
[i
] + root
->bound_max
[i
]);
3060 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3064 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3070 /* root->cell_f[i] = root->bound_min[i]; */
3071 nrange
[1]=i
; /* only store violation location. There could be a LimLo violation following with an higher index */
3074 else if (bLimHi
&& !bLastHi
)
3077 if (nrange
[1] < range
[1]) /* found a LimLo before */
3079 root
->cell_f
[nrange
[1]] = root
->bound_min
[nrange
[1]];
3080 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3081 nrange
[0]=nrange
[1];
3083 root
->cell_f
[i
] = root
->bound_max
[i
];
3085 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3090 if (nrange
[1] < range
[1]) /* found last a LimLo */
3092 root
->cell_f
[nrange
[1]] = root
->bound_min
[nrange
[1]];
3093 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3094 nrange
[0]=nrange
[1];
3096 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3098 else if (nrange
[0] > range
[0]) /* found at least one LimHi */
3100 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, nrange
);
3107 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t
*dd
,
3108 int d
,int dim
,gmx_domdec_root_t
*root
,
3109 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3110 bool bUniform
,gmx_step_t step
)
3112 gmx_domdec_comm_t
*comm
;
3115 real load_aver
,load_i
,imbalance
,change
,change_max
,sc
;
3116 real cellsize_limit_f
,dist_min_f
,dist_min_f_hard
,space
;
3117 real change_limit
= 0.1;
3120 int range
[] = { 0, 0 };
3126 bPBC
= (dim
< ddbox
->npbcdim
);
3128 cell_size
= root
->buf_ncd
;
3130 /* Store the original boundaries */
3131 for(i
=0; i
<ncd
+1; i
++)
3133 root
->old_cell_f
[i
] = root
->cell_f
[i
];
3136 for(i
=0; i
<ncd
; i
++)
3138 cell_size
[i
] = 1.0/ncd
;
3141 else if (dd_load_count(comm
))
3143 load_aver
= comm
->load
[d
].sum_m
/ncd
;
3145 for(i
=0; i
<ncd
; i
++)
3147 /* Determine the relative imbalance of cell i */
3148 load_i
= comm
->load
[d
].load
[i
*comm
->load
[d
].nload
+2];
3149 imbalance
= (load_i
- load_aver
)/(load_aver
>0 ? load_aver
: 1);
3150 /* Determine the change of the cell size using underrelaxation */
3151 change
= -relax
*imbalance
;
3152 change_max
= max(change_max
,max(change
,-change
));
3154 /* Limit the amount of scaling.
3155 * We need to use the same rescaling for all cells in one row,
3156 * otherwise the load balancing might not converge.
3159 if (change_max
> change_limit
)
3161 sc
*= change_limit
/change_max
;
3163 for(i
=0; i
<ncd
; i
++)
3165 /* Determine the relative imbalance of cell i */
3166 load_i
= comm
->load
[d
].load
[i
*comm
->load
[d
].nload
+2];
3167 imbalance
= (load_i
- load_aver
)/(load_aver
>0 ? load_aver
: 1);
3168 /* Determine the change of the cell size using underrelaxation */
3169 change
= -sc
*imbalance
;
3170 cell_size
[i
] = (root
->cell_f
[i
+1]-root
->cell_f
[i
])*(1 + change
);
3174 cellsize_limit_f
= comm
->cellsize_min
[dim
]/ddbox
->box_size
[dim
];
3175 cellsize_limit_f
*= DD_CELL_MARGIN
;
3176 dist_min_f_hard
= grid_jump_limit(comm
,d
)/ddbox
->box_size
[dim
];
3177 dist_min_f
= dist_min_f_hard
* DD_CELL_MARGIN
;
3178 if (ddbox
->tric_dir
[dim
])
3180 cellsize_limit_f
/= ddbox
->skew_fac
[dim
];
3181 dist_min_f
/= ddbox
->skew_fac
[dim
];
3183 if (bDynamicBox
&& d
> 0)
3185 dist_min_f
*= DD_PRES_SCALE_MARGIN
;
3187 if (d
> 0 && !bUniform
)
3189 /* Make sure that the grid is not shifted too much */
3190 for(i
=1; i
<ncd
; i
++) {
3191 if (root
->cell_f_min1
[i
] - root
->cell_f_max0
[i
-1] < 2 * dist_min_f_hard
)
3193 gmx_incons("Inconsistent DD boundary staggering limits!");
3195 root
->bound_min
[i
] = root
->cell_f_max0
[i
-1] + dist_min_f
;
3196 space
= root
->cell_f
[i
] - (root
->cell_f_max0
[i
-1] + dist_min_f
);
3198 root
->bound_min
[i
] += 0.5*space
;
3200 root
->bound_max
[i
] = root
->cell_f_min1
[i
] - dist_min_f
;
3201 space
= root
->cell_f
[i
] - (root
->cell_f_min1
[i
] - dist_min_f
);
3203 root
->bound_max
[i
] += 0.5*space
;
3208 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3210 root
->cell_f_max0
[i
-1] + dist_min_f
,
3211 root
->bound_min
[i
],root
->cell_f
[i
],root
->bound_max
[i
],
3212 root
->cell_f_min1
[i
] - dist_min_f
);
3217 root
->cell_f
[0] = 0;
3218 root
->cell_f
[ncd
] = 1;
3219 dd_cell_sizes_dlb_root_enforce_limits(dd
, d
, dim
, root
, ddbox
, bUniform
, step
, cellsize_limit_f
, range
);
3222 /* After the checks above, the cells should obey the cut-off
3223 * restrictions, but it does not hurt to check.
3225 for(i
=0; i
<ncd
; i
++)
3229 fprintf(debug
,"Relative bounds dim %d cell %d: %f %f\n",
3230 dim
,i
,root
->cell_f
[i
],root
->cell_f
[i
+1]);
3233 if ((bPBC
|| (i
!= 0 && i
!= dd
->nc
[dim
]-1)) &&
3234 root
->cell_f
[i
+1] - root
->cell_f
[i
] <
3235 cellsize_limit_f
/DD_CELL_MARGIN
)
3239 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3240 gmx_step_str(step
,buf
),dim2char(dim
),i
,
3241 (root
->cell_f
[i
+1] - root
->cell_f
[i
])
3242 *ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
]);
3247 /* Store the cell boundaries of the lower dimensions at the end */
3248 for(d1
=0; d1
<d
; d1
++)
3250 root
->cell_f
[pos
++] = comm
->cell_f0
[d1
];
3251 root
->cell_f
[pos
++] = comm
->cell_f1
[d1
];
3254 if (d
< comm
->npmedecompdim
)
3256 /* The master determines the maximum shift for
3257 * the coordinate communication between separate PME nodes.
3259 set_pme_maxshift(dd
,&comm
->ddpme
[d
],bUniform
,ddbox
,root
->cell_f
);
3261 root
->cell_f
[pos
++] = comm
->ddpme
[0].maxshift
;
3264 root
->cell_f
[pos
++] = comm
->ddpme
[1].maxshift
;
3268 static void relative_to_absolute_cell_bounds(gmx_domdec_t
*dd
,
3269 gmx_ddbox_t
*ddbox
,int dimind
)
3271 gmx_domdec_comm_t
*comm
;
3276 /* Set the cell dimensions */
3277 dim
= dd
->dim
[dimind
];
3278 comm
->cell_x0
[dim
] = comm
->cell_f0
[dimind
]*ddbox
->box_size
[dim
];
3279 comm
->cell_x1
[dim
] = comm
->cell_f1
[dimind
]*ddbox
->box_size
[dim
];
3280 if (dim
>= ddbox
->nboundeddim
)
3282 comm
->cell_x0
[dim
] += ddbox
->box0
[dim
];
3283 comm
->cell_x1
[dim
] += ddbox
->box0
[dim
];
3287 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t
*dd
,
3288 int d
,int dim
,real
*cell_f_row
,
3291 gmx_domdec_comm_t
*comm
;
3297 /* Each node would only need to know two fractions,
3298 * but it is probably cheaper to broadcast the whole array.
3300 MPI_Bcast(cell_f_row
,DD_CELL_F_SIZE(dd
,d
)*sizeof(real
),MPI_BYTE
,
3301 0,comm
->mpi_comm_load
[d
]);
3303 /* Copy the fractions for this dimension from the buffer */
3304 comm
->cell_f0
[d
] = cell_f_row
[dd
->ci
[dim
] ];
3305 comm
->cell_f1
[d
] = cell_f_row
[dd
->ci
[dim
]+1];
3306 /* The whole array was communicated, so set the buffer position */
3307 pos
= dd
->nc
[dim
] + 1;
3308 for(d1
=0; d1
<=d
; d1
++)
3312 /* Copy the cell fractions of the lower dimensions */
3313 comm
->cell_f0
[d1
] = cell_f_row
[pos
++];
3314 comm
->cell_f1
[d1
] = cell_f_row
[pos
++];
3316 relative_to_absolute_cell_bounds(dd
,ddbox
,d1
);
3318 /* Convert the communicated shift from float to int */
3319 comm
->ddpme
[0].maxshift
= (int)(cell_f_row
[pos
++] + 0.5);
3322 comm
->ddpme
[1].maxshift
= (int)(cell_f_row
[pos
++] + 0.5);
3326 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t
*dd
,
3327 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3328 bool bUniform
,gmx_step_t step
)
3330 gmx_domdec_comm_t
*comm
;
3332 bool bRowMember
,bRowRoot
;
3337 for(d
=0; d
<dd
->ndim
; d
++)
3342 for(d1
=d
; d1
<dd
->ndim
; d1
++)
3344 if (dd
->ci
[dd
->dim
[d1
]] > 0)
3357 set_dd_cell_sizes_dlb_root(dd
,d
,dim
,comm
->root
[d
],
3358 ddbox
,bDynamicBox
,bUniform
,step
);
3359 cell_f_row
= comm
->root
[d
]->cell_f
;
3363 cell_f_row
= comm
->cell_f_row
;
3365 distribute_dd_cell_sizes_dlb(dd
,d
,dim
,cell_f_row
,ddbox
);
3370 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
3374 /* This function assumes the box is static and should therefore
3375 * not be called when the box has changed since the last
3376 * call to dd_partition_system.
3378 for(d
=0; d
<dd
->ndim
; d
++)
3380 relative_to_absolute_cell_bounds(dd
,ddbox
,d
);
3386 static void set_dd_cell_sizes_dlb(gmx_domdec_t
*dd
,
3387 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3388 bool bUniform
,bool bDoDLB
,gmx_step_t step
,
3389 gmx_wallcycle_t wcycle
)
3391 gmx_domdec_comm_t
*comm
;
3398 wallcycle_start(wcycle
,ewcDDCOMMBOUND
);
3399 set_dd_cell_sizes_dlb_change(dd
,ddbox
,bDynamicBox
,bUniform
,step
);
3400 wallcycle_stop(wcycle
,ewcDDCOMMBOUND
);
3402 else if (bDynamicBox
)
3404 set_dd_cell_sizes_dlb_nochange(dd
,ddbox
);
3407 /* Set the dimensions for which no DD is used */
3408 for(dim
=0; dim
<DIM
; dim
++) {
3409 if (dd
->nc
[dim
] == 1) {
3410 comm
->cell_x0
[dim
] = 0;
3411 comm
->cell_x1
[dim
] = ddbox
->box_size
[dim
];
3412 if (dim
>= ddbox
->nboundeddim
)
3414 comm
->cell_x0
[dim
] += ddbox
->box0
[dim
];
3415 comm
->cell_x1
[dim
] += ddbox
->box0
[dim
];
3421 static void realloc_comm_ind(gmx_domdec_t
*dd
,ivec npulse
)
3424 gmx_domdec_comm_dim_t
*cd
;
3426 for(d
=0; d
<dd
->ndim
; d
++)
3428 cd
= &dd
->comm
->cd
[d
];
3429 np
= npulse
[dd
->dim
[d
]];
3430 if (np
> cd
->np_nalloc
)
3434 fprintf(debug
,"(Re)allocing cd for %c to %d pulses\n",
3435 dim2char(dd
->dim
[d
]),np
);
3437 if (DDMASTER(dd
) && cd
->np_nalloc
> 0)
3439 fprintf(stderr
,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd
->dim
[d
]),np
);
3442 for(i
=cd
->np_nalloc
; i
<np
; i
++)
3444 cd
->ind
[i
].index
= NULL
;
3445 cd
->ind
[i
].nalloc
= 0;
3454 static void set_dd_cell_sizes(gmx_domdec_t
*dd
,
3455 gmx_ddbox_t
*ddbox
,bool bDynamicBox
,
3456 bool bUniform
,bool bDoDLB
,gmx_step_t step
,
3457 gmx_wallcycle_t wcycle
)
3459 gmx_domdec_comm_t
*comm
;
3465 /* Copy the old cell boundaries for the cg displacement check */
3466 copy_rvec(comm
->cell_x0
,comm
->old_cell_x0
);
3467 copy_rvec(comm
->cell_x1
,comm
->old_cell_x1
);
3469 if (comm
->bDynLoadBal
)
3473 check_box_size(dd
,ddbox
);
3475 set_dd_cell_sizes_dlb(dd
,ddbox
,bDynamicBox
,bUniform
,bDoDLB
,step
,wcycle
);
3479 set_dd_cell_sizes_slb(dd
,ddbox
,FALSE
,npulse
);
3480 realloc_comm_ind(dd
,npulse
);
3485 for(d
=0; d
<DIM
; d
++)
3487 fprintf(debug
,"cell_x[%d] %f - %f skew_fac %f\n",
3488 d
,comm
->cell_x0
[d
],comm
->cell_x1
[d
],ddbox
->skew_fac
[d
]);
3493 static void comm_dd_ns_cell_sizes(gmx_domdec_t
*dd
,
3495 rvec cell_ns_x0
,rvec cell_ns_x1
,
3498 gmx_domdec_comm_t
*comm
;
3503 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
3505 dim
= dd
->dim
[dim_ind
];
3507 /* Without PBC we don't have restrictions on the outer cells */
3508 if (!(dim
>= ddbox
->npbcdim
&&
3509 (dd
->ci
[dim
] == 0 || dd
->ci
[dim
] == dd
->nc
[dim
] - 1)) &&
3510 comm
->bDynLoadBal
&&
3511 (comm
->cell_x1
[dim
] - comm
->cell_x0
[dim
])*ddbox
->skew_fac
[dim
] <
3512 comm
->cellsize_min
[dim
])
3515 gmx_fatal(FARGS
,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3516 gmx_step_str(step
,buf
),dim2char(dim
),
3517 comm
->cell_x1
[dim
] - comm
->cell_x0
[dim
],
3518 ddbox
->skew_fac
[dim
],
3519 dd
->comm
->cellsize_min
[dim
],
3520 dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
3524 if ((dd
->bGridJump
&& dd
->ndim
> 1) || ddbox
->nboundeddim
< DIM
)
3526 /* Communicate the boundaries and update cell_ns_x0/1 */
3527 dd_move_cellx(dd
,ddbox
,cell_ns_x0
,cell_ns_x1
);
3528 if (dd
->bGridJump
&& dd
->ndim
> 1)
3530 check_grid_jump(step
,dd
,ddbox
);
3535 static void make_tric_corr_matrix(int npbcdim
,matrix box
,matrix tcm
)
3539 tcm
[YY
][XX
] = -box
[YY
][XX
]/box
[YY
][YY
];
3547 tcm
[ZZ
][XX
] = -(box
[ZZ
][YY
]*tcm
[YY
][XX
] + box
[ZZ
][XX
])/box
[ZZ
][ZZ
];
3548 tcm
[ZZ
][YY
] = -box
[ZZ
][YY
]/box
[ZZ
][ZZ
];
3557 static void check_screw_box(matrix box
)
3559 /* Mathematical limitation */
3560 if (box
[YY
][XX
] != 0 || box
[ZZ
][XX
] != 0)
3562 gmx_fatal(FARGS
,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3565 /* Limitation due to the asymmetry of the eighth shell method */
3566 if (box
[ZZ
][YY
] != 0)
3568 gmx_fatal(FARGS
,"pbc=screw with non-zero box_zy is not supported");
3572 static void distribute_cg(FILE *fplog
,gmx_step_t step
,
3573 matrix box
,ivec tric_dir
,t_block
*cgs
,rvec pos
[],
3576 gmx_domdec_master_t
*ma
;
3577 int **tmp_ind
=NULL
,*tmp_nalloc
=NULL
;
3578 int i
,icg
,j
,k
,k0
,k1
,d
,npbcdim
;
3580 rvec box_size
,cg_cm
;
3582 real nrcg
,inv_ncg
,pos_d
;
3584 bool bUnbounded
,bScrew
;
3588 if (tmp_ind
== NULL
)
3590 snew(tmp_nalloc
,dd
->nnodes
);
3591 snew(tmp_ind
,dd
->nnodes
);
3592 for(i
=0; i
<dd
->nnodes
; i
++)
3594 tmp_nalloc
[i
] = over_alloc_large(cgs
->nr
/dd
->nnodes
+1);
3595 snew(tmp_ind
[i
],tmp_nalloc
[i
]);
3599 /* Clear the count */
3600 for(i
=0; i
<dd
->nnodes
; i
++)
3606 make_tric_corr_matrix(dd
->npbcdim
,box
,tcm
);
3608 cgindex
= cgs
->index
;
3610 /* Compute the center of geometry for all charge groups */
3611 for(icg
=0; icg
<cgs
->nr
; icg
++)
3614 k1
= cgindex
[icg
+1];
3618 copy_rvec(pos
[k0
],cg_cm
);
3625 for(k
=k0
; (k
<k1
); k
++)
3627 rvec_inc(cg_cm
,pos
[k
]);
3629 for(d
=0; (d
<DIM
); d
++)
3631 cg_cm
[d
] *= inv_ncg
;
3634 /* Put the charge group in the box and determine the cell index */
3635 for(d
=DIM
-1; d
>=0; d
--) {
3637 if (d
< dd
->npbcdim
)
3639 bScrew
= (dd
->bScrewPBC
&& d
== XX
);
3640 if (tric_dir
[d
] && dd
->nc
[d
] > 1)
3642 /* Use triclinic coordintates for this dimension */
3643 for(j
=d
+1; j
<DIM
; j
++)
3645 pos_d
+= cg_cm
[j
]*tcm
[j
][d
];
3648 while(pos_d
>= box
[d
][d
])
3651 rvec_dec(cg_cm
,box
[d
]);
3654 cg_cm
[YY
] = box
[YY
][YY
] - cg_cm
[YY
];
3655 cg_cm
[ZZ
] = box
[ZZ
][ZZ
] - cg_cm
[ZZ
];
3657 for(k
=k0
; (k
<k1
); k
++)
3659 rvec_dec(pos
[k
],box
[d
]);
3662 pos
[k
][YY
] = box
[YY
][YY
] - pos
[k
][YY
];
3663 pos
[k
][ZZ
] = box
[ZZ
][ZZ
] - pos
[k
][ZZ
];
3670 rvec_inc(cg_cm
,box
[d
]);
3673 cg_cm
[YY
] = box
[YY
][YY
] - cg_cm
[YY
];
3674 cg_cm
[ZZ
] = box
[ZZ
][ZZ
] - cg_cm
[ZZ
];
3676 for(k
=k0
; (k
<k1
); k
++)
3678 rvec_inc(pos
[k
],box
[d
]);
3680 pos
[k
][YY
] = box
[YY
][YY
] - pos
[k
][YY
];
3681 pos
[k
][ZZ
] = box
[ZZ
][ZZ
] - pos
[k
][ZZ
];
3686 /* This could be done more efficiently */
3688 while(ind
[d
]+1 < dd
->nc
[d
] && pos_d
>= ma
->cell_x
[d
][ind
[d
]+1])
3693 i
= dd_index(dd
->nc
,ind
);
3694 if (ma
->ncg
[i
] == tmp_nalloc
[i
])
3696 tmp_nalloc
[i
] = over_alloc_large(ma
->ncg
[i
]+1);
3697 srenew(tmp_ind
[i
],tmp_nalloc
[i
]);
3699 tmp_ind
[i
][ma
->ncg
[i
]] = icg
;
3701 ma
->nat
[i
] += cgindex
[icg
+1] - cgindex
[icg
];
3705 for(i
=0; i
<dd
->nnodes
; i
++)
3708 for(k
=0; k
<ma
->ncg
[i
]; k
++)
3710 ma
->cg
[k1
++] = tmp_ind
[i
][k
];
3713 ma
->index
[dd
->nnodes
] = k1
;
3715 for(i
=0; i
<dd
->nnodes
; i
++)
3725 fprintf(fplog
,"Charge group distribution at step %s:",
3726 gmx_step_str(step
,buf
));
3727 for(i
=0; i
<dd
->nnodes
; i
++)
3729 fprintf(fplog
," %d",ma
->ncg
[i
]);
3731 fprintf(fplog
,"\n");
3735 static void get_cg_distribution(FILE *fplog
,gmx_step_t step
,gmx_domdec_t
*dd
,
3736 t_block
*cgs
,matrix box
,gmx_ddbox_t
*ddbox
,
3739 gmx_domdec_master_t
*ma
=NULL
;
3742 int *ibuf
,buf2
[2] = { 0, 0 };
3750 check_screw_box(box
);
3753 set_dd_cell_sizes_slb(dd
,ddbox
,TRUE
,npulse
);
3755 distribute_cg(fplog
,step
,box
,ddbox
->tric_dir
,cgs
,pos
,dd
);
3756 for(i
=0; i
<dd
->nnodes
; i
++)
3758 ma
->ibuf
[2*i
] = ma
->ncg
[i
];
3759 ma
->ibuf
[2*i
+1] = ma
->nat
[i
];
3767 dd_scatter(dd
,2*sizeof(int),ibuf
,buf2
);
3769 dd
->ncg_home
= buf2
[0];
3770 dd
->nat_home
= buf2
[1];
3771 dd
->ncg_tot
= dd
->ncg_home
;
3772 dd
->nat_tot
= dd
->nat_home
;
3773 if (dd
->ncg_home
> dd
->cg_nalloc
|| dd
->cg_nalloc
== 0)
3775 dd
->cg_nalloc
= over_alloc_dd(dd
->ncg_home
);
3776 srenew(dd
->index_gl
,dd
->cg_nalloc
);
3777 srenew(dd
->cgindex
,dd
->cg_nalloc
+1);
3781 for(i
=0; i
<dd
->nnodes
; i
++)
3783 ma
->ibuf
[i
] = ma
->ncg
[i
]*sizeof(int);
3784 ma
->ibuf
[dd
->nnodes
+i
] = ma
->index
[i
]*sizeof(int);
3789 DDMASTER(dd
) ? ma
->ibuf
: NULL
,
3790 DDMASTER(dd
) ? ma
->ibuf
+dd
->nnodes
: NULL
,
3791 DDMASTER(dd
) ? ma
->cg
: NULL
,
3792 dd
->ncg_home
*sizeof(int),dd
->index_gl
);
3794 /* Determine the home charge group sizes */
3796 for(i
=0; i
<dd
->ncg_home
; i
++)
3798 cg_gl
= dd
->index_gl
[i
];
3800 dd
->cgindex
[i
] + cgs
->index
[cg_gl
+1] - cgs
->index
[cg_gl
];
3805 fprintf(debug
,"Home charge groups:\n");
3806 for(i
=0; i
<dd
->ncg_home
; i
++)
3808 fprintf(debug
," %d",dd
->index_gl
[i
]);
3810 fprintf(debug
,"\n");
3812 fprintf(debug
,"\n");
3816 static int compact_and_copy_vec_at(int ncg
,int *move
,
3819 rvec
*src
,gmx_domdec_comm_t
*comm
,
3822 int m
,icg
,i
,i0
,i1
,nrcg
;
3828 for(m
=0; m
<DIM
*2; m
++)
3834 for(icg
=0; icg
<ncg
; icg
++)
3836 i1
= cgindex
[icg
+1];
3842 /* Compact the home array in place */
3843 for(i
=i0
; i
<i1
; i
++)
3845 copy_rvec(src
[i
],src
[home_pos
++]);
3851 /* Copy to the communication buffer */
3853 pos_vec
[m
] += 1 + vec
*nrcg
;
3854 for(i
=i0
; i
<i1
; i
++)
3856 copy_rvec(src
[i
],comm
->cgcm_state
[m
][pos_vec
[m
]++]);
3858 pos_vec
[m
] += (nvec
- vec
- 1)*nrcg
;
3862 home_pos
+= i1
- i0
;
3870 static int compact_and_copy_vec_cg(int ncg
,int *move
,
3872 int nvec
,rvec
*src
,gmx_domdec_comm_t
*comm
,
3875 int m
,icg
,i0
,i1
,nrcg
;
3881 for(m
=0; m
<DIM
*2; m
++)
3887 for(icg
=0; icg
<ncg
; icg
++)
3889 i1
= cgindex
[icg
+1];
3895 /* Compact the home array in place */
3896 copy_rvec(src
[icg
],src
[home_pos
++]);
3902 /* Copy to the communication buffer */
3903 copy_rvec(src
[icg
],comm
->cgcm_state
[m
][pos_vec
[m
]]);
3904 pos_vec
[m
] += 1 + nrcg
*nvec
;
3916 static int compact_ind(int ncg
,int *move
,
3917 int *index_gl
,int *cgindex
,
3919 gmx_ga2la_t ga2la
,char *bLocalCG
,
3922 int cg
,nat
,a0
,a1
,a
,a_gl
;
3927 for(cg
=0; cg
<ncg
; cg
++)
3933 /* Compact the home arrays in place.
3934 * Anything that can be done here avoids access to global arrays.
3936 cgindex
[home_pos
] = nat
;
3937 for(a
=a0
; a
<a1
; a
++)
3940 gatindex
[nat
] = a_gl
;
3941 /* The cell number stays 0, so we don't need to set it */
3942 ga2la_change_la(ga2la
,a_gl
,nat
);
3945 index_gl
[home_pos
] = index_gl
[cg
];
3946 cginfo
[home_pos
] = cginfo
[cg
];
3947 /* The charge group remains local, so bLocalCG does not change */
3952 /* Clear the global indices */
3953 for(a
=a0
; a
<a1
; a
++)
3955 ga2la_del(ga2la
,gatindex
[a
]);
3959 bLocalCG
[index_gl
[cg
]] = FALSE
;
3963 cgindex
[home_pos
] = nat
;
3968 static void clear_and_mark_ind(int ncg
,int *move
,
3969 int *index_gl
,int *cgindex
,int *gatindex
,
3970 gmx_ga2la_t ga2la
,char *bLocalCG
,
3975 for(cg
=0; cg
<ncg
; cg
++)
3981 /* Clear the global indices */
3982 for(a
=a0
; a
<a1
; a
++)
3984 ga2la_del(ga2la
,gatindex
[a
]);
3988 bLocalCG
[index_gl
[cg
]] = FALSE
;
3990 /* Signal that this cg has moved using the ns cell index.
3991 * Here we set it to -1.
3992 * fill_grid will change it from -1 to 4*grid->ncells.
3994 cell_index
[cg
] = -1;
3999 static void print_cg_move(FILE *fplog
,
4001 gmx_step_t step
,int cg
,int dim
,int dir
,
4003 rvec cm_old
,rvec cm_new
,real pos_d
)
4005 gmx_domdec_comm_t
*comm
;
4010 fprintf(fplog
,"\nStep %s:\n",gmx_step_str(step
,buf
));
4011 fprintf(fplog
,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition (%f) in direction %c\n",
4012 ddglatnr(dd
,dd
->cgindex
[cg
]),limitd
,dim2char(dim
));
4013 fprintf(fplog
,"distance out of cell %f\n",
4014 dir
==1 ? pos_d
- comm
->cell_x1
[dim
] : pos_d
- comm
->cell_x0
[dim
]);
4015 fprintf(fplog
,"Old coordinates: %8.3f %8.3f %8.3f\n",
4016 cm_old
[XX
],cm_old
[YY
],cm_old
[ZZ
]);
4017 fprintf(fplog
,"New coordinates: %8.3f %8.3f %8.3f\n",
4018 cm_new
[XX
],cm_new
[YY
],cm_new
[ZZ
]);
4019 fprintf(fplog
,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
4021 comm
->old_cell_x0
[dim
],comm
->old_cell_x1
[dim
]);
4022 fprintf(fplog
,"New cell boundaries in direction %c: %8.3f %8.3f\n",
4024 comm
->cell_x0
[dim
],comm
->cell_x1
[dim
]);
4027 static void cg_move_error(FILE *fplog
,
4029 gmx_step_t step
,int cg
,int dim
,int dir
,
4031 rvec cm_old
,rvec cm_new
,real pos_d
)
4035 print_cg_move(fplog
, dd
,step
,cg
,dim
,dir
,limitd
,cm_old
,cm_new
,pos_d
);
4037 print_cg_move(stderr
,dd
,step
,cg
,dim
,dir
,limitd
,cm_old
,cm_new
,pos_d
);
4039 "A charge group moved too far between two domain decomposition steps\n"
4040 "This usually means that your system is not well equilibrated");
4043 static void rotate_state_atom(t_state
*state
,int a
)
4047 for(est
=estX
; est
<estNR
; est
++)
4049 if (state
->flags
& (1<<est
)) {
4052 /* Rotate the complete state; for a rectangular box only */
4053 state
->x
[a
][YY
] = state
->box
[YY
][YY
] - state
->x
[a
][YY
];
4054 state
->x
[a
][ZZ
] = state
->box
[ZZ
][ZZ
] - state
->x
[a
][ZZ
];
4057 state
->v
[a
][YY
] = -state
->v
[a
][YY
];
4058 state
->v
[a
][ZZ
] = -state
->v
[a
][ZZ
];
4061 state
->sd_X
[a
][YY
] = -state
->sd_X
[a
][YY
];
4062 state
->sd_X
[a
][ZZ
] = -state
->sd_X
[a
][ZZ
];
4065 state
->cg_p
[a
][YY
] = -state
->cg_p
[a
][YY
];
4066 state
->cg_p
[a
][ZZ
] = -state
->cg_p
[a
][ZZ
];
4068 case estDISRE_INITF
:
4069 case estDISRE_RM3TAV
:
4070 case estORIRE_INITF
:
4072 /* These are distances, so not affected by rotation */
4075 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4081 static int dd_redistribute_cg(FILE *fplog
,gmx_step_t step
,
4082 gmx_domdec_t
*dd
,ivec tric_dir
,
4083 t_state
*state
,rvec
**f
,
4084 t_forcerec
*fr
,t_mdatoms
*md
,
4090 int ncg
[DIM
*2],nat
[DIM
*2];
4091 int c
,i
,cg
,k
,k0
,k1
,d
,dim
,dim2
,dir
,d2
,d3
,d4
,cell_d
;
4092 int mc
,cdd
,nrcg
,ncg_recv
,nat_recv
,nvs
,nvr
,nvec
,vec
;
4093 int sbuf
[2],rbuf
[2];
4094 int home_pos_cg
,home_pos_at
,ncg_stay_home
,buf_pos
;
4096 bool bV
=FALSE
,bSDX
=FALSE
,bCGP
=FALSE
;
4101 rvec
*cg_cm
,cell_x0
,cell_x1
,limitd
,limit0
,limit1
,cm_new
;
4103 cginfo_mb_t
*cginfo_mb
;
4104 gmx_domdec_comm_t
*comm
;
4108 check_screw_box(state
->box
);
4114 for(i
=estX
; i
<estNR
; i
++)
4118 case estX
: /* Always present */ break;
4119 case estV
: bV
= (state
->flags
& (1<<i
)); break;
4120 case estSDX
: bSDX
= (state
->flags
& (1<<i
)); break;
4121 case estCGP
: bCGP
= (state
->flags
& (1<<i
)); break;
4124 case estDISRE_INITF
:
4125 case estDISRE_RM3TAV
:
4126 case estORIRE_INITF
:
4128 /* No processing required */
4131 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4135 if (dd
->ncg_tot
> comm
->nalloc_int
)
4137 comm
->nalloc_int
= over_alloc_dd(dd
->ncg_tot
);
4138 srenew(comm
->buf_int
,comm
->nalloc_int
);
4140 move
= comm
->buf_int
;
4142 /* Clear the count */
4143 for(c
=0; c
<dd
->ndim
*2; c
++)
4149 npbcdim
= dd
->npbcdim
;
4151 for(d
=0; (d
<DIM
); d
++)
4153 limitd
[d
] = dd
->comm
->cellsize_min
[d
];
4154 if (d
>= npbcdim
&& dd
->ci
[d
] == 0)
4156 cell_x0
[d
] = -GMX_FLOAT_MAX
;
4160 cell_x0
[d
] = comm
->cell_x0
[d
];
4162 if (d
>= npbcdim
&& dd
->ci
[d
] == dd
->nc
[d
] - 1)
4164 cell_x1
[d
] = GMX_FLOAT_MAX
;
4168 cell_x1
[d
] = comm
->cell_x1
[d
];
4170 limit0
[d
] = comm
->old_cell_x0
[d
] - limitd
[d
];
4171 limit1
[d
] = comm
->old_cell_x1
[d
] + limitd
[d
];
4174 make_tric_corr_matrix(npbcdim
,state
->box
,tcm
);
4176 cgindex
= dd
->cgindex
;
4178 /* Compute the center of geometry for all home charge groups
4179 * and put them in the box and determine where they should go.
4181 for(cg
=0; cg
<dd
->ncg_home
; cg
++)
4188 copy_rvec(state
->x
[k0
],cm_new
);
4195 for(k
=k0
; (k
<k1
); k
++)
4197 rvec_inc(cm_new
,state
->x
[k
]);
4199 for(d
=0; (d
<DIM
); d
++)
4201 cm_new
[d
] = inv_ncg
*cm_new
[d
];
4206 /* Do pbc and check DD cell boundary crossings */
4207 for(d
=DIM
-1; d
>=0; d
--)
4211 bScrew
= (dd
->bScrewPBC
&& d
== XX
);
4212 /* Determine the location of this cg in lattice coordinates */
4216 for(d2
=d
+1; d2
<DIM
; d2
++)
4218 pos_d
+= cm_new
[d2
]*tcm
[d2
][d
];
4221 /* Put the charge group in the triclinic unit-cell */
4222 if (pos_d
>= cell_x1
[d
])
4224 if (pos_d
>= limit1
[d
])
4226 cg_move_error(fplog
,dd
,step
,cg
,d
,1,limitd
[d
],
4227 cg_cm
[cg
],cm_new
,pos_d
);
4230 if (dd
->ci
[d
] == dd
->nc
[d
] - 1)
4232 rvec_dec(cm_new
,state
->box
[d
]);
4235 cm_new
[YY
] = state
->box
[YY
][YY
] - cm_new
[YY
];
4236 cm_new
[ZZ
] = state
->box
[ZZ
][ZZ
] - cm_new
[ZZ
];
4238 for(k
=k0
; (k
<k1
); k
++)
4240 rvec_dec(state
->x
[k
],state
->box
[d
]);
4243 rotate_state_atom(state
,k
);
4248 else if (pos_d
< cell_x0
[d
])
4250 if (pos_d
< limit0
[d
])
4252 cg_move_error(fplog
,dd
,step
,cg
,d
,-1,limitd
[d
],
4253 cg_cm
[cg
],cm_new
,pos_d
);
4258 rvec_inc(cm_new
,state
->box
[d
]);
4261 cm_new
[YY
] = state
->box
[YY
][YY
] - cm_new
[YY
];
4262 cm_new
[ZZ
] = state
->box
[ZZ
][ZZ
] - cm_new
[ZZ
];
4264 for(k
=k0
; (k
<k1
); k
++)
4266 rvec_inc(state
->x
[k
],state
->box
[d
]);
4269 rotate_state_atom(state
,k
);
4275 else if (d
< npbcdim
)
4277 /* Put the charge group in the rectangular unit-cell */
4278 while (cm_new
[d
] >= state
->box
[d
][d
])
4280 rvec_dec(cm_new
,state
->box
[d
]);
4281 for(k
=k0
; (k
<k1
); k
++)
4283 rvec_dec(state
->x
[k
],state
->box
[d
]);
4286 while (cm_new
[d
] < 0)
4288 rvec_inc(cm_new
,state
->box
[d
]);
4289 for(k
=k0
; (k
<k1
); k
++)
4291 rvec_inc(state
->x
[k
],state
->box
[d
]);
4297 copy_rvec(cm_new
,cg_cm
[cg
]);
4299 /* Determine where this cg should go */
4302 for(d
=0; d
<dd
->ndim
; d
++)
4307 flag
|= DD_FLAG_FW(d
);
4313 else if (dev
[dim
] == -1)
4315 flag
|= DD_FLAG_BW(d
);
4317 if (dd
->nc
[dim
] > 2)
4331 if (ncg
[mc
]+1 > comm
->cggl_flag_nalloc
[mc
])
4333 comm
->cggl_flag_nalloc
[mc
] = over_alloc_dd(ncg
[mc
]+1);
4334 srenew(comm
->cggl_flag
[mc
],comm
->cggl_flag_nalloc
[mc
]*DD_CGIBS
);
4336 comm
->cggl_flag
[mc
][ncg
[mc
]*DD_CGIBS
] = dd
->index_gl
[cg
];
4337 /* We store the cg size in the lower 16 bits
4338 * and the place where the charge group should go
4339 * in the next 6 bits. This saves some communication volume.
4341 comm
->cggl_flag
[mc
][ncg
[mc
]*DD_CGIBS
+1] = nrcg
| flag
;
4347 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
4348 inc_nrnb(nrnb
,eNR_RESETX
,dd
->ncg_home
);
4364 /* Make sure the communication buffers are large enough */
4365 for(mc
=0; mc
<dd
->ndim
*2; mc
++)
4367 nvr
= ncg
[mc
] + nat
[mc
]*nvec
;
4368 if (nvr
> comm
->cgcm_state_nalloc
[mc
])
4370 comm
->cgcm_state_nalloc
[mc
] = over_alloc_dd(nvr
);
4371 srenew(comm
->cgcm_state
[mc
],comm
->cgcm_state_nalloc
[mc
]);
4375 /* Recalculating cg_cm might be cheaper than communicating,
4376 * but that could give rise to rounding issues.
4379 compact_and_copy_vec_cg(dd
->ncg_home
,move
,cgindex
,
4380 nvec
,cg_cm
,comm
,bCompact
);
4384 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4385 nvec
,vec
++,state
->x
,comm
,bCompact
);
4388 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4389 nvec
,vec
++,state
->v
,comm
,bCompact
);
4393 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4394 nvec
,vec
++,state
->sd_X
,comm
,bCompact
);
4398 compact_and_copy_vec_at(dd
->ncg_home
,move
,cgindex
,
4399 nvec
,vec
++,state
->cg_p
,comm
,bCompact
);
4404 compact_ind(dd
->ncg_home
,move
,
4405 dd
->index_gl
,dd
->cgindex
,dd
->gatindex
,
4406 dd
->ga2la
,comm
->bLocalCG
,
4411 clear_and_mark_ind(dd
->ncg_home
,move
,
4412 dd
->index_gl
,dd
->cgindex
,dd
->gatindex
,
4413 dd
->ga2la
,comm
->bLocalCG
,
4414 fr
->ns
.grid
->cell_index
);
4417 cginfo_mb
= fr
->cginfo_mb
;
4419 ncg_stay_home
= home_pos_cg
;
4420 for(d
=0; d
<dd
->ndim
; d
++)
4426 for(dir
=0; dir
<(dd
->nc
[dim
]==2 ? 1 : 2); dir
++)
4429 /* Communicate the cg and atom counts */
4434 fprintf(debug
,"Sending ddim %d dir %d: ncg %d nat %d\n",
4435 d
,dir
,sbuf
[0],sbuf
[1]);
4437 dd_sendrecv_int(dd
, d
, dir
, sbuf
, 2, rbuf
, 2);
4439 if ((ncg_recv
+rbuf
[0])*DD_CGIBS
> comm
->nalloc_int
)
4441 comm
->nalloc_int
= over_alloc_dd((ncg_recv
+rbuf
[0])*DD_CGIBS
);
4442 srenew(comm
->buf_int
,comm
->nalloc_int
);
4445 /* Communicate the charge group indices, sizes and flags */
4446 dd_sendrecv_int(dd
, d
, dir
,
4447 comm
->cggl_flag
[cdd
], sbuf
[0]*DD_CGIBS
,
4448 comm
->buf_int
+ncg_recv
*DD_CGIBS
, rbuf
[0]*DD_CGIBS
);
4450 nvs
= ncg
[cdd
] + nat
[cdd
]*nvec
;
4451 i
= rbuf
[0] + rbuf
[1] *nvec
;
4452 check_vec_rvec_alloc(&comm
->vbuf
,nvr
+i
);
4454 /* Communicate cgcm and state */
4455 dd_sendrecv_rvec(dd
, d
, dir
,
4456 comm
->cgcm_state
[cdd
], nvs
,
4457 comm
->vbuf
.v
+nvr
, i
);
4458 ncg_recv
+= rbuf
[0];
4459 nat_recv
+= rbuf
[1];
4463 /* Process the received charge groups */
4465 for(cg
=0; cg
<ncg_recv
; cg
++)
4467 flag
= comm
->buf_int
[cg
*DD_CGIBS
+1];
4471 /* Check which direction this cg should go */
4472 for(d2
=d
+1; (d2
<dd
->ndim
&& mc
==-1); d2
++)
4476 /* The cell boundaries for dimension d2 are not equal
4477 * for each cell row of the lower dimension(s),
4478 * therefore we might need to redetermine where
4479 * this cg should go.
4482 /* If this cg crosses the box boundary in dimension d2
4483 * we can use the communicated flag, so we do not
4484 * have to worry about pbc.
4486 if (!((dd
->ci
[dim2
] == dd
->nc
[dim2
]-1 &&
4487 (flag
& DD_FLAG_FW(d2
))) ||
4488 (dd
->ci
[dim2
] == 0 &&
4489 (flag
& DD_FLAG_BW(d2
)))))
4491 /* Clear the two flags for this dimension */
4492 flag
&= ~(DD_FLAG_FW(d2
) | DD_FLAG_BW(d2
));
4493 /* Determine the location of this cg
4494 * in lattice coordinates
4496 pos_d
= comm
->vbuf
.v
[buf_pos
][dim2
];
4499 for(d3
=dim2
+1; d3
<DIM
; d3
++)
4502 comm
->vbuf
.v
[buf_pos
][d3
]*tcm
[d3
][dim2
];
4505 /* Check of we are not at the box edge.
4506 * pbc is only handled in the first step above,
4507 * but this check could move over pbc while
4508 * the first step did not due to different rounding.
4510 if (pos_d
>= cell_x1
[dim2
] &&
4511 dd
->ci
[dim2
] != dd
->nc
[dim2
]-1)
4513 flag
|= DD_FLAG_FW(d2
);
4515 else if (pos_d
< cell_x0
[dim2
] &&
4518 flag
|= DD_FLAG_BW(d2
);
4520 comm
->buf_int
[cg
*DD_CGIBS
+1] = flag
;
4523 /* Set to which neighboring cell this cg should go */
4524 if (flag
& DD_FLAG_FW(d2
))
4528 else if (flag
& DD_FLAG_BW(d2
))
4530 if (dd
->nc
[dd
->dim
[d2
]] > 2)
4542 nrcg
= flag
& DD_FLAG_NRCG
;
4545 if (home_pos_cg
+1 > dd
->cg_nalloc
)
4547 dd
->cg_nalloc
= over_alloc_dd(home_pos_cg
+1);
4548 srenew(dd
->index_gl
,dd
->cg_nalloc
);
4549 srenew(dd
->cgindex
,dd
->cg_nalloc
+1);
4551 /* Set the global charge group index and size */
4552 dd
->index_gl
[home_pos_cg
] = comm
->buf_int
[cg
*DD_CGIBS
];
4553 dd
->cgindex
[home_pos_cg
+1] = dd
->cgindex
[home_pos_cg
] + nrcg
;
4554 /* Copy the state from the buffer */
4555 if (home_pos_cg
>= fr
->cg_nalloc
)
4557 dd_realloc_fr_cg(fr
,home_pos_cg
+1);
4560 copy_rvec(comm
->vbuf
.v
[buf_pos
++],cg_cm
[home_pos_cg
]);
4561 /* Set the cginfo */
4562 fr
->cginfo
[home_pos_cg
] = ddcginfo(cginfo_mb
,
4563 dd
->index_gl
[home_pos_cg
]);
4566 comm
->bLocalCG
[dd
->index_gl
[home_pos_cg
]] = TRUE
;
4569 if (home_pos_at
+nrcg
> state
->nalloc
)
4571 dd_realloc_state(state
,f
,home_pos_at
+nrcg
);
4573 for(i
=0; i
<nrcg
; i
++)
4575 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4576 state
->x
[home_pos_at
+i
]);
4580 for(i
=0; i
<nrcg
; i
++)
4582 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4583 state
->v
[home_pos_at
+i
]);
4588 for(i
=0; i
<nrcg
; i
++)
4590 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4591 state
->sd_X
[home_pos_at
+i
]);
4596 for(i
=0; i
<nrcg
; i
++)
4598 copy_rvec(comm
->vbuf
.v
[buf_pos
++],
4599 state
->cg_p
[home_pos_at
+i
]);
4603 home_pos_at
+= nrcg
;
4607 /* Reallocate the buffers if necessary */
4608 if (ncg
[mc
]+1 > comm
->cggl_flag_nalloc
[mc
])
4610 comm
->cggl_flag_nalloc
[mc
] = over_alloc_dd(ncg
[mc
]+1);
4611 srenew(comm
->cggl_flag
[mc
],comm
->cggl_flag_nalloc
[mc
]*DD_CGIBS
);
4613 nvr
= ncg
[mc
] + nat
[mc
]*nvec
;
4614 if (nvr
+ 1 + nrcg
*nvec
> comm
->cgcm_state_nalloc
[mc
])
4616 comm
->cgcm_state_nalloc
[mc
] = over_alloc_dd(nvr
+ 1 + nrcg
*nvec
);
4617 srenew(comm
->cgcm_state
[mc
],comm
->cgcm_state_nalloc
[mc
]);
4619 /* Copy from the receive to the send buffers */
4620 memcpy(comm
->cggl_flag
[mc
] + ncg
[mc
]*DD_CGIBS
,
4621 comm
->buf_int
+ cg
*DD_CGIBS
,
4622 DD_CGIBS
*sizeof(int));
4623 memcpy(comm
->cgcm_state
[mc
][nvr
],
4624 comm
->vbuf
.v
[buf_pos
],
4625 (1+nrcg
*nvec
)*sizeof(rvec
));
4626 buf_pos
+= 1 + nrcg
*nvec
;
4633 /* With sorting (!bCompact) the indices are now only partially up to date
4634 * and ncg_home and nat_home are not the real count, since there are
4635 * "holes" in the arrays for the charge groups that moved to neighbors.
4637 dd
->ncg_home
= home_pos_cg
;
4638 dd
->nat_home
= home_pos_at
;
4642 fprintf(debug
,"Finished repartitioning\n");
4645 return ncg_stay_home
;
4648 void dd_cycles_add(gmx_domdec_t
*dd
,float cycles
,int ddCycl
)
4650 dd
->comm
->cycl
[ddCycl
] += cycles
;
4651 dd
->comm
->cycl_n
[ddCycl
]++;
4652 if (cycles
> dd
->comm
->cycl_max
[ddCycl
])
4654 dd
->comm
->cycl_max
[ddCycl
] = cycles
;
4658 static double force_flop_count(t_nrnb
*nrnb
)
4665 for(i
=eNR_NBKERNEL010
; i
<eNR_NBKERNEL_FREE_ENERGY
; i
++)
4667 /* To get closer to the real timings, we half the count
4668 * for the normal loops and again half it for water loops.
4671 if (strstr(name
,"W3") != NULL
|| strstr(name
,"W4") != NULL
)
4673 sum
+= nrnb
->n
[i
]*0.25*cost_nrnb(i
);
4677 sum
+= nrnb
->n
[i
]*0.50*cost_nrnb(i
);
4680 for(i
=eNR_NBKERNEL_FREE_ENERGY
; i
<=eNR_NB14
; i
++)
4683 if (strstr(name
,"W3") != NULL
|| strstr(name
,"W4") != NULL
)
4684 sum
+= nrnb
->n
[i
]*cost_nrnb(i
);
4686 for(i
=eNR_BONDS
; i
<=eNR_WALLS
; i
++)
4688 sum
+= nrnb
->n
[i
]*cost_nrnb(i
);
4694 void dd_force_flop_start(gmx_domdec_t
*dd
,t_nrnb
*nrnb
)
4696 if (dd
->comm
->eFlop
)
4698 dd
->comm
->flop
-= force_flop_count(nrnb
);
4701 void dd_force_flop_stop(gmx_domdec_t
*dd
,t_nrnb
*nrnb
)
4703 if (dd
->comm
->eFlop
)
4705 dd
->comm
->flop
+= force_flop_count(nrnb
);
4710 static void clear_dd_cycle_counts(gmx_domdec_t
*dd
)
4714 for(i
=0; i
<ddCyclNr
; i
++)
4716 dd
->comm
->cycl
[i
] = 0;
4717 dd
->comm
->cycl_n
[i
] = 0;
4718 dd
->comm
->cycl_max
[i
] = 0;
4721 dd
->comm
->flop_n
= 0;
4724 static void get_load_distribution(gmx_domdec_t
*dd
,gmx_wallcycle_t wcycle
)
4726 gmx_domdec_comm_t
*comm
;
4727 gmx_domdec_load_t
*load
;
4728 gmx_domdec_root_t
*root
=NULL
;
4729 int d
,dim
,cid
,i
,pos
;
4730 float cell_frac
=0,sbuf
[DD_NLOAD_MAX
];
4735 fprintf(debug
,"get_load_distribution start\n");
4738 wallcycle_start(wcycle
,ewcDDCOMMLOAD
);
4742 bSepPME
= (dd
->pme_nodeid
>= 0);
4744 for(d
=dd
->ndim
-1; d
>=0; d
--)
4747 /* Check if we participate in the communication in this dimension */
4748 if (d
== dd
->ndim
-1 ||
4749 (dd
->ci
[dd
->dim
[d
+1]]==0 && dd
->ci
[dd
->dim
[dd
->ndim
-1]]==0))
4751 load
= &comm
->load
[d
];
4754 cell_frac
= comm
->cell_f1
[d
] - comm
->cell_f0
[d
];
4757 if (d
== dd
->ndim
-1)
4759 sbuf
[pos
++] = dd_force_load(comm
);
4760 sbuf
[pos
++] = sbuf
[0];
4763 sbuf
[pos
++] = sbuf
[0];
4764 sbuf
[pos
++] = cell_frac
;
4767 sbuf
[pos
++] = comm
->cell_f_max0
[d
];
4768 sbuf
[pos
++] = comm
->cell_f_min1
[d
];
4773 sbuf
[pos
++] = comm
->cycl
[ddCyclPPduringPME
];
4774 sbuf
[pos
++] = comm
->cycl
[ddCyclPME
];
4779 sbuf
[pos
++] = comm
->load
[d
+1].sum
;
4780 sbuf
[pos
++] = comm
->load
[d
+1].max
;
4783 sbuf
[pos
++] = comm
->load
[d
+1].sum_m
;
4784 sbuf
[pos
++] = comm
->load
[d
+1].cvol_min
*cell_frac
;
4785 sbuf
[pos
++] = comm
->load
[d
+1].flags
;
4788 sbuf
[pos
++] = comm
->cell_f_max0
[d
];
4789 sbuf
[pos
++] = comm
->cell_f_min1
[d
];
4794 sbuf
[pos
++] = comm
->load
[d
+1].mdf
;
4795 sbuf
[pos
++] = comm
->load
[d
+1].pme
;
4799 /* Communicate a row in DD direction d.
4800 * The communicators are setup such that the root always has rank 0.
4803 MPI_Gather(sbuf
,load
->nload
*sizeof(float),MPI_BYTE
,
4804 load
->load
,load
->nload
*sizeof(float),MPI_BYTE
,
4805 0,comm
->mpi_comm_load
[d
]);
4807 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
4809 /* We are the root, process this row */
4810 if (comm
->bDynLoadBal
)
4812 root
= comm
->root
[d
];
4822 for(i
=0; i
<dd
->nc
[dim
]; i
++)
4824 load
->sum
+= load
->load
[pos
++];
4825 load
->max
= max(load
->max
,load
->load
[pos
]);
4831 /* This direction could not be load balanced properly,
4832 * therefore we need to use the maximum iso the average load.
4834 load
->sum_m
= max(load
->sum_m
,load
->load
[pos
]);
4838 load
->sum_m
+= load
->load
[pos
];
4841 load
->cvol_min
= min(load
->cvol_min
,load
->load
[pos
]);
4845 load
->flags
= (int)(load
->load
[pos
++] + 0.5);
4849 root
->cell_f_max0
[i
] = load
->load
[pos
++];
4850 root
->cell_f_min1
[i
] = load
->load
[pos
++];
4855 load
->mdf
= max(load
->mdf
,load
->load
[pos
]);
4857 load
->pme
= max(load
->pme
,load
->load
[pos
]);
4861 if (comm
->bDynLoadBal
&& root
->bLimited
)
4863 load
->sum_m
*= dd
->nc
[dim
];
4864 load
->flags
|= (1<<d
);
4872 comm
->nload
+= dd_load_count(comm
);
4873 comm
->load_step
+= comm
->cycl
[ddCyclStep
];
4874 comm
->load_sum
+= comm
->load
[0].sum
;
4875 comm
->load_max
+= comm
->load
[0].max
;
4876 if (comm
->bDynLoadBal
)
4878 for(d
=0; d
<dd
->ndim
; d
++)
4880 if (comm
->load
[0].flags
& (1<<d
))
4882 comm
->load_lim
[d
]++;
4888 comm
->load_mdf
+= comm
->load
[0].mdf
;
4889 comm
->load_pme
+= comm
->load
[0].pme
;
4893 wallcycle_stop(wcycle
,ewcDDCOMMLOAD
);
4897 fprintf(debug
,"get_load_distribution finished\n");
4901 static float dd_force_imb_perf_loss(gmx_domdec_t
*dd
)
4903 /* Return the relative performance loss on the total run time
4904 * due to the force calculation load imbalance.
4906 if (dd
->comm
->nload
> 0)
4909 (dd
->comm
->load_max
*dd
->nnodes
- dd
->comm
->load_sum
)/
4910 (dd
->comm
->load_step
*dd
->nnodes
);
4918 static void print_dd_load_av(FILE *fplog
,gmx_domdec_t
*dd
)
4921 int npp
,npme
,nnodes
,d
,limp
;
4922 float imbal
,pme_f_ratio
,lossf
,lossp
=0;
4924 gmx_domdec_comm_t
*comm
;
4927 if (DDMASTER(dd
) && comm
->nload
> 0)
4930 npme
= (dd
->pme_nodeid
>= 0) ? comm
->npmenodes
: 0;
4931 nnodes
= npp
+ npme
;
4932 imbal
= comm
->load_max
*npp
/comm
->load_sum
- 1;
4933 lossf
= dd_force_imb_perf_loss(dd
);
4934 sprintf(buf
," Average load imbalance: %.1f %%\n",imbal
*100);
4935 fprintf(fplog
,"%s",buf
);
4936 fprintf(stderr
,"\n");
4937 fprintf(stderr
,"%s",buf
);
4938 sprintf(buf
," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf
*100);
4939 fprintf(fplog
,"%s",buf
);
4940 fprintf(stderr
,"%s",buf
);
4942 if (comm
->bDynLoadBal
)
4944 sprintf(buf
," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
4945 for(d
=0; d
<dd
->ndim
; d
++)
4947 limp
= (200*comm
->load_lim
[d
]+1)/(2*comm
->nload
);
4948 sprintf(buf
+strlen(buf
)," %c %d %%",dim2char(dd
->dim
[d
]),limp
);
4954 sprintf(buf
+strlen(buf
),"\n");
4955 fprintf(fplog
,"%s",buf
);
4956 fprintf(stderr
,"%s",buf
);
4960 pme_f_ratio
= comm
->load_pme
/comm
->load_mdf
;
4961 lossp
= (comm
->load_pme
-comm
->load_mdf
)/comm
->load_step
;
4964 lossp
*= (float)npme
/(float)nnodes
;
4968 lossp
*= (float)npp
/(float)nnodes
;
4970 sprintf(buf
," Average PME mesh/force load: %5.3f\n",pme_f_ratio
);
4971 fprintf(fplog
,"%s",buf
);
4972 fprintf(stderr
,"%s",buf
);
4973 sprintf(buf
," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp
)*100);
4974 fprintf(fplog
,"%s",buf
);
4975 fprintf(stderr
,"%s",buf
);
4977 fprintf(fplog
,"\n");
4978 fprintf(stderr
,"\n");
4980 if (lossf
>= DD_PERF_LOSS
)
4983 "NOTE: %.1f %% performance was lost due to load imbalance\n"
4984 " in the domain decomposition.\n",lossf
*100);
4985 if (!comm
->bDynLoadBal
)
4987 sprintf(buf
+strlen(buf
)," You might want to use dynamic load balancing (option -dlb.)\n");
4991 sprintf(buf
+strlen(buf
)," You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
4993 fprintf(fplog
,"%s\n",buf
);
4994 fprintf(stderr
,"%s\n",buf
);
4996 if (npme
> 0 && fabs(lossp
) >= DD_PERF_LOSS
)
4999 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5000 " had %s work to do than the PP nodes.\n"
5001 " You might want to %s the number of PME nodes\n"
5002 " or %s the cut-off and the grid spacing.\n",
5004 (lossp
< 0) ? "less" : "more",
5005 (lossp
< 0) ? "decrease" : "increase",
5006 (lossp
< 0) ? "decrease" : "increase");
5007 fprintf(fplog
,"%s\n",buf
);
5008 fprintf(stderr
,"%s\n",buf
);
5013 static float dd_vol_min(gmx_domdec_t
*dd
)
5015 return dd
->comm
->load
[0].cvol_min
*dd
->nnodes
;
5018 static bool dd_load_flags(gmx_domdec_t
*dd
)
5020 return dd
->comm
->load
[0].flags
;
5023 static float dd_f_imbal(gmx_domdec_t
*dd
)
5025 return dd
->comm
->load
[0].max
*dd
->nnodes
/dd
->comm
->load
[0].sum
- 1;
5028 static float dd_pme_f_ratio(gmx_domdec_t
*dd
)
5030 return dd
->comm
->load
[0].pme
/dd
->comm
->load
[0].mdf
;
5033 static void dd_print_load(FILE *fplog
,gmx_domdec_t
*dd
,gmx_step_t step
)
5038 flags
= dd_load_flags(dd
);
5042 "DD load balancing is limited by minimum cell size in dimension");
5043 for(d
=0; d
<dd
->ndim
; d
++)
5047 fprintf(fplog
," %c",dim2char(dd
->dim
[d
]));
5050 fprintf(fplog
,"\n");
5052 fprintf(fplog
,"DD step %s",gmx_step_str(step
,buf
));
5053 if (dd
->comm
->bDynLoadBal
)
5055 fprintf(fplog
," vol min/aver %5.3f%c",
5056 dd_vol_min(dd
),flags
? '!' : ' ');
5058 fprintf(fplog
," load imb.: force %4.1f%%",dd_f_imbal(dd
)*100);
5059 if (dd
->comm
->cycl_n
[ddCyclPME
])
5061 fprintf(fplog
," pme mesh/force %5.3f",dd_pme_f_ratio(dd
));
5063 fprintf(fplog
,"\n\n");
5066 static void dd_print_load_verbose(gmx_domdec_t
*dd
)
5068 if (dd
->comm
->bDynLoadBal
)
5070 fprintf(stderr
,"vol %4.2f%c ",
5071 dd_vol_min(dd
),dd_load_flags(dd
) ? '!' : ' ');
5073 fprintf(stderr
,"imb F %2d%% ",(int)(dd_f_imbal(dd
)*100+0.5));
5074 if (dd
->comm
->cycl_n
[ddCyclPME
])
5076 fprintf(stderr
,"pme/F %4.2f ",dd_pme_f_ratio(dd
));
5081 static void make_load_communicator(gmx_domdec_t
*dd
,MPI_Group g_all
,
5082 int dim_ind
,ivec loc
)
5088 gmx_domdec_root_t
*root
;
5090 dim
= dd
->dim
[dim_ind
];
5091 copy_ivec(loc
,loc_c
);
5092 snew(rank
,dd
->nc
[dim
]);
5093 for(i
=0; i
<dd
->nc
[dim
]; i
++)
5096 rank
[i
] = dd_index(dd
->nc
,loc_c
);
5098 /* Here we create a new group, that does not necessarily
5099 * include our process. But MPI_Comm_create needs to be
5100 * called by all the processes in the original communicator.
5101 * Calling MPI_Group_free afterwards gives errors, so I assume
5102 * also the group is needed by all processes. (B. Hess)
5104 MPI_Group_incl(g_all
,dd
->nc
[dim
],rank
,&g_row
);
5105 MPI_Comm_create(dd
->mpi_comm_all
,g_row
,&c_row
);
5106 if (c_row
!= MPI_COMM_NULL
)
5108 /* This process is part of the group */
5109 dd
->comm
->mpi_comm_load
[dim_ind
] = c_row
;
5110 if (dd
->comm
->eDLB
!= edlbNO
)
5112 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5114 /* This is the root process of this row */
5115 snew(dd
->comm
->root
[dim_ind
],1);
5116 root
= dd
->comm
->root
[dim_ind
];
5117 snew(root
->cell_f
,DD_CELL_F_SIZE(dd
,dim_ind
));
5118 snew(root
->old_cell_f
,dd
->nc
[dim
]+1);
5119 snew(root
->bCellMin
,dd
->nc
[dim
]);
5122 snew(root
->cell_f_max0
,dd
->nc
[dim
]);
5123 snew(root
->cell_f_min1
,dd
->nc
[dim
]);
5124 snew(root
->bound_min
,dd
->nc
[dim
]);
5125 snew(root
->bound_max
,dd
->nc
[dim
]);
5127 snew(root
->buf_ncd
,dd
->nc
[dim
]);
5131 /* This is not a root process, we only need to receive cell_f */
5132 snew(dd
->comm
->cell_f_row
,DD_CELL_F_SIZE(dd
,dim_ind
));
5135 if (dd
->ci
[dim
] == dd
->master_ci
[dim
])
5137 snew(dd
->comm
->load
[dim_ind
].load
,dd
->nc
[dim
]*DD_NLOAD_MAX
);
5144 static void make_load_communicators(gmx_domdec_t
*dd
)
5152 fprintf(debug
,"Making load communicators\n");
5154 MPI_Comm_group(dd
->mpi_comm_all
,&g_all
);
5156 snew(dd
->comm
->load
,dd
->ndim
);
5157 snew(dd
->comm
->mpi_comm_load
,dd
->ndim
);
5160 make_load_communicator(dd
,g_all
,0,loc
);
5163 for(i
=0; i
<dd
->nc
[dim0
]; i
++) {
5165 make_load_communicator(dd
,g_all
,1,loc
);
5170 for(i
=0; i
<dd
->nc
[dim0
]; i
++) {
5173 for(j
=0; j
<dd
->nc
[dim1
]; j
++) {
5175 make_load_communicator(dd
,g_all
,2,loc
);
5180 MPI_Group_free(&g_all
);
5183 fprintf(debug
,"Finished making load communicators\n");
5187 void setup_dd_grid(FILE *fplog
,gmx_domdec_t
*dd
)
5193 ivec dd_zp
[DD_MAXIZONE
];
5194 gmx_domdec_zones_t
*zones
;
5195 gmx_domdec_ns_ranges_t
*izone
;
5197 for(d
=0; d
<dd
->ndim
; d
++)
5200 copy_ivec(dd
->ci
,tmp
);
5201 tmp
[dim
] = (tmp
[dim
] + 1) % dd
->nc
[dim
];
5202 dd
->neighbor
[d
][0] = ddcoord2ddnodeid(dd
,tmp
);
5203 copy_ivec(dd
->ci
,tmp
);
5204 tmp
[dim
] = (tmp
[dim
] - 1 + dd
->nc
[dim
]) % dd
->nc
[dim
];
5205 dd
->neighbor
[d
][1] = ddcoord2ddnodeid(dd
,tmp
);
5208 fprintf(debug
,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5211 dd
->neighbor
[d
][1]);
5217 fprintf(stderr
,"Making %dD domain decomposition %d x %d x %d\n",
5218 dd
->ndim
,dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
]);
5222 fprintf(fplog
,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5224 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
],
5225 dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5232 for(i
=0; i
<nzonep
; i
++)
5234 copy_ivec(dd_zp3
[i
],dd_zp
[i
]);
5240 for(i
=0; i
<nzonep
; i
++)
5242 copy_ivec(dd_zp2
[i
],dd_zp
[i
]);
5248 for(i
=0; i
<nzonep
; i
++)
5250 copy_ivec(dd_zp1
[i
],dd_zp
[i
]);
5254 gmx_fatal(FARGS
,"Can only do 1, 2 or 3D domain decomposition");
5259 zones
= &dd
->comm
->zones
;
5261 for(i
=0; i
<nzone
; i
++)
5264 clear_ivec(zones
->shift
[i
]);
5265 for(d
=0; d
<dd
->ndim
; d
++)
5267 zones
->shift
[i
][dd
->dim
[d
]] = dd_zo
[i
][m
++];
5272 for(i
=0; i
<nzone
; i
++)
5274 for(d
=0; d
<DIM
; d
++)
5276 s
[d
] = dd
->ci
[d
] - zones
->shift
[i
][d
];
5281 else if (s
[d
] >= dd
->nc
[d
])
5287 zones
->nizone
= nzonep
;
5288 for(i
=0; i
<zones
->nizone
; i
++)
5290 if (dd_zp
[i
][0] != i
)
5292 gmx_fatal(FARGS
,"Internal inconsistency in the dd grid setup");
5294 izone
= &zones
->izone
[i
];
5295 izone
->j0
= dd_zp
[i
][1];
5296 izone
->j1
= dd_zp
[i
][2];
5297 for(dim
=0; dim
<DIM
; dim
++)
5299 if (dd
->nc
[dim
] == 1)
5301 /* All shifts should be allowed */
5302 izone
->shift0
[dim
] = -1;
5303 izone
->shift1
[dim
] = 1;
5308 izone->shift0[d] = 0;
5309 izone->shift1[d] = 0;
5310 for(j=izone->j0; j<izone->j1; j++) {
5311 if (dd->shift[j][d] > dd->shift[i][d])
5312 izone->shift0[d] = -1;
5313 if (dd->shift[j][d] < dd->shift[i][d])
5314 izone->shift1[d] = 1;
5320 /* Assume the shift are not more than 1 cell */
5321 izone
->shift0
[dim
] = 1;
5322 izone
->shift1
[dim
] = -1;
5323 for(j
=izone
->j0
; j
<izone
->j1
; j
++)
5325 shift_diff
= zones
->shift
[j
][dim
] - zones
->shift
[i
][dim
];
5326 if (shift_diff
< izone
->shift0
[dim
])
5328 izone
->shift0
[dim
] = shift_diff
;
5330 if (shift_diff
> izone
->shift1
[dim
])
5332 izone
->shift1
[dim
] = shift_diff
;
5339 if (dd
->comm
->eDLB
!= edlbNO
)
5341 snew(dd
->comm
->root
,dd
->ndim
);
5344 if (dd
->comm
->bRecordLoad
)
5346 make_load_communicators(dd
);
5350 static void make_pp_communicator(FILE *fplog
,t_commrec
*cr
,int reorder
)
5353 gmx_domdec_comm_t
*comm
;
5364 if (comm
->bCartesianPP
)
5366 /* Set up cartesian communication for the particle-particle part */
5369 fprintf(fplog
,"Will use a Cartesian communicator: %d x %d x %d\n",
5370 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
]);
5373 for(i
=0; i
<DIM
; i
++)
5377 MPI_Cart_create(cr
->mpi_comm_mygroup
,DIM
,dd
->nc
,periods
,reorder
,
5379 /* We overwrite the old communicator with the new cartesian one */
5380 cr
->mpi_comm_mygroup
= comm_cart
;
5383 dd
->mpi_comm_all
= cr
->mpi_comm_mygroup
;
5384 MPI_Comm_rank(dd
->mpi_comm_all
,&dd
->rank
);
5386 if (comm
->bCartesianPP_PME
)
5388 /* Since we want to use the original cartesian setup for sim,
5389 * and not the one after split, we need to make an index.
5391 snew(comm
->ddindex2ddnodeid
,dd
->nnodes
);
5392 comm
->ddindex2ddnodeid
[dd_index(dd
->nc
,dd
->ci
)] = dd
->rank
;
5393 gmx_sumi(dd
->nnodes
,comm
->ddindex2ddnodeid
,cr
);
5394 /* Get the rank of the DD master,
5395 * above we made sure that the master node is a PP node.
5405 MPI_Allreduce(&rank
,&dd
->masterrank
,1,MPI_INT
,MPI_SUM
,dd
->mpi_comm_all
);
5407 else if (comm
->bCartesianPP
)
5409 if (cr
->npmenodes
== 0)
5411 /* The PP communicator is also
5412 * the communicator for this simulation
5414 cr
->mpi_comm_mysim
= cr
->mpi_comm_mygroup
;
5416 cr
->nodeid
= dd
->rank
;
5418 MPI_Cart_coords(dd
->mpi_comm_all
,dd
->rank
,DIM
,dd
->ci
);
5420 /* We need to make an index to go from the coordinates
5421 * to the nodeid of this simulation.
5423 snew(comm
->ddindex2simnodeid
,dd
->nnodes
);
5424 snew(buf
,dd
->nnodes
);
5425 if (cr
->duty
& DUTY_PP
)
5427 buf
[dd_index(dd
->nc
,dd
->ci
)] = cr
->sim_nodeid
;
5429 /* Communicate the ddindex to simulation nodeid index */
5430 MPI_Allreduce(buf
,comm
->ddindex2simnodeid
,dd
->nnodes
,MPI_INT
,MPI_SUM
,
5431 cr
->mpi_comm_mysim
);
5434 /* Determine the master coordinates and rank.
5435 * The DD master should be the same node as the master of this sim.
5437 for(i
=0; i
<dd
->nnodes
; i
++)
5439 if (comm
->ddindex2simnodeid
[i
] == 0)
5441 ddindex2xyz(dd
->nc
,i
,dd
->master_ci
);
5442 MPI_Cart_rank(dd
->mpi_comm_all
,dd
->master_ci
,&dd
->masterrank
);
5447 fprintf(debug
,"The master rank is %d\n",dd
->masterrank
);
5452 /* No Cartesian communicators */
5453 /* We use the rank in dd->comm->all as DD index */
5454 ddindex2xyz(dd
->nc
,dd
->rank
,dd
->ci
);
5455 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5457 clear_ivec(dd
->master_ci
);
5464 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5465 dd
->rank
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5470 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5471 dd
->rank
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5475 static void receive_ddindex2simnodeid(t_commrec
*cr
)
5479 gmx_domdec_comm_t
*comm
;
5486 if (!comm
->bCartesianPP_PME
&& comm
->bCartesianPP
)
5488 snew(comm
->ddindex2simnodeid
,dd
->nnodes
);
5489 snew(buf
,dd
->nnodes
);
5490 if (cr
->duty
& DUTY_PP
)
5492 buf
[dd_index(dd
->nc
,dd
->ci
)] = cr
->sim_nodeid
;
5495 /* Communicate the ddindex to simulation nodeid index */
5496 MPI_Allreduce(buf
,comm
->ddindex2simnodeid
,dd
->nnodes
,MPI_INT
,MPI_SUM
,
5497 cr
->mpi_comm_mysim
);
5504 static gmx_domdec_master_t
*init_gmx_domdec_master_t(gmx_domdec_t
*dd
,
5507 gmx_domdec_master_t
*ma
;
5512 snew(ma
->ncg
,dd
->nnodes
);
5513 snew(ma
->index
,dd
->nnodes
+1);
5515 snew(ma
->nat
,dd
->nnodes
);
5516 snew(ma
->ibuf
,dd
->nnodes
*2);
5517 snew(ma
->cell_x
,DIM
);
5518 for(i
=0; i
<DIM
; i
++)
5520 snew(ma
->cell_x
[i
],dd
->nc
[i
]+1);
5523 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
5529 snew(ma
->vbuf
,natoms
);
5535 static void split_communicator(FILE *fplog
,t_commrec
*cr
,int dd_node_order
,
5539 gmx_domdec_comm_t
*comm
;
5550 if (comm
->bCartesianPP
)
5552 for(i
=1; i
<DIM
; i
++)
5554 bDiv
[i
] = ((cr
->npmenodes
*dd
->nc
[i
]) % (dd
->nnodes
) == 0);
5556 if (bDiv
[YY
] || bDiv
[ZZ
])
5558 comm
->bCartesianPP_PME
= TRUE
;
5559 /* We choose the direction that provides the thinnest slab
5560 * of PME only nodes as this will have the least effect
5561 * on the PP communication.
5562 * But for the PME communication the opposite might be better.
5564 if (bDiv
[YY
] && (!bDiv
[ZZ
] || dd
->nc
[YY
] <= dd
->nc
[ZZ
]))
5566 comm
->cartpmedim
= YY
;
5570 comm
->cartpmedim
= ZZ
;
5572 comm
->ntot
[comm
->cartpmedim
]
5573 += (cr
->npmenodes
*dd
->nc
[comm
->cartpmedim
])/dd
->nnodes
;
5577 fprintf(fplog
,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr
->npmenodes
,dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[XX
],dd
->nc
[ZZ
]);
5579 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5584 if (comm
->bCartesianPP_PME
)
5588 fprintf(fplog
,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm
->ntot
[XX
],comm
->ntot
[YY
],comm
->ntot
[ZZ
]);
5591 for(i
=0; i
<DIM
; i
++)
5595 MPI_Cart_create(cr
->mpi_comm_mysim
,DIM
,comm
->ntot
,periods
,reorder
,
5598 MPI_Comm_rank(comm_cart
,&rank
);
5599 if (MASTERNODE(cr
) && rank
!= 0)
5601 gmx_fatal(FARGS
,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
5604 /* With this assigment we loose the link to the original communicator
5605 * which will usually be MPI_COMM_WORLD, unless have multisim.
5607 cr
->mpi_comm_mysim
= comm_cart
;
5608 cr
->sim_nodeid
= rank
;
5610 MPI_Cart_coords(cr
->mpi_comm_mysim
,cr
->sim_nodeid
,DIM
,dd
->ci
);
5614 fprintf(fplog
,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
5615 cr
->sim_nodeid
,dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
5618 if (dd
->ci
[comm
->cartpmedim
] < dd
->nc
[comm
->cartpmedim
])
5622 if (cr
->npmenodes
== 0 ||
5623 dd
->ci
[comm
->cartpmedim
] >= dd
->nc
[comm
->cartpmedim
])
5625 cr
->duty
= DUTY_PME
;
5628 /* Split the sim communicator into PP and PME only nodes */
5629 MPI_Comm_split(cr
->mpi_comm_mysim
,
5631 dd_index(comm
->ntot
,dd
->ci
),
5632 &cr
->mpi_comm_mygroup
);
5636 switch (dd_node_order
)
5641 fprintf(fplog
,"Order of the nodes: PP first, PME last\n");
5644 case ddnoINTERLEAVE
:
5645 /* Interleave the PP-only and PME-only nodes,
5646 * as on clusters with dual-core machines this will double
5647 * the communication bandwidth of the PME processes
5648 * and thus speed up the PP <-> PME and inter PME communication.
5652 fprintf(fplog
,"Interleaving PP and PME nodes\n");
5654 comm
->pmenodes
= dd_pmenodes(cr
);
5659 gmx_fatal(FARGS
,"Unknown dd_node_order=%d",dd_node_order
);
5662 if (dd_simnode2pmenode(cr
,cr
->sim_nodeid
) == -1)
5664 cr
->duty
= DUTY_PME
;
5671 /* Split the sim communicator into PP and PME only nodes */
5672 MPI_Comm_split(cr
->mpi_comm_mysim
,
5675 &cr
->mpi_comm_mygroup
);
5676 MPI_Comm_rank(cr
->mpi_comm_mygroup
,&cr
->nodeid
);
5682 fprintf(fplog
,"This is a %s only node\n\n",
5683 (cr
->duty
& DUTY_PP
) ? "particle-particle" : "PME-mesh");
5687 void make_dd_communicators(FILE *fplog
,t_commrec
*cr
,int dd_node_order
)
5690 gmx_domdec_comm_t
*comm
;
5696 copy_ivec(dd
->nc
,comm
->ntot
);
5698 comm
->bCartesianPP
= (dd_node_order
== ddnoCARTESIAN
);
5699 comm
->bCartesianPP_PME
= FALSE
;
5701 /* Reorder the nodes by default. This might change the MPI ranks.
5702 * Real reordering is only supported on very few architectures,
5703 * Blue Gene is one of them.
5705 CartReorder
= (getenv("GMX_NO_CART_REORDER") == NULL
);
5707 if (cr
->npmenodes
> 0)
5709 /* Split the communicator into a PP and PME part */
5710 split_communicator(fplog
,cr
,dd_node_order
,CartReorder
);
5711 if (comm
->bCartesianPP_PME
)
5713 /* We (possibly) reordered the nodes in split_communicator,
5714 * so it is no longer required in make_pp_communicator.
5716 CartReorder
= FALSE
;
5721 /* All nodes do PP and PME */
5723 /* We do not require separate communicators */
5724 cr
->mpi_comm_mygroup
= cr
->mpi_comm_mysim
;
5728 if (cr
->duty
& DUTY_PP
)
5730 /* Copy or make a new PP communicator */
5731 make_pp_communicator(fplog
,cr
,CartReorder
);
5735 receive_ddindex2simnodeid(cr
);
5738 if (!(cr
->duty
& DUTY_PME
))
5740 /* Set up the commnuication to our PME node */
5741 dd
->pme_nodeid
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
5742 dd
->pme_receive_vir_ener
= receive_vir_ener(cr
);
5745 fprintf(debug
,"My pme_nodeid %d receive ener %d\n",
5746 dd
->pme_nodeid
,dd
->pme_receive_vir_ener
);
5751 dd
->pme_nodeid
= -1;
5756 dd
->ma
= init_gmx_domdec_master_t(dd
,
5758 comm
->cgs_gl
.index
[comm
->cgs_gl
.nr
]);
5762 static real
*get_slb_frac(FILE *fplog
,const char *dir
,int nc
,const char *size_string
)
5769 if (nc
> 1 && size_string
!= NULL
)
5773 fprintf(fplog
,"Using static load balancing for the %s direction\n",
5778 for (i
=0; i
<nc
; i
++)
5781 sscanf(size_string
,"%lf%n",&dbl
,&n
);
5784 gmx_fatal(FARGS
,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir
,size_string
);
5793 fprintf(fplog
,"Relative cell sizes:");
5795 for (i
=0; i
<nc
; i
++)
5800 fprintf(fplog
," %5.3f",slb_frac
[i
]);
5805 fprintf(fplog
,"\n");
5812 static int multi_body_bondeds_count(gmx_mtop_t
*mtop
)
5815 gmx_mtop_ilistloop_t iloop
;
5819 iloop
= gmx_mtop_ilistloop_init(mtop
);
5820 while (gmx_mtop_ilistloop_next(iloop
,&il
,&nmol
))
5822 for(ftype
=0; ftype
<F_NRE
; ftype
++)
5824 if ((interaction_function
[ftype
].flags
& IF_BOND
) &&
5827 n
+= nmol
*il
[ftype
].nr
/(1 + NRAL(ftype
));
5835 static int dd_nst_env(FILE *fplog
,const char *env_var
,int def
)
5841 val
= getenv(env_var
);
5844 if (sscanf(val
,"%d",&nst
) <= 0)
5850 fprintf(fplog
,"Found env.var. %s = %s, using value %d\n",
5858 static void dd_warning(t_commrec
*cr
,FILE *fplog
,const char *warn_string
)
5862 fprintf(stderr
,"\n%s\n",warn_string
);
5866 fprintf(fplog
,"\n%s\n",warn_string
);
5870 static void check_dd_restrictions(t_commrec
*cr
,gmx_domdec_t
*dd
,
5871 t_inputrec
*ir
,FILE *fplog
)
5873 if (ir
->ePBC
== epbcSCREW
&&
5874 (dd
->nc
[XX
] == 1 || dd
->nc
[YY
] > 1 || dd
->nc
[ZZ
] > 1))
5876 gmx_fatal(FARGS
,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names
[ir
->ePBC
]);
5879 if (ir
->ns_type
== ensSIMPLE
)
5881 gmx_fatal(FARGS
,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
5884 if (ir
->nstlist
== 0)
5886 gmx_fatal(FARGS
,"Domain decomposition does not work with nstlist=0");
5889 if (ir
->comm_mode
== ecmANGULAR
&& ir
->ePBC
!= epbcNONE
)
5891 dd_warning(cr
,fplog
,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
5895 static real
average_cellsize_min(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
5900 r
= ddbox
->box_size
[XX
];
5901 for(di
=0; di
<dd
->ndim
; di
++)
5904 /* Check using the initial average cell size */
5905 r
= min(r
,ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
]/dd
->nc
[d
]);
5911 static int check_dlb_support(FILE *fplog
,t_commrec
*cr
,
5912 const char *dlb_opt
,bool bRecordLoad
,
5913 unsigned long Flags
,t_inputrec
*ir
)
5921 case 'a': eDLB
= edlbAUTO
; break;
5922 case 'n': eDLB
= edlbNO
; break;
5923 case 'y': eDLB
= edlbYES
; break;
5924 default: gmx_incons("Unknown dlb_opt");
5927 if (Flags
& MD_RERUN
)
5932 if (!EI_DYNAMICS(ir
->eI
))
5934 if (eDLB
== edlbYES
)
5936 sprintf(buf
,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir
->eI
));
5937 dd_warning(cr
,fplog
,buf
);
5945 dd_warning(cr
,fplog
,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
5950 if (Flags
& MD_REPRODUCIBLE
)
5957 dd_warning(cr
,fplog
,"NOTE: reproducability requested, will not use dynamic load balancing\n");
5961 dd_warning(cr
,fplog
,"WARNING: reproducability requested with dynamic load balancing, the simulation will NOT be binary reproducable\n");
5964 gmx_fatal(FARGS
,"Death horror: undefined case (%d) for load balancing choice",eDLB
);
5972 static void set_dd_dim(FILE *fplog
,gmx_domdec_t
*dd
)
5977 if (getenv("GMX_DD_ORDER_ZYX"))
5979 /* Decomposition order z,y,x */
5982 fprintf(fplog
,"Using domain decomposition order z, y, x\n");
5984 for(dim
=DIM
-1; dim
>=0; dim
--)
5986 if (dd
->nc
[dim
] > 1)
5988 dd
->dim
[dd
->ndim
++] = dim
;
5994 /* Decomposition order x,y,z */
5995 for(dim
=0; dim
<DIM
; dim
++)
5997 if (dd
->nc
[dim
] > 1)
5999 dd
->dim
[dd
->ndim
++] = dim
;
6005 gmx_domdec_t
*init_domain_decomposition(FILE *fplog
,t_commrec
*cr
,
6006 unsigned long Flags
,
6008 real comm_distance_min
,real rconstr
,
6009 const char *dlb_opt
,real dlb_scale
,
6010 const char *sizex
,const char *sizey
,const char *sizez
,
6011 gmx_mtop_t
*mtop
,t_inputrec
*ir
,
6017 gmx_domdec_comm_t
*comm
;
6020 real r_2b
,r_mb
,r_bonded
=-1,r_bonded_limit
=-1,limit
,acs
;
6027 "\nInitializing Domain Decomposition on %d nodes\n",cr
->nnodes
);
6033 snew(comm
->cggl_flag
,DIM
*2);
6034 snew(comm
->cgcm_state
,DIM
*2);
6036 dd
->npbcdim
= ePBC2npbcdim(ir
->ePBC
);
6037 dd
->bScrewPBC
= (ir
->ePBC
== epbcSCREW
);
6039 dd
->bSendRecv2
= dd_nst_env(fplog
,"GMX_DD_SENDRECV2",0);
6040 comm
->eFlop
= dd_nst_env(fplog
,"GMX_DLB_FLOP",0);
6041 recload
= dd_nst_env(fplog
,"GMX_DD_LOAD",1);
6042 comm
->nstSortCG
= dd_nst_env(fplog
,"GMX_DD_SORT",1);
6043 comm
->nstDDDump
= dd_nst_env(fplog
,"GMX_DD_DUMP",0);
6044 comm
->nstDDDumpGrid
= dd_nst_env(fplog
,"GMX_DD_DUMP_GRID",0);
6045 comm
->DD_debug
= dd_nst_env(fplog
,"GMX_DD_DEBUG",0);
6047 if (dd
->bSendRecv2
&& fplog
)
6049 fprintf(fplog
,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6055 fprintf(fplog
,"Will load balance based on FLOP count\n");
6057 if (comm
->eFlop
> 1)
6059 srand(1+cr
->nodeid
);
6061 comm
->bRecordLoad
= TRUE
;
6065 comm
->bRecordLoad
= (wallcycle_have_counter() && recload
> 0);
6069 comm
->eDLB
= check_dlb_support(fplog
,cr
,dlb_opt
,comm
->bRecordLoad
,Flags
,ir
);
6071 comm
->bDynLoadBal
= (comm
->eDLB
== edlbYES
);
6074 fprintf(fplog
,"Dynamic load balancing: %s\n",edlb_names
[comm
->eDLB
]);
6076 dd
->bGridJump
= comm
->bDynLoadBal
;
6078 if (comm
->nstSortCG
)
6082 if (comm
->nstSortCG
== 1)
6084 fprintf(fplog
,"Will sort the charge groups at every domain (re)decomposition\n");
6088 fprintf(fplog
,"Will sort the charge groups every %d steps\n",
6098 fprintf(fplog
,"Will not sort the charge groups\n");
6102 comm
->bInterCGBondeds
= (ncg_mtop(mtop
) > mtop
->mols
.nr
);
6103 if (comm
->bInterCGBondeds
)
6105 comm
->bInterCGMultiBody
= (multi_body_bondeds_count(mtop
) > 0);
6109 comm
->bInterCGMultiBody
= FALSE
;
6112 dd
->bInterCGcons
= inter_charge_group_constraints(mtop
);
6114 if (ir
->rlistlong
== 0)
6116 /* Set the cut-off to some very large value,
6117 * so we don't need if statements everywhere in the code.
6118 * We use sqrt, since the cut-off is squared in some places.
6120 comm
->cutoff
= GMX_CUTOFF_INF
;
6124 comm
->cutoff
= ir
->rlistlong
;
6126 comm
->cutoff_mbody
= 0;
6128 comm
->cellsize_limit
= 0;
6129 comm
->bBondComm
= FALSE
;
6131 if (comm
->bInterCGBondeds
)
6133 if (comm_distance_min
> 0)
6135 comm
->cutoff_mbody
= comm_distance_min
;
6136 if (Flags
& MD_DDBONDCOMM
)
6138 comm
->bBondComm
= (comm
->cutoff_mbody
> comm
->cutoff
);
6142 comm
->cutoff
= max(comm
->cutoff
,comm
->cutoff_mbody
);
6144 r_bonded_limit
= comm
->cutoff_mbody
;
6146 else if (ir
->bPeriodicMols
)
6148 /* Can not easily determine the required cut-off */
6149 dd_warning(cr
,fplog
,"NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
6150 comm
->cutoff_mbody
= comm
->cutoff
/2;
6151 r_bonded_limit
= comm
->cutoff_mbody
;
6157 dd_bonded_cg_distance(fplog
,dd
,mtop
,ir
,x
,box
,
6158 Flags
& MD_DDBONDCHECK
,&r_2b
,&r_mb
);
6160 gmx_bcast(sizeof(r_2b
),&r_2b
,cr
);
6161 gmx_bcast(sizeof(r_mb
),&r_mb
,cr
);
6163 /* We use an initial margin of 10% for the minimum cell size,
6164 * except when we are just below the non-bonded cut-off.
6166 if (Flags
& MD_DDBONDCOMM
)
6168 if (max(r_2b
,r_mb
) > comm
->cutoff
)
6170 r_bonded
= max(r_2b
,r_mb
);
6171 r_bonded_limit
= 1.1*r_bonded
;
6172 comm
->bBondComm
= TRUE
;
6177 r_bonded_limit
= min(1.1*r_bonded
,comm
->cutoff
);
6179 /* We determine cutoff_mbody later */
6183 /* No special bonded communication,
6184 * simply increase the DD cut-off.
6186 r_bonded_limit
= 1.1*max(r_2b
,r_mb
);
6187 comm
->cutoff_mbody
= r_bonded_limit
;
6188 comm
->cutoff
= max(comm
->cutoff
,comm
->cutoff_mbody
);
6191 comm
->cellsize_limit
= max(comm
->cellsize_limit
,r_bonded_limit
);
6195 "Minimum cell size due to bonded interactions: %.3f nm\n",
6196 comm
->cellsize_limit
);
6200 if (dd
->bInterCGcons
&& rconstr
<= 0)
6202 /* There is a cell size limit due to the constraints (P-LINCS) */
6203 rconstr
= constr_r_max(fplog
,mtop
,ir
);
6207 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6209 if (rconstr
> comm
->cellsize_limit
)
6211 fprintf(fplog
,"This distance will limit the DD cell size, you can override this with -rcon\n");
6215 else if (rconstr
> 0 && fplog
)
6217 /* Here we do not check for dd->bInterCGcons,
6218 * because one can also set a cell size limit for virtual sites only
6219 * and at this point we don't know yet if there are intercg v-sites.
6222 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6225 comm
->cellsize_limit
= max(comm
->cellsize_limit
,rconstr
);
6227 comm
->cgs_gl
= gmx_mtop_global_cgs(mtop
);
6231 copy_ivec(nc
,dd
->nc
);
6232 set_dd_dim(fplog
,dd
);
6233 set_ddbox_cr(cr
,&dd
->nc
,ir
,box
,&comm
->cgs_gl
,x
,ddbox
);
6235 if (cr
->npmenodes
== -1)
6239 acs
= average_cellsize_min(dd
,ddbox
);
6240 if (acs
< comm
->cellsize_limit
)
6244 fprintf(fplog
,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs
,comm
->cellsize_limit
);
6248 gmx_fatal(FARGS
,"The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",acs
,comm
->cellsize_limit
);
6251 MPI_Abort(MPI_COMM_WORLD
, 0);
6259 set_ddbox_cr(cr
,NULL
,ir
,box
,&comm
->cgs_gl
,x
,ddbox
);
6261 /* We need to choose the optimal DD grid and possibly PME nodes */
6262 limit
= dd_choose_grid(fplog
,cr
,dd
,ir
,mtop
,box
,ddbox
,
6263 comm
->eDLB
!=edlbNO
,dlb_scale
,
6264 comm
->cellsize_limit
,comm
->cutoff
,
6265 comm
->bInterCGBondeds
,comm
->bInterCGMultiBody
);
6267 if (dd
->nc
[XX
] == 0)
6271 bC
= (dd
->bInterCGcons
&& rconstr
> r_bonded_limit
);
6272 sprintf(buf
,"Change the number of nodes or mdrun option %s%s%s",
6273 !bC
? "-rdd" : "-rcon",
6274 comm
->eDLB
!=edlbNO
? " or -dds" : "",
6275 bC
? " or your LINCS settings" : "");
6276 gmx_fatal(FARGS
,"There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6278 "Look in the log file for details on the domain decomposition",
6279 cr
->nnodes
-cr
->npmenodes
,limit
,buf
);
6282 MPI_Abort(MPI_COMM_WORLD
, 0);
6287 set_dd_dim(fplog
,dd
);
6293 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6294 dd
->nc
[XX
],dd
->nc
[YY
],dd
->nc
[ZZ
],cr
->npmenodes
);
6297 dd
->nnodes
= dd
->nc
[XX
]*dd
->nc
[YY
]*dd
->nc
[ZZ
];
6298 if (cr
->nnodes
- dd
->nnodes
!= cr
->npmenodes
)
6300 gmx_fatal(FARGS
,"The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6301 dd
->nnodes
,cr
->nnodes
- cr
->npmenodes
,cr
->nnodes
);
6303 if (cr
->npmenodes
> dd
->nnodes
)
6305 gmx_fatal(FARGS
,"The number of separate PME node (%d) is larger than the number of PP nodes (%d), this is not supported.",cr
->npmenodes
,dd
->nnodes
);
6307 if (cr
->npmenodes
> 0)
6309 comm
->npmenodes
= cr
->npmenodes
;
6313 comm
->npmenodes
= dd
->nnodes
;
6316 if (EEL_PME(ir
->coulombtype
))
6318 if (FALSE
&& dd
->nc
[XX
] > cr
->npmenodes
&& cr
->npmenodes
% dd
->nc
[XX
] == 0)
6320 comm
->npmedecompdim
= 2;
6321 comm
->npmenodes_major
= dd
->nc
[XX
];
6325 comm
->npmedecompdim
= 1;
6326 comm
->npmenodes_major
= comm
->npmenodes
;
6331 comm
->npmedecompdim
= 0;
6332 comm
->npmenodes_major
= 0;
6334 *npme_major
= comm
->npmenodes_major
;
6336 snew(comm
->slb_frac
,DIM
);
6337 if (comm
->eDLB
== edlbNO
)
6339 comm
->slb_frac
[XX
] = get_slb_frac(fplog
,"x",dd
->nc
[XX
],sizex
);
6340 comm
->slb_frac
[YY
] = get_slb_frac(fplog
,"y",dd
->nc
[YY
],sizey
);
6341 comm
->slb_frac
[ZZ
] = get_slb_frac(fplog
,"z",dd
->nc
[ZZ
],sizez
);
6344 if (comm
->bInterCGBondeds
&& comm
->cutoff_mbody
== 0)
6346 if (comm
->bBondComm
|| comm
->eDLB
!= edlbNO
)
6348 /* Set the bonded communication distance to halfway
6349 * the minimum and the maximum,
6350 * since the extra communication cost is nearly zero.
6352 acs
= average_cellsize_min(dd
,ddbox
);
6353 comm
->cutoff_mbody
= 0.5*(r_bonded
+ acs
);
6354 if (comm
->eDLB
!= edlbNO
)
6356 /* Check if this does not limit the scaling */
6357 comm
->cutoff_mbody
= min(comm
->cutoff_mbody
,dlb_scale
*acs
);
6359 if (!comm
->bBondComm
)
6361 /* Without bBondComm do not go beyond the n.b. cut-off */
6362 comm
->cutoff_mbody
= min(comm
->cutoff_mbody
,comm
->cutoff
);
6363 if (comm
->cellsize_limit
>= comm
->cutoff
)
6365 /* We don't loose a lot of efficieny
6366 * when increasing it to the n.b. cut-off.
6367 * It can even be slightly faster, because we need
6368 * less checks for the communication setup.
6370 comm
->cutoff_mbody
= comm
->cutoff
;
6373 /* Check if we did not end up below our original limit */
6374 comm
->cutoff_mbody
= max(comm
->cutoff_mbody
,r_bonded_limit
);
6376 if (comm
->cutoff_mbody
> comm
->cellsize_limit
)
6378 comm
->cellsize_limit
= comm
->cutoff_mbody
;
6381 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6386 fprintf(debug
,"Bonded atom communication beyond the cut-off: %d\n"
6387 "cellsize limit %f\n",
6388 comm
->bBondComm
,comm
->cellsize_limit
);
6393 check_dd_restrictions(cr
,dd
,ir
,fplog
);
6396 comm
->partition_step
= INT_MIN
;
6402 static void set_dlb_limits(gmx_domdec_t
*dd
)
6407 for(d
=0; d
<dd
->ndim
; d
++)
6409 dd
->comm
->cd
[d
].np
= dd
->comm
->cd
[d
].np_dlb
;
6410 dd
->comm
->cellsize_min
[dd
->dim
[d
]] =
6411 dd
->comm
->cellsize_min_dlb
[dd
->dim
[d
]];
6416 static void turn_on_dlb(FILE *fplog
,t_commrec
*cr
,gmx_step_t step
)
6419 gmx_domdec_comm_t
*comm
;
6429 fprintf(fplog
,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step
,buf
),dd_force_imb_perf_loss(dd
)*100);
6432 cellsize_min
= comm
->cellsize_min
[dd
->dim
[0]];
6433 for(d
=1; d
<dd
->ndim
; d
++)
6435 cellsize_min
= min(cellsize_min
,comm
->cellsize_min
[dd
->dim
[d
]]);
6438 if (cellsize_min
< comm
->cellsize_limit
*1.05)
6440 dd_warning(cr
,fplog
,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
6445 dd_warning(cr
,fplog
,"NOTE: Turning on dynamic load balancing\n");
6446 comm
->bDynLoadBal
= TRUE
;
6447 dd
->bGridJump
= TRUE
;
6451 /* We can set the required cell size info here,
6452 * so we do not need to communicate this.
6453 * The grid is completely uniform.
6455 for(d
=0; d
<dd
->ndim
; d
++)
6459 comm
->load
[d
].sum_m
= comm
->load
[d
].sum
;
6461 nc
= dd
->nc
[dd
->dim
[d
]];
6464 comm
->root
[d
]->cell_f
[i
] = i
/(real
)nc
;
6467 comm
->root
[d
]->cell_f_max0
[i
] = i
/(real
)nc
;
6468 comm
->root
[d
]->cell_f_min1
[i
] = (i
+1)/(real
)nc
;
6471 comm
->root
[d
]->cell_f
[nc
] = 1.0;
6476 static char *init_bLocalCG(gmx_mtop_t
*mtop
)
6481 ncg
= ncg_mtop(mtop
);
6483 for(cg
=0; cg
<ncg
; cg
++)
6485 bLocalCG
[cg
] = FALSE
;
6491 void dd_init_bondeds(FILE *fplog
,
6492 gmx_domdec_t
*dd
,gmx_mtop_t
*mtop
,
6493 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
6494 t_inputrec
*ir
,bool bBCheck
,cginfo_mb_t
*cginfo_mb
)
6496 gmx_domdec_comm_t
*comm
;
6500 dd_make_reverse_top(fplog
,dd
,mtop
,vsite
,constr
,ir
,bBCheck
);
6504 if (comm
->bBondComm
)
6506 /* Communicate atoms beyond the cut-off for bonded interactions */
6509 comm
->cglink
= make_charge_group_links(mtop
,dd
,cginfo_mb
);
6511 comm
->bLocalCG
= init_bLocalCG(mtop
);
6515 /* Only communicate atoms based on cut-off */
6516 comm
->cglink
= NULL
;
6517 comm
->bLocalCG
= NULL
;
6521 static void print_dd_settings(FILE *fplog
,gmx_domdec_t
*dd
,
6523 bool bDynLoadBal
,real dlb_scale
,
6526 gmx_domdec_comm_t
*comm
;
6541 fprintf(fplog
,"The maximum number of communication pulses is:");
6542 for(d
=0; d
<dd
->ndim
; d
++)
6544 fprintf(fplog
," %c %d",dim2char(dd
->dim
[d
]),comm
->cd
[d
].np_dlb
);
6546 fprintf(fplog
,"\n");
6547 fprintf(fplog
,"The minimum size for domain decomposition cells is %.3f nm\n",comm
->cellsize_limit
);
6548 fprintf(fplog
,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale
);
6549 fprintf(fplog
,"The allowed shrink of domain decomposition cells is:");
6550 for(d
=0; d
<DIM
; d
++)
6554 if (d
>= ddbox
->npbcdim
&& dd
->nc
[d
] == 2)
6561 comm
->cellsize_min_dlb
[d
]/
6562 (ddbox
->box_size
[d
]*ddbox
->skew_fac
[d
]/dd
->nc
[d
]);
6564 fprintf(fplog
," %c %.2f",dim2char(d
),shrink
);
6567 fprintf(fplog
,"\n");
6571 set_dd_cell_sizes_slb(dd
,ddbox
,FALSE
,np
);
6572 fprintf(fplog
,"The initial number of communication pulses is:");
6573 for(d
=0; d
<dd
->ndim
; d
++)
6575 fprintf(fplog
," %c %d",dim2char(dd
->dim
[d
]),np
[dd
->dim
[d
]]);
6577 fprintf(fplog
,"\n");
6578 fprintf(fplog
,"The initial domain decomposition cell size is:");
6579 for(d
=0; d
<DIM
; d
++) {
6582 fprintf(fplog
," %c %.2f nm",
6583 dim2char(d
),dd
->comm
->cellsize_min
[d
]);
6586 fprintf(fplog
,"\n\n");
6589 if (comm
->bInterCGBondeds
|| dd
->vsite_comm
|| dd
->constraint_comm
)
6591 fprintf(fplog
,"The maximum allowed distance for charge groups involved in interactions is:\n");
6592 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6593 "non-bonded interactions","",comm
->cutoff
);
6597 limit
= dd
->comm
->cellsize_limit
;
6601 if (dynamic_dd_box(ddbox
,ir
))
6603 fprintf(fplog
,"(the following are initial values, they could change due to box deformation)\n");
6605 limit
= dd
->comm
->cellsize_min
[XX
];
6606 for(d
=1; d
<DIM
; d
++)
6608 limit
= min(limit
,dd
->comm
->cellsize_min
[d
]);
6612 if (comm
->bInterCGBondeds
)
6614 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6615 "two-body bonded interactions","(-rdd)",
6616 max(comm
->cutoff
,comm
->cutoff_mbody
));
6617 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6618 "multi-body bonded interactions","(-rdd)",
6619 (comm
->bBondComm
|| dd
->bGridJump
) ? comm
->cutoff_mbody
: min(comm
->cutoff
,limit
));
6623 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6624 "virtual site constructions","(-rcon)",limit
);
6626 if (dd
->constraint_comm
)
6628 sprintf(buf
,"atoms separated by up to %d constraints",
6630 fprintf(fplog
,"%40s %-7s %6.3f nm\n",
6631 buf
,"(-rcon)",limit
);
6633 fprintf(fplog
,"\n");
6639 void set_dd_parameters(FILE *fplog
,gmx_domdec_t
*dd
,real dlb_scale
,
6640 t_inputrec
*ir
,t_forcerec
*fr
,
6643 gmx_domdec_comm_t
*comm
;
6644 int d
,dim
,npulse
,npulse_d_max
,npulse_d
;
6651 bNoCutOff
= (ir
->rvdw
== 0 || ir
->rcoulomb
== 0);
6653 if (EEL_PME(ir
->coulombtype
))
6655 init_ddpme(dd
,&comm
->ddpme
[0],0,comm
->npmenodes_major
);
6656 if (comm
->npmedecompdim
>= 2)
6658 init_ddpme(dd
,&comm
->ddpme
[1],1,
6659 comm
->npmenodes
/comm
->npmenodes_major
);
6664 comm
->npmenodes
= 0;
6665 if (dd
->pme_nodeid
>= 0)
6667 gmx_fatal(FARGS
,"Can not have separate PME nodes without PME electrostatics");
6671 /* If each molecule is a single charge group
6672 * or we use domain decomposition for each periodic dimension,
6673 * we do not need to take pbc into account for the bonded interactions.
6675 if (fr
->ePBC
== epbcNONE
|| !comm
->bInterCGBondeds
||
6676 (dd
->nc
[XX
]>1 && dd
->nc
[YY
]>1 && (dd
->nc
[ZZ
]>1 || fr
->ePBC
==epbcXY
)))
6678 fr
->bMolPBC
= FALSE
;
6687 fprintf(debug
,"The DD cut-off is %f\n",comm
->cutoff
);
6689 if (comm
->eDLB
!= edlbNO
)
6691 /* Determine the maximum number of comm. pulses in one dimension */
6693 comm
->cellsize_limit
= max(comm
->cellsize_limit
,comm
->cutoff_mbody
);
6695 /* Determine the maximum required number of grid pulses */
6696 if (comm
->cellsize_limit
>= comm
->cutoff
)
6698 /* Only a single pulse is required */
6701 else if (!bNoCutOff
&& comm
->cellsize_limit
> 0)
6703 /* We round down slightly here to avoid overhead due to the latency
6704 * of extra communication calls when the cut-off
6705 * would be only slightly longer than the cell size.
6706 * Later cellsize_limit is redetermined,
6707 * so we can not miss interactions due to this rounding.
6709 npulse
= (int)(0.96 + comm
->cutoff
/comm
->cellsize_limit
);
6713 /* There is no cell size limit */
6714 npulse
= max(dd
->nc
[XX
]-1,max(dd
->nc
[YY
]-1,dd
->nc
[ZZ
]-1));
6717 if (!bNoCutOff
&& npulse
> 1)
6719 /* See if we can do with less pulses, based on dlb_scale */
6721 for(d
=0; d
<dd
->ndim
; d
++)
6724 npulse_d
= (int)(1 + dd
->nc
[dim
]*comm
->cutoff
6725 /(ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
]*dlb_scale
));
6726 npulse_d_max
= max(npulse_d_max
,npulse_d
);
6728 npulse
= min(npulse
,npulse_d_max
);
6731 /* This env var can override npulse */
6732 d
= dd_nst_env(fplog
,"GMX_DD_NPULSE",0);
6739 comm
->bVacDLBNoLimit
= (ir
->ePBC
== epbcNONE
);
6740 for(d
=0; d
<dd
->ndim
; d
++)
6742 comm
->cd
[d
].np_dlb
= min(npulse
,dd
->nc
[dd
->dim
[d
]]-1);
6743 comm
->cd
[d
].np_nalloc
= comm
->cd
[d
].np_dlb
;
6744 snew(comm
->cd
[d
].ind
,comm
->cd
[d
].np_nalloc
);
6745 comm
->maxpulse
= max(comm
->maxpulse
,comm
->cd
[d
].np_dlb
);
6746 if (comm
->cd
[d
].np_dlb
< dd
->nc
[dd
->dim
[d
]]-1)
6748 comm
->bVacDLBNoLimit
= FALSE
;
6752 /* cellsize_limit is set for LINCS in init_domain_decomposition */
6753 if (!comm
->bVacDLBNoLimit
)
6755 comm
->cellsize_limit
= max(comm
->cellsize_limit
,
6756 comm
->cutoff
/comm
->maxpulse
);
6758 comm
->cellsize_limit
= max(comm
->cellsize_limit
,comm
->cutoff_mbody
);
6759 /* Set the minimum cell size for each DD dimension */
6760 for(d
=0; d
<dd
->ndim
; d
++)
6762 if (comm
->bVacDLBNoLimit
||
6763 comm
->cd
[d
].np_dlb
*comm
->cellsize_limit
>= comm
->cutoff
)
6765 comm
->cellsize_min_dlb
[dd
->dim
[d
]] = comm
->cellsize_limit
;
6769 comm
->cellsize_min_dlb
[dd
->dim
[d
]] =
6770 comm
->cutoff
/comm
->cd
[d
].np_dlb
;
6773 if (comm
->cutoff_mbody
<= 0)
6775 comm
->cutoff_mbody
= min(comm
->cutoff
,comm
->cellsize_limit
);
6777 if (comm
->bDynLoadBal
)
6783 print_dd_settings(fplog
,dd
,ir
,comm
->bDynLoadBal
,dlb_scale
,ddbox
);
6784 if (comm
->eDLB
== edlbAUTO
)
6788 fprintf(fplog
,"When dynamic load balancing gets turned on, these settings will change to:\n");
6790 print_dd_settings(fplog
,dd
,ir
,TRUE
,dlb_scale
,ddbox
);
6793 vol_frac
= 1/(real
)dd
->nnodes
+ comm_box_frac(dd
->nc
,comm
->cutoff
,ddbox
);
6796 fprintf(debug
,"Volume fraction for all DD zones: %f\n",vol_frac
);
6798 natoms_tot
= comm
->cgs_gl
.index
[comm
->cgs_gl
.nr
];
6800 dd
->ga2la
= ga2la_init(natoms_tot
,vol_frac
*natoms_tot
);
6803 static void merge_cg_buffers(int ncell
,
6804 gmx_domdec_comm_dim_t
*cd
, int pulse
,
6806 int *index_gl
, int *recv_i
,
6807 rvec
*cg_cm
, rvec
*recv_vr
,
6809 cginfo_mb_t
*cginfo_mb
,int *cginfo
)
6811 gmx_domdec_ind_t
*ind
,*ind_p
;
6812 int p
,cell
,c
,cg
,cg0
,cg1
,cg_gl
,nat
;
6815 ind
= &cd
->ind
[pulse
];
6817 /* First correct the already stored data */
6818 shift
= ind
->nrecv
[ncell
];
6819 for(cell
=ncell
-1; cell
>=0; cell
--)
6821 shift
-= ind
->nrecv
[cell
];
6824 /* Move the cg's present from previous grid pulses */
6825 cg0
= ncg_cell
[ncell
+cell
];
6826 cg1
= ncg_cell
[ncell
+cell
+1];
6827 cgindex
[cg1
+shift
] = cgindex
[cg1
];
6828 for(cg
=cg1
-1; cg
>=cg0
; cg
--)
6830 index_gl
[cg
+shift
] = index_gl
[cg
];
6831 copy_rvec(cg_cm
[cg
],cg_cm
[cg
+shift
]);
6832 cgindex
[cg
+shift
] = cgindex
[cg
];
6833 cginfo
[cg
+shift
] = cginfo
[cg
];
6835 /* Correct the already stored send indices for the shift */
6836 for(p
=1; p
<=pulse
; p
++)
6838 ind_p
= &cd
->ind
[p
];
6840 for(c
=0; c
<cell
; c
++)
6842 cg0
+= ind_p
->nsend
[c
];
6844 cg1
= cg0
+ ind_p
->nsend
[cell
];
6845 for(cg
=cg0
; cg
<cg1
; cg
++)
6847 ind_p
->index
[cg
] += shift
;
6853 /* Merge in the communicated buffers */
6857 for(cell
=0; cell
<ncell
; cell
++)
6859 cg1
= ncg_cell
[ncell
+cell
+1] + shift
;
6862 /* Correct the old cg indices */
6863 for(cg
=ncg_cell
[ncell
+cell
]; cg
<cg1
; cg
++)
6865 cgindex
[cg
+1] += shift_at
;
6868 for(cg
=0; cg
<ind
->nrecv
[cell
]; cg
++)
6870 /* Copy this charge group from the buffer */
6871 index_gl
[cg1
] = recv_i
[cg0
];
6872 copy_rvec(recv_vr
[cg0
],cg_cm
[cg1
]);
6873 /* Add it to the cgindex */
6874 cg_gl
= index_gl
[cg1
];
6875 cginfo
[cg1
] = ddcginfo(cginfo_mb
,cg_gl
);
6876 nat
= GET_CGINFO_NATOMS(cginfo
[cg1
]);
6877 cgindex
[cg1
+1] = cgindex
[cg1
] + nat
;
6882 shift
+= ind
->nrecv
[cell
];
6883 ncg_cell
[ncell
+cell
+1] = cg1
;
6887 static void make_cell2at_index(gmx_domdec_comm_dim_t
*cd
,
6888 int nzone
,int cg0
,const int *cgindex
)
6892 /* Store the atom block boundaries for easy copying of communication buffers
6895 for(zone
=0; zone
<nzone
; zone
++)
6897 for(p
=0; p
<cd
->np
; p
++) {
6898 cd
->ind
[p
].cell2at0
[zone
] = cgindex
[cg
];
6899 cg
+= cd
->ind
[p
].nrecv
[zone
];
6900 cd
->ind
[p
].cell2at1
[zone
] = cgindex
[cg
];
6905 static bool missing_link(t_blocka
*link
,int cg_gl
,char *bLocalCG
)
6911 for(i
=link
->index
[cg_gl
]; i
<link
->index
[cg_gl
+1]; i
++)
6913 if (!bLocalCG
[link
->a
[i
]])
6922 static void setup_dd_communication(gmx_domdec_t
*dd
,
6923 matrix box
,gmx_ddbox_t
*ddbox
,t_forcerec
*fr
)
6925 int dim_ind
,dim
,dim0
,dim1
=-1,dim2
=-1,dimd
,p
,nat_tot
;
6926 int nzone
,nzone_send
,zone
,zonei
,cg0
,cg1
;
6927 int c
,i
,j
,cg
,cg_gl
,nrcg
;
6928 int *zone_cg_range
,pos_cg
,*index_gl
,*cgindex
,*recv_i
;
6929 gmx_domdec_comm_t
*comm
;
6930 gmx_domdec_zones_t
*zones
;
6931 gmx_domdec_comm_dim_t
*cd
;
6932 gmx_domdec_ind_t
*ind
;
6933 cginfo_mb_t
*cginfo_mb
;
6934 bool bBondComm
,bDist2B
,bDistMB
,bDistMB_pulse
,bDistBonded
,bScrew
;
6935 real r_mb
,r_comm2
,r_scomm2
,r_bcomm2
,r
,r_0
,r_1
,r2
,rb2
,r2inc
,inv_ncg
,tric_sh
;
6937 real corner
[DIM
][4],corner_round_0
=0,corner_round_1
[4];
6938 real bcorner
[DIM
],bcorner_round_1
=0;
6940 rvec
*cg_cm
,*normal
,*v_d
,*v_0
=NULL
,*v_1
=NULL
,*recv_vr
;
6941 real skew_fac2_d
,skew_fac_01
;
6947 fprintf(debug
,"Setting up DD communication\n");
6953 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
6955 dim
= dd
->dim
[dim_ind
];
6957 /* Check if we need to use triclinic distances */
6958 tric_dist
[dim_ind
] = 0;
6959 for(i
=0; i
<=dim_ind
; i
++)
6961 if (ddbox
->tric_dir
[dd
->dim
[i
]])
6963 tric_dist
[dim_ind
] = 1;
6968 bBondComm
= comm
->bBondComm
;
6970 /* Do we need to determine extra distances for multi-body bondeds? */
6971 bDistMB
= (comm
->bInterCGMultiBody
&& dd
->bGridJump
&& dd
->ndim
> 1);
6973 /* Do we need to determine extra distances for only two-body bondeds? */
6974 bDist2B
= (bBondComm
&& !bDistMB
);
6976 r_comm2
= sqr(comm
->cutoff
);
6977 r_bcomm2
= sqr(comm
->cutoff_mbody
);
6981 fprintf(debug
,"bBondComm %d, r_bc %f\n",bBondComm
,sqrt(r_bcomm2
));
6984 zones
= &comm
->zones
;
6987 /* The first dimension is equal for all cells */
6988 corner
[0][0] = comm
->cell_x0
[dim0
];
6991 bcorner
[0] = corner
[0][0];
6996 /* This cell row is only seen from the first row */
6997 corner
[1][0] = comm
->cell_x0
[dim1
];
6998 /* All rows can see this row */
6999 corner
[1][1] = comm
->cell_x0
[dim1
];
7002 corner
[1][1] = max(comm
->cell_x0
[dim1
],comm
->zone_d1
[1].mch0
);
7005 /* For the multi-body distance we need the maximum */
7006 bcorner
[1] = max(comm
->cell_x0
[dim1
],comm
->zone_d1
[1].p1_0
);
7009 /* Set the upper-right corner for rounding */
7010 corner_round_0
= comm
->cell_x1
[dim0
];
7017 corner
[2][j
] = comm
->cell_x0
[dim2
];
7021 /* Use the maximum of the i-cells that see a j-cell */
7022 for(i
=0; i
<zones
->nizone
; i
++)
7024 for(j
=zones
->izone
[i
].j0
; j
<zones
->izone
[i
].j1
; j
++)
7030 comm
->zone_d2
[zones
->shift
[i
][dim0
]][zones
->shift
[i
][dim1
]].mch0
);
7036 /* For the multi-body distance we need the maximum */
7037 bcorner
[2] = comm
->cell_x0
[dim2
];
7042 bcorner
[2] = max(bcorner
[2],
7043 comm
->zone_d2
[i
][j
].p1_0
);
7049 /* Set the upper-right corner for rounding */
7050 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7051 * Only cell (0,0,0) can see cell 7 (1,1,1)
7053 corner_round_1
[0] = comm
->cell_x1
[dim1
];
7054 corner_round_1
[3] = comm
->cell_x1
[dim1
];
7057 corner_round_1
[0] = max(comm
->cell_x1
[dim1
],
7058 comm
->zone_d1
[1].mch1
);
7061 /* For the multi-body distance we need the maximum */
7062 bcorner_round_1
= max(comm
->cell_x1
[dim1
],
7063 comm
->zone_d1
[1].p1_1
);
7069 /* Triclinic stuff */
7070 normal
= ddbox
->normal
;
7074 v_0
= ddbox
->v
[dim0
];
7075 if (ddbox
->tric_dir
[dim0
] && ddbox
->tric_dir
[dim1
])
7077 /* Determine the coupling coefficient for the distances
7078 * to the cell planes along dim0 and dim1 through dim2.
7079 * This is required for correct rounding.
7082 ddbox
->v
[dim0
][dim1
+1][dim0
]*ddbox
->v
[dim1
][dim1
+1][dim1
];
7085 fprintf(debug
,"\nskew_fac_01 %f\n",skew_fac_01
);
7091 v_1
= ddbox
->v
[dim1
];
7094 zone_cg_range
= zones
->cg_range
;
7095 index_gl
= dd
->index_gl
;
7096 cgindex
= dd
->cgindex
;
7097 cginfo_mb
= fr
->cginfo_mb
;
7099 zone_cg_range
[0] = 0;
7100 zone_cg_range
[1] = dd
->ncg_home
;
7101 comm
->zone_ncg1
[0] = dd
->ncg_home
;
7102 pos_cg
= dd
->ncg_home
;
7104 nat_tot
= dd
->nat_home
;
7106 for(dim_ind
=0; dim_ind
<dd
->ndim
; dim_ind
++)
7108 dim
= dd
->dim
[dim_ind
];
7109 cd
= &comm
->cd
[dim_ind
];
7111 if (dim
>= ddbox
->npbcdim
&& dd
->ci
[dim
] == 0)
7113 /* No pbc in this dimension, the first node should not comm. */
7121 bScrew
= (dd
->bScrewPBC
&& dim
== XX
);
7123 v_d
= ddbox
->v
[dim
];
7124 skew_fac2_d
= sqr(ddbox
->skew_fac
[dim
]);
7126 cd
->bInPlace
= TRUE
;
7127 for(p
=0; p
<cd
->np
; p
++)
7129 /* Only atoms communicated in the first pulse are used
7130 * for multi-body bonded interactions or for bBondComm.
7132 bDistBonded
= ((bDistMB
|| bDist2B
) && p
== 0);
7133 bDistMB_pulse
= (bDistMB
&& bDistBonded
);
7138 for(zone
=0; zone
<nzone_send
; zone
++)
7140 if (tric_dist
[dim_ind
] && dim_ind
> 0)
7142 /* Determine slightly more optimized skew_fac's
7144 * This reduces the number of communicated atoms
7145 * by about 10% for 3D DD of rhombic dodecahedra.
7147 for(dimd
=0; dimd
<dim
; dimd
++)
7149 sf2_round
[dimd
] = 1;
7150 if (ddbox
->tric_dir
[dimd
])
7152 for(i
=dd
->dim
[dimd
]+1; i
<DIM
; i
++)
7154 /* If we are shifted in dimension i
7155 * and the cell plane is tilted forward
7156 * in dimension i, skip this coupling.
7158 if (!(zones
->shift
[nzone
+zone
][i
] &&
7159 ddbox
->v
[dimd
][i
][dimd
] >= 0))
7162 sqr(ddbox
->v
[dimd
][i
][dimd
]);
7165 sf2_round
[dimd
] = 1/sf2_round
[dimd
];
7170 zonei
= zone_perm
[dim_ind
][zone
];
7173 /* Here we permutate the zones to obtain a convenient order
7174 * for neighbor searching
7176 cg0
= zone_cg_range
[zonei
];
7177 cg1
= zone_cg_range
[zonei
+1];
7181 /* Look only at the cg's received in the previous grid pulse
7183 cg1
= zone_cg_range
[nzone
+zone
+1];
7184 cg0
= cg1
- cd
->ind
[p
-1].nrecv
[zone
];
7186 ind
->nsend
[zone
] = 0;
7187 for(cg
=cg0
; cg
<cg1
; cg
++)
7191 if (tric_dist
[dim_ind
] == 0)
7193 /* Rectangular direction, easy */
7194 r
= cg_cm
[cg
][dim
] - corner
[dim_ind
][zone
];
7201 r
= cg_cm
[cg
][dim
] - bcorner
[dim_ind
];
7207 /* Rounding gives at most a 16% reduction
7208 * in communicated atoms
7210 if (dim_ind
>= 1 && (zonei
== 1 || zonei
== 2))
7212 r
= cg_cm
[cg
][dim0
] - corner_round_0
;
7213 /* This is the first dimension, so always r >= 0 */
7220 if (dim_ind
== 2 && (zonei
== 2 || zonei
== 3))
7222 r
= cg_cm
[cg
][dim1
] - corner_round_1
[zone
];
7229 r
= cg_cm
[cg
][dim1
] - bcorner_round_1
;
7239 /* Triclinic direction, more complicated */
7242 /* Rounding, conservative as the skew_fac multiplication
7243 * will slightly underestimate the distance.
7245 if (dim_ind
>= 1 && (zonei
== 1 || zonei
== 2))
7247 rn
[dim0
] = cg_cm
[cg
][dim0
] - corner_round_0
;
7248 for(i
=dim0
+1; i
<DIM
; i
++)
7250 rn
[dim0
] -= cg_cm
[cg
][i
]*v_0
[i
][dim0
];
7252 r2
= rn
[dim0
]*rn
[dim0
]*sf2_round
[dim0
];
7255 rb
[dim0
] = rn
[dim0
];
7258 /* Take care that the cell planes along dim0 might not
7259 * be orthogonal to those along dim1 and dim2.
7261 for(i
=1; i
<=dim_ind
; i
++)
7264 if (normal
[dim0
][dimd
] > 0)
7266 rn
[dimd
] -= rn
[dim0
]*normal
[dim0
][dimd
];
7269 rb
[dimd
] -= rb
[dim0
]*normal
[dim0
][dimd
];
7274 if (dim_ind
== 2 && (zonei
== 2 || zonei
== 3))
7276 rn
[dim1
] += cg_cm
[cg
][dim1
] - corner_round_1
[zone
];
7278 for(i
=dim1
+1; i
<DIM
; i
++)
7280 tric_sh
-= cg_cm
[cg
][i
]*v_1
[i
][dim1
];
7282 rn
[dim1
] += tric_sh
;
7285 r2
+= rn
[dim1
]*rn
[dim1
]*sf2_round
[dim1
];
7286 /* Take care of coupling of the distances
7287 * to the planes along dim0 and dim1 through dim2.
7289 r2
-= rn
[dim0
]*rn
[dim1
]*skew_fac_01
;
7290 /* Take care that the cell planes along dim1
7291 * might not be orthogonal to that along dim2.
7293 if (normal
[dim1
][dim2
] > 0)
7295 rn
[dim2
] -= rn
[dim1
]*normal
[dim1
][dim2
];
7301 cg_cm
[cg
][dim1
] - bcorner_round_1
+ tric_sh
;
7304 rb2
+= rb
[dim1
]*rb
[dim1
]*sf2_round
[dim1
];
7305 /* Take care of coupling of the distances
7306 * to the planes along dim0 and dim1 through dim2.
7308 rb2
-= rb
[dim0
]*rb
[dim1
]*skew_fac_01
;
7309 /* Take care that the cell planes along dim1
7310 * might not be orthogonal to that along dim2.
7312 if (normal
[dim1
][dim2
] > 0)
7314 rb
[dim2
] -= rb
[dim1
]*normal
[dim1
][dim2
];
7319 /* The distance along the communication direction */
7320 rn
[dim
] += cg_cm
[cg
][dim
] - corner
[dim_ind
][zone
];
7322 for(i
=dim
+1; i
<DIM
; i
++)
7324 tric_sh
-= cg_cm
[cg
][i
]*v_d
[i
][dim
];
7329 r2
+= rn
[dim
]*rn
[dim
]*skew_fac2_d
;
7330 /* Take care of coupling of the distances
7331 * to the planes along dim0 and dim1 through dim2.
7333 if (dim_ind
== 1 && zonei
== 1)
7335 r2
-= rn
[dim0
]*rn
[dim
]*skew_fac_01
;
7341 rb
[dim
] += cg_cm
[cg
][dim
] - bcorner
[dim_ind
] + tric_sh
;
7344 rb2
+= rb
[dim
]*rb
[dim
]*skew_fac2_d
;
7345 /* Take care of coupling of the distances
7346 * to the planes along dim0 and dim1 through dim2.
7348 if (dim_ind
== 1 && zonei
== 1)
7350 rb2
-= rb
[dim0
]*rb
[dim
]*skew_fac_01
;
7358 ((bDistMB
&& rb2
< r_bcomm2
) ||
7359 (bDist2B
&& r2
< r_bcomm2
)) &&
7361 (GET_CGINFO_BOND_INTER(fr
->cginfo
[cg
]) &&
7362 missing_link(comm
->cglink
,index_gl
[cg
],
7365 /* Make an index to the local charge groups */
7366 if (nsend
+1 > ind
->nalloc
)
7368 ind
->nalloc
= over_alloc_large(nsend
+1);
7369 srenew(ind
->index
,ind
->nalloc
);
7371 if (nsend
+1 > comm
->nalloc_int
)
7373 comm
->nalloc_int
= over_alloc_large(nsend
+1);
7374 srenew(comm
->buf_int
,comm
->nalloc_int
);
7376 ind
->index
[nsend
] = cg
;
7377 comm
->buf_int
[nsend
] = index_gl
[cg
];
7379 check_vec_rvec_alloc(&comm
->vbuf
,nsend
+1);
7381 if (dd
->ci
[dim
] == 0)
7383 /* Correct cg_cm for pbc */
7384 rvec_add(cg_cm
[cg
],box
[dim
],comm
->vbuf
.v
[nsend
]);
7387 comm
->vbuf
.v
[nsend
][YY
] =
7388 box
[YY
][YY
]-comm
->vbuf
.v
[nsend
][YY
];
7389 comm
->vbuf
.v
[nsend
][ZZ
] =
7390 box
[ZZ
][ZZ
]-comm
->vbuf
.v
[nsend
][ZZ
];
7395 copy_rvec(cg_cm
[cg
],comm
->vbuf
.v
[nsend
]);
7398 nat
+= cgindex
[cg
+1] - cgindex
[cg
];
7402 /* Clear the counts in case we do not have pbc */
7403 for(zone
=nzone_send
; zone
<nzone
; zone
++)
7405 ind
->nsend
[zone
] = 0;
7407 ind
->nsend
[nzone
] = nsend
;
7408 ind
->nsend
[nzone
+1] = nat
;
7409 /* Communicate the number of cg's and atoms to receive */
7410 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
7411 ind
->nsend
, nzone
+2,
7412 ind
->nrecv
, nzone
+2);
7414 /* The rvec buffer is also required for atom buffers of size nsend
7415 * in dd_move_x and dd_move_f.
7417 check_vec_rvec_alloc(&comm
->vbuf
,ind
->nsend
[nzone
+1]);
7421 /* We can receive in place if only the last zone is not empty */
7422 for(zone
=0; zone
<nzone
-1; zone
++)
7424 if (ind
->nrecv
[zone
] > 0)
7426 cd
->bInPlace
= FALSE
;
7431 /* The int buffer is only required here for the cg indices */
7432 if (ind
->nrecv
[nzone
] > comm
->nalloc_int2
)
7434 comm
->nalloc_int2
= over_alloc_dd(ind
->nrecv
[nzone
]);
7435 srenew(comm
->buf_int2
,comm
->nalloc_int2
);
7437 /* The rvec buffer is also required for atom buffers
7438 * of size nrecv in dd_move_x and dd_move_f.
7440 i
= max(cd
->ind
[0].nrecv
[nzone
+1],ind
->nrecv
[nzone
+1]);
7441 check_vec_rvec_alloc(&comm
->vbuf2
,i
);
7445 /* Make space for the global cg indices */
7446 if (pos_cg
+ ind
->nrecv
[nzone
] > dd
->cg_nalloc
7447 || dd
->cg_nalloc
== 0)
7449 dd
->cg_nalloc
= over_alloc_dd(pos_cg
+ ind
->nrecv
[nzone
]);
7450 srenew(index_gl
,dd
->cg_nalloc
);
7451 srenew(cgindex
,dd
->cg_nalloc
+1);
7453 /* Communicate the global cg indices */
7456 recv_i
= index_gl
+ pos_cg
;
7460 recv_i
= comm
->buf_int2
;
7462 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
7463 comm
->buf_int
, nsend
,
7464 recv_i
, ind
->nrecv
[nzone
]);
7466 /* Make space for cg_cm */
7467 if (pos_cg
+ ind
->nrecv
[nzone
] > fr
->cg_nalloc
)
7469 dd_realloc_fr_cg(fr
,pos_cg
+ ind
->nrecv
[nzone
]);
7472 /* Communicate cg_cm */
7475 recv_vr
= cg_cm
+ pos_cg
;
7479 recv_vr
= comm
->vbuf2
.v
;
7481 dd_sendrecv_rvec(dd
, dim_ind
, dddirBackward
,
7482 comm
->vbuf
.v
, nsend
,
7483 recv_vr
, ind
->nrecv
[nzone
]);
7485 /* Make the charge group index */
7488 zone
= (p
== 0 ? 0 : nzone
- 1);
7489 while (zone
< nzone
)
7491 for(cg
=0; cg
<ind
->nrecv
[zone
]; cg
++)
7493 cg_gl
= index_gl
[pos_cg
];
7494 fr
->cginfo
[pos_cg
] = ddcginfo(cginfo_mb
,cg_gl
);
7495 nrcg
= GET_CGINFO_NATOMS(fr
->cginfo
[pos_cg
]);
7496 cgindex
[pos_cg
+1] = cgindex
[pos_cg
] + nrcg
;
7499 /* Update the charge group presence,
7500 * so we can use it in the next pass of the loop.
7502 comm
->bLocalCG
[cg_gl
] = TRUE
;
7508 comm
->zone_ncg1
[nzone
+zone
] = ind
->nrecv
[zone
];
7511 zone_cg_range
[nzone
+zone
] = pos_cg
;
7516 /* This part of the code is never executed with bBondComm. */
7517 merge_cg_buffers(nzone
,cd
,p
,zone_cg_range
,
7518 index_gl
,recv_i
,cg_cm
,recv_vr
,
7519 cgindex
,fr
->cginfo_mb
,fr
->cginfo
);
7520 pos_cg
+= ind
->nrecv
[nzone
];
7522 nat_tot
+= ind
->nrecv
[nzone
+1];
7526 /* Store the atom block for easy copying of communication buffers */
7527 make_cell2at_index(cd
,nzone
,zone_cg_range
[nzone
],cgindex
);
7531 dd
->index_gl
= index_gl
;
7532 dd
->cgindex
= cgindex
;
7534 dd
->ncg_tot
= zone_cg_range
[zones
->n
];
7535 dd
->nat_tot
= nat_tot
;
7536 comm
->nat
[ddnatHOME
] = dd
->nat_home
;
7537 for(i
=ddnatZONE
; i
<ddnatNR
; i
++)
7539 comm
->nat
[i
] = dd
->nat_tot
;
7544 /* We don't need to update cginfo, since that was alrady done above.
7545 * So we pass NULL for the forcerec.
7547 dd_set_cginfo(dd
->index_gl
,dd
->ncg_home
,dd
->ncg_tot
,
7548 NULL
,comm
->bLocalCG
);
7553 fprintf(debug
,"Finished setting up DD communication, zones:");
7554 for(c
=0; c
<zones
->n
; c
++)
7556 fprintf(debug
," %d",zones
->cg_range
[c
+1]-zones
->cg_range
[c
]);
7558 fprintf(debug
,"\n");
7562 static void set_cg_boundaries(gmx_domdec_zones_t
*zones
)
7566 for(c
=0; c
<zones
->nizone
; c
++)
7568 zones
->izone
[c
].cg1
= zones
->cg_range
[c
+1];
7569 zones
->izone
[c
].jcg0
= zones
->cg_range
[zones
->izone
[c
].j0
];
7570 zones
->izone
[c
].jcg1
= zones
->cg_range
[zones
->izone
[c
].j1
];
7574 static int comp_cgsort(const void *a
,const void *b
)
7578 gmx_cgsort_t
*cga
,*cgb
;
7579 cga
= (gmx_cgsort_t
*)a
;
7580 cgb
= (gmx_cgsort_t
*)b
;
7582 comp
= cga
->nsc
- cgb
->nsc
;
7585 comp
= cga
->ind_gl
- cgb
->ind_gl
;
7591 static void order_int_cg(int n
,gmx_cgsort_t
*sort
,
7596 /* Order the data */
7599 buf
[i
] = a
[sort
[i
].ind
];
7602 /* Copy back to the original array */
7609 static void order_vec_cg(int n
,gmx_cgsort_t
*sort
,
7614 /* Order the data */
7617 copy_rvec(v
[sort
[i
].ind
],buf
[i
]);
7620 /* Copy back to the original array */
7623 copy_rvec(buf
[i
],v
[i
]);
7627 static void order_vec_atom(int ncg
,int *cgindex
,gmx_cgsort_t
*sort
,
7630 int a
,atot
,cg
,cg0
,cg1
,i
;
7632 /* Order the data */
7634 for(cg
=0; cg
<ncg
; cg
++)
7636 cg0
= cgindex
[sort
[cg
].ind
];
7637 cg1
= cgindex
[sort
[cg
].ind
+1];
7638 for(i
=cg0
; i
<cg1
; i
++)
7640 copy_rvec(v
[i
],buf
[a
]);
7646 /* Copy back to the original array */
7647 for(a
=0; a
<atot
; a
++)
7649 copy_rvec(buf
[a
],v
[a
]);
7653 static void ordered_sort(int nsort2
,gmx_cgsort_t
*sort2
,
7654 int nsort_new
,gmx_cgsort_t
*sort_new
,
7655 gmx_cgsort_t
*sort1
)
7659 /* The new indices are not very ordered, so we qsort them */
7660 qsort(sort_new
,nsort_new
,sizeof(sort_new
[0]),comp_cgsort
);
7662 /* sort2 is already ordered, so now we can merge the two arrays */
7666 while(i2
< nsort2
|| i_new
< nsort_new
)
7670 sort1
[i1
++] = sort_new
[i_new
++];
7672 else if (i_new
== nsort_new
)
7674 sort1
[i1
++] = sort2
[i2
++];
7676 else if (sort2
[i2
].nsc
< sort_new
[i_new
].nsc
||
7677 (sort2
[i2
].nsc
== sort_new
[i_new
].nsc
&&
7678 sort2
[i2
].ind_gl
< sort_new
[i_new
].ind_gl
))
7680 sort1
[i1
++] = sort2
[i2
++];
7684 sort1
[i1
++] = sort_new
[i_new
++];
7689 static void dd_sort_state(gmx_domdec_t
*dd
,int ePBC
,
7690 rvec
*cgcm
,t_forcerec
*fr
,t_state
*state
,
7693 gmx_domdec_sort_t
*sort
;
7694 gmx_cgsort_t
*cgsort
,*sort_i
;
7695 int ncg_new
,nsort2
,nsort_new
,i
,cell_index
,*ibuf
,cgsize
;
7698 sort
= dd
->comm
->sort
;
7700 if (dd
->ncg_home
> sort
->sort_nalloc
)
7702 sort
->sort_nalloc
= over_alloc_dd(dd
->ncg_home
);
7703 srenew(sort
->sort1
,sort
->sort_nalloc
);
7704 srenew(sort
->sort2
,sort
->sort_nalloc
);
7707 if (ncg_home_old
>= 0)
7709 /* The charge groups that remained in the same ns grid cell
7710 * are completely ordered. So we can sort efficiently by sorting
7711 * the charge groups that did move into the stationary list.
7716 for(i
=0; i
<dd
->ncg_home
; i
++)
7718 /* Check if this cg did not move to another node */
7719 cell_index
= fr
->ns
.grid
->cell_index
[i
];
7720 if (cell_index
!= 4*fr
->ns
.grid
->ncells
)
7722 if (i
>= ncg_home_old
|| cell_index
!= sort
->sort1
[i
].nsc
)
7724 /* This cg is new on this node or moved ns grid cell */
7725 if (nsort_new
>= sort
->sort_new_nalloc
)
7727 sort
->sort_new_nalloc
= over_alloc_dd(nsort_new
+1);
7728 srenew(sort
->sort_new
,sort
->sort_new_nalloc
);
7730 sort_i
= &(sort
->sort_new
[nsort_new
++]);
7734 /* This cg did not move */
7735 sort_i
= &(sort
->sort2
[nsort2
++]);
7737 /* Sort on the ns grid cell indices
7738 * and the global topology index
7740 sort_i
->nsc
= cell_index
;
7741 sort_i
->ind_gl
= dd
->index_gl
[i
];
7748 fprintf(debug
,"ordered sort cgs: stationary %d moved %d\n",
7751 /* Sort efficiently */
7752 ordered_sort(nsort2
,sort
->sort2
,nsort_new
,sort
->sort_new
,sort
->sort1
);
7756 cgsort
= sort
->sort1
;
7758 for(i
=0; i
<dd
->ncg_home
; i
++)
7760 /* Sort on the ns grid cell indices
7761 * and the global topology index
7763 cgsort
[i
].nsc
= fr
->ns
.grid
->cell_index
[i
];
7764 cgsort
[i
].ind_gl
= dd
->index_gl
[i
];
7766 if (cgsort
[i
].nsc
!= 4*fr
->ns
.grid
->ncells
)
7773 fprintf(debug
,"qsort cgs: %d new home %d\n",dd
->ncg_home
,ncg_new
);
7775 /* Determine the order of the charge groups using qsort */
7776 qsort(cgsort
,dd
->ncg_home
,sizeof(cgsort
[0]),comp_cgsort
);
7778 cgsort
= sort
->sort1
;
7780 /* We alloc with the old size, since cgindex is still old */
7781 check_vec_rvec_alloc(&dd
->comm
->vbuf
,dd
->cgindex
[dd
->ncg_home
]);
7782 vbuf
= dd
->comm
->vbuf
.v
;
7784 /* Remove the charge groups which are no longer at home here */
7785 dd
->ncg_home
= ncg_new
;
7787 /* Reorder the state */
7788 for(i
=estX
; i
<estNR
; i
++)
7790 if (state
->flags
& (1<<i
))
7795 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->x
,vbuf
);
7798 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->v
,vbuf
);
7801 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->sd_X
,vbuf
);
7804 order_vec_atom(dd
->ncg_home
,dd
->cgindex
,cgsort
,state
->cg_p
,vbuf
);
7808 case estDISRE_INITF
:
7809 case estDISRE_RM3TAV
:
7810 case estORIRE_INITF
:
7812 /* No ordering required */
7815 gmx_incons("Unknown state entry encountered in dd_sort_state");
7821 order_vec_cg(dd
->ncg_home
,cgsort
,cgcm
,vbuf
);
7823 if (dd
->ncg_home
+1 > sort
->ibuf_nalloc
)
7825 sort
->ibuf_nalloc
= over_alloc_dd(dd
->ncg_home
+1);
7826 srenew(sort
->ibuf
,sort
->ibuf_nalloc
);
7829 /* Reorder the global cg index */
7830 order_int_cg(dd
->ncg_home
,cgsort
,dd
->index_gl
,ibuf
);
7831 /* Reorder the cginfo */
7832 order_int_cg(dd
->ncg_home
,cgsort
,fr
->cginfo
,ibuf
);
7833 /* Rebuild the local cg index */
7835 for(i
=0; i
<dd
->ncg_home
; i
++)
7837 cgsize
= dd
->cgindex
[cgsort
[i
].ind
+1] - dd
->cgindex
[cgsort
[i
].ind
];
7838 ibuf
[i
+1] = ibuf
[i
] + cgsize
;
7840 for(i
=0; i
<dd
->ncg_home
+1; i
++)
7842 dd
->cgindex
[i
] = ibuf
[i
];
7844 /* Set the home atom number */
7845 dd
->nat_home
= dd
->cgindex
[dd
->ncg_home
];
7847 /* Copy the sorted ns cell indices back to the ns grid struct */
7848 for(i
=0; i
<dd
->ncg_home
; i
++)
7850 fr
->ns
.grid
->cell_index
[i
] = cgsort
[i
].nsc
;
7852 fr
->ns
.grid
->nr
= dd
->ncg_home
;
7855 static void add_dd_statistics(gmx_domdec_t
*dd
)
7857 gmx_domdec_comm_t
*comm
;
7862 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
7864 comm
->sum_nat
[ddnat
-ddnatZONE
] +=
7865 comm
->nat
[ddnat
] - comm
->nat
[ddnat
-1];
7870 void reset_dd_statistics_counters(gmx_domdec_t
*dd
)
7872 gmx_domdec_comm_t
*comm
;
7877 /* Reset all the statistics and counters for total run counting */
7878 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
7880 comm
->sum_nat
[ddnat
-ddnatZONE
] = 0;
7884 comm
->load_step
= 0;
7887 clear_ivec(comm
->load_lim
);
7892 void print_dd_statistics(t_commrec
*cr
,t_inputrec
*ir
,FILE *fplog
)
7894 gmx_domdec_comm_t
*comm
;
7898 comm
= cr
->dd
->comm
;
7900 gmx_sumd(ddnatNR
-ddnatZONE
,comm
->sum_nat
,cr
);
7907 fprintf(fplog
,"\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
7909 for(ddnat
=ddnatZONE
; ddnat
<ddnatNR
; ddnat
++)
7911 av
= comm
->sum_nat
[ddnat
-ddnatZONE
]/comm
->ndecomp
;
7916 " av. #atoms communicated per step for force: %d x %.1f\n",
7920 if (cr
->dd
->vsite_comm
)
7923 " av. #atoms communicated per step for vsites: %d x %.1f\n",
7924 (EEL_PME(ir
->coulombtype
) || ir
->coulombtype
==eelEWALD
) ? 3 : 2,
7929 if (cr
->dd
->constraint_comm
)
7932 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
7933 1 + ir
->nLincsIter
,av
);
7937 gmx_incons(" Unknown type for DD statistics");
7940 fprintf(fplog
,"\n");
7942 if (comm
->bRecordLoad
&& EI_DYNAMICS(ir
->eI
))
7944 print_dd_load_av(fplog
,cr
->dd
);
7948 void dd_partition_system(FILE *fplog
,
7953 t_state
*state_global
,
7954 gmx_mtop_t
*top_global
,
7956 t_state
*state_local
,
7959 gmx_localtop_t
*top_local
,
7962 gmx_shellfc_t shellfc
,
7963 gmx_constr_t constr
,
7965 gmx_wallcycle_t wcycle
,
7969 gmx_domdec_comm_t
*comm
;
7972 gmx_step_t step_pcoupl
;
7973 rvec cell_ns_x0
,cell_ns_x1
;
7974 int i
,j
,n
,cg0
=0,ncg_home_old
=-1,nat_f_novirsum
;
7975 bool bBoxChanged
,bNStGlobalComm
,bDoDLB
,bCheckDLB
,bTurnOnDLB
,bLogLoad
;
7976 bool bRedist
,bSortCG
,bResortAll
;
7984 bBoxChanged
= (bMasterState
|| DEFORM(*ir
));
7985 if (ir
->epc
!= epcNO
)
7987 /* With nstcalcenery > 1 pressure coupling happens.
7988 * one step after calculating the energies.
7989 * Box scaling happens at the end of the MD step,
7990 * after the DD partitioning.
7991 * We therefore have to do DLB in the first partitioning
7992 * after an MD step where P-coupling occured.
7993 * We need to determine the last step in which p-coupling occurred.
7995 n
= ir
->nstcalcenergy
;
7998 step_pcoupl
= step
- 1;
8002 step_pcoupl
= ((step
- 1)/n
)*n
+ 1;
8004 if (step_pcoupl
>= comm
->partition_step
)
8010 bNStGlobalComm
= (step
>= comm
->partition_step
+ nstglobalcomm
);
8012 if (!comm
->bDynLoadBal
)
8018 /* Should we do dynamic load balacing this step?
8019 * Since it requires (possibly expensive) global communication,
8020 * we might want to do DLB less frequently.
8022 if (bBoxChanged
|| ir
->epc
!= epcNO
)
8024 bDoDLB
= bBoxChanged
;
8028 bDoDLB
= bNStGlobalComm
;
8032 /* Check if we have recorded loads on the nodes */
8033 if (comm
->bRecordLoad
&& dd_load_count(comm
))
8035 if (comm
->eDLB
== edlbAUTO
&& !comm
->bDynLoadBal
)
8037 /* Check if we should use DLB at the second partitioning
8038 * and every 100 partitionings,
8039 * so the extra communication cost is negligible.
8041 n
= max(100,nstglobalcomm
);
8042 bCheckDLB
= (comm
->n_load_collect
== 0 ||
8043 comm
->n_load_have
% n
== n
-1);
8050 /* Print load every nstlog, first and last step to the log file */
8051 bLogLoad
= ((ir
->nstlog
> 0 && step
% ir
->nstlog
== 0) ||
8052 comm
->n_load_collect
== 0 ||
8053 (step
+ ir
->nstlist
> ir
->init_step
+ ir
->nsteps
));
8055 /* Avoid extra communication due to verbose screen output
8056 * when nstglobalcomm is set.
8058 if (bDoDLB
|| bLogLoad
|| bCheckDLB
||
8059 (bVerbose
&& (ir
->nstlist
== 0 || nstglobalcomm
<= ir
->nstlist
)))
8061 get_load_distribution(dd
,wcycle
);
8066 dd_print_load(fplog
,dd
,step
-1);
8070 dd_print_load_verbose(dd
);
8073 comm
->n_load_collect
++;
8076 /* Since the timings are node dependent, the master decides */
8080 (dd_force_imb_perf_loss(dd
) >= DD_PERF_LOSS
);
8083 fprintf(debug
,"step %s, imb loss %f\n",
8084 gmx_step_str(step
,sbuf
),
8085 dd_force_imb_perf_loss(dd
));
8088 dd_bcast(dd
,sizeof(bTurnOnDLB
),&bTurnOnDLB
);
8091 turn_on_dlb(fplog
,cr
,step
);
8096 comm
->n_load_have
++;
8099 cgs_gl
= &comm
->cgs_gl
;
8104 /* Clear the old state */
8105 clear_dd_indices(dd
,0,0);
8107 set_ddbox(dd
,bMasterState
,cr
,ir
,state_global
->box
,
8108 TRUE
,cgs_gl
,state_global
->x
,&ddbox
);
8110 get_cg_distribution(fplog
,step
,dd
,cgs_gl
,
8111 state_global
->box
,&ddbox
,state_global
->x
);
8113 dd_distribute_state(dd
,cgs_gl
,
8114 state_global
,state_local
,f
);
8116 dd_make_local_cgs(dd
,&top_local
->cgs
);
8118 if (dd
->ncg_home
> fr
->cg_nalloc
)
8120 dd_realloc_fr_cg(fr
,dd
->ncg_home
);
8122 calc_cgcm(fplog
,0,dd
->ncg_home
,
8123 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
8125 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
8127 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
8131 else if (state_local
->ddp_count
!= dd
->ddp_count
)
8133 if (state_local
->ddp_count
> dd
->ddp_count
)
8135 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local
->ddp_count
,dd
->ddp_count
);
8138 if (state_local
->ddp_count_cg_gl
!= state_local
->ddp_count
)
8140 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local
->ddp_count_cg_gl
,state_local
->ddp_count
);
8143 /* Clear the old state */
8144 clear_dd_indices(dd
,0,0);
8146 /* Build the new indices */
8147 rebuild_cgindex(dd
,cgs_gl
->index
,state_local
);
8148 make_dd_indices(dd
,cgs_gl
->index
,0);
8150 /* Redetermine the cg COMs */
8151 calc_cgcm(fplog
,0,dd
->ncg_home
,
8152 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
8154 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
8156 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
8158 set_ddbox(dd
,bMasterState
,cr
,ir
,state_local
->box
,
8159 TRUE
,&top_local
->cgs
,state_local
->x
,&ddbox
);
8161 bRedist
= comm
->bDynLoadBal
;
8165 /* We have the full state, only redistribute the cgs */
8167 /* Clear the non-home indices */
8168 clear_dd_indices(dd
,dd
->ncg_home
,dd
->nat_home
);
8170 /* Avoid global communication for dim's without pbc and -gcom */
8171 if (!bNStGlobalComm
)
8173 copy_rvec(comm
->box0
,ddbox
.box0
);
8174 copy_rvec(comm
->box_size
,ddbox
.box_size
);
8176 set_ddbox(dd
,bMasterState
,cr
,ir
,state_local
->box
,
8177 bNStGlobalComm
,&top_local
->cgs
,state_local
->x
,&ddbox
);
8182 /* For dim's without pbc and -gcom */
8183 copy_rvec(ddbox
.box0
,comm
->box0
);
8184 copy_rvec(ddbox
.box_size
,comm
->box_size
);
8186 set_dd_cell_sizes(dd
,&ddbox
,dynamic_dd_box(&ddbox
,ir
),bMasterState
,bDoDLB
,
8189 if (comm
->nstDDDumpGrid
> 0 && step
% comm
->nstDDDumpGrid
== 0)
8191 write_dd_grid_pdb("dd_grid",step
,dd
,state_local
->box
,&ddbox
);
8194 /* Check if we should sort the charge groups */
8195 if (comm
->nstSortCG
> 0)
8197 bSortCG
= (bMasterState
||
8198 (bRedist
&& (step
% comm
->nstSortCG
== 0)));
8205 ncg_home_old
= dd
->ncg_home
;
8209 cg0
= dd_redistribute_cg(fplog
,step
,dd
,ddbox
.tric_dir
,
8210 state_local
,f
,fr
,mdatoms
,
8214 get_nsgrid_boundaries(fr
->ns
.grid
,dd
,
8215 state_local
->box
,&ddbox
,&comm
->cell_x0
,&comm
->cell_x1
,
8216 dd
->ncg_home
,fr
->cg_cm
,
8217 cell_ns_x0
,cell_ns_x1
,&grid_density
);
8221 comm_dd_ns_cell_sizes(dd
,&ddbox
,cell_ns_x0
,cell_ns_x1
,step
);
8224 copy_ivec(fr
->ns
.grid
->n
,ncells_old
);
8225 grid_first(fplog
,fr
->ns
.grid
,dd
,&ddbox
,fr
->ePBC
,
8226 state_local
->box
,cell_ns_x0
,cell_ns_x1
,
8227 fr
->rlistlong
,grid_density
);
8228 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
8229 copy_ivec(ddbox
.tric_dir
,comm
->tric_dir
);
8233 /* Sort the state on charge group position.
8234 * This enables exact restarts from this step.
8235 * It also improves performance by about 15% with larger numbers
8236 * of atoms per node.
8239 /* Fill the ns grid with the home cell,
8240 * so we can sort with the indices.
8242 set_zones_ncg_home(dd
);
8243 fill_grid(fplog
,&comm
->zones
,fr
->ns
.grid
,dd
->ncg_home
,
8244 0,dd
->ncg_home
,fr
->cg_cm
);
8246 /* Check if we can user the old order and ns grid cell indices
8247 * of the charge groups to sort the charge groups efficiently.
8249 bResortAll
= (bMasterState
||
8250 fr
->ns
.grid
->n
[XX
] != ncells_old
[XX
] ||
8251 fr
->ns
.grid
->n
[YY
] != ncells_old
[YY
] ||
8252 fr
->ns
.grid
->n
[ZZ
] != ncells_old
[ZZ
]);
8256 fprintf(debug
,"Step %s, sorting the %d home charge groups\n",
8257 gmx_step_str(step
,sbuf
),dd
->ncg_home
);
8259 dd_sort_state(dd
,ir
->ePBC
,fr
->cg_cm
,fr
,state_local
,
8260 bResortAll
? -1 : ncg_home_old
);
8261 /* Rebuild all the indices */
8263 ga2la_clear(dd
->ga2la
);
8266 /* Setup up the communication and communicate the coordinates */
8267 setup_dd_communication(dd
,state_local
->box
,&ddbox
,fr
);
8269 /* Set the indices */
8270 make_dd_indices(dd
,cgs_gl
->index
,cg0
);
8272 /* Set the charge group boundaries for neighbor searching */
8273 set_cg_boundaries(&comm
->zones
);
8276 write_dd_pdb("dd_home",step,"dump",top_global,cr,
8277 -1,state_local->x,state_local->box);
8280 /* Extract a local topology from the global topology */
8281 for(i
=0; i
<dd
->ndim
; i
++)
8283 np
[dd
->dim
[i
]] = comm
->cd
[i
].np
;
8285 dd_make_local_top(fplog
,dd
,&comm
->zones
,dd
->npbcdim
,state_local
->box
,
8286 comm
->cellsize_min
,np
,
8287 fr
,vsite
,top_global
,top_local
);
8289 /* Set up the special atom communication */
8290 n
= comm
->nat
[ddnatZONE
];
8291 for(i
=ddnatZONE
+1; i
<ddnatNR
; i
++)
8296 if (vsite
&& vsite
->n_intercg_vsite
)
8298 n
= dd_make_local_vsites(dd
,n
,top_local
->idef
.il
);
8302 if (dd
->bInterCGcons
)
8304 /* Only for inter-cg constraints we need special code */
8305 n
= dd_make_local_constraints(dd
,n
,top_global
,
8306 constr
,ir
->nProjOrder
,
8307 &top_local
->idef
.il
[F_CONSTR
]);
8311 gmx_incons("Unknown special atom type setup");
8316 /* Make space for the extra coordinates for virtual site
8317 * or constraint communication.
8319 state_local
->natoms
= comm
->nat
[ddnatNR
-1];
8320 if (state_local
->natoms
> state_local
->nalloc
)
8322 dd_realloc_state(state_local
,f
,state_local
->natoms
);
8325 if (fr
->bF_NoVirSum
)
8327 if (vsite
&& vsite
->n_intercg_vsite
)
8329 nat_f_novirsum
= comm
->nat
[ddnatVSITE
];
8333 if (EEL_FULL(ir
->coulombtype
) && dd
->n_intercg_excl
> 0)
8335 nat_f_novirsum
= dd
->nat_tot
;
8339 nat_f_novirsum
= dd
->nat_home
;
8348 /* Set the number of atoms required for the force calculation */
8349 forcerec_set_ranges(fr
,dd
->ncg_home
,dd
->ncg_tot
,dd
->nat_tot
,nat_f_novirsum
);
8351 /* We make the all mdatoms up to nat_tot_con.
8352 * We could save some work by only setting invmass
8353 * between nat_tot and nat_tot_con.
8355 /* This call also sets the new number of home particles to dd->nat_home */
8356 atoms2md(top_global
,ir
,
8357 comm
->nat
[ddnatCON
],dd
->gatindex
,0,dd
->nat_home
,mdatoms
);
8361 /* Make the local shell stuff, currently no communication is done */
8362 make_local_shells(cr
,mdatoms
,shellfc
);
8365 if (ir
->implicit_solvent
)
8367 make_local_gb(cr
,fr
->born
,ir
->gb_algorithm
);
8370 if (!(cr
->duty
& DUTY_PME
))
8372 /* Send the charges to our PME only node */
8373 gmx_pme_send_q(cr
,mdatoms
->nChargePerturbed
,
8374 mdatoms
->chargeA
,mdatoms
->chargeB
,
8375 comm
->ddpme
[0].maxshift
,comm
->ddpme
[1].maxshift
);
8380 set_constraints(constr
,top_local
,ir
,mdatoms
,cr
);
8383 if (ir
->ePull
!= epullNO
)
8385 /* Update the local pull groups */
8386 dd_make_local_pull_groups(dd
,ir
->pull
,mdatoms
);
8391 /* Update the local rotation groups */
8392 dd_make_local_rotation_groups(dd
,ir
->rot
,mdatoms
);
8396 add_dd_statistics(dd
);
8398 /* Make sure we only count the cycles for this DD partitioning */
8399 clear_dd_cycle_counts(dd
);
8401 /* Because the order of the atoms might have changed since
8402 * the last vsite construction, we need to communicate the constructing
8403 * atom coordinates again (for spreading the forces this MD step).
8405 dd_move_x_vsites(dd
,state_local
->box
,state_local
->x
);
8407 if (comm
->nstDDDump
> 0 && step
% comm
->nstDDDump
== 0)
8409 dd_move_x(dd
,state_local
->box
,state_local
->x
);
8410 write_dd_pdb("dd_dump",step
,"dump",top_global
,cr
,
8411 -1,state_local
->x
,state_local
->box
);
8414 /* Store the partitioning step */
8415 comm
->partition_step
= step
;
8417 /* Increase the DD partitioning counter */
8419 /* The state currently matches this DD partitioning count, store it */
8420 state_local
->ddp_count
= dd
->ddp_count
;
8423 /* The DD master node knows the complete cg distribution,
8424 * store the count so we can possibly skip the cg info communication.
8426 comm
->master_cg_ddp_count
= (bSortCG
? 0 : dd
->ddp_count
);
8429 if (comm
->DD_debug
> 0)
8431 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
8432 check_index_consistency(dd
,top_global
->natoms
,ncg_mtop(top_global
),
8433 "after partitioning");