1 /* #define LEARN_BCAST */
2 /***********************************************************************
6 The following is a notice of limited availability of the code and
7 Government license and disclaimer which must be included in the
8 prologue of the code and in all source listings of the code.
11 (c) 1977 University of Chicago
13 Permission is hereby granted to use, reproduce, prepare
14 derivative works, and to redistribute to others at no charge. If
15 you distribute a copy or copies of the Software, or you modify a
16 copy or copies of the Software or any portion of it, thus forming
17 a work based on the Software and make and/or distribute copies of
18 such work, you must meet the following conditions:
20 a) If you make a copy of the Software (modified or verbatim)
21 it must include the copyright notice and Government
22 license and disclaimer.
24 b) You must cause the modified Software to carry prominent
25 notices stating that you changed specified portions of
28 This software was authored by:
30 Argonne National Laboratory
31 J. Michalakes: (630) 252-6646; email: michalak@mcs.anl.gov
32 Mathematics and Computer Science Division
33 Argonne National Laboratory, Argonne, IL 60439
35 ARGONNE NATIONAL LABORATORY (ANL), WITH FACILITIES IN THE STATES
36 OF ILLINOIS AND IDAHO, IS OWNED BY THE UNITED STATES GOVERNMENT,
37 AND OPERATED BY THE UNIVERSITY OF CHICAGO UNDER PROVISION OF A
38 CONTRACT WITH THE DEPARTMENT OF ENERGY.
40 GOVERNMENT LICENSE AND DISCLAIMER
42 This computer code material was prepared, in part, as an account
43 of work sponsored by an agency of the United States Government.
44 The Government is granted for itself and others acting on its
45 behalf a paid-up, nonexclusive, irrevocable worldwide license in
46 this data to reproduce, prepare derivative works, distribute
47 copies to the public, perform publicly and display publicly, and
48 to permit others to do so. NEITHER THE UNITED STATES GOVERNMENT
49 NOR ANY AGENCY THEREOF, NOR THE UNIVERSITY OF CHICAGO, NOR ANY OF
50 THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
51 ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
52 COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS,
53 PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD
54 NOT INFRINGE PRIVATELY OWNED RIGHTS.
56 ***************************************************************************/
69 typedef struct bcast_point_desc
{
72 } bcast_point_desc_t
;
75 static destroy_par_info ( p
)
78 if ( p
!= NULL
) RSL_FREE( p
) ;
81 static rsl_list_t
*Xlist
, *Xp
, *Xprev
;
82 static rsl_list_t
*stage
;
83 static int stage_len
= 0 ; /* 96/3/15 */
85 static int Sendbufsize
;
86 static int Sendbufcurs
;
87 static char *Sendbuf
;
88 static int Sdisplacements
[RSL_MAXPROC
] ;
89 static int Ssizes
[RSL_MAXPROC
] ;
91 static int Recsizeindex
;
95 static int Rpointcurs
;
96 static char *Recvbuf
;
97 static int Rdisplacements
[RSL_MAXPROC
+1] ;
98 static int Rsizes
[RSL_MAXPROC
] ;
106 static int s_idim_nst
;
107 static int s_jdim_nst
;
108 static int s_irax_n
;
109 static int s_irax_m
;
110 static int s_ntasks_x
;
111 static int s_ntasks_y
;
112 static rsl_list_t
**Plist
;
113 static int Psize
[RSL_MAXPROC
] ;
114 static char *s_parent_msgs
;
115 static int s_parent_msgs_curs
;
116 static int s_remaining
; /* number of bytes left in a parent message before
117 the next point descriptor */
119 /* add a field to a message outgoing for the specified child domain cell */
120 /* relies on rsl_ready_bcast having been called already */
121 /* sends are specified in terms of coarse domain */
123 static int s_i
, s_j
, s_ig
, s_jg
, s_cm
, s_cn
,
127 static rsl_list_t
*Pptr
;
130 static int s_putmsg
= 0 ;
134 RSL_LITE_TO_CHILD_INFO ( Fcomm
, msize_p
,
135 cips_p
, cipe_p
, cjps_p
, cjpe_p
, /* patch dims of SOURCE DOMAIN */
136 iids_p
, iide_p
, ijds_p
, ijde_p
, /* domain dims of INTERMEDIATE DOMAIN */
137 nids_p
, nide_p
, njds_p
, njde_p
, /* domain dims of CHILD DOMAIN */
138 pgr_p
, shw_p
, /* nest ratio and stencil half width */
139 ntasks_x_p
, ntasks_y_p
, /* proc counts in x and y */
140 min_subdomain
, /* minimum width allowed for a subdomain in a dim ON PARENT */
142 idim_cd_p
, jdim_cd_p
,
147 Fcomm
/* Fortran version of MPI communicator */
148 ,cips_p
, cipe_p
, cjps_p
, cjpe_p
/* (i) c.d. patch dims */
149 ,iids_p
, iide_p
, ijds_p
, ijde_p
/* (i) n.n. global dims */
150 ,nids_p
, nide_p
, njds_p
, njde_p
/* (i) n.n. global dims */
151 ,pgr_p
/* nesting ratio */
152 ,ntasks_x_p
, ntasks_y_p
/* proc counts in x and y */
154 ,icoord_p
/* i coordinate of nest in cd */
155 ,jcoord_p
/* j coordinate of nest in cd */
156 ,shw_p
/* stencil half width */
157 ,idim_cd_p
/* i width of nest in cd */
158 ,jdim_cd_p
/* j width of nest in cd */
159 ,msize_p
/* (I) Message size in bytes. */
160 ,ig_p
/* (O) Global N index of parent domain point. */
161 ,jg_p
/* (O) Global N index of parent domain point. */
162 ,retval_p
; /* (O) =1 if a valid point returned; =0 (zero) otherwise. */
172 MPI_Comm
*comm
, dummy_comm
;
175 *comm
= MPI_Comm_f2c( *Fcomm
) ;
178 if ( Plist
== NULL
) {
179 s_ntasks_x
= *ntasks_x_p
;
180 s_ntasks_y
= *ntasks_y_p
;
181 /* construct Plist */
183 Plist
= RSL_MALLOC( rsl_list_t
* , s_ntasks_x
* s_ntasks_y
) ; /* big enough for nest points */
184 for ( j
= 0 ; j
< s_ntasks_x
* s_ntasks_y
; j
++ ) {
186 Sdisplacements
[j
] = 0 ;
190 for ( j
= *cjps_p
; j
<= *cjpe_p
; j
++ )
192 for ( i
= *cips_p
; i
<= *cipe_p
; i
++ )
194 if ( ( *jcoord_p
<= j
&& j
<= *jcoord_p
+*jdim_cd_p
-1 ) && ( *icoord_p
<= i
&& i
<= *icoord_p
+*idim_cd_p
-1 ) ) {
195 ni
= ( i
- (*icoord_p
+ *shw_p
) ) * *pgr_p
+ 1 + 1 ; /* add 1 to give center point */
196 nj
= ( j
- (*jcoord_p
+ *shw_p
) ) * *pgr_p
+ 1 + 1 ;
199 TASK_FOR_POINT ( &ni
, &nj
, nids_p
, nide_p
, njds_p
, njde_p
, &s_ntasks_x
, &s_ntasks_y
, &Px
, &Py
,
200 min_subdomain
, min_subdomain
, &ierr
) ;
201 coords
[1] = Px
; coords
[0] = Py
;
202 MPI_Cart_rank( *comm
, coords
, &P
) ;
206 q
= RSL_MALLOC( rsl_list_t
, 1 ) ;
211 Sendbufsize
+= *msize_p
+ 3 * sizeof( int ) ; /* point data plus 3 ints for i, j, and size */
216 fprintf(stderr
,"rsl_to_child_info: ") ;
217 TASK_FOR_POINT_MESSAGE () ;
219 Sendbuf
= RSL_MALLOC( char , Sendbufsize
) ;
226 if ( Pptr
!= NULL
) {
230 if ( Recsizeindex
>= 0 ) {
231 r
= (int *) &(Sendbuf
[Recsizeindex
]) ;
232 *r
= Sendbufcurs
- Recsizeindex
+ 2 * sizeof(int) ;
233 Ssizes
[Pcurs
] += *r
;
236 while ( Pptr
== NULL
) {
238 while ( Pcurs
< s_ntasks_x
* s_ntasks_y
&& Plist
[Pcurs
] == NULL
) Pcurs
++ ;
239 if ( Pcurs
< s_ntasks_x
* s_ntasks_y
) {
240 Sdisplacements
[Pcurs
] = Sendbufcurs
;
242 Pptr
= Plist
[Pcurs
] ;
249 *ig_p
= Pptr
->info1
;
250 *jg_p
= Pptr
->info2
;
252 r
= (int *) &(Sendbuf
[Sendbufcurs
]) ;
253 *r
++ = Pptr
->info1
; Sendbufcurs
+= sizeof(int) ; /* ig to buffer */
254 *r
++ = Pptr
->info2
; Sendbufcurs
+= sizeof(int) ; /* jg to buffer */
255 Recsizeindex
= Sendbufcurs
;
256 *r
++ = 0 ; Sendbufcurs
+= sizeof(int) ; /* store start for size */
262 /********************************************/
265 RSL_LITE_TO_PARENT_INFO ( Fcomm
, msize_p
,
266 nips_p
, nipe_p
, njps_p
, njpe_p
, /* patch dims of SOURCE DOMAIN (CHILD) */
267 cids_p
, cide_p
, cjds_p
, cjde_p
, /* domain dims of TARGET DOMAIN (PARENT) */
268 ntasks_x_p
, ntasks_y_p
, /* proc counts in x and y */
271 idim_cd_p
, jdim_cd_p
,
275 Fcomm
/* Fortran version of MPI communicator */
276 ,nips_p
, nipe_p
, njps_p
, njpe_p
/* (i) n.d. patch dims */
277 ,cids_p
, cide_p
, cjds_p
, cjde_p
/* (i) n.n. global dims */
278 ,ntasks_x_p
, ntasks_y_p
/* proc counts in x and y */
280 ,icoord_p
/* i coordinate of nest in cd */
281 ,jcoord_p
/* j coordinate of nest in cd */
282 ,idim_cd_p
/* i width of nest in cd */
283 ,jdim_cd_p
/* j width of nest in cd */
284 ,msize_p
/* (I) Message size in bytes. */
285 ,ig_p
/* (O) Global N index of parent domain point. */
286 ,jg_p
/* (O) Global N index of parent domain point. */
287 ,retval_p
; /* (O) =1 if a valid point returned; =0 (zero) otherwise. */
296 MPI_Comm
*comm
, dummy_comm
;
299 *comm
= MPI_Comm_f2c( *Fcomm
) ;
302 if ( Plist
== NULL
) {
303 s_ntasks_x
= *ntasks_x_p
;
304 s_ntasks_y
= *ntasks_y_p
;
305 /* construct Plist */
307 Plist
= RSL_MALLOC( rsl_list_t
* , s_ntasks_x
* s_ntasks_y
) ;
308 for ( j
= 0 ; j
< s_ntasks_x
* s_ntasks_y
; j
++ ) {
310 Sdisplacements
[j
] = 0 ;
314 for ( j
= *njps_p
; j
<= *njpe_p
; j
++ )
316 for ( i
= *nips_p
; i
<= *nipe_p
; i
++ )
318 if ( ( *jcoord_p
<= j
&& j
<= *jcoord_p
+*jdim_cd_p
-1 ) && ( *icoord_p
<= i
&& i
<= *icoord_p
+*idim_cd_p
-1 ) ) {
320 TASK_FOR_POINT ( &i
, &j
, cids_p
, cide_p
, cjds_p
, cjde_p
, &s_ntasks_x
, &s_ntasks_y
, &Px
, &Py
,
321 min_subdomain
, min_subdomain
, &ierr
) ;
322 coords
[1] = Px
; coords
[0] = Py
;
323 MPI_Cart_rank( *comm
, coords
, &P
) ;
327 q
= RSL_MALLOC( rsl_list_t
, 1 ) ;
332 Sendbufsize
+= *msize_p
+ 3 * sizeof( int ) ; /* point data plus 3 ints for i, j, and size */
337 fprintf(stderr
,"rsl_to_parent_info: ") ;
338 TASK_FOR_POINT_MESSAGE () ;
340 Sendbuf
= RSL_MALLOC( char , Sendbufsize
) ;
346 if ( Pptr
!= NULL
) {
350 if ( Recsizeindex
>= 0 ) {
351 r
= (int *) &(Sendbuf
[Recsizeindex
]) ;
352 *r
= Sendbufcurs
- Recsizeindex
+ 2 * sizeof(int) ;
353 Ssizes
[Pcurs
] += *r
;
356 while ( Pptr
== NULL
) {
358 while ( Pcurs
< s_ntasks_x
* s_ntasks_y
&& Plist
[Pcurs
] == NULL
) Pcurs
++ ;
359 if ( Pcurs
< s_ntasks_x
* s_ntasks_y
) {
360 Sdisplacements
[Pcurs
] = Sendbufcurs
;
362 Pptr
= Plist
[Pcurs
] ;
369 *ig_p
= Pptr
->info1
;
370 *jg_p
= Pptr
->info2
;
372 r
= (int *) &(Sendbuf
[Sendbufcurs
]) ;
373 *r
++ = Pptr
->info1
; Sendbufcurs
+= sizeof(int) ; /* ig to buffer */
374 *r
++ = Pptr
->info2
; Sendbufcurs
+= sizeof(int) ; /* jg to buffer */
375 Recsizeindex
= Sendbufcurs
;
376 *r
++ = 0 ; Sendbufcurs
+= sizeof(int) ; /* store start for size */
383 /********************************************/
386 RSL_TO_CHILD_MSG -- Pack force data into a message for a nest point.
391 RSL_LITE_TO_CHILD_MSG ( nbuf_p
, buf
)
393 nbuf_p
; /* (I) Number of bytes to be packed. */
395 buf
; /* (I) Buffer containing the data to be packed. */
397 rsl_lite_to_peerpoint_msg ( nbuf_p
, buf
) ;
401 RSL_LITE_TO_PARENT_MSG ( nbuf_p
, buf
)
403 nbuf_p
; /* (I) Number of bytes to be packed. */
405 buf
; /* (I) Buffer containing the data to be packed. */
407 rsl_lite_to_peerpoint_msg ( nbuf_p
, buf
) ;
411 rsl_lite_to_peerpoint_msg ( nbuf_p
, buf
)
413 nbuf_p
; /* (I) Number of bytes to be packed. */
415 buf
; /* (I) Buffer containing the data to be packed. */
423 RSL_TEST_ERR(buf
==NULL
,"2nd argument is NULL. Field allocated?") ;
427 if ( Sendbufcurs
+ nbuf
>= Sendbufsize
) {
428 sprintf(mess
,"RSL_LITE_TO_CHILD_MSG: Sendbufcurs + nbuf (%d) would exceed Sendbufsize (%d)\n",
429 Sendbufcurs
+ nbuf
, Sendbufsize
) ;
430 RSL_TEST_ERR(1,mess
) ;
433 if ( nbuf
% sizeof(int) == 0 ) {
434 for ( p
= (int *)buf
, q
= (int *) &(Sendbuf
[Sendbufcurs
]), i
= 0 ; i
< nbuf
; i
+= sizeof(int) )
441 for ( c
= buf
, d
= &(Sendbuf
[Sendbufcurs
]), i
= 0 ; i
< nbuf
; i
++ )
447 Sendbufcurs
+= nbuf
;
451 /********************************************/
454 RSL_LITE_BCAST_MSGS ( mytask_p
, ntasks_p
, Fcomm
)
455 int_p mytask_p
, ntasks_p
, Fcomm
;
460 comm
= MPI_Comm_f2c( *Fcomm
) ;
464 rsl_lite_allgather_msgs ( mytask_p
, ntasks_p
, comm
) ;
468 RSL_LITE_MERGE_MSGS ( mytask_p
, ntasks_p
, Fcomm
)
469 int_p mytask_p
, ntasks_p
, Fcomm
;
474 comm
= MPI_Comm_f2c( *Fcomm
) ;
478 rsl_lite_allgather_msgs ( mytask_p
, ntasks_p
, comm
) ;
482 rsl_lite_allgather_msgs ( mytask_p
, ntasks_p
, comm
)
483 int_p mytask_p
, ntasks_p
;
493 bcast_point_desc_t pdesc
;
495 int msglen
, mdest
, mtag
;
511 RSL_TEST_ERR( Plist
== NULL
,
512 "RSL_BCAST_MSGS: rsl_to_child_info not called first" ) ;
514 RSL_TEST_ERR( ntasks
== RSL_MAXPROC
,
515 "RSL_BCAST_MSGS: raise the compile time value of MAXPROC" ) ;
517 Psize_all
= RSL_MALLOC( int, ntasks
* ntasks
) ;
520 MPI_Allgather( Ssizes
, ntasks
, MPI_INT
, Psize_all
, ntasks
, MPI_INT
, comm
) ;
522 Psize_all
[0] = Ssizes
[0] ;
525 for ( j
= 0 ; j
< ntasks
; j
++ )
528 for ( j
= 0 ; j
< ntasks
; j
++ )
530 Rsizes
[j
] += Psize_all
[ INDEX_2( j
, mytask
, ntasks
) ] ;
533 for ( Rbufsize
= 0, P
= 0, Rdisplacements
[0] = 0 ; P
< ntasks
; P
++ )
535 Rdisplacements
[P
+1] = Rsizes
[P
] + Rdisplacements
[P
] ;
536 Rbufsize
+= Rsizes
[P
] ;
539 /* this will be freed later */
541 Recvbuf
= RSL_MALLOC( char , Rbufsize
+ 3 * sizeof(int) ) ; /* for sentinal record */
546 rc
= MPI_Alltoallv ( Sendbuf
, Ssizes
, Sdisplacements
, MPI_BYTE
,
547 Recvbuf
, Rsizes
, Rdisplacements
, MPI_BYTE
, comm
) ;
554 /* add sentinel to the end of Recvbuf */
556 r
= (int *)&(Recvbuf
[Rbufsize
+ 2 * sizeof(int)]) ;
559 RSL_FREE( Sendbuf
) ;
560 RSL_FREE( Psize_all
) ;
562 for ( j
= 0 ; j
< *ntasks_p
; j
++ ) {
563 destroy_list ( &(Plist
[j
]), NULL
) ;
570 /********************************************/
573 RSL_LITE_FROM_PARENT_INFO ( ig_p
, jg_p
, retval_p
)
575 ig_p
/* (O) Global index in M dimension of nest. */
576 ,jg_p
/* (O) Global index in N dimension of nest. */
577 ,retval_p
; /* (O) Return value; =1 valid point, =0 done. */
579 rsl_lite_from_peerpoint_info ( ig_p
, jg_p
, retval_p
) ;
583 RSL_LITE_FROM_CHILD_INFO ( ig_p
, jg_p
, retval_p
)
585 ig_p
/* (O) Global index in M dimension of nest. */
586 ,jg_p
/* (O) Global index in N dimension of nest. */
587 ,retval_p
; /* (O) Return value; =1 valid point, =0 done. */
589 rsl_lite_from_peerpoint_info ( ig_p
, jg_p
, retval_p
) ;
593 rsl_lite_from_peerpoint_info ( ig_p
, jg_p
, retval_p
)
595 ig_p
/* (O) Global index in M dimension of nest. */
596 ,jg_p
/* (O) Global index in N dimension of nest. */
597 ,retval_p
; /* (O) Return value; =1 valid point, =0 done. */
601 Rbufcurs
= Rbufcurs
+ Rreclen
;
603 *ig_p
= *(int *)&( Recvbuf
[Rbufcurs
+ Rpointcurs
] ) ; Rpointcurs
+= sizeof(int) ;
604 *jg_p
= *(int *)&( Recvbuf
[Rbufcurs
+ Rpointcurs
] ) ; Rpointcurs
+= sizeof(int) ;
606 Rreclen
= *(int *)&( Recvbuf
[Rbufcurs
+ Rpointcurs
] ) ; Rpointcurs
+= sizeof(int) ;
608 if ( Rreclen
== RSL_INVALID
) {
610 RSL_FREE( Recvbuf
) ;
616 /********************************************/
619 RSL_LITE_FROM_PARENT_MSG ( len_p
, buf
)
621 len_p
; /* (I) Number of bytes to unpack. */
623 buf
; /* (O) Destination buffer. */
625 rsl_lite_from_peerpoint_msg ( len_p
, buf
) ;
629 RSL_LITE_FROM_CHILD_MSG ( len_p
, buf
)
631 len_p
; /* (I) Number of bytes to unpack. */
633 buf
; /* (O) Destination buffer. */
635 rsl_lite_from_peerpoint_msg ( len_p
, buf
) ;
639 rsl_lite_from_peerpoint_msg ( len_p
, buf
)
641 len_p
; /* (I) Number of bytes to unpack. */
643 buf
; /* (O) Destination buffer. */
649 if ( *len_p
% sizeof(int) == 0 ) {
650 for ( p
= (int *)&(Recvbuf
[Rbufcurs
+Rpointcurs
]), q
= buf
, i
= 0 ; i
< *len_p
; i
+= sizeof(int) )
655 for ( c
= &(Recvbuf
[Rbufcurs
+Rpointcurs
]), d
= (char *) buf
, i
= 0 ; i
< *len_p
; i
++ )
661 Rpointcurs
+= *len_p
;
664 /********************************************/
666 destroy_list( list
, dfcn
)
667 rsl_list_t
** list
; /* pointer to pointer to list */
668 int (*dfcn
)() ; /* pointer to function for destroying
669 the data field of the list */
671 rsl_list_t
*p
, *trash
;
672 if ( list
== NULL
) return(0) ;
673 if ( *list
== NULL
) return(0) ;
674 for ( p
= *list
; p
!= NULL
; )
676 if ( dfcn
!= NULL
) (*dfcn
)( p
->data
) ;
685 /********************************************/