1 /***********************************************************************
5 The following is a notice of limited availability of the code and
6 Government license and disclaimer which must be included in the
7 prologue of the code and in all source listings of the code.
10 (c) 1977 University of Chicago
12 Permission is hereby granted to use, reproduce, prepare
13 derivative works, and to redistribute to others at no charge. If
14 you distribute a copy or copies of the Software, or you modify a
15 copy or copies of the Software or any portion of it, thus forming
16 a work based on the Software and make and/or distribute copies of
17 such work, you must meet the following conditions:
19 a) If you make a copy of the Software (modified or verbatim)
20 it must include the copyright notice and Government
21 license and disclaimer.
23 b) You must cause the modified Software to carry prominent
24 notices stating that you changed specified portions of
27 This software was authored by:
29 Argonne National Laboratory
30 J. Michalakes: (630) 252-6646; email: michalak@mcs.anl.gov
31 Mathematics and Computer Science Division
32 Argonne National Laboratory, Argonne, IL 60439
34 ARGONNE NATIONAL LABORATORY (ANL), WITH FACILITIES IN THE STATES
35 OF ILLINOIS AND IDAHO, IS OWNED BY THE UNITED STATES GOVERNMENT,
36 AND OPERATED BY THE UNIVERSITY OF CHICAGO UNDER PROVISION OF A
37 CONTRACT WITH THE DEPARTMENT OF ENERGY.
39 GOVERNMENT LICENSE AND DISCLAIMER
41 This computer code material was prepared, in part, as an account
42 of work sponsored by an agency of the United States Government.
43 The Government is granted for itself and others acting on its
44 behalf a paid-up, nonexclusive, irrevocable worldwide license in
45 this data to reproduce, prepare derivative works, distribute
46 copies to the public, perform publicly and display publicly, and
47 to permit others to do so. NEITHER THE UNITED STATES GOVERNMENT
48 NOR ANY AGENCY THEREOF, NOR THE UNIVERSITY OF CHICAGO, NOR ANY OF
49 THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
50 ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
51 COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS,
52 PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD
53 NOT INFRINGE PRIVATELY OWNED RIGHTS.
55 ***************************************************************************/
62 RSL_EXCH_STENCIL - Exchange data on an RSL stencil
65 This routine is used to exchange data within domain Arg1 using
67 When this routine returns, data
68 in the ghost areas around the local partition will have been
69 updated with data from cells on surrounding processors, as
70 described in the stencil. A stencil must have been described
71 in the context of a domain before it can be used on the domain
72 (RSL_DESCRIBE_STENCIL).
74 This routine generates interprocessor communication on message
75 passing architectures.
77 All processors must call RSL_EXCH_STENCIL at the same point in
81 RSL_CREATE_STENCIL, RSL_DESCRIBE_STENCIL
88 void copymem( void *src
, /* Source address (byte) */
89 int src_inc
, /* Stride between source per copy (bytes) */
90 void *dest
, /* Destination address (byte) */
91 int dest_inc
, /* Stride between destination per copy (bytes) */
92 int nbytes
, /* Number of bytes to move per copy (bytes) */
93 int nelems
) /* Number of times to repeat copy */
95 /* Byte based pointers. */
98 /* 4 byte based pointers. */
101 /* 8 byte based pointers. */
107 if ( (((long)src
| (long)dest
| src_inc
| dest_inc
| nbytes
) & (sizeof(int) -1)) == 0 )
109 src4b
= (int *)(src
) ;
110 dest4b
= (int *)(dest
) ;
111 src_inc
/= sizeof(int) ;
112 dest_inc
/= sizeof(int) ;
114 for ( outer
= 0 ; outer
< nbytes
/sizeof(int) ; outer
++ )
117 for ( inner
= 0 ; inner
< nelems
; inner
++ )
119 dest4b
[outer
+ inner
*dest_inc
] = src4b
[outer
+ inner
*src_inc
] ;
125 src1b
= (char *) (src
) ;
126 dest1b
= (char *) (dest
) ;
127 for ( inner
= 0 ; inner
< nelems
; inner
++ )
129 bcopy(src1b
, dest1b
, nbytes
) ;
137 RSL_EXCH_STENCIL ( d_p
, s_p
)
139 d_p
/* (I) Domain descriptor. */
140 ,s_p
; /* (I) Stencil descriptor. */
143 stencil_desc_t
*sten
;
144 message_desc_t
*msg
;
145 rsl_procrec_t
*procrec
;
147 rsl_list_t
*lp
, *lp1
;
149 rsl_point_hdr_t point_hdr
;
157 int Pque
[RSL_MAXPROC
] ;
158 rsl_procrec_t
*procrecque
[RSL_MAXPROC
] ;
159 int typeque
[RSL_MAXPROC
] ;
165 d
= *d_p
; s
= *s_p
;
167 RSL_TEST_ERR(d
< 0 || d
>= RSL_MAXDOMAINS
,
168 "bad domain descriptor" ) ;
169 RSL_TEST_ERR( domain_info
[d
].valid
!= RSL_VALID
,
170 "descriptor for invalid domain" ) ;
173 MPE_Log_event( 15, s
, "sten begin" ) ;
176 fprintf(stderr
,"debug called RSL_EXCH_STENCIL %d\n",s
) ;
179 if ((sten
= (stencil_desc_t
*) sh_descriptors
[ s
]) == NULL
)
181 RSL_TEST_ERR(1,"invalid or unspecified stencil descriptor" ) ;
184 /* if stencil has not been compiled, compile it now! */
185 if ( sten
->compiled
[d
] == 0 )
187 rsl_compile_stencil( d_p
, s_p
) ;
191 /* iterate over procrecs for domain and post buffers */
194 for ( procrec
= sten
->procs
[d
] ; procrec
!= NULL
; procrec
= procrec
->next
)
196 if ( procrec
->unpack_table_nbytes
> 0 )
200 procrecque
[tqp
] = procrec
;
201 pbuf
= buffer_for_proc( P
, procrec
->unpack_table_nbytes
, RSL_RECVBUF
) ;
202 mtype
= MTYPE_FROMTO( MSG_STENCOM
,
203 rsl_c_comp2phys_proc (procrec
->P
),
205 typeque
[tqp
] = mtype
;
206 procrec
->nrecvs
++ ; /* diagnostic */
208 fprintf(stderr
,"debug posting async recv for %d bytes from %d\n", procrec
->unpack_table_nbytes
, rsl_c_comp2phys_proc (procrec
->P
) ) ;
210 RSL_RECVBEGIN ( pbuf
, procrec
->unpack_table_nbytes
, mtype
) ;
216 /* pack buffers and issue sends */
218 for ( procrec
= sten
->procs
[d
] ; procrec
!= NULL
; procrec
= procrec
->next
)
220 pbuf
=buffer_for_proc(procrec
->P
, procrec
->pack_table_nbytes
, RSL_SENDBUF
) ;
221 pr
= procrec
->pack_table
;
222 for ( curs
= 0, i
= 0 ; i
< procrec
->pack_table_size
; i
++, pr
++ )
224 if ( sten
->has_f90_fields
&& procrec
->pack_table_size
> 0 )
225 base
= (void *) get_base_for_index ( pr
->f90_table_index
) ;
229 fprintf(stderr
,"pack base %lu, f90_index %d, sten=%d\n",base
,pr
->f90_table_index
,s
) ;
232 for ( j
= 0 ; j
< pr
->nelems
; j
++ )
236 if ( rsl_debug_flg
) {
237 fprintf(stderr
,"pck %08x, buf %08x, curs %5d, n %5d, off %5d, j %5d, s %5d\n",
238 (char *)(base
) + pr
->offset
+ j
* pr
->stride
,
239 &(pbuf
[curs
]), curs
, pr
->n
,
240 pr
->offset
, j
, pr
->stride
) ;
244 bcopy((char *)(base
) + pr
->offset
+ j
* pr
->stride
,
245 &(pbuf
[curs
]),pr
->n
) ;
249 copymem((char *)(base
) + pr
->offset
, pr
->stride
, &(pbuf
[curs
]), pr
->n
, pr
->n
, pr
->nelems
) ;
250 curs
+= pr
->n
*pr
->nelems
;
255 mdest
= rsl_c_comp2phys_proc (procrec
->P
) ;
256 mtype
= MTYPE_FROMTO( MSG_STENCOM
, rsl_myproc
, mdest
) ;
258 if ( curs
> procrec
->pack_table_nbytes
)
260 sprintf(mess
,"pack buffer overflow %d > %d\n",curs
,procrec
->pack_table_nbytes
) ;
261 RSL_TEST_ERR(1,mess
) ;
264 fprintf(stderr
,"debug sending %d bytes to %d, sten=%d\n", curs
, mdest
, s
) ;
266 RSL_SEND ( pbuf
, curs
, mtype
, mdest
) ;
268 else if ( curs
== 0 && procrec
->pack_table_nbytes
!= 0 )
270 RSL_TEST_ERR(1,"internal error") ;
274 /* wait on receives and unpack messages as they come in */
279 while( ndone
< nprocs
)
281 if (tqp
>= nprocs
) tqp
= 0 ;
282 if (typeque
[tqp
] != RSL_INVALID
)
284 mtype
= typeque
[tqp
] ;
285 if ( rsl_noprobe
== NULL
)
286 RSL_PROBE ( mtype
, &retval
) ;
287 /* else, retval will always be 1 */
292 /* on the Paragon, calling RSL_PROBE clears the message so this
293 would bomb on an unknown message id. Don't call unless the probe
294 is disabled (rsl_noprobe != NULL). */
295 if ( rsl_noprobe
!= NULL
) RSL_RECVEND ( mtype
) ;
297 RSL_RECVEND ( mtype
) ;
301 pbuf
= buffer_for_proc( Pque
[tqp
], 0, RSL_RECVBUF
) ;
302 procrec
= procrecque
[tqp
] ;
303 pr
= procrec
->unpack_table
;
304 for ( curs
= 0, i
= 0 ; i
< procrec
->unpack_table_size
; i
++, pr
++ )
306 if ( sten
->has_f90_fields
&& procrec
->unpack_table_size
> 0 )
307 base
= (void *) get_base_for_index ( pr
->f90_table_index
) ;
311 fprintf(stderr
,"unpack base %lu, f90_index %d, sten=%d\n",base
,pr
->f90_table_index
,s
) ;
314 for ( j
= 0 ; j
< pr
->nelems
; j
++ )
317 (char *)(base
) + pr
->offset
+ j
* pr
->stride
, pr
->n
) ;
321 copymem(&(pbuf
[curs
]), pr
->n
, (char *)(base
) + pr
->offset
, pr
->stride
, pr
->n
, pr
->nelems
) ;
322 curs
+= pr
->n
*pr
->nelems
;
327 RSL_TEST_ERR(1,"internal error") ;
329 if ( curs
> procrec
->unpack_table_nbytes
)
331 sprintf(mess
,"unpack buffer overflow %d > %d\n",curs
,procrec
->pack_table_nbytes
) ;
332 RSL_TEST_ERR(1,mess
) ;
335 fprintf(stderr
,"debug got message from %d and unpacked %d bytes; sten=%d\n", Pque
[tqp
], curs
, s
) ;
337 typeque
[tqp
] = RSL_INVALID
;
344 MPE_Log_event( 16, s
, "sten end" ) ;
353 RSL_EXCH_STENCIL ( d_p
, s_p
)
355 d_p
/* (I) Domain descriptor. */
356 ,s_p
; /* (I) Stencil descriptor. */
359 stencil_desc_t
*sten
;
360 rsl_procrec_t
*procrec
;
368 int Pque
[RSL_MAXPROC
] ;
369 rsl_procrec_t
*procrecque
[RSL_MAXPROC
] ;
370 int typeque
[RSL_MAXPROC
] ;
376 d
= *d_p
; s
= *s_p
;
378 RSL_TEST_ERR(d
< 0 || d
>= RSL_MAXDOMAINS
,
379 "bad domain descriptor" ) ;
380 RSL_TEST_ERR( domain_info
[d
].valid
!= RSL_VALID
,
381 "descriptor for invalid domain" ) ;
384 MPE_Log_event( 15, s
, "sten begin" ) ;
387 fprintf(stderr
,"debug called RSL_EXCH_STENCIL %d\n",s
) ;
390 if ((sten
= (stencil_desc_t
*) sh_descriptors
[ s
]) == NULL
)
392 RSL_TEST_ERR(1,"invalid or unspecified stencil descriptor" ) ;
395 /* if stencil has not been compiled, compile it now! */
396 if ( sten
->compiled
[d
] == 0 )
398 rsl_compile_stencil( d_p
, s_p
) ;
400 /* fill in curs value for pack and unpack buffers */
401 for ( procrec
= sten
->procs
[d
] ; procrec
!= NULL
; procrec
= procrec
->next
)
403 /* determine offset into pack buffer for each element */
404 pr
= procrec
->pack_table
;
405 if ( procrec
->pack_table_nbytes
> 0 )
407 for ( curs
= 0, i
= 0 ; i
< procrec
->pack_table_size
; i
++, pr
++ )
410 curs
+= pr
->nelems
* pr
->n
;
412 // fprintf(stderr, "pack %d %d\n", curs, procrec->pack_table_nbytes);
414 /* determine offset into unpack buffer for each element */
415 if ( procrec
->unpack_table_nbytes
> 0 )
417 pr
= procrec
->unpack_table
;
418 for ( curs
= 0, i
= 0 ; i
< procrec
->unpack_table_size
; i
++, pr
++ )
421 curs
+= pr
->nelems
* pr
->n
;
423 // fprintf(stderr, "unpack %d %d\n", curs, procrec->unpack_table_nbytes);
429 /* iterate over procrecs for domain and post buffers */
432 for ( procrec
= sten
->procs
[d
] ; procrec
!= NULL
; procrec
= procrec
->next
)
434 if ( procrec
->unpack_table_nbytes
> 0 )
438 procrecque
[tqp
] = procrec
;
439 pbuf
= buffer_for_proc( P
, procrec
->unpack_table_nbytes
, RSL_RECVBUF
) ;
440 mtype
= MTYPE_FROMTO( MSG_STENCOM
,
441 rsl_c_comp2phys_proc (procrec
->P
), rsl_myproc
) ;
442 typeque
[tqp
] = mtype
;
443 procrec
->nrecvs
++ ; /* diagnostic */
445 fprintf(stderr
,"debug posting async recv for %d bytes from %d\n", procrec
->unpack_table_nbytes
, rsl_c_comp2phys_proc (procrec
->P
) ) ;
447 RSL_RECVBEGIN ( pbuf
, procrec
->unpack_table_nbytes
, mtype
) ;
453 /* pack buffers and issue sends */
455 for ( procrec
= sten
->procs
[d
] ; procrec
!= NULL
; procrec
= procrec
->next
)
458 pbuf
=buffer_for_proc(procrec
->P
, procrec
->pack_table_nbytes
, RSL_SENDBUF
) ;
460 #pragma csd parallel for private(i, pr, base)
461 for ( i
= 0 ; i
< procrec
->pack_table_size
; i
++ )
466 pr
= &procrec
->pack_table
[i
];
468 if ( sten
->has_f90_fields
&& procrec
->pack_table_size
> 0 )
469 base
= (void *) get_base_for_index ( pr
->f90_table_index
) ;
473 fprintf(stderr
,"pack base %lu, f90_index %d, sten=%d\n",base
,pr
->f90_table_index
,s
) ;
475 #pragma no_cache_alloc bufin bufout
476 bufin
= (int *)(base
) + (pr
->offset
>> 2);
477 bufout
= (int *)(pbuf
) + (pr
->curs
>> 2);
478 inc
= pr
->stride
>> 2;
482 for (j
= 0; j
< nwrds
; j
++) {
486 for (k
= 0; k
< pr
->nelems
; k
++) {
487 bufout
[k
*nwrds
+j
] = bufin
[k
*inc
+j
];
495 for (j
= 0; j
< pr
->nelems
; j
++) {
498 for (k
= 0; k
< nwrds
; k
++) {
499 bufout
[iwd
++] = bufin
[iwd2
+k
];
506 curs
= procrec
->pack_table_nbytes
;
509 mdest
= rsl_c_comp2phys_proc (procrec
->P
) ;
510 mtype
= MTYPE_FROMTO( MSG_STENCOM
, rsl_myproc
, mdest
) ;
513 fprintf(stderr
,"debug sending %d bytes to %d, sten=%d\n", curs
, mdest
, s
) ;
515 RSL_SEND ( pbuf
, curs
, mtype
, mdest
) ;
519 /* wait on receives and unpack messages as they come in */
524 while( ndone
< nprocs
)
526 if (tqp
>= nprocs
) tqp
= 0 ;
527 if (typeque
[tqp
] != RSL_INVALID
)
529 mtype
= typeque
[tqp
] ;
530 if ( rsl_noprobe
== NULL
)
531 RSL_PROBE ( mtype
, &retval
) ;
532 /* else, retval will always be 1 */
537 /* on the Paragon, calling RSL_PROBE clears the message so this
538 would bomb on an unknown message id. Don't call unless the probe
539 is disabled (rsl_noprobe != NULL). */
540 if ( rsl_noprobe
!= NULL
) RSL_RECVEND ( mtype
) ;
542 RSL_RECVEND ( mtype
) ;
545 pbuf
= buffer_for_proc( Pque
[tqp
], 0, RSL_RECVBUF
) ;
546 procrec
= procrecque
[tqp
] ;
547 #pragma csd parallel for private(i, pr, base)
548 for ( i
= 0 ; i
< procrec
->unpack_table_size
; i
++ )
552 #pragma no_cache_alloc bufin bufout
554 pr
= &procrec
->unpack_table
[i
] ;
556 if ( sten
->has_f90_fields
&& procrec
->unpack_table_size
> 0 )
557 base
= (void *) get_base_for_index ( pr
->f90_table_index
) ;
561 fprintf(stderr
,"unpack base %lu, f90_index %d, sten=%d\n",base
,pr
->f90_table_index
,s
) ;
564 bufin
= (int *)(pbuf
) + (pr
->curs
>> 2);
565 bufout
= (int *)(base
) + (pr
->offset
>> 2);
566 inc
= pr
->stride
>> 2;
570 for (j
= 0; j
< nwrds
; j
++) {
574 for (k
= 0; k
< pr
->nelems
; k
++) {
575 bufout
[k
*inc
+j
] = bufin
[k
*nwrds
+j
];
583 for (j
= 0; j
< pr
->nelems
; j
++) {
586 for (k
= 0; k
< nwrds
; k
++) {
587 bufout
[iwd2
+k
] = bufin
[iwd
++];
595 fprintf(stderr
,"debug got message from %d and unpacked %d bytes; sten=%d\n", Pque
[tqp
], curs
, s
) ;
597 typeque
[tqp
] = RSL_INVALID
;
604 MPE_Log_event( 16, s
, "sten end" ) ;