added README_changes.txt
[wrffire.git] / wrfv2_fire / external / RSL / RSL / exch_sten.c
blobcd7be99641aeb3ba065025a25423e0812aab1a81
1 /***********************************************************************
3 COPYRIGHT
5 The following is a notice of limited availability of the code and
6 Government license and disclaimer which must be included in the
7 prologue of the code and in all source listings of the code.
9 Copyright notice
10 (c) 1977 University of Chicago
12 Permission is hereby granted to use, reproduce, prepare
13 derivative works, and to redistribute to others at no charge. If
14 you distribute a copy or copies of the Software, or you modify a
15 copy or copies of the Software or any portion of it, thus forming
16 a work based on the Software and make and/or distribute copies of
17 such work, you must meet the following conditions:
19 a) If you make a copy of the Software (modified or verbatim)
20 it must include the copyright notice and Government
21 license and disclaimer.
23 b) You must cause the modified Software to carry prominent
24 notices stating that you changed specified portions of
25 the Software.
27 This software was authored by:
29 Argonne National Laboratory
30 J. Michalakes: (630) 252-6646; email: michalak@mcs.anl.gov
31 Mathematics and Computer Science Division
32 Argonne National Laboratory, Argonne, IL 60439
34 ARGONNE NATIONAL LABORATORY (ANL), WITH FACILITIES IN THE STATES
35 OF ILLINOIS AND IDAHO, IS OWNED BY THE UNITED STATES GOVERNMENT,
36 AND OPERATED BY THE UNIVERSITY OF CHICAGO UNDER PROVISION OF A
37 CONTRACT WITH THE DEPARTMENT OF ENERGY.
39 GOVERNMENT LICENSE AND DISCLAIMER
41 This computer code material was prepared, in part, as an account
42 of work sponsored by an agency of the United States Government.
43 The Government is granted for itself and others acting on its
44 behalf a paid-up, nonexclusive, irrevocable worldwide license in
45 this data to reproduce, prepare derivative works, distribute
46 copies to the public, perform publicly and display publicly, and
47 to permit others to do so. NEITHER THE UNITED STATES GOVERNMENT
48 NOR ANY AGENCY THEREOF, NOR THE UNIVERSITY OF CHICAGO, NOR ANY OF
49 THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
50 ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
51 COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS,
52 PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD
53 NOT INFRINGE PRIVATELY OWNED RIGHTS.
55 ***************************************************************************/
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include "rsl.h"
61 /*@
62 RSL_EXCH_STENCIL - Exchange data on an RSL stencil
64 Notes:
65 This routine is used to exchange data within domain Arg1 using
66 an RSL stencil, Arg2.
67 When this routine returns, data
68 in the ghost areas around the local partition will have been
69 updated with data from cells on surrounding processors, as
70 described in the stencil. A stencil must have been described
71 in the context of a domain before it can be used on the domain
72 (RSL_DESCRIBE_STENCIL).
74 This routine generates interprocessor communication on message
75 passing architectures.
77 All processors must call RSL_EXCH_STENCIL at the same point in
78 the code.
80 See also:
81 RSL_CREATE_STENCIL, RSL_DESCRIBE_STENCIL
83 @*/
85 #ifndef crayx1
87 #ifdef NEC_TUNE
88 void copymem( void *src, /* Source address (byte) */
89 int src_inc, /* Stride between source per copy (bytes) */
90 void *dest, /* Destination address (byte) */
91 int dest_inc, /* Stride between destination per copy (bytes) */
92 int nbytes, /* Number of bytes to move per copy (bytes) */
93 int nelems ) /* Number of times to repeat copy */
95 /* Byte based pointers. */
96 char * src1b ;
97 char * dest1b ;
98 /* 4 byte based pointers. */
99 int * src4b ;
100 int * dest4b ;
101 /* 8 byte based pointers. */
102 long * src8b ;
103 long * dest8b ;
104 int outer ;
105 int inner ;
107 if ( (((long)src | (long)dest | src_inc | dest_inc | nbytes) & (sizeof(int) -1)) == 0 )
109 src4b = (int *)(src) ;
110 dest4b = (int *)(dest) ;
111 src_inc /= sizeof(int) ;
112 dest_inc /= sizeof(int) ;
114 for ( outer = 0 ; outer < nbytes/sizeof(int) ; outer++ )
116 #pragma cdir nodep
117 for ( inner = 0 ; inner < nelems ; inner++ )
119 dest4b[outer + inner*dest_inc] = src4b[outer + inner*src_inc] ;
123 else
125 src1b = (char *) (src) ;
126 dest1b = (char *) (dest) ;
127 for ( inner = 0 ; inner < nelems ; inner++ )
129 bcopy(src1b, dest1b, nbytes) ;
130 src1b += src_inc ;
131 dest1b += dest_inc ;
134 } /* copymem */
135 #endif
137 RSL_EXCH_STENCIL ( d_p, s_p )
138 int_p
139 d_p /* (I) Domain descriptor. */
140 ,s_p ; /* (I) Stencil descriptor. */
142 int d, s ;
143 stencil_desc_t *sten ;
144 message_desc_t *msg ;
145 rsl_procrec_t *procrec ;
146 rsl_ptrec_t *ptrec ;
147 rsl_list_t *lp, *lp1 ;
148 rsl_index_t ig, jg ;
149 rsl_point_hdr_t point_hdr ;
150 int i, ipt, sp, j ;
151 int curs ;
152 int nprocs, npts ;
153 int retval ;
154 int mtype, mdest ;
155 char * pbuf ;
156 int P ;
157 int Pque[RSL_MAXPROC] ;
158 rsl_procrec_t *procrecque[RSL_MAXPROC ] ;
159 int typeque[RSL_MAXPROC] ;
160 int tqp, ndone ;
161 void * base ;
162 packrec_t * pr ;
165 d = *d_p ; s = *s_p ;
167 RSL_TEST_ERR(d < 0 || d >= RSL_MAXDOMAINS,
168 "bad domain descriptor" ) ;
169 RSL_TEST_ERR( domain_info[d].valid != RSL_VALID,
170 "descriptor for invalid domain" ) ;
172 #ifdef UPSHOT
173 MPE_Log_event( 15, s, "sten begin" ) ;
174 #endif
175 #if 0
176 fprintf(stderr,"debug called RSL_EXCH_STENCIL %d\n",s ) ;
177 #endif
179 if ((sten = (stencil_desc_t *) sh_descriptors[ s ]) == NULL )
181 RSL_TEST_ERR(1,"invalid or unspecified stencil descriptor" ) ;
184 /* if stencil has not been compiled, compile it now! */
185 if ( sten->compiled[d] == 0 )
187 rsl_compile_stencil( d_p, s_p ) ;
190 /* post receives */
191 /* iterate over procrecs for domain and post buffers */
193 tqp = 0 ;
194 for ( procrec = sten->procs[d] ; procrec != NULL ; procrec = procrec->next )
196 if ( procrec->unpack_table_nbytes > 0 )
198 P = procrec->P ;
199 Pque[tqp] = P ;
200 procrecque[tqp] = procrec ;
201 pbuf = buffer_for_proc( P, procrec->unpack_table_nbytes, RSL_RECVBUF ) ;
202 mtype = MTYPE_FROMTO( MSG_STENCOM,
203 rsl_c_comp2phys_proc (procrec->P),
204 rsl_myproc ) ;
205 typeque[tqp] = mtype ;
206 procrec->nrecvs++ ; /* diagnostic */
207 #if 0
208 fprintf(stderr,"debug posting async recv for %d bytes from %d\n", procrec->unpack_table_nbytes, rsl_c_comp2phys_proc (procrec->P) ) ;
209 #endif
210 RSL_RECVBEGIN ( pbuf, procrec->unpack_table_nbytes, mtype ) ;
211 tqp++ ;
214 nprocs = tqp ;
216 /* pack buffers and issue sends */
218 for ( procrec = sten->procs[d] ; procrec != NULL ; procrec = procrec->next )
220 pbuf=buffer_for_proc(procrec->P, procrec->pack_table_nbytes, RSL_SENDBUF) ;
221 pr = procrec->pack_table ;
222 for ( curs = 0, i = 0 ; i < procrec->pack_table_size ; i++, pr++ )
224 if ( sten->has_f90_fields && procrec->pack_table_size > 0 )
225 base = (void *) get_base_for_index ( pr->f90_table_index ) ;
226 else
227 base = pr->base ;
228 #if 0
229 fprintf(stderr,"pack base %lu, f90_index %d, sten=%d\n",base,pr->f90_table_index,s) ;
230 #endif
231 #ifndef NEC_TUNE
232 for ( j = 0 ; j < pr->nelems ; j++ )
235 #if 0
236 if ( rsl_debug_flg ) {
237 fprintf(stderr,"pck %08x, buf %08x, curs %5d, n %5d, off %5d, j %5d, s %5d\n",
238 (char *)(base) + pr->offset + j * pr->stride,
239 &(pbuf[curs]), curs, pr->n,
240 pr->offset, j, pr->stride ) ;
242 #endif
244 bcopy((char *)(base) + pr->offset + j * pr->stride,
245 &(pbuf[curs]),pr->n) ;
246 curs += pr->n ;
248 #else
249 copymem((char *)(base) + pr->offset, pr->stride, &(pbuf[curs]), pr->n, pr->n, pr->nelems) ;
250 curs += pr->n*pr->nelems ;
251 #endif
253 if ( curs > 0 )
255 mdest = rsl_c_comp2phys_proc (procrec->P) ;
256 mtype = MTYPE_FROMTO( MSG_STENCOM, rsl_myproc, mdest ) ;
257 procrec->nsends++ ;
258 if ( curs > procrec->pack_table_nbytes )
260 sprintf(mess,"pack buffer overflow %d > %d\n",curs,procrec->pack_table_nbytes) ;
261 RSL_TEST_ERR(1,mess) ;
263 #if 0
264 fprintf(stderr,"debug sending %d bytes to %d, sten=%d\n", curs, mdest, s ) ;
265 #endif
266 RSL_SEND ( pbuf, curs, mtype, mdest ) ;
268 else if ( curs == 0 && procrec->pack_table_nbytes != 0 )
270 RSL_TEST_ERR(1,"internal error") ;
274 /* wait on receives and unpack messages as they come in */
275 ndone = 0 ;
276 tqp = 0 ;
277 retval = 1 ;
279 while( ndone < nprocs )
281 if (tqp >= nprocs ) tqp = 0 ;
282 if (typeque[tqp] != RSL_INVALID)
284 mtype = typeque[tqp] ;
285 if ( rsl_noprobe == NULL )
286 RSL_PROBE ( mtype, &retval ) ;
287 /* else, retval will always be 1 */
289 if ( retval )
291 #ifdef PGON
292 /* on the Paragon, calling RSL_PROBE clears the message so this
293 would bomb on an unknown message id. Don't call unless the probe
294 is disabled (rsl_noprobe != NULL). */
295 if ( rsl_noprobe != NULL ) RSL_RECVEND ( mtype ) ;
296 #else
297 RSL_RECVEND ( mtype ) ;
298 #endif
300 curs = 0 ;
301 pbuf = buffer_for_proc( Pque[tqp], 0, RSL_RECVBUF ) ;
302 procrec = procrecque[tqp] ;
303 pr = procrec->unpack_table ;
304 for ( curs = 0, i = 0 ; i < procrec->unpack_table_size ; i++, pr++ )
306 if ( sten->has_f90_fields && procrec->unpack_table_size > 0 )
307 base = (void *) get_base_for_index ( pr->f90_table_index ) ;
308 else
309 base = pr->base ;
310 #if 0
311 fprintf(stderr,"unpack base %lu, f90_index %d, sten=%d\n",base,pr->f90_table_index,s) ;
312 #endif
313 #ifndef NEC_TUNE
314 for ( j = 0 ; j < pr->nelems ; j++ )
316 bcopy(&(pbuf[curs]),
317 (char *)(base) + pr->offset + j * pr->stride, pr->n) ;
318 curs += pr->n ;
320 #else
321 copymem(&(pbuf[curs]), pr->n, (char *)(base) + pr->offset, pr->stride, pr->n, pr->nelems) ;
322 curs += pr->n*pr->nelems ;
323 #endif
325 if ( curs == 0 )
327 RSL_TEST_ERR(1,"internal error") ;
329 if ( curs > procrec->unpack_table_nbytes )
331 sprintf(mess,"unpack buffer overflow %d > %d\n",curs,procrec->pack_table_nbytes) ;
332 RSL_TEST_ERR(1,mess) ;
334 #if 0
335 fprintf(stderr,"debug got message from %d and unpacked %d bytes; sten=%d\n", Pque[tqp], curs, s ) ;
336 #endif
337 typeque[tqp] = RSL_INVALID ;
338 ndone++ ;
341 tqp++ ;
343 #ifdef UPSHOT
344 MPE_Log_event( 16, s, "sten end" ) ;
345 #endif
350 #else
353 RSL_EXCH_STENCIL ( d_p, s_p )
354 int_p
355 d_p /* (I) Domain descriptor. */
356 ,s_p ; /* (I) Stencil descriptor. */
358 int d, s ;
359 stencil_desc_t *sten ;
360 rsl_procrec_t *procrec ;
361 int i,j ;
362 int curs ;
363 int nprocs ;
364 int retval ;
365 int mtype, mdest ;
366 char * pbuf ;
367 int P ;
368 int Pque[RSL_MAXPROC] ;
369 rsl_procrec_t *procrecque[RSL_MAXPROC ] ;
370 int typeque[RSL_MAXPROC] ;
371 int tqp, ndone ;
372 void * base ;
374 packrec_t * pr ;
376 d = *d_p ; s = *s_p ;
378 RSL_TEST_ERR(d < 0 || d >= RSL_MAXDOMAINS,
379 "bad domain descriptor" ) ;
380 RSL_TEST_ERR( domain_info[d].valid != RSL_VALID,
381 "descriptor for invalid domain" ) ;
383 #ifdef UPSHOT
384 MPE_Log_event( 15, s, "sten begin" ) ;
385 #endif
386 #if 0
387 fprintf(stderr,"debug called RSL_EXCH_STENCIL %d\n",s ) ;
388 #endif
390 if ((sten = (stencil_desc_t *) sh_descriptors[ s ]) == NULL )
392 RSL_TEST_ERR(1,"invalid or unspecified stencil descriptor" ) ;
395 /* if stencil has not been compiled, compile it now! */
396 if ( sten->compiled[d] == 0 )
398 rsl_compile_stencil( d_p, s_p ) ;
400 /* fill in curs value for pack and unpack buffers */
401 for ( procrec = sten->procs[d] ; procrec != NULL ; procrec = procrec->next )
403 /* determine offset into pack buffer for each element */
404 pr = procrec->pack_table ;
405 if ( procrec->pack_table_nbytes > 0 )
407 for ( curs = 0, i = 0 ; i < procrec->pack_table_size ; i++, pr++ )
409 pr->curs = curs;
410 curs += pr->nelems * pr->n;
412 // fprintf(stderr, "pack %d %d\n", curs, procrec->pack_table_nbytes);
414 /* determine offset into unpack buffer for each element */
415 if ( procrec->unpack_table_nbytes > 0 )
417 pr = procrec->unpack_table ;
418 for ( curs = 0, i = 0 ; i < procrec->unpack_table_size ; i++, pr++ )
420 pr->curs = curs;
421 curs += pr->nelems * pr->n;
423 // fprintf(stderr, "unpack %d %d\n", curs, procrec->unpack_table_nbytes);
428 /* post receives */
429 /* iterate over procrecs for domain and post buffers */
431 tqp = 0 ;
432 for ( procrec = sten->procs[d] ; procrec != NULL ; procrec = procrec->next )
434 if ( procrec->unpack_table_nbytes > 0 )
436 P = procrec->P ;
437 Pque[tqp] = P ;
438 procrecque[tqp] = procrec ;
439 pbuf = buffer_for_proc( P, procrec->unpack_table_nbytes, RSL_RECVBUF ) ;
440 mtype = MTYPE_FROMTO( MSG_STENCOM,
441 rsl_c_comp2phys_proc (procrec->P), rsl_myproc ) ;
442 typeque[tqp] = mtype ;
443 procrec->nrecvs++ ; /* diagnostic */
444 #if 0
445 fprintf(stderr,"debug posting async recv for %d bytes from %d\n", procrec->unpack_table_nbytes, rsl_c_comp2phys_proc (procrec->P) ) ;
446 #endif
447 RSL_RECVBEGIN ( pbuf, procrec->unpack_table_nbytes, mtype ) ;
448 tqp++ ;
451 nprocs = tqp ;
453 /* pack buffers and issue sends */
455 for ( procrec = sten->procs[d] ; procrec != NULL ; procrec = procrec->next )
458 pbuf=buffer_for_proc(procrec->P, procrec->pack_table_nbytes, RSL_SENDBUF) ;
460 #pragma csd parallel for private(i, pr, base)
461 for ( i = 0 ; i < procrec->pack_table_size ; i++ )
463 int inc, nwrds;
464 int *bufin, *bufout;
466 pr = &procrec->pack_table[i];
468 if ( sten->has_f90_fields && procrec->pack_table_size > 0 )
469 base = (void *) get_base_for_index ( pr->f90_table_index ) ;
470 else
471 base = pr->base ;
472 #if 0
473 fprintf(stderr,"pack base %lu, f90_index %d, sten=%d\n",base,pr->f90_table_index,s) ;
474 #endif
475 #pragma no_cache_alloc bufin bufout
476 bufin = (int *)(base) + (pr->offset >> 2);
477 bufout = (int *)(pbuf) + (pr->curs >> 2);
478 inc = pr->stride >> 2;
479 nwrds = pr->n >> 2;
480 if (nwrds < 64) {
481 int j, k;
482 for (j = 0; j < nwrds; j++) {
483 #pragma _CRI ivdep
484 #pragma prefervector
485 #pragma cdir nodep
486 for (k = 0; k < pr->nelems; k++) {
487 bufout[k*nwrds+j] = bufin[k*inc+j];
491 else {
492 int j, k;
493 int iwd = 0;
494 int iwd2 = 0;
495 for (j = 0; j < pr->nelems; j++) {
496 #pragma _CRI ivdep
497 #pragma cdir nodep
498 for (k = 0; k < nwrds; k++) {
499 bufout[iwd++] = bufin[iwd2+k];
501 iwd2 += inc;
506 curs = procrec->pack_table_nbytes;
507 if ( curs > 0 )
509 mdest = rsl_c_comp2phys_proc (procrec->P) ;
510 mtype = MTYPE_FROMTO( MSG_STENCOM, rsl_myproc, mdest ) ;
511 procrec->nsends++ ;
512 #if 0
513 fprintf(stderr,"debug sending %d bytes to %d, sten=%d\n", curs, mdest, s ) ;
514 #endif
515 RSL_SEND ( pbuf, curs, mtype, mdest ) ;
519 /* wait on receives and unpack messages as they come in */
520 ndone = 0 ;
521 tqp = 0 ;
522 retval = 1 ;
524 while( ndone < nprocs )
526 if (tqp >= nprocs ) tqp = 0 ;
527 if (typeque[tqp] != RSL_INVALID)
529 mtype = typeque[tqp] ;
530 if ( rsl_noprobe == NULL )
531 RSL_PROBE ( mtype, &retval ) ;
532 /* else, retval will always be 1 */
534 if ( retval )
536 #ifdef PGON
537 /* on the Paragon, calling RSL_PROBE clears the message so this
538 would bomb on an unknown message id. Don't call unless the probe
539 is disabled (rsl_noprobe != NULL). */
540 if ( rsl_noprobe != NULL ) RSL_RECVEND ( mtype ) ;
541 #else
542 RSL_RECVEND ( mtype ) ;
543 #endif
545 pbuf = buffer_for_proc( Pque[tqp], 0, RSL_RECVBUF ) ;
546 procrec = procrecque[tqp] ;
547 #pragma csd parallel for private(i, pr, base)
548 for ( i = 0 ; i < procrec->unpack_table_size ; i++ )
550 int inc, nwrds;
551 int *bufin, *bufout;
552 #pragma no_cache_alloc bufin bufout
554 pr = &procrec->unpack_table[i] ;
556 if ( sten->has_f90_fields && procrec->unpack_table_size > 0 )
557 base = (void *) get_base_for_index ( pr->f90_table_index ) ;
558 else
559 base = pr->base ;
560 #if 0
561 fprintf(stderr,"unpack base %lu, f90_index %d, sten=%d\n",base,pr->f90_table_index,s) ;
562 #endif
564 bufin = (int *)(pbuf) + (pr->curs >> 2);
565 bufout = (int *)(base) + (pr->offset >> 2);
566 inc = pr->stride >> 2;
567 nwrds = pr->n >> 2;
568 if (nwrds < 64) {
569 int j, k;
570 for (j = 0; j < nwrds; j++) {
571 #pragma _CRI ivdep
572 #pragma prefervector
573 #pragma cdir nodep
574 for (k = 0; k < pr->nelems; k++) {
575 bufout[k*inc+j] = bufin[k*nwrds+j];
579 else {
580 int j, k;
581 int iwd = 0;
582 int iwd2 = 0;
583 for (j = 0; j < pr->nelems; j++) {
584 #pragma _CRI ivdep
585 #pragma cdir nodep
586 for (k = 0; k < nwrds; k++) {
587 bufout[iwd2+k] = bufin[iwd++];
589 iwd2 += inc;
594 #if 0
595 fprintf(stderr,"debug got message from %d and unpacked %d bytes; sten=%d\n", Pque[tqp], curs, s ) ;
596 #endif
597 typeque[tqp] = RSL_INVALID ;
598 ndone++ ;
601 tqp++ ;
603 #ifdef UPSHOT
604 MPE_Log_event( 16, s, "sten end" ) ;
605 #endif
609 #endif