Original WRF subgrid support version from John Michalakes without fire
[wrffire.git] / wrfv2_fire / external / RSL / RSL / comp_xpose.c
blob781f08a1d4e6c3100a54f2e6fe50d5d0aea03424
1 /***********************************************************************
3 COPYRIGHT
5 The following is a notice of limited availability of the code and
6 Government license and disclaimer which must be included in the
7 prologue of the code and in all source listings of the code.
9 Copyright notice
10 (c) 1977 University of Chicago
12 Permission is hereby granted to use, reproduce, prepare
13 derivative works, and to redistribute to others at no charge. If
14 you distribute a copy or copies of the Software, or you modify a
15 copy or copies of the Software or any portion of it, thus forming
16 a work based on the Software and make and/or distribute copies of
17 such work, you must meet the following conditions:
19 a) If you make a copy of the Software (modified or verbatim)
20 it must include the copyright notice and Government
21 license and disclaimer.
23 b) You must cause the modified Software to carry prominent
24 notices stating that you changed specified portions of
25 the Software.
27 This software was authored by:
29 Argonne National Laboratory
30 J. Michalakes: (630) 252-6646; email: michalak@mcs.anl.gov
31 Mathematics and Computer Science Division
32 Argonne National Laboratory, Argonne, IL 60439
34 ARGONNE NATIONAL LABORATORY (ANL), WITH FACILITIES IN THE STATES
35 OF ILLINOIS AND IDAHO, IS OWNED BY THE UNITED STATES GOVERNMENT,
36 AND OPERATED BY THE UNIVERSITY OF CHICAGO UNDER PROVISION OF A
37 CONTRACT WITH THE DEPARTMENT OF ENERGY.
39 GOVERNMENT LICENSE AND DISCLAIMER
41 This computer code material was prepared, in part, as an account
42 of work sponsored by an agency of the United States Government.
43 The Government is granted for itself and others acting on its
44 behalf a paid-up, nonexclusive, irrevocable worldwide license in
45 this data to reproduce, prepare derivative works, distribute
46 copies to the public, perform publicly and display publicly, and
47 to permit others to do so. NEITHER THE UNITED STATES GOVERNMENT
48 NOR ANY AGENCY THEREOF, NOR THE UNIVERSITY OF CHICAGO, NOR ANY OF
49 THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
50 ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
51 COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS,
52 PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD
53 NOT INFRINGE PRIVATELY OWNED RIGHTS.
55 ***************************************************************************/
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include "rsl.h"
61 /* The mechanism here is similar to and patterned after that used by the
62 stencil mechanism: see comp_sten.c */
65 /* this is used internally only -- this will be called automatically
66 whenever a xpose is attempted that has not
67 yet been compiled */
68 rsl_compile_xpose( d_p, x_p )
69 int_p d_p, x_p ;
71 int d, x ;
72 xpose_desc_t * xp ;
73 int i, j, ig, jg, kg, k, js, je, is, ie, ks, ke ;
74 int len_plist ;
75 rsl_domain_info_t *dp ;
76 rsl_point_t *pt ;
77 rsl_dimlen_t mlen, nlen, zlen ;
78 rsl_fldspec_t * fld ;
79 message_desc_t *msg_from, *msg_to ;
80 rsl_procrec_t *procrec ;
81 int m, n, dir ;
82 rsl_processor_t P, Plist[RSL_MAXPROC], sendP, recvP, prevP ;
83 int elemsz, t0, t1 ;
84 int ipack ;
85 void *base ;
87 d = *d_p ;
88 x = *x_p ;
90 RSL_TEST_ERR(d < 0 || d >= RSL_MAXDOMAINS,
91 "rsl_compile_xpose: bad domain descriptor" ) ;
92 RSL_TEST_ERR( domain_info[d].valid != RSL_VALID,
93 "rsl_compile_xpose: descriptor for invalid domain" ) ;
95 mlen = domain_info[d].len_m ;
96 nlen = domain_info[d].len_n ;
97 zlen = domain_info[d].len_z ;
99 RSL_TEST_ERR( x <= 0 || x > RSL_MAXDESCRIPTORS,
100 "rsl_compile_xpose: bad xpose descriptor" ) ;
101 RSL_TEST_ERR((xp = (xpose_desc_t *)xp_descriptors[x]) == NULL,
102 "rsl_compile_xpose: null xpose descriptor" ) ;
103 RSL_TEST_ERR( xp->tag != XPOSE_DESC,
104 "rsl_compile_xpose: bad xpose descriptor" ) ;
105 RSL_TEST_ERR( xp->compiled[d] != 0,
106 "rsl_compile_xpose: xpose has already been compiled for this domain") ;
108 xp->compiled[d] = 1 ;
110 dp = &(domain_info[d]) ;
111 if ( dp->decomposed != 1 )
113 default_decomposition( d_p,
114 &(domain_info[*d_p].loc_m),
115 &(domain_info[*d_p].loc_n) ) ;
118 /************* MN to MZ *************/
120 msg_from = xp->msgs_mn[d] ;
121 msg_to = xp->msgs_mz[d] ;
124 /* first pass builds the procrec list, second pass traverses it */
125 /* this is necessary because the process_refs mechanism can only */
126 /* build one set of pack or unpack lists at a time */
127 /* ipack = 0 packing on sender ; ipack = 1, unpacking on receiver */
129 xp->procs[d][XPOSE_MN_MZ] = NULL ;
130 ipack = 0 ;
131 for ( P = 0 ; P < rsl_nproc_all ; P++ )
133 procrec = RSL_MALLOC( rsl_procrec_t, 1 ) ;
134 procrec->P = P ;
136 init_process_refs() ;
138 /***/ for ( k = 0 ; k < zlen ; k++ )
140 /***/ for ( ig = 0 ; ig < mlen ; ig++ )
142 recvP = domain_info[*d_p].domain_mz[INDEX_2(k,ig,mlen)].P ;
143 if ( ipack == 0 || rsl_c_comp2phys_proc(recvP) != rsl_myproc )
145 js = 0 - domain_info[d].jlocaloffset ; je = -1 ;
146 prevP = domain_info[*d_p].domain[INDEX_2(0,ig,mlen)].P ;
147 #if 0
148 fprintf(stderr,"set js to %d\n",js) ;
149 fprintf(stderr,"set prevP to %d\n",prevP) ;
150 #endif
151 /***/ for ( jg = 0 ; jg < nlen ; jg++ )
153 sendP = domain_info[*d_p].domain[INDEX_2(jg,ig,mlen)].P ;
154 if ( jg == nlen-1 )
155 { sendP = -1 ; je++ ;}
156 i = ig - domain_info[d].ilocaloffset ;
157 j = jg - domain_info[d].jlocaloffset ;
158 #if 0
159 if ( k==0) fprintf(stderr,"P %d sendP %d prevP %d js %d je %d\n",P,sendP,prevP, js,je) ;
160 #endif
161 if ((ipack == 0) && (sendP != prevP ))
163 if ( rsl_c_comp2phys_proc(prevP) == rsl_myproc && recvP == P )
165 if ( jg > 0 )
167 /* store the pencil (ig,k,js:je) as being sent
168 from sendP and received by recvP */
170 if ( ipack == 0 ) { fld = msg_from->fldspecs ; }
171 else { fld = msg_to->fldspecs ; }
173 for ( ; fld != NULL ; fld = fld->next )
175 if ( fld->type >= 100 ) xp->has_f90_fields = 1 ;
176 base = fld->base ;
177 elemsz = fld->elemsz ;
178 switch (fld->strategy)
180 case MINNS_MAJEW_K_3D : /* <MM> eg: ua(i,j,k) */
181 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
182 #if 0
183 if (1)fprintf(stderr,"s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d js %2d je %2d je-js+1 %3d\n",P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,js,je,je-js+1) ;
184 #endif
185 store_process_refs( base, fld->f90_table_index, (i+js*t0+k*t1)*elemsz, elemsz,
186 je-js+1 ,
187 -t0*elemsz) ;
188 break ;
189 case MINEW_MAJNS_K_3D : /* <MM> eg: u(j,i,k) */
190 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
191 store_process_refs( base, fld->f90_table_index, (js+i*t0+k*t1)*elemsz, (je-js+1)*elemsz,
193 -elemsz) ;
194 break ;
195 case K_MIDNS_MAJEW_3D : /* <MM> eg: u(k,i,j) */
196 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
197 store_process_refs( base, fld->f90_table_index, (k+i*t0+js*t1)*elemsz, elemsz,
198 je-js+1,
199 -t1*elemsz) ;
200 break ;
201 case MINNS_K_MAJEW_3D : /* <MM> eg: u(i,k,j) */
202 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
203 #if 0
204 if (1)fprintf(stderr,"s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d js %2d je %2d je-js+1 %3d\n",P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,js,je,je-js+1) ;
205 #endif
206 store_process_refs( base, fld->f90_table_index, (i+k*t0+js*t1)*elemsz, elemsz,
207 je-js+1,
208 -t1*elemsz) ; /* don't need to suppress packing optimization on MN grid because of pads */
209 break ;
210 default:
211 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
212 break ;
217 #if 0
218 fprintf(stderr,"resetting js to %d\n",j) ;
219 #endif
220 js = j ;
222 je = j ;
223 #if 0
224 fprintf(stderr,"resetting je to %d\n",je) ;
225 #endif
226 prevP = sendP ;
227 #if 0
228 fprintf(stderr,"resset prevP to %d\n",prevP) ;
229 #endif
234 process_refs( &(procrec->pack_table),
235 &(procrec->pack_table_size),
236 &(procrec->pack_table_nbytes), 1 ) ;
238 #if 0
239 fprintf(stderr,"pack P = %3d:\n",procrec->P ) ;
240 show_pack_table( procrec->pack_table,
241 procrec->pack_table_size,
242 procrec->pack_table_nbytes ) ;
243 #endif
245 procrec->next = xp->procs[d][XPOSE_MN_MZ] ;
246 xp->procs[d][XPOSE_MN_MZ] = procrec ;
249 /* unpacking loop */
251 ipack = 1 ;
252 for ( procrec = xp->procs[d][XPOSE_MN_MZ] ; procrec != NULL ; procrec = procrec->next )
254 P = procrec->P ;
255 init_process_refs() ;
257 /***/ for ( kg = 0 ; kg < zlen ; kg++ )
259 /***/ for ( ig = 0 ; ig < mlen ; ig++ )
261 recvP = domain_info[*d_p].domain_mz[INDEX_2(kg,ig,mlen)].P ;
263 js = 0 - domain_info[d].jlocaloffset_mz ; je = -1 ;
265 prevP = domain_info[*d_p].domain[INDEX_2(0,ig,mlen)].P ;
266 /***/ for ( jg = 0 ; jg < nlen ; jg++ )
268 sendP = domain_info[*d_p].domain[INDEX_2(jg,ig,mlen)].P ;
269 if ( jg == nlen-1 )
270 { sendP = -1 ; je++ ;}
272 i = ig - domain_info[d].ilocaloffset_mz ;
273 j = jg - domain_info[d].jlocaloffset_mz ;
274 k = kg - domain_info[d].klocaloffset_mz ;
276 if (sendP != prevP )
278 #if 0
279 fprintf(stderr,"sendP (%2d) != prevP (%d) =? P (%d) || recvP (%d) =? rsl_myproc (%d)\n",
280 sendP,prevP,P,recvP,rsl_myproc) ;
281 #endif
282 if ( rsl_c_comp2phys_proc(prevP) == P && recvP == rsl_myproc )
284 if ( jg > 0 )
286 /* store the pencil (ig,k,js:je) as being sent
287 from sendP and received by recvP */
289 if ( ipack == 0 ) { fld = msg_from->fldspecs ; }
290 else { fld = msg_to->fldspecs ; }
292 for ( ; fld != NULL ; fld = fld->next )
294 if ( fld->type >= 100 ) xp->has_f90_fields = 1 ;
295 base = fld->base ;
296 elemsz = fld->elemsz ;
297 switch (fld->strategy)
299 case MINNS_MAJEW_K_3D : /* <MM> eg: ua(i,j,k) */
300 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
301 #if 0
302 if (1)fprintf(stderr,"s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d js %2d je %2d je-js+1 %3d\n",
303 P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,js,je,je-js+1) ;
304 #endif
305 store_process_refs( base, fld->f90_table_index, (i+js*t0+k*t1)*elemsz, elemsz,
306 je-js+1 ,
307 -t0*elemsz) ;
308 break ;
309 case MINEW_MAJNS_K_3D : /* <MM> eg: u(j,i,k) */
310 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
311 store_process_refs( base, fld->f90_table_index, (js+i*t0+k*t1)*elemsz, (je-js+1)*elemsz,
313 -elemsz) ;
314 break ;
315 case K_MIDNS_MAJEW_3D : /* <MM> eg: u(k,i,j) */
316 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
317 store_process_refs( base, fld->f90_table_index, (k+i*t0+js*t1)*elemsz, elemsz,
318 je-js+1,
319 -t1*elemsz) ;
320 break ;
321 case MINNS_K_MAJEW_3D : /* <MM> eg: u(i,k,j) */
322 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
323 #if 0
324 if (1)fprintf(stderr,"s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d js %2d je %2d je-js+1 %3d ofst %3d\n",
325 P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,js,je,je-js+1, (i+k*t0+js*t1)*elemsz) ;
326 #endif
327 store_process_refs( base, fld->f90_table_index, (i+k*t0+js*t1)*elemsz, elemsz,
328 je-js+1,
329 -t1*elemsz) ; /* negative stride suppresses some unpacking collapses */
330 break ;
331 default:
332 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
333 break ;
338 #if 0
339 fprintf(stderr,"resetting js to %d\n",j) ;
340 #endif
341 js = j ;
343 je = j ;
344 #if 0
345 fprintf(stderr,"resetting je to %d\n",je) ;
346 #endif
347 prevP = sendP ;
348 #if 0
349 fprintf(stderr,"resset prevP to %d\n",prevP) ;
350 #endif
354 process_refs( &(procrec->unpack_table),
355 &(procrec->unpack_table_size),
356 &(procrec->unpack_table_nbytes), 1 ) ;
357 #if 0
358 fprintf(stderr,"unpack P = %3d:\n",procrec->P ) ;
359 show_pack_table( procrec->unpack_table,
360 procrec->unpack_table_size,
361 procrec->unpack_table_nbytes ) ;
362 #endif
366 /************* MZ to NZ *************/
368 msg_from = xp->msgs_mz[d] ;
369 msg_to = xp->msgs_nz[d] ;
372 /* first pass builds the procrec list, second pass traverses it */
373 /* this is necessary because the process_refs mechanism can only */
374 /* build one set of pack or unpack lists at a time */
375 /* ipack = 0 packing on sender ; ipack = 1, unpacking on receiver */
377 ipack = 0 ;
378 xp->procs[d][XPOSE_MZ_NZ] = NULL ;
379 ipack = 0 ;
380 for ( P = 0 ; P < rsl_nproc_all ; P++ )
382 procrec = RSL_MALLOC( rsl_procrec_t, 1 ) ;
383 procrec->P = P ;
385 init_process_refs() ;
387 /***/ for ( jg = 0 ; jg < nlen ; jg++ )
389 /***/ for ( kg = 0 ; kg < zlen ; kg++ )
391 recvP = domain_info[*d_p].domain_nz[INDEX_2(kg,jg,nlen)].P ;
392 if ( ipack == 0 || rsl_c_comp2phys_proc(recvP) != rsl_myproc )
394 is = 0 - domain_info[d].ilocaloffset_mz ; ie = -1 ;
395 prevP = domain_info[*d_p].domain_mz[INDEX_2(kg,0,mlen)].P ;
396 #if 0
397 fprintf(stderr,"set is to %d\n",is) ;
398 fprintf(stderr,"set prevP to %d\n",prevP) ;
399 #endif
400 /***/ for ( ig = 0 ; ig < mlen ; ig++ )
402 sendP = domain_info[*d_p].domain_mz[INDEX_2(kg,ig,mlen)].P ;
403 if ( ig == mlen-1 )
404 { sendP = -1 ; ie++ ;}
405 i = ig - domain_info[d].ilocaloffset_mz ;
406 j = jg - domain_info[d].jlocaloffset_mz ;
407 k = kg - domain_info[d].klocaloffset_mz ;
408 #if 0
409 if ( k==0) fprintf(stderr,"P %d sendP %d prevP %d js %d je %d\n",P,sendP,prevP, is,ie) ;
410 #endif
411 if (sendP != prevP )
413 if ( rsl_c_comp2phys_proc(prevP) == rsl_myproc && recvP == P )
415 if ( ig > 0 )
417 /* store the pencil (is:ie,k,jg) as being sent
418 from sendP and received by recvP */
420 fld = msg_from->fldspecs ;
422 for ( ; fld != NULL ; fld = fld->next )
424 if ( fld->type >= 100 ) xp->has_f90_fields = 1 ;
425 base = fld->base ;
426 elemsz = fld->elemsz ;
427 switch (fld->strategy)
429 case MINNS_MAJEW_K_3D : /* <MM> eg: ua(i,j,k) */
430 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
431 #if 0
432 if (1)fprintf(stderr,"s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d is %2d ie %2d ie-is+1 %3d\n",P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,is,ie,ie-is+1) ;
433 #endif
434 store_process_refs( base, fld->f90_table_index, (is+j*t0+k*t1)*elemsz, (ie-is+1)*elemsz,
436 -elemsz) ;
437 break ;
438 case MINEW_MAJNS_K_3D : /* <MM> eg: u(j,i,k) */
439 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
440 store_process_refs( base, fld->f90_table_index, (j+is*t0+k*t1)*elemsz, elemsz,
441 (ie-is+1) ,
442 -t0*elemsz) ;
443 break ;
444 case K_MIDNS_MAJEW_3D : /* <MM> eg: u(k,i,j) */
445 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
446 store_process_refs( base, fld->f90_table_index, (k+is*t0+j*t1)*elemsz, elemsz,
447 ie-is+1,
448 -t0*elemsz) ;
449 break ;
450 case MINNS_K_MAJEW_3D : /* <MM> eg: u(i,k,j) */
451 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
452 #if 0
453 if (1)fprintf(stderr,"s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d is %2d ie %2d ie-is+1 %3d\n",P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,is,ie,ie-is+1) ;
454 #endif
455 store_process_refs( base, fld->f90_table_index, (is+k*t0+j*t1)*elemsz, (ie-is+1)*elemsz,
457 -elemsz) ; /* negative stride suppresses some packing optimzation in process_refs */
458 break ;
459 default:
460 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
461 break ;
466 #if 0
467 fprintf(stderr,"resetting is to %d\n",j) ;
468 #endif
469 is = i ;
471 ie = i ;
472 #if 0
473 fprintf(stderr,"resetting ie to %d\n",ie) ;
474 #endif
475 prevP = sendP ;
476 #if 0
477 fprintf(stderr,"resset prevP to %d\n",prevP) ;
478 #endif
483 process_refs( &(procrec->pack_table),
484 &(procrec->pack_table_size),
485 &(procrec->pack_table_nbytes), 1 ) ;
487 #if 0
488 fprintf(stderr,"pack P = %3d:\n",procrec->P ) ;
489 show_pack_table( procrec->pack_table,
490 procrec->pack_table_size,
491 procrec->pack_table_nbytes ) ;
492 #endif
494 procrec->next = xp->procs[d][XPOSE_MZ_NZ] ;
495 xp->procs[d][XPOSE_MZ_NZ] = procrec ;
498 /* unpacking loop */
500 ipack = 1 ;
501 for ( procrec = xp->procs[d][XPOSE_MZ_NZ] ; procrec != NULL ; procrec = procrec->next )
503 P = procrec->P ;
504 init_process_refs() ;
506 /***/ for ( jg = 0 ; jg < nlen ; jg++ )
508 /***/ for ( kg = 0 ; kg < zlen ; kg++ )
510 recvP = domain_info[*d_p].domain_nz[INDEX_2(kg,jg,nlen)].P ;
511 is = 0 - domain_info[d].ilocaloffset_nz ; ie = -1 ;
512 prevP = domain_info[*d_p].domain_mz[INDEX_2(kg,0,mlen)].P ;
513 /***/ for ( ig = 0 ; ig < mlen ; ig++ )
515 sendP = domain_info[*d_p].domain_mz[INDEX_2(kg,ig,mlen)].P ;
516 if ( ig == mlen-1 )
517 { sendP = -1 ; ie++ ;}
519 i = ig - domain_info[d].ilocaloffset_nz ;
520 j = jg - domain_info[d].jlocaloffset_nz ;
521 k = kg - domain_info[d].klocaloffset_nz ;
523 if (sendP != prevP )
525 #if 0
526 fprintf(stderr,"sendP (%2d) != prevP (%d) =? P (%d) || recvP (%d) =? rsl_myproc (%d)\n",
527 sendP,prevP,P,recvP,rsl_myproc) ;
528 #endif
529 if ( rsl_c_comp2phys_proc(prevP) == P && recvP == rsl_myproc )
531 if ( ig > 0 )
533 /* store the pencil (is:ie,k,jg) as being sent
534 from sendP and received by recvP */
536 fld = msg_to->fldspecs ;
538 for ( ; fld != NULL ; fld = fld->next )
540 if ( fld->type >= 100 ) xp->has_f90_fields = 1 ;
541 base = fld->base ;
542 elemsz = fld->elemsz ;
543 switch (fld->strategy)
545 case MINNS_MAJEW_K_3D : /* <MM> eg: ua(i,j,k) */
546 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
547 #if 0
548 if (1)fprintf(stderr,"s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d is %2d ie %2d ie-is+1 %3d\n",
549 P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,is,ie,ie-is+1) ;
550 #endif
551 store_process_refs( base, fld->f90_table_index, (is+j*t0+k*t1)*elemsz, (ie-is+1)*elemsz,
553 -elemsz) ;
554 break ;
555 case MINEW_MAJNS_K_3D : /* <MM> eg: u(j,i,k) */
556 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
557 store_process_refs( base, fld->f90_table_index, (j+is*t0+k*t1)*elemsz, elemsz,
558 ie-is+1 ,
559 -t0*elemsz) ;
560 break ;
561 case K_MIDNS_MAJEW_3D : /* <MM> eg: u(k,i,j) */
562 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
563 store_process_refs( base, fld->f90_table_index, (k+is*t0+j*t1)*elemsz, elemsz,
564 ie-is+1,
565 -t0*elemsz) ;
566 break ;
567 case MINNS_K_MAJEW_3D : /* <MM> eg: u(i,k,j) */
568 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
569 #if 0
570 if (1)fprintf(stderr,"s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d is %2d ie %2d ie-is+1 %3d\n",
571 P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,is,ie,ie-is+1) ;
572 #endif
573 store_process_refs( base, fld->f90_table_index, (is+k*t0+j*t1)*elemsz, (ie-is+1)*elemsz,
575 -elemsz) ; /* negative stride suppresses some unpacking optimization in process_refs */
576 break ;
577 default:
578 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
579 break ;
584 #if 0
585 fprintf(stderr,"resetting is to %d\n",i) ;
586 #endif
587 is = i ;
589 ie = i ;
590 #if 0
591 fprintf(stderr,"resetting je to %d\n",je) ;
592 #endif
593 prevP = sendP ;
594 #if 0
595 fprintf(stderr,"resset prevP to %d\n",prevP) ;
596 #endif
600 process_refs( &(procrec->unpack_table),
601 &(procrec->unpack_table_size),
602 &(procrec->unpack_table_nbytes), 1 ) ;
603 #if 0
604 fprintf(stderr,"unpack P = %3d:\n",procrec->P ) ;
605 show_pack_table( procrec->unpack_table,
606 procrec->unpack_table_size,
607 procrec->unpack_table_nbytes ) ;
608 #endif
612 /************* NZ to MN *************/
613 /* (may the circle be unbroken) */
615 msg_from = xp->msgs_nz[d] ;
616 msg_to = xp->msgs_mn[d] ;
618 /* first pass builds the procrec list, second pass traverses it */
619 /* this is necessary because the process_refs mechanism can only */
620 /* build one set of pack or unpack lists at a time */
621 /* ipack = 0 packing on sender ; ipack = 1, unpacking on receiver */
623 ipack = 0 ;
624 xp->procs[d][XPOSE_NZ_MN] = NULL ;
625 ipack = 0 ;
626 for ( P = 0 ; P < rsl_nproc_all ; P++ )
628 procrec = RSL_MALLOC( rsl_procrec_t, 1 ) ;
629 procrec->P = P ;
631 init_process_refs() ;
633 /***/ for ( jg = 0 ; jg < nlen ; jg++ )
635 /***/ for ( ig = 0 ; ig < mlen ; ig++ )
637 recvP = domain_info[*d_p].domain[INDEX_2(jg,ig,mlen)].P ;
638 if ( ipack == 0 || rsl_c_comp2phys_proc(recvP) != rsl_myproc )
640 ks = 0 - domain_info[d].klocaloffset_nz ; ke = -1 ;
641 prevP = domain_info[*d_p].domain_nz[INDEX_2(0,jg,nlen)].P ;
642 #if 0
643 fprintf(stderr,"set ks to %d\n",ks) ;
644 fprintf(stderr,"set prevP to %d\n",prevP) ;
645 #endif
646 /***/ for ( kg = 0 ; kg < zlen ; kg++ )
648 sendP = domain_info[*d_p].domain_nz[INDEX_2(kg,jg,nlen)].P ;
649 if ( kg == zlen-1 )
650 { sendP = -1 ; ke++ ;}
651 i = ig - domain_info[d].ilocaloffset_nz ;
652 j = jg - domain_info[d].jlocaloffset_nz ;
653 k = kg - domain_info[d].klocaloffset_nz ;
654 #if 0
655 if ( k==0) fprintf(stderr,"P %d sendP %d prevP %d ks %d ke %d\n",P,sendP,prevP, ks,ke) ;
656 #endif
657 if (sendP != prevP )
659 if ( rsl_c_comp2phys_proc(prevP) == rsl_myproc && recvP == P )
661 if ( kg > 0 )
663 /* store the pencil (ig,ks:ke,jg) as being sent
664 from sendP and received by recvP */
666 fld = msg_from->fldspecs ;
668 for ( ; fld != NULL ; fld = fld->next )
670 if ( fld->type >= 100 ) xp->has_f90_fields = 1 ;
671 base = fld->base ;
672 elemsz = fld->elemsz ;
673 switch (fld->strategy)
675 case MINNS_MAJEW_K_3D : /* <MM> eg: ua(i,j,k) */
676 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
677 #if 0
678 if (1)fprintf(stderr,"s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d ks %2d ke %2d ke-ks+1 %3d\n",P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,ks,ke,ke-ks+1) ;
679 #endif
680 store_process_refs( base, fld->f90_table_index, (i+j*t0+ks*t1)*elemsz, elemsz,
681 ke-ks+1 ,
682 -t1*elemsz) ;
683 break ;
684 case MINEW_MAJNS_K_3D : /* <MM> eg: u(j,i,k) */
685 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
686 store_process_refs( base, fld->f90_table_index, (j+i*t0+ks*t1)*elemsz, elemsz,
687 ke-ks+1 ,
688 -t1*elemsz) ;
689 break ;
690 case K_MIDNS_MAJEW_3D : /* <MM> eg: u(k,i,j) */
691 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
692 store_process_refs( base, fld->f90_table_index, (ks+i*t0+j*t1)*elemsz, (ke-ks+1)*elemsz,
694 -elemsz) ;
695 break ;
696 case MINNS_K_MAJEW_3D : /* <MM> eg: u(i,k,j) */
697 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
698 #if 0
699 if (1)fprintf(stderr,"MZ to MN s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d ks %2d ke %2d ke-ks+1 %3d\n",P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,ks,ke,ke-ks+1) ;
700 #endif
701 store_process_refs( base, fld->f90_table_index, (i+ks*t0+j*t1)*elemsz, elemsz,
702 ke-ks+1,
703 -t0*elemsz) ; /* negative stride suppresses some packing optimizationin process_refs */
704 break ;
705 default:
706 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
707 break ;
712 #if 0
713 fprintf(stderr,"resetting ks to %d\n",k) ;
714 #endif
715 ks = k ;
717 ke = k ;
718 #if 0
719 fprintf(stderr,"resetting ke to %d\n",ke) ;
720 #endif
721 prevP = sendP ;
722 #if 0
723 fprintf(stderr,"resset prevP to %d\n",prevP) ;
724 #endif
729 process_refs( &(procrec->pack_table),
730 &(procrec->pack_table_size),
731 &(procrec->pack_table_nbytes), 1 ) ;
733 #if 0
734 fprintf(stderr,"pack P = %3d:\n",procrec->P ) ;
735 show_pack_table( procrec->pack_table,
736 procrec->pack_table_size,
737 procrec->pack_table_nbytes ) ;
738 #endif
740 procrec->next = xp->procs[d][XPOSE_NZ_MN] ;
741 xp->procs[d][XPOSE_NZ_MN] = procrec ;
744 /* unpacking loop */
746 ipack = 1 ;
747 for ( procrec = xp->procs[d][XPOSE_NZ_MN] ; procrec != NULL ; procrec = procrec->next )
749 P = procrec->P ;
750 init_process_refs() ;
752 /***/ for ( jg = 0 ; jg < nlen ; jg++ )
754 /***/ for ( ig = 0 ; ig < mlen ; ig++ )
756 recvP = domain_info[*d_p].domain[INDEX_2(jg,ig,mlen)].P ;
757 ks = 0 ; ke = -1 ;
758 prevP = domain_info[*d_p].domain_nz[INDEX_2(0,jg,nlen)].P ;
759 /***/ for ( kg = 0 ; kg < zlen ; kg++ )
761 sendP = domain_info[*d_p].domain_nz[INDEX_2(kg,jg,nlen)].P ;
762 if ( kg == zlen-1 )
763 { sendP = -1 ; ke++ ;}
765 i = ig - domain_info[d].ilocaloffset ;
766 j = jg - domain_info[d].jlocaloffset ;
767 k = kg ;
769 if (sendP != prevP )
771 #if 0
772 fprintf(stderr,"sendP (%2d) != prevP (%d) =? P (%d) || recvP (%d) =? rsl_myproc (%d)\n",
773 sendP,prevP,P,recvP,rsl_myproc) ;
774 #endif
775 if ( rsl_c_comp2phys_proc(prevP) == P && recvP == rsl_myproc )
777 if ( kg > 0 )
779 /* store the pencil (i,ks:ke,jg) as being sent
780 from sendP and received by recvP */
782 fld = msg_to->fldspecs ;
784 for ( ; fld != NULL ; fld = fld->next )
786 if ( fld->type >= 100 ) xp->has_f90_fields = 1 ;
787 base = fld->base ;
788 elemsz = fld->elemsz ;
789 switch (fld->strategy)
791 case MINNS_MAJEW_K_3D : /* <MM> eg: ua(i,j,k) */
792 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
793 #if 0
794 if (1)fprintf(stderr,"s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d ks %2d ke %2d ke-ks+1 %3d\n",
795 P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,ks,ke,ke-ks+1) ;
796 #endif
797 store_process_refs( base, fld->f90_table_index, (i+j*t0+ks*t1)*elemsz, elemsz,
798 ke-ks+1 ,
799 -t1*elemsz) ;
800 break ;
801 case MINEW_MAJNS_K_3D : /* <MM> eg: u(j,i,k) */
802 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
803 store_process_refs( base, fld->f90_table_index, (j+i*t0+ks*t1)*elemsz, elemsz,
804 ke-ks+1 ,
805 -t1*elemsz) ;
806 break ;
807 case K_MIDNS_MAJEW_3D : /* <MM> eg: u(k,i,j) */
808 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
809 store_process_refs( base, fld->f90_table_index, (ks+i*t0+j*t1)*elemsz, (ke-ks+1)*elemsz,
811 -elemsz) ;
812 break ;
813 case MINNS_K_MAJEW_3D : /* <MM> eg: u(i,k,j) */
814 t0 = fld->llen[0] ; t1 = fld->llen[1]*t0 ;
815 #if 0
816 if (1)fprintf(stderr,"MZ to MN s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d ks %2d ke %2d ke-ks+1 %3d\n",
817 P,prevP,recvP,ipack,i,j,k,ig,jg,t0,t1,ks,ke,ke-ks+1) ;
818 #endif
819 store_process_refs( base, fld->f90_table_index, (i+ks*t0+j*t1)*elemsz, elemsz,
820 ke-ks+1,
821 -t0*elemsz) ; /* don't need to suppress optimizations unpacking onto MN grid because of pads */
822 break ;
823 default:
824 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
825 break ;
830 #if 0
831 fprintf(stderr,"resetting ks to %d\n",k) ;
832 #endif
833 ks = k ;
835 ke = k ;
836 #if 0
837 fprintf(stderr,"resetting ke to %d\n",ke) ;
838 #endif
839 prevP = sendP ;
840 #if 0
841 fprintf(stderr,"resset prevP to %d\n",prevP) ;
842 #endif
846 process_refs( &(procrec->unpack_table),
847 &(procrec->unpack_table_size),
848 &(procrec->unpack_table_nbytes), 1 ) ;
849 #if 0
850 fprintf(stderr,"unpack P = %3d:\n",procrec->P ) ;
851 show_pack_table( procrec->unpack_table,
852 procrec->unpack_table_size,
853 procrec->unpack_table_nbytes ) ;
854 #endif