1 /***********************************************************************
5 The following is a notice of limited availability of the code and
6 Government license and disclaimer which must be included in the
7 prologue of the code and in all source listings of the code.
10 (c) 1977 University of Chicago
12 Permission is hereby granted to use, reproduce, prepare
13 derivative works, and to redistribute to others at no charge. If
14 you distribute a copy or copies of the Software, or you modify a
15 copy or copies of the Software or any portion of it, thus forming
16 a work based on the Software and make and/or distribute copies of
17 such work, you must meet the following conditions:
19 a) If you make a copy of the Software (modified or verbatim)
20 it must include the copyright notice and Government
21 license and disclaimer.
23 b) You must cause the modified Software to carry prominent
24 notices stating that you changed specified portions of
27 This software was authored by:
29 Argonne National Laboratory
30 J. Michalakes: (630) 252-6646; email: michalak@mcs.anl.gov
31 Mathematics and Computer Science Division
32 Argonne National Laboratory, Argonne, IL 60439
34 ARGONNE NATIONAL LABORATORY (ANL), WITH FACILITIES IN THE STATES
35 OF ILLINOIS AND IDAHO, IS OWNED BY THE UNITED STATES GOVERNMENT,
36 AND OPERATED BY THE UNIVERSITY OF CHICAGO UNDER PROVISION OF A
37 CONTRACT WITH THE DEPARTMENT OF ENERGY.
39 GOVERNMENT LICENSE AND DISCLAIMER
41 This computer code material was prepared, in part, as an account
42 of work sponsored by an agency of the United States Government.
43 The Government is granted for itself and others acting on its
44 behalf a paid-up, nonexclusive, irrevocable worldwide license in
45 this data to reproduce, prepare derivative works, distribute
46 copies to the public, perform publicly and display publicly, and
47 to permit others to do so. NEITHER THE UNITED STATES GOVERNMENT
48 NOR ANY AGENCY THEREOF, NOR THE UNIVERSITY OF CHICAGO, NOR ANY OF
49 THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
50 ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
51 COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS,
52 PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD
53 NOT INFRINGE PRIVATELY OWNED RIGHTS.
55 ***************************************************************************/
61 /* The mechanism here is similar to and patterned after that used by the
62 stencil mechanism: see comp_sten.c */
65 /* this is used internally only -- this will be called automatically
66 whenever a xpose is attempted that has not
68 rsl_compile_xpose( d_p
, x_p
)
73 int i
, j
, ig
, jg
, kg
, k
, js
, je
, is
, ie
, ks
, ke
;
75 rsl_domain_info_t
*dp
;
77 rsl_dimlen_t mlen
, nlen
, zlen
;
79 message_desc_t
*msg_from
, *msg_to
;
80 rsl_procrec_t
*procrec
;
82 rsl_processor_t P
, Plist
[RSL_MAXPROC
], sendP
, recvP
, prevP
;
90 RSL_TEST_ERR(d
< 0 || d
>= RSL_MAXDOMAINS
,
91 "rsl_compile_xpose: bad domain descriptor" ) ;
92 RSL_TEST_ERR( domain_info
[d
].valid
!= RSL_VALID
,
93 "rsl_compile_xpose: descriptor for invalid domain" ) ;
95 mlen
= domain_info
[d
].len_m
;
96 nlen
= domain_info
[d
].len_n
;
97 zlen
= domain_info
[d
].len_z
;
99 RSL_TEST_ERR( x
<= 0 || x
> RSL_MAXDESCRIPTORS
,
100 "rsl_compile_xpose: bad xpose descriptor" ) ;
101 RSL_TEST_ERR((xp
= (xpose_desc_t
*)xp_descriptors
[x
]) == NULL
,
102 "rsl_compile_xpose: null xpose descriptor" ) ;
103 RSL_TEST_ERR( xp
->tag
!= XPOSE_DESC
,
104 "rsl_compile_xpose: bad xpose descriptor" ) ;
105 RSL_TEST_ERR( xp
->compiled
[d
] != 0,
106 "rsl_compile_xpose: xpose has already been compiled for this domain") ;
108 xp
->compiled
[d
] = 1 ;
110 dp
= &(domain_info
[d
]) ;
111 if ( dp
->decomposed
!= 1 )
113 default_decomposition( d_p
,
114 &(domain_info
[*d_p
].loc_m
),
115 &(domain_info
[*d_p
].loc_n
) ) ;
118 /************* MN to MZ *************/
120 msg_from
= xp
->msgs_mn
[d
] ;
121 msg_to
= xp
->msgs_mz
[d
] ;
124 /* first pass builds the procrec list, second pass traverses it */
125 /* this is necessary because the process_refs mechanism can only */
126 /* build one set of pack or unpack lists at a time */
127 /* ipack = 0 packing on sender ; ipack = 1, unpacking on receiver */
129 xp
->procs
[d
][XPOSE_MN_MZ
] = NULL
;
131 for ( P
= 0 ; P
< rsl_nproc_all
; P
++ )
133 procrec
= RSL_MALLOC( rsl_procrec_t
, 1 ) ;
136 init_process_refs() ;
138 /***/ for ( k
= 0 ; k
< zlen
; k
++ )
140 /***/ for ( ig
= 0 ; ig
< mlen
; ig
++ )
142 recvP
= domain_info
[*d_p
].domain_mz
[INDEX_2(k
,ig
,mlen
)].P
;
143 if ( ipack
== 0 || rsl_c_comp2phys_proc(recvP
) != rsl_myproc
)
145 js
= 0 - domain_info
[d
].jlocaloffset
; je
= -1 ;
146 prevP
= domain_info
[*d_p
].domain
[INDEX_2(0,ig
,mlen
)].P
;
148 fprintf(stderr
,"set js to %d\n",js
) ;
149 fprintf(stderr
,"set prevP to %d\n",prevP
) ;
151 /***/ for ( jg
= 0 ; jg
< nlen
; jg
++ )
153 sendP
= domain_info
[*d_p
].domain
[INDEX_2(jg
,ig
,mlen
)].P
;
155 { sendP
= -1 ; je
++ ;}
156 i
= ig
- domain_info
[d
].ilocaloffset
;
157 j
= jg
- domain_info
[d
].jlocaloffset
;
159 if ( k
==0) fprintf(stderr
,"P %d sendP %d prevP %d js %d je %d\n",P
,sendP
,prevP
, js
,je
) ;
161 if ((ipack
== 0) && (sendP
!= prevP
))
163 if ( rsl_c_comp2phys_proc(prevP
) == rsl_myproc
&& recvP
== P
)
167 /* store the pencil (ig,k,js:je) as being sent
168 from sendP and received by recvP */
170 if ( ipack
== 0 ) { fld
= msg_from
->fldspecs
; }
171 else { fld
= msg_to
->fldspecs
; }
173 for ( ; fld
!= NULL
; fld
= fld
->next
)
175 if ( fld
->type
>= 100 ) xp
->has_f90_fields
= 1 ;
177 elemsz
= fld
->elemsz
;
178 switch (fld
->strategy
)
180 case MINNS_MAJEW_K_3D
: /* <MM> eg: ua(i,j,k) */
181 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
183 if (1)fprintf(stderr
,"s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d js %2d je %2d je-js+1 %3d\n",P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,js
,je
,je
-js
+1) ;
185 store_process_refs( base
, fld
->f90_table_index
, (i
+js
*t0
+k
*t1
)*elemsz
, elemsz
,
189 case MINEW_MAJNS_K_3D
: /* <MM> eg: u(j,i,k) */
190 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
191 store_process_refs( base
, fld
->f90_table_index
, (js
+i
*t0
+k
*t1
)*elemsz
, (je
-js
+1)*elemsz
,
195 case K_MIDNS_MAJEW_3D
: /* <MM> eg: u(k,i,j) */
196 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
197 store_process_refs( base
, fld
->f90_table_index
, (k
+i
*t0
+js
*t1
)*elemsz
, elemsz
,
201 case MINNS_K_MAJEW_3D
: /* <MM> eg: u(i,k,j) */
202 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
204 if (1)fprintf(stderr
,"s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d js %2d je %2d je-js+1 %3d\n",P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,js
,je
,je
-js
+1) ;
206 store_process_refs( base
, fld
->f90_table_index
, (i
+k
*t0
+js
*t1
)*elemsz
, elemsz
,
208 -t1
*elemsz
) ; /* don't need to suppress packing optimization on MN grid because of pads */
211 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
218 fprintf(stderr
,"resetting js to %d\n",j
) ;
224 fprintf(stderr
,"resetting je to %d\n",je
) ;
228 fprintf(stderr
,"resset prevP to %d\n",prevP
) ;
234 process_refs( &(procrec
->pack_table
),
235 &(procrec
->pack_table_size
),
236 &(procrec
->pack_table_nbytes
), 1 ) ;
239 fprintf(stderr
,"pack P = %3d:\n",procrec
->P
) ;
240 show_pack_table( procrec
->pack_table
,
241 procrec
->pack_table_size
,
242 procrec
->pack_table_nbytes
) ;
245 procrec
->next
= xp
->procs
[d
][XPOSE_MN_MZ
] ;
246 xp
->procs
[d
][XPOSE_MN_MZ
] = procrec
;
252 for ( procrec
= xp
->procs
[d
][XPOSE_MN_MZ
] ; procrec
!= NULL
; procrec
= procrec
->next
)
255 init_process_refs() ;
257 /***/ for ( kg
= 0 ; kg
< zlen
; kg
++ )
259 /***/ for ( ig
= 0 ; ig
< mlen
; ig
++ )
261 recvP
= domain_info
[*d_p
].domain_mz
[INDEX_2(kg
,ig
,mlen
)].P
;
263 js
= 0 - domain_info
[d
].jlocaloffset_mz
; je
= -1 ;
265 prevP
= domain_info
[*d_p
].domain
[INDEX_2(0,ig
,mlen
)].P
;
266 /***/ for ( jg
= 0 ; jg
< nlen
; jg
++ )
268 sendP
= domain_info
[*d_p
].domain
[INDEX_2(jg
,ig
,mlen
)].P
;
270 { sendP
= -1 ; je
++ ;}
272 i
= ig
- domain_info
[d
].ilocaloffset_mz
;
273 j
= jg
- domain_info
[d
].jlocaloffset_mz
;
274 k
= kg
- domain_info
[d
].klocaloffset_mz
;
279 fprintf(stderr
,"sendP (%2d) != prevP (%d) =? P (%d) || recvP (%d) =? rsl_myproc (%d)\n",
280 sendP
,prevP
,P
,recvP
,rsl_myproc
) ;
282 if ( rsl_c_comp2phys_proc(prevP
) == P
&& recvP
== rsl_myproc
)
286 /* store the pencil (ig,k,js:je) as being sent
287 from sendP and received by recvP */
289 if ( ipack
== 0 ) { fld
= msg_from
->fldspecs
; }
290 else { fld
= msg_to
->fldspecs
; }
292 for ( ; fld
!= NULL
; fld
= fld
->next
)
294 if ( fld
->type
>= 100 ) xp
->has_f90_fields
= 1 ;
296 elemsz
= fld
->elemsz
;
297 switch (fld
->strategy
)
299 case MINNS_MAJEW_K_3D
: /* <MM> eg: ua(i,j,k) */
300 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
302 if (1)fprintf(stderr
,"s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d js %2d je %2d je-js+1 %3d\n",
303 P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,js
,je
,je
-js
+1) ;
305 store_process_refs( base
, fld
->f90_table_index
, (i
+js
*t0
+k
*t1
)*elemsz
, elemsz
,
309 case MINEW_MAJNS_K_3D
: /* <MM> eg: u(j,i,k) */
310 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
311 store_process_refs( base
, fld
->f90_table_index
, (js
+i
*t0
+k
*t1
)*elemsz
, (je
-js
+1)*elemsz
,
315 case K_MIDNS_MAJEW_3D
: /* <MM> eg: u(k,i,j) */
316 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
317 store_process_refs( base
, fld
->f90_table_index
, (k
+i
*t0
+js
*t1
)*elemsz
, elemsz
,
321 case MINNS_K_MAJEW_3D
: /* <MM> eg: u(i,k,j) */
322 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
324 if (1)fprintf(stderr
,"s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d js %2d je %2d je-js+1 %3d ofst %3d\n",
325 P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,js
,je
,je
-js
+1, (i
+k
*t0
+js
*t1
)*elemsz
) ;
327 store_process_refs( base
, fld
->f90_table_index
, (i
+k
*t0
+js
*t1
)*elemsz
, elemsz
,
329 -t1
*elemsz
) ; /* negative stride suppresses some unpacking collapses */
332 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
339 fprintf(stderr
,"resetting js to %d\n",j
) ;
345 fprintf(stderr
,"resetting je to %d\n",je
) ;
349 fprintf(stderr
,"resset prevP to %d\n",prevP
) ;
354 process_refs( &(procrec
->unpack_table
),
355 &(procrec
->unpack_table_size
),
356 &(procrec
->unpack_table_nbytes
), 1 ) ;
358 fprintf(stderr
,"unpack P = %3d:\n",procrec
->P
) ;
359 show_pack_table( procrec
->unpack_table
,
360 procrec
->unpack_table_size
,
361 procrec
->unpack_table_nbytes
) ;
366 /************* MZ to NZ *************/
368 msg_from
= xp
->msgs_mz
[d
] ;
369 msg_to
= xp
->msgs_nz
[d
] ;
372 /* first pass builds the procrec list, second pass traverses it */
373 /* this is necessary because the process_refs mechanism can only */
374 /* build one set of pack or unpack lists at a time */
375 /* ipack = 0 packing on sender ; ipack = 1, unpacking on receiver */
378 xp
->procs
[d
][XPOSE_MZ_NZ
] = NULL
;
380 for ( P
= 0 ; P
< rsl_nproc_all
; P
++ )
382 procrec
= RSL_MALLOC( rsl_procrec_t
, 1 ) ;
385 init_process_refs() ;
387 /***/ for ( jg
= 0 ; jg
< nlen
; jg
++ )
389 /***/ for ( kg
= 0 ; kg
< zlen
; kg
++ )
391 recvP
= domain_info
[*d_p
].domain_nz
[INDEX_2(kg
,jg
,nlen
)].P
;
392 if ( ipack
== 0 || rsl_c_comp2phys_proc(recvP
) != rsl_myproc
)
394 is
= 0 - domain_info
[d
].ilocaloffset_mz
; ie
= -1 ;
395 prevP
= domain_info
[*d_p
].domain_mz
[INDEX_2(kg
,0,mlen
)].P
;
397 fprintf(stderr
,"set is to %d\n",is
) ;
398 fprintf(stderr
,"set prevP to %d\n",prevP
) ;
400 /***/ for ( ig
= 0 ; ig
< mlen
; ig
++ )
402 sendP
= domain_info
[*d_p
].domain_mz
[INDEX_2(kg
,ig
,mlen
)].P
;
404 { sendP
= -1 ; ie
++ ;}
405 i
= ig
- domain_info
[d
].ilocaloffset_mz
;
406 j
= jg
- domain_info
[d
].jlocaloffset_mz
;
407 k
= kg
- domain_info
[d
].klocaloffset_mz
;
409 if ( k
==0) fprintf(stderr
,"P %d sendP %d prevP %d js %d je %d\n",P
,sendP
,prevP
, is
,ie
) ;
413 if ( rsl_c_comp2phys_proc(prevP
) == rsl_myproc
&& recvP
== P
)
417 /* store the pencil (is:ie,k,jg) as being sent
418 from sendP and received by recvP */
420 fld
= msg_from
->fldspecs
;
422 for ( ; fld
!= NULL
; fld
= fld
->next
)
424 if ( fld
->type
>= 100 ) xp
->has_f90_fields
= 1 ;
426 elemsz
= fld
->elemsz
;
427 switch (fld
->strategy
)
429 case MINNS_MAJEW_K_3D
: /* <MM> eg: ua(i,j,k) */
430 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
432 if (1)fprintf(stderr
,"s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d is %2d ie %2d ie-is+1 %3d\n",P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,is
,ie
,ie
-is
+1) ;
434 store_process_refs( base
, fld
->f90_table_index
, (is
+j
*t0
+k
*t1
)*elemsz
, (ie
-is
+1)*elemsz
,
438 case MINEW_MAJNS_K_3D
: /* <MM> eg: u(j,i,k) */
439 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
440 store_process_refs( base
, fld
->f90_table_index
, (j
+is
*t0
+k
*t1
)*elemsz
, elemsz
,
444 case K_MIDNS_MAJEW_3D
: /* <MM> eg: u(k,i,j) */
445 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
446 store_process_refs( base
, fld
->f90_table_index
, (k
+is
*t0
+j
*t1
)*elemsz
, elemsz
,
450 case MINNS_K_MAJEW_3D
: /* <MM> eg: u(i,k,j) */
451 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
453 if (1)fprintf(stderr
,"s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d is %2d ie %2d ie-is+1 %3d\n",P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,is
,ie
,ie
-is
+1) ;
455 store_process_refs( base
, fld
->f90_table_index
, (is
+k
*t0
+j
*t1
)*elemsz
, (ie
-is
+1)*elemsz
,
457 -elemsz
) ; /* negative stride suppresses some packing optimzation in process_refs */
460 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
467 fprintf(stderr
,"resetting is to %d\n",j
) ;
473 fprintf(stderr
,"resetting ie to %d\n",ie
) ;
477 fprintf(stderr
,"resset prevP to %d\n",prevP
) ;
483 process_refs( &(procrec
->pack_table
),
484 &(procrec
->pack_table_size
),
485 &(procrec
->pack_table_nbytes
), 1 ) ;
488 fprintf(stderr
,"pack P = %3d:\n",procrec
->P
) ;
489 show_pack_table( procrec
->pack_table
,
490 procrec
->pack_table_size
,
491 procrec
->pack_table_nbytes
) ;
494 procrec
->next
= xp
->procs
[d
][XPOSE_MZ_NZ
] ;
495 xp
->procs
[d
][XPOSE_MZ_NZ
] = procrec
;
501 for ( procrec
= xp
->procs
[d
][XPOSE_MZ_NZ
] ; procrec
!= NULL
; procrec
= procrec
->next
)
504 init_process_refs() ;
506 /***/ for ( jg
= 0 ; jg
< nlen
; jg
++ )
508 /***/ for ( kg
= 0 ; kg
< zlen
; kg
++ )
510 recvP
= domain_info
[*d_p
].domain_nz
[INDEX_2(kg
,jg
,nlen
)].P
;
511 is
= 0 - domain_info
[d
].ilocaloffset_nz
; ie
= -1 ;
512 prevP
= domain_info
[*d_p
].domain_mz
[INDEX_2(kg
,0,mlen
)].P
;
513 /***/ for ( ig
= 0 ; ig
< mlen
; ig
++ )
515 sendP
= domain_info
[*d_p
].domain_mz
[INDEX_2(kg
,ig
,mlen
)].P
;
517 { sendP
= -1 ; ie
++ ;}
519 i
= ig
- domain_info
[d
].ilocaloffset_nz
;
520 j
= jg
- domain_info
[d
].jlocaloffset_nz
;
521 k
= kg
- domain_info
[d
].klocaloffset_nz
;
526 fprintf(stderr
,"sendP (%2d) != prevP (%d) =? P (%d) || recvP (%d) =? rsl_myproc (%d)\n",
527 sendP
,prevP
,P
,recvP
,rsl_myproc
) ;
529 if ( rsl_c_comp2phys_proc(prevP
) == P
&& recvP
== rsl_myproc
)
533 /* store the pencil (is:ie,k,jg) as being sent
534 from sendP and received by recvP */
536 fld
= msg_to
->fldspecs
;
538 for ( ; fld
!= NULL
; fld
= fld
->next
)
540 if ( fld
->type
>= 100 ) xp
->has_f90_fields
= 1 ;
542 elemsz
= fld
->elemsz
;
543 switch (fld
->strategy
)
545 case MINNS_MAJEW_K_3D
: /* <MM> eg: ua(i,j,k) */
546 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
548 if (1)fprintf(stderr
,"s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d is %2d ie %2d ie-is+1 %3d\n",
549 P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,is
,ie
,ie
-is
+1) ;
551 store_process_refs( base
, fld
->f90_table_index
, (is
+j
*t0
+k
*t1
)*elemsz
, (ie
-is
+1)*elemsz
,
555 case MINEW_MAJNS_K_3D
: /* <MM> eg: u(j,i,k) */
556 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
557 store_process_refs( base
, fld
->f90_table_index
, (j
+is
*t0
+k
*t1
)*elemsz
, elemsz
,
561 case K_MIDNS_MAJEW_3D
: /* <MM> eg: u(k,i,j) */
562 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
563 store_process_refs( base
, fld
->f90_table_index
, (k
+is
*t0
+j
*t1
)*elemsz
, elemsz
,
567 case MINNS_K_MAJEW_3D
: /* <MM> eg: u(i,k,j) */
568 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
570 if (1)fprintf(stderr
,"s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d is %2d ie %2d ie-is+1 %3d\n",
571 P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,is
,ie
,ie
-is
+1) ;
573 store_process_refs( base
, fld
->f90_table_index
, (is
+k
*t0
+j
*t1
)*elemsz
, (ie
-is
+1)*elemsz
,
575 -elemsz
) ; /* negative stride suppresses some unpacking optimization in process_refs */
578 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
585 fprintf(stderr
,"resetting is to %d\n",i
) ;
591 fprintf(stderr
,"resetting je to %d\n",je
) ;
595 fprintf(stderr
,"resset prevP to %d\n",prevP
) ;
600 process_refs( &(procrec
->unpack_table
),
601 &(procrec
->unpack_table_size
),
602 &(procrec
->unpack_table_nbytes
), 1 ) ;
604 fprintf(stderr
,"unpack P = %3d:\n",procrec
->P
) ;
605 show_pack_table( procrec
->unpack_table
,
606 procrec
->unpack_table_size
,
607 procrec
->unpack_table_nbytes
) ;
612 /************* NZ to MN *************/
613 /* (may the circle be unbroken) */
615 msg_from
= xp
->msgs_nz
[d
] ;
616 msg_to
= xp
->msgs_mn
[d
] ;
618 /* first pass builds the procrec list, second pass traverses it */
619 /* this is necessary because the process_refs mechanism can only */
620 /* build one set of pack or unpack lists at a time */
621 /* ipack = 0 packing on sender ; ipack = 1, unpacking on receiver */
624 xp
->procs
[d
][XPOSE_NZ_MN
] = NULL
;
626 for ( P
= 0 ; P
< rsl_nproc_all
; P
++ )
628 procrec
= RSL_MALLOC( rsl_procrec_t
, 1 ) ;
631 init_process_refs() ;
633 /***/ for ( jg
= 0 ; jg
< nlen
; jg
++ )
635 /***/ for ( ig
= 0 ; ig
< mlen
; ig
++ )
637 recvP
= domain_info
[*d_p
].domain
[INDEX_2(jg
,ig
,mlen
)].P
;
638 if ( ipack
== 0 || rsl_c_comp2phys_proc(recvP
) != rsl_myproc
)
640 ks
= 0 - domain_info
[d
].klocaloffset_nz
; ke
= -1 ;
641 prevP
= domain_info
[*d_p
].domain_nz
[INDEX_2(0,jg
,nlen
)].P
;
643 fprintf(stderr
,"set ks to %d\n",ks
) ;
644 fprintf(stderr
,"set prevP to %d\n",prevP
) ;
646 /***/ for ( kg
= 0 ; kg
< zlen
; kg
++ )
648 sendP
= domain_info
[*d_p
].domain_nz
[INDEX_2(kg
,jg
,nlen
)].P
;
650 { sendP
= -1 ; ke
++ ;}
651 i
= ig
- domain_info
[d
].ilocaloffset_nz
;
652 j
= jg
- domain_info
[d
].jlocaloffset_nz
;
653 k
= kg
- domain_info
[d
].klocaloffset_nz
;
655 if ( k
==0) fprintf(stderr
,"P %d sendP %d prevP %d ks %d ke %d\n",P
,sendP
,prevP
, ks
,ke
) ;
659 if ( rsl_c_comp2phys_proc(prevP
) == rsl_myproc
&& recvP
== P
)
663 /* store the pencil (ig,ks:ke,jg) as being sent
664 from sendP and received by recvP */
666 fld
= msg_from
->fldspecs
;
668 for ( ; fld
!= NULL
; fld
= fld
->next
)
670 if ( fld
->type
>= 100 ) xp
->has_f90_fields
= 1 ;
672 elemsz
= fld
->elemsz
;
673 switch (fld
->strategy
)
675 case MINNS_MAJEW_K_3D
: /* <MM> eg: ua(i,j,k) */
676 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
678 if (1)fprintf(stderr
,"s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d ks %2d ke %2d ke-ks+1 %3d\n",P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,ks
,ke
,ke
-ks
+1) ;
680 store_process_refs( base
, fld
->f90_table_index
, (i
+j
*t0
+ks
*t1
)*elemsz
, elemsz
,
684 case MINEW_MAJNS_K_3D
: /* <MM> eg: u(j,i,k) */
685 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
686 store_process_refs( base
, fld
->f90_table_index
, (j
+i
*t0
+ks
*t1
)*elemsz
, elemsz
,
690 case K_MIDNS_MAJEW_3D
: /* <MM> eg: u(k,i,j) */
691 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
692 store_process_refs( base
, fld
->f90_table_index
, (ks
+i
*t0
+j
*t1
)*elemsz
, (ke
-ks
+1)*elemsz
,
696 case MINNS_K_MAJEW_3D
: /* <MM> eg: u(i,k,j) */
697 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
699 if (1)fprintf(stderr
,"MZ to MN s_p_r^: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d ks %2d ke %2d ke-ks+1 %3d\n",P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,ks
,ke
,ke
-ks
+1) ;
701 store_process_refs( base
, fld
->f90_table_index
, (i
+ks
*t0
+j
*t1
)*elemsz
, elemsz
,
703 -t0
*elemsz
) ; /* negative stride suppresses some packing optimizationin process_refs */
706 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
713 fprintf(stderr
,"resetting ks to %d\n",k
) ;
719 fprintf(stderr
,"resetting ke to %d\n",ke
) ;
723 fprintf(stderr
,"resset prevP to %d\n",prevP
) ;
729 process_refs( &(procrec
->pack_table
),
730 &(procrec
->pack_table_size
),
731 &(procrec
->pack_table_nbytes
), 1 ) ;
734 fprintf(stderr
,"pack P = %3d:\n",procrec
->P
) ;
735 show_pack_table( procrec
->pack_table
,
736 procrec
->pack_table_size
,
737 procrec
->pack_table_nbytes
) ;
740 procrec
->next
= xp
->procs
[d
][XPOSE_NZ_MN
] ;
741 xp
->procs
[d
][XPOSE_NZ_MN
] = procrec
;
747 for ( procrec
= xp
->procs
[d
][XPOSE_NZ_MN
] ; procrec
!= NULL
; procrec
= procrec
->next
)
750 init_process_refs() ;
752 /***/ for ( jg
= 0 ; jg
< nlen
; jg
++ )
754 /***/ for ( ig
= 0 ; ig
< mlen
; ig
++ )
756 recvP
= domain_info
[*d_p
].domain
[INDEX_2(jg
,ig
,mlen
)].P
;
758 prevP
= domain_info
[*d_p
].domain_nz
[INDEX_2(0,jg
,nlen
)].P
;
759 /***/ for ( kg
= 0 ; kg
< zlen
; kg
++ )
761 sendP
= domain_info
[*d_p
].domain_nz
[INDEX_2(kg
,jg
,nlen
)].P
;
763 { sendP
= -1 ; ke
++ ;}
765 i
= ig
- domain_info
[d
].ilocaloffset
;
766 j
= jg
- domain_info
[d
].jlocaloffset
;
772 fprintf(stderr
,"sendP (%2d) != prevP (%d) =? P (%d) || recvP (%d) =? rsl_myproc (%d)\n",
773 sendP
,prevP
,P
,recvP
,rsl_myproc
) ;
775 if ( rsl_c_comp2phys_proc(prevP
) == P
&& recvP
== rsl_myproc
)
779 /* store the pencil (i,ks:ke,jg) as being sent
780 from sendP and received by recvP */
782 fld
= msg_to
->fldspecs
;
784 for ( ; fld
!= NULL
; fld
= fld
->next
)
786 if ( fld
->type
>= 100 ) xp
->has_f90_fields
= 1 ;
788 elemsz
= fld
->elemsz
;
789 switch (fld
->strategy
)
791 case MINNS_MAJEW_K_3D
: /* <MM> eg: ua(i,j,k) */
792 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
794 if (1)fprintf(stderr
,"s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d ks %2d ke %2d ke-ks+1 %3d\n",
795 P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,ks
,ke
,ke
-ks
+1) ;
797 store_process_refs( base
, fld
->f90_table_index
, (i
+j
*t0
+ks
*t1
)*elemsz
, elemsz
,
801 case MINEW_MAJNS_K_3D
: /* <MM> eg: u(j,i,k) */
802 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
803 store_process_refs( base
, fld
->f90_table_index
, (j
+i
*t0
+ks
*t1
)*elemsz
, elemsz
,
807 case K_MIDNS_MAJEW_3D
: /* <MM> eg: u(k,i,j) */
808 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
809 store_process_refs( base
, fld
->f90_table_index
, (ks
+i
*t0
+j
*t1
)*elemsz
, (ke
-ks
+1)*elemsz
,
813 case MINNS_K_MAJEW_3D
: /* <MM> eg: u(i,k,j) */
814 t0
= fld
->llen
[0] ; t1
= fld
->llen
[1]*t0
;
816 if (1)fprintf(stderr
,"MZ to MN s_p_rv: P %2d prevP %2d recvP %2d ipack %d i %2d j %2d k %2d ig %2d jg %2d t0 %3d t1 %3d ks %2d ke %2d ke-ks+1 %3d\n",
817 P
,prevP
,recvP
,ipack
,i
,j
,k
,ig
,jg
,t0
,t1
,ks
,ke
,ke
-ks
+1) ;
819 store_process_refs( base
, fld
->f90_table_index
, (i
+ks
*t0
+j
*t1
)*elemsz
, elemsz
,
821 -t0
*elemsz
) ; /* don't need to suppress optimizations unpacking onto MN grid because of pads */
824 RSL_TEST_ERR(1,"new pack comp: strategy not supported" ) ;
831 fprintf(stderr
,"resetting ks to %d\n",k
) ;
837 fprintf(stderr
,"resetting ke to %d\n",ke
) ;
841 fprintf(stderr
,"resset prevP to %d\n",prevP
) ;
846 process_refs( &(procrec
->unpack_table
),
847 &(procrec
->unpack_table_size
),
848 &(procrec
->unpack_table_nbytes
), 1 ) ;
850 fprintf(stderr
,"unpack P = %3d:\n",procrec
->P
) ;
851 show_pack_table( procrec
->unpack_table
,
852 procrec
->unpack_table_size
,
853 procrec
->unpack_table_nbytes
) ;