2 ! { dg-options "-cpp -fcoarray=lib" }
3 ! PR 87397 - this used to generate an ICE.
5 ! Coarray Distributed Transpose Test
7 ! Copyright (c) 2012-2014, Sourcery, Inc.
10 ! Redistribution and use in source and binary forms, with or without
11 ! modification, are permitted provided that the following conditions are met:
12 ! * Redistributions of source code must retain the above copyright
13 ! notice, this list of conditions and the following disclaimer.
14 ! * Redistributions in binary form must reproduce the above copyright
15 ! notice, this list of conditions and the following disclaimer in the
16 ! documentation and/or other materials provided with the distribution.
17 ! * Neither the name of the Sourcery, Inc., nor the
18 ! names of its contributors may be used to endorse or promote products
19 ! derived from this software without specific prior written permission.
21 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 ! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 ! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 ! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
25 ! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 ! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 ! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 ! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 !****m* dist_transpose/run_size
36 ! Encapsulate problem state, wall-clock timer interface, integer broadcasts, and a data copy.
38 !================== test transposes with integer x,y,z values ===============================
43 integer(int64
), codimension
[*] :: nx
, ny
, nz
44 integer(int64
), codimension
[*] :: my
, mx
, first_y
, last_y
, first_x
, last_x
45 integer(int64
) :: my_node
, num_nodes
46 real(real64
), codimension
[*] :: tran_time
51 !****s* run_size/broadcast_int
55 ! Broadcast a scalar coarray integer from image 1 to all other images.
57 subroutine broadcast_int( variable
)
58 integer(int64
), codimension
[*] :: variable
60 if( my_node
== 1 ) then
61 do i
= 2, num_nodes
; variable
[i
] = variable
; end do
63 end subroutine broadcast_int
65 subroutine copy3( A
,B
, n1
, sA1
, sB1
, n2
, sA2
, sB2
, n3
, sA3
, sB3
)
67 complex, intent(in
) :: A(0:*)
68 complex, intent(out
) :: B(0:*)
69 integer(int64
), intent(in
) :: n1
, sA1
, sB1
70 integer(int64
), intent(in
) :: n2
, sA2
, sB2
71 integer(int64
), intent(in
) :: n3
, sA3
, sB3
77 B(i
*sB1
+j
*sB2
+k
*sB3
) = A(i
*sA1
+j
*sA2
+k
*sA3
)
85 !****e* dist_transpose/coarray_distributed_transpose
87 ! coarray_distributed_transpose
89 ! This program tests the transpose routines used in Fourier-spectral simulations of homogeneous turbulence.
90 ! The data is presented to the physics routines as groups of y-z or x-z planes distributed among the images.
91 ! The (out-of-place) transpose routines do the x <--> y transposes required and consist of transposes within
92 ! data blocks (intra-image) and a transpose of the distribution of these blocks among the images (inter-image).
94 ! Two methods are tested here:
95 ! RECEIVE: receive block from other image and transpose it
96 ! SEND: transpose block and send it to other image
98 ! This code is the coarray analog of mpi_distributed_transpose.
101 program coarray_distributed_transpose
102 !(***********************************************************************************************************
103 ! m a i n p r o g r a m
104 !***********************************************************************************************************)
108 complex, allocatable
:: u(:,:,:,:)[:] ! u(nz,4,first_x:last_x,ny)[*] !(*-- ny = my * num_nodes --*)
109 complex, allocatable
:: ur(:,:,:,:)[:] !ur(nz,4,first_y:last_y,nx/2)[*] !(*-- nx/2 = mx * num_nodes --*)
110 complex, allocatable
:: bufr_X_Y(:,:,:,:)
111 complex, allocatable
:: bufr_Y_X(:,:,:,:)
112 integer(int64
) :: x
, y
, z
, msg_size
, iter
114 num_nodes
= num_images()
115 my_node
= this_image()
117 if( my_node
== 1 ) then
118 !write(6,*) "nx,ny,nz : "; read(5,*) nx, ny, nz
120 call broadcast_int( nx
); call broadcast_int( ny
); call broadcast_int( nz
);
122 sync all
!-- other nodes wait for broadcast!
125 if ( mod(ny
,num_nodes
) == 0) then; my
= ny
/ num_nodes
126 else; write(6,*) "node ", my_node
, " ny not multiple of num_nodes"; error
stop
129 if ( mod(nx
/2,num_nodes
) == 0) then; mx
= nx
/2 / num_nodes
130 else; write(6,*) "node ", my_node
, "nx/2 not multiple of num_nodes"; error
stop
133 first_y
= (my_node
-1)*my
+ 1; last_y
= (my_node
-1)*my
+ my
134 first_x
= (my_node
-1)*mx
+ 1; last_x
= (my_node
-1)*mx
+ mx
136 allocate ( u(nz
, 4 , first_x
:last_x
, ny
) [*] ) !(*-- y-z planes --*)
137 allocate ( ur(nz
, 4 , first_y
:last_y
, nx
/2)[*] ) !(*-- x-z planes --*)
138 allocate ( bufr_X_Y(nz
,4,mx
,my
) )
139 allocate ( bufr_Y_X(nz
,4,my
,mx
) )
141 msg_size
= nz
*4*mx
*my
!-- message size (complex data items)
143 !--------- initialize data u (mx y-z planes per image) ----------
145 do x
= first_x
, last_x
156 do iter
= 1, 2 !--- 2 transform pairs per second-order time step
158 !--------- transpose data u -> ur (mx y-z planes to my x-z planes per image) --------
164 !--------- test data ur (my x-z planes per image) ----------
167 do y
= first_y
, last_y
169 if ( real(ur(z
,1,y
,x
)) /= x
.or
. real(ur(z
,2,y
,x
)) /= y
.or
. real(ur(z
,3,y
,x
)) /= z
)then
170 write(6,fmt
="(A,i3,3(6X,A,f7.3,i4))") "transpose_X_Y failed: image ", my_node
&
171 , " X ",real(ur(z
,1,y
,x
)),x
, " Y ",real(ur(z
,2,y
,x
)),y
, " Z ", real(ur(z
,3,y
,x
)),z
178 !--------- transpose data ur -> u (my x-z planes to mx y-z planes per image) --------
183 !--------- test data u (mx y-z planes per image) ----------
185 do x
= first_x
, last_x
188 if ( real(u(z
,1,x
,y
)) /= x
.or
. real(u(z
,2,x
,y
)) /= y
.or
. real(u(z
,3,x
,y
)) /= z
)then
189 write(6,fmt
="(A,i3,3(6X,A,f7.3,i4))") "transpose_Y_X failed: image ", my_node
&
190 , " X ",real(u(z
,1,x
,y
)),x
, " Y ",real(u(z
,2,x
,y
)),y
, " Z ", real(u(z
,3,x
,y
)),z
199 if( my_node
== 1 ) write(6,fmt
="(A,f8.3)") "test passed: tran_time ", tran_time
201 deallocate ( bufr_X_Y
); deallocate ( bufr_Y_X
)
203 !========================= end of main executable =============================
207 !------------- out-of-place transpose data_s --> data_r ----------------------------
209 subroutine transpose_X_Y
214 integer(int64
) :: i
,stage
217 sync all
!-- wait for other nodes to finish compute
219 tran_time
= tran_time
- tmp
221 call copy3 ( u(1,1,first_x
,1+(my_node
-1)*my
) & !-- intra-node transpose
222 , ur(1,1,first_y
,1+(my_node
-1)*mx
) & !-- no inter-node transpose needed
223 , nz
*3, 1_8, 1_8 & !-- note: only 3 of 4 words needed
224 , mx
, nz
*4, nz
*4*my
&
225 , my
, nz
*4*mx
, nz
*4 )
230 do stage
= 1, num_nodes
-1
231 i
= 1 + mod( my_node
-1+stage
, num_nodes
)
232 bufr_X_Y(:,:,:,:) = u(:,:,:,1+(my_node
-1)*my
:my_node
*my
)[i
] !-- inter-node transpose to buffer
233 call copy3 ( bufr_X_Y
, ur(1,1,first_y
,1+(i
-1)*mx
) & !-- intra-node transpose from buffer
234 , nz
*3, 1_8, 1_8 & !-- note: only 3 of 4 words needed
235 , mx
, nz
*4, nz
*4*my
&
236 , my
, nz
*4*mx
, nz
*4 )
241 do stage
= 1, num_nodes
-1
242 i
= 1 + mod( my_node
-1+stage
, num_nodes
)
243 call copy3 ( u(1,1,first_x
,1+(i
-1)*my
), bufr_Y_X
& !-- intra-node transpose to buffer
245 , mx
, nz
*4, nz
*4*my
&
246 , my
, nz
*4*mx
, nz
*4 )
247 ur(:,:,:,1+(my_node
-1)*mx
:my_node
*mx
)[i
] = bufr_Y_X(:,:,:,:) !-- inter-node transpose from buffer
252 sync all
!-- wait for other nodes to finish transpose
254 tran_time
= tran_time
+ tmp
256 end subroutine transpose_X_Y
258 !------------- out-of-place transpose data_r --> data_s ----------------------------
260 subroutine transpose_Y_X
264 integer(int64
) :: i
, stage
267 sync all
!-- wait for other nodes to finish compute
269 tran_time
= tran_time
- tmp
271 call copy3 ( ur(1,1,first_y
,1+(my_node
-1)*mx
) & !-- intra-node transpose
272 , u(1,1,first_x
,1+(my_node
-1)*my
) & !-- no inter-node transpose needed
273 , nz
*4, 1_8, 1_8 & !-- note: all 4 words needed
274 , my
, nz
*4, nz
*4*mx
&
275 , mx
, nz
*4*my
, nz
*4 )
280 do stage
= 1, num_nodes
-1
281 i
= 1 + mod( my_node
-1+stage
, num_nodes
)
282 bufr_Y_X(:,:,:,:) = ur(:,:,:,1+(my_node
-1)*mx
:my_node
*mx
)[i
] !-- inter-node transpose to buffer
283 call copy3 ( bufr_Y_X
, u(1,1,first_x
,1+(i
-1)*my
) & !-- intra-node transpose from buffer
285 , my
, nz
*4, nz
*4*mx
&
286 , mx
, nz
*4*my
, nz
*4 )
291 do stage
= 1, num_nodes
-1
292 i
= 1 + mod( my_node
-1+stage
, num_nodes
)
293 call copy3 ( ur(1,1,first_y
,1+(i
-1)*mx
), bufr_X_Y
& !-- intra-node transpose from buffer
295 , my
, nz
*4, nz
*4*mx
&
296 , mx
, nz
*4*my
, nz
*4 )
297 u(:,:,:,1+(my_node
-1)*my
:my_node
*my
)[i
] = bufr_X_Y(:,:,:,:) !-- inter-node transpose from buffer
302 sync all
!-- wait for other nodes to finish transpose
304 tran_time
= tran_time
+ tmp
306 end subroutine transpose_Y_X
309 end program coarray_distributed_transpose