[Fortran] OpenACC – permit common blocks in some clauses
[official-gcc.git] / gcc / testsuite / gfortran.dg / intent_out_11.f90
blobc266385b49f146e077fabc263b5e60c9c9f709b7
1 ! { dg-do compile }
2 ! { dg-options "-cpp -fcoarray=lib" }
3 ! PR 87397 - this used to generate an ICE.
5 ! Coarray Distributed Transpose Test
7 ! Copyright (c) 2012-2014, Sourcery, Inc.
8 ! All rights reserved.
10 ! Redistribution and use in source and binary forms, with or without
11 ! modification, are permitted provided that the following conditions are met:
12 ! * Redistributions of source code must retain the above copyright
13 ! notice, this list of conditions and the following disclaimer.
14 ! * Redistributions in binary form must reproduce the above copyright
15 ! notice, this list of conditions and the following disclaimer in the
16 ! documentation and/or other materials provided with the distribution.
17 ! * Neither the name of the Sourcery, Inc., nor the
18 ! names of its contributors may be used to endorse or promote products
19 ! derived from this software without specific prior written permission.
21 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 ! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 ! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 ! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
25 ! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 ! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 ! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 ! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 ! Robodoc header:
32 !****m* dist_transpose/run_size
33 ! NAME
34 ! run_size
35 ! SYNOPSIS
36 ! Encapsulate problem state, wall-clock timer interface, integer broadcasts, and a data copy.
37 !******
38 !================== test transposes with integer x,y,z values ===============================
39 module run_size
40 use iso_fortran_env
41 implicit none
43 integer(int64), codimension[*] :: nx, ny, nz
44 integer(int64), codimension[*] :: my, mx, first_y, last_y, first_x, last_x
45 integer(int64) :: my_node, num_nodes
46 real(real64), codimension[*] :: tran_time
49 contains
51 !****s* run_size/broadcast_int
52 ! NAME
53 ! broadcast_int
54 ! SYNOPSIS
55 ! Broadcast a scalar coarray integer from image 1 to all other images.
56 !******
57 subroutine broadcast_int( variable )
58 integer(int64), codimension[*] :: variable
59 integer(int64) :: i
60 if( my_node == 1 ) then
61 do i = 2, num_nodes; variable[i] = variable; end do
62 end if
63 end subroutine broadcast_int
65 subroutine copy3( A,B, n1, sA1, sB1, n2, sA2, sB2, n3, sA3, sB3 )
66 implicit none
67 complex, intent(in) :: A(0:*)
68 complex, intent(out) :: B(0:*)
69 integer(int64), intent(in) :: n1, sA1, sB1
70 integer(int64), intent(in) :: n2, sA2, sB2
71 integer(int64), intent(in) :: n3, sA3, sB3
72 integer(int64) i,j,k
74 do k=0,n3-1
75 do j=0,n2-1
76 do i=0,n1-1
77 B(i*sB1+j*sB2+k*sB3) = A(i*sA1+j*sA2+k*sA3)
78 end do
79 end do
80 end do
81 end subroutine copy3
83 end module run_size
85 !****e* dist_transpose/coarray_distributed_transpose
86 ! NAME
87 ! coarray_distributed_transpose
88 ! SYNOPSIS
89 ! This program tests the transpose routines used in Fourier-spectral simulations of homogeneous turbulence.
90 ! The data is presented to the physics routines as groups of y-z or x-z planes distributed among the images.
91 ! The (out-of-place) transpose routines do the x <--> y transposes required and consist of transposes within
92 ! data blocks (intra-image) and a transpose of the distribution of these blocks among the images (inter-image).
94 ! Two methods are tested here:
95 ! RECEIVE: receive block from other image and transpose it
96 ! SEND: transpose block and send it to other image
98 ! This code is the coarray analog of mpi_distributed_transpose.
99 !******
101 program coarray_distributed_transpose
102 !(***********************************************************************************************************
103 ! m a i n p r o g r a m
104 !***********************************************************************************************************)
105 use run_size
106 implicit none
108 complex, allocatable :: u(:,:,:,:)[:] ! u(nz,4,first_x:last_x,ny)[*] !(*-- ny = my * num_nodes --*)
109 complex, allocatable :: ur(:,:,:,:)[:] !ur(nz,4,first_y:last_y,nx/2)[*] !(*-- nx/2 = mx * num_nodes --*)
110 complex, allocatable :: bufr_X_Y(:,:,:,:)
111 complex, allocatable :: bufr_Y_X(:,:,:,:)
112 integer(int64) :: x, y, z, msg_size, iter
114 num_nodes = num_images()
115 my_node = this_image()
117 if( my_node == 1 ) then
118 !write(6,*) "nx,ny,nz : "; read(5,*) nx, ny, nz
119 nx=32; ny=32; nz=32
120 call broadcast_int( nx ); call broadcast_int( ny ); call broadcast_int( nz );
121 end if
122 sync all !-- other nodes wait for broadcast!
125 if ( mod(ny,num_nodes) == 0) then; my = ny / num_nodes
126 else; write(6,*) "node ", my_node, " ny not multiple of num_nodes"; error stop
127 end if
129 if ( mod(nx/2,num_nodes) == 0) then; mx = nx/2 / num_nodes
130 else; write(6,*) "node ", my_node, "nx/2 not multiple of num_nodes"; error stop
131 end if
133 first_y = (my_node-1)*my + 1; last_y = (my_node-1)*my + my
134 first_x = (my_node-1)*mx + 1; last_x = (my_node-1)*mx + mx
136 allocate ( u(nz , 4 , first_x:last_x , ny) [*] ) !(*-- y-z planes --*)
137 allocate ( ur(nz , 4 , first_y:last_y , nx/2)[*] ) !(*-- x-z planes --*)
138 allocate ( bufr_X_Y(nz,4,mx,my) )
139 allocate ( bufr_Y_X(nz,4,my,mx) )
141 msg_size = nz*4*mx*my !-- message size (complex data items)
143 !--------- initialize data u (mx y-z planes per image) ----------
145 do x = first_x, last_x
146 do y = 1, ny
147 do z = 1, nz
148 u(z,1,x,y) = x
149 u(z,2,x,y) = y
150 u(z,3,x,y) = z
151 end do
152 end do
153 end do
155 tran_time = 0
156 do iter = 1, 2 !--- 2 transform pairs per second-order time step
158 !--------- transpose data u -> ur (mx y-z planes to my x-z planes per image) --------
160 ur = 0
162 call transpose_X_Y
164 !--------- test data ur (my x-z planes per image) ----------
166 do x = 1, nx/2
167 do y = first_y, last_y
168 do z = 1, nz
169 if ( real(ur(z,1,y,x)) /= x .or. real(ur(z,2,y,x)) /= y .or. real(ur(z,3,y,x)) /= z )then
170 write(6,fmt="(A,i3,3(6X,A,f7.3,i4))") "transpose_X_Y failed: image ", my_node &
171 , " X ",real(ur(z,1,y,x)),x, " Y ",real(ur(z,2,y,x)),y, " Z ", real(ur(z,3,y,x)),z
172 stop
173 end if
174 end do
175 end do
176 end do
178 !--------- transpose data ur -> u (my x-z planes to mx y-z planes per image) --------
180 u = 0
181 call transpose_Y_X
183 !--------- test data u (mx y-z planes per image) ----------
185 do x = first_x, last_x
186 do y = 1, ny
187 do z = 1, nz
188 if ( real(u(z,1,x,y)) /= x .or. real(u(z,2,x,y)) /= y .or. real(u(z,3,x,y)) /= z )then
189 write(6,fmt="(A,i3,3(6X,A,f7.3,i4))") "transpose_Y_X failed: image ", my_node &
190 , " X ",real(u(z,1,x,y)),x, " Y ",real(u(z,2,x,y)),y, " Z ", real(u(z,3,x,y)),z
191 stop
192 end if
193 end do
194 end do
195 end do
196 end do
198 sync all
199 if( my_node == 1 ) write(6,fmt="(A,f8.3)") "test passed: tran_time ", tran_time
201 deallocate ( bufr_X_Y ); deallocate ( bufr_Y_X )
203 !========================= end of main executable =============================
205 contains
207 !------------- out-of-place transpose data_s --> data_r ----------------------------
209 subroutine transpose_X_Y
211 use run_size
212 implicit none
214 integer(int64) :: i,stage
215 real(real64) :: tmp
217 sync all !-- wait for other nodes to finish compute
218 call cpu_time(tmp)
219 tran_time = tran_time - tmp
221 call copy3 ( u(1,1,first_x,1+(my_node-1)*my) & !-- intra-node transpose
222 , ur(1,1,first_y,1+(my_node-1)*mx) & !-- no inter-node transpose needed
223 , nz*3, 1_8, 1_8 & !-- note: only 3 of 4 words needed
224 , mx, nz*4, nz*4*my &
225 , my, nz*4*mx, nz*4 )
227 #define RECEIVE
228 #ifdef RECEIVE
230 do stage = 1, num_nodes-1
231 i = 1 + mod( my_node-1+stage, num_nodes )
232 bufr_X_Y(:,:,:,:) = u(:,:,:,1+(my_node-1)*my:my_node*my)[i] !-- inter-node transpose to buffer
233 call copy3 ( bufr_X_Y, ur(1,1,first_y,1+(i-1)*mx) & !-- intra-node transpose from buffer
234 , nz*3, 1_8, 1_8 & !-- note: only 3 of 4 words needed
235 , mx, nz*4, nz*4*my &
236 , my, nz*4*mx, nz*4 )
237 end do
239 #else
241 do stage = 1, num_nodes-1
242 i = 1 + mod( my_node-1+stage, num_nodes )
243 call copy3 ( u(1,1,first_x,1+(i-1)*my), bufr_Y_X & !-- intra-node transpose to buffer
244 , nz*3, 1_8, 1_8 &
245 , mx, nz*4, nz*4*my &
246 , my, nz*4*mx, nz*4 )
247 ur(:,:,:,1+(my_node-1)*mx:my_node*mx)[i] = bufr_Y_X(:,:,:,:) !-- inter-node transpose from buffer
248 end do
250 #endif
252 sync all !-- wait for other nodes to finish transpose
253 call cpu_time(tmp)
254 tran_time = tran_time + tmp
256 end subroutine transpose_X_Y
258 !------------- out-of-place transpose data_r --> data_s ----------------------------
260 subroutine transpose_Y_X
261 use run_size
262 implicit none
264 integer(int64) :: i, stage
265 real(real64) :: tmp
267 sync all !-- wait for other nodes to finish compute
268 call cpu_time(tmp)
269 tran_time = tran_time - tmp
271 call copy3 ( ur(1,1,first_y,1+(my_node-1)*mx) & !-- intra-node transpose
272 , u(1,1,first_x,1+(my_node-1)*my) & !-- no inter-node transpose needed
273 , nz*4, 1_8, 1_8 & !-- note: all 4 words needed
274 , my, nz*4, nz*4*mx &
275 , mx, nz*4*my, nz*4 )
277 #define RECEIVE
278 #ifdef RECEIVE
280 do stage = 1, num_nodes-1
281 i = 1 + mod( my_node-1+stage, num_nodes )
282 bufr_Y_X(:,:,:,:) = ur(:,:,:,1+(my_node-1)*mx:my_node*mx)[i] !-- inter-node transpose to buffer
283 call copy3 ( bufr_Y_X, u(1,1,first_x,1+(i-1)*my) & !-- intra-node transpose from buffer
284 , nz*4, 1_8, 1_8 &
285 , my, nz*4, nz*4*mx &
286 , mx, nz*4*my, nz*4 )
287 end do
289 #else
291 do stage = 1, num_nodes-1
292 i = 1 + mod( my_node-1+stage, num_nodes )
293 call copy3 ( ur(1,1,first_y,1+(i-1)*mx), bufr_X_Y & !-- intra-node transpose from buffer
294 , nz*4, 1_8, 1_8 &
295 , my, nz*4, nz*4*mx &
296 , mx, nz*4*my, nz*4 )
297 u(:,:,:,1+(my_node-1)*my:my_node*my)[i] = bufr_X_Y(:,:,:,:) !-- inter-node transpose from buffer
298 end do
300 #endif
302 sync all !-- wait for other nodes to finish transpose
303 call cpu_time(tmp)
304 tran_time = tran_time + tmp
306 end subroutine transpose_Y_X
309 end program coarray_distributed_transpose