[AArch64] Merge stores of D-register values with different modes
[official-gcc.git] / gcc / testsuite / gfortran.dg / coarray_36.f
blobd06a01ec6bc04ad2cdf94d61c039d980f62b78b8
1 ! { dg-do compile }
2 ! { dg-options "-fcoarray=lib" }
4 ! PR fortran/64771
6 ! Contributed by Alessandro Fanfarill
8 ! Reduced version of the full NAS CG benchmark
11 !-------------------------------------------------------------------------!
12 ! !
13 ! N A S P A R A L L E L B E N C H M A R K S 3.3 !
14 ! !
15 ! C G !
16 ! !
17 !-------------------------------------------------------------------------!
18 ! !
19 ! This benchmark is part of the NAS Parallel Benchmark 3.3 suite. !
20 ! It is described in NAS Technical Reports 95-020 and 02-007 !
21 ! !
22 ! Permission to use, copy, distribute and modify this software !
23 ! for any purpose with or without fee is hereby granted. We !
24 ! request, however, that all derived work reference the NAS !
25 ! Parallel Benchmarks 3.3. This software is provided "as is" !
26 ! without express or implied warranty. !
27 ! !
28 ! Information on NPB 3.3, including the technical report, the !
29 ! original specifications, source code, results and information !
30 ! on how to submit new results, is available at: !
31 ! !
32 ! http://www.nas.nasa.gov/Software/NPB/ !
33 ! !
34 ! Send comments or suggestions to npb@nas.nasa.gov !
35 ! !
36 ! NAS Parallel Benchmarks Group !
37 ! NASA Ames Research Center !
38 ! Mail Stop: T27A-1 !
39 ! Moffett Field, CA 94035-1000 !
40 ! !
41 ! E-mail: npb@nas.nasa.gov !
42 ! Fax: (650) 604-3957 !
43 ! !
44 !-------------------------------------------------------------------------!
47 c---------------------------------------------------------------------
49 c Authors: M. Yarrow
50 c C. Kuszmaul
51 c R. F. Van der Wijngaart
52 c H. Jin
54 c---------------------------------------------------------------------
57 c---------------------------------------------------------------------
58 c---------------------------------------------------------------------
59 program cg
60 c---------------------------------------------------------------------
61 c---------------------------------------------------------------------
62 implicit none
64 integer na, nonzer, niter
65 double precision shift, rcond
66 parameter( na=75000,
67 > nonzer=13,
68 > niter=75,
69 > shift=60.,
70 > rcond=1.0d-1 )
74 integer num_proc_rows, num_proc_cols
75 parameter( num_proc_rows = 2, num_proc_cols = 2)
76 integer num_procs
77 parameter( num_procs = num_proc_cols * num_proc_rows )
79 integer nz
80 parameter( nz = na*(nonzer+1)/num_procs*(nonzer+1)+nonzer
81 > + na*(nonzer+2+num_procs/256)/num_proc_cols )
83 common / partit_size / naa, nzz,
84 > npcols, nprows,
85 > proc_col, proc_row,
86 > firstrow,
87 > lastrow,
88 > firstcol,
89 > lastcol,
90 > exch_proc,
91 > exch_recv_length,
92 > send_start,
93 > send_len
94 integer naa, nzz,
95 > npcols, nprows,
96 > proc_col, proc_row,
97 > firstrow,
98 > lastrow,
99 > firstcol,
100 > lastcol,
101 > exch_proc,
102 > exch_recv_length,
103 > send_start,
104 > send_len
107 common / main_int_mem / colidx, rowstr,
108 > iv, arow, acol
109 integer colidx(nz), rowstr(na+1),
110 > iv(2*na+1), arow(nz), acol(nz)
113 c---------------------------------
114 c Coarray Decalarations
115 c---------------------------------
116 double precision v(na+1)[0:*], aelt(nz)[0:*], a(nz)[0:*],
117 > x(na/num_proc_rows+2)[0:*],
118 > z(na/num_proc_rows+2)[0:*],
119 > p(na/num_proc_rows+2)[0:*],
120 > q(na/num_proc_rows+2)[0:*],
121 > r(na/num_proc_rows+2)[0:*],
122 > w(na/num_proc_rows+2)[0:*]
125 common /urando/ amult, tran
126 double precision amult, tran
130 integer l2npcols
131 integer reduce_exch_proc(num_proc_cols)
132 integer reduce_send_starts(num_proc_cols)
133 integer reduce_send_lengths(num_proc_cols)
134 integer reduce_recv_lengths(num_proc_cols)
135 integer reduce_rrecv_starts(num_proc_cols)
136 c---------------------------------
137 c Coarray Decalarations
138 c---------------------------------
139 integer reduce_recv_starts(num_proc_cols)[0:*]
141 integer i, j, k, it, me, nprocs, root
143 double precision zeta, randlc
144 external randlc
145 double precision rnorm
146 c---------------------------------
147 c Coarray Decalarations
148 c---------------------------------
149 double precision norm_temp1(2)[0:*], norm_temp2(2)[0:*]
151 double precision t, tmax, mflops
152 double precision u(1), umax(1)
153 external timer_read
154 double precision timer_read
155 character class
156 logical verified
157 double precision zeta_verify_value, epsilon, err
159 c---------------------------------------------------------------------
160 c Explicit interface for conj_grad, due to coarray args
161 c---------------------------------------------------------------------
162 interface
164 subroutine conj_grad ( colidx,
165 > rowstr,
166 > x,
167 > z,
168 > a,
169 > p,
170 > q,
171 > r,
172 > w,
173 > rnorm,
174 > l2npcols,
175 > reduce_exch_proc,
176 > reduce_send_starts,
177 > reduce_send_lengths,
178 > reduce_recv_starts,
179 > reduce_recv_lengths,
180 > reduce_rrecv_starts )
182 common / partit_size / naa, nzz,
183 > npcols, nprows,
184 > proc_col, proc_row,
185 > firstrow,
186 > lastrow,
187 > firstcol,
188 > lastcol,
189 > exch_proc,
190 > exch_recv_length,
191 > send_start,
192 > send_len
194 integer naa, nzz,
195 > npcols, nprows,
196 > proc_col, proc_row,
197 > firstrow,
198 > lastrow,
199 > firstcol,
200 > lastcol,
201 > exch_proc,
202 > exch_recv_length,
203 > send_start,
204 > send_len
206 double precision x(*),
207 > z(*),
208 > a(nzz)
209 integer colidx(nzz), rowstr(naa+1)
211 double precision p(*),
212 > q(*)[0:*],
213 > r(*)[0:*],
214 > w(*)[0:*] ! used as work temporary
216 integer l2npcols
217 integer reduce_exch_proc(l2npcols)
218 integer reduce_send_starts(l2npcols)
219 integer reduce_send_lengths(l2npcols)
220 integer reduce_recv_starts(l2npcols)[0:*]
221 integer reduce_recv_lengths(l2npcols)
222 integer reduce_rrecv_starts(l2npcols)
224 double precision rnorm
226 end subroutine
228 end interface
230 c---------------------------------------------------------------------
231 c The call to the conjugate gradient routine:
232 c---------------------------------------------------------------------
233 call conj_grad ( colidx,
234 > rowstr,
235 > x,
236 > z,
237 > a,
238 > p,
239 > q,
240 > r,
241 > w,
242 > rnorm,
243 > l2npcols,
244 > reduce_exch_proc,
245 > reduce_send_starts,
246 > reduce_send_lengths,
247 > reduce_recv_starts,
248 > reduce_recv_lengths,
249 > reduce_rrecv_starts )
252 sync all
254 end ! end main
256 c---------------------------------------------------------------------
257 c---------------------------------------------------------------------
258 subroutine conj_grad ( colidx,
259 > rowstr,
260 > x,
261 > z,
262 > a,
263 > p,
264 > q,
265 > r,
266 > w,
267 > rnorm,
268 > l2npcols,
269 > reduce_exch_proc,
270 > reduce_send_starts,
271 > reduce_send_lengths,
272 > reduce_recv_starts,
273 > reduce_recv_lengths,
274 > reduce_rrecv_starts )
275 c---------------------------------------------------------------------
276 c---------------------------------------------------------------------
278 c---------------------------------------------------------------------
279 c Floaging point arrays here are named as in NPB1 spec discussion of
280 c CG algorithm
281 c---------------------------------------------------------------------
283 implicit none
285 c include 'cafnpb.h'
287 common / partit_size / naa, nzz,
288 > npcols, nprows,
289 > proc_col, proc_row,
290 > firstrow,
291 > lastrow,
292 > firstcol,
293 > lastcol,
294 > exch_proc,
295 > exch_recv_length,
296 > send_start,
297 > send_len
298 integer naa, nzz,
299 > npcols, nprows,
300 > proc_col, proc_row,
301 > firstrow,
302 > lastrow,
303 > firstcol,
304 > lastcol,
305 > exch_proc,
306 > exch_recv_length,
307 > send_start,
308 > send_len
312 double precision x(*),
313 > z(*),
314 > a(nzz)
315 integer colidx(nzz), rowstr(naa+1)
317 double precision p(*),
318 > q(*)[0:*],
319 > r(*)[0:*],
320 > w(*)[0:*] ! used as work temporary
322 integer l2npcols
323 integer reduce_exch_proc(l2npcols)
324 integer reduce_send_starts(l2npcols)
325 integer reduce_send_lengths(l2npcols)
326 integer reduce_recv_starts(l2npcols)[0:*]
327 integer reduce_recv_lengths(l2npcols)
328 integer reduce_rrecv_starts(l2npcols)
330 integer recv_start_idx, recv_end_idx, send_start_idx,
331 > send_end_idx, recv_length
333 integer i, j, k, ierr
334 integer cgit, cgitmax
336 double precision, save :: d[0:*], rho[0:*]
337 double precision sum, rho0, alpha, beta, rnorm
339 external timer_read
340 double precision timer_read
342 data cgitmax / 25 /
345 return
346 end ! end of routine conj_grad