2 ! { dg
-options
"-fcoarray=lib" }
6 ! Contributed by Alessandro Fanfarill
8 ! Reduced version of the full NAS CG benchmark
11 !-------------------------------------------------------------------------!
13 ! N A S P A R A L L E L B E N C H M A R K S
3.3 !
17 !-------------------------------------------------------------------------!
19 ! This benchmark is part of the NAS Parallel Benchmark
3.3 suite
. !
20 ! It is described in NAS Technical Reports
95-020 and
02-007 !
22 ! Permission
to use
, copy
, distribute and modify this software
!
23 ! for any purpose with or without fee is hereby granted
. We
!
24 ! request
, however
, that all derived work reference the NAS
!
25 ! Parallel Benchmarks
3.3. This software is provided
"as is" !
26 ! without express or implied warranty
. !
28 ! Information on NPB
3.3, including the technical report
, the
!
29 ! original specifications
, source code
, results and information
!
30 ! on how
to submit new results
, is available at
: !
32 ! http
://www
.nas
.nasa
.gov
/Software
/NPB
/ !
34 ! Send comments or suggestions
to npb@nas
.nasa
.gov
!
36 ! NAS Parallel Benchmarks Group
!
37 ! NASA Ames Research Center
!
39 ! Moffett Field
, CA
94035-1000 !
41 ! E
-mail
: npb@nas
.nasa
.gov
!
42 ! Fax
: (650) 604-3957 !
44 !-------------------------------------------------------------------------!
47 c---------------------------------------------------------------------
51 c R. F. Van der Wijngaart
54 c---------------------------------------------------------------------
57 c---------------------------------------------------------------------
58 c---------------------------------------------------------------------
60 c---------------------------------------------------------------------
61 c---------------------------------------------------------------------
64 integer na
, nonzer
, niter
65 double precision shift
, rcond
74 integer num_proc_rows
, num_proc_cols
75 parameter( num_proc_rows
= 2, num_proc_cols
= 2)
77 parameter( num_procs
= num_proc_cols
* num_proc_rows
)
80 parameter( nz
= na*
(nonzer
+1)/num_procs*
(nonzer
+1)+nonzer
81 > + na*
(nonzer
+2+num_procs
/256)/num_proc_cols
)
83 common / partit_size
/ naa
, nzz
,
107 common / main_int_mem
/ colidx
, rowstr
,
109 integer colidx
(nz
), rowstr
(na
+1),
110 > iv
(2*na
+1), arow
(nz
), acol
(nz
)
113 c---------------------------------
114 c Coarray Decalarations
115 c---------------------------------
116 double precision v
(na
+1)[0:*], aelt
(nz
)[0:*], a
(nz
)[0:*],
117 > x
(na
/num_proc_rows
+2)[0:*],
118 > z
(na
/num_proc_rows
+2)[0:*],
119 > p
(na
/num_proc_rows
+2)[0:*],
120 > q
(na
/num_proc_rows
+2)[0:*],
121 > r
(na
/num_proc_rows
+2)[0:*],
122 > w
(na
/num_proc_rows
+2)[0:*]
125 common /urando
/ amult
, tran
126 double precision amult
, tran
131 integer reduce_exch_proc
(num_proc_cols
)
132 integer reduce_send_starts
(num_proc_cols
)
133 integer reduce_send_lengths
(num_proc_cols
)
134 integer reduce_recv_lengths
(num_proc_cols
)
135 integer reduce_rrecv_starts
(num_proc_cols
)
136 c---------------------------------
137 c Coarray Decalarations
138 c---------------------------------
139 integer reduce_recv_starts
(num_proc_cols
)[0:*]
141 integer i
, j
, k
, it
, me
, nprocs
, root
143 double precision zeta
, randlc
145 double precision rnorm
146 c---------------------------------
147 c Coarray Decalarations
148 c---------------------------------
149 double precision norm_temp1
(2)[0:*], norm_temp2
(2)[0:*]
151 double precision t
, tmax
, mflops
152 double precision u
(1), umax
(1)
154 double precision timer_read
157 double precision zeta_verify_value
, epsilon
, err
159 c---------------------------------------------------------------------
160 c Explicit interface for conj_grad, due to coarray args
161 c---------------------------------------------------------------------
164 subroutine conj_grad
( colidx
,
176 > reduce_send_starts
,
177 > reduce_send_lengths
,
178 > reduce_recv_starts
,
179 > reduce_recv_lengths
,
180 > reduce_rrecv_starts
)
182 common / partit_size
/ naa
, nzz
,
184 > proc_col
, proc_row
,
196 > proc_col
, proc_row
,
206 double precision x
(*),
209 integer colidx
(nzz
), rowstr
(naa
+1)
211 double precision p
(*),
214 > w
(*)[0:*] ! used as work temporary
217 integer reduce_exch_proc
(l2npcols
)
218 integer reduce_send_starts
(l2npcols
)
219 integer reduce_send_lengths
(l2npcols
)
220 integer reduce_recv_starts
(l2npcols
)[0:*]
221 integer reduce_recv_lengths
(l2npcols
)
222 integer reduce_rrecv_starts
(l2npcols
)
224 double precision rnorm
230 c---------------------------------------------------------------------
231 c The call to the conjugate gradient routine:
232 c---------------------------------------------------------------------
233 call conj_grad
( colidx
,
245 > reduce_send_starts
,
246 > reduce_send_lengths
,
247 > reduce_recv_starts
,
248 > reduce_recv_lengths
,
249 > reduce_rrecv_starts
)
256 c---------------------------------------------------------------------
257 c---------------------------------------------------------------------
258 subroutine conj_grad
( colidx
,
270 > reduce_send_starts
,
271 > reduce_send_lengths
,
272 > reduce_recv_starts
,
273 > reduce_recv_lengths
,
274 > reduce_rrecv_starts
)
275 c---------------------------------------------------------------------
276 c---------------------------------------------------------------------
278 c---------------------------------------------------------------------
279 c Floaging point arrays here are named as in NPB1 spec discussion of
281 c---------------------------------------------------------------------
287 common / partit_size
/ naa
, nzz
,
289 > proc_col
, proc_row
,
300 > proc_col
, proc_row
,
312 double precision x
(*),
315 integer colidx
(nzz
), rowstr
(naa
+1)
317 double precision p
(*),
320 > w
(*)[0:*] ! used as work temporary
323 integer reduce_exch_proc
(l2npcols
)
324 integer reduce_send_starts
(l2npcols
)
325 integer reduce_send_lengths
(l2npcols
)
326 integer reduce_recv_starts
(l2npcols
)[0:*]
327 integer reduce_recv_lengths
(l2npcols
)
328 integer reduce_rrecv_starts
(l2npcols
)
330 integer recv_start_idx
, recv_end_idx
, send_start_idx
,
331 > send_end_idx
, recv_length
333 integer i
, j
, k
, ierr
334 integer cgit
, cgitmax
336 double precision, save
:: d
[0:*], rho
[0:*]
337 double precision sum
, rho0
, alpha
, beta
, rnorm
340 double precision timer_read
346 end ! end of routine conj_grad