2 /* Double float has 53 bits of precision (52 stored fraction bits plus
   the implicit leading bit).  FRAC is 1 / 2^48; close_enough compares
   the quotient diff / a against it.  */
3 #define FRAC (1.0 / (1LL << 48))
/* Return nonzero when diff / a is below FRAC, zero otherwise.

   The statements that compute diff are not visible in this excerpt;
   presumably it is the absolute difference of A and B -- TODO confirm.
   NOTE(review): the visible expression divides by A, so the check is
   relative to A and the visible code does not guard against A == 0.  */
6 int close_enough (Type a
, Type b
)
12 return diff
/ a
< FRAC
;
/* Check a sum and a product reduction on an OpenACC 'loop vector'.

   tsum starts at 0 and tprod at 1.  A parallel region with
   vector_length(32) copies in ary[0:N]; inside it a loop over ix in
   [0, N) carries reduction(+:tsum) and reduction(*:tprod).  Afterwards
   SUM is compared with tsum and PROD with tprod through close_enough.

   The loop body and the return statements are not visible in this
   excerpt.  Presumably the loop accumulates ARY elements and the
   function returns nonzero on a mismatch (the caller tests the result
   in an 'if') -- TODO confirm.  */
17 static int __attribute__ ((noinline
))
18 vector (Type ary
[N
], Type sum
, Type prod
)
20 Type tsum
= 0, tprod
= 1;
22 #pragma acc parallel vector_length(32) copyin(ary[0:N])
24 #pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
25 for (int ix
= 0; ix
< N
; ix
++)
32 if (!close_enough (sum
, tsum
))
35 if (!close_enough (prod
, tprod
))
/* Check a sum and a product reduction on an OpenACC 'loop worker'.

   tsum starts at 0 and tprod at 1.  A parallel region with
   num_workers(32) copies in ary[0:N]; inside it a loop over ix in
   [0, N) carries reduction(+:tsum) and reduction(*:tprod).  Afterwards
   SUM is compared with tsum and PROD with tprod through close_enough.

   The loop body and the return statements are not visible in this
   excerpt.  Presumably the loop accumulates ARY elements and the
   function returns nonzero on a mismatch (the caller tests the result
   in an 'if') -- TODO confirm.  */
41 static int __attribute__ ((noinline
))
42 worker (Type ary
[N
], Type sum
, Type prod
)
44 Type tsum
= 0, tprod
= 1;
46 #pragma acc parallel num_workers(32) copyin(ary[0:N])
48 #pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
49 for (int ix
= 0; ix
< N
; ix
++)
56 if (!close_enough (sum
, tsum
))
59 if (!close_enough (prod
, tprod
))
/* Check a sum and a product reduction on an OpenACC 'loop gang'.

   tsum starts at 0 and tprod at 1.  A parallel region with
   num_gangs (32) copies in ary[0:N]; inside it a loop over ix in
   [0, N) carries reduction(+:tsum) and reduction(*:tprod).  Afterwards
   SUM is compared with tsum and PROD with tprod through close_enough.

   The loop body and the return statements are not visible in this
   excerpt.  Presumably the loop accumulates ARY elements and the
   function returns nonzero on a mismatch (the caller tests the result
   in an 'if') -- TODO confirm.  */
65 static int __attribute__ ((noinline
))
66 gang (Type ary
[N
], Type sum
, Type prod
)
68 Type tsum
= 0, tprod
= 1;
70 #pragma acc parallel num_gangs (32) copyin(ary[0:N])
72 #pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
73 for (int ix
= 0; ix
< N
; ix
++)
80 if (!close_enough (sum
, tsum
))
83 if (!close_enough (prod
, tprod
))
/* Driver statements; the enclosing function header is not visible in
   this excerpt.

   Declares the input array ary[N] with sum starting at 0 and prod at 1,
   then loops over ix in [0, N) computing frac = ix * (1.0f / 1024) + 1.0f
   in single precision.  How frac feeds ary, sum and prod is not visible;
   presumably it fills ary[ix] and accumulates the expected results on
   the host -- TODO confirm.

   The vector, worker and gang checks are then each called with the same
   (ary, sum, prod) and their results are tested for nonzero; the
   actions taken in those branches are not visible here.  */
91 Type ary
[N
], sum
= 0, prod
= 1;
93 for (int ix
= 0; ix
< N
; ix
++)
95 float frac
= ix
* (1.0f
/ 1024) + 1.0f
;
102 if (vector (ary
, sum
, prod
))
105 if (worker (ary
, sum
, prod
))
108 if (gang (ary
, sum
, prod
))