/* Preamble: platform guard, comparison tolerance and the element type used
   throughout.  The #if condition is true unless both __hppa__ and __hpux__
   are defined.  NOTE(review): the lines it guards and its #endif are not
   visible in this listing -- confirm against the full file.  */
2 #if !defined(__hppa__) || !defined(__hpux__)
6 /* Double float has 53 bits of fraction. */
/* FRAC is 2^-48.  close_enough compares a ratio of squared magnitudes
   against FRAC * FRAC, i.e. it accepts a relative error below 2^-48.  */
7 #define FRAC (1.0 / (1LL << 48))
/* Element type for every array, sum and product: double-precision complex.  */
8 typedef double _Complex Type
;
/* close_enough: return nonzero when |diff|^2 / |a|^2 is below FRAC * FRAC,
   i.e. when diff is small relative to a.
   NOTE(review): `diff` is not declared in the visible lines and `b` is never
   referenced here; presumably diff is a - b -- confirm against the full file.
   The function braces are likewise absent from this listing.
   NOTE(review): no guard for mag2_a == 0 is visible; with a == 0 the
   quotient would be inf or NaN and the comparison would yield 0 -- verify
   that callers never pass a zero reference value.  */
10 int close_enough (Type a
, Type b
)
/* Squared magnitude of a: re(a)^2 + im(a)^2 (avoids a square root).  */
13 double mag2_a
= __real__(a
) * __real__ (a
) + __imag__ (a
) * __imag__ (a
);
/* Squared magnitude of diff, computed the same way.  */
14 double mag2_diff
= (__real__(diff
) * __real__ (diff
)
15 + __imag__ (diff
) * __imag__ (diff
));
/* Both sides are squared quantities, hence FRAC * FRAC rather than FRAC.  */
17 return mag2_diff
/ mag2_a
< (FRAC
* FRAC
);
/* vector: run a sum (+) and a product (*) reduction over ary in an OpenACC
   loop marked `vector`, then compare the results with the caller-supplied
   reference values `sum` and `prod` using close_enough.
   Marked noinline so the compiler keeps it as a separate function.
   NOTE(review): N is not defined in the visible lines, and the loop body and
   the statements controlled by the two `if`s are missing from this listing;
   presumably the loop accumulates ary[ix] into tsum/tprod and each failed
   check returns nonzero -- confirm against the full file.  */
22 static int __attribute__ ((noinline
))
23 vector (Type ary
[N
], Type sum
, Type prod
)
/* Reduction accumulators start at the identity of their operator.  */
25 Type tsum
= 0, tprod
= 1;
/* Parallel region requesting vector_length(32); ary[0:N] is listed in copyin.  */
27 #pragma acc parallel vector_length(32) copyin(ary[0:N])
/* One loop carries both reductions: + on tsum and * on tprod.  */
29 #pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
30 for (int ix
= 0; ix
< N
; ix
++)
/* Reference value is passed first: close_enough normalises by its first
   argument.  */
37 if (!close_enough (sum
, tsum
))
40 if (!close_enough (prod
, tprod
))
/* worker: same check as the `vector` function in this file, but the
   parallel region requests num_workers(32) and the loop is marked `worker`.
   Computes + and * reductions over ary and compares them with the reference
   values `sum` and `prod` using close_enough.
   Marked noinline so the compiler keeps it as a separate function.
   NOTE(review): N is not defined in the visible lines, and the loop body and
   the statements controlled by the two `if`s are missing from this listing;
   presumably the loop accumulates ary[ix] into tsum/tprod and each failed
   check returns nonzero -- confirm against the full file.  */
46 static int __attribute__ ((noinline
))
47 worker (Type ary
[N
], Type sum
, Type prod
)
/* Reduction accumulators start at the identity of their operator.  */
49 Type tsum
= 0, tprod
= 1;
/* Parallel region requesting num_workers(32); ary[0:N] is listed in copyin.  */
51 #pragma acc parallel num_workers(32) copyin(ary[0:N])
/* One loop carries both reductions: + on tsum and * on tprod.  */
53 #pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
54 for (int ix
= 0; ix
< N
; ix
++)
/* Reference value is passed first: close_enough normalises by its first
   argument.  */
61 if (!close_enough (sum
, tsum
))
64 if (!close_enough (prod
, tprod
))
/* gang: same check as the `vector` and `worker` functions in this file, but
   the parallel region requests num_gangs (32) and the loop is marked `gang`.
   Computes + and * reductions over ary and compares them with the reference
   values `sum` and `prod` using close_enough.
   Marked noinline so the compiler keeps it as a separate function.
   NOTE(review): N is not defined in the visible lines, and the loop body and
   the statements controlled by the two `if`s are missing from this listing;
   presumably the loop accumulates ary[ix] into tsum/tprod and each failed
   check returns nonzero -- confirm against the full file.  */
70 static int __attribute__ ((noinline
))
71 gang (Type ary
[N
], Type sum
, Type prod
)
/* Reduction accumulators start at the identity of their operator.  */
73 Type tsum
= 0, tprod
= 1;
/* Parallel region requesting num_gangs (32); ary[0:N] is listed in copyin.  */
75 #pragma acc parallel num_gangs (32) copyin(ary[0:N])
/* One loop carries both reductions: + on tsum and * on tprod.  */
77 #pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
78 for (int ix
= 0; ix
< N
; ix
++)
/* Reference value is passed first: close_enough normalises by its first
   argument.  */
85 if (!close_enough (sum
, tsum
))
88 if (!close_enough (prod
, tprod
))
/* Driver fragment: fills ary with N complex values and passes it, together
   with the reference values sum and prod, to vector, worker and gang.
   NOTE(review): the enclosing function header (presumably main) and all
   braces are missing from this listing.  No statement that accumulates into
   sum or prod is visible; presumably the fill loop also does
   sum += ary[ix] and prod *= ary[ix] -- confirm against the full file.  */
96 Type ary
[N
], sum
= 0, prod
= 1;
98 for (int ix
= 0; ix
< N
; ix
++)
/* frac = 1 + ix/1024, so it starts at 1.0 and grows in steps of 2^-10.  */
100 double frac
= ix
* (1.0 / 1024) + 1.0;
/* Element is frac + (2*frac - 1)i; the `j` suffix is the GNU C imaginary
   constant notation.  */
102 ary
[ix
] = frac
+ frac
* 2.0j
- 1.0j
;
/* Each checker is tested for a nonzero result.  NOTE(review): the statements
   these `if`s control are not visible; presumably a failure exit.  */
107 if (vector (ary
, sum
, prod
))
110 if (worker (ary
, sum
, prod
))
113 if (gang (ary
, sum
, prod
))