1 /* This test is similar to data-2.c, but it uses acc_* library functions
11 main (int argc
, char **argv
)
13 int N
= 128; //1024 * 1024;
14 float *a
, *b
, *c
, *d
, *e
;
15 void *d_a
, *d_b
, *d_c
, *d_d
;
19 nbytes
= N
* sizeof (float);
21 a
= (float *) malloc (nbytes
);
22 b
= (float *) malloc (nbytes
);
23 c
= (float *) malloc (nbytes
);
24 d
= (float *) malloc (nbytes
);
25 e
= (float *) malloc (nbytes
);
27 for (i
= 0; i
< N
; i
++)
33 d_a
= acc_copyin (a
, nbytes
);
34 d_b
= acc_copyin (b
, nbytes
);
35 acc_copyin (&N
, sizeof (int));
37 #pragma acc parallel present (a[0:N], b[0:N], N) async wait
39 for (i
= 0; i
< N
; i
++)
44 acc_memcpy_from_device (a
, d_a
, nbytes
);
45 acc_memcpy_from_device (b
, d_b
, nbytes
);
47 for (i
= 0; i
< N
; i
++)
53 for (i
= 0; i
< N
; i
++)
59 acc_update_device (a
, nbytes
);
60 acc_update_device (b
, nbytes
);
62 #pragma acc parallel present (a[0:N], b[0:N], N) async (1)
64 for (i
= 0; i
< N
; i
++)
69 acc_memcpy_from_device (a
, d_a
, nbytes
);
70 acc_memcpy_from_device (b
, d_b
, nbytes
);
72 for (i
= 0; i
< N
; i
++)
78 for (i
= 0; i
< N
; i
++)
86 acc_update_device (a
, nbytes
);
87 acc_update_device (b
, nbytes
);
88 d_c
= acc_copyin (c
, nbytes
);
89 d_d
= acc_copyin (d
, nbytes
);
91 #pragma acc parallel present (a[0:N], b[0:N], N) async (1)
93 for (i
= 0; i
< N
; i
++)
94 b
[i
] = (a
[i
] * a
[i
] * a
[i
]) / a
[i
];
96 #pragma acc parallel present (a[0:N], c[0:N], N) async (2)
98 for (i
= 0; i
< N
; i
++)
99 c
[i
] = (a
[i
] + a
[i
] + a
[i
] + a
[i
]) / a
[i
];
101 #pragma acc parallel present (a[0:N], d[0:N], N) async (3)
103 for (i
= 0; i
< N
; i
++)
104 d
[i
] = ((a
[i
] * a
[i
] + a
[i
]) / a
[i
]) - a
[i
];
108 acc_memcpy_from_device (a
, d_a
, nbytes
);
109 acc_memcpy_from_device (b
, d_b
, nbytes
);
110 acc_memcpy_from_device (c
, d_c
, nbytes
);
111 acc_memcpy_from_device (d
, d_d
, nbytes
);
113 for (i
= 0; i
< N
; i
++)
128 for (i
= 0; i
< N
; i
++)
137 acc_update_device (a
, nbytes
);
138 acc_update_device (b
, nbytes
);
139 acc_update_device (c
, nbytes
);
140 acc_update_device (d
, nbytes
);
141 acc_copyin (e
, nbytes
);
143 #pragma acc parallel present (a[0:N], b[0:N], N) async (1)
144 for (int ii
= 0; ii
< N
; ii
++)
145 b
[ii
] = (a
[ii
] * a
[ii
] * a
[ii
]) / a
[ii
];
147 #pragma acc parallel present (a[0:N], c[0:N], N) async (2)
148 for (int ii
= 0; ii
< N
; ii
++)
149 c
[ii
] = (a
[ii
] + a
[ii
] + a
[ii
] + a
[ii
]) / a
[ii
];
151 #pragma acc parallel present (a[0:N], d[0:N], N) async (3)
152 for (int ii
= 0; ii
< N
; ii
++)
153 d
[ii
] = ((a
[ii
] * a
[ii
] + a
[ii
]) / a
[ii
]) - a
[ii
];
155 #pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N], N) \
157 for (int ii
= 0; ii
< N
; ii
++)
158 e
[ii
] = a
[ii
] + b
[ii
] + c
[ii
] + d
[ii
];
161 acc_copyout (a
, nbytes
);
162 acc_copyout (b
, nbytes
);
163 acc_copyout (c
, nbytes
);
164 acc_copyout (d
, nbytes
);
165 acc_copyout (e
, nbytes
);
166 acc_delete (&N
, sizeof (int));
168 for (i
= 0; i
< N
; i
++)