1 /* Test asynchronous, unstructured data regions, runtime library variant. */
2 /* See also data-2.c. */
10 main (int argc
, char **argv
)
13 float *a
, *b
, *c
, *d
, *e
;
14 void *d_a
, *d_b
, *d_c
, *d_d
;
18 nbytes
= N
* sizeof (float);
20 a
= (float *) malloc (nbytes
);
21 b
= (float *) malloc (nbytes
);
22 c
= (float *) malloc (nbytes
);
23 d
= (float *) malloc (nbytes
);
24 e
= (float *) malloc (nbytes
);
26 for (i
= 0; i
< N
; i
++)
32 acc_copyin_async (a
, nbytes
, acc_async_noval
);
33 acc_copyin_async (b
, nbytes
, acc_async_noval
);
34 acc_copyin_async (&N
, sizeof (int), acc_async_noval
);
36 #pragma acc parallel present (a[0:N], b[0:N], N) async
38 for (i
= 0; i
< N
; i
++)
41 d_a
= acc_deviceptr (a
);
42 acc_memcpy_from_device_async (a
, d_a
, nbytes
, acc_async_noval
);
43 d_b
= acc_deviceptr (b
);
44 acc_memcpy_from_device_async (b
, d_b
, nbytes
, acc_async_noval
);
46 acc_wait (acc_async_noval
);
48 for (i
= 0; i
< N
; i
++)
54 for (i
= 0; i
< N
; i
++)
60 acc_update_device_async (a
, nbytes
, 1);
61 acc_update_device_async (b
, nbytes
, 1);
63 #pragma acc parallel present (a[0:N], b[0:N], N) async (1)
65 for (i
= 0; i
< N
; i
++)
68 acc_memcpy_from_device_async (a
, d_a
, nbytes
, 1);
69 acc_memcpy_from_device_async (b
, d_b
, nbytes
, 1);
72 /* Test unseen async-argument. */
75 for (i
= 0; i
< N
; i
++)
81 for (i
= 0; i
< N
; i
++)
89 acc_update_device_async (a
, nbytes
, 0);
90 acc_update_device_async (b
, nbytes
, 1);
91 acc_copyin_async (c
, nbytes
, 2);
92 acc_copyin_async (d
, nbytes
, 3);
94 #pragma acc parallel present (a[0:N], b[0:N], N) wait (0) async (1)
96 for (i
= 0; i
< N
; i
++)
97 b
[i
] = (a
[i
] * a
[i
] * a
[i
]) / a
[i
];
99 #pragma acc parallel present (a[0:N], c[0:N], N) wait (0) async (2)
101 for (i
= 0; i
< N
; i
++)
102 c
[i
] = (a
[i
] + a
[i
] + a
[i
] + a
[i
]) / a
[i
];
104 #pragma acc parallel present (a[0:N], d[0:N], N) wait (0) async (3)
106 for (i
= 0; i
< N
; i
++)
107 d
[i
] = ((a
[i
] * a
[i
] + a
[i
]) / a
[i
]) - a
[i
];
109 acc_memcpy_from_device_async (a
, d_a
, nbytes
, 0);
110 acc_memcpy_from_device_async (b
, d_b
, nbytes
, 1);
111 d_c
= acc_deviceptr (c
);
112 acc_memcpy_from_device_async (c
, d_c
, nbytes
, 2);
113 d_d
= acc_deviceptr (d
);
114 acc_memcpy_from_device_async (d
, d_d
, nbytes
, 3);
116 acc_wait_all_async (0);
119 for (i
= 0; i
< N
; i
++)
121 assert (a
[i
] == 3.0);
122 assert (b
[i
] == 9.0);
123 assert (c
[i
] == 4.0);
124 assert (d
[i
] == 1.0);
127 for (i
= 0; i
< N
; i
++)
136 acc_update_device_async (a
, nbytes
, 10);
137 acc_update_device_async (b
, nbytes
, 11);
138 acc_update_device_async (c
, nbytes
, 12);
139 acc_update_device_async (d
, nbytes
, 13);
140 acc_copyin_async (e
, nbytes
, 14);
142 #pragma acc parallel present (a[0:N], b[0:N], N) wait (10) async (11)
143 for (int ii
= 0; ii
< N
; ii
++)
144 b
[ii
] = (a
[ii
] * a
[ii
] * a
[ii
]) / a
[ii
];
146 #pragma acc parallel present (a[0:N], c[0:N], N) wait (10) async (12)
147 for (int ii
= 0; ii
< N
; ii
++)
148 c
[ii
] = (a
[ii
] + a
[ii
] + a
[ii
] + a
[ii
]) / a
[ii
];
150 #pragma acc parallel present (a[0:N], d[0:N], N) wait (10) async (13)
151 for (int ii
= 0; ii
< N
; ii
++)
152 d
[ii
] = ((a
[ii
] * a
[ii
] + a
[ii
]) / a
[ii
]) - a
[ii
];
154 #pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N], N) wait (11) wait (12) wait (13) async (14)
155 for (int ii
= 0; ii
< N
; ii
++)
156 e
[ii
] = a
[ii
] + b
[ii
] + c
[ii
] + d
[ii
];
158 acc_copyout_async (a
, nbytes
, 10);
159 acc_copyout_async (b
, nbytes
, 11);
160 acc_copyout_async (c
, nbytes
, 12);
161 acc_copyout_async (d
, nbytes
, 13);
162 acc_copyout_async (e
, nbytes
, 14);
163 acc_delete_async (&N
, sizeof (int), 15);
166 for (i
= 0; i
< N
; i
++)
168 assert (a
[i
] == 2.0);
169 assert (b
[i
] == 4.0);
170 assert (c
[i
] == 4.0);
171 assert (d
[i
] == 1.0);
172 assert (e
[i
] == 11.0);