PR c++/59366
[official-gcc.git] / libgomp / oacc-parallel.c
blob6d5386b1f942d7383c9c1f0c721467c0d8a6e41f
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #include <string.h>
35 #include <stdarg.h>
36 #include <assert.h>
37 #include <alloca.h>
39 static int
40 find_pset (int pos, size_t mapnum, unsigned short *kinds)
42 if (pos + 1 >= mapnum)
43 return 0;
45 unsigned char kind = kinds[pos+1] & 0xff;
47 return kind == GOMP_MAP_TO_PSET;
51 /* Ensure that the target device for DEVICE_TYPE is initialised (and that
52 plugins have been loaded if appropriate). The ACC_dev variable for the
53 current thread will be set appropriately for the given device type on
54 return. */
56 attribute_hidden void
57 select_acc_device (int device_type)
59 goacc_lazy_initialize ();
61 if (device_type == GOMP_DEVICE_HOST_FALLBACK)
62 return;
64 if (device_type == acc_device_none)
65 device_type = acc_device_host;
67 if (device_type >= 0)
69 /* NOTE: this will go badly if the surrounding data environment is set up
70 to use a different device type. We'll just have to trust that users
71 know what they're doing... */
72 acc_set_device_type (device_type);
/* Forward declaration: goacc_wait is defined further down in this file but
   is first called from GOACC_parallel.  */
76 static void goacc_wait (int async, int num_waits, va_list ap);
78 void
79 GOACC_parallel (int device, void (*fn) (void *), const void *offload_table,
80 size_t mapnum, void **hostaddrs, size_t *sizes,
81 unsigned short *kinds,
82 int num_gangs, int num_workers, int vector_length,
83 int async, int num_waits, ...)
85 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
86 va_list ap;
87 struct goacc_thread *thr;
88 struct gomp_device_descr *acc_dev;
89 struct target_mem_desc *tgt;
90 void **devaddrs;
91 unsigned int i;
92 struct splay_tree_key_s k;
93 splay_tree_key tgt_fn_key;
94 void (*tgt_fn);
96 if (num_gangs != 1)
97 gomp_fatal ("num_gangs (%d) different from one is not yet supported",
98 num_gangs);
99 if (num_workers != 1)
100 gomp_fatal ("num_workers (%d) different from one is not yet supported",
101 num_workers);
103 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
104 __FUNCTION__, mapnum, hostaddrs, sizes, kinds, async);
106 select_acc_device (device);
108 thr = goacc_thread ();
109 acc_dev = thr->dev;
111 /* Host fallback if "if" clause is false or if the current device is set to
112 the host. */
113 if (host_fallback)
115 goacc_save_and_set_bind (acc_device_host);
116 fn (hostaddrs);
117 goacc_restore_bind ();
118 return;
120 else if (acc_device_type (acc_dev->type) == acc_device_host)
122 fn (hostaddrs);
123 return;
126 va_start (ap, num_waits);
128 if (num_waits > 0)
129 goacc_wait (async, num_waits, ap);
131 va_end (ap);
133 acc_dev->openacc.async_set_async_func (async);
135 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
137 k.host_start = (uintptr_t) fn;
138 k.host_end = k.host_start + 1;
139 gomp_mutex_lock (&acc_dev->mem_map.lock);
140 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k);
141 gomp_mutex_unlock (&acc_dev->mem_map.lock);
143 if (tgt_fn_key == NULL)
144 gomp_fatal ("target function wasn't mapped");
146 tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start;
148 else
149 tgt_fn = (void (*)) fn;
151 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
152 false);
154 devaddrs = alloca (sizeof (void *) * mapnum);
155 for (i = 0; i < mapnum; i++)
156 devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
157 + tgt->list[i]->tgt_offset);
159 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
160 num_gangs, num_workers, vector_length, async,
161 tgt);
163 /* If running synchronously, unmap immediately. */
164 if (async < acc_async_noval)
165 gomp_unmap_vars (tgt, true);
166 else
168 gomp_copy_from_async (tgt);
169 acc_dev->openacc.register_async_cleanup_func (tgt);
172 acc_dev->openacc.async_set_async_func (acc_async_sync);
175 void
176 GOACC_data_start (int device, const void *offload_table, size_t mapnum,
177 void **hostaddrs, size_t *sizes, unsigned short *kinds)
179 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
180 struct target_mem_desc *tgt;
182 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
183 __FUNCTION__, mapnum, hostaddrs, sizes, kinds);
185 select_acc_device (device);
187 struct goacc_thread *thr = goacc_thread ();
188 struct gomp_device_descr *acc_dev = thr->dev;
190 /* Host fallback or 'do nothing'. */
191 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
192 || host_fallback)
194 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
195 tgt->prev = thr->mapped_data;
196 thr->mapped_data = tgt;
198 return;
201 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
202 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
203 false);
204 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
205 tgt->prev = thr->mapped_data;
206 thr->mapped_data = tgt;
209 void
210 GOACC_data_end (void)
212 struct goacc_thread *thr = goacc_thread ();
213 struct target_mem_desc *tgt = thr->mapped_data;
215 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
216 thr->mapped_data = tgt->prev;
217 gomp_unmap_vars (tgt, true);
218 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
221 void
222 GOACC_enter_exit_data (int device, const void *offload_table, size_t mapnum,
223 void **hostaddrs, size_t *sizes, unsigned short *kinds,
224 int async, int num_waits, ...)
226 struct goacc_thread *thr;
227 struct gomp_device_descr *acc_dev;
228 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
229 bool data_enter = false;
230 size_t i;
232 select_acc_device (device);
234 thr = goacc_thread ();
235 acc_dev = thr->dev;
237 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
238 || host_fallback)
239 return;
241 if (num_waits > 0)
243 va_list ap;
245 va_start (ap, num_waits);
247 goacc_wait (async, num_waits, ap);
249 va_end (ap);
252 acc_dev->openacc.async_set_async_func (async);
254 /* Determine if this is an "acc enter data". */
255 for (i = 0; i < mapnum; ++i)
257 unsigned char kind = kinds[i] & 0xff;
259 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
260 continue;
262 if (kind == GOMP_MAP_FORCE_ALLOC
263 || kind == GOMP_MAP_FORCE_PRESENT
264 || kind == GOMP_MAP_FORCE_TO)
266 data_enter = true;
267 break;
270 if (kind == GOMP_MAP_FORCE_DEALLOC
271 || kind == GOMP_MAP_FORCE_FROM)
272 break;
274 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
275 kind);
278 if (data_enter)
280 for (i = 0; i < mapnum; i++)
282 unsigned char kind = kinds[i] & 0xff;
284 /* Scan for PSETs. */
285 int psets = find_pset (i, mapnum, kinds);
287 if (!psets)
289 switch (kind)
291 case GOMP_MAP_POINTER:
292 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
293 &kinds[i]);
294 break;
295 case GOMP_MAP_FORCE_ALLOC:
296 acc_create (hostaddrs[i], sizes[i]);
297 break;
298 case GOMP_MAP_FORCE_PRESENT:
299 acc_present_or_copyin (hostaddrs[i], sizes[i]);
300 break;
301 case GOMP_MAP_FORCE_TO:
302 acc_present_or_copyin (hostaddrs[i], sizes[i]);
303 break;
304 default:
305 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
306 kind);
307 break;
310 else
312 gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
313 /* Increment 'i' by two because OpenACC requires fortran
314 arrays to be contiguous, so each PSET is associated with
315 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
316 one MAP_POINTER. */
317 i += 2;
321 else
322 for (i = 0; i < mapnum; ++i)
324 unsigned char kind = kinds[i] & 0xff;
326 int psets = find_pset (i, mapnum, kinds);
328 if (!psets)
330 switch (kind)
332 case GOMP_MAP_POINTER:
333 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
334 == GOMP_MAP_FORCE_FROM,
335 async, 1);
336 break;
337 case GOMP_MAP_FORCE_DEALLOC:
338 acc_delete (hostaddrs[i], sizes[i]);
339 break;
340 case GOMP_MAP_FORCE_FROM:
341 acc_copyout (hostaddrs[i], sizes[i]);
342 break;
343 default:
344 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
345 kind);
346 break;
349 else
351 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
352 == GOMP_MAP_FORCE_FROM, async, 3);
353 /* See the above comment. */
354 i += 2;
358 acc_dev->openacc.async_set_async_func (acc_async_sync);
361 static void
362 goacc_wait (int async, int num_waits, va_list ap)
364 struct goacc_thread *thr = goacc_thread ();
365 struct gomp_device_descr *acc_dev = thr->dev;
366 int i;
368 assert (num_waits >= 0);
370 if (async == acc_async_sync && num_waits == 0)
372 acc_wait_all ();
373 return;
376 if (async == acc_async_sync && num_waits)
378 for (i = 0; i < num_waits; i++)
380 int qid = va_arg (ap, int);
382 if (acc_async_test (qid))
383 continue;
385 acc_wait (qid);
387 return;
390 if (async == acc_async_noval && num_waits == 0)
392 acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
393 return;
396 for (i = 0; i < num_waits; i++)
398 int qid = va_arg (ap, int);
400 if (acc_async_test (qid))
401 continue;
403 /* If we're waiting on the same asynchronous queue as we're launching on,
404 the queue itself will order work as required, so there's no need to
405 wait explicitly. */
406 if (qid != async)
407 acc_dev->openacc.async_wait_async_func (qid, async);
411 void
412 GOACC_update (int device, const void *offload_table, size_t mapnum,
413 void **hostaddrs, size_t *sizes, unsigned short *kinds,
414 int async, int num_waits, ...)
416 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
417 size_t i;
419 select_acc_device (device);
421 struct goacc_thread *thr = goacc_thread ();
422 struct gomp_device_descr *acc_dev = thr->dev;
424 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
425 || host_fallback)
426 return;
428 if (num_waits > 0)
430 va_list ap;
432 va_start (ap, num_waits);
434 goacc_wait (async, num_waits, ap);
436 va_end (ap);
439 acc_dev->openacc.async_set_async_func (async);
441 for (i = 0; i < mapnum; ++i)
443 unsigned char kind = kinds[i] & 0xff;
445 switch (kind)
447 case GOMP_MAP_POINTER:
448 case GOMP_MAP_TO_PSET:
449 break;
451 case GOMP_MAP_FORCE_TO:
452 acc_update_device (hostaddrs[i], sizes[i]);
453 break;
455 case GOMP_MAP_FORCE_FROM:
456 acc_update_self (hostaddrs[i], sizes[i]);
457 break;
459 default:
460 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
461 break;
465 acc_dev->openacc.async_set_async_func (acc_async_sync);
/* Entry point for an OpenACC "wait" request.  ASYNC is the async queue
   argument and NUM_WAITS is the number of int queue ids that follow as
   variadic arguments.  All the work is done by goacc_wait.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list queues;

  va_start (queues, num_waits);
  goacc_wait (async, num_waits, queues);
  va_end (queues);
}
/* Return the number of threads.  This implementation always reports 1.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
/* Return the number of the calling thread.  This implementation always
   reports 0.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}