svn merge -r 219682:220584 svn+ssh://gcc.gnu.org/svn/gcc/trunk
[official-gcc.git] / libgomp / oacc-parallel.c
blob727fced3ef7e9941fdd03e9d9fa64dff2eac48fa
/* Copyright (C) 2013-2015 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles OpenACC constructs.  */
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #include <string.h>
35 #include <stdarg.h>
36 #include <assert.h>
38 static int
39 find_pset (int pos, size_t mapnum, unsigned short *kinds)
41 if (pos + 1 >= mapnum)
42 return 0;
44 unsigned char kind = kinds[pos+1] & 0xff;
46 return kind == GOMP_MAP_TO_PSET;
50 /* Ensure that the target device for DEVICE_TYPE is initialised (and that
51 plugins have been loaded if appropriate). The ACC_dev variable for the
52 current thread will be set appropriately for the given device type on
53 return. */
55 attribute_hidden void
56 select_acc_device (int device_type)
58 goacc_lazy_initialize ();
60 if (device_type == GOMP_DEVICE_HOST_FALLBACK)
61 return;
63 if (device_type == acc_device_none)
64 device_type = acc_device_host;
66 if (device_type >= 0)
68 /* NOTE: this will go badly if the surrounding data environment is set up
69 to use a different device type. We'll just have to trust that users
70 know what they're doing... */
71 acc_set_device_type (device_type);
75 static void goacc_wait (int async, int num_waits, va_list ap);
77 void
78 GOACC_parallel (int device, void (*fn) (void *),
79 size_t mapnum, void **hostaddrs, size_t *sizes,
80 unsigned short *kinds,
81 int num_gangs, int num_workers, int vector_length,
82 int async, int num_waits, ...)
84 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
85 va_list ap;
86 struct goacc_thread *thr;
87 struct gomp_device_descr *acc_dev;
88 struct target_mem_desc *tgt;
89 void **devaddrs;
90 unsigned int i;
91 struct splay_tree_key_s k;
92 splay_tree_key tgt_fn_key;
93 void (*tgt_fn);
95 if (num_workers != 1)
96 gomp_fatal ("num_workers (%d) different from one is not yet supported",
97 num_workers);
99 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
100 __FUNCTION__, mapnum, hostaddrs, sizes, kinds, async);
102 select_acc_device (device);
104 thr = goacc_thread ();
105 acc_dev = thr->dev;
107 /* Host fallback if "if" clause is false or if the current device is set to
108 the host. */
109 if (host_fallback)
111 goacc_save_and_set_bind (acc_device_host);
112 fn (hostaddrs);
113 goacc_restore_bind ();
114 return;
116 else if (acc_device_type (acc_dev->type) == acc_device_host)
118 fn (hostaddrs);
119 return;
122 va_start (ap, num_waits);
124 if (num_waits > 0)
125 goacc_wait (async, num_waits, ap);
127 va_end (ap);
129 acc_dev->openacc.async_set_async_func (async);
131 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
133 k.host_start = (uintptr_t) fn;
134 k.host_end = k.host_start + 1;
135 gomp_mutex_lock (&acc_dev->mem_map.lock);
136 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k);
137 gomp_mutex_unlock (&acc_dev->mem_map.lock);
139 if (tgt_fn_key == NULL)
140 gomp_fatal ("target function wasn't mapped");
142 tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start;
144 else
145 tgt_fn = (void (*)) fn;
147 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
148 false);
150 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
151 for (i = 0; i < mapnum; i++)
152 devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
153 + tgt->list[i]->tgt_offset);
155 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
156 num_gangs, num_workers, vector_length, async,
157 tgt);
159 /* If running synchronously, unmap immediately. */
160 if (async < acc_async_noval)
161 gomp_unmap_vars (tgt, true);
162 else
164 gomp_copy_from_async (tgt);
165 acc_dev->openacc.register_async_cleanup_func (tgt);
168 acc_dev->openacc.async_set_async_func (acc_async_sync);
171 void
172 GOACC_data_start (int device, size_t mapnum,
173 void **hostaddrs, size_t *sizes, unsigned short *kinds)
175 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
176 struct target_mem_desc *tgt;
178 gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
179 __FUNCTION__, mapnum, hostaddrs, sizes, kinds);
181 select_acc_device (device);
183 struct goacc_thread *thr = goacc_thread ();
184 struct gomp_device_descr *acc_dev = thr->dev;
186 /* Host fallback or 'do nothing'. */
187 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
188 || host_fallback)
190 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
191 tgt->prev = thr->mapped_data;
192 thr->mapped_data = tgt;
194 return;
197 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
198 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
199 false);
200 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
201 tgt->prev = thr->mapped_data;
202 thr->mapped_data = tgt;
205 void
206 GOACC_data_end (void)
208 struct goacc_thread *thr = goacc_thread ();
209 struct target_mem_desc *tgt = thr->mapped_data;
211 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
212 thr->mapped_data = tgt->prev;
213 gomp_unmap_vars (tgt, true);
214 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
217 void
218 GOACC_enter_exit_data (int device, size_t mapnum,
219 void **hostaddrs, size_t *sizes, unsigned short *kinds,
220 int async, int num_waits, ...)
222 struct goacc_thread *thr;
223 struct gomp_device_descr *acc_dev;
224 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
225 bool data_enter = false;
226 size_t i;
228 select_acc_device (device);
230 thr = goacc_thread ();
231 acc_dev = thr->dev;
233 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
234 || host_fallback)
235 return;
237 if (num_waits > 0)
239 va_list ap;
241 va_start (ap, num_waits);
243 goacc_wait (async, num_waits, ap);
245 va_end (ap);
248 acc_dev->openacc.async_set_async_func (async);
250 /* Determine if this is an "acc enter data". */
251 for (i = 0; i < mapnum; ++i)
253 unsigned char kind = kinds[i] & 0xff;
255 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
256 continue;
258 if (kind == GOMP_MAP_FORCE_ALLOC
259 || kind == GOMP_MAP_FORCE_PRESENT
260 || kind == GOMP_MAP_FORCE_TO)
262 data_enter = true;
263 break;
266 if (kind == GOMP_MAP_FORCE_DEALLOC
267 || kind == GOMP_MAP_FORCE_FROM)
268 break;
270 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
271 kind);
274 if (data_enter)
276 for (i = 0; i < mapnum; i++)
278 unsigned char kind = kinds[i] & 0xff;
280 /* Scan for PSETs. */
281 int psets = find_pset (i, mapnum, kinds);
283 if (!psets)
285 switch (kind)
287 case GOMP_MAP_POINTER:
288 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
289 &kinds[i]);
290 break;
291 case GOMP_MAP_FORCE_ALLOC:
292 acc_create (hostaddrs[i], sizes[i]);
293 break;
294 case GOMP_MAP_FORCE_PRESENT:
295 acc_present_or_copyin (hostaddrs[i], sizes[i]);
296 break;
297 case GOMP_MAP_FORCE_TO:
298 acc_present_or_copyin (hostaddrs[i], sizes[i]);
299 break;
300 default:
301 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
302 kind);
303 break;
306 else
308 gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
309 /* Increment 'i' by two because OpenACC requires fortran
310 arrays to be contiguous, so each PSET is associated with
311 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
312 one MAP_POINTER. */
313 i += 2;
317 else
318 for (i = 0; i < mapnum; ++i)
320 unsigned char kind = kinds[i] & 0xff;
322 int psets = find_pset (i, mapnum, kinds);
324 if (!psets)
326 switch (kind)
328 case GOMP_MAP_POINTER:
329 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
330 == GOMP_MAP_FORCE_FROM,
331 async, 1);
332 break;
333 case GOMP_MAP_FORCE_DEALLOC:
334 acc_delete (hostaddrs[i], sizes[i]);
335 break;
336 case GOMP_MAP_FORCE_FROM:
337 acc_copyout (hostaddrs[i], sizes[i]);
338 break;
339 default:
340 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
341 kind);
342 break;
345 else
347 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
348 == GOMP_MAP_FORCE_FROM, async, 3);
349 /* See the above comment. */
350 i += 2;
354 acc_dev->openacc.async_set_async_func (acc_async_sync);
/* Entry point for an OpenACC kernels region.  The arguments have the same
   meaning as for GOACC_parallel, which does the actual work.

   The NUM_WAITS trailing arguments are consumed here, so GOACC_parallel is
   called with a wait count of zero.  */

void
GOACC_kernels (int device, void (*fn) (void *),
               size_t mapnum, void **hostaddrs, size_t *sizes,
               unsigned short *kinds,
               int num_gangs, int num_workers, int vector_length,
               int async, int num_waits, ...)
{
  gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
              __FUNCTION__, mapnum, hostaddrs, sizes, kinds);

  /* goacc_wait looks at the current thread's device, so select it first.  */
  select_acc_device (device);

  if (num_waits > 0)
    {
      va_list ap;

      va_start (ap, num_waits);
      goacc_wait (async, num_waits, ap);
      va_end (ap);
    }

  GOACC_parallel (device, fn, mapnum, hostaddrs, sizes, kinds,
                  num_gangs, num_workers, vector_length, async, 0);
}
382 static void
383 goacc_wait (int async, int num_waits, va_list ap)
385 struct goacc_thread *thr = goacc_thread ();
386 struct gomp_device_descr *acc_dev = thr->dev;
387 int i;
389 assert (num_waits >= 0);
391 if (async == acc_async_sync && num_waits == 0)
393 acc_wait_all ();
394 return;
397 if (async == acc_async_sync && num_waits)
399 for (i = 0; i < num_waits; i++)
401 int qid = va_arg (ap, int);
403 if (acc_async_test (qid))
404 continue;
406 acc_wait (qid);
408 return;
411 if (async == acc_async_noval && num_waits == 0)
413 acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
414 return;
417 for (i = 0; i < num_waits; i++)
419 int qid = va_arg (ap, int);
421 if (acc_async_test (qid))
422 continue;
424 /* If we're waiting on the same asynchronous queue as we're launching on,
425 the queue itself will order work as required, so there's no need to
426 wait explicitly. */
427 if (qid != async)
428 acc_dev->openacc.async_wait_async_func (qid, async);
432 void
433 GOACC_update (int device, size_t mapnum,
434 void **hostaddrs, size_t *sizes, unsigned short *kinds,
435 int async, int num_waits, ...)
437 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
438 size_t i;
440 select_acc_device (device);
442 struct goacc_thread *thr = goacc_thread ();
443 struct gomp_device_descr *acc_dev = thr->dev;
445 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
446 || host_fallback)
447 return;
449 if (num_waits > 0)
451 va_list ap;
453 va_start (ap, num_waits);
455 goacc_wait (async, num_waits, ap);
457 va_end (ap);
460 acc_dev->openacc.async_set_async_func (async);
462 for (i = 0; i < mapnum; ++i)
464 unsigned char kind = kinds[i] & 0xff;
466 switch (kind)
468 case GOMP_MAP_POINTER:
469 case GOMP_MAP_TO_PSET:
470 break;
472 case GOMP_MAP_FORCE_TO:
473 acc_update_device (hostaddrs[i], sizes[i]);
474 break;
476 case GOMP_MAP_FORCE_FROM:
477 acc_update_self (hostaddrs[i], sizes[i]);
478 break;
480 default:
481 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
482 break;
486 acc_dev->openacc.async_set_async_func (acc_async_sync);
/* Implement the OpenACC "wait" directive.  ASYNC is the queue named by an
   async clause (or one of the acc_async_* special values), and the
   NUM_WAITS trailing int arguments are the queues to wait for.  All of the
   work is done by goacc_wait.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list queue_ids;

  va_start (queue_ids, num_waits);
  goacc_wait (async, num_waits, queue_ids);
  va_end (queue_ids);
}
/* Return the number of threads, which this implementation always reports
   as one.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
/* Return the number of the calling thread, which this implementation
   always reports as zero.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}