gcc/ChangeLog:
[official-gcc.git] / libgomp / oacc-parallel.c
blob1e08af70b4daaceab1870bc0f7f3fa25694ba520
1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
36 #endif
37 #include <string.h>
38 #include <stdarg.h>
39 #include <assert.h>
41 /* Returns the number of mappings associated with the pointer or pset. PSET
42 have three mappings, whereas pointer have two. */
44 static int
45 find_pointer (int pos, size_t mapnum, unsigned short *kinds)
47 if (pos + 1 >= mapnum)
48 return 0;
50 unsigned char kind = kinds[pos+1] & 0xff;
52 if (kind == GOMP_MAP_TO_PSET)
53 return 3;
54 else if (kind == GOMP_MAP_POINTER)
55 return 2;
57 return 0;
60 /* Handle the mapping pair that are presented when a
61 deviceptr clause is used with Fortran. */
63 static void
64 handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
65 unsigned short *kinds)
67 int i;
69 for (i = 0; i < mapnum; i++)
71 unsigned short kind1 = kinds[i] & 0xff;
73 /* Handle Fortran deviceptr clause. */
74 if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
76 unsigned short kind2;
78 if (i < (signed)mapnum - 1)
79 kind2 = kinds[i + 1] & 0xff;
80 else
81 kind2 = 0xffff;
83 if (sizes[i] == sizeof (void *))
84 continue;
86 /* At this point, we're dealing with a Fortran deviceptr.
87 If the next element is not what we're expecting, then
88 this is an instance of where the deviceptr variable was
89 not used within the region and the pointer was removed
90 by the gimplifier. */
91 if (kind2 == GOMP_MAP_POINTER
92 && sizes[i + 1] == 0
93 && hostaddrs[i] == *(void **)hostaddrs[i + 1])
95 kinds[i+1] = kinds[i];
96 sizes[i+1] = sizeof (void *);
99 /* Invalidate the entry. */
100 hostaddrs[i] = NULL;
105 static void goacc_wait (int async, int num_waits, va_list *ap);
108 /* Launch a possibly offloaded function on DEVICE. FN is the host fn
109 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
110 blocks to be copied to/from the device. Varadic arguments are
111 keyed optional parameters terminated with a zero. */
113 void
114 GOACC_parallel_keyed (int device, void (*fn) (void *),
115 size_t mapnum, void **hostaddrs, size_t *sizes,
116 unsigned short *kinds, ...)
118 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
119 va_list ap;
120 struct goacc_thread *thr;
121 struct gomp_device_descr *acc_dev;
122 struct target_mem_desc *tgt;
123 void **devaddrs;
124 unsigned int i;
125 struct splay_tree_key_s k;
126 splay_tree_key tgt_fn_key;
127 void (*tgt_fn);
128 int async = GOMP_ASYNC_SYNC;
129 unsigned dims[GOMP_DIM_MAX];
130 unsigned tag;
132 #ifdef HAVE_INTTYPES_H
133 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
134 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
135 #else
136 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
137 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
138 #endif
139 goacc_lazy_initialize ();
141 thr = goacc_thread ();
142 acc_dev = thr->dev;
144 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
146 /* Host fallback if "if" clause is false or if the current device is set to
147 the host. */
148 if (host_fallback)
150 goacc_save_and_set_bind (acc_device_host);
151 fn (hostaddrs);
152 goacc_restore_bind ();
153 return;
155 else if (acc_device_type (acc_dev->type) == acc_device_host)
157 fn (hostaddrs);
158 return;
161 /* Default: let the runtime choose. */
162 for (i = 0; i != GOMP_DIM_MAX; i++)
163 dims[i] = 0;
165 va_start (ap, kinds);
166 /* TODO: This will need amending when device_type is implemented. */
167 while ((tag = va_arg (ap, unsigned)) != 0)
169 if (GOMP_LAUNCH_DEVICE (tag))
170 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
171 GOMP_LAUNCH_DEVICE (tag));
173 switch (GOMP_LAUNCH_CODE (tag))
175 case GOMP_LAUNCH_DIM:
177 unsigned mask = GOMP_LAUNCH_OP (tag);
179 for (i = 0; i != GOMP_DIM_MAX; i++)
180 if (mask & GOMP_DIM_MASK (i))
181 dims[i] = va_arg (ap, unsigned);
183 break;
185 case GOMP_LAUNCH_ASYNC:
187 /* Small constant values are encoded in the operand. */
188 async = GOMP_LAUNCH_OP (tag);
190 if (async == GOMP_LAUNCH_OP_MAX)
191 async = va_arg (ap, unsigned);
192 break;
195 case GOMP_LAUNCH_WAIT:
197 unsigned num_waits = GOMP_LAUNCH_OP (tag);
199 if (num_waits)
200 goacc_wait (async, num_waits, &ap);
201 break;
204 default:
205 gomp_fatal ("unrecognized offload code '%d',"
206 " libgomp is too old", GOMP_LAUNCH_CODE (tag));
209 va_end (ap);
211 acc_dev->openacc.async_set_async_func (async);
213 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
215 k.host_start = (uintptr_t) fn;
216 k.host_end = k.host_start + 1;
217 gomp_mutex_lock (&acc_dev->lock);
218 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
219 gomp_mutex_unlock (&acc_dev->lock);
221 if (tgt_fn_key == NULL)
222 gomp_fatal ("target function wasn't mapped");
224 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
226 else
227 tgt_fn = (void (*)) fn;
229 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
230 GOMP_MAP_VARS_OPENACC);
232 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
233 for (i = 0; i < mapnum; i++)
234 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
235 + tgt->list[i].key->tgt_offset
236 + tgt->list[i].offset);
238 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
239 async, dims, tgt);
241 /* If running synchronously, unmap immediately. */
242 bool copyfrom = true;
243 if (async_synchronous_p (async))
244 gomp_unmap_vars (tgt, true);
245 else
247 bool async_unmap = false;
248 for (size_t i = 0; i < tgt->list_count; i++)
250 splay_tree_key k = tgt->list[i].key;
251 if (k && k->refcount == 1)
253 async_unmap = true;
254 break;
257 if (async_unmap)
258 tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
259 else
261 copyfrom = false;
262 gomp_unmap_vars (tgt, copyfrom);
266 acc_dev->openacc.async_set_async_func (acc_async_sync);
269 /* Legacy entry point, only provide host execution. */
271 void
272 GOACC_parallel (int device, void (*fn) (void *),
273 size_t mapnum, void **hostaddrs, size_t *sizes,
274 unsigned short *kinds,
275 int num_gangs, int num_workers, int vector_length,
276 int async, int num_waits, ...)
278 goacc_save_and_set_bind (acc_device_host);
279 fn (hostaddrs);
280 goacc_restore_bind ();
283 void
284 GOACC_data_start (int device, size_t mapnum,
285 void **hostaddrs, size_t *sizes, unsigned short *kinds)
287 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
288 struct target_mem_desc *tgt;
290 #ifdef HAVE_INTTYPES_H
291 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
292 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
293 #else
294 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
295 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
296 #endif
298 goacc_lazy_initialize ();
300 struct goacc_thread *thr = goacc_thread ();
301 struct gomp_device_descr *acc_dev = thr->dev;
303 /* Host fallback or 'do nothing'. */
304 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
305 || host_fallback)
307 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
308 GOMP_MAP_VARS_OPENACC);
309 tgt->prev = thr->mapped_data;
310 thr->mapped_data = tgt;
312 return;
315 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
316 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
317 GOMP_MAP_VARS_OPENACC);
318 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
319 tgt->prev = thr->mapped_data;
320 thr->mapped_data = tgt;
323 void
324 GOACC_data_end (void)
326 struct goacc_thread *thr = goacc_thread ();
327 struct target_mem_desc *tgt = thr->mapped_data;
329 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
330 thr->mapped_data = tgt->prev;
331 gomp_unmap_vars (tgt, true);
332 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
335 void
336 GOACC_enter_exit_data (int device, size_t mapnum,
337 void **hostaddrs, size_t *sizes, unsigned short *kinds,
338 int async, int num_waits, ...)
340 struct goacc_thread *thr;
341 struct gomp_device_descr *acc_dev;
342 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
343 bool data_enter = false;
344 size_t i;
346 goacc_lazy_initialize ();
348 thr = goacc_thread ();
349 acc_dev = thr->dev;
351 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
352 || host_fallback)
353 return;
355 if (num_waits)
357 va_list ap;
359 va_start (ap, num_waits);
360 goacc_wait (async, num_waits, &ap);
361 va_end (ap);
364 /* Determine whether "finalize" semantics apply to all mappings of this
365 OpenACC directive. */
366 bool finalize = false;
367 if (mapnum > 0)
369 unsigned char kind = kinds[0] & 0xff;
370 if (kind == GOMP_MAP_DELETE
371 || kind == GOMP_MAP_FORCE_FROM)
372 finalize = true;
375 acc_dev->openacc.async_set_async_func (async);
377 /* Determine if this is an "acc enter data". */
378 for (i = 0; i < mapnum; ++i)
380 unsigned char kind = kinds[i] & 0xff;
382 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
383 continue;
385 if (kind == GOMP_MAP_FORCE_ALLOC
386 || kind == GOMP_MAP_FORCE_PRESENT
387 || kind == GOMP_MAP_FORCE_TO
388 || kind == GOMP_MAP_TO
389 || kind == GOMP_MAP_ALLOC)
391 data_enter = true;
392 break;
395 if (kind == GOMP_MAP_RELEASE
396 || kind == GOMP_MAP_DELETE
397 || kind == GOMP_MAP_FROM
398 || kind == GOMP_MAP_FORCE_FROM)
399 break;
401 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
402 kind);
405 /* In c, non-pointers and arrays are represented by a single data clause.
406 Dynamically allocated arrays and subarrays are represented by a data
407 clause followed by an internal GOMP_MAP_POINTER.
409 In fortran, scalars and not allocated arrays are represented by a
410 single data clause. Allocated arrays and subarrays have three mappings:
411 1) the original data clause, 2) a PSET 3) a pointer to the array data.
414 if (data_enter)
416 for (i = 0; i < mapnum; i++)
418 unsigned char kind = kinds[i] & 0xff;
420 /* Scan for pointers and PSETs. */
421 int pointer = find_pointer (i, mapnum, kinds);
423 if (!pointer)
425 switch (kind)
427 case GOMP_MAP_ALLOC:
428 acc_present_or_create (hostaddrs[i], sizes[i]);
429 break;
430 case GOMP_MAP_FORCE_ALLOC:
431 acc_create (hostaddrs[i], sizes[i]);
432 break;
433 case GOMP_MAP_TO:
434 acc_present_or_copyin (hostaddrs[i], sizes[i]);
435 break;
436 case GOMP_MAP_FORCE_TO:
437 acc_copyin (hostaddrs[i], sizes[i]);
438 break;
439 default:
440 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
441 kind);
442 break;
445 else
447 gomp_acc_insert_pointer (pointer, &hostaddrs[i],
448 &sizes[i], &kinds[i]);
449 /* Increment 'i' by two because OpenACC requires fortran
450 arrays to be contiguous, so each PSET is associated with
451 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
452 one MAP_POINTER. */
453 i += pointer - 1;
457 else
458 for (i = 0; i < mapnum; ++i)
460 unsigned char kind = kinds[i] & 0xff;
462 int pointer = find_pointer (i, mapnum, kinds);
464 if (!pointer)
466 switch (kind)
468 case GOMP_MAP_RELEASE:
469 case GOMP_MAP_DELETE:
470 if (acc_is_present (hostaddrs[i], sizes[i]))
472 if (finalize)
473 acc_delete_finalize (hostaddrs[i], sizes[i]);
474 else
475 acc_delete (hostaddrs[i], sizes[i]);
477 break;
478 case GOMP_MAP_FROM:
479 case GOMP_MAP_FORCE_FROM:
480 if (finalize)
481 acc_copyout_finalize (hostaddrs[i], sizes[i]);
482 else
483 acc_copyout (hostaddrs[i], sizes[i]);
484 break;
485 default:
486 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
487 kind);
488 break;
491 else
493 bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
494 || kind == GOMP_MAP_FROM);
495 gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
496 finalize, pointer);
497 /* See the above comment. */
498 i += pointer - 1;
502 acc_dev->openacc.async_set_async_func (acc_async_sync);
505 static void
506 goacc_wait (int async, int num_waits, va_list *ap)
508 struct goacc_thread *thr = goacc_thread ();
509 struct gomp_device_descr *acc_dev = thr->dev;
511 while (num_waits--)
513 int qid = va_arg (*ap, int);
515 if (acc_async_test (qid))
516 continue;
518 if (async == acc_async_sync)
519 acc_wait (qid);
520 else if (qid == async)
521 ;/* If we're waiting on the same asynchronous queue as we're
522 launching on, the queue itself will order work as
523 required, so there's no need to wait explicitly. */
524 else
525 acc_dev->openacc.async_wait_async_func (qid, async);
529 void
530 GOACC_update (int device, size_t mapnum,
531 void **hostaddrs, size_t *sizes, unsigned short *kinds,
532 int async, int num_waits, ...)
534 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
535 size_t i;
537 goacc_lazy_initialize ();
539 struct goacc_thread *thr = goacc_thread ();
540 struct gomp_device_descr *acc_dev = thr->dev;
542 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
543 || host_fallback)
544 return;
546 if (num_waits)
548 va_list ap;
550 va_start (ap, num_waits);
551 goacc_wait (async, num_waits, &ap);
552 va_end (ap);
555 acc_dev->openacc.async_set_async_func (async);
557 bool update_device = false;
558 for (i = 0; i < mapnum; ++i)
560 unsigned char kind = kinds[i] & 0xff;
562 switch (kind)
564 case GOMP_MAP_POINTER:
565 case GOMP_MAP_TO_PSET:
566 break;
568 case GOMP_MAP_ALWAYS_POINTER:
569 if (update_device)
571 /* Save the contents of the host pointer. */
572 void *dptr = acc_deviceptr (hostaddrs[i-1]);
573 uintptr_t t = *(uintptr_t *) hostaddrs[i];
575 /* Update the contents of the host pointer to reflect
576 the value of the allocated device memory in the
577 previous pointer. */
578 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
579 acc_update_device (hostaddrs[i], sizeof (uintptr_t));
581 /* Restore the host pointer. */
582 *(uintptr_t *) hostaddrs[i] = t;
583 update_device = false;
585 break;
587 case GOMP_MAP_TO:
588 if (!acc_is_present (hostaddrs[i], sizes[i]))
590 update_device = false;
591 break;
593 /* Fallthru */
594 case GOMP_MAP_FORCE_TO:
595 update_device = true;
596 acc_update_device (hostaddrs[i], sizes[i]);
597 break;
599 case GOMP_MAP_FROM:
600 if (!acc_is_present (hostaddrs[i], sizes[i]))
602 update_device = false;
603 break;
605 /* Fallthru */
606 case GOMP_MAP_FORCE_FROM:
607 update_device = false;
608 acc_update_self (hostaddrs[i], sizes[i]);
609 break;
611 default:
612 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
613 break;
617 acc_dev->openacc.async_set_async_func (acc_async_sync);
620 void
621 GOACC_wait (int async, int num_waits, ...)
623 if (num_waits)
625 va_list ap;
627 va_start (ap, num_waits);
628 goacc_wait (async, num_waits, &ap);
629 va_end (ap);
631 else if (async == acc_async_sync)
632 acc_wait_all ();
633 else if (async == acc_async_noval)
634 goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
/* Return the number of threads; this implementation unconditionally
   returns 1.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
/* Return the number of the calling thread; this implementation
   unconditionally returns 0.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}
649 void
650 GOACC_declare (int device, size_t mapnum,
651 void **hostaddrs, size_t *sizes, unsigned short *kinds)
653 int i;
655 for (i = 0; i < mapnum; i++)
657 unsigned char kind = kinds[i] & 0xff;
659 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
660 continue;
662 switch (kind)
664 case GOMP_MAP_FORCE_ALLOC:
665 case GOMP_MAP_FORCE_FROM:
666 case GOMP_MAP_FORCE_TO:
667 case GOMP_MAP_POINTER:
668 case GOMP_MAP_RELEASE:
669 case GOMP_MAP_DELETE:
670 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
671 &kinds[i], GOMP_ASYNC_SYNC, 0);
672 break;
674 case GOMP_MAP_FORCE_DEVICEPTR:
675 break;
677 case GOMP_MAP_ALLOC:
678 if (!acc_is_present (hostaddrs[i], sizes[i]))
679 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
680 &kinds[i], GOMP_ASYNC_SYNC, 0);
681 break;
683 case GOMP_MAP_TO:
684 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
685 &kinds[i], GOMP_ASYNC_SYNC, 0);
687 break;
689 case GOMP_MAP_FROM:
690 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
691 &kinds[i], GOMP_ASYNC_SYNC, 0);
692 break;
694 case GOMP_MAP_FORCE_PRESENT:
695 if (!acc_is_present (hostaddrs[i], sizes[i]))
696 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
697 (unsigned long) sizes[i]);
698 break;
700 default:
701 assert (0);
702 break;