Daily bump.
[official-gcc.git] / libgomp / oacc-parallel.c
blobd8a08e974a23ae0df852ef7efea4f0c65f8ab371
1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
36 #endif
37 #include <string.h>
38 #include <stdarg.h>
39 #include <assert.h>
42 /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
43 continue to support the following two legacy values. */
44 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
45 "legacy GOMP_DEVICE_ICV broken");
46 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
47 == GOACC_FLAG_HOST_FALLBACK,
48 "legacy GOMP_DEVICE_HOST_FALLBACK broken");
51 /* Returns the number of mappings associated with the pointer or pset. PSET
52 have three mappings, whereas pointer have two. */
54 static int
55 find_pointer (int pos, size_t mapnum, unsigned short *kinds)
57 if (pos + 1 >= mapnum)
58 return 0;
60 unsigned char kind = kinds[pos+1] & 0xff;
62 if (kind == GOMP_MAP_TO_PSET)
63 return 3;
64 else if (kind == GOMP_MAP_POINTER)
65 return 2;
67 return 0;
70 /* Handle the mapping pair that are presented when a
71 deviceptr clause is used with Fortran. */
73 static void
74 handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
75 unsigned short *kinds)
77 int i;
79 for (i = 0; i < mapnum; i++)
81 unsigned short kind1 = kinds[i] & 0xff;
83 /* Handle Fortran deviceptr clause. */
84 if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
86 unsigned short kind2;
88 if (i < (signed)mapnum - 1)
89 kind2 = kinds[i + 1] & 0xff;
90 else
91 kind2 = 0xffff;
93 if (sizes[i] == sizeof (void *))
94 continue;
96 /* At this point, we're dealing with a Fortran deviceptr.
97 If the next element is not what we're expecting, then
98 this is an instance of where the deviceptr variable was
99 not used within the region and the pointer was removed
100 by the gimplifier. */
101 if (kind2 == GOMP_MAP_POINTER
102 && sizes[i + 1] == 0
103 && hostaddrs[i] == *(void **)hostaddrs[i + 1])
105 kinds[i+1] = kinds[i];
106 sizes[i+1] = sizeof (void *);
109 /* Invalidate the entry. */
110 hostaddrs[i] = NULL;
115 static void goacc_wait (int async, int num_waits, va_list *ap);
118 /* Launch a possibly offloaded function with FLAGS. FN is the host fn
119 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
120 blocks to be copied to/from the device. Varadic arguments are
121 keyed optional parameters terminated with a zero. */
123 void
124 GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
125 size_t mapnum, void **hostaddrs, size_t *sizes,
126 unsigned short *kinds, ...)
128 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
130 va_list ap;
131 struct goacc_thread *thr;
132 struct gomp_device_descr *acc_dev;
133 struct target_mem_desc *tgt;
134 void **devaddrs;
135 unsigned int i;
136 struct splay_tree_key_s k;
137 splay_tree_key tgt_fn_key;
138 void (*tgt_fn);
139 int async = GOMP_ASYNC_SYNC;
140 unsigned dims[GOMP_DIM_MAX];
141 unsigned tag;
143 #ifdef HAVE_INTTYPES_H
144 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
145 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
146 #else
147 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
148 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
149 #endif
150 goacc_lazy_initialize ();
152 thr = goacc_thread ();
153 acc_dev = thr->dev;
155 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
157 /* Host fallback if "if" clause is false or if the current device is set to
158 the host. */
159 if (flags & GOACC_FLAG_HOST_FALLBACK)
161 goacc_save_and_set_bind (acc_device_host);
162 fn (hostaddrs);
163 goacc_restore_bind ();
164 return;
166 else if (acc_device_type (acc_dev->type) == acc_device_host)
168 fn (hostaddrs);
169 return;
172 /* Default: let the runtime choose. */
173 for (i = 0; i != GOMP_DIM_MAX; i++)
174 dims[i] = 0;
176 va_start (ap, kinds);
177 /* TODO: This will need amending when device_type is implemented. */
178 while ((tag = va_arg (ap, unsigned)) != 0)
180 if (GOMP_LAUNCH_DEVICE (tag))
181 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
182 GOMP_LAUNCH_DEVICE (tag));
184 switch (GOMP_LAUNCH_CODE (tag))
186 case GOMP_LAUNCH_DIM:
188 unsigned mask = GOMP_LAUNCH_OP (tag);
190 for (i = 0; i != GOMP_DIM_MAX; i++)
191 if (mask & GOMP_DIM_MASK (i))
192 dims[i] = va_arg (ap, unsigned);
194 break;
196 case GOMP_LAUNCH_ASYNC:
198 /* Small constant values are encoded in the operand. */
199 async = GOMP_LAUNCH_OP (tag);
201 if (async == GOMP_LAUNCH_OP_MAX)
202 async = va_arg (ap, unsigned);
203 break;
206 case GOMP_LAUNCH_WAIT:
208 unsigned num_waits = GOMP_LAUNCH_OP (tag);
210 if (num_waits)
211 goacc_wait (async, num_waits, &ap);
212 break;
215 default:
216 gomp_fatal ("unrecognized offload code '%d',"
217 " libgomp is too old", GOMP_LAUNCH_CODE (tag));
220 va_end (ap);
222 acc_dev->openacc.async_set_async_func (async);
224 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
226 k.host_start = (uintptr_t) fn;
227 k.host_end = k.host_start + 1;
228 gomp_mutex_lock (&acc_dev->lock);
229 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
230 gomp_mutex_unlock (&acc_dev->lock);
232 if (tgt_fn_key == NULL)
233 gomp_fatal ("target function wasn't mapped");
235 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
237 else
238 tgt_fn = (void (*)) fn;
240 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
241 GOMP_MAP_VARS_OPENACC);
243 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
244 for (i = 0; i < mapnum; i++)
245 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
246 + tgt->list[i].key->tgt_offset
247 + tgt->list[i].offset);
249 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
250 async, dims, tgt);
252 /* If running synchronously, unmap immediately. */
253 bool copyfrom = true;
254 if (async_synchronous_p (async))
255 gomp_unmap_vars (tgt, true);
256 else
258 bool async_unmap = false;
259 for (size_t i = 0; i < tgt->list_count; i++)
261 splay_tree_key k = tgt->list[i].key;
262 if (k && k->refcount == 1)
264 async_unmap = true;
265 break;
268 if (async_unmap)
269 tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
270 else
272 copyfrom = false;
273 gomp_unmap_vars (tgt, copyfrom);
277 acc_dev->openacc.async_set_async_func (acc_async_sync);
280 /* Legacy entry point, only provide host execution. */
282 void
283 GOACC_parallel (int flags_m, void (*fn) (void *),
284 size_t mapnum, void **hostaddrs, size_t *sizes,
285 unsigned short *kinds,
286 int num_gangs, int num_workers, int vector_length,
287 int async, int num_waits, ...)
289 goacc_save_and_set_bind (acc_device_host);
290 fn (hostaddrs);
291 goacc_restore_bind ();
294 void
295 GOACC_data_start (int flags_m, size_t mapnum,
296 void **hostaddrs, size_t *sizes, unsigned short *kinds)
298 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
300 struct target_mem_desc *tgt;
302 #ifdef HAVE_INTTYPES_H
303 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
304 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
305 #else
306 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
307 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
308 #endif
310 goacc_lazy_initialize ();
312 struct goacc_thread *thr = goacc_thread ();
313 struct gomp_device_descr *acc_dev = thr->dev;
315 /* Host fallback or 'do nothing'. */
316 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
317 || (flags & GOACC_FLAG_HOST_FALLBACK))
319 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
320 GOMP_MAP_VARS_OPENACC);
321 tgt->prev = thr->mapped_data;
322 thr->mapped_data = tgt;
324 return;
327 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
328 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
329 GOMP_MAP_VARS_OPENACC);
330 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
331 tgt->prev = thr->mapped_data;
332 thr->mapped_data = tgt;
335 void
336 GOACC_data_end (void)
338 struct goacc_thread *thr = goacc_thread ();
339 struct target_mem_desc *tgt = thr->mapped_data;
341 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
342 thr->mapped_data = tgt->prev;
343 gomp_unmap_vars (tgt, true);
344 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
347 void
348 GOACC_enter_exit_data (int flags_m, size_t mapnum,
349 void **hostaddrs, size_t *sizes, unsigned short *kinds,
350 int async, int num_waits, ...)
352 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
354 struct goacc_thread *thr;
355 struct gomp_device_descr *acc_dev;
356 bool data_enter = false;
357 size_t i;
359 goacc_lazy_initialize ();
361 thr = goacc_thread ();
362 acc_dev = thr->dev;
364 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
365 || (flags & GOACC_FLAG_HOST_FALLBACK))
366 return;
368 if (num_waits)
370 va_list ap;
372 va_start (ap, num_waits);
373 goacc_wait (async, num_waits, &ap);
374 va_end (ap);
377 /* Determine whether "finalize" semantics apply to all mappings of this
378 OpenACC directive. */
379 bool finalize = false;
380 if (mapnum > 0)
382 unsigned char kind = kinds[0] & 0xff;
383 if (kind == GOMP_MAP_DELETE
384 || kind == GOMP_MAP_FORCE_FROM)
385 finalize = true;
388 acc_dev->openacc.async_set_async_func (async);
390 /* Determine if this is an "acc enter data". */
391 for (i = 0; i < mapnum; ++i)
393 unsigned char kind = kinds[i] & 0xff;
395 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
396 continue;
398 if (kind == GOMP_MAP_FORCE_ALLOC
399 || kind == GOMP_MAP_FORCE_PRESENT
400 || kind == GOMP_MAP_FORCE_TO
401 || kind == GOMP_MAP_TO
402 || kind == GOMP_MAP_ALLOC)
404 data_enter = true;
405 break;
408 if (kind == GOMP_MAP_RELEASE
409 || kind == GOMP_MAP_DELETE
410 || kind == GOMP_MAP_FROM
411 || kind == GOMP_MAP_FORCE_FROM)
412 break;
414 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
415 kind);
418 /* In c, non-pointers and arrays are represented by a single data clause.
419 Dynamically allocated arrays and subarrays are represented by a data
420 clause followed by an internal GOMP_MAP_POINTER.
422 In fortran, scalars and not allocated arrays are represented by a
423 single data clause. Allocated arrays and subarrays have three mappings:
424 1) the original data clause, 2) a PSET 3) a pointer to the array data.
427 if (data_enter)
429 for (i = 0; i < mapnum; i++)
431 unsigned char kind = kinds[i] & 0xff;
433 /* Scan for pointers and PSETs. */
434 int pointer = find_pointer (i, mapnum, kinds);
436 if (!pointer)
438 switch (kind)
440 case GOMP_MAP_ALLOC:
441 case GOMP_MAP_FORCE_ALLOC:
442 acc_create (hostaddrs[i], sizes[i]);
443 break;
444 case GOMP_MAP_TO:
445 case GOMP_MAP_FORCE_TO:
446 acc_copyin (hostaddrs[i], sizes[i]);
447 break;
448 default:
449 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
450 kind);
451 break;
454 else
456 gomp_acc_insert_pointer (pointer, &hostaddrs[i],
457 &sizes[i], &kinds[i]);
458 /* Increment 'i' by two because OpenACC requires fortran
459 arrays to be contiguous, so each PSET is associated with
460 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
461 one MAP_POINTER. */
462 i += pointer - 1;
466 else
467 for (i = 0; i < mapnum; ++i)
469 unsigned char kind = kinds[i] & 0xff;
471 int pointer = find_pointer (i, mapnum, kinds);
473 if (!pointer)
475 switch (kind)
477 case GOMP_MAP_RELEASE:
478 case GOMP_MAP_DELETE:
479 if (acc_is_present (hostaddrs[i], sizes[i]))
481 if (finalize)
482 acc_delete_finalize (hostaddrs[i], sizes[i]);
483 else
484 acc_delete (hostaddrs[i], sizes[i]);
486 break;
487 case GOMP_MAP_FROM:
488 case GOMP_MAP_FORCE_FROM:
489 if (finalize)
490 acc_copyout_finalize (hostaddrs[i], sizes[i]);
491 else
492 acc_copyout (hostaddrs[i], sizes[i]);
493 break;
494 default:
495 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
496 kind);
497 break;
500 else
502 bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
503 || kind == GOMP_MAP_FROM);
504 gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
505 finalize, pointer);
506 /* See the above comment. */
507 i += pointer - 1;
511 acc_dev->openacc.async_set_async_func (acc_async_sync);
514 static void
515 goacc_wait (int async, int num_waits, va_list *ap)
517 struct goacc_thread *thr = goacc_thread ();
518 struct gomp_device_descr *acc_dev = thr->dev;
520 while (num_waits--)
522 int qid = va_arg (*ap, int);
524 if (acc_async_test (qid))
525 continue;
527 if (async == acc_async_sync)
528 acc_wait (qid);
529 else if (qid == async)
530 ;/* If we're waiting on the same asynchronous queue as we're
531 launching on, the queue itself will order work as
532 required, so there's no need to wait explicitly. */
533 else
534 acc_dev->openacc.async_wait_async_func (qid, async);
538 void
539 GOACC_update (int flags_m, size_t mapnum,
540 void **hostaddrs, size_t *sizes, unsigned short *kinds,
541 int async, int num_waits, ...)
543 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
545 size_t i;
547 goacc_lazy_initialize ();
549 struct goacc_thread *thr = goacc_thread ();
550 struct gomp_device_descr *acc_dev = thr->dev;
552 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
553 || (flags & GOACC_FLAG_HOST_FALLBACK))
554 return;
556 if (num_waits)
558 va_list ap;
560 va_start (ap, num_waits);
561 goacc_wait (async, num_waits, &ap);
562 va_end (ap);
565 acc_dev->openacc.async_set_async_func (async);
567 bool update_device = false;
568 for (i = 0; i < mapnum; ++i)
570 unsigned char kind = kinds[i] & 0xff;
572 switch (kind)
574 case GOMP_MAP_POINTER:
575 case GOMP_MAP_TO_PSET:
576 break;
578 case GOMP_MAP_ALWAYS_POINTER:
579 if (update_device)
581 /* Save the contents of the host pointer. */
582 void *dptr = acc_deviceptr (hostaddrs[i-1]);
583 uintptr_t t = *(uintptr_t *) hostaddrs[i];
585 /* Update the contents of the host pointer to reflect
586 the value of the allocated device memory in the
587 previous pointer. */
588 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
589 acc_update_device (hostaddrs[i], sizeof (uintptr_t));
591 /* Restore the host pointer. */
592 *(uintptr_t *) hostaddrs[i] = t;
593 update_device = false;
595 break;
597 case GOMP_MAP_TO:
598 if (!acc_is_present (hostaddrs[i], sizes[i]))
600 update_device = false;
601 break;
603 /* Fallthru */
604 case GOMP_MAP_FORCE_TO:
605 update_device = true;
606 acc_update_device (hostaddrs[i], sizes[i]);
607 break;
609 case GOMP_MAP_FROM:
610 if (!acc_is_present (hostaddrs[i], sizes[i]))
612 update_device = false;
613 break;
615 /* Fallthru */
616 case GOMP_MAP_FORCE_FROM:
617 update_device = false;
618 acc_update_self (hostaddrs[i], sizes[i]);
619 break;
621 default:
622 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
623 break;
627 acc_dev->openacc.async_set_async_func (acc_async_sync);
630 void
631 GOACC_wait (int async, int num_waits, ...)
633 if (num_waits)
635 va_list ap;
637 va_start (ap, num_waits);
638 goacc_wait (async, num_waits, &ap);
639 va_end (ap);
641 else if (async == acc_async_sync)
642 acc_wait_all ();
643 else
644 acc_wait_all_async (async);
/* Return the number of threads; this implementation always reports 1.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
/* Return the calling thread's number; this implementation always
   reports 0.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}
659 void
660 GOACC_declare (int flags_m, size_t mapnum,
661 void **hostaddrs, size_t *sizes, unsigned short *kinds)
663 int i;
665 for (i = 0; i < mapnum; i++)
667 unsigned char kind = kinds[i] & 0xff;
669 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
670 continue;
672 switch (kind)
674 case GOMP_MAP_FORCE_ALLOC:
675 case GOMP_MAP_FORCE_FROM:
676 case GOMP_MAP_FORCE_TO:
677 case GOMP_MAP_POINTER:
678 case GOMP_MAP_RELEASE:
679 case GOMP_MAP_DELETE:
680 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
681 &kinds[i], GOMP_ASYNC_SYNC, 0);
682 break;
684 case GOMP_MAP_FORCE_DEVICEPTR:
685 break;
687 case GOMP_MAP_ALLOC:
688 if (!acc_is_present (hostaddrs[i], sizes[i]))
689 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
690 &kinds[i], GOMP_ASYNC_SYNC, 0);
691 break;
693 case GOMP_MAP_TO:
694 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
695 &kinds[i], GOMP_ASYNC_SYNC, 0);
697 break;
699 case GOMP_MAP_FROM:
700 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
701 &kinds[i], GOMP_ASYNC_SYNC, 0);
702 break;
704 case GOMP_MAP_FORCE_PRESENT:
705 if (!acc_is_present (hostaddrs[i], sizes[i]))
706 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
707 (unsigned long) sizes[i]);
708 break;
710 default:
711 assert (0);
712 break;