2018-08-29 Richard Biener <rguenther@suse.de>
[official-gcc.git] / libgomp / oacc-parallel.c
blobb80ace585907d2a45063b48fd49cc87f3412d119
1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
36 #endif
37 #include <string.h>
38 #include <stdarg.h>
39 #include <assert.h>
41 /* Returns the number of mappings associated with the pointer or pset. PSET
42 have three mappings, whereas pointer have two. */
44 static int
45 find_pointer (int pos, size_t mapnum, unsigned short *kinds)
47 if (pos + 1 >= mapnum)
48 return 0;
50 unsigned char kind = kinds[pos+1] & 0xff;
52 if (kind == GOMP_MAP_TO_PSET)
53 return 3;
54 else if (kind == GOMP_MAP_POINTER)
55 return 2;
57 return 0;
60 /* Handle the mapping pair that are presented when a
61 deviceptr clause is used with Fortran. */
63 static void
64 handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
65 unsigned short *kinds)
67 int i;
69 for (i = 0; i < mapnum; i++)
71 unsigned short kind1 = kinds[i] & 0xff;
73 /* Handle Fortran deviceptr clause. */
74 if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
76 unsigned short kind2;
78 if (i < (signed)mapnum - 1)
79 kind2 = kinds[i + 1] & 0xff;
80 else
81 kind2 = 0xffff;
83 if (sizes[i] == sizeof (void *))
84 continue;
86 /* At this point, we're dealing with a Fortran deviceptr.
87 If the next element is not what we're expecting, then
88 this is an instance of where the deviceptr variable was
89 not used within the region and the pointer was removed
90 by the gimplifier. */
91 if (kind2 == GOMP_MAP_POINTER
92 && sizes[i + 1] == 0
93 && hostaddrs[i] == *(void **)hostaddrs[i + 1])
95 kinds[i+1] = kinds[i];
96 sizes[i+1] = sizeof (void *);
99 /* Invalidate the entry. */
100 hostaddrs[i] = NULL;
105 static void goacc_wait (int async, int num_waits, va_list *ap);
108 /* Launch a possibly offloaded function on DEVICE. FN is the host fn
109 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
110 blocks to be copied to/from the device. Varadic arguments are
111 keyed optional parameters terminated with a zero. */
113 void
114 GOACC_parallel_keyed (int device, void (*fn) (void *),
115 size_t mapnum, void **hostaddrs, size_t *sizes,
116 unsigned short *kinds, ...)
118 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
119 va_list ap;
120 struct goacc_thread *thr;
121 struct gomp_device_descr *acc_dev;
122 struct target_mem_desc *tgt;
123 void **devaddrs;
124 unsigned int i;
125 struct splay_tree_key_s k;
126 splay_tree_key tgt_fn_key;
127 void (*tgt_fn);
128 int async = GOMP_ASYNC_SYNC;
129 unsigned dims[GOMP_DIM_MAX];
130 unsigned tag;
132 #ifdef HAVE_INTTYPES_H
133 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
134 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
135 #else
136 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
137 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
138 #endif
139 goacc_lazy_initialize ();
141 thr = goacc_thread ();
142 acc_dev = thr->dev;
144 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
146 /* Host fallback if "if" clause is false or if the current device is set to
147 the host. */
148 if (host_fallback)
150 goacc_save_and_set_bind (acc_device_host);
151 fn (hostaddrs);
152 goacc_restore_bind ();
153 return;
155 else if (acc_device_type (acc_dev->type) == acc_device_host)
157 fn (hostaddrs);
158 return;
161 /* Default: let the runtime choose. */
162 for (i = 0; i != GOMP_DIM_MAX; i++)
163 dims[i] = 0;
165 va_start (ap, kinds);
166 /* TODO: This will need amending when device_type is implemented. */
167 while ((tag = va_arg (ap, unsigned)) != 0)
169 if (GOMP_LAUNCH_DEVICE (tag))
170 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
171 GOMP_LAUNCH_DEVICE (tag));
173 switch (GOMP_LAUNCH_CODE (tag))
175 case GOMP_LAUNCH_DIM:
177 unsigned mask = GOMP_LAUNCH_OP (tag);
179 for (i = 0; i != GOMP_DIM_MAX; i++)
180 if (mask & GOMP_DIM_MASK (i))
181 dims[i] = va_arg (ap, unsigned);
183 break;
185 case GOMP_LAUNCH_ASYNC:
187 /* Small constant values are encoded in the operand. */
188 async = GOMP_LAUNCH_OP (tag);
190 if (async == GOMP_LAUNCH_OP_MAX)
191 async = va_arg (ap, unsigned);
192 break;
195 case GOMP_LAUNCH_WAIT:
197 unsigned num_waits = GOMP_LAUNCH_OP (tag);
199 if (num_waits)
200 goacc_wait (async, num_waits, &ap);
201 break;
204 default:
205 gomp_fatal ("unrecognized offload code '%d',"
206 " libgomp is too old", GOMP_LAUNCH_CODE (tag));
209 va_end (ap);
211 acc_dev->openacc.async_set_async_func (async);
213 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
215 k.host_start = (uintptr_t) fn;
216 k.host_end = k.host_start + 1;
217 gomp_mutex_lock (&acc_dev->lock);
218 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
219 gomp_mutex_unlock (&acc_dev->lock);
221 if (tgt_fn_key == NULL)
222 gomp_fatal ("target function wasn't mapped");
224 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
226 else
227 tgt_fn = (void (*)) fn;
229 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
230 GOMP_MAP_VARS_OPENACC);
232 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
233 for (i = 0; i < mapnum; i++)
234 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
235 + tgt->list[i].key->tgt_offset);
237 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
238 async, dims, tgt);
240 /* If running synchronously, unmap immediately. */
241 bool copyfrom = true;
242 if (async_synchronous_p (async))
243 gomp_unmap_vars (tgt, true);
244 else
246 bool async_unmap = false;
247 for (size_t i = 0; i < tgt->list_count; i++)
249 splay_tree_key k = tgt->list[i].key;
250 if (k && k->refcount == 1)
252 async_unmap = true;
253 break;
256 if (async_unmap)
257 tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
258 else
260 copyfrom = false;
261 gomp_unmap_vars (tgt, copyfrom);
265 acc_dev->openacc.async_set_async_func (acc_async_sync);
268 /* Legacy entry point, only provide host execution. */
270 void
271 GOACC_parallel (int device, void (*fn) (void *),
272 size_t mapnum, void **hostaddrs, size_t *sizes,
273 unsigned short *kinds,
274 int num_gangs, int num_workers, int vector_length,
275 int async, int num_waits, ...)
277 goacc_save_and_set_bind (acc_device_host);
278 fn (hostaddrs);
279 goacc_restore_bind ();
282 void
283 GOACC_data_start (int device, size_t mapnum,
284 void **hostaddrs, size_t *sizes, unsigned short *kinds)
286 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
287 struct target_mem_desc *tgt;
289 #ifdef HAVE_INTTYPES_H
290 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
291 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
292 #else
293 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
294 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
295 #endif
297 goacc_lazy_initialize ();
299 struct goacc_thread *thr = goacc_thread ();
300 struct gomp_device_descr *acc_dev = thr->dev;
302 /* Host fallback or 'do nothing'. */
303 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
304 || host_fallback)
306 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
307 GOMP_MAP_VARS_OPENACC);
308 tgt->prev = thr->mapped_data;
309 thr->mapped_data = tgt;
311 return;
314 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
315 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
316 GOMP_MAP_VARS_OPENACC);
317 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
318 tgt->prev = thr->mapped_data;
319 thr->mapped_data = tgt;
322 void
323 GOACC_data_end (void)
325 struct goacc_thread *thr = goacc_thread ();
326 struct target_mem_desc *tgt = thr->mapped_data;
328 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
329 thr->mapped_data = tgt->prev;
330 gomp_unmap_vars (tgt, true);
331 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
334 void
335 GOACC_enter_exit_data (int device, size_t mapnum,
336 void **hostaddrs, size_t *sizes, unsigned short *kinds,
337 int async, int num_waits, ...)
339 struct goacc_thread *thr;
340 struct gomp_device_descr *acc_dev;
341 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
342 bool data_enter = false;
343 size_t i;
345 goacc_lazy_initialize ();
347 thr = goacc_thread ();
348 acc_dev = thr->dev;
350 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
351 || host_fallback)
352 return;
354 if (num_waits)
356 va_list ap;
358 va_start (ap, num_waits);
359 goacc_wait (async, num_waits, &ap);
360 va_end (ap);
363 /* Determine whether "finalize" semantics apply to all mappings of this
364 OpenACC directive. */
365 bool finalize = false;
366 if (mapnum > 0)
368 unsigned char kind = kinds[0] & 0xff;
369 if (kind == GOMP_MAP_DELETE
370 || kind == GOMP_MAP_FORCE_FROM)
371 finalize = true;
374 acc_dev->openacc.async_set_async_func (async);
376 /* Determine if this is an "acc enter data". */
377 for (i = 0; i < mapnum; ++i)
379 unsigned char kind = kinds[i] & 0xff;
381 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
382 continue;
384 if (kind == GOMP_MAP_FORCE_ALLOC
385 || kind == GOMP_MAP_FORCE_PRESENT
386 || kind == GOMP_MAP_FORCE_TO
387 || kind == GOMP_MAP_TO
388 || kind == GOMP_MAP_ALLOC)
390 data_enter = true;
391 break;
394 if (kind == GOMP_MAP_RELEASE
395 || kind == GOMP_MAP_DELETE
396 || kind == GOMP_MAP_FROM
397 || kind == GOMP_MAP_FORCE_FROM)
398 break;
400 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
401 kind);
404 /* In c, non-pointers and arrays are represented by a single data clause.
405 Dynamically allocated arrays and subarrays are represented by a data
406 clause followed by an internal GOMP_MAP_POINTER.
408 In fortran, scalars and not allocated arrays are represented by a
409 single data clause. Allocated arrays and subarrays have three mappings:
410 1) the original data clause, 2) a PSET 3) a pointer to the array data.
413 if (data_enter)
415 for (i = 0; i < mapnum; i++)
417 unsigned char kind = kinds[i] & 0xff;
419 /* Scan for pointers and PSETs. */
420 int pointer = find_pointer (i, mapnum, kinds);
422 if (!pointer)
424 switch (kind)
426 case GOMP_MAP_ALLOC:
427 acc_present_or_create (hostaddrs[i], sizes[i]);
428 break;
429 case GOMP_MAP_FORCE_ALLOC:
430 acc_create (hostaddrs[i], sizes[i]);
431 break;
432 case GOMP_MAP_TO:
433 acc_present_or_copyin (hostaddrs[i], sizes[i]);
434 break;
435 case GOMP_MAP_FORCE_TO:
436 acc_copyin (hostaddrs[i], sizes[i]);
437 break;
438 default:
439 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
440 kind);
441 break;
444 else
446 gomp_acc_insert_pointer (pointer, &hostaddrs[i],
447 &sizes[i], &kinds[i]);
448 /* Increment 'i' by two because OpenACC requires fortran
449 arrays to be contiguous, so each PSET is associated with
450 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
451 one MAP_POINTER. */
452 i += pointer - 1;
456 else
457 for (i = 0; i < mapnum; ++i)
459 unsigned char kind = kinds[i] & 0xff;
461 int pointer = find_pointer (i, mapnum, kinds);
463 if (!pointer)
465 switch (kind)
467 case GOMP_MAP_RELEASE:
468 case GOMP_MAP_DELETE:
469 if (acc_is_present (hostaddrs[i], sizes[i]))
471 if (finalize)
472 acc_delete_finalize (hostaddrs[i], sizes[i]);
473 else
474 acc_delete (hostaddrs[i], sizes[i]);
476 break;
477 case GOMP_MAP_FROM:
478 case GOMP_MAP_FORCE_FROM:
479 if (finalize)
480 acc_copyout_finalize (hostaddrs[i], sizes[i]);
481 else
482 acc_copyout (hostaddrs[i], sizes[i]);
483 break;
484 default:
485 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
486 kind);
487 break;
490 else
492 bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
493 || kind == GOMP_MAP_FROM);
494 gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
495 finalize, pointer);
496 /* See the above comment. */
497 i += pointer - 1;
501 acc_dev->openacc.async_set_async_func (acc_async_sync);
504 static void
505 goacc_wait (int async, int num_waits, va_list *ap)
507 struct goacc_thread *thr = goacc_thread ();
508 struct gomp_device_descr *acc_dev = thr->dev;
510 while (num_waits--)
512 int qid = va_arg (*ap, int);
514 if (acc_async_test (qid))
515 continue;
517 if (async == acc_async_sync)
518 acc_wait (qid);
519 else if (qid == async)
520 ;/* If we're waiting on the same asynchronous queue as we're
521 launching on, the queue itself will order work as
522 required, so there's no need to wait explicitly. */
523 else
524 acc_dev->openacc.async_wait_async_func (qid, async);
528 void
529 GOACC_update (int device, size_t mapnum,
530 void **hostaddrs, size_t *sizes, unsigned short *kinds,
531 int async, int num_waits, ...)
533 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
534 size_t i;
536 goacc_lazy_initialize ();
538 struct goacc_thread *thr = goacc_thread ();
539 struct gomp_device_descr *acc_dev = thr->dev;
541 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
542 || host_fallback)
543 return;
545 if (num_waits)
547 va_list ap;
549 va_start (ap, num_waits);
550 goacc_wait (async, num_waits, &ap);
551 va_end (ap);
554 acc_dev->openacc.async_set_async_func (async);
556 bool update_device = false;
557 for (i = 0; i < mapnum; ++i)
559 unsigned char kind = kinds[i] & 0xff;
561 switch (kind)
563 case GOMP_MAP_POINTER:
564 case GOMP_MAP_TO_PSET:
565 break;
567 case GOMP_MAP_ALWAYS_POINTER:
568 if (update_device)
570 /* Save the contents of the host pointer. */
571 void *dptr = acc_deviceptr (hostaddrs[i-1]);
572 uintptr_t t = *(uintptr_t *) hostaddrs[i];
574 /* Update the contents of the host pointer to reflect
575 the value of the allocated device memory in the
576 previous pointer. */
577 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
578 acc_update_device (hostaddrs[i], sizeof (uintptr_t));
580 /* Restore the host pointer. */
581 *(uintptr_t *) hostaddrs[i] = t;
582 update_device = false;
584 break;
586 case GOMP_MAP_TO:
587 if (!acc_is_present (hostaddrs[i], sizes[i]))
589 update_device = false;
590 break;
592 /* Fallthru */
593 case GOMP_MAP_FORCE_TO:
594 update_device = true;
595 acc_update_device (hostaddrs[i], sizes[i]);
596 break;
598 case GOMP_MAP_FROM:
599 if (!acc_is_present (hostaddrs[i], sizes[i]))
601 update_device = false;
602 break;
604 /* Fallthru */
605 case GOMP_MAP_FORCE_FROM:
606 update_device = false;
607 acc_update_self (hostaddrs[i], sizes[i]);
608 break;
610 default:
611 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
612 break;
616 acc_dev->openacc.async_set_async_func (acc_async_sync);
619 void
620 GOACC_wait (int async, int num_waits, ...)
622 if (num_waits)
624 va_list ap;
626 va_start (ap, num_waits);
627 goacc_wait (async, num_waits, &ap);
628 va_end (ap);
630 else if (async == acc_async_sync)
631 acc_wait_all ();
632 else if (async == acc_async_noval)
633 goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
/* Return the number of threads, which is always 1 here.  The return type
   is spelled out explicitly; implicit 'int' is not valid since C99.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
/* Return the number of the calling thread, which is always 0 here.  The
   return type is spelled out explicitly; implicit 'int' is not valid since
   C99.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}
648 void
649 GOACC_declare (int device, size_t mapnum,
650 void **hostaddrs, size_t *sizes, unsigned short *kinds)
652 int i;
654 for (i = 0; i < mapnum; i++)
656 unsigned char kind = kinds[i] & 0xff;
658 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
659 continue;
661 switch (kind)
663 case GOMP_MAP_FORCE_ALLOC:
664 case GOMP_MAP_FORCE_FROM:
665 case GOMP_MAP_FORCE_TO:
666 case GOMP_MAP_POINTER:
667 case GOMP_MAP_RELEASE:
668 case GOMP_MAP_DELETE:
669 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
670 &kinds[i], GOMP_ASYNC_SYNC, 0);
671 break;
673 case GOMP_MAP_FORCE_DEVICEPTR:
674 break;
676 case GOMP_MAP_ALLOC:
677 if (!acc_is_present (hostaddrs[i], sizes[i]))
678 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
679 &kinds[i], GOMP_ASYNC_SYNC, 0);
680 break;
682 case GOMP_MAP_TO:
683 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
684 &kinds[i], GOMP_ASYNC_SYNC, 0);
686 break;
688 case GOMP_MAP_FROM:
689 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
690 &kinds[i], GOMP_ASYNC_SYNC, 0);
691 break;
693 case GOMP_MAP_FORCE_PRESENT:
694 if (!acc_is_present (hostaddrs[i], sizes[i]))
695 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
696 (unsigned long) sizes[i]);
697 break;
699 default:
700 assert (0);
701 break;