gcc/testsuite
[official-gcc.git] / libgomp / oacc-parallel.c
blob70758bcb9a2cdef3a081bc2eee08d3125e188801
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
36 #endif
37 #include <string.h>
38 #include <stdarg.h>
39 #include <assert.h>
41 /* Returns the number of mappings associated with the pointer or pset. PSET
42 have three mappings, whereas pointer have two. */
44 static int
45 find_pointer (int pos, size_t mapnum, unsigned short *kinds)
47 if (pos + 1 >= mapnum)
48 return 0;
50 unsigned char kind = kinds[pos+1] & 0xff;
52 if (kind == GOMP_MAP_TO_PSET)
53 return 3;
54 else if (kind == GOMP_MAP_POINTER)
55 return 2;
57 return 0;
/* Host-side buffer handed out by GOACC_get_ganglocal_ptr.  It is set by
   alloc_host_shared_mem and reset to NULL by free_host_shared_mem.  */
static void *__goacc_host_ganglocal_ptr;

/* Return the current host ganglocal buffer, or NULL when none is
   allocated.  */

void *
GOACC_get_ganglocal_ptr (void)
{
  void *current = __goacc_host_ganglocal_ptr;

  return current;
}
68 static void
69 alloc_host_shared_mem (size_t shared_size)
71 if (shared_size > 0)
72 __goacc_host_ganglocal_ptr = malloc (shared_size);
75 static void
76 free_host_shared_mem (void)
78 if (__goacc_host_ganglocal_ptr)
80 free (__goacc_host_ganglocal_ptr);
81 __goacc_host_ganglocal_ptr = NULL;
85 static void
86 alloc_ganglocal_addrs (size_t mapnum, void **hostaddrs, size_t *sizes,
87 unsigned short *kinds)
89 int i;
90 const int typemask = 0xff;
91 void *t;
93 for (i = 0; i < mapnum; i++)
95 if ((kinds[i] & typemask) == GOMP_MAP_FORCE_TO_GANGLOCAL)
97 t = malloc (sizes[i]);
98 memcpy (t, hostaddrs[i], sizes[i]);
99 hostaddrs[i] = t;
101 if (i + 1 < mapnum && GOMP_MAP_POINTER_P (kinds[i+1] & typemask))
103 size_t *ptr = (size_t *) malloc (sizeof (size_t *));
104 *ptr = (size_t)t;
105 hostaddrs[i+1] = ptr;
106 i++;
/* One node of the singly linked list of objects recorded by
   GOACC_register_static.  */
struct oacc_static
{
  void *addr;                   /* Host address of the object.  */
  size_t size;                  /* Size of the object in bytes.  */
  unsigned short mask;          /* Map kind recorded at registration.  */
  bool free;                    /* Set to false at registration.  */
  struct oacc_static *next;     /* Next node, or NULL at the tail.  */
};

/* Head of the registration list; new nodes are pushed at the front.  */
static struct oacc_static *oacc_statics;

/* True between goacc_allocate_static and goacc_deallocate_static.  */
static bool alloc_done = false;
123 void
124 goacc_allocate_static (acc_device_t d)
126 struct oacc_static *s;
128 if (alloc_done)
129 assert (0);
131 for (s = oacc_statics; s; s = s->next)
133 void *d;
135 switch (s->mask)
137 case GOMP_MAP_FORCE_ALLOC:
138 break;
140 case GOMP_MAP_FORCE_TO:
141 d = acc_deviceptr (s->addr);
142 acc_memcpy_to_device (d, s->addr, s->size);
143 break;
145 case GOMP_MAP_FORCE_DEVICEPTR:
146 case GOMP_MAP_DEVICE_RESIDENT:
147 case GOMP_MAP_LINK:
148 break;
150 default:
151 assert (0);
152 break;
156 alloc_done = true;
159 void
160 goacc_deallocate_static (acc_device_t d)
162 struct oacc_static *s;
163 unsigned short mask = GOMP_MAP_FORCE_DEALLOC;
165 if (!alloc_done)
166 return;
168 for (s = oacc_statics; s; s = s->next)
169 GOACC_enter_exit_data (d, 1, &s->addr, &s->size, &mask, 0, 0);
171 alloc_done = false;
174 static void goacc_wait (int async, int num_waits, va_list ap);
176 void
177 GOACC_parallel (int device, void (*fn) (void *),
178 size_t mapnum, void **hostaddrs, size_t *sizes,
179 unsigned short *kinds,
180 int num_gangs, int num_workers, int vector_length,
181 size_t shared_size,
182 int async, int num_waits, ...)
184 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
185 va_list ap;
186 struct goacc_thread *thr;
187 struct gomp_device_descr *acc_dev;
188 struct target_mem_desc *tgt;
189 void **devaddrs;
190 unsigned int i;
191 struct splay_tree_key_s k;
192 splay_tree_key tgt_fn_key;
193 void (*tgt_fn);
195 #ifdef HAVE_INTTYPES_H
196 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, sizes=%p, kinds=%p, "
197 "shared_size=%"PRIu64", async = %d\n",
198 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds,
199 (uint64_t) shared_size, async);
200 #else
201 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, "
202 "shared_size=%lu, async=%d\n",
203 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds,
204 (unsigned long) shared_size, async);
205 #endif
207 alloc_ganglocal_addrs (mapnum, hostaddrs, sizes, kinds);
209 goacc_lazy_initialize ();
211 thr = goacc_thread ();
212 acc_dev = thr->dev;
214 /* Host fallback if "if" clause is false or if the current device is set to
215 the host. */
216 if (host_fallback)
218 goacc_save_and_set_bind (acc_device_host);
219 alloc_host_shared_mem (shared_size);
220 fn (hostaddrs);
221 free_host_shared_mem ();
222 goacc_restore_bind ();
223 return;
225 else if (acc_device_type (acc_dev->type) == acc_device_host)
227 alloc_host_shared_mem (shared_size);
228 fn (hostaddrs);
229 free_host_shared_mem ();
230 return;
233 if (acc_device_type (acc_dev->type) == acc_device_host_nonshm)
234 alloc_host_shared_mem (shared_size);
236 va_start (ap, num_waits);
238 if (num_waits > 0)
239 goacc_wait (async, num_waits, ap);
241 va_end (ap);
243 acc_dev->openacc.async_set_async_func (async);
245 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
247 k.host_start = (uintptr_t) fn;
248 k.host_end = k.host_start + 1;
249 gomp_mutex_lock (&acc_dev->lock);
250 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
251 gomp_mutex_unlock (&acc_dev->lock);
253 if (tgt_fn_key == NULL)
254 gomp_fatal ("target function wasn't mapped");
256 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
258 else
259 tgt_fn = (void (*)) fn;
261 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
262 false);
264 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
265 for (i = 0; i < mapnum; i++)
266 devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
267 + tgt->list[i]->tgt_offset);
269 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
270 num_gangs, num_workers, vector_length, async,
271 shared_size, tgt);
273 /* If running synchronously, unmap immediately. */
274 if (async < acc_async_noval)
275 gomp_unmap_vars (tgt, true);
276 else
278 gomp_copy_from_async (tgt);
279 acc_dev->openacc.register_async_cleanup_func (tgt);
282 acc_dev->openacc.async_set_async_func (acc_async_sync);
284 if (acc_device_type (acc_dev->type) == acc_device_host_nonshm)
285 free_host_shared_mem ();
288 void
289 GOACC_data_start (int device, size_t mapnum,
290 void **hostaddrs, size_t *sizes, unsigned short *kinds)
292 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
293 struct target_mem_desc *tgt;
295 #ifdef HAVE_INTTYPES_H
296 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, sizes=%p, kinds=%p\n",
297 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
298 #else
299 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
300 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
301 #endif
303 goacc_lazy_initialize ();
305 struct goacc_thread *thr = goacc_thread ();
306 struct gomp_device_descr *acc_dev = thr->dev;
308 /* Host fallback or 'do nothing'. */
309 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
310 || host_fallback)
312 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
313 tgt->prev = thr->mapped_data;
314 thr->mapped_data = tgt;
316 return;
319 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
320 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
321 false);
322 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
323 tgt->prev = thr->mapped_data;
324 thr->mapped_data = tgt;
327 void
328 GOACC_data_end (void)
330 struct goacc_thread *thr = goacc_thread ();
331 struct target_mem_desc *tgt = thr->mapped_data;
333 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
334 thr->mapped_data = tgt->prev;
335 gomp_unmap_vars (tgt, true);
336 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
339 void
340 GOACC_enter_exit_data (int device, size_t mapnum,
341 void **hostaddrs, size_t *sizes, unsigned short *kinds,
342 int async, int num_waits, ...)
344 struct goacc_thread *thr;
345 struct gomp_device_descr *acc_dev;
346 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
347 bool data_enter = false;
348 size_t i;
350 goacc_lazy_initialize ();
352 thr = goacc_thread ();
353 acc_dev = thr->dev;
355 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
356 || host_fallback)
357 return;
359 if (num_waits > 0)
361 va_list ap;
363 va_start (ap, num_waits);
365 goacc_wait (async, num_waits, ap);
367 va_end (ap);
370 acc_dev->openacc.async_set_async_func (async);
372 /* Determine if this is an "acc enter data". */
373 for (i = 0; i < mapnum; ++i)
375 unsigned char kind = kinds[i] & 0xff;
377 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
378 continue;
380 if (kind == GOMP_MAP_FORCE_ALLOC
381 || kind == GOMP_MAP_FORCE_PRESENT
382 || kind == GOMP_MAP_FORCE_TO
383 || kind == GOMP_MAP_TO
384 || kind == GOMP_MAP_ALLOC)
386 data_enter = true;
387 break;
390 if (kind == GOMP_MAP_FORCE_DEALLOC
391 || kind == GOMP_MAP_FORCE_FROM)
392 break;
394 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
395 kind);
398 /* In c, non-pointers and arrays are represented by a single data clause.
399 Dynamically allocated arrays and subarrays are represented by a data
400 clause followed by an internal GOMP_MAP_POINTER.
402 In fortran, scalars and not allocated arrays are represented by a
403 single data clause. Allocated arrays and subarrays have three mappings:
404 1) the original data clause, 2) a PSET 3) a pointer to the array data.
407 if (data_enter)
409 for (i = 0; i < mapnum; i++)
411 unsigned char kind = kinds[i] & 0xff;
413 /* Scan for pointers and PSETs. */
414 int pointer = find_pointer (i, mapnum, kinds);
416 if (!pointer)
418 switch (kind)
420 case GOMP_MAP_ALLOC:
421 acc_present_or_create (hostaddrs[i], sizes[i]);
422 break;
423 case GOMP_MAP_FORCE_ALLOC:
424 acc_create (hostaddrs[i], sizes[i]);
425 break;
426 case GOMP_MAP_TO:
427 acc_present_or_copyin (hostaddrs[i], sizes[i]);
428 break;
429 case GOMP_MAP_FORCE_TO:
430 acc_copyin (hostaddrs[i], sizes[i]);
431 break;
432 default:
433 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
434 kind);
435 break;
438 else
440 if (!acc_is_present (hostaddrs[i], sizes[i]))
442 gomp_acc_insert_pointer (pointer, &hostaddrs[i],
443 &sizes[i], &kinds[i]);
445 /* Increment 'i' by two because OpenACC requires fortran
446 arrays to be contiguous, so each PSET is associated with
447 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
448 one MAP_POINTER. */
449 i += pointer - 1;
453 else
454 for (i = 0; i < mapnum; ++i)
456 unsigned char kind = kinds[i] & 0xff;
458 int pointer = find_pointer (i, mapnum, kinds);
460 if (!pointer)
462 switch (kind)
464 case GOMP_MAP_FORCE_DEALLOC:
465 if (acc_is_present (hostaddrs[i], sizes[i]))
466 acc_delete (hostaddrs[i], sizes[i]);
467 else
468 i++;
469 break;
470 case GOMP_MAP_FORCE_FROM:
471 acc_copyout (hostaddrs[i], sizes[i]);
472 break;
473 default:
474 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
475 kind);
476 break;
479 else
481 if (acc_is_present (hostaddrs[i], sizes[i]))
483 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
484 == GOMP_MAP_FORCE_FROM, async,
485 pointer);
486 /* See the above comment. */
488 i += pointer - 1;
492 acc_dev->openacc.async_set_async_func (acc_async_sync);
/* Entry point for a kernels region.  The NUM_WAITS variadic queue ids are
   waited on here, after which the work is handed to GOACC_parallel with the
   same arguments and an empty wait list.  */

void
GOACC_kernels (int device, void (*fn) (void *),
               size_t mapnum, void **hostaddrs, size_t *sizes,
               unsigned short *kinds,
               int num_gangs, int num_workers, int vector_length,
               size_t shared_size, int async, int num_waits, ...)
{
  va_list wait_args;

#ifdef HAVE_INTTYPES_H
  gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, sizes=%p, kinds=%p\n",
              __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
#else
  gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
              __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
#endif

  goacc_lazy_initialize ();

  va_start (wait_args, num_waits);

  if (num_waits > 0)
    goacc_wait (async, num_waits, wait_args);

  va_end (wait_args);

  /* The waits have been consumed above, so none are passed on.  */
  GOACC_parallel (device, fn, mapnum, hostaddrs, sizes, kinds,
                  num_gangs, num_workers, vector_length, shared_size,
                  async, 0);
}
526 static void
527 goacc_wait (int async, int num_waits, va_list ap)
529 struct goacc_thread *thr = goacc_thread ();
530 struct gomp_device_descr *acc_dev = thr->dev;
531 int i;
533 assert (num_waits >= 0);
535 if (async == acc_async_sync && num_waits == 0)
537 acc_wait_all ();
538 return;
541 if (async == acc_async_sync && num_waits)
543 for (i = 0; i < num_waits; i++)
545 int qid = va_arg (ap, int);
547 if (acc_async_test (qid))
548 continue;
550 acc_wait (qid);
552 return;
555 if (async == acc_async_noval && num_waits == 0)
557 acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
558 return;
561 for (i = 0; i < num_waits; i++)
563 int qid = va_arg (ap, int);
565 if (acc_async_test (qid))
566 continue;
568 /* If we're waiting on the same asynchronous queue as we're launching on,
569 the queue itself will order work as required, so there's no need to
570 wait explicitly. */
571 if (qid != async)
572 acc_dev->openacc.async_wait_async_func (qid, async);
576 void
577 GOACC_update (int device, size_t mapnum,
578 void **hostaddrs, size_t *sizes, unsigned short *kinds,
579 int async, int num_waits, ...)
581 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
582 size_t i;
584 goacc_lazy_initialize ();
586 struct goacc_thread *thr = goacc_thread ();
587 struct gomp_device_descr *acc_dev = thr->dev;
589 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
590 || host_fallback)
591 return;
593 if (num_waits > 0)
595 va_list ap;
597 va_start (ap, num_waits);
599 goacc_wait (async, num_waits, ap);
601 va_end (ap);
604 acc_dev->openacc.async_set_async_func (async);
606 for (i = 0; i < mapnum; ++i)
608 unsigned char kind = kinds[i] & 0xff;
610 switch (kind)
612 case GOMP_MAP_POINTER:
613 case GOMP_MAP_TO_PSET:
614 break;
616 case GOMP_MAP_FORCE_TO:
617 case GOMP_MAP_FORCE_TO_GANGLOCAL:
618 acc_update_device (hostaddrs[i], sizes[i]);
619 break;
621 case GOMP_MAP_FORCE_FROM:
622 acc_update_self (hostaddrs[i], sizes[i]);
623 break;
625 default:
626 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
627 break;
631 acc_dev->openacc.async_set_async_func (acc_async_sync);
/* Entry point for a "wait" request: forward ASYNC and the NUM_WAITS
   variadic int queue ids to goacc_wait.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list queue_ids;

  va_start (queue_ids, num_waits);
  goacc_wait (async, num_waits, queue_ids);
  va_end (queue_ids);
}
/* Host implementation: always reports a single thread.  The GANG, WORKER
   and VECTOR arguments are ignored.  */

int
GOACC_get_num_threads (int gang, int worker, int vector)
{
  (void) gang;
  (void) worker;
  (void) vector;

  return 1;
}
/* Host implementation: the calling thread is always thread zero.  The
   GANG, WORKER and VECTOR arguments are ignored.  */

int
GOACC_get_thread_num (int gang, int worker, int vector)
{
  (void) gang;
  (void) worker;
  (void) vector;

  return 0;
}
658 void
659 GOACC_register_static (void *addr, int size, unsigned int mask)
661 struct oacc_static *s;
663 s = (struct oacc_static *) malloc (sizeof (struct oacc_static));
664 s->addr = addr;
665 s->size = (size_t) size;
666 s->mask = mask;
667 s->free = false;
668 s->next = NULL;
670 if (oacc_statics)
671 s->next = oacc_statics;
673 oacc_statics = s;
676 #include <stdio.h>
678 void
679 GOACC_declare (int device, size_t mapnum,
680 void **hostaddrs, size_t *sizes, unsigned short *kinds)
682 int i;
684 for (i = 0; i < mapnum; i++)
686 unsigned char kind = kinds[i] & 0xff;
688 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
689 continue;
691 switch (kind)
693 case GOMP_MAP_FORCE_ALLOC:
694 case GOMP_MAP_FORCE_DEALLOC:
695 case GOMP_MAP_FORCE_FROM:
696 case GOMP_MAP_FORCE_TO:
697 case GOMP_MAP_POINTER:
698 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
699 &kinds[i], 0, 0);
700 break;
702 case GOMP_MAP_FORCE_DEVICEPTR:
703 break;
705 case GOMP_MAP_ALLOC:
706 if (!acc_is_present (hostaddrs[i], sizes[i]))
708 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
709 &kinds[i], 0, 0);
711 break;
713 case GOMP_MAP_TO:
714 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
715 &kinds[i], 0, 0);
717 break;
719 case GOMP_MAP_FROM:
720 kinds[i] = GOMP_MAP_FORCE_FROM;
721 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
722 &kinds[i], 0, 0);
723 break;
725 case GOMP_MAP_FORCE_PRESENT:
726 if (!acc_is_present (hostaddrs[i], sizes[i]))
727 gomp_fatal ("[%p,%zd] is not mapped", hostaddrs[i], sizes[i]);
728 break;
730 default:
731 assert (0);
732 break;