Support for OpenACC acc_on_device in offloading configurations.
[official-gcc.git] / libgomp / target.c
blobbdfec67b91358237fc8233f168191437e6b623bb
1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "libgomp_target.h"
31 #include "oacc-plugin.h"
32 #include "oacc-int.h"
33 #include "gomp-constants.h"
34 #include <limits.h>
35 #include <stdbool.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <stdio.h>
39 #include <assert.h>
41 #ifdef PLUGIN_SUPPORT
42 #include <dlfcn.h>
43 #endif
45 static void gomp_target_init (void);
47 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
49 /* This structure describes an offload image.
50 It contains type of the target device, pointer to host table descriptor, and
51 pointer to target data. */
52 struct offload_image_descr {
53 enum offload_target_type type;
54 void *host_table;
55 void *target_data;
58 /* Array of descriptors of offload images. */
59 static struct offload_image_descr *offload_images;
61 /* Total number of offload images. */
62 static int num_offload_images;
64 /* Array of descriptors for all available devices. */
65 static struct gomp_device_descr *devices;
67 /* Total number of available devices. */
68 static int num_devices;
70 /* The comparison function. */
72 attribute_hidden int
73 splay_compare (splay_tree_key x, splay_tree_key y)
75 if (x->host_start == x->host_end
76 && y->host_start == y->host_end)
77 return 0;
78 if (x->host_end <= y->host_start)
79 return -1;
80 if (x->host_start >= y->host_end)
81 return 1;
82 return 0;
85 #include "splay-tree.h"
87 attribute_hidden void
88 gomp_init_targets_once (void)
90 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
93 attribute_hidden int
94 gomp_get_num_devices (void)
96 gomp_init_targets_once ();
97 return num_devices;
100 static struct gomp_device_descr *
101 resolve_device (int device_id)
103 if (device_id == GOMP_DEVICE_ICV)
105 struct gomp_task_icv *icv = gomp_icv (false);
106 device_id = icv->default_device_var;
109 if (device_id < 0 || device_id >= gomp_get_num_devices ())
110 return NULL;
112 return &devices[device_id];
116 /* Handle the case where splay_tree_lookup found oldn for newn.
117 Helper function of gomp_map_vars. */
119 static inline void
120 gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
121 unsigned char kind)
123 if ((kind & GOMP_MAP_FLAG_FORCE)
124 || oldn->host_start > newn->host_start
125 || oldn->host_end < newn->host_end)
126 gomp_fatal ("Trying to map into device [%p..%p) object when "
127 "[%p..%p) is already mapped",
128 (void *) newn->host_start, (void *) newn->host_end,
129 (void *) oldn->host_start, (void *) oldn->host_end);
130 oldn->refcount++;
133 static int
134 get_kind (bool is_openacc, void *kinds, int idx)
136 return is_openacc ? ((unsigned short *) kinds)[idx]
137 : ((unsigned char *) kinds)[idx];
140 attribute_hidden struct target_mem_desc *
141 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
142 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
143 bool is_openacc, bool is_target)
145 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
146 const int rshift = is_openacc ? 8 : 3;
147 const int typemask = is_openacc ? 0xff : 0x7;
148 struct gomp_memory_mapping *mm = &devicep->mem_map;
149 struct splay_tree_key_s cur_node;
150 struct target_mem_desc *tgt
151 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
152 tgt->list_count = mapnum;
153 tgt->refcount = 1;
154 tgt->device_descr = devicep;
155 tgt->mem_map = mm;
157 /* From gcc/fortran/trans-types.c */
158 struct descriptor_dimension
160 long stride;
161 long lbound;
162 long ubound;
165 struct gfc_array_descriptor
167 void *data;
168 long offset;
169 long dtype;
170 struct descriptor_dimension dimension[];
173 #define GFC_DTYPE_RANK_MASK 0x07
174 #define GFC_DTYPE_TYPE_MASK 0x38
175 #define GFC_DTYPE_TYPE_SHIFT 3
176 #define GFC_DTYPE_SIZE_SHIFT 6
178 if (mapnum == 0)
179 return tgt;
181 tgt_align = sizeof (void *);
182 tgt_size = 0;
183 if (is_target)
185 size_t align = 4 * sizeof (void *);
186 tgt_align = align;
187 tgt_size = mapnum * sizeof (void *);
189 gomp_mutex_lock (&mm->lock);
190 for (i = 0; i < mapnum; i++)
192 int kind = get_kind (is_openacc, kinds, i);
193 if (hostaddrs[i] == NULL)
195 tgt->list[i] = NULL;
196 continue;
198 cur_node.host_start = (uintptr_t) hostaddrs[i];
199 if (!GOMP_MAP_POINTER_P (kind & typemask))
200 cur_node.host_end = cur_node.host_start + sizes[i];
201 else
202 cur_node.host_end = cur_node.host_start + sizeof (void *);
203 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
204 if (n)
206 tgt->list[i] = n;
207 gomp_map_vars_existing (n, &cur_node, kind & typemask);
209 else
211 tgt->list[i] = NULL;
213 if ((kind & typemask) == GOMP_MAP_TO_PSET)
215 struct gfc_array_descriptor *gad;
216 size_t rank;
217 int j;
218 bool alloc_arrays = true;
220 for (j = i - 1; j >= 0; j--)
222 if (hostaddrs[j] == *(void**)hostaddrs[i])
224 alloc_arrays = false;
225 break;
229 gad = (struct gfc_array_descriptor *) cur_node.host_start;
230 rank = gad->dtype & GFC_DTYPE_RANK_MASK;
232 cur_node.host_start = (uintptr_t)gad->data;
233 cur_node.host_end = cur_node.host_start +
234 sizeof (struct gfc_array_descriptor) +
235 (sizeof (struct descriptor_dimension) * rank);
237 if (alloc_arrays)
239 size_t tsize;
241 tsize = gad->dtype >> GFC_DTYPE_SIZE_SHIFT;
243 for (j = 0; j < rank; j++)
245 cur_node.host_end += tsize *
246 (gad->dimension[j].ubound -
247 gad->dimension[j].lbound + 1);
252 size_t align = (size_t) 1 << (kind >> rshift);
253 not_found_cnt++;
254 if (tgt_align < align)
255 tgt_align = align;
256 tgt_size = (tgt_size + align - 1) & ~(align - 1);
257 tgt_size += cur_node.host_end - cur_node.host_start;
258 if ((kind & typemask) == GOMP_MAP_TO_PSET)
260 size_t j;
261 for (j = i + 1; j < mapnum; j++)
262 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
263 & typemask))
264 break;
265 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
266 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
267 > cur_node.host_end))
268 break;
269 else
271 tgt->list[j] = NULL;
272 i++;
278 if (devaddrs)
280 if (mapnum != 1)
281 gomp_fatal ("unexpected aggregation");
282 tgt->to_free = devaddrs[0];
283 tgt->tgt_start = (uintptr_t) tgt->to_free;
284 tgt->tgt_end = tgt->tgt_start + sizes[0];
286 else if (not_found_cnt || is_target)
288 /* Allocate tgt_align aligned tgt_size block of memory. */
289 /* FIXME: Perhaps change interface to allocate properly aligned
290 memory. */
291 tgt->to_free = devicep->alloc_func (devicep->target_id,
292 tgt_size + tgt_align - 1);
293 tgt->tgt_start = (uintptr_t) tgt->to_free;
294 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
295 tgt->tgt_end = tgt->tgt_start + tgt_size;
297 else
299 tgt->to_free = NULL;
300 tgt->tgt_start = 0;
301 tgt->tgt_end = 0;
304 tgt_size = 0;
305 if (is_target)
306 tgt_size = mapnum * sizeof (void *);
308 tgt->array = NULL;
309 if (not_found_cnt)
311 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
312 splay_tree_node array = tgt->array;
313 size_t j;
315 for (i = 0; i < mapnum; i++)
316 if (tgt->list[i] == NULL)
318 int kind = get_kind (is_openacc, kinds, i);
319 if (hostaddrs[i] == NULL)
320 continue;
321 splay_tree_key k = &array->key;
322 k->host_start = (uintptr_t) hostaddrs[i];
323 if (!GOMP_MAP_POINTER_P (kind & typemask))
324 k->host_end = k->host_start + sizes[i];
325 else
326 k->host_end = k->host_start + sizeof (void *);
327 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
328 if (n)
330 tgt->list[i] = n;
331 gomp_map_vars_existing (n, k, kind & typemask);
333 else
335 size_t align = (size_t) 1 << (kind >> rshift);
336 tgt->list[i] = k;
337 tgt_size = (tgt_size + align - 1) & ~(align - 1);
338 k->tgt = tgt;
339 k->tgt_offset = tgt_size;
340 tgt_size += k->host_end - k->host_start;
341 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
342 k->refcount = 1;
343 k->async_refcount = 0;
344 tgt->refcount++;
345 array->left = NULL;
346 array->right = NULL;
348 splay_tree_insert (&mm->splay_tree, array);
350 switch (kind & typemask)
352 case GOMP_MAP_ALLOC:
353 case GOMP_MAP_FROM:
354 case GOMP_MAP_FORCE_ALLOC:
355 case GOMP_MAP_FORCE_FROM:
356 break;
357 case GOMP_MAP_TO:
358 case GOMP_MAP_TOFROM:
359 case GOMP_MAP_FORCE_TO:
360 case GOMP_MAP_FORCE_TOFROM:
361 /* Copy from host to device memory. */
362 /* FIXME: Perhaps add some smarts, like if copying
363 several adjacent fields from host to target, use some
364 host buffer to avoid sending each var individually. */
365 devicep->host2dev_func (devicep->target_id,
366 (void *) (tgt->tgt_start
367 + k->tgt_offset),
368 (void *) k->host_start,
369 k->host_end - k->host_start);
370 break;
371 case GOMP_MAP_POINTER:
372 cur_node.host_start
373 = (uintptr_t) *(void **) k->host_start;
374 if (cur_node.host_start == (uintptr_t) NULL)
376 cur_node.tgt_offset = (uintptr_t) NULL;
377 /* Copy from host to device memory. */
378 /* FIXME: see above FIXME comment. */
379 devicep->host2dev_func (devicep->target_id,
380 (void *) (tgt->tgt_start
381 + k->tgt_offset),
382 (void *) &cur_node.tgt_offset,
383 sizeof (void *));
384 break;
386 /* Add bias to the pointer value. */
387 cur_node.host_start += sizes[i];
388 cur_node.host_end = cur_node.host_start + 1;
389 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
390 if (n == NULL)
392 /* Could be possibly zero size array section. */
393 cur_node.host_end--;
394 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
395 if (n == NULL)
397 cur_node.host_start--;
398 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
399 cur_node.host_start++;
402 if (n == NULL)
403 gomp_fatal ("Pointer target of array section "
404 "wasn't mapped");
406 cur_node.host_start -= n->host_start;
407 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
408 + cur_node.host_start;
409 /* At this point tgt_offset is target address of the
410 array section. Now subtract bias to get what we want
411 to initialize the pointer with. */
412 cur_node.tgt_offset -= sizes[i];
413 /* Copy from host to device memory. */
414 /* FIXME: see above FIXME comment. */
415 devicep->host2dev_func (devicep->target_id,
416 (void *) (tgt->tgt_start
417 + k->tgt_offset),
418 (void *) &cur_node.tgt_offset,
419 sizeof (void *));
420 break;
421 case GOMP_MAP_TO_PSET:
423 /* Copy from host to device memory. */
424 /* FIXME: see above FIXME comment. */
425 devicep->host2dev_func (devicep->target_id,
426 (void *) (tgt->tgt_start
427 + k->tgt_offset),
428 (void *) k->host_start,
429 (k->host_end - k->host_start));
430 devicep->host2dev_func (devicep->target_id,
431 (void *) (tgt->tgt_start
432 + k->tgt_offset),
433 (void *) &tgt->tgt_start,
434 sizeof (void *));
436 for (j = i + 1; j < mapnum; j++)
437 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
438 & typemask))
439 break;
440 else if ((uintptr_t) hostaddrs[j] < k->host_start
441 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
442 > k->host_end))
443 break;
444 else
446 tgt->list[j] = k;
447 k->refcount++;
448 cur_node.host_start
449 = (uintptr_t) *(void **) hostaddrs[j];
450 if (cur_node.host_start == (uintptr_t) NULL)
452 cur_node.tgt_offset = (uintptr_t) NULL;
453 /* Copy from host to device memory. */
454 /* FIXME: see above FIXME comment. */
455 devicep->host2dev_func (devicep->target_id,
456 (void *) (tgt->tgt_start
457 + k->tgt_offset
458 + ((uintptr_t) hostaddrs[j]
459 - k->host_start)),
460 (void *) &cur_node.tgt_offset,
461 sizeof (void *));
462 i++;
463 continue;
465 /* Add bias to the pointer value. */
466 cur_node.host_start += sizes[j];
467 cur_node.host_end = cur_node.host_start + 1;
468 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
469 if (n == NULL)
471 /* Could be possibly zero size array
472 section. */
473 cur_node.host_end--;
474 n = splay_tree_lookup (&mm->splay_tree,
475 &cur_node);
476 if (n == NULL)
478 cur_node.host_start--;
479 n = splay_tree_lookup (&mm->splay_tree,
480 &cur_node);
481 cur_node.host_start++;
484 if (n == NULL)
485 gomp_fatal ("Pointer target of array section "
486 "wasn't mapped");
487 cur_node.host_start -= n->host_start;
488 cur_node.tgt_offset = n->tgt->tgt_start
489 + n->tgt_offset
490 + cur_node.host_start;
491 /* At this point tgt_offset is target address of the
492 array section. Now subtract bias to get what we
493 want to initialize the pointer with. */
494 cur_node.tgt_offset -= sizes[j];
495 /* Copy from host to device memory. */
496 /* FIXME: see above FIXME comment. */
498 devicep->host2dev_func (devicep->target_id,
499 (void *) (tgt->tgt_start
500 + k->tgt_offset
501 + ((uintptr_t) hostaddrs[j]
502 - k->host_start)),
503 (void *) &cur_node.tgt_offset,
504 sizeof (void *));
505 i++;
508 break;
509 case GOMP_MAP_FORCE_PRESENT:
511 /* We already looked up the memory region above and it
512 was missing. */
513 size_t size = k->host_end - k->host_start;
514 gomp_fatal ("present clause: !acc_is_present (%p, "
515 "%zd (0x%zx))", (void *) k->host_start,
516 size, size);
518 break;
519 case GOMP_MAP_FORCE_DEVICEPTR:
520 assert (k->host_end - k->host_start == sizeof (void *));
522 devicep->host2dev_func (devicep->target_id,
523 (void *) (tgt->tgt_start
524 + k->tgt_offset),
525 (void *) k->host_start,
526 sizeof (void *));
527 break;
528 default:
529 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
530 kind);
532 array++;
537 #undef GFC_DTYPE_RANK_MASK
538 #undef GFC_DTYPE_TYPE_MASK
539 #undef GFC_DTYPE_TYPE_SHIFT
540 #undef GFC_DTYPE_SIZE_SHIFT
542 if (is_target)
544 for (i = 0; i < mapnum; i++)
546 if (tgt->list[i] == NULL)
547 cur_node.tgt_offset = (uintptr_t) NULL;
548 else
549 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
550 + tgt->list[i]->tgt_offset;
551 /* Copy from host to device memory. */
552 /* FIXME: see above FIXME comment. */
553 devicep->host2dev_func (devicep->target_id,
554 (void *) (tgt->tgt_start
555 + i * sizeof (void *)),
556 (void *) &cur_node.tgt_offset,
557 sizeof (void *));
561 gomp_mutex_unlock (&mm->lock);
562 return tgt;
565 static void
566 gomp_unmap_tgt (struct target_mem_desc *tgt)
568 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
569 if (tgt->tgt_end)
570 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
572 free (tgt->array);
573 free (tgt);
576 /* Decrease the refcount for a set of mapped variables, and queue asychronous
577 copies from the device back to the host after any work that has been issued.
578 Because the regions are still "live", increment an asynchronous reference
579 count to indicate that they should not be unmapped from host-side data
580 structures until the asynchronous copy has completed. */
582 attribute_hidden void
583 gomp_copy_from_async (struct target_mem_desc *tgt)
585 struct gomp_device_descr *devicep = tgt->device_descr;
586 struct gomp_memory_mapping *mm = tgt->mem_map;
587 size_t i;
589 gomp_mutex_lock (&mm->lock);
591 for (i = 0; i < tgt->list_count; i++)
592 if (tgt->list[i] == NULL)
594 else if (tgt->list[i]->refcount > 1)
596 tgt->list[i]->refcount--;
597 tgt->list[i]->async_refcount++;
599 else
601 splay_tree_key k = tgt->list[i];
602 if (k->copy_from)
603 /* Copy from device to host memory. */
604 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
605 (void *) (k->tgt->tgt_start + k->tgt_offset),
606 k->host_end - k->host_start);
609 gomp_mutex_unlock (&mm->lock);
612 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
613 variables back from device to host: if it is false, it is assumed that this
614 has been done already, i.e. by gomp_copy_from_async above. */
616 attribute_hidden void
617 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
619 struct gomp_device_descr *devicep = tgt->device_descr;
620 struct gomp_memory_mapping *mm = tgt->mem_map;
622 if (tgt->list_count == 0)
624 free (tgt);
625 return;
628 size_t i;
629 gomp_mutex_lock (&mm->lock);
630 for (i = 0; i < tgt->list_count; i++)
631 if (tgt->list[i] == NULL)
633 else if (tgt->list[i]->refcount > 1)
634 tgt->list[i]->refcount--;
635 else if (tgt->list[i]->async_refcount > 0)
636 tgt->list[i]->async_refcount--;
637 else
639 splay_tree_key k = tgt->list[i];
640 if (k->copy_from && do_copyfrom)
641 /* Copy from device to host memory. */
642 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
643 (void *) (k->tgt->tgt_start + k->tgt_offset),
644 k->host_end - k->host_start);
645 splay_tree_remove (&mm->splay_tree, k);
646 if (k->tgt->refcount > 1)
647 k->tgt->refcount--;
648 else
649 gomp_unmap_tgt (k->tgt);
652 if (tgt->refcount > 1)
653 tgt->refcount--;
654 else
655 gomp_unmap_tgt (tgt);
656 gomp_mutex_unlock (&mm->lock);
659 static void
660 gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
661 size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
662 bool is_openacc)
664 size_t i;
665 struct splay_tree_key_s cur_node;
666 const int typemask = is_openacc ? 0xff : 0x7;
668 if (!devicep)
669 return;
671 if (mapnum == 0)
672 return;
674 gomp_mutex_lock (&mm->lock);
675 for (i = 0; i < mapnum; i++)
676 if (sizes[i])
678 cur_node.host_start = (uintptr_t) hostaddrs[i];
679 cur_node.host_end = cur_node.host_start + sizes[i];
680 splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
681 &cur_node);
682 if (n)
684 int kind = get_kind (is_openacc, kinds, i);
685 if (n->host_start > cur_node.host_start
686 || n->host_end < cur_node.host_end)
687 gomp_fatal ("Trying to update [%p..%p) object when"
688 "only [%p..%p) is mapped",
689 (void *) cur_node.host_start,
690 (void *) cur_node.host_end,
691 (void *) n->host_start,
692 (void *) n->host_end);
693 if (GOMP_MAP_COPY_TO_P (kind & typemask))
694 /* Copy from host to device memory. */
695 devicep->host2dev_func (devicep->target_id,
696 (void *) (n->tgt->tgt_start
697 + n->tgt_offset
698 + cur_node.host_start
699 - n->host_start),
700 (void *) cur_node.host_start,
701 cur_node.host_end - cur_node.host_start);
702 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
703 /* Copy from device to host memory. */
704 devicep->dev2host_func (devicep->target_id,
705 (void *) cur_node.host_start,
706 (void *) (n->tgt->tgt_start
707 + n->tgt_offset
708 + cur_node.host_start
709 - n->host_start),
710 cur_node.host_end - cur_node.host_start);
712 else
713 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
714 (void *) cur_node.host_start,
715 (void *) cur_node.host_end);
717 gomp_mutex_unlock (&mm->lock);
720 static void gomp_register_image_for_device (struct gomp_device_descr *device,
721 struct offload_image_descr *image);
723 /* This function should be called from every offload image.
724 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
725 the target, and TARGET_DATA needed by target plugin. */
727 void
728 GOMP_offload_register (void *host_table, enum offload_target_type target_type,
729 void *target_data)
731 offload_images = gomp_realloc (offload_images,
732 (num_offload_images + 1)
733 * sizeof (struct offload_image_descr));
735 if (offload_images == NULL)
736 return;
738 offload_images[num_offload_images].type = target_type;
739 offload_images[num_offload_images].host_table = host_table;
740 offload_images[num_offload_images].target_data = target_data;
742 num_offload_images++;
745 /* This function initializes the target device, specified by DEVICEP. */
747 attribute_hidden void
748 gomp_init_device (struct gomp_device_descr *devicep)
750 /* Initialize the target device. */
751 devicep->init_device_func (devicep->target_id);
753 devicep->is_initialized = true;
756 attribute_hidden void
757 gomp_init_tables (const struct gomp_device_descr *devicep,
758 struct gomp_memory_mapping *mm)
760 /* Get address mapping table for device. */
761 struct mapping_table *table = NULL;
762 int i, num_entries = devicep->get_table_func (devicep->target_id, &table);
764 /* Insert host-target address mapping into dev_splay_tree. */
765 for (i = 0; i < num_entries; i++)
767 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
768 tgt->refcount = 1;
769 tgt->array = gomp_malloc (sizeof (*tgt->array));
770 tgt->tgt_start = table[i].tgt_start;
771 tgt->tgt_end = table[i].tgt_end;
772 tgt->to_free = NULL;
773 tgt->list_count = 0;
774 tgt->device_descr = (struct gomp_device_descr *) devicep;
775 splay_tree_node node = tgt->array;
776 splay_tree_key k = &node->key;
777 k->host_start = table[i].host_start;
778 k->host_end = table[i].host_end;
779 k->tgt_offset = 0;
780 k->refcount = 1;
781 k->copy_from = false;
782 k->tgt = tgt;
783 node->left = NULL;
784 node->right = NULL;
785 splay_tree_insert (&mm->splay_tree, node);
788 free (table);
789 mm->is_initialized = true;
792 static void
793 gomp_init_dev_tables (struct gomp_device_descr *devicep)
795 gomp_init_device (devicep);
796 gomp_init_tables (devicep, &devicep->mem_map);
800 attribute_hidden void
801 gomp_free_memmap (struct gomp_device_descr *devicep)
803 struct gomp_memory_mapping *mm = &devicep->mem_map;
805 while (mm->splay_tree.root)
807 struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
809 splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
810 free (tgt->array);
811 free (tgt);
814 mm->is_initialized = false;
817 attribute_hidden void
818 gomp_fini_device (struct gomp_device_descr *devicep)
820 if (devicep->is_initialized)
821 devicep->fini_device_func (devicep->target_id);
823 devicep->is_initialized = false;
826 /* Called when encountering a target directive. If DEVICE
827 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
828 GOMP_DEVICE_HOST_FALLBACK (or any value
829 larger than last available hw device), use host fallback.
830 FN is address of host code, OFFLOAD_TABLE contains value of the
831 __OFFLOAD_TABLE__ symbol in the shared library or binary that invokes
832 GOMP_target. HOSTADDRS, SIZES and KINDS are arrays
833 with MAPNUM entries, with addresses of the host objects,
834 sizes of the host objects (resp. for pointer kind pointer bias
835 and assumed sizeof (void *) size) and kinds. */
837 void
838 GOMP_target (int device, void (*fn) (void *), const void *offload_table,
839 size_t mapnum, void **hostaddrs, size_t *sizes,
840 unsigned char *kinds)
842 struct gomp_device_descr *devicep = resolve_device (device);
844 if (devicep != NULL && !devicep->is_initialized)
845 gomp_init_dev_tables (devicep);
847 if (devicep == NULL || !(devicep->capabilities & TARGET_CAP_OPENMP_400))
849 /* Host fallback. */
850 struct gomp_thread old_thr, *thr = gomp_thread ();
851 old_thr = *thr;
852 memset (thr, '\0', sizeof (*thr));
853 if (gomp_places_list)
855 thr->place = old_thr.place;
856 thr->ts.place_partition_len = gomp_places_list_len;
858 fn (hostaddrs);
859 gomp_free_thread (thr);
860 *thr = old_thr;
861 return;
864 void *fn_addr;
866 if (devicep->capabilities & TARGET_CAP_NATIVE_EXEC)
867 fn_addr = (void *) fn;
868 else
870 struct gomp_memory_mapping *mm = &devicep->mem_map;
871 gomp_mutex_lock (&mm->lock);
872 if (!devicep->is_initialized)
873 gomp_init_dev_tables (devicep);
874 struct splay_tree_key_s k;
875 k.host_start = (uintptr_t) fn;
876 k.host_end = k.host_start + 1;
877 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map.splay_tree,
878 &k);
879 if (tgt_fn == NULL)
880 gomp_fatal ("Target function wasn't mapped");
881 gomp_mutex_unlock (&mm->lock);
883 fn_addr = (void *) tgt_fn->tgt->tgt_start;
886 struct target_mem_desc *tgt_vars
887 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
888 true);
889 struct gomp_thread old_thr, *thr = gomp_thread ();
890 old_thr = *thr;
891 memset (thr, '\0', sizeof (*thr));
892 if (gomp_places_list)
894 thr->place = old_thr.place;
895 thr->ts.place_partition_len = gomp_places_list_len;
897 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
898 gomp_free_thread (thr);
899 *thr = old_thr;
900 gomp_unmap_vars (tgt_vars, true);
903 void
904 GOMP_target_data (int device, const void *offload_table, size_t mapnum,
905 void **hostaddrs, size_t *sizes, unsigned char *kinds)
907 struct gomp_device_descr *devicep = resolve_device (device);
909 if (devicep != NULL && !devicep->is_initialized)
910 gomp_init_dev_tables (devicep);
912 if (devicep == NULL || !(devicep->capabilities & TARGET_CAP_OPENMP_400))
914 /* Host fallback. */
915 struct gomp_task_icv *icv = gomp_icv (false);
916 if (icv->target_data)
918 /* Even when doing a host fallback, if there are any active
919 #pragma omp target data constructs, need to remember the
920 new #pragma omp target data, otherwise GOMP_target_end_data
921 would get out of sync. */
922 struct target_mem_desc *tgt
923 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
924 tgt->prev = icv->target_data;
925 icv->target_data = tgt;
927 return;
930 struct gomp_memory_mapping *mm = &devicep->mem_map;
931 gomp_mutex_lock (&mm->lock);
932 if (!devicep->is_initialized)
933 gomp_init_dev_tables (devicep);
934 gomp_mutex_unlock (&mm->lock);
936 struct target_mem_desc *tgt
937 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
938 false);
939 struct gomp_task_icv *icv = gomp_icv (true);
940 tgt->prev = icv->target_data;
941 icv->target_data = tgt;
944 void
945 GOMP_target_end_data (void)
947 struct gomp_task_icv *icv = gomp_icv (false);
948 if (icv->target_data)
950 struct target_mem_desc *tgt = icv->target_data;
951 icv->target_data = tgt->prev;
952 gomp_unmap_vars (tgt, true);
956 void
957 GOMP_target_update (int device, const void *offload_table, size_t mapnum,
958 void **hostaddrs, size_t *sizes, unsigned char *kinds)
960 struct gomp_device_descr *devicep = resolve_device (device);
962 if (devicep == NULL)
963 return;
965 struct gomp_memory_mapping *mm = &devicep->mem_map;
966 gomp_mutex_lock (&mm->lock);
967 if (!devicep->is_initialized)
968 gomp_init_device (devicep);
969 gomp_mutex_unlock (&mm->lock);
971 if (!(devicep->capabilities & TARGET_CAP_OPENMP_400))
972 return;
974 gomp_update (devicep, &devicep->mem_map, mapnum, hostaddrs, sizes, kinds,
975 false);
978 void
979 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
981 if (thread_limit)
983 struct gomp_task_icv *icv = gomp_icv (true);
984 icv->thread_limit_var
985 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
987 (void) num_teams;
990 #ifdef PLUGIN_SUPPORT
992 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
993 in PLUGIN_NAME.
994 The handles of the found functions are stored in the corresponding fields
995 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
997 static bool
998 gomp_load_plugin_for_device (struct gomp_device_descr *device,
999 const char *plugin_name)
1001 char *err = NULL, *last_missing = NULL;
1002 int optional_present, optional_total;
1004 /* Clear any existing error. */
1005 dlerror ();
1007 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
1008 if (!plugin_handle)
1010 err = dlerror ();
1011 goto out;
1014 /* Check if all required functions are available in the plugin and store
1015 their handlers. */
1016 #define DLSYM(f) \
1017 do \
1019 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
1020 err = dlerror (); \
1021 if (err != NULL) \
1022 goto out; \
1024 while (0)
1025 /* Similar, but missing functions are not an error. */
1026 #define DLSYM_OPT(f, n) \
1027 do \
1029 char *tmp_err; \
1030 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
1031 tmp_err = dlerror (); \
1032 if (tmp_err == NULL) \
1033 optional_present++; \
1034 else \
1035 last_missing = #n; \
1036 optional_total++; \
1038 while (0)
1040 DLSYM (get_name);
1041 DLSYM (get_caps);
1042 DLSYM (get_type);
1043 DLSYM (get_num_devices);
1044 DLSYM (register_image);
1045 DLSYM (init_device);
1046 DLSYM (fini_device);
1047 DLSYM (get_table);
1048 DLSYM (alloc);
1049 DLSYM (free);
1050 DLSYM (dev2host);
1051 DLSYM (host2dev);
1052 device->capabilities = device->get_caps_func ();
1053 if (device->capabilities & TARGET_CAP_OPENMP_400)
1054 DLSYM (run);
1055 if (device->capabilities & TARGET_CAP_OPENACC_200)
1057 optional_present = optional_total = 0;
1058 DLSYM_OPT (openacc.exec, openacc_parallel);
1059 DLSYM_OPT (openacc.open_device, openacc_open_device);
1060 DLSYM_OPT (openacc.close_device, openacc_close_device);
1061 DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
1062 DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
1063 DLSYM_OPT (openacc.register_async_cleanup,
1064 openacc_register_async_cleanup);
1065 DLSYM_OPT (openacc.async_test, openacc_async_test);
1066 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
1067 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
1068 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
1069 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
1070 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
1071 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
1072 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
1073 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
1074 /* Require all the OpenACC handlers if we have TARGET_CAP_OPENACC_200. */
1075 if (optional_present != optional_total)
1077 err = "plugin missing OpenACC handler function";
1078 goto out;
1080 optional_present = optional_total = 0;
1081 DLSYM_OPT (openacc.cuda.get_current_device,
1082 openacc_get_current_cuda_device);
1083 DLSYM_OPT (openacc.cuda.get_current_context,
1084 openacc_get_current_cuda_context);
1085 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1086 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1087 /* Make sure all the CUDA functions are there if any of them are. */
1088 if (optional_present && optional_present != optional_total)
1090 err = "plugin missing OpenACC CUDA handler function";
1091 goto out;
1094 #undef DLSYM
1095 #undef DLSYM_OPT
1097 out:
1098 if (err != NULL)
1100 gomp_error ("while loading %s: %s", plugin_name, err);
1101 if (last_missing)
1102 gomp_error ("missing function was %s", last_missing);
1103 if (plugin_handle)
1104 dlclose (plugin_handle);
1106 return err == NULL;
1109 /* This function adds a compatible offload image IMAGE to an accelerator device
1110 DEVICE. */
1112 static void
1113 gomp_register_image_for_device (struct gomp_device_descr *device,
1114 struct offload_image_descr *image)
1116 if (!device->offload_regions_registered
1117 && (device->type == image->type
1118 || device->type == OFFLOAD_TARGET_TYPE_HOST))
1120 device->register_image_func (image->host_table, image->target_data);
1121 device->offload_regions_registered = true;
1125 /* This function initializes the runtime needed for offloading.
1126 It parses the list of offload targets and tries to load the plugins for these
1127 targets. Result of the function is properly initialized variable NUM_DEVICES
1128 and array DEVICES, containing descriptors for corresponding devices. */
1130 static void
1131 gomp_target_init (void)
1133 const char *prefix ="libgomp-plugin-";
1134 const char *suffix = ".so.1";
1135 const char *cur, *next;
1136 char *plugin_name;
1137 int i, new_num_devices;
1139 num_devices = 0;
1140 devices = NULL;
1142 cur = OFFLOAD_TARGETS;
1143 if (*cur)
1146 struct gomp_device_descr current_device;
1148 next = strchr (cur, ',');
1150 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1151 + strlen (prefix) + strlen (suffix));
1152 if (!plugin_name)
1154 num_devices = 0;
1155 break;
1158 strcpy (plugin_name, prefix);
1159 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1160 strcat (plugin_name, suffix);
1162 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1164 new_num_devices = current_device.get_num_devices_func ();
1165 if (new_num_devices >= 1)
1167 devices = realloc (devices, (num_devices + new_num_devices)
1168 * sizeof (struct gomp_device_descr));
1169 if (!devices)
1171 num_devices = 0;
1172 free (plugin_name);
1173 break;
1176 current_device.type = current_device.get_type_func ();
1177 current_device.name = current_device.get_name_func ();
1178 current_device.is_initialized = false;
1179 current_device.offload_regions_registered = false;
1180 current_device.mem_map.splay_tree.root = NULL;
1181 current_device.mem_map.is_initialized = false;
1182 current_device.target_data = NULL;
1183 current_device.openacc.data_environ = NULL;
1184 for (i = 0; i < new_num_devices; i++)
1186 current_device.id = num_devices + 1;
1187 current_device.target_id = i;
1188 devices[num_devices] = current_device;
1189 gomp_mutex_init (&devices[num_devices].mem_map.lock);
1190 num_devices++;
1195 free (plugin_name);
1196 cur = next + 1;
1198 while (next);
1200 /* Prefer a device with TARGET_CAP_OPENMP_400 for ICV default-device-var. */
1201 if (num_devices > 1)
1203 int d = gomp_icv (false)->default_device_var;
1205 if (!(devices[d].capabilities & TARGET_CAP_OPENMP_400))
1207 for (i = 0; i < num_devices; i++)
1209 if (devices[i].capabilities & TARGET_CAP_OPENMP_400)
1211 struct gomp_device_descr device_tmp = devices[d];
1212 devices[d] = devices[i];
1213 devices[d].id = d + 1;
1214 devices[i] = device_tmp;
1215 devices[i].id = i + 1;
1217 break;
1223 for (i = 0; i < num_devices; i++)
1225 int j;
1227 for (j = 0; j < num_offload_images; j++)
1228 gomp_register_image_for_device (&devices[i], &offload_images[j]);
1230 /* The 'devices' array can be moved (by the realloc call) until we have
1231 found all the plugins, so registering with the OpenACC runtime (which
1232 takes a copy of the pointer argument) must be delayed until now. */
1233 if (devices[i].capabilities & TARGET_CAP_OPENACC_200)
1234 goacc_register (&devices[i]);
1237 free (offload_images);
1238 offload_images = NULL;
1239 num_offload_images = 0;
1242 #else /* PLUGIN_SUPPORT */
1243 /* If dlfcn.h is unavailable we always fallback to host execution.
1244 GOMP_target* routines are just stubs for this case. */
1245 static void
1246 gomp_target_init (void)
1249 #endif /* PLUGIN_SUPPORT */