Build a shared host libiberty also for libcc1's benefit.
[official-gcc.git] / libgomp / target.c
blob79b252db5a1f86bef52bf9a8b7b62547c80fccd2
1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU OpenMP Library (libgomp).
6 Libgomp is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 /* This file handles the maintainence of threads in response to team
26 creation and termination. */
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "gomp-constants.h"
31 #include <string.h>
32 #include <stdio.h>
33 #include <assert.h>
35 #ifdef PLUGIN_SUPPORT
36 # include <dlfcn.h>
37 # include <dirent.h>
38 #endif
40 static void gomp_target_init (void);
42 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
44 #include "splay-tree.h"
46 /* This structure describes an offload image.
47 It contains type of the target, pointer to host table descriptor, and pointer
48 to target data. */
49 struct offload_image_descr {
50 int type;
51 void *host_table;
52 void *target_data;
55 /* Array of descriptors of offload images. */
56 static struct offload_image_descr *offload_images;
58 /* Total number of offload images. */
59 static int num_offload_images;
61 /* Array of descriptors of all available devices. */
62 static struct gomp_device_descr *devices;
64 /* Total number of available devices. */
65 static int num_devices;
67 /* The comparison function. */
69 attribute_hidden int
70 splay_compare (splay_tree_key x, splay_tree_key y)
72 if (x->host_start == x->host_end
73 && y->host_start == y->host_end)
74 return 0;
75 if (x->host_end <= y->host_start)
76 return -1;
77 if (x->host_start >= y->host_end)
78 return 1;
79 return 0;
82 #include "target.h"
84 attribute_hidden void
85 gomp_init_targets_once (void)
87 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
90 attribute_hidden int
91 gomp_get_num_devices (void)
93 gomp_init_targets_once ();
94 return num_devices;
97 static struct gomp_device_descr *
98 resolve_device (int device_id)
100 if (device_id == -1)
102 struct gomp_task_icv *icv = gomp_icv (false);
103 device_id = icv->default_device_var;
105 if (device_id < 0
106 || device_id >= gomp_get_num_devices ())
107 return NULL;
109 return &devices[device_id];
112 __attribute__((used)) static void
113 dump_mappings (FILE *f, splay_tree_node node)
115 int i;
117 splay_tree_key k = &node->key;
119 if (!k)
120 return;
122 fprintf (f, "key %p: host_start %p, host_end %p, tgt_offset %p, refcount %d, "
123 "copy_from %s\n", k, (void *) k->host_start,
124 (void *) k->host_end, (void *) k->tgt_offset, (int) k->refcount,
125 k->copy_from ? "true" : "false");
126 fprintf (f, "tgt->refcount %d, tgt->tgt_start %p, tgt->tgt_end %p, "
127 "tgt->to_free %p, tgt->prev %p, tgt->list_count %d, "
128 "tgt->device_descr %p\n", (int) k->tgt->refcount,
129 (void *) k->tgt->tgt_start, (void *) k->tgt->tgt_end,
130 k->tgt->to_free, k->tgt->prev, (int) k->tgt->list_count,
131 k->tgt->device_descr);
133 for (i = 0; i < k->tgt->list_count; i++)
134 fprintf (f, "item %d: %p\n", i, k->tgt->list[i]);
136 dump_mappings (f, node->left);
137 dump_mappings (f, node->right);
140 /* Handle the case where splay_tree_lookup found oldn for newn.
141 Helper function of gomp_map_vars. */
143 static inline void
144 gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
145 unsigned char kind)
147 if (oldn->host_start > newn->host_start
148 || oldn->host_end < newn->host_end)
149 gomp_fatal ("Trying to map into device [%p..%p) object when"
150 "[%p..%p) is already mapped",
151 (void *) newn->host_start, (void *) newn->host_end,
152 (void *) oldn->host_start, (void *) oldn->host_end);
153 oldn->refcount++;
156 static int
157 get_kind (bool is_openacc, void *kinds, int idx)
159 return is_openacc ? ((unsigned short *) kinds)[idx]
160 : ((unsigned char *) kinds)[idx];
163 attribute_hidden struct target_mem_desc *
164 gomp_map_vars (struct gomp_device_descr *devicep,
165 struct gomp_memory_mapping *mm, size_t mapnum,
166 void **hostaddrs, void **devaddrs, size_t *sizes,
167 void *kinds, bool is_openacc, bool is_target)
169 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
170 const int rshift = is_openacc ? 8 : 3;
171 const int typemask = is_openacc ? 0xff : 0x7;
172 struct splay_tree_key_s cur_node;
173 struct target_mem_desc *tgt
174 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
175 tgt->list_count = mapnum;
176 tgt->refcount = 1;
177 tgt->device_descr = devicep;
178 tgt->mem_map = mm;
180 /* From gcc/fortran/trans-types.c */
181 struct descriptor_dimension
183 long stride;
184 long lbound;
185 long ubound;
188 struct gfc_array_descriptor
190 void *data;
191 long offset;
192 long dtype;
193 struct descriptor_dimension dimension[];
196 #define GFC_DTYPE_RANK_MASK 0x07
197 #define GFC_DTYPE_TYPE_MASK 0x38
198 #define GFC_DTYPE_TYPE_SHIFT 3
199 #define GFC_DTYPE_SIZE_SHIFT 6
201 if (mapnum == 0)
202 return tgt;
204 tgt_align = sizeof (void *);
205 tgt_size = 0;
206 if (is_target)
208 size_t align = 4 * sizeof (void *);
209 tgt_align = align;
210 tgt_size = mapnum * sizeof (void *);
212 gomp_mutex_lock (&mm->lock);
213 for (i = 0; i < mapnum; i++)
215 int kind = get_kind (is_openacc, kinds, i);
216 if (hostaddrs[i] == NULL)
218 tgt->list[i] = NULL;
219 continue;
221 cur_node.host_start = (uintptr_t) hostaddrs[i];
222 if (!GOMP_MAP_POINTER_P (kind & typemask))
223 cur_node.host_end = cur_node.host_start + sizes[i];
224 else
225 cur_node.host_end = cur_node.host_start + sizeof (void *);
226 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
227 if (n)
229 tgt->list[i] = n;
230 gomp_map_vars_existing (n, &cur_node, kind);
232 else
234 tgt->list[i] = NULL;
236 if ((kind & typemask) == GOMP_MAP_TO_PSET)
238 struct gfc_array_descriptor *gad;
239 size_t rank;
240 int j;
241 bool alloc_arrays = true;
243 for (j = i - 1; j >= 0; j--)
245 if (hostaddrs[j] == *(void**)hostaddrs[i])
247 alloc_arrays = false;
248 break;
252 gad = (struct gfc_array_descriptor *) cur_node.host_start;
253 rank = gad->dtype & GFC_DTYPE_RANK_MASK;
255 cur_node.host_start = (uintptr_t)gad->data;
256 cur_node.host_end = cur_node.host_start +
257 sizeof (struct gfc_array_descriptor) +
258 (sizeof (struct descriptor_dimension) * rank);
260 if (alloc_arrays)
262 size_t tsize;
264 tsize = gad->dtype >> GFC_DTYPE_SIZE_SHIFT;
266 for (j = 0; j < rank; j++)
268 cur_node.host_end += tsize *
269 (gad->dimension[j].ubound -
270 gad->dimension[j].lbound + 1);
275 size_t align = (size_t) 1 << (kind >> rshift);
276 not_found_cnt++;
277 if (tgt_align < align)
278 tgt_align = align;
279 tgt_size = (tgt_size + align - 1) & ~(align - 1);
280 tgt_size += cur_node.host_end - cur_node.host_start;
281 if ((kind & typemask) == GOMP_MAP_TO_PSET)
283 size_t j;
284 for (j = i + 1; j < mapnum; j++)
285 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
286 & typemask))
287 break;
288 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
289 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
290 > cur_node.host_end))
291 break;
292 else
294 tgt->list[j] = NULL;
295 i++;
301 if (devaddrs)
303 if (mapnum != 1)
304 gomp_fatal ("unexpected aggregation");
305 tgt->to_free = devaddrs[0];
306 tgt->tgt_start = (uintptr_t) tgt->to_free;
307 tgt->tgt_end = tgt->tgt_start + sizes[0];
309 else if (not_found_cnt || is_target)
311 /* Allocate tgt_align aligned tgt_size block of memory. */
312 /* FIXME: Perhaps change interface to allocate properly aligned
313 memory. */
314 tgt->to_free = devicep->device_alloc_func (tgt_size + tgt_align - 1);
315 tgt->tgt_start = (uintptr_t) tgt->to_free;
316 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
317 tgt->tgt_end = tgt->tgt_start + tgt_size;
319 else
321 tgt->to_free = NULL;
322 tgt->tgt_start = 0;
323 tgt->tgt_end = 0;
326 tgt_size = 0;
327 if (is_target)
328 tgt_size = mapnum * sizeof (void *);
330 tgt->array = NULL;
331 if (not_found_cnt)
333 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
334 splay_tree_node array = tgt->array;
335 size_t j;
337 for (i = 0; i < mapnum; i++)
338 if (tgt->list[i] == NULL)
340 int kind = get_kind (is_openacc, kinds, i);
341 if (hostaddrs[i] == NULL)
342 continue;
343 splay_tree_key k = &array->key;
344 k->host_start = (uintptr_t) hostaddrs[i];
345 if (!GOMP_MAP_POINTER_P (kind & typemask))
346 k->host_end = k->host_start + sizes[i];
347 else
348 k->host_end = k->host_start + sizeof (void *);
349 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
350 if (n)
352 tgt->list[i] = n;
353 gomp_map_vars_existing (n, k, kind);
355 else
357 size_t align = (size_t) 1 << (kind >> rshift);
358 tgt->list[i] = k;
359 tgt_size = (tgt_size + align - 1) & ~(align - 1);
360 k->tgt = tgt;
361 k->tgt_offset = tgt_size;
362 tgt_size += k->host_end - k->host_start;
363 k->copy_from = GOMP_MAP_COPYFROM_P (kind & typemask)
364 || GOMP_MAP_TOFROM_P (kind & typemask);
365 k->refcount = 1;
366 k->async_refcount = 0;
367 tgt->refcount++;
368 array->left = NULL;
369 array->right = NULL;
371 splay_tree_insert (&mm->splay_tree, array);
373 switch (kind & typemask)
375 case GOMP_MAP_FORCE_ALLOC:
376 case GOMP_MAP_FORCE_FROM:
377 /* FIXME: No special handling (see comment in
378 oacc-parallel.c). */
379 case GOMP_MAP_ALLOC:
380 case GOMP_MAP_ALLOC_FROM:
381 break;
382 case GOMP_MAP_FORCE_TO:
383 case GOMP_MAP_FORCE_TOFROM:
384 /* FIXME: No special handling, as above. */
385 case GOMP_MAP_ALLOC_TO:
386 case GOMP_MAP_ALLOC_TOFROM:
387 /* Copy from host to device memory. */
388 /* FIXME: Perhaps add some smarts, like if copying
389 several adjacent fields from host to target, use some
390 host buffer to avoid sending each var individually. */
391 devicep->device_host2dev_func
392 ((void *) (tgt->tgt_start + k->tgt_offset),
393 (void *) k->host_start,
394 k->host_end - k->host_start);
395 break;
396 case GOMP_MAP_POINTER:
397 cur_node.host_start
398 = (uintptr_t) *(void **) k->host_start;
399 if (cur_node.host_start == (uintptr_t) NULL)
401 cur_node.tgt_offset = (uintptr_t) NULL;
402 /* Copy from host to device memory. */
403 /* FIXME: see above FIXME comment. */
404 devicep->device_host2dev_func
405 ((void *) (tgt->tgt_start + k->tgt_offset),
406 (void *) &cur_node.tgt_offset,
407 sizeof (void *));
408 break;
410 /* Add bias to the pointer value. */
411 cur_node.host_start += sizes[i];
412 cur_node.host_end = cur_node.host_start + 1;
413 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
414 if (n == NULL)
416 /* Could be possibly zero size array section. */
417 cur_node.host_end--;
418 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
419 if (n == NULL)
421 cur_node.host_start--;
422 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
423 cur_node.host_start++;
426 if (n == NULL)
427 gomp_fatal ("Pointer target of array section "
428 "wasn't mapped");
430 cur_node.host_start -= n->host_start;
431 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
432 + cur_node.host_start;
433 /* At this point tgt_offset is target address of the
434 array section. Now subtract bias to get what we want
435 to initialize the pointer with. */
436 cur_node.tgt_offset -= sizes[i];
437 /* Copy from host to device memory. */
438 /* FIXME: see above FIXME comment. */
439 devicep->device_host2dev_func
440 ((void *) (tgt->tgt_start + k->tgt_offset),
441 (void *) &cur_node.tgt_offset,
442 sizeof (void *));
443 break;
444 case GOMP_MAP_TO_PSET:
446 /* Copy from host to device memory. */
447 /* FIXME: see above FIXME comment. */
448 devicep->device_host2dev_func
449 ((void *) (tgt->tgt_start + k->tgt_offset),
450 (void *) k->host_start,
451 (k->host_end - k->host_start));
452 devicep->device_host2dev_func
453 ((void *) (tgt->tgt_start + k->tgt_offset),
454 (void *) &tgt->tgt_start,
455 sizeof (void *));
457 for (j = i + 1; j < mapnum; j++)
458 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
459 & typemask))
460 break;
461 else if ((uintptr_t) hostaddrs[j] < k->host_start
462 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
463 > k->host_end))
464 break;
465 else
467 tgt->list[j] = k;
468 k->refcount++;
469 cur_node.host_start
470 = (uintptr_t) *(void **) hostaddrs[j];
471 if (cur_node.host_start == (uintptr_t) NULL)
473 cur_node.tgt_offset = (uintptr_t) NULL;
474 /* Copy from host to device memory. */
475 /* FIXME: see above FIXME comment. */
476 devicep->device_host2dev_func
477 ((void *) (tgt->tgt_start + k->tgt_offset
478 + ((uintptr_t) hostaddrs[j]
479 - k->host_start)),
480 (void *) &cur_node.tgt_offset,
481 sizeof (void *));
482 i++;
483 continue;
485 /* Add bias to the pointer value. */
486 cur_node.host_start += sizes[j];
487 cur_node.host_end = cur_node.host_start + 1;
488 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
489 if (n == NULL)
491 /* Could be possibly zero size array
492 section. */
493 cur_node.host_end--;
494 n = splay_tree_lookup (&mm->splay_tree,
495 &cur_node);
496 if (n == NULL)
498 cur_node.host_start--;
499 n = splay_tree_lookup (&mm->splay_tree,
500 &cur_node);
501 cur_node.host_start++;
504 if (n == NULL)
505 gomp_fatal ("Pointer target of array section "
506 "wasn't mapped");
507 cur_node.host_start -= n->host_start;
508 cur_node.tgt_offset = n->tgt->tgt_start
509 + n->tgt_offset
510 + cur_node.host_start;
511 /* At this point tgt_offset is target address of the
512 array section. Now subtract bias to get what we
513 want to initialize the pointer with. */
514 cur_node.tgt_offset -= sizes[j];
515 /* Copy from host to device memory. */
516 /* FIXME: see above FIXME comment. */
518 devicep->device_host2dev_func
519 ((void *) (tgt->tgt_start + k->tgt_offset
520 + ((uintptr_t) hostaddrs[j]
521 - k->host_start)),
522 (void *) &cur_node.tgt_offset,
523 sizeof (void *));
524 i++;
526 break;
528 case GOMP_MAP_FORCE_PRESENT:
530 /* We already looked up the memory region above and it
531 was missing. */
532 size_t size = k->host_end - k->host_start;
533 gomp_fatal ("present clause: !acc_is_present (%p, "
534 "%zd (0x%zx))", (void *) k->host_start,
535 size, size);
537 break;
538 case GOMP_MAP_FORCE_DEVICEPTR:
539 assert (k->host_end - k->host_start == sizeof (void *));
541 devicep->device_host2dev_func
542 ((void *) (tgt->tgt_start + k->tgt_offset),
543 (void *) k->host_start,
544 sizeof (void *));
545 break;
546 case GOMP_MAP_FORCE_PRIVATE:
547 abort ();
548 case GOMP_MAP_FORCE_FIRSTPRIVATE:
549 abort ();
550 default:
551 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
552 kind);
554 array++;
559 #undef GFC_DTYPE_RANK_MASK
560 #undef GFC_DTYPE_TYPE_MASK
561 #undef GFC_DTYPE_TYPE_SHIFT
562 #undef GFC_DTYPE_SIZE_SHIFT
564 if (is_target)
566 for (i = 0; i < mapnum; i++)
568 if (tgt->list[i] == NULL)
569 cur_node.tgt_offset = (uintptr_t) NULL;
570 else
571 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
572 + tgt->list[i]->tgt_offset;
573 /* Copy from host to device memory. */
574 /* FIXME: see above FIXME comment. */
575 devicep->device_host2dev_func
576 ((void *) (tgt->tgt_start + i * sizeof (void *)),
577 (void *) &cur_node.tgt_offset,
578 sizeof (void *));
582 gomp_mutex_unlock (&mm->lock);
583 return tgt;
586 static void
587 gomp_unmap_tgt (struct target_mem_desc *tgt)
589 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
590 if (tgt->tgt_end)
591 tgt->device_descr->device_free_func(tgt->to_free);
593 free (tgt->array);
594 free (tgt);
597 /* Decrease the refcount for a set of mapped variables, and queue asychronous
598 copies from the device back to the host after any work that has been issued.
599 Because the regions are still "live", increment an asynchronous reference
600 count to indicate that they should not be unmapped from host-side data
601 structures until the asynchronous copy has completed. */
603 attribute_hidden void
604 gomp_copy_from_async (struct target_mem_desc *tgt)
606 struct gomp_device_descr *devicep = tgt->device_descr;
607 struct gomp_memory_mapping *mm = tgt->mem_map;
608 size_t i;
610 gomp_mutex_lock (&mm->lock);
612 for (i = 0; i < tgt->list_count; i++)
613 if (tgt->list[i] == NULL)
615 else if (tgt->list[i]->refcount > 1)
617 tgt->list[i]->refcount--;
618 tgt->list[i]->async_refcount++;
620 else
622 splay_tree_key k = tgt->list[i];
623 if (k->copy_from)
624 /* Copy from device to host memory. */
625 devicep->device_dev2host_func
626 ((void *) k->host_start,
627 (void *) (k->tgt->tgt_start + k->tgt_offset),
628 k->host_end - k->host_start);
631 gomp_mutex_unlock (&mm->lock);
634 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
635 variables back from device to host: if it is false, it is assumed that this
636 has been done already, i.e. by gomp_copy_from_async above. */
638 attribute_hidden void
639 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
641 struct gomp_device_descr *devicep = tgt->device_descr;
642 struct gomp_memory_mapping *mm = tgt->mem_map;
644 if (tgt->list_count == 0)
646 free (tgt);
647 return;
650 size_t i;
651 gomp_mutex_lock (&mm->lock);
652 for (i = 0; i < tgt->list_count; i++)
653 if (tgt->list[i] == NULL)
655 else if (tgt->list[i]->refcount > 1)
656 tgt->list[i]->refcount--;
657 else if (tgt->list[i]->async_refcount > 0)
658 tgt->list[i]->async_refcount--;
659 else
661 splay_tree_key k = tgt->list[i];
662 if (k->copy_from && do_copyfrom)
663 /* Copy from device to host memory. */
664 devicep->device_dev2host_func
665 ((void *) k->host_start,
666 (void *) (k->tgt->tgt_start + k->tgt_offset),
667 k->host_end - k->host_start);
668 splay_tree_remove (&mm->splay_tree, k);
669 if (k->tgt->refcount > 1)
670 k->tgt->refcount--;
671 else
672 gomp_unmap_tgt (k->tgt);
675 if (tgt->refcount > 1)
676 tgt->refcount--;
677 else
678 gomp_unmap_tgt (tgt);
679 gomp_mutex_unlock (&mm->lock);
682 static void
683 gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
684 size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
685 bool is_openacc)
687 size_t i;
688 struct splay_tree_key_s cur_node;
689 const int typemask = is_openacc ? 0xff : 0x7;
691 if (!devicep)
692 return;
694 if (mapnum == 0)
695 return;
697 gomp_mutex_lock (&mm->lock);
698 for (i = 0; i < mapnum; i++)
699 if (sizes[i])
701 cur_node.host_start = (uintptr_t) hostaddrs[i];
702 cur_node.host_end = cur_node.host_start + sizes[i];
703 splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
704 &cur_node);
705 if (n)
707 int kind = get_kind (is_openacc, kinds, i);
708 if (n->host_start > cur_node.host_start
709 || n->host_end < cur_node.host_end)
710 gomp_fatal ("Trying to update [%p..%p) object when"
711 "only [%p..%p) is mapped",
712 (void *) cur_node.host_start,
713 (void *) cur_node.host_end,
714 (void *) n->host_start,
715 (void *) n->host_end);
716 if (GOMP_MAP_COPYTO_P (kind & typemask))
717 /* Copy from host to device memory. */
718 devicep->device_host2dev_func
719 ((void *) (n->tgt->tgt_start
720 + n->tgt_offset
721 + cur_node.host_start
722 - n->host_start),
723 (void *) cur_node.host_start,
724 cur_node.host_end - cur_node.host_start);
725 else if (GOMP_MAP_COPYFROM_P (kind & typemask))
726 /* Copy from device to host memory. */
727 devicep->device_dev2host_func
728 ((void *) cur_node.host_start,
729 (void *) (n->tgt->tgt_start
730 + n->tgt_offset
731 + cur_node.host_start
732 - n->host_start),
733 cur_node.host_end - cur_node.host_start);
735 else
736 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
737 (void *) cur_node.host_start,
738 (void *) cur_node.host_end);
740 gomp_mutex_unlock (&mm->lock);
743 static void gomp_register_image_for_device (struct gomp_device_descr *device,
744 struct offload_image_descr *image);
746 /* This function should be called from every offload image. It gets the
747 descriptor of the host func and var tables HOST_TABLE, TYPE of the target,
748 and TARGET_DATA needed by target plugin (target tables, etc.) */
749 void
750 GOMP_offload_register (void *host_table, int type, void **target_data)
752 offload_images = gomp_realloc (offload_images,
753 (num_offload_images + 1)
754 * sizeof (struct offload_image_descr));
756 if (offload_images == NULL)
757 return;
759 offload_images[num_offload_images].type = type;
760 offload_images[num_offload_images].host_table = host_table;
761 offload_images[num_offload_images].target_data = target_data;
763 num_offload_images++;
766 attribute_hidden void
767 gomp_init_device (struct gomp_device_descr *devicep)
769 /* Initialize the target device. */
770 devicep->device_init_func ();
772 devicep->is_initialized = true;
775 attribute_hidden void
776 gomp_init_tables (const struct gomp_device_descr *devicep,
777 struct gomp_memory_mapping *mm)
779 /* Get address mapping table for device. */
780 struct mapping_table *table = NULL;
781 int i, num_entries = devicep->device_get_table_func (&table);
783 /* Insert host-target address mapping into dev_splay_tree. */
784 for (i = 0; i < num_entries; i++)
786 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
787 tgt->refcount = 1;
788 tgt->array = gomp_malloc (sizeof (*tgt->array));
789 tgt->tgt_start = table[i].tgt_start;
790 tgt->tgt_end = table[i].tgt_end;
791 tgt->to_free = NULL;
792 tgt->list_count = 0;
793 tgt->device_descr = (struct gomp_device_descr *) devicep;
794 splay_tree_node node = tgt->array;
795 splay_tree_key k = &node->key;
796 k->host_start = table[i].host_start;
797 k->host_end = table[i].host_end;
798 k->tgt_offset = 0;
799 k->tgt = tgt;
800 node->left = NULL;
801 node->right = NULL;
802 splay_tree_insert (&mm->splay_tree, node);
805 free (table);
807 mm->is_initialized = true;
810 static void
811 gomp_init_dev_tables (struct gomp_device_descr *devicep)
813 gomp_init_device (devicep);
814 gomp_init_tables (devicep, &devicep->mem_map);
817 attribute_hidden void
818 gomp_fini_device (struct gomp_device_descr *devicep)
820 struct gomp_memory_mapping *mm = &devicep->mem_map;
822 if (devicep->is_initialized)
823 devicep->device_fini_func ();
825 while (mm->splay_tree.root)
827 struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
828 free (tgt->array);
829 free (tgt);
830 splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
833 devicep->is_initialized = false;
836 /* Called when encountering a target directive. If DEVICE
837 is -1, it means use device-var ICV. If it is -2 (or any other value
838 larger than last available hw device, use host fallback.
839 FN is address of host code, OPENMP_TARGET contains value of the
840 __OPENMP_TARGET__ symbol in the shared library or binary that invokes
841 GOMP_target. HOSTADDRS, SIZES and KINDS are arrays
842 with MAPNUM entries, with addresses of the host objects,
843 sizes of the host objects (resp. for pointer kind pointer bias
844 and assumed sizeof (void *) size) and kinds. */
846 void
847 GOMP_target (int device, void (*fn) (void *), const void *openmp_target,
848 size_t mapnum, void **hostaddrs, size_t *sizes,
849 unsigned char *kinds)
851 struct gomp_device_descr *devicep = resolve_device (device);
852 struct gomp_memory_mapping *mm = &devicep->mem_map;
854 if (devicep != NULL && !devicep->is_initialized)
855 gomp_init_dev_tables (devicep);
857 if (devicep == NULL || !(devicep->capabilities & TARGET_CAP_OPENMP_400))
859 /* Host fallback. */
860 struct gomp_thread old_thr, *thr = gomp_thread ();
861 old_thr = *thr;
862 memset (thr, '\0', sizeof (*thr));
863 if (gomp_places_list)
865 thr->place = old_thr.place;
866 thr->ts.place_partition_len = gomp_places_list_len;
868 fn (hostaddrs);
869 gomp_free_thread (thr);
870 *thr = old_thr;
871 return;
874 struct splay_tree_key_s k;
875 k.host_start = (uintptr_t) fn;
876 k.host_end = k.host_start + 1;
877 gomp_mutex_lock (&mm->lock);
878 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map.splay_tree, &k);
879 if (tgt_fn == NULL && !(devicep->capabilities & TARGET_CAP_NATIVE_EXEC))
880 gomp_fatal ("Target function wasn't mapped");
881 gomp_mutex_unlock (&mm->lock);
883 struct target_mem_desc *tgt_vars
884 = gomp_map_vars (devicep, &devicep->mem_map, mapnum, hostaddrs, NULL,
885 sizes, kinds, false, true);
886 struct gomp_thread old_thr, *thr = gomp_thread ();
887 old_thr = *thr;
888 memset (thr, '\0', sizeof (*thr));
889 if (gomp_places_list)
891 thr->place = old_thr.place;
892 thr->ts.place_partition_len = gomp_places_list_len;
894 if (devicep->capabilities & TARGET_CAP_NATIVE_EXEC)
895 devicep->device_run_func (fn, (void *) tgt_vars->tgt_start);
896 else
897 devicep->device_run_func ((void *) tgt_fn->tgt->tgt_start,
898 (void *) tgt_vars->tgt_start);
899 gomp_free_thread (thr);
900 *thr = old_thr;
901 gomp_unmap_vars (tgt_vars, true);
904 void
905 GOMP_target_data (int device, const void *openmp_target, size_t mapnum,
906 void **hostaddrs, size_t *sizes, unsigned char *kinds)
908 struct gomp_device_descr *devicep = resolve_device (device);
910 if (devicep != NULL && !devicep->is_initialized)
911 gomp_init_dev_tables (devicep);
913 if (devicep == NULL || !(devicep->capabilities & TARGET_CAP_OPENMP_400))
915 /* Host fallback. */
916 struct gomp_task_icv *icv = gomp_icv (false);
917 if (icv->target_data)
919 /* Even when doing a host fallback, if there are any active
920 #pragma omp target data constructs, need to remember the
921 new #pragma omp target data, otherwise GOMP_target_end_data
922 would get out of sync. */
923 struct target_mem_desc *tgt
924 = gomp_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, false,
925 false);
926 tgt->prev = icv->target_data;
927 icv->target_data = tgt;
929 return;
932 struct target_mem_desc *tgt
933 = gomp_map_vars (devicep, &devicep->mem_map, mapnum, hostaddrs, NULL, sizes,
934 kinds, false, false);
935 struct gomp_task_icv *icv = gomp_icv (true);
936 tgt->prev = icv->target_data;
937 icv->target_data = tgt;
940 void
941 GOMP_target_end_data (void)
943 struct gomp_task_icv *icv = gomp_icv (false);
944 if (icv->target_data)
946 struct target_mem_desc *tgt = icv->target_data;
947 icv->target_data = tgt->prev;
948 gomp_unmap_vars (tgt, true);
952 void
953 GOMP_target_update (int device, const void *openmp_target, size_t mapnum,
954 void **hostaddrs, size_t *sizes, unsigned char *kinds)
956 struct gomp_device_descr *devicep = resolve_device (device);
958 if (devicep != NULL && !devicep->is_initialized)
959 gomp_init_dev_tables (devicep);
961 if (devicep == NULL || !(devicep->capabilities & TARGET_CAP_OPENMP_400))
962 return;
964 gomp_update (devicep, &devicep->mem_map, mapnum, hostaddrs, sizes, kinds,
965 false);
968 void
969 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
971 if (thread_limit)
973 struct gomp_task_icv *icv = gomp_icv (true);
974 icv->thread_limit_var
975 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
977 (void) num_teams;
980 #ifdef PLUGIN_SUPPORT
982 /* This function checks if the given string FNAME matches
983 "libgomp-plugin-*.so.1". */
984 static bool
985 gomp_check_plugin_file_name (const char *fname)
987 const char *prefix = "libgomp-plugin-";
988 const char *suffix = ".so.1";
989 if (!fname)
990 return false;
991 if (strncmp (fname, prefix, strlen (prefix)) != 0)
992 return false;
993 if (strncmp (fname + strlen (fname) - strlen (suffix), suffix,
994 strlen (suffix)) != 0)
995 return false;
996 return true;
999 /* This function tries to load plugin for DEVICE. Name of plugin is passed
1000 in PLUGIN_NAME.
1001 Plugin handle and handles of the found functions are stored in the
1002 corresponding fields of DEVICE.
1003 The function returns TRUE on success and FALSE otherwise. */
1004 static bool
1005 gomp_load_plugin_for_device (struct gomp_device_descr *device,
1006 const char *plugin_name)
1008 char *err = NULL, *last_missing = NULL;
1009 int optional_present, optional_total;
1011 /* Clear any existing error. */
1012 dlerror ();
1014 device->plugin_handle = dlopen (plugin_name, RTLD_LAZY);
1015 if (!device->plugin_handle)
1017 err = dlerror ();
1018 goto out;
1021 /* Check if all required functions are available in the plugin and store
1022 their handlers. */
1023 #define DLSYM(f) \
1024 do \
1026 device->f##_func = dlsym (device->plugin_handle, #f); \
1027 err = dlerror (); \
1028 if (err != NULL) \
1029 goto out; \
1031 while (0)
1032 /* Similar, but missing functions are not an error. */
1033 #define DLSYM_OPT(f,n) \
1034 do \
1036 char *tmp_err; \
1037 device->f##_func = dlsym (device->plugin_handle, #n); \
1038 tmp_err = dlerror (); \
1039 if (tmp_err == NULL) \
1040 optional_present++; \
1041 else \
1042 last_missing = #n; \
1043 optional_total++; \
1045 while (0)
1047 DLSYM (get_name);
1048 DLSYM (get_caps);
1049 DLSYM (get_type);
1050 DLSYM (get_num_devices);
1051 DLSYM (offload_register);
1052 DLSYM (device_init);
1053 DLSYM (device_fini);
1054 DLSYM (device_get_table);
1055 DLSYM (device_alloc);
1056 DLSYM (device_free);
1057 DLSYM (device_dev2host);
1058 DLSYM (device_host2dev);
1059 if (device->get_caps_func () & TARGET_CAP_OPENMP_400)
1060 DLSYM (device_run);
1061 if (device->get_caps_func () & TARGET_CAP_OPENACC_200)
1063 optional_present = optional_total = 0;
1064 DLSYM_OPT (openacc.exec, openacc_parallel);
1065 DLSYM_OPT (openacc.open_device, openacc_open_device);
1066 DLSYM_OPT (openacc.close_device, openacc_close_device);
1067 DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
1068 DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
1069 DLSYM_OPT (openacc.avail, openacc_avail);
1070 DLSYM_OPT (openacc.register_async_cleanup,
1071 openacc_register_async_cleanup);
1072 DLSYM_OPT (openacc.async_test, openacc_async_test);
1073 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
1074 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
1075 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
1076 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
1077 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
1078 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
1079 /* Require all the OpenACC handlers if we have TARGET_CAP_OPENACC_200. */
1080 if (optional_present != optional_total)
1082 err = "plugin missing OpenACC handler function";
1083 goto out;
1085 optional_present = optional_total = 0;
1086 DLSYM_OPT (openacc.cuda.get_current_device,
1087 openacc_get_current_cuda_device);
1088 DLSYM_OPT (openacc.cuda.get_current_context,
1089 openacc_get_current_cuda_context);
1090 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1091 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1092 /* Make sure all the CUDA functions are there if any of them are. */
1093 if (optional_present && optional_present != optional_total)
1095 err = "plugin missing OpenACC CUDA handler function";
1096 goto out;
1099 #undef DLSYM
1100 #undef DLSYM_OPT
1102 out:
1103 if (err != NULL)
1105 gomp_error ("while loading %s: %s", plugin_name, err);
1106 if (last_missing)
1107 gomp_error ("missing function was %s", last_missing);
1108 if (device->plugin_handle)
1109 dlclose (device->plugin_handle);
1111 return err == NULL;
1114 /* This function adds a compatible offload image IMAGE to an accelerator device
1115 DEVICE. */
1117 static void
1118 gomp_register_image_for_device (struct gomp_device_descr *device,
1119 struct offload_image_descr *image)
1121 if (!device->offload_regions_registered
1122 && (device->type == image->type || device->type == TARGET_TYPE_HOST))
1124 device->offload_register_func (image->host_table, image->target_data);
1125 device->offload_regions_registered = true;
1129 /* This functions scans folder, specified in environment variable
1130 LIBGOMP_PLUGIN_PATH, and loads all suitable libgomp plugins from this folder.
1131 For a plugin to be suitable, its name should be "libgomp-plugin-*.so.1" and
1132 it should implement a certain set of functions.
1133 Result of this function is properly initialized variable NUM_DEVICES and
1134 array DEVICES, containing all plugins and their callback handles. */
1135 static void
1136 gomp_find_available_plugins (void)
1138 char *plugin_path = NULL;
1139 DIR *dir = NULL;
1140 struct dirent *ent;
1141 char plugin_name[PATH_MAX];
1142 int i;
1144 num_devices = 0;
1145 devices = NULL;
1147 plugin_path = getenv ("LIBGOMP_PLUGIN_PATH");
1148 if (!plugin_path)
1149 goto out;
1151 dir = opendir (plugin_path);
1152 if (!dir)
1153 goto out;
1155 while ((ent = readdir (dir)) != NULL)
1157 struct gomp_device_descr current_device, *devicep;
1158 if (!gomp_check_plugin_file_name (ent->d_name))
1159 continue;
1160 if (strlen (plugin_path) + 1 + strlen (ent->d_name) >= PATH_MAX)
1161 continue;
1162 strcpy (plugin_name, plugin_path);
1163 strcat (plugin_name, "/");
1164 strcat (plugin_name, ent->d_name);
1165 if (!gomp_load_plugin_for_device (&current_device, plugin_name))
1166 continue;
1167 devices = gomp_realloc (devices, (num_devices + 1)
1168 * sizeof (struct gomp_device_descr));
1169 if (devices == NULL)
1171 num_devices = 0;
1172 goto out;
1175 devices[num_devices] = current_device;
1176 devicep = &devices[num_devices];
1178 devicep->is_initialized = false;
1179 devicep->offload_regions_registered = false;
1180 devicep->mem_map.splay_tree.root = NULL;
1181 devicep->mem_map.is_initialized = false;
1182 devicep->type = devicep->get_type_func ();
1183 devicep->name = devicep->get_name_func ();
1184 devicep->capabilities = devicep->get_caps_func ();
1185 gomp_mutex_init (&devicep->mem_map.lock);
1186 devicep->id = ++num_devices;
1188 /* Prefer a device with TARGET_CAP_OPENMP_400 for ICV default-device-var. */
1189 if (num_devices > 1)
1191 int d = gomp_icv (false)->default_device_var;
1193 if (!(devices[d].capabilities & TARGET_CAP_OPENMP_400))
1195 for (i = 0; i < num_devices; i++)
1197 if (devices[i].capabilities & TARGET_CAP_OPENMP_400)
1199 struct gomp_device_descr device_tmp = devices[d];
1200 devices[d] = devices[i];
1201 devices[d].id = d + 1;
1202 devices[i] = device_tmp;
1203 devices[i].id = i + 1;
1205 break;
1211 for (i = 0; i < num_devices; i++)
1213 int j;
1215 for (j = 0; j < num_offload_images; j++)
1216 gomp_register_image_for_device (&devices[i], &offload_images[j]);
1218 /* The 'devices' array can be moved (by the realloc call) until we have
1219 found all the plugins, so registering with the OpenACC runtime (which
1220 takes a copy of the pointer argument) must be delayed until now. */
1221 if (devices[i].capabilities & TARGET_CAP_OPENACC_200)
1222 ACC_plugin_register (&devices[i]);
1225 out:
1226 if (dir)
1227 closedir (dir);
1228 free (offload_images);
1229 offload_images = NULL;
1230 num_offload_images = 0;
1233 /* This function initializes runtime needed for offloading.
1234 It loads plugins, sets up a connection with devices, etc. */
1235 static void
1236 gomp_target_init (void)
1238 gomp_find_available_plugins ();
1241 #else /* PLUGIN_SUPPORT */
1242 /* If dlfcn.h is unavailable we always fallback to host execution.
1243 GOMP_target* routines are just stubs for this case. */
1244 static void
1245 gomp_target_init (void)
1248 #endif /* PLUGIN_SUPPORT */