gcc/
[official-gcc.git] / libgomp / target.c
blob758ece5d78c0e476a10cdd31cf65919899a621aa
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
38 #endif
39 #include <string.h>
40 #include <assert.h>
42 #ifdef PLUGIN_SUPPORT
43 #include <dlfcn.h>
44 #include "plugin-suffix.h"
45 #endif
47 static void gomp_target_init (void);
49 /* The whole initialization code for offloading plugins is only run one. */
50 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52 /* Mutex for offload image registration. */
53 static gomp_mutex_t register_lock;
55 /* This structure describes an offload image.
56 It contains type of the target device, pointer to host table descriptor, and
57 pointer to target data. */
58 struct offload_image_descr {
59 unsigned version;
60 enum offload_target_type type;
61 const void *host_table;
62 const void *target_data;
65 /* Array of descriptors of offload images. */
66 static struct offload_image_descr *offload_images;
68 /* Total number of offload images. */
69 static int num_offload_images;
71 /* Array of descriptors for all available devices. */
72 static struct gomp_device_descr *devices;
74 /* Total number of available devices. */
75 static int num_devices;
77 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
78 static int num_devices_openmp;
80 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
82 static void *
83 gomp_realloc_unlock (void *old, size_t size)
85 void *ret = realloc (old, size);
86 if (ret == NULL)
88 gomp_mutex_unlock (&register_lock);
89 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
91 return ret;
94 /* The comparison function. */
96 attribute_hidden int
97 splay_compare (splay_tree_key x, splay_tree_key y)
99 if (x->host_start == x->host_end
100 && y->host_start == y->host_end)
101 return 0;
102 if (x->host_end <= y->host_start)
103 return -1;
104 if (x->host_start >= y->host_end)
105 return 1;
106 return 0;
109 #include "splay-tree.h"
111 attribute_hidden void
112 gomp_init_targets_once (void)
114 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
117 attribute_hidden int
118 gomp_get_num_devices (void)
120 gomp_init_targets_once ();
121 return num_devices_openmp;
124 static struct gomp_device_descr *
125 resolve_device (int device_id)
127 if (device_id == GOMP_DEVICE_ICV)
129 struct gomp_task_icv *icv = gomp_icv (false);
130 device_id = icv->default_device_var;
133 if (device_id < 0 || device_id >= gomp_get_num_devices ())
134 return NULL;
136 return &devices[device_id];
140 /* Handle the case where splay_tree_lookup found oldn for newn.
141 Helper function of gomp_map_vars. */
143 static inline void
144 gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
145 splay_tree_key newn, unsigned char kind)
147 if ((kind & GOMP_MAP_FLAG_FORCE)
148 || oldn->host_start > newn->host_start
149 || oldn->host_end < newn->host_end)
151 gomp_mutex_unlock (&devicep->lock);
152 gomp_fatal ("Trying to map into device [%p..%p) object when "
153 "[%p..%p) is already mapped",
154 (void *) newn->host_start, (void *) newn->host_end,
155 (void *) oldn->host_start, (void *) oldn->host_end);
157 oldn->refcount++;
/* Fetch map kind IDX from KINDS.  OpenACC encodes kinds as 16-bit values,
   OpenMP as 8-bit values.  */
static int
get_kind (bool is_openacc, void *kinds, int idx)
{
  if (is_openacc)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
167 static void
168 gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
169 uintptr_t target_offset, uintptr_t bias)
171 struct gomp_device_descr *devicep = tgt->device_descr;
172 struct splay_tree_s *mem_map = &devicep->mem_map;
173 struct splay_tree_key_s cur_node;
175 cur_node.host_start = host_ptr;
176 if (cur_node.host_start == (uintptr_t) NULL)
178 cur_node.tgt_offset = (uintptr_t) NULL;
179 /* FIXME: see comment about coalescing host/dev transfers below. */
180 devicep->host2dev_func (devicep->target_id,
181 (void *) (tgt->tgt_start + target_offset),
182 (void *) &cur_node.tgt_offset,
183 sizeof (void *));
184 return;
186 /* Add bias to the pointer value. */
187 cur_node.host_start += bias;
188 cur_node.host_end = cur_node.host_start + 1;
189 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
190 if (n == NULL)
192 /* Could be possibly zero size array section. */
193 cur_node.host_end--;
194 n = splay_tree_lookup (mem_map, &cur_node);
195 if (n == NULL)
197 cur_node.host_start--;
198 n = splay_tree_lookup (mem_map, &cur_node);
199 cur_node.host_start++;
202 if (n == NULL)
204 gomp_mutex_unlock (&devicep->lock);
205 gomp_fatal ("Pointer target of array section wasn't mapped");
207 cur_node.host_start -= n->host_start;
208 cur_node.tgt_offset
209 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
210 /* At this point tgt_offset is target address of the
211 array section. Now subtract bias to get what we want
212 to initialize the pointer with. */
213 cur_node.tgt_offset -= bias;
214 /* FIXME: see comment about coalescing host/dev transfers below. */
215 devicep->host2dev_func (devicep->target_id,
216 (void *) (tgt->tgt_start + target_offset),
217 (void *) &cur_node.tgt_offset,
218 sizeof (void *));
221 attribute_hidden struct target_mem_desc *
222 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
223 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
224 bool is_openacc, bool is_target)
226 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
227 const int rshift = is_openacc ? 8 : 3;
228 const int typemask = is_openacc ? 0xff : 0x7;
229 struct splay_tree_s *mem_map = &devicep->mem_map;
230 struct splay_tree_key_s cur_node;
231 struct target_mem_desc *tgt
232 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
233 tgt->list_count = mapnum;
234 tgt->refcount = 1;
235 tgt->device_descr = devicep;
237 if (mapnum == 0)
238 return tgt;
240 tgt_align = sizeof (void *);
241 tgt_size = 0;
242 if (is_target)
244 size_t align = 4 * sizeof (void *);
245 tgt_align = align;
246 tgt_size = mapnum * sizeof (void *);
249 gomp_mutex_lock (&devicep->lock);
251 for (i = 0; i < mapnum; i++)
253 int kind = get_kind (is_openacc, kinds, i);
254 if (hostaddrs[i] == NULL)
256 tgt->list[i] = NULL;
257 continue;
259 cur_node.host_start = (uintptr_t) hostaddrs[i];
260 if (!GOMP_MAP_POINTER_P (kind & typemask))
261 cur_node.host_end = cur_node.host_start + sizes[i];
262 else
263 cur_node.host_end = cur_node.host_start + sizeof (void *);
264 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
265 if (n)
267 tgt->list[i] = n;
268 gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
270 else
272 tgt->list[i] = NULL;
274 size_t align = (size_t) 1 << (kind >> rshift);
275 not_found_cnt++;
276 if (tgt_align < align)
277 tgt_align = align;
278 tgt_size = (tgt_size + align - 1) & ~(align - 1);
279 tgt_size += cur_node.host_end - cur_node.host_start;
280 if ((kind & typemask) == GOMP_MAP_TO_PSET)
282 size_t j;
283 for (j = i + 1; j < mapnum; j++)
284 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
285 & typemask))
286 break;
287 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
288 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
289 > cur_node.host_end))
290 break;
291 else
293 tgt->list[j] = NULL;
294 i++;
300 if (devaddrs)
302 if (mapnum != 1)
304 gomp_mutex_unlock (&devicep->lock);
305 gomp_fatal ("unexpected aggregation");
307 tgt->to_free = devaddrs[0];
308 tgt->tgt_start = (uintptr_t) tgt->to_free;
309 tgt->tgt_end = tgt->tgt_start + sizes[0];
311 else if (not_found_cnt || is_target)
313 /* Allocate tgt_align aligned tgt_size block of memory. */
314 /* FIXME: Perhaps change interface to allocate properly aligned
315 memory. */
316 tgt->to_free = devicep->alloc_func (devicep->target_id,
317 tgt_size + tgt_align - 1);
318 tgt->tgt_start = (uintptr_t) tgt->to_free;
319 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
320 tgt->tgt_end = tgt->tgt_start + tgt_size;
322 else
324 tgt->to_free = NULL;
325 tgt->tgt_start = 0;
326 tgt->tgt_end = 0;
329 tgt_size = 0;
330 if (is_target)
331 tgt_size = mapnum * sizeof (void *);
333 tgt->array = NULL;
334 if (not_found_cnt)
336 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
337 splay_tree_node array = tgt->array;
338 size_t j;
340 for (i = 0; i < mapnum; i++)
341 if (tgt->list[i] == NULL)
343 int kind = get_kind (is_openacc, kinds, i);
344 if (hostaddrs[i] == NULL)
345 continue;
346 splay_tree_key k = &array->key;
347 k->host_start = (uintptr_t) hostaddrs[i];
348 if (!GOMP_MAP_POINTER_P (kind & typemask))
349 k->host_end = k->host_start + sizes[i];
350 else
351 k->host_end = k->host_start + sizeof (void *);
352 splay_tree_key n = splay_tree_lookup (mem_map, k);
353 if (n)
355 tgt->list[i] = n;
356 gomp_map_vars_existing (devicep, n, k, kind & typemask);
358 else
360 size_t align = (size_t) 1 << (kind >> rshift);
361 tgt->list[i] = k;
362 tgt_size = (tgt_size + align - 1) & ~(align - 1);
363 k->tgt = tgt;
364 k->tgt_offset = tgt_size;
365 tgt_size += k->host_end - k->host_start;
366 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
367 k->refcount = 1;
368 k->async_refcount = 0;
369 tgt->refcount++;
370 array->left = NULL;
371 array->right = NULL;
372 splay_tree_insert (mem_map, array);
373 switch (kind & typemask)
375 case GOMP_MAP_ALLOC:
376 case GOMP_MAP_FROM:
377 case GOMP_MAP_FORCE_ALLOC:
378 case GOMP_MAP_FORCE_FROM:
379 break;
380 case GOMP_MAP_TO:
381 case GOMP_MAP_TOFROM:
382 case GOMP_MAP_FORCE_TO:
383 case GOMP_MAP_FORCE_TOFROM:
384 /* FIXME: Perhaps add some smarts, like if copying
385 several adjacent fields from host to target, use some
386 host buffer to avoid sending each var individually. */
387 devicep->host2dev_func (devicep->target_id,
388 (void *) (tgt->tgt_start
389 + k->tgt_offset),
390 (void *) k->host_start,
391 k->host_end - k->host_start);
392 break;
393 case GOMP_MAP_POINTER:
394 gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
395 k->tgt_offset, sizes[i]);
396 break;
397 case GOMP_MAP_TO_PSET:
398 /* FIXME: see above FIXME comment. */
399 devicep->host2dev_func (devicep->target_id,
400 (void *) (tgt->tgt_start
401 + k->tgt_offset),
402 (void *) k->host_start,
403 k->host_end - k->host_start);
405 for (j = i + 1; j < mapnum; j++)
406 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
407 & typemask))
408 break;
409 else if ((uintptr_t) hostaddrs[j] < k->host_start
410 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
411 > k->host_end))
412 break;
413 else
415 tgt->list[j] = k;
416 k->refcount++;
417 gomp_map_pointer (tgt,
418 (uintptr_t) *(void **) hostaddrs[j],
419 k->tgt_offset
420 + ((uintptr_t) hostaddrs[j]
421 - k->host_start),
422 sizes[j]);
423 i++;
425 break;
426 case GOMP_MAP_FORCE_PRESENT:
428 /* We already looked up the memory region above and it
429 was missing. */
430 size_t size = k->host_end - k->host_start;
431 gomp_mutex_unlock (&devicep->lock);
432 #ifdef HAVE_INTTYPES_H
433 gomp_fatal ("present clause: !acc_is_present (%p, "
434 "%"PRIu64" (0x%"PRIx64"))",
435 (void *) k->host_start,
436 (uint64_t) size, (uint64_t) size);
437 #else
438 gomp_fatal ("present clause: !acc_is_present (%p, "
439 "%lu (0x%lx))", (void *) k->host_start,
440 (unsigned long) size, (unsigned long) size);
441 #endif
443 break;
444 case GOMP_MAP_FORCE_DEVICEPTR:
445 assert (k->host_end - k->host_start == sizeof (void *));
447 devicep->host2dev_func (devicep->target_id,
448 (void *) (tgt->tgt_start
449 + k->tgt_offset),
450 (void *) k->host_start,
451 sizeof (void *));
452 break;
453 default:
454 gomp_mutex_unlock (&devicep->lock);
455 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
456 kind);
458 array++;
463 if (is_target)
465 for (i = 0; i < mapnum; i++)
467 if (tgt->list[i] == NULL)
468 cur_node.tgt_offset = (uintptr_t) NULL;
469 else
470 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
471 + tgt->list[i]->tgt_offset;
472 /* FIXME: see above FIXME comment. */
473 devicep->host2dev_func (devicep->target_id,
474 (void *) (tgt->tgt_start
475 + i * sizeof (void *)),
476 (void *) &cur_node.tgt_offset,
477 sizeof (void *));
481 gomp_mutex_unlock (&devicep->lock);
482 return tgt;
485 static void
486 gomp_unmap_tgt (struct target_mem_desc *tgt)
488 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
489 if (tgt->tgt_end)
490 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
492 free (tgt->array);
493 free (tgt);
496 /* Decrease the refcount for a set of mapped variables, and queue asychronous
497 copies from the device back to the host after any work that has been issued.
498 Because the regions are still "live", increment an asynchronous reference
499 count to indicate that they should not be unmapped from host-side data
500 structures until the asynchronous copy has completed. */
502 attribute_hidden void
503 gomp_copy_from_async (struct target_mem_desc *tgt)
505 struct gomp_device_descr *devicep = tgt->device_descr;
506 size_t i;
508 gomp_mutex_lock (&devicep->lock);
510 for (i = 0; i < tgt->list_count; i++)
511 if (tgt->list[i] == NULL)
513 else if (tgt->list[i]->refcount > 1)
515 tgt->list[i]->refcount--;
516 tgt->list[i]->async_refcount++;
518 else
520 splay_tree_key k = tgt->list[i];
521 if (k->copy_from)
522 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
523 (void *) (k->tgt->tgt_start + k->tgt_offset),
524 k->host_end - k->host_start);
527 gomp_mutex_unlock (&devicep->lock);
530 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
531 variables back from device to host: if it is false, it is assumed that this
532 has been done already, i.e. by gomp_copy_from_async above. */
534 attribute_hidden void
535 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
537 struct gomp_device_descr *devicep = tgt->device_descr;
539 if (tgt->list_count == 0)
541 free (tgt);
542 return;
545 gomp_mutex_lock (&devicep->lock);
547 size_t i;
548 for (i = 0; i < tgt->list_count; i++)
549 if (tgt->list[i] == NULL)
551 else if (tgt->list[i]->refcount > 1)
552 tgt->list[i]->refcount--;
553 else if (tgt->list[i]->async_refcount > 0)
554 tgt->list[i]->async_refcount--;
555 else
557 splay_tree_key k = tgt->list[i];
558 if (k->copy_from && do_copyfrom)
559 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
560 (void *) (k->tgt->tgt_start + k->tgt_offset),
561 k->host_end - k->host_start);
562 splay_tree_remove (&devicep->mem_map, k);
563 if (k->tgt->refcount > 1)
564 k->tgt->refcount--;
565 else
566 gomp_unmap_tgt (k->tgt);
569 if (tgt->refcount > 1)
570 tgt->refcount--;
571 else
572 gomp_unmap_tgt (tgt);
574 gomp_mutex_unlock (&devicep->lock);
577 static void
578 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
579 size_t *sizes, void *kinds, bool is_openacc)
581 size_t i;
582 struct splay_tree_key_s cur_node;
583 const int typemask = is_openacc ? 0xff : 0x7;
585 if (!devicep)
586 return;
588 if (mapnum == 0)
589 return;
591 gomp_mutex_lock (&devicep->lock);
592 for (i = 0; i < mapnum; i++)
593 if (sizes[i])
595 cur_node.host_start = (uintptr_t) hostaddrs[i];
596 cur_node.host_end = cur_node.host_start + sizes[i];
597 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
598 if (n)
600 int kind = get_kind (is_openacc, kinds, i);
601 if (n->host_start > cur_node.host_start
602 || n->host_end < cur_node.host_end)
604 gomp_mutex_unlock (&devicep->lock);
605 gomp_fatal ("Trying to update [%p..%p) object when "
606 "only [%p..%p) is mapped",
607 (void *) cur_node.host_start,
608 (void *) cur_node.host_end,
609 (void *) n->host_start,
610 (void *) n->host_end);
612 if (GOMP_MAP_COPY_TO_P (kind & typemask))
613 devicep->host2dev_func (devicep->target_id,
614 (void *) (n->tgt->tgt_start
615 + n->tgt_offset
616 + cur_node.host_start
617 - n->host_start),
618 (void *) cur_node.host_start,
619 cur_node.host_end - cur_node.host_start);
620 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
621 devicep->dev2host_func (devicep->target_id,
622 (void *) cur_node.host_start,
623 (void *) (n->tgt->tgt_start
624 + n->tgt_offset
625 + cur_node.host_start
626 - n->host_start),
627 cur_node.host_end - cur_node.host_start);
629 else
631 gomp_mutex_unlock (&devicep->lock);
632 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
633 (void *) cur_node.host_start,
634 (void *) cur_node.host_end);
637 gomp_mutex_unlock (&devicep->lock);
640 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
641 And insert to splay tree the mapping between addresses from HOST_TABLE and
642 from loaded target image. We rely in the host and device compiler
643 emitting variable and functions in the same order. */
645 static void
646 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
647 const void *host_table, const void *target_data,
648 bool is_register_lock)
650 void **host_func_table = ((void ***) host_table)[0];
651 void **host_funcs_end = ((void ***) host_table)[1];
652 void **host_var_table = ((void ***) host_table)[2];
653 void **host_vars_end = ((void ***) host_table)[3];
655 /* The func table contains only addresses, the var table contains addresses
656 and corresponding sizes. */
657 int num_funcs = host_funcs_end - host_func_table;
658 int num_vars = (host_vars_end - host_var_table) / 2;
660 /* Load image to device and get target addresses for the image. */
661 struct addr_pair *target_table = NULL;
662 int i, num_target_entries;
664 num_target_entries
665 = devicep->load_image_func (devicep->target_id, version,
666 target_data, &target_table);
668 if (num_target_entries != num_funcs + num_vars)
670 gomp_mutex_unlock (&devicep->lock);
671 if (is_register_lock)
672 gomp_mutex_unlock (&register_lock);
673 gomp_fatal ("Cannot map target functions or variables"
674 " (expected %u, have %u)", num_funcs + num_vars,
675 num_target_entries);
678 /* Insert host-target address mapping into splay tree. */
679 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
680 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
681 tgt->refcount = 1;
682 tgt->tgt_start = 0;
683 tgt->tgt_end = 0;
684 tgt->to_free = NULL;
685 tgt->prev = NULL;
686 tgt->list_count = 0;
687 tgt->device_descr = devicep;
688 splay_tree_node array = tgt->array;
690 for (i = 0; i < num_funcs; i++)
692 splay_tree_key k = &array->key;
693 k->host_start = (uintptr_t) host_func_table[i];
694 k->host_end = k->host_start + 1;
695 k->tgt = tgt;
696 k->tgt_offset = target_table[i].start;
697 k->refcount = 1;
698 k->async_refcount = 0;
699 k->copy_from = false;
700 array->left = NULL;
701 array->right = NULL;
702 splay_tree_insert (&devicep->mem_map, array);
703 array++;
706 for (i = 0; i < num_vars; i++)
708 struct addr_pair *target_var = &target_table[num_funcs + i];
709 if (target_var->end - target_var->start
710 != (uintptr_t) host_var_table[i * 2 + 1])
712 gomp_mutex_unlock (&devicep->lock);
713 if (is_register_lock)
714 gomp_mutex_unlock (&register_lock);
715 gomp_fatal ("Can't map target variables (size mismatch)");
718 splay_tree_key k = &array->key;
719 k->host_start = (uintptr_t) host_var_table[i * 2];
720 k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
721 k->tgt = tgt;
722 k->tgt_offset = target_var->start;
723 k->refcount = 1;
724 k->async_refcount = 0;
725 k->copy_from = false;
726 array->left = NULL;
727 array->right = NULL;
728 splay_tree_insert (&devicep->mem_map, array);
729 array++;
732 free (target_table);
735 /* Unload the mappings described by target_data from device DEVICE_P.
736 The device must be locked. */
738 static void
739 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
740 unsigned version,
741 const void *host_table, const void *target_data)
743 void **host_func_table = ((void ***) host_table)[0];
744 void **host_funcs_end = ((void ***) host_table)[1];
745 void **host_var_table = ((void ***) host_table)[2];
746 void **host_vars_end = ((void ***) host_table)[3];
748 /* The func table contains only addresses, the var table contains addresses
749 and corresponding sizes. */
750 int num_funcs = host_funcs_end - host_func_table;
751 int num_vars = (host_vars_end - host_var_table) / 2;
753 unsigned j;
754 struct splay_tree_key_s k;
755 splay_tree_key node = NULL;
757 /* Find mapping at start of node array */
758 if (num_funcs || num_vars)
760 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
761 : (uintptr_t) host_var_table[0]);
762 k.host_end = k.host_start + 1;
763 node = splay_tree_lookup (&devicep->mem_map, &k);
766 devicep->unload_image_func (devicep->target_id, version, target_data);
768 /* Remove mappings from splay tree. */
769 for (j = 0; j < num_funcs; j++)
771 k.host_start = (uintptr_t) host_func_table[j];
772 k.host_end = k.host_start + 1;
773 splay_tree_remove (&devicep->mem_map, &k);
776 for (j = 0; j < num_vars; j++)
778 k.host_start = (uintptr_t) host_var_table[j * 2];
779 k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
780 splay_tree_remove (&devicep->mem_map, &k);
783 if (node)
785 free (node->tgt);
786 free (node);
790 /* This function should be called from every offload image while loading.
791 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
792 the target, and TARGET_DATA needed by target plugin. */
794 void
795 GOMP_offload_register_ver (unsigned version, const void *host_table,
796 int target_type, const void *target_data)
798 int i;
800 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
801 gomp_fatal ("Library too old for offload (version %u < %u)",
802 GOMP_VERSION, GOMP_VERSION_LIB (version));
804 gomp_mutex_lock (&register_lock);
806 /* Load image to all initialized devices. */
807 for (i = 0; i < num_devices; i++)
809 struct gomp_device_descr *devicep = &devices[i];
810 gomp_mutex_lock (&devicep->lock);
811 if (devicep->type == target_type && devicep->is_initialized)
812 gomp_load_image_to_device (devicep, version,
813 host_table, target_data, true);
814 gomp_mutex_unlock (&devicep->lock);
817 /* Insert image to array of pending images. */
818 offload_images
819 = gomp_realloc_unlock (offload_images,
820 (num_offload_images + 1)
821 * sizeof (struct offload_image_descr));
822 offload_images[num_offload_images].version = version;
823 offload_images[num_offload_images].type = target_type;
824 offload_images[num_offload_images].host_table = host_table;
825 offload_images[num_offload_images].target_data = target_data;
827 num_offload_images++;
828 gomp_mutex_unlock (&register_lock);
/* Legacy (versionless) entry point for image registration.  */
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
838 /* This function should be called from every offload image while unloading.
839 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
840 the target, and TARGET_DATA needed by target plugin. */
842 void
843 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
844 int target_type, const void *target_data)
846 int i;
848 gomp_mutex_lock (&register_lock);
850 /* Unload image from all initialized devices. */
851 for (i = 0; i < num_devices; i++)
853 struct gomp_device_descr *devicep = &devices[i];
854 gomp_mutex_lock (&devicep->lock);
855 if (devicep->type == target_type && devicep->is_initialized)
856 gomp_unload_image_from_device (devicep, version,
857 host_table, target_data);
858 gomp_mutex_unlock (&devicep->lock);
861 /* Remove image from array of pending images. */
862 for (i = 0; i < num_offload_images; i++)
863 if (offload_images[i].target_data == target_data)
865 offload_images[i] = offload_images[--num_offload_images];
866 break;
869 gomp_mutex_unlock (&register_lock);
/* Legacy (versionless) entry point for image unregistration.  */
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
879 /* This function initializes the target device, specified by DEVICEP. DEVICEP
880 must be locked on entry, and remains locked on return. */
882 attribute_hidden void
883 gomp_init_device (struct gomp_device_descr *devicep)
885 int i;
886 devicep->init_device_func (devicep->target_id);
888 /* Load to device all images registered by the moment. */
889 for (i = 0; i < num_offload_images; i++)
891 struct offload_image_descr *image = &offload_images[i];
892 if (image->type == devicep->type)
893 gomp_load_image_to_device (devicep, image->version,
894 image->host_table, image->target_data,
895 false);
898 devicep->is_initialized = true;
901 attribute_hidden void
902 gomp_unload_device (struct gomp_device_descr *devicep)
904 if (devicep->is_initialized)
906 unsigned i;
908 /* Unload from device all images registered at the moment. */
909 for (i = 0; i < num_offload_images; i++)
911 struct offload_image_descr *image = &offload_images[i];
912 if (image->type == devicep->type)
913 gomp_unload_image_from_device (devicep, image->version,
914 image->host_table,
915 image->target_data);
920 /* Free address mapping tables. MM must be locked on entry, and remains locked
921 on return. */
923 attribute_hidden void
924 gomp_free_memmap (struct splay_tree_s *mem_map)
926 while (mem_map->root)
928 struct target_mem_desc *tgt = mem_map->root->key.tgt;
930 splay_tree_remove (mem_map, &mem_map->root->key);
931 free (tgt->array);
932 free (tgt);
936 /* This function de-initializes the target device, specified by DEVICEP.
937 DEVICEP must be locked on entry, and remains locked on return. */
939 attribute_hidden void
940 gomp_fini_device (struct gomp_device_descr *devicep)
942 if (devicep->is_initialized)
943 devicep->fini_device_func (devicep->target_id);
945 devicep->is_initialized = false;
948 /* Called when encountering a target directive. If DEVICE
949 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
950 GOMP_DEVICE_HOST_FALLBACK (or any value
951 larger than last available hw device), use host fallback.
952 FN is address of host code, UNUSED is part of the current ABI, but
953 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
954 with MAPNUM entries, with addresses of the host objects,
955 sizes of the host objects (resp. for pointer kind pointer bias
956 and assumed sizeof (void *) size) and kinds. */
958 void
959 GOMP_target (int device, void (*fn) (void *), const void *unused,
960 size_t mapnum, void **hostaddrs, size_t *sizes,
961 unsigned char *kinds)
963 struct gomp_device_descr *devicep = resolve_device (device);
965 if (devicep == NULL
966 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
968 /* Host fallback. */
969 struct gomp_thread old_thr, *thr = gomp_thread ();
970 old_thr = *thr;
971 memset (thr, '\0', sizeof (*thr));
972 if (gomp_places_list)
974 thr->place = old_thr.place;
975 thr->ts.place_partition_len = gomp_places_list_len;
977 fn (hostaddrs);
978 gomp_free_thread (thr);
979 *thr = old_thr;
980 return;
983 gomp_mutex_lock (&devicep->lock);
984 if (!devicep->is_initialized)
985 gomp_init_device (devicep);
986 gomp_mutex_unlock (&devicep->lock);
988 void *fn_addr;
990 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
991 fn_addr = (void *) fn;
992 else
994 gomp_mutex_lock (&devicep->lock);
995 struct splay_tree_key_s k;
996 k.host_start = (uintptr_t) fn;
997 k.host_end = k.host_start + 1;
998 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
999 if (tgt_fn == NULL)
1001 gomp_mutex_unlock (&devicep->lock);
1002 gomp_fatal ("Target function wasn't mapped");
1004 gomp_mutex_unlock (&devicep->lock);
1006 fn_addr = (void *) tgt_fn->tgt_offset;
1009 struct target_mem_desc *tgt_vars
1010 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1011 true);
1012 struct gomp_thread old_thr, *thr = gomp_thread ();
1013 old_thr = *thr;
1014 memset (thr, '\0', sizeof (*thr));
1015 if (gomp_places_list)
1017 thr->place = old_thr.place;
1018 thr->ts.place_partition_len = gomp_places_list_len;
1020 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
1021 gomp_free_thread (thr);
1022 *thr = old_thr;
1023 gomp_unmap_vars (tgt_vars, true);
1026 void
1027 GOMP_target_data (int device, const void *unused, size_t mapnum,
1028 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1030 struct gomp_device_descr *devicep = resolve_device (device);
1032 if (devicep == NULL
1033 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1035 /* Host fallback. */
1036 struct gomp_task_icv *icv = gomp_icv (false);
1037 if (icv->target_data)
1039 /* Even when doing a host fallback, if there are any active
1040 #pragma omp target data constructs, need to remember the
1041 new #pragma omp target data, otherwise GOMP_target_end_data
1042 would get out of sync. */
1043 struct target_mem_desc *tgt
1044 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
1045 tgt->prev = icv->target_data;
1046 icv->target_data = tgt;
1048 return;
1051 gomp_mutex_lock (&devicep->lock);
1052 if (!devicep->is_initialized)
1053 gomp_init_device (devicep);
1054 gomp_mutex_unlock (&devicep->lock);
1056 struct target_mem_desc *tgt
1057 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1058 false);
1059 struct gomp_task_icv *icv = gomp_icv (true);
1060 tgt->prev = icv->target_data;
1061 icv->target_data = tgt;
1064 void
1065 GOMP_target_end_data (void)
1067 struct gomp_task_icv *icv = gomp_icv (false);
1068 if (icv->target_data)
1070 struct target_mem_desc *tgt = icv->target_data;
1071 icv->target_data = tgt->prev;
1072 gomp_unmap_vars (tgt, true);
1076 void
1077 GOMP_target_update (int device, const void *unused, size_t mapnum,
1078 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1080 struct gomp_device_descr *devicep = resolve_device (device);
1082 if (devicep == NULL
1083 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1084 return;
1086 gomp_mutex_lock (&devicep->lock);
1087 if (!devicep->is_initialized)
1088 gomp_init_device (devicep);
1089 gomp_mutex_unlock (&devicep->lock);
1091 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1094 void
1095 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
1097 if (thread_limit)
1099 struct gomp_task_icv *icv = gomp_icv (true);
1100 icv->thread_limit_var
1101 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
1103 (void) num_teams;
1106 #ifdef PLUGIN_SUPPORT
1108 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
1109 in PLUGIN_NAME.
1110 The handles of the found functions are stored in the corresponding fields
1111 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
1113 static bool
1114 gomp_load_plugin_for_device (struct gomp_device_descr *device,
1115 const char *plugin_name)
1117 const char *err = NULL, *last_missing = NULL;
1119 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
1120 if (!plugin_handle)
1121 goto dl_fail;
1123 /* Check if all required functions are available in the plugin and store
1124 their handlers. None of the symbols can legitimately be NULL,
1125 so we don't need to check dlerror all the time. */
1126 #define DLSYM(f) \
1127 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
1128 goto dl_fail
1129 /* Similar, but missing functions are not an error. Return false if
1130 failed, true otherwise. */
1131 #define DLSYM_OPT(f, n) \
1132 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
1133 || (last_missing = #n, 0))
1135 DLSYM (version);
1136 if (device->version_func () != GOMP_VERSION)
1138 err = "plugin version mismatch";
1139 goto fail;
1142 DLSYM (get_name);
1143 DLSYM (get_caps);
1144 DLSYM (get_type);
1145 DLSYM (get_num_devices);
1146 DLSYM (init_device);
1147 DLSYM (fini_device);
1148 DLSYM (load_image);
1149 DLSYM (unload_image);
1150 DLSYM (alloc);
1151 DLSYM (free);
1152 DLSYM (dev2host);
1153 DLSYM (host2dev);
1154 device->capabilities = device->get_caps_func ();
1155 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1156 DLSYM (run);
1157 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1159 if (!DLSYM_OPT (openacc.exec, openacc_parallel)
1160 || !DLSYM_OPT (openacc.register_async_cleanup,
1161 openacc_register_async_cleanup)
1162 || !DLSYM_OPT (openacc.async_test, openacc_async_test)
1163 || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
1164 || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
1165 || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
1166 || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
1167 || !DLSYM_OPT (openacc.async_wait_all_async,
1168 openacc_async_wait_all_async)
1169 || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
1170 || !DLSYM_OPT (openacc.create_thread_data,
1171 openacc_create_thread_data)
1172 || !DLSYM_OPT (openacc.destroy_thread_data,
1173 openacc_destroy_thread_data))
1175 /* Require all the OpenACC handlers if we have
1176 GOMP_OFFLOAD_CAP_OPENACC_200. */
1177 err = "plugin missing OpenACC handler function";
1178 goto fail;
1181 unsigned cuda = 0;
1182 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
1183 openacc_get_current_cuda_device);
1184 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
1185 openacc_get_current_cuda_context);
1186 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1187 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1188 if (cuda && cuda != 4)
1190 /* Make sure all the CUDA functions are there if any of them are. */
1191 err = "plugin missing OpenACC CUDA handler function";
1192 goto fail;
1195 #undef DLSYM
1196 #undef DLSYM_OPT
1198 return 1;
1200 dl_fail:
1201 err = dlerror ();
1202 fail:
1203 gomp_error ("while loading %s: %s", plugin_name, err);
1204 if (last_missing)
1205 gomp_error ("missing function was %s", last_missing);
1206 if (plugin_handle)
1207 dlclose (plugin_handle);
1209 return 0;
1212 /* This function initializes the runtime needed for offloading.
1213 It parses the list of offload targets and tries to load the plugins for
1214 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1215 will be set, and the array DEVICES initialized, containing descriptors for
1216 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1217 by the others. */
1219 static void
1220 gomp_target_init (void)
1222 const char *prefix ="libgomp-plugin-";
1223 const char *suffix = SONAME_SUFFIX (1);
1224 const char *cur, *next;
1225 char *plugin_name;
1226 int i, new_num_devices;
1228 num_devices = 0;
1229 devices = NULL;
1231 cur = OFFLOAD_TARGETS;
1232 if (*cur)
1235 struct gomp_device_descr current_device;
1237 next = strchr (cur, ',');
1239 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1240 + strlen (prefix) + strlen (suffix));
1241 if (!plugin_name)
1243 num_devices = 0;
1244 break;
1247 strcpy (plugin_name, prefix);
1248 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1249 strcat (plugin_name, suffix);
1251 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1253 new_num_devices = current_device.get_num_devices_func ();
1254 if (new_num_devices >= 1)
1256 /* Augment DEVICES and NUM_DEVICES. */
1258 devices = realloc (devices, (num_devices + new_num_devices)
1259 * sizeof (struct gomp_device_descr));
1260 if (!devices)
1262 num_devices = 0;
1263 free (plugin_name);
1264 break;
1267 current_device.name = current_device.get_name_func ();
1268 /* current_device.capabilities has already been set. */
1269 current_device.type = current_device.get_type_func ();
1270 current_device.mem_map.root = NULL;
1271 current_device.is_initialized = false;
1272 current_device.openacc.data_environ = NULL;
1273 for (i = 0; i < new_num_devices; i++)
1275 current_device.target_id = i;
1276 devices[num_devices] = current_device;
1277 gomp_mutex_init (&devices[num_devices].lock);
1278 num_devices++;
1283 free (plugin_name);
1284 cur = next + 1;
1286 while (next);
1288 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1289 NUM_DEVICES_OPENMP. */
1290 struct gomp_device_descr *devices_s
1291 = malloc (num_devices * sizeof (struct gomp_device_descr));
1292 if (!devices_s)
1294 num_devices = 0;
1295 free (devices);
1296 devices = NULL;
1298 num_devices_openmp = 0;
1299 for (i = 0; i < num_devices; i++)
1300 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1301 devices_s[num_devices_openmp++] = devices[i];
1302 int num_devices_after_openmp = num_devices_openmp;
1303 for (i = 0; i < num_devices; i++)
1304 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1305 devices_s[num_devices_after_openmp++] = devices[i];
1306 free (devices);
1307 devices = devices_s;
1309 for (i = 0; i < num_devices; i++)
1311 /* The 'devices' array can be moved (by the realloc call) until we have
1312 found all the plugins, so registering with the OpenACC runtime (which
1313 takes a copy of the pointer argument) must be delayed until now. */
1314 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1315 goacc_register (&devices[i]);
1319 #else /* PLUGIN_SUPPORT */
1320 /* If dlfcn.h is unavailable we always fallback to host execution.
1321 GOMP_target* routines are just stubs for this case. */
1322 static void
1323 gomp_target_init (void)
1326 #endif /* PLUGIN_SUPPORT */