Merged trunk revision 227333 to the hsa branch.
[official-gcc.git] / libgomp / target.c
blob7a60b6680a921013a7f61813e1619efdf0bf8191
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
38 #endif
39 #include <string.h>
40 #include <assert.h>
42 #ifdef PLUGIN_SUPPORT
43 #include <dlfcn.h>
44 #include "plugin-suffix.h"
45 #endif
47 static void gomp_target_init (void);
49 /* The whole initialization code for offloading plugins is only run one. */
50 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52 /* Mutex for offload image registration. */
53 static gomp_mutex_t register_lock;
/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  /* GOMP_VERSION_* value the image was built against.  */
  unsigned version;
  /* Kind of accelerator this image targets.  */
  enum offload_target_type type;
  /* Host-side function/variable table descriptor (4 pointers: func table
     start/end, var table start/end).  */
  const void *host_table;
  /* Opaque per-plugin payload passed to load_image_func.  */
  const void *target_data;
};
65 /* Array of descriptors of offload images. */
66 static struct offload_image_descr *offload_images;
68 /* Total number of offload images. */
69 static int num_offload_images;
71 /* Array of descriptors for all available devices. */
72 static struct gomp_device_descr *devices;
74 /* Total number of available devices. */
75 static int num_devices;
77 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
78 static int num_devices_openmp;
80 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
82 static void *
83 gomp_realloc_unlock (void *old, size_t size)
85 void *ret = realloc (old, size);
86 if (ret == NULL)
88 gomp_mutex_unlock (&register_lock);
89 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
91 return ret;
94 /* The comparison function. */
96 attribute_hidden int
97 splay_compare (splay_tree_key x, splay_tree_key y)
99 if (x->host_start == x->host_end
100 && y->host_start == y->host_end)
101 return 0;
102 if (x->host_end <= y->host_start)
103 return -1;
104 if (x->host_start >= y->host_end)
105 return 1;
106 return 0;
109 #include "splay-tree.h"
/* Run the offloading initialization (gomp_target_init) exactly once,
   regardless of how many threads race here; pthread_once serializes.  */
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
/* Return the number of devices usable from OpenMP target constructs
   (the GOMP_OFFLOAD_CAP_OPENMP_400-capable subset), initializing the
   plugin machinery on first use.  */
attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
124 static struct gomp_device_descr *
125 resolve_device (int device_id)
127 if (device_id == GOMP_DEVICE_ICV)
129 struct gomp_task_icv *icv = gomp_icv (false);
130 device_id = icv->default_device_var;
133 if (device_id < 0 || device_id >= gomp_get_num_devices ())
134 return NULL;
136 return &devices[device_id];
/* Handle the case where splay_tree_lookup found oldn for newn.
   Helper function of gomp_map_vars.  A force-mapping kind, or a new
   range not fully contained in the existing one, is a fatal remapping
   error; otherwise the existing mapping is reused and its reference
   count bumped.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, unsigned char kind)
{
  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      /* Drop the device lock before gomp_fatal terminates the process.  */
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }
  oldn->refcount++;
}
/* Fetch the map kind for entry IDX out of the KINDS array.  OpenACC
   encodes kinds as unsigned shorts, OpenMP as unsigned chars.  */

static int
get_kind (bool is_openacc, void *kinds, int idx)
{
  if (is_openacc)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
/* Initialize a device-side pointer located at TGT->tgt_start + TARGET_OFFSET
   so that it points to the device copy of the array section whose (biased)
   host address is HOST_PTR.  BIAS is first added to find the mapping, then
   subtracted from the computed device address before the store, so the
   device pointer carries the same bias as the host one.  The device lock
   must be held by the caller.  */

static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      /* NULL host pointer maps to NULL device pointer.  */
      cur_node.tgt_offset = (uintptr_t) NULL;
      /* FIXME: see comment about coalescing host/dev transfers below.  */
      devicep->host2dev_func (devicep->target_id,
			      (void *) (tgt->tgt_start + target_offset),
			      (void *) &cur_node.tgt_offset,
			      sizeof (void *));
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start + 1;
  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      /* Could be possibly zero size array section.  */
      cur_node.host_end--;
      n = splay_tree_lookup (mem_map, &cur_node);
      if (n == NULL)
	{
	  /* Or a pointer one past the end of the mapped object.  */
	  cur_node.host_start--;
	  n = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	}
    }
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  /* FIXME: see comment about coalescing host/dev transfers below.  */
  devicep->host2dev_func (devicep->target_id,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *));
}
/* Map MAPNUM host address ranges described by HOSTADDRS/SIZES/KINDS onto
   device DEVICEP.  Ranges already present in the device's splay tree are
   reused (their refcount bumped via gomp_map_vars_existing); all remaining
   ranges are packed into a single newly allocated device block, each entry
   aligned as encoded in the high bits of its kind.  IS_OPENACC selects the
   16-bit kind encoding; IS_TARGET additionally reserves an array of device
   pointers at the start of the block holding the target region's argument
   addresses.  DEVADDRS, when non-NULL, supplies a preexisting device buffer
   (only valid with MAPNUM == 1).  Returns a refcounted target_mem_desc to
   be released with gomp_unmap_vars.  */

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool is_openacc, bool is_target)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  /* Alignment is encoded in the bits above the kind's type mask.  */
  const int rshift = is_openacc ? 8 : 3;
  const int typemask = is_openacc ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    return tgt;

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (is_target)
    {
      /* Reserve room for the argument pointer array at block start.  */
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);

  /* First pass: find existing mappings, and compute the total size and
     alignment of the device block needed for the missing ones.  */
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (is_openacc, kinds, i);
      if (hostaddrs[i] == NULL)
	{
	  tgt->list[i] = NULL;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
      if (n)
	{
	  tgt->list[i] = n;
	  gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
	}
      else
	{
	  tgt->list[i] = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      /* Skip trailing POINTER entries that fall inside this
	         pointer set; they are handled together with it.  */
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j] = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      /* Caller supplied the device buffer; only a single mapping is
	 supported in this mode.  */
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || is_target)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      /* Everything was already mapped; no device block needed.  */
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  /* Second pass re-walks the offsets from scratch.  */
  tgt_size = 0;
  if (is_target)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt)
    {
      /* Second pass: create splay tree nodes for the missing mappings and
	 copy data to the device as each entry's kind requires.  */
      tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i] == NULL)
	  {
	    int kind = get_kind (is_openacc, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n)
	      {
		tgt->list[i] = n;
		gomp_map_vars_existing (devicep, n, k, kind & typemask);
	      }
	    else
	      {
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i] = k;
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		k->tgt = tgt;
		k->tgt_offset = tgt_size;
		tgt_size += k->host_end - k->host_start;
		k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		k->refcount = 1;
		k->async_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		    /* No host-to-device copy needed at map time.  */
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		    /* FIXME: Perhaps add some smarts, like if copying
		       several adjacent fields from host to target, use some
		       host buffer to avoid sending each var individually.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i]);
		    break;
		  case GOMP_MAP_TO_PSET:
		    /* FIXME: see above FIXME comment.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);

		    /* Fix up pointers contained within the pointer set.  */
		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j] = k;
			  k->refcount++;
			  gomp_map_pointer (tgt,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j]);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));

		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    sizeof (void *));
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }
		array++;
	      }
	  }
    }

  if (is_target)
    {
      /* Fill in the argument-pointer array at the start of the block.  */
      for (i = 0; i < mapnum; i++)
	{
	  if (tgt->list[i] == NULL)
	    cur_node.tgt_offset = (uintptr_t) NULL;
	  else
	    cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
				  + tgt->list[i]->tgt_offset;
	  /* FIXME: see above FIXME comment.  */
	  devicep->host2dev_func (devicep->target_id,
				  (void *) (tgt->tgt_start
					    + i * sizeof (void *)),
				  (void *) &cur_node.tgt_offset,
				  sizeof (void *));
	}
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
485 static void
486 gomp_unmap_tgt (struct target_mem_desc *tgt)
488 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
489 if (tgt->tgt_end)
490 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
492 free (tgt->array);
493 free (tgt);
/* Decrease the refcount for a set of mapped variables, and queue asychronous
   copies from the device back to the host after any work that has been issued.
   Because the regions are still "live", increment an asynchronous reference
   count to indicate that they should not be unmapped from host-side data
   structures until the asynchronous copy has completed.  */

attribute_hidden void
gomp_copy_from_async (struct target_mem_desc *tgt)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  size_t i;

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      ;	/* Entry had a NULL host address; nothing was mapped.  */
    else if (tgt->list[i]->refcount > 1)
      {
	/* Still referenced elsewhere: shift one reference onto the async
	   count so gomp_unmap_vars defers removal.  */
	tgt->list[i]->refcount--;
	tgt->list[i]->async_refcount++;
      }
    else
      {
	/* Last reference: issue the device-to-host copy now if the
	   mapping was marked copy_from.  */
	splay_tree_key k = tgt->list[i];
	if (k->copy_from)
	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
				  (void *) (k->tgt->tgt_start + k->tgt_offset),
				  k->host_end - k->host_start);
      }

  gomp_mutex_unlock (&devicep->lock);
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already, i.e. by gomp_copy_from_async above.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      /* Nothing was mapped (e.g. host-fallback placeholder).  */
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      ;	/* Entry had a NULL host address; nothing to release.  */
    else if (tgt->list[i]->refcount > 1)
      tgt->list[i]->refcount--;
    else if (tgt->list[i]->async_refcount > 0)
      /* An async copy-back is pending; gomp_copy_from_async already did
	 (or will do) the transfer, just drop the async reference.  */
      tgt->list[i]->async_refcount--;
    else
      {
	/* Last reference: copy back if requested and remove the mapping.  */
	splay_tree_key k = tgt->list[i];
	if (k->copy_from && do_copyfrom)
	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
				  (void *) (k->tgt->tgt_start + k->tgt_offset),
				  k->host_end - k->host_start);
	splay_tree_remove (&devicep->mem_map, k);
	if (k->tgt->refcount > 1)
	  k->tgt->refcount--;
	else
	  gomp_unmap_tgt (k->tgt);
      }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
/* Refresh mapped data in place: for each of the MAPNUM ranges in
   HOSTADDRS/SIZES whose kind requests it, copy host-to-device and/or
   device-to-host between a host object and its existing device mapping.
   Every non-zero-sized range must already be mapped (and fully contained
   in its mapping) or gomp_fatal is raised.  Used for target update.  */

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool is_openacc)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = is_openacc ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (is_openacc, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }
	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      devicep->host2dev_func (devicep->target_id,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      (void *) cur_node.host_start,
				      cur_node.host_end - cur_node.host_start);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      devicep->dev2host_func (devicep->target_id,
				      (void *) cur_node.host_start,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      cur_node.host_end - cur_node.host_start);
	  }
	else
	  {
	    gomp_mutex_unlock (&devicep->lock);
	    gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
			(void *) cur_node.host_start,
			(void *) cur_node.host_end);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely in the host and device compiler
   emitting variable and functions in the same order.  The device lock must
   be held; IS_REGISTER_LOCK says whether REGISTER_LOCK is also held and
   must be released before a fatal error.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  /* HOST_TABLE is four pointers: func table start/end, var table start/end.  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = 1;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  /* Functions are keyed by a 1-byte host range starting at their address.  */
  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Variables follow the functions in TARGET_TABLE; their host-side table
     entries alternate address and size.  */
  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      if (target_var->end - target_var->start
	  != (uintptr_t) host_var_table[i * 2 + 1])
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Can't map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* TARGET_TABLE was allocated by the plugin; ownership is ours now.  */
  free (target_table);
}
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  unsigned j;
  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  NOTE(review): this relies on
     the first func (or var) entry being the first element of the tgt->array
     block created by gomp_load_image_to_device, so that NODE below also
     addresses the start of that allocation — confirm against splay-tree.h
     node layout before changing.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  devicep->unload_image_func (devicep->target_id, version, target_data);

  /* Remove mappings from splay tree.  */
  for (j = 0; j < num_funcs; j++)
    {
      k.host_start = (uintptr_t) host_func_table[j];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  for (j = 0; j < num_vars; j++)
    {
      k.host_start = (uintptr_t) host_var_table[j * 2];
      k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
      splay_tree_remove (&devicep->mem_map, &k);
    }

  if (node)
    {
      /* Free the target_mem_desc and the node array it owned.  */
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  /* Refuse images built against a newer libgomp ABI than ours.  */
  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images; devices initialized later
     pick it up in gomp_init_device.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
/* Legacy (unversioned) registration entry point; forwards to the
   versioned variant with version 0.  */
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images (order not preserved:
     the last element is moved into the vacated slot).  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}
/* Legacy (unversioned) unregistration entry point; forwards to the
   versioned variant with version 0.  */
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  devicep->init_device_func (devicep->target_id);

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  devicep->is_initialized = true;
}
/* Unload from DEVICEP every registered image matching its type.  The
   device lock is expected to be held by the caller, mirroring
   gomp_init_device.  Does nothing if the device was never initialized.  */
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->is_initialized)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}
/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  /* Repeatedly remove the root's mapping and free its owning descriptor
     until the tree is empty.  */
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}
/* This function de-initializes the target device, specified by DEVICEP.
   DEVICEP must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_fini_device (struct gomp_device_descr *devicep)
{
  if (devicep->is_initialized)
    devicep->fini_device_func (devicep->target_id);

  devicep->is_initialized = false;
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, KERNEL_LAUNCH carries launch attributes
   passed through to the plugin's run hook.  HOSTADDRS, SIZES and KINDS
   are arrays with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *kernel_launch,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback: run FN on the host with a fresh thread state.  */
      struct gomp_thread old_thr, *thr = gomp_thread ();
      old_thr = *thr;
      memset (thr, '\0', sizeof (*thr));
      if (gomp_places_list)
	{
	  thr->place = old_thr.place;
	  thr->ts.place_partition_len = gomp_places_list_len;
	}
      fn (hostaddrs);
      gomp_free_thread (thr);
      *thr = old_thr;
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  void *fn_addr;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    /* Device runs host code directly; no address translation needed.  */
    fn_addr = (void *) fn;
  else
    {
      /* Look up the device-side address of FN in the memory map.  */
      gomp_mutex_lock (&devicep->lock);
      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      if (tgt_fn == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Target function wasn't mapped");
	}
      gomp_mutex_unlock (&devicep->lock);

      fn_addr = (void *) tgt_fn->tgt_offset;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    /* Shared-memory device: pass host addresses straight through.  */
    tgt_vars = NULL;
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      false, true);
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     kernel_launch);
  gomp_free_thread (thr);
  *thr = old_thr;
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
/* Handle a #pragma omp target data construct: map the listed variables
   and push the resulting descriptor onto the task ICV's target_data
   chain, to be popped by GOMP_target_end_data.  UNUSED is part of the
   current ABI but ignored.  */
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback or accelerators with memory coherent access.  */
      struct gomp_task_icv *icv = gomp_icv (false);
      if (icv->target_data)
	{
	  /* Even when doing a host fallback, if there are any active
	     #pragma omp target data constructs, need to remember the
	     new #pragma omp target data, otherwise GOMP_target_end_data
	     would get out of sync.  */
	  struct target_mem_desc *tgt
	    = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
	  tgt->prev = icv->target_data;
	  icv->target_data = tgt;
	}
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     false);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
1071 void
1072 GOMP_target_end_data (void)
1074 struct gomp_task_icv *icv = gomp_icv (false);
1075 if (icv->target_data)
1077 struct target_mem_desc *tgt = icv->target_data;
1078 icv->target_data = tgt->prev;
1079 gomp_unmap_vars (tgt, true);
/* Handle a #pragma omp target update construct: refresh already-mapped
   data in the direction each kind requests.  No-op on host fallback.
   UNUSED is part of the current ABI but ignored.  */
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
/* Handle a #pragma omp teams construct on the host: record a non-zero
   THREAD_LIMIT in the task ICVs (clamping values above INT_MAX to
   UINT_MAX).  NUM_TEAMS is accepted for ABI compatibility but unused.  */
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
1113 #ifdef PLUGIN_SUPPORT
1115 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
1116 in PLUGIN_NAME.
1117 The handles of the found functions are stored in the corresponding fields
1118 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
1120 static bool
1121 gomp_load_plugin_for_device (struct gomp_device_descr *device,
1122 const char *plugin_name)
1124 const char *err = NULL, *last_missing = NULL;
1126 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
1127 if (!plugin_handle)
1128 goto dl_fail;
1130 /* Check if all required functions are available in the plugin and store
1131 their handlers. None of the symbols can legitimately be NULL,
1132 so we don't need to check dlerror all the time. */
1133 #define DLSYM(f) \
1134 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
1135 goto dl_fail
1136 /* Similar, but missing functions are not an error. Return false if
1137 failed, true otherwise. */
1138 #define DLSYM_OPT(f, n) \
1139 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
1140 || (last_missing = #n, 0))
1142 DLSYM (version);
1143 if (device->version_func () != GOMP_VERSION)
1145 err = "plugin version mismatch";
1146 goto fail;
1149 DLSYM (get_name);
1150 DLSYM (get_caps);
1151 DLSYM (get_type);
1152 DLSYM (get_num_devices);
1153 DLSYM (init_device);
1154 DLSYM (fini_device);
1155 DLSYM (load_image);
1156 DLSYM (unload_image);
1157 DLSYM (alloc);
1158 DLSYM (free);
1159 DLSYM (dev2host);
1160 DLSYM (host2dev);
1161 device->capabilities = device->get_caps_func ();
1162 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1163 DLSYM (run);
1164 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1166 if (!DLSYM_OPT (openacc.exec, openacc_parallel)
1167 || !DLSYM_OPT (openacc.register_async_cleanup,
1168 openacc_register_async_cleanup)
1169 || !DLSYM_OPT (openacc.async_test, openacc_async_test)
1170 || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
1171 || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
1172 || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
1173 || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
1174 || !DLSYM_OPT (openacc.async_wait_all_async,
1175 openacc_async_wait_all_async)
1176 || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
1177 || !DLSYM_OPT (openacc.create_thread_data,
1178 openacc_create_thread_data)
1179 || !DLSYM_OPT (openacc.destroy_thread_data,
1180 openacc_destroy_thread_data))
1182 /* Require all the OpenACC handlers if we have
1183 GOMP_OFFLOAD_CAP_OPENACC_200. */
1184 err = "plugin missing OpenACC handler function";
1185 goto fail;
1188 unsigned cuda = 0;
1189 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
1190 openacc_get_current_cuda_device);
1191 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
1192 openacc_get_current_cuda_context);
1193 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1194 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1195 if (cuda && cuda != 4)
1197 /* Make sure all the CUDA functions are there if any of them are. */
1198 err = "plugin missing OpenACC CUDA handler function";
1199 goto fail;
1202 #undef DLSYM
1203 #undef DLSYM_OPT
1205 return 1;
1207 dl_fail:
1208 err = dlerror ();
1209 fail:
1210 gomp_error ("while loading %s: %s", plugin_name, err);
1211 if (last_missing)
1212 gomp_error ("missing function was %s", last_missing);
1213 if (plugin_handle)
1214 dlclose (plugin_handle);
1216 return 0;
1219 /* This function initializes the runtime needed for offloading.
1220 It parses the list of offload targets and tries to load the plugins for
1221 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1222 will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */
1226 static void
1227 gomp_target_init (void)
1229 const char *prefix ="libgomp-plugin-";
1230 const char *suffix = SONAME_SUFFIX (1);
1231 const char *cur, *next;
1232 char *plugin_name;
1233 int i, new_num_devices;
1235 num_devices = 0;
1236 devices = NULL;
1238 cur = OFFLOAD_TARGETS;
1239 if (*cur)
1242 struct gomp_device_descr current_device;
1244 next = strchr (cur, ',');
1246 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1247 + strlen (prefix) + strlen (suffix));
1248 if (!plugin_name)
1250 num_devices = 0;
1251 break;
1254 strcpy (plugin_name, prefix);
1255 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1256 strcat (plugin_name, suffix);
1258 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1260 new_num_devices = current_device.get_num_devices_func ();
1261 if (new_num_devices >= 1)
1263 /* Augment DEVICES and NUM_DEVICES. */
1265 devices = realloc (devices, (num_devices + new_num_devices)
1266 * sizeof (struct gomp_device_descr));
1267 if (!devices)
1269 num_devices = 0;
1270 free (plugin_name);
1271 break;
1274 current_device.name = current_device.get_name_func ();
1275 /* current_device.capabilities has already been set. */
1276 current_device.type = current_device.get_type_func ();
1277 current_device.mem_map.root = NULL;
1278 current_device.is_initialized = false;
1279 current_device.openacc.data_environ = NULL;
1280 for (i = 0; i < new_num_devices; i++)
1282 current_device.target_id = i;
1283 devices[num_devices] = current_device;
1284 gomp_mutex_init (&devices[num_devices].lock);
1285 num_devices++;
1290 free (plugin_name);
1291 cur = next + 1;
1293 while (next);
1295 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1296 NUM_DEVICES_OPENMP. */
1297 struct gomp_device_descr *devices_s
1298 = malloc (num_devices * sizeof (struct gomp_device_descr));
1299 if (!devices_s)
1301 num_devices = 0;
1302 free (devices);
1303 devices = NULL;
1305 num_devices_openmp = 0;
1306 for (i = 0; i < num_devices; i++)
1307 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1308 devices_s[num_devices_openmp++] = devices[i];
1309 int num_devices_after_openmp = num_devices_openmp;
1310 for (i = 0; i < num_devices; i++)
1311 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1312 devices_s[num_devices_after_openmp++] = devices[i];
1313 free (devices);
1314 devices = devices_s;
1316 for (i = 0; i < num_devices; i++)
1318 /* The 'devices' array can be moved (by the realloc call) until we have
1319 found all the plugins, so registering with the OpenACC runtime (which
1320 takes a copy of the pointer argument) must be delayed until now. */
1321 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1322 goacc_register (&devices[i]);
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
  /* Intentionally empty: no plugin support, so there are no offload
     devices to discover or register.  */
}
#endif /* PLUGIN_SUPPORT */