Merged trunk revision 225993 into the hsa branch.
[official-gcc.git] / libgomp / target.c
blobd48f972ad3c47ab9053e2c7ce4110ac8f9c93169
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
38 #endif
39 #include <string.h>
40 #include <assert.h>
42 #ifdef PLUGIN_SUPPORT
43 #include <dlfcn.h>
44 #include "plugin-suffix.h"
45 #endif
47 static void gomp_target_init (void);
49 /* The whole initialization code for offloading plugins is only run one. */
50 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52 /* Mutex for offload image registration. */
53 static gomp_mutex_t register_lock;
55 /* This structure describes an offload image.
56 It contains type of the target device, pointer to host table descriptor, and
57 pointer to target data. */
58 struct offload_image_descr {
59 enum offload_target_type type;
60 const void *host_table;
61 const void *target_data;
64 /* Array of descriptors of offload images. */
65 static struct offload_image_descr *offload_images;
67 /* Total number of offload images. */
68 static int num_offload_images;
70 /* Array of descriptors for all available devices. */
71 static struct gomp_device_descr *devices;
73 /* Total number of available devices. */
74 static int num_devices;
76 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
77 static int num_devices_openmp;
79 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
81 static void *
82 gomp_realloc_unlock (void *old, size_t size)
84 void *ret = realloc (old, size);
85 if (ret == NULL)
87 gomp_mutex_unlock (&register_lock);
88 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
90 return ret;
93 /* The comparison function. */
95 attribute_hidden int
96 splay_compare (splay_tree_key x, splay_tree_key y)
98 if (x->host_start == x->host_end
99 && y->host_start == y->host_end)
100 return 0;
101 if (x->host_end <= y->host_start)
102 return -1;
103 if (x->host_start >= y->host_end)
104 return 1;
105 return 0;
108 #include "splay-tree.h"
110 attribute_hidden void
111 gomp_init_targets_once (void)
113 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
116 attribute_hidden int
117 gomp_get_num_devices (void)
119 gomp_init_targets_once ();
120 return num_devices_openmp;
123 static struct gomp_device_descr *
124 resolve_device (int device_id)
126 if (device_id == GOMP_DEVICE_ICV)
128 struct gomp_task_icv *icv = gomp_icv (false);
129 device_id = icv->default_device_var;
132 if (device_id < 0 || device_id >= gomp_get_num_devices ())
133 return NULL;
135 return &devices[device_id];
139 /* Handle the case where splay_tree_lookup found oldn for newn.
140 Helper function of gomp_map_vars. */
142 static inline void
143 gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
144 splay_tree_key newn, unsigned char kind)
146 if ((kind & GOMP_MAP_FLAG_FORCE)
147 || oldn->host_start > newn->host_start
148 || oldn->host_end < newn->host_end)
150 gomp_mutex_unlock (&devicep->lock);
151 gomp_fatal ("Trying to map into device [%p..%p) object when "
152 "[%p..%p) is already mapped",
153 (void *) newn->host_start, (void *) newn->host_end,
154 (void *) oldn->host_start, (void *) oldn->host_end);
156 oldn->refcount++;
159 static int
160 get_kind (bool is_openacc, void *kinds, int idx)
162 return is_openacc ? ((unsigned short *) kinds)[idx]
163 : ((unsigned char *) kinds)[idx];
166 static void
167 gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
168 uintptr_t target_offset, uintptr_t bias)
170 struct gomp_device_descr *devicep = tgt->device_descr;
171 struct splay_tree_s *mem_map = &devicep->mem_map;
172 struct splay_tree_key_s cur_node;
174 cur_node.host_start = host_ptr;
175 if (cur_node.host_start == (uintptr_t) NULL)
177 cur_node.tgt_offset = (uintptr_t) NULL;
178 /* FIXME: see comment about coalescing host/dev transfers below. */
179 devicep->host2dev_func (devicep->target_id,
180 (void *) (tgt->tgt_start + target_offset),
181 (void *) &cur_node.tgt_offset,
182 sizeof (void *));
183 return;
185 /* Add bias to the pointer value. */
186 cur_node.host_start += bias;
187 cur_node.host_end = cur_node.host_start + 1;
188 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
189 if (n == NULL)
191 /* Could be possibly zero size array section. */
192 cur_node.host_end--;
193 n = splay_tree_lookup (mem_map, &cur_node);
194 if (n == NULL)
196 cur_node.host_start--;
197 n = splay_tree_lookup (mem_map, &cur_node);
198 cur_node.host_start++;
201 if (n == NULL)
203 gomp_mutex_unlock (&devicep->lock);
204 gomp_fatal ("Pointer target of array section wasn't mapped");
206 cur_node.host_start -= n->host_start;
207 cur_node.tgt_offset
208 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
209 /* At this point tgt_offset is target address of the
210 array section. Now subtract bias to get what we want
211 to initialize the pointer with. */
212 cur_node.tgt_offset -= bias;
213 /* FIXME: see comment about coalescing host/dev transfers below. */
214 devicep->host2dev_func (devicep->target_id,
215 (void *) (tgt->tgt_start + target_offset),
216 (void *) &cur_node.tgt_offset,
217 sizeof (void *));
220 attribute_hidden struct target_mem_desc *
221 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
222 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
223 bool is_openacc, bool is_target)
225 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
226 const int rshift = is_openacc ? 8 : 3;
227 const int typemask = is_openacc ? 0xff : 0x7;
228 struct splay_tree_s *mem_map = &devicep->mem_map;
229 struct splay_tree_key_s cur_node;
230 struct target_mem_desc *tgt
231 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
232 tgt->list_count = mapnum;
233 tgt->refcount = 1;
234 tgt->device_descr = devicep;
236 if (mapnum == 0)
237 return tgt;
239 tgt_align = sizeof (void *);
240 tgt_size = 0;
241 if (is_target)
243 size_t align = 4 * sizeof (void *);
244 tgt_align = align;
245 tgt_size = mapnum * sizeof (void *);
248 gomp_mutex_lock (&devicep->lock);
250 for (i = 0; i < mapnum; i++)
252 int kind = get_kind (is_openacc, kinds, i);
253 if (hostaddrs[i] == NULL)
255 tgt->list[i] = NULL;
256 continue;
258 cur_node.host_start = (uintptr_t) hostaddrs[i];
259 if (!GOMP_MAP_POINTER_P (kind & typemask))
260 cur_node.host_end = cur_node.host_start + sizes[i];
261 else
262 cur_node.host_end = cur_node.host_start + sizeof (void *);
263 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
264 if (n)
266 tgt->list[i] = n;
267 gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
269 else
271 tgt->list[i] = NULL;
273 size_t align = (size_t) 1 << (kind >> rshift);
274 not_found_cnt++;
275 if (tgt_align < align)
276 tgt_align = align;
277 tgt_size = (tgt_size + align - 1) & ~(align - 1);
278 tgt_size += cur_node.host_end - cur_node.host_start;
279 if ((kind & typemask) == GOMP_MAP_TO_PSET)
281 size_t j;
282 for (j = i + 1; j < mapnum; j++)
283 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
284 & typemask))
285 break;
286 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
287 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
288 > cur_node.host_end))
289 break;
290 else
292 tgt->list[j] = NULL;
293 i++;
299 if (devaddrs)
301 if (mapnum != 1)
303 gomp_mutex_unlock (&devicep->lock);
304 gomp_fatal ("unexpected aggregation");
306 tgt->to_free = devaddrs[0];
307 tgt->tgt_start = (uintptr_t) tgt->to_free;
308 tgt->tgt_end = tgt->tgt_start + sizes[0];
310 else if (not_found_cnt || is_target)
312 /* Allocate tgt_align aligned tgt_size block of memory. */
313 /* FIXME: Perhaps change interface to allocate properly aligned
314 memory. */
315 tgt->to_free = devicep->alloc_func (devicep->target_id,
316 tgt_size + tgt_align - 1);
317 tgt->tgt_start = (uintptr_t) tgt->to_free;
318 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
319 tgt->tgt_end = tgt->tgt_start + tgt_size;
321 else
323 tgt->to_free = NULL;
324 tgt->tgt_start = 0;
325 tgt->tgt_end = 0;
328 tgt_size = 0;
329 if (is_target)
330 tgt_size = mapnum * sizeof (void *);
332 tgt->array = NULL;
333 if (not_found_cnt)
335 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
336 splay_tree_node array = tgt->array;
337 size_t j;
339 for (i = 0; i < mapnum; i++)
340 if (tgt->list[i] == NULL)
342 int kind = get_kind (is_openacc, kinds, i);
343 if (hostaddrs[i] == NULL)
344 continue;
345 splay_tree_key k = &array->key;
346 k->host_start = (uintptr_t) hostaddrs[i];
347 if (!GOMP_MAP_POINTER_P (kind & typemask))
348 k->host_end = k->host_start + sizes[i];
349 else
350 k->host_end = k->host_start + sizeof (void *);
351 splay_tree_key n = splay_tree_lookup (mem_map, k);
352 if (n)
354 tgt->list[i] = n;
355 gomp_map_vars_existing (devicep, n, k, kind & typemask);
357 else
359 size_t align = (size_t) 1 << (kind >> rshift);
360 tgt->list[i] = k;
361 tgt_size = (tgt_size + align - 1) & ~(align - 1);
362 k->tgt = tgt;
363 k->tgt_offset = tgt_size;
364 tgt_size += k->host_end - k->host_start;
365 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
366 k->refcount = 1;
367 k->async_refcount = 0;
368 tgt->refcount++;
369 array->left = NULL;
370 array->right = NULL;
371 splay_tree_insert (mem_map, array);
372 switch (kind & typemask)
374 case GOMP_MAP_ALLOC:
375 case GOMP_MAP_FROM:
376 case GOMP_MAP_FORCE_ALLOC:
377 case GOMP_MAP_FORCE_FROM:
378 break;
379 case GOMP_MAP_TO:
380 case GOMP_MAP_TOFROM:
381 case GOMP_MAP_FORCE_TO:
382 case GOMP_MAP_FORCE_TOFROM:
383 /* FIXME: Perhaps add some smarts, like if copying
384 several adjacent fields from host to target, use some
385 host buffer to avoid sending each var individually. */
386 devicep->host2dev_func (devicep->target_id,
387 (void *) (tgt->tgt_start
388 + k->tgt_offset),
389 (void *) k->host_start,
390 k->host_end - k->host_start);
391 break;
392 case GOMP_MAP_POINTER:
393 gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
394 k->tgt_offset, sizes[i]);
395 break;
396 case GOMP_MAP_TO_PSET:
397 /* FIXME: see above FIXME comment. */
398 devicep->host2dev_func (devicep->target_id,
399 (void *) (tgt->tgt_start
400 + k->tgt_offset),
401 (void *) k->host_start,
402 k->host_end - k->host_start);
404 for (j = i + 1; j < mapnum; j++)
405 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
406 & typemask))
407 break;
408 else if ((uintptr_t) hostaddrs[j] < k->host_start
409 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
410 > k->host_end))
411 break;
412 else
414 tgt->list[j] = k;
415 k->refcount++;
416 gomp_map_pointer (tgt,
417 (uintptr_t) *(void **) hostaddrs[j],
418 k->tgt_offset
419 + ((uintptr_t) hostaddrs[j]
420 - k->host_start),
421 sizes[j]);
422 i++;
424 break;
425 case GOMP_MAP_FORCE_PRESENT:
427 /* We already looked up the memory region above and it
428 was missing. */
429 size_t size = k->host_end - k->host_start;
430 gomp_mutex_unlock (&devicep->lock);
431 #ifdef HAVE_INTTYPES_H
432 gomp_fatal ("present clause: !acc_is_present (%p, "
433 "%"PRIu64" (0x%"PRIx64"))",
434 (void *) k->host_start,
435 (uint64_t) size, (uint64_t) size);
436 #else
437 gomp_fatal ("present clause: !acc_is_present (%p, "
438 "%lu (0x%lx))", (void *) k->host_start,
439 (unsigned long) size, (unsigned long) size);
440 #endif
442 break;
443 case GOMP_MAP_FORCE_DEVICEPTR:
444 assert (k->host_end - k->host_start == sizeof (void *));
446 devicep->host2dev_func (devicep->target_id,
447 (void *) (tgt->tgt_start
448 + k->tgt_offset),
449 (void *) k->host_start,
450 sizeof (void *));
451 break;
452 default:
453 gomp_mutex_unlock (&devicep->lock);
454 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
455 kind);
457 array++;
462 if (is_target)
464 for (i = 0; i < mapnum; i++)
466 if (tgt->list[i] == NULL)
467 cur_node.tgt_offset = (uintptr_t) NULL;
468 else
469 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
470 + tgt->list[i]->tgt_offset;
471 /* FIXME: see above FIXME comment. */
472 devicep->host2dev_func (devicep->target_id,
473 (void *) (tgt->tgt_start
474 + i * sizeof (void *)),
475 (void *) &cur_node.tgt_offset,
476 sizeof (void *));
480 gomp_mutex_unlock (&devicep->lock);
481 return tgt;
484 static void
485 gomp_unmap_tgt (struct target_mem_desc *tgt)
487 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
488 if (tgt->tgt_end)
489 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
491 free (tgt->array);
492 free (tgt);
495 /* Decrease the refcount for a set of mapped variables, and queue asychronous
496 copies from the device back to the host after any work that has been issued.
497 Because the regions are still "live", increment an asynchronous reference
498 count to indicate that they should not be unmapped from host-side data
499 structures until the asynchronous copy has completed. */
501 attribute_hidden void
502 gomp_copy_from_async (struct target_mem_desc *tgt)
504 struct gomp_device_descr *devicep = tgt->device_descr;
505 size_t i;
507 gomp_mutex_lock (&devicep->lock);
509 for (i = 0; i < tgt->list_count; i++)
510 if (tgt->list[i] == NULL)
512 else if (tgt->list[i]->refcount > 1)
514 tgt->list[i]->refcount--;
515 tgt->list[i]->async_refcount++;
517 else
519 splay_tree_key k = tgt->list[i];
520 if (k->copy_from)
521 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
522 (void *) (k->tgt->tgt_start + k->tgt_offset),
523 k->host_end - k->host_start);
526 gomp_mutex_unlock (&devicep->lock);
529 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
530 variables back from device to host: if it is false, it is assumed that this
531 has been done already, i.e. by gomp_copy_from_async above. */
533 attribute_hidden void
534 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
536 struct gomp_device_descr *devicep = tgt->device_descr;
538 if (tgt->list_count == 0)
540 free (tgt);
541 return;
544 gomp_mutex_lock (&devicep->lock);
546 size_t i;
547 for (i = 0; i < tgt->list_count; i++)
548 if (tgt->list[i] == NULL)
550 else if (tgt->list[i]->refcount > 1)
551 tgt->list[i]->refcount--;
552 else if (tgt->list[i]->async_refcount > 0)
553 tgt->list[i]->async_refcount--;
554 else
556 splay_tree_key k = tgt->list[i];
557 if (k->copy_from && do_copyfrom)
558 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
559 (void *) (k->tgt->tgt_start + k->tgt_offset),
560 k->host_end - k->host_start);
561 splay_tree_remove (&devicep->mem_map, k);
562 if (k->tgt->refcount > 1)
563 k->tgt->refcount--;
564 else
565 gomp_unmap_tgt (k->tgt);
568 if (tgt->refcount > 1)
569 tgt->refcount--;
570 else
571 gomp_unmap_tgt (tgt);
573 gomp_mutex_unlock (&devicep->lock);
576 static void
577 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
578 size_t *sizes, void *kinds, bool is_openacc)
580 size_t i;
581 struct splay_tree_key_s cur_node;
582 const int typemask = is_openacc ? 0xff : 0x7;
584 if (!devicep)
585 return;
587 if (mapnum == 0)
588 return;
590 gomp_mutex_lock (&devicep->lock);
591 for (i = 0; i < mapnum; i++)
592 if (sizes[i])
594 cur_node.host_start = (uintptr_t) hostaddrs[i];
595 cur_node.host_end = cur_node.host_start + sizes[i];
596 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
597 if (n)
599 int kind = get_kind (is_openacc, kinds, i);
600 if (n->host_start > cur_node.host_start
601 || n->host_end < cur_node.host_end)
603 gomp_mutex_unlock (&devicep->lock);
604 gomp_fatal ("Trying to update [%p..%p) object when "
605 "only [%p..%p) is mapped",
606 (void *) cur_node.host_start,
607 (void *) cur_node.host_end,
608 (void *) n->host_start,
609 (void *) n->host_end);
611 if (GOMP_MAP_COPY_TO_P (kind & typemask))
612 devicep->host2dev_func (devicep->target_id,
613 (void *) (n->tgt->tgt_start
614 + n->tgt_offset
615 + cur_node.host_start
616 - n->host_start),
617 (void *) cur_node.host_start,
618 cur_node.host_end - cur_node.host_start);
619 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
620 devicep->dev2host_func (devicep->target_id,
621 (void *) cur_node.host_start,
622 (void *) (n->tgt->tgt_start
623 + n->tgt_offset
624 + cur_node.host_start
625 - n->host_start),
626 cur_node.host_end - cur_node.host_start);
628 else
630 gomp_mutex_unlock (&devicep->lock);
631 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
632 (void *) cur_node.host_start,
633 (void *) cur_node.host_end);
636 gomp_mutex_unlock (&devicep->lock);
639 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
640 And insert to splay tree the mapping between addresses from HOST_TABLE and
641 from loaded target image. */
643 static void
644 gomp_offload_image_to_device (struct gomp_device_descr *devicep,
645 const void *host_table, const void *target_data,
646 bool is_register_lock)
648 void **host_func_table = ((void ***) host_table)[0];
649 void **host_funcs_end = ((void ***) host_table)[1];
650 void **host_var_table = ((void ***) host_table)[2];
651 void **host_vars_end = ((void ***) host_table)[3];
653 /* The func table contains only addresses, the var table contains addresses
654 and corresponding sizes. */
655 int num_funcs = host_funcs_end - host_func_table;
656 int num_vars = (host_vars_end - host_var_table) / 2;
658 /* Load image to device and get target addresses for the image. */
659 struct addr_pair *target_table = NULL;
660 int i, num_target_entries
661 = devicep->load_image_func (devicep->target_id, target_data, &target_table);
663 if (num_target_entries != num_funcs + num_vars)
665 gomp_mutex_unlock (&devicep->lock);
666 if (is_register_lock)
667 gomp_mutex_unlock (&register_lock);
668 gomp_fatal ("Can't map target functions or variables");
671 /* Insert host-target address mapping into splay tree. */
672 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
673 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
674 tgt->refcount = 1;
675 tgt->tgt_start = 0;
676 tgt->tgt_end = 0;
677 tgt->to_free = NULL;
678 tgt->prev = NULL;
679 tgt->list_count = 0;
680 tgt->device_descr = devicep;
681 splay_tree_node array = tgt->array;
683 for (i = 0; i < num_funcs; i++)
685 splay_tree_key k = &array->key;
686 k->host_start = (uintptr_t) host_func_table[i];
687 k->host_end = k->host_start + 1;
688 k->tgt = tgt;
689 k->tgt_offset = target_table[i].start;
690 k->refcount = 1;
691 k->async_refcount = 0;
692 k->copy_from = false;
693 array->left = NULL;
694 array->right = NULL;
695 splay_tree_insert (&devicep->mem_map, array);
696 array++;
699 for (i = 0; i < num_vars; i++)
701 struct addr_pair *target_var = &target_table[num_funcs + i];
702 if (target_var->end - target_var->start
703 != (uintptr_t) host_var_table[i * 2 + 1])
705 gomp_mutex_unlock (&devicep->lock);
706 if (is_register_lock)
707 gomp_mutex_unlock (&register_lock);
708 gomp_fatal ("Can't map target variables (size mismatch)");
711 splay_tree_key k = &array->key;
712 k->host_start = (uintptr_t) host_var_table[i * 2];
713 k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
714 k->tgt = tgt;
715 k->tgt_offset = target_var->start;
716 k->refcount = 1;
717 k->async_refcount = 0;
718 k->copy_from = false;
719 array->left = NULL;
720 array->right = NULL;
721 splay_tree_insert (&devicep->mem_map, array);
722 array++;
725 free (target_table);
728 /* This function should be called from every offload image while loading.
729 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
730 the target, and TARGET_DATA needed by target plugin. */
732 void
733 GOMP_offload_register (const void *host_table, int target_type,
734 const void *target_data)
736 int i;
737 gomp_mutex_lock (&register_lock);
739 /* Load image to all initialized devices. */
740 for (i = 0; i < num_devices; i++)
742 struct gomp_device_descr *devicep = &devices[i];
743 gomp_mutex_lock (&devicep->lock);
744 if (devicep->type == target_type && devicep->is_initialized)
745 gomp_offload_image_to_device (devicep, host_table, target_data, true);
746 gomp_mutex_unlock (&devicep->lock);
749 /* Insert image to array of pending images. */
750 offload_images
751 = gomp_realloc_unlock (offload_images,
752 (num_offload_images + 1)
753 * sizeof (struct offload_image_descr));
754 offload_images[num_offload_images].type = target_type;
755 offload_images[num_offload_images].host_table = host_table;
756 offload_images[num_offload_images].target_data = target_data;
758 num_offload_images++;
759 gomp_mutex_unlock (&register_lock);
762 /* This function should be called from every offload image while unloading.
763 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
764 the target, and TARGET_DATA needed by target plugin. */
766 void
767 GOMP_offload_unregister (const void *host_table, int target_type,
768 const void *target_data)
770 void **host_func_table = ((void ***) host_table)[0];
771 void **host_funcs_end = ((void ***) host_table)[1];
772 void **host_var_table = ((void ***) host_table)[2];
773 void **host_vars_end = ((void ***) host_table)[3];
774 int i;
776 /* The func table contains only addresses, the var table contains addresses
777 and corresponding sizes. */
778 int num_funcs = host_funcs_end - host_func_table;
779 int num_vars = (host_vars_end - host_var_table) / 2;
781 gomp_mutex_lock (&register_lock);
783 /* Unload image from all initialized devices. */
784 for (i = 0; i < num_devices; i++)
786 int j;
787 struct gomp_device_descr *devicep = &devices[i];
788 gomp_mutex_lock (&devicep->lock);
789 if (devicep->type != target_type || !devicep->is_initialized)
791 gomp_mutex_unlock (&devicep->lock);
792 continue;
795 devicep->unload_image_func (devicep->target_id, target_data);
797 /* Remove mapping from splay tree. */
798 struct splay_tree_key_s k;
799 splay_tree_key node = NULL;
800 if (num_funcs > 0)
802 k.host_start = (uintptr_t) host_func_table[0];
803 k.host_end = k.host_start + 1;
804 node = splay_tree_lookup (&devicep->mem_map, &k);
806 else if (num_vars > 0)
808 k.host_start = (uintptr_t) host_var_table[0];
809 k.host_end = k.host_start + (uintptr_t) host_var_table[1];
810 node = splay_tree_lookup (&devicep->mem_map, &k);
813 for (j = 0; j < num_funcs; j++)
815 k.host_start = (uintptr_t) host_func_table[j];
816 k.host_end = k.host_start + 1;
817 splay_tree_remove (&devicep->mem_map, &k);
820 for (j = 0; j < num_vars; j++)
822 k.host_start = (uintptr_t) host_var_table[j * 2];
823 k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
824 splay_tree_remove (&devicep->mem_map, &k);
827 if (node)
829 free (node->tgt);
830 free (node);
833 gomp_mutex_unlock (&devicep->lock);
836 /* Remove image from array of pending images. */
837 for (i = 0; i < num_offload_images; i++)
838 if (offload_images[i].target_data == target_data)
840 offload_images[i] = offload_images[--num_offload_images];
841 break;
844 gomp_mutex_unlock (&register_lock);
847 /* This function initializes the target device, specified by DEVICEP. DEVICEP
848 must be locked on entry, and remains locked on return. */
850 attribute_hidden void
851 gomp_init_device (struct gomp_device_descr *devicep)
853 int i;
854 devicep->init_device_func (devicep->target_id);
856 /* Load to device all images registered by the moment. */
857 for (i = 0; i < num_offload_images; i++)
859 struct offload_image_descr *image = &offload_images[i];
860 if (image->type == devicep->type)
861 gomp_offload_image_to_device (devicep, image->host_table,
862 image->target_data, false);
865 devicep->is_initialized = true;
868 /* Free address mapping tables. MM must be locked on entry, and remains locked
869 on return. */
871 attribute_hidden void
872 gomp_free_memmap (struct splay_tree_s *mem_map)
874 while (mem_map->root)
876 struct target_mem_desc *tgt = mem_map->root->key.tgt;
878 splay_tree_remove (mem_map, &mem_map->root->key);
879 free (tgt->array);
880 free (tgt);
884 /* This function de-initializes the target device, specified by DEVICEP.
885 DEVICEP must be locked on entry, and remains locked on return. */
887 attribute_hidden void
888 gomp_fini_device (struct gomp_device_descr *devicep)
890 if (devicep->is_initialized)
891 devicep->fini_device_func (devicep->target_id);
893 devicep->is_initialized = false;
896 /* Called when encountering a target directive. If DEVICE
897 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
898 GOMP_DEVICE_HOST_FALLBACK (or any value
899 larger than last available hw device), use host fallback.
900 FN is address of host code, UNUSED is part of the current ABI, but
901 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
902 with MAPNUM entries, with addresses of the host objects,
903 sizes of the host objects (resp. for pointer kind pointer bias
904 and assumed sizeof (void *) size) and kinds. */
906 void
907 GOMP_target (int device, void (*fn) (void *), const void *unused,
908 size_t mapnum, void **hostaddrs, size_t *sizes,
909 unsigned char *kinds)
911 struct gomp_device_descr *devicep = resolve_device (device);
913 if (devicep == NULL
914 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
916 /* Host fallback. */
917 struct gomp_thread old_thr, *thr = gomp_thread ();
918 old_thr = *thr;
919 memset (thr, '\0', sizeof (*thr));
920 if (gomp_places_list)
922 thr->place = old_thr.place;
923 thr->ts.place_partition_len = gomp_places_list_len;
925 fn (hostaddrs);
926 gomp_free_thread (thr);
927 *thr = old_thr;
928 return;
931 gomp_mutex_lock (&devicep->lock);
932 if (!devicep->is_initialized)
933 gomp_init_device (devicep);
934 gomp_mutex_unlock (&devicep->lock);
936 void *fn_addr;
938 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
939 fn_addr = (void *) fn;
940 else
942 gomp_mutex_lock (&devicep->lock);
943 struct splay_tree_key_s k;
944 k.host_start = (uintptr_t) fn;
945 k.host_end = k.host_start + 1;
946 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
947 if (tgt_fn == NULL)
949 gomp_mutex_unlock (&devicep->lock);
950 gomp_fatal ("Target function wasn't mapped");
952 gomp_mutex_unlock (&devicep->lock);
954 fn_addr = (void *) tgt_fn->tgt_offset;
957 struct target_mem_desc *tgt_vars;
958 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
959 tgt_vars = NULL;
960 else
961 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
962 false, true);
963 struct gomp_thread old_thr, *thr = gomp_thread ();
964 old_thr = *thr;
965 memset (thr, '\0', sizeof (*thr));
966 if (gomp_places_list)
968 thr->place = old_thr.place;
969 thr->ts.place_partition_len = gomp_places_list_len;
971 devicep->run_func (devicep->target_id, fn_addr,
972 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs);
973 gomp_free_thread (thr);
974 *thr = old_thr;
975 if (tgt_vars)
976 gomp_unmap_vars (tgt_vars, true);
979 void
980 GOMP_target_data (int device, const void *unused, size_t mapnum,
981 void **hostaddrs, size_t *sizes, unsigned char *kinds)
983 struct gomp_device_descr *devicep = resolve_device (device);
985 if (devicep == NULL
986 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
987 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
989 /* Host fallback or accelerators with memory coherent access. */
990 struct gomp_task_icv *icv = gomp_icv (false);
991 if (icv->target_data)
993 /* Even when doing a host fallback, if there are any active
994 #pragma omp target data constructs, need to remember the
995 new #pragma omp target data, otherwise GOMP_target_end_data
996 would get out of sync. */
997 struct target_mem_desc *tgt
998 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
999 tgt->prev = icv->target_data;
1000 icv->target_data = tgt;
1002 return;
1005 gomp_mutex_lock (&devicep->lock);
1006 if (!devicep->is_initialized)
1007 gomp_init_device (devicep);
1008 gomp_mutex_unlock (&devicep->lock);
1010 struct target_mem_desc *tgt
1011 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1012 false);
1013 struct gomp_task_icv *icv = gomp_icv (true);
1014 tgt->prev = icv->target_data;
1015 icv->target_data = tgt;
1018 void
1019 GOMP_target_end_data (void)
1021 struct gomp_task_icv *icv = gomp_icv (false);
1022 if (icv->target_data)
1024 struct target_mem_desc *tgt = icv->target_data;
1025 icv->target_data = tgt->prev;
1026 gomp_unmap_vars (tgt, true);
1030 void
1031 GOMP_target_update (int device, const void *unused, size_t mapnum,
1032 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1034 struct gomp_device_descr *devicep = resolve_device (device);
1036 if (devicep == NULL
1037 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1038 return;
1040 gomp_mutex_lock (&devicep->lock);
1041 if (!devicep->is_initialized)
1042 gomp_init_device (devicep);
1043 gomp_mutex_unlock (&devicep->lock);
1045 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1048 void
1049 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
1051 if (thread_limit)
1053 struct gomp_task_icv *icv = gomp_icv (true);
1054 icv->thread_limit_var
1055 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
1057 (void) num_teams;
1060 #ifdef PLUGIN_SUPPORT
1062 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
1063 in PLUGIN_NAME.
1064 The handles of the found functions are stored in the corresponding fields
1065 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
1067 static bool
1068 gomp_load_plugin_for_device (struct gomp_device_descr *device,
1069 const char *plugin_name)
1071 const char *err = NULL, *last_missing = NULL;
1072 int optional_present, optional_total;
1074 /* Clear any existing error. */
1075 dlerror ();
1077 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
1078 if (!plugin_handle)
1080 err = dlerror ();
1081 goto out;
1084 /* Check if all required functions are available in the plugin and store
1085 their handlers. */
1086 #define DLSYM(f) \
1087 do \
1089 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
1090 err = dlerror (); \
1091 if (err != NULL) \
1092 goto out; \
1094 while (0)
1095 /* Similar, but missing functions are not an error. */
1096 #define DLSYM_OPT(f, n) \
1097 do \
1099 const char *tmp_err; \
1100 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
1101 tmp_err = dlerror (); \
1102 if (tmp_err == NULL) \
1103 optional_present++; \
1104 else \
1105 last_missing = #n; \
1106 optional_total++; \
1108 while (0)
1110 DLSYM (get_name);
1111 DLSYM (get_caps);
1112 DLSYM (get_type);
1113 DLSYM (get_num_devices);
1114 DLSYM (init_device);
1115 DLSYM (fini_device);
1116 DLSYM (load_image);
1117 DLSYM (unload_image);
1118 DLSYM (alloc);
1119 DLSYM (free);
1120 DLSYM (dev2host);
1121 DLSYM (host2dev);
1122 device->capabilities = device->get_caps_func ();
1123 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1124 DLSYM (run);
1125 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1127 optional_present = optional_total = 0;
1128 DLSYM_OPT (openacc.exec, openacc_parallel);
1129 DLSYM_OPT (openacc.register_async_cleanup,
1130 openacc_register_async_cleanup);
1131 DLSYM_OPT (openacc.async_test, openacc_async_test);
1132 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
1133 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
1134 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
1135 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
1136 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
1137 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
1138 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
1139 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
1140 /* Require all the OpenACC handlers if we have
1141 GOMP_OFFLOAD_CAP_OPENACC_200. */
1142 if (optional_present != optional_total)
1144 err = "plugin missing OpenACC handler function";
1145 goto out;
1147 optional_present = optional_total = 0;
1148 DLSYM_OPT (openacc.cuda.get_current_device,
1149 openacc_get_current_cuda_device);
1150 DLSYM_OPT (openacc.cuda.get_current_context,
1151 openacc_get_current_cuda_context);
1152 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1153 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1154 /* Make sure all the CUDA functions are there if any of them are. */
1155 if (optional_present && optional_present != optional_total)
1157 err = "plugin missing OpenACC CUDA handler function";
1158 goto out;
1161 #undef DLSYM
1162 #undef DLSYM_OPT
1164 out:
1165 if (err != NULL)
1167 gomp_error ("while loading %s: %s", plugin_name, err);
1168 if (last_missing)
1169 gomp_error ("missing function was %s", last_missing);
1170 if (plugin_handle)
1171 dlclose (plugin_handle);
1173 return err == NULL;
1176 /* This function initializes the runtime needed for offloading.
1177 It parses the list of offload targets and tries to load the plugins for
1178 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1179 will be set, and the array DEVICES initialized, containing descriptors for
1180 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1181 by the others. */
1183 static void
1184 gomp_target_init (void)
1186 const char *prefix ="libgomp-plugin-";
1187 const char *suffix = SONAME_SUFFIX (1);
1188 const char *cur, *next;
1189 char *plugin_name;
1190 int i, new_num_devices;
1192 num_devices = 0;
1193 devices = NULL;
1195 cur = OFFLOAD_TARGETS;
1196 if (*cur)
1199 struct gomp_device_descr current_device;
1201 next = strchr (cur, ',');
1203 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1204 + strlen (prefix) + strlen (suffix));
1205 if (!plugin_name)
1207 num_devices = 0;
1208 break;
1211 strcpy (plugin_name, prefix);
1212 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1213 strcat (plugin_name, suffix);
1215 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1217 new_num_devices = current_device.get_num_devices_func ();
1218 if (new_num_devices >= 1)
1220 /* Augment DEVICES and NUM_DEVICES. */
1222 devices = realloc (devices, (num_devices + new_num_devices)
1223 * sizeof (struct gomp_device_descr));
1224 if (!devices)
1226 num_devices = 0;
1227 free (plugin_name);
1228 break;
1231 current_device.name = current_device.get_name_func ();
1232 /* current_device.capabilities has already been set. */
1233 current_device.type = current_device.get_type_func ();
1234 current_device.mem_map.root = NULL;
1235 current_device.is_initialized = false;
1236 current_device.openacc.data_environ = NULL;
1237 for (i = 0; i < new_num_devices; i++)
1239 current_device.target_id = i;
1240 devices[num_devices] = current_device;
1241 gomp_mutex_init (&devices[num_devices].lock);
1242 num_devices++;
1247 free (plugin_name);
1248 cur = next + 1;
1250 while (next);
1252 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1253 NUM_DEVICES_OPENMP. */
1254 struct gomp_device_descr *devices_s
1255 = malloc (num_devices * sizeof (struct gomp_device_descr));
1256 if (!devices_s)
1258 num_devices = 0;
1259 free (devices);
1260 devices = NULL;
1262 num_devices_openmp = 0;
1263 for (i = 0; i < num_devices; i++)
1264 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1265 devices_s[num_devices_openmp++] = devices[i];
1266 int num_devices_after_openmp = num_devices_openmp;
1267 for (i = 0; i < num_devices; i++)
1268 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1269 devices_s[num_devices_after_openmp++] = devices[i];
1270 free (devices);
1271 devices = devices_s;
1273 for (i = 0; i < num_devices; i++)
1275 /* The 'devices' array can be moved (by the realloc call) until we have
1276 found all the plugins, so registering with the OpenACC runtime (which
1277 takes a copy of the pointer argument) must be delayed until now. */
1278 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1279 goacc_register (&devices[i]);
1283 #else /* PLUGIN_SUPPORT */
1284 /* If dlfcn.h is unavailable we always fallback to host execution.
1285 GOMP_target* routines are just stubs for this case. */
1286 static void
1287 gomp_target_init (void)
1290 #endif /* PLUGIN_SUPPORT */