include/:
[official-gcc.git] / libgomp / target.c
blobd8da7833aa96d2782a5fef8772b3e648a9dce40d
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
38 #endif
39 #include <string.h>
40 #include <assert.h>
42 #ifdef PLUGIN_SUPPORT
43 #include <dlfcn.h>
44 #include "plugin-suffix.h"
45 #endif
47 static void gomp_target_init (void);
49 /* The whole initialization code for offloading plugins is only run one. */
50 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52 /* Mutex for offload image registration. */
53 static gomp_mutex_t register_lock;
55 /* This structure describes an offload image.
56 It contains type of the target device, pointer to host table descriptor, and
57 pointer to target data. */
58 struct offload_image_descr {
59 enum offload_target_type type;
60 void *host_table;
61 void *target_data;
64 /* Array of descriptors of offload images. */
65 static struct offload_image_descr *offload_images;
67 /* Total number of offload images. */
68 static int num_offload_images;
70 /* Array of descriptors for all available devices. */
71 static struct gomp_device_descr *devices;
73 /* Total number of available devices. */
74 static int num_devices;
76 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
77 static int num_devices_openmp;
79 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
81 static void *
82 gomp_realloc_unlock (void *old, size_t size)
84 void *ret = realloc (old, size);
85 if (ret == NULL)
87 gomp_mutex_unlock (&register_lock);
88 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
90 return ret;
93 /* The comparison function. */
95 attribute_hidden int
96 splay_compare (splay_tree_key x, splay_tree_key y)
98 if (x->host_start == x->host_end
99 && y->host_start == y->host_end)
100 return 0;
101 if (x->host_end <= y->host_start)
102 return -1;
103 if (x->host_start >= y->host_end)
104 return 1;
105 return 0;
108 #include "splay-tree.h"
110 attribute_hidden void
111 gomp_init_targets_once (void)
113 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
116 attribute_hidden int
117 gomp_get_num_devices (void)
119 gomp_init_targets_once ();
120 return num_devices_openmp;
123 static struct gomp_device_descr *
124 resolve_device (int device_id)
126 if (device_id == GOMP_DEVICE_ICV)
128 struct gomp_task_icv *icv = gomp_icv (false);
129 device_id = icv->default_device_var;
132 if (device_id < 0 || device_id >= gomp_get_num_devices ())
133 return NULL;
135 return &devices[device_id];
139 /* Handle the case where splay_tree_lookup found oldn for newn.
140 Helper function of gomp_map_vars. */
142 static inline void
143 gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
144 splay_tree_key newn, unsigned char kind)
146 if ((kind & GOMP_MAP_FLAG_FORCE)
147 || oldn->host_start > newn->host_start
148 || oldn->host_end < newn->host_end)
150 gomp_mutex_unlock (&devicep->lock);
151 gomp_fatal ("Trying to map into device [%p..%p) object when "
152 "[%p..%p) is already mapped",
153 (void *) newn->host_start, (void *) newn->host_end,
154 (void *) oldn->host_start, (void *) oldn->host_end);
156 oldn->refcount++;
159 static int
160 get_kind (bool is_openacc, void *kinds, int idx)
162 return is_openacc ? ((unsigned short *) kinds)[idx]
163 : ((unsigned char *) kinds)[idx];
166 attribute_hidden struct target_mem_desc *
167 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
168 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
169 bool is_openacc, bool is_target)
171 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
172 const int rshift = is_openacc ? 8 : 3;
173 const int typemask = is_openacc ? 0xff : 0x7;
174 struct splay_tree_s *mem_map = &devicep->mem_map;
175 struct splay_tree_key_s cur_node;
176 struct target_mem_desc *tgt
177 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
178 tgt->list_count = mapnum;
179 tgt->refcount = 1;
180 tgt->device_descr = devicep;
182 if (mapnum == 0)
183 return tgt;
185 tgt_align = sizeof (void *);
186 tgt_size = 0;
187 if (is_target)
189 size_t align = 4 * sizeof (void *);
190 tgt_align = align;
191 tgt_size = mapnum * sizeof (void *);
194 gomp_mutex_lock (&devicep->lock);
196 for (i = 0; i < mapnum; i++)
198 int kind = get_kind (is_openacc, kinds, i);
199 if (hostaddrs[i] == NULL)
201 tgt->list[i] = NULL;
202 continue;
204 cur_node.host_start = (uintptr_t) hostaddrs[i];
205 if (!GOMP_MAP_POINTER_P (kind & typemask))
206 cur_node.host_end = cur_node.host_start + sizes[i];
207 else
208 cur_node.host_end = cur_node.host_start + sizeof (void *);
209 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
210 if (n)
212 tgt->list[i] = n;
213 gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
215 else
217 tgt->list[i] = NULL;
219 size_t align = (size_t) 1 << (kind >> rshift);
220 not_found_cnt++;
221 if (tgt_align < align)
222 tgt_align = align;
223 tgt_size = (tgt_size + align - 1) & ~(align - 1);
224 tgt_size += cur_node.host_end - cur_node.host_start;
225 if ((kind & typemask) == GOMP_MAP_TO_PSET)
227 size_t j;
228 for (j = i + 1; j < mapnum; j++)
229 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
230 & typemask))
231 break;
232 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
233 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
234 > cur_node.host_end))
235 break;
236 else
238 tgt->list[j] = NULL;
239 i++;
245 if (devaddrs)
247 if (mapnum != 1)
249 gomp_mutex_unlock (&devicep->lock);
250 gomp_fatal ("unexpected aggregation");
252 tgt->to_free = devaddrs[0];
253 tgt->tgt_start = (uintptr_t) tgt->to_free;
254 tgt->tgt_end = tgt->tgt_start + sizes[0];
256 else if (not_found_cnt || is_target)
258 /* Allocate tgt_align aligned tgt_size block of memory. */
259 /* FIXME: Perhaps change interface to allocate properly aligned
260 memory. */
261 tgt->to_free = devicep->alloc_func (devicep->target_id,
262 tgt_size + tgt_align - 1);
263 tgt->tgt_start = (uintptr_t) tgt->to_free;
264 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
265 tgt->tgt_end = tgt->tgt_start + tgt_size;
267 else
269 tgt->to_free = NULL;
270 tgt->tgt_start = 0;
271 tgt->tgt_end = 0;
274 tgt_size = 0;
275 if (is_target)
276 tgt_size = mapnum * sizeof (void *);
278 tgt->array = NULL;
279 if (not_found_cnt)
281 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
282 splay_tree_node array = tgt->array;
283 size_t j;
285 for (i = 0; i < mapnum; i++)
286 if (tgt->list[i] == NULL)
288 int kind = get_kind (is_openacc, kinds, i);
289 if (hostaddrs[i] == NULL)
290 continue;
291 splay_tree_key k = &array->key;
292 k->host_start = (uintptr_t) hostaddrs[i];
293 if (!GOMP_MAP_POINTER_P (kind & typemask))
294 k->host_end = k->host_start + sizes[i];
295 else
296 k->host_end = k->host_start + sizeof (void *);
297 splay_tree_key n = splay_tree_lookup (mem_map, k);
298 if (n)
300 tgt->list[i] = n;
301 gomp_map_vars_existing (devicep, n, k, kind & typemask);
303 else
305 size_t align = (size_t) 1 << (kind >> rshift);
306 tgt->list[i] = k;
307 tgt_size = (tgt_size + align - 1) & ~(align - 1);
308 k->tgt = tgt;
309 k->tgt_offset = tgt_size;
310 tgt_size += k->host_end - k->host_start;
311 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
312 k->refcount = 1;
313 k->async_refcount = 0;
314 tgt->refcount++;
315 array->left = NULL;
316 array->right = NULL;
317 splay_tree_insert (mem_map, array);
318 switch (kind & typemask)
320 case GOMP_MAP_ALLOC:
321 case GOMP_MAP_FROM:
322 case GOMP_MAP_FORCE_ALLOC:
323 case GOMP_MAP_FORCE_FROM:
324 break;
325 case GOMP_MAP_TO:
326 case GOMP_MAP_TOFROM:
327 case GOMP_MAP_FORCE_TO:
328 case GOMP_MAP_FORCE_TOFROM:
329 /* FIXME: Perhaps add some smarts, like if copying
330 several adjacent fields from host to target, use some
331 host buffer to avoid sending each var individually. */
332 devicep->host2dev_func (devicep->target_id,
333 (void *) (tgt->tgt_start
334 + k->tgt_offset),
335 (void *) k->host_start,
336 k->host_end - k->host_start);
337 break;
338 case GOMP_MAP_POINTER:
339 cur_node.host_start
340 = (uintptr_t) *(void **) k->host_start;
341 if (cur_node.host_start == (uintptr_t) NULL)
343 cur_node.tgt_offset = (uintptr_t) NULL;
344 /* FIXME: see above FIXME comment. */
345 devicep->host2dev_func (devicep->target_id,
346 (void *) (tgt->tgt_start
347 + k->tgt_offset),
348 (void *) &cur_node.tgt_offset,
349 sizeof (void *));
350 break;
352 /* Add bias to the pointer value. */
353 cur_node.host_start += sizes[i];
354 cur_node.host_end = cur_node.host_start + 1;
355 n = splay_tree_lookup (mem_map, &cur_node);
356 if (n == NULL)
358 /* Could be possibly zero size array section. */
359 cur_node.host_end--;
360 n = splay_tree_lookup (mem_map, &cur_node);
361 if (n == NULL)
363 cur_node.host_start--;
364 n = splay_tree_lookup (mem_map, &cur_node);
365 cur_node.host_start++;
368 if (n == NULL)
370 gomp_mutex_unlock (&devicep->lock);
371 gomp_fatal ("Pointer target of array section "
372 "wasn't mapped");
374 cur_node.host_start -= n->host_start;
375 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
376 + cur_node.host_start;
377 /* At this point tgt_offset is target address of the
378 array section. Now subtract bias to get what we want
379 to initialize the pointer with. */
380 cur_node.tgt_offset -= sizes[i];
381 /* FIXME: see above FIXME comment. */
382 devicep->host2dev_func (devicep->target_id,
383 (void *) (tgt->tgt_start
384 + k->tgt_offset),
385 (void *) &cur_node.tgt_offset,
386 sizeof (void *));
387 break;
388 case GOMP_MAP_TO_PSET:
389 /* FIXME: see above FIXME comment. */
390 devicep->host2dev_func (devicep->target_id,
391 (void *) (tgt->tgt_start
392 + k->tgt_offset),
393 (void *) k->host_start,
394 k->host_end - k->host_start);
396 for (j = i + 1; j < mapnum; j++)
397 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
398 & typemask))
399 break;
400 else if ((uintptr_t) hostaddrs[j] < k->host_start
401 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
402 > k->host_end))
403 break;
404 else
406 tgt->list[j] = k;
407 k->refcount++;
408 cur_node.host_start
409 = (uintptr_t) *(void **) hostaddrs[j];
410 if (cur_node.host_start == (uintptr_t) NULL)
412 cur_node.tgt_offset = (uintptr_t) NULL;
413 /* FIXME: see above FIXME comment. */
414 devicep->host2dev_func (devicep->target_id,
415 (void *) (tgt->tgt_start + k->tgt_offset
416 + ((uintptr_t) hostaddrs[j]
417 - k->host_start)),
418 (void *) &cur_node.tgt_offset,
419 sizeof (void *));
420 i++;
421 continue;
423 /* Add bias to the pointer value. */
424 cur_node.host_start += sizes[j];
425 cur_node.host_end = cur_node.host_start + 1;
426 n = splay_tree_lookup (mem_map, &cur_node);
427 if (n == NULL)
429 /* Could be possibly zero size array section. */
430 cur_node.host_end--;
431 n = splay_tree_lookup (mem_map, &cur_node);
432 if (n == NULL)
434 cur_node.host_start--;
435 n = splay_tree_lookup (mem_map, &cur_node);
436 cur_node.host_start++;
439 if (n == NULL)
441 gomp_mutex_unlock (&devicep->lock);
442 gomp_fatal ("Pointer target of array section "
443 "wasn't mapped");
445 cur_node.host_start -= n->host_start;
446 cur_node.tgt_offset = n->tgt->tgt_start
447 + n->tgt_offset
448 + cur_node.host_start;
449 /* At this point tgt_offset is target address of the
450 array section. Now subtract bias to get what we
451 want to initialize the pointer with. */
452 cur_node.tgt_offset -= sizes[j];
453 /* FIXME: see above FIXME comment. */
454 devicep->host2dev_func (devicep->target_id,
455 (void *) (tgt->tgt_start + k->tgt_offset
456 + ((uintptr_t) hostaddrs[j]
457 - k->host_start)),
458 (void *) &cur_node.tgt_offset,
459 sizeof (void *));
460 i++;
462 break;
463 case GOMP_MAP_FORCE_PRESENT:
465 /* We already looked up the memory region above and it
466 was missing. */
467 size_t size = k->host_end - k->host_start;
468 gomp_mutex_unlock (&devicep->lock);
469 #ifdef HAVE_INTTYPES_H
470 gomp_fatal ("present clause: !acc_is_present (%p, "
471 "%"PRIu64" (0x%"PRIx64"))",
472 (void *) k->host_start,
473 (uint64_t) size, (uint64_t) size);
474 #else
475 gomp_fatal ("present clause: !acc_is_present (%p, "
476 "%lu (0x%lx))", (void *) k->host_start,
477 (unsigned long) size, (unsigned long) size);
478 #endif
480 break;
481 case GOMP_MAP_FORCE_DEVICEPTR:
482 assert (k->host_end - k->host_start == sizeof (void *));
484 devicep->host2dev_func (devicep->target_id,
485 (void *) (tgt->tgt_start
486 + k->tgt_offset),
487 (void *) k->host_start,
488 sizeof (void *));
489 break;
490 default:
491 gomp_mutex_unlock (&devicep->lock);
492 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
493 kind);
495 array++;
500 if (is_target)
502 for (i = 0; i < mapnum; i++)
504 if (tgt->list[i] == NULL)
505 cur_node.tgt_offset = (uintptr_t) NULL;
506 else
507 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
508 + tgt->list[i]->tgt_offset;
509 /* FIXME: see above FIXME comment. */
510 devicep->host2dev_func (devicep->target_id,
511 (void *) (tgt->tgt_start
512 + i * sizeof (void *)),
513 (void *) &cur_node.tgt_offset,
514 sizeof (void *));
518 gomp_mutex_unlock (&devicep->lock);
519 return tgt;
522 static void
523 gomp_unmap_tgt (struct target_mem_desc *tgt)
525 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
526 if (tgt->tgt_end)
527 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
529 free (tgt->array);
530 free (tgt);
533 /* Decrease the refcount for a set of mapped variables, and queue asychronous
534 copies from the device back to the host after any work that has been issued.
535 Because the regions are still "live", increment an asynchronous reference
536 count to indicate that they should not be unmapped from host-side data
537 structures until the asynchronous copy has completed. */
539 attribute_hidden void
540 gomp_copy_from_async (struct target_mem_desc *tgt)
542 struct gomp_device_descr *devicep = tgt->device_descr;
543 size_t i;
545 gomp_mutex_lock (&devicep->lock);
547 for (i = 0; i < tgt->list_count; i++)
548 if (tgt->list[i] == NULL)
550 else if (tgt->list[i]->refcount > 1)
552 tgt->list[i]->refcount--;
553 tgt->list[i]->async_refcount++;
555 else
557 splay_tree_key k = tgt->list[i];
558 if (k->copy_from)
559 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
560 (void *) (k->tgt->tgt_start + k->tgt_offset),
561 k->host_end - k->host_start);
564 gomp_mutex_unlock (&devicep->lock);
567 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
568 variables back from device to host: if it is false, it is assumed that this
569 has been done already, i.e. by gomp_copy_from_async above. */
571 attribute_hidden void
572 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
574 struct gomp_device_descr *devicep = tgt->device_descr;
576 if (tgt->list_count == 0)
578 free (tgt);
579 return;
582 gomp_mutex_lock (&devicep->lock);
584 size_t i;
585 for (i = 0; i < tgt->list_count; i++)
586 if (tgt->list[i] == NULL)
588 else if (tgt->list[i]->refcount > 1)
589 tgt->list[i]->refcount--;
590 else if (tgt->list[i]->async_refcount > 0)
591 tgt->list[i]->async_refcount--;
592 else
594 splay_tree_key k = tgt->list[i];
595 if (k->copy_from && do_copyfrom)
596 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
597 (void *) (k->tgt->tgt_start + k->tgt_offset),
598 k->host_end - k->host_start);
599 splay_tree_remove (&devicep->mem_map, k);
600 if (k->tgt->refcount > 1)
601 k->tgt->refcount--;
602 else
603 gomp_unmap_tgt (k->tgt);
606 if (tgt->refcount > 1)
607 tgt->refcount--;
608 else
609 gomp_unmap_tgt (tgt);
611 gomp_mutex_unlock (&devicep->lock);
614 static void
615 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
616 size_t *sizes, void *kinds, bool is_openacc)
618 size_t i;
619 struct splay_tree_key_s cur_node;
620 const int typemask = is_openacc ? 0xff : 0x7;
622 if (!devicep)
623 return;
625 if (mapnum == 0)
626 return;
628 gomp_mutex_lock (&devicep->lock);
629 for (i = 0; i < mapnum; i++)
630 if (sizes[i])
632 cur_node.host_start = (uintptr_t) hostaddrs[i];
633 cur_node.host_end = cur_node.host_start + sizes[i];
634 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
635 if (n)
637 int kind = get_kind (is_openacc, kinds, i);
638 if (n->host_start > cur_node.host_start
639 || n->host_end < cur_node.host_end)
641 gomp_mutex_unlock (&devicep->lock);
642 gomp_fatal ("Trying to update [%p..%p) object when "
643 "only [%p..%p) is mapped",
644 (void *) cur_node.host_start,
645 (void *) cur_node.host_end,
646 (void *) n->host_start,
647 (void *) n->host_end);
649 if (GOMP_MAP_COPY_TO_P (kind & typemask))
650 devicep->host2dev_func (devicep->target_id,
651 (void *) (n->tgt->tgt_start
652 + n->tgt_offset
653 + cur_node.host_start
654 - n->host_start),
655 (void *) cur_node.host_start,
656 cur_node.host_end - cur_node.host_start);
657 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
658 devicep->dev2host_func (devicep->target_id,
659 (void *) cur_node.host_start,
660 (void *) (n->tgt->tgt_start
661 + n->tgt_offset
662 + cur_node.host_start
663 - n->host_start),
664 cur_node.host_end - cur_node.host_start);
666 else
668 gomp_mutex_unlock (&devicep->lock);
669 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
670 (void *) cur_node.host_start,
671 (void *) cur_node.host_end);
674 gomp_mutex_unlock (&devicep->lock);
677 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
678 And insert to splay tree the mapping between addresses from HOST_TABLE and
679 from loaded target image. */
681 static void
682 gomp_offload_image_to_device (struct gomp_device_descr *devicep,
683 void *host_table, void *target_data,
684 bool is_register_lock)
686 void **host_func_table = ((void ***) host_table)[0];
687 void **host_funcs_end = ((void ***) host_table)[1];
688 void **host_var_table = ((void ***) host_table)[2];
689 void **host_vars_end = ((void ***) host_table)[3];
691 /* The func table contains only addresses, the var table contains addresses
692 and corresponding sizes. */
693 int num_funcs = host_funcs_end - host_func_table;
694 int num_vars = (host_vars_end - host_var_table) / 2;
696 /* Load image to device and get target addresses for the image. */
697 struct addr_pair *target_table = NULL;
698 int i, num_target_entries
699 = devicep->load_image_func (devicep->target_id, target_data, &target_table);
701 if (num_target_entries != num_funcs + num_vars)
703 gomp_mutex_unlock (&devicep->lock);
704 if (is_register_lock)
705 gomp_mutex_unlock (&register_lock);
706 gomp_fatal ("Can't map target functions or variables");
709 /* Insert host-target address mapping into splay tree. */
710 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
711 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
712 tgt->refcount = 1;
713 tgt->tgt_start = 0;
714 tgt->tgt_end = 0;
715 tgt->to_free = NULL;
716 tgt->prev = NULL;
717 tgt->list_count = 0;
718 tgt->device_descr = devicep;
719 splay_tree_node array = tgt->array;
721 for (i = 0; i < num_funcs; i++)
723 splay_tree_key k = &array->key;
724 k->host_start = (uintptr_t) host_func_table[i];
725 k->host_end = k->host_start + 1;
726 k->tgt = tgt;
727 k->tgt_offset = target_table[i].start;
728 k->refcount = 1;
729 k->async_refcount = 0;
730 k->copy_from = false;
731 array->left = NULL;
732 array->right = NULL;
733 splay_tree_insert (&devicep->mem_map, array);
734 array++;
737 for (i = 0; i < num_vars; i++)
739 struct addr_pair *target_var = &target_table[num_funcs + i];
740 if (target_var->end - target_var->start
741 != (uintptr_t) host_var_table[i * 2 + 1])
743 gomp_mutex_unlock (&devicep->lock);
744 if (is_register_lock)
745 gomp_mutex_unlock (&register_lock);
746 gomp_fatal ("Can't map target variables (size mismatch)");
749 splay_tree_key k = &array->key;
750 k->host_start = (uintptr_t) host_var_table[i * 2];
751 k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
752 k->tgt = tgt;
753 k->tgt_offset = target_var->start;
754 k->refcount = 1;
755 k->async_refcount = 0;
756 k->copy_from = false;
757 array->left = NULL;
758 array->right = NULL;
759 splay_tree_insert (&devicep->mem_map, array);
760 array++;
763 free (target_table);
766 /* This function should be called from every offload image while loading.
767 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
768 the target, and TARGET_DATA needed by target plugin. */
770 void
771 GOMP_offload_register (void *host_table, enum offload_target_type target_type,
772 void *target_data)
774 int i;
775 gomp_mutex_lock (&register_lock);
777 /* Load image to all initialized devices. */
778 for (i = 0; i < num_devices; i++)
780 struct gomp_device_descr *devicep = &devices[i];
781 gomp_mutex_lock (&devicep->lock);
782 if (devicep->type == target_type && devicep->is_initialized)
783 gomp_offload_image_to_device (devicep, host_table, target_data, true);
784 gomp_mutex_unlock (&devicep->lock);
787 /* Insert image to array of pending images. */
788 offload_images
789 = gomp_realloc_unlock (offload_images,
790 (num_offload_images + 1)
791 * sizeof (struct offload_image_descr));
792 offload_images[num_offload_images].type = target_type;
793 offload_images[num_offload_images].host_table = host_table;
794 offload_images[num_offload_images].target_data = target_data;
796 num_offload_images++;
797 gomp_mutex_unlock (&register_lock);
800 /* This function should be called from every offload image while unloading.
801 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
802 the target, and TARGET_DATA needed by target plugin. */
804 void
805 GOMP_offload_unregister (void *host_table, enum offload_target_type target_type,
806 void *target_data)
808 void **host_func_table = ((void ***) host_table)[0];
809 void **host_funcs_end = ((void ***) host_table)[1];
810 void **host_var_table = ((void ***) host_table)[2];
811 void **host_vars_end = ((void ***) host_table)[3];
812 int i;
814 /* The func table contains only addresses, the var table contains addresses
815 and corresponding sizes. */
816 int num_funcs = host_funcs_end - host_func_table;
817 int num_vars = (host_vars_end - host_var_table) / 2;
819 gomp_mutex_lock (&register_lock);
821 /* Unload image from all initialized devices. */
822 for (i = 0; i < num_devices; i++)
824 int j;
825 struct gomp_device_descr *devicep = &devices[i];
826 gomp_mutex_lock (&devicep->lock);
827 if (devicep->type != target_type || !devicep->is_initialized)
829 gomp_mutex_unlock (&devicep->lock);
830 continue;
833 devicep->unload_image_func (devicep->target_id, target_data);
835 /* Remove mapping from splay tree. */
836 struct splay_tree_key_s k;
837 splay_tree_key node = NULL;
838 if (num_funcs > 0)
840 k.host_start = (uintptr_t) host_func_table[0];
841 k.host_end = k.host_start + 1;
842 node = splay_tree_lookup (&devicep->mem_map, &k);
844 else if (num_vars > 0)
846 k.host_start = (uintptr_t) host_var_table[0];
847 k.host_end = k.host_start + (uintptr_t) host_var_table[1];
848 node = splay_tree_lookup (&devicep->mem_map, &k);
851 for (j = 0; j < num_funcs; j++)
853 k.host_start = (uintptr_t) host_func_table[j];
854 k.host_end = k.host_start + 1;
855 splay_tree_remove (&devicep->mem_map, &k);
858 for (j = 0; j < num_vars; j++)
860 k.host_start = (uintptr_t) host_var_table[j * 2];
861 k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
862 splay_tree_remove (&devicep->mem_map, &k);
865 if (node)
867 free (node->tgt);
868 free (node);
871 gomp_mutex_unlock (&devicep->lock);
874 /* Remove image from array of pending images. */
875 for (i = 0; i < num_offload_images; i++)
876 if (offload_images[i].target_data == target_data)
878 offload_images[i] = offload_images[--num_offload_images];
879 break;
882 gomp_mutex_unlock (&register_lock);
885 /* This function initializes the target device, specified by DEVICEP. DEVICEP
886 must be locked on entry, and remains locked on return. */
888 attribute_hidden void
889 gomp_init_device (struct gomp_device_descr *devicep)
891 int i;
892 devicep->init_device_func (devicep->target_id);
894 /* Load to device all images registered by the moment. */
895 for (i = 0; i < num_offload_images; i++)
897 struct offload_image_descr *image = &offload_images[i];
898 if (image->type == devicep->type)
899 gomp_offload_image_to_device (devicep, image->host_table,
900 image->target_data, false);
903 devicep->is_initialized = true;
906 /* Free address mapping tables. MM must be locked on entry, and remains locked
907 on return. */
909 attribute_hidden void
910 gomp_free_memmap (struct splay_tree_s *mem_map)
912 while (mem_map->root)
914 struct target_mem_desc *tgt = mem_map->root->key.tgt;
916 splay_tree_remove (mem_map, &mem_map->root->key);
917 free (tgt->array);
918 free (tgt);
922 /* This function de-initializes the target device, specified by DEVICEP.
923 DEVICEP must be locked on entry, and remains locked on return. */
925 attribute_hidden void
926 gomp_fini_device (struct gomp_device_descr *devicep)
928 if (devicep->is_initialized)
929 devicep->fini_device_func (devicep->target_id);
931 devicep->is_initialized = false;
934 /* Called when encountering a target directive. If DEVICE
935 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
936 GOMP_DEVICE_HOST_FALLBACK (or any value
937 larger than last available hw device), use host fallback.
938 FN is address of host code, UNUSED is part of the current ABI, but
939 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
940 with MAPNUM entries, with addresses of the host objects,
941 sizes of the host objects (resp. for pointer kind pointer bias
942 and assumed sizeof (void *) size) and kinds. */
944 void
945 GOMP_target (int device, void (*fn) (void *), const void *unused,
946 size_t mapnum, void **hostaddrs, size_t *sizes,
947 unsigned char *kinds)
949 struct gomp_device_descr *devicep = resolve_device (device);
951 if (devicep == NULL
952 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
954 /* Host fallback. */
955 struct gomp_thread old_thr, *thr = gomp_thread ();
956 old_thr = *thr;
957 memset (thr, '\0', sizeof (*thr));
958 if (gomp_places_list)
960 thr->place = old_thr.place;
961 thr->ts.place_partition_len = gomp_places_list_len;
963 fn (hostaddrs);
964 gomp_free_thread (thr);
965 *thr = old_thr;
966 return;
969 gomp_mutex_lock (&devicep->lock);
970 if (!devicep->is_initialized)
971 gomp_init_device (devicep);
972 gomp_mutex_unlock (&devicep->lock);
974 void *fn_addr;
976 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
977 fn_addr = (void *) fn;
978 else
980 gomp_mutex_lock (&devicep->lock);
981 struct splay_tree_key_s k;
982 k.host_start = (uintptr_t) fn;
983 k.host_end = k.host_start + 1;
984 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
985 if (tgt_fn == NULL)
987 gomp_mutex_unlock (&devicep->lock);
988 gomp_fatal ("Target function wasn't mapped");
990 gomp_mutex_unlock (&devicep->lock);
992 fn_addr = (void *) tgt_fn->tgt_offset;
995 struct target_mem_desc *tgt_vars
996 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
997 true);
998 struct gomp_thread old_thr, *thr = gomp_thread ();
999 old_thr = *thr;
1000 memset (thr, '\0', sizeof (*thr));
1001 if (gomp_places_list)
1003 thr->place = old_thr.place;
1004 thr->ts.place_partition_len = gomp_places_list_len;
1006 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
1007 gomp_free_thread (thr);
1008 *thr = old_thr;
1009 gomp_unmap_vars (tgt_vars, true);
1012 void
1013 GOMP_target_data (int device, const void *unused, size_t mapnum,
1014 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1016 struct gomp_device_descr *devicep = resolve_device (device);
1018 if (devicep == NULL
1019 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1021 /* Host fallback. */
1022 struct gomp_task_icv *icv = gomp_icv (false);
1023 if (icv->target_data)
1025 /* Even when doing a host fallback, if there are any active
1026 #pragma omp target data constructs, need to remember the
1027 new #pragma omp target data, otherwise GOMP_target_end_data
1028 would get out of sync. */
1029 struct target_mem_desc *tgt
1030 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
1031 tgt->prev = icv->target_data;
1032 icv->target_data = tgt;
1034 return;
1037 gomp_mutex_lock (&devicep->lock);
1038 if (!devicep->is_initialized)
1039 gomp_init_device (devicep);
1040 gomp_mutex_unlock (&devicep->lock);
1042 struct target_mem_desc *tgt
1043 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1044 false);
1045 struct gomp_task_icv *icv = gomp_icv (true);
1046 tgt->prev = icv->target_data;
1047 icv->target_data = tgt;
1050 void
1051 GOMP_target_end_data (void)
1053 struct gomp_task_icv *icv = gomp_icv (false);
1054 if (icv->target_data)
1056 struct target_mem_desc *tgt = icv->target_data;
1057 icv->target_data = tgt->prev;
1058 gomp_unmap_vars (tgt, true);
1062 void
1063 GOMP_target_update (int device, const void *unused, size_t mapnum,
1064 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1066 struct gomp_device_descr *devicep = resolve_device (device);
1068 if (devicep == NULL
1069 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1070 return;
1072 gomp_mutex_lock (&devicep->lock);
1073 if (!devicep->is_initialized)
1074 gomp_init_device (devicep);
1075 gomp_mutex_unlock (&devicep->lock);
1077 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1080 void
1081 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
1083 if (thread_limit)
1085 struct gomp_task_icv *icv = gomp_icv (true);
1086 icv->thread_limit_var
1087 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
1089 (void) num_teams;
1092 #ifdef PLUGIN_SUPPORT
1094 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
1095 in PLUGIN_NAME.
1096 The handles of the found functions are stored in the corresponding fields
1097 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
1099 static bool
1100 gomp_load_plugin_for_device (struct gomp_device_descr *device,
1101 const char *plugin_name)
1103 const char *err = NULL, *last_missing = NULL;
1104 int optional_present, optional_total;
1106 /* Clear any existing error. */
1107 dlerror ();
1109 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
1110 if (!plugin_handle)
1112 err = dlerror ();
1113 goto out;
1116 /* Check if all required functions are available in the plugin and store
1117 their handlers. */
1118 #define DLSYM(f) \
1119 do \
1121 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
1122 err = dlerror (); \
1123 if (err != NULL) \
1124 goto out; \
1126 while (0)
1127 /* Similar, but missing functions are not an error. */
1128 #define DLSYM_OPT(f, n) \
1129 do \
1131 const char *tmp_err; \
1132 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
1133 tmp_err = dlerror (); \
1134 if (tmp_err == NULL) \
1135 optional_present++; \
1136 else \
1137 last_missing = #n; \
1138 optional_total++; \
1140 while (0)
1142 DLSYM (get_name);
1143 DLSYM (get_caps);
1144 DLSYM (get_type);
1145 DLSYM (get_num_devices);
1146 DLSYM (init_device);
1147 DLSYM (fini_device);
1148 DLSYM (load_image);
1149 DLSYM (unload_image);
1150 DLSYM (alloc);
1151 DLSYM (free);
1152 DLSYM (dev2host);
1153 DLSYM (host2dev);
1154 device->capabilities = device->get_caps_func ();
1155 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1156 DLSYM (run);
1157 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1159 optional_present = optional_total = 0;
1160 DLSYM_OPT (openacc.exec, openacc_parallel);
1161 DLSYM_OPT (openacc.register_async_cleanup,
1162 openacc_register_async_cleanup);
1163 DLSYM_OPT (openacc.async_test, openacc_async_test);
1164 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
1165 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
1166 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
1167 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
1168 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
1169 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
1170 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
1171 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
1172 /* Require all the OpenACC handlers if we have
1173 GOMP_OFFLOAD_CAP_OPENACC_200. */
1174 if (optional_present != optional_total)
1176 err = "plugin missing OpenACC handler function";
1177 goto out;
1179 optional_present = optional_total = 0;
1180 DLSYM_OPT (openacc.cuda.get_current_device,
1181 openacc_get_current_cuda_device);
1182 DLSYM_OPT (openacc.cuda.get_current_context,
1183 openacc_get_current_cuda_context);
1184 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1185 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1186 /* Make sure all the CUDA functions are there if any of them are. */
1187 if (optional_present && optional_present != optional_total)
1189 err = "plugin missing OpenACC CUDA handler function";
1190 goto out;
1193 #undef DLSYM
1194 #undef DLSYM_OPT
1196 out:
1197 if (err != NULL)
1199 gomp_error ("while loading %s: %s", plugin_name, err);
1200 if (last_missing)
1201 gomp_error ("missing function was %s", last_missing);
1202 if (plugin_handle)
1203 dlclose (plugin_handle);
1205 return err == NULL;
1208 /* This function initializes the runtime needed for offloading.
1209 It parses the list of offload targets and tries to load the plugins for
1210 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1211 will be set, and the array DEVICES initialized, containing descriptors for
1212 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1213 by the others. */
1215 static void
1216 gomp_target_init (void)
1218 const char *prefix ="libgomp-plugin-";
1219 const char *suffix = SONAME_SUFFIX (1);
1220 const char *cur, *next;
1221 char *plugin_name;
1222 int i, new_num_devices;
1224 num_devices = 0;
1225 devices = NULL;
1227 cur = OFFLOAD_TARGETS;
1228 if (*cur)
1231 struct gomp_device_descr current_device;
1233 next = strchr (cur, ',');
1235 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1236 + strlen (prefix) + strlen (suffix));
1237 if (!plugin_name)
1239 num_devices = 0;
1240 break;
1243 strcpy (plugin_name, prefix);
1244 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1245 strcat (plugin_name, suffix);
1247 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1249 new_num_devices = current_device.get_num_devices_func ();
1250 if (new_num_devices >= 1)
1252 /* Augment DEVICES and NUM_DEVICES. */
1254 devices = realloc (devices, (num_devices + new_num_devices)
1255 * sizeof (struct gomp_device_descr));
1256 if (!devices)
1258 num_devices = 0;
1259 free (plugin_name);
1260 break;
1263 current_device.name = current_device.get_name_func ();
1264 /* current_device.capabilities has already been set. */
1265 current_device.type = current_device.get_type_func ();
1266 current_device.mem_map.root = NULL;
1267 current_device.is_initialized = false;
1268 current_device.openacc.data_environ = NULL;
1269 for (i = 0; i < new_num_devices; i++)
1271 current_device.target_id = i;
1272 devices[num_devices] = current_device;
1273 gomp_mutex_init (&devices[num_devices].lock);
1274 num_devices++;
1279 free (plugin_name);
1280 cur = next + 1;
1282 while (next);
1284 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1285 NUM_DEVICES_OPENMP. */
1286 struct gomp_device_descr *devices_s
1287 = malloc (num_devices * sizeof (struct gomp_device_descr));
1288 if (!devices_s)
1290 num_devices = 0;
1291 free (devices);
1292 devices = NULL;
1294 num_devices_openmp = 0;
1295 for (i = 0; i < num_devices; i++)
1296 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1297 devices_s[num_devices_openmp++] = devices[i];
1298 int num_devices_after_openmp = num_devices_openmp;
1299 for (i = 0; i < num_devices; i++)
1300 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1301 devices_s[num_devices_after_openmp++] = devices[i];
1302 free (devices);
1303 devices = devices_s;
1305 for (i = 0; i < num_devices; i++)
1307 /* The 'devices' array can be moved (by the realloc call) until we have
1308 found all the plugins, so registering with the OpenACC runtime (which
1309 takes a copy of the pointer argument) must be delayed until now. */
1310 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1311 goacc_register (&devices[i]);
1315 #else /* PLUGIN_SUPPORT */
1316 /* If dlfcn.h is unavailable we always fallback to host execution.
1317 GOMP_target* routines are just stubs for this case. */
1318 static void
1319 gomp_target_init (void)
1322 #endif /* PLUGIN_SUPPORT */