libgomp/target.c
/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */
#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
/* The comparison function for splay tree keys.  Keys are host address
   ranges; two ranges compare equal when they overlap, an empty range
   matches a non-empty one only when it lies strictly inside it, and any
   two empty ranges compare equal.  */

attribute_hidden int
splay_compare (splay_tree_key x, splay_tree_key y)
{
  if (x->host_start == x->host_end
      && y->host_start == y->host_end)
    return 0;
  if (x->host_end <= y->host_start)
    return -1;
  if (x->host_start >= y->host_end)
    return 1;
  return 0;
}

#include "splay-tree.h"
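
/* Run the offloading initialization exactly once: pthread_once guarantees
   a single invocation of gomp_target_init even when several threads race
   to reach their first target construct.  */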
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
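
/* Return the number of devices usable for OpenMP offloading, initializing
   the plugin infrastructure on first use.  */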
attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
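
/* Translate DEVICE_ID into a device descriptor.  GOMP_DEVICE_ICV is
   replaced by the default-device-var ICV; ids outside the valid range
   yield NULL, which callers treat as host fallback.  The selected device
   is lazily initialized under its lock.  */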
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (!devices[device_id].is_initialized)
    gomp_init_device (&devices[device_id]);
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
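
/* Look KEY up in MEM_MAP.  An empty range (host_start == host_end, as
   produced e.g. by zero-length array sections) sitting exactly on the
   boundary of a mapped region would not compare equal to it, so retry
   the lookup with the range widened one byte to the right, then to the
   left, before falling back to the plain lookup.  */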
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
/* Handle the case where gomp_map_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    devicep->host2dev_func (devicep->target_id,
			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				      + newn->host_start - oldn->host_start),
			    (void *) newn->host_start,
			    newn->host_end - newn->host_start);
  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
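
/* Fetch the map kind for entry IDX of KINDS, which holds 16-bit kinds
   when SHORT_MAPKIND and 8-bit kinds otherwise.  */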
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
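
/* Initialize the device copy of a mapped pointer: look up where the data
   it points to (biased by BIAS) lives on the device and store that
   address, with the bias subtracted again, at TGT->tgt_start
   + TARGET_OFFSET.  A NULL host pointer is translated to a NULL device
   pointer.  */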
static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      /* FIXME: see comment about coalescing host/dev transfers below.  */
      devicep->host2dev_func (devicep->target_id,
			      (void *) (tgt->tgt_start + target_offset),
			      (void *) &cur_node.tgt_offset,
			      sizeof (void *));
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  /* FIXME: see comment about coalescing host/dev transfers below.  */
  devicep->host2dev_func (devicep->target_id,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *));
}
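
/* Map structure element I given that element FIRST of the same structure
   is already mapped through N.  The element must translate into the same
   target_mem_desc with the same host/target offset delta; zero-sized
   elements are additionally probed one byte to either side.  Anything
   else is a fatal error.  */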
static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
			      &tgt->list[i], kind & typemask);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				      kind & typemask);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				  kind & typemask);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
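
/* Map MAPNUM variables described by HOSTADDRS/SIZES/KINDS on DEVICEP,
   under the device lock, in two passes.  The first pass resolves
   variables that are already mapped and accumulates the size and
   alignment of everything new; a single device allocation then backs all
   new entries; the second pass inserts splay tree keys for them and
   performs the host-to-device copies.  For GOMP_MAP_VARS_TARGET the
   device block additionally begins with a MAPNUM-sized array of device
   addresses that is passed to the target region as its argument.
   Returns a descriptor for later gomp_unmap_vars, or NULL for enter-data
   mappings that were already fully present.  */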
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
	{
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start;
	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
	    }
	  cur_node.host_start -= n->host_start;
	  hostaddrs[i]
	    = (void *) (n->tgt->tgt_start + n->tgt_offset
			+ cur_node.host_start);
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 2;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first]
			  - (uintptr_t) hostaddrs[i];
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		tgt->list[i].key = NULL;
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
				      sizes, kinds);
	  i--;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = ~(uintptr_t) 1;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n)
	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
				kind & typemask);
      else
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }
  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		devicep->host2dev_func (devicep->target_id,
					(void *) (tgt->tgt_start + tgt_size),
					(void *) hostaddrs[i], len);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_USE_DEVICE_PTR:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
					    sizes, kinds);
		i--;
		continue;
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n)
	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
				      kind & typemask);
	    else
	      {
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != ~(size_t) 0)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = ~(size_t) 0;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->async_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    /* FIXME: Perhaps add some smarts, like if copying
		       several adjacent fields from host to target, use some
		       host buffer to avoid sending each var individually.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i]);
		    break;
		  case GOMP_MAP_TO_PSET:
		    /* FIXME: see above FIXME comment.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j]);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));

		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    sizeof (void *));
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }
		array++;
	      }
	  }
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  if (tgt->list[i].key == NULL)
	    {
	      if (tgt->list[i].offset == ~(uintptr_t) 0)
		cur_node.tgt_offset = (uintptr_t) hostaddrs[i];
	      else if (tgt->list[i].offset == ~(uintptr_t) 1)
		cur_node.tgt_offset = 0;
	      else if (tgt->list[i].offset == ~(uintptr_t) 2)
		cur_node.tgt_offset = tgt->list[i + 1].key->tgt->tgt_start
				      + tgt->list[i + 1].key->tgt_offset
				      + tgt->list[i + 1].offset
				      + (uintptr_t) hostaddrs[i]
				      - (uintptr_t) hostaddrs[i + 1];
	      else
		cur_node.tgt_offset = tgt->tgt_start
				      + tgt->list[i].offset;
	    }
	  else
	    cur_node.tgt_offset = tgt->list[i].key->tgt->tgt_start
				  + tgt->list[i].key->tgt_offset
				  + tgt->list[i].offset;
	  /* FIXME: see above FIXME comment.  */
	  devicep->host2dev_func (devicep->target_id,
				  (void *) (tgt->tgt_start
					    + i * sizeof (void *)),
				  (void *) &cur_node.tgt_offset,
				  sizeof (void *));
	}
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
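
/* Release TGT: free its backing device region, if any, together with the
   host-side descriptor.  Called with the device lock held.  */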
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);

  free (tgt->array);
  free (tgt);
}
/* Decrease the refcount for a set of mapped variables, and queue asynchronous
   copies from the device back to the host after any work that has been issued.
   Because the regions are still "live", increment an asynchronous reference
   count to indicate that they should not be unmapped from host-side data
   structures until the asynchronous copy has completed.  */

attribute_hidden void
gomp_copy_from_async (struct target_mem_desc *tgt)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  size_t i;

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i].key == NULL)
      ;
    else if (tgt->list[i].key->refcount > 1)
      {
	tgt->list[i].key->refcount--;
	tgt->list[i].key->async_refcount++;
      }
    else
      {
	splay_tree_key k = tgt->list[i].key;
	if (tgt->list[i].copy_from)
	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
				  (void *) (k->tgt->tgt_start + k->tgt_offset),
				  k->host_end - k->host_start);
      }

  gomp_mutex_unlock (&devicep->lock);
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already, i.e. by gomp_copy_from_async above.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  if (k->async_refcount > 0)
	    k->async_refcount--;
	  else
	    {
	      k->refcount--;
	      do_unmap = true;
	    }
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	devicep->dev2host_func (devicep->target_id,
				(void *) (k->host_start + tgt->list[i].offset),
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + tgt->list[i].offset),
				tgt->list[i].length);
      if (do_unmap)
	{
	  splay_tree_remove (&devicep->mem_map, k);
	  if (k->tgt->refcount > 1)
	    k->tgt->refcount--;
	  else
	    gomp_unmap_tgt (k->tgt);
	}
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
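
/* Implement 'target update': copy each listed host address range to
   and/or from the device as directed by KINDS.  Ranges that are not
   mapped at all are skipped; a range only partially covered by a mapping
   is a fatal error.  */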
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }
	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      devicep->host2dev_func (devicep->target_id,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      (void *) cur_node.host_start,
				      cur_node.host_end - cur_node.host_start);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      devicep->dev2host_func (devicep->target_id,
				      (void *) cur_node.host_start,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      cur_node.host_end - cur_node.host_start);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->async_refcount = 0;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      if (target_var->end - target_var->start
	  != (uintptr_t) host_var_table[i * 2 + 1])
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Can't map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = REFCOUNT_INFINITY;
      k->async_refcount = 0;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
/* Unload the mappings described by TARGET_DATA from the device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  unsigned j;
  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  devicep->unload_image_func (devicep->target_id, version, target_data);

  /* Remove mappings from splay tree.  */
  for (j = 0; j < num_funcs; j++)
    {
      k.host_start = (uintptr_t) host_func_table[j];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  for (j = 0; j < num_vars; j++)
    {
      k.host_start = (uintptr_t) host_var_table[j * 2];
      k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
      splay_tree_remove (&devicep->mem_map, &k);
    }

  if (node)
    {
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  devicep->init_device_func (devicep->target_id);

  /* Load to device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  devicep->is_initialized = true;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->is_initialized)
    {
      unsigned i;

      /* Unload from device all currently registered images.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}
/* Free address mapping tables.  MEM_MAP must be locked on entry, and remains
   locked on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}
/* This function de-initializes the target device, specified by DEVICEP.
   DEVICEP must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_fini_device (struct gomp_device_descr *devicep)
{
  if (devicep->is_initialized)
    devicep->fini_device_func (devicep->target_id);

  devicep->is_initialized = false;
}
/* Host fallback for GOMP_target{,_41} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
/* Helper function of GOMP_target{,_41} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	gomp_fatal ("Target function wasn't mapped");

      return (void *) tgt_fn->tgt_offset;
    }
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return gomp_target_fallback (fn, hostaddrs);

  void *fn_addr = gomp_get_target_fn_addr (devicep, fn);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
  gomp_free_thread (thr);
  *thr = old_thr;
  gomp_unmap_vars (tgt_vars, true);
}
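
/* Variant of GOMP_target using 16-bit map kinds (kind in the low byte,
   alignment in the high byte) plus FLAGS and DEPEND arguments for the
   nowait and depend clauses.  On host fallback, firstprivate variables
   are first staged into a suitably aligned alloca block so the outlined
   function receives private copies.  */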
void
GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds,
		unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      size_t i, tgt_align = 0, tgt_size = 0;
      char *tgt = NULL;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    if (tgt_align < align)
	      tgt_align = align;
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    tgt_size += sizes[i];
	  }
      if (tgt_align)
	{
	  tgt = gomp_alloca (tgt_size + tgt_align - 1);
	  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
	  if (al)
	    tgt += tgt_align - al;
	  tgt_size = 0;
	  for (i = 0; i < mapnum; i++)
	    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	      {
		size_t align = (size_t) 1 << (kinds[i] >> 8);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
		hostaddrs[i] = tgt + tgt_size;
		tgt_size = tgt_size + sizes[i];
	      }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  void *fn_addr = gomp_get_target_fn_addr (devicep, fn);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     GOMP_MAP_VARS_TARGET);
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
  gomp_free_thread (thr);
  *thr = old_thr;
  gomp_unmap_vars (tgt_vars, true);
}
/* Host fallback for GOMP_target_data{,_41} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}
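
/* Handle '#pragma omp target data': map the listed variables and push
   the resulting descriptor onto the per-task chain that
   GOMP_target_end_data later pops.  */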
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_data_41 (int device, size_t mapnum, void **hostaddrs, size_t *sizes,
		     unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
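
/* Handle '#pragma omp target update' with 8-bit map kinds; silently does
   nothing when no usable device is selected.  */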
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
void
GOMP_target_update_41 (int device, size_t mapnum, void **hostaddrs,
		       size_t *sizes, unsigned short *kinds,
		       unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      gomp_create_target_task (devicep, (void (*) (void *)) NULL,
				       mapnum, hostaddrs, sizes, kinds,
				       flags | GOMP_TARGET_FLAG_UPDATE,
				       depend);
	      return;
	    }

	  struct gomp_team *team = thr->ts.team;
	  /* If parallel or taskgroup has been cancelled, don't start new
	     tasks.  */
	  if (team
	      && (gomp_team_barrier_cancelled (&team->barrier)
		  || (thr->task->taskgroup
		      && thr->task->taskgroup->cancelled)))
	    return;

	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
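
/* Handle the unmapping side of 'target exit data': drop one reference
   per listed variable (delete kinds force the refcount to zero), copy
   'from' data back to the host where required, and remove entries whose
   refcount reached zero from the splay tree.  */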
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    devicep->dev2host_func (devicep->target_id,
				    (void *) cur_node.host_start,
				    (void *) (k->tgt->tgt_start + k->tgt_offset
					      + cur_node.host_start
					      - k->host_start),
				    cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    {
	      splay_tree_remove (&devicep->mem_map, k);
	      if (k->tgt->refcount > 1)
		k->tgt->refcount--;
	      else
		gomp_unmap_tgt (k->tgt);
	    }

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      gomp_create_target_task (devicep, (void (*) (void *)) NULL,
				       mapnum, hostaddrs, sizes, kinds,
				       flags, depend);
	      return;
	    }

	  struct gomp_team *team = thr->ts.team;
	  /* If parallel or taskgroup has been cancelled, don't start new
	     tasks.  */
	  if (team
	      && (gomp_team_barrier_cancelled (&team->barrier)
		  || (thr->task->taskgroup
		      && thr->task->taskgroup->cancelled)))
	    return;

	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
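
/* Body of a deferred target task created by gomp_create_target_task:
   replay the update or enter/exit data action recorded in the
   gomp_target_task structure.  The ttask->fn != NULL case (a deferred
   target region) is not handled here in this revision.  */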
void
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  if (ttask->fn != NULL)
    {
      /* GOMP_target_41 */
    }
  else if (ttask->devicep == NULL
	   || !(ttask->devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (ttask->devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (ttask->devicep, ttask->sizes[i] + 1,
			 &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
			 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (ttask->devicep, 1, &ttask->hostaddrs[i], NULL,
		       &ttask->sizes[i], &ttask->kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (ttask->devicep, ttask->mapnum, ttask->hostaddrs,
		    ttask->sizes, ttask->kinds);
}
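
/* Handle '#pragma omp teams': record the thread_limit clause in the ICV
   (values above INT_MAX are stored as UINT_MAX); NUM_TEAMS currently has
   no effect here.  */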
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
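
/* omp_target_alloc: allocate SIZE bytes in DEVICE_NUM's data
   environment, falling back to malloc for the host and for devices
   without OpenMP offloading capabilities.  Returns NULL for invalid
   device numbers or on allocation failure.  */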
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  devicep->free_func (devicep->target_id, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
int
omp_target_is_present (void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
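
/* omp_target_memcpy: copy LENGTH bytes, beginning at the given offsets,
   between host and/or device storage.  GOMP_DEVICE_HOST_FALLBACK as a
   device number designates the host; copies between two distinct
   devices are not supported.  Returns 0 on success, EINVAL otherwise.  */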
int
omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
		   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
	src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      dst_devicep->host2dev_func (dst_devicep->target_id,
				  (char *) dst + dst_offset,
				  (char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return 0;
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      src_devicep->dev2host_func (src_devicep->target_id,
				  (char *) dst + dst_offset,
				  (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return 0;
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      src_devicep->dev2dev_func (src_devicep->target_id,
				 (char *) dst + dst_offset,
				 (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return 0;
    }
  return EINVAL;
}
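
/* For illustration only, a sketch of how the entry points above combine,
   assuming a device 0 that supports offloading exists (user code would
   normally pass omp_get_initial_device () where this file's internal
   GOMP_DEVICE_HOST_FALLBACK constant appears):

     size_t len = 1024;
     char *host_buf = malloc (len);
     void *dev_buf = omp_target_alloc (len, 0);
     if (dev_buf != NULL
	 && omp_target_memcpy (dev_buf, host_buf, len, 0, 0,
			       0, GOMP_DEVICE_HOST_FALLBACK) == 0)
       {
	 ... use dev_buf in a target region via is_device_ptr ...
       }
     omp_target_free (dev_buf, 0);
     free (host_buf);  */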
static int
omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
      else if (src_devicep == NULL)
	dst_devicep->host2dev_func (dst_devicep->target_id,
				    (char *) dst + dst_off,
				    (char *) src + src_off, length);
      else if (dst_devicep == NULL)
	src_devicep->dev2host_func (src_devicep->target_id,
				    (char *) dst + dst_off,
				    (char *) src + src_off, length);
      else if (src_devicep == dst_devicep)
	src_devicep->dev2dev_func (src_devicep->target_id,
				   (char *) dst + dst_off,
				   (char *) src + src_off, length);
      else
	return EINVAL;
      return 0;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
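
/* omp_target_memcpy_rect: copy a NUM_DIMS-dimensional rectangular
   subvolume, described by VOLUME and the offset/dimension arrays in
   units of ELEMENT_SIZE, between host and/or device storage; the worker
   above peels off one dimension per recursion step.  Returns 0 on
   success, EINVAL on failure, and INT_MAX (the supported dimension
   limit) when both DST and SRC are NULL.  */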
1903 omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
1904 int num_dims, const size_t *volume,
1905 const size_t *dst_offsets,
1906 const size_t *src_offsets,
1907 const size_t *dst_dimensions,
1908 const size_t *src_dimensions,
1909 int dst_device_num, int src_device_num)
1911 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
1913 if (!dst && !src)
1914 return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
        return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
        return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
        dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
        return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
        return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
        src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
                                           volume, dst_offsets, src_offsets,
                                           dst_dimensions, src_dimensions,
                                           dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
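
/* Associate the host address range [HOST_PTR, HOST_PTR + SIZE) with the
   device storage at DEVICE_PTR + DEVICE_OFFSET on device DEVICE_NUM, by
   entering it into the device's memory map with an infinite reference
   count.  A minimal usage sketch (DEV, N and HOST_ARRAY are illustrative
   names, not taken from this file):

     void *buf = omp_target_alloc (N * sizeof (double), dev);
     int err = omp_target_associate_ptr (host_array, buf,
                                         N * sizeof (double), 0, dev);

   Returns 0 on success, EINVAL otherwise.  */
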
int
omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
                          size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
          == (uintptr_t) device_ptr + device_offset
          && n->host_start <= cur_node.host_start
          && n->host_end >= cur_node.host_end)
        ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->async_refcount = 0;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
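
/* Remove the association created by omp_target_associate_ptr for host
   pointer PTR on device DEVICE_NUM.  The entry must still look exactly
   like one made by omp_target_associate_ptr (infinite refcount, no
   device storage owned by libgomp), otherwise EINVAL is returned.  */
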
int
omp_target_disassociate_ptr (void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handles.  None of the symbols can legitimately be NULL, so we
     don't need to check dlerror all the time.  */
#define DLSYM(f) \
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
    goto dl_fail
  /* Similar, but missing functions are not an error.  The expression
     evaluates to false if the lookup failed, true otherwise.  */
#define DLSYM_OPT(f, n) \
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
   || (last_missing = #n, 0))
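
/* For instance, DLSYM (alloc) binds device->alloc_func to the plugin
   symbol GOMP_OFFLOAD_alloc and jumps to dl_fail if that symbol is
   missing, while DLSYM_OPT (openacc.exec, openacc_parallel) merely
   records the missing name in LAST_MISSING and yields false.  */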

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_parallel)
          || !DLSYM_OPT (openacc.register_async_cleanup,
                         openacc_register_async_cleanup)
          || !DLSYM_OPT (openacc.async_test, openacc_async_test)
          || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
          || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
          || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
          || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
          || !DLSYM_OPT (openacc.async_wait_all_async,
                         openacc_async_wait_all_async)
          || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
          || !DLSYM_OPT (openacc.create_thread_data,
                         openacc_create_thread_data)
          || !DLSYM_OPT (openacc.destroy_thread_data,
                         openacc_destroy_thread_data))
        {
          /* Require all the OpenACC handlers if we have
             GOMP_OFFLOAD_CAP_OPENACC_200.  */
          err = "plugin missing OpenACC handler function";
          goto fail;
        }

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
                         openacc_get_current_cuda_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
                         openacc_get_current_cuda_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
      if (cuda && cuda != 4)
        {
          /* Make sure all the CUDA functions are there if any of them are.  */
          err = "plugin missing OpenACC CUDA handler function";
          goto fail;
        }
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}

/* This function initializes the runtime needed for offloading.
   It parses the list of offload targets and tries to load the plugins for
   these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */
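
/* For example, if OFFLOAD_TARGETS had been configured as "intelmic,nvptx"
   (an illustrative value; the real list is fixed at configure time), the
   loop below would attempt to dlopen libgomp-plugin-intelmic.so.1 and
   libgomp-plugin-nvptx.so.1 in turn, assuming SONAME_SUFFIX (1) expands
   to ".so.1" as it does on Linux.  */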

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_TARGETS;
  if (*cur)
    do
      {
        struct gomp_device_descr current_device;

        next = strchr (cur, ',');

        plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
                                       + strlen (prefix) + strlen (suffix));
        if (!plugin_name)
          {
            num_devices = 0;
            break;
          }

        strcpy (plugin_name, prefix);
        strncat (plugin_name, cur, next ? next - cur : strlen (cur));
        strcat (plugin_name, suffix);

        if (gomp_load_plugin_for_device (&current_device, plugin_name))
          {
            new_num_devices = current_device.get_num_devices_func ();
            if (new_num_devices >= 1)
              {
                /* Augment DEVICES and NUM_DEVICES.  */

                devices = realloc (devices, (num_devices + new_num_devices)
                                   * sizeof (struct gomp_device_descr));
                if (!devices)
                  {
                    num_devices = 0;
                    free (plugin_name);
                    break;
                  }

                current_device.name = current_device.get_name_func ();
                /* current_device.capabilities has already been set.  */
                current_device.type = current_device.get_type_func ();
                current_device.mem_map.root = NULL;
                current_device.is_initialized = false;
                current_device.openacc.data_environ = NULL;
                for (i = 0; i < new_num_devices; i++)
                  {
                    current_device.target_id = i;
                    devices[num_devices] = current_device;
                    gomp_mutex_init (&devices[num_devices].lock);
                    num_devices++;
                  }
              }
          }

        free (plugin_name);
        cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
         found all the plugins, so registering with the OpenACC runtime (which
         takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
        goacc_register (&devices[i]);
    }
}

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */