PR tree-optimization/78476 - snprintf(0, 0, ...) with known arguments not optimized...
[official-gcc.git] / libgomp / target.c
blob48b9ab8e0767a537ddcb624137abbe619a0b786c
1 /* Copyright (C) 2013-2016 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
38 #endif
39 #include <string.h>
40 #include <assert.h>
41 #include <errno.h>
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
48 static void gomp_target_init (void);
50 /* The whole initialization code for offloading plugins is only run one. */
51 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
53 /* Mutex for offload image registration. */
54 static gomp_mutex_t register_lock;
56 /* This structure describes an offload image.
57 It contains type of the target device, pointer to host table descriptor, and
58 pointer to target data. */
59 struct offload_image_descr {
60 unsigned version;
61 enum offload_target_type type;
62 const void *host_table;
63 const void *target_data;
66 /* Array of descriptors of offload images. */
67 static struct offload_image_descr *offload_images;
69 /* Total number of offload images. */
70 static int num_offload_images;
72 /* Array of descriptors for all available devices. */
73 static struct gomp_device_descr *devices;
75 /* Total number of available devices. */
76 static int num_devices;
78 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
79 static int num_devices_openmp;
81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
83 static void *
84 gomp_realloc_unlock (void *old, size_t size)
86 void *ret = realloc (old, size);
87 if (ret == NULL)
89 gomp_mutex_unlock (&register_lock);
90 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
92 return ret;
95 attribute_hidden void
96 gomp_init_targets_once (void)
98 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
101 attribute_hidden int
102 gomp_get_num_devices (void)
104 gomp_init_targets_once ();
105 return num_devices_openmp;
108 static struct gomp_device_descr *
109 resolve_device (int device_id)
111 if (device_id == GOMP_DEVICE_ICV)
113 struct gomp_task_icv *icv = gomp_icv (false);
114 device_id = icv->default_device_var;
117 if (device_id < 0 || device_id >= gomp_get_num_devices ())
118 return NULL;
120 gomp_mutex_lock (&devices[device_id].lock);
121 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
122 gomp_init_device (&devices[device_id]);
123 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
125 gomp_mutex_unlock (&devices[device_id].lock);
126 return NULL;
128 gomp_mutex_unlock (&devices[device_id].lock);
130 return &devices[device_id];
134 static inline splay_tree_key
135 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
137 if (key->host_start != key->host_end)
138 return splay_tree_lookup (mem_map, key);
140 key->host_end++;
141 splay_tree_key n = splay_tree_lookup (mem_map, key);
142 key->host_end--;
143 if (n)
144 return n;
145 key->host_start--;
146 n = splay_tree_lookup (mem_map, key);
147 key->host_start++;
148 if (n)
149 return n;
150 return splay_tree_lookup (mem_map, key);
153 static inline splay_tree_key
154 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
156 if (key->host_start != key->host_end)
157 return splay_tree_lookup (mem_map, key);
159 key->host_end++;
160 splay_tree_key n = splay_tree_lookup (mem_map, key);
161 key->host_end--;
162 return n;
165 static inline void
166 gomp_device_copy (struct gomp_device_descr *devicep,
167 bool (*copy_func) (int, void *, const void *, size_t),
168 const char *dst, void *dstaddr,
169 const char *src, const void *srcaddr,
170 size_t size)
172 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
174 gomp_mutex_unlock (&devicep->lock);
175 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
176 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
180 static void
181 gomp_copy_host2dev (struct gomp_device_descr *devicep,
182 void *d, const void *h, size_t sz)
184 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
187 static void
188 gomp_copy_dev2host (struct gomp_device_descr *devicep,
189 void *h, const void *d, size_t sz)
191 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
194 static void
195 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
197 if (!devicep->free_func (devicep->target_id, devptr))
199 gomp_mutex_unlock (&devicep->lock);
200 gomp_fatal ("error in freeing device memory block at %p", devptr);
204 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
205 gomp_map_0len_lookup found oldn for newn.
206 Helper function of gomp_map_vars. */
208 static inline void
209 gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
210 splay_tree_key newn, struct target_var_desc *tgt_var,
211 unsigned char kind)
213 tgt_var->key = oldn;
214 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
215 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
216 tgt_var->offset = newn->host_start - oldn->host_start;
217 tgt_var->length = newn->host_end - newn->host_start;
219 if ((kind & GOMP_MAP_FLAG_FORCE)
220 || oldn->host_start > newn->host_start
221 || oldn->host_end < newn->host_end)
223 gomp_mutex_unlock (&devicep->lock);
224 gomp_fatal ("Trying to map into device [%p..%p) object when "
225 "[%p..%p) is already mapped",
226 (void *) newn->host_start, (void *) newn->host_end,
227 (void *) oldn->host_start, (void *) oldn->host_end);
230 if (GOMP_MAP_ALWAYS_TO_P (kind))
231 gomp_copy_host2dev (devicep,
232 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
233 + newn->host_start - oldn->host_start),
234 (void *) newn->host_start,
235 newn->host_end - newn->host_start);
237 if (oldn->refcount != REFCOUNT_INFINITY)
238 oldn->refcount++;
241 static int
242 get_kind (bool short_mapkind, void *kinds, int idx)
244 return short_mapkind ? ((unsigned short *) kinds)[idx]
245 : ((unsigned char *) kinds)[idx];
248 static void
249 gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
250 uintptr_t target_offset, uintptr_t bias)
252 struct gomp_device_descr *devicep = tgt->device_descr;
253 struct splay_tree_s *mem_map = &devicep->mem_map;
254 struct splay_tree_key_s cur_node;
256 cur_node.host_start = host_ptr;
257 if (cur_node.host_start == (uintptr_t) NULL)
259 cur_node.tgt_offset = (uintptr_t) NULL;
260 /* FIXME: see comment about coalescing host/dev transfers below. */
261 gomp_copy_host2dev (devicep,
262 (void *) (tgt->tgt_start + target_offset),
263 (void *) &cur_node.tgt_offset,
264 sizeof (void *));
265 return;
267 /* Add bias to the pointer value. */
268 cur_node.host_start += bias;
269 cur_node.host_end = cur_node.host_start;
270 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
271 if (n == NULL)
273 gomp_mutex_unlock (&devicep->lock);
274 gomp_fatal ("Pointer target of array section wasn't mapped");
276 cur_node.host_start -= n->host_start;
277 cur_node.tgt_offset
278 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
279 /* At this point tgt_offset is target address of the
280 array section. Now subtract bias to get what we want
281 to initialize the pointer with. */
282 cur_node.tgt_offset -= bias;
283 /* FIXME: see comment about coalescing host/dev transfers below. */
284 gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
285 (void *) &cur_node.tgt_offset, sizeof (void *));
288 static void
289 gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
290 size_t first, size_t i, void **hostaddrs,
291 size_t *sizes, void *kinds)
293 struct gomp_device_descr *devicep = tgt->device_descr;
294 struct splay_tree_s *mem_map = &devicep->mem_map;
295 struct splay_tree_key_s cur_node;
296 int kind;
297 const bool short_mapkind = true;
298 const int typemask = short_mapkind ? 0xff : 0x7;
300 cur_node.host_start = (uintptr_t) hostaddrs[i];
301 cur_node.host_end = cur_node.host_start + sizes[i];
302 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
303 kind = get_kind (short_mapkind, kinds, i);
304 if (n2
305 && n2->tgt == n->tgt
306 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
308 gomp_map_vars_existing (devicep, n2, &cur_node,
309 &tgt->list[i], kind & typemask);
310 return;
312 if (sizes[i] == 0)
314 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
316 cur_node.host_start--;
317 n2 = splay_tree_lookup (mem_map, &cur_node);
318 cur_node.host_start++;
319 if (n2
320 && n2->tgt == n->tgt
321 && n2->host_start - n->host_start
322 == n2->tgt_offset - n->tgt_offset)
324 gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
325 kind & typemask);
326 return;
329 cur_node.host_end++;
330 n2 = splay_tree_lookup (mem_map, &cur_node);
331 cur_node.host_end--;
332 if (n2
333 && n2->tgt == n->tgt
334 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
336 gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
337 kind & typemask);
338 return;
341 gomp_mutex_unlock (&devicep->lock);
342 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
343 "other mapped elements from the same structure weren't mapped "
344 "together with it", (void *) cur_node.host_start,
345 (void *) cur_node.host_end);
348 static inline uintptr_t
349 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
351 if (tgt->list[i].key != NULL)
352 return tgt->list[i].key->tgt->tgt_start
353 + tgt->list[i].key->tgt_offset
354 + tgt->list[i].offset;
355 if (tgt->list[i].offset == ~(uintptr_t) 0)
356 return (uintptr_t) hostaddrs[i];
357 if (tgt->list[i].offset == ~(uintptr_t) 1)
358 return 0;
359 if (tgt->list[i].offset == ~(uintptr_t) 2)
360 return tgt->list[i + 1].key->tgt->tgt_start
361 + tgt->list[i + 1].key->tgt_offset
362 + tgt->list[i + 1].offset
363 + (uintptr_t) hostaddrs[i]
364 - (uintptr_t) hostaddrs[i + 1];
365 return tgt->tgt_start + tgt->list[i].offset;
368 attribute_hidden struct target_mem_desc *
369 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
370 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
371 bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
373 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
374 bool has_firstprivate = false;
375 const int rshift = short_mapkind ? 8 : 3;
376 const int typemask = short_mapkind ? 0xff : 0x7;
377 struct splay_tree_s *mem_map = &devicep->mem_map;
378 struct splay_tree_key_s cur_node;
379 struct target_mem_desc *tgt
380 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
381 tgt->list_count = mapnum;
382 tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
383 tgt->device_descr = devicep;
385 if (mapnum == 0)
387 tgt->tgt_start = 0;
388 tgt->tgt_end = 0;
389 return tgt;
392 tgt_align = sizeof (void *);
393 tgt_size = 0;
394 if (pragma_kind == GOMP_MAP_VARS_TARGET)
396 size_t align = 4 * sizeof (void *);
397 tgt_align = align;
398 tgt_size = mapnum * sizeof (void *);
401 gomp_mutex_lock (&devicep->lock);
402 if (devicep->state == GOMP_DEVICE_FINALIZED)
404 gomp_mutex_unlock (&devicep->lock);
405 free (tgt);
406 return NULL;
409 for (i = 0; i < mapnum; i++)
411 int kind = get_kind (short_mapkind, kinds, i);
412 if (hostaddrs[i] == NULL
413 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
415 tgt->list[i].key = NULL;
416 tgt->list[i].offset = ~(uintptr_t) 0;
417 continue;
419 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
421 cur_node.host_start = (uintptr_t) hostaddrs[i];
422 cur_node.host_end = cur_node.host_start;
423 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
424 if (n == NULL)
426 gomp_mutex_unlock (&devicep->lock);
427 gomp_fatal ("use_device_ptr pointer wasn't mapped");
429 cur_node.host_start -= n->host_start;
430 hostaddrs[i]
431 = (void *) (n->tgt->tgt_start + n->tgt_offset
432 + cur_node.host_start);
433 tgt->list[i].key = NULL;
434 tgt->list[i].offset = ~(uintptr_t) 0;
435 continue;
437 else if ((kind & typemask) == GOMP_MAP_STRUCT)
439 size_t first = i + 1;
440 size_t last = i + sizes[i];
441 cur_node.host_start = (uintptr_t) hostaddrs[i];
442 cur_node.host_end = (uintptr_t) hostaddrs[last]
443 + sizes[last];
444 tgt->list[i].key = NULL;
445 tgt->list[i].offset = ~(uintptr_t) 2;
446 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
447 if (n == NULL)
449 size_t align = (size_t) 1 << (kind >> rshift);
450 if (tgt_align < align)
451 tgt_align = align;
452 tgt_size -= (uintptr_t) hostaddrs[first]
453 - (uintptr_t) hostaddrs[i];
454 tgt_size = (tgt_size + align - 1) & ~(align - 1);
455 tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
456 not_found_cnt += last - i;
457 for (i = first; i <= last; i++)
458 tgt->list[i].key = NULL;
459 i--;
460 continue;
462 for (i = first; i <= last; i++)
463 gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
464 sizes, kinds);
465 i--;
466 continue;
468 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
470 tgt->list[i].key = NULL;
471 tgt->list[i].offset = ~(uintptr_t) 1;
472 has_firstprivate = true;
473 continue;
475 cur_node.host_start = (uintptr_t) hostaddrs[i];
476 if (!GOMP_MAP_POINTER_P (kind & typemask))
477 cur_node.host_end = cur_node.host_start + sizes[i];
478 else
479 cur_node.host_end = cur_node.host_start + sizeof (void *);
480 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
482 tgt->list[i].key = NULL;
484 size_t align = (size_t) 1 << (kind >> rshift);
485 if (tgt_align < align)
486 tgt_align = align;
487 tgt_size = (tgt_size + align - 1) & ~(align - 1);
488 tgt_size += cur_node.host_end - cur_node.host_start;
489 has_firstprivate = true;
490 continue;
492 splay_tree_key n;
493 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
495 n = gomp_map_0len_lookup (mem_map, &cur_node);
496 if (!n)
498 tgt->list[i].key = NULL;
499 tgt->list[i].offset = ~(uintptr_t) 1;
500 continue;
503 else
504 n = splay_tree_lookup (mem_map, &cur_node);
505 if (n && n->refcount != REFCOUNT_LINK)
506 gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
507 kind & typemask);
508 else
510 tgt->list[i].key = NULL;
512 size_t align = (size_t) 1 << (kind >> rshift);
513 not_found_cnt++;
514 if (tgt_align < align)
515 tgt_align = align;
516 tgt_size = (tgt_size + align - 1) & ~(align - 1);
517 tgt_size += cur_node.host_end - cur_node.host_start;
518 if ((kind & typemask) == GOMP_MAP_TO_PSET)
520 size_t j;
521 for (j = i + 1; j < mapnum; j++)
522 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
523 & typemask))
524 break;
525 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
526 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
527 > cur_node.host_end))
528 break;
529 else
531 tgt->list[j].key = NULL;
532 i++;
538 if (devaddrs)
540 if (mapnum != 1)
542 gomp_mutex_unlock (&devicep->lock);
543 gomp_fatal ("unexpected aggregation");
545 tgt->to_free = devaddrs[0];
546 tgt->tgt_start = (uintptr_t) tgt->to_free;
547 tgt->tgt_end = tgt->tgt_start + sizes[0];
549 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
551 /* Allocate tgt_align aligned tgt_size block of memory. */
552 /* FIXME: Perhaps change interface to allocate properly aligned
553 memory. */
554 tgt->to_free = devicep->alloc_func (devicep->target_id,
555 tgt_size + tgt_align - 1);
556 if (!tgt->to_free)
558 gomp_mutex_unlock (&devicep->lock);
559 gomp_fatal ("device memory allocation fail");
562 tgt->tgt_start = (uintptr_t) tgt->to_free;
563 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
564 tgt->tgt_end = tgt->tgt_start + tgt_size;
566 else
568 tgt->to_free = NULL;
569 tgt->tgt_start = 0;
570 tgt->tgt_end = 0;
573 tgt_size = 0;
574 if (pragma_kind == GOMP_MAP_VARS_TARGET)
575 tgt_size = mapnum * sizeof (void *);
577 tgt->array = NULL;
578 if (not_found_cnt || has_firstprivate)
580 if (not_found_cnt)
581 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
582 splay_tree_node array = tgt->array;
583 size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
584 uintptr_t field_tgt_base = 0;
586 for (i = 0; i < mapnum; i++)
587 if (tgt->list[i].key == NULL)
589 int kind = get_kind (short_mapkind, kinds, i);
590 if (hostaddrs[i] == NULL)
591 continue;
592 switch (kind & typemask)
594 size_t align, len, first, last;
595 splay_tree_key n;
596 case GOMP_MAP_FIRSTPRIVATE:
597 align = (size_t) 1 << (kind >> rshift);
598 tgt_size = (tgt_size + align - 1) & ~(align - 1);
599 tgt->list[i].offset = tgt_size;
600 len = sizes[i];
601 gomp_copy_host2dev (devicep,
602 (void *) (tgt->tgt_start + tgt_size),
603 (void *) hostaddrs[i], len);
604 tgt_size += len;
605 continue;
606 case GOMP_MAP_FIRSTPRIVATE_INT:
607 case GOMP_MAP_USE_DEVICE_PTR:
608 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
609 continue;
610 case GOMP_MAP_STRUCT:
611 first = i + 1;
612 last = i + sizes[i];
613 cur_node.host_start = (uintptr_t) hostaddrs[i];
614 cur_node.host_end = (uintptr_t) hostaddrs[last]
615 + sizes[last];
616 if (tgt->list[first].key != NULL)
617 continue;
618 n = splay_tree_lookup (mem_map, &cur_node);
619 if (n == NULL)
621 size_t align = (size_t) 1 << (kind >> rshift);
622 tgt_size -= (uintptr_t) hostaddrs[first]
623 - (uintptr_t) hostaddrs[i];
624 tgt_size = (tgt_size + align - 1) & ~(align - 1);
625 tgt_size += (uintptr_t) hostaddrs[first]
626 - (uintptr_t) hostaddrs[i];
627 field_tgt_base = (uintptr_t) hostaddrs[first];
628 field_tgt_offset = tgt_size;
629 field_tgt_clear = last;
630 tgt_size += cur_node.host_end
631 - (uintptr_t) hostaddrs[first];
632 continue;
634 for (i = first; i <= last; i++)
635 gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
636 sizes, kinds);
637 i--;
638 continue;
639 case GOMP_MAP_ALWAYS_POINTER:
640 cur_node.host_start = (uintptr_t) hostaddrs[i];
641 cur_node.host_end = cur_node.host_start + sizeof (void *);
642 n = splay_tree_lookup (mem_map, &cur_node);
643 if (n == NULL
644 || n->host_start > cur_node.host_start
645 || n->host_end < cur_node.host_end)
647 gomp_mutex_unlock (&devicep->lock);
648 gomp_fatal ("always pointer not mapped");
650 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
651 != GOMP_MAP_ALWAYS_POINTER)
652 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
653 if (cur_node.tgt_offset)
654 cur_node.tgt_offset -= sizes[i];
655 gomp_copy_host2dev (devicep,
656 (void *) (n->tgt->tgt_start
657 + n->tgt_offset
658 + cur_node.host_start
659 - n->host_start),
660 (void *) &cur_node.tgt_offset,
661 sizeof (void *));
662 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
663 + cur_node.host_start - n->host_start;
664 continue;
665 default:
666 break;
668 splay_tree_key k = &array->key;
669 k->host_start = (uintptr_t) hostaddrs[i];
670 if (!GOMP_MAP_POINTER_P (kind & typemask))
671 k->host_end = k->host_start + sizes[i];
672 else
673 k->host_end = k->host_start + sizeof (void *);
674 splay_tree_key n = splay_tree_lookup (mem_map, k);
675 if (n && n->refcount != REFCOUNT_LINK)
676 gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
677 kind & typemask);
678 else
680 k->link_key = NULL;
681 if (n && n->refcount == REFCOUNT_LINK)
683 /* Replace target address of the pointer with target address
684 of mapped object in the splay tree. */
685 splay_tree_remove (mem_map, n);
686 k->link_key = n;
688 size_t align = (size_t) 1 << (kind >> rshift);
689 tgt->list[i].key = k;
690 k->tgt = tgt;
691 if (field_tgt_clear != ~(size_t) 0)
693 k->tgt_offset = k->host_start - field_tgt_base
694 + field_tgt_offset;
695 if (i == field_tgt_clear)
696 field_tgt_clear = ~(size_t) 0;
698 else
700 tgt_size = (tgt_size + align - 1) & ~(align - 1);
701 k->tgt_offset = tgt_size;
702 tgt_size += k->host_end - k->host_start;
704 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
705 tgt->list[i].always_copy_from
706 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
707 tgt->list[i].offset = 0;
708 tgt->list[i].length = k->host_end - k->host_start;
709 k->refcount = 1;
710 tgt->refcount++;
711 array->left = NULL;
712 array->right = NULL;
713 splay_tree_insert (mem_map, array);
714 switch (kind & typemask)
716 case GOMP_MAP_ALLOC:
717 case GOMP_MAP_FROM:
718 case GOMP_MAP_FORCE_ALLOC:
719 case GOMP_MAP_FORCE_FROM:
720 case GOMP_MAP_ALWAYS_FROM:
721 break;
722 case GOMP_MAP_TO:
723 case GOMP_MAP_TOFROM:
724 case GOMP_MAP_FORCE_TO:
725 case GOMP_MAP_FORCE_TOFROM:
726 case GOMP_MAP_ALWAYS_TO:
727 case GOMP_MAP_ALWAYS_TOFROM:
728 /* FIXME: Perhaps add some smarts, like if copying
729 several adjacent fields from host to target, use some
730 host buffer to avoid sending each var individually. */
731 gomp_copy_host2dev (devicep,
732 (void *) (tgt->tgt_start
733 + k->tgt_offset),
734 (void *) k->host_start,
735 k->host_end - k->host_start);
736 break;
737 case GOMP_MAP_POINTER:
738 gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
739 k->tgt_offset, sizes[i]);
740 break;
741 case GOMP_MAP_TO_PSET:
742 /* FIXME: see above FIXME comment. */
743 gomp_copy_host2dev (devicep,
744 (void *) (tgt->tgt_start
745 + k->tgt_offset),
746 (void *) k->host_start,
747 k->host_end - k->host_start);
749 for (j = i + 1; j < mapnum; j++)
750 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
752 & typemask))
753 break;
754 else if ((uintptr_t) hostaddrs[j] < k->host_start
755 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
756 > k->host_end))
757 break;
758 else
760 tgt->list[j].key = k;
761 tgt->list[j].copy_from = false;
762 tgt->list[j].always_copy_from = false;
763 if (k->refcount != REFCOUNT_INFINITY)
764 k->refcount++;
765 gomp_map_pointer (tgt,
766 (uintptr_t) *(void **) hostaddrs[j],
767 k->tgt_offset
768 + ((uintptr_t) hostaddrs[j]
769 - k->host_start),
770 sizes[j]);
771 i++;
773 break;
774 case GOMP_MAP_FORCE_PRESENT:
776 /* We already looked up the memory region above and it
777 was missing. */
778 size_t size = k->host_end - k->host_start;
779 gomp_mutex_unlock (&devicep->lock);
780 #ifdef HAVE_INTTYPES_H
781 gomp_fatal ("present clause: !acc_is_present (%p, "
782 "%"PRIu64" (0x%"PRIx64"))",
783 (void *) k->host_start,
784 (uint64_t) size, (uint64_t) size);
785 #else
786 gomp_fatal ("present clause: !acc_is_present (%p, "
787 "%lu (0x%lx))", (void *) k->host_start,
788 (unsigned long) size, (unsigned long) size);
789 #endif
791 break;
792 case GOMP_MAP_FORCE_DEVICEPTR:
793 assert (k->host_end - k->host_start == sizeof (void *));
794 gomp_copy_host2dev (devicep,
795 (void *) (tgt->tgt_start
796 + k->tgt_offset),
797 (void *) k->host_start,
798 sizeof (void *));
799 break;
800 default:
801 gomp_mutex_unlock (&devicep->lock);
802 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
803 kind);
806 if (k->link_key)
808 /* Set link pointer on target to the device address of the
809 mapped object. */
810 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
811 devicep->host2dev_func (devicep->target_id,
812 (void *) n->tgt_offset,
813 &tgt_addr, sizeof (void *));
815 array++;
820 if (pragma_kind == GOMP_MAP_VARS_TARGET)
822 for (i = 0; i < mapnum; i++)
824 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
825 /* FIXME: see above FIXME comment. */
826 gomp_copy_host2dev (devicep,
827 (void *) (tgt->tgt_start + i * sizeof (void *)),
828 (void *) &cur_node.tgt_offset, sizeof (void *));
832 /* If the variable from "omp target enter data" map-list was already mapped,
833 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
834 gomp_exit_data. */
835 if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
837 free (tgt);
838 tgt = NULL;
841 gomp_mutex_unlock (&devicep->lock);
842 return tgt;
845 static void
846 gomp_unmap_tgt (struct target_mem_desc *tgt)
848 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
849 if (tgt->tgt_end)
850 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
852 free (tgt->array);
853 free (tgt);
856 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
857 variables back from device to host: if it is false, it is assumed that this
858 has been done already. */
860 attribute_hidden void
861 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
863 struct gomp_device_descr *devicep = tgt->device_descr;
865 if (tgt->list_count == 0)
867 free (tgt);
868 return;
871 gomp_mutex_lock (&devicep->lock);
872 if (devicep->state == GOMP_DEVICE_FINALIZED)
874 gomp_mutex_unlock (&devicep->lock);
875 free (tgt->array);
876 free (tgt);
877 return;
880 size_t i;
881 for (i = 0; i < tgt->list_count; i++)
883 splay_tree_key k = tgt->list[i].key;
884 if (k == NULL)
885 continue;
887 bool do_unmap = false;
888 if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
889 k->refcount--;
890 else if (k->refcount == 1)
892 k->refcount--;
893 do_unmap = true;
896 if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
897 || tgt->list[i].always_copy_from)
898 gomp_copy_dev2host (devicep,
899 (void *) (k->host_start + tgt->list[i].offset),
900 (void *) (k->tgt->tgt_start + k->tgt_offset
901 + tgt->list[i].offset),
902 tgt->list[i].length);
903 if (do_unmap)
905 splay_tree_remove (&devicep->mem_map, k);
906 if (k->link_key)
907 splay_tree_insert (&devicep->mem_map,
908 (splay_tree_node) k->link_key);
909 if (k->tgt->refcount > 1)
910 k->tgt->refcount--;
911 else
912 gomp_unmap_tgt (k->tgt);
916 if (tgt->refcount > 1)
917 tgt->refcount--;
918 else
919 gomp_unmap_tgt (tgt);
921 gomp_mutex_unlock (&devicep->lock);
924 static void
925 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
926 size_t *sizes, void *kinds, bool short_mapkind)
928 size_t i;
929 struct splay_tree_key_s cur_node;
930 const int typemask = short_mapkind ? 0xff : 0x7;
932 if (!devicep)
933 return;
935 if (mapnum == 0)
936 return;
938 gomp_mutex_lock (&devicep->lock);
939 if (devicep->state == GOMP_DEVICE_FINALIZED)
941 gomp_mutex_unlock (&devicep->lock);
942 return;
945 for (i = 0; i < mapnum; i++)
946 if (sizes[i])
948 cur_node.host_start = (uintptr_t) hostaddrs[i];
949 cur_node.host_end = cur_node.host_start + sizes[i];
950 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
951 if (n)
953 int kind = get_kind (short_mapkind, kinds, i);
954 if (n->host_start > cur_node.host_start
955 || n->host_end < cur_node.host_end)
957 gomp_mutex_unlock (&devicep->lock);
958 gomp_fatal ("Trying to update [%p..%p) object when "
959 "only [%p..%p) is mapped",
960 (void *) cur_node.host_start,
961 (void *) cur_node.host_end,
962 (void *) n->host_start,
963 (void *) n->host_end);
967 void *hostaddr = (void *) cur_node.host_start;
968 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
969 + cur_node.host_start - n->host_start);
970 size_t size = cur_node.host_end - cur_node.host_start;
972 if (GOMP_MAP_COPY_TO_P (kind & typemask))
973 gomp_copy_host2dev (devicep, devaddr, hostaddr, size);
974 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
975 gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
978 gomp_mutex_unlock (&devicep->lock);
981 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
982 And insert to splay tree the mapping between addresses from HOST_TABLE and
983 from loaded target image. We rely in the host and device compiler
984 emitting variable and functions in the same order. */
986 static void
987 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
988 const void *host_table, const void *target_data,
989 bool is_register_lock)
991 void **host_func_table = ((void ***) host_table)[0];
992 void **host_funcs_end = ((void ***) host_table)[1];
993 void **host_var_table = ((void ***) host_table)[2];
994 void **host_vars_end = ((void ***) host_table)[3];
996 /* The func table contains only addresses, the var table contains addresses
997 and corresponding sizes. */
998 int num_funcs = host_funcs_end - host_func_table;
999 int num_vars = (host_vars_end - host_var_table) / 2;
1001 /* Load image to device and get target addresses for the image. */
1002 struct addr_pair *target_table = NULL;
1003 int i, num_target_entries;
1005 num_target_entries
1006 = devicep->load_image_func (devicep->target_id, version,
1007 target_data, &target_table);
1009 if (num_target_entries != num_funcs + num_vars)
1011 gomp_mutex_unlock (&devicep->lock);
1012 if (is_register_lock)
1013 gomp_mutex_unlock (&register_lock);
1014 gomp_fatal ("Cannot map target functions or variables"
1015 " (expected %u, have %u)", num_funcs + num_vars,
1016 num_target_entries);
1019 /* Insert host-target address mapping into splay tree. */
1020 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1021 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1022 tgt->refcount = REFCOUNT_INFINITY;
1023 tgt->tgt_start = 0;
1024 tgt->tgt_end = 0;
1025 tgt->to_free = NULL;
1026 tgt->prev = NULL;
1027 tgt->list_count = 0;
1028 tgt->device_descr = devicep;
1029 splay_tree_node array = tgt->array;
1031 for (i = 0; i < num_funcs; i++)
1033 splay_tree_key k = &array->key;
1034 k->host_start = (uintptr_t) host_func_table[i];
1035 k->host_end = k->host_start + 1;
1036 k->tgt = tgt;
1037 k->tgt_offset = target_table[i].start;
1038 k->refcount = REFCOUNT_INFINITY;
1039 k->link_key = NULL;
1040 array->left = NULL;
1041 array->right = NULL;
1042 splay_tree_insert (&devicep->mem_map, array);
1043 array++;
1046 /* Most significant bit of the size in host and target tables marks
1047 "omp declare target link" variables. */
1048 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1049 const uintptr_t size_mask = ~link_bit;
1051 for (i = 0; i < num_vars; i++)
1053 struct addr_pair *target_var = &target_table[num_funcs + i];
1054 uintptr_t target_size = target_var->end - target_var->start;
1056 if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
1058 gomp_mutex_unlock (&devicep->lock);
1059 if (is_register_lock)
1060 gomp_mutex_unlock (&register_lock);
1061 gomp_fatal ("Cannot map target variables (size mismatch)");
1064 splay_tree_key k = &array->key;
1065 k->host_start = (uintptr_t) host_var_table[i * 2];
1066 k->host_end
1067 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1068 k->tgt = tgt;
1069 k->tgt_offset = target_var->start;
1070 k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1071 k->link_key = NULL;
1072 array->left = NULL;
1073 array->right = NULL;
1074 splay_tree_insert (&devicep->mem_map, array);
1075 array++;
1078 free (target_table);
1081 /* Unload the mappings described by target_data from device DEVICE_P.
1082 The device must be locked. */
1084 static void
1085 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1086 unsigned version,
1087 const void *host_table, const void *target_data)
1089 void **host_func_table = ((void ***) host_table)[0];
1090 void **host_funcs_end = ((void ***) host_table)[1];
1091 void **host_var_table = ((void ***) host_table)[2];
1092 void **host_vars_end = ((void ***) host_table)[3];
1094 /* The func table contains only addresses, the var table contains addresses
1095 and corresponding sizes. */
1096 int num_funcs = host_funcs_end - host_func_table;
1097 int num_vars = (host_vars_end - host_var_table) / 2;
1099 struct splay_tree_key_s k;
1100 splay_tree_key node = NULL;
1102 /* Find mapping at start of node array */
1103 if (num_funcs || num_vars)
1105 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1106 : (uintptr_t) host_var_table[0]);
1107 k.host_end = k.host_start + 1;
1108 node = splay_tree_lookup (&devicep->mem_map, &k);
1111 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1113 gomp_mutex_unlock (&devicep->lock);
1114 gomp_fatal ("image unload fail");
1117 /* Remove mappings from splay tree. */
1118 int i;
1119 for (i = 0; i < num_funcs; i++)
1121 k.host_start = (uintptr_t) host_func_table[i];
1122 k.host_end = k.host_start + 1;
1123 splay_tree_remove (&devicep->mem_map, &k);
1126 /* Most significant bit of the size in host and target tables marks
1127 "omp declare target link" variables. */
1128 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1129 const uintptr_t size_mask = ~link_bit;
1130 bool is_tgt_unmapped = false;
1132 for (i = 0; i < num_vars; i++)
1134 k.host_start = (uintptr_t) host_var_table[i * 2];
1135 k.host_end
1136 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1138 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1139 splay_tree_remove (&devicep->mem_map, &k);
1140 else
1142 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1143 splay_tree_remove (&devicep->mem_map, n);
1144 if (n->link_key)
1146 if (n->tgt->refcount > 1)
1147 n->tgt->refcount--;
1148 else
1150 is_tgt_unmapped = true;
1151 gomp_unmap_tgt (n->tgt);
1157 if (node && !is_tgt_unmapped)
1159 free (node->tgt);
1160 free (node);
1164 /* This function should be called from every offload image while loading.
1165 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1166 the target, and TARGET_DATA needed by target plugin. */
1168 void
1169 GOMP_offload_register_ver (unsigned version, const void *host_table,
1170 int target_type, const void *target_data)
1172 int i;
1174 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1175 gomp_fatal ("Library too old for offload (version %u < %u)",
1176 GOMP_VERSION, GOMP_VERSION_LIB (version));
1178 gomp_mutex_lock (&register_lock);
1180 /* Load image to all initialized devices. */
1181 for (i = 0; i < num_devices; i++)
1183 struct gomp_device_descr *devicep = &devices[i];
1184 gomp_mutex_lock (&devicep->lock);
1185 if (devicep->type == target_type
1186 && devicep->state == GOMP_DEVICE_INITIALIZED)
1187 gomp_load_image_to_device (devicep, version,
1188 host_table, target_data, true);
1189 gomp_mutex_unlock (&devicep->lock);
1192 /* Insert image to array of pending images. */
1193 offload_images
1194 = gomp_realloc_unlock (offload_images,
1195 (num_offload_images + 1)
1196 * sizeof (struct offload_image_descr));
1197 offload_images[num_offload_images].version = version;
1198 offload_images[num_offload_images].type = target_type;
1199 offload_images[num_offload_images].host_table = host_table;
1200 offload_images[num_offload_images].target_data = target_data;
1202 num_offload_images++;
1203 gomp_mutex_unlock (&register_lock);
1206 void
1207 GOMP_offload_register (const void *host_table, int target_type,
1208 const void *target_data)
1210 GOMP_offload_register_ver (0, host_table, target_type, target_data);
1213 /* This function should be called from every offload image while unloading.
1214 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1215 the target, and TARGET_DATA needed by target plugin. */
1217 void
1218 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1219 int target_type, const void *target_data)
1221 int i;
1223 gomp_mutex_lock (&register_lock);
1225 /* Unload image from all initialized devices. */
1226 for (i = 0; i < num_devices; i++)
1228 struct gomp_device_descr *devicep = &devices[i];
1229 gomp_mutex_lock (&devicep->lock);
1230 if (devicep->type == target_type
1231 && devicep->state == GOMP_DEVICE_INITIALIZED)
1232 gomp_unload_image_from_device (devicep, version,
1233 host_table, target_data);
1234 gomp_mutex_unlock (&devicep->lock);
1237 /* Remove image from array of pending images. */
1238 for (i = 0; i < num_offload_images; i++)
1239 if (offload_images[i].target_data == target_data)
1241 offload_images[i] = offload_images[--num_offload_images];
1242 break;
1245 gomp_mutex_unlock (&register_lock);
1248 void
1249 GOMP_offload_unregister (const void *host_table, int target_type,
1250 const void *target_data)
1252 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1255 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1256 must be locked on entry, and remains locked on return. */
1258 attribute_hidden void
1259 gomp_init_device (struct gomp_device_descr *devicep)
1261 int i;
1262 if (!devicep->init_device_func (devicep->target_id))
1264 gomp_mutex_unlock (&devicep->lock);
1265 gomp_fatal ("device initialization failed");
1268 /* Load to device all images registered by the moment. */
1269 for (i = 0; i < num_offload_images; i++)
1271 struct offload_image_descr *image = &offload_images[i];
1272 if (image->type == devicep->type)
1273 gomp_load_image_to_device (devicep, image->version,
1274 image->host_table, image->target_data,
1275 false);
1278 devicep->state = GOMP_DEVICE_INITIALIZED;
1281 attribute_hidden void
1282 gomp_unload_device (struct gomp_device_descr *devicep)
1284 if (devicep->state == GOMP_DEVICE_INITIALIZED)
1286 unsigned i;
1288 /* Unload from device all images registered at the moment. */
1289 for (i = 0; i < num_offload_images; i++)
1291 struct offload_image_descr *image = &offload_images[i];
1292 if (image->type == devicep->type)
1293 gomp_unload_image_from_device (devicep, image->version,
1294 image->host_table,
1295 image->target_data);
1300 /* Free address mapping tables. MM must be locked on entry, and remains locked
1301 on return. */
1303 attribute_hidden void
1304 gomp_free_memmap (struct splay_tree_s *mem_map)
1306 while (mem_map->root)
1308 struct target_mem_desc *tgt = mem_map->root->key.tgt;
1310 splay_tree_remove (mem_map, &mem_map->root->key);
1311 free (tgt->array);
1312 free (tgt);
1316 /* Host fallback for GOMP_target{,_ext} routines. */
1318 static void
1319 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1321 struct gomp_thread old_thr, *thr = gomp_thread ();
1322 old_thr = *thr;
1323 memset (thr, '\0', sizeof (*thr));
1324 if (gomp_places_list)
1326 thr->place = old_thr.place;
1327 thr->ts.place_partition_len = gomp_places_list_len;
1329 fn (hostaddrs);
1330 gomp_free_thread (thr);
1331 *thr = old_thr;
1334 /* Calculate alignment and size requirements of a private copy of data shared
1335 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1337 static inline void
1338 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1339 unsigned short *kinds, size_t *tgt_align,
1340 size_t *tgt_size)
1342 size_t i;
1343 for (i = 0; i < mapnum; i++)
1344 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1346 size_t align = (size_t) 1 << (kinds[i] >> 8);
1347 if (*tgt_align < align)
1348 *tgt_align = align;
1349 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1350 *tgt_size += sizes[i];
1354 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1356 static inline void
1357 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1358 size_t *sizes, unsigned short *kinds, size_t tgt_align,
1359 size_t tgt_size)
1361 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1362 if (al)
1363 tgt += tgt_align - al;
1364 tgt_size = 0;
1365 size_t i;
1366 for (i = 0; i < mapnum; i++)
1367 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1369 size_t align = (size_t) 1 << (kinds[i] >> 8);
1370 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1371 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1372 hostaddrs[i] = tgt + tgt_size;
1373 tgt_size = tgt_size + sizes[i];
1377 /* Helper function of GOMP_target{,_ext} routines. */
1379 static void *
1380 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1381 void (*host_fn) (void *))
1383 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1384 return (void *) host_fn;
1385 else
1387 gomp_mutex_lock (&devicep->lock);
1388 if (devicep->state == GOMP_DEVICE_FINALIZED)
1390 gomp_mutex_unlock (&devicep->lock);
1391 return NULL;
1394 struct splay_tree_key_s k;
1395 k.host_start = (uintptr_t) host_fn;
1396 k.host_end = k.host_start + 1;
1397 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1398 gomp_mutex_unlock (&devicep->lock);
1399 if (tgt_fn == NULL)
1400 return NULL;
1402 return (void *) tgt_fn->tgt_offset;
1406 /* Called when encountering a target directive. If DEVICE
1407 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1408 GOMP_DEVICE_HOST_FALLBACK (or any value
1409 larger than last available hw device), use host fallback.
1410 FN is address of host code, UNUSED is part of the current ABI, but
1411 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1412 with MAPNUM entries, with addresses of the host objects,
1413 sizes of the host objects (resp. for pointer kind pointer bias
1414 and assumed sizeof (void *) size) and kinds. */
1416 void
1417 GOMP_target (int device, void (*fn) (void *), const void *unused,
1418 size_t mapnum, void **hostaddrs, size_t *sizes,
1419 unsigned char *kinds)
1421 struct gomp_device_descr *devicep = resolve_device (device);
1423 void *fn_addr;
1424 if (devicep == NULL
1425 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1426 /* All shared memory devices should use the GOMP_target_ext function. */
1427 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
1428 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
1429 return gomp_target_fallback (fn, hostaddrs);
1431 struct target_mem_desc *tgt_vars
1432 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1433 GOMP_MAP_VARS_TARGET);
1434 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1435 NULL);
1436 gomp_unmap_vars (tgt_vars, true);
1439 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1440 and several arguments have been added:
1441 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1442 DEPEND is array of dependencies, see GOMP_task for details.
1444 ARGS is a pointer to an array consisting of a variable number of both
1445 device-independent and device-specific arguments, which can take one two
1446 elements where the first specifies for which device it is intended, the type
1447 and optionally also the value. If the value is not present in the first
1448 one, the whole second element the actual value. The last element of the
1449 array is a single NULL. Among the device independent can be for example
1450 NUM_TEAMS and THREAD_LIMIT.
1452 NUM_TEAMS is positive if GOMP_teams will be called in the body with
1453 that value, or 1 if teams construct is not present, or 0, if
1454 teams construct does not have num_teams clause and so the choice is
1455 implementation defined, and -1 if it can't be determined on the host
1456 what value will GOMP_teams have on the device.
1457 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1458 body with that value, or 0, if teams construct does not have thread_limit
1459 clause or the teams construct is not present, or -1 if it can't be
1460 determined on the host what value will GOMP_teams have on the device. */
1462 void
1463 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
1464 void **hostaddrs, size_t *sizes, unsigned short *kinds,
1465 unsigned int flags, void **depend, void **args)
1467 struct gomp_device_descr *devicep = resolve_device (device);
1468 size_t tgt_align = 0, tgt_size = 0;
1469 bool fpc_done = false;
1471 if (flags & GOMP_TARGET_FLAG_NOWAIT)
1473 struct gomp_thread *thr = gomp_thread ();
1474 /* Create a team if we don't have any around, as nowait
1475 target tasks make sense to run asynchronously even when
1476 outside of any parallel. */
1477 if (__builtin_expect (thr->ts.team == NULL, 0))
1479 struct gomp_team *team = gomp_new_team (1);
1480 struct gomp_task *task = thr->task;
1481 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
1482 team->prev_ts = thr->ts;
1483 thr->ts.team = team;
1484 thr->ts.team_id = 0;
1485 thr->ts.work_share = &team->work_shares[0];
1486 thr->ts.last_work_share = NULL;
1487 #ifdef HAVE_SYNC_BUILTINS
1488 thr->ts.single_count = 0;
1489 #endif
1490 thr->ts.static_trip = 0;
1491 thr->task = &team->implicit_task[0];
1492 gomp_init_task (thr->task, NULL, icv);
1493 if (task)
1495 thr->task = task;
1496 gomp_end_task ();
1497 free (task);
1498 thr->task = &team->implicit_task[0];
1500 else
1501 pthread_setspecific (gomp_thread_destructor, thr);
1503 if (thr->ts.team
1504 && !thr->task->final_task)
1506 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
1507 sizes, kinds, flags, depend, args,
1508 GOMP_TARGET_TASK_BEFORE_MAP);
1509 return;
1513 /* If there are depend clauses, but nowait is not present
1514 (or we are in a final task), block the parent task until the
1515 dependencies are resolved and then just continue with the rest
1516 of the function as if it is a merged task. */
1517 if (depend != NULL)
1519 struct gomp_thread *thr = gomp_thread ();
1520 if (thr->task && thr->task->depend_hash)
1522 /* If we might need to wait, copy firstprivate now. */
1523 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1524 &tgt_align, &tgt_size);
1525 if (tgt_align)
1527 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1528 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1529 tgt_align, tgt_size);
1531 fpc_done = true;
1532 gomp_task_maybe_wait_for_dependencies (depend);
1536 void *fn_addr;
1537 if (devicep == NULL
1538 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1539 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
1540 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1542 if (!fpc_done)
1544 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1545 &tgt_align, &tgt_size);
1546 if (tgt_align)
1548 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1549 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1550 tgt_align, tgt_size);
1553 gomp_target_fallback (fn, hostaddrs);
1554 return;
1557 struct target_mem_desc *tgt_vars;
1558 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1560 if (!fpc_done)
1562 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1563 &tgt_align, &tgt_size);
1564 if (tgt_align)
1566 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1567 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1568 tgt_align, tgt_size);
1571 tgt_vars = NULL;
1573 else
1574 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
1575 true, GOMP_MAP_VARS_TARGET);
1576 devicep->run_func (devicep->target_id, fn_addr,
1577 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
1578 args);
1579 if (tgt_vars)
1580 gomp_unmap_vars (tgt_vars, true);
1583 /* Host fallback for GOMP_target_data{,_ext} routines. */
1585 static void
1586 gomp_target_data_fallback (void)
1588 struct gomp_task_icv *icv = gomp_icv (false);
1589 if (icv->target_data)
1591 /* Even when doing a host fallback, if there are any active
1592 #pragma omp target data constructs, need to remember the
1593 new #pragma omp target data, otherwise GOMP_target_end_data
1594 would get out of sync. */
1595 struct target_mem_desc *tgt
1596 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1597 GOMP_MAP_VARS_DATA);
1598 tgt->prev = icv->target_data;
1599 icv->target_data = tgt;
1603 void
1604 GOMP_target_data (int device, const void *unused, size_t mapnum,
1605 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1607 struct gomp_device_descr *devicep = resolve_device (device);
1609 if (devicep == NULL
1610 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1611 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1612 return gomp_target_data_fallback ();
1614 struct target_mem_desc *tgt
1615 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1616 GOMP_MAP_VARS_DATA);
1617 struct gomp_task_icv *icv = gomp_icv (true);
1618 tgt->prev = icv->target_data;
1619 icv->target_data = tgt;
1622 void
1623 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1624 size_t *sizes, unsigned short *kinds)
1626 struct gomp_device_descr *devicep = resolve_device (device);
1628 if (devicep == NULL
1629 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1630 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1631 return gomp_target_data_fallback ();
1633 struct target_mem_desc *tgt
1634 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1635 GOMP_MAP_VARS_DATA);
1636 struct gomp_task_icv *icv = gomp_icv (true);
1637 tgt->prev = icv->target_data;
1638 icv->target_data = tgt;
1641 void
1642 GOMP_target_end_data (void)
1644 struct gomp_task_icv *icv = gomp_icv (false);
1645 if (icv->target_data)
1647 struct target_mem_desc *tgt = icv->target_data;
1648 icv->target_data = tgt->prev;
1649 gomp_unmap_vars (tgt, true);
1653 void
1654 GOMP_target_update (int device, const void *unused, size_t mapnum,
1655 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1657 struct gomp_device_descr *devicep = resolve_device (device);
1659 if (devicep == NULL
1660 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1661 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1662 return;
1664 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1667 void
1668 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1669 size_t *sizes, unsigned short *kinds,
1670 unsigned int flags, void **depend)
1672 struct gomp_device_descr *devicep = resolve_device (device);
1674 /* If there are depend clauses, but nowait is not present,
1675 block the parent task until the dependencies are resolved
1676 and then just continue with the rest of the function as if it
1677 is a merged task. Until we are able to schedule task during
1678 variable mapping or unmapping, ignore nowait if depend clauses
1679 are not present. */
1680 if (depend != NULL)
1682 struct gomp_thread *thr = gomp_thread ();
1683 if (thr->task && thr->task->depend_hash)
1685 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1686 && thr->ts.team
1687 && !thr->task->final_task)
1689 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1690 mapnum, hostaddrs, sizes, kinds,
1691 flags | GOMP_TARGET_FLAG_UPDATE,
1692 depend, NULL, GOMP_TARGET_TASK_DATA))
1693 return;
1695 else
1697 struct gomp_team *team = thr->ts.team;
1698 /* If parallel or taskgroup has been cancelled, don't start new
1699 tasks. */
1700 if (team
1701 && (gomp_team_barrier_cancelled (&team->barrier)
1702 || (thr->task->taskgroup
1703 && thr->task->taskgroup->cancelled)))
1704 return;
1706 gomp_task_maybe_wait_for_dependencies (depend);
1711 if (devicep == NULL
1712 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1713 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1714 return;
1716 struct gomp_thread *thr = gomp_thread ();
1717 struct gomp_team *team = thr->ts.team;
1718 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1719 if (team
1720 && (gomp_team_barrier_cancelled (&team->barrier)
1721 || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
1722 return;
1724 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
1727 static void
1728 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
1729 void **hostaddrs, size_t *sizes, unsigned short *kinds)
1731 const int typemask = 0xff;
1732 size_t i;
1733 gomp_mutex_lock (&devicep->lock);
1734 if (devicep->state == GOMP_DEVICE_FINALIZED)
1736 gomp_mutex_unlock (&devicep->lock);
1737 return;
1740 for (i = 0; i < mapnum; i++)
1742 struct splay_tree_key_s cur_node;
1743 unsigned char kind = kinds[i] & typemask;
1744 switch (kind)
1746 case GOMP_MAP_FROM:
1747 case GOMP_MAP_ALWAYS_FROM:
1748 case GOMP_MAP_DELETE:
1749 case GOMP_MAP_RELEASE:
1750 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1751 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
1752 cur_node.host_start = (uintptr_t) hostaddrs[i];
1753 cur_node.host_end = cur_node.host_start + sizes[i];
1754 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1755 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1756 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
1757 : splay_tree_lookup (&devicep->mem_map, &cur_node);
1758 if (!k)
1759 continue;
1761 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
1762 k->refcount--;
1763 if ((kind == GOMP_MAP_DELETE
1764 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
1765 && k->refcount != REFCOUNT_INFINITY)
1766 k->refcount = 0;
1768 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
1769 || kind == GOMP_MAP_ALWAYS_FROM)
1770 gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
1771 (void *) (k->tgt->tgt_start + k->tgt_offset
1772 + cur_node.host_start
1773 - k->host_start),
1774 cur_node.host_end - cur_node.host_start);
1775 if (k->refcount == 0)
1777 splay_tree_remove (&devicep->mem_map, k);
1778 if (k->link_key)
1779 splay_tree_insert (&devicep->mem_map,
1780 (splay_tree_node) k->link_key);
1781 if (k->tgt->refcount > 1)
1782 k->tgt->refcount--;
1783 else
1784 gomp_unmap_tgt (k->tgt);
1787 break;
1788 default:
1789 gomp_mutex_unlock (&devicep->lock);
1790 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1791 kind);
1795 gomp_mutex_unlock (&devicep->lock);
1798 void
1799 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1800 size_t *sizes, unsigned short *kinds,
1801 unsigned int flags, void **depend)
1803 struct gomp_device_descr *devicep = resolve_device (device);
1805 /* If there are depend clauses, but nowait is not present,
1806 block the parent task until the dependencies are resolved
1807 and then just continue with the rest of the function as if it
1808 is a merged task. Until we are able to schedule task during
1809 variable mapping or unmapping, ignore nowait if depend clauses
1810 are not present. */
1811 if (depend != NULL)
1813 struct gomp_thread *thr = gomp_thread ();
1814 if (thr->task && thr->task->depend_hash)
1816 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1817 && thr->ts.team
1818 && !thr->task->final_task)
1820 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1821 mapnum, hostaddrs, sizes, kinds,
1822 flags, depend, NULL,
1823 GOMP_TARGET_TASK_DATA))
1824 return;
1826 else
1828 struct gomp_team *team = thr->ts.team;
1829 /* If parallel or taskgroup has been cancelled, don't start new
1830 tasks. */
1831 if (team
1832 && (gomp_team_barrier_cancelled (&team->barrier)
1833 || (thr->task->taskgroup
1834 && thr->task->taskgroup->cancelled)))
1835 return;
1837 gomp_task_maybe_wait_for_dependencies (depend);
1842 if (devicep == NULL
1843 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1844 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1845 return;
1847 struct gomp_thread *thr = gomp_thread ();
1848 struct gomp_team *team = thr->ts.team;
1849 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1850 if (team
1851 && (gomp_team_barrier_cancelled (&team->barrier)
1852 || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
1853 return;
1855 size_t i;
1856 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
1857 for (i = 0; i < mapnum; i++)
1858 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
1860 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
1861 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
1862 i += sizes[i];
1864 else
1865 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
1866 true, GOMP_MAP_VARS_ENTER_DATA);
1867 else
1868 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
1871 bool
1872 gomp_target_task_fn (void *data)
1874 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
1875 struct gomp_device_descr *devicep = ttask->devicep;
1877 if (ttask->fn != NULL)
1879 void *fn_addr;
1880 if (devicep == NULL
1881 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1882 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
1883 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1885 ttask->state = GOMP_TARGET_TASK_FALLBACK;
1886 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
1887 return false;
1890 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1892 if (ttask->tgt)
1893 gomp_unmap_vars (ttask->tgt, true);
1894 return false;
1897 void *actual_arguments;
1898 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1900 ttask->tgt = NULL;
1901 actual_arguments = ttask->hostaddrs;
1903 else
1905 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
1906 NULL, ttask->sizes, ttask->kinds, true,
1907 GOMP_MAP_VARS_TARGET);
1908 actual_arguments = (void *) ttask->tgt->tgt_start;
1910 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
1912 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
1913 ttask->args, (void *) ttask);
1914 return true;
1916 else if (devicep == NULL
1917 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1918 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1919 return false;
1921 size_t i;
1922 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
1923 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
1924 ttask->kinds, true);
1925 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
1926 for (i = 0; i < ttask->mapnum; i++)
1927 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
1929 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
1930 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
1931 GOMP_MAP_VARS_ENTER_DATA);
1932 i += ttask->sizes[i];
1934 else
1935 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
1936 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
1937 else
1938 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
1939 ttask->kinds);
1940 return false;
1943 void
1944 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
1946 if (thread_limit)
1948 struct gomp_task_icv *icv = gomp_icv (true);
1949 icv->thread_limit_var
1950 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
1952 (void) num_teams;
1955 void *
1956 omp_target_alloc (size_t size, int device_num)
1958 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
1959 return malloc (size);
1961 if (device_num < 0)
1962 return NULL;
1964 struct gomp_device_descr *devicep = resolve_device (device_num);
1965 if (devicep == NULL)
1966 return NULL;
1968 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1969 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1970 return malloc (size);
1972 gomp_mutex_lock (&devicep->lock);
1973 void *ret = devicep->alloc_func (devicep->target_id, size);
1974 gomp_mutex_unlock (&devicep->lock);
1975 return ret;
1978 void
1979 omp_target_free (void *device_ptr, int device_num)
1981 if (device_ptr == NULL)
1982 return;
1984 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
1986 free (device_ptr);
1987 return;
1990 if (device_num < 0)
1991 return;
1993 struct gomp_device_descr *devicep = resolve_device (device_num);
1994 if (devicep == NULL)
1995 return;
1997 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1998 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2000 free (device_ptr);
2001 return;
2004 gomp_mutex_lock (&devicep->lock);
2005 gomp_free_device_memory (devicep, device_ptr);
2006 gomp_mutex_unlock (&devicep->lock);
2010 omp_target_is_present (void *ptr, int device_num)
2012 if (ptr == NULL)
2013 return 1;
2015 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2016 return 1;
2018 if (device_num < 0)
2019 return 0;
2021 struct gomp_device_descr *devicep = resolve_device (device_num);
2022 if (devicep == NULL)
2023 return 0;
2025 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2026 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2027 return 1;
2029 gomp_mutex_lock (&devicep->lock);
2030 struct splay_tree_s *mem_map = &devicep->mem_map;
2031 struct splay_tree_key_s cur_node;
2033 cur_node.host_start = (uintptr_t) ptr;
2034 cur_node.host_end = cur_node.host_start;
2035 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2036 int ret = n != NULL;
2037 gomp_mutex_unlock (&devicep->lock);
2038 return ret;
2042 omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
2043 size_t src_offset, int dst_device_num, int src_device_num)
2045 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2046 bool ret;
2048 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2050 if (dst_device_num < 0)
2051 return EINVAL;
2053 dst_devicep = resolve_device (dst_device_num);
2054 if (dst_devicep == NULL)
2055 return EINVAL;
2057 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2058 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2059 dst_devicep = NULL;
2061 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2063 if (src_device_num < 0)
2064 return EINVAL;
2066 src_devicep = resolve_device (src_device_num);
2067 if (src_devicep == NULL)
2068 return EINVAL;
2070 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2071 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2072 src_devicep = NULL;
2074 if (src_devicep == NULL && dst_devicep == NULL)
2076 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2077 return 0;
2079 if (src_devicep == NULL)
2081 gomp_mutex_lock (&dst_devicep->lock);
2082 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2083 (char *) dst + dst_offset,
2084 (char *) src + src_offset, length);
2085 gomp_mutex_unlock (&dst_devicep->lock);
2086 return (ret ? 0 : EINVAL);
2088 if (dst_devicep == NULL)
2090 gomp_mutex_lock (&src_devicep->lock);
2091 ret = src_devicep->dev2host_func (src_devicep->target_id,
2092 (char *) dst + dst_offset,
2093 (char *) src + src_offset, length);
2094 gomp_mutex_unlock (&src_devicep->lock);
2095 return (ret ? 0 : EINVAL);
2097 if (src_devicep == dst_devicep)
2099 gomp_mutex_lock (&src_devicep->lock);
2100 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2101 (char *) dst + dst_offset,
2102 (char *) src + src_offset, length);
2103 gomp_mutex_unlock (&src_devicep->lock);
2104 return (ret ? 0 : EINVAL);
2106 return EINVAL;
2109 static int
2110 omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
2111 int num_dims, const size_t *volume,
2112 const size_t *dst_offsets,
2113 const size_t *src_offsets,
2114 const size_t *dst_dimensions,
2115 const size_t *src_dimensions,
2116 struct gomp_device_descr *dst_devicep,
2117 struct gomp_device_descr *src_devicep)
2119 size_t dst_slice = element_size;
2120 size_t src_slice = element_size;
2121 size_t j, dst_off, src_off, length;
2122 int i, ret;
2124 if (num_dims == 1)
2126 if (__builtin_mul_overflow (element_size, volume[0], &length)
2127 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2128 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2129 return EINVAL;
2130 if (dst_devicep == NULL && src_devicep == NULL)
2132 memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
2133 ret = 1;
2135 else if (src_devicep == NULL)
2136 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2137 (char *) dst + dst_off,
2138 (char *) src + src_off, length);
2139 else if (dst_devicep == NULL)
2140 ret = src_devicep->dev2host_func (src_devicep->target_id,
2141 (char *) dst + dst_off,
2142 (char *) src + src_off, length);
2143 else if (src_devicep == dst_devicep)
2144 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2145 (char *) dst + dst_off,
2146 (char *) src + src_off, length);
2147 else
2148 ret = 0;
2149 return ret ? 0 : EINVAL;
2152 /* FIXME: it would be nice to have some plugin function to handle
2153 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2154 be handled in the generic recursion below, and for host-host it
2155 should be used even for any num_dims >= 2. */
2157 for (i = 1; i < num_dims; i++)
2158 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2159 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2160 return EINVAL;
2161 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2162 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2163 return EINVAL;
2164 for (j = 0; j < volume[0]; j++)
2166 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2167 (char *) src + src_off,
2168 element_size, num_dims - 1,
2169 volume + 1, dst_offsets + 1,
2170 src_offsets + 1, dst_dimensions + 1,
2171 src_dimensions + 1, dst_devicep,
2172 src_devicep);
2173 if (ret)
2174 return ret;
2175 dst_off += dst_slice;
2176 src_off += src_slice;
2178 return 0;
2182 omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
2183 int num_dims, const size_t *volume,
2184 const size_t *dst_offsets,
2185 const size_t *src_offsets,
2186 const size_t *dst_dimensions,
2187 const size_t *src_dimensions,
2188 int dst_device_num, int src_device_num)
2190 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2192 if (!dst && !src)
2193 return INT_MAX;
2195 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2197 if (dst_device_num < 0)
2198 return EINVAL;
2200 dst_devicep = resolve_device (dst_device_num);
2201 if (dst_devicep == NULL)
2202 return EINVAL;
2204 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2205 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2206 dst_devicep = NULL;
2208 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2210 if (src_device_num < 0)
2211 return EINVAL;
2213 src_devicep = resolve_device (src_device_num);
2214 if (src_devicep == NULL)
2215 return EINVAL;
2217 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2218 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2219 src_devicep = NULL;
2222 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2223 return EINVAL;
2225 if (src_devicep)
2226 gomp_mutex_lock (&src_devicep->lock);
2227 else if (dst_devicep)
2228 gomp_mutex_lock (&dst_devicep->lock);
2229 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2230 volume, dst_offsets, src_offsets,
2231 dst_dimensions, src_dimensions,
2232 dst_devicep, src_devicep);
2233 if (src_devicep)
2234 gomp_mutex_unlock (&src_devicep->lock);
2235 else if (dst_devicep)
2236 gomp_mutex_unlock (&dst_devicep->lock);
2237 return ret;
2241 omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
2242 size_t device_offset, int device_num)
2244 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2245 return EINVAL;
2247 if (device_num < 0)
2248 return EINVAL;
2250 struct gomp_device_descr *devicep = resolve_device (device_num);
2251 if (devicep == NULL)
2252 return EINVAL;
2254 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2255 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2256 return EINVAL;
2258 gomp_mutex_lock (&devicep->lock);
2260 struct splay_tree_s *mem_map = &devicep->mem_map;
2261 struct splay_tree_key_s cur_node;
2262 int ret = EINVAL;
2264 cur_node.host_start = (uintptr_t) host_ptr;
2265 cur_node.host_end = cur_node.host_start + size;
2266 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2267 if (n)
2269 if (n->tgt->tgt_start + n->tgt_offset
2270 == (uintptr_t) device_ptr + device_offset
2271 && n->host_start <= cur_node.host_start
2272 && n->host_end >= cur_node.host_end)
2273 ret = 0;
2275 else
2277 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2278 tgt->array = gomp_malloc (sizeof (*tgt->array));
2279 tgt->refcount = 1;
2280 tgt->tgt_start = 0;
2281 tgt->tgt_end = 0;
2282 tgt->to_free = NULL;
2283 tgt->prev = NULL;
2284 tgt->list_count = 0;
2285 tgt->device_descr = devicep;
2286 splay_tree_node array = tgt->array;
2287 splay_tree_key k = &array->key;
2288 k->host_start = cur_node.host_start;
2289 k->host_end = cur_node.host_end;
2290 k->tgt = tgt;
2291 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2292 k->refcount = REFCOUNT_INFINITY;
2293 array->left = NULL;
2294 array->right = NULL;
2295 splay_tree_insert (&devicep->mem_map, array);
2296 ret = 0;
2298 gomp_mutex_unlock (&devicep->lock);
2299 return ret;
2303 omp_target_disassociate_ptr (void *ptr, int device_num)
2305 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2306 return EINVAL;
2308 if (device_num < 0)
2309 return EINVAL;
2311 struct gomp_device_descr *devicep = resolve_device (device_num);
2312 if (devicep == NULL)
2313 return EINVAL;
2315 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2316 return EINVAL;
2318 gomp_mutex_lock (&devicep->lock);
2320 struct splay_tree_s *mem_map = &devicep->mem_map;
2321 struct splay_tree_key_s cur_node;
2322 int ret = EINVAL;
2324 cur_node.host_start = (uintptr_t) ptr;
2325 cur_node.host_end = cur_node.host_start;
2326 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2327 if (n
2328 && n->host_start == cur_node.host_start
2329 && n->refcount == REFCOUNT_INFINITY
2330 && n->tgt->tgt_start == 0
2331 && n->tgt->to_free == NULL
2332 && n->tgt->refcount == 1
2333 && n->tgt->list_count == 0)
2335 splay_tree_remove (&devicep->mem_map, n);
2336 gomp_unmap_tgt (n->tgt);
2337 ret = 0;
2340 gomp_mutex_unlock (&devicep->lock);
2341 return ret;
2344 #ifdef PLUGIN_SUPPORT
2346 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2347 in PLUGIN_NAME.
2348 The handles of the found functions are stored in the corresponding fields
2349 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2351 static bool
2352 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2353 const char *plugin_name)
2355 const char *err = NULL, *last_missing = NULL;
2357 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2358 if (!plugin_handle)
2359 goto dl_fail;
2361 /* Check if all required functions are available in the plugin and store
2362 their handlers. None of the symbols can legitimately be NULL,
2363 so we don't need to check dlerror all the time. */
2364 #define DLSYM(f) \
2365 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2366 goto dl_fail
2367 /* Similar, but missing functions are not an error. Return false if
2368 failed, true otherwise. */
2369 #define DLSYM_OPT(f, n) \
2370 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2371 || (last_missing = #n, 0))
2373 DLSYM (version);
2374 if (device->version_func () != GOMP_VERSION)
2376 err = "plugin version mismatch";
2377 goto fail;
2380 DLSYM (get_name);
2381 DLSYM (get_caps);
2382 DLSYM (get_type);
2383 DLSYM (get_num_devices);
2384 DLSYM (init_device);
2385 DLSYM (fini_device);
2386 DLSYM (load_image);
2387 DLSYM (unload_image);
2388 DLSYM (alloc);
2389 DLSYM (free);
2390 DLSYM (dev2host);
2391 DLSYM (host2dev);
2392 device->capabilities = device->get_caps_func ();
2393 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2395 DLSYM (run);
2396 DLSYM (async_run);
2397 DLSYM_OPT (can_run, can_run);
2398 DLSYM (dev2dev);
2400 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2402 if (!DLSYM_OPT (openacc.exec, openacc_parallel)
2403 || !DLSYM_OPT (openacc.register_async_cleanup,
2404 openacc_register_async_cleanup)
2405 || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2406 || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2407 || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2408 || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2409 || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2410 || !DLSYM_OPT (openacc.async_wait_all_async,
2411 openacc_async_wait_all_async)
2412 || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2413 || !DLSYM_OPT (openacc.create_thread_data,
2414 openacc_create_thread_data)
2415 || !DLSYM_OPT (openacc.destroy_thread_data,
2416 openacc_destroy_thread_data))
2418 /* Require all the OpenACC handlers if we have
2419 GOMP_OFFLOAD_CAP_OPENACC_200. */
2420 err = "plugin missing OpenACC handler function";
2421 goto fail;
2424 unsigned cuda = 0;
2425 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2426 openacc_get_current_cuda_device);
2427 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2428 openacc_get_current_cuda_context);
2429 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
2430 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
2431 if (cuda && cuda != 4)
2433 /* Make sure all the CUDA functions are there if any of them are. */
2434 err = "plugin missing OpenACC CUDA handler function";
2435 goto fail;
2438 #undef DLSYM
2439 #undef DLSYM_OPT
2441 return 1;
2443 dl_fail:
2444 err = dlerror ();
2445 fail:
2446 gomp_error ("while loading %s: %s", plugin_name, err);
2447 if (last_missing)
2448 gomp_error ("missing function was %s", last_missing);
2449 if (plugin_handle)
2450 dlclose (plugin_handle);
2452 return 0;
2455 /* This function finalizes all initialized devices. */
2457 static void
2458 gomp_target_fini (void)
2460 int i;
2461 for (i = 0; i < num_devices; i++)
2463 bool ret = true;
2464 struct gomp_device_descr *devicep = &devices[i];
2465 gomp_mutex_lock (&devicep->lock);
2466 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2468 ret = devicep->fini_device_func (devicep->target_id);
2469 devicep->state = GOMP_DEVICE_FINALIZED;
2471 gomp_mutex_unlock (&devicep->lock);
2472 if (!ret)
2473 gomp_fatal ("device finalization failed");
2477 /* This function initializes the runtime needed for offloading.
2478 It parses the list of offload targets and tries to load the plugins for
2479 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2480 will be set, and the array DEVICES initialized, containing descriptors for
2481 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2482 by the others. */
2484 static void
2485 gomp_target_init (void)
2487 const char *prefix ="libgomp-plugin-";
2488 const char *suffix = SONAME_SUFFIX (1);
2489 const char *cur, *next;
2490 char *plugin_name;
2491 int i, new_num_devices;
2493 num_devices = 0;
2494 devices = NULL;
2496 cur = OFFLOAD_TARGETS;
2497 if (*cur)
2500 struct gomp_device_descr current_device;
2502 next = strchr (cur, ',');
2504 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
2505 + strlen (prefix) + strlen (suffix));
2506 if (!plugin_name)
2508 num_devices = 0;
2509 break;
2512 strcpy (plugin_name, prefix);
2513 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
2514 strcat (plugin_name, suffix);
2516 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2518 new_num_devices = current_device.get_num_devices_func ();
2519 if (new_num_devices >= 1)
2521 /* Augment DEVICES and NUM_DEVICES. */
2523 devices = realloc (devices, (num_devices + new_num_devices)
2524 * sizeof (struct gomp_device_descr));
2525 if (!devices)
2527 num_devices = 0;
2528 free (plugin_name);
2529 break;
2532 current_device.name = current_device.get_name_func ();
2533 /* current_device.capabilities has already been set. */
2534 current_device.type = current_device.get_type_func ();
2535 current_device.mem_map.root = NULL;
2536 current_device.state = GOMP_DEVICE_UNINITIALIZED;
2537 current_device.openacc.data_environ = NULL;
2538 for (i = 0; i < new_num_devices; i++)
2540 current_device.target_id = i;
2541 devices[num_devices] = current_device;
2542 gomp_mutex_init (&devices[num_devices].lock);
2543 num_devices++;
2548 free (plugin_name);
2549 cur = next + 1;
2551 while (next);
2553 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2554 NUM_DEVICES_OPENMP. */
2555 struct gomp_device_descr *devices_s
2556 = malloc (num_devices * sizeof (struct gomp_device_descr));
2557 if (!devices_s)
2559 num_devices = 0;
2560 free (devices);
2561 devices = NULL;
2563 num_devices_openmp = 0;
2564 for (i = 0; i < num_devices; i++)
2565 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2566 devices_s[num_devices_openmp++] = devices[i];
2567 int num_devices_after_openmp = num_devices_openmp;
2568 for (i = 0; i < num_devices; i++)
2569 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2570 devices_s[num_devices_after_openmp++] = devices[i];
2571 free (devices);
2572 devices = devices_s;
2574 for (i = 0; i < num_devices; i++)
2576 /* The 'devices' array can be moved (by the realloc call) until we have
2577 found all the plugins, so registering with the OpenACC runtime (which
2578 takes a copy of the pointer argument) must be delayed until now. */
2579 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2580 goacc_register (&devices[i]);
2583 if (atexit (gomp_target_fini) != 0)
2584 gomp_fatal ("atexit failed");
2587 #else /* PLUGIN_SUPPORT */
2588 /* If dlfcn.h is unavailable we always fallback to host execution.
2589 GOMP_target* routines are just stubs for this case. */
2590 static void
2591 gomp_target_init (void)
2594 #endif /* PLUGIN_SUPPORT */