/* libgomp/target.c (from official-gcc.git).  */

/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

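/* Translate DEVICE_ID (or the default-device-var ICV, if DEVICE_ID is
   GOMP_DEVICE_ICV) into a device descriptor, initializing the device on
   first use.  Returns NULL for out-of-range ids and finalized devices.  */
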
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

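/* Look up KEY in MEM_MAP.  For a zero-length key (host_start == host_end),
   retry the lookup with the range temporarily widened by one byte in each
   direction, so that a zero-length array section adjacent to or inside an
   existing mapping is still found.  */
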
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

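/* For example, with the thresholds above, two 1 KiB map(to:) regions whose
   device offsets are separated by a 2 KiB gap fall within
   MAX_COALESCE_BUF_GAP and end up in one chunk: a single 4 KiB transfer
   (including 2 KiB of harmless padding) replaces two separate copies.  */
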
/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			     "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

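/* Return the mapping kind at index IDX in KINDS, which holds unsigned
   shorts when SHORT_MAPKIND is set and unsigned chars otherwise.  */
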
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}

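/* Write to the device the pointer that represents the array section
   HOST_PTR (+ BIAS) points into: look up the already mapped target object,
   compute its device address minus BIAS, and store that value at device
   address TGT->tgt_start + TARGET_OFFSET.  A NULL HOST_PTR stores NULL.  */
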
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}

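/* Helper of gomp_map_vars: handle structure element I whose enclosing
   structure mapping N already exists, verifying that the element is part
   of the same target_mem_desc at the corresponding offset and failing
   otherwise.  */
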
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

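/* Return the device address corresponding to map entry I: an address inside
   an existing mapping if one was found, the (possibly rewritten) host
   address for entries with offset ~0, NULL for offset ~1, an address
   computed relative to the following entry for offset ~2, and otherwise an
   offset into the newly allocated device block.  */
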
static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
	   + tgt->list[i + 1].key->tgt_offset
	   + tgt->list[i + 1].offset
	   + (uintptr_t) hostaddrs[i]
	   - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}

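/* Map MAPNUM host address/size/kind triplets to DEVICEP.  Entries that are
   already mapped are reused; the remaining ones are placed into one newly
   allocated, suitably aligned device block, using the coalescing buffer
   above to batch small host-to-device copies.  PRAGMA_KIND determines the
   reference counting of the returned target_mem_desc.  */
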
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
	{
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start;
	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
	    }
	  cur_node.host_start -= n->host_start;
	  hostaddrs[i]
	    = (void *) (n->tgt->tgt_start + n->tgt_offset
			+ cur_node.host_start);
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 2;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
					     & typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 1;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  gomp_coalesce_buf_add (&cbuf, tgt_size,
				 cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = ~(uintptr_t) 1;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				kind & typemask, NULL);
      else
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_USE_DEVICE_PTR:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, cbufp);
	    else
	      {
		k->link_key = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != ~(size_t) 0)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = ~(size_t) 0;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->dynamic_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, aq,
				      (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt, aq,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j], cbufp);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, size_t mapnum,
		     void **hostaddrs, void **devaddrs, size_t *sizes,
		     void *kinds, bool short_mapkind,
		     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->link_key)
    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
  if (k->tgt->refcount > 1)
    k->tgt->refcount--;
  else
    {
      is_tgt_unmapped = true;
      gomp_unmap_tgt (k->tgt);
    }
  return is_tgt_unmapped;
}

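/* Drop one reference to TGT and unmap it when the count reaches zero.
   Takes a void * so it can also be used as an OpenACC async-queue
   callback.  */
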
static void
gomp_unref_tgt (void *ptr)
{
  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	gomp_remove_var (devicep, k);
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
		       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}

/* Unload the mappings described by TARGET_DATA from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the TYPE
   of the target, and the TARGET_DATA needed by the target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image into array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the TYPE
   of the target, and the TARGET_DATA needed by the target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}

/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}

/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE is
   GOMP_DEVICE_ICV, use the device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last available
   hw device), use host fallback.
   FN is the address of the host code, UNUSED is part of the current ABI,
   but we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects, sizes of the
   host objects (resp. for pointer kind the pointer bias and assumed
   sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments.  Each argument can take
   one or two elements, where the first specifies for which device it is
   intended, the type and optionally also the value.  If the value is not
   present in the first element, the whole second element is the actual
   value.  The last element of the array is a single NULL.  Among the
   device-independent arguments are, for example, NUM_TEAMS and
   THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value GOMP_teams will have on the device.  */

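/* For illustration only, a sketch of one possible ARGS encoding using the
   constants from gomp-constants.h (not necessarily what any particular
   compiler version emits): a single element combining the device selector,
   the argument id and its value, e.g. for num_teams(4) applying to all
   devices

     void *args[]
       = { (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
				 | GOMP_TARGET_ARG_NUM_TEAMS
				 | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
	   NULL };

   with a separate GOMP_TARGET_ARG_SUBSEQUENT_PARAM element used instead
   when the value does not fit into the high bits.  */
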
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);

  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, we need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

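/* Illustrative only (not part of the original source): GOMP_target_data
   is the runtime entry for a structured "target data" region, and
   GOMP_target_end_data below pops it again.  Roughly (buffer name
   invented):

       #pragma omp target data map(tofrom: buf[0:n])
       {
	 ...   // target regions here reuse the mapping of buf
       }
*/
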
void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

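/* Illustrative only (not part of the original source): a "target update"
   directive refreshes an existing mapping in one direction without
   remapping, e.g. (names invented):

       #pragma omp target update to(a[0:n])      // host -> device
       #pragma omp target update from(b[0:n])    // device -> host
*/
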
void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    {
	      splay_tree_remove (&devicep->mem_map, k);
	      if (k->link_key)
		splay_tree_insert (&devicep->mem_map,
				   (splay_tree_node) k->link_key);
	      if (k->tgt->refcount > 1)
		k->tgt->refcount--;
	      else
		gomp_unmap_tgt (k->tgt);
	    }

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}

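/* Illustrative only (not part of the original source): the unstructured
   counterparts of "target data".  GOMP_TARGET_FLAG_EXIT_DATA selects the
   gomp_exit_data path above, which drops reference counts and copies
   "from" data back to the host.  Sketch (array name invented):

       #pragma omp target enter data map(to: a[0:n])
       ...   // any number of target regions reuse the mapping
       #pragma omp target exit data map(from: a[0:n])
*/
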
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}

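/* Note (added commentary, not in the original source): a true return from
   gomp_target_task_fn means the work was handed to the plugin's
   async_run_func with the ttask pointer as its last argument.  The plugin
   is expected to use that pointer to signal completion back to libgomp
   (GOMP_PLUGIN_target_task_completion), after which the task is requeued
   in state GOMP_TARGET_TASK_FINISHED and the branch above unmaps the
   variables.  */
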
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

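/* Illustrative only (not part of the original source): GOMP_teams is
   emitted on entry to a teams construct and, as implemented here, only
   clamps the thread-limit ICV; num_teams is currently ignored.  E.g.:

       #pragma omp target teams num_teams(4) thread_limit(64)
       ...
*/
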
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

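/* Illustrative only (not part of the original source): the usual pairing
   of omp_target_alloc with omp_target_memcpy and omp_target_free; "n" and
   "host_buf" are invented and error handling is minimal:

       int dev = omp_get_default_device ();
       double *d = omp_target_alloc (n * sizeof (double), dev);
       if (d != NULL)
	 {
	   omp_target_memcpy (d, host_buf, n * sizeof (double), 0, 0,
			      dev, omp_get_initial_device ());
	   ...
	   omp_target_free (d, dev);
	 }
*/
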
void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}

int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}

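/* Illustrative only (not part of the original source): omp_target_memcpy
   follows errno-style conventions, returning 0 on success and EINVAL on
   failure, so callers typically check it directly (buffer names
   invented):

       if (omp_target_memcpy (dst_buf, src_buf, bytes, 0, 0, dev, dev))
	 fprintf (stderr, "device-to-device copy failed\n");
*/
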
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}

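/* Illustrative only (not part of the original source): copying a 2x3
   sub-block of a 4x5 row-major host matrix into the top-left corner of a
   4x5 device matrix.  volume gives the extent of the copied block per
   dimension, the offsets give per-dimension starting indices, and the
   dimensions arrays give the full shapes (all names invented):

       size_t volume[2]      = { 2, 3 };
       size_t dst_offsets[2] = { 0, 0 };
       size_t src_offsets[2] = { 1, 2 };
       size_t dst_dims[2]    = { 4, 5 };
       size_t src_dims[2]    = { 4, 5 };
       omp_target_memcpy_rect (d_mat, h_mat, sizeof (double), 2, volume,
			       dst_offsets, src_offsets, dst_dims,
			       src_dims, dev, omp_get_initial_device ());
*/
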
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

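/* Illustrative only (not part of the original source): associating memory
   from omp_target_alloc with a host buffer so that later map clauses
   reuse the device copy instead of allocating a fresh one.  A zero return
   means success; names are invented:

       void *d = omp_target_alloc (n * sizeof (float), dev);
       if (!omp_target_associate_ptr (h_buf, d, n * sizeof (float), 0, dev))
	 {
	   ...   // target regions mapping h_buf now use d
	   omp_target_disassociate_ptr (h_buf, dev);
	 }
       omp_target_free (d, dev);
*/
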
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= gomp_get_num_devices ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handles.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Evaluates to false
     if the lookup failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM (async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}

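/* Illustrative only (not part of the original source): for a field such
   as "alloc", DLSYM (alloc) expands to roughly

       if (!(device->alloc_func = dlsym (plugin_handle,
					 "GOMP_OFFLOAD_alloc")))
	 goto dl_fail;

   i.e. every required plugin entry point follows the GOMP_OFFLOAD_<name>
   naming convention and is resolved into the matching <name>_func field
   of the device descriptor.  */
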
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime for offloading.
   It parses the list of offload plugins and tries to load them.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
   followed by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devices = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    new_num_devices = current_device.get_num_devices_func ();
	    if (new_num_devices >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devices = realloc (devices, (num_devices + new_num_devices)
				   * sizeof (struct gomp_device_descr));
		if (!devices)
		  {
		    num_devices = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		current_device.openacc.data_environ = NULL;
		for (i = 0; i < new_num_devices; i++)
		  {
		    current_device.target_id = i;
		    devices[num_devices] = current_device;
		    gomp_mutex_init (&devices[num_devices].lock);
		    num_devices++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devices[i]);
    }

  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}

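/* Illustrative only (not part of the original source): with
   OFFLOAD_PLUGINS set to, say, "nvptx,hsa", the loop above concatenates
   prefix, list entry and suffix to form names such as
   "libgomp-plugin-nvptx" plus the SONAME_SUFFIX expansion (".so.1" on
   typical ELF targets), and dlopens each plugin in turn.  */
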
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */