libgomp/target.c
/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support for offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;
/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
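/* Note on the lookup strategy above (an illustrative reading of the code,
   with hypothetical addresses): a zero-length key [p, p) can never
   intersect any mapped range, so it is probed in widened forms: first
   [p, p+1) (p points into a mapping), then [p-1, p) (p points just past
   the end of a mapping), and finally verbatim.  For example, with a
   mapping of [0x1000, 0x2000), both p == 0x1000 and p == 0x2000 are
   found, which is what zero-length array sections require.  */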
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)
/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
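/* Worked example (hypothetical offsets): adding regions at
   tgt_start-relative offsets [0, 256), [320, 512) and [8192, 8704) via
   gomp_coalesce_buf_add below yields two chunks.  The 64-byte gap at 256
   is below MAX_COALESCE_BUF_GAP, so the first two regions merge into the
   chunk [0, 512) with use_cnt 2; the jump to 8192 exceeds the gap limit
   and starts a new chunk [8192, 8704).  A single region larger than
   MAX_COALESCE_BUF_SIZE is never added and is copied directly instead.  */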
/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    gomp_fatal ("internal libgomp cbuf error");
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                             "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
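/* Illustrative trace (hypothetical chunks [0, 512) and [8192, 8704)):
   a copy whose device offset is 8200 with size 16 hits the second chunk
   in the binary search above and is memcpy'd into cbuf->buf at offset
   8200 - 0; any offset that lands outside every chunk falls through to
   the direct host-to-device copy at the end of the function.  */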
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
                        newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
                              &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node,
                                      &tgt->list[i], kind & typemask, cbuf);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, cbuf);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}
static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
           + tgt->list[i + 1].key->tgt_offset
           + tgt->list[i + 1].offset
           + (uintptr_t) hostaddrs[i]
           - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}
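/* A summary of the sentinel offsets decoded above, as assigned by
   gomp_map_vars_internal below:
     ~(uintptr_t) 0  passes hostaddrs[i] through unchanged (NULL hosts,
                     GOMP_MAP_FIRSTPRIVATE_INT values, and use_device_ptr
                     addresses that were already translated to device
                     addresses);
     ~(uintptr_t) 1  yields a NULL pointer (zero-length array sections
                     that were not mapped, and GOMP_MAP_ALWAYS_POINTER
                     entries, whose pointer is instead written directly
                     into the mapped object);
     ~(uintptr_t) 2  marks a GOMP_MAP_STRUCT entry whose device address
                     is derived from the following list entry plus the
                     host-address difference.
   Any other offset is relative to tgt->tgt_start.  */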
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
        {
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start;
          splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("use_device_ptr pointer wasn't mapped");
            }
          cur_node.host_start -= n->host_start;
          hostaddrs[i]
            = (void *) (n->tgt->tgt_start + n->tgt_offset
                        + cur_node.host_start);
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 2;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
                                             & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 1;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          gomp_coalesce_buf_add (&cbuf, tgt_size,
                                 cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = ~(uintptr_t) 1;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                kind & typemask, NULL);
      else
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }
  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }
  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
        if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, cbufp);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_USE_DEVICE_PTR:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                      kind & typemask, cbufp);
            else
              {
                k->link_key = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target address
                       of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                if (field_tgt_clear != ~(size_t) 0)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (i == field_tgt_clear)
                      field_tgt_clear = ~(size_t) 0;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                k->dynamic_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, aq,
                                      (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i], cbufp);
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);

                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
                                                         j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j].key = k;
                          tgt->list[j].copy_from = false;
                          tgt->list[j].always_copy_from = false;
                          if (k->refcount != REFCOUNT_INFINITY)
                            k->refcount++;
                          gomp_map_pointer (tgt, aq,
                                            (uintptr_t) *(void **) hostaddrs[j],
                                            k->tgt_offset
                                            + ((uintptr_t) hostaddrs[j]
                                               - k->host_start),
                                            sizes[j], cbufp);
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), NULL);
                  }
                array++;
              }
          }
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              cbufp);
        }
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep, aq,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, size_t mapnum,
                     void **hostaddrs, void **devaddrs, size_t *sizes,
                     void *kinds, bool short_mapkind,
                     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}
attribute_hidden void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}
attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->link_key)
    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
  if (k->tgt->refcount > 1)
    k->tgt->refcount--;
  else
    {
      is_tgt_unmapped = true;
      gomp_unmap_tgt (k->tgt);
    }
  return is_tgt_unmapped;
}
static void
gomp_unref_tgt (void *ptr)
{
  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
                          struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
        continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
      else if (k->refcount == 1)
        {
          k->refcount--;
          do_unmap = true;
        }

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
        gomp_copy_dev2host (devicep, aq,
                            (void *) (k->host_start + tgt->list[i].offset),
                            (void *) (k->tgt->tgt_start + k->tgt_offset
                                      + tgt->list[i].offset),
                            tgt->list[i].length);
      if (do_unmap)
        gomp_remove_var (devicep, k);
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
                                                (void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}
attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
                       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }

            void *hostaddr = (void *) cur_node.host_start;
            void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start);
            size_t size = cur_node.host_end - cur_node.host_start;

            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
                                  NULL);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}
/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses from
   HOST_TABLE and from the loaded target image.  We rely on the host and
   device compilers emitting variables and functions in the same order.  */
static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;
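  /* A sketch of the HOST_TABLE layout implied by the pointer arithmetic
     above (the struct name is hypothetical, shown only for illustration):

       struct host_table_layout
       {
         void **funcs_start;   // one entry per offloaded function
         void **funcs_end;
         void **vars_start;    // two entries per variable: address, size
         void **vars_end;      // (the size may carry the "link" bit below)
       };

     For example, two offloaded functions and one variable give
     num_funcs == 2 and num_vars == 1, the variable occupying two slots
     (its address and its size).  */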
  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }
  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }
  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
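  /* Illustrative example (hypothetical numbers): on a 64-bit host,
     link_bit is 1ULL << 63.  An "omp declare target link" variable of
     8 bytes appears in the tables with size 0x8000000000000008;
     (size_mask & size) recovers the real size 8 for host_end below,
     while (target_size & link_bit) selects REFCOUNT_LINK.  */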
  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Cannot map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
        = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
/* Unload the mappings described by TARGET_DATA from device DEVICEP.
   The device must be locked.  */
static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
                               unsigned version,
                               const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
                      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
        = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
        splay_tree_remove (&devicep->mem_map, &k);
      else
        {
          splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
          is_tgt_unmapped = gomp_remove_var (devicep, n);
        }
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */
void
GOMP_offload_register_ver (unsigned version, const void *host_table,
                           int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_load_image_to_device (devicep, version,
                                   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */
void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
                             int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_unload_image_from_device (devicep, version,
                                       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_load_image_to_device (devicep, image->version,
                                   image->host_table, image->target_data,
                                   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}
/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered so far.  */
      for (i = 0; i < num_offload_images; i++)
        {
          struct offload_image_descr *image = &offload_images[i];
          if (image->type == devicep->type)
            gomp_unload_image_from_device (devicep, image->version,
                                           image->host_table,
                                           image->target_data);
        }
    }
}
/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
                                     unsigned short *kinds, size_t *tgt_align,
                                     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        if (*tgt_align < align)
          *tgt_align = align;
        *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
        *tgt_size += sizes[i];
      }
}
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds, size_t tgt_align,
                        size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        tgt_size = (tgt_size + align - 1) & ~(align - 1);
        memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
        hostaddrs[i] = tgt + tgt_size;
        tgt_size = tgt_size + sizes[i];
      }
}
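/* Worked example for the two helpers above (hypothetical mapping): a
   firstprivate char (size 1, alignment 1) followed by a firstprivate
   double (size 8, alignment 8) gives tgt_align == 8 and tgt_size == 16
   in calculate_firstprivate_requirements: the char lands at offset 0,
   and the running size is rounded up from 1 to 8 before the double is
   added.  copy_firstprivate_data replays the same offsets, memcpys each
   variable into the aligned block and redirects hostaddrs[i] to the
   private copy, so later code passes the copies instead of the
   originals.  */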
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
                         void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
        {
          gomp_mutex_unlock (&devicep->lock);
          return NULL;
        }

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
        return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use the device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last available
   hw device), use host fallback.
   FN is the address of the host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kinds, the pointer bias
   with an assumed sizeof (void *) size) and kinds.  */
void
GOMP_target (int device, void (*fn) (void *), const void *unused,
             size_t mapnum, void **hostaddrs, size_t *sizes,
             unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
                     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is an array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements, where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first element, the whole second element is the actual value.  The
   last element of the array is a single NULL.  Among the device-independent
   arguments can be for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if the teams construct is not present, or 0, if the
   teams construct does not have a num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if the teams construct does not have a
   thread_limit clause or the teams construct is not present, or -1 if it
   can't be determined on the host what value GOMP_teams will have on the
   device.  */
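/* Illustrative encoding of one ARGS element (macro names from
   gomp-constants.h; the num_teams value 4 is hypothetical): a
   num_teams(4) clause whose value fits into the shifted field can be
   passed as the single element

     (void *) (GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS
               | (4 << GOMP_TARGET_ARG_VALUE_SHIFT))

   while a value too large for that field would instead set
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM and place the raw value in the
   following array element.  */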
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                 void **hostaddrs, size_t *sizes, unsigned short *kinds,
                 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
         target tasks make sense to run asynchronously even when
         outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
        {
          struct gomp_team *team = gomp_new_team (1);
          struct gomp_task *task = thr->task;
          struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
          team->prev_ts = thr->ts;
          thr->ts.team = team;
          thr->ts.team_id = 0;
          thr->ts.work_share = &team->work_shares[0];
          thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
          thr->ts.single_count = 0;
#endif
          thr->ts.static_trip = 0;
          thr->task = &team->implicit_task[0];
          gomp_init_task (thr->task, NULL, icv);
          if (task)
            {
              thr->task = task;
              gomp_end_task ();
              free (task);
              thr->task = &team->implicit_task[0];
            }
          else
            pthread_setspecific (gomp_thread_destructor, thr);
        }
      if (thr->ts.team
          && !thr->task->final_task)
        {
          gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
                                   sizes, kinds, flags, depend, args,
                                   GOMP_TARGET_TASK_BEFORE_MAP);
          return;
        }
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          /* If we might need to wait, copy firstprivate now.  */
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
          fpc_done = true;
          gomp_task_maybe_wait_for_dependencies (depend);
        }
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                              true, GOMP_MAP_VARS_TARGET);

  devicep->run_func (devicep->target_id, fn_addr,
                     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
                     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
         #pragma omp target data constructs, need to remember the
         new #pragma omp target data, otherwise GOMP_target_end_data
         would get out of sync.  */
      struct target_mem_desc *tgt
        = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
                         GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
1882 void
1883 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1884 size_t *sizes, unsigned short *kinds)
1886 struct gomp_device_descr *devicep = resolve_device (device);
1888 if (devicep == NULL
1889 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1890 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1891 return gomp_target_data_fallback ();
1893 struct target_mem_desc *tgt
1894 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1895 GOMP_MAP_VARS_DATA);
1896 struct gomp_task_icv *icv = gomp_icv (true);
1897 tgt->prev = icv->target_data;
1898 icv->target_data = tgt;
1901 void
1902 GOMP_target_end_data (void)
1904 struct gomp_task_icv *icv = gomp_icv (false);
1905 if (icv->target_data)
1907 struct target_mem_desc *tgt = icv->target_data;
1908 icv->target_data = tgt->prev;
1909 gomp_unmap_vars (tgt, true);
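/* Illustrative sketch (not part of this file): GOMP_target_data and
   GOMP_target_end_data act as push/pop on the per-task ICV->target_data
   chain (linked through TGT->prev), so nested constructs such as

     #pragma omp target data map(to: a[0:n])
     {
       #pragma omp target data map(to: b[0:n])
       {
       }   <- inner GOMP_target_end_data pops B's descriptor
     }     <- outer GOMP_target_end_data pops A's descriptor

   unmap in strict LIFO order, which also holds for the host-fallback
   descriptors created in gomp_target_data_fallback above.  */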
1913 void
1914 GOMP_target_update (int device, const void *unused, size_t mapnum,
1915 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1917 struct gomp_device_descr *devicep = resolve_device (device);
1919 if (devicep == NULL
1920 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1921 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1922 return;
1924 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
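/* Illustrative sketch (not part of this file): a directive such as

     #pragma omp target update from(a[0:n])

   is lowered to a GOMP_target_update call whose kinds encode
   GOMP_MAP_FROM, and gomp_update then copies the device copy of A back
   to the host.  With shared memory or host fallback the early return
   above makes the directive a no-op.  */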
1927 void
1928 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1929 size_t *sizes, unsigned short *kinds,
1930 unsigned int flags, void **depend)
1932 struct gomp_device_descr *devicep = resolve_device (device);
1934 /* If there are depend clauses, but nowait is not present,
1935 block the parent task until the dependencies are resolved
1936 and then just continue with the rest of the function as if it
1937 is a merged task. Until we are able to schedule tasks during
1938 variable mapping or unmapping, ignore nowait if depend clauses
1939 are not present. */
1940 if (depend != NULL)
1942 struct gomp_thread *thr = gomp_thread ();
1943 if (thr->task && thr->task->depend_hash)
1945 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1946 && thr->ts.team
1947 && !thr->task->final_task)
1949 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1950 mapnum, hostaddrs, sizes, kinds,
1951 flags | GOMP_TARGET_FLAG_UPDATE,
1952 depend, NULL, GOMP_TARGET_TASK_DATA))
1953 return;
1955 else
1957 struct gomp_team *team = thr->ts.team;
1958 /* If parallel or taskgroup has been cancelled, don't start new
1959 tasks. */
1960 if (__builtin_expect (gomp_cancel_var, 0) && team)
1962 if (gomp_team_barrier_cancelled (&team->barrier))
1963 return;
1964 if (thr->task->taskgroup)
1966 if (thr->task->taskgroup->cancelled)
1967 return;
1968 if (thr->task->taskgroup->workshare
1969 && thr->task->taskgroup->prev
1970 && thr->task->taskgroup->prev->cancelled)
1971 return;
1975 gomp_task_maybe_wait_for_dependencies (depend);
1980 if (devicep == NULL
1981 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1982 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1983 return;
1985 struct gomp_thread *thr = gomp_thread ();
1986 struct gomp_team *team = thr->ts.team;
1987 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1988 if (__builtin_expect (gomp_cancel_var, 0) && team)
1990 if (gomp_team_barrier_cancelled (&team->barrier))
1991 return;
1992 if (thr->task->taskgroup)
1994 if (thr->task->taskgroup->cancelled)
1995 return;
1996 if (thr->task->taskgroup->workshare
1997 && thr->task->taskgroup->prev
1998 && thr->task->taskgroup->prev->cancelled)
1999 return;
2003 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2006 static void
2007 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2008 void **hostaddrs, size_t *sizes, unsigned short *kinds)
2010 const int typemask = 0xff;
2011 size_t i;
2012 gomp_mutex_lock (&devicep->lock);
2013 if (devicep->state == GOMP_DEVICE_FINALIZED)
2015 gomp_mutex_unlock (&devicep->lock);
2016 return;
2019 for (i = 0; i < mapnum; i++)
2021 struct splay_tree_key_s cur_node;
2022 unsigned char kind = kinds[i] & typemask;
2023 switch (kind)
2025 case GOMP_MAP_FROM:
2026 case GOMP_MAP_ALWAYS_FROM:
2027 case GOMP_MAP_DELETE:
2028 case GOMP_MAP_RELEASE:
2029 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2030 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2031 cur_node.host_start = (uintptr_t) hostaddrs[i];
2032 cur_node.host_end = cur_node.host_start + sizes[i];
2033 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2034 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2035 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2036 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2037 if (!k)
2038 continue;
2040 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2041 k->refcount--;
2042 if ((kind == GOMP_MAP_DELETE
2043 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2044 && k->refcount != REFCOUNT_INFINITY)
2045 k->refcount = 0;
2047 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2048 || kind == GOMP_MAP_ALWAYS_FROM)
2049 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2050 (void *) (k->tgt->tgt_start + k->tgt_offset
2051 + cur_node.host_start
2052 - k->host_start),
2053 cur_node.host_end - cur_node.host_start);
2054 if (k->refcount == 0)
2056 splay_tree_remove (&devicep->mem_map, k);
2057 if (k->link_key)
2058 splay_tree_insert (&devicep->mem_map,
2059 (splay_tree_node) k->link_key);
2060 if (k->tgt->refcount > 1)
2061 k->tgt->refcount--;
2062 else
2063 gomp_unmap_tgt (k->tgt);
2066 break;
2067 default:
2068 gomp_mutex_unlock (&devicep->lock);
2069 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2070 kind);
2074 gomp_mutex_unlock (&devicep->lock);
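/* Illustrative sketch (not part of this file) of the reference-count
   handling above, which distinguishes 'release' from 'delete':

     #pragma omp target enter data map(to: a[0:n])
     #pragma omp target enter data map(to: a[0:n])      refcount == 2
     #pragma omp target exit data map(release: a[0:n])  refcount == 1
     #pragma omp target exit data map(delete: a[0:n])   forced to 0, unmapped

   A plain 'from' copies the data back only once the count drops to
   zero, while 'always, from' (GOMP_MAP_ALWAYS_FROM) copies back
   unconditionally.  */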
2077 void
2078 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2079 size_t *sizes, unsigned short *kinds,
2080 unsigned int flags, void **depend)
2082 struct gomp_device_descr *devicep = resolve_device (device);
2084 /* If there are depend clauses, but nowait is not present,
2085 block the parent task until the dependencies are resolved
2086 and then just continue with the rest of the function as if it
2087 is a merged task. Until we are able to schedule tasks during
2088 variable mapping or unmapping, ignore nowait if depend clauses
2089 are not present. */
2090 if (depend != NULL)
2092 struct gomp_thread *thr = gomp_thread ();
2093 if (thr->task && thr->task->depend_hash)
2095 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2096 && thr->ts.team
2097 && !thr->task->final_task)
2099 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2100 mapnum, hostaddrs, sizes, kinds,
2101 flags, depend, NULL,
2102 GOMP_TARGET_TASK_DATA))
2103 return;
2105 else
2107 struct gomp_team *team = thr->ts.team;
2108 /* If parallel or taskgroup has been cancelled, don't start new
2109 tasks. */
2110 if (__builtin_expect (gomp_cancel_var, 0) && team)
2112 if (gomp_team_barrier_cancelled (&team->barrier))
2113 return;
2114 if (thr->task->taskgroup)
2116 if (thr->task->taskgroup->cancelled)
2117 return;
2118 if (thr->task->taskgroup->workshare
2119 && thr->task->taskgroup->prev
2120 && thr->task->taskgroup->prev->cancelled)
2121 return;
2125 gomp_task_maybe_wait_for_dependencies (depend);
2130 if (devicep == NULL
2131 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2132 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2133 return;
2135 struct gomp_thread *thr = gomp_thread ();
2136 struct gomp_team *team = thr->ts.team;
2137 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2138 if (__builtin_expect (gomp_cancel_var, 0) && team)
2140 if (gomp_team_barrier_cancelled (&team->barrier))
2141 return;
2142 if (thr->task->taskgroup)
2144 if (thr->task->taskgroup->cancelled)
2145 return;
2146 if (thr->task->taskgroup->workshare
2147 && thr->task->taskgroup->prev
2148 && thr->task->taskgroup->prev->cancelled)
2149 return;
2153 size_t i;
2154 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2155 for (i = 0; i < mapnum; i++)
2156 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2158 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2159 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2160 i += sizes[i];
2162 else
2163 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2164 true, GOMP_MAP_VARS_ENTER_DATA);
2165 else
2166 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
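/* Illustrative sketch (not part of this file) of the GOMP_MAP_STRUCT
   encoding consumed above: for

     struct S { int x; double y; } s;
     #pragma omp target enter data map(to: s.x, s.y)

   the compiler emits one GOMP_MAP_STRUCT entry whose SIZES element is
   the number of field entries that follow (here 2), then the entries
   for S.X and S.Y themselves.  That is why gomp_map_vars is called
   with SIZES[i] + 1 entries and I is advanced by SIZES[i].  */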
2169 bool
2170 gomp_target_task_fn (void *data)
2172 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2173 struct gomp_device_descr *devicep = ttask->devicep;
2175 if (ttask->fn != NULL)
2177 void *fn_addr;
2178 if (devicep == NULL
2179 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2180 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2181 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2183 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2184 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2185 return false;
2188 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2190 if (ttask->tgt)
2191 gomp_unmap_vars (ttask->tgt, true);
2192 return false;
2195 void *actual_arguments;
2196 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2198 ttask->tgt = NULL;
2199 actual_arguments = ttask->hostaddrs;
2201 else
2203 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2204 NULL, ttask->sizes, ttask->kinds, true,
2205 GOMP_MAP_VARS_TARGET);
2206 actual_arguments = (void *) ttask->tgt->tgt_start;
2208 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2210 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2211 ttask->args, (void *) ttask);
2212 return true;
2214 else if (devicep == NULL
2215 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2216 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2217 return false;
2219 size_t i;
2220 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2221 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2222 ttask->kinds, true);
2223 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2224 for (i = 0; i < ttask->mapnum; i++)
2225 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2227 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2228 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2229 GOMP_MAP_VARS_ENTER_DATA);
2230 i += ttask->sizes[i];
2232 else
2233 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2234 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2235 else
2236 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2237 ttask->kinds);
2238 return false;
2241 void
2242 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2244 if (thread_limit)
2246 struct gomp_task_icv *icv = gomp_icv (true);
2247 icv->thread_limit_var
2248 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2250 (void) num_teams;
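/* Illustrative sketch (not part of this file): for

     #pragma omp teams num_teams(4) thread_limit(8)

   the compiler emits GOMP_teams (4, 8) ahead of the teams body.  As
   the code above shows, the host implementation only records
   THREAD_LIMIT in the ICV (values above INT_MAX are stored as
   UINT_MAX) and otherwise ignores NUM_TEAMS, running a single team.  */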
2253 void *
2254 omp_target_alloc (size_t size, int device_num)
2256 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2257 return malloc (size);
2259 if (device_num < 0)
2260 return NULL;
2262 struct gomp_device_descr *devicep = resolve_device (device_num);
2263 if (devicep == NULL)
2264 return NULL;
2266 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2267 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2268 return malloc (size);
2270 gomp_mutex_lock (&devicep->lock);
2271 void *ret = devicep->alloc_func (devicep->target_id, size);
2272 gomp_mutex_unlock (&devicep->lock);
2273 return ret;
2276 void
2277 omp_target_free (void *device_ptr, int device_num)
2279 if (device_ptr == NULL)
2280 return;
2282 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2284 free (device_ptr);
2285 return;
2288 if (device_num < 0)
2289 return;
2291 struct gomp_device_descr *devicep = resolve_device (device_num);
2292 if (devicep == NULL)
2293 return;
2295 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2296 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2298 free (device_ptr);
2299 return;
2302 gomp_mutex_lock (&devicep->lock);
2303 gomp_free_device_memory (devicep, device_ptr);
2304 gomp_mutex_unlock (&devicep->lock);
2308 omp_target_is_present (const void *ptr, int device_num)
2310 if (ptr == NULL)
2311 return 1;
2313 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2314 return 1;
2316 if (device_num < 0)
2317 return 0;
2319 struct gomp_device_descr *devicep = resolve_device (device_num);
2320 if (devicep == NULL)
2321 return 0;
2323 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2324 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2325 return 1;
2327 gomp_mutex_lock (&devicep->lock);
2328 struct splay_tree_s *mem_map = &devicep->mem_map;
2329 struct splay_tree_key_s cur_node;
2331 cur_node.host_start = (uintptr_t) ptr;
2332 cur_node.host_end = cur_node.host_start;
2333 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2334 int ret = n != NULL;
2335 gomp_mutex_unlock (&devicep->lock);
2336 return ret;
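/* Illustrative usage sketch (not part of this file), assuming a
   non-shared-memory offload device is the default device:

     int a[64];
     int dev = omp_get_default_device ();
     #pragma omp target enter data map(to: a)
     ... omp_target_is_present (a, dev) now returns 1 ...
     #pragma omp target exit data map(delete: a)
     ... and now returns 0 ...

   For a NULL pointer, the host device, or shared-memory devices the
   function above always reports presence.  */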
2340 omp_target_memcpy (void *dst, const void *src, size_t length,
2341 size_t dst_offset, size_t src_offset, int dst_device_num,
2342 int src_device_num)
2344 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2345 bool ret;
2347 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2349 if (dst_device_num < 0)
2350 return EINVAL;
2352 dst_devicep = resolve_device (dst_device_num);
2353 if (dst_devicep == NULL)
2354 return EINVAL;
2356 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2357 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2358 dst_devicep = NULL;
2360 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2362 if (src_device_num < 0)
2363 return EINVAL;
2365 src_devicep = resolve_device (src_device_num);
2366 if (src_devicep == NULL)
2367 return EINVAL;
2369 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2370 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2371 src_devicep = NULL;
2373 if (src_devicep == NULL && dst_devicep == NULL)
2375 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2376 return 0;
2378 if (src_devicep == NULL)
2380 gomp_mutex_lock (&dst_devicep->lock);
2381 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2382 (char *) dst + dst_offset,
2383 (char *) src + src_offset, length);
2384 gomp_mutex_unlock (&dst_devicep->lock);
2385 return (ret ? 0 : EINVAL);
2387 if (dst_devicep == NULL)
2389 gomp_mutex_lock (&src_devicep->lock);
2390 ret = src_devicep->dev2host_func (src_devicep->target_id,
2391 (char *) dst + dst_offset,
2392 (char *) src + src_offset, length);
2393 gomp_mutex_unlock (&src_devicep->lock);
2394 return (ret ? 0 : EINVAL);
2396 if (src_devicep == dst_devicep)
2398 gomp_mutex_lock (&src_devicep->lock);
2399 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2400 (char *) dst + dst_offset,
2401 (char *) src + src_offset, length);
2402 gomp_mutex_unlock (&src_devicep->lock);
2403 return (ret ? 0 : EINVAL);
2405 return EINVAL;
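/* Illustrative usage sketch (not part of this file), assuming device 0
   is a non-shared-memory offload device.  The arguments are, in order:
   both offsets 0, destination device 0, and the initial (host) device
   as the source:

     char host_buf[256] = "hello";
     void *dev_buf = omp_target_alloc (sizeof (host_buf), 0);
     if (dev_buf != NULL
         && omp_target_memcpy (dev_buf, host_buf, sizeof (host_buf),
                               0, 0, 0, omp_get_initial_device ()) == 0)
       {
         ... use DEV_BUF, e.g. via an is_device_ptr clause ...
       }
     omp_target_free (dev_buf, 0);

   Zero is returned on success and EINVAL on failure, matching the
   code above.  */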
2408 static int
2409 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2410 int num_dims, const size_t *volume,
2411 const size_t *dst_offsets,
2412 const size_t *src_offsets,
2413 const size_t *dst_dimensions,
2414 const size_t *src_dimensions,
2415 struct gomp_device_descr *dst_devicep,
2416 struct gomp_device_descr *src_devicep)
2418 size_t dst_slice = element_size;
2419 size_t src_slice = element_size;
2420 size_t j, dst_off, src_off, length;
2421 int i, ret;
2423 if (num_dims == 1)
2425 if (__builtin_mul_overflow (element_size, volume[0], &length)
2426 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2427 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2428 return EINVAL;
2429 if (dst_devicep == NULL && src_devicep == NULL)
2431 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2432 length);
2433 ret = 1;
2435 else if (src_devicep == NULL)
2436 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2437 (char *) dst + dst_off,
2438 (const char *) src + src_off,
2439 length);
2440 else if (dst_devicep == NULL)
2441 ret = src_devicep->dev2host_func (src_devicep->target_id,
2442 (char *) dst + dst_off,
2443 (const char *) src + src_off,
2444 length);
2445 else if (src_devicep == dst_devicep)
2446 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2447 (char *) dst + dst_off,
2448 (const char *) src + src_off,
2449 length);
2450 else
2451 ret = 0;
2452 return ret ? 0 : EINVAL;
2455 /* FIXME: it would be nice to have some plugin function to handle
2456 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2457 be handled in the generic recursion below, and for host-to-host
2458 copies that recursion should be used even for num_dims >= 2. */
2460 for (i = 1; i < num_dims; i++)
2461 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2462 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2463 return EINVAL;
2464 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2465 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2466 return EINVAL;
2467 for (j = 0; j < volume[0]; j++)
2469 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2470 (const char *) src + src_off,
2471 element_size, num_dims - 1,
2472 volume + 1, dst_offsets + 1,
2473 src_offsets + 1, dst_dimensions + 1,
2474 src_dimensions + 1, dst_devicep,
2475 src_devicep);
2476 if (ret)
2477 return ret;
2478 dst_off += dst_slice;
2479 src_off += src_slice;
2481 return 0;
2485 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2486 int num_dims, const size_t *volume,
2487 const size_t *dst_offsets,
2488 const size_t *src_offsets,
2489 const size_t *dst_dimensions,
2490 const size_t *src_dimensions,
2491 int dst_device_num, int src_device_num)
2493 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2495 if (!dst && !src)
2496 return INT_MAX;
2498 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2500 if (dst_device_num < 0)
2501 return EINVAL;
2503 dst_devicep = resolve_device (dst_device_num);
2504 if (dst_devicep == NULL)
2505 return EINVAL;
2507 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2508 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2509 dst_devicep = NULL;
2511 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2513 if (src_device_num < 0)
2514 return EINVAL;
2516 src_devicep = resolve_device (src_device_num);
2517 if (src_devicep == NULL)
2518 return EINVAL;
2520 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2521 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2522 src_devicep = NULL;
2525 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2526 return EINVAL;
2528 if (src_devicep)
2529 gomp_mutex_lock (&src_devicep->lock);
2530 else if (dst_devicep)
2531 gomp_mutex_lock (&dst_devicep->lock);
2532 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2533 volume, dst_offsets, src_offsets,
2534 dst_dimensions, src_dimensions,
2535 dst_devicep, src_devicep);
2536 if (src_devicep)
2537 gomp_mutex_unlock (&src_devicep->lock);
2538 else if (dst_devicep)
2539 gomp_mutex_unlock (&dst_devicep->lock);
2540 return ret;
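/* Illustrative usage sketch (not part of this file): copying a 2x3
   subrectangle starting at HOST[1][2] out of a 4x5 host matrix into a
   2x3 device buffer DEV_BUF previously obtained from omp_target_alloc
   on device 0:

     double host[4][5];
     size_t volume[2]   = { 2, 3 };
     size_t dst_offs[2] = { 0, 0 };
     size_t src_offs[2] = { 1, 2 };
     size_t dst_dims[2] = { 2, 3 };
     size_t src_dims[2] = { 4, 5 };
     int err = omp_target_memcpy_rect (dev_buf, host, sizeof (double),
                                       2, volume, dst_offs, src_offs,
                                       dst_dims, src_dims,
                                       0, omp_get_initial_device ());

   Zero indicates success; calling it with both DST and SRC NULL merely
   queries the supported number of dimensions (INT_MAX here).  */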
2544 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2545 size_t size, size_t device_offset, int device_num)
2547 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2548 return EINVAL;
2550 if (device_num < 0)
2551 return EINVAL;
2553 struct gomp_device_descr *devicep = resolve_device (device_num);
2554 if (devicep == NULL)
2555 return EINVAL;
2557 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2558 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2559 return EINVAL;
2561 gomp_mutex_lock (&devicep->lock);
2563 struct splay_tree_s *mem_map = &devicep->mem_map;
2564 struct splay_tree_key_s cur_node;
2565 int ret = EINVAL;
2567 cur_node.host_start = (uintptr_t) host_ptr;
2568 cur_node.host_end = cur_node.host_start + size;
2569 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2570 if (n)
2572 if (n->tgt->tgt_start + n->tgt_offset
2573 == (uintptr_t) device_ptr + device_offset
2574 && n->host_start <= cur_node.host_start
2575 && n->host_end >= cur_node.host_end)
2576 ret = 0;
2578 else
2580 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2581 tgt->array = gomp_malloc (sizeof (*tgt->array));
2582 tgt->refcount = 1;
2583 tgt->tgt_start = 0;
2584 tgt->tgt_end = 0;
2585 tgt->to_free = NULL;
2586 tgt->prev = NULL;
2587 tgt->list_count = 0;
2588 tgt->device_descr = devicep;
2589 splay_tree_node array = tgt->array;
2590 splay_tree_key k = &array->key;
2591 k->host_start = cur_node.host_start;
2592 k->host_end = cur_node.host_end;
2593 k->tgt = tgt;
2594 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2595 k->refcount = REFCOUNT_INFINITY;
2596 array->left = NULL;
2597 array->right = NULL;
2598 splay_tree_insert (&devicep->mem_map, array);
2599 ret = 0;
2601 gomp_mutex_unlock (&devicep->lock);
2602 return ret;
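/* Illustrative usage sketch (not part of this file): pairing an
   explicit device allocation with a host object, so that subsequent
   target constructs see it as already present (the entry is inserted
   above with REFCOUNT_INFINITY, so it is never implicitly unmapped):

     int a[64];
     void *d = omp_target_alloc (sizeof (a), 0);
     if (d != NULL
         && omp_target_associate_ptr (a, d, sizeof (a), 0, 0) == 0)
       {
         #pragma omp target device(0) map(tofrom: a)
         a[0] = 42;
         omp_target_disassociate_ptr (a, 0);
       }
     omp_target_free (d, 0);
*/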
2606 omp_target_disassociate_ptr (const void *ptr, int device_num)
2608 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2609 return EINVAL;
2611 if (device_num < 0)
2612 return EINVAL;
2614 struct gomp_device_descr *devicep = resolve_device (device_num);
2615 if (devicep == NULL)
2616 return EINVAL;
2618 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2619 return EINVAL;
2621 gomp_mutex_lock (&devicep->lock);
2623 struct splay_tree_s *mem_map = &devicep->mem_map;
2624 struct splay_tree_key_s cur_node;
2625 int ret = EINVAL;
2627 cur_node.host_start = (uintptr_t) ptr;
2628 cur_node.host_end = cur_node.host_start;
2629 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2630 if (n
2631 && n->host_start == cur_node.host_start
2632 && n->refcount == REFCOUNT_INFINITY
2633 && n->tgt->tgt_start == 0
2634 && n->tgt->to_free == NULL
2635 && n->tgt->refcount == 1
2636 && n->tgt->list_count == 0)
2638 splay_tree_remove (&devicep->mem_map, n);
2639 gomp_unmap_tgt (n->tgt);
2640 ret = 0;
2643 gomp_mutex_unlock (&devicep->lock);
2644 return ret;
2648 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2650 (void) kind;
2651 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2652 return gomp_pause_host ();
2653 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2654 return -1;
2655 /* Do nothing for target devices for now. */
2656 return 0;
2660 omp_pause_resource_all (omp_pause_resource_t kind)
2662 (void) kind;
2663 if (gomp_pause_host ())
2664 return -1;
2665 /* Do nothing for target devices for now. */
2666 return 0;
2669 ialias (omp_pause_resource)
2670 ialias (omp_pause_resource_all)
2672 #ifdef PLUGIN_SUPPORT
2674 /* This function tries to load a plugin for DEVICE. The name of the plugin
2675 is passed in PLUGIN_NAME.
2676 The handles of the found functions are stored in the corresponding fields
2677 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2679 static bool
2680 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2681 const char *plugin_name)
2683 const char *err = NULL, *last_missing = NULL;
2685 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2686 if (!plugin_handle)
2687 goto dl_fail;
2689 /* Check if all required functions are available in the plugin and store
2690 their handles. None of the symbols can legitimately be NULL,
2691 so we don't need to check dlerror all the time. */
2692 #define DLSYM(f) \
2693 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2694 goto dl_fail
2695 /* Similar, but a missing function is not an error; the expression
2696 evaluates to false if the lookup failed, true otherwise. */
2697 #define DLSYM_OPT(f, n) \
2698 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2699 || (last_missing = #n, 0))
2701 DLSYM (version);
2702 if (device->version_func () != GOMP_VERSION)
2704 err = "plugin version mismatch";
2705 goto fail;
2708 DLSYM (get_name);
2709 DLSYM (get_caps);
2710 DLSYM (get_type);
2711 DLSYM (get_num_devices);
2712 DLSYM (init_device);
2713 DLSYM (fini_device);
2714 DLSYM (load_image);
2715 DLSYM (unload_image);
2716 DLSYM (alloc);
2717 DLSYM (free);
2718 DLSYM (dev2host);
2719 DLSYM (host2dev);
2720 device->capabilities = device->get_caps_func ();
2721 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2723 DLSYM (run);
2724 DLSYM (async_run);
2725 DLSYM_OPT (can_run, can_run);
2726 DLSYM (dev2dev);
2728 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2730 if (!DLSYM_OPT (openacc.exec, openacc_exec)
2731 || !DLSYM_OPT (openacc.create_thread_data,
2732 openacc_create_thread_data)
2733 || !DLSYM_OPT (openacc.destroy_thread_data,
2734 openacc_destroy_thread_data)
2735 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
2736 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
2737 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
2738 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
2739 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
2740 || !DLSYM_OPT (openacc.async.queue_callback,
2741 openacc_async_queue_callback)
2742 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
2743 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
2744 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
2746 /* Require all the OpenACC handlers if we have
2747 GOMP_OFFLOAD_CAP_OPENACC_200. */
2748 err = "plugin missing OpenACC handler function";
2749 goto fail;
2752 unsigned cuda = 0;
2753 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2754 openacc_cuda_get_current_device);
2755 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2756 openacc_cuda_get_current_context);
2757 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2758 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2759 if (cuda && cuda != 4)
2761 /* Make sure all the CUDA functions are there if any of them are. */
2762 err = "plugin missing OpenACC CUDA handler function";
2763 goto fail;
2766 #undef DLSYM
2767 #undef DLSYM_OPT
2769 return true;
2771 dl_fail:
2772 err = dlerror ();
2773 fail:
2774 gomp_error ("while loading %s: %s", plugin_name, err);
2775 if (last_missing)
2776 gomp_error ("missing function was %s", last_missing);
2777 if (plugin_handle)
2778 dlclose (plugin_handle);
2780 return false;
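/* Illustrative sketch (not part of this file): a minimal plugin must
   export at least the symbols resolved by DLSYM above, e.g. a
   hypothetical stub might start with

     unsigned GOMP_OFFLOAD_version (void) { return GOMP_VERSION; }
     const char *GOMP_OFFLOAD_get_name (void) { return "stub"; }
     unsigned GOMP_OFFLOAD_get_caps (void) { return 0; }

   The capability bits returned by get_caps decide what else is
   required: GOMP_OFFLOAD_CAP_OPENMP_400 adds run, async_run and
   dev2dev, and GOMP_OFFLOAD_CAP_OPENACC_200 additionally requires the
   openacc_* handlers checked above.  */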
2783 /* This function finalizes all initialized devices. */
2785 static void
2786 gomp_target_fini (void)
2788 int i;
2789 for (i = 0; i < num_devices; i++)
2791 bool ret = true;
2792 struct gomp_device_descr *devicep = &devices[i];
2793 gomp_mutex_lock (&devicep->lock);
2794 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2795 ret = gomp_fini_device (devicep);
2796 gomp_mutex_unlock (&devicep->lock);
2797 if (!ret)
2798 gomp_fatal ("device finalization failed");
2802 /* This function initializes the runtime for offloading.
2803 It parses the list of offload plugins and tries to load them.
2804 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2805 will be set, and the array DEVICES initialized, containing descriptors for
2806 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
2807 by the others. */
2809 static void
2810 gomp_target_init (void)
2812 const char *prefix = "libgomp-plugin-";
2813 const char *suffix = SONAME_SUFFIX (1);
2814 const char *cur, *next;
2815 char *plugin_name;
2816 int i, new_num_devices;
2818 num_devices = 0;
2819 devices = NULL;
2821 cur = OFFLOAD_PLUGINS;
2822 if (*cur)
2825 struct gomp_device_descr current_device;
2826 size_t prefix_len, suffix_len, cur_len;
2828 next = strchr (cur, ',');
2830 prefix_len = strlen (prefix);
2831 cur_len = next ? next - cur : strlen (cur);
2832 suffix_len = strlen (suffix);
2834 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
2835 if (!plugin_name)
2837 num_devices = 0;
2838 break;
2841 memcpy (plugin_name, prefix, prefix_len);
2842 memcpy (plugin_name + prefix_len, cur, cur_len);
2843 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
2845 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2847 new_num_devices = current_device.get_num_devices_func ();
2848 if (new_num_devices >= 1)
2850 /* Augment DEVICES and NUM_DEVICES. */
2852 devices = realloc (devices, (num_devices + new_num_devices)
2853 * sizeof (struct gomp_device_descr));
2854 if (!devices)
2856 num_devices = 0;
2857 free (plugin_name);
2858 break;
2861 current_device.name = current_device.get_name_func ();
2862 /* current_device.capabilities has already been set. */
2863 current_device.type = current_device.get_type_func ();
2864 current_device.mem_map.root = NULL;
2865 current_device.state = GOMP_DEVICE_UNINITIALIZED;
2866 current_device.openacc.data_environ = NULL;
2867 for (i = 0; i < new_num_devices; i++)
2869 current_device.target_id = i;
2870 devices[num_devices] = current_device;
2871 gomp_mutex_init (&devices[num_devices].lock);
2872 num_devices++;
2877 free (plugin_name);
2878 cur = next + 1;
2880 while (next);
2882 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2883 NUM_DEVICES_OPENMP. */
2884 struct gomp_device_descr *devices_s
2885 = malloc (num_devices * sizeof (struct gomp_device_descr));
2886 if (!devices_s)
2888 num_devices = 0;
2889 free (devices);
2890 devices = NULL;
2892 num_devices_openmp = 0;
2893 for (i = 0; i < num_devices; i++)
2894 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2895 devices_s[num_devices_openmp++] = devices[i];
2896 int num_devices_after_openmp = num_devices_openmp;
2897 for (i = 0; i < num_devices; i++)
2898 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2899 devices_s[num_devices_after_openmp++] = devices[i];
2900 free (devices);
2901 devices = devices_s;
2903 for (i = 0; i < num_devices; i++)
2905 /* The 'devices' array can be moved (by the realloc call) until we have
2906 found all the plugins, so registering with the OpenACC runtime (which
2907 takes a copy of the pointer argument) must be delayed until now. */
2908 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2909 goacc_register (&devices[i]);
2912 if (atexit (gomp_target_fini) != 0)
2913 gomp_fatal ("atexit failed");
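/* Illustrative sketch (not part of this file): OFFLOAD_PLUGINS is a
   comma-separated list fixed at configure time, e.g. "nvptx,hsa".
   The loop above turns each element into a DSO name of the form

     "libgomp-plugin-" <element> SONAME_SUFFIX (1)

   i.e. something like "libgomp-plugin-nvptx.so.1" on ELF targets,
   which is then handed to gomp_load_plugin_for_device.  */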
2916 #else /* PLUGIN_SUPPORT */
2917 /* If dlfcn.h is unavailable we always fall back to host execution.
2918 The GOMP_target* routines are just stubs in this case. */
2919 static void
2920 gomp_target_init (void)
2923 #endif /* PLUGIN_SUPPORT */