libgomp/target.c
/* Copyright (C) 2013-2020 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

#define FIELD_TGT_EMPTY (~(size_t) 0)
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
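/* Example of why the probing above is needed (illustrative values only): a
   zero-length array section such as "arr[5:0]" arrives with host_start ==
   host_end, and a plain splay tree lookup on that empty interval would miss
   a mapping it merely touches.  gomp_map_lookup therefore probes
   [start, end + 1) in case the address is the start of a mapping, then
   [start - 1, end) in case it is one past the end of a mapping, and finally
   the empty interval itself; gomp_map_0len_lookup only performs the first
   probe.  */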
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
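/* Worked example for the two limits above (hypothetical offsets): two
   to-be-copied regions at device offsets [0, 100) and [2000, 2100) are
   1900 bytes apart, below MAX_COALESCE_BUF_GAP, so they coalesce into a
   single chunk [0, 2100) and one host to device transfer replaces two, at
   the cost of also copying the 1900 padding bytes.  A third region starting
   at offset 10000 would exceed the gap and therefore opens a new chunk.  */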
/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    gomp_fatal ("internal libgomp cbuf error");
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                             "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag,
                        struct gomp_coalesce_buf *cbuf)
{
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    gomp_copy_host2dev (devicep, aq,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
                        newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                              kind & typemask, false, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node,
                                      &tgt->list[i], kind & typemask, false,
                                      cbuf);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, false, cbuf);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("attempt to attach null pointer");
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("pointer target not mapped for attach");
        }

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}
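/* Illustration of the attach arithmetic above, with made-up addresses: if a
   struct is mapped with n->host_start == 0x1000 and its device copy begins
   at n->tgt->tgt_start + n->tgt_offset == 0x9000, attaching the pointer
   field at host address 0x1010 rewrites the device word at
   0x9000 + (0x1010 - 0x1000) == 0x9010, storing the device translation of
   the pointer's target instead of its host address.  */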
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
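/* In other words: a present key yields its device address plus the per-entry
   offset; OFFSET_INLINED passes the host value through unchanged (e.g.
   firstprivate scalars passed by value); OFFSET_POINTER yields 0;
   OFFSET_STRUCT derives a struct member's address from the following entry's
   mapping; any other offset is relative to this tgt's freshly allocated
   device block.  */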
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
                                             & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH)
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          gomp_coalesce_buf_add (&cbuf, tgt_size,
                                 cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_POINTER;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        {
          int always_to_cnt = 0;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              bool has_nullptr = false;
              size_t j;
              for (j = 0; j < n->tgt->list_count; j++)
                if (n->tgt->list[j].key == n)
                  {
                    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
                    break;
                  }
              if (n->tgt->list_count == 0)
                {
                  /* 'declare target'; assume has_nullptr; it could also be
                     a statically assigned pointer, but then it should point
                     to the equivalent variable on the host.  */
                  assert (n->refcount == REFCOUNT_INFINITY);
                  has_nullptr = true;
                }
              else
                assert (j < n->tgt->list_count);
              /* Re-map the data if there is an 'always' modifier or if a
                 null pointer was there and a non-null one has since been
                 found; that permits transparent re-mapping for Fortran
                 array descriptors which were previously mapped
                 unallocated.  */
              for (j = i + 1; j < mapnum; j++)
                {
                  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                      && (!has_nullptr
                          || !GOMP_MAP_POINTER_P (ptr_kind)
                          || *(void **) hostaddrs[j] == NULL))
                    break;
                  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                           || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                               > cur_node.host_end))
                    break;
                  else
                    {
                      has_always_ptrset = true;
                      ++always_to_cnt;
                    }
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                  kind & typemask, always_to_cnt > 0, NULL);
          i += always_to_cnt;
        }
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present, hence skip the entry, including its MAP_POINTER,
                 if it exists.  */
              tgt->list[i].offset = OFFSET_POINTER;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              int kind;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
                                          kinds, j)) & typemask))
                    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }
  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
                      - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);
  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
        if (has_always_ptrset
            && tgt->list[i].key
            && (get_kind (short_mapkind, kinds, i) & typemask)
               == GOMP_MAP_TO_PSET)
          {
            splay_tree_key k = tgt->list[i].key;
            bool has_nullptr = false;
            size_t j;
            for (j = 0; j < k->tgt->list_count; j++)
              if (k->tgt->list[j].key == k)
                {
                  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
                  break;
                }
            if (k->tgt->list_count == 0)
              has_nullptr = true;
            else
              assert (j < k->tgt->list_count);

            tgt->list[i].has_null_ptr_assoc = false;
            for (j = i + 1; j < mapnum; j++)
              {
                int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                    && (!has_nullptr
                        || !GOMP_MAP_POINTER_P (ptr_kind)
                        || *(void **) hostaddrs[j] == NULL))
                  break;
                else if ((uintptr_t) hostaddrs[j] < k->host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > k->host_end))
                  break;
                else
                  {
                    if (*(void **) hostaddrs[j] == NULL)
                      tgt->list[i].has_null_ptr_assoc = true;
                    tgt->list[j].key = k;
                    tgt->list[j].copy_from = false;
                    tgt->list[j].always_copy_from = false;
                    tgt->list[j].is_attach = false;
                    if (k->refcount != REFCOUNT_INFINITY)
                      k->refcount++;
                    gomp_map_pointer (k->tgt, aq,
                                      (uintptr_t) *(void **) hostaddrs[j],
                                      k->tgt_offset + ((uintptr_t) hostaddrs[j]
                                                       - k->host_start),
                                      sizes[j], cbufp);
                  }
              }
            i = j - 1;
          }
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, cbufp);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
                /* The OpenACC 'host_data' construct only allows 'use_device'
                   "mapping" clauses, so in the first loop, 'not_found_cnt'
                   must always have been zero, so all OpenACC 'use_device'
                   clauses have already been handled.  (We can only easily
                   test 'use_device' with 'if_present' clause here.)  */
                assert (tgt->list[i].offset == OFFSET_INLINED);
                /* Nevertheless, FALLTHRU to the normal handling, to keep the
                   code conceptually simple, similar to the first loop.  */
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n != NULL)
                      {
                        cur_node.host_start -= n->host_start;
                        hostaddrs[i]
                          = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start);
                      }
                    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    else if ((kind & typemask)
                             == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                      /* If not present, continue using the host address.  */
                      ;
                    else
                      __builtin_unreachable ();
                    tgt->list[i].offset = OFFSET_INLINED;
                  }
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              case GOMP_MAP_IF_PRESENT:
                /* Not present - otherwise handled above.  Skip over its
                   MAP_POINTER as well.  */
                if (i + 1 < mapnum
                    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                        == GOMP_MAP_POINTER))
                  ++i;
                continue;
              case GOMP_MAP_ATTACH:
                {
                  cur_node.host_start = (uintptr_t) hostaddrs[i];
                  cur_node.host_end = cur_node.host_start + sizeof (void *);
                  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
                  if (n != NULL)
                    {
                      tgt->list[i].key = n;
                      tgt->list[i].offset = cur_node.host_start
                                            - n->host_start;
                      tgt->list[i].length = n->host_end - n->host_start;
                      tgt->list[i].copy_from = false;
                      tgt->list[i].always_copy_from = false;
                      tgt->list[i].is_attach = true;
                      /* OpenACC 'attach'/'detach' doesn't affect
                         structured/dynamic reference counts ('n->refcount',
                         'n->dynamic_refcount').  */
                    }
                  else
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("outer struct not mapped for attach");
                    }
                  gomp_attach_pointer (devicep, aq, mem_map, n,
                                       (uintptr_t) hostaddrs[i], sizes[i],
                                       cbufp);
                  continue;
                }
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                      kind & typemask, false, cbufp);
            else
              {
                k->aux = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target
                       address of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->aux
                      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
                    k->aux->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                tgt->list[i].copy_from
                  = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].is_attach = false;
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                k->dynamic_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, aq,
                                      (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i], cbufp);
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    tgt->list[i].has_null_ptr_assoc = false;

                    for (j = i + 1; j < mapnum; j++)
                      {
                        int ptr_kind = (get_kind (short_mapkind, kinds, j)
                                        & typemask);
                        if (!GOMP_MAP_POINTER_P (ptr_kind)
                            && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
                          break;
                        else if ((uintptr_t) hostaddrs[j] < k->host_start
                                 || ((uintptr_t) hostaddrs[j]
                                     + sizeof (void *) > k->host_end))
                          break;
                        else
                          {
                            tgt->list[j].key = k;
                            tgt->list[j].copy_from = false;
                            tgt->list[j].always_copy_from = false;
                            tgt->list[j].is_attach = false;
                            tgt->list[i].has_null_ptr_assoc
                              |= !(*(void **) hostaddrs[j]);
                            if (k->refcount != REFCOUNT_INFINITY)
                              k->refcount++;
                            gomp_map_pointer (tgt, aq,
                                              (uintptr_t)
                                              *(void **) hostaddrs[j],
                                              k->tgt_offset
                                              + ((uintptr_t) hostaddrs[j]
                                                 - k->host_start),
                                              sizes[j], cbufp);
                          }
                      }
                    i = j - 1;
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->aux && k->aux->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's
                       not data allocated by the current call to this
                       function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), NULL);
                  }
                array++;
              }
          }
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              cbufp);
        }
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep, aq,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, size_t mapnum,
                     void **hostaddrs, void **devaddrs, size_t *sizes,
                     void *kinds, bool short_mapkind,
                     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}
static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
                          struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->aux)
    {
      if (k->aux->link_key)
        splay_tree_insert (&devicep->mem_map,
                           (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
        free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
                                                (void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
                       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
                          struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
        gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
                                             + tgt->list[i].offset,
                             false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
        continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
         counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
        continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
      else if (k->refcount == 1)
        {
          k->refcount--;
          do_unmap = true;
        }

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
        gomp_copy_dev2host (devicep, aq,
                            (void *) (k->host_start + tgt->list[i].offset),
                            (void *) (k->tgt->tgt_start + k->tgt_offset
                                      + tgt->list[i].offset),
                            tgt->list[i].length);
      if (do_unmap)
        {
          struct target_mem_desc *k_tgt = k->tgt;
          bool is_tgt_unmapped = gomp_remove_var (devicep, k);
          /* It would be bad if TGT got unmapped while we're still iterating
             over its LIST_COUNT, and also expect to use it in the following
             code.  */
          assert (!is_tgt_unmapped
                  || k_tgt != tgt);
        }
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
                                                (void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
                       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
             void **hostaddrs, size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }

            void *hostaddr = (void *) cur_node.host_start;
            void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start);
            size_t size = cur_node.host_end - cur_node.host_start;

            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
                                  NULL);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}
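/* For instance, "#pragma omp target update to(x)" reaches this function with
   a kind for which GOMP_MAP_COPY_TO_P is true and takes the host2dev branch,
   while "update from(x)" takes the dev2host branch; a list item that is not
   mapped at all is ignored, as the splay tree lookup returns NULL.  */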
/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mappings between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Cannot map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
        = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
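/* A minimal sketch of the host table layout that gomp_load_image_to_device
   expects (hypothetical symbols, for illustration only): four pointers
   bracketing a table of function addresses and a table of (address, size)
   pairs for variables, with the most significant bit of a size marking
   "omp declare target link".  */
#if 0
static void *host_funcs[] = { (void *) foo_fn, (void *) bar_fn };
static void *host_vars[] = {
  (void *) &global_x, (void *) sizeof (global_x), /* One (address, size) pair.  */
};
static const void *host_table[] = {
  &host_funcs[0], &host_funcs[2],  /* [0], [1]: func table begin and end.  */
  &host_vars[0], &host_vars[2],    /* [2], [3]: var table begin and end.  */
};
#endif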
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
                               unsigned version,
                               const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
                                : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
        = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
        splay_tree_remove (&devicep->mem_map, &k);
      else
        {
          splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
          is_tgt_unmapped = gomp_remove_var (devicep, n);
        }
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TYPE of the target, and the TARGET_DATA needed by the target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
                           int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_load_image_to_device (devicep, version,
                                   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
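/* Compiler-generated constructors are the intended callers of the above; a
   hand-written equivalent would look roughly like the following sketch
   (my_host_table and my_target_image are hypothetical symbols).  */
#if 0
extern const void *my_host_table[];   /* As sketched earlier.  */
extern const char my_target_image[];  /* Plugin-specific image blob.  */

__attribute__((constructor))
static void
register_my_image (void)
{
  GOMP_offload_register_ver (GOMP_VERSION_PACK (GOMP_VERSION, 0),
                             my_host_table, GOMP_DEVICE_NVIDIA_PTX,
                             my_target_image);
}
#endif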
1895 /* This function should be called from every offload image while unloading.
1896 It gets the descriptor of the host func and var tables HOST_TABLE, the TYPE
1897 of the target, and the TARGET_DATA needed by the target plugin. */
1899 void
1900 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1901 int target_type, const void *target_data)
1903 int i;
1905 gomp_mutex_lock (&register_lock);
1907 /* Unload image from all initialized devices. */
1908 for (i = 0; i < num_devices; i++)
1910 struct gomp_device_descr *devicep = &devices[i];
1911 gomp_mutex_lock (&devicep->lock);
1912 if (devicep->type == target_type
1913 && devicep->state == GOMP_DEVICE_INITIALIZED)
1914 gomp_unload_image_from_device (devicep, version,
1915 host_table, target_data);
1916 gomp_mutex_unlock (&devicep->lock);
1919 /* Remove image from array of pending images. */
1920 for (i = 0; i < num_offload_images; i++)
1921 if (offload_images[i].target_data == target_data)
1923 offload_images[i] = offload_images[--num_offload_images];
1924 break;
1927 gomp_mutex_unlock (&register_lock);
1930 void
1931 GOMP_offload_unregister (const void *host_table, int target_type,
1932 const void *target_data)
1934 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1937 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1938 must be locked on entry, and remains locked on return. */
1940 attribute_hidden void
1941 gomp_init_device (struct gomp_device_descr *devicep)
1943 int i;
1944 if (!devicep->init_device_func (devicep->target_id))
1946 gomp_mutex_unlock (&devicep->lock);
1947 gomp_fatal ("device initialization failed");
1950 /* Load to the device all images registered so far. */
1951 for (i = 0; i < num_offload_images; i++)
1953 struct offload_image_descr *image = &offload_images[i];
1954 if (image->type == devicep->type)
1955 gomp_load_image_to_device (devicep, image->version,
1956 image->host_table, image->target_data,
1957 false);
1960 /* Initialize OpenACC asynchronous queues. */
1961 goacc_init_asyncqueues (devicep);
1963 devicep->state = GOMP_DEVICE_INITIALIZED;
1966 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1967 must be locked on entry, and remains locked on return. */
1969 attribute_hidden bool
1970 gomp_fini_device (struct gomp_device_descr *devicep)
1972 bool ret = goacc_fini_asyncqueues (devicep);
1973 ret &= devicep->fini_device_func (devicep->target_id);
1974 devicep->state = GOMP_DEVICE_FINALIZED;
1975 return ret;
1978 attribute_hidden void
1979 gomp_unload_device (struct gomp_device_descr *devicep)
1981 if (devicep->state == GOMP_DEVICE_INITIALIZED)
1983 unsigned i;
1985 /* Unload from the device all images registered so far. */
1986 for (i = 0; i < num_offload_images; i++)
1988 struct offload_image_descr *image = &offload_images[i];
1989 if (image->type == devicep->type)
1990 gomp_unload_image_from_device (devicep, image->version,
1991 image->host_table,
1992 image->target_data);
1997 /* Host fallback for GOMP_target{,_ext} routines. */
1999 static void
2000 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
2002 struct gomp_thread old_thr, *thr = gomp_thread ();
2003 old_thr = *thr;
2004 memset (thr, '\0', sizeof (*thr));
2005 if (gomp_places_list)
2007 thr->place = old_thr.place;
2008 thr->ts.place_partition_len = gomp_places_list_len;
2010 fn (hostaddrs);
2011 gomp_free_thread (thr);
2012 *thr = old_thr;
2015 /* Calculate the alignment and size requirements of a private copy of data
2016 shared as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE. */
2018 static inline void
2019 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2020 unsigned short *kinds, size_t *tgt_align,
2021 size_t *tgt_size)
2023 size_t i;
2024 for (i = 0; i < mapnum; i++)
2025 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2027 size_t align = (size_t) 1 << (kinds[i] >> 8);
2028 if (*tgt_align < align)
2029 *tgt_align = align;
2030 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2031 *tgt_size += sizes[i];
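/* A worked example with illustrative numbers: for two firstprivate
   objects, one of size 4 / alignment 4 and one of size 8 / alignment 8
   (the alignment is encoded as its log2 in the high byte of the kind,
   so kinds[i] >> 8 == 3 means 8-byte alignment), the loop yields

     *tgt_align == 8
     *tgt_size  == 16   (4 rounded up to 8, plus 8).  */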
2035 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
2037 static inline void
2038 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2039 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2040 size_t tgt_size)
2042 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2043 if (al)
2044 tgt += tgt_align - al;
2045 tgt_size = 0;
2046 size_t i;
2047 for (i = 0; i < mapnum; i++)
2048 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2050 size_t align = (size_t) 1 << (kinds[i] >> 8);
2051 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2052 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2053 hostaddrs[i] = tgt + tgt_size;
2054 tgt_size = tgt_size + sizes[i];
2058 /* Helper function of GOMP_target{,_ext} routines. */
2060 static void *
2061 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2062 void (*host_fn) (void *))
2064 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2065 return (void *) host_fn;
2066 else
2068 gomp_mutex_lock (&devicep->lock);
2069 if (devicep->state == GOMP_DEVICE_FINALIZED)
2071 gomp_mutex_unlock (&devicep->lock);
2072 return NULL;
2075 struct splay_tree_key_s k;
2076 k.host_start = (uintptr_t) host_fn;
2077 k.host_end = k.host_start + 1;
2078 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2079 gomp_mutex_unlock (&devicep->lock);
2080 if (tgt_fn == NULL)
2081 return NULL;
2083 return (void *) tgt_fn->tgt_offset;
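/* A lookup sketch with hypothetical names and addresses: if the offload
   image registered host function foo at device address 0xd000, the
   mem_map entry satisfies

     n->host_start == (uintptr_t) &foo
     n->host_end   == n->host_start + 1
     n->tgt_offset == 0xd000

   so the single-byte key built above matches it and this function
   returns (void *) 0xd000.  */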
2087 /* Called when encountering a target directive. If DEVICE
2088 is GOMP_DEVICE_ICV, it means use the default-device-var ICV. If it is
2089 GOMP_DEVICE_HOST_FALLBACK (or any value
2090 larger than the last available hw device), use host fallback.
2091 FN is the address of the host code, UNUSED is part of the current ABI, but
2092 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2093 with MAPNUM entries, holding the addresses of the host objects, the
2094 sizes of the host objects (resp., for pointer kinds, the pointer bias
2095 with an assumed size of sizeof (void *)), and the kinds. */
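/* A schematic sketch, with hypothetical names, of the call the compiler
   emits for "#pragma omp target map(tofrom: x)", where foo._omp_fn.0
   stands for the outlined region body:

     void *hostaddrs[] = { &x };
     size_t sizes[] = { sizeof (x) };
     unsigned char kinds[] = { GOMP_MAP_TOFROM };
     GOMP_target (GOMP_DEVICE_ICV, foo._omp_fn.0, NULL,
                  1, hostaddrs, sizes, kinds);

   The real arrays are static data emitted by the compiler; this only
   shows how the arguments line up.  */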
2097 void
2098 GOMP_target (int device, void (*fn) (void *), const void *unused,
2099 size_t mapnum, void **hostaddrs, size_t *sizes,
2100 unsigned char *kinds)
2102 struct gomp_device_descr *devicep = resolve_device (device);
2104 void *fn_addr;
2105 if (devicep == NULL
2106 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2107 /* All shared memory devices should use the GOMP_target_ext function. */
2108 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2109 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2110 return gomp_target_fallback (fn, hostaddrs);
2112 struct target_mem_desc *tgt_vars
2113 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2114 GOMP_MAP_VARS_TARGET);
2115 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2116 NULL);
2117 gomp_unmap_vars (tgt_vars, true);
2120 static inline unsigned int
2121 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2123 /* If we cannot run asynchronously, simply ignore nowait. */
2124 if (devicep != NULL && devicep->async_run_func == NULL)
2125 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2127 return flags;
2130 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2131 and several arguments have been added:
2132 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2133 DEPEND is an array of dependencies, see GOMP_task for details.
2135 ARGS is a pointer to an array consisting of a variable number of both
2136 device-independent and device-specific arguments. Each argument can take
2137 one or two elements, where the first specifies for which device it is
2138 intended, the type, and optionally also the value. If the value is not
2139 present in the first element, the whole second element is the actual
2140 value. The last element of the array is a single NULL. Among the
2141 device-independent arguments are, for example, NUM_TEAMS and THREAD_LIMIT.
2143 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2144 that value, or 1 if the teams construct is not present, or 0 if the
2145 teams construct does not have a num_teams clause and so the choice is
2146 implementation defined, and -1 if it can't be determined on the host
2147 what value GOMP_teams will have on the device.
2148 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2149 body with that value, or 0 if the teams construct does not have a
2150 thread_limit clause or the teams construct is not present, and -1 if it
2151 can't be determined on the host what value GOMP_teams will have on the device. */
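/* For example, a NUM_TEAMS argument of 4 encoded in a single element, a
   sketch assuming the GOMP_TARGET_ARG_* encoding from gomp-constants.h:

     void *args[] =
       { (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
                               | GOMP_TARGET_ARG_NUM_TEAMS
                               | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
         NULL };
  */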
2153 void
2154 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2155 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2156 unsigned int flags, void **depend, void **args)
2158 struct gomp_device_descr *devicep = resolve_device (device);
2159 size_t tgt_align = 0, tgt_size = 0;
2160 bool fpc_done = false;
2162 flags = clear_unsupported_flags (devicep, flags);
2164 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2166 struct gomp_thread *thr = gomp_thread ();
2167 /* Create a team if we don't have one around, as nowait
2168 target tasks make sense to run asynchronously even when
2169 outside of any parallel region. */
2170 if (__builtin_expect (thr->ts.team == NULL, 0))
2172 struct gomp_team *team = gomp_new_team (1);
2173 struct gomp_task *task = thr->task;
2174 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2175 team->prev_ts = thr->ts;
2176 thr->ts.team = team;
2177 thr->ts.team_id = 0;
2178 thr->ts.work_share = &team->work_shares[0];
2179 thr->ts.last_work_share = NULL;
2180 #ifdef HAVE_SYNC_BUILTINS
2181 thr->ts.single_count = 0;
2182 #endif
2183 thr->ts.static_trip = 0;
2184 thr->task = &team->implicit_task[0];
2185 gomp_init_task (thr->task, NULL, icv);
2186 if (task)
2188 thr->task = task;
2189 gomp_end_task ();
2190 free (task);
2191 thr->task = &team->implicit_task[0];
2193 else
2194 pthread_setspecific (gomp_thread_destructor, thr);
2196 if (thr->ts.team
2197 && !thr->task->final_task)
2199 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2200 sizes, kinds, flags, depend, args,
2201 GOMP_TARGET_TASK_BEFORE_MAP);
2202 return;
2206 /* If there are depend clauses, but nowait is not present
2207 (or we are in a final task), block the parent task until the
2208 dependencies are resolved and then just continue with the rest
2209 of the function as if it were a merged task. */
2210 if (depend != NULL)
2212 struct gomp_thread *thr = gomp_thread ();
2213 if (thr->task && thr->task->depend_hash)
2215 /* If we might need to wait, copy firstprivate now. */
2216 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2217 &tgt_align, &tgt_size);
2218 if (tgt_align)
2220 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2221 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2222 tgt_align, tgt_size);
2224 fpc_done = true;
2225 gomp_task_maybe_wait_for_dependencies (depend);
2229 void *fn_addr;
2230 if (devicep == NULL
2231 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2232 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2233 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2235 if (!fpc_done)
2237 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2238 &tgt_align, &tgt_size);
2239 if (tgt_align)
2241 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2242 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2243 tgt_align, tgt_size);
2246 gomp_target_fallback (fn, hostaddrs);
2247 return;
2250 struct target_mem_desc *tgt_vars;
2251 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2253 if (!fpc_done)
2255 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2256 &tgt_align, &tgt_size);
2257 if (tgt_align)
2259 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2260 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2261 tgt_align, tgt_size);
2264 tgt_vars = NULL;
2266 else
2267 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2268 true, GOMP_MAP_VARS_TARGET);
2269 devicep->run_func (devicep->target_id, fn_addr,
2270 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2271 args);
2272 if (tgt_vars)
2273 gomp_unmap_vars (tgt_vars, true);
2276 /* Host fallback for GOMP_target_data{,_ext} routines. */
2278 static void
2279 gomp_target_data_fallback (void)
2281 struct gomp_task_icv *icv = gomp_icv (false);
2282 if (icv->target_data)
2284 /* Even when doing a host fallback, if there are any active
2285 #pragma omp target data constructs, we need to remember the
2286 new #pragma omp target data, otherwise GOMP_target_end_data
2287 would get out of sync. */
2288 struct target_mem_desc *tgt
2289 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2290 GOMP_MAP_VARS_DATA);
2291 tgt->prev = icv->target_data;
2292 icv->target_data = tgt;
2296 void
2297 GOMP_target_data (int device, const void *unused, size_t mapnum,
2298 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2300 struct gomp_device_descr *devicep = resolve_device (device);
2302 if (devicep == NULL
2303 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2304 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2305 return gomp_target_data_fallback ();
2307 struct target_mem_desc *tgt
2308 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2309 GOMP_MAP_VARS_DATA);
2310 struct gomp_task_icv *icv = gomp_icv (true);
2311 tgt->prev = icv->target_data;
2312 icv->target_data = tgt;
2315 void
2316 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2317 size_t *sizes, unsigned short *kinds)
2319 struct gomp_device_descr *devicep = resolve_device (device);
2321 if (devicep == NULL
2322 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2323 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2324 return gomp_target_data_fallback ();
2326 struct target_mem_desc *tgt
2327 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2328 GOMP_MAP_VARS_DATA);
2329 struct gomp_task_icv *icv = gomp_icv (true);
2330 tgt->prev = icv->target_data;
2331 icv->target_data = tgt;
2334 void
2335 GOMP_target_end_data (void)
2337 struct gomp_task_icv *icv = gomp_icv (false);
2338 if (icv->target_data)
2340 struct target_mem_desc *tgt = icv->target_data;
2341 icv->target_data = tgt->prev;
2342 gomp_unmap_vars (tgt, true);
2346 void
2347 GOMP_target_update (int device, const void *unused, size_t mapnum,
2348 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2350 struct gomp_device_descr *devicep = resolve_device (device);
2352 if (devicep == NULL
2353 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2354 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2355 return;
2357 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2360 void
2361 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2362 size_t *sizes, unsigned short *kinds,
2363 unsigned int flags, void **depend)
2365 struct gomp_device_descr *devicep = resolve_device (device);
2367 /* If there are depend clauses, but nowait is not present,
2368 block the parent task until the dependencies are resolved
2369 and then just continue with the rest of the function as if it
2370 were a merged task. Until we are able to schedule tasks during
2371 variable mapping or unmapping, ignore nowait if depend clauses
2372 are not present. */
2373 if (depend != NULL)
2375 struct gomp_thread *thr = gomp_thread ();
2376 if (thr->task && thr->task->depend_hash)
2378 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2379 && thr->ts.team
2380 && !thr->task->final_task)
2382 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2383 mapnum, hostaddrs, sizes, kinds,
2384 flags | GOMP_TARGET_FLAG_UPDATE,
2385 depend, NULL, GOMP_TARGET_TASK_DATA))
2386 return;
2388 else
2390 struct gomp_team *team = thr->ts.team;
2391 /* If parallel or taskgroup has been cancelled, don't start new
2392 tasks. */
2393 if (__builtin_expect (gomp_cancel_var, 0) && team)
2395 if (gomp_team_barrier_cancelled (&team->barrier))
2396 return;
2397 if (thr->task->taskgroup)
2399 if (thr->task->taskgroup->cancelled)
2400 return;
2401 if (thr->task->taskgroup->workshare
2402 && thr->task->taskgroup->prev
2403 && thr->task->taskgroup->prev->cancelled)
2404 return;
2408 gomp_task_maybe_wait_for_dependencies (depend);
2413 if (devicep == NULL
2414 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2415 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2416 return;
2418 struct gomp_thread *thr = gomp_thread ();
2419 struct gomp_team *team = thr->ts.team;
2420 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2421 if (__builtin_expect (gomp_cancel_var, 0) && team)
2423 if (gomp_team_barrier_cancelled (&team->barrier))
2424 return;
2425 if (thr->task->taskgroup)
2427 if (thr->task->taskgroup->cancelled)
2428 return;
2429 if (thr->task->taskgroup->workshare
2430 && thr->task->taskgroup->prev
2431 && thr->task->taskgroup->prev->cancelled)
2432 return;
2436 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2439 static void
2440 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2441 void **hostaddrs, size_t *sizes, unsigned short *kinds)
2443 const int typemask = 0xff;
2444 size_t i;
2445 gomp_mutex_lock (&devicep->lock);
2446 if (devicep->state == GOMP_DEVICE_FINALIZED)
2448 gomp_mutex_unlock (&devicep->lock);
2449 return;
2452 for (i = 0; i < mapnum; i++)
2454 struct splay_tree_key_s cur_node;
2455 unsigned char kind = kinds[i] & typemask;
2456 switch (kind)
2458 case GOMP_MAP_FROM:
2459 case GOMP_MAP_ALWAYS_FROM:
2460 case GOMP_MAP_DELETE:
2461 case GOMP_MAP_RELEASE:
2462 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2463 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2464 cur_node.host_start = (uintptr_t) hostaddrs[i];
2465 cur_node.host_end = cur_node.host_start + sizes[i];
2466 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2467 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2468 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2469 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2470 if (!k)
2471 continue;
2473 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2474 k->refcount--;
2475 if ((kind == GOMP_MAP_DELETE
2476 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2477 && k->refcount != REFCOUNT_INFINITY)
2478 k->refcount = 0;
2480 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2481 || kind == GOMP_MAP_ALWAYS_FROM)
2482 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2483 (void *) (k->tgt->tgt_start + k->tgt_offset
2484 + cur_node.host_start
2485 - k->host_start),
2486 cur_node.host_end - cur_node.host_start);
2487 if (k->refcount == 0)
2488 gomp_remove_var (devicep, k);
2490 break;
2491 default:
2492 gomp_mutex_unlock (&devicep->lock);
2493 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2494 kind);
2498 gomp_mutex_unlock (&devicep->lock);
2501 void
2502 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2503 size_t *sizes, unsigned short *kinds,
2504 unsigned int flags, void **depend)
2506 struct gomp_device_descr *devicep = resolve_device (device);
2508 /* If there are depend clauses, but nowait is not present,
2509 block the parent task until the dependencies are resolved
2510 and then just continue with the rest of the function as if it
2511 were a merged task. Until we are able to schedule tasks during
2512 variable mapping or unmapping, ignore nowait if depend clauses
2513 are not present. */
2514 if (depend != NULL)
2516 struct gomp_thread *thr = gomp_thread ();
2517 if (thr->task && thr->task->depend_hash)
2519 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2520 && thr->ts.team
2521 && !thr->task->final_task)
2523 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2524 mapnum, hostaddrs, sizes, kinds,
2525 flags, depend, NULL,
2526 GOMP_TARGET_TASK_DATA))
2527 return;
2529 else
2531 struct gomp_team *team = thr->ts.team;
2532 /* If parallel or taskgroup has been cancelled, don't start new
2533 tasks. */
2534 if (__builtin_expect (gomp_cancel_var, 0) && team)
2536 if (gomp_team_barrier_cancelled (&team->barrier))
2537 return;
2538 if (thr->task->taskgroup)
2540 if (thr->task->taskgroup->cancelled)
2541 return;
2542 if (thr->task->taskgroup->workshare
2543 && thr->task->taskgroup->prev
2544 && thr->task->taskgroup->prev->cancelled)
2545 return;
2549 gomp_task_maybe_wait_for_dependencies (depend);
2554 if (devicep == NULL
2555 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2556 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2557 return;
2559 struct gomp_thread *thr = gomp_thread ();
2560 struct gomp_team *team = thr->ts.team;
2561 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2562 if (__builtin_expect (gomp_cancel_var, 0) && team)
2564 if (gomp_team_barrier_cancelled (&team->barrier))
2565 return;
2566 if (thr->task->taskgroup)
2568 if (thr->task->taskgroup->cancelled)
2569 return;
2570 if (thr->task->taskgroup->workshare
2571 && thr->task->taskgroup->prev
2572 && thr->task->taskgroup->prev->cancelled)
2573 return;
2577 /* The variables are mapped separately such that they can be released
2578 independently. */
2579 size_t i, j;
2580 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2581 for (i = 0; i < mapnum; i++)
2582 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2584 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2585 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2586 i += sizes[i];
2588 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
2590 for (j = i + 1; j < mapnum; j++)
2591 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
2592 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
2593 break;
2594 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
2595 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2596 i += j - i - 1;
2598 else
2599 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2600 true, GOMP_MAP_VARS_ENTER_DATA);
2601 else
2602 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2605 bool
2606 gomp_target_task_fn (void *data)
2608 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2609 struct gomp_device_descr *devicep = ttask->devicep;
2611 if (ttask->fn != NULL)
2613 void *fn_addr;
2614 if (devicep == NULL
2615 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2616 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2617 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2619 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2620 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2621 return false;
2624 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2626 if (ttask->tgt)
2627 gomp_unmap_vars (ttask->tgt, true);
2628 return false;
2631 void *actual_arguments;
2632 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2634 ttask->tgt = NULL;
2635 actual_arguments = ttask->hostaddrs;
2637 else
2639 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2640 NULL, ttask->sizes, ttask->kinds, true,
2641 GOMP_MAP_VARS_TARGET);
2642 actual_arguments = (void *) ttask->tgt->tgt_start;
2644 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2646 assert (devicep->async_run_func);
2647 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2648 ttask->args, (void *) ttask);
2649 return true;
2651 else if (devicep == NULL
2652 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2653 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2654 return false;
2656 size_t i;
2657 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2658 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2659 ttask->kinds, true);
2660 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2661 for (i = 0; i < ttask->mapnum; i++)
2662 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2664 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2665 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2666 GOMP_MAP_VARS_ENTER_DATA);
2667 i += ttask->sizes[i];
2669 else
2670 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2671 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2672 else
2673 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2674 ttask->kinds);
2675 return false;
2678 void
2679 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2681 if (thread_limit)
2683 struct gomp_task_icv *icv = gomp_icv (true);
2684 icv->thread_limit_var
2685 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2687 (void) num_teams;
2690 void *
2691 omp_target_alloc (size_t size, int device_num)
2693 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2694 return malloc (size);
2696 if (device_num < 0)
2697 return NULL;
2699 struct gomp_device_descr *devicep = resolve_device (device_num);
2700 if (devicep == NULL)
2701 return NULL;
2703 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2704 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2705 return malloc (size);
2707 gomp_mutex_lock (&devicep->lock);
2708 void *ret = devicep->alloc_func (devicep->target_id, size);
2709 gomp_mutex_unlock (&devicep->lock);
2710 return ret;
2713 void
2714 omp_target_free (void *device_ptr, int device_num)
2716 if (device_ptr == NULL)
2717 return;
2719 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2721 free (device_ptr);
2722 return;
2725 if (device_num < 0)
2726 return;
2728 struct gomp_device_descr *devicep = resolve_device (device_num);
2729 if (devicep == NULL)
2730 return;
2732 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2733 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2735 free (device_ptr);
2736 return;
2739 gomp_mutex_lock (&devicep->lock);
2740 gomp_free_device_memory (devicep, device_ptr);
2741 gomp_mutex_unlock (&devicep->lock);
2744 int
2745 omp_target_is_present (const void *ptr, int device_num)
2747 if (ptr == NULL)
2748 return 1;
2750 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2751 return 1;
2753 if (device_num < 0)
2754 return 0;
2756 struct gomp_device_descr *devicep = resolve_device (device_num);
2757 if (devicep == NULL)
2758 return 0;
2760 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2761 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2762 return 1;
2764 gomp_mutex_lock (&devicep->lock);
2765 struct splay_tree_s *mem_map = &devicep->mem_map;
2766 struct splay_tree_key_s cur_node;
2768 cur_node.host_start = (uintptr_t) ptr;
2769 cur_node.host_end = cur_node.host_start;
2770 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2771 int ret = n != NULL;
2772 gomp_mutex_unlock (&devicep->lock);
2773 return ret;
2776 int
2777 omp_target_memcpy (void *dst, const void *src, size_t length,
2778 size_t dst_offset, size_t src_offset, int dst_device_num,
2779 int src_device_num)
2781 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2782 bool ret;
2784 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2786 if (dst_device_num < 0)
2787 return EINVAL;
2789 dst_devicep = resolve_device (dst_device_num);
2790 if (dst_devicep == NULL)
2791 return EINVAL;
2793 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2794 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2795 dst_devicep = NULL;
2797 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2799 if (src_device_num < 0)
2800 return EINVAL;
2802 src_devicep = resolve_device (src_device_num);
2803 if (src_devicep == NULL)
2804 return EINVAL;
2806 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2807 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2808 src_devicep = NULL;
2810 if (src_devicep == NULL && dst_devicep == NULL)
2812 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2813 return 0;
2815 if (src_devicep == NULL)
2817 gomp_mutex_lock (&dst_devicep->lock);
2818 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2819 (char *) dst + dst_offset,
2820 (char *) src + src_offset, length);
2821 gomp_mutex_unlock (&dst_devicep->lock);
2822 return (ret ? 0 : EINVAL);
2824 if (dst_devicep == NULL)
2826 gomp_mutex_lock (&src_devicep->lock);
2827 ret = src_devicep->dev2host_func (src_devicep->target_id,
2828 (char *) dst + dst_offset,
2829 (char *) src + src_offset, length);
2830 gomp_mutex_unlock (&src_devicep->lock);
2831 return (ret ? 0 : EINVAL);
2833 if (src_devicep == dst_devicep)
2835 gomp_mutex_lock (&src_devicep->lock);
2836 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2837 (char *) dst + dst_offset,
2838 (char *) src + src_offset, length);
2839 gomp_mutex_unlock (&src_devicep->lock);
2840 return (ret ? 0 : EINVAL);
2842 return EINVAL;
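/* A usage sketch with a hypothetical host buffer: allocate device
   memory, fill it from the host, and release it:

     int dev = omp_get_default_device ();
     int host = omp_get_initial_device ();
     double *d = omp_target_alloc (1024 * sizeof (double), dev);
     if (d != NULL)
       omp_target_memcpy (d, host_buf, 1024 * sizeof (double),
                          0, 0, dev, host);
     ...use d in target regions via is_device_ptr, then:
     omp_target_free (d, dev);
  */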
2845 static int
2846 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2847 int num_dims, const size_t *volume,
2848 const size_t *dst_offsets,
2849 const size_t *src_offsets,
2850 const size_t *dst_dimensions,
2851 const size_t *src_dimensions,
2852 struct gomp_device_descr *dst_devicep,
2853 struct gomp_device_descr *src_devicep)
2855 size_t dst_slice = element_size;
2856 size_t src_slice = element_size;
2857 size_t j, dst_off, src_off, length;
2858 int i, ret;
2860 if (num_dims == 1)
2862 if (__builtin_mul_overflow (element_size, volume[0], &length)
2863 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2864 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2865 return EINVAL;
2866 if (dst_devicep == NULL && src_devicep == NULL)
2868 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2869 length);
2870 ret = 1;
2872 else if (src_devicep == NULL)
2873 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2874 (char *) dst + dst_off,
2875 (const char *) src + src_off,
2876 length);
2877 else if (dst_devicep == NULL)
2878 ret = src_devicep->dev2host_func (src_devicep->target_id,
2879 (char *) dst + dst_off,
2880 (const char *) src + src_off,
2881 length);
2882 else if (src_devicep == dst_devicep)
2883 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2884 (char *) dst + dst_off,
2885 (const char *) src + src_off,
2886 length);
2887 else
2888 ret = 0;
2889 return ret ? 0 : EINVAL;
2892 /* FIXME: it would be nice to have some plugin function to handle
2893 num_dims == 2 and num_dims == 3 more efficiently. Larger dimension
2894 counts can be handled by the generic recursion below, and for host-host
2895 copies it should be used even for any num_dims >= 2. */
2897 for (i = 1; i < num_dims; i++)
2898 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2899 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2900 return EINVAL;
2901 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2902 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2903 return EINVAL;
2904 for (j = 0; j < volume[0]; j++)
2906 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2907 (const char *) src + src_off,
2908 element_size, num_dims - 1,
2909 volume + 1, dst_offsets + 1,
2910 src_offsets + 1, dst_dimensions + 1,
2911 src_dimensions + 1, dst_devicep,
2912 src_devicep);
2913 if (ret)
2914 return ret;
2915 dst_off += dst_slice;
2916 src_off += src_slice;
2918 return 0;
2921 int
2922 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2923 int num_dims, const size_t *volume,
2924 const size_t *dst_offsets,
2925 const size_t *src_offsets,
2926 const size_t *dst_dimensions,
2927 const size_t *src_dimensions,
2928 int dst_device_num, int src_device_num)
2930 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2932 if (!dst && !src)
2933 return INT_MAX;
2935 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2937 if (dst_device_num < 0)
2938 return EINVAL;
2940 dst_devicep = resolve_device (dst_device_num);
2941 if (dst_devicep == NULL)
2942 return EINVAL;
2944 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2945 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2946 dst_devicep = NULL;
2948 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2950 if (src_device_num < 0)
2951 return EINVAL;
2953 src_devicep = resolve_device (src_device_num);
2954 if (src_devicep == NULL)
2955 return EINVAL;
2957 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2958 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2959 src_devicep = NULL;
2962 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2963 return EINVAL;
2965 if (src_devicep)
2966 gomp_mutex_lock (&src_devicep->lock);
2967 else if (dst_devicep)
2968 gomp_mutex_lock (&dst_devicep->lock);
2969 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2970 volume, dst_offsets, src_offsets,
2971 dst_dimensions, src_dimensions,
2972 dst_devicep, src_devicep);
2973 if (src_devicep)
2974 gomp_mutex_unlock (&src_devicep->lock);
2975 else if (dst_devicep)
2976 gomp_mutex_unlock (&dst_devicep->lock);
2977 return ret;
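/* A usage sketch with hypothetical buffers: copy a 2x3 sub-block that
   starts at row 1, column 2 of a 4x5 host matrix of doubles into the
   top-left corner of a same-shaped device matrix:

     size_t volume[2]      = { 2, 3 };
     size_t dst_offsets[2] = { 0, 0 };
     size_t src_offsets[2] = { 1, 2 };
     size_t dims[2]        = { 4, 5 };
     omp_target_memcpy_rect (dev_buf, host_buf, sizeof (double), 2,
                             volume, dst_offsets, src_offsets,
                             dims, dims, dev, omp_get_initial_device ());
  */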
2980 int
2981 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2982 size_t size, size_t device_offset, int device_num)
2984 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2985 return EINVAL;
2987 if (device_num < 0)
2988 return EINVAL;
2990 struct gomp_device_descr *devicep = resolve_device (device_num);
2991 if (devicep == NULL)
2992 return EINVAL;
2994 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2995 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2996 return EINVAL;
2998 gomp_mutex_lock (&devicep->lock);
3000 struct splay_tree_s *mem_map = &devicep->mem_map;
3001 struct splay_tree_key_s cur_node;
3002 int ret = EINVAL;
3004 cur_node.host_start = (uintptr_t) host_ptr;
3005 cur_node.host_end = cur_node.host_start + size;
3006 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3007 if (n)
3009 if (n->tgt->tgt_start + n->tgt_offset
3010 == (uintptr_t) device_ptr + device_offset
3011 && n->host_start <= cur_node.host_start
3012 && n->host_end >= cur_node.host_end)
3013 ret = 0;
3015 else
3017 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3018 tgt->array = gomp_malloc (sizeof (*tgt->array));
3019 tgt->refcount = 1;
3020 tgt->tgt_start = 0;
3021 tgt->tgt_end = 0;
3022 tgt->to_free = NULL;
3023 tgt->prev = NULL;
3024 tgt->list_count = 0;
3025 tgt->device_descr = devicep;
3026 splay_tree_node array = tgt->array;
3027 splay_tree_key k = &array->key;
3028 k->host_start = cur_node.host_start;
3029 k->host_end = cur_node.host_end;
3030 k->tgt = tgt;
3031 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3032 k->refcount = REFCOUNT_INFINITY;
3033 k->dynamic_refcount = 0;
3034 k->aux = NULL;
3035 array->left = NULL;
3036 array->right = NULL;
3037 splay_tree_insert (&devicep->mem_map, array);
3038 ret = 0;
3040 gomp_mutex_unlock (&devicep->lock);
3041 return ret;
3044 int
3045 omp_target_disassociate_ptr (const void *ptr, int device_num)
3047 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
3048 return EINVAL;
3050 if (device_num < 0)
3051 return EINVAL;
3053 struct gomp_device_descr *devicep = resolve_device (device_num);
3054 if (devicep == NULL)
3055 return EINVAL;
3057 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3058 return EINVAL;
3060 gomp_mutex_lock (&devicep->lock);
3062 struct splay_tree_s *mem_map = &devicep->mem_map;
3063 struct splay_tree_key_s cur_node;
3064 int ret = EINVAL;
3066 cur_node.host_start = (uintptr_t) ptr;
3067 cur_node.host_end = cur_node.host_start;
3068 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3069 if (n
3070 && n->host_start == cur_node.host_start
3071 && n->refcount == REFCOUNT_INFINITY
3072 && n->tgt->tgt_start == 0
3073 && n->tgt->to_free == NULL
3074 && n->tgt->refcount == 1
3075 && n->tgt->list_count == 0)
3077 splay_tree_remove (&devicep->mem_map, n);
3078 gomp_unmap_tgt (n->tgt);
3079 ret = 0;
3082 gomp_mutex_unlock (&devicep->lock);
3083 return ret;
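/* A usage sketch with hypothetical names: let an existing device
   allocation back a host object, then undo the association:

     void *d = omp_target_alloc (n * sizeof (double), dev);
     omp_target_associate_ptr (host_buf, d, n * sizeof (double), 0, dev);
     ...target regions now reuse d instead of mapping host_buf...
     omp_target_disassociate_ptr (host_buf, dev);
     omp_target_free (d, dev);
  */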
3086 int
3087 omp_pause_resource (omp_pause_resource_t kind, int device_num)
3089 (void) kind;
3090 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
3091 return gomp_pause_host ();
3092 if (device_num < 0 || device_num >= gomp_get_num_devices ())
3093 return -1;
3094 /* Do nothing for target devices for now. */
3095 return 0;
3098 int
3099 omp_pause_resource_all (omp_pause_resource_t kind)
3101 (void) kind;
3102 if (gomp_pause_host ())
3103 return -1;
3104 /* Do nothing for target devices for now. */
3105 return 0;
3108 ialias (omp_pause_resource)
3109 ialias (omp_pause_resource_all)
3111 #ifdef PLUGIN_SUPPORT
3113 /* This function tries to load a plugin for DEVICE. The name of the plugin
3114 is passed in PLUGIN_NAME.
3115 The handles of the found functions are stored in the corresponding fields
3116 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3118 static bool
3119 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3120 const char *plugin_name)
3122 const char *err = NULL, *last_missing = NULL;
3124 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3125 if (!plugin_handle)
3126 goto dl_fail;
3128 /* Check if all required functions are available in the plugin and store
3129 their handles. None of the symbols can legitimately be NULL,
3130 so we don't need to check dlerror all the time. */
3131 #define DLSYM(f) \
3132 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3133 goto dl_fail
3134 /* Similar, but missing functions are not an error. The macro evaluates
3135 to false if the lookup failed, true otherwise. */
3136 #define DLSYM_OPT(f, n) \
3137 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3138 || (last_missing = #n, 0))
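/* For example, "DLSYM (alloc);" below expands to

     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   i.e. every mandatory entry point must resolve or loading the plugin
   fails.  */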
3140 DLSYM (version);
3141 if (device->version_func () != GOMP_VERSION)
3143 err = "plugin version mismatch";
3144 goto fail;
3147 DLSYM (get_name);
3148 DLSYM (get_caps);
3149 DLSYM (get_type);
3150 DLSYM (get_num_devices);
3151 DLSYM (init_device);
3152 DLSYM (fini_device);
3153 DLSYM (load_image);
3154 DLSYM (unload_image);
3155 DLSYM (alloc);
3156 DLSYM (free);
3157 DLSYM (dev2host);
3158 DLSYM (host2dev);
3159 device->capabilities = device->get_caps_func ();
3160 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3162 DLSYM (run);
3163 DLSYM_OPT (async_run, async_run);
3164 DLSYM_OPT (can_run, can_run);
3165 DLSYM (dev2dev);
3167 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3169 if (!DLSYM_OPT (openacc.exec, openacc_exec)
3170 || !DLSYM_OPT (openacc.create_thread_data,
3171 openacc_create_thread_data)
3172 || !DLSYM_OPT (openacc.destroy_thread_data,
3173 openacc_destroy_thread_data)
3174 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3175 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3176 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3177 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3178 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3179 || !DLSYM_OPT (openacc.async.queue_callback,
3180 openacc_async_queue_callback)
3181 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3182 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3183 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
3184 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
3186 /* Require all the OpenACC handlers if we have
3187 GOMP_OFFLOAD_CAP_OPENACC_200. */
3188 err = "plugin missing OpenACC handler function";
3189 goto fail;
3192 unsigned cuda = 0;
3193 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3194 openacc_cuda_get_current_device);
3195 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3196 openacc_cuda_get_current_context);
3197 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3198 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3199 if (cuda && cuda != 4)
3201 /* Make sure all the CUDA functions are there if any of them are. */
3202 err = "plugin missing OpenACC CUDA handler function";
3203 goto fail;
3206 #undef DLSYM
3207 #undef DLSYM_OPT
3209 return 1;
3211 dl_fail:
3212 err = dlerror ();
3213 fail:
3214 gomp_error ("while loading %s: %s", plugin_name, err);
3215 if (last_missing)
3216 gomp_error ("missing function was %s", last_missing);
3217 if (plugin_handle)
3218 dlclose (plugin_handle);
3220 return 0;
3223 /* This function finalizes all initialized devices. */
3225 static void
3226 gomp_target_fini (void)
3228 int i;
3229 for (i = 0; i < num_devices; i++)
3231 bool ret = true;
3232 struct gomp_device_descr *devicep = &devices[i];
3233 gomp_mutex_lock (&devicep->lock);
3234 if (devicep->state == GOMP_DEVICE_INITIALIZED)
3235 ret = gomp_fini_device (devicep);
3236 gomp_mutex_unlock (&devicep->lock);
3237 if (!ret)
3238 gomp_fatal ("device finalization failed");
3242 /* This function initializes the runtime for offloading.
3243 It parses the list of offload plugins and tries to load them.
3244 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3245 will be set, and the array DEVICES initialized, containing descriptors for
3246 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
3247 followed by the others. */
3249 static void
3250 gomp_target_init (void)
3252 const char *prefix = "libgomp-plugin-";
3253 const char *suffix = SONAME_SUFFIX (1);
3254 const char *cur, *next;
3255 char *plugin_name;
3256 int i, new_num_devices;
3258 num_devices = 0;
3259 devices = NULL;
3261 cur = OFFLOAD_PLUGINS;
3262 if (*cur)
3263 do
3265 struct gomp_device_descr current_device;
3266 size_t prefix_len, suffix_len, cur_len;
3268 next = strchr (cur, ',');
3270 prefix_len = strlen (prefix);
3271 cur_len = next ? next - cur : strlen (cur);
3272 suffix_len = strlen (suffix);
3274 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3275 if (!plugin_name)
3277 num_devices = 0;
3278 break;
3281 memcpy (plugin_name, prefix, prefix_len);
3282 memcpy (plugin_name + prefix_len, cur, cur_len);
3283 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
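/* E.g. for cur == "nvptx" with SONAME_SUFFIX (1) == ".so.1" (as on
   Linux), PLUGIN_NAME becomes "libgomp-plugin-nvptx.so.1".  */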
3285 if (gomp_load_plugin_for_device (&current_device, plugin_name))
3287 new_num_devices = current_device.get_num_devices_func ();
3288 if (new_num_devices >= 1)
3290 /* Augment DEVICES and NUM_DEVICES. */
3292 devices = realloc (devices, (num_devices + new_num_devices)
3293 * sizeof (struct gomp_device_descr));
3294 if (!devices)
3296 num_devices = 0;
3297 free (plugin_name);
3298 break;
3301 current_device.name = current_device.get_name_func ();
3302 /* current_device.capabilities has already been set. */
3303 current_device.type = current_device.get_type_func ();
3304 current_device.mem_map.root = NULL;
3305 current_device.state = GOMP_DEVICE_UNINITIALIZED;
3306 for (i = 0; i < new_num_devices; i++)
3308 current_device.target_id = i;
3309 devices[num_devices] = current_device;
3310 gomp_mutex_init (&devices[num_devices].lock);
3311 num_devices++;
3316 free (plugin_name);
3317 cur = next + 1;
3319 while (next);
3321 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3322 NUM_DEVICES_OPENMP. */
3323 struct gomp_device_descr *devices_s
3324 = malloc (num_devices * sizeof (struct gomp_device_descr));
3325 if (!devices_s)
3327 num_devices = 0;
3328 free (devices);
3329 devices = NULL;
3331 num_devices_openmp = 0;
3332 for (i = 0; i < num_devices; i++)
3333 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3334 devices_s[num_devices_openmp++] = devices[i];
3335 int num_devices_after_openmp = num_devices_openmp;
3336 for (i = 0; i < num_devices; i++)
3337 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3338 devices_s[num_devices_after_openmp++] = devices[i];
3339 free (devices);
3340 devices = devices_s;
3342 for (i = 0; i < num_devices; i++)
3344 /* The 'devices' array can be moved (by the realloc call) until we have
3345 found all the plugins, so registering with the OpenACC runtime (which
3346 takes a copy of the pointer argument) must be delayed until now. */
3347 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3348 goacc_register (&devices[i]);
3351 if (atexit (gomp_target_fini) != 0)
3352 gomp_fatal ("atexit failed");
3355 #else /* PLUGIN_SUPPORT */
3356 /* If dlfcn.h is unavailable we always fall back to host execution.
3357 The GOMP_target* routines are just stubs for this case. */
3358 static void
3359 gomp_target_init (void)
3362 #endif /* PLUGIN_SUPPORT */