/* Copyright (C) 2013-2020 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);
/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != GOMP_DEVICE_HOST_FALLBACK
          && device_id != num_devices_openmp)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
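
/* Look up KEY in MEM_MAP.  A zero-length key (host_start == host_end) would
   not reliably match any mapped interval, so in that case the lookup is
   retried with the range extended by one byte at the end, then by one byte
   at the start, before falling back to the zero-length range itself.
   gomp_map_0len_lookup below is the variant used for zero-length array
   sections; it only tries the end-extended range.  */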
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE (32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP (4 * 1024)
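
/* Worked example (illustrative numbers only): with the limits above, suppose
   gomp_coalesce_buf_add is called for to-device regions at tgt_start-relative
   offsets [0, 1024) and then [1536, 2048).  The 512-byte gap is smaller than
   MAX_COALESCE_BUF_GAP, so the second call extends the first chunk to
   [0, 2048) and bumps use_cnt to 2; both host regions are then memcpy'd into
   cbuf->buf by gomp_copy_host2dev and flushed to the device in one transfer
   (copying the padding in the gap is harmless, as the whole block is freshly
   allocated device memory).  A 64 KiB region, by contrast, exceeds
   MAX_COALESCE_BUF_SIZE and is always transferred directly.  */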

/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    gomp_fatal ("internal libgomp cbuf error");
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                             "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag,
                        struct gomp_coalesce_buf *cbuf)
{
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    gomp_copy_host2dev (devicep, aq,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
                        newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                              kind & typemask, false, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                      kind & typemask, false, cbuf);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, false, cbuf);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("attempt to attach null pointer");
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("pointer target not mapped for attach");
        }

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}
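
/* Return the device address to use for host address HOSTADDRS[I], given
   entry I of TGT's variable list: entries with a splay tree key yield the
   mapped device address; OFFSET_INLINED passes the (possibly already
   device-translated) host value through as-is; OFFSET_POINTER yields a null
   pointer; and OFFSET_STRUCT computes the address relative to the following
   entry, which maps the first field of the same structure.  */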
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
                                             & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH)
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          gomp_coalesce_buf_add (&cbuf, tgt_size,
                                 cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_POINTER;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        {
          int always_to_cnt = 0;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              bool has_nullptr = false;
              size_t j;
              for (j = 0; j < n->tgt->list_count; j++)
                if (n->tgt->list[j].key == n)
                  {
                    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
                    break;
                  }
              if (n->tgt->list_count == 0)
                {
                  /* 'declare target'; assume has_nullptr; it could also be a
                     statically assigned pointer, but then it should point to
                     the equivalent variable on the host.  */
                  assert (n->refcount == REFCOUNT_INFINITY);
                  has_nullptr = true;
                }
              else
                assert (j < n->tgt->list_count);
              /* Re-map the data if there is an 'always' modifier, or if a
                 null pointer was there before and a nonnull one has been
                 found now; that permits transparent re-mapping for Fortran
                 array descriptors which were previously mapped unallocated.  */
              for (j = i + 1; j < mapnum; j++)
                {
                  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                      && (!has_nullptr
                          || !GOMP_MAP_POINTER_P (ptr_kind)
                          || *(void **) hostaddrs[j] == NULL))
                    break;
                  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                           || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                               > cur_node.host_end))
                    break;
                  else
                    {
                      has_always_ptrset = true;
                      ++always_to_cnt;
                    }
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                  kind & typemask, always_to_cnt > 0, NULL);
          i += always_to_cnt;
        }
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present, hence skip the entry, including its MAP_POINTER,
                 if one exists.  */
              tgt->list[i].offset = OFFSET_POINTER;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              int kind;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
                                          kinds, j)) & typemask))
                    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
                      - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
        if (has_always_ptrset
            && tgt->list[i].key
            && (get_kind (short_mapkind, kinds, i) & typemask)
               == GOMP_MAP_TO_PSET)
          {
            splay_tree_key k = tgt->list[i].key;
            bool has_nullptr = false;
            size_t j;
            for (j = 0; j < k->tgt->list_count; j++)
              if (k->tgt->list[j].key == k)
                {
                  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
                  break;
                }
            if (k->tgt->list_count == 0)
              has_nullptr = true;
            else
              assert (j < k->tgt->list_count);

            tgt->list[i].has_null_ptr_assoc = false;
            for (j = i + 1; j < mapnum; j++)
              {
                int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                    && (!has_nullptr
                        || !GOMP_MAP_POINTER_P (ptr_kind)
                        || *(void **) hostaddrs[j] == NULL))
                  break;
                else if ((uintptr_t) hostaddrs[j] < k->host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > k->host_end))
                  break;
                else
                  {
                    if (*(void **) hostaddrs[j] == NULL)
                      tgt->list[i].has_null_ptr_assoc = true;
                    tgt->list[j].key = k;
                    tgt->list[j].copy_from = false;
                    tgt->list[j].always_copy_from = false;
                    tgt->list[j].is_attach = false;
                    if (k->refcount != REFCOUNT_INFINITY)
                      k->refcount++;
                    gomp_map_pointer (k->tgt, aq,
                                      (uintptr_t) *(void **) hostaddrs[j],
                                      k->tgt_offset + ((uintptr_t) hostaddrs[j]
                                                       - k->host_start),
                                      sizes[j], cbufp);
                  }
              }
            i = j - 1;
          }
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, cbufp);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
                /* The OpenACC 'host_data' construct only allows 'use_device'
                   "mapping" clauses, so in the first loop, 'not_found_cnt'
                   must always have been zero, so all OpenACC 'use_device'
                   clauses have already been handled.  (We can only easily test
                   'use_device' with 'if_present' clause here.)  */
                assert (tgt->list[i].offset == OFFSET_INLINED);
                /* Nevertheless, FALLTHRU to the normal handling, to keep the
                   code conceptually simple, similar to the first loop.  */
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n != NULL)
                      {
                        cur_node.host_start -= n->host_start;
                        hostaddrs[i]
                          = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start);
                      }
                    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    else if ((kind & typemask)
                             == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                      /* If not present, continue using the host address.  */
                      ;
                    else
                      __builtin_unreachable ();
                    tgt->list[i].offset = OFFSET_INLINED;
                  }
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              case GOMP_MAP_IF_PRESENT:
                /* Not present - otherwise handled above.  Skip over its
                   MAP_POINTER as well.  */
                if (i + 1 < mapnum
                    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                        == GOMP_MAP_POINTER))
                  ++i;
                continue;
              case GOMP_MAP_ATTACH:
                {
                  cur_node.host_start = (uintptr_t) hostaddrs[i];
                  cur_node.host_end = cur_node.host_start + sizeof (void *);
                  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
                  if (n != NULL)
                    {
                      tgt->list[i].key = n;
                      tgt->list[i].offset = cur_node.host_start - n->host_start;
                      tgt->list[i].length = n->host_end - n->host_start;
                      tgt->list[i].copy_from = false;
                      tgt->list[i].always_copy_from = false;
                      tgt->list[i].is_attach = true;
                      /* OpenACC 'attach'/'detach' doesn't affect
                         structured/dynamic reference counts ('n->refcount',
                         'n->dynamic_refcount').  */
                    }
                  else
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("outer struct not mapped for attach");
                    }
                  gomp_attach_pointer (devicep, aq, mem_map, n,
                                       (uintptr_t) hostaddrs[i], sizes[i],
                                       cbufp);
                  continue;
                }
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                      kind & typemask, false, cbufp);
            else
              {
                k->aux = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target
                       address of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->aux
                      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
                    k->aux->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].is_attach = false;
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                k->dynamic_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, aq,
                                      (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i], cbufp);
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    tgt->list[i].has_null_ptr_assoc = false;

                    for (j = i + 1; j < mapnum; j++)
                      {
                        int ptr_kind = (get_kind (short_mapkind, kinds, j)
                                        & typemask);
                        if (!GOMP_MAP_POINTER_P (ptr_kind)
                            && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
                          break;
                        else if ((uintptr_t) hostaddrs[j] < k->host_start
                                 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                     > k->host_end))
                          break;
                        else
                          {
                            tgt->list[j].key = k;
                            tgt->list[j].copy_from = false;
                            tgt->list[j].always_copy_from = false;
                            tgt->list[j].is_attach = false;
                            tgt->list[i].has_null_ptr_assoc
                              |= !(*(void **) hostaddrs[j]);
                            if (k->refcount != REFCOUNT_INFINITY)
                              k->refcount++;
                            gomp_map_pointer (tgt, aq,
                                              (uintptr_t) *(void **) hostaddrs[j],
                                              k->tgt_offset
                                              + ((uintptr_t) hostaddrs[j]
                                                 - k->host_start),
                                              sizes[j], cbufp);
                          }
                      }
                    i = j - 1;
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->aux && k->aux->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), NULL);
                  }
                array++;
              }
          }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              cbufp);
        }
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep, aq,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, size_t mapnum,
                     void **hostaddrs, void **devaddrs, size_t *sizes,
                     void *kinds, bool short_mapkind,
                     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}
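
/* Illustrative sketch (not part of this file): a caller such as
   GOMP_target_ext maps the clause arrays and launches roughly like

     struct target_mem_desc *tgt
       = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                        true, GOMP_MAP_VARS_TARGET);
     devicep->run_func (devicep->target_id, fn_addr,
                        (void *) tgt->tgt_start, args);
     gomp_unmap_vars (tgt, true);

   With GOMP_MAP_VARS_TARGET, the first mapnum pointer-sized slots of the
   freshly allocated device block hold the device addresses computed by
   gomp_map_val, so the offloaded function receives a single pointer to its
   argument vector.  */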

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
                          struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->aux)
    {
      if (k->aux->link_key)
        splay_tree_insert (&devicep->mem_map,
                           (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
        free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
                                                (void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
                       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
                          struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
        gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
                             + tgt->list[i].offset,
                             false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
        continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
         counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
        continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
      else if (k->refcount == 1)
        {
          k->refcount--;
          do_unmap = true;
        }

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
        gomp_copy_dev2host (devicep, aq,
                            (void *) (k->host_start + tgt->list[i].offset),
                            (void *) (k->tgt->tgt_start + k->tgt_offset
                                      + tgt->list[i].offset),
                            tgt->list[i].length);
      if (do_unmap)
        {
          struct target_mem_desc *k_tgt = k->tgt;
          bool is_tgt_unmapped = gomp_remove_var (devicep, k);
          /* It would be bad if TGT got unmapped while we're still iterating
             over its LIST_COUNT, and also expect to use it in the following
             code.  */
          assert (!is_tgt_unmapped
                  || k_tgt != tgt);
        }
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
                                                (void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
                       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }

            void *hostaddr = (void *) cur_node.host_start;
            void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start);
            size_t size = cur_node.host_end - cur_node.host_start;

            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
                                  NULL);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses from
   HOST_TABLE and from the loaded target image.  We rely on the host and
   device compilers emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;
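
  /* For instance (illustrative sketch, not emitted by this library): an
     image providing one function FOO and one 8-byte variable BAR would come
     with a host table equivalent to

       static void *funcs[] = { (void *) foo };
       static void *vars[] = { (void *) &bar, (void *) sizeof (bar) };
       static const void *table[] = { &funcs[0], &funcs[1],
                                      &vars[0], &vars[2] };

     giving num_funcs == 1 and num_vars == 1 above.  */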

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
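
  /* E.g. on a 64-bit host, link_bit is 1ULL << 63, so a size entry of
     (1ULL << 63) | 16 in the host table denotes a 16-byte "declare target
     link" variable, and (size_mask & entry) recovers the plain size 16.  */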
1754 for (i = 0; i < num_vars; i++)
1756 struct addr_pair *target_var = &target_table[num_funcs + i];
1757 uintptr_t target_size = target_var->end - target_var->start;
1758 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
1760 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
1762 gomp_mutex_unlock (&devicep->lock);
1763 if (is_register_lock)
1764 gomp_mutex_unlock (&register_lock);
1765 gomp_fatal ("Cannot map target variables (size mismatch)");
1768 splay_tree_key k = &array->key;
1769 k->host_start = (uintptr_t) host_var_table[i * 2];
1770 k->host_end
1771 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1772 k->tgt = tgt;
1773 k->tgt_offset = target_var->start;
1774 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1775 k->dynamic_refcount = 0;
1776 k->aux = NULL;
1777 array->left = NULL;
1778 array->right = NULL;
1779 splay_tree_insert (&devicep->mem_map, array);
1780 array++;
1783 free (target_table);
1786 /* Unload the mappings described by target_data from device DEVICE_P.
1787 The device must be locked. */
1789 static void
1790 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1791 unsigned version,
1792 const void *host_table, const void *target_data)
1794 void **host_func_table = ((void ***) host_table)[0];
1795 void **host_funcs_end = ((void ***) host_table)[1];
1796 void **host_var_table = ((void ***) host_table)[2];
1797 void **host_vars_end = ((void ***) host_table)[3];
1799 /* The func table contains only addresses, the var table contains addresses
1800 and corresponding sizes. */
1801 int num_funcs = host_funcs_end - host_func_table;
1802 int num_vars = (host_vars_end - host_var_table) / 2;
1804 struct splay_tree_key_s k;
1805 splay_tree_key node = NULL;
1807 /* Find mapping at start of node array */
1808 if (num_funcs || num_vars)
1810 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1811 : (uintptr_t) host_var_table[0]);
1812 k.host_end = k.host_start + 1;
1813 node = splay_tree_lookup (&devicep->mem_map, &k);
1816 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1818 gomp_mutex_unlock (&devicep->lock);
1819 gomp_fatal ("image unload fail");
1822 /* Remove mappings from splay tree. */
1823 int i;
1824 for (i = 0; i < num_funcs; i++)
1826 k.host_start = (uintptr_t) host_func_table[i];
1827 k.host_end = k.host_start + 1;
1828 splay_tree_remove (&devicep->mem_map, &k);
1831 /* Most significant bit of the size in host and target tables marks
1832 "omp declare target link" variables. */
1833 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1834 const uintptr_t size_mask = ~link_bit;
1835 bool is_tgt_unmapped = false;
1837 for (i = 0; i < num_vars; i++)
1839 k.host_start = (uintptr_t) host_var_table[i * 2];
1840 k.host_end
1841 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1843 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1844 splay_tree_remove (&devicep->mem_map, &k);
1845 else
1847 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1848 is_tgt_unmapped = gomp_remove_var (devicep, n);
1852 if (node && !is_tgt_unmapped)
1854 free (node->tgt);
1855 free (node);
1859 /* This function should be called from every offload image while loading.
1860 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1861 the target, and TARGET_DATA needed by target plugin. */
1863 void
1864 GOMP_offload_register_ver (unsigned version, const void *host_table,
1865 int target_type, const void *target_data)
1867 int i;
1869 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1870 gomp_fatal ("Library too old for offload (version %u < %u)",
1871 GOMP_VERSION, GOMP_VERSION_LIB (version));
1873 gomp_mutex_lock (&register_lock);
1875 /* Load image to all initialized devices. */
1876 for (i = 0; i < num_devices; i++)
1878 struct gomp_device_descr *devicep = &devices[i];
1879 gomp_mutex_lock (&devicep->lock);
1880 if (devicep->type == target_type
1881 && devicep->state == GOMP_DEVICE_INITIALIZED)
1882 gomp_load_image_to_device (devicep, version,
1883 host_table, target_data, true);
1884 gomp_mutex_unlock (&devicep->lock);
1887 /* Insert image to array of pending images. */
1888 offload_images
1889 = gomp_realloc_unlock (offload_images,
1890 (num_offload_images + 1)
1891 * sizeof (struct offload_image_descr));
1892 offload_images[num_offload_images].version = version;
1893 offload_images[num_offload_images].type = target_type;
1894 offload_images[num_offload_images].host_table = host_table;
1895 offload_images[num_offload_images].target_data = target_data;
1897 num_offload_images++;
1898 gomp_mutex_unlock (&register_lock);
1901 void
1902 GOMP_offload_register (const void *host_table, int target_type,
1903 const void *target_data)
1905 GOMP_offload_register_ver (0, host_table, target_type, target_data);
1908 /* This function should be called from every offload image while unloading.
1909 It gets the descriptor of the host func and var tables HOST_TABLE, the
1910 TARGET_TYPE of the target, and the TARGET_DATA needed by the target plugin. */
1912 void
1913 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1914 int target_type, const void *target_data)
1916 int i;
1918 gomp_mutex_lock (&register_lock);
1920 /* Unload image from all initialized devices. */
1921 for (i = 0; i < num_devices; i++)
1923 struct gomp_device_descr *devicep = &devices[i];
1924 gomp_mutex_lock (&devicep->lock);
1925 if (devicep->type == target_type
1926 && devicep->state == GOMP_DEVICE_INITIALIZED)
1927 gomp_unload_image_from_device (devicep, version,
1928 host_table, target_data);
1929 gomp_mutex_unlock (&devicep->lock);
1932 /* Remove the image from the array of pending images. */
1933 for (i = 0; i < num_offload_images; i++)
1934 if (offload_images[i].target_data == target_data)
1936 offload_images[i] = offload_images[--num_offload_images];
1937 break;
1940 gomp_mutex_unlock (&register_lock);
1943 void
1944 GOMP_offload_unregister (const void *host_table, int target_type,
1945 const void *target_data)
1947 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1950 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1951 must be locked on entry, and remains locked on return. */
1953 attribute_hidden void
1954 gomp_init_device (struct gomp_device_descr *devicep)
1956 int i;
1957 if (!devicep->init_device_func (devicep->target_id))
1959 gomp_mutex_unlock (&devicep->lock);
1960 gomp_fatal ("device initialization failed");
1963 /* Load to the device all images registered so far. */
1964 for (i = 0; i < num_offload_images; i++)
1966 struct offload_image_descr *image = &offload_images[i];
1967 if (image->type == devicep->type)
1968 gomp_load_image_to_device (devicep, image->version,
1969 image->host_table, image->target_data,
1970 false);
1973 /* Initialize OpenACC asynchronous queues. */
1974 goacc_init_asyncqueues (devicep);
1976 devicep->state = GOMP_DEVICE_INITIALIZED;
1979 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1980 must be locked on entry, and remains locked on return. */
1982 attribute_hidden bool
1983 gomp_fini_device (struct gomp_device_descr *devicep)
1985 bool ret = goacc_fini_asyncqueues (devicep);
1986 ret &= devicep->fini_device_func (devicep->target_id);
1987 devicep->state = GOMP_DEVICE_FINALIZED;
1988 return ret;
1991 attribute_hidden void
1992 gomp_unload_device (struct gomp_device_descr *devicep)
1994 if (devicep->state == GOMP_DEVICE_INITIALIZED)
1996 unsigned i;
1998 /* Unload from the device all images registered so far. */
1999 for (i = 0; i < num_offload_images; i++)
2001 struct offload_image_descr *image = &offload_images[i];
2002 if (image->type == devicep->type)
2003 gomp_unload_image_from_device (devicep, image->version,
2004 image->host_table,
2005 image->target_data);
2010 /* Host fallback for GOMP_target{,_ext} routines. */
2012 static void
2013 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2014 struct gomp_device_descr *devicep)
2016 struct gomp_thread old_thr, *thr = gomp_thread ();
2018 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2019 && devicep != NULL)
2020 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2021 "be used for offloading");
2023 old_thr = *thr;
2024 memset (thr, '\0', sizeof (*thr));
2025 if (gomp_places_list)
2027 thr->place = old_thr.place;
2028 thr->ts.place_partition_len = gomp_places_list_len;
2030 fn (hostaddrs);
2031 gomp_free_thread (thr);
2032 *thr = old_thr;
2035 /* Calculate alignment and size requirements of a private copy of data shared
2036 as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE. */
2038 static inline void
2039 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2040 unsigned short *kinds, size_t *tgt_align,
2041 size_t *tgt_size)
2043 size_t i;
2044 for (i = 0; i < mapnum; i++)
2045 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2047 size_t align = (size_t) 1 << (kinds[i] >> 8);
2048 if (*tgt_align < align)
2049 *tgt_align = align;
2050 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2051 *tgt_size += sizes[i];
2055 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
2057 static inline void
2058 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2059 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2060 size_t tgt_size)
2062 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2063 if (al)
2064 tgt += tgt_align - al;
2065 tgt_size = 0;
2066 size_t i;
2067 for (i = 0; i < mapnum; i++)
2068 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2070 size_t align = (size_t) 1 << (kinds[i] >> 8);
2071 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2072 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2073 hostaddrs[i] = tgt + tgt_size;
2074 tgt_size = tgt_size + sizes[i];
2078 /* Helper function of GOMP_target{,_ext} routines. */
2080 static void *
2081 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2082 void (*host_fn) (void *))
2084 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2085 return (void *) host_fn;
2086 else
2088 gomp_mutex_lock (&devicep->lock);
2089 if (devicep->state == GOMP_DEVICE_FINALIZED)
2091 gomp_mutex_unlock (&devicep->lock);
2092 return NULL;
2095 struct splay_tree_key_s k;
2096 k.host_start = (uintptr_t) host_fn;
2097 k.host_end = k.host_start + 1;
2098 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2099 gomp_mutex_unlock (&devicep->lock);
2100 if (tgt_fn == NULL)
2101 return NULL;
2103 return (void *) tgt_fn->tgt_offset;
2107 /* Called when encountering a target directive. If DEVICE
2108 is GOMP_DEVICE_ICV, it means use the device-var ICV. If it is
2109 GOMP_DEVICE_HOST_FALLBACK (or any value
2110 larger than the last available hw device), use the host fallback.
2111 FN is the address of the host code, UNUSED is part of the current ABI, but
2112 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2113 with MAPNUM entries, holding the addresses of the host objects,
2114 the sizes of the host objects (resp., for pointer kinds, the pointer bias,
2115 with the size assumed to be sizeof (void *)) and the kinds. */
2117 void
2118 GOMP_target (int device, void (*fn) (void *), const void *unused,
2119 size_t mapnum, void **hostaddrs, size_t *sizes,
2120 unsigned char *kinds)
2122 struct gomp_device_descr *devicep = resolve_device (device);
2124 void *fn_addr;
2125 if (devicep == NULL
2126 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2127 /* All shared memory devices should use the GOMP_target_ext function. */
2128 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2129 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2130 return gomp_target_fallback (fn, hostaddrs, devicep);
2132 struct target_mem_desc *tgt_vars
2133 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2134 GOMP_MAP_VARS_TARGET);
2135 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2136 NULL);
2137 gomp_unmap_vars (tgt_vars, true);
2140 static inline unsigned int
2141 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2143 /* If we cannot run asynchronously, simply ignore nowait. */
2144 if (devicep != NULL && devicep->async_run_func == NULL)
2145 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2147 return flags;
2150 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2151 and several arguments have been added:
2152 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2153 DEPEND is array of dependencies, see GOMP_task for details.
2155 ARGS is a pointer to an array consisting of a variable number of both
2156 device-independent and device-specific arguments; each argument can take
2157 one or two elements, where the first specifies for which device it is
2158 intended, the argument type and optionally also the value. If the value
2159 does not fit into the first element, the whole second element holds the
2160 actual value. The last element of the array is a single NULL. The
2161 device-independent arguments include for example NUM_TEAMS and THREAD_LIMIT.
2163 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2164 that value, or 1 if the teams construct is not present, or 0 if the
2165 teams construct does not have a num_teams clause and so the choice is
2166 implementation defined, and -1 if it can't be determined on the host
2167 what value GOMP_teams will receive on the device.
2168 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2169 body with that value, or 0 if the teams construct does not have a
2170 thread_limit clause or the teams construct is not present, or -1 if it
2171 can't be determined on the host what value GOMP_teams will receive on the device. */
2173 void
2174 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2175 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2176 unsigned int flags, void **depend, void **args)
2178 struct gomp_device_descr *devicep = resolve_device (device);
2179 size_t tgt_align = 0, tgt_size = 0;
2180 bool fpc_done = false;
2182 flags = clear_unsupported_flags (devicep, flags);
2184 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2186 struct gomp_thread *thr = gomp_thread ();
2187 /* Create a team if we don't have any around, as nowait
2188 target tasks make sense to run asynchronously even when
2189 outside of any parallel region. */
2190 if (__builtin_expect (thr->ts.team == NULL, 0))
2192 struct gomp_team *team = gomp_new_team (1);
2193 struct gomp_task *task = thr->task;
2194 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2195 team->prev_ts = thr->ts;
2196 thr->ts.team = team;
2197 thr->ts.team_id = 0;
2198 thr->ts.work_share = &team->work_shares[0];
2199 thr->ts.last_work_share = NULL;
2200 #ifdef HAVE_SYNC_BUILTINS
2201 thr->ts.single_count = 0;
2202 #endif
2203 thr->ts.static_trip = 0;
2204 thr->task = &team->implicit_task[0];
2205 gomp_init_task (thr->task, NULL, icv);
2206 if (task)
2208 thr->task = task;
2209 gomp_end_task ();
2210 free (task);
2211 thr->task = &team->implicit_task[0];
2213 else
2214 pthread_setspecific (gomp_thread_destructor, thr);
2216 if (thr->ts.team
2217 && !thr->task->final_task)
2219 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2220 sizes, kinds, flags, depend, args,
2221 GOMP_TARGET_TASK_BEFORE_MAP);
2222 return;
2226 /* If there are depend clauses, but nowait is not present
2227 (or we are in a final task), block the parent task until the
2228 dependencies are resolved and then just continue with the rest
2229 of the function as if it were a merged task. */
2230 if (depend != NULL)
2232 struct gomp_thread *thr = gomp_thread ();
2233 if (thr->task && thr->task->depend_hash)
2235 /* If we might need to wait, copy firstprivate now. */
2236 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2237 &tgt_align, &tgt_size);
2238 if (tgt_align)
2240 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2241 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2242 tgt_align, tgt_size);
2244 fpc_done = true;
2245 gomp_task_maybe_wait_for_dependencies (depend);
2249 void *fn_addr;
2250 if (devicep == NULL
2251 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2252 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2253 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2255 if (!fpc_done)
2257 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2258 &tgt_align, &tgt_size);
2259 if (tgt_align)
2261 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2262 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2263 tgt_align, tgt_size);
2266 gomp_target_fallback (fn, hostaddrs, devicep);
2267 return;
2270 struct target_mem_desc *tgt_vars;
2271 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2273 if (!fpc_done)
2275 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2276 &tgt_align, &tgt_size);
2277 if (tgt_align)
2279 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2280 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2281 tgt_align, tgt_size);
2284 tgt_vars = NULL;
2286 else
2287 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2288 true, GOMP_MAP_VARS_TARGET);
2289 devicep->run_func (devicep->target_id, fn_addr,
2290 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2291 args);
2292 if (tgt_vars)
2293 gomp_unmap_vars (tgt_vars, true);
2296 /* Host fallback for GOMP_target_data{,_ext} routines. */
2298 static void
2299 gomp_target_data_fallback (struct gomp_device_descr *devicep)
2301 struct gomp_task_icv *icv = gomp_icv (false);
2303 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2304 && devicep != NULL)
2305 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2306 "be used for offloading");
2308 if (icv->target_data)
2310 /* Even when doing a host fallback, if there are any active
2311 #pragma omp target data constructs, we need to remember the
2312 new #pragma omp target data construct; otherwise GOMP_target_end_data
2313 would get out of sync. */
2314 struct target_mem_desc *tgt
2315 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2316 GOMP_MAP_VARS_DATA);
2317 tgt->prev = icv->target_data;
2318 icv->target_data = tgt;
2322 void
2323 GOMP_target_data (int device, const void *unused, size_t mapnum,
2324 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2326 struct gomp_device_descr *devicep = resolve_device (device);
2328 if (devicep == NULL
2329 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2330 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2331 return gomp_target_data_fallback (devicep);
2333 struct target_mem_desc *tgt
2334 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2335 GOMP_MAP_VARS_DATA);
2336 struct gomp_task_icv *icv = gomp_icv (true);
2337 tgt->prev = icv->target_data;
2338 icv->target_data = tgt;
2341 void
2342 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2343 size_t *sizes, unsigned short *kinds)
2345 struct gomp_device_descr *devicep = resolve_device (device);
2347 if (devicep == NULL
2348 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2349 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2350 return gomp_target_data_fallback (devicep);
2352 struct target_mem_desc *tgt
2353 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2354 GOMP_MAP_VARS_DATA);
2355 struct gomp_task_icv *icv = gomp_icv (true);
2356 tgt->prev = icv->target_data;
2357 icv->target_data = tgt;
2360 void
2361 GOMP_target_end_data (void)
2363 struct gomp_task_icv *icv = gomp_icv (false);
2364 if (icv->target_data)
2366 struct target_mem_desc *tgt = icv->target_data;
2367 icv->target_data = tgt->prev;
2368 gomp_unmap_vars (tgt, true);
2372 void
2373 GOMP_target_update (int device, const void *unused, size_t mapnum,
2374 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2376 struct gomp_device_descr *devicep = resolve_device (device);
2378 if (devicep == NULL
2379 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2380 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2381 return;
2383 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2386 void
2387 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2388 size_t *sizes, unsigned short *kinds,
2389 unsigned int flags, void **depend)
2391 struct gomp_device_descr *devicep = resolve_device (device);
2393 /* If there are depend clauses, but nowait is not present,
2394 block the parent task until the dependencies are resolved
2395 and then just continue with the rest of the function as if it
2396 were a merged task. Until we are able to schedule tasks during
2397 variable mapping or unmapping, ignore nowait if depend clauses
2398 are not present. */
2399 if (depend != NULL)
2401 struct gomp_thread *thr = gomp_thread ();
2402 if (thr->task && thr->task->depend_hash)
2404 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2405 && thr->ts.team
2406 && !thr->task->final_task)
2408 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2409 mapnum, hostaddrs, sizes, kinds,
2410 flags | GOMP_TARGET_FLAG_UPDATE,
2411 depend, NULL, GOMP_TARGET_TASK_DATA))
2412 return;
2414 else
2416 struct gomp_team *team = thr->ts.team;
2417 /* If parallel or taskgroup has been cancelled, don't start new
2418 tasks. */
2419 if (__builtin_expect (gomp_cancel_var, 0) && team)
2421 if (gomp_team_barrier_cancelled (&team->barrier))
2422 return;
2423 if (thr->task->taskgroup)
2425 if (thr->task->taskgroup->cancelled)
2426 return;
2427 if (thr->task->taskgroup->workshare
2428 && thr->task->taskgroup->prev
2429 && thr->task->taskgroup->prev->cancelled)
2430 return;
2434 gomp_task_maybe_wait_for_dependencies (depend);
2439 if (devicep == NULL
2440 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2441 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2442 return;
2444 struct gomp_thread *thr = gomp_thread ();
2445 struct gomp_team *team = thr->ts.team;
2446 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2447 if (__builtin_expect (gomp_cancel_var, 0) && team)
2449 if (gomp_team_barrier_cancelled (&team->barrier))
2450 return;
2451 if (thr->task->taskgroup)
2453 if (thr->task->taskgroup->cancelled)
2454 return;
2455 if (thr->task->taskgroup->workshare
2456 && thr->task->taskgroup->prev
2457 && thr->task->taskgroup->prev->cancelled)
2458 return;
2462 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2465 static void
2466 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2467 void **hostaddrs, size_t *sizes, unsigned short *kinds)
2469 const int typemask = 0xff;
2470 size_t i;
2471 gomp_mutex_lock (&devicep->lock);
2472 if (devicep->state == GOMP_DEVICE_FINALIZED)
2474 gomp_mutex_unlock (&devicep->lock);
2475 return;
2478 for (i = 0; i < mapnum; i++)
2480 struct splay_tree_key_s cur_node;
2481 unsigned char kind = kinds[i] & typemask;
2482 switch (kind)
2484 case GOMP_MAP_FROM:
2485 case GOMP_MAP_ALWAYS_FROM:
2486 case GOMP_MAP_DELETE:
2487 case GOMP_MAP_RELEASE:
2488 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2489 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2490 cur_node.host_start = (uintptr_t) hostaddrs[i];
2491 cur_node.host_end = cur_node.host_start + sizes[i];
2492 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2493 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2494 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2495 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2496 if (!k)
2497 continue;
2499 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2500 k->refcount--;
2501 if ((kind == GOMP_MAP_DELETE
2502 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2503 && k->refcount != REFCOUNT_INFINITY)
2504 k->refcount = 0;
2506 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2507 || kind == GOMP_MAP_ALWAYS_FROM)
2508 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2509 (void *) (k->tgt->tgt_start + k->tgt_offset
2510 + cur_node.host_start
2511 - k->host_start),
2512 cur_node.host_end - cur_node.host_start);
2513 if (k->refcount == 0)
2514 gomp_remove_var (devicep, k);
2516 break;
2517 default:
2518 gomp_mutex_unlock (&devicep->lock);
2519 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2520 kind);
2524 gomp_mutex_unlock (&devicep->lock);
2527 void
2528 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2529 size_t *sizes, unsigned short *kinds,
2530 unsigned int flags, void **depend)
2532 struct gomp_device_descr *devicep = resolve_device (device);
2534 /* If there are depend clauses, but nowait is not present,
2535 block the parent task until the dependencies are resolved
2536 and then just continue with the rest of the function as if it
2537 were a merged task. Until we are able to schedule tasks during
2538 variable mapping or unmapping, ignore nowait if depend clauses
2539 are not present. */
2540 if (depend != NULL)
2542 struct gomp_thread *thr = gomp_thread ();
2543 if (thr->task && thr->task->depend_hash)
2545 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2546 && thr->ts.team
2547 && !thr->task->final_task)
2549 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2550 mapnum, hostaddrs, sizes, kinds,
2551 flags, depend, NULL,
2552 GOMP_TARGET_TASK_DATA))
2553 return;
2555 else
2557 struct gomp_team *team = thr->ts.team;
2558 /* If parallel or taskgroup has been cancelled, don't start new
2559 tasks. */
2560 if (__builtin_expect (gomp_cancel_var, 0) && team)
2562 if (gomp_team_barrier_cancelled (&team->barrier))
2563 return;
2564 if (thr->task->taskgroup)
2566 if (thr->task->taskgroup->cancelled)
2567 return;
2568 if (thr->task->taskgroup->workshare
2569 && thr->task->taskgroup->prev
2570 && thr->task->taskgroup->prev->cancelled)
2571 return;
2575 gomp_task_maybe_wait_for_dependencies (depend);
2580 if (devicep == NULL
2581 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2582 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2583 return;
2585 struct gomp_thread *thr = gomp_thread ();
2586 struct gomp_team *team = thr->ts.team;
2587 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2588 if (__builtin_expect (gomp_cancel_var, 0) && team)
2590 if (gomp_team_barrier_cancelled (&team->barrier))
2591 return;
2592 if (thr->task->taskgroup)
2594 if (thr->task->taskgroup->cancelled)
2595 return;
2596 if (thr->task->taskgroup->workshare
2597 && thr->task->taskgroup->prev
2598 && thr->task->taskgroup->prev->cancelled)
2599 return;
2603 /* The variables are mapped separately such that they can be released
2604 independently. */
2605 size_t i, j;
2606 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2607 for (i = 0; i < mapnum; i++)
2608 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2610 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2611 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2612 i += sizes[i];
2614 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
2616 for (j = i + 1; j < mapnum; j++)
2617 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
2618 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
2619 break;
2620 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
2621 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2622 i += j - i - 1;
2624 else
2625 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2626 true, GOMP_MAP_VARS_ENTER_DATA);
2627 else
2628 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2631 bool
2632 gomp_target_task_fn (void *data)
2634 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2635 struct gomp_device_descr *devicep = ttask->devicep;
2637 if (ttask->fn != NULL)
2639 void *fn_addr;
2640 if (devicep == NULL
2641 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2642 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2643 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2645 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2646 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep);
2647 return false;
2650 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2652 if (ttask->tgt)
2653 gomp_unmap_vars (ttask->tgt, true);
2654 return false;
2657 void *actual_arguments;
2658 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2660 ttask->tgt = NULL;
2661 actual_arguments = ttask->hostaddrs;
2663 else
2665 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2666 NULL, ttask->sizes, ttask->kinds, true,
2667 GOMP_MAP_VARS_TARGET);
2668 actual_arguments = (void *) ttask->tgt->tgt_start;
2670 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2672 assert (devicep->async_run_func);
2673 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2674 ttask->args, (void *) ttask);
2675 return true;
2677 else if (devicep == NULL
2678 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2679 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2680 return false;
2682 size_t i;
2683 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2684 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2685 ttask->kinds, true);
2686 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2687 for (i = 0; i < ttask->mapnum; i++)
2688 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2690 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2691 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2692 GOMP_MAP_VARS_ENTER_DATA);
2693 i += ttask->sizes[i];
2695 else
2696 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2697 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2698 else
2699 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2700 ttask->kinds);
2701 return false;
2704 void
2705 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2707 if (thread_limit)
2709 struct gomp_task_icv *icv = gomp_icv (true);
2710 icv->thread_limit_var
2711 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2713 (void) num_teams;
2716 void *
2717 omp_target_alloc (size_t size, int device_num)
2719 if (device_num == gomp_get_num_devices ())
2720 return malloc (size);
2722 if (device_num < 0)
2723 return NULL;
2725 struct gomp_device_descr *devicep = resolve_device (device_num);
2726 if (devicep == NULL)
2727 return NULL;
2729 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2730 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2731 return malloc (size);
2733 gomp_mutex_lock (&devicep->lock);
2734 void *ret = devicep->alloc_func (devicep->target_id, size);
2735 gomp_mutex_unlock (&devicep->lock);
2736 return ret;
2739 void
2740 omp_target_free (void *device_ptr, int device_num)
2742 if (device_ptr == NULL)
2743 return;
2745 if (device_num == gomp_get_num_devices ())
2747 free (device_ptr);
2748 return;
2751 if (device_num < 0)
2752 return;
2754 struct gomp_device_descr *devicep = resolve_device (device_num);
2755 if (devicep == NULL)
2756 return;
2758 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2759 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2761 free (device_ptr);
2762 return;
2765 gomp_mutex_lock (&devicep->lock);
2766 gomp_free_device_memory (devicep, device_ptr);
2767 gomp_mutex_unlock (&devicep->lock);
2770 int
2771 omp_target_is_present (const void *ptr, int device_num)
2773 if (ptr == NULL)
2774 return 1;
2776 if (device_num == gomp_get_num_devices ())
2777 return 1;
2779 if (device_num < 0)
2780 return 0;
2782 struct gomp_device_descr *devicep = resolve_device (device_num);
2783 if (devicep == NULL)
2784 return 0;
2786 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2787 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2788 return 1;
2790 gomp_mutex_lock (&devicep->lock);
2791 struct splay_tree_s *mem_map = &devicep->mem_map;
2792 struct splay_tree_key_s cur_node;
2794 cur_node.host_start = (uintptr_t) ptr;
2795 cur_node.host_end = cur_node.host_start;
2796 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2797 int ret = n != NULL;
2798 gomp_mutex_unlock (&devicep->lock);
2799 return ret;
2802 int
2803 omp_target_memcpy (void *dst, const void *src, size_t length,
2804 size_t dst_offset, size_t src_offset, int dst_device_num,
2805 int src_device_num)
2807 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2808 bool ret;
2810 if (dst_device_num != gomp_get_num_devices ())
2812 if (dst_device_num < 0)
2813 return EINVAL;
2815 dst_devicep = resolve_device (dst_device_num);
2816 if (dst_devicep == NULL)
2817 return EINVAL;
2819 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2820 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2821 dst_devicep = NULL;
2823 if (src_device_num != num_devices_openmp)
2825 if (src_device_num < 0)
2826 return EINVAL;
2828 src_devicep = resolve_device (src_device_num);
2829 if (src_devicep == NULL)
2830 return EINVAL;
2832 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2833 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2834 src_devicep = NULL;
2836 if (src_devicep == NULL && dst_devicep == NULL)
2838 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2839 return 0;
2841 if (src_devicep == NULL)
2843 gomp_mutex_lock (&dst_devicep->lock);
2844 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2845 (char *) dst + dst_offset,
2846 (char *) src + src_offset, length);
2847 gomp_mutex_unlock (&dst_devicep->lock);
2848 return (ret ? 0 : EINVAL);
2850 if (dst_devicep == NULL)
2852 gomp_mutex_lock (&src_devicep->lock);
2853 ret = src_devicep->dev2host_func (src_devicep->target_id,
2854 (char *) dst + dst_offset,
2855 (char *) src + src_offset, length);
2856 gomp_mutex_unlock (&src_devicep->lock);
2857 return (ret ? 0 : EINVAL);
2859 if (src_devicep == dst_devicep)
2861 gomp_mutex_lock (&src_devicep->lock);
2862 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2863 (char *) dst + dst_offset,
2864 (char *) src + src_offset, length);
2865 gomp_mutex_unlock (&src_devicep->lock);
2866 return (ret ? 0 : EINVAL);
2868 return EINVAL;
2871 static int
2872 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2873 int num_dims, const size_t *volume,
2874 const size_t *dst_offsets,
2875 const size_t *src_offsets,
2876 const size_t *dst_dimensions,
2877 const size_t *src_dimensions,
2878 struct gomp_device_descr *dst_devicep,
2879 struct gomp_device_descr *src_devicep)
2881 size_t dst_slice = element_size;
2882 size_t src_slice = element_size;
2883 size_t j, dst_off, src_off, length;
2884 int i, ret;
2886 if (num_dims == 1)
2888 if (__builtin_mul_overflow (element_size, volume[0], &length)
2889 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2890 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2891 return EINVAL;
2892 if (dst_devicep == NULL && src_devicep == NULL)
2894 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2895 length);
2896 ret = 1;
2898 else if (src_devicep == NULL)
2899 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2900 (char *) dst + dst_off,
2901 (const char *) src + src_off,
2902 length);
2903 else if (dst_devicep == NULL)
2904 ret = src_devicep->dev2host_func (src_devicep->target_id,
2905 (char *) dst + dst_off,
2906 (const char *) src + src_off,
2907 length);
2908 else if (src_devicep == dst_devicep)
2909 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2910 (char *) dst + dst_off,
2911 (const char *) src + src_off,
2912 length);
2913 else
2914 ret = 0;
2915 return ret ? 0 : EINVAL;
2918 /* FIXME: it would be nice to have some plugin function to handle
2919 num_dims == 2 and num_dims == 3 more efficiently. Larger dimension
2920 counts can be handled by the generic recursion below, and for
2921 host-to-host copies it should be used even for num_dims >= 2. */
2923 for (i = 1; i < num_dims; i++)
2924 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2925 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2926 return EINVAL;
2927 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2928 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2929 return EINVAL;
2930 for (j = 0; j < volume[0]; j++)
2932 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2933 (const char *) src + src_off,
2934 element_size, num_dims - 1,
2935 volume + 1, dst_offsets + 1,
2936 src_offsets + 1, dst_dimensions + 1,
2937 src_dimensions + 1, dst_devicep,
2938 src_devicep);
2939 if (ret)
2940 return ret;
2941 dst_off += dst_slice;
2942 src_off += src_slice;
2944 return 0;
2947 int
2948 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2949 int num_dims, const size_t *volume,
2950 const size_t *dst_offsets,
2951 const size_t *src_offsets,
2952 const size_t *dst_dimensions,
2953 const size_t *src_dimensions,
2954 int dst_device_num, int src_device_num)
2956 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2958 if (!dst && !src)
2959 return INT_MAX;
2961 if (dst_device_num != gomp_get_num_devices ())
2963 if (dst_device_num < 0)
2964 return EINVAL;
2966 dst_devicep = resolve_device (dst_device_num);
2967 if (dst_devicep == NULL)
2968 return EINVAL;
2970 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2971 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2972 dst_devicep = NULL;
2974 if (src_device_num != num_devices_openmp)
2976 if (src_device_num < 0)
2977 return EINVAL;
2979 src_devicep = resolve_device (src_device_num);
2980 if (src_devicep == NULL)
2981 return EINVAL;
2983 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2984 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2985 src_devicep = NULL;
2988 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2989 return EINVAL;
2991 if (src_devicep)
2992 gomp_mutex_lock (&src_devicep->lock);
2993 else if (dst_devicep)
2994 gomp_mutex_lock (&dst_devicep->lock);
2995 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2996 volume, dst_offsets, src_offsets,
2997 dst_dimensions, src_dimensions,
2998 dst_devicep, src_devicep);
2999 if (src_devicep)
3000 gomp_mutex_unlock (&src_devicep->lock);
3001 else if (dst_devicep)
3002 gomp_mutex_unlock (&dst_devicep->lock);
3003 return ret;
3006 int
3007 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3008 size_t size, size_t device_offset, int device_num)
3010 if (device_num == gomp_get_num_devices ())
3011 return EINVAL;
3013 if (device_num < 0)
3014 return EINVAL;
3016 struct gomp_device_descr *devicep = resolve_device (device_num);
3017 if (devicep == NULL)
3018 return EINVAL;
3020 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3021 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3022 return EINVAL;
3024 gomp_mutex_lock (&devicep->lock);
3026 struct splay_tree_s *mem_map = &devicep->mem_map;
3027 struct splay_tree_key_s cur_node;
3028 int ret = EINVAL;
3030 cur_node.host_start = (uintptr_t) host_ptr;
3031 cur_node.host_end = cur_node.host_start + size;
3032 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3033 if (n)
3035 if (n->tgt->tgt_start + n->tgt_offset
3036 == (uintptr_t) device_ptr + device_offset
3037 && n->host_start <= cur_node.host_start
3038 && n->host_end >= cur_node.host_end)
3039 ret = 0;
3041 else
3043 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3044 tgt->array = gomp_malloc (sizeof (*tgt->array));
3045 tgt->refcount = 1;
3046 tgt->tgt_start = 0;
3047 tgt->tgt_end = 0;
3048 tgt->to_free = NULL;
3049 tgt->prev = NULL;
3050 tgt->list_count = 0;
3051 tgt->device_descr = devicep;
3052 splay_tree_node array = tgt->array;
3053 splay_tree_key k = &array->key;
3054 k->host_start = cur_node.host_start;
3055 k->host_end = cur_node.host_end;
3056 k->tgt = tgt;
3057 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3058 k->refcount = REFCOUNT_INFINITY;
3059 k->dynamic_refcount = 0;
3060 k->aux = NULL;
3061 array->left = NULL;
3062 array->right = NULL;
3063 splay_tree_insert (&devicep->mem_map, array);
3064 ret = 0;
3066 gomp_mutex_unlock (&devicep->lock);
3067 return ret;
3070 int
3071 omp_target_disassociate_ptr (const void *ptr, int device_num)
3073 if (device_num == gomp_get_num_devices ())
3074 return EINVAL;
3076 if (device_num < 0)
3077 return EINVAL;
3079 struct gomp_device_descr *devicep = resolve_device (device_num);
3080 if (devicep == NULL)
3081 return EINVAL;
3083 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3084 return EINVAL;
3086 gomp_mutex_lock (&devicep->lock);
3088 struct splay_tree_s *mem_map = &devicep->mem_map;
3089 struct splay_tree_key_s cur_node;
3090 int ret = EINVAL;
3092 cur_node.host_start = (uintptr_t) ptr;
3093 cur_node.host_end = cur_node.host_start;
3094 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3095 if (n
3096 && n->host_start == cur_node.host_start
3097 && n->refcount == REFCOUNT_INFINITY
3098 && n->tgt->tgt_start == 0
3099 && n->tgt->to_free == NULL
3100 && n->tgt->refcount == 1
3101 && n->tgt->list_count == 0)
3103 splay_tree_remove (&devicep->mem_map, n);
3104 gomp_unmap_tgt (n->tgt);
3105 ret = 0;
3108 gomp_mutex_unlock (&devicep->lock);
3109 return ret;
3112 int
3113 omp_pause_resource (omp_pause_resource_t kind, int device_num)
3115 (void) kind;
3116 if (device_num == gomp_get_num_devices ())
3117 return gomp_pause_host ();
3118 if (device_num < 0 || device_num >= num_devices_openmp)
3119 return -1;
3120 /* Do nothing for target devices for now. */
3121 return 0;
3124 int
3125 omp_pause_resource_all (omp_pause_resource_t kind)
3127 (void) kind;
3128 if (gomp_pause_host ())
3129 return -1;
3130 /* Do nothing for target devices for now. */
3131 return 0;
3134 ialias (omp_pause_resource)
3135 ialias (omp_pause_resource_all)
3137 #ifdef PLUGIN_SUPPORT
3139 /* This function tries to load a plugin for DEVICE. The name of the
3140 plugin is passed in PLUGIN_NAME.
3141 The handles of the found functions are stored in the corresponding fields
3142 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3144 static bool
3145 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3146 const char *plugin_name)
3148 const char *err = NULL, *last_missing = NULL;
3150 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3151 if (!plugin_handle)
3152 goto dl_fail;
3154 /* Check if all required functions are available in the plugin and store
3155 their handles. None of the symbols can legitimately be NULL,
3156 so we don't need to check dlerror all the time. */
3157 #define DLSYM(f) \
3158 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3159 goto dl_fail
3160 /* Similar, but missing functions are not an error. Evaluates to false
3161 if the lookup failed, true otherwise. */
3162 #define DLSYM_OPT(f, n) \
3163 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3164 || (last_missing = #n, 0))
3166 DLSYM (version);
3167 if (device->version_func () != GOMP_VERSION)
3169 err = "plugin version mismatch";
3170 goto fail;
3173 DLSYM (get_name);
3174 DLSYM (get_caps);
3175 DLSYM (get_type);
3176 DLSYM (get_num_devices);
3177 DLSYM (init_device);
3178 DLSYM (fini_device);
3179 DLSYM (load_image);
3180 DLSYM (unload_image);
3181 DLSYM (alloc);
3182 DLSYM (free);
3183 DLSYM (dev2host);
3184 DLSYM (host2dev);
3185 device->capabilities = device->get_caps_func ();
3186 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3188 DLSYM (run);
3189 DLSYM_OPT (async_run, async_run);
3190 DLSYM_OPT (can_run, can_run);
3191 DLSYM (dev2dev);
3193 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3195 if (!DLSYM_OPT (openacc.exec, openacc_exec)
3196 || !DLSYM_OPT (openacc.create_thread_data,
3197 openacc_create_thread_data)
3198 || !DLSYM_OPT (openacc.destroy_thread_data,
3199 openacc_destroy_thread_data)
3200 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3201 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3202 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3203 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3204 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3205 || !DLSYM_OPT (openacc.async.queue_callback,
3206 openacc_async_queue_callback)
3207 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3208 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3209 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
3210 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
3212 /* Require all the OpenACC handlers if we have
3213 GOMP_OFFLOAD_CAP_OPENACC_200. */
3214 err = "plugin missing OpenACC handler function";
3215 goto fail;
3218 unsigned cuda = 0;
3219 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3220 openacc_cuda_get_current_device);
3221 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3222 openacc_cuda_get_current_context);
3223 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3224 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3225 if (cuda && cuda != 4)
3227 /* Make sure all the CUDA functions are there if any of them are. */
3228 err = "plugin missing OpenACC CUDA handler function";
3229 goto fail;
3232 #undef DLSYM
3233 #undef DLSYM_OPT
3235 return 1;
3237 dl_fail:
3238 err = dlerror ();
3239 fail:
3240 gomp_error ("while loading %s: %s", plugin_name, err);
3241 if (last_missing)
3242 gomp_error ("missing function was %s", last_missing);
3243 if (plugin_handle)
3244 dlclose (plugin_handle);
3246 return 0;
3249 /* This function finalizes all initialized devices. */
3251 static void
3252 gomp_target_fini (void)
3254 int i;
3255 for (i = 0; i < num_devices; i++)
3257 bool ret = true;
3258 struct gomp_device_descr *devicep = &devices[i];
3259 gomp_mutex_lock (&devicep->lock);
3260 if (devicep->state == GOMP_DEVICE_INITIALIZED)
3261 ret = gomp_fini_device (devicep);
3262 gomp_mutex_unlock (&devicep->lock);
3263 if (!ret)
3264 gomp_fatal ("device finalization failed");
3268 /* This function initializes the runtime for offloading.
3269 It parses the list of offload plugins, and tries to load them.
3270 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3271 will be set, and the array DEVICES initialized, containing descriptors for
3272 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
3273 followed by the others. */
3275 static void
3276 gomp_target_init (void)
3278 const char *prefix = "libgomp-plugin-";
3279 const char *suffix = SONAME_SUFFIX (1);
3280 const char *cur, *next;
3281 char *plugin_name;
3282 int i, new_num_devs;
3283 int num_devs = 0, num_devs_openmp;
3284 struct gomp_device_descr *devs = NULL;
3286 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
3287 return;
3289 cur = OFFLOAD_PLUGINS;
3290 if (*cur)
3291 do
3293 struct gomp_device_descr current_device;
3294 size_t prefix_len, suffix_len, cur_len;
3296 next = strchr (cur, ',');
3298 prefix_len = strlen (prefix);
3299 cur_len = next ? next - cur : strlen (cur);
3300 suffix_len = strlen (suffix);
3302 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3303 if (!plugin_name)
3305 num_devs = 0;
3306 break;
3309 memcpy (plugin_name, prefix, prefix_len);
3310 memcpy (plugin_name + prefix_len, cur, cur_len);
3311 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3313 if (gomp_load_plugin_for_device (&current_device, plugin_name))
3315 new_num_devs = current_device.get_num_devices_func ();
3316 if (new_num_devs >= 1)
3318 /* Augment DEVICES and NUM_DEVICES. */
3320 devs = realloc (devs, (num_devs + new_num_devs)
3321 * sizeof (struct gomp_device_descr));
3322 if (!devs)
3324 num_devs = 0;
3325 free (plugin_name);
3326 break;
3329 current_device.name = current_device.get_name_func ();
3330 /* current_device.capabilities has already been set. */
3331 current_device.type = current_device.get_type_func ();
3332 current_device.mem_map.root = NULL;
3333 current_device.state = GOMP_DEVICE_UNINITIALIZED;
3334 for (i = 0; i < new_num_devs; i++)
3336 current_device.target_id = i;
3337 devs[num_devs] = current_device;
3338 gomp_mutex_init (&devs[num_devs].lock);
3339 num_devs++;
3344 free (plugin_name);
3345 cur = next + 1;
3347 while (next);
3349 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3350 NUM_DEVICES_OPENMP. */
3351 struct gomp_device_descr *devs_s
3352 = malloc (num_devs * sizeof (struct gomp_device_descr));
3353 if (!devs_s)
3355 num_devs = 0;
3356 free (devs);
3357 devs = NULL;
3359 num_devs_openmp = 0;
3360 for (i = 0; i < num_devs; i++)
3361 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3362 devs_s[num_devs_openmp++] = devs[i];
3363 int num_devs_after_openmp = num_devs_openmp;
3364 for (i = 0; i < num_devs; i++)
3365 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3366 devs_s[num_devs_after_openmp++] = devs[i];
3367 free (devs);
3368 devs = devs_s;
3370 for (i = 0; i < num_devs; i++)
3372 /* The 'devices' array can be moved (by the realloc call) until we have
3373 found all the plugins, so registering with the OpenACC runtime (which
3374 takes a copy of the pointer argument) must be delayed until now. */
3375 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3376 goacc_register (&devs[i]);
3379 num_devices = num_devs;
3380 num_devices_openmp = num_devs_openmp;
3381 devices = devs;
3382 if (atexit (gomp_target_fini) != 0)
3383 gomp_fatal ("atexit failed");
3386 #else /* PLUGIN_SUPPORT */
3387 /* If dlfcn.h is unavailable we always fall back to host execution.
3388 The GOMP_target* routines are just stubs for this case. */
3389 static void
3390 gomp_target_init (void)
3393 #endif /* PLUGIN_SUPPORT */