/* Copyright (C) 2013-2020 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support for offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
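
#if 0
/* Illustrative sketch, not part of libgomp: how gomp_map_lookup widens a
   zero-length key.  For a key with host_start == host_end (e.g. a
   zero-length array section), a plain splay_tree_lookup would miss mappings
   that merely border the address, so the key is temporarily extended one
   byte to the right, then one byte to the left, before falling back to an
   exact lookup.  The helper name below is hypothetical.  */
static splay_tree_key
example_zero_len_lookup (splay_tree mem_map, uintptr_t addr)
{
  struct splay_tree_key_s key;
  key.host_start = addr;
  key.host_end = addr;	/* Zero-length section at ADDR.  */
  /* Finds mappings starting at ADDR, mappings ending at ADDR, and finally
     exact zero-length entries, in that order.  */
  return gomp_map_lookup (mem_map, &key);
}
#endif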

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets; chunks[i].start is the starting offset and
     chunks[i].end the ending offset relative to the tgt->tgt_start device
     address of chunks which are to be copied to buf and later copied to
     the device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in the chunks array, or -1 if coalesce buffering
     should not be performed.  */
  long chunk_cnt;
  /* During construction of the chunks array, how many memory regions are
     within the last chunk.  If there is just one memory region for a chunk,
     we copy it directly to the device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
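
#if 0
/* Illustrative sketch, not part of libgomp: how nearby regions coalesce.
   The offsets and sizes below are made up; CBUF is assumed to start with
   chunk_cnt == 0 and a sufficiently large chunks array.  Two regions whose
   gap is smaller than MAX_COALESCE_BUF_GAP end up in one chunk, while a
   region that would overlap an existing chunk disables coalescing
   altogether (chunk_cnt = -1).  */
static void
example_coalesce (struct gomp_coalesce_buf *cbuf)
{
  gomp_coalesce_buf_add (cbuf, 0, 1024);	/* Chunk [0, 1024).  */
  gomp_coalesce_buf_add (cbuf, 2048, 512);	/* Gap 1024 < 4096: chunk
						   grows to [0, 2560).  */
  gomp_coalesce_buf_add (cbuf, 65536, 128);	/* Gap too large: new chunk
						   [65536, 65664).  */
}
#endif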

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Binary search for the chunk that contains the destination
	     offset; if found, stage the data in the host-side buffer
	     instead of copying it to the device right away.  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			     "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
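
#if 0
/* Illustrative sketch, not part of libgomp: a direct, synchronous
   host-to-device copy of a single int.  DEVICEP is assumed initialized and
   locked, as in the real callers, and D is assumed to be a valid device
   address.  Passing a NULL async queue selects the synchronous plugin
   callback, and a NULL coalesce buffer bypasses the staging path.  */
static void
example_direct_copy (struct gomp_device_descr *devicep, void *d)
{
  int value = 42;
  gomp_copy_host2dev (devicep, NULL, d, &value, sizeof value, NULL);
}
#endif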

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->do_detach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
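
#if 0
/* Illustrative sketch, not part of libgomp: decoding a map kind.  With
   short (16-bit) map kinds the low byte is the GOMP_MAP_* code and the
   high byte the log2 of the required alignment; with 8-bit kinds the low
   three bits hold the code.  This mirrors the typemask/rshift values used
   in gomp_map_vars_internal; the helper name is hypothetical.  */
static void
example_decode_kind (void *kinds, int idx, bool short_mapkind)
{
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  int kind = get_kind (short_mapkind, kinds, idx);
  int map_code = kind & typemask;		  /* e.g. GOMP_MAP_TO.  */
  size_t align = (size_t) 1 << (kind >> rshift);  /* Alignment in bytes.  */
  (void) map_code;
  (void) align;
}
#endif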

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("pointer target not mapped for attach");
	}

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}
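
#if 0
/* Illustrative sketch, not part of libgomp: the attach-count layout.  For a
   mapped struct occupying [host_start, host_end), one counter is kept per
   pointer-sized slot, so a pointer field's counter index is its byte offset
   from host_start divided by sizeof (void *).  The struct and helper below
   are made up; offsets assume an LP64 ABI.  */
struct example_s
{
  int x;	/* Offset 0, padded to 8 bytes.  */
  char *p;	/* Offset 8 -> attach_count index 1.  */
};

static size_t
example_attach_idx (splay_tree_key n, uintptr_t field_addr)
{
  return (field_addr - n->host_start) / sizeof (void *);
}
#endif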

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
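
#if 0
/* Illustrative sketch, not part of libgomp: how the launch path consumes
   gomp_map_val.  For a target region (GOMP_MAP_VARS_TARGET),
   gomp_map_vars_internal builds an array of device pointers at the start of
   the freshly allocated device block, one per mapping, each computed by
   gomp_map_val; the helper name below is hypothetical.  */
static void
example_fill_arg_array (struct target_mem_desc *tgt, void **hostaddrs,
			uintptr_t *devaddrs, size_t mapnum)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    devaddrs[i] = gomp_map_val (tgt, hostaddrs, i);
}
#endif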

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
					     & typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH)
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  gomp_coalesce_buf_add (&cbuf, tgt_size,
				 cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_POINTER;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				kind & typemask, NULL);
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence, skip entry - including its MAP_POINTER,
		 when existing.  */
	      tgt->list[i].offset = OFFSET_POINTER;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
		      - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily
		   test 'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above.  Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start
					    - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].do_detach
			= (pragma_kind != GOMP_MAP_VARS_ENTER_DATA);
		      n->refcount++;
		    }
		  else
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  gomp_attach_pointer (devicep, aq, mem_map, n,
				       (uintptr_t) hostaddrs[i], sizes[i],
				       cbufp);
		  continue;
		}
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, cbufp);
	    else
	      {
		k->aux = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target
		       address of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].do_detach = false;
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->dynamic_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, aq,
				      (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind,
							 kinds, j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  tgt->list[j].do_detach = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt, aq,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j], cbufp);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, size_t mapnum,
		     void **hostaddrs, void **devaddrs, size_t *sizes,
		     void *kinds, bool short_mapkind,
		     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (&devicep->mem_map,
			   (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].do_detach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
			     + tgt->list[i].offset,
			     k->refcount == 1, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
		       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
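
#if 0
/* Illustrative sketch, not part of libgomp: the shape of HOST_TABLE as
   consumed above.  It is four pointers: [start, end) of the host function
   address table, then [start, end) of the host variable table, where each
   variable contributes an (address, size) pair and the most significant
   bit of the size flags an "omp declare target link" variable.  The struct
   name is made up; the real table is just an array of four pointers.  */
struct example_host_table
{
  void **funcs_start;	/* host_func_table.  */
  void **funcs_end;	/* host_funcs_end.  */
  void **vars_start;	/* host_var_table: (addr, size) pairs.  */
  void **vars_end;	/* host_vars_end.  */
};
#endif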

/* Unload the mappings described by TARGET_DATA from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from the device all currently registered images.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}
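
/* For illustration (values invented for this example): three firstprivate
   items with sizes 4, 13 and 8 and alignments 4, 1 and 8 yield
   *tgt_align == 8 and *tgt_size == 32: 4, then 13 appended at offset 4
   giving 17, then 17 rounded up to 24 before appending the final 8 bytes.  */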

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE into the block TGT, updating
   HOSTADDRS to point at the private copies.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}

/* Helper function of GOMP_target{,_ext} routines: look up the device
   address for HOST_FN on DEVICEP, or return NULL if the function cannot
   be run there.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
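
/* Mask out of FLAGS any bits that DEVICEP cannot support; currently this
   only clears GOMP_TARGET_FLAG_NOWAIT when the plugin provides no
   asynchronous execution hook.  */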

static inline unsigned int
clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
{
  /* If we cannot run asynchronously, simply ignore nowait.  */
  if (devicep != NULL && devicep->async_run_func == NULL)
    flags &= ~GOMP_TARGET_FLAG_NOWAIT;

  return flags;
}

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is an array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element is the actual value.  The last
   element of the array is a single NULL.  Among the device-independent
   arguments can be for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */
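
/* A minimal sketch of one possible ARGS layout under the encoding above,
   with identifier and value packed into a single element (using the
   GOMP_TARGET_ARG_* macros from gomp-constants.h); real arrays are emitted
   by the compiler, not written by hand:

     void *example_args[] =
       {
	 (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
			       | GOMP_TARGET_ARG_NUM_TEAMS
			       | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
	 (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
			       | GOMP_TARGET_ARG_THREAD_LIMIT
			       | (32 << GOMP_TARGET_ARG_VALUE_SHIFT)),
	 NULL
       };
*/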

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, we need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}
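
/* Handle the start of a #pragma omp target data construct: map the listed
   variables on DEVICE and push the resulting descriptor onto the chain that
   GOMP_target_end_data later pops.  GOMP_target_data_ext below is the
   16-bit-kinds variant of the same entry point.  */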

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
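
/* Handle #pragma omp target exit data semantics for the MAPNUM mappings in
   HOSTADDRS/SIZES/KINDS: decrement reference counts, copy back 'from'
   mappings whose count dropped to zero (unconditionally for 'always from'),
   and remove variables that are no longer referenced.  */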

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    gomp_remove_var (devicep, k);

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff))
	      break;
	  gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
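
/* Body of a target task: run or complete the target task passed as DATA.
   Returns true if the region was launched asynchronously and will be
   completed later by a plugin callback, false if all work for the task is
   already done.  */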

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL,
		       &ttask->sizes[i], &ttask->kinds[i], true,
		       GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}
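
/* Handle GOMP_teams as emitted in the body of a target region: record a
   nonzero THREAD_LIMIT in the ICV (values above INT_MAX are recorded as
   UINT_MAX); NUM_TEAMS is currently ignored in this implementation.  */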

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
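
/* OpenMP 4.5 device memory routines.  omp_target_alloc returns SIZE bytes
   of storage usable on device DEVICE_NUM, falling back to plain malloc for
   the host and for shared-memory devices, or NULL on failure.  */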

void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}

int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
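
/* A minimal usage sketch from the caller's side (illustrative only; N and
   buf are assumed to be defined by the caller):

     int dev = omp_get_default_device ();
     double *p = (double *) omp_target_alloc (N * sizeof (double), dev);
     if (p != NULL
	 && omp_target_memcpy (p, buf, N * sizeof (double), 0, 0,
			       dev, omp_get_initial_device ()) == 0)
       {
	 ... use p in target regions via is_device_ptr ...
       }
*/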

static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
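
/* To illustrate the slice arithmetic above (example values, not from any
   particular caller): with num_dims == 2, element_size == 4,
   src_dimensions == {10, 8} and volume == {2, 3}, src_slice becomes
   4 * 8 == 32 bytes per row, so each of the two recursive 1-D calls copies
   3 * 4 == 12 bytes and the row offset then advances by 32 bytes.

   omp_target_memcpy_rect below copies a NUM_DIMS-dimensional rectangular
   subvolume between host and/or device memory; per OpenMP, when both DST
   and SRC are NULL it instead reports the maximum number of dimensions
   supported, for which this implementation returns INT_MAX.  */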

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}

int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
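
/* A hedged usage sketch for the associate/disassociate pair (dev_buf is
   assumed to come from omp_target_alloc on the same device):

     if (omp_target_associate_ptr (host_buf, dev_buf, nbytes, 0, dev) == 0)
       {
	 ... target regions now treat host_buf as already mapped ...
	 omp_target_disassociate_ptr (host_buf, dev);
       }

   omp_target_disassociate_ptr below removes such an association again; it
   only succeeds for mappings created by omp_target_associate_ptr.  */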

int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= gomp_get_num_devices ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handles.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))
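
  /* For example, DLSYM (init_device) expands (roughly) to

       if (!(device->init_device_func
	     = dlsym (plugin_handle, "GOMP_OFFLOAD_init_device")))
	 goto dl_fail;

     i.e. each plugin entry point is resolved by symbol name and stored in
     the matching *_func field of DEVICE.  */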

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}

/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load these.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devices = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
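
	/* For example (assuming a Linux-style SONAME_SUFFIX and an "nvptx"
	   entry in OFFLOAD_PLUGINS), this builds a name such as
	   "libgomp-plugin-nvptx.so.1" for dlopen to resolve below.  */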

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    new_num_devices = current_device.get_num_devices_func ();
	    if (new_num_devices >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devices = realloc (devices, (num_devices + new_num_devices)
				   * sizeof (struct gomp_device_descr));
		if (!devices)
		  {
		    num_devices = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		for (i = 0; i < new_num_devices; i++)
		  {
		    current_device.target_id = i;
		    devices[num_devices] = current_device;
		    gomp_mutex_init (&devices[num_devices].lock);
		    num_devices++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devices[i]);
    }

  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */